From 139ddd726a8f103a52e7f0096b8e470d2bee0160 Mon Sep 17 00:00:00 2001 From: Maxwell Salzberg Date: Tue, 11 Oct 2011 13:16:06 -0700 Subject: [PATCH] wip oembed refactor. still need to make a oembed_helper, and move some tests to their new home, but e but we are looking preeeetttttyyyy good. --- app/helpers/markdownify_helper.rb | 9 +---- app/models/jobs/gather_o_embed_data.rb | 37 ++----------------- app/models/o_embed_cache.rb | 22 +++++++++++ app/models/post.rb | 7 +--- app/models/status_message.rb | 13 +++++++ config/initializers/oembed.rb | 18 ++++++++- ...0111011193702_add_oembed_cache_to_posts.rb | 9 +++++ db/schema.rb | 3 +- lib/diaspora/markdownify.rb | 15 +------- spec/models/status_message_spec.rb | 8 ++++ 10 files changed, 79 insertions(+), 62 deletions(-) create mode 100644 db/migrate/20111011193702_add_oembed_cache_to_posts.rb diff --git a/app/helpers/markdownify_helper.rb b/app/helpers/markdownify_helper.rb index eba13fc7d..9ba17f019 100644 --- a/app/helpers/markdownify_helper.rb +++ b/app/helpers/markdownify_helper.rb @@ -31,14 +31,7 @@ module MarkdownifyHelper return '' if message.blank? - #renderer = Redcarpet::Render::HTML.new(render_options) - if render_options[:oembed] - puts "oembed" - renderer = Diaspora::Markdownify::HTMLwithOEmbed.new(render_options) - else - puts "not oembed" - renderer = Diaspora::Markdownify::HTML.new(render_options) - end + renderer = Diaspora::Markdownify::HTML.new(render_options) markdown = Redcarpet::Markdown.new(renderer, markdown_options) message = markdown.render(message).html_safe diff --git a/app/models/jobs/gather_o_embed_data.rb b/app/models/jobs/gather_o_embed_data.rb index fd07f7462..72fe83d29 100644 --- a/app/models/jobs/gather_o_embed_data.rb +++ b/app/models/jobs/gather_o_embed_data.rb @@ -7,39 +7,10 @@ module Jobs class GatherOEmbedData < Base @queue = :http_service - class GatherOEmbedRenderer < Redcarpet::Render::HTML - include ActionView::Helpers::TextHelper - include ActionView::Helpers::TagHelper - - def isHttp?(url) - URI.parse(url).scheme.downcase == 'http' - end - - def autolink(link, type) - url = auto_link(link, :link => :urls).scan(/href=["']?((?:.(?!["']?\s+(?:\S+)=|[>"']))+.)["']?/).first.first - url = CGI::unescapeHTML(url) - - return url if OEmbedCache.exists?(:url => url) or not isHttp?(url) - - begin - res = ::OEmbed::Providers.get(url, {:maxwidth => 420, :maxheight => 420, :frame => 1, :iframe => 1}) - rescue Exception => e - # noop - else - data = res.fields - data['trusted_endpoint_url'] = res.provider.endpoint - cache = OEmbedCache.new(:url => url, :data => data) - cache.save - end - - return url - end - end - - def self.perform(text) - renderer = GatherOEmbedRenderer.new({}) - markdown = Redcarpet::Markdown.new(renderer, {:autolink => true}) - message = markdown.render(text) + def self.perform(post_id, url) + post = Post.find(post_id) + post.o_embed_cache = OEmbedCache.find_or_create_by_url(url) + post.save end end end diff --git a/app/models/o_embed_cache.rb b/app/models/o_embed_cache.rb index 79f472f8c..6ae29b8a3 100644 --- a/app/models/o_embed_cache.rb +++ b/app/models/o_embed_cache.rb @@ -1,3 +1,25 @@ class OEmbedCache < ActiveRecord::Base serialize :data + attr_accessible :url + + has_many :posts + + def self.find_or_create_by_url(url) + cache = OEmbedCache.find_or_build_by_url(url) + return cache if cache.persisted? + cache.fetch_and_save_oembed_data! + cache + end + + def fetch_and_save_oembed_data! + begin + response = OEmbed::Providers.get(self.url, {:maxwidth => 420, :maxheight => 420, :frame => 1, :iframe => 1}) + rescue Exception => e + # noop + else + self.data = response.fields + self.data['trusted_endpoint_url'] = response.provider.endpoint + cache.save + end + end end diff --git a/app/models/post.rb b/app/models/post.rb index 20613b5a5..7cf9d4185 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -28,6 +28,8 @@ class Post < ActiveRecord::Base has_many :reshares, :class_name => "Reshare", :foreign_key => :root_guid, :primary_key => :guid has_many :resharers, :class_name => 'Person', :through => :reshares, :source => :author + has_one :o_embed_cache + belongs_to :author, :class_name => 'Person' validates :guid, :uniqueness => true @@ -66,11 +68,6 @@ class Post < ActiveRecord::Base @reshare_count ||= Post.where(:root_guid => self.guid).count end - def text= nd - Resque.enqueue(Jobs::GatherOEmbedData, nd) - write_attribute(:text, nd) - end - def diaspora_handle= nd self.author = Person.where(:diaspora_handle => nd).first write_attribute(:diaspora_handle, nd) diff --git a/app/models/status_message.rb b/app/models/status_message.rb index 1c0f6b0be..72ac5cf67 100644 --- a/app/models/status_message.rb +++ b/app/models/status_message.rb @@ -22,10 +22,13 @@ class StatusMessage < Post validate :presence_of_content attr_accessible :text, :provider_display_name + attr_accessor :oembed_url serialize :youtube_titles, Hash after_create :create_mentions + after_create :queue_gather_oembed_data, :if => :contains_oembed_url_in_text? + #scopes scope :where_person_is_mentioned, lambda{|person| joins(:mentions).where(:mentions => {:person_id => person.id})} @@ -153,6 +156,16 @@ class StatusMessage < Post self.text.blank? && self.photos.blank? end + def queue_gather_oembed_data + Resque.enqueue(Jobs::GatherOEmbedData, self.oembed_url) + end + + def contains_oembed_url_in_text? + require 'uri' + urls = URI.extract(self.raw_message, ['http', 'https']) + self.oembed_url = urls.find{|url| ENDPOINT_HOSTS_STRING.match(URI.parse(url).host)} + end + protected def presence_of_content diff --git a/config/initializers/oembed.rb b/config/initializers/oembed.rb index 5db195434..8947649c8 100644 --- a/config/initializers/oembed.rb +++ b/config/initializers/oembed.rb @@ -1,4 +1,20 @@ require 'oembed' +require 'uri' + OEmbed::Providers.register_all OEmbed::Providers.register_fallback(OEmbed::ProviderDiscovery) - +# +# SECURITY NOTICE! CROSS-SITE SCRIPTING! +# these endpoints may inject html code into our page +# note that 'trusted_endpoint_url' is the only information +# in OEmbed that we can trust. anything else may be spoofed! +SECURE_ENDPOINTS = [::OEmbed::Providers::Youtube.endpoint, + ::OEmbed::Providers::Viddler.endpoint, + ::OEmbed::Providers::Qik.endpoint, + ::OEmbed::Providers::Revision3.endpoint, + ::OEmbed::Providers::Hulu.endpoint, + ::OEmbed::Providers::Vimeo.endpoint, + 'http://soundcloud.com/oembed', + 'http://cubbi.es/oembed' + ] +ENDPOINT_HOSTS_STRING = SECURE_ENDPOINTS.map{|e| URI.parse(e.split('{')[0]).host}.to_s diff --git a/db/migrate/20111011193702_add_oembed_cache_to_posts.rb b/db/migrate/20111011193702_add_oembed_cache_to_posts.rb new file mode 100644 index 000000000..371faef7c --- /dev/null +++ b/db/migrate/20111011193702_add_oembed_cache_to_posts.rb @@ -0,0 +1,9 @@ +class AddOembedCacheToPosts < ActiveRecord::Migration + def self.up + add_column :posts, :o_embed_cache_id, :integer + end + + def self.down + remove_column :posts, :o_embed_cache_id + end +end diff --git a/db/schema.rb b/db/schema.rb index 5b997a392..547f8cf09 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended to check this file into your version control system. -ActiveRecord::Schema.define(:version => 20111003232053) do +ActiveRecord::Schema.define(:version => 20111011193702) do create_table "aspect_memberships", :force => true do |t| t.integer "aspect_id", :null => false @@ -290,6 +290,7 @@ ActiveRecord::Schema.define(:version => 20111003232053) do t.string "status_message_guid" t.integer "likes_count", :default => 0 t.integer "comments_count", :default => 0 + t.integer "o_embed_cache_id" end add_index "posts", ["author_id", "root_guid"], :name => "index_posts_on_author_id_and_root_guid", :unique => true diff --git a/lib/diaspora/markdownify.rb b/lib/diaspora/markdownify.rb index 5958b195b..974f97e8a 100644 --- a/lib/diaspora/markdownify.rb +++ b/lib/diaspora/markdownify.rb @@ -25,24 +25,11 @@ module Diaspora url = auto_link(link, :link => :urls).scan(/href=["']?((?:.(?!["']?\s+(?:\S+)=|[>"']))+.)["']?/).first.first url = CGI::unescapeHTML(url) - # SECURITY NOTICE! CROSS-SITE SCRIPTING! - # these endpoints may inject html code into our page - secure_endpoints = [::OEmbed::Providers::Youtube.endpoint, - ::OEmbed::Providers::Viddler.endpoint, - ::OEmbed::Providers::Qik.endpoint, - ::OEmbed::Providers::Revision3.endpoint, - ::OEmbed::Providers::Hulu.endpoint, - ::OEmbed::Providers::Vimeo.endpoint, - 'http://soundcloud.com/oembed', - ] - - # note that 'trusted_endpoint_url' is the only information - # in OEmbed that we can trust. anything else may be spoofed! cache = OEmbedCache.find_by_url(url) if not cache.nil? and cache.data.has_key?('type') case cache.data['type'] when 'video', 'rich' - if secure_endpoints.include?(cache.data['trusted_endpoint_url']) and cache.data.has_key?('html') + if SECURE_ENDPOINTS.include?(cache.data['trusted_endpoint_url']) and cache.data.has_key?('html') rep = raw(cache.data['html']) elsif cache.data.has_key?('thumbnail_url') img_options = {} diff --git a/spec/models/status_message_spec.rb b/spec/models/status_message_spec.rb index b4d653488..5f1e1a4ac 100644 --- a/spec/models/status_message_spec.rb +++ b/spec/models/status_message_spec.rb @@ -312,4 +312,12 @@ STR @status_message.after_dispatch(alice) end end + + describe '#contains_url_in_text?' do + it 'returns an array of all urls found in the raw message' do + sm = Factory(:status_message, :text => 'http://youtube.com is so cool. so is https://joindiaspora.com') + sm.contains_oembed_url_in_text?.should_not be_nil + sm.oembed_url.should == 'http://youtube.com' + end + end end