From 74a6f42501d67e68aa5fada565477548b4135daf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonne=20Ha=C3=9F?= Date: Sat, 6 Dec 2014 17:53:48 +0100 Subject: [PATCH] Bye opengraph_parser, hi open_graph_reader opengraph_parser is basically unmainted, issues are ignored or deliberately closed without fixing. It pollutes the global namespace and has no verification of correctness. The opengraph gem has basically the same issues, not really maintained, unreleased patches on master since over a year, not really smart either. So I created my own version and while at it, why not strive try to be complete and robust, although it's still a work in progress. This also improves general URL detection by parsing them from the message after stripping markdown. An additional dependency was added to support fetching sites that require cookies to work at all. For the same reason Faraday's default redirect limit was bumped. --- Gemfile | 13 +++++++------ Gemfile.lock | 16 ++++++++++++---- app/models/open_graph_cache.rb | 15 ++++++++------- config/initializers/faraday.rb | 3 ++- config/initializers/open_graph_reader.rb | 4 ++++ lib/diaspora/message_renderer.rb | 2 +- spec/lib/diaspora/message_renderer_spec.rb | 5 +++++ spec/workers/gather_open_graph_data_spec.rb | 13 ++++++++----- 8 files changed, 47 insertions(+), 24 deletions(-) create mode 100644 config/initializers/open_graph_reader.rb diff --git a/Gemfile b/Gemfile index 101d789de..88e1d2e7d 100644 --- a/Gemfile +++ b/Gemfile @@ -110,12 +110,12 @@ gem 'messagebus_ruby_api', '1.0.3' # Parsing -gem 'nokogiri', '1.6.4.1' -gem 'redcarpet', '3.2.0' -gem 'twitter-text', '1.10.0' -gem 'roxml', '3.1.6' -gem 'ruby-oembed', '0.8.11' -gem 'opengraph_parser', '0.2.3' +gem 'nokogiri', '1.6.4.1' +gem 'redcarpet', '3.2.0' +gem 'twitter-text', '1.10.0' +gem 'roxml', '3.1.6' +gem 'ruby-oembed', '0.8.11' +gem 'open_graph_reader', '0.3.1' # Services @@ -140,6 +140,7 @@ gem 'acts-as-taggable-on', '3.4.2' gem 'addressable', '2.3.6', :require => 'addressable/uri' gem 'faraday', '0.9.0' gem 'faraday_middleware', '0.9.1' +gem 'faraday-cookie_jar', '0.0.6' gem 'typhoeus', '0.6.9' # Views diff --git a/Gemfile.lock b/Gemfile.lock index 3dc8c4732..4d1476c36 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -141,6 +141,8 @@ GEM http_parser.rb (~> 0.6) nokogiri (~> 1.6) diff-lcs (1.2.5) + domain_name (0.5.22) + unf (>= 0.0.5, < 1.0.0) eco (1.0.0) coffee-script eco-source @@ -165,6 +167,9 @@ GEM railties (>= 3.0.0) faraday (0.9.0) multipart-post (>= 1.2, < 3) + faraday-cookie_jar (0.0.6) + faraday (>= 0.7.4) + http-cookie (~> 1.0.0) faraday_middleware (0.9.1) faraday (>= 0.7.4, < 0.10) ffi (1.9.6) @@ -268,6 +273,8 @@ GEM hike (1.2.3) hiredis (0.5.2) hitimes (1.2.2) + http-cookie (1.0.2) + domain_name (~> 0.5) http_accept_language (2.0.2) http_parser.rb (0.6.0) i18n (0.6.11) @@ -361,9 +368,9 @@ GEM omniauth-oauth (~> 1.0) omniauth-wordpress (0.2.1) omniauth-oauth2 (~> 1.1.0) - opengraph_parser (0.2.3) - addressable - nokogiri + open_graph_reader (0.3.1) + faraday (~> 0.9.0) + nokogiri (~> 1.6) opennebula (4.10.1) json nokogiri @@ -618,6 +625,7 @@ DEPENDENCIES entypo-rails (= 2.2.2) factory_girl_rails (= 4.5.0) faraday (= 0.9.0) + faraday-cookie_jar (= 0.0.6) faraday_middleware (= 0.9.1) fixture_builder (= 0.3.6) fog (= 1.25.0) @@ -650,7 +658,7 @@ DEPENDENCIES omniauth-tumblr (= 1.1) omniauth-twitter (= 1.0.1) omniauth-wordpress (= 0.2.1) - opengraph_parser (= 0.2.3) + open_graph_reader (= 0.3.1) pry pry-byebug pry-debundle diff --git a/app/models/open_graph_cache.rb b/app/models/open_graph_cache.rb index 072d30a98..b1d357bbf 100644 --- a/app/models/open_graph_cache.rb +++ b/app/models/open_graph_cache.rb @@ -30,16 +30,17 @@ class OpenGraphCache < ActiveRecord::Base end def fetch_and_save_opengraph_data! - response = OpenGraph.new(self.url) + object = OpenGraphReader.fetch!(self.url) - return if response.blank? || response.type.blank? + return unless object - self.title = response.title.truncate(255) - self.ob_type = response.type - self.image = response.images[0] - self.url = response.url - self.description = response.description + self.title = object.og.title.truncate(255) + self.ob_type = object.og.type + self.image = object.og.image.url + self.url = object.og.url + self.description = object.og.description self.save + rescue OpenGraphReader::NoOpenGraphDataError end end diff --git a/config/initializers/faraday.rb b/config/initializers/faraday.rb index d22e92e35..a1f57bb21 100644 --- a/config/initializers/faraday.rb +++ b/config/initializers/faraday.rb @@ -18,6 +18,7 @@ options = { } Faraday.default_connection = Faraday::Connection.new(options) do |b| - b.use FaradayMiddleware::FollowRedirects + b.use FaradayMiddleware::FollowRedirects, limit: 8 + b.use :cookie_jar b.adapter Faraday.default_adapter end diff --git a/config/initializers/open_graph_reader.rb b/config/initializers/open_graph_reader.rb new file mode 100644 index 000000000..68af98f12 --- /dev/null +++ b/config/initializers/open_graph_reader.rb @@ -0,0 +1,4 @@ +OpenGraphReader.configure do |config| + config.synthesize_title = true + config.synthesize_image_url = true +end diff --git a/lib/diaspora/message_renderer.rb b/lib/diaspora/message_renderer.rb index f0eb11bd8..820da2a51 100644 --- a/lib/diaspora/message_renderer.rb +++ b/lib/diaspora/message_renderer.rb @@ -228,7 +228,7 @@ module Diaspora # Extracts all the urls from the raw message and return them in the form of a string # Different URLs are seperated with a space def urls - @urls ||= Twitter::Extractor.extract_urls(@raw_message) + @urls ||= Twitter::Extractor.extract_urls(plain_text_without_markdown) end def raw diff --git a/spec/lib/diaspora/message_renderer_spec.rb b/spec/lib/diaspora/message_renderer_spec.rb index 129415d63..fc73e5dff 100644 --- a/spec/lib/diaspora/message_renderer_spec.rb +++ b/spec/lib/diaspora/message_renderer_spec.rb @@ -178,5 +178,10 @@ describe Diaspora::MessageRenderer do text = "[Perdu](http://perdu.com/) and [DuckDuckGo](https://duckduckgo.com/) can help you" expect(message(text).urls).to eql ["http://perdu.com/", "https://duckduckgo.com/"] end + + it "extracts urls from continous markdown correctly" do + text = "[![Image](https://www.antifainfoblatt.de/sites/default/files/public/styles/front_full/public/jockpalfreeman.png?itok=OPjHKpmt)](https://www.antifainfoblatt.de/artikel/%E2%80%9Eschlie%C3%9Flich-waren-es-zu-viele%E2%80%9C)" + expect(message(text).urls).to eq ["https://www.antifainfoblatt.de/sites/default/files/public/styles/front_full/public/jockpalfreeman.png?itok=OPjHKpmt", "https://www.antifainfoblatt.de/artikel/%E2%80%9Eschlie%C3%9Flich-waren-es-zu-viele%E2%80%9C"] + end end end diff --git a/spec/workers/gather_open_graph_data_spec.rb b/spec/workers/gather_open_graph_data_spec.rb index 3416047c4..ac7c4eecd 100644 --- a/spec/workers/gather_open_graph_data_spec.rb +++ b/spec/workers/gather_open_graph_data_spec.rb @@ -3,7 +3,7 @@ describe Workers::GatherOpenGraphData do before do @ogsite_title = 'Homepage' @ogsite_type = 'website' - @ogsite_image = '/img/something.png' + @ogsite_image = 'http://www.we-support-open-graph.com/img/something.png' @ogsite_url = 'http://www.we-support-open-graph.com' @ogsite_description = 'Homepage' @@ -31,9 +31,12 @@ describe Workers::GatherOpenGraphData do @status_message = FactoryGirl.create(:status_message) - stub_request(:get, @ogsite_url).to_return(:status => 200, :body => @ogsite_body) - stub_request(:get, @no_open_graph_url).to_return(:status => 200, :body => 'hello there') - stub_request(:get, @oglong_url).to_return(:status => 200, :body => @oglong_body) + stub_request(:head, @ogsite_url).to_return(status: 200, body: "", headers: {'Content-Type' => 'text/html; utf-8'}) + stub_request(:get, @ogsite_url).to_return(status: 200, body: @ogsite_body, headers: {'Content-Type' => 'text/html; utf-8'}) + stub_request(:head, @no_open_graph_url).to_return(status: 200, body: "", headers: {'Content-Type' => 'text/html; utf-8'}) + stub_request(:get, @no_open_graph_url).to_return(:status => 200, :body => 'Hihello there', headers: {'Content-Type' => 'text/html; utf-8'}) + stub_request(:head, @oglong_url).to_return(status: 200, body: "", headers: {'Content-Type' => 'text/html; utf-8'}) + stub_request(:get, @oglong_url).to_return(status: 200, body: @oglong_body, headers: {'Content-Type' => 'text/html; utf-8'}) end @@ -59,7 +62,7 @@ describe Workers::GatherOpenGraphData do expect(ogc.title).to eq(@ogsite_title) expect(ogc.ob_type).to eq(@ogsite_type) - expect(ogc.image).to eq(@ogsite_url + @ogsite_image) + expect(ogc.image).to eq(@ogsite_image) expect(ogc.url).to eq(@ogsite_url) expect(ogc.description).to eq(@ogsite_description)