Bye opengraph_parser, hi open_graph_reader
opengraph_parser is basically unmaintained; issues are ignored or deliberately closed without fixing. It pollutes the global namespace and has no verification of correctness. The opengraph gem has basically the same issues: it is not really maintained, has had unreleased patches on master for over a year, and is not really smart either. So I created my own version and, while at it, why not strive to be complete and robust, although it's still a work in progress. This also improves general URL detection by parsing them from the message after stripping markdown. An additional dependency was added to support fetching sites that require cookies to work at all. For the same reason Faraday's default redirect limit was bumped.
This commit is contained in:
parent
60a5d2a5ba
commit
74a6f42501
8 changed files with 47 additions and 24 deletions
13
Gemfile
13
Gemfile
|
|
@ -110,12 +110,12 @@ gem 'messagebus_ruby_api', '1.0.3'
|
|||
|
||||
# Parsing
|
||||
|
||||
gem 'nokogiri', '1.6.4.1'
|
||||
gem 'redcarpet', '3.2.0'
|
||||
gem 'twitter-text', '1.10.0'
|
||||
gem 'roxml', '3.1.6'
|
||||
gem 'ruby-oembed', '0.8.11'
|
||||
gem 'opengraph_parser', '0.2.3'
|
||||
gem 'nokogiri', '1.6.4.1'
|
||||
gem 'redcarpet', '3.2.0'
|
||||
gem 'twitter-text', '1.10.0'
|
||||
gem 'roxml', '3.1.6'
|
||||
gem 'ruby-oembed', '0.8.11'
|
||||
gem 'open_graph_reader', '0.3.1'
|
||||
|
||||
|
||||
# Services
|
||||
|
|
@ -140,6 +140,7 @@ gem 'acts-as-taggable-on', '3.4.2'
|
|||
gem 'addressable', '2.3.6', :require => 'addressable/uri'
|
||||
gem 'faraday', '0.9.0'
|
||||
gem 'faraday_middleware', '0.9.1'
|
||||
gem 'faraday-cookie_jar', '0.0.6'
|
||||
gem 'typhoeus', '0.6.9'
|
||||
|
||||
# Views
|
||||
|
|
|
|||
16
Gemfile.lock
16
Gemfile.lock
|
|
@ -141,6 +141,8 @@ GEM
|
|||
http_parser.rb (~> 0.6)
|
||||
nokogiri (~> 1.6)
|
||||
diff-lcs (1.2.5)
|
||||
domain_name (0.5.22)
|
||||
unf (>= 0.0.5, < 1.0.0)
|
||||
eco (1.0.0)
|
||||
coffee-script
|
||||
eco-source
|
||||
|
|
@ -165,6 +167,9 @@ GEM
|
|||
railties (>= 3.0.0)
|
||||
faraday (0.9.0)
|
||||
multipart-post (>= 1.2, < 3)
|
||||
faraday-cookie_jar (0.0.6)
|
||||
faraday (>= 0.7.4)
|
||||
http-cookie (~> 1.0.0)
|
||||
faraday_middleware (0.9.1)
|
||||
faraday (>= 0.7.4, < 0.10)
|
||||
ffi (1.9.6)
|
||||
|
|
@ -268,6 +273,8 @@ GEM
|
|||
hike (1.2.3)
|
||||
hiredis (0.5.2)
|
||||
hitimes (1.2.2)
|
||||
http-cookie (1.0.2)
|
||||
domain_name (~> 0.5)
|
||||
http_accept_language (2.0.2)
|
||||
http_parser.rb (0.6.0)
|
||||
i18n (0.6.11)
|
||||
|
|
@ -361,9 +368,9 @@ GEM
|
|||
omniauth-oauth (~> 1.0)
|
||||
omniauth-wordpress (0.2.1)
|
||||
omniauth-oauth2 (~> 1.1.0)
|
||||
opengraph_parser (0.2.3)
|
||||
addressable
|
||||
nokogiri
|
||||
open_graph_reader (0.3.1)
|
||||
faraday (~> 0.9.0)
|
||||
nokogiri (~> 1.6)
|
||||
opennebula (4.10.1)
|
||||
json
|
||||
nokogiri
|
||||
|
|
@ -618,6 +625,7 @@ DEPENDENCIES
|
|||
entypo-rails (= 2.2.2)
|
||||
factory_girl_rails (= 4.5.0)
|
||||
faraday (= 0.9.0)
|
||||
faraday-cookie_jar (= 0.0.6)
|
||||
faraday_middleware (= 0.9.1)
|
||||
fixture_builder (= 0.3.6)
|
||||
fog (= 1.25.0)
|
||||
|
|
@ -650,7 +658,7 @@ DEPENDENCIES
|
|||
omniauth-tumblr (= 1.1)
|
||||
omniauth-twitter (= 1.0.1)
|
||||
omniauth-wordpress (= 0.2.1)
|
||||
opengraph_parser (= 0.2.3)
|
||||
open_graph_reader (= 0.3.1)
|
||||
pry
|
||||
pry-byebug
|
||||
pry-debundle
|
||||
|
|
|
|||
|
|
@ -30,16 +30,17 @@ class OpenGraphCache < ActiveRecord::Base
|
|||
end
|
||||
|
||||
def fetch_and_save_opengraph_data!
|
||||
response = OpenGraph.new(self.url)
|
||||
object = OpenGraphReader.fetch!(self.url)
|
||||
|
||||
return if response.blank? || response.type.blank?
|
||||
return unless object
|
||||
|
||||
self.title = response.title.truncate(255)
|
||||
self.ob_type = response.type
|
||||
self.image = response.images[0]
|
||||
self.url = response.url
|
||||
self.description = response.description
|
||||
self.title = object.og.title.truncate(255)
|
||||
self.ob_type = object.og.type
|
||||
self.image = object.og.image.url
|
||||
self.url = object.og.url
|
||||
self.description = object.og.description
|
||||
|
||||
self.save
|
||||
rescue OpenGraphReader::NoOpenGraphDataError
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ options = {
|
|||
}
|
||||
|
||||
Faraday.default_connection = Faraday::Connection.new(options) do |b|
|
||||
b.use FaradayMiddleware::FollowRedirects
|
||||
b.use FaradayMiddleware::FollowRedirects, limit: 8
|
||||
b.use :cookie_jar
|
||||
b.adapter Faraday.default_adapter
|
||||
end
|
||||
|
|
|
|||
4
config/initializers/open_graph_reader.rb
Normal file
4
config/initializers/open_graph_reader.rb
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
OpenGraphReader.configure do |config|
|
||||
config.synthesize_title = true
|
||||
config.synthesize_image_url = true
|
||||
end
|
||||
|
|
@ -228,7 +228,7 @@ module Diaspora
|
|||
# Extracts all the urls from the raw message and return them in the form of a string
|
||||
# Different URLs are seperated with a space
|
||||
def urls
|
||||
@urls ||= Twitter::Extractor.extract_urls(@raw_message)
|
||||
@urls ||= Twitter::Extractor.extract_urls(plain_text_without_markdown)
|
||||
end
|
||||
|
||||
def raw
|
||||
|
|
|
|||
|
|
@ -178,5 +178,10 @@ describe Diaspora::MessageRenderer do
|
|||
text = "[Perdu](http://perdu.com/) and [DuckDuckGo](https://duckduckgo.com/) can help you"
|
||||
expect(message(text).urls).to eql ["http://perdu.com/", "https://duckduckgo.com/"]
|
||||
end
|
||||
|
||||
it "extracts urls from continous markdown correctly" do
|
||||
text = "[](https://www.antifainfoblatt.de/artikel/%E2%80%9Eschlie%C3%9Flich-waren-es-zu-viele%E2%80%9C)"
|
||||
expect(message(text).urls).to eq ["https://www.antifainfoblatt.de/sites/default/files/public/styles/front_full/public/jockpalfreeman.png?itok=OPjHKpmt", "https://www.antifainfoblatt.de/artikel/%E2%80%9Eschlie%C3%9Flich-waren-es-zu-viele%E2%80%9C"]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@ describe Workers::GatherOpenGraphData do
|
|||
before do
|
||||
@ogsite_title = 'Homepage'
|
||||
@ogsite_type = 'website'
|
||||
@ogsite_image = '/img/something.png'
|
||||
@ogsite_image = 'http://www.we-support-open-graph.com/img/something.png'
|
||||
@ogsite_url = 'http://www.we-support-open-graph.com'
|
||||
@ogsite_description = 'Homepage'
|
||||
|
||||
|
|
@ -31,9 +31,12 @@ describe Workers::GatherOpenGraphData do
|
|||
|
||||
@status_message = FactoryGirl.create(:status_message)
|
||||
|
||||
stub_request(:get, @ogsite_url).to_return(:status => 200, :body => @ogsite_body)
|
||||
stub_request(:get, @no_open_graph_url).to_return(:status => 200, :body => '<html><body>hello there</body></html>')
|
||||
stub_request(:get, @oglong_url).to_return(:status => 200, :body => @oglong_body)
|
||||
stub_request(:head, @ogsite_url).to_return(status: 200, body: "", headers: {'Content-Type' => 'text/html; utf-8'})
|
||||
stub_request(:get, @ogsite_url).to_return(status: 200, body: @ogsite_body, headers: {'Content-Type' => 'text/html; utf-8'})
|
||||
stub_request(:head, @no_open_graph_url).to_return(status: 200, body: "", headers: {'Content-Type' => 'text/html; utf-8'})
|
||||
stub_request(:get, @no_open_graph_url).to_return(:status => 200, :body => '<html><head><title>Hi</title><body>hello there</body></html>', headers: {'Content-Type' => 'text/html; utf-8'})
|
||||
stub_request(:head, @oglong_url).to_return(status: 200, body: "", headers: {'Content-Type' => 'text/html; utf-8'})
|
||||
stub_request(:get, @oglong_url).to_return(status: 200, body: @oglong_body, headers: {'Content-Type' => 'text/html; utf-8'})
|
||||
|
||||
end
|
||||
|
||||
|
|
@ -59,7 +62,7 @@ describe Workers::GatherOpenGraphData do
|
|||
|
||||
expect(ogc.title).to eq(@ogsite_title)
|
||||
expect(ogc.ob_type).to eq(@ogsite_type)
|
||||
expect(ogc.image).to eq(@ogsite_url + @ogsite_image)
|
||||
expect(ogc.image).to eq(@ogsite_image)
|
||||
expect(ogc.url).to eq(@ogsite_url)
|
||||
expect(ogc.description).to eq(@ogsite_description)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue