diff --git a/Changelog.md b/Changelog.md index 5219d1865..3328d4968 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,6 +1,7 @@ # 0.6.2.0 ## Refactor +* Use string-direction gem for rtl detection [#7181](https://github.com/diaspora/diaspora/pull/7181) ## Bug fixes * Fix fetching comments after fetching likes [#7167](https://github.com/diaspora/diaspora/pull/7167) diff --git a/Gemfile b/Gemfile index e4614cfd7..9bf0486c6 100644 --- a/Gemfile +++ b/Gemfile @@ -135,6 +135,10 @@ gem "twitter-text", "1.14.0" gem "ruby-oembed", "0.10.1" gem "open_graph_reader", "0.6.1" +# RTL support + +gem "string-direction", "1.2.0" + # Security Headers gem "secure_headers", "3.5.0" diff --git a/Gemfile.lock b/Gemfile.lock index 73f20f63e..73559ce03 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -819,6 +819,8 @@ GEM activesupport (>= 3.0) sprockets (>= 2.8, < 4.0) state_machine (1.2.0) + string-direction (1.2.0) + yard (~> 0.8) swd (1.0.1) activesupport (>= 3) attr_required (>= 0.0.5) @@ -1031,6 +1033,7 @@ DEPENDENCIES spring (= 2.0.0) spring-commands-cucumber (= 1.0.1) spring-commands-rspec (= 1.0.4) + string-direction (= 1.2.0) test_after_commit (= 1.1.0) timecop (= 0.8.1) turbo_dev_assets (= 0.0.2) @@ -1046,4 +1049,4 @@ DEPENDENCIES will_paginate (= 3.1.5) BUNDLED WITH - 1.13.5 + 1.13.6 diff --git a/lib/direction_detector.rb b/lib/direction_detector.rb index 9724c80c8..c7ffd055a 100644 --- a/lib/direction_detector.rb +++ b/lib/direction_detector.rb @@ -2,36 +2,16 @@ # Copyright (c) 2010-2011, Diaspora Inc. This file is # licensed under the Affero General Public License version 3 or later. See # the COPYRIGHT file. -# Deeply inspired by https://gitorious.org/statusnet/mainline/blobs/master/plugins/DirectionDetector/DirectionDetectorPlugin.php class String - RTL_RANGES = [ - [1536, 1791], # arabic, persian, urdu, kurdish, ... - [65136, 65279], # arabic peresent 2 - [64336, 65023], # arabic peresent 1 - [1424, 1535], # hebrew - [64256, 64335], # hebrew peresent - [1792, 1871], # syriac - [1920, 1983], # thaana - [1984, 2047], # nko - [11568, 11647] # tifinagh - ] RTL_CLEANER_REGEXES = [ /@[^ ]+|#[^ ]+/u, # mention, tag /^RT[: ]{1}| RT | RT: |[♺♻:]/u # retweet ] def is_rtl? return false if self.strip.empty? - count = 0 - self.split(" ").each do |word| - if starts_with_rtl_char?(word) - count += 1 - else - count -= 1 - end - end - return true if count > 0 # more than half of the words are rtl words - return starts_with_rtl_char?(self) # otherwise let the first word decide + detector = StringDirection::Detector.new(:dominant) + detector.rtl? self end # Diaspora specific @@ -42,14 +22,4 @@ class String end string.is_rtl? end - - def starts_with_rtl_char?(string = self) - stripped = string.strip - return false if stripped.empty? - char = stripped.unpack('U*').first - RTL_RANGES.each do |limit| - return true if char >= limit[0] && char <= limit[1] - end - return false - end end diff --git a/spec/lib/direction_detector_spec.rb b/spec/lib/direction_detector_spec.rb index b19e854db..9b28700e3 100644 --- a/spec/lib/direction_detector_spec.rb +++ b/spec/lib/direction_detector_spec.rb @@ -24,29 +24,6 @@ describe String do let(:hebrew_arabic) { "#{hebrew} #{arabic}" } - describe "#stats_with_rtl_char?" do - it 'returns true or false correctly' do - expect(english.starts_with_rtl_char?).to be false - expect(chinese.starts_with_rtl_char?).to be false - expect(arabic.starts_with_rtl_char?).to be true - expect(hebrew.starts_with_rtl_char?).to be true - expect(hebrew_arabic.starts_with_rtl_char?).to be true - end - - it 'only looks at the first char' do - expect(english_chinese.starts_with_rtl_char?).to be false - expect(chinese_english.starts_with_rtl_char?).to be false - expect(english_arabic.starts_with_rtl_char?).to be false - expect(hebrew_english.starts_with_rtl_char?).to be true - expect(arabic_chinese.starts_with_rtl_char?).to be true - end - - it 'ignores whitespaces' do - expect(" \n \r \t".starts_with_rtl_char?).to be false - expect(" #{arabic} ".starts_with_rtl_char?).to be true - end - end - describe "#is_rtl?" do it 'returns true or false correctly' do expect(english.is_rtl?).to be false @@ -65,17 +42,16 @@ describe String do expect("#{english} #{arabic} #{arabic}".is_rtl?).to be true end - it "fallbacks to the first word if there's no majority" do - expect(hebrew_english.is_rtl?).to be true - expect(english_hebrew.is_rtl?).to be false - expect(arabic_english.is_rtl?).to be true - expect(english_arabic.is_rtl?).to be false - end - it 'ignores whitespaces' do expect(" \n \r \t".is_rtl?).to be false expect(" #{arabic} ".is_rtl?).to be true end + + it "ignores byte order marks" do + expect("\u{feff}".is_rtl?).to be false + expect("\u{feff}#{arabic}".is_rtl?).to be true + expect("\u{feff}#{english}".is_rtl?).to be false + end end describe '#cleaned_is_rtl?' do