finally (hopefully) fix non-ascii urls + new & old tests

2012-03-30 00:29:34 +02:00 · 2012-03-30 00:29:34 +02:00 · ae3e225e28
commit ae3e225e28
parent 52c9d76a86
4 changed files with 142 additions and 9 deletions
--- a/app/assets/javascripts/app/helpers/text_formatter.js
+++ b/app/assets/javascripts/app/helpers/text_formatter.js
@ -17,20 +17,31 @@
    converter.hooks.chain("preConversion", function(text) {

      // add < > around plain urls, effectively making them "autolinks"
+      // regex copied from: http://daringfireball.net/2010/07/improved_regex_for_matching_urls (slightly modified)
      var urlRegex = /(^|\s)\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/gi;
      text = text.replace(urlRegex, function(wholematch, space, url) {
        return space+"<"+url+">";
      });

      // process links
-      var linkRegex = /(\[.*\]:\s)?(<|\()(((https?|ftp):\/{1,3})([^'">\s]+))(>|\))/gi;
+      // regex copied from: https://code.google.com/p/pagedown/source/browse/Markdown.Converter.js#1198 (and slightly expanded)
+      var linkRegex = /(\[.*\]:\s)?(<|\()((https?|ftp):[^'">\s]+)(>|\))/gi;
      text = text.replace(linkRegex, function() {
-        var protocol = arguments[4];
-        var unicodeUrl = arguments[6];
-        var asciiUrl = protocol+punycode.toASCII(unicodeUrl);
+        // arguments[3] is linkRegex's third capture group: the whole url, scheme included
+        var unicodeUrl = arguments[3];
+        var addr = parse_url(unicodeUrl);
+        // rebuild the url piece by piece so that only the host goes through punycode
+        var asciiUrl =
+          (!addr.scheme ? '' : addr.scheme +
+          // mailto urls take a bare ':' after the scheme, everything else '://'
+          (addr.scheme === "mailto" ? ':' : '://')) +
+          (!addr.user ? '' : addr.user +
+          (!addr.pass ? '' : ':'+addr.pass) + '@') +
+          // a url without an authority part has no host; never hand undefined to punycode
+          (!addr.host ? '' : punycode.toASCII(addr.host)) +
+          (!addr.port ? '' : ':' + addr.port) +
+          (!addr.path ? '' : addr.path) +
+          (!addr.query ? '' : '?' + addr.query) +
+          (!addr.fragment ? '' : '#' + addr.fragment);
        if( !arguments[1] || arguments[1] == "") { // inline link
-          if(arguments[2] == "<") return "["+protocol+unicodeUrl+"]("+asciiUrl+")"; // without link text
-          else return arguments[2]+asciiUrl+arguments[7]; // with link text
+          if(arguments[2] == "<") return "["+unicodeUrl+"]("+asciiUrl+")"; // without link text
+          else return arguments[2]+asciiUrl+arguments[5]; // with link text
        } else { // reference style link
          return arguments[1]+asciiUrl;
        }
--- a/app/assets/javascripts/main.js
+++ b/app/assets/javascripts/main.js
@ -24,6 +24,7 @@
 //= require handlebars-1.0.0.beta.6
 //= require markdown
 //= require punycode
+//= require parse_url
 //= require clear-form
 //= require validation
 //= require app/app
@ -39,4 +40,4 @@
 //= require mentions
 //= require bootstrap/bootstrap-twipsy
 //= require bootstrap/bootstrap-popover
-//= require bootstrap/bootstrap-dropdown
+//= require bootstrap/bootstrap-dropdown
--- a/spec/javascripts/app/views/post_view_spec.js
+++ b/spec/javascripts/app/views/post_view_spec.js
@ -1,3 +1,73 @@
 describe("app.views.Post", function(){
-  //check out StreamPost
-})
+  // Checks that urls with non-ascii hosts survive markdown rendering of a stream post:
+  // the rendered html must carry the punycode (ascii) form of the host.
+  context("markdown rendering", function() {
+    beforeEach(function() {
+      loginAs({name: "alice", avatar : {small : "http://avatar.com/photo.jpg"}});
+      Diaspora.I18n.loadLocale({stream : {
+        reshares : {
+          one : "<%= count %> reshare",
+          other : "<%= count %> reshares"
+        },
+        likes : {
+          zero : "<%= count %> Likes",
+          one : "<%= count %> Like",
+          other : "<%= count %> Likes"
+        }
+      }});
+
+      var posts = $.parseJSON(spec.readFixture("stream_json"))["posts"];
+
+      this.collection = new app.collections.Posts(posts);
+      this.statusMessage = this.collection.models[0];
+      this.reshare = this.collection.models[1];
+
+      // example from issue #2665
+      this.evilUrl  = "http://www.bürgerentscheid-krankenhäuser.de";
+      this.asciiUrl = "http://www.xn--brgerentscheid-krankenhuser-xkc78d.de";
+    });
+
+    it("correctly handles non-ascii characters in urls", function() {
+      // autolink syntax: the unicode url stays visible as link text, the ascii url is the target
+      this.statusMessage.set({text: "<"+this.evilUrl+">"});
+      var view = new app.views.StreamPost({model : this.statusMessage}).render();
+
+      expect($(view.el).html()).toContain(this.asciiUrl);
+      expect($(view.el).html()).toContain(this.evilUrl);
+    });
+
+    it("doesn't break link texts for non-ascii urls", function() {
+      var linkText = "check out this awesome link!";
+      this.statusMessage.set({text: "["+linkText+"]("+this.evilUrl+")"});
+      var view = new app.views.StreamPost({model: this.statusMessage}).render();
+
+      expect($(view.el).html()).toContain(this.asciiUrl);
+      expect($(view.el).html()).toContain(linkText);
+    });
+
+    it("doesn't break reference style links for non-ascii urls", function() {
+      // the link definition carries a parenthesised title; both parens are required for it to parse as one
+      var postContent = "blabla blab [my special link][1] bla blabla\n\n[1]: "+this.evilUrl+" (and an optional title)";
+      this.statusMessage.set({text: postContent});
+      var view = new app.views.StreamPost({model: this.statusMessage}).render();
+
+      expect($(view.el).html()).not.toContain(this.evilUrl);
+      expect($(view.el).html()).toContain(this.asciiUrl);
+    });
+
+    it("correctly handles images with non-ascii urls", function() {
+      var postContent = "![logo](http://bündnis-für-krankenhäuser.de/wp-content/uploads/2011/11/cropped-logohp.jpg)";
+      // quoted on purpose: the ascii url has to appear as a complete attribute value
+      var niceImg = '"http://xn--bndnis-fr-krankenhuser-i5b27cha.de/wp-content/uploads/2011/11/cropped-logohp.jpg"';
+      this.statusMessage.set({text: postContent});
+      var view = new app.views.StreamPost({model: this.statusMessage}).render();
+
+      expect($(view.el).html()).toContain(niceImg);
+    });
+
+    it("correctly handles even more special links", function() {
+      var specialLink = "http://موقع.وزارة-الاتصالات.مصر/"; // example from #3082
+      var normalLink = "http://xn--4gbrim.xn----ymcbaaajlc6dj7bxne2c.xn--wgbh1c/";
+      // plain url without any markdown link syntax around it
+      this.statusMessage.set({text: specialLink });
+      var view = new app.views.StreamPost({model: this.statusMessage}).render();
+
+      expect($(view.el).html()).toContain(specialLink);
+      expect($(view.el).html()).toContain(normalLink);
+    });
+  });
+});
--- a/vendor/assets/javascripts/parse_url.js
+++ b/vendor/assets/javascripts/parse_url.js
@ -0,0 +1,51 @@
+function parse_url (str, component) {
+    // http://kevin.vanzonneveld.net
+    // +      original by: Steven Levithan (http://blog.stevenlevithan.com)
+    // + reimplemented by: Brett Zamir (http://brett-zamir.me)
+    // + input by: Lorenzo Pisani
+    // + input by: Tony
+    // + improved by: Brett Zamir (http://brett-zamir.me)
+    // %          note: Based on http://stevenlevithan.com/demo/parseuri/js/assets/parseuri.js
+    // %          note: blog post at http://blog.stevenlevithan.com/archives/parseuri
+    // %          note: demo at http://stevenlevithan.com/demo/parseuri/js/assets/parseuri.js
+    // %          note: Does not replace invalid characters with '_' as in PHP, nor does it return false with
+    // %          note: a seriously malformed URL.
+    // %          note: Besides function name, is essentially the same as parseUri as well as our allowing
+    // %          note: an extra slash after the scheme/protocol (to allow file:/// as in PHP)
+    // *     example 1: parse_url('http://username:password@hostname/path?arg=value#anchor');
+    // *     returns 1: {scheme: 'http', host: 'hostname', user: 'username', pass: 'password', path: '/path', query: 'arg=value', fragment: 'anchor'}
+    //
+    // Splits a url string into its components with one of three regexes ('php' by default).
+    //   str       - the url to parse
+    //   component - optional component name such as 'PHP_URL_HOST' or 'host'; when given,
+    //               only that component's value is returned (undefined if it did not match)
+    // Returns an object holding every component that matched non-empty; in the 'strict' and
+    // 'loose' modes it additionally holds the query split into name/value pairs under 'queryKey'
+    // (or the name configured through the phpjs.parse_url.queryKey ini setting).
+
+    // component names, indexed by the capture group that yields them
+    var key = ['source', 'scheme', 'authority', 'userInfo', 'user', 'pass', 'host', 'port',
+                        'relative', 'path', 'directory', 'file', 'query', 'fragment'],
+        // `this` is undefined when called from strict-mode code, so check it before reading settings
+        ini = (this && this.php_js && this.php_js.ini) || {},
+        mode = (ini['phpjs.parse_url.mode'] &&
+            ini['phpjs.parse_url.mode'].local_value) || 'php',
+        parser = {
+            // the empty () groups in the php pattern keep the group numbers aligned with `key`
+            php: /^(?:([^:\/?#]+):)?(?:\/\/()(?:(?:()(?:([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?))?()(?:(()(?:(?:[^?#\/]*\/)*)()(?:[^?#]*))(?:\?([^#]*))?(?:#(.*))?)/,
+            strict: /^(?:([^:\/?#]+):)?(?:\/\/((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?))?((((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/,
+            loose: /^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/\/?)?((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?)(((\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/ // Added one optional slash to post-scheme to catch file:/// (should restrict this)
+        };
+
+    var m = parser[mode].exec(str),
+        uri = {},
+        i = 14;
+    // copy only the groups that matched something non-empty
+    while (i--) {
+        if (m[i]) {
+          uri[key[i]] = m[i];
+        }
+    }
+
+    if (component) {
+        return uri[component.replace('PHP_URL_', '').toLowerCase()];
+    }
+    if (mode !== 'php') {
+        var name = (ini['phpjs.parse_url.queryKey'] &&
+                ini['phpjs.parse_url.queryKey'].local_value) || 'queryKey';
+        parser = /(?:^|&)([^&=]*)=?([^&]*)/g;
+        uri[name] = {};
+        // uri.query is missing when the url has no query string; fall back to '' so replace() still works
+        (uri[key[12]] || '').replace(parser, function ($0, $1, $2) {
+            if ($1) {uri[name][$1] = $2;}
+        });
+    }
+    delete uri.source;
+    return uri;
+}