From ae3e225e28df6ab4b5a3a41babad79ea474ec361 Mon Sep 17 00:00:00 2001 From: Florian Staudacher Date: Fri, 30 Mar 2012 00:29:34 +0200 Subject: [PATCH] finally (hopefully) fix non-ascii urls + new & old tests --- .../javascripts/app/helpers/text_formatter.js | 23 ++++-- app/assets/javascripts/main.js | 3 +- spec/javascripts/app/views/post_view_spec.js | 74 ++++++++++++++++++- vendor/assets/javascripts/parse_url.js | 51 +++++++++++++ 4 files changed, 142 insertions(+), 9 deletions(-) create mode 100644 vendor/assets/javascripts/parse_url.js diff --git a/app/assets/javascripts/app/helpers/text_formatter.js b/app/assets/javascripts/app/helpers/text_formatter.js index 84f4ba7e6..38d68a7bb 100644 --- a/app/assets/javascripts/app/helpers/text_formatter.js +++ b/app/assets/javascripts/app/helpers/text_formatter.js @@ -17,20 +17,31 @@ converter.hooks.chain("preConversion", function(text) { // add < > around plain urls, effectively making them "autolinks" + // regex copied from: http://daringfireball.net/2010/07/improved_regex_for_matching_urls (slightly modified) var urlRegex = /(^|\s)\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/gi; text = text.replace(urlRegex, function(wholematch, space, url) { return space+"<"+url+">"; }); // process links - var linkRegex = /(\[.*\]:\s)?(<|\()(((https?|ftp):\/{1,3})([^'">\s]+))(>|\))/gi; + // regex copied from: https://code.google.com/p/pagedown/source/browse/Markdown.Converter.js#1198 (and slightly expanded) + var linkRegex = /(\[.*\]:\s)?(<|\()((https?|ftp):[^'">\s]+)(>|\))/gi; text = text.replace(linkRegex, function() { - var protocol = arguments[4]; - var unicodeUrl = arguments[6]; - var asciiUrl = protocol+punycode.toASCII(unicodeUrl); + var unicodeUrl = arguments[3]; + var addr = parse_url(unicodeUrl); + var asciiUrl = // rebuild the url + (!addr.scheme ? '' : addr.scheme + + (!addr.scheme=="mailto" ? ':' : '://')) + + (!addr.user ? '' : addr.user + + (!addr.pass ? '' : ':'+addr.pass) + '@') + + punycode.toASCII(addr.host) + + (!addr.port ? '' : ':' + addr.port) + + (!addr.path ? '' : addr.path) + + (!addr.query ? '' : '?' + addr.query) + + (!addr.fragment ? '' : '#' + addr.fragment); if( !arguments[1] || arguments[1] == "") { // inline link - if(arguments[2] == "<") return "["+protocol+unicodeUrl+"]("+asciiUrl+")"; // without link text - else return arguments[2]+asciiUrl+arguments[7]; // with link text + if(arguments[2] == "<") return "["+unicodeUrl+"]("+asciiUrl+")"; // without link text + else return arguments[2]+asciiUrl+arguments[5]; // with link text } else { // reference style link return arguments[1]+asciiUrl; } diff --git a/app/assets/javascripts/main.js b/app/assets/javascripts/main.js index edff642ef..42ebb8cdc 100644 --- a/app/assets/javascripts/main.js +++ b/app/assets/javascripts/main.js @@ -24,6 +24,7 @@ //= require handlebars-1.0.0.beta.6 //= require markdown //= require punycode +//= require parse_url //= require clear-form //= require validation //= require app/app @@ -39,4 +40,4 @@ //= require mentions //= require bootstrap/bootstrap-twipsy //= require bootstrap/bootstrap-popover -//= require bootstrap/bootstrap-dropdown \ No newline at end of file +//= require bootstrap/bootstrap-dropdown diff --git a/spec/javascripts/app/views/post_view_spec.js b/spec/javascripts/app/views/post_view_spec.js index 58225d1af..2bdf7af71 100644 --- a/spec/javascripts/app/views/post_view_spec.js +++ b/spec/javascripts/app/views/post_view_spec.js @@ -1,3 +1,73 @@ describe("app.views.Post", function(){ - //check out StreamPost -}) + context("markdown rendering", function() { + beforeEach(function() { + loginAs({name: "alice", avatar : {small : "http://avatar.com/photo.jpg"}}); + Diaspora.I18n.loadLocale({stream : { + reshares : { + one : "<%= count %> reshare", + other : "<%= count %> reshares" + }, + likes : { + zero : "<%= count %> Likes", + one : "<%= count %> Like", + other : "<%= count %> Likes" + } + }}) + + var posts = $.parseJSON(spec.readFixture("stream_json"))["posts"]; + + this.collection = new app.collections.Posts(posts); + this.statusMessage = this.collection.models[0]; + this.reshare = this.collection.models[1]; + + // example from issue #2665 + this.evilUrl = "http://www.bürgerentscheid-krankenhäuser.de"; + this.asciiUrl = "http://www.xn--brgerentscheid-krankenhuser-xkc78d.de"; + }); + + it("correctly handles non-ascii characters in urls", function() { + this.statusMessage.set({text: "<"+this.evilUrl+">"}); + var view = new app.views.StreamPost({model : this.statusMessage}).render(); + + expect($(view.el).html()).toContain(this.asciiUrl); + expect($(view.el).html()).toContain(this.evilUrl); + }); + + it("doesn't break link texts for non-ascii urls", function() { + var linkText = "check out this awesome link!"; + this.statusMessage.set({text: "["+linkText+"]("+this.evilUrl+")"}); + var view = new app.views.StreamPost({model: this.statusMessage}).render(); + + expect($(view.el).html()).toContain(this.asciiUrl); + expect($(view.el).html()).toContain(linkText); + }); + + it("doesn't break reference style links for non-ascii urls", function() { + var postContent = "blabla blab [my special link][1] bla blabla\n\n[1]: "+this.evilUrl+" and an optional title)"; + this.statusMessage.set({text: postContent}); + var view = new app.views.StreamPost({model: this.statusMessage}).render(); + + expect($(view.el).html()).not.toContain(this.evilUrl); + expect($(view.el).html()).toContain(this.asciiUrl); + }); + + it("correctly handles images with non-ascii urls", function() { + var postContent = "![logo](http://bündnis-für-krankenhäuser.de/wp-content/uploads/2011/11/cropped-logohp.jpg)"; + var niceImg = '"http://xn--bndnis-fr-krankenhuser-i5b27cha.de/wp-content/uploads/2011/11/cropped-logohp.jpg"'; + this.statusMessage.set({text: postContent}); + var view = new app.views.StreamPost({model: this.statusMessage}).render(); + + expect($(view.el).html()).toContain(niceImg); + }); + + it("correctly handles even more special links", function() { + var specialLink = "http://موقع.وزارة-الاتصالات.مصر/"; // example from #3082 + var normalLink = "http://xn--4gbrim.xn----ymcbaaajlc6dj7bxne2c.xn--wgbh1c/"; + this.statusMessage.set({text: specialLink }); + var view = new app.views.StreamPost({model: this.statusMessage}).render(); + + expect($(view.el).html()).toContain(specialLink); + expect($(view.el).html()).toContain(normalLink); + }); + }); +}); diff --git a/vendor/assets/javascripts/parse_url.js b/vendor/assets/javascripts/parse_url.js new file mode 100644 index 000000000..f609c3119 --- /dev/null +++ b/vendor/assets/javascripts/parse_url.js @@ -0,0 +1,51 @@ +function parse_url (str, component) { + // http://kevin.vanzonneveld.net + // + original by: Steven Levithan (http://blog.stevenlevithan.com) + // + reimplemented by: Brett Zamir (http://brett-zamir.me) + // + input by: Lorenzo Pisani + // + input by: Tony + // + improved by: Brett Zamir (http://brett-zamir.me) + // % note: Based on http://stevenlevithan.com/demo/parseuri/js/assets/parseuri.js + // % note: blog post at http://blog.stevenlevithan.com/archives/parseuri + // % note: demo at http://stevenlevithan.com/demo/parseuri/js/assets/parseuri.js + // % note: Does not replace invalid characters with '_' as in PHP, nor does it return false with + // % note: a seriously malformed URL. + // % note: Besides function name, is essentially the same as parseUri as well as our allowing + // % note: an extra slash after the scheme/protocol (to allow file:/// as in PHP) + // * example 1: parse_url('http://username:password@hostname/path?arg=value#anchor'); + // * returns 1: {scheme: 'http', host: 'hostname', user: 'username', pass: 'password', path: '/path', query: 'arg=value', fragment: 'anchor'} + var key = ['source', 'scheme', 'authority', 'userInfo', 'user', 'pass', 'host', 'port', + 'relative', 'path', 'directory', 'file', 'query', 'fragment'], + ini = (this.php_js && this.php_js.ini) || {}, + mode = (ini['phpjs.parse_url.mode'] && + ini['phpjs.parse_url.mode'].local_value) || 'php', + parser = { + php: /^(?:([^:\/?#]+):)?(?:\/\/()(?:(?:()(?:([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?))?()(?:(()(?:(?:[^?#\/]*\/)*)()(?:[^?#]*))(?:\?([^#]*))?(?:#(.*))?)/, + strict: /^(?:([^:\/?#]+):)?(?:\/\/((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?))?((((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/, + loose: /^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/\/?)?((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?)(((\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/ // Added one optional slash to post-scheme to catch file:/// (should restrict this) + }; + + var m = parser[mode].exec(str), + uri = {}, + i = 14; + while (i--) { + if (m[i]) { + uri[key[i]] = m[i]; + } + } + + if (component) { + return uri[component.replace('PHP_URL_', '').toLowerCase()]; + } + if (mode !== 'php') { + var name = (ini['phpjs.parse_url.queryKey'] && + ini['phpjs.parse_url.queryKey'].local_value) || 'queryKey'; + parser = /(?:^|&)([^&=]*)=?([^&]*)/g; + uri[name] = {}; + uri[key[12]].replace(parser, function ($0, $1, $2) { + if ($1) {uri[name][$1] = $2;} + }); + } + delete uri.source; + return uri; +}