finally (hopefully) fix non-ascii urls + new & old tests

This commit is contained in:
Florian Staudacher 2012-03-30 00:29:34 +02:00
parent 52c9d76a86
commit ae3e225e28
4 changed files with 142 additions and 9 deletions

View file

@ -17,20 +17,31 @@
converter.hooks.chain("preConversion", function(text) {
// add < > around plain urls, effectively making them "autolinks"
// regex copied from: http://daringfireball.net/2010/07/improved_regex_for_matching_urls (slightly modified)
var urlRegex = /(^|\s)\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/gi;
text = text.replace(urlRegex, function(wholematch, space, url) {
return space+"<"+url+">";
});
// process links
var linkRegex = /(\[.*\]:\s)?(<|\()(((https?|ftp):\/{1,3})([^'">\s]+))(>|\))/gi;
// regex copied from: https://code.google.com/p/pagedown/source/browse/Markdown.Converter.js#1198 (and slightly expanded)
var linkRegex = /(\[.*\]:\s)?(<|\()((https?|ftp):[^'">\s]+)(>|\))/gi;
text = text.replace(linkRegex, function() {
var protocol = arguments[4];
var unicodeUrl = arguments[6];
var asciiUrl = protocol+punycode.toASCII(unicodeUrl);
var unicodeUrl = arguments[3];
var addr = parse_url(unicodeUrl);
var asciiUrl = // rebuild the url
(!addr.scheme ? '' : addr.scheme +
(!addr.scheme=="mailto" ? ':' : '://')) +
(!addr.user ? '' : addr.user +
(!addr.pass ? '' : ':'+addr.pass) + '@') +
punycode.toASCII(addr.host) +
(!addr.port ? '' : ':' + addr.port) +
(!addr.path ? '' : addr.path) +
(!addr.query ? '' : '?' + addr.query) +
(!addr.fragment ? '' : '#' + addr.fragment);
if( !arguments[1] || arguments[1] == "") { // inline link
if(arguments[2] == "<") return "["+protocol+unicodeUrl+"]("+asciiUrl+")"; // without link text
else return arguments[2]+asciiUrl+arguments[7]; // with link text
if(arguments[2] == "<") return "["+unicodeUrl+"]("+asciiUrl+")"; // without link text
else return arguments[2]+asciiUrl+arguments[5]; // with link text
} else { // reference style link
return arguments[1]+asciiUrl;
}

View file

@ -24,6 +24,7 @@
//= require handlebars-1.0.0.beta.6
//= require markdown
//= require punycode
//= require parse_url
//= require clear-form
//= require validation
//= require app/app
@ -39,4 +40,4 @@
//= require mentions
//= require bootstrap/bootstrap-twipsy
//= require bootstrap/bootstrap-popover
//= require bootstrap/bootstrap-dropdown
//= require bootstrap/bootstrap-dropdown

View file

@ -1,3 +1,73 @@
describe("app.views.Post", function(){
//check out StreamPost
})
// Specs for how a rendered StreamPost view treats URLs with non-ASCII
// characters in post text: each case sets the post text, renders the view
// and inspects the resulting HTML.
context("markdown rendering", function() {
// Shared setup: log in a user, load the locale strings, build a post
// collection from the "stream_json" fixture and define the URL pair
// (unicode form / expected punycode form) used by the specs below.
beforeEach(function() {
loginAs({name: "alice", avatar : {small : "http://avatar.com/photo.jpg"}});
Diaspora.I18n.loadLocale({stream : {
reshares : {
one : "<%= count %> reshare",
other : "<%= count %> reshares"
},
likes : {
zero : "<%= count %> Likes",
one : "<%= count %> Like",
other : "<%= count %> Likes"
}
}})
var posts = $.parseJSON(spec.readFixture("stream_json"))["posts"];
this.collection = new app.collections.Posts(posts);
// The first two fixture posts are kept under descriptive names.
this.statusMessage = this.collection.models[0];
this.reshare = this.collection.models[1];
// example from issue #2665
this.evilUrl = "http://www.bürgerentscheid-krankenhäuser.de";
this.asciiUrl = "http://www.xn--brgerentscheid-krankenhuser-xkc78d.de";
});
// Autolink syntax (<url>): the output is expected to contain both the
// punycode form and the original unicode form of the URL.
it("correctly handles non-ascii characters in urls", function() {
this.statusMessage.set({text: "<"+this.evilUrl+">"});
var view = new app.views.StreamPost({model : this.statusMessage}).render();
expect($(view.el).html()).toContain(this.asciiUrl);
expect($(view.el).html()).toContain(this.evilUrl);
});
// Inline link syntax ([text](url)): the output is expected to contain the
// punycode URL and the untouched link text.
it("doesn't break link texts for non-ascii urls", function() {
var linkText = "check out this awesome link!";
this.statusMessage.set({text: "["+linkText+"]("+this.evilUrl+")"});
var view = new app.views.StreamPost({model: this.statusMessage}).render();
expect($(view.el).html()).toContain(this.asciiUrl);
expect($(view.el).html()).toContain(linkText);
});
// Reference-style link ([text][1] ... [1]: url): the unicode URL must not
// appear in the output, only its punycode form.
// NOTE(review): the trailing " and an optional title)" has a closing paren
// without an opening one — confirm whether "(and an optional title)" was intended.
it("doesn't break reference style links for non-ascii urls", function() {
var postContent = "blabla blab [my special link][1] bla blabla\n\n[1]: "+this.evilUrl+" and an optional title)";
this.statusMessage.set({text: postContent});
var view = new app.views.StreamPost({model: this.statusMessage}).render();
expect($(view.el).html()).not.toContain(this.evilUrl);
expect($(view.el).html()).toContain(this.asciiUrl);
});
// Image syntax (![alt](url)): the output is expected to contain the
// punycode URL wrapped in double quotes (niceImg includes the quotes).
it("correctly handles images with non-ascii urls", function() {
var postContent = "![logo](http://bündnis-für-krankenhäuser.de/wp-content/uploads/2011/11/cropped-logohp.jpg)";
var niceImg = '"http://xn--bndnis-fr-krankenhuser-i5b27cha.de/wp-content/uploads/2011/11/cropped-logohp.jpg"';
this.statusMessage.set({text: postContent});
var view = new app.views.StreamPost({model: this.statusMessage}).render();
expect($(view.el).html()).toContain(niceImg);
});
// Bare URL in the post text with non-ASCII characters in every host label:
// the output is expected to contain both the original and the punycode form.
it("correctly handles even more special links", function() {
var specialLink = "http://موقع.وزارة-الاتصالات.مصر/"; // example from #3082
var normalLink = "http://xn--4gbrim.xn----ymcbaaajlc6dj7bxne2c.xn--wgbh1c/";
this.statusMessage.set({text: specialLink });
var view = new app.views.StreamPost({model: this.statusMessage}).render();
expect($(view.el).html()).toContain(specialLink);
expect($(view.el).html()).toContain(normalLink);
});
});
});

51
vendor/assets/javascripts/parse_url.js vendored Normal file
View file

@ -0,0 +1,51 @@
/**
 * Split a URL string into its components, modelled on PHP's parse_url().
 *
 * Parsing is regex based. The regex is selected by an optional php.js style
 * setting read from `this.php_js.ini['phpjs.parse_url.mode'].local_value`
 * ('php' - the default -, 'strict' or 'loose'). In the non-'php' modes the
 * result additionally carries an object of parsed query parameters, stored
 * under the name given by `ini['phpjs.parse_url.queryKey'].local_value`
 * (default 'queryKey').
 *
 * @param {string} str - The URL to parse.
 * @param {string} [component] - Name of a single component to return, either
 *   plain ('host') or PHP constant style ('PHP_URL_HOST').
 * @returns {Object|string|undefined} Without `component`: an object holding
 *   only the non-empty components (scheme, host, port, user, pass, path,
 *   query, fragment, ...). With `component`: that component's string value,
 *   or undefined when it is absent from the URL.
 */
function parse_url (str, component) {
    // http://kevin.vanzonneveld.net
    // + original by: Steven Levithan (http://blog.stevenlevithan.com)
    // + reimplemented by: Brett Zamir (http://brett-zamir.me)
    // + input by: Lorenzo Pisani
    // + input by: Tony
    // + improved by: Brett Zamir (http://brett-zamir.me)
    // % note: Based on http://stevenlevithan.com/demo/parseuri/js/assets/parseuri.js
    // % note: blog post at http://blog.stevenlevithan.com/archives/parseuri
    // % note: demo at http://stevenlevithan.com/demo/parseuri/js/assets/parseuri.js
    // % note: Does not replace invalid characters with '_' as in PHP, nor does it return false with
    // % note: a seriously malformed URL.
    // % note: Besides function name, is essentially the same as parseUri as well as our allowing
    // % note: an extra slash after the scheme/protocol (to allow file:/// as in PHP)
    // * example 1: parse_url('http://username:password@hostname/path?arg=value#anchor');
    // * returns 1: {scheme: 'http', host: 'hostname', user: 'username', pass: 'password', path: '/path', query: 'arg=value', fragment: 'anchor'}

    // Component names, indexed by the capture group that holds them.
    var key = ['source', 'scheme', 'authority', 'userInfo', 'user', 'pass', 'host', 'port',
            'relative', 'path', 'directory', 'file', 'query', 'fragment'],
        // `this` is undefined for a plain call in strict-mode / module code;
        // fall back to an empty object so the settings lookup cannot throw.
        context = this || {},
        ini = (context.php_js && context.php_js.ini) || {},
        mode = (ini['phpjs.parse_url.mode'] &&
            ini['phpjs.parse_url.mode'].local_value) || 'php',
        parser = {
            php: /^(?:([^:\/?#]+):)?(?:\/\/()(?:(?:()(?:([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?))?()(?:(()(?:(?:[^?#\/]*\/)*)()(?:[^?#]*))(?:\?([^#]*))?(?:#(.*))?)/,
            strict: /^(?:([^:\/?#]+):)?(?:\/\/((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?))?((((?:[^?#\/]*\/)*)([^?#]*))(?:\?([^#]*))?(?:#(.*))?)/,
            loose: /^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/\/?)?((?:(([^:@]*):?([^:@]*))?@)?([^:\/?#]*)(?::(\d*))?)(((\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?([^?#\/]*))(?:\?([^#]*))?(?:#(.*))?)/ // Added one optional slash to post-scheme to catch file:/// (should restrict this)
        };

    var m = parser[mode].exec(str),
        uri = {},
        i = 14;

    // Copy every non-empty capture group into the result under its name.
    while (i--) {
        if (m[i]) {
            uri[key[i]] = m[i];
        }
    }

    if (component) {
        return uri[component.replace('PHP_URL_', '').toLowerCase()];
    }

    if (mode !== 'php') {
        var name = (ini['phpjs.parse_url.queryKey'] &&
            ini['phpjs.parse_url.queryKey'].local_value) || 'queryKey',
            pairRegex = /(?:^|&)([^&=]*)=?([^&]*)/g;
        uri[name] = {};
        // The query component is absent for URLs without '?'; treat it as an
        // empty string instead of calling .replace on undefined.
        (uri[key[12]] || '').replace(pairRegex, function ($0, $1, $2) {
            if ($1) {uri[name][$1] = $2;}
        });
    }

    // The full match is only needed internally, not part of the result.
    delete uri.source;
    return uri;
}