Replace the monstrous link regex with a slightly less complicated one,
based on the one from the markdown parser (fixes #3553). Also add a list of URLs for testing the regex in the specs.
This commit is contained in:
parent
e6ec41930b
commit
ded69477ba
4 changed files with 151 additions and 11 deletions
|
|
@ -1,3 +1,9 @@
|
|||
|
||||
// cache url regex globally, for direct access when testing
|
||||
$(function() {
|
||||
Diaspora.url_regex = /(^|\s)\b((?:(?:https?|ftp):(?:\/{1,3})|www\.)(?:[^"<>\)\s]|\(([^\s()<>]+|(\([^\s()<>]+\)))\))+)(?=\s|$)/gi;
|
||||
});
|
||||
|
||||
(function(){
|
||||
//make it so I take text and mentions rather than the modelapp.helpers.textFormatter(
|
||||
var textFormatter = function textFormatter(text, model) {
|
||||
|
|
@ -17,20 +23,29 @@
|
|||
converter.hooks.chain("preConversion", function(text) {
|
||||
|
||||
// add < > around plain urls, effectively making them "autolinks"
|
||||
// regex copied from: http://daringfireball.net/2010/07/improved_regex_for_matching_urls (slightly modified)
|
||||
var urlRegex = /(^|\s)\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/gi;
|
||||
text = text.replace(urlRegex, function(wholematch, space, url) {
|
||||
if( url.match(/^[^\w]/) ) return wholematch; // evil witchcraft, noop
|
||||
return space+"<"+url+">";
|
||||
text = text.replace(Diaspora.url_regex, function() {
|
||||
var url = arguments[2];
|
||||
if( url.match(/^[^\w]/) ) return url; // evil witchcraft, noop
|
||||
return arguments[1]+"<"+url+">";
|
||||
});
|
||||
|
||||
// process links
|
||||
// regex copied from: https://code.google.com/p/pagedown/source/browse/Markdown.Converter.js#1198 (and slightly expanded)
|
||||
var linkRegex = /(\[.*\]:\s)?(<|\()((?:(https?|ftp):\/\/[^\/'">\s]|www)[^'">\s]+?)(>|\))/gi;
|
||||
var linkRegex = /(\[.*\]:\s)?(<|\()((?:(https?|ftp):\/\/[^\/'">\s]|www)[^'">\s]+?)([>\)]{1,2})/gi;
|
||||
text = text.replace(linkRegex, function() {
|
||||
var unicodeUrl = arguments[3];
|
||||
var urlSuffix = arguments[5];
|
||||
|
||||
unicodeUrl = ( unicodeUrl.match(/^www/) ) ? ('http://' + unicodeUrl) : unicodeUrl;
|
||||
|
||||
// handle parentheses, especially in case the link ends with ')'
|
||||
if( urlSuffix.indexOf(')') != -1 && urlSuffix.indexOf('>') != -1 ) {
|
||||
unicodeUrl += ')';
|
||||
urlSuffix = '>';
|
||||
}
|
||||
// markdown doesn't like '(' or ')' anywhere, except where it wants
|
||||
var workingUrl = unicodeUrl.replace(/\(/, "%28").replace(/\)/, "%29");
|
||||
|
||||
var addr = parse_url(unicodeUrl);
|
||||
if( !addr.host ) addr.host = ""; // must not be 'undefined'
|
||||
|
||||
|
|
@ -45,8 +60,8 @@
|
|||
(!addr.query ? '' : '?' + encodeURI(addr.query) ) +
|
||||
(!addr.fragment ? '' : '#' + encodeURI(addr.fragment) );
|
||||
if( !arguments[1] || arguments[1] == "") { // inline link
|
||||
if(arguments[2] == "<") return "["+unicodeUrl+"]("+asciiUrl+")"; // without link text
|
||||
else return arguments[2]+asciiUrl+arguments[5]; // with link text
|
||||
if(arguments[2] == "<") return "["+workingUrl+"]("+asciiUrl+")"; // without link text
|
||||
else return arguments[2]+asciiUrl+urlSuffix; // with link text
|
||||
} else { // reference style link
|
||||
return arguments[1]+asciiUrl;
|
||||
}
|
||||
|
|
|
|||
43
spec/fixtures/bad_urls.txt
vendored
Normal file
43
spec/fixtures/bad_urls.txt
vendored
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
#
|
||||
# http://mathiasbynens.be/demo/url-regex
|
||||
# lines starting with '#' are ignored
|
||||
#
|
||||
http://
|
||||
http://.
|
||||
http://..
|
||||
http://../
|
||||
http://?
|
||||
http://??
|
||||
http://??/
|
||||
http://#
|
||||
http://##
|
||||
http://##/
|
||||
http://foo.bar?q=Spaces should be encoded
|
||||
//
|
||||
//a
|
||||
///a
|
||||
///
|
||||
http:///a
|
||||
foo.com
|
||||
rdar://1234
|
||||
h://test
|
||||
http:// shouldfail.com
|
||||
:// should fail
|
||||
http://foo.bar/foo(bar)baz quux
|
||||
ftps://foo.bar/
|
||||
http://-error-.invalid/
|
||||
http://a.b--c.de/
|
||||
http://-a.b.co
|
||||
http://a.b-.co
|
||||
http://0.0.0.0
|
||||
http://10.1.1.0
|
||||
http://10.1.1.255
|
||||
http://224.1.1.1
|
||||
http://1.1.1.1.1
|
||||
http://123.123.123
|
||||
http://3628126748
|
||||
http://.www.foo.bar/
|
||||
http://www.foo.bar./
|
||||
http://.www.foo.bar./
|
||||
http://10.1.1.1
|
||||
http://10.1.1.254
|
||||
40
spec/fixtures/good_urls.txt
vendored
Normal file
40
spec/fixtures/good_urls.txt
vendored
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
#
|
||||
# http://mathiasbynens.be/demo/url-regex
|
||||
# lines starting with '#' are ignored
|
||||
#
|
||||
http://foo.com/blah_blah
|
||||
http://foo.com/blah_blah/
|
||||
http://foo.com/blah_blah_(wikipedia)
|
||||
http://foo.com/blah_blah_(wikipedia)_(again)
|
||||
http://www.example.com/wpstyle/?p=364
|
||||
https://www.example.com/foo/?bar=baz&inga=42&quux
|
||||
http://✪df.ws/123
|
||||
http://userid:password@example.com:8080
|
||||
http://userid:password@example.com:8080/
|
||||
http://userid@example.com
|
||||
http://userid@example.com/
|
||||
http://userid@example.com:8080
|
||||
http://userid@example.com:8080/
|
||||
http://userid:password@example.com
|
||||
http://userid:password@example.com/
|
||||
http://142.42.1.1/
|
||||
http://142.42.1.1:8080/
|
||||
http://➡.ws/䨹
|
||||
http://⌘.ws
|
||||
http://⌘.ws/
|
||||
http://foo.com/blah_(wikipedia)#cite-1
|
||||
http://foo.com/blah_(wikipedia)_blah#cite-1
|
||||
http://foo.com/unicode_(✪)_in_parens
|
||||
http://foo.com/(something)?after=parens
|
||||
http://☺.damowmow.com/
|
||||
http://code.google.com/events/#&product=browser
|
||||
http://j.mp
|
||||
ftp://foo.bar/baz
|
||||
http://foo.bar/?q=Test%20URL-encoded%20stuff
|
||||
http://مثال.إختبار
|
||||
http://例子.测试
|
||||
http://उदाहरण.परीक्षा
|
||||
# http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com
|
||||
http://1337.net
|
||||
http://a.b-c.de
|
||||
http://223.255.255.254
|
||||
|
|
@ -41,7 +41,7 @@ describe("app.helpers.textFormatter", function(){
|
|||
//
|
||||
// var formattedText = this.formatter.markdownify(links.join(" "))
|
||||
|
||||
var formattedText = this.formatter.markdownify(links.join(" and "))
|
||||
var formattedText = this.formatter.markdownify(links.join(" and "));
|
||||
var wrapper = $("<div>").html(formattedText);
|
||||
|
||||
_.each(links, function(link) {
|
||||
|
|
@ -87,14 +87,16 @@ describe("app.helpers.textFormatter", function(){
|
|||
"http://bündnis-für-krankenhäuser.de/wp-content/uploads/2011/11/cropped-logohp.jpg",
|
||||
"http://موقع.وزارة-الاتصالات.مصر/", // example from #3082
|
||||
"http:///scholar.google.com/citations?view_op=top_venues",
|
||||
"http://lyricstranslate.com/en/someone-you-നിന്നെ-പോലൊരാള്.html" // example from #3063
|
||||
"http://lyricstranslate.com/en/someone-you-നിന്നെ-പോലൊരാള്.html", // example from #3063,
|
||||
"http://de.wikipedia.org/wiki/Liste_der_Abkürzungen_(Netzjargon)" // #3645
|
||||
];
|
||||
this.asciiUrls = [
|
||||
"http://www.xn--brgerentscheid-krankenhuser-xkc78d.de",
|
||||
"http://xn--bndnis-fr-krankenhuser-i5b27cha.de/wp-content/uploads/2011/11/cropped-logohp.jpg",
|
||||
"http://xn--4gbrim.xn----ymcbaaajlc6dj7bxne2c.xn--wgbh1c/",
|
||||
"http:///scholar.google.com/citations?view_op=top_venues",
|
||||
"http://lyricstranslate.com/en/someone-you-%E0%B4%A8%E0%B4%BF%E0%B4%A8%E0%B5%8D%E0%B4%A8%E0%B5%86-%E0%B4%AA%E0%B5%8B%E0%B4%B2%E0%B5%8A%E0%B4%B0%E0%B4%BE%E0%B4%B3%E0%B5%8D%E2%80%8D.html"
|
||||
"http://lyricstranslate.com/en/someone-you-%E0%B4%A8%E0%B4%BF%E0%B4%A8%E0%B5%8D%E0%B4%A8%E0%B5%86-%E0%B4%AA%E0%B5%8B%E0%B4%B2%E0%B5%8A%E0%B4%B0%E0%B4%BE%E0%B4%B3%E0%B5%8D%E2%80%8D.html",
|
||||
"http://de.wikipedia.org/wiki/Liste_der_Abk%C3%BCrzungen_%28Netzjargon%29"
|
||||
];
|
||||
});
|
||||
|
||||
|
|
@ -141,6 +143,46 @@ describe("app.helpers.textFormatter", function(){
|
|||
|
||||
});
|
||||
|
||||
context("misc breakage and/or other issues with weird urls", function(){
|
||||
it("doesn't crash Chromium - RUN ME WITH CHROMIUM! (issue #3553)", function() {
|
||||
|
||||
var text_part = 'Revert "rails admin is conflicting with client side validations: see https://github.com/sferik/rails_admin/issues/985"';
|
||||
var link_part = 'https://github.com/diaspora/diaspora/commit/61f40fc6bfe6bb859c995023b5a17d22c9b5e6e5';
|
||||
var content = '['+text_part+']('+link_part+')';
|
||||
var parsed = this.formatter.markdownify(content);
|
||||
|
||||
var link = 'href="' + link_part + '"';
|
||||
var text = '>'+ text_part +'<';
|
||||
|
||||
expect(parsed).toContain(link);
|
||||
expect(parsed).toContain(text);
|
||||
});
|
||||
|
||||
it("tests a bunch of benchmark urls", function(){
|
||||
var self = this;
|
||||
$.ajax({
|
||||
async: false,
|
||||
cache: false,
|
||||
url: '/spec/fixtures/good_urls.txt',
|
||||
success: function(data) { self.url_list = data.split("\n"); }
|
||||
});
|
||||
|
||||
_.each(this.url_list, function(url) {
|
||||
// skip 'comment' lines (those starting with '#')
|
||||
if( url.match(/^#/) ) return;
|
||||
|
||||
// regex.test() is stateful on a global (/g) regex because it advances lastIndex between calls; use match() and boolean-ify it
|
||||
var result = !!url.match(Diaspora.url_regex);
|
||||
expect(result).toBeTruthy();
|
||||
if( !result && console && console.log ) {
|
||||
console.log(url);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// TODO: try to match the 'bad_urls.txt' and have as few matches as possible
|
||||
});
|
||||
|
||||
})
|
||||
|
||||
describe(".hashtagify", function(){
|
||||
|
|
|
|||
Loading…
Reference in a new issue