diff --git a/app/assets/javascripts/app/helpers/text_formatter.js b/app/assets/javascripts/app/helpers/text_formatter.js index b33fb3b53..a4602b12f 100644 --- a/app/assets/javascripts/app/helpers/text_formatter.js +++ b/app/assets/javascripts/app/helpers/text_formatter.js @@ -1,3 +1,9 @@ + +// cache url regex globally, for direct access when testing +$(function() { + Diaspora.url_regex = /(^|\s)\b((?:(?:https?|ftp):(?:\/{1,3})|www\.)(?:[^"<>\)\s]|\(([^\s()<>]+|(\([^\s()<>]+\)))\))+)(?=\s|$)/gi; +}); + (function(){ //make it so I take text and mentions rather than the modelapp.helpers.textFormatter( var textFormatter = function textFormatter(text, model) { @@ -17,20 +23,29 @@ converter.hooks.chain("preConversion", function(text) { // add < > around plain urls, effectively making them "autolinks" - // regex copied from: http://daringfireball.net/2010/07/improved_regex_for_matching_urls (slightly modified) - var urlRegex = /(^|\s)\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/gi; - text = text.replace(urlRegex, function(wholematch, space, url) { - if( url.match(/^[^\w]/) ) return wholematch; // evil witchcraft, noop - return space+"<"+url+">"; + text = text.replace(Diaspora.url_regex, function() { + var url = arguments[2]; + if( url.match(/^[^\w]/) ) return url; // evil witchcraft, noop + return arguments[1]+"<"+url+">"; }); // process links // regex copied from: https://code.google.com/p/pagedown/source/browse/Markdown.Converter.js#1198 (and slightly expanded) - var linkRegex = /(\[.*\]:\s)?(<|\()((?:(https?|ftp):\/\/[^\/'">\s]|www)[^'">\s]+?)(>|\))/gi; + var linkRegex = /(\[.*\]:\s)?(<|\()((?:(https?|ftp):\/\/[^\/'">\s]|www)[^'">\s]+?)([>\)]{1,2})/gi; text = text.replace(linkRegex, function() { var unicodeUrl = arguments[3]; + var urlSuffix = arguments[5]; + unicodeUrl = ( unicodeUrl.match(/^www/) ) ? 
('http://' + unicodeUrl) : unicodeUrl; + // handle parentheses, especially in case the link ends with ')' + if( urlSuffix.indexOf(')') != -1 && urlSuffix.indexOf('>') != -1 ) { + unicodeUrl += ')'; + urlSuffix = '>'; + } + // markdown doesn't like '(' or ')' anywhere, except where it wants + var workingUrl = unicodeUrl.replace(/\(/, "%28").replace(/\)/, "%29"); + var addr = parse_url(unicodeUrl); if( !addr.host ) addr.host = ""; // must not be 'undefined' @@ -45,8 +60,8 @@ (!addr.query ? '' : '?' + encodeURI(addr.query) ) + (!addr.fragment ? '' : '#' + encodeURI(addr.fragment) ); if( !arguments[1] || arguments[1] == "") { // inline link - if(arguments[2] == "<") return "["+unicodeUrl+"]("+asciiUrl+")"; // without link text - else return arguments[2]+asciiUrl+arguments[5]; // with link text + if(arguments[2] == "<") return "["+workingUrl+"]("+asciiUrl+")"; // without link text + else return arguments[2]+asciiUrl+urlSuffix; // with link text } else { // reference style link return arguments[1]+asciiUrl; } diff --git a/spec/fixtures/bad_urls.txt b/spec/fixtures/bad_urls.txt new file mode 100644 index 000000000..61ccd7b96 --- /dev/null +++ b/spec/fixtures/bad_urls.txt @@ -0,0 +1,43 @@ +# +# http://mathiasbynens.be/demo/url-regex +# lines starting with '#' are ignored +# +http:// +http://. +http://.. +http://../ +http://? +http://?? 
+http://??/ +http://# +http://## +http://##/ +http://foo.bar?q=Spaces should be encoded +// +//a +///a +/// +http:///a +foo.com +rdar://1234 +h://test +http:// shouldfail.com +:// should fail +http://foo.bar/foo(bar)baz quux +ftps://foo.bar/ +http://-error-.invalid/ +http://a.b--c.de/ +http://-a.b.co +http://a.b-.co +http://0.0.0.0 +http://10.1.1.0 +http://10.1.1.255 +http://224.1.1.1 +http://1.1.1.1.1 +http://123.123.123 +http://3628126748 +http://.www.foo.bar/ +http://www.foo.bar./ +http://.www.foo.bar./ +http://10.1.1.1 +http://10.1.1.254 \ No newline at end of file diff --git a/spec/fixtures/good_urls.txt b/spec/fixtures/good_urls.txt new file mode 100644 index 000000000..2ef9ff98b --- /dev/null +++ b/spec/fixtures/good_urls.txt @@ -0,0 +1,40 @@ +# +# http://mathiasbynens.be/demo/url-regex +# lines starting with '#' are ignored +# +http://foo.com/blah_blah +http://foo.com/blah_blah/ +http://foo.com/blah_blah_(wikipedia) +http://foo.com/blah_blah_(wikipedia)_(again) +http://www.example.com/wpstyle/?p=364 +https://www.example.com/foo/?bar=baz&inga=42&quux +http://✪df.ws/123 +http://userid:password@example.com:8080 +http://userid:password@example.com:8080/ +http://userid@example.com +http://userid@example.com/ +http://userid@example.com:8080 +http://userid@example.com:8080/ +http://userid:password@example.com +http://userid:password@example.com/ +http://142.42.1.1/ +http://142.42.1.1:8080/ +http://➡.ws/䨹 +http://⌘.ws +http://⌘.ws/ +http://foo.com/blah_(wikipedia)#cite-1 +http://foo.com/blah_(wikipedia)_blah#cite-1 +http://foo.com/unicode_(✪)_in_parens +http://foo.com/(something)?after=parens +http://☺.damowmow.com/ +http://code.google.com/events/#&product=browser +http://j.mp +ftp://foo.bar/baz +http://foo.bar/?q=Test%20URL-encoded%20stuff +http://مثال.إختبار +http://例子.测试 +http://उदाहरण.परीक्षा +# http://-.~_!$&'()*+,;=:%40:80%2f::::::@example.com +http://1337.net +http://a.b-c.de +http://223.255.255.254 \ No newline at end of file diff --git 
a/spec/javascripts/app/helpers/text_formatter_spec.js b/spec/javascripts/app/helpers/text_formatter_spec.js index d175e833e..51acb2816 100644 --- a/spec/javascripts/app/helpers/text_formatter_spec.js +++ b/spec/javascripts/app/helpers/text_formatter_spec.js @@ -41,7 +41,7 @@ describe("app.helpers.textFormatter", function(){ // // var formattedText = this.formatter.markdownify(links.join(" ")) - var formattedText = this.formatter.markdownify(links.join(" and ")) + var formattedText = this.formatter.markdownify(links.join(" and ")); var wrapper = $("