From b7b8a115516a52226ce166464833129a81d10c8f Mon Sep 17 00:00:00 2001 From: Florian Staudacher Date: Sat, 24 Mar 2012 00:10:36 +0100 Subject: [PATCH 1/3] fix #2665 - allow umlauts and other non-ascii characters in url --- config/assets.yml | 5 +- .../javascripts/app/helpers/text_formatter.js | 16 + public/javascripts/vendor/punycode.js | 512 ++++++++++++++++++ spec/javascripts/app/views/post_view_spec.js | 14 +- spec/javascripts/support/jasmine.yml | 1 + 5 files changed, 545 insertions(+), 3 deletions(-) create mode 100644 public/javascripts/vendor/punycode.js diff --git a/config/assets.yml b/config/assets.yml index 8bf27bd77..e2a445ec6 100644 --- a/config/assets.yml +++ b/config/assets.yml @@ -38,18 +38,19 @@ javascripts: - public/javascripts/vendor/backbone.js - public/javascripts/vendor/handlebars-1.0.0.beta.6.js - public/javascripts/vendor/markdown/* + - public/javascripts/vendor/punycode.js - public/javascripts/app/app.js - public/javascripts/app/helpers/* - public/javascripts/app/router.js - public/javascripts/app/views.js - - public/javascripts/app/models/post.js + - public/javascripts/app/models/post.js - public/javascripts/app/models/* - public/javascripts/app/pages/* - public/javascripts/app/collections/* - public/javascripts/app/views/stream_object_view.js - public/javascripts/app/views/content_view.js - public/javascripts/app/views/*.js - - public/javascripts/app/views/**/*.js + - public/javascripts/app/views/**/*.js - public/javascripts/diaspora.js - public/javascripts/helpers/*.js diff --git a/public/javascripts/app/helpers/text_formatter.js b/public/javascripts/app/helpers/text_formatter.js index 08b4e224b..f3c6bc675 100644 --- a/public/javascripts/app/helpers/text_formatter.js +++ b/public/javascripts/app/helpers/text_formatter.js @@ -13,6 +13,22 @@ textFormatter.markdownify = function markdownify(text){ var converter = Markdown.getSanitizingConverter(); + // punycode non-ascii chars in urls + converter.hooks.chain("preConversion", function(text) { 
+ // remove < > around markdown-style urls + var mdUrlRegex = /<((https?|ftp):[^'">\s]+)>/gi; + text = text.replace(mdUrlRegex, function(wholematch, m1) { + return m1; + }); + + // regex shamelessly copied from http://daringfireball.net/2010/07/improved_regex_for_matching_urls + var urlRegex = /\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/g; + return text.replace(urlRegex, function(url){ + var newUrl = "["+url+"]("+punycode.toASCII(url)+")"; // console.log( punycode.toASCII(url) ); + return newUrl; + }); + }); + converter.hooks.chain("postConversion", function (text) { return text.replace(/(\"(?:(?:http|https):\/\/)?[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(?:\/\S*)?\")(\>)/g, '$1 target="_blank">') }); diff --git a/public/javascripts/vendor/punycode.js b/public/javascripts/vendor/punycode.js new file mode 100644 index 000000000..8688f2efd --- /dev/null +++ b/public/javascripts/vendor/punycode.js @@ -0,0 +1,512 @@ +/*! http://mths.be/punycode by @mathias */ +;(function(root) { + + /** + * The `punycode` object. + * @name punycode + * @type Object + */ + var punycode, + + /** Detect free variables `define`, `exports`, `module` and `require` */ + freeDefine = typeof define == 'function' && typeof define.amd == 'object' && + define.amd && define, + freeExports = typeof exports == 'object' && exports, + freeModule = typeof module == 'object' && module, + freeRequire = typeof require == 'function' && require, + + /** Highest positive signed 32-bit integer value */ + maxInt = 2147483647, // aka.
0x7FFFFFFF or 2^31-1 + + /** Bootstring parameters */ + base = 36, + tMin = 1, + tMax = 26, + skew = 38, + damp = 700, + initialBias = 72, + initialN = 128, // 0x80 + delimiter = '-', // '\x2D' + + /** Regular expressions */ + regexNonASCII = /[^ -~]/, // unprintable ASCII chars + non-ASCII chars + regexPunycode = /^xn--/, + + /** Error messages */ + errors = { + 'overflow': 'Overflow: input needs wider integers to process.', + 'ucs2decode': 'UCS-2(decode): illegal sequence', + 'ucs2encode': 'UCS-2(encode): illegal value', + 'not-basic': 'Illegal input >= 0x80 (not a basic code point)', + 'invalid-input': 'Invalid input' + }, + + /** Convenience shortcuts */ + baseMinusTMin = base - tMin, + floor = Math.floor, + stringFromCharCode = String.fromCharCode, + + /** Temporary variable */ + key; + + /*--------------------------------------------------------------------------*/ + + /** + * A generic error utility function. + * @private + * @param {String} type The error type. + * @returns {Error} Throws a `RangeError` with the applicable error message. + */ + function error(type) { + throw RangeError(errors[type]); + } + + /** + * A generic `Array#map` utility function. + * @private + * @param {Array} array The array to iterate over. + * @param {Function} callback The function that gets called for every array + * item. + * @returns {Array} A new array of values returned by the callback function. + */ + function map(array, fn) { + var length = array.length; + while (length--) { + array[length] = fn(array[length]); + } + return array; + } + + /** + * A simple `Array#map`-like wrapper to work with domain name strings. + * @private + * @param {String} domain The domain name. + * @param {Function} callback The function that gets called for every + * character. + * @returns {Array} A new string of characters returned by the callback + * function. 
+ */ + function mapDomain(string, fn) { + var glue = '.'; + return map(string.split(glue), fn).join(glue); + } + + /** + * Creates an array containing the decimal code points of each Unicode + * character in the string. While JavaScript uses UCS-2 internally, + * this function will convert a pair of surrogate halves (each of which + * UCS-2 exposes as separate characters) into a single code point, + * matching UTF-16. + * @see `punycode.ucs2.encode` + * @see + * @memberOf punycode.ucs2 + * @name decode + * @param {String} string The Unicode input string (UCS-2). + * @returns {Array} The new array of code points. + */ + function ucs2decode(string) { + var output = [], + counter = 0, + length = string.length, + value, + extra; + while (counter < length) { + value = string.charCodeAt(counter++); + if ((value & 0xF800) == 0xD800) { + extra = string.charCodeAt(counter++); + if ((value & 0xFC00) != 0xD800 || (extra & 0xFC00) != 0xDC00) { + error('ucs2decode'); + } + value = ((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000; + } + output.push(value); + } + return output; + } + + /** + * Creates a string based on an array of decimal code points. + * @see `punycode.ucs2.decode` + * @memberOf punycode.ucs2 + * @name encode + * @param {Array} codePoints The array of decimal code points. + * @returns {String} The new Unicode string (UCS-2). + */ + function ucs2encode(array) { + return map(array, function(value) { + var output = ''; + if ((value & 0xF800) == 0xD800) { + error('ucs2encode'); + } + if (value > 0xFFFF) { + value -= 0x10000; + output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800); + value = 0xDC00 | value & 0x3FF; + } + output += stringFromCharCode(value); + return output; + }).join(''); + } + + /** + * Converts a basic code point into a digit/integer. + * @see `digitToBasic()` + * @private + * @param {Number} codePoint The basic (decimal) code point. 
+ * @returns {Number} The numeric value of a basic code point (for use in + * representing integers) in the range `0` to `base - 1`, or `base` if + * the code point does not represent a value. + */ + function basicToDigit(codePoint) { + return codePoint - 48 < 10 + ? codePoint - 22 + : codePoint - 65 < 26 + ? codePoint - 65 + : codePoint - 97 < 26 + ? codePoint - 97 + : base; + } + + /** + * Converts a digit/integer into a basic code point. + * @see `basicToDigit()` + * @private + * @param {Number} digit The numeric value of a basic code point. + * @returns {Number} The basic code point whose value (when used for + * representing integers) is `digit`, which needs to be in the range + * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is + * used; else, the lowercase form is used. The behavior is undefined + * if flag is non-zero and `digit` has no uppercase form. + */ + function digitToBasic(digit, flag) { + // 0..25 map to ASCII a..z or A..Z + // 26..35 map to ASCII 0..9 + return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5); + } + + /** + * Bias adaptation function as per section 3.4 of RFC 3492. + * http://tools.ietf.org/html/rfc3492#section-3.4 + * @private + */ + function adapt(delta, numPoints, firstTime) { + var k = 0; + delta = firstTime ? floor(delta / damp) : delta >> 1; + delta += floor(delta / numPoints); + for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) { + delta = floor(delta / baseMinusTMin); + } + return floor(k + (baseMinusTMin + 1) * delta / (delta + skew)); + } + + /** + * Converts a basic code point to lowercase if `flag` is falsy, or to + * uppercase if `flag` is truthy. The code point is unchanged if it's + * caseless. The behavior is undefined if `codePoint` is not a basic code + * point. + * @private + * @param {Number} codePoint The numeric value of a basic code point. + * @returns {Number} The resulting basic code point.
+ */ + function encodeBasic(codePoint, flag) { + codePoint -= (codePoint - 97 < 26) << 5; + return codePoint + (!flag && codePoint - 65 < 26) << 5; + } + + /** + * Converts a Punycode string of ASCII code points to a string of Unicode + * code points. + * @memberOf punycode + * @param {String} input The Punycode string of ASCII code points. + * @returns {String} The resulting string of Unicode code points. + */ + function decode(input) { + // Don't use UCS-2 + var output = [], + inputLength = input.length, + out, + i = 0, + n = initialN, + bias = initialBias, + basic, + j, + index, + oldi, + w, + k, + digit, + t, + length, + /** Cached calculation results */ + baseMinusT; + + // Handle the basic code points: let `basic` be the number of input code + // points before the last delimiter, or `0` if there is none, then copy + // the first basic code points to the output. + + basic = input.lastIndexOf(delimiter); + if (basic < 0) { + basic = 0; + } + + for (j = 0; j < basic; ++j) { + // if it's not a basic code point + if (input.charCodeAt(j) >= 0x80) { + error('not-basic'); + } + output.push(input.charCodeAt(j)); + } + + // Main decoding loop: start just after the last delimiter if any basic code + // points were copied; start at the beginning otherwise. + + for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) { + + // `index` is the index of the next character to be consumed. + // Decode a generalized variable-length integer into `delta`, + // which gets added to `i`. The overflow checking is easier + // if we increase `i` as we go, then subtract off its starting + // value at the end to obtain `delta`. + for (oldi = i, w = 1, k = base; /* no condition */; k += base) { + + if (index >= inputLength) { + error('invalid-input'); + } + + digit = basicToDigit(input.charCodeAt(index++)); + + if (digit >= base || digit > floor((maxInt - i) / w)) { + error('overflow'); + } + + i += digit * w; + t = k <= bias ? tMin : (k >= bias + tMax ? 
tMax : k - bias); + + if (digit < t) { + break; + } + + baseMinusT = base - t; + if (w > floor(maxInt / baseMinusT)) { + error('overflow'); + } + + w *= baseMinusT; + + } + + out = output.length + 1; + bias = adapt(i - oldi, out, oldi == 0); + + // `i` was supposed to wrap around from `out` to `0`, + // incrementing `n` each time, so we'll fix that now: + if (floor(i / out) > maxInt - n) { + error('overflow'); + } + + n += floor(i / out); + i %= out; + + // Insert `n` at position `i` of the output + output.splice(i++, 0, n); + + } + + return ucs2encode(output); + } + + /** + * Converts a string of Unicode code points to a Punycode string of ASCII + * code points. + * @memberOf punycode + * @param {String} input The string of Unicode code points. + * @returns {String} The resulting Punycode string of ASCII code points. + */ + function encode(input) { + var n, + delta, + handledCPCount, + basicLength, + bias, + j, + m, + q, + k, + t, + currentValue, + output = [], + /** `inputLength` will hold the number of code points in `input`. */ + inputLength, + /** Cached calculation results */ + handledCPCountPlusOne, + baseMinusT, + qMinusT; + + // Convert the input in UCS-2 to Unicode + input = ucs2decode(input); + + // Cache the length + inputLength = input.length; + + // Initialize the state + n = initialN; + delta = 0; + bias = initialBias; + + // Handle the basic code points + for (j = 0; j < inputLength; ++j) { + currentValue = input[j]; + if (currentValue < 0x80) { + output.push(stringFromCharCode(currentValue)); + } + } + + handledCPCount = basicLength = output.length; + + // `handledCPCount` is the number of code points that have been handled; + // `basicLength` is the number of basic code points. + + // Finish the basic string - if it is not empty - with a delimiter + if (basicLength) { + output.push(delimiter); + } + + // Main encoding loop: + while (handledCPCount < inputLength) { + + // All non-basic code points < n have been handled already. 
Find the next + // larger one: + for (m = maxInt, j = 0; j < inputLength; ++j) { + currentValue = input[j]; + if (currentValue >= n && currentValue < m) { + m = currentValue; + } + } + + // Increase `delta` enough to advance the decoder's <n,i> state to <m,0>, + // but guard against overflow + handledCPCountPlusOne = handledCPCount + 1; + if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) { + error('overflow'); + } + + delta += (m - n) * handledCPCountPlusOne; + n = m; + + for (j = 0; j < inputLength; ++j) { + currentValue = input[j]; + + if (currentValue < n && ++delta > maxInt) { + error('overflow'); + } + + if (currentValue == n) { + // Represent delta as a generalized variable-length integer + for (q = delta, k = base; /* no condition */; k += base) { + t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias); + if (q < t) { + break; + } + qMinusT = q - t; + baseMinusT = base - t; + output.push( + stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0)) + ); + q = floor(qMinusT / baseMinusT); + } + + output.push(stringFromCharCode(digitToBasic(q, 0))); + bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength); + delta = 0; + ++handledCPCount; + } + } + + ++delta; + ++n; + + } + return output.join(''); + } + + /** + * Converts a Punycode string representing a domain name to Unicode. Only the + * Punycoded parts of the domain name will be converted, i.e. it doesn't + * matter if you call it on a string that has already been converted to + * Unicode. + * @memberOf punycode + * @param {String} domain The Punycode domain name to convert to Unicode. + * @returns {String} The Unicode representation of the given Punycode + * string. + */ + function toUnicode(domain) { + return mapDomain(domain, function(string) { + return regexPunycode.test(string) + ? decode(string.slice(4).toLowerCase()) + : string; + }); + } + + /** + * Converts a Unicode string representing a domain name to Punycode.
Only the + * non-ASCII parts of the domain name will be converted, i.e. it doesn't + * matter if you call it with a domain that's already in ASCII. + * @memberOf punycode + * @param {String} domain The domain name to convert, as a Unicode string. + * @returns {String} The Punycode representation of the given domain name. + */ + function toASCII(domain) { + return mapDomain(domain, function(string) { + return regexNonASCII.test(string) + ? 'xn--' + encode(string) + : string; + }); + } + + /*--------------------------------------------------------------------------*/ + + /** Define the public API */ + punycode = { + /** + * A string representing the current Punycode.js version number. + * @memberOf punycode + * @type String + */ + 'version': '1.0.0', + /** + * An object of methods to convert from JavaScript's internal character + * representation (UCS-2) to decimal Unicode code points, and back. + * @see + * @memberOf punycode + * @type Object + */ + 'ucs2': { + 'decode': ucs2decode, + 'encode': ucs2encode + }, + 'decode': decode, + 'encode': encode, + 'toASCII': toASCII, + 'toUnicode': toUnicode + }; + + /** Expose `punycode` */ + if (freeExports) { + if (freeModule && freeModule.exports == freeExports) { + // in Node.js or Ringo 0.8+ + freeModule.exports = punycode; + } else { + // in Narwhal or Ringo 0.7- + for (key in punycode) { + punycode.hasOwnProperty(key) && (freeExports[key] = punycode[key]); + } + } + } else if (freeDefine) { + // via curl.js or RequireJS + define('punycode', punycode); + } else { + // in a browser or Rhino + root.punycode = punycode; + } + +}(this)); \ No newline at end of file diff --git a/spec/javascripts/app/views/post_view_spec.js b/spec/javascripts/app/views/post_view_spec.js index 4c292b879..b1ec3b956 100644 --- a/spec/javascripts/app/views/post_view_spec.js +++ b/spec/javascripts/app/views/post_view_spec.js @@ -38,7 +38,7 @@ describe("app.views.Post", function(){ expect($(view.el).html()).not.toContain("0 Reshares") }) }) - + 
context("likes", function(){ it("displays a like count", function(){ this.statusMessage.set({likes_count : 1}) @@ -141,5 +141,17 @@ describe("app.views.Post", function(){ }) }) + context("markdown rendering", function() { + it("correctly handles non-ascii characters in urls", function() { + // example from issue #2665 + var evilUrl = "http://www.bürgerentscheid-krankenhäuser.de"; + this.statusMessage.set({text: "<"+evilUrl+">"}); + var view = new app.views.Post({model : this.statusMessage}).render(); + + expect($(view.el).html()).toContain("http://www.xn--brgerentscheid-krankenhuser-xkc78d.de"); + expect($(view.el).html()).toContain(evilUrl); + }); + }); + }) }); diff --git a/spec/javascripts/support/jasmine.yml b/spec/javascripts/support/jasmine.yml index c610f3783..83fbd1874 100644 --- a/spec/javascripts/support/jasmine.yml +++ b/spec/javascripts/support/jasmine.yml @@ -30,6 +30,7 @@ src_files: - public/javascripts/vendor/timeago.js - public/javascripts/vendor/facebox.js - public/javascripts/vendor/markdown/* + - public/javascripts/vendor/punycode.js - public/javascripts/vendor/jquery.placeholder.js - public/javascripts/vendor/backbone.js - public/javascripts/fileuploader-custom.js From c5139c9a71b2a5cad23ca67698434ab11e81c450 Mon Sep 17 00:00:00 2001 From: Florian Staudacher Date: Sat, 24 Mar 2012 01:51:41 +0100 Subject: [PATCH 2/3] improve behaviour, add more tests --- .../javascripts/app/helpers/text_formatter.js | 27 ++++++++++------ spec/javascripts/app/views/post_view_spec.js | 31 ++++++++++++++++--- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/public/javascripts/app/helpers/text_formatter.js b/public/javascripts/app/helpers/text_formatter.js index f3c6bc675..1e1e03aef 100644 --- a/public/javascripts/app/helpers/text_formatter.js +++ b/public/javascripts/app/helpers/text_formatter.js @@ -15,18 +15,27 @@ // punycode non-ascii chars in urls converter.hooks.chain("preConversion", function(text) { - // remove < > around markdown-style urls - 
var mdUrlRegex = /<((https?|ftp):[^'">\s]+)>/gi; - text = text.replace(mdUrlRegex, function(wholematch, m1) { - return m1; + + // add < > around plain urls, effectively making them "autolinks" + var urlRegex = /(^|\s)\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/gi; + text = text.replace(urlRegex, function(wholematch, space, url) { + return space+"<"+url+">"; }); - // regex shamelessly copied from http://daringfireball.net/2010/07/improved_regex_for_matching_urls - var urlRegex = /\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/g; - return text.replace(urlRegex, function(url){ - var newUrl = "["+url+"]("+punycode.toASCII(url)+")"; // console.log( punycode.toASCII(url) ); - return newUrl; + // process links + var linkRegex = /(\[.*\]:\s)?(<|\()((https?|ftp):[^'">\s]+)(>|\))/gi; + text = text.replace(linkRegex, function() { + var unicodeUrl = arguments[3]; + var asciiUrl = punycode.toASCII(unicodeUrl); + if(arguments[1] == "") { // inline link + if(arguments[2] == "<") return "["+unicodeUrl+"]("+asciiUrl+")"; // without link text + else return arguments[2]+asciiUrl+arguments[5]; // with link text + } else { // reference style link + return arguments[1]+asciiUrl; + } }); + + return text; }); converter.hooks.chain("postConversion", function (text) { diff --git a/spec/javascripts/app/views/post_view_spec.js b/spec/javascripts/app/views/post_view_spec.js index b1ec3b956..df6f31b73 100644 --- a/spec/javascripts/app/views/post_view_spec.js +++ b/spec/javascripts/app/views/post_view_spec.js @@ -142,14 +142,35 @@ describe("app.views.Post", function(){ }) context("markdown rendering", function() { - it("correctly handles non-ascii characters in urls", function() { + 
beforeEach(function() { // example from issue #2665 - var evilUrl = "http://www.bürgerentscheid-krankenhäuser.de"; - this.statusMessage.set({text: "<"+evilUrl+">"}); + this.evilUrl = "http://www.bürgerentscheid-krankenhäuser.de"; + this.asciiUrl = "http://www.xn--brgerentscheid-krankenhuser-xkc78d.de"; + }); + + it("correctly handles non-ascii characters in urls", function() { + this.statusMessage.set({text: "<"+this.evilUrl+">"}); var view = new app.views.Post({model : this.statusMessage}).render(); - expect($(view.el).html()).toContain("http://www.xn--brgerentscheid-krankenhuser-xkc78d.de"); - expect($(view.el).html()).toContain(evilUrl); + expect($(view.el).html()).toContain(this.asciiUrl); + expect($(view.el).html()).toContain(this.evilUrl); + }); + + it("doesn't break link texts for non-ascii urls", function() { + var linkText = "check out this awesome link!"; + this.statusMessage.set({text: "["+linkText+"]("+this.evilUrl+")"}); + var view = new app.views.Post({model: this.statusMessage}).render(); + + expect($(view.el).html()).toContain(this.asciiUrl); + expect($(view.el).html()).toContain(linkText); + }); + + it("doesn't break reference style links for non-ascii urls", function() { + var postContent = "blabla blab [my special link][1] bla blabla\n\n[1]: "+this.evilUrl+" and an optional title)"; + this.statusMessage.set({text: postContent}); + var view = new app.views.Post({model: this.statusMessage}).render(); + + expect($(view.el).html()).not.toContain(this.evilUrl); }); }); From 49801644cc8f2c3f0ceb0dbcfeae5b3ebb153c32 Mon Sep 17 00:00:00 2001 From: Florian Staudacher Date: Sat, 24 Mar 2012 02:57:32 +0100 Subject: [PATCH 3/3] also handle images and links without sub-domain --- public/javascripts/app/helpers/text_formatter.js | 11 ++++++----- spec/javascripts/app/views/post_view_spec.js | 12 +++++++++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/public/javascripts/app/helpers/text_formatter.js 
b/public/javascripts/app/helpers/text_formatter.js index 1e1e03aef..a5c198805 100644 --- a/public/javascripts/app/helpers/text_formatter.js +++ b/public/javascripts/app/helpers/text_formatter.js @@ -23,13 +23,14 @@ }); // process links - var linkRegex = /(\[.*\]:\s)?(<|\()((https?|ftp):[^'">\s]+)(>|\))/gi; + var linkRegex = /(\[.*\]:\s)?(<|\()(((https?|ftp):\/{1,3})([^'">\s]+))(>|\))/gi; text = text.replace(linkRegex, function() { - var unicodeUrl = arguments[3]; - var asciiUrl = punycode.toASCII(unicodeUrl); + var protocol = arguments[4]; + var unicodeUrl = arguments[6]; + var asciiUrl = protocol+punycode.toASCII(unicodeUrl); if(arguments[1] == "") { // inline link - if(arguments[2] == "<") return "["+unicodeUrl+"]("+asciiUrl+")"; // without link text - else return arguments[2]+asciiUrl+arguments[5]; // with link text + if(arguments[2] == "<") return "["+protocol+unicodeUrl+"]("+asciiUrl+")"; // without link text + else return arguments[2]+asciiUrl+arguments[7]; // with link text } else { // reference style link return arguments[1]+asciiUrl; } diff --git a/spec/javascripts/app/views/post_view_spec.js b/spec/javascripts/app/views/post_view_spec.js index df6f31b73..d1ef0e2b3 100644 --- a/spec/javascripts/app/views/post_view_spec.js +++ b/spec/javascripts/app/views/post_view_spec.js @@ -171,8 +171,18 @@ describe("app.views.Post", function(){ var view = new app.views.Post({model: this.statusMessage}).render(); expect($(view.el).html()).not.toContain(this.evilUrl); + expect($(view.el).html()).toContain(this.asciiUrl); }); - }); + it("correctly handles images with non-ascii urls", function() { + var postContent = "![logo](http://bündnis-für-krankenhäuser.de/wp-content/uploads/2011/11/cropped-logohp.jpg)"; + var niceImg = '"http://xn--bndnis-fr-krankenhuser-i5b27cha.de/wp-content/uploads/2011/11/cropped-logohp.jpg"'; + this.statusMessage.set({text: postContent}); + var view = new app.views.Post({model: this.statusMessage}).render(); + + 
expect($(view.el).html()).toContain(niceImg); + }); + + }); }) });