From b7b8a115516a52226ce166464833129a81d10c8f Mon Sep 17 00:00:00 2001
From: Florian Staudacher <florian_staudacher@yahoo.de>
Date: Sat, 24 Mar 2012 00:10:36 +0100
Subject: [PATCH 1/3] fix #2665 - allow umlauts and other non-ascii characters
 in url

---
 config/assets.yml                             |   5 +-
 .../javascripts/app/helpers/text_formatter.js |  16 +
 public/javascripts/vendor/punycode.js         | 512 ++++++++++++++++++
 spec/javascripts/app/views/post_view_spec.js  |  14 +-
 spec/javascripts/support/jasmine.yml          |   1 +
 5 files changed, 545 insertions(+), 3 deletions(-)
 create mode 100644 public/javascripts/vendor/punycode.js

diff --git a/config/assets.yml b/config/assets.yml
index 8bf27bd77..e2a445ec6 100644
--- a/config/assets.yml
+++ b/config/assets.yml
@@ -38,18 +38,19 @@ javascripts:
     - public/javascripts/vendor/backbone.js
     - public/javascripts/vendor/handlebars-1.0.0.beta.6.js
     - public/javascripts/vendor/markdown/*
+    - public/javascripts/vendor/punycode.js
     - public/javascripts/app/app.js
     - public/javascripts/app/helpers/*
     - public/javascripts/app/router.js
     - public/javascripts/app/views.js
-    - public/javascripts/app/models/post.js 
+    - public/javascripts/app/models/post.js
     - public/javascripts/app/models/*
     - public/javascripts/app/pages/*
     - public/javascripts/app/collections/*
     - public/javascripts/app/views/stream_object_view.js
     - public/javascripts/app/views/content_view.js
     - public/javascripts/app/views/*.js
-    - public/javascripts/app/views/**/*.js 
+    - public/javascripts/app/views/**/*.js
 
     - public/javascripts/diaspora.js
     - public/javascripts/helpers/*.js
diff --git a/public/javascripts/app/helpers/text_formatter.js b/public/javascripts/app/helpers/text_formatter.js
index 08b4e224b..f3c6bc675 100644
--- a/public/javascripts/app/helpers/text_formatter.js
+++ b/public/javascripts/app/helpers/text_formatter.js
@@ -13,6 +13,22 @@
   textFormatter.markdownify = function markdownify(text){
     var converter = Markdown.getSanitizingConverter();
 
+    // punycode non-ascii chars in urls
+    converter.hooks.chain("preConversion", function(text) {
+      // remove < > around markdown-style urls
+      var mdUrlRegex = /<((https?|ftp):[^'">\s]+)>/gi;
+      text = text.replace(mdUrlRegex, function(wholematch, m1) {
+        return m1;
+      });
+
+      // regex shamelessly copied from http://daringfireball.net/2010/07/improved_regex_for_matching_urls
+      var urlRegex = /\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/g;
+      return text.replace(urlRegex, function(url){
+        var newUrl = "["+url+"]("+punycode.toASCII(url)+")"; // console.log( punycode.toASCII(url) );
+        return newUrl;
+      });
+    });
+
     converter.hooks.chain("postConversion", function (text) {
       return text.replace(/(\"(?:(?:http|https):\/\/)?[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(?:\/\S*)?\")(\>)/g, '$1 target="_blank">')
     });
diff --git a/public/javascripts/vendor/punycode.js b/public/javascripts/vendor/punycode.js
new file mode 100644
index 000000000..8688f2efd
--- /dev/null
+++ b/public/javascripts/vendor/punycode.js
@@ -0,0 +1,512 @@
+/*! http://mths.be/punycode by @mathias */
+;(function(root) {
+
+	/**
+	 * The `punycode` object.
+	 * @name punycode
+	 * @type Object
+	 */
+	var punycode,
+
+	/** Detect free variables `define`, `exports`, `module` and `require` */
+	freeDefine = typeof define == 'function' && typeof define.amd == 'object' &&
+		define.amd && define,
+	freeExports = typeof exports == 'object' && exports,
+	freeModule = typeof module == 'object' && module,
+	freeRequire = typeof require == 'function' && require,
+
+	/** Highest positive signed 32-bit integer value */
+	maxInt = 2147483647, // aka. 0x7FFFFFFF or 2^31-1
+
+	/** Bootstring parameters */
+	base = 36,
+	tMin = 1,
+	tMax = 26,
+	skew = 38,
+	damp = 700,
+	initialBias = 72,
+	initialN = 128, // 0x80
+	delimiter = '-', // '\x2D'
+
+	/** Regular expressions */
+	regexNonASCII = /[^ -~]/, // unprintable ASCII chars + non-ASCII chars
+	regexPunycode = /^xn--/,
+
+	/** Error messages */
+	errors = {
+		'overflow': 'Overflow: input needs wider integers to process.',
+		'ucs2decode': 'UCS-2(decode): illegal sequence',
+		'ucs2encode': 'UCS-2(encode): illegal value',
+		'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
+		'invalid-input': 'Invalid input'
+	},
+
+	/** Convenience shortcuts */
+	baseMinusTMin = base - tMin,
+	floor = Math.floor,
+	stringFromCharCode = String.fromCharCode,
+
+	/** Temporary variable */
+	key;
+
+	/*--------------------------------------------------------------------------*/
+
+	/**
+	 * A generic error utility function.
+	 * @private
+	 * @param {String} type The error type.
+	 * @returns {Error} Throws a `RangeError` with the applicable error message.
+	 */
+	function error(type) {
+		throw RangeError(errors[type]);
+	}
+
+	/**
+	 * A generic `Array#map` utility function.
+	 * @private
+	 * @param {Array} array The array to iterate over; it is left unmodified.
+	 * @param {Function} fn The function that gets called for every array
+	 * item.
+	 * @returns {Array} A new array of values returned by the callback function.
+	 */
+	function map(array, fn) {
+		var length = array.length, result = [];
+		while (length--) {
+			result[length] = fn(array[length]);
+		}
+		return result;
+	}
+
+	/**
+	 * A simple `Array#map`-like wrapper to work with domain name strings.
+	 * @private
+	 * @param {String} string The domain name.
+	 * @param {Function} fn The function that gets called for every
+	 * label (the `.`-separated parts of the domain name).
+	 * @returns {String} A new string of the labels returned by the callback
+	 * function, re-joined with `.`.
+	 */
+	function mapDomain(string, fn) {
+		var glue = '.';
+		return map(string.split(glue), fn).join(glue);
+	}
+
+	/**
+	 * Creates an array containing the decimal code points of each Unicode
+	 * character in the string. While JavaScript uses UCS-2 internally,
+	 * this function will convert a pair of surrogate halves (each of which
+	 * UCS-2 exposes as separate characters) into a single code point,
+	 * matching UTF-16.
+	 * @see `punycode.ucs2.encode`
+	 * @see <http://mathiasbynens.be/notes/javascript-encoding>
+	 * @memberOf punycode.ucs2
+	 * @name decode
+	 * @param {String} string The Unicode input string (UCS-2).
+	 * @returns {Array} The new array of code points.
+	 */
+	function ucs2decode(string) {
+		var output = [],
+		    counter = 0,
+		    length = string.length,
+		    value,
+		    extra;
+		while (counter < length) {
+			value = string.charCodeAt(counter++);
+			if ((value & 0xF800) == 0xD800) {
+				extra = string.charCodeAt(counter++);
+				if ((value & 0xFC00) != 0xD800 || (extra & 0xFC00) != 0xDC00) {
+					error('ucs2decode');
+				}
+				value = ((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000;
+			}
+			output.push(value);
+		}
+		return output;
+	}
+
+	/**
+	 * Creates a string based on an array of decimal code points.
+	 * @see `punycode.ucs2.decode`
+	 * @memberOf punycode.ucs2
+	 * @name encode
+	 * @param {Array} array The array of decimal code points.
+	 * @returns {String} The new Unicode string (UCS-2).
+	 */
+	function ucs2encode(array) {
+		return map(array, function(value) {
+			var output = '';
+			if ((value & 0xF800) == 0xD800) {
+				error('ucs2encode');
+			}
+			if (value > 0xFFFF) {
+				value -= 0x10000;
+				output += stringFromCharCode(value >>> 10 & 0x3FF | 0xD800);
+				value = 0xDC00 | value & 0x3FF;
+			}
+			output += stringFromCharCode(value);
+			return output;
+		}).join('');
+	}
+
+	/**
+	 * Converts a basic code point into a digit/integer.
+	 * @see `digitToBasic()`
+	 * @private
+	 * @param {Number} codePoint The basic (decimal) code point.
+	 * @returns {Number} The numeric value of a basic code point (for use in
+	 * representing integers) in the range `0` to `base - 1`, or `base` if
+	 * the code point does not represent a value.
+	 */
+	function basicToDigit(codePoint) {
+		return codePoint >= 48 && codePoint < 58 // '0'..'9' map to 26..35
+			? codePoint - 22
+			: codePoint >= 65 && codePoint < 91 // 'A'..'Z' map to 0..25
+				? codePoint - 65
+				: codePoint >= 97 && codePoint < 123 // 'a'..'z' map to 0..25
+					? codePoint - 97
+					: base;
+	}
+
+	/**
+	 * Converts a digit/integer into a basic code point.
+	 * @see `basicToDigit()`
+	 * @private
+	 * @param {Number} digit The numeric value of a basic code point.
+	 * @returns {Number} The basic code point whose value (when used for
+	 * representing integers) is `digit`, which needs to be in the range
+	 * `0` to `base - 1`. If `flag` is non-zero, the uppercase form is
+	 * used; else, the lowercase form is used. The behavior is undefined
+	 * if flag is non-zero and `digit` has no uppercase form.
+	 */
+	function digitToBasic(digit, flag) {
+		//  0..25 map to ASCII a..z or A..Z
+		// 26..35 map to ASCII 0..9
+		return digit + 22 + 75 * (digit < 26) - ((flag != 0) << 5);
+	}
+
+	/**
+	 * Bias adaptation function as per section 3.4 of RFC 3492.
+	 * http://tools.ietf.org/html/rfc3492#section-3.4
+	 * @private
+	 */
+	function adapt(delta, numPoints, firstTime) {
+		var k = 0;
+		delta = firstTime ? floor(delta / damp) : delta >> 1;
+		delta += floor(delta / numPoints);
+		for (/* no initialization */; delta > baseMinusTMin * tMax >> 1; k += base) {
+			delta = floor(delta / baseMinusTMin);
+		}
+		return floor(k + (baseMinusTMin + 1) * delta / (delta + skew));
+	}
+
+	/**
+	 * Converts a basic code point to lowercase if `flag` is falsy, or to
+	 * uppercase if `flag` is truthy. Caseless code points (digits, `-`) are
+	 * returned unchanged.
+	 * @private
+	 * @param {Number} codePoint The numeric value of a basic code point.
+	 * @param {*} flag Truthy selects uppercase, falsy selects lowercase.
+	 * @returns {Number} The resulting basic code point.
+	 */
+	function encodeBasic(codePoint, flag) {
+		codePoint -= (codePoint >= 97 && codePoint < 123) << 5; // fold a..z to A..Z
+		return codePoint + ((!flag && codePoint >= 65 && codePoint < 91) << 5);
+	}
+
+	/**
+	 * Converts a Punycode string of ASCII code points to a string of Unicode
+	 * code points.
+	 * @memberOf punycode
+	 * @param {String} input The Punycode string of ASCII code points.
+	 * @returns {String} The resulting string of Unicode code points.
+	 */
+	function decode(input) {
+		// Don't use UCS-2
+		var output = [],
+		    inputLength = input.length,
+		    out,
+		    i = 0,
+		    n = initialN,
+		    bias = initialBias,
+		    basic,
+		    j,
+		    index,
+		    oldi,
+		    w,
+		    k,
+		    digit,
+		    t,
+		    length,
+		    /** Cached calculation results */
+		    baseMinusT;
+
+		// Handle the basic code points: let `basic` be the number of input code
+		// points before the last delimiter, or `0` if there is none, then copy
+		// the first basic code points to the output.
+
+		basic = input.lastIndexOf(delimiter);
+		if (basic < 0) {
+			basic = 0;
+		}
+
+		for (j = 0; j < basic; ++j) {
+			// if it's not a basic code point
+			if (input.charCodeAt(j) >= 0x80) {
+				error('not-basic');
+			}
+			output.push(input.charCodeAt(j));
+		}
+
+		// Main decoding loop: start just after the last delimiter if any basic code
+		// points were copied; start at the beginning otherwise.
+
+		for (index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {
+
+			// `index` is the index of the next character to be consumed.
+			// Decode a generalized variable-length integer into `delta`,
+			// which gets added to `i`. The overflow checking is easier
+			// if we increase `i` as we go, then subtract off its starting
+			// value at the end to obtain `delta`.
+			for (oldi = i, w = 1, k = base; /* no condition */; k += base) {
+
+				if (index >= inputLength) {
+					error('invalid-input');
+				}
+
+				digit = basicToDigit(input.charCodeAt(index++));
+
+				if (digit >= base || digit > floor((maxInt - i) / w)) {
+					error('overflow');
+				}
+
+				i += digit * w;
+				t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
+
+				if (digit < t) {
+					break;
+				}
+
+				baseMinusT = base - t;
+				if (w > floor(maxInt / baseMinusT)) {
+					error('overflow');
+				}
+
+				w *= baseMinusT;
+
+			}
+
+			out = output.length + 1;
+			bias = adapt(i - oldi, out, oldi == 0);
+
+			// `i` was supposed to wrap around from `out` to `0`,
+			// incrementing `n` each time, so we'll fix that now:
+			if (floor(i / out) > maxInt - n) {
+				error('overflow');
+			}
+
+			n += floor(i / out);
+			i %= out;
+
+			// Insert `n` at position `i` of the output
+			output.splice(i++, 0, n);
+
+		}
+
+		return ucs2encode(output);
+	}
+
+	/**
+	 * Converts a string of Unicode code points to a Punycode string of ASCII
+	 * code points.
+	 * @memberOf punycode
+	 * @param {String} input The string of Unicode code points.
+	 * @returns {String} The resulting Punycode string of ASCII code points.
+	 */
+	function encode(input) {
+		var n,
+		    delta,
+		    handledCPCount,
+		    basicLength,
+		    bias,
+		    j,
+		    m,
+		    q,
+		    k,
+		    t,
+		    currentValue,
+		    output = [],
+		    /** `inputLength` will hold the number of code points in `input`. */
+		    inputLength,
+		    /** Cached calculation results */
+		    handledCPCountPlusOne,
+		    baseMinusT,
+		    qMinusT;
+
+		// Convert the input in UCS-2 to Unicode
+		input = ucs2decode(input);
+
+		// Cache the length
+		inputLength = input.length;
+
+		// Initialize the state
+		n = initialN;
+		delta = 0;
+		bias = initialBias;
+
+		// Handle the basic code points
+		for (j = 0; j < inputLength; ++j) {
+			currentValue = input[j];
+			if (currentValue < 0x80) {
+				output.push(stringFromCharCode(currentValue));
+			}
+		}
+
+		handledCPCount = basicLength = output.length;
+
+		// `handledCPCount` is the number of code points that have been handled;
+		// `basicLength` is the number of basic code points.
+
+		// Finish the basic string - if it is not empty - with a delimiter
+		if (basicLength) {
+			output.push(delimiter);
+		}
+
+		// Main encoding loop:
+		while (handledCPCount < inputLength) {
+
+			// All non-basic code points < n have been handled already. Find the next
+			// larger one:
+			for (m = maxInt, j = 0; j < inputLength; ++j) {
+				currentValue = input[j];
+				if (currentValue >= n && currentValue < m) {
+					m = currentValue;
+				}
+			}
+
+			// Increase `delta` enough to advance the decoder's <n,i> state to <m,0>,
+			// but guard against overflow
+			handledCPCountPlusOne = handledCPCount + 1;
+			if (m - n > floor((maxInt - delta) / handledCPCountPlusOne)) {
+				error('overflow');
+			}
+
+			delta += (m - n) * handledCPCountPlusOne;
+			n = m;
+
+			for (j = 0; j < inputLength; ++j) {
+				currentValue = input[j];
+
+				if (currentValue < n && ++delta > maxInt) {
+					error('overflow');
+				}
+
+				if (currentValue == n) {
+					// Represent delta as a generalized variable-length integer
+					for (q = delta, k = base; /* no condition */; k += base) {
+						t = k <= bias ? tMin : (k >= bias + tMax ? tMax : k - bias);
+						if (q < t) {
+							break;
+						}
+						qMinusT = q - t;
+						baseMinusT = base - t;
+						output.push(
+							stringFromCharCode(digitToBasic(t + qMinusT % baseMinusT, 0))
+						);
+						q = floor(qMinusT / baseMinusT);
+					}
+
+					output.push(stringFromCharCode(digitToBasic(q, 0)));
+					bias = adapt(delta, handledCPCountPlusOne, handledCPCount == basicLength);
+					delta = 0;
+					++handledCPCount;
+				}
+			}
+
+			++delta;
+			++n;
+
+		}
+		return output.join('');
+	}
+
+	/**
+	 * Converts a Punycode string representing a domain name to Unicode. Only the
+	 * Punycoded parts of the domain name will be converted, i.e. it doesn't
+	 * matter if you call it on a string that has already been converted to
+	 * Unicode.
+	 * @memberOf punycode
+	 * @param {String} domain The Punycode domain name to convert to Unicode.
+	 * @returns {String} The Unicode representation of the given Punycode
+	 * string.
+	 */
+	function toUnicode(domain) {
+		return mapDomain(domain, function(string) {
+			return regexPunycode.test(string)
+				? decode(string.slice(4).toLowerCase())
+				: string;
+		});
+	}
+
+	/**
+	 * Converts a Unicode string representing a domain name to Punycode. Only the
+	 * non-ASCII parts of the domain name will be converted, i.e. it doesn't
+	 * matter if you call it with a domain that's already in ASCII.
+	 * @memberOf punycode
+	 * @param {String} domain The domain name to convert, as a Unicode string.
+	 * @returns {String} The Punycode representation of the given domain name.
+	 */
+	function toASCII(domain) {
+		return mapDomain(domain, function(string) {
+			return regexNonASCII.test(string)
+				? 'xn--' + encode(string)
+				: string;
+		});
+	}
+
+	/*--------------------------------------------------------------------------*/
+
+	/** Define the public API */
+	punycode = {
+		/**
+		 * A string representing the current Punycode.js version number.
+		 * @memberOf punycode
+		 * @type String
+		 */
+		'version': '1.0.0',
+		/**
+		 * An object of methods to convert from JavaScript's internal character
+		 * representation (UCS-2) to decimal Unicode code points, and back.
+		 * @see <http://mathiasbynens.be/notes/javascript-encoding>
+		 * @memberOf punycode
+		 * @type Object
+		 */
+		'ucs2': {
+			'decode': ucs2decode,
+			'encode': ucs2encode
+		},
+		'decode': decode,
+		'encode': encode,
+		'toASCII': toASCII,
+		'toUnicode': toUnicode
+	};
+
+	/** Expose `punycode` */
+	if (freeExports) {
+		if (freeModule && freeModule.exports == freeExports) {
+			// in Node.js or Ringo 0.8+
+			freeModule.exports = punycode;
+		} else {
+			// in Narwhal or Ringo 0.7-
+			for (key in punycode) {
+				punycode.hasOwnProperty(key) && (freeExports[key] = punycode[key]);
+			}
+		}
+	} else if (freeDefine) {
+		// via curl.js or RequireJS
+		define('punycode', punycode);
+	} else {
+		// in a browser or Rhino
+		root.punycode = punycode;
+	}
+
+}(this));
\ No newline at end of file
diff --git a/spec/javascripts/app/views/post_view_spec.js b/spec/javascripts/app/views/post_view_spec.js
index 4c292b879..b1ec3b956 100644
--- a/spec/javascripts/app/views/post_view_spec.js
+++ b/spec/javascripts/app/views/post_view_spec.js
@@ -38,7 +38,7 @@ describe("app.views.Post", function(){
           expect($(view.el).html()).not.toContain("0 Reshares")
         })
     })
-    
+
     context("likes", function(){
         it("displays a like count", function(){
           this.statusMessage.set({likes_count : 1})
@@ -141,5 +141,17 @@ describe("app.views.Post", function(){
       })
     })
 
+    context("markdown rendering", function() {
+      it("correctly handles non-ascii characters in urls", function() {
+        // example from issue #2665
+        var evilUrl = "http://www.bürgerentscheid-krankenhäuser.de";
+        this.statusMessage.set({text: "<"+evilUrl+">"});
+        var view = new app.views.Post({model : this.statusMessage}).render();
+
+        expect($(view.el).html()).toContain("http://www.xn--brgerentscheid-krankenhuser-xkc78d.de");
+        expect($(view.el).html()).toContain(evilUrl);
+      });
+    });
+
   })
 });
diff --git a/spec/javascripts/support/jasmine.yml b/spec/javascripts/support/jasmine.yml
index c610f3783..83fbd1874 100644
--- a/spec/javascripts/support/jasmine.yml
+++ b/spec/javascripts/support/jasmine.yml
@@ -30,6 +30,7 @@ src_files:
   - public/javascripts/vendor/timeago.js
   - public/javascripts/vendor/facebox.js
   - public/javascripts/vendor/markdown/*
+  - public/javascripts/vendor/punycode.js
   - public/javascripts/vendor/jquery.placeholder.js
   - public/javascripts/vendor/backbone.js
   - public/javascripts/fileuploader-custom.js

From c5139c9a71b2a5cad23ca67698434ab11e81c450 Mon Sep 17 00:00:00 2001
From: Florian Staudacher <florian_staudacher@yahoo.de>
Date: Sat, 24 Mar 2012 01:51:41 +0100
Subject: [PATCH 2/3] improve behaviour, add more tests

---
 .../javascripts/app/helpers/text_formatter.js | 27 ++++++++++------
 spec/javascripts/app/views/post_view_spec.js  | 31 ++++++++++++++++---
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/public/javascripts/app/helpers/text_formatter.js b/public/javascripts/app/helpers/text_formatter.js
index f3c6bc675..1e1e03aef 100644
--- a/public/javascripts/app/helpers/text_formatter.js
+++ b/public/javascripts/app/helpers/text_formatter.js
@@ -15,18 +15,27 @@
 
     // punycode non-ascii chars in urls
     converter.hooks.chain("preConversion", function(text) {
-      // remove < > around markdown-style urls
-      var mdUrlRegex = /<((https?|ftp):[^'">\s]+)>/gi;
-      text = text.replace(mdUrlRegex, function(wholematch, m1) {
-        return m1;
+
+      // add < > around plain urls, effectively making them "autolinks"
+      var urlRegex = /(^|\s)\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/gi;
+      text = text.replace(urlRegex, function(wholematch, space, url) {
+        return space+"<"+url+">";
       });
 
-      // regex shamelessly copied from http://daringfireball.net/2010/07/improved_regex_for_matching_urls
-      var urlRegex = /\b((?:[a-z][\w-]+:(?:\/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}\/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'".,<>?«»“”‘’]))/g;
-      return text.replace(urlRegex, function(url){
-        var newUrl = "["+url+"]("+punycode.toASCII(url)+")"; // console.log( punycode.toASCII(url) );
-        return newUrl;
+      // process links
+      var linkRegex = /(\[.*\]:\s)?(<|\()((https?|ftp):[^'">\s]+)(>|\))/gi;
+      text = text.replace(linkRegex, function() {
+        var unicodeUrl = arguments[3];
+        var asciiUrl = punycode.toASCII(unicodeUrl);
+        if(arguments[1] == "") { // inline link
+          if(arguments[2] == "<") return "["+unicodeUrl+"]("+asciiUrl+")"; // without link text
+          else return arguments[2]+asciiUrl+arguments[5]; // with link text
+        } else { // reference style link
+          return arguments[1]+asciiUrl;
+        }
       });
+
+      return text;
     });
 
     converter.hooks.chain("postConversion", function (text) {
diff --git a/spec/javascripts/app/views/post_view_spec.js b/spec/javascripts/app/views/post_view_spec.js
index b1ec3b956..df6f31b73 100644
--- a/spec/javascripts/app/views/post_view_spec.js
+++ b/spec/javascripts/app/views/post_view_spec.js
@@ -142,14 +142,35 @@ describe("app.views.Post", function(){
     })
 
     context("markdown rendering", function() {
-      it("correctly handles non-ascii characters in urls", function() {
+      beforeEach(function() {
         // example from issue #2665
-        var evilUrl = "http://www.bürgerentscheid-krankenhäuser.de";
-        this.statusMessage.set({text: "<"+evilUrl+">"});
+        this.evilUrl  = "http://www.bürgerentscheid-krankenhäuser.de";
+        this.asciiUrl = "http://www.xn--brgerentscheid-krankenhuser-xkc78d.de";
+      });
+
+      it("correctly handles non-ascii characters in urls", function() {
+        this.statusMessage.set({text: "<"+this.evilUrl+">"});
         var view = new app.views.Post({model : this.statusMessage}).render();
 
-        expect($(view.el).html()).toContain("http://www.xn--brgerentscheid-krankenhuser-xkc78d.de");
-        expect($(view.el).html()).toContain(evilUrl);
+        expect($(view.el).html()).toContain(this.asciiUrl);
+        expect($(view.el).html()).toContain(this.evilUrl);
+      });
+
+      it("doesn't break link texts for non-ascii urls", function() {
+        var linkText = "check out this awesome link!";
+        this.statusMessage.set({text: "["+linkText+"]("+this.evilUrl+")"});
+        var view = new app.views.Post({model: this.statusMessage}).render();
+
+        expect($(view.el).html()).toContain(this.asciiUrl);
+        expect($(view.el).html()).toContain(linkText);
+      });
+
+      it("doesn't break reference style links for non-ascii urls", function() {
+        var postContent = "blabla blab [my special link][1] bla blabla\n\n[1]: "+this.evilUrl+" and an optional title)";
+        this.statusMessage.set({text: postContent});
+        var view = new app.views.Post({model: this.statusMessage}).render();
+
+        expect($(view.el).html()).not.toContain(this.evilUrl);
       });
     });
 

From 49801644cc8f2c3f0ceb0dbcfeae5b3ebb153c32 Mon Sep 17 00:00:00 2001
From: Florian Staudacher <florian_staudacher@yahoo.de>
Date: Sat, 24 Mar 2012 02:57:32 +0100
Subject: [PATCH 3/3] also handle images and links without sub-domain

---
 public/javascripts/app/helpers/text_formatter.js | 11 ++++++-----
 spec/javascripts/app/views/post_view_spec.js     | 12 +++++++++++-
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/public/javascripts/app/helpers/text_formatter.js b/public/javascripts/app/helpers/text_formatter.js
index 1e1e03aef..a5c198805 100644
--- a/public/javascripts/app/helpers/text_formatter.js
+++ b/public/javascripts/app/helpers/text_formatter.js
@@ -23,13 +23,14 @@
       });
 
       // process links
-      var linkRegex = /(\[.*\]:\s)?(<|\()((https?|ftp):[^'">\s]+)(>|\))/gi;
+      var linkRegex = /(\[.*\]:\s)?(<|\()(((https?|ftp):\/{1,3})([^'">\s]+))(>|\))/gi;
       text = text.replace(linkRegex, function() {
-        var unicodeUrl = arguments[3];
-        var asciiUrl = punycode.toASCII(unicodeUrl);
+        arguments[1] = arguments[1] || ""; // an unmatched optional group is undefined in spec-compliant engines
+        var protocol = arguments[4], unicodeUrl = arguments[6];
+        var asciiUrl = protocol+punycode.toASCII(unicodeUrl);
         if(arguments[1] == "") { // inline link
-          if(arguments[2] == "<") return "["+unicodeUrl+"]("+asciiUrl+")"; // without link text
-          else return arguments[2]+asciiUrl+arguments[5]; // with link text
+          if(arguments[2] == "<") return "["+protocol+unicodeUrl+"]("+asciiUrl+")"; // without link text
+          else return arguments[2]+asciiUrl+arguments[7]; // with link text
         } else { // reference style link
           return arguments[1]+asciiUrl;
         }
diff --git a/spec/javascripts/app/views/post_view_spec.js b/spec/javascripts/app/views/post_view_spec.js
index df6f31b73..d1ef0e2b3 100644
--- a/spec/javascripts/app/views/post_view_spec.js
+++ b/spec/javascripts/app/views/post_view_spec.js
@@ -171,8 +171,18 @@ describe("app.views.Post", function(){
         var view = new app.views.Post({model: this.statusMessage}).render();
 
         expect($(view.el).html()).not.toContain(this.evilUrl);
+        expect($(view.el).html()).toContain(this.asciiUrl);
       });
-    });
 
+      it("correctly handles images with non-ascii urls", function() {
+        var postContent = "![logo](http://bündnis-für-krankenhäuser.de/wp-content/uploads/2011/11/cropped-logohp.jpg)";
+        var niceImg = '"http://xn--bndnis-fr-krankenhuser-i5b27cha.de/wp-content/uploads/2011/11/cropped-logohp.jpg"';
+        this.statusMessage.set({text: postContent});
+        var view = new app.views.Post({model: this.statusMessage}).render();
+
+        expect($(view.el).html()).toContain(niceImg);
+      });
+
+    });
   })
 });