From e5203182bfff291728bd423da8a4c084aa04d2b6 Mon Sep 17 00:00:00 2001
From: Benjamin Neff
Date: Tue, 31 May 2016 04:06:41 +0200
Subject: [PATCH] replace invalid characters from xml

---
 lib/diaspora_federation/entity.rb           | 6 +++++-
 spec/lib/diaspora_federation/entity_spec.rb | 7 +++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/lib/diaspora_federation/entity.rb b/lib/diaspora_federation/entity.rb
index 07e9acc..e433837 100644
--- a/lib/diaspora_federation/entity.rb
+++ b/lib/diaspora_federation/entity.rb
@@ -36,6 +36,10 @@ module DiasporaFederation
     extend PropertiesDSL
     include Logging
 
+    # Invalid XML characters
+    # @see https://www.w3.org/TR/REC-xml/#charsets "Extensible Markup Language (XML) 1.0"
+    INVALID_XML_REGEX = /[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\u{10000}-\u{10FFFF}]/
+
     # Initializes the Entity with the given attribute hash and freezes the created
     # instance it returns.
     #
@@ -230,7 +234,7 @@ module DiasporaFederation
     def simple_node(doc, name, value)
       xml_name = self.class.xml_names[name]
       Nokogiri::XML::Element.new(xml_name ? xml_name.to_s : name, doc).tap do |node|
-        node.content = value unless value.empty?
+        node.content = value.gsub(INVALID_XML_REGEX, "\uFFFD") unless value.empty?
       end
     end
 
diff --git a/spec/lib/diaspora_federation/entity_spec.rb b/spec/lib/diaspora_federation/entity_spec.rb
index f770815..a9a9a4b 100644
--- a/spec/lib/diaspora_federation/entity_spec.rb
+++ b/spec/lib/diaspora_federation/entity_spec.rb
@@ -91,6 +91,13 @@ module DiasporaFederation
           expect(%w(test1 test2 test3 test4)).to include(node.name)
         end
       end
+
+      it "replaces invalid XML characters" do
+        entity = Entities::TestEntity.new(test: "asdf\u0008asdf asdf💩asdf\nasdf")
+        xml = entity.to_xml.to_xml
+        parsed = Entities::TestEntity.from_xml(Nokogiri::XML::Document.parse(xml).root).test
+        expect(parsed).to eq("asdf�asdf asdf💩asdf\nasdf")
+      end
     end
 
     describe ".from_xml" do