From 2679b0d9934d9f7686c5a77660c7128a437b1ee6 Mon Sep 17 00:00:00 2001 From: Alexander Borsuk Date: Fri, 12 May 2023 06:04:06 +0200 Subject: [PATCH] [kml] Do not save ASCII control symbols that are invalid for XML1.0 Signed-off-by: Alexander Borsuk --- kml/serdes.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/kml/serdes.cpp b/kml/serdes.cpp index 8bb1b944a2..66b5f4fc27 100644 --- a/kml/serdes.cpp +++ b/kml/serdes.cpp @@ -180,8 +180,28 @@ uint32_t ToRGBA(Channel red, Channel green, Channel blue, Channel alpha) static_cast(blue) << 8 | static_cast(alpha); } -void SaveStringWithCDATA(KmlWriter::WriterWrapper & writer, std::string const & s) +void SaveStringWithCDATA(KmlWriter::WriterWrapper & writer, std::string s) { + if (s.empty()) + return; + + // Expat loads XML 1.0 only. Sometimes users copy and paste bookmark descriptions or even names from the web. + // Rarely, these copy-pasted texts contain characters that are invalid in XML 1.0. + // See https://en.wikipedia.org/wiki/Valid_characters_in_XML + // A robust solution requires parsing invalid XML on loading (then users can restore "bad" XML files), see + // https://github.com/organicmaps/organicmaps/issues/3837 + // When a robust solution is implemented, this workaround can be removed for better performance/battery. + // + // This solution is a simple ASCII-range check that does not check characters from other Unicode ranges + // (that would require a more complex and slower approach: decoding the UTF-8 string into code points first). + // It should be enough for many cases, according to user reports and the invalid characters found in their data. + s.erase(std::remove_if(s.begin(), s.end(), [](unsigned char c) + { + if (c >= 0x20 || c == 0x09 || c == 0x0a || c == 0x0d) + return false; + return true; + }), s.end()); + if (s.empty()) return;