SL-10924 Fix data escaping to accommodate CEF update

2019-04-11 20:29:50 +03:00 · 2019-04-11 20:29:50 +03:00 · e4a244a6cb
parent 9194a97e94
commit e4a244a6cb
3 changed files with 107 additions and 15 deletions
--- a/indra/llcommon/lluri.cpp
+++ b/indra/llcommon/lluri.cpp
@ -173,6 +173,19 @@ namespace
 			"-._~";
 		return s;
 	}
+	// Characters that escapeUriPath() passes to LLURI::escape() as the
+	// allowed set. The groups follow the character classes of RFC 1738
+	// (http://www.ietf.org/rfc/rfc1738.txt): letters, digits, safe "$-_.+",
+	// extra "!*'(),", national "{}|\^~[]`", punctuation "<>#%" and
+	// reserved ";/?:@&=". Note especially that '%' and '/' are included.
+	const std::string path()
+	{
+		static const std::string s =
+			"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+			"abcdefghijklmnopqrstuvwxyz"
+			"0123456789"
+			"$-_.+"
+			"!*'(),"
+			"{}|\\^~[]`"
+			"<>#%"
+			";/?:@&=";
+		return s;
+	}
 	const std::string sub_delims()
 	{
 		static const std::string s = "!$&'()*+,;=";
@ -187,6 +200,12 @@ namespace
 		{ return LLURI::escape(s, unreserved() + ":@!$'()*+,"); }	 // sub_delims - "&;=" + ":@"
 	std::string escapeQueryValue(const std::string& s)
 		{ return LLURI::escape(s, unreserved() + ":@!$'()*+,="); }	// sub_delims - "&;" + ":@"
+	// Escape the query component of a URI: unreserved() characters plus
+	// ":@?&$;*+=%/" are passed to LLURI::escape() as the allowed set.
+	std::string escapeUriQuery(const std::string& s)
+		{ return LLURI::escape(s, unreserved() + ":@?&$;*+=%/"); }
+	// Escape the payload of a "data:" URI: only unreserved() characters are
+	// passed to LLURI::escape() as the allowed set.
+	// NOTE(review): the caller's comment says pre-escaped input is tolerated
+	// because escaping ignores '%', but '%' is not added to the set here -
+	// confirm against unreserved() / LLURI::escape().
+	std::string escapeUriData(const std::string& s)
+		{ return LLURI::escape(s, unreserved()); }
+	// Escape a URI path, passing the path() character set to LLURI::escape()
+	// as the allowed set. escapePathAndData() also uses it for the fragment.
+	std::string escapeUriPath(const std::string& s)
+		{ return LLURI::escape(s, path()); }
 }

 //static
@ -202,6 +221,84 @@ std::string LLURI::escape(const std::string& str)
 	return escape(str, default_allowed, true);
 }

+// Escape a raw URI that may be either a "data:" URI or a regular URI with
+// optional query and fragment parts.
+//  - "data:<header>,<payload>": the header is kept as is; the payload is
+//    escaped with escapeUriData() unless the header mentions "base64".
+//  - "<path>?<query>#<fragment>" (';' is accepted as an alternate query
+//    separator): path and fragment are escaped with escapeUriPath(), the
+//    query with escapeUriQuery().
+//  - anything else is escaped as a plain URI path.
+// @param str The raw URI to escape.
+// @return The escaped URI.
+//static
+std::string LLURI::escapePathAndData(const std::string &str)
+{
+    std::string result;
+
+    const std::string data_marker = "data:";
+    if (str.compare(0, data_marker.length(), data_marker) == 0)
+    {
+        // This is not a url but data; the data part needs to be properly escaped.
+        // The data part is separated from the header by ','. Minimal data uri is "data:,"
+        // See "data URI scheme"
+        const size_t separator = str.find(',');
+        if (separator != std::string::npos)
+        {
+            const size_t header_size = separator + 1;
+            const std::string header = str.substr(0, header_size);
+            // base64 is url-safe
+            if (header.find("base64") != std::string::npos)
+            {
+                // assume url-safe data
+                result = str;
+            }
+            else
+            {
+                const std::string data = str.substr(header_size);
+
+                // Notes: File can be partially pre-escaped, that's why escaping ignores '%'
+                // It somewhat limits user from displaying strings like "%20" in text
+                // but that's how viewer worked for a while and user can double-encode it
+                // NOTE(review): whether '%' really is left alone depends on the
+                // allowed set used by escapeUriData() - confirm.
+
+                // Header doesn't need escaping
+                result = header + escapeUriData(data);
+            }
+        }
+    }
+    else
+    {
+        // Try processing it as a path with a query separator.
+        // The query component is indicated by the first question
+        // mark("?") character and terminated by a number sign("#").
+        //
+        // Cut the fragment off first: a '?' or ';' that appears after the
+        // first '#' belongs to the fragment and must not be taken for a
+        // query separator. Searching for both independently made the
+        // path, query and fragment overlap (and the unsigned length
+        // computation wrap around) for input such as "http://a/#frag?x".
+        const size_t fragment_pos = str.find('#');
+        // substr() clamps the count, so npos yields the whole string
+        const std::string head = str.substr(0, fragment_pos);
+
+        size_t delim_pos = head.find('?');
+        if (delim_pos == std::string::npos)
+        {
+            // alternate separator
+            delim_pos = head.find(';');
+        }
+
+        if (delim_pos != std::string::npos)
+        {
+            // The separator itself is kept as the last character of the path part
+            const size_t path_size = delim_pos + 1;
+            const std::string path_part = head.substr(0, path_size);
+            const std::string query = head.substr(path_size);
+
+            std::string fragment;
+            if (fragment_pos != std::string::npos)
+            {
+                // includes the leading '#'
+                fragment = str.substr(fragment_pos);
+            }
+
+            result = escapeUriPath(path_part) + escapeUriQuery(query) + escapeUriPath(fragment);
+        }
+    }
+
+    if (result.empty())
+    {
+        // Not a known scheme or no data part, try just escaping as Uri path
+        result = escapeUriPath(str);
+    }
+    return result;
+}
+
+
 LLURI::LLURI()
 {
 }
--- a/indra/llcommon/lluri.h
+++ b/indra/llcommon/lluri.h
@ -157,6 +157,14 @@ public:
 		const std::string& allowed,
 		bool is_allowed_sorted = false);

+	/**
+	 * @brief Break string into data part and path or scheme
+	 * and escape path (if present) and data.
+	 * Data part is not allowed to have path-related symbols.
+	 * @param str The raw URI to escape.
+	 * @return The escaped URI string.
+	 */
+	static std::string escapePathAndData(const std::string &str);
+
 	/**
 	 * @brief unescape an escaped URI string.
 	 *
--- a/indra/newview/llviewermedia.cpp
+++ b/indra/newview/llviewermedia.cpp
@ -1901,21 +1901,8 @@ void LLViewerMediaImpl::loadURI()
 		// trim whitespace from front and back of URL - fixes EXT-5363
 		LLStringUtil::trim( mMediaURL );

-		// *HACK: we don't know if the URI coming in is properly escaped
-		// (the contract doesn't specify whether it is escaped or not.
-		// but LLQtWebKit expects it to be, so we do our best to encode
-		// special characters)
-		// The strings below were taken right from http://www.ietf.org/rfc/rfc1738.txt
-		// Note especially that '%' and '/' are there.
-		std::string uri = LLURI::escape(mMediaURL,
-										"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
-										"0123456789"
-										"$-_.+"
-										"!*'(),"
-										"{}|\\^~[]`"
-										"<>#%"
-										";/?:@&=",
-										false);
+		// URI often comes unescaped
+		std::string uri = LLURI::escapePathAndData(mMediaURL);
        {
            // Do not log the query parts
            LLURI u(uri);