@@ -94,7 +94,8 @@ class PathMatcher {
9494 // The info associated with each method. The path matcher nodes
9595 // will hold pointers to MethodData objects in this vector.
9696 std::vector<std::unique_ptr<MethodData>> methods_;
97- UrlUnescapeSpec unescape_spec_;
97+ UrlUnescapeSpec path_unescape_spec_;
98+ bool query_param_unescape_plus_;
9899
99100 private:
100101 friend class PathMatcherBuilder <Method>;
@@ -127,8 +128,14 @@ class PathMatcherBuilder {
127128 const std::string& body_field_path, Method method);
128129
129130 // Change unescaping behavior, see UrlUnescapeSpec for available options.
130- void SetUrlUnescapeSpec (UrlUnescapeSpec unescape_spec) {
131- unescape_spec_ = unescape_spec;
131+ // This only applies to path, not query parameters.
132+ void SetUrlUnescapeSpec (UrlUnescapeSpec path_unescape_spec) {
133+ path_unescape_spec_ = path_unescape_spec;
134+ }
135+ // If true, unescape '+' in query parameters to space. Default is false.
136+ // To support [HTML 2.0 or RFC1866](https://tools.ietf.org/html/rfc1866#section-8.2.1).
137+ void SetQueryParamUnescapePlus (bool query_param_unescape_plus) {
138+ query_param_unescape_plus_ = query_param_unescape_plus;
132139 }
133140
134141 // Returns a unique_ptr to a thread safe PathMatcher that contains all
@@ -151,8 +158,9 @@ class PathMatcherBuilder {
151158 std::unordered_set<std::string> custom_verbs_;
152159 typedef typename PathMatcher<Method>::MethodData MethodData;
153160 std::vector<std::unique_ptr<MethodData>> methods_;
154- UrlUnescapeSpec unescape_spec_ =
161+ UrlUnescapeSpec path_unescape_spec_ =
155162 UrlUnescapeSpec::kAllCharactersExceptReserved ;
163+ bool query_param_unescape_plus_ = false ;
156164
157165 friend class PathMatcher <Method>;
158166};
@@ -222,44 +230,53 @@ inline int hex_digit_to_int(char c) {
222230//
223231// If the next three characters are an escaped character then this function will
224232// also return what character is escaped.
225- bool GetEscapedChar (const std::string& src, size_t i,
226- UrlUnescapeSpec unescape_spec, char * out) {
233+ //
234+ // If unescape_plus is true, unescape '+' to space.
235+ //
236+ // return value: 0: not unescaped, >0: unescaped, number of used original characters.
237+ //
238+ int GetEscapedChar (const std::string& src, size_t i,
239+ UrlUnescapeSpec unescape_spec, bool unescape_plus, char * out) {
240+ if (unescape_plus && src[i] == ' +' ) {
241+ *out = ' ' ;
242+ return 1 ;
243+ }
227244 if (i + 2 < src.size () && src[i] == ' %' ) {
228245 if (ascii_isxdigit (src[i + 1 ]) && ascii_isxdigit (src[i + 2 ])) {
229246 char c =
230247 (hex_digit_to_int (src[i + 1 ]) << 4 ) | hex_digit_to_int (src[i + 2 ]);
231248 switch (unescape_spec) {
232249 case UrlUnescapeSpec::kAllCharactersExceptReserved :
233250 if (IsReservedChar (c)) {
234- return false ;
251+ return 0 ;
235252 }
236253 break ;
237254 case UrlUnescapeSpec::kAllCharactersExceptSlash :
238255 if (c == ' /' ) {
239- return false ;
256+ return 0 ;
240257 }
241258 break ;
242259 case UrlUnescapeSpec::kAllCharacters :
243260 break ;
244261 }
245262 *out = c;
246- return true ;
263+ return 3 ;
247264 }
248265 }
249- return false ;
266+ return 0 ;
250267}
251268
252269// Unescapes string 'part' and returns the unescaped string. Reserved characters
253270// (as specified in RFC 6570) are not escaped if unescape_reserved_chars is
254271// false.
255272std::string UrlUnescapeString (const std::string& part,
256- UrlUnescapeSpec unescape_spec) {
273+ UrlUnescapeSpec unescape_spec, bool unescape_plus ) {
257274 std::string unescaped;
258275 // Check whether we need to escape at all.
259276 bool needs_unescaping = false ;
260277 char ch = ' \0 ' ;
261278 for (size_t i = 0 ; i < part.size (); ++i) {
262- if (GetEscapedChar (part, i, unescape_spec, &ch)) {
279+ if (GetEscapedChar (part, i, unescape_spec, unescape_plus, &ch) > 0 ) {
263280 needs_unescaping = true ;
264281 break ;
265282 }
@@ -275,9 +292,10 @@ std::string UrlUnescapeString(const std::string& part,
275292 char * p = begin;
276293
277294 for (size_t i = 0 ; i < part.size ();) {
278- if (GetEscapedChar (part, i, unescape_spec, &ch)) {
295+ int skip = GetEscapedChar (part, i, unescape_spec, unescape_plus, &ch);
296+ if (skip > 0 ) {
279297 *p++ = ch;
280- i += 3 ;
298+ i += skip ;
281299 } else {
282300 *p++ = part[i];
283301 i += 1 ;
@@ -291,8 +309,8 @@ std::string UrlUnescapeString(const std::string& part,
291309template <class VariableBinding >
292310void ExtractBindingsFromPath (const std::vector<HttpTemplate::Variable>& vars,
293311 const std::vector<std::string>& parts,
294- std::vector<VariableBinding>* bindings ,
295- UrlUnescapeSpec unescape_spec ) {
312+ UrlUnescapeSpec unescape_spec ,
313+ std::vector<VariableBinding>* bindings ) {
296314 for (const auto & var : vars) {
297315 // Determine the subpath bound to the variable based on the
298316 // [start_segment, end_segment) segment range of the variable.
@@ -316,7 +334,7 @@ void ExtractBindingsFromPath(const std::vector<HttpTemplate::Variable>& vars,
316334 // Joins parts with "/" to form a path string.
317335 for (size_t i = var.start_segment ; i < end_segment; ++i) {
318336 // For multipart matches only unescape non-reserved characters.
319- binding.value += UrlUnescapeString (parts[i], var_unescape_spec);
337+ binding.value += UrlUnescapeString (parts[i], var_unescape_spec, false );
320338 if (i < end_segment - 1 ) {
321339 binding.value += " /" ;
322340 }
@@ -329,6 +347,7 @@ template <class VariableBinding>
329347void ExtractBindingsFromQueryParameters (
330348 const std::string& query_params,
331349 const std::unordered_set<std::string>& system_params,
350+ bool query_param_unescape_plus,
332351 std::vector<VariableBinding>* bindings) {
333352 // The bindings in URL the query parameters have the following form:
334353 // <field_path1>=value1&<field_path2>=value2&...&<field_pathN>=valueN
@@ -350,7 +369,8 @@ void ExtractBindingsFromQueryParameters(
350369 VariableBinding binding;
351370 split (name, ' .' , binding.field_path );
352371 binding.value = UrlUnescapeString (param.substr (pos + 1 ),
353- UrlUnescapeSpec::kAllCharacters );
372+ UrlUnescapeSpec::kAllCharacters ,
373+ query_param_unescape_plus);
354374 bindings->emplace_back (std::move (binding));
355375 }
356376 }
@@ -424,7 +444,8 @@ PathMatcher<Method>::PathMatcher(PathMatcherBuilder<Method>&& builder)
424444 : root_ptr_(std::move(builder.root_ptr_)),
425445 custom_verbs_ (std::move(builder.custom_verbs_)),
426446 methods_(std::move(builder.methods_)),
427- unescape_spec_(builder.unescape_spec_) {}
447+ path_unescape_spec_(builder.path_unescape_spec_),
448+ query_param_unescape_plus_(builder.query_param_unescape_plus_) {}
428449
429450// Lookup is a wrapper method for the recursive node Lookup. First, the wrapper
430451// splits the request path into slash-separated path parts. Next, the method
@@ -460,11 +481,11 @@ Method PathMatcher<Method>::Lookup(
460481 MethodData* method_data = reinterpret_cast <MethodData*>(lookup_result.data );
461482 if (variable_bindings != nullptr ) {
462483 variable_bindings->clear ();
463- ExtractBindingsFromPath (method_data->variables , parts, variable_bindings,
464- unescape_spec_ );
484+ ExtractBindingsFromPath (method_data->variables , parts,
485+ path_unescape_spec_, variable_bindings );
465486 ExtractBindingsFromQueryParameters (
466487 query_params, method_data->system_query_parameter_names ,
467- variable_bindings);
488+ query_param_unescape_plus_, variable_bindings);
468489 }
469490 if (body_field_path != nullptr ) {
470491 *body_field_path = method_data->body_field_path ;
0 commit comments