From e2bbecb79f8b28232d91acb8a91c9be34e077932 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Thu, 7 Nov 2024 09:27:50 -0800 Subject: [PATCH 1/2] router: represent methods as `http::Method` Currently, the `Router` stores HTTP methods for an endpoint as `String`s. This is a bit unfortunate, as it means the already-parsed `http::Method` for each request has to be un-parsed back into a `String` to look it up in the map, allocating a new `String` each time and converting it to uppercases to perform the lookup. Switching to the typed `http::Method` representation should make things a bit more efficient (especially as `Method` also implements inline storage for extension methods up to a certain length) and provide additional type safety. This does require switching from a `BTreeMap` of strings to handlers to a `HashMap` of `Method`s to handlers, as `Method` does not implement `Ord`/`PartialOrd`, but ordering shouldn't matter here. --- dropshot/src/api_description.rs | 18 +++++++++--------- dropshot/src/router.rs | 27 +++++++++++++-------------- dropshot/src/server.rs | 2 +- 3 files changed, 23 insertions(+), 24 deletions(-) diff --git a/dropshot/src/api_description.rs b/dropshot/src/api_description.rs index 3f832982b..480eed911 100644 --- a/dropshot/src/api_description.rs +++ b/dropshot/src/api_description.rs @@ -670,15 +670,15 @@ impl ApiDescription { _ => panic!("reference not expected"), }; - let method_ref = match &method[..] { - "GET" => &mut pathitem.get, - "PUT" => &mut pathitem.put, - "POST" => &mut pathitem.post, - "DELETE" => &mut pathitem.delete, - "OPTIONS" => &mut pathitem.options, - "HEAD" => &mut pathitem.head, - "PATCH" => &mut pathitem.patch, - "TRACE" => &mut pathitem.trace, + let method_ref = match method { + http::Method::GET => &mut pathitem.get, + http::Method::PUT => &mut pathitem.put, + http::Method::POST => &mut pathitem.post, + http::Method::DELETE => &mut pathitem.delete, + http::Method::OPTIONS => &mut pathitem.options, + http::Method::HEAD => &mut pathitem.head, + http::Method::PATCH => &mut pathitem.patch, + http::Method::TRACE => &mut pathitem.trace, other => panic!("unexpected method `{}`", other), }; let mut operation = openapiv3::Operation::default(); diff --git a/dropshot/src/router.rs b/dropshot/src/router.rs index ed3af536d..1a39b8ab5 100644 --- a/dropshot/src/router.rs +++ b/dropshot/src/router.rs @@ -14,6 +14,7 @@ use http::StatusCode; use percent_encoding::percent_decode_str; use std::collections::BTreeMap; use std::collections::BTreeSet; +use std::collections::HashMap; use std::sync::Arc; /// `HttpRouter` is a simple data structure for routing incoming HTTP requests to @@ -81,7 +82,7 @@ pub struct HttpRouter { #[derive(Debug)] struct HttpRouterNode { /// Handlers, etc. for each of the HTTP methods defined for this node. - methods: BTreeMap>, + methods: HashMap>, /// Edges linking to child nodes. edges: Option>, } @@ -217,7 +218,7 @@ pub struct RouterLookupResult { impl HttpRouterNode { pub fn new() -> Self { - HttpRouterNode { methods: BTreeMap::new(), edges: None } + HttpRouterNode { methods: HashMap::new(), edges: None } } } @@ -385,8 +386,7 @@ impl HttpRouter { }; } - let methodname = method.as_str().to_uppercase(); - if node.methods.contains_key(&methodname) { + if node.methods.contains_key(&method) { panic!( "URI path \"{}\": attempted to create duplicate route for \ method \"{}\"", @@ -394,7 +394,7 @@ impl HttpRouter { ); } - node.methods.insert(methodname, endpoint); + node.methods.insert(method, endpoint); } /// Look up the route handler for an HTTP request having method `method` and @@ -478,9 +478,8 @@ impl HttpRouter { )); } - let methodname = method.as_str().to_uppercase(); node.methods - .get(&methodname) + .get(&method) .map(|handler| RouterLookupResult { handler: Arc::clone(&handler.handler), operation_id: handler.operation_id.clone(), @@ -512,7 +511,7 @@ fn insert_var( } impl<'a, Context: ServerContext> IntoIterator for &'a HttpRouter { - type Item = (String, String, &'a ApiEndpoint); + type Item = (String, Method, &'a ApiEndpoint); type IntoIter = HttpRouterIter<'a, Context>; fn into_iter(self) -> Self::IntoIter { HttpRouterIter::new(self) @@ -529,7 +528,7 @@ impl<'a, Context: ServerContext> IntoIterator for &'a HttpRouter { /// blank string and an iterator over the root node's children. pub struct HttpRouterIter<'a, Context: ServerContext> { method: - Box)> + 'a>, + Box)> + 'a>, path: Vec<(PathSegment, Box>)>, } type PathIter<'a, Context> = @@ -592,7 +591,7 @@ impl<'a, Context: ServerContext> HttpRouterIter<'a, Context> { } impl<'a, Context: ServerContext> Iterator for HttpRouterIter<'a, Context> { - type Item = (String, String, &'a ApiEndpoint); + type Item = (String, Method, &'a ApiEndpoint); fn next(&mut self) -> Option { // If there are no path components left then we've reached the end of @@ -1309,10 +1308,10 @@ mod test { assert_eq!( ret, vec![ - ("/".to_string(), "GET".to_string(),), + ("/".to_string(), http::Method::GET,), ( "/projects/{project_id}/instances".to_string(), - "GET".to_string(), + http::Method::GET, ), ] ); @@ -1335,8 +1334,8 @@ mod test { assert_eq!( ret, vec![ - ("/".to_string(), "GET".to_string(),), - ("/".to_string(), "POST".to_string(),), + ("/".to_string(), http::Method::GET,), + ("/".to_string(), http::Method::POST), ] ); } diff --git a/dropshot/src/server.rs b/dropshot/src/server.rs index ab6135bbf..ce1188ee3 100644 --- a/dropshot/src/server.rs +++ b/dropshot/src/server.rs @@ -206,7 +206,7 @@ impl HttpServerStarter { for (path, method, _) in &app_state.router { debug!(&log, "registered endpoint"; - "method" => &method, + "method" => %method, "path" => &path ); } From bfadc03a42fe383014a528b6dc0b0f2eb13882c1 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Thu, 7 Nov 2024 09:46:08 -0800 Subject: [PATCH 2/2] router: Reduce string copies in path traversal Presently, the router's `input_path_to_segments` function segments the whole input path, copying each each segment from the input path into a new owned `String`, and returns a `Vec`. This `Vec` is then immediately iterated over by reference, and each `&String` segment is then `to_string()`ed into a _second_ owned `String` that's used when returning the list of path variables. Path segments which are not returned (i.e., literals) are still copied into a new `String` which is used only to look up that segment in the current node's map of edges, which doesn't require an owned `String` to perform. This all feels a bit unfortunate, as we are allocating path segments a bunch of times more than we need to. Ideally, we would iterate over slices borrowed from the `InputPath`, and allocate `String`s only when necessary in order to return a path variable's value in the lookup result, or when we percent-decode something. This commit makes that change. Now, `input_path_to_segments` returns an `Iterator, InputPathError>>`, which borrows the segment from the path unless it was necessary to allocate a string to percent-decode the segment. This does mean that we return an `Iterator` of `Result`s rather than a `Result` and thus have to handle a potential error every time we get the next segment, but I don't think that's terrible --- it's kind of the inherent cost of doing path segmentation lazily. This also means that if we do encounter an error, or if the path is determined not to route to an endpoint, we don't continue wasting time segmenting the rest of the path; in the present implementation, we always segment the whole path up front before traversing it. I haven't done any real performance analysis of this, but I imagine the lazy approach is more efficient; it certainly *feels* nicer. It's also less vulnerable to issues where we spend a bunch of time segmenting a really long path that will never route to anything. --- dropshot/src/router.rs | 70 +++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 28 deletions(-) diff --git a/dropshot/src/router.rs b/dropshot/src/router.rs index 1a39b8ab5..5dc273553 100644 --- a/dropshot/src/router.rs +++ b/dropshot/src/router.rs @@ -12,6 +12,7 @@ use crate::ApiEndpointBodyContentType; use http::Method; use http::StatusCode; use percent_encoding::percent_decode_str; +use std::borrow::Cow; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; @@ -408,36 +409,41 @@ impl HttpRouter { method: &Method, path: InputPath<'_>, ) -> Result, HttpError> { - let all_segments = input_path_to_segments(&path).map_err(|_| { - HttpError::for_bad_request( - None, - String::from("invalid path encoding"), - ) - })?; - let mut all_segments = all_segments.into_iter(); + let mut all_segments = input_path_to_segments(&path); let mut node = &self.root; let mut variables = VariableSet::new(); - while let Some(segment) = all_segments.next() { - let segment_string = segment.to_string(); + while let Some(maybe_segment) = all_segments.next() { + let segment = maybe_segment.map_err(|e| { + HttpError::for_bad_request( + None, + format!("invalid path encoding: {e}"), + ) + })?; node = match &node.edges { None => None, Some(HttpRouterEdges::Literals(edges)) => { - edges.get(&segment_string) + edges.get(segment.as_ref()) } Some(HttpRouterEdges::VariableSingle(varname, ref node)) => { variables.insert( varname.clone(), - VariableValue::String(segment_string), + VariableValue::String(segment.into_owned()), ); Some(node) } Some(HttpRouterEdges::VariableRest(varname, node)) => { - let mut rest = vec![segment]; - while let Some(segment) = all_segments.next() { - rest.push(segment); + let mut rest = vec![segment.into_owned()]; + while let Some(maybe_segment) = all_segments.next() { + let segment = maybe_segment.map_err(|e| { + HttpError::for_bad_request( + None, + format!("invalid path encoding: {e}"), + ) + })?; + rest.push(segment.into_owned()); } variables.insert( varname.clone(), @@ -629,6 +635,14 @@ impl<'a, Context: ServerContext> Iterator for HttpRouterIter<'a, Context> { } } +#[derive(Debug, thiserror::Error)] +enum InputPathError { + #[error(transparent)] + PercentDecode(#[from] std::str::Utf8Error), + #[error("dot-segments are not permitted")] + DotSegment, +} + /// Helper function for taking a Uri path and producing a `Vec` of /// URL-decoded strings, each representing one segment of the path. The input is /// percent-encoded. Empty segments i.e. due to consecutive "/" characters or a @@ -652,7 +666,9 @@ impl<'a, Context: ServerContext> Iterator for HttpRouterIter<'a, Context> { /// that consumers may be susceptible to other information leaks, for example /// if a client were able to follow a symlink to the root of the filesystem. As /// always, it is incumbent on the consumer and *critical* to validate input. -fn input_path_to_segments(path: &InputPath) -> Result, String> { +fn input_path_to_segments<'path>( + path: &'path InputPath, +) -> impl Iterator, InputPathError>> + 'path { // We're given the "path" portion of a URI and we want to construct an // array of the segments of the path. Relevant references: // @@ -681,17 +697,12 @@ fn input_path_to_segments(path: &InputPath) -> Result, String> { // should be ignored). The net result is that that crate doesn't buy us // much here, but it does create more work, so we'll just split it // ourselves. - path.0 - .split('/') - .filter(|segment| !segment.is_empty()) - .map(|segment| match segment { - "." | ".." => Err("dot-segments are not permitted".to_string()), - _ => Ok(percent_decode_str(segment) - .decode_utf8() - .map_err(|e| e.to_string())? - .to_string()), - }) - .collect() + path.0.split('/').filter(|segment| !segment.is_empty()).map(|segment| { + match segment { + "." | ".." => Err(InputPathError::DotSegment), + _ => Ok(percent_decode_str(segment).decode_utf8()?), + } + }) } /// Whereas in `input_path_to_segments()` we must accommodate any user input, when @@ -728,6 +739,7 @@ mod test { use super::super::handler::RouteHandler; use super::input_path_to_segments; use super::HttpRouter; + use super::InputPathError; use super::PathSegment; use crate::api_description::ApiEndpointBodyContentType; use crate::from_map::from_map; @@ -1342,8 +1354,10 @@ mod test { #[test] fn test_segments() { - let segs = - input_path_to_segments(&"//foo/bar/baz%2fbuzz".into()).unwrap(); + let segs = input_path_to_segments(&"//foo/bar/baz%2fbuzz".into()) + .map(|seg| Ok::(seg?.into_owned())) + .collect::, _>>() + .unwrap(); assert_eq!(segs, vec!["foo", "bar", "baz/buzz"]); }