diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
index 165262b82c75d..93352824696b5 100644
--- a/compiler/rustc_lexer/src/cursor.rs
+++ b/compiler/rustc_lexer/src/cursor.rs
@@ -102,6 +102,28 @@ impl<'a> Cursor<'a> {
         Some(c)
     }
 
+    pub(crate) fn bump_if(&mut self, byte: char) -> bool {
+        let mut chars = self.chars.clone();
+        if chars.next() == Some(byte) {
+            self.chars = chars;
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Bumps the cursor if the next character is either of the two expected characters.
+    pub(crate) fn bump_if_either(&mut self, byte1: char, byte2: char) -> bool {
+        let mut chars = self.chars.clone();
+        if let Some(c) = chars.next()
+            && (c == byte1 || c == byte2)
+        {
+            self.chars = chars;
+            return true;
+        }
+        false
+    }
+
     /// Moves to a substring by a number of bytes.
     pub(crate) fn bump_bytes(&mut self, n: usize) {
         self.chars = self.as_str()[n..].chars();
@@ -115,11 +137,35 @@ impl<'a> Cursor<'a> {
             self.bump();
         }
     }
+    /// Eats characters until the given byte is found.
+    /// Returns true if the byte was found, false if end of file was reached.
+    pub(crate) fn eat_until(&mut self, byte: u8) -> bool {
+        match memchr::memchr(byte, self.as_str().as_bytes()) {
+            Some(index) => {
+                self.bump_bytes(index);
+                true
+            }
+            None => {
+                self.chars = "".chars();
+                false
+            }
+        }
+    }
 
-    pub(crate) fn eat_until(&mut self, byte: u8) {
-        self.chars = match memchr::memchr(byte, self.as_str().as_bytes()) {
-            Some(index) => self.as_str()[index..].chars(),
-            None => "".chars(),
+    /// Eats characters until any of the given bytes is found, then consumes past it.
+    /// Returns the found byte if any, or None if end of file was reached.
+    pub(crate) fn eat_past_either(&mut self, byte1: u8, byte2: u8) -> Option<u8> {
+        let bytes = self.as_str().as_bytes();
+        match memchr::memchr2(byte1, byte2, bytes) {
+            Some(index) => {
+                let found = bytes[index];
+                self.bump_bytes(index + 1);
+                Some(found)
+            }
+            None => {
+                self.chars = "".chars();
+                None
+            }
         }
     }
 }
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index f6790f7ed1e96..ff39b95772c6a 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -563,7 +563,6 @@ impl Cursor<'_> {
         self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
         let invalid_infostring = self.first() != '\n';
 
-        let mut found = false;
         let nl_fence_pattern = format!("\n{:-<1$}", "", length_opening as usize);
         if let Some(closing) = self.as_str().find(&nl_fence_pattern) {
             // candidate found
@@ -576,10 +575,7 @@ impl Cursor<'_> {
             // ----
             // combine those stuff into this frontmatter token such that it gets detected later.
             self.eat_until(b'\n');
-            found = true;
-        }
-
-        if !found {
+        } else {
             // recovery strategy: a closing statement might have preceding whitespace/newline
             // but not have enough dashes to properly close. In this case, we eat until there,
             // and report a mismatch in the parser.
@@ -656,23 +652,25 @@ impl Cursor<'_> {
         };
 
         let mut depth = 1usize;
-        while let Some(c) = self.bump() {
+        while let Some(c) = self.eat_past_either(b'/', b'*') {
             match c {
-                '/' if self.first() == '*' => {
-                    self.bump();
-                    depth += 1;
+                b'/' => {
+                    if self.bump_if('*') {
+                        depth += 1;
+                    }
                 }
-                '*' if self.first() == '/' => {
-                    self.bump();
-                    depth -= 1;
-                    if depth == 0 {
-                        // This block comment is closed, so for a construction like "/* */ */"
-                        // there will be a successfully parsed block comment "/* */"
-                        // and " */" will be processed separately.
-                        break;
+                b'*' => {
+                    if self.bump_if('/') {
+                        depth -= 1;
+                        if depth == 0 {
+                            // This block comment is closed, so for a construction like "/* */ */"
+                            // there will be a successfully parsed block comment "/* */"
+                            // and " */" will be processed separately.
+                            break;
+                        }
                     }
                 }
-                _ => (),
+                _ => unreachable!(),
             }
         }
 
@@ -935,19 +933,15 @@ impl Cursor<'_> {
     /// if string is terminated.
     fn double_quoted_string(&mut self) -> bool {
         debug_assert!(self.prev() == '"');
-        while let Some(c) = self.bump() {
+        while let Some(c) = self.eat_past_either(b'"', b'\\') {
             match c {
-                '"' => {
+                b'"' => {
                     return true;
                 }
-                '\\' if self.first() == '\\' || self.first() == '"' => {
-                    // Bump again to skip escaped character.
-                    self.bump();
-                }
-                _ => (),
+                b'\\' => _ = self.bump_if_either('\\', '"'),
+                _ => unreachable!(),
             }
         }
-        // End of file reached.
         false
     }
 
@@ -963,9 +957,8 @@ impl Cursor<'_> {
         debug_assert!(self.prev() != '#');
 
         let mut n_start_hashes: u32 = 0;
-        while self.first() == '#' {
+        while self.bump_if('#') {
             n_start_hashes += 1;
-            self.bump();
         }
 
         if self.first() != '"' {
@@ -1025,9 +1018,8 @@ impl Cursor<'_> {
 
         // Count opening '#' symbols.
         let mut eaten = 0;
-        while self.first() == '#' {
+        while self.bump_if('#') {
             eaten += 1;
-            self.bump();
         }
         let n_start_hashes = eaten;
 
@@ -1043,9 +1035,7 @@ impl Cursor<'_> {
         // Skip the string contents and on each '#' character met, check if this is
         // a raw string termination.
         loop {
-            self.eat_until(b'"');
-
-            if self.is_eof() {
+            if !self.eat_until(b'"') {
                 return Err(RawStrError::NoTerminator {
                     expected: n_start_hashes,
                     found: max_hashes,
@@ -1117,9 +1107,7 @@ impl Cursor<'_> {
     /// and returns false otherwise.
     fn eat_float_exponent(&mut self) -> bool {
         debug_assert!(self.prev() == 'e' || self.prev() == 'E');
-        if self.first() == '-' || self.first() == '+' {
-            self.bump();
-        }
+        self.bump_if_either('-', '+');
         self.eat_decimal_digits()
     }
 