Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 50 additions & 4 deletions compiler/rustc_lexer/src/cursor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,28 @@ impl<'a> Cursor<'a> {
Some(c)
}

/// Bumps the cursor if the next character is the expected one.
///
/// Returns `true` when the character was consumed. On a mismatch, or at end of input,
/// `self.chars` is not modified and `false` is returned.
// NOTE(review): only `self.chars` is replaced here; if `bump` maintains any additional
// per-character state (e.g. whatever `prev()` reads), confirm that skipping it is intended.
pub(crate) fn bump_if(&mut self, byte: char) -> bool {
    // Advance a copy of the iterator and commit it only when the lookahead matches.
    let mut lookahead = self.chars.clone();
    match lookahead.next() {
        Some(next) if next == byte => {
            self.chars = lookahead;
            true
        }
        _ => false,
    }
}

/// Bumps the cursor if the next character is either of the two expected characters.
pub(crate) fn bump_if_either(&mut self, byte1: char, byte2: char) -> bool {
let mut chars = self.chars.clone();
if let Some(c) = chars.next()
&& (c == byte1 || c == byte2)
{
self.chars = chars;
return true;
}
false
}

/// Moves to a substring by a number of bytes.
pub(crate) fn bump_bytes(&mut self, n: usize) {
self.chars = self.as_str()[n..].chars();
Expand All @@ -115,11 +137,35 @@ impl<'a> Cursor<'a> {
self.bump();
}
}
/// Advances the cursor up to, but not past, the first occurrence of `byte`.
///
/// Returns `true` if `byte` was found; the cursor then sits directly in front of it.
/// Returns `false` if the remaining input does not contain `byte`; in that case the
/// cursor is moved to the end of the input.
pub(crate) fn eat_until(&mut self, byte: u8) -> bool {
    let Some(offset) = memchr::memchr(byte, self.as_str().as_bytes()) else {
        // Nothing found: consume everything that is left.
        self.chars = "".chars();
        return false;
    };
    // `offset` is relative to the current position, so skipping that many bytes
    // leaves `byte` as the next thing to be read.
    self.bump_bytes(offset);
    true
}

pub(crate) fn eat_until(&mut self, byte: u8) {
self.chars = match memchr::memchr(byte, self.as_str().as_bytes()) {
Some(index) => self.as_str()[index..].chars(),
None => "".chars(),
/// Advances the cursor just past the first occurrence of `byte1` or `byte2`.
///
/// Returns `Some` with whichever of the two bytes was encountered first; that byte has
/// already been consumed. Returns `None` if neither byte occurs in the remaining input,
/// in which case the cursor is moved to the end of the input.
pub(crate) fn eat_past_either(&mut self, byte1: u8, byte2: u8) -> Option<u8> {
    let haystack = self.as_str().as_bytes();
    let Some(pos) = memchr::memchr2(byte1, byte2, haystack) else {
        // Neither byte is present: consume everything that is left.
        self.chars = "".chars();
        return None;
    };
    // Remember which byte matched before moving on, then skip it as well (`+ 1`).
    let hit = haystack[pos];
    self.bump_bytes(pos + 1);
    Some(hit)
}
}
60 changes: 24 additions & 36 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,6 @@ impl Cursor<'_> {
self.eat_while(|ch| ch != '\n' && is_horizontal_whitespace(ch));
let invalid_infostring = self.first() != '\n';

let mut found = false;
let nl_fence_pattern = format!("\n{:-<1$}", "", length_opening as usize);
if let Some(closing) = self.as_str().find(&nl_fence_pattern) {
// candidate found
Expand All @@ -576,10 +575,7 @@ impl Cursor<'_> {
// ----
// combine all of that into this frontmatter token so that it gets detected later.
self.eat_until(b'\n');
found = true;
}

if !found {
} else {
// recovery strategy: a closing statement might have preceding whitespace/newline
// but not have enough dashes to properly close. In this case, we eat until there,
// and report a mismatch in the parser.
Expand Down Expand Up @@ -656,23 +652,25 @@ impl Cursor<'_> {
};

let mut depth = 1usize;
while let Some(c) = self.bump() {
while let Some(c) = self.eat_past_either(b'/', b'*') {
match c {
'/' if self.first() == '*' => {
self.bump();
depth += 1;
b'/' => {
if self.bump_if('*') {
depth += 1;
}
}
'*' if self.first() == '/' => {
self.bump();
depth -= 1;
if depth == 0 {
// This block comment is closed, so for a construction like "/* */ */"
// there will be a successfully parsed block comment "/* */"
// and " */" will be processed separately.
break;
b'*' => {
if self.bump_if('/') {
depth -= 1;
if depth == 0 {
// This block comment is closed, so for a construction like "/* */ */"
// there will be a successfully parsed block comment "/* */"
// and " */" will be processed separately.
break;
}
}
}
_ => (),
_ => unreachable!(),
}
}

Expand Down Expand Up @@ -935,19 +933,15 @@ impl Cursor<'_> {
/// if string is terminated.
fn double_quoted_string(&mut self) -> bool {
debug_assert!(self.prev() == '"');
while let Some(c) = self.bump() {
while let Some(c) = self.eat_past_either(b'"', b'\\') {
match c {
'"' => {
b'"' => {
return true;
}
'\\' if self.first() == '\\' || self.first() == '"' => {
// Bump again to skip escaped character.
self.bump();
}
_ => (),
b'\\' => _ = self.bump_if_either('\\', '"'),
_ => unreachable!(),
}
}
// End of file reached.
false
}

Expand All @@ -963,9 +957,8 @@ impl Cursor<'_> {
debug_assert!(self.prev() != '#');

let mut n_start_hashes: u32 = 0;
while self.first() == '#' {
while self.bump_if('#') {
n_start_hashes += 1;
self.bump();
}

if self.first() != '"' {
Expand Down Expand Up @@ -1025,9 +1018,8 @@ impl Cursor<'_> {

// Count opening '#' symbols.
let mut eaten = 0;
while self.first() == '#' {
while self.bump_if('#') {
eaten += 1;
self.bump();
}
let n_start_hashes = eaten;

Expand All @@ -1043,9 +1035,7 @@ impl Cursor<'_> {
// Skip the string contents and on each '#' character met, check if this is
// a raw string termination.
loop {
self.eat_until(b'"');

if self.is_eof() {
if !self.eat_until(b'"') {
return Err(RawStrError::NoTerminator {
expected: n_start_hashes,
found: max_hashes,
Expand Down Expand Up @@ -1117,9 +1107,7 @@ impl Cursor<'_> {
/// and returns false otherwise.
fn eat_float_exponent(&mut self) -> bool {
    debug_assert!(self.prev() == 'e' || self.prev() == 'E');
    // Consume at most ONE optional sign. Whether a sign was present does not influence
    // the result, so the returned flag is intentionally ignored.
    self.bump_if_either('-', '+');
    self.eat_decimal_digits()
}

Expand Down
Loading