@@ -52,8 +52,15 @@ pub(crate) fn parse_token_trees<'a>(
5252 }
5353
5454 let cursor = Cursor::new(src);
55- let string_reader =
56- StringReader { sess, start_pos, pos: start_pos, src, cursor, override_span };
55+ let string_reader = StringReader {
56+ sess,
57+ start_pos,
58+ pos: start_pos,
59+ src,
60+ cursor,
61+ override_span,
62+ nbsp_is_whitespace: false,
63+ };
5764 tokentrees::TokenTreesReader::parse_all_token_trees(string_reader)
5865}
5966
@@ -68,6 +75,10 @@ struct StringReader<'a> {
6875 /// Cursor for getting lexer tokens.
6976 cursor: Cursor<'a>,
7077 override_span: Option<Span>,
78+ /// When an "unknown start of token: \u{a0}" has already been emitted earlier
79+ /// in this file, it's safe to treat further occurrences of the non-breaking
80+ /// space character as whitespace.
81+ nbsp_is_whitespace: bool,
7182}
7283
7384impl<'a> StringReader<'a> {
@@ -239,6 +250,16 @@ impl<'a> StringReader<'a> {
239250 }
240251 let mut it = self.str_from_to_end(start).chars();
241252 let c = it.next().unwrap();
253+ if c == '\u{00a0}' {
254+ // If an error has already been reported on non-breaking
255+ // space characters earlier in the file, treat all
256+ // subsequent occurrences as whitespace.
257+ if self.nbsp_is_whitespace {
258+ preceded_by_whitespace = true;
259+ continue;
260+ }
261+ self.nbsp_is_whitespace = true;
262+ }
242263 let repeats = it.take_while(|c1| *c1 == c).count();
243264 let mut err =
244265 self.struct_err_span_char(start, self.pos + Pos::from_usize(repeats * c.len_utf8()), "unknown start of token", c);
@@ -486,7 +507,7 @@ impl<'a> StringReader<'a> {
486507
487508 /// Slice of the source text from `start` up to but excluding `self.pos`,
488509 /// meaning the slice does not include the character `self.ch`.
489- fn str_from(&self, start: BytePos) -> &str {
510+ fn str_from(&self, start: BytePos) -> &'a str {
490511 self.str_from_to(start, self.pos)
491512 }
492513
@@ -497,12 +518,12 @@ impl<'a> StringReader<'a> {
497518 }
498519
499520 /// Slice of the source text spanning from `start` up to but excluding `end`.
500- fn str_from_to(&self, start: BytePos, end: BytePos) -> &str {
521+ fn str_from_to(&self, start: BytePos, end: BytePos) -> &'a str {
501522 &self.src[self.src_index(start)..self.src_index(end)]
502523 }
503524
504525 /// Slice of the source text spanning from `start` until the end of the source.
505- fn str_from_to_end(&self, start: BytePos) -> &str {
526+ fn str_from_to_end(&self, start: BytePos) -> &'a str {
506527 &self.src[self.src_index(start)..]
507528 }
508529
0 commit comments