From c8167e7442fb1abf76dbbfe4c10fceb8aaea264f Mon Sep 17 00:00:00 2001 From: w15dev Date: Mon, 7 Jul 2025 01:48:59 +0300 Subject: [PATCH] Add supporting utf-8 symbols --- calculator/src/math_expression_parser/mod.rs | 2 +- .../src/math_expression_tokenizer/mod.rs | 101 +++++++++--------- 2 files changed, 52 insertions(+), 51 deletions(-) diff --git a/calculator/src/math_expression_parser/mod.rs b/calculator/src/math_expression_parser/mod.rs index 2e5fd39..9b37050 100644 --- a/calculator/src/math_expression_parser/mod.rs +++ b/calculator/src/math_expression_parser/mod.rs @@ -30,7 +30,7 @@ impl MathExpressionParser { let mut parsed_expression = MathExpression { expression: vec![] }; let mut braces = vec![]; - while let Ok((token, idx)) = tokenizer.next_token() { + while let Ok((idx, token)) = tokenizer.next_token() { match token { Token::OpenBrace => { braces.push(idx); diff --git a/calculator/src/math_expression_tokenizer/mod.rs b/calculator/src/math_expression_tokenizer/mod.rs index af2a83e..ab90283 100644 --- a/calculator/src/math_expression_tokenizer/mod.rs +++ b/calculator/src/math_expression_tokenizer/mod.rs @@ -20,12 +20,12 @@ pub enum MathExpressionTokenizerError { pub struct MathExpressionTokenizer { expr: String, - curr_idx: usize, + curr_byte_idx: usize, } pub trait TokenizerTraits { fn has_token(&self) -> bool; - fn next_token(&mut self) -> Result<(Token, usize), MathExpressionTokenizerError>; + fn next_token(&mut self) -> Result<(usize, Token), MathExpressionTokenizerError>; fn curr_index(&self) -> usize; } @@ -36,52 +36,55 @@ impl TokenizerTraits for MathExpressionTokenizer { } fn curr_index(&self) -> usize { - self.curr_idx + self.curr_byte_idx } - fn next_token(&mut self) -> Result<(Token, usize), MathExpressionTokenizerError> { + fn next_token(&mut self) -> Result<(usize, Token), MathExpressionTokenizerError> { if !self.has_token() { return Err(MathExpressionTokenizerError::NoToken); } - self.curr_idx = self.skip_spaces(); - - match self.expr.chars().nth(self.curr_idx).unwrap() { - '(' => { - self.curr_idx += 1; - Ok((Token::OpenBrace, self.curr_idx - 1)) - } - ')' => { - self.curr_idx += 1; - Ok((Token::CloseBrace, self.curr_idx - 1)) - } - op @ ('+' | '-' | '*' | '/') => { - self.curr_idx += 1; - Ok((Token::Operator(op), self.curr_idx - 1)) - } + self.curr_byte_idx = self.skip_spaces(); + let old_value = self.curr_byte_idx; + + match self.expr[self.curr_byte_idx..].chars().next().unwrap() { + '(' => Ok(( + std::mem::replace(&mut self.curr_byte_idx, old_value + 1), + Token::OpenBrace, + )), + ')' => Ok(( + std::mem::replace(&mut self.curr_byte_idx, old_value + 1), + Token::CloseBrace, + )), + op @ ('+' | '-' | '*' | '/') => Ok(( + std::mem::replace(&mut self.curr_byte_idx, old_value + 1), + Token::Operator(op), + )), _ => { - let (digit, mut idx) = self.parse_digits()?; - std::mem::swap(&mut self.curr_idx, &mut idx); - Ok((Token::Digit(digit), idx)) + let (digit, idx) = self.parse_digits()?; + Ok(( + std::mem::replace(&mut self.curr_byte_idx, idx), + Token::Digit(digit), + )) } } } } impl MathExpressionTokenizer { - pub fn new(math_expr: String) -> Result { - if math_expr.is_empty() { + pub fn new(expr: String) -> Result { + if expr.is_empty() { return Err(MathExpressionTokenizerError::InvalidArgument); } Ok(Self { - expr: math_expr, - curr_idx: 0, + expr, + curr_byte_idx: 0, }) } fn parse_digits(&self) -> Result<(f64, usize), MathExpressionTokenizerError> { - let s = &self.expr[self.curr_idx..]; + let s = &self.expr[self.curr_byte_idx..]; let offset = s .char_indices() @@ -90,23 +93,24 @@ impl MathExpressionTokenizer { .unwrap_or(s.len()); match s[..offset].parse::() { - Ok(number) => Ok((number, self.curr_idx + offset)), + Ok(number) => Ok((number, self.curr_byte_idx + offset)), Err(_) => Err(MathExpressionTokenizerError::InvalidToken { - idx: self.curr_idx, + idx: self.curr_byte_idx, ch: s.chars().nth(0).unwrap(), }), } } fn skip_spaces(&self) -> usize { - self.expr[self.curr_idx..] + self.expr[self.curr_byte_idx..] .char_indices() - .position(|(_, char)| !char.is_whitespace()) - .map(|idx| self.curr_idx + idx) + .find(|(_, char)| !char.is_whitespace()) + .map(|(idx, _)| self.curr_byte_idx + idx) .unwrap_or(self.expr.len()) } } +// Unit tests #[cfg(test)] mod tests { use super::*; @@ -121,7 +125,7 @@ mod tests { fn test_zero_number_tokens() { let mut tokenizer = MathExpressionTokenizer::new("0".to_string()).unwrap(); assert!(tokenizer.has_token()); - let (token, idx) = tokenizer.next_token().unwrap(); + let (idx, token) = tokenizer.next_token().unwrap(); assert_eq!(idx, 0); if let Token::Digit(number) = token { @@ -132,12 +136,12 @@ mod tests { let mut tokenizer = MathExpressionTokenizer::new("-0".to_string()).unwrap(); assert!(tokenizer.has_token()); - let (token, idx) = tokenizer.next_token().unwrap(); + let (idx, token) = tokenizer.next_token().unwrap(); assert_eq!(idx, 0); assert!(matches!(token, Token::Operator('-'))); assert!(tokenizer.has_token()); - let (token, idx) = tokenizer.next_token().unwrap(); + let (idx, token) = tokenizer.next_token().unwrap(); assert_eq!(idx, 1); if let Token::Digit(number) = token { @@ -152,7 +156,7 @@ mod tests { fn test_valid_positive_number_tokens(n in any::().prop_filter("Positive numbers", |&x| x > 0.0)) { let mut tokenizer = MathExpressionTokenizer::new(format!("{}", n)).unwrap(); assert!(tokenizer.has_token()); - let (token, idx) = tokenizer.next_token().unwrap(); + let (idx, token) = tokenizer.next_token().unwrap(); assert_eq!(idx, 0); if let Token::Digit(number) = token @@ -168,12 +172,12 @@ mod tests { fn test_valid_negative_number_tokens(n in any::().prop_filter("Positive numbers", |&x| x < 0.0)) { let mut tokenizer = MathExpressionTokenizer::new(format!("{}", n)).unwrap(); assert!(tokenizer.has_token()); - let (token, idx) = tokenizer.next_token().unwrap(); + let (idx, token) = tokenizer.next_token().unwrap(); assert_eq!(idx, 0); assert!(matches!(token, Token::Operator('-'))); assert!(tokenizer.has_token()); - let (token, idx) = tokenizer.next_token().unwrap(); + let (idx, token) = tokenizer.next_token().unwrap(); assert_eq!(idx, 1); if let Token::Digit(number) = token @@ -186,12 +190,12 @@ mod tests { } #[test] - fn test_valid_operator_tokens(s in r"[+\-*/ ]{1,50}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) { + fn test_valid_operator_tokens(s in r"[+\-*/\s]{1,50}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) { let mut tokenizer = MathExpressionTokenizer::new(s.clone()).unwrap(); assert!(tokenizer.has_token()); - while let Ok((token, idx)) = tokenizer.next_token() { - let op = s.chars().nth(idx).unwrap(); + while let Ok((idx, token)) = tokenizer.next_token() { + let op = s[idx..].chars().next().unwrap(); assert_eq!(token, Token::Operator(op)); } @@ -199,12 +203,12 @@ mod tests { } #[test] - fn test_braces_tokens(s in r"[() ]{1,50}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) { + fn test_braces_tokens(s in r"[()\s]{1,50}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) { let mut tokenizer = MathExpressionTokenizer::new(s.clone()).unwrap(); assert!(tokenizer.has_token()); - while let Ok((token, idx)) = tokenizer.next_token() { - let op = s.chars().nth(idx).unwrap(); + while let Ok((idx, token)) = tokenizer.next_token() { + let op = s[idx..].chars().next().unwrap(); if op == '(' { assert_eq!(token, Token::OpenBrace); @@ -219,15 +223,12 @@ mod tests { } #[test] - fn test_valid_sequence_tokens(s in r"[0-9+\-*/() ]{1,10}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) { + fn test_valid_sequence_tokens(s in r"[0-9+\-*/()\s]{1,10}".prop_filter("no leading space", |s| !s.starts_with(char::is_whitespace))) { let mut tokenizer = MathExpressionTokenizer::new(s.clone()).unwrap(); assert!(tokenizer.has_token()); - println!("Ch:{}",s); - - while let Ok((token, idx)) = tokenizer.next_token() { - let ch = s.chars().nth(idx).unwrap(); - println!("Ch: {} - {}", ch, s); + while let Ok((idx, token)) = tokenizer.next_token() { + let ch = s[idx..].chars().next().unwrap(); match token { Token::OpenBrace => { assert_eq!(ch, '(');