Skip to content

Commit add7c05

Browse files
author
Avi SZYCHTER
committed
Added parseNumber to have a single way of parsing numbers + use of all the great new Token functions
1 parent ca242d5 commit add7c05

File tree

2 files changed

+53
-29
lines changed

2 files changed

+53
-29
lines changed

include/Tokenizer.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ class Tokenizer {
2525
char getNextChar();
2626
char getCurrentChar() const;
2727

28+
std::string parseNumber(bool& is_float);
29+
2830
private:
2931
virtual char doGetNextChar() = 0;
3032

src/parsing/Tokenizer.cpp

Lines changed: 51 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -150,24 +150,22 @@ Token Tokenizer::getNextToken() {
150150
currentToken = Token(Token::Eof, "");
151151
}
152152
else if (isSeparator(currentChar)) {
153-
currentToken = Token(Token::Separator, std::string(1, currentChar));
153+
currentToken = Token::createSeparator(currentChar);
154154
currentChar = getNextChar();
155155
}
156156
else if (currentChar == '+') {
157-
currentToken = Token(Token::ArithmeticSign, std::string(1, currentChar));
157+
currentToken = Token::createArithmeticSign(currentChar);
158158
currentChar = getNextChar();
159159
}
160160
else if (currentChar == '-') {
161161
currentChar = getNextChar();
162162
if (!isDigit(currentChar))
163-
currentToken = Token(Token::ArithmeticSign, "-");
163+
currentToken = Token::createArithmeticSign('-');
164164
else { // Negative number
165-
std::string literal = "-";
166-
while (isDigit(currentChar) || currentChar == '.') {
167-
literal += currentChar;
168-
currentChar = getNextChar();
169-
}
170-
currentToken = Token(Token::Number, literal);
165+
bool is_float;
166+
std::string literal = "-" + parseNumber(is_float);
167+
168+
currentToken = Token::createNumber(literal, false, is_float);
171169
}
172170
}
173171
else if (currentChar == '\"') {
@@ -190,20 +188,10 @@ Token Tokenizer::getNextToken() {
190188
currentToken = Token(Token::StringLiteral, literal);
191189
}
192190
else if (isDigit(currentChar)) {
193-
std::string literal = std::string(1, currentChar);
194-
currentChar = getNextChar();
191+
bool is_float;
192+
std::string literal = parseNumber(is_float);
195193

196-
while (isDigit(currentChar) || currentChar == '.' || currentChar == 'e') {
197-
literal += currentChar;
198-
199-
if (currentChar == 'e') {
200-
literal += getNextChar();
201-
}
202-
203-
currentChar = getNextChar();
204-
}
205-
206-
currentToken = Token(Token::Number, literal);
194+
currentToken = Token::createNumber(literal, true, is_float);
207195
}
208196
else if (isIdentifierStart(currentChar)) {
209197
std::string identifier = std::string(1, currentChar);
@@ -217,12 +205,9 @@ Token Tokenizer::getNextToken() {
217205
currentToken = Token(Token::Identifier, identifier);
218206
}
219207
else {
220-
throw CANDatabaseException(
221-
"Invalid character \"" +
222-
std::string(1, currentChar) +
223-
"\" encountered at line " +
224-
std::to_string(lineCount())
225-
);
208+
std::string exceptStr = "Invalid character \"" + std::string(1, currentChar) +
209+
"\" encountered at line " + std::to_string(lineCount());
210+
throw CANDatabaseException(exceptStr);
226211
}
227212

228213
// std::cout << "Token: " << currentToken.image() << std::endl;
@@ -243,6 +228,43 @@ void Tokenizer::skipLine() {
243228
}
244229
}
245230

231+
std::string Tokenizer::parseNumber(bool& is_float) {
232+
std::string result(1, getCurrentChar());
233+
234+
char currentChar = getNextChar();
235+
is_float = false;
236+
237+
while (isDigit(currentChar) && !isEOF(currentChar)) {
238+
result += currentChar;
239+
currentChar = getNextChar();
240+
241+
if(currentChar == '.') {
242+
is_float = true;
243+
result += currentChar;
244+
currentChar = getNextChar();
245+
}
246+
else if(currentChar == 'e') {
247+
result += currentChar;
248+
currentChar = getNextChar();
249+
250+
// Plus "in the wild" are not considered to be part of a number
251+
// They are only allowed after "e" (eg. 3e+002)
252+
if(currentChar == '+') {
253+
result += currentChar;
254+
currentChar = getNextChar();
255+
}
256+
// Negative exposants always represent floating-point numbers
257+
else if(currentChar == '-') {
258+
result += currentChar;
259+
is_float = true;
260+
currentChar == getNextChar();
261+
}
262+
}
263+
}
264+
265+
return result;
266+
}
267+
246268
void Tokenizer::skipUntil(const std::string& token) {
247269
unsigned long long initLine = lineCount();
248270

@@ -251,7 +273,7 @@ void Tokenizer::skipUntil(const std::string& token) {
251273
getNextToken();
252274
}
253275

254-
if(currentToken.type() == Token::Eof) {
276+
if(currentToken.type == Token::Eof) {
255277
throw CANDatabaseException(
256278
"Error: due to an unrecognized (and badly formed) command at line " +
257279
std::to_string(initLine) +

0 commit comments

Comments
 (0)