Skip to content

Commit dff22b3

Browse files
committed
improved tokenizer
1 parent f086d29 commit dff22b3

File tree

3 files changed

+84
-96
lines changed

3 files changed

+84
-96
lines changed

src/main/java/com/igormaznitsa/prologparser/tokenizer/Tokenizer.java

Lines changed: 28 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -21,11 +21,6 @@
2121

2222
package com.igormaznitsa.prologparser.tokenizer;
2323

24-
import static com.igormaznitsa.prologparser.tokenizer.TokenizerState.INTEGER;
25-
import static com.igormaznitsa.prologparser.tokenizer.TokenizerState.LOOK_FOR;
26-
import static com.igormaznitsa.prologparser.tokenizer.TokenizerState.STRING;
27-
import static com.igormaznitsa.prologparser.utils.StringUtils.isCharAllowedForUnquotedAtom;
28-
2924
import com.igormaznitsa.prologparser.ParserContext;
3025
import com.igormaznitsa.prologparser.exceptions.CriticalUnexpectedError;
3126
import com.igormaznitsa.prologparser.exceptions.PrologParserException;
@@ -42,6 +37,9 @@
4237
import java.io.IOException;
4338
import java.io.Reader;
4439

40+
import static com.igormaznitsa.prologparser.tokenizer.TokenizerState.*;
41+
import static com.igormaznitsa.prologparser.utils.StringUtils.isCharAllowedForUnquotedAtom;
42+
4543
/**
4644
* Internal tokenizer to generate the next token from the reader.
4745
*/
@@ -427,18 +425,14 @@ TokenizerResult readNextToken() {
427425
letterOrDigitOnly = Character.isLetterOrDigit(chr);
428426
final String operator = String.valueOf(chr);
429427

430-
if (Character.isLowerCase(chr)) {
431-
state = TokenizerState.ATOM;
428+
if (hasOperatorStartsWith(operator)) {
429+
lastFoundFullOperator = findOperatorForName(operator);
430+
state = TokenizerState.OPERATOR;
432431
} else {
433-
if (hasOperatorStartsWith(operator)) {
434-
lastFoundFullOperator = findOperatorForName(operator);
435-
state = TokenizerState.OPERATOR;
432+
if (Character.isDigit(chr)) {
433+
state = TokenizerState.INTEGER;
436434
} else {
437-
if (Character.isDigit(chr)) {
438-
state = TokenizerState.INTEGER;
439-
} else {
440-
state = TokenizerState.ATOM;
441-
}
435+
state = TokenizerState.ATOM;
442436
}
443437
}
444438
}
@@ -607,16 +601,27 @@ TokenizerResult readNextToken() {
607601
if (chr != '_' && letterOrDigitOnly != Character.isLetterOrDigit(chr)) {
608602
push(chr);
609603

610-
if (lastFoundFullOperator == null) {
611-
return this.tokenizerResultPool.find().setData(
612-
makeTermFromString(strBuffer.toString(), quoting, state),
613-
state,
614-
getLastTokenLine(),
615-
getLastTokenPos()
616-
);
604+
if (lastFoundFullOperator == null || letterOrDigitOnly) {
605+
final String textInBuffer = strBuffer.toString();
606+
607+
if (lastFoundFullOperator != null && lastFoundFullOperator.getTermText().equals(textInBuffer)) {
608+
return this.tokenizerResultPool.find().setData(
609+
lastFoundFullOperator,
610+
state,
611+
getLastTokenLine(),
612+
getLastTokenPos()
613+
);
614+
} else {
615+
return this.tokenizerResultPool.find().setData(
616+
makeTermFromString(textInBuffer, quoting, ATOM),
617+
ATOM,
618+
getLastTokenLine(),
619+
getLastTokenPos()
620+
);
621+
}
617622
} else {
618623
calcDiffAndPushResultBack(
619-
lastFoundFullOperator.getTermText(), strBuffer);
624+
lastFoundFullOperator.getTermText(), strBuffer);
620625
return this.tokenizerResultPool.find().setData(
621626
lastFoundFullOperator,
622627
state,

src/main/java/com/igormaznitsa/prologparser/tokenizer/TokenizerResult.java

Lines changed: 43 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -29,55 +29,55 @@
2929
* Internal object representing tokenizer result.
3030
*/
3131
final class TokenizerResult {
32-
private final SoftObjectPool<TokenizerResult> pool;
33-
private TokenizerState parserState;
34-
private PrologTerm resultTerm;
35-
private int pos;
36-
private int line;
32+
private final SoftObjectPool<TokenizerResult> pool;
33+
private TokenizerState parserState;
34+
private PrologTerm resultTerm;
35+
private int pos;
36+
private int line;
3737

38-
TokenizerResult(final SoftObjectPool<TokenizerResult> pool) {
39-
this.pool = pool;
40-
}
38+
TokenizerResult(final SoftObjectPool<TokenizerResult> pool) {
39+
this.pool = pool;
40+
}
4141

42-
TokenizerResult setData(
43-
final PrologTerm term,
44-
final TokenizerState parserState,
45-
final int line,
46-
final int pos
47-
) {
48-
this.resultTerm = AssertUtils.assertNotNull(term);
49-
this.parserState = AssertUtils.assertNotNull(parserState);
50-
this.pos = pos;
51-
this.line = line;
52-
return this;
53-
}
42+
TokenizerResult setData(
43+
final PrologTerm term,
44+
final TokenizerState parserState,
45+
final int line,
46+
final int pos
47+
) {
48+
this.resultTerm = AssertUtils.assertNotNull(term);
49+
this.parserState = AssertUtils.assertNotNull(parserState);
50+
this.pos = pos;
51+
this.line = line;
52+
return this;
53+
}
5454

55-
void release() {
56-
this.parserState = null;
57-
this.resultTerm = null;
58-
this.pos = -1;
59-
this.line = -1;
60-
this.pool.push(this);
61-
}
55+
void release() {
56+
this.parserState = null;
57+
this.resultTerm = null;
58+
this.pos = -1;
59+
this.line = -1;
60+
this.pool.push(this);
61+
}
6262

63-
TokenizerState getTokenizerState() {
64-
return this.parserState;
65-
}
63+
TokenizerState getTokenizerState() {
64+
return this.parserState;
65+
}
6666

67-
PrologTerm getResult() {
68-
return this.resultTerm;
69-
}
67+
PrologTerm getResult() {
68+
return this.resultTerm;
69+
}
7070

71-
int getPos() {
72-
return this.pos;
73-
}
71+
int getPos() {
72+
return this.pos;
73+
}
7474

75-
int getLine() {
76-
return this.line;
77-
}
75+
int getLine() {
76+
return this.line;
77+
}
7878

79-
@Override
80-
public String toString() {
81-
return "TokenizerResult(" + this.resultTerm + ')';
82-
}
79+
@Override
80+
public String toString() {
81+
return "TokenizerResult(" + this.resultTerm + ')';
82+
}
8383
}

src/test/java/com/igormaznitsa/prologparser/IntegrationTest.java

Lines changed: 13 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -1,33 +1,5 @@
11
package com.igormaznitsa.prologparser;
22

3-
import static com.igormaznitsa.prologparser.DefaultParserContext.of;
4-
import static com.igormaznitsa.prologparser.ParserContext.FLAG_ALLOW_ZERO_STRUCT;
5-
import static com.igormaznitsa.prologparser.ParserContext.FLAG_BLOCK_COMMENTS;
6-
import static com.igormaznitsa.prologparser.ParserContext.FLAG_NONE;
7-
import static com.igormaznitsa.prologparser.ParserContext.FLAG_VAR_AS_FUNCTOR;
8-
import static com.igormaznitsa.prologparser.ParserContext.FLAG_ZERO_SINGLE_QUOTATION_CHAR_CODE;
9-
import static com.igormaznitsa.prologparser.terms.OpContainer.make;
10-
import static com.igormaznitsa.prologparser.terms.PrologTerm.QuotingType.BACK_QUOTED;
11-
import static com.igormaznitsa.prologparser.terms.PrologTerm.QuotingType.DOUBLE_QUOTED;
12-
import static com.igormaznitsa.prologparser.terms.PrologTerm.QuotingType.NO_QUOTED;
13-
import static com.igormaznitsa.prologparser.terms.PrologTerm.QuotingType.SINGLE_QUOTED;
14-
import static com.igormaznitsa.prologparser.terms.TermType.ATOM;
15-
import static com.igormaznitsa.prologparser.tokenizer.OpAssoc.FY;
16-
import static com.igormaznitsa.prologparser.tokenizer.OpAssoc.XFX;
17-
import static com.igormaznitsa.prologparser.tokenizer.OpAssoc.XFY;
18-
import static java.util.stream.Collectors.joining;
19-
import static org.junit.jupiter.api.Assertions.assertEquals;
20-
import static org.junit.jupiter.api.Assertions.assertFalse;
21-
import static org.junit.jupiter.api.Assertions.assertNotNull;
22-
import static org.junit.jupiter.api.Assertions.assertNotSame;
23-
import static org.junit.jupiter.api.Assertions.assertSame;
24-
import static org.junit.jupiter.api.Assertions.assertThrows;
25-
import static org.junit.jupiter.api.Assertions.assertTrue;
26-
import static org.junit.jupiter.api.Assertions.fail;
27-
import static org.mockito.Mockito.clearInvocations;
28-
import static org.mockito.Mockito.mock;
29-
import static org.mockito.Mockito.when;
30-
313
import com.igormaznitsa.prologparser.exceptions.PrologParserException;
324
import com.igormaznitsa.prologparser.terms.OpContainer;
335
import com.igormaznitsa.prologparser.terms.PrologAtom;
@@ -60,6 +32,16 @@
6032
import java.util.Set;
6133
import java.util.concurrent.atomic.AtomicInteger;
6234

35+
import static com.igormaznitsa.prologparser.DefaultParserContext.of;
36+
import static com.igormaznitsa.prologparser.ParserContext.*;
37+
import static com.igormaznitsa.prologparser.terms.OpContainer.make;
38+
import static com.igormaznitsa.prologparser.terms.PrologTerm.QuotingType.*;
39+
import static com.igormaznitsa.prologparser.terms.TermType.ATOM;
40+
import static com.igormaznitsa.prologparser.tokenizer.OpAssoc.*;
41+
import static java.util.stream.Collectors.joining;
42+
import static org.junit.jupiter.api.Assertions.*;
43+
import static org.mockito.Mockito.*;
44+
6345
public class IntegrationTest {
6446

6547
private static PrologParser parseCpl(final String str) {
@@ -747,7 +729,7 @@ public void testSingleOperatorAsAtom() {
747729
public void testRecognizingUserOperatorsWhichSimilarMetaOperators() {
748730
final Map<String, OpContainer> operators = new HashMap<>();
749731
operators.put("(((", make(Op.make(1, OpAssoc.FX, "(((")));
750-
operators.put("...", make(Op.make(1200, OpAssoc.XF, "...")));
732+
operators.put("...", make(Op.make(1200, XF, "...")));
751733
final StubContext stubContext = new StubContext(operators);
752734

753735
final PrologStruct structure = (PrologStruct) parseEd("(((hello....", stubContext).next();
@@ -995,11 +977,12 @@ public void testOperatorAsFunctor() throws Exception {
995977

996978
@Test
997979
public void testPairOfOperatorsWithIncompatiblePrecedence() throws Exception {
980+
assertEquals("- (discontiguous)", parseEd("-discontiguous.").next().toString());
981+
assertEquals("aab", parseEd("aab.", DefaultParserContext.of(FLAG_NONE, Op.make(400, XF, "aabc"))).next().toString());
998982
assertEquals("1 - - -1", parseEd("1---1.").next().toString());
999983
assertEquals("1 + 1 * a * a + a - 1", parseEd("1+1*a*a+a-1.").next().toString());
1000984
assertEquals("-1 + 2 ** (- 3 ** (-4))", parseEd("-1+2**-3**-4.").next().toString());
1001985
assertEquals("X = (discontiguous)", parseEd("X=discontiguous.").next().toString());
1002-
assertEquals("- (discontiguous)", parseEd("-discontiguous.").next().toString());
1003986
assertEquals("2 ** (-1)", parseEd("2**-1.").next().toString());
1004987
assertEquals("0.2 is 5 ** (-1)", parseEd("0.2 is 5** -1.").next().toString());
1005988
assertEquals("a : b :> c :> d", parseEd("a:b:>c:>d.", DefaultParserContext.of(ParserContext.FLAG_NONE, Op.make(500, XFY, ":>"))).next().toString());

0 commit comments

Comments
 (0)