Skip to content

Commit 9b2042c

Browse files
committed
Implemented radix notation for integer numbers (e.g. 16'FF, 2'1010)
1 parent 5e69e66 commit 9b2042c

File tree

2 files changed

+75
-32
lines changed

2 files changed

+75
-32
lines changed

src/main/java/com/igormaznitsa/prologparser/tokenizer/Tokenizer.java

Lines changed: 62 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636

3737
import java.io.IOException;
3838
import java.io.Reader;
39+
import java.math.BigInteger;
3940

4041
import static com.igormaznitsa.prologparser.tokenizer.TokenizerState.*;
4142
import static com.igormaznitsa.prologparser.utils.StringUtils.isCharAllowedForUnquotedAtom;
@@ -273,6 +274,8 @@ TokenizerResult readNextToken() {
273274

274275
PrologTerm.QuotingType quoting = PrologTerm.QuotingType.NO_QUOTED;
275276

277+
int radix = 10;
278+
276279
TokenizerState state = LOOK_FOR;
277280
boolean specCharDetected = false;
278281
boolean charCodeAsInt = false;
@@ -309,16 +312,16 @@ TokenizerResult readNextToken() {
309312
push('.');
310313
// it is Integer
311314
return this.tokenizerResultPool.find().setData(
312-
makeTermFromString(strBuffer.toStringExcludeLastChar(), quoting, TokenizerState.INTEGER),
313-
TokenizerState.ATOM,
315+
makeTermFromString(strBuffer.toStringExcludeLastChar(), radix, quoting, TokenizerState.INTEGER),
316+
TokenizerState.INTEGER,
314317
getLastTokenLine(),
315318
getLastTokenPos()
316319
);
317320
} else {
318321
// it is just integer number or an atom
319322
final String text = strBuffer.toString();
320323
return this.tokenizerResultPool.find().setData(
321-
makeTermFromString(text, PrologTerm.findAppropriateQuoting(text), state),
324+
makeTermFromString(text, radix, PrologTerm.findAppropriateQuoting(text), state),
322325
state,
323326
getLastTokenLine(),
324327
getLastTokenPos()
@@ -348,7 +351,7 @@ TokenizerResult readNextToken() {
348351
case OPERATOR: {
349352
if (lastFoundFullOperator == null) {
350353
return this.tokenizerResultPool.find().setData(
351-
makeTermFromString(strBuffer.toString(), quoting, state),
354+
makeTermFromString(strBuffer.toString(), radix, quoting, state),
352355
state,
353356
getLastTokenLine(),
354357
getLastTokenPos()
@@ -459,7 +462,7 @@ TokenizerResult readNextToken() {
459462
}
460463

461464
return this.tokenizerResultPool.find().setData(
462-
makeTermFromString(text, PrologTerm.findAppropriateQuoting(text), state),
465+
makeTermFromString(text, radix, PrologTerm.findAppropriateQuoting(text), state),
463466
state,
464467
getLastTokenLine(),
465468
getLastTokenPos());
@@ -482,7 +485,7 @@ TokenizerResult readNextToken() {
482485
}
483486
}
484487
return this.tokenizerResultPool.find().setData(
485-
makeTermFromString(text, PrologTerm.findAppropriateQuoting(text), state),
488+
makeTermFromString(text, radix, PrologTerm.findAppropriateQuoting(text), state),
486489
state,
487490
getLastTokenLine(),
488491
getLastTokenPos());
@@ -493,7 +496,7 @@ TokenizerResult readNextToken() {
493496
}
494497
break;
495498
case INTEGER: {
496-
if (Character.isDigit(chr)) {
499+
if (isCharAllowedForRadix(chr, radix)) {
497500
foundUnderscoreInNumber = false;
498501
strBuffer.append(chr);
499502
} else if (chr == '_') {
@@ -515,14 +518,33 @@ TokenizerResult readNextToken() {
515518
throw new PrologParserException("Unexpected underscore", this.prevLine, this.prevPos);
516519
}
517520

518-
if (this.zeroSingleQuotationAllowed && chr == '\'' && strBuffer.isSingleChar('0')) {
519-
state = STRING;
520-
charCodeAsInt = true;
521-
strBuffer.clear();
521+
if (chr == '\'') {
522+
if (strBuffer.isSingleChar('0')) {
523+
if (this.zeroSingleQuotationAllowed) {
524+
state = STRING;
525+
charCodeAsInt = true;
526+
strBuffer.clear();
527+
} else {
528+
push(chr);
529+
return this.tokenizerResultPool.find().setData(
530+
makeTermFromString(strBuffer.toString(), radix, quoting, state),
531+
TokenizerState.INTEGER,
532+
getLastTokenLine(),
533+
getLastTokenPos());
534+
}
535+
} else {
536+
radix = Integer.parseInt(strBuffer.toString());
537+
if (radix < 2 || radix > 36) {
538+
throw new PrologParserException("Radix must be 2..36: " + radix,
539+
getLastTokenLine(),
540+
getLastTokenPos());
541+
}
542+
strBuffer.clear();
543+
}
522544
} else {
523545
push(chr);
524546
return this.tokenizerResultPool.find().setData(
525-
makeTermFromString(strBuffer.toString(), quoting, state),
547+
makeTermFromString(strBuffer.toString(), radix, quoting, state),
526548
TokenizerState.INTEGER,
527549
getLastTokenLine(),
528550
getLastTokenPos());
@@ -548,7 +570,7 @@ TokenizerResult readNextToken() {
548570
} else {
549571
push(chr);
550572
return this.tokenizerResultPool.find().setData(
551-
makeTermFromString(strBuffer.toString(), quoting, TokenizerState.FLOAT),
573+
makeTermFromString(strBuffer.toString(), radix, quoting, TokenizerState.FLOAT),
552574
TokenizerState.FLOAT,
553575
getLastTokenLine(),
554576
getLastTokenPos());
@@ -563,7 +585,7 @@ TokenizerResult readNextToken() {
563585
} else {
564586
push(chr);
565587
return this.tokenizerResultPool.find().setData(
566-
makeTermFromString(strBuffer.toStringExcludeLastChar(), quoting, TokenizerState.FLOAT),
588+
makeTermFromString(strBuffer.toStringExcludeLastChar(), radix, quoting, TokenizerState.FLOAT),
567589
TokenizerState.FLOAT,
568590
getLastTokenLine(),
569591
getLastTokenPos());
@@ -580,14 +602,14 @@ TokenizerResult readNextToken() {
580602
// it was an integer
581603
push('.');
582604
return this.tokenizerResultPool.find().setData(
583-
makeTermFromString(strBuffer.toStringExcludeLastChar(), quoting, TokenizerState.INTEGER),
605+
makeTermFromString(strBuffer.toStringExcludeLastChar(), radix, quoting, TokenizerState.INTEGER),
584606
TokenizerState.INTEGER,
585607
getLastTokenLine(),
586608
getLastTokenPos());
587609
} else {
588610
// it is float
589611
return this.tokenizerResultPool.find().setData(
590-
makeTermFromString(strBuffer.toString(), quoting, state),
612+
makeTermFromString(strBuffer.toString(), radix, quoting, state),
591613
state,
592614
getLastTokenLine(),
593615
getLastTokenPos()
@@ -613,7 +635,7 @@ TokenizerResult readNextToken() {
613635
);
614636
} else {
615637
return this.tokenizerResultPool.find().setData(
616-
makeTermFromString(textInBuffer, quoting, ATOM),
638+
makeTermFromString(textInBuffer, radix, quoting, ATOM),
617639
ATOM,
618640
getLastTokenLine(),
619641
getLastTokenPos()
@@ -687,7 +709,7 @@ state, getLastTokenLine(),
687709
strBuffer.append('\n');
688710
if (charCodeAsInt) {
689711
return this.tokenizerResultPool.find().setData(
690-
makeTermFromString(strBuffer.toString(), quoting, state),
712+
makeTermFromString(strBuffer.toString(), radix, quoting, state),
691713
state,
692714
getLastTokenLine(),
693715
getLastTokenPos()
@@ -717,7 +739,7 @@ state, getLastTokenLine(),
717739
case '\'':
718740
if (quoting == PrologTerm.QuotingType.SINGLE_QUOTED) {
719741
return this.tokenizerResultPool.find().setData(
720-
makeTermFromString(strBuffer.toString(), quoting, state),
742+
makeTermFromString(strBuffer.toString(), radix, quoting, state),
721743
state,
722744
getLastTokenLine(),
723745
getLastTokenPos()
@@ -738,7 +760,7 @@ state, getLastTokenLine(),
738760
case '`':
739761
if (quoting == PrologTerm.QuotingType.BACK_QUOTED) {
740762
return this.tokenizerResultPool.find().setData(
741-
makeTermFromString(strBuffer.toString(), quoting, state),
763+
makeTermFromString(strBuffer.toString(), radix, quoting, state),
742764
state,
743765
getLastTokenLine(),
744766
getLastTokenPos()
@@ -759,7 +781,7 @@ state, getLastTokenLine(),
759781
case '\"':
760782
if (quoting == PrologTerm.QuotingType.DOUBLE_QUOTED) {
761783
return this.tokenizerResultPool.find().setData(
762-
makeTermFromString(strBuffer.toString(), quoting, state),
784+
makeTermFromString(strBuffer.toString(), radix, quoting, state),
763785
state,
764786
getLastTokenLine(),
765787
getLastTokenPos()
@@ -827,13 +849,30 @@ state, getLastTokenLine(),
827849
}
828850
}
829851

830-
PrologTerm makeTermFromString(final String str, final PrologTerm.QuotingType quotingType, final TokenizerState state) {
852+
private static boolean isCharAllowedForRadix(final char chr, final int radix) {
853+
if (radix == 10) {
854+
return Character.isDigit(chr);
855+
} else if (radix < 10) {
856+
return chr >= '0' && chr < ('0' + radix);
857+
} else {
858+
if (chr >= '0' && chr <= '9') {
859+
return true;
860+
}
861+
final int diff = radix - 10;
862+
if (chr >= 'A' && chr < ('A' + diff)) {
863+
return true;
864+
}
865+
return chr >= 'a' && chr < ('a' + diff);
866+
}
867+
}
868+
869+
PrologTerm makeTermFromString(final String str, final int radix, final PrologTerm.QuotingType quotingType, final TokenizerState state) {
831870
PrologTerm result;
832871

833872
switch (state) {
834873
case INTEGER: {
835874
try {
836-
result = new PrologInt(str);
875+
result = radix == 10 ? new PrologInt(str) : new PrologInt(new BigInteger(str, radix));
837876
} catch (NumberFormatException ex) {
838877
result = null;
839878
}

src/test/java/com/igormaznitsa/prologparser/tokenizer/TokenizerTest.java

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,10 @@ public void testQuoting() {
246246

247247
@Test
248248
public void testUnderscoreInNumbers_Normal() {
249+
assertEquals(6384, ((PrologInt) tokenizeOf("2'001_1000_1111_0000.").readNextToken().getResult()).getNumber().intValue());
250+
assertEquals(255, ((PrologInt) tokenizeOf("16'F_F.").readNextToken().getResult()).getNumber().intValue());
251+
assertEquals(255, ((PrologInt) tokenizeOf("16'f_f.").readNextToken().getResult()).getNumber().intValue());
252+
249253
assertEquals(12345, ((PrologInt) tokenizeOf("12_345.").readNextToken().getResult()).getNumber().intValue());
250254
assertEquals(12345, ((PrologInt) tokenizeOf("12_34_5.").readNextToken().getResult()).getNumber().intValue());
251255
assertEquals(12345, ((PrologInt) tokenizeOf("1_2_34_5.").readNextToken().getResult()).getNumber().intValue());
@@ -261,55 +265,55 @@ public void testUnderscoreInNumbers_Normal() {
261265
@Test
262266
public void testMakeTermFromString() {
263267
final Tokenizer tokenizer = tokenizeOf("792394382");
264-
PrologTerm term = tokenizer.makeTermFromString("792394382", NO_QUOTED, TokenizerState.INTEGER);
268+
PrologTerm term = tokenizer.makeTermFromString("792394382", 10, NO_QUOTED, TokenizerState.INTEGER);
265269
assertNotNull(term);
266270
assertEquals(TermType.ATOM, term.getTermType());
267271
assertSame(term.getClass(), PrologInt.class);
268272
assertEquals("792394382", term.getTermText());
269273

270-
term = tokenizer.makeTermFromString(Long.toString(Long.MIN_VALUE), NO_QUOTED, TokenizerState.INTEGER);
274+
term = tokenizer.makeTermFromString(Long.toString(Long.MIN_VALUE), 10, NO_QUOTED, TokenizerState.INTEGER);
271275
assertNotNull(term);
272276
assertEquals(TermType.ATOM, term.getTermType());
273277
assertSame(term.getClass(), PrologInt.class);
274278
assertEquals(Long.toString(Long.MIN_VALUE), term.getTermText());
275279

276-
term = tokenizer.makeTermFromString(Long.toString(Long.MAX_VALUE), NO_QUOTED, TokenizerState.INTEGER);
280+
term = tokenizer.makeTermFromString(Long.toString(Long.MAX_VALUE), 10, NO_QUOTED, TokenizerState.INTEGER);
277281
assertNotNull(term);
278282
assertEquals(TermType.ATOM, term.getTermType());
279283
assertSame(term.getClass(), PrologInt.class);
280284
assertEquals(Long.toString(Long.MAX_VALUE), term.getTermText());
281285

282-
term = tokenizer.makeTermFromString("0.003422", NO_QUOTED, TokenizerState.FLOAT);
286+
term = tokenizer.makeTermFromString("0.003422", 10, NO_QUOTED, TokenizerState.FLOAT);
283287
assertNotNull(term);
284288
assertEquals(TermType.ATOM, term.getTermType());
285289
assertSame(term.getClass(), PrologFloat.class);
286290
assertEquals("0.003422", term.getTermText());
287291

288-
term = tokenizer.makeTermFromString("a0.003422b", NO_QUOTED, TokenizerState.FLOAT);
292+
term = tokenizer.makeTermFromString("a0.003422b", 10, NO_QUOTED, TokenizerState.FLOAT);
289293
assertNotNull(term);
290294
assertEquals(TermType.ATOM, term.getTermType());
291295
assertSame(term.getClass(), PrologAtom.class);
292296
assertEquals("a0.003422b", term.getTermText());
293297

294-
term = tokenizer.makeTermFromString("a12345b", NO_QUOTED, TokenizerState.INTEGER);
298+
term = tokenizer.makeTermFromString("a12345b", 10, NO_QUOTED, TokenizerState.INTEGER);
295299
assertNotNull(term);
296300
assertEquals(TermType.ATOM, term.getTermType());
297301
assertSame(term.getClass(), PrologAtom.class);
298302
assertEquals("a12345b", term.getTermText());
299303

300-
term = tokenizer.makeTermFromString("123", SINGLE_QUOTED, ATOM);
304+
term = tokenizer.makeTermFromString("123", 10, SINGLE_QUOTED, ATOM);
301305
assertNotNull(term);
302306
assertEquals(TermType.ATOM, term.getTermType());
303307
assertSame(term.getClass(), PrologAtom.class);
304308
assertEquals("123", term.getTermText());
305309

306-
term = tokenizer.makeTermFromString("123.123", SINGLE_QUOTED, ATOM);
310+
term = tokenizer.makeTermFromString("123.123", 10, SINGLE_QUOTED, ATOM);
307311
assertNotNull(term);
308312
assertEquals(TermType.ATOM, term.getTermType());
309313
assertSame(term.getClass(), PrologAtom.class);
310314
assertEquals("123.123", term.getTermText());
311315

312-
term = tokenizer.makeTermFromString("abcd", NO_QUOTED, ATOM);
316+
term = tokenizer.makeTermFromString("abcd", 10, NO_QUOTED, ATOM);
313317
assertNotNull(term);
314318
assertEquals(TermType.ATOM, term.getTermType());
315319
assertSame(term.getClass(), PrologAtom.class);

0 commit comments

Comments
 (0)