From e0049694ace8fde9bbbb95cb80fa74b04a115f68 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Wed, 12 Nov 2025 06:46:33 -0700 Subject: [PATCH 1/4] grok_bin_oct_hex: Add two UNLIKELY()s Underscores in numbers are much less common than digits, and its unlikely that this iteration of the loop through all the digits will still have a running total of 0. --- numeric.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/numeric.c b/numeric.c index ea855912dcf1..8ee066aef500 100644 --- a/numeric.c +++ b/numeric.c @@ -551,7 +551,7 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start, continue; } - if ( *s == '_' + if ( UNLIKELY(*s == '_') && s < e - 1 && allow_underscores && generic_isCC_(s[1], class_bit) @@ -567,7 +567,7 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start, /* To get here with the value so-far being 0 means we've only had * leading zeros, then an underscore. We can continue with the * branchless switch() instead of this loop */ - if (value == 0) { + if (UNLIKELY(value == 0)) { goto redo_switch; } else { From 314303be2683d705c17e151e623b0f2597212de1 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Wed, 12 Nov 2025 08:04:28 -0700 Subject: [PATCH 2/4] grok_bin_oct_hex: Add/revise some comments grok_bin_oct_hex: Move comments And reflow. This is in preparation for the next commit. --- numeric.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/numeric.c b/numeric.c index 8ee066aef500..f063c6c74487 100644 --- a/numeric.c +++ b/numeric.c @@ -511,10 +511,11 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start, * because a bit got shifted off the left end, so overflowed. * But if it worked, add the new digit. */ if (LIKELY((tentative_value >> shift) == value)) { + /* Note XDIGIT_VALUE() is branchless, works on binary and + * octal as well, so can be used here, without noticeably + * slowing those down (it does have unnecessary shifts, ANDSs, + * and additions for those) */ value = tentative_value | XDIGIT_VALUE(*s); - /* Note XDIGIT_VALUE() is branchless, works on binary - * and octal as well, so can be used here, without - * slowing those down */ factor *= base; continue; } @@ -551,14 +552,13 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start, continue; } + /* Handle non-trailing underscores when those are accepted */ if ( UNLIKELY(*s == '_') && s < e - 1 && allow_underscores && generic_isCC_(s[1], class_bit) - - /* Don't allow a leading underscore if the only-medial bit is - * set */ && ( LIKELY(s > s0) + /* Including initial underscores if those are accepted */ || UNLIKELY(! ( input_flags & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES_ONLY)))) { @@ -575,6 +575,9 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start, } } + /* We get here when done with the parse, or it got interrupted by a + * non-digit or a digit that is outside the bounds of the base, like a + * digit 2 in a binary number */ if (*s) { if ( ! (input_flags & PERL_SCAN_SILENT_ILLDIGIT) && ckWARN(WARN_DIGIT)) @@ -593,7 +596,8 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start, * scanning as soon as non-octal characters are seen, * complain only if someone seems to want to use the digits * eight and nine. Since we know it is not octal, then if - * isDIGIT, must be an 8 or 9). */ + * isDIGIT, must be an 8 or 9). khw: XXX why not DWIM for + * other bases as well? */ warner(packWARN(WARN_DIGIT), "Illegal octal digit '%c' ignored", *s); } @@ -604,8 +608,9 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start, } } + /* Error, so quit parsing */ break; - } + } /* End of parsing loop */ *len_p = s - start; From 901167dd58ed9eb3c8e122f58c0fd640c14f9654 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sun, 30 Nov 2025 14:31:52 -0700 Subject: [PATCH 3/4] grok_bin_oct_dec: Speed up overflow detection We can compute outside the loop the exact value at which the next iteration wil overflow, saving some operations --- numeric.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/numeric.c b/numeric.c index f063c6c74487..ed324c986a92 100644 --- a/numeric.c +++ b/numeric.c @@ -495,6 +495,9 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start, NV value_nv = 0; const PERL_UINT_FAST8_T base = 1 << shift; /* 2, 8, or 16 */ + /* Value above which, the next digit processed would overflow */ + UV max_div = UV_MAX >> shift; + for (; s < e; s++) { if (generic_isCC_(*s, class_bit)) { /* Write it in this wonky order with a goto to attempt to get the @@ -503,19 +506,12 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start, (khw suspects that adding a LIKELY() just above would do the same thing) */ redo: ; - - /* Make room for the next digit */ - UV tentative_value = value << shift; - - /* If shiftng back doesn't yield the previous value, it was - * because a bit got shifted off the left end, so overflowed. - * But if it worked, add the new digit. */ - if (LIKELY((tentative_value >> shift) == value)) { + if (LIKELY(value <= max_div)) { /* Note XDIGIT_VALUE() is branchless, works on binary and * octal as well, so can be used here, without noticeably * slowing those down (it does have unnecessary shifts, ANDSs, * and additions for those) */ - value = tentative_value | XDIGIT_VALUE(*s); + value = (value << shift) | XDIGIT_VALUE(*s); factor *= base; continue; } From 054d093f4dd282994f4be523b32bfaa2abe1b659 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sun, 30 Nov 2025 14:53:53 -0700 Subject: [PATCH 4/4] grok_bin_oct_hex: Improve speed, precision This replaces a floating multiply each loop iteration with an integer increment, plus after the loop completes, a call to ldexp() or pow(). Most of the floating multiplies are done on integral values, so not much precision is lost, but this gets it down to just one precision-losing operation. --- numeric.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/numeric.c b/numeric.c index ed324c986a92..0e788bdd757a 100644 --- a/numeric.c +++ b/numeric.c @@ -487,9 +487,13 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start, break; } - /* In overflows, this keeps track of how much to multiply the overflowed NV - * by as we continue to parse the remaining digits */ - NV factor = 0.0; + /* The loop below accumulates the integral running total of the result, + * digit by digit. If this total overflows, it adds that to an NV + * approximation, and starts looking at the next batch of digits, until + * they overflow, and so on. This variable counts the number of digits + * seen in the current batch. (The initial value is irrelevant, as the + * first batch will end up being multiplied by zero.) */ + uint_fast8_t batch_digit_count = 0; bool overflowed = FALSE; NV value_nv = 0; @@ -512,16 +516,21 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start, * slowing those down (it does have unnecessary shifts, ANDSs, * and additions for those) */ value = (value << shift) | XDIGIT_VALUE(*s); - factor *= base; + batch_digit_count++; continue; } /* Bah. We are about to overflow. Instead, add the unoverflowed * value to an NV that contains an approximation to the correct - * value. Each time through the loop we have increased 'factor' so - * that it gives how much the current approximation needs to - * effectively be shifted to make room for this new value */ - value_nv *= factor; + * value. Each time through the loop we have incremented + * 'batch_digit_count' so that it gives how many digits the + * current approximation needs to effectively be shifted to make + * room for this new value */ +#ifdef Perl_ldexp + value_nv = Perl_ldexp(value_nv, batch_digit_count * shift); +#else + value_nv *= Perl_pow(base, batch_digit_count); +#endif value_nv += (NV) value; /* Then we keep accumulating digits, until all are parsed. We @@ -529,7 +538,7 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start, * 'value_nv' eventually, either when all digits are gone, or we * have overflowed this fresh start. */ value = XDIGIT_VALUE(*s); - factor = base; + batch_digit_count = 1; if (! overflowed) { overflowed = TRUE; @@ -623,7 +632,11 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start, } /* Overflowed: Calculate the final overflow approximation */ - value_nv *= factor; +#ifdef Perl_ldexp + value_nv = Perl_ldexp(value_nv, batch_digit_count * shift); +#else + value_nv *= Perl_pow(base, batch_digit_count); +#endif value_nv += (NV) value; output_non_portable(base);