From e0049694ace8fde9bbbb95cb80fa74b04a115f68 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Wed, 12 Nov 2025 06:46:33 -0700
Subject: [PATCH 1/4] grok_bin_oct_hex: Add two UNLIKELY()s

Underscores in numbers are much less common than digits, and its
unlikely that this iteration of the loop through all the digits will
still have a running total of 0.
---
 numeric.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/numeric.c b/numeric.c
index ea855912dcf1..8ee066aef500 100644
--- a/numeric.c
+++ b/numeric.c
@@ -551,7 +551,7 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start,
             continue;
         }
 
-        if (   *s == '_'
+        if (   UNLIKELY(*s == '_')
             && s < e - 1
             && allow_underscores
             && generic_isCC_(s[1], class_bit)
@@ -567,7 +567,7 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start,
             /* To get here with the value so-far being 0 means we've only had
              * leading zeros, then an underscore.  We can continue with the
              * branchless switch() instead of this loop */
-            if (value == 0) {
+            if (UNLIKELY(value == 0)) {
                 goto redo_switch;
             }
             else {

From 314303be2683d705c17e151e623b0f2597212de1 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Wed, 12 Nov 2025 08:04:28 -0700
Subject: [PATCH 2/4] grok_bin_oct_hex: Add/revise some comments

grok_bin_oct_hex: Move comments

And reflow.  This is in preparation for the next commit.
---
 numeric.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/numeric.c b/numeric.c
index 8ee066aef500..f063c6c74487 100644
--- a/numeric.c
+++ b/numeric.c
@@ -511,10 +511,11 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start,
              * because a bit got shifted off the left end, so overflowed.
              * But if it worked, add the new digit. */
             if (LIKELY((tentative_value >> shift) == value)) {
+                /* Note XDIGIT_VALUE() is branchless, works on binary and
+                 * octal as well, so can be used here, without noticeably
+                 * slowing those down (it does have unnecessary shifts, ANDSs,
+                 * and additions for those) */
                 value = tentative_value | XDIGIT_VALUE(*s);
-                    /* Note XDIGIT_VALUE() is branchless, works on binary
-                     * and octal as well, so can be used here, without
-                     * slowing those down */
                 factor *= base;
                 continue;
             }
@@ -551,14 +552,13 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start,
             continue;
         }
 
+        /* Handle non-trailing underscores when those are accepted */
         if (   UNLIKELY(*s == '_')
             && s < e - 1
             && allow_underscores
             && generic_isCC_(s[1], class_bit)
-
-                /* Don't allow a leading underscore if the only-medial bit is
-                 * set */
             && (   LIKELY(s > s0)
+                   /* Including initial underscores if those are accepted */
                 || UNLIKELY(! (  input_flags
                                & PERL_SCAN_ALLOW_MEDIAL_UNDERSCORES_ONLY))))
         {
@@ -575,6 +575,9 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start,
             }
         }
 
+        /* We get here when done with the parse, or it got interrupted by a
+         * non-digit or a digit that is outside the bounds of the base, like a
+         * digit 2 in a binary number */
         if (*s) {
             if (   ! (input_flags & PERL_SCAN_SILENT_ILLDIGIT)
                 &&    ckWARN(WARN_DIGIT))
@@ -593,7 +596,8 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start,
                      * scanning as soon as non-octal characters are seen,
                      * complain only if someone seems to want to use the digits
                      * eight and nine.  Since we know it is not octal, then if
-                     * isDIGIT, must be an 8 or 9). */
+                     * isDIGIT, must be an 8 or 9). khw: XXX why not DWIM for
+                     * other bases as well? */
                     warner(packWARN(WARN_DIGIT),
                            "Illegal octal digit '%c' ignored", *s);
                 }
@@ -604,8 +608,9 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start,
             }
         }
 
+        /* Error, so quit parsing */
         break;
-    }
+    }   /* End of parsing loop */
 
     *len_p = s - start;
 

From 901167dd58ed9eb3c8e122f58c0fd640c14f9654 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Sun, 30 Nov 2025 14:31:52 -0700
Subject: [PATCH 3/4] grok_bin_oct_dec: Speed up overflow detection

We can compute outside the loop the exact value at which the next
iteration wil overflow, saving some operations
---
 numeric.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/numeric.c b/numeric.c
index f063c6c74487..ed324c986a92 100644
--- a/numeric.c
+++ b/numeric.c
@@ -495,6 +495,9 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start,
     NV value_nv = 0;
     const PERL_UINT_FAST8_T base = 1 << shift;  /* 2, 8, or 16 */
 
+    /* Value above which, the next digit processed would overflow */
+    UV max_div = UV_MAX >> shift;
+
     for (; s < e; s++) {
         if (generic_isCC_(*s, class_bit)) {
             /* Write it in this wonky order with a goto to attempt to get the
@@ -503,19 +506,12 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start,
                (khw suspects that adding a LIKELY() just above would do the
                same thing) */
           redo: ;
-
-            /* Make room for the next digit */
-            UV tentative_value = value << shift;
-
-            /* If shiftng back doesn't yield the previous value, it was
-             * because a bit got shifted off the left end, so overflowed.
-             * But if it worked, add the new digit. */
-            if (LIKELY((tentative_value >> shift) == value)) {
+            if (LIKELY(value <= max_div)) {
                 /* Note XDIGIT_VALUE() is branchless, works on binary and
                  * octal as well, so can be used here, without noticeably
                  * slowing those down (it does have unnecessary shifts, ANDSs,
                  * and additions for those) */
-                value = tentative_value | XDIGIT_VALUE(*s);
+                value = (value << shift) | XDIGIT_VALUE(*s);
                 factor *= base;
                 continue;
             }

From 054d093f4dd282994f4be523b32bfaa2abe1b659 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Sun, 30 Nov 2025 14:53:53 -0700
Subject: [PATCH 4/4] grok_bin_oct_hex: Improve speed, precision

This replaces a floating multiply each loop iteration with an integer
increment, plus after the loop completes, a call to ldexp() or pow().

Most of the floating multiplies are done on integral values, so
not much precision is lost, but this gets it down to just one
precision-losing operation.
---
 numeric.c | 33 +++++++++++++++++++++++----------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/numeric.c b/numeric.c
index ed324c986a92..0e788bdd757a 100644
--- a/numeric.c
+++ b/numeric.c
@@ -487,9 +487,13 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start,
           break;
     }
 
-    /* In overflows, this keeps track of how much to multiply the overflowed NV
-     * by as we continue to parse the remaining digits */
-    NV factor = 0.0;
+    /* The loop below accumulates the integral running total of the result,
+     * digit by digit.  If this total overflows, it adds that to an NV
+     * approximation, and starts looking at the next batch of digits, until
+     * they overflow, and so on.  This variable counts the number of digits
+     * seen in the current batch.  (The initial value is irrelevant, as the
+     * first batch will end up being multiplied by zero.) */
+    uint_fast8_t batch_digit_count = 0;
 
     bool overflowed = FALSE;
     NV value_nv = 0;
@@ -512,16 +516,21 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start,
                  * slowing those down (it does have unnecessary shifts, ANDSs,
                  * and additions for those) */
                 value = (value << shift) | XDIGIT_VALUE(*s);
-                factor *= base;
+                batch_digit_count++;
                 continue;
             }
 
             /* Bah. We are about to overflow.  Instead, add the unoverflowed
              * value to an NV that contains an approximation to the correct
-             * value.  Each time through the loop we have increased 'factor' so
-             * that it gives how much the current approximation needs to
-             * effectively be shifted to make room for this new value */
-            value_nv *= factor;
+             * value.  Each time through the loop we have incremented
+             * 'batch_digit_count' so that it gives how many digits the
+             * current approximation needs to effectively be shifted to make
+             * room for this new value */
+#ifdef Perl_ldexp
+                value_nv = Perl_ldexp(value_nv, batch_digit_count * shift);
+#else
+                value_nv *= Perl_pow(base, batch_digit_count);
+#endif
             value_nv += (NV) value;
 
             /* Then we keep accumulating digits, until all are parsed.  We
@@ -529,7 +538,7 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start,
              * 'value_nv' eventually, either when all digits are gone, or we
              * have overflowed this fresh start. */
             value = XDIGIT_VALUE(*s);
-            factor = base;
+            batch_digit_count = 1;
 
             if (! overflowed) {
                 overflowed = TRUE;
@@ -623,7 +632,11 @@ Perl_grok_bin_oct_hex(pTHX_ const char * const start,
     }
 
     /* Overflowed: Calculate the final overflow approximation */
-    value_nv *= factor;
+#ifdef Perl_ldexp
+        value_nv = Perl_ldexp(value_nv, batch_digit_count * shift);
+#else
+        value_nv *= Perl_pow(base, batch_digit_count);
+#endif
     value_nv += (NV) value;
 
     output_non_portable(base);