turkdevops · pull · Dec 31, 2025 · Dec 31, 2025 · Dec 31, 2025 · Dec 31, 2025
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
@@ -477,23 +477,24 @@ static const signed char digit_values[256] = {
     -1, -1, -1, -1, -1, -1, -1
 };
 
-static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p)
+static uint32_t unescape_unicode(JSON_ParserState *state, const char *sp, const char *spe)
 {
-    signed char b;
-    uint32_t result = 0;
-    b = digit_values[p[0]];
-    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
-    result = (result << 4) | (unsigned char)b;
-    b = digit_values[p[1]];
-    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
-    result = (result << 4) | (unsigned char)b;
-    b = digit_values[p[2]];
-    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
-    result = (result << 4) | (unsigned char)b;
-    b = digit_values[p[3]];
-    if (b < 0) raise_parse_error_at("incomplete unicode character escape sequence at %s", state, (char *)p - 2);
-    result = (result << 4) | (unsigned char)b;
-    return result;
+    if (RB_UNLIKELY(spe - sp < 4)) { /* fewer than 4 bytes before spe; compare the distance so no pointer below the buffer is formed */
+        raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
+    }
+
+    const unsigned char *p = (const unsigned char *)sp; /* unsigned bytes so values >= 0x80 index digit_values correctly */
+
+    const signed char b0 = digit_values[p[0]];
+    const signed char b1 = digit_values[p[1]];
+    const signed char b2 = digit_values[p[2]];
+    const signed char b3 = digit_values[p[3]];
+
+    if (RB_UNLIKELY((signed char)(b0 | b1 | b2 | b3) < 0)) { /* a negative (non-hex) lookup makes the OR negative: one test covers all four digits */
+        raise_parse_error_at("incomplete unicode character escape sequence at %s", state, sp - 2);
+    }
+
+    return ((uint32_t)b0 << 12) | ((uint32_t)b1 << 8) | ((uint32_t)b2 << 4) | (uint32_t)b3; /* four nibbles, most significant first */
 }
 
 #define GET_PARSER_CONFIG                          \
@@ -643,7 +644,7 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, JSON_ParserCon
 typedef struct _json_unescape_positions {
     long size;
     const char **positions;
-    bool has_more;
+    unsigned long additional_backslashes; /* backslashes counted but not stored in positions (presumably because the array was full -- confirm in json_parse_escaped_string); json_next_backslash decrements this and locates each with memchr */
 } JSON_UnescapePositions;
 
 static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
@@ -657,7 +658,8 @@ static inline const char *json_next_backslash(const char *pe, const char *string
         }
     }
 
-    if (positions->has_more) {
+    if (positions->additional_backslashes) {
+        positions->additional_backslashes--;
         return memchr(pe, '\\', stringEnd - pe);
     }
 
@@ -707,50 +709,43 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser
             case 'f':
                 APPEND_CHAR('\f');
                 break;
-            case 'u':
-                if (pe > stringEnd - 5) {
-                    raise_parse_error_at("incomplete unicode character escape sequence at %s", state, p);
-                } else {
-                    uint32_t ch = unescape_unicode(state, (unsigned char *) ++pe);
-                    pe += 3;
-                    /* To handle values above U+FFFF, we take a sequence of
-                     * \uXXXX escapes in the U+D800..U+DBFF then
-                     * U+DC00..U+DFFF ranges, take the low 10 bits from each
-                     * to make a 20-bit number, then add 0x10000 to get the
-                     * final codepoint.
-                     *
-                     * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
-                     * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
-                     * Area".
-                     */
-                    if ((ch & 0xFC00) == 0xD800) {
-                        pe++;
-                        if (pe > stringEnd - 6) {
-                            raise_parse_error_at("incomplete surrogate pair at %s", state, p);
-                        }
-                        if (pe[0] == '\\' && pe[1] == 'u') {
-                            uint32_t sur = unescape_unicode(state, (unsigned char *) pe + 2);
-
-                            if ((sur & 0xFC00) != 0xDC00) {
-                                raise_parse_error_at("invalid surrogate pair at %s", state, p);
-                            }
-
-                            ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16)
-                                    | (sur & 0x3FF));
-                            pe += 5;
-                        } else {
-                            raise_parse_error_at("incomplete surrogate pair at %s", state, p);
-                            break;
+            case 'u': {
+                uint32_t ch = unescape_unicode(state, ++pe, stringEnd);
+                pe += 3;
+                /* To handle values above U+FFFF, we take a sequence of
+                 * \uXXXX escapes in the U+D800..U+DBFF then
+                 * U+DC00..U+DFFF ranges, take the low 10 bits from each
+                 * to make a 20-bit number, then add 0x10000 to get the
+                 * final codepoint.
+                 *
+                 * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
+                 * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
+                 * Area".
+                 */
+                if ((ch & 0xFC00) == 0xD800) {
+                    pe++;
+                    if (RB_LIKELY((pe <= stringEnd - 6) && memcmp(pe, "\\u", 2) == 0)) {
+                        uint32_t sur = unescape_unicode(state, pe + 2, stringEnd);
+
+                        if (RB_UNLIKELY((sur & 0xFC00) != 0xDC00)) {
+                            raise_parse_error_at("invalid surrogate pair at %s", state, p);
                         }
-                    }
 
-                    char buf[4];
-                    int unescape_len = convert_UTF32_to_UTF8(buf, ch);
-                    MEMCPY(buffer, buf, char, unescape_len);
-                    buffer += unescape_len;
-                    p = ++pe;
+                        ch = (((ch & 0x3F) << 10) | ((((ch >> 6) & 0xF) + 1) << 16) | (sur & 0x3FF));
+                        pe += 5;
+                    } else {
+                        raise_parse_error_at("incomplete surrogate pair at %s", state, p);
+                        break;
+                    }
                 }
+
+                char buf[4];
+                int unescape_len = convert_UTF32_to_UTF8(buf, ch);
+                MEMCPY(buffer, buf, char, unescape_len);
+                buffer += unescape_len;
+                p = ++pe;
                 break;
+            }
             default:
                 if ((unsigned char)*pe < 0x20) {
                     if (!config->allow_control_characters) {
@@ -992,7 +987,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
     JSON_UnescapePositions positions = {
         .size = 0,
         .positions = backslashes,
-        .has_more = false,
+        .additional_backslashes = 0,
     };
 
     do {
@@ -1007,7 +1002,7 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi
                     backslashes[positions.size] = state->cursor;
                     positions.size++;
                 } else {
-                    positions.has_more = true;
+                    positions.additional_backslashes++;
                 }
                 state->cursor++;
                 break;

diff --git a/include/ruby/internal/core/rtypeddata.h b/include/ruby/internal/core/rtypeddata.h
@@ -109,14 +109,17 @@
 /** @cond INTERNAL_MACRO */
 #define RTYPEDDATA_P                 RTYPEDDATA_P
 #define RTYPEDDATA_TYPE              RTYPEDDATA_TYPE
+#define TYPED_DATA_EMBEDDED          ((VALUE)1)
+#define TYPED_DATA_PTR_MASK          (~(TYPED_DATA_EMBEDDED))
+/** @endcond */
+
+/**
+ * Macros to see if each corresponding flag is defined.
+ */
 #define RUBY_TYPED_FREE_IMMEDIATELY  RUBY_TYPED_FREE_IMMEDIATELY
 #define RUBY_TYPED_FROZEN_SHAREABLE  RUBY_TYPED_FROZEN_SHAREABLE
 #define RUBY_TYPED_WB_PROTECTED      RUBY_TYPED_WB_PROTECTED
 #define RUBY_TYPED_PROMOTED1         RUBY_TYPED_PROMOTED1
-/** @endcond */
-
-#define TYPED_DATA_EMBEDDED ((VALUE)1)
-#define TYPED_DATA_PTR_MASK (~(TYPED_DATA_EMBEDDED))
 
 /**
  * @private

diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb
@@ -350,6 +350,7 @@ def test_invalid_surogates
     assert_raise(JSON::ParserError) { parse('"\\uD800"') }
     assert_raise(JSON::ParserError) { parse('"\\uD800_________________"') }
     assert_raise(JSON::ParserError) { parse('"\\uD800\\u0041"') }
+    assert_raise(JSON::ParserError) { parse('"\\uD800\\u004') }
   end
 
   def test_parse_big_integers