diff --git a/NEWS.md b/NEWS.md index e7d1da919e631d..3e2d37c84cfda9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -189,6 +189,9 @@ Note: We're only listing outstanding class updates. * Update Unicode to Version 17.0.0 and Emoji Version 17.0. [[Feature #19908]][[Feature #20724]][[Feature #21275]] (also applies to Regexp) + * `String#strip`, `strip!`, `lstrip`, `lstrip!`, `rstrip`, and `rstrip!` + are extended to accept `*selectors` arguments. [[Feature #21552]] + * Thread * Introduce support for `Thread#raise(cause:)` argument similar to @@ -252,7 +255,7 @@ The following default gems are updated. * io-nonblock 0.3.2 * io-wait 0.4.0.dev * ipaddr 1.2.8 -* json 2.17.1 +* json 2.18.0 * net-http 0.8.0 * openssl 4.0.0.pre * optparse 0.8.1 diff --git a/ext/json/lib/json/version.rb b/ext/json/lib/json/version.rb index 4ed61c43c8b187..631beba83e91b6 100644 --- a/ext/json/lib/json/version.rb +++ b/ext/json/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.17.1' + VERSION = '2.18.0' end diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 45de8d1ff62f1d..8f9729ef28a7fb 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -7,7 +7,7 @@ static VALUE CNaN, CInfinity, CMinusInfinity; static ID i_new, i_try_convert, i_uminus, i_encode; -static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_allow_control_characters, sym_symbolize_names, sym_freeze, sym_decimal_class, sym_on_load, sym_allow_duplicate_key; static int binary_encindex; @@ -335,6 +335,7 @@ typedef struct JSON_ParserStruct { int max_nesting; bool allow_nan; bool allow_trailing_comma; + bool allow_control_characters; bool symbolize_names; bool freeze; } JSON_ParserConfig; @@ -752,12 +753,15 @@ NOINLINE(static) VALUE json_string_unescape(JSON_ParserState *state, JSON_Parser break; default: if ((unsigned char)*pe < 0x20) { - if 
(*pe == '\n') { - raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1); + if (!config->allow_control_characters) { + if (*pe == '\n') { + raise_parse_error_at("Invalid unescaped newline character (\\n) in string: %s", state, pe - 1); + } + raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1); } - raise_parse_error_at("invalid ASCII control character in string: %s", state, pe - 1); + } else { + raise_parse_error_at("invalid escape character in string: %s", state, pe - 1); } - raise_parse_error_at("invalid escape character in string: %s", state, pe - 1); break; } } @@ -1009,7 +1013,9 @@ static VALUE json_parse_escaped_string(JSON_ParserState *state, JSON_ParserConfi break; } default: - raise_parse_error("invalid ASCII control character in string: %s", state); + if (!config->allow_control_characters) { + raise_parse_error("invalid ASCII control character in string: %s", state); + } break; } @@ -1430,14 +1436,15 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data) { JSON_ParserConfig *config = (JSON_ParserConfig *)data; - if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } - else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); } - else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); } - else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); } - else if (key == sym_freeze) { config->freeze = RTEST(val); } - else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; } - else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } - else if (key == sym_decimal_class) { + if (key == sym_max_nesting) { config->max_nesting = RTEST(val) ? 
FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { config->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { config->allow_trailing_comma = RTEST(val); } + else if (key == sym_allow_control_characters) { config->allow_control_characters = RTEST(val); } + else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { config->freeze = RTEST(val); } + else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; } + else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; } + else if (key == sym_decimal_class) { if (RTEST(val)) { if (rb_respond_to(val, i_try_convert)) { config->decimal_class = val; @@ -1650,6 +1657,7 @@ void Init_parser(void) sym_max_nesting = ID2SYM(rb_intern("max_nesting")); sym_allow_nan = ID2SYM(rb_intern("allow_nan")); sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); + sym_allow_control_characters = ID2SYM(rb_intern("allow_control_characters")); sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); sym_freeze = ID2SYM(rb_intern("freeze")); sym_on_load = ID2SYM(rb_intern("on_load")); diff --git a/parse.y b/parse.y index cb73ea2ef026bb..2a6de39236dc4c 100644 --- a/parse.y +++ b/parse.y @@ -5235,6 +5235,12 @@ block_call : command do_block $$ = new_command_qcall(p, $2, $1, $3, $4, $5, &@3, &@$); /*% ripper: method_add_block!(command_call!($:1, $:2, $:3, $:4), $:5) %*/ } + | block_call call_op2 paren_args + { + $$ = new_qcall(p, $2, $1, idCall, $3, &@2, &@$); + nd_set_line($$, @2.end_pos.lineno); + /*% ripper: method_add_arg!(call!($:1, $:2, ID2VAL(idCall)), $:3) %*/ + } ; method_call : fcall paren_args diff --git a/parser_bits.h b/parser_bits.h index cbe42db39631be..f894dde33e5e9e 100644 --- a/parser_bits.h +++ b/parser_bits.h @@ -30,7 +30,7 @@ #include <stdint.h> /* for uintptr_t */ #include "internal/compilers.h" /* for MSC_VERSION_SINCE */ -#if defined(_MSC_VER) +#ifdef _MSC_VER # include <stdlib.h>
/* for _byteswap_uint64 */ #endif @@ -57,9 +57,6 @@ # pragma intrinsic(_rotl64) # pragma intrinsic(_rotr64) # endif -#endif - -#if defined(_MSC_VER) # pragma intrinsic(_BitScanForward) # pragma intrinsic(_BitScanReverse) # ifdef _WIN64 @@ -90,6 +87,7 @@ #define UNSIGNED_INTEGER_MAX(T) ((T)~(T)0) +#ifndef MUL_OVERFLOW_SIGNED_INTEGER_P #if __has_builtin(__builtin_mul_overflow_p) # define MUL_OVERFLOW_P(a, b) \ __builtin_mul_overflow_p((a), (b), (__typeof__(a * b))0) @@ -118,15 +116,100 @@ MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX) #endif -#ifdef MUL_OVERFLOW_P +#if defined(MUL_OVERFLOW_P) && defined(USE___BUILTIN_MUL_OVERFLOW_LONG_LONG) # define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_P(a, b) +#else +# define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX) +#endif + +#ifdef MUL_OVERFLOW_P # define MUL_OVERFLOW_LONG_P(a, b) MUL_OVERFLOW_P(a, b) # define MUL_OVERFLOW_INT_P(a, b) MUL_OVERFLOW_P(a, b) #else -# define MUL_OVERFLOW_LONG_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX) # define MUL_OVERFLOW_LONG_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX) # define MUL_OVERFLOW_INT_P(a, b) MUL_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX) #endif +#endif + +#ifndef ADD_OVERFLOW_SIGNED_INTEGER_P +#if __has_builtin(__builtin_add_overflow_p) +# define ADD_OVERFLOW_P(a, b) \ + __builtin_add_overflow_p((a), (b), (__typeof__(a * b))0) +#elif __has_builtin(__builtin_add_overflow) +# define ADD_OVERFLOW_P(a, b) \ + __extension__ ({ __typeof__(a) c; __builtin_add_overflow((a), (b), &c); }) +#endif + +#define ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \ + (a) > 0 ? 
(b) > (max) - (a) : (b) < (min) - (a)) + +#if __has_builtin(__builtin_add_overflow_p) +/* __builtin_add_overflow_p can take bitfield */ +/* and GCC permits bitfields for integers other than int */ +# define ADD_OVERFLOW_FIXNUM_P(a, b) \ + __extension__ ({ \ + struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c = { 0 }; \ + __builtin_add_overflow_p((a), (b), c.fixnum); \ + }) +#else +# define ADD_OVERFLOW_FIXNUM_P(a, b) \ + ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX) +#endif + +#if defined(ADD_OVERFLOW_P) && defined(USE___BUILTIN_ADD_OVERFLOW_LONG_LONG) +# define ADD_OVERFLOW_LONG_LONG_P(a, b) ADD_OVERFLOW_P(a, b) +#else +# define ADD_OVERFLOW_LONG_LONG_P(a, b) ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX) +#endif + +#ifdef ADD_OVERFLOW_P +# define ADD_OVERFLOW_LONG_P(a, b) ADD_OVERFLOW_P(a, b) +# define ADD_OVERFLOW_INT_P(a, b) ADD_OVERFLOW_P(a, b) +#else +# define ADD_OVERFLOW_LONG_P(a, b) ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX) +# define ADD_OVERFLOW_INT_P(a, b) ADD_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX) +#endif +#endif + +#ifndef SUB_OVERFLOW_SIGNED_INTEGER_P +#if __has_builtin(__builtin_sub_overflow_p) +# define SUB_OVERFLOW_P(a, b) \ + __builtin_sub_overflow_p((a), (b), (__typeof__(a * b))0) +#elif __has_builtin(__builtin_sub_overflow) +# define SUB_OVERFLOW_P(a, b) \ + __extension__ ({ __typeof__(a) c; __builtin_sub_overflow((a), (b), &c); }) +#endif + +#define SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, min, max) ( \ + (b) > 0 ? 
(a) < (min) + (b) : (a) > (max) + (b)) + +#if __has_builtin(__builtin_sub_overflow_p) +/* __builtin_sub_overflow_p can take bitfield */ +/* and GCC permits bitfields for integers other than int */ +# define SUB_OVERFLOW_FIXNUM_P(a, b) \ + __extension__ ({ \ + struct { long fixnum : sizeof(long) * CHAR_BIT - 1; } c = { 0 }; \ + __builtin_sub_overflow_p((a), (b), c.fixnum); \ + }) +#else +# define SUB_OVERFLOW_FIXNUM_P(a, b) \ + SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, FIXNUM_MIN, FIXNUM_MAX) +#endif + +#if defined(SUB_OVERFLOW_P) && defined(USE___BUILTIN_SUB_OVERFLOW_LONG_LONG) +# define SUB_OVERFLOW_LONG_LONG_P(a, b) SUB_OVERFLOW_P(a, b) +#else +# define SUB_OVERFLOW_LONG_LONG_P(a, b) SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, LLONG_MIN, LLONG_MAX) +#endif + +#ifdef SUB_OVERFLOW_P +# define SUB_OVERFLOW_LONG_P(a, b) SUB_OVERFLOW_P(a, b) +# define SUB_OVERFLOW_INT_P(a, b) SUB_OVERFLOW_P(a, b) +#else +# define SUB_OVERFLOW_LONG_P(a, b) SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, LONG_MIN, LONG_MAX) +# define SUB_OVERFLOW_INT_P(a, b) SUB_OVERFLOW_SIGNED_INTEGER_P(a, b, INT_MIN, INT_MAX) +#endif +#endif #ifdef HAVE_UINT128_T # define bit_length(x) \ @@ -394,9 +477,9 @@ rb_popcount32(uint32_t x) #else x = (x & 0x55555555) + (x >> 1 & 0x55555555); x = (x & 0x33333333) + (x >> 2 & 0x33333333); - x = (x & 0x0f0f0f0f) + (x >> 4 & 0x0f0f0f0f); - x = (x & 0x001f001f) + (x >> 8 & 0x001f001f); - x = (x & 0x0000003f) + (x >>16 & 0x0000003f); + x = (x & 0x07070707) + (x >> 4 & 0x07070707); + x = (x & 0x000f000f) + (x >> 8 & 0x000f000f); + x = (x & 0x0000001f) + (x >>16 & 0x0000001f); return (unsigned int)x; #endif @@ -424,9 +507,9 @@ rb_popcount64(uint64_t x) x = (x & 0x5555555555555555) + (x >> 1 & 0x5555555555555555); x = (x & 0x3333333333333333) + (x >> 2 & 0x3333333333333333); x = (x & 0x0707070707070707) + (x >> 4 & 0x0707070707070707); - x = (x & 0x001f001f001f001f) + (x >> 8 & 0x001f001f001f001f); - x = (x & 0x0000003f0000003f) + (x >>16 & 0x0000003f0000003f); - x = (x & 0x000000000000007f) + 
(x >>32 & 0x000000000000007f); + x = (x & 0x000f000f000f000f) + (x >> 8 & 0x000f000f000f000f); + x = (x & 0x0000001f0000001f) + (x >>16 & 0x0000001f0000001f); + x = (x & 0x000000000000003f) + (x >>32 & 0x000000000000003f); return (unsigned int)x; #endif diff --git a/string.c b/string.c index 52d1f28cc1443f..5b7169ab12ffaa 100644 --- a/string.c +++ b/string.c @@ -7576,10 +7576,6 @@ undump_after_backslash(VALUE undumped, const char **ss, const char *s_end, rb_en } break; case 'x': - if (*utf8) { - rb_raise(rb_eRuntimeError, "hex escape and Unicode escape are mixed"); - } - *binary = true; if (++s >= s_end) { rb_raise(rb_eRuntimeError, "invalid hex escape"); } @@ -7587,6 +7583,12 @@ undump_after_backslash(VALUE undumped, const char **ss, const char *s_end, rb_en if (hexlen != 2) { rb_raise(rb_eRuntimeError, "invalid hex escape"); } + if (!ISASCII(*buf)) { + if (*utf8) { + rb_raise(rb_eRuntimeError, "hex escape and Unicode escape are mixed"); + } + *binary = true; + } rb_str_cat(undumped, (char *)buf, 1); s += hexlen; break; @@ -10184,6 +10186,22 @@ rb_str_chomp(int argc, VALUE *argv, VALUE str) return rb_str_subseq(str, 0, chompped_length(str, rs)); } +static void +tr_setup_table_multi(char table[TR_TABLE_SIZE], VALUE *tablep, VALUE *ctablep, + VALUE str, int num_selectors, VALUE *selectors) +{ + int i; + + for (i=0; i= e) return 0; + + /* remove leading characters in the table */ + while (s < e) { + int n; + unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc); + + if (!tr_find(cc, table, del, nodel)) break; + s += n; + } + return s - start; +} + /* * call-seq: - * lstrip! 
-> self or nil + * lstrip!(*selectors) -> self or nil * * Like String#lstrip, except that: * @@ -10220,16 +10257,28 @@ lstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc) */ static VALUE -rb_str_lstrip_bang(VALUE str) +rb_str_lstrip_bang(int argc, VALUE *argv, VALUE str) { rb_encoding *enc; char *start, *s; long olen, loffset; + rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS); + str_modify_keep_cr(str); enc = STR_ENC_GET(str); RSTRING_GETMEM(str, start, olen); - loffset = lstrip_offset(str, start, start+olen, enc); + if (argc > 0) { + char table[TR_TABLE_SIZE]; + VALUE del = 0, nodel = 0; + + tr_setup_table_multi(table, &del, &nodel, str, argc, argv); + loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel); + } + else { + loffset = lstrip_offset(str, start, start+olen, enc); + } + if (loffset > 0) { long len = olen-loffset; s = start + loffset; @@ -10244,7 +10293,7 @@ rb_str_lstrip_bang(VALUE str) /* * call-seq: - * lstrip -> new_string + * lstrip(*selectors) -> new_string * * Returns a copy of +self+ with leading whitespace removed; * see {Whitespace in Strings}[rdoc-ref:String@Whitespace+in+Strings]: @@ -10255,16 +10304,39 @@ rb_str_lstrip_bang(VALUE str) * s.lstrip * # => "abc\u0000\t\n\v\f\r " * + * If +selectors+ are given, removes characters of +selectors+ from the beginning of +self+: + * + * s = "---abc+++" + * s.lstrip("-") # => "abc+++" + * + * +selectors+ must be valid character selectors (see {Character Selectors}[rdoc-ref:character_selectors.rdoc]), + * and may use any of its valid forms, including negation, ranges, and escapes: + * + * "01234abc56789".lstrip("0-9") # => "abc56789" + * "01234abc56789".lstrip("0-9", "^4-6") # => "4abc56789" + * * Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String].
*/ static VALUE -rb_str_lstrip(VALUE str) +rb_str_lstrip(int argc, VALUE *argv, VALUE str) { char *start; long len, loffset; + + rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS); + RSTRING_GETMEM(str, start, len); - loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str)); + if (argc > 0) { + char table[TR_TABLE_SIZE]; + VALUE del = 0, nodel = 0; + + tr_setup_table_multi(table, &del, &nodel, str, argc, argv); + loffset = lstrip_offset_table(str, start, start+len, STR_ENC_GET(str), table, del, nodel); + } + else { + loffset = lstrip_offset(str, start, start+len, STR_ENC_GET(str)); + } if (loffset <= 0) return str_duplicate(rb_cString, str); return rb_str_subseq(str, loffset, len - loffset); } @@ -10298,9 +10370,33 @@ rstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc) return e - t; } +static long +rstrip_offset_table(VALUE str, const char *s, const char *e, rb_encoding *enc, + char table[TR_TABLE_SIZE], VALUE del, VALUE nodel) +{ + const char *t; + char *tp; + + rb_str_check_dummy_enc(enc); + if (rb_enc_str_coderange(str) == ENC_CODERANGE_BROKEN) { + rb_raise(rb_eEncCompatError, "invalid byte sequence in %s", rb_enc_name(enc)); + } + if (!s || s >= e) return 0; + t = e; + + /* remove trailing characters in the table */ + while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) { + unsigned int c = rb_enc_codepoint(tp, e, enc); + if (!tr_find(c, table, del, nodel)) break; + t = tp; + } + + return e - t; +} + /* * call-seq: - * rstrip! 
-> self or nil + * rstrip!(*selectors) -> self or nil * * Like String#rstrip, except that: * @@ -10311,16 +10407,27 @@ rstrip_offset(VALUE str, const char *s, const char *e, rb_encoding *enc) */ static VALUE -rb_str_rstrip_bang(VALUE str) +rb_str_rstrip_bang(int argc, VALUE *argv, VALUE str) { rb_encoding *enc; char *start; long olen, roffset; + rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS); + str_modify_keep_cr(str); enc = STR_ENC_GET(str); RSTRING_GETMEM(str, start, olen); - roffset = rstrip_offset(str, start, start+olen, enc); + if (argc > 0) { + char table[TR_TABLE_SIZE]; + VALUE del = 0, nodel = 0; + + tr_setup_table_multi(table, &del, &nodel, str, argc, argv); + roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel); + } + else { + roffset = rstrip_offset(str, start, start+olen, enc); + } if (roffset > 0) { long len = olen - roffset; @@ -10334,7 +10441,7 @@ rb_str_rstrip_bang(VALUE str) /* * call-seq: - * rstrip -> new_string + * rstrip(*selectors) -> new_string * * Returns a copy of +self+ with trailing whitespace removed; * see {Whitespace in Strings}[rdoc-ref:String@Whitespace+in+Strings]: @@ -10344,20 +10451,41 @@ rb_str_rstrip_bang(VALUE str) * s # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r " * s.rstrip # => "\u0000\t\n\v\f\r abc" * + * If +selectors+ are given, removes characters of +selectors+ from the end of +self+: + * + * s = "---abc+++" + * s.rstrip("+") # => "---abc" + * + * +selectors+ must be valid character selectors (see {Character Selectors}[rdoc-ref:character_selectors.rdoc]), + * and may use any of its valid forms, including negation, ranges, and escapes: + * + * "01234abc56789".rstrip("0-9") # => "01234abc" + * "01234abc56789".rstrip("0-9", "^4-6") # => "01234abc56" + * * Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String].
*/ static VALUE -rb_str_rstrip(VALUE str) +rb_str_rstrip(int argc, VALUE *argv, VALUE str) { rb_encoding *enc; char *start; long olen, roffset; + rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS); + enc = STR_ENC_GET(str); RSTRING_GETMEM(str, start, olen); - roffset = rstrip_offset(str, start, start+olen, enc); + if (argc > 0) { + char table[TR_TABLE_SIZE]; + VALUE del = 0, nodel = 0; + tr_setup_table_multi(table, &del, &nodel, str, argc, argv); + roffset = rstrip_offset_table(str, start, start+olen, enc, table, del, nodel); + } + else { + roffset = rstrip_offset(str, start, start+olen, enc); + } if (roffset <= 0) return str_duplicate(rb_cString, str); return rb_str_subseq(str, 0, olen-roffset); } @@ -10365,7 +10493,7 @@ rb_str_rstrip(VALUE str) /* * call-seq: - * strip! -> self or nil + * strip!(*selectors) -> self or nil * * Like String#strip, except that: * @@ -10376,17 +10504,30 @@ rb_str_rstrip(VALUE str) */ static VALUE -rb_str_strip_bang(VALUE str) +rb_str_strip_bang(int argc, VALUE *argv, VALUE str) { char *start; long olen, loffset, roffset; rb_encoding *enc; + rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS); + str_modify_keep_cr(str); enc = STR_ENC_GET(str); RSTRING_GETMEM(str, start, olen); - loffset = lstrip_offset(str, start, start+olen, enc); - roffset = rstrip_offset(str, start+loffset, start+olen, enc); + + if (argc > 0) { + char table[TR_TABLE_SIZE]; + VALUE del = 0, nodel = 0; + + tr_setup_table_multi(table, &del, &nodel, str, argc, argv); + loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel); + roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel); + } + else { + loffset = lstrip_offset(str, start, start+olen, enc); + roffset = rstrip_offset(str, start+loffset, start+olen, enc); + } if (loffset > 0 || roffset > 0) { long len = olen-roffset; @@ -10404,7 +10545,7 @@ rb_str_strip_bang(VALUE str) /* * call-seq: - * strip -> new_string + * strip(*selectors) -> new_string * * Returns a copy of +self+ 
with leading and trailing whitespace removed; * see {Whitespace in Strings}[rdoc-ref:String@Whitespace+in+Strings]: @@ -10414,19 +10555,44 @@ rb_str_strip_bang(VALUE str) * # => "\u0000\t\n\v\f\r abc\u0000\t\n\v\f\r " * s.strip # => "abc" * + * If +selectors+ are given, removes characters of +selectors+ from both ends of +self+: + * + * s = "---abc+++" + * s.strip("-+") # => "abc" + * s.strip("+-") # => "abc" + * + * +selectors+ must be valid character selectors (see {Character Selectors}[rdoc-ref:character_selectors.rdoc]), + * and may use any of its valid forms, including negation, ranges, and escapes: + * + * "01234abc56789".strip("0-9") # => "abc" + * "01234abc56789".strip("0-9", "^4-6") # => "4abc56" + * * Related: see {Converting to New String}[rdoc-ref:String@Converting+to+New+String]. */ static VALUE -rb_str_strip(VALUE str) +rb_str_strip(int argc, VALUE *argv, VALUE str) { char *start; long olen, loffset, roffset; rb_encoding *enc = STR_ENC_GET(str); + rb_check_arity(argc, 0, UNLIMITED_ARGUMENTS); + RSTRING_GETMEM(str, start, olen); - loffset = lstrip_offset(str, start, start+olen, enc); - roffset = rstrip_offset(str, start+loffset, start+olen, enc); + + if (argc > 0) { + char table[TR_TABLE_SIZE]; + VALUE del = 0, nodel = 0; + + tr_setup_table_multi(table, &del, &nodel, str, argc, argv); + loffset = lstrip_offset_table(str, start, start+olen, enc, table, del, nodel); + roffset = rstrip_offset_table(str, start+loffset, start+olen, enc, table, del, nodel); + } + else { + loffset = lstrip_offset(str, start, start+olen, enc); + roffset = rstrip_offset(str, start+loffset, start+olen, enc); + } if (loffset <= 0 && roffset <= 0) return str_duplicate(rb_cString, str); return rb_str_subseq(str, loffset, olen-loffset-roffset); @@ -12714,9 +12880,9 @@ Init_String(void) rb_define_method(rb_cString, "gsub", rb_str_gsub, -1); rb_define_method(rb_cString, "chop", rb_str_chop, 0); rb_define_method(rb_cString, "chomp", rb_str_chomp, -1); - rb_define_method(rb_cString, "strip",
rb_str_strip, 0); - rb_define_method(rb_cString, "lstrip", rb_str_lstrip, 0); - rb_define_method(rb_cString, "rstrip", rb_str_rstrip, 0); + rb_define_method(rb_cString, "strip", rb_str_strip, -1); + rb_define_method(rb_cString, "lstrip", rb_str_lstrip, -1); + rb_define_method(rb_cString, "rstrip", rb_str_rstrip, -1); rb_define_method(rb_cString, "delete_prefix", rb_str_delete_prefix, 1); rb_define_method(rb_cString, "delete_suffix", rb_str_delete_suffix, 1); @@ -12724,9 +12890,9 @@ Init_String(void) rb_define_method(rb_cString, "gsub!", rb_str_gsub_bang, -1); rb_define_method(rb_cString, "chop!", rb_str_chop_bang, 0); rb_define_method(rb_cString, "chomp!", rb_str_chomp_bang, -1); - rb_define_method(rb_cString, "strip!", rb_str_strip_bang, 0); - rb_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, 0); - rb_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, 0); + rb_define_method(rb_cString, "strip!", rb_str_strip_bang, -1); + rb_define_method(rb_cString, "lstrip!", rb_str_lstrip_bang, -1); + rb_define_method(rb_cString, "rstrip!", rb_str_rstrip_bang, -1); rb_define_method(rb_cString, "delete_prefix!", rb_str_delete_prefix_bang, 1); rb_define_method(rb_cString, "delete_suffix!", rb_str_delete_suffix_bang, 1); diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index 3f0fb7522dc671..d29f8077b1d138 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -172,6 +172,12 @@ def test_parse_control_chars_in_string end end + def test_parse_allowed_control_chars_in_string + 0.upto(31) do |ord| + assert_equal ord.chr, parse(%("#{ord.chr}"), allow_control_characters: true) + end + end + def test_parse_arrays assert_equal([1,2,3], parse('[1,2,3]')) assert_equal([1.2,2,3], parse('[1.2,2,3]')) diff --git a/test/ripper/test_parser_events.rb b/test/ripper/test_parser_events.rb index aa7434c083f2a2..3e72c7a331530a 100644 --- a/test/ripper/test_parser_events.rb +++ b/test/ripper/test_parser_events.rb @@ -480,6 +480,13 @@ 
def test_call assert_equal true, thru_call assert_equal "[call(vcall(foo),.,call,[])]", tree + thru_call = false + assert_nothing_raised { + tree = parse("a b do end.()", :on_call) {thru_call = true} + } + assert_equal true, thru_call + assert_equal "[call(command(a,[vcall(b)],&do_block(,bodystmt([void()]))),.,call,[])]", tree + thru_call = false assert_nothing_raised { tree = parse("self::foo", :on_call) {thru_call = true} diff --git a/test/ruby/test_parse.rb b/test/ruby/test_parse.rb index 78a5638647af67..9fa4dad41e2a86 100644 --- a/test/ruby/test_parse.rb +++ b/test/ruby/test_parse.rb @@ -352,6 +352,21 @@ def test_call_method assert_equal("foobar", b) end + def test_call_command + a = b = nil + o = Object.new + def o.m(*arg); proc {|a| arg.join + a }; end + + assert_nothing_raised do + o.instance_eval <<-END, __FILE__, __LINE__+1 + a = o.m "foo", "bar" do end.("buz") + b = o.m "foo", "bar" do end::("buz") + END + end + assert_equal("foobarbuz", a) + assert_equal("foobarbuz", b) + end + def test_xstring assert_raise(Errno::ENOENT) do eval("``") diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index 1fe0629331ec30..2458d38ef4b80b 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -872,6 +872,10 @@ def test_undump assert_equal('\#', S('"\\\\#"').undump) assert_equal('\#{', S('"\\\\\#{"').undump) + assert_undump("\0\u{ABCD}") + assert_undump(S('"\x00\u3042"'.force_encoding("SJIS"))) + assert_undump(S('"\u3042\x7E"'.force_encoding("SJIS"))) + assert_raise(RuntimeError) { S('\u3042').undump } assert_raise(RuntimeError) { S('"\x82\xA0\u3042"'.force_encoding("SJIS")).undump } assert_raise(RuntimeError) { S('"\u3042\x82\xA0"'.force_encoding("SJIS")).undump } @@ -2049,6 +2053,117 @@ def test_strip! 
assert_equal(S("x") ,a) end + def test_strip_with_selectors + assert_equal(S("abc"), S("---abc+++").strip("-+")) + assert_equal(S("abc"), S("+++abc---").strip("-+")) + assert_equal(S("abc"), S("+-+abc-+-").strip("-+")) + assert_equal(S(""), S("---+++").strip("-+")) + assert_equal(S("abc "), S("---abc ").strip("-")) + assert_equal(S(" abc"), S(" abc+++").strip("+")) + + # Test with multibyte characters + assert_equal(S("abc"), S("あああabcいいい").strip("あい")) + assert_equal(S("abc"), S("いいいabcあああ").strip("あい")) + + # Test with NUL characters + assert_equal(S("abc\0"), S("---abc\0--").strip("-")) + assert_equal(S("\0abc"), S("--\0abc---").strip("-")) + + # Test without modification + assert_equal(S("abc"), S("abc").strip("-+")) + assert_equal(S("abc"), S("abc").strip("")) + + # Test with range + assert_equal(S("abc"), S("012abc345").strip("0-9")) + assert_equal(S("abc"), S("012abc345").strip("^a-z")) + + # Test with multiple selectors + assert_equal(S("4abc56"), S("01234abc56789").strip("0-9", "^4-6")) + end + + def test_strip_bang_with_chars + a = S("---abc+++") + assert_equal(S("abc"), a.strip!("-+")) + assert_equal(S("abc"), a) + + a = S("+++abc---") + assert_equal(S("abc"), a.strip!("-+")) + assert_equal(S("abc"), a) + + a = S("abc") + assert_nil(a.strip!("-+")) + assert_equal(S("abc"), a) + + # Test with multibyte characters + a = S("あああabcいいい") + assert_equal(S("abc"), a.strip!("あい")) + assert_equal(S("abc"), a) + end + + def test_lstrip_with_selectors + assert_equal(S("abc+++"), S("---abc+++").lstrip("-")) + assert_equal(S("abc---"), S("+++abc---").lstrip("+")) + assert_equal(S("abc"), S("---abc").lstrip("-")) + assert_equal(S(""), S("---").lstrip("-")) + + # Test with multibyte characters + assert_equal(S("abcいいい"), S("あああabcいいい").lstrip("あ")) + + # Test with NUL characters + assert_equal(S("\0abc+++"), S("--\0abc+++").lstrip("-")) + + # Test without modification + assert_equal(S("abc"), S("abc").lstrip("-")) + + # Test with range + assert_equal(S("abc345"), 
S("012abc345").lstrip("0-9")) + + # Test with multiple selectors + assert_equal(S("4abc56789"), S("01234abc56789").lstrip("0-9", "^4-6")) + end + + def test_lstrip_bang_with_chars + a = S("---abc+++") + assert_equal(S("abc+++"), a.lstrip!("-")) + assert_equal(S("abc+++"), a) + + a = S("abc") + assert_nil(a.lstrip!("-")) + assert_equal(S("abc"), a) + end + + def test_rstrip_with_selectors + assert_equal(S("---abc"), S("---abc+++").rstrip("+")) + assert_equal(S("+++abc"), S("+++abc---").rstrip("-")) + assert_equal(S("abc"), S("abc+++").rstrip("+")) + assert_equal(S(""), S("+++").rstrip("+")) + + # Test with multibyte characters + assert_equal(S("あああabc"), S("あああabcいいい").rstrip("い")) + + # Test with NUL characters + assert_equal(S("---abc\0"), S("---abc\0++").rstrip("+")) + + # Test without modification + assert_equal(S("abc"), S("abc").rstrip("-")) + + # Test with range + assert_equal(S("012abc"), S("012abc345").rstrip("0-9")) + + # Test with multiple selectors + assert_equal(S("01234abc56"), S("01234abc56789").rstrip("0-9", "^4-6")) + end + + def test_rstrip_bang_with_chars + a = S("---abc+++") + assert_equal(S("---abc"), a.rstrip!("+")) + assert_equal(S("---abc"), a) + + a = S("abc") + assert_nil(a.rstrip!("+")) + assert_equal(S("abc"), a) + end + def test_sub assert_equal(S("h*llo"), S("hello").sub(/[aeiou]/, S('*'))) assert_equal(S("hllo"), S("hello").sub(/([aeiou])/, S('<\1>'))) @@ -3916,6 +4031,10 @@ def assert_byteindex(expected, string, match, *rest) def assert_byterindex(expected, string, match, *rest) assert_index_like(:byterindex, expected, string, match, *rest) end + + def assert_undump(str, *rest) + assert_equal(str, str.dump.undump, *rest) + end end class TestString2 < TestString diff --git a/tool/rbinstall.rb b/tool/rbinstall.rb index 936ef8242d7ecb..874c3ef1d9af91 100755 --- a/tool/rbinstall.rb +++ b/tool/rbinstall.rb @@ -870,16 +870,25 @@ def load_gemspec(file, base = nil, files: nil) code = File.read(file, encoding: "utf-8:-") code.gsub!(/^ *#.*/, 
"") - files = files ? files.map(&:dump).join(", ") : "" + spec_files = files ? files.map(&:dump).join(", ") : "" code.gsub!(/(?:`git[^\`]*`|%x\[git[^\]]*\])\.split(\([^\)]*\))?/m) do - "[" + files + "]" + "[" + spec_files + "]" end \ or code.gsub!(/IO\.popen\(.*git.*?\)/) do - "[" + files + "] || itself" + "[" + spec_files + "] || itself" end spec = eval(code, binding, file) + # for out-of-place build + collected_files = files ? spec.files.concat(files).uniq : spec.files + spec.files = collected_files.map do |f| + if !File.exist?(File.join(base || ".", f)) && f.end_with?(".rb") + "lib/#{f}" + else + f + end + end unless Gem::Specification === spec raise TypeError, "[#{file}] isn't a Gem::Specification (#{spec.class} instead)." end