# gh-142236: move the "invalid syntax. Perhaps you forgot a comma?" action out
# of the grammar into a C helper, and make it report the last line of a
# multi-line left-hand expression.
#
# What each file diff below does:
#   Grammar/python.gram      The invalid_expression alternative
#                            `a=disjunction b=expression_without_invalid` now
#                            calls _PyPegen_raise_error_for_missing_comma(p, a, b)
#                            instead of an inline nested ternary.
#   Lib/test/test_syntax.py  New test: three adjacent string literals followed
#                            by `x=1` inside print(...) must report lineno=4.
#   Misc/NEWS.d/...          NEWS entry for the change.
#   Parser/action_helpers.c  Adds the helper. It returns NULL without raising
#                            when _PyPegen_check_legacy_stmt(p, a) is true or
#                            when p->tokens[p->mark - 1]->level == 0.
#                            If a->end_lineno > a->lineno the error range is
#                            (a->end_lineno, a->col_offset) to
#                            (a->end_lineno, a->end_col_offset); otherwise it
#                            runs from a's start to b's end.
#   Parser/parser.c          Same action replacement in invalid_expression_rule.
#                            NOTE(review): presumably regenerated from the
#                            grammar rather than hand-edited -- confirm.
#   Parser/pegen.h           Declares the helper.
#
# NOTE(review): in the multi-line branch the start column comes from a's FIRST
# line but is applied to a's LAST line. If the first line starts further right
# than the last line ends, the start column would exceed the end column --
# confirm how the error-location code treats that case.
#
# NOTE(review): whitespace in this copy of the patch had been collapsed. Line
# breaks below were restored from the hunk headers' line counts; leading
# indentation on hunk lines (and the wrap point of the final pegen.h context
# line) is reconstructed and should be checked against the tree before
# applying. Spacing inside string literals is kept exactly as received.
diff --git a/Grammar/python.gram b/Grammar/python.gram
index 7ae00c6f005e7e..110136af81b596 100644
--- a/Grammar/python.gram
+++ b/Grammar/python.gram
@@ -1251,8 +1251,7 @@ invalid_expression:
     # !(NAME STRING) is not matched so we don't show this error with some invalid string prefixes like: kf"dsfsdf"
     # Soft keywords need to also be ignored because they can be parsed as NAME NAME
     | !(NAME STRING | SOFT_KEYWORD) a=disjunction b=expression_without_invalid {
-        _PyPegen_check_legacy_stmt(p, a) ? NULL : p->tokens[p->mark-1]->level == 0 ? NULL :
-        RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") }
+        _PyPegen_raise_error_for_missing_comma(p, a, b) }
     | a=disjunction 'if' b=disjunction !('else'|':') { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "expected 'else' after 'if' expression") }
     | a=disjunction 'if' b=disjunction 'else' !expression {
         RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN("expected expression after 'else', but statement is given") }
diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py
index ade8f273a1e088..93f0b98de71d81 100644
--- a/Lib/test/test_syntax.py
+++ b/Lib/test/test_syntax.py
@@ -3336,6 +3336,20 @@ def test_multiline_compiler_error_points_to_the_end(self):
             lineno=3
         )
 
+    def test_multiline_string_concat_missing_comma_points_to_last_string(self):
+        # gh-142236: For multi-line string concatenations with a missing comma,
+        # the error should point to the last string, not the first.
+        self._check_error(
+            "print(\n"
+            ' "line1"\n'
+            ' "line2"\n'
+            ' "line3"\n'
+            " x=1\n"
+            ")",
+            "Perhaps you forgot a comma",
+            lineno=4,  # Points to "line3", the last string
+        )
+
     @support.cpython_only
     def test_syntax_error_on_deeply_nested_blocks(self):
         # This raises a SyntaxError, it used to raise a SystemError. Context
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-12-06-00-38-37.gh-issue-142236.m3EF9E.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-06-00-38-37.gh-issue-142236.m3EF9E.rst
new file mode 100644
index 00000000000000..a8d37b49de7160
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-12-06-00-38-37.gh-issue-142236.m3EF9E.rst
@@ -0,0 +1,3 @@
+Improve the "Perhaps you forgot a comma?" syntax error for multi-line string
+concatenations to point to the last string instead of the first, making it
+easier to locate where the comma is missing. Patch by Pablo Galindo.
diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c
index b7a5b9d5e307b1..50856686335a14 100644
--- a/Parser/action_helpers.c
+++ b/Parser/action_helpers.c
@@ -947,6 +947,35 @@ _PyPegen_check_legacy_stmt(Parser *p, expr_ty name) {
     return 0;
 }
 
+void *
+_PyPegen_raise_error_for_missing_comma(Parser *p, expr_ty a, expr_ty b)
+{
+    // Don't raise for legacy statements like "print x" or "exec x"
+    if (_PyPegen_check_legacy_stmt(p, a)) {
+        return NULL;
+    }
+    // Only raise inside parentheses/brackets (level > 0)
+    if (p->tokens[p->mark - 1]->level == 0) {
+        return NULL;
+    }
+    // For multi-line expressions (like string concatenations), point to the
+    // last line instead of the first for a more helpful error message.
+    // Use a->col_offset as the starting column since all strings in the
+    // concatenation typically share the same indentation.
+    if (a->end_lineno > a->lineno) {
+        return RAISE_ERROR_KNOWN_LOCATION(
+            p, PyExc_SyntaxError, a->end_lineno, a->col_offset,
+            a->end_lineno, a->end_col_offset,
+            "invalid syntax. Perhaps you forgot a comma?"
+        );
+    }
+    return RAISE_ERROR_KNOWN_LOCATION(
+        p, PyExc_SyntaxError, a->lineno, a->col_offset,
+        b->end_lineno, b->end_col_offset,
+        "invalid syntax. Perhaps you forgot a comma?"
+    );
+}
+
 static ResultTokenWithMetadata *
 result_token_with_metadata(Parser *p, void *result, PyObject *metadata)
 {
diff --git a/Parser/parser.c b/Parser/parser.c
index 648b3702d8ff71..09bfb5725a2ec3 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -21445,7 +21445,7 @@ invalid_expression_rule(Parser *p)
         )
         {
             D(fprintf(stderr, "%*c+ invalid_expression[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "!(NAME STRING | SOFT_KEYWORD) disjunction expression_without_invalid"));
-            _res = _PyPegen_check_legacy_stmt ( p , a ) ? NULL : p -> tokens [p -> mark - 1] -> level == 0 ? NULL : RAISE_SYNTAX_ERROR_KNOWN_RANGE ( a , b , "invalid syntax. Perhaps you forgot a comma?" );
+            _res = _PyPegen_raise_error_for_missing_comma ( p , a , b );
             if (_res == NULL && PyErr_Occurred()) {
                 p->error_indicator = 1;
                 p->level--;
diff --git a/Parser/pegen.h b/Parser/pegen.h
index b8f887608b104e..be5333eb2684ae 100644
--- a/Parser/pegen.h
+++ b/Parser/pegen.h
@@ -358,6 +358,7 @@ expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
 asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
 int _PyPegen_check_barry_as_flufl(Parser *, Token *);
 int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
+void *_PyPegen_raise_error_for_missing_comma(Parser *p, expr_ty a, expr_ty b);
 ResultTokenWithMetadata *_PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
 ResultTokenWithMetadata *_PyPegen_setup_full_format_spec(Parser *, Token *, asdl_expr_seq *, int, int,
                                                       int, int, PyArena *);