Skip to content

Commit 00062fb

Browse files
committed
Detect incorrectly delimited strings
Strings should be delimited on both ends by double quotes, but this is currently not being detected, and content is simply being incorrectly trimmed. This commit adds a check for each string to verify it starts and ends with a double quote character, issuing a warning/error if that's not the case (and fixing it as appropriate). A few new test cases have been added to check that strings lacking their delimiting double quotes produce errors as expected. Signed-off-by: Rodrigo Tobar <rtobar@icrar.org>
1 parent 5f09192 commit 00062fb

File tree

2 files changed

+51
-19
lines changed

2 files changed

+51
-19
lines changed

babel/messages/pofile.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,18 @@ def _process_keyword_line(self, lineno, line, obsolete=False) -> None:
278278

279279
self.obsolete = obsolete
280280

281+
def _normalized_string(s):
282+
# whole lines are already stripped on both ends
283+
s = s.lstrip()
284+
assert s, "string to normalize shouldn't be empty"
285+
if s[0] != '"':
286+
self._invalid_pofile(line, lineno, "String must be delimited on the left by double quotes")
287+
s = '"' + s
288+
if s[-1] != '"':
289+
self._invalid_pofile(line, lineno, "String must be delimited on the right by double quotes")
290+
s += '"'
291+
return _NormalizedString(s)
292+
281293
# The line that has the msgid is stored as the offset of the msg
282294
# should this be the msgctxt if it has one?
283295
if keyword == 'msgid':
@@ -286,7 +298,7 @@ def _process_keyword_line(self, lineno, line, obsolete=False) -> None:
286298
if keyword in ['msgid', 'msgid_plural']:
287299
self.in_msgctxt = False
288300
self.in_msgid = True
289-
self.messages.append(_NormalizedString(arg))
301+
self.messages.append(_normalized_string(arg))
290302

291303
elif keyword == 'msgstr':
292304
self.in_msgid = False
@@ -306,13 +318,13 @@ def _process_keyword_line(self, lineno, line, obsolete=False) -> None:
306318
if len(msg) < 2:
307319
self._invalid_pofile(line, lineno, "msgstr plural doesn't have a message")
308320
msg = '""'
309-
self.translations.append([idx, _NormalizedString(msg)])
321+
self.translations.append([idx, _normalized_string(msg)])
310322
else:
311-
self.translations.append([0, _NormalizedString(arg)])
323+
self.translations.append([0, _normalized_string(arg)])
312324

313325
elif keyword == 'msgctxt':
314326
self.in_msgctxt = True
315-
self.context = _NormalizedString(arg)
327+
self.context = _normalized_string(arg)
316328

317329
def _process_string_continuation_line(self, line, lineno) -> None:
318330
if self.in_msgid:
@@ -324,6 +336,10 @@ def _process_string_continuation_line(self, line, lineno) -> None:
324336
else:
325337
self._invalid_pofile(line, lineno, "Got line starting with \" but not in msgid, msgstr or msgctxt")
326338
return
339+
assert line[0] == '"'
340+
if line[-1] != '"':
341+
self._invalid_pofile(line, lineno, "Continuation string must end with double quotes")
342+
line += '"'
327343
s.append(line)
328344

329345
def _process_comment(self, line) -> None:

tests/messages/test_pofile.py

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -505,6 +505,7 @@ def test_with_location(self):
505505

506506

507507
def test_abort_invalid_po_file(self):
508+
# right double quote missing on msgstr, continuation missing both double quotes
508509
invalid_po = '''
509510
msgctxt ""
510511
"{\"checksum\": 2148532640, \"cxt\": \"collector_thankyou\", \"id\": "
@@ -517,6 +518,7 @@ def test_abort_invalid_po_file(self):
517518
Pour toute question, veuillez communiquer avec Fulano à nadie@blah.com
518519
"
519520
'''
521+
# as above, but contains ascii-only characters
520522
invalid_po_2 = '''
521523
msgctxt ""
522524
"{\"checksum\": 2148532640, \"cxt\": \"collector_thankyou\", \"id\": "
@@ -529,20 +531,29 @@ def test_abort_invalid_po_file(self):
529531
Pour toute question, veuillez communiquer avec Fulano a fulano@blah.com
530532
"
531533
'''
532-
# Catalog not created, throws Unicode Error
533-
buf = StringIO(invalid_po)
534-
output = pofile.read_po(buf, locale='fr', abort_invalid=False)
535-
assert isinstance(output, Catalog)
536-
537-
# Catalog not created, throws PoFileError
538-
buf = StringIO(invalid_po_2)
539-
with pytest.raises(pofile.PoFileError):
540-
pofile.read_po(buf, locale='fr', abort_invalid=True)
541-
542-
# Catalog is created with warning, no abort
543-
buf = StringIO(invalid_po_2)
544-
output = pofile.read_po(buf, locale='fr', abort_invalid=False)
545-
assert isinstance(output, Catalog)
534+
# left double quote missing in msgid
535+
invalid_po_3 = '''
536+
msgid *A"
537+
msgstr "B"
538+
'''
539+
# right double quote missing in msgstr
540+
invalid_po_4 = '''
541+
msgid "A"
542+
msgstr "B*
543+
'''
544+
# right double quote missing in msgstr continuation
545+
invalid_po_5 = '''
546+
msgid "A"
547+
msgstr ""
548+
"*
549+
'''
550+
for incorrectly_delimited_po_string in (invalid_po, invalid_po_2, invalid_po_3, invalid_po_4, invalid_po_5):
551+
buf = StringIO(incorrectly_delimited_po_string)
552+
with pytest.raises(pofile.PoFileError):
553+
pofile.read_po(buf, locale='fr', abort_invalid=True)
554+
buf.seek(0)
555+
output = pofile.read_po(buf, locale='fr', abort_invalid=False)
556+
assert isinstance(output, Catalog)
546557

547558
def test_invalid_msgstr_plural(self):
548559
# msgstr plural broken
@@ -563,7 +574,12 @@ def test_invalid_msgstr_plural(self):
563574
msgid "A"
564575
msgstr[0]
565576
'''
566-
for incorrectly_plural in (invalid_1, invalid_2, invalid_3, invalid_4):
577+
# not correctly delimited
578+
invalid_5 = '''
579+
msgid "A"
580+
msgstr[0] not delimited
581+
'''
582+
for incorrectly_plural in (invalid_1, invalid_2, invalid_3, invalid_4, invalid_5):
567583
buf = StringIO(incorrectly_plural)
568584
with pytest.raises(pofile.PoFileError):
569585
pofile.read_po(buf, locale='fr', abort_invalid=True)

0 commit comments

Comments
 (0)