1717
1818from babel .core import Locale
1919from babel .messages .catalog import Catalog , Message
20- from babel .util import TextWrapper , _cmp
20+ from babel .util import TextWrapper
2121
2222if TYPE_CHECKING :
2323 from typing import IO , AnyStr
2424
2525 from _typeshed import SupportsWrite
2626
2727
28+ _unescape_re = re .compile (r'\\([\\trn"])' )
29+
30+
2831def unescape (string : str ) -> str :
2932 r"""Reverse `escape` the given string.
3033
@@ -45,7 +48,10 @@ def replace_escapes(match):
4548 return '\r '
4649 # m is \ or "
4750 return m
48- return re .compile (r'\\([\\trn"])' ).sub (replace_escapes , string [1 :- 1 ])
51+
52+ if "\\ " not in string : # Fast path: there's nothing to unescape
53+ return string [1 :- 1 ]
54+ return _unescape_re .sub (replace_escapes , string [1 :- 1 ])
4955
5056
5157def denormalize (string : str ) -> str :
@@ -73,7 +79,7 @@ def denormalize(string: str) -> str:
7379 escaped_lines = string .splitlines ()
7480 if string .startswith ('""' ):
7581 escaped_lines = escaped_lines [1 :]
76- return '' .join (unescape ( line ) for line in escaped_lines )
82+ return '' .join (map ( unescape , escaped_lines ) )
7783 else :
7884 return unescape (string )
7985
@@ -132,48 +138,14 @@ def __init__(self, message: str, catalog: Catalog, line: str, lineno: int) -> No
132138 self .lineno = lineno
133139
134140
135- class _NormalizedString :
136-
141+ class _NormalizedString (list ):
137142 def __init__ (self , * args : str ) -> None :
138- self ._strs : list [str ] = []
139- for arg in args :
140- self .append (arg )
141-
142- def append (self , s : str ) -> None :
143- self ._strs .append (s .strip ())
143+ super ().__init__ (map (str .strip , args ))
144144
145145 def denormalize (self ) -> str :
146- return '' .join (unescape (s ) for s in self ._strs )
147-
148- def __bool__ (self ) -> bool :
149- return bool (self ._strs )
150-
151- def __repr__ (self ) -> str :
152- return os .linesep .join (self ._strs )
153-
154- def __cmp__ (self , other : object ) -> int :
155- if not other :
156- return 1
157-
158- return _cmp (str (self ), str (other ))
159-
160- def __gt__ (self , other : object ) -> bool :
161- return self .__cmp__ (other ) > 0
162-
163- def __lt__ (self , other : object ) -> bool :
164- return self .__cmp__ (other ) < 0
165-
166- def __ge__ (self , other : object ) -> bool :
167- return self .__cmp__ (other ) >= 0
168-
169- def __le__ (self , other : object ) -> bool :
170- return self .__cmp__ (other ) <= 0
171-
172- def __eq__ (self , other : object ) -> bool :
173- return self .__cmp__ (other ) == 0
174-
175- def __ne__ (self , other : object ) -> bool :
176- return self .__cmp__ (other ) != 0
146+ if not self :
147+ return ""
148+ return '' .join (map (unescape , self ))
177149
178150
179151class PoFileParser :
@@ -183,13 +155,6 @@ class PoFileParser:
183155 See `read_po` for simple cases.
184156 """
185157
186- _keywords = [
187- 'msgid' ,
188- 'msgstr' ,
189- 'msgctxt' ,
190- 'msgid_plural' ,
191- ]
192-
193158 def __init__ (self , catalog : Catalog , ignore_obsolete : bool = False , abort_invalid : bool = False ) -> None :
194159 self .catalog = catalog
195160 self .ignore_obsolete = ignore_obsolete
@@ -216,23 +181,20 @@ def _add_message(self) -> None:
216181 Add a message to the catalog based on the current parser state and
217182 clear the state ready to process the next message.
218183 """
219- self .translations .sort ()
220184 if len (self .messages ) > 1 :
221185 msgid = tuple (m .denormalize () for m in self .messages )
222- else :
223- msgid = self .messages [0 ].denormalize ()
224- if isinstance (msgid , (list , tuple )):
225186 string = ['' for _ in range (self .catalog .num_plurals )]
226- for idx , translation in self .translations :
187+ for idx , translation in sorted ( self .translations ) :
227188 if idx >= self .catalog .num_plurals :
228189 self ._invalid_pofile ("" , self .offset , "msg has more translations than num_plurals of catalog" )
229190 continue
230191 string [idx ] = translation .denormalize ()
231192 string = tuple (string )
232193 else :
194+ msgid = self .messages [0 ].denormalize ()
233195 string = self .translations [0 ][1 ].denormalize ()
234196 msgctxt = self .context .denormalize () if self .context else None
235- message = Message (msgid , string , list ( self .locations ), set ( self .flags ) ,
197+ message = Message (msgid , string , self .locations , self .flags ,
236198 self .auto_comments , self .user_comments , lineno = self .offset + 1 ,
237199 context = msgctxt )
238200 if self .obsolete :
@@ -247,27 +209,19 @@ def _finish_current_message(self) -> None:
247209 if self .messages :
248210 if not self .translations :
249211 self ._invalid_pofile ("" , self .offset , f"missing msgstr for msgid '{ self .messages [0 ].denormalize ()} '" )
250- self .translations .append ([0 , _NormalizedString ("" )])
212+ self .translations .append ([0 , _NormalizedString ()])
251213 self ._add_message ()
252214
253215 def _process_message_line (self , lineno , line , obsolete = False ) -> None :
254- if line .startswith ('"' ):
216+ if not line :
217+ return
218+ if line [0 ] == '"' :
255219 self ._process_string_continuation_line (line , lineno )
256220 else :
257221 self ._process_keyword_line (lineno , line , obsolete )
258222
259223 def _process_keyword_line (self , lineno , line , obsolete = False ) -> None :
260-
261- for keyword in self ._keywords :
262- try :
263- if line .startswith (keyword ) and line [len (keyword )] in [' ' , '[' ]:
264- arg = line [len (keyword ):]
265- break
266- except IndexError :
267- self ._invalid_pofile (line , lineno , "Keyword must be followed by a string" )
268- else :
269- self ._invalid_pofile (line , lineno , "Start of line didn't match any expected keyword." )
270- return
224+ keyword , _ , arg = line .partition (' ' )
271225
272226 if keyword in ['msgid' , 'msgctxt' ]:
273227 self ._finish_current_message ()
@@ -283,19 +237,23 @@ def _process_keyword_line(self, lineno, line, obsolete=False) -> None:
283237 self .in_msgctxt = False
284238 self .in_msgid = True
285239 self .messages .append (_NormalizedString (arg ))
240+ return
286241
287- elif keyword == 'msgstr' :
242+ if keyword == 'msgctxt' :
243+ self .in_msgctxt = True
244+ self .context = _NormalizedString (arg )
245+ return
246+
247+ if keyword == 'msgstr' or keyword .startswith ('msgstr[' ):
288248 self .in_msgid = False
289249 self .in_msgstr = True
290- if arg . startswith ('[' ):
291- idx , msg = arg [ 1 :]. split ( ']' , 1 )
292- self . translations . append ([ int ( idx ), _NormalizedString (msg )] )
293- else :
294- self . translations . append ([ 0 , _NormalizedString ( arg )])
250+ kwarg , has_bracket , idxarg = keyword . partition ('[' )
251+ idx = int ( idxarg [: - 1 ]) if has_bracket else 0
252+ s = _NormalizedString ( arg ) if arg != '""' else _NormalizedString ()
253+ self . translations . append ([ idx , s ])
254+ return
295255
296- elif keyword == 'msgctxt' :
297- self .in_msgctxt = True
298- self .context = _NormalizedString (arg )
256+ self ._invalid_pofile (line , lineno , "Unknown or misformatted keyword" )
299257
300258 def _process_string_continuation_line (self , line , lineno ) -> None :
301259 if self .in_msgid :
@@ -307,49 +265,62 @@ def _process_string_continuation_line(self, line, lineno) -> None:
307265 else :
308266 self ._invalid_pofile (line , lineno , "Got line starting with \" but not in msgid, msgstr or msgctxt" )
309267 return
310- s .append (line )
268+ s .append (line . strip ()) # For performance reasons, `NormalizedString` doesn't strip internally
311269
312270 def _process_comment (self , line ) -> None :
313271
314272 self ._finish_current_message ()
315273
316- if line [1 :].startswith (':' ):
274+ prefix = line [:2 ]
275+ if prefix == '#:' :
317276 for location in _extract_locations (line [2 :]):
318- pos = location .rfind (':' )
319- if pos >= 0 :
277+ a , colon , b = location .rpartition (':' )
278+ if colon :
320279 try :
321- lineno = int (location [ pos + 1 :] )
280+ self . locations . append (( a , int (b )) )
322281 except ValueError :
323282 continue
324- self .locations .append ((location [:pos ], lineno ))
325- else :
283+ else : # No line number specified
326284 self .locations .append ((location , None ))
327- elif line [1 :].startswith (',' ):
328- for flag in line [2 :].lstrip ().split (',' ):
329- self .flags .append (flag .strip ())
330- elif line [1 :].startswith ('.' ):
285+ return
286+
287+ if prefix == '#,' :
288+ self .flags .extend (flag .strip () for flag in line [2 :].lstrip ().split (',' ))
289+ return
290+
291+ if prefix == '#.' :
331292 # These are called auto-comments
332293 comment = line [2 :].strip ()
333294 if comment : # Just check that we're not adding empty comments
334295 self .auto_comments .append (comment )
335- else :
336- # These are called user comments
337- self .user_comments .append (line [1 :].strip ())
296+ return
297+
298+ # These are called user comments
299+ self .user_comments .append (line [1 :].strip ())
338300
339301 def parse (self , fileobj : IO [AnyStr ] | Iterable [AnyStr ]) -> None :
340302 """
341- Reads from the file-like object `fileobj` and adds any po file
342- units found in it to the `Catalog` supplied to the constructor.
303+ Reads from the file-like object (or iterable of string-likes) `fileobj`
304+ and adds any po file units found in it to the `Catalog`
305+ supplied to the constructor.
306+
307+ All of the items in the iterable must be the same type; either `str`
308+ or `bytes` (decoded with the catalog charset), but not a mixture.
343309 """
310+ needs_decode = None
344311
345312 for lineno , line in enumerate (fileobj ):
346313 line = line .strip ()
347- if not isinstance (line , str ):
348- line = line .decode (self .catalog .charset )
314+ if needs_decode is None :
315+ # If we don't yet know whether we need to decode,
316+ # let's find out now.
317+ needs_decode = not isinstance (line , str )
349318 if not line :
350319 continue
351- if line .startswith ('#' ):
352- if line [1 :].startswith ('~' ):
320+ if needs_decode :
321+ line = line .decode (self .catalog .charset )
322+ if line [0 ] == '#' :
323+ if line [:2 ] == '#~' :
353324 self ._process_message_line (lineno , line [2 :].lstrip (), obsolete = True )
354325 else :
355326 try :
@@ -364,8 +335,8 @@ def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None:
364335 # No actual messages found, but there was some info in comments, from which
365336 # we'll construct an empty header message
366337 if not self .counter and (self .flags or self .user_comments or self .auto_comments ):
367- self .messages .append (_NormalizedString ('""' ))
368- self .translations .append ([0 , _NormalizedString ('""' )])
338+ self .messages .append (_NormalizedString ())
339+ self .translations .append ([0 , _NormalizedString ()])
369340 self ._add_message ()
370341
371342 def _invalid_pofile (self , line , lineno , msg ) -> None :
0 commit comments