diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a2dc52da..bda7c6c9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,12 @@ Changelog ================ +Version 1.0.0 +------------- + +- Remove formatting from ``ParseString.__str__``, as this is not needed in + scancode-toolkit and is a performance issue + Version 0.9.0 ------------- diff --git a/src/pygmars/parse.py b/src/pygmars/parse.py index 34f09fa2..f931d3d4 100644 --- a/src/pygmars/parse.py +++ b/src/pygmars/parse.py @@ -319,18 +319,7 @@ def __repr__(self): return f"" def __str__(self): - """ - Return a formatted representation of this ``ParseString``. This - representation includes extra spaces to ensure that labels will line up - with the representation of other ``ParseString`` for the same text, - regardless of the grouping. - """ - # Add spaces to make everything line up. - s = re.sub(r">(?!\})", r"> ", self._parse_string) - s = re.sub(r"([^\{])<", r"\1 <", s) - if s[0] == "<": - s = " " + s - return s.rstrip() + return self._parse_string.rstrip() # used to split a ParseString on labels and braces delimiters @@ -525,7 +514,7 @@ def parse(self, tree, trace=0): if after_parse != before_parse: # only update the tree and the trace if there have been changes from this parse if trace: - updated = re.sub(r"\{[^\{]+\}", f" <{self.label}> ", after_parse) + updated = re.sub(r"\{[^\{]+\}", f"<{self.label}>", after_parse) trace_elements.append("-------------------------------------") trace_elements.append(f"Rule.parse: applied rule: {self!r}") trace_elements.append(f" Rule regex: {self._regexp}") diff --git a/tests/test_parse_doctest.py b/tests/test_parse_doctest.py index 05ba2827..818eb914 100644 --- a/tests/test_parse_doctest.py +++ b/tests/test_parse_doctest.py @@ -82,18 +82,6 @@ ... AttributeError: 'str' object has no attribute 'label' -The `str()` for a parse string adds spaces to it, which makes it line -up with `str()` output for other parse strings over the same -underlying input. - - >>> cs = ParseString(t1) - >>> print(cs) - - >>> cs.apply_transform(partial(re.compile('').sub, '{}')) - '{}' - >>> print(cs) - {} - The `validate()` method makes sure that the parsing does not corrupt the parse string. By setting validate=True, `validate()` will be called at the end of every call to `apply_transform`. @@ -203,41 +191,41 @@ Rule.parse: applied rule: ? * * / NP # NP> Rule regex: (?P(?:<(?:DT)>)?(?:<(?:JJ)>)*(?:<(?:NN)>)*) Input parsed to label: NP - before :
- after : {
} {
}{
} - new : + before :
+ after : {
}{
}{
} + new : length : 9,6 ------------------------------------- Rule.parse: applied rule: / P # Preposition> Rule regex: (?P(?:<(?:IN)>)) Input parsed to label: P - before : - after : {} - new :

+ before : + after : {} + new :

length : 6,6 ------------------------------------- Rule.parse: applied rule: / V # Verb> Rule regex: (?P(?:<(?:V[^\{\}<>]*)>)) Input parsed to label: V - before :

- after : {}

{} - new :

+ before :

+ after : {}

{} + new :

length : 6,6 ------------------------------------- Rule.parse: applied rule: / PP # PP -> P NP> Rule regex: (?P(?:<(?:P)>)(?:<(?:NP)>)) Input parsed to label: PP - before :

- after : {

} - new : + before :

+ after : {

} + new : length : 6,5 ------------------------------------- Rule.parse: applied rule: * / VP # VP -> V (NP|PP)*> Rule regex: (?P(?:<(?:V)>)(?:<(?:NP|PP)>)*) Input parsed to label: VP - before : - after : { }{} - new : + before : + after : {}{} + new : length : 5,3 parse tree: (label='ROOT', children=( (label='NP', children=(