From 9ba5319959e56edcec46efaa6caffaf1ba97a293 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Tue, 15 Jul 2025 15:31:35 -0700 Subject: [PATCH 1/5] Do not format in ParseString.__str__ #13 Signed-off-by: Jono Yang --- src/pygmars/parse.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/src/pygmars/parse.py b/src/pygmars/parse.py index 34f09fa2..87b0935b 100644 --- a/src/pygmars/parse.py +++ b/src/pygmars/parse.py @@ -319,18 +319,7 @@ def __repr__(self): return f"" def __str__(self): - """ - Return a formatted representation of this ``ParseString``. This - representation includes extra spaces to ensure that labels will line up - with the representation of other ``ParseString`` for the same text, - regardless of the grouping. - """ - # Add spaces to make everything line up. - s = re.sub(r">(?!\})", r"> ", self._parse_string) - s = re.sub(r"([^\{])<", r"\1 <", s) - if s[0] == "<": - s = " " + s - return s.rstrip() + return self._parse_string.rstrip() # used to split a ParseString on labels and braces delimiters From e848366761bb83553518132e123d5718ba4988da Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Wed, 16 Jul 2025 09:43:58 -0700 Subject: [PATCH 2/5] Remove doctest on ParseString.__str__ #13 Signed-off-by: Jono Yang --- tests/test_parse_doctest.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/tests/test_parse_doctest.py b/tests/test_parse_doctest.py index 05ba2827..1b0bd0e1 100644 --- a/tests/test_parse_doctest.py +++ b/tests/test_parse_doctest.py @@ -82,18 +82,6 @@ ... AttributeError: 'str' object has no attribute 'label' -The `str()` for a parse string adds spaces to it, which makes it line -up with `str()` output for other parse strings over the same -underlying input. - - >>> cs = ParseString(t1) - >>> print(cs) - - >>> cs.apply_transform(partial(re.compile('').sub, '{}')) - '{}' - >>> print(cs) - {} - The `validate()` method makes sure that the parsing does not corrupt the parse string. By setting validate=True, `validate()` will be called at the end of every call to `apply_transform`. @@ -221,7 +209,7 @@ Input parsed to label: V before :

after : {}

{} - new :

+ new :

length : 6,6 ------------------------------------- Rule.parse: applied rule: / PP # PP -> P NP> @@ -237,7 +225,7 @@ Input parsed to label: VP before : after : { }{} - new : + new : length : 5,3 parse tree: (label='ROOT', children=( (label='NP', children=( From 174f9c11a1bc7caea4f47abbb8a9d7254f0cc6dd Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Wed, 16 Jul 2025 14:21:01 -0700 Subject: [PATCH 3/5] Update expected test results #13 Signed-off-by: Jono Yang --- tests/test_parse_doctest.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/test_parse_doctest.py b/tests/test_parse_doctest.py index 1b0bd0e1..c6c683d1 100644 --- a/tests/test_parse_doctest.py +++ b/tests/test_parse_doctest.py @@ -191,41 +191,41 @@ Rule.parse: applied rule: ? * * / NP # NP> Rule regex: (?P(?:<(?:DT)>)?(?:<(?:JJ)>)*(?:<(?:NN)>)*) Input parsed to label: NP - before :

- after : {
} {
}{
} - new : + before :
+ after : {
}{
}{
} + new : length : 9,6 ------------------------------------- Rule.parse: applied rule: / P # Preposition> Rule regex: (?P(?:<(?:IN)>)) Input parsed to label: P - before : - after : {} - new :

+ before : + after : {} + new :

length : 6,6 ------------------------------------- Rule.parse: applied rule: / V # Verb> Rule regex: (?P(?:<(?:V[^\{\}<>]*)>)) Input parsed to label: V - before :

- after : {}

{} - new :

+ before :

+ after : {}

{} + new :

length : 6,6 ------------------------------------- Rule.parse: applied rule: / PP # PP -> P NP> Rule regex: (?P(?:<(?:P)>)(?:<(?:NP)>)) Input parsed to label: PP - before :

- after : {

} - new : + before :

+ after : {

} + new : length : 6,5 ------------------------------------- Rule.parse: applied rule: * / VP # VP -> V (NP|PP)*> Rule regex: (?P(?:<(?:V)>)(?:<(?:NP|PP)>)*) Input parsed to label: VP - before : - after : { }{} - new : + before : + after : {}{} + new : length : 5,3 parse tree: (label='ROOT', children=( (label='NP', children=( From 6db988a647195107f27cebb6f81219593f0830dc Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Wed, 16 Jul 2025 14:58:15 -0700 Subject: [PATCH 4/5] Remove extra spaces when printing update #13 Signed-off-by: Jono Yang --- src/pygmars/parse.py | 2 +- tests/test_parse_doctest.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pygmars/parse.py b/src/pygmars/parse.py index 87b0935b..f931d3d4 100644 --- a/src/pygmars/parse.py +++ b/src/pygmars/parse.py @@ -514,7 +514,7 @@ def parse(self, tree, trace=0): if after_parse != before_parse: # only update the tree and the trace if there have been changes from this parse if trace: - updated = re.sub(r"\{[^\{]+\}", f" <{self.label}> ", after_parse) + updated = re.sub(r"\{[^\{]+\}", f"<{self.label}>", after_parse) trace_elements.append("-------------------------------------") trace_elements.append(f"Rule.parse: applied rule: {self!r}") trace_elements.append(f" Rule regex: {self._regexp}") diff --git a/tests/test_parse_doctest.py b/tests/test_parse_doctest.py index c6c683d1..818eb914 100644 --- a/tests/test_parse_doctest.py +++ b/tests/test_parse_doctest.py @@ -193,7 +193,7 @@ Input parsed to label: NP before :

after : {
}{
}{
} - new : + new : length : 9,6 ------------------------------------- Rule.parse: applied rule: / P # Preposition> @@ -201,7 +201,7 @@ Input parsed to label: P before : after : {} - new :

+ new :

length : 6,6 ------------------------------------- Rule.parse: applied rule: / V # Verb> @@ -209,7 +209,7 @@ Input parsed to label: V before :

after : {}

{} - new :

+ new :

length : 6,6 ------------------------------------- Rule.parse: applied rule: / PP # PP -> P NP> @@ -217,7 +217,7 @@ Input parsed to label: PP before :

after : {

} - new : + new : length : 6,5 ------------------------------------- Rule.parse: applied rule: * / VP # VP -> V (NP|PP)*> @@ -225,7 +225,7 @@ Input parsed to label: VP before : after : {}{} - new : + new : length : 5,3 parse tree: (label='ROOT', children=( (label='NP', children=( From ec5b9ed636652151758bbefc62a214ccd84dbff3 Mon Sep 17 00:00:00 2001 From: Jono Yang Date: Wed, 16 Jul 2025 15:01:48 -0700 Subject: [PATCH 5/5] Update CHANGELOG.rst Signed-off-by: Jono Yang --- CHANGELOG.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index a2dc52da..bda7c6c9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,12 @@ Changelog ================ +Version 1.0.0 +------------- + +- Remove formatting from ``ParseString.__str__``, as this is not needed in + scancode-toolkit and is a performance issue + Version 0.9.0 -------------