Skip to content

Commit e790cc5

Browse files
committed
Fix nondeterministic recursive descent
1 parent ac1ec55 commit e790cc5

File tree

5 files changed

+174
-15
lines changed

5 files changed

+174
-15
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@ ENV/
8080

8181
# Dev utils
8282
dev.py
83-
benchmark.py
8483
profile_.py
8584

8685
# Test fixtures

jsonpath_rfc9535/segments.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,10 @@ def _children(node: JSONPathNode) -> Iterable[JSONPathNode]:
142142
while queue:
143143
_node = queue.popleft()
144144
yield _node
145+
# Visit child nodes now or queue them for later?
146+
visit_children = random.choice([True, False]) # noqa: S311
145147
for child in _children(_node):
146-
# Queue the child node or visit it now?
147-
if random.choice([True, False]): # noqa: S311
148+
if visit_children:
148149
queue.append(child)
149150
else:
150151
yield child

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,4 +169,4 @@ convention = "google"
169169
"scripts/__init__.py" = ["D104"]
170170
"tests/*" = ["D100", "D101", "D104", "D103"]
171171
"jsonpath_rfc9535/lex.py" = ["E741"]
172-
"scripts/nondeterministic_descent.py" = ["D103", "T201", "D101", "D102"]
172+
"scripts/nondeterministic_descent.py" = ["D103", "T201", "D101", "D102", "S311"]

scripts/benchmark.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
import json
2+
import timeit
3+
from typing import Any
4+
from typing import Mapping
5+
from typing import NamedTuple
6+
from typing import Sequence
7+
from typing import Union
8+
9+
# ruff: noqa: D100 D101 D103 T201
10+
11+
12+
class CTSCase(NamedTuple):
    """A single benchmark case: a query string and the data it is applied to.

    Attributes:
        query: The query text, taken from a test's "selector" entry.
        data: The JSON-like value, taken from a test's "document" entry.
    """

    query: str
    data: Union[Sequence[Any], Mapping[str, Any]]
15+
16+
17+
def valid_queries() -> Sequence[CTSCase]:
    """Load the benchmark cases from ``tests/cts/cts.json``.

    The path is relative to the current working directory. Tests flagged with
    a truthy ``invalid_selector`` entry are skipped.

    Returns:
        One ``CTSCase`` per remaining test, built from its ``selector`` and
        ``document`` entries.
    """
    # JSON interchange text is UTF-8; be explicit rather than relying on the
    # platform's default encoding (which is not UTF-8 everywhere).
    with open("tests/cts/cts.json", encoding="utf-8") as fd:
        data = json.load(fd)

    return [
        CTSCase(t["selector"], t["document"])
        for t in data["tests"]
        if not t.get("invalid_selector", False)
    ]
26+
27+
28+
QUERIES = valid_queries()
29+
30+
COMPILE_AND_FIND_SETUP = "from jsonpath_rfc9535 import query"
31+
32+
COMPILE_AND_FIND_STMT = """\
33+
for path, data in QUERIES:
34+
list(query(path, data))"""
35+
36+
COMPILE_AND_FIND_VALUES_STMT = """\
37+
for path, data in QUERIES:
38+
[node.value for node in query(path, data)]"""
39+
40+
JUST_COMPILE_SETUP = "from jsonpath_rfc9535 import compile"
41+
42+
JUST_COMPILE_STMT = """\
43+
for path, _ in QUERIES:
44+
compile(path)"""
45+
46+
JUST_FIND_SETUP = """\
47+
from jsonpath_rfc9535 import compile
48+
compiled_queries = [(compile(q), d) for q, d in QUERIES]
49+
"""
50+
51+
JUST_FIND_STMT = """\
52+
for path, data in compiled_queries:
53+
list(path.query(data))"""
54+
55+
JUST_FIND_VALUES_STMT = """\
56+
for path, data in compiled_queries:
57+
[node.value for node in path.query(data)]"""
58+
59+
60+
def benchmark(number: int = 100, best_of: int = 3) -> None:
    """Time each benchmark statement over ``QUERIES`` and print the best round.

    Args:
        number: How many times each statement is executed per timing round.
        best_of: How many timing rounds to run; the fastest one is printed.
    """
    print(f"repeating {len(QUERIES)} queries {number} times, best of {best_of} rounds")

    # One row per measurement: (label, statement, setup, print result in green).
    runs = (
        ("compile and find", COMPILE_AND_FIND_STMT, COMPILE_AND_FIND_SETUP, True),
        (
            "compile and find (values)",
            COMPILE_AND_FIND_VALUES_STMT,
            COMPILE_AND_FIND_SETUP,
            False,
        ),
        ("just compile", JUST_COMPILE_STMT, JUST_COMPILE_SETUP, False),
        ("just find", JUST_FIND_STMT, JUST_FIND_SETUP, True),
        ("just find (values)", JUST_FIND_VALUES_STMT, JUST_FIND_SETUP, False),
    )

    for label, stmt, setup, highlight in runs:
        results = timeit.repeat(
            stmt,
            setup=setup,
            # The timed statements and setups refer to QUERIES by name.
            globals={"QUERIES": QUERIES},
            number=number,
            repeat=best_of,
        )

        best = f"{min(results):.3f}"
        if highlight:
            # ANSI bright green, then reset.
            best = f"\033[92m{best}\033[0m"

        print(label.ljust(30), best)
112+
113+
114+
if __name__ == "__main__":
115+
benchmark()

scripts/nondeterministic_descent.py

Lines changed: 55 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import json
56
import os
67
import random
78
import sys
@@ -69,6 +70,27 @@ def _visit(node: AuxNode, depth: int = 0) -> None:
6970
_visit(root)
7071
return root
7172

73+
@staticmethod
def collections(data: JSONLikeData) -> AuxNode:
    """Build an auxiliary tree holding only the lists and dicts in `data`.

    Values that are neither a list nor a dict get no node of their own.

    Args:
        data: The JSON-like value to build the tree from.

    Returns:
        The root node, created at depth 0 with `data` as its value.
    """

    def _visit(node: AuxNode, depth: int = 0) -> None:
        # Pick the values to inspect; anything that is not a collection
        # has no children to add.
        if isinstance(node.value, dict):
            values = node.value.values()
        elif isinstance(node.value, list):
            values = node.value
        else:
            return

        for val in values:
            if isinstance(val, (list, dict)):
                child = AuxNode(depth + 1, val)
                # Populate the child's subtree before attaching it.
                _visit(child, depth + 1)
                node.children.append(child)

    root = AuxNode(0, data)
    _visit(root)
    return root
93+
7294

7395
def pptree(
7496
node: AuxNode,
@@ -115,13 +137,14 @@ def nondeterministic_visit(root: AuxNode) -> Iterable[AuxNode]:
115137
while queue:
116138
_node = queue.popleft()
117139
yield _node
140+
# Visit child nodes now or queue them for later?
141+
visit_children = random.choice([True, False])
118142
for child in _node.children:
119-
# Queue the child node or visit it now?
120-
if random.choice([True, False]): # noqa: S311
121-
queue.append(child)
122-
else:
143+
if visit_children:
123144
yield child
124145
queue.extend(child.children)
146+
else:
147+
queue.append(child)
125148

126149

127150
def get_perms(root: AuxNode) -> List[Tuple[AuxNode, ...]]:
@@ -130,22 +153,43 @@ def get_perms(root: AuxNode) -> List[Tuple[AuxNode, ...]]:
130153
return sorted(perms, key=lambda t: str(t))
131154

132155

133-
def pp_json_path_data(data: JSONLikeData) -> None:
156+
def pp_json_path_perms(data: JSONLikeData) -> None:
157+
print("Input data")
158+
print(f"\033[92m{data}\033[0m")
134159
aux_tree = AuxNode.from_(data)
160+
print("\nTree view")
135161
pptree(aux_tree)
136162

137-
print("\nPre order\n")
163+
print("\nPre order")
138164
print(", ".join(str(n) for n in pre_order_visit(aux_tree)))
139165

140-
print("\nLevel order\n")
166+
print("\nLevel order")
141167
print(", ".join(str(n) for n in breadth_first_visit(aux_tree)))
142168

143-
print("\nNondeterministic order\n")
169+
print("\nNondeterministic order")
170+
for perm in get_perms(aux_tree):
171+
print(", ".join(str(node) for node in perm))
172+
173+
print("\n---\n\nCollections only")
174+
aux_tree = AuxNode.collections(data)
175+
pptree(aux_tree)
176+
177+
print("\nPre order")
178+
print(", ".join(str(n) for n in pre_order_visit(aux_tree)))
179+
180+
print("\nLevel order")
181+
print(", ".join(str(n) for n in breadth_first_visit(aux_tree)))
182+
183+
print("\nNondeterministic order")
144184
for perm in get_perms(aux_tree):
145185
print(", ".join(str(node) for node in perm))
146186

147187

148188
if __name__ == "__main__":
149-
# basic, descendant segment, name shorthand
150-
data = {"o": [{"a": "b"}, {"a": "c"}]}
151-
pp_json_path_data(data)
189+
if len(sys.argv) < 2: # noqa: PLR2004
190+
print("error: no data to process")
191+
print(f"usage: {sys.argv[0]} <JSON string>")
192+
sys.exit(1)
193+
194+
data = json.loads(sys.argv[1])
195+
pp_json_path_perms(data)

0 commit comments

Comments
 (0)