Skip to content

Commit adad732

Browse files
committed
Implement nondeterminism
1 parent a799424 commit adad732

File tree

10 files changed

+281
-13
lines changed

10 files changed

+281
-13
lines changed

jsonpath_rfc9535/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from .environment import JSONLikeData
12
from .environment import JSONPathEnvironment
23
from .exceptions import JSONPathError
34
from .exceptions import JSONPathIndexError
@@ -13,6 +14,7 @@
1314
from .path import JSONPath
1415

1516
__all__ = (
17+
"JSONLikeData",
1618
"JSONPathEnvironment",
1719
"JSONPathError",
1820
"JSONPathIndexError",

jsonpath_rfc9535/environment.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,12 @@ class JSONPathEnvironment:
5959
index. Defaults to `(2**53) - 1`.
6060
min_int_index (int): The minimum integer allowed when selecting array items by
6161
index. Defaults to `-(2**53) + 1`.
62-
max_recursion_depth: The maximum number of dict/objects and/or arrays/lists the
63-
recursive descent selector can visit before a `JSONPathRecursionError`
64-
is thrown.
65-
parser_class: The parser to use when parsing tokens from the lexer.
62+
max_recursion_depth (int): The maximum number of dict/objects and/or
63+
arrays/lists the recursive descent selector can visit before a
64+
`JSONPathRecursionError` is thrown.
65+
parser_class (Parser): The parser to use when parsing tokens from the lexer.
66+
nondeterministic (bool): If `True`, enable nondeterminism when iterating objects
67+
and visiting nodes with the recursive descent segment. Defaults to `False`.
6668
"""
6769

6870
parser_class: Type[Parser] = Parser
@@ -71,6 +73,8 @@ class JSONPathEnvironment:
7173
min_int_index = -(2**53) + 1
7274
max_recursion_depth = 100
7375

76+
nondeterministic = False
77+
7478
def __init__(self) -> None:
7579
self.parser: Parser = self.parser_class(env=self)
7680
"""The parser bound to this environment."""

jsonpath_rfc9535/segments.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@
22

33
from __future__ import annotations
44

5+
import random
56
from abc import ABC
67
from abc import abstractmethod
8+
from collections import deque
79
from typing import TYPE_CHECKING
10+
from typing import Deque
811
from typing import Iterable
912
from typing import Tuple
1013

@@ -66,12 +69,24 @@ class JSONPathRecursiveDescentSegment(JSONPathSegment):
6669

6770
def resolve(self, nodes: Iterable[JSONPathNode]) -> Iterable[JSONPathNode]:
    """Select descendants of each node in _nodes_.

    Every node produced by the chosen visitor is passed to each of this
    segment's selectors, and the selectors' results are yielded in turn.

    If `self.env.nondeterministic` is falsy the pre order visitor is always
    used. Otherwise one visitor is picked at random for this call, and it is
    used for all of _nodes_.
    """
    # The nondeterministic visitor never generates a pre order traversal, so we
    # still use the deterministic visitor 20% of the time, to cover all
    # permutations.
    #
    # XXX: This feels like a bit of a hack.
    deterministic_share = 0.2
    use_nondeterministic = (
        self.env.nondeterministic
        # `random()` is uniform on [0.0, 1.0), so `>=` leaves exactly the
        # bottom `deterministic_share` of draws for the pre order visitor.
        and random.random() >= deterministic_share  # noqa: S311
    )
    visitor = (
        self._nondeterministic_visit if use_nondeterministic else self._visit
    )

    for node in nodes:
        for _node in visitor(node):
            for selector in self.selectors:
                yield from selector.resolve(_node)
7387

7488
def _visit(self, node: JSONPathNode, depth: int = 1) -> Iterable[JSONPathNode]:
89+
"""Pre order node traversal."""
7590
if depth > self.env.max_recursion_depth:
7691
raise JSONPathRecursionError("recursion limit exceeded", token=self.token)
7792

@@ -96,6 +111,45 @@ def _visit(self, node: JSONPathNode, depth: int = 1) -> Iterable[JSONPathNode]:
96111
)
97112
yield from self._visit(_node, depth + 1)
98113

114+
def _nondeterministic_visit(
    self,
    root: JSONPathNode,
    _: int = 1,
) -> Iterable[JSONPathNode]:
    """Nondeterministic node traversal.

    Yields _root_ and every dict/list descendant of _root_ in a randomized
    order. The order keeps two guarantees: a node is always yielded before
    its descendants, and the elements of any one list are yielded in list
    order. Members of a dict may be yielded in any order.

    Args:
        root: The node to start from.
        _: The depth of _root_. It mirrors the `depth` argument of `_visit`
            and is used as the starting point for the recursion limit check.

    Raises:
        JSONPathRecursionError: If a node deeper than
            `self.env.max_recursion_depth` is reached, which is what happens
            with self-referencing data.
    """

    def _children(node: JSONPathNode) -> Iterable[JSONPathNode]:
        # Only containers are generated, scalars have no descendants to visit.
        if isinstance(node.value, dict):
            items = list(node.value.items())
            # Object members have no defined order, so shuffle them.
            random.shuffle(items)
            for key, val in items:
                if isinstance(val, (dict, list)):
                    yield JSONPathNode(
                        value=val,
                        parts=node.parts + (key,),
                        root=node.root,
                    )
        elif isinstance(node.value, list):
            # Array elements keep their order.
            for i, val in enumerate(node.value):
                if isinstance(val, (dict, list)):
                    yield JSONPathNode(
                        value=val,
                        parts=node.parts + (i,),
                        root=node.root,
                    )

    def _check_depth(depth: int) -> None:
        # Same limit and error as the pre order visitor.
        if depth > self.env.max_recursion_depth:
            raise JSONPathRecursionError("recursion limit exceeded", token=self.token)

    # Pending (node, depth) pairs.
    queue: Deque[Tuple[JSONPathNode, int]] = deque()

    _check_depth(_)
    queue.extend((child, _ + 1) for child in _children(root))
    yield root

    while queue:
        _node, depth = queue.popleft()
        _check_depth(depth)
        yield _node

        # For a list, decide once for all elements. Mixing "visit now" and
        # "queue for later" between siblings could emit a later element
        # before an earlier one. Dict members are free to be mixed.
        ordered = isinstance(_node.value, list)
        visit_now = random.choice([True, False])  # noqa: S311

        for child in _children(_node):
            if not ordered:
                # Queue the child node or visit it now?
                visit_now = random.choice([True, False])  # noqa: S311

            if visit_now:
                _check_depth(depth + 1)
                yield child
                queue.extend(
                    (grandchild, depth + 2) for grandchild in _children(child)
                )
            else:
                queue.append((child, depth + 1))
152+
99153
def __str__(self) -> str:
    """Return the segment as `..[` + comma separated selectors + `]`."""
    joined = ", ".join(map(str, self.selectors))
    return "..[" + joined + "]"
101155

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,3 +168,4 @@ convention = "google"
168168
"jsonpath_rfc9535/__init__.py" = ["D104"]
169169
"tests/*" = ["D100", "D101", "D104", "D103"]
170170
"jsonpath_rfc9535/lex.py" = ["E741"]
171+
"scripts/nondeterministic_descent.py" = ["D103", "T201"]

scripts/__init__.py

Whitespace-only changes.
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
"""Utilities for exploring nondeterminism in the recursive descent segment."""
2+
3+
from __future__ import annotations
4+
5+
import os
6+
import random
7+
import sys
8+
from collections import deque
9+
from typing import TYPE_CHECKING
10+
from typing import Deque
11+
from typing import Iterable
12+
from typing import List
13+
from typing import Optional
14+
from typing import TextIO
15+
from typing import Tuple
16+
17+
if TYPE_CHECKING:
18+
from jsonpath_rfc9535.environment import JSONLikeData
19+
20+
21+
# Box drawing fragments used when pretty printing a tree.
HORIZONTAL_SEP = 2 * "\N{BOX DRAWINGS LIGHT HORIZONTAL}"
VERTICAL_SEP = "\N{BOX DRAWINGS LIGHT VERTICAL}"
BRANCH = "".join(("\N{BOX DRAWINGS LIGHT VERTICAL AND RIGHT}", HORIZONTAL_SEP, " "))
TERMINAL_BRANCH = "".join(("\N{BOX DRAWINGS LIGHT UP AND RIGHT}", HORIZONTAL_SEP, " "))
INDENT = VERTICAL_SEP + 3 * " "
TERMINAL_INDENT = 4 * " "

# (start, stop) ANSI escape pairs; every pair shares the same reset sequence.
COLOR_CODES = [
    (f"\033[{code}m", "\033[0m") for code in (92, 93, 94, 95, 96, 91)
]
36+
37+
38+
class AuxNode:
    """A tree node holding a JSON-like value, its depth and its child nodes."""

    def __init__(
        self,
        depth: int,
        value: object,
        children: Optional[List[AuxNode]] = None,
    ) -> None:
        self.depth = depth
        self.value = value
        # A missing or empty `children` argument becomes a fresh list.
        self.children = children if children else []

    def __str__(self) -> str:
        """Return the node's value wrapped in the color for its depth."""
        # Colors repeat once the depth exceeds the number of available codes.
        prefix, suffix = COLOR_CODES[self.depth % len(COLOR_CODES)]
        return f"{prefix}{self.value}{suffix}"

    @staticmethod
    def from_(data: JSONLikeData) -> AuxNode:
        """Build a tree of `AuxNode`s mirroring the structure of _data_.

        Dict values and list items become child nodes, in iteration order.
        Any other value becomes a node without children.
        """

        def _grow(parent: AuxNode) -> None:
            if isinstance(parent.value, dict):
                child_values = parent.value.values()
            elif isinstance(parent.value, list):
                child_values = parent.value
            else:
                return

            for item in child_values:
                child = AuxNode(parent.depth + 1, item)
                _grow(child)
                parent.children.append(child)

        tree = AuxNode(0, data)
        _grow(tree)
        return tree
71+
72+
73+
def pptree(
    node: AuxNode,
    indent: str = "",
    buf: TextIO = sys.stdout,
) -> None:
    """Pretty print the tree rooted at `node`.

    Args:
        node: The root of the (sub)tree to print.
        indent: The prefix written before the branch marker of each child.
        buf: The text stream to write to.
    """
    # Pre-order tree traversal.
    #
    # Always terminate lines with "\n". Text mode streams translate it to the
    # platform line ending themselves, so writing `os.linesep` would result in
    # "\r\r\n" on Windows.
    buf.write(str(node) + "\n")

    if not node.children:
        # Base case. No children.
        return

    *leading, last = node.children

    # Recursively call pptree for all but the last child of `node`.
    for child in leading:
        buf.write(indent + BRANCH)
        pptree(child, indent=indent + INDENT, buf=buf)

    # Terminal branch case for last, possibly only, child of `node`.
    buf.write(indent + TERMINAL_BRANCH)
    pptree(last, indent=indent + TERMINAL_INDENT, buf=buf)
94+
95+
def pre_order_visit(node: AuxNode) -> Iterable[AuxNode]:
    """Generate `node` and its descendants, parents before children."""
    pending = [node]

    while pending:
        current = pending.pop()
        yield current
        # Reversed, so the first child ends up on top of the stack.
        pending.extend(reversed(current.children))
100+
101+
102+
def breadth_first_visit(node: AuxNode) -> Iterable[AuxNode]:
    """Generate `node` and its descendants one level at a time."""
    level = [node]

    while level:
        next_level = []
        for current in level:
            yield current
            next_level.extend(current.children)
        level = next_level
109+
110+
111+
def nondeterministic_visit(root: AuxNode) -> Iterable[AuxNode]:
    """Generate `root` and its descendants in a randomized order.

    A node is always generated before its descendants, and the children of a
    node whose value is a list are always generated in list order. Children
    of any other node may be interleaved freely.
    """
    queue: Deque[AuxNode] = deque(root.children)
    yield root

    while queue:
        _node = queue.popleft()
        yield _node

        # For a list, decide once for all elements. Mixing "visit now" and
        # "queue for later" between siblings could emit a later element before
        # an earlier one.
        ordered = isinstance(_node.value, list)
        visit_now = random.choice([True, False])  # noqa: S311

        for child in _node.children:
            if not ordered:
                # Queue the child node or visit it now?
                visit_now = random.choice([True, False])  # noqa: S311

            if visit_now:
                yield child
                queue.extend(child.children)
            else:
                queue.append(child)
125+
126+
127+
def get_perms(root: AuxNode, samples: int = 1000) -> List[Tuple[AuxNode, ...]]:
    """Return the distinct node orderings found by sampling random traversals.

    Args:
        root: The root of the tree to traverse.
        samples: How many nondeterministic traversals to run. Random sampling
            does not guarantee that every possible ordering is found.

    Returns:
        The distinct orderings, plus the pre order traversal, sorted by the
        string form of their nodes.
    """
    perms = {tuple(nondeterministic_visit(root)) for _ in range(samples)}
    # Always include pre order, even if no sampled traversal produced it.
    perms.add(tuple(pre_order_visit(root)))
    # Sort on each node's `str()`. `str()` of the tuple itself would use the
    # nodes' default `repr`, ordering the result by memory address.
    return sorted(perms, key=lambda perm: [str(node) for node in perm])
131+
132+
133+
def pp_json_path_data(data: JSONLikeData) -> None:
    """Print `data` as a tree, followed by its nodes in several visit orders."""
    tree = AuxNode.from_(data)
    pptree(tree)

    print("\nPre order\n")
    print(", ".join(map(str, pre_order_visit(tree))))

    print("\nLevel order\n")
    print(", ".join(map(str, breadth_first_visit(tree))))

    print("\nNondeterministic order\n")
    for ordering in get_perms(tree):
        print(", ".join(map(str, ordering)))
146+
147+
148+
if __name__ == "__main__":
    # Document from the "basic, descendant segment, name shorthand" test case.
    pp_json_path_data({"o": [{"a": "b"}, {"a": "c"}]})

tests/cts

tests/nondeterminism.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""Test against the JSONPath Compliance Test Suite with nondeterminism enabled.
2+
3+
The CTS is a submodule located in /tests/cts. After a git clone, run
4+
`git submodule update --init` from the root of the repository.
5+
"""
6+
7+
import json
8+
import operator
9+
from dataclasses import dataclass
10+
from typing import Any
11+
from typing import List
12+
from typing import Optional
13+
14+
import pytest
15+
16+
from jsonpath_rfc9535 import JSONLikeData
17+
from jsonpath_rfc9535 import JSONPathEnvironment
18+
19+
20+
@dataclass
class Case:
    """A single test case loaded from the compliance test suite JSON file."""

    # Used as the pytest ID for the case.
    name: str
    # The JSONPath query to run.
    selector: str
    # The data the query is applied to.
    document: JSONLikeData = None
    # The expected result when only one result is acceptable.
    result: Any = None
    # The acceptable results when more than one ordering is valid.
    results: Optional[List[Any]] = None
    # Truthy when the selector is expected to be rejected.
    invalid_selector: Optional[bool] = None
28+
29+
30+
def cases() -> List[Case]:
    """Load every test case from `tests/cts/cts.json`."""
    with open("tests/cts/cts.json", encoding="utf8") as cts_file:
        suite = json.load(cts_file)

    loaded: List[Case] = []
    for spec in suite["tests"]:
        loaded.append(Case(**spec))
    return loaded
34+
35+
36+
def valid_cases() -> List[Case]:
    """Return the test cases that are not flagged as having an invalid selector."""
    selected: List[Case] = []
    for candidate in cases():
        if candidate.invalid_selector:
            continue
        selected.append(candidate)
    return selected
38+
39+
40+
class MockEnv(JSONPathEnvironment):
    """A JSONPath environment with nondeterminism switched on."""

    nondeterministic = True
42+
43+
44+
@pytest.mark.parametrize("case", valid_cases(), ids=operator.attrgetter("name"))
def test_nondeterminism(case: Case) -> None:
    """Check a query result against the case's expected result(s)."""
    assert case.document is not None
    found = MockEnv().findall(case.selector, case.document)

    if case.results is None:
        assert found == case.result
    else:
        # Any one of the listed results is acceptable.
        assert found in case.results

tests/test_compliance.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,18 +11,18 @@
1111
from typing import Dict
1212
from typing import List
1313
from typing import Optional
14-
from typing import Union
1514

1615
import pytest
1716

1817
import jsonpath_rfc9535 as jsonpath
18+
from jsonpath_rfc9535.environment import JSONLikeData
1919

2020

2121
@dataclass
2222
class Case:
2323
name: str
2424
selector: str
25-
document: Union[Dict[str, Any], List[Any], None] = None
25+
document: JSONLikeData = None
2626
result: Any = None
2727
results: Optional[List[Any]] = None
2828
invalid_selector: Optional[bool] = None

tests/test_errors.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,11 @@ def test_non_singular_query_is_not_comparable(env: JSONPathEnvironment) -> None:
3636
env.compile("$[?@.* > 2]")
3737

3838

39-
def test_recursive_data(env: JSONPathEnvironment) -> None:
39+
def test_recursive_data() -> None:
40+
class MockEnv(JSONPathEnvironment):
41+
nondeterministic = False
42+
43+
env = MockEnv()
4044
query = "$..a"
4145
arr: List[Any] = []
4246
data: Any = {"foo": arr}
@@ -46,11 +50,10 @@ def test_recursive_data(env: JSONPathEnvironment) -> None:
4650
env.query(query, data)
4751

4852

49-
class MockEnv(JSONPathEnvironment):
50-
max_recursion_depth = 3
51-
52-
5353
def test_low_recursion_limit() -> None:
54+
class MockEnv(JSONPathEnvironment):
55+
max_recursion_depth = 3
56+
5457
env = MockEnv()
5558
query = "$..a"
5659
data = {"foo": [{"bar": [1, 2, 3]}]}

0 commit comments

Comments
 (0)