Skip to content

Commit 4cf6697

Browse files
committed
Implement dict type detection (semi-manual)
1 parent a784a63 commit 4cf6697

File tree

5 files changed

+91228
-11
lines changed

5 files changed

+91228
-11
lines changed

json_to_models/dynamic_typing/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from .base import (
22
BaseType, ImportPathList, MetaData, NoneType, Unknown, UnknownType, get_hash_string
33
)
4-
from .complex import ComplexType, DList, DOptional, DTuple, DUnion, SingleType
4+
from .complex import ComplexType, DDict, DList, DOptional, DTuple, DUnion, SingleType
55
from .models_meta import AbsoluteModelRef, ModelMeta, ModelPtr
66
from .string_datetime import IsoDateString, IsoDatetimeString, IsoTimeString, register_datetime_classes
77
from .string_serializable import (

json_to_models/dynamic_typing/complex.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,8 @@ def __init__(self, *types: Union[type, BaseType, dict]):
150150
else:
151151
h = get_hash_string(t)
152152
if h not in hashes:
153-
hashes.add(h)
154153
unique_types.append(t)
154+
hashes.add(h)
155155
super().__init__(*unique_types)
156156

157157
def _extract_nested_types(self):
@@ -188,3 +188,13 @@ def to_typing_code(self) -> Tuple[ImportPathList, str]:
188188
[*imports, ('typing', 'List')],
189189
f"List[{nested}]"
190190
)
191+
192+
193+
class DDict(SingleType):
194+
# Dict is single type because keys of JSON dict are always strings.
195+
def to_typing_code(self) -> Tuple[ImportPathList, str]:
196+
imports, nested = metadata_to_typing(self.type)
197+
return (
198+
[*imports, ('typing', 'Dict')],
199+
f"Dict[str, {nested}]"
200+
)

json_to_models/generator.py

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
1+
import keyword
2+
import re
13
from collections import OrderedDict
24
from enum import Enum
3-
from typing import Any, Callable, List, Optional, Union
5+
from typing import Any, Callable, List, Optional, Pattern, Union
46

57
from unidecode import unidecode
68

7-
from .dynamic_typing import (ComplexType, DList, DOptional, DUnion, MetaData, ModelPtr, NoneType, SingleType,
9+
from .dynamic_typing import (ComplexType, DDict, DList, DOptional, DUnion, MetaData, ModelPtr, NoneType, SingleType,
810
StringSerializable, StringSerializableRegistry, Unknown, registry)
911

1012

@@ -27,11 +29,29 @@ def __str__(self):
2729
return self.value
2830

2931

32+
keywords_set = set(keyword.kwlist)
33+
34+
3035
class MetadataGenerator:
3136
CONVERTER_TYPE = Optional[Callable[[str], Any]]
3237

33-
def __init__(self, str_types_registry: StringSerializableRegistry = None):
38+
def __init__(
39+
self,
40+
str_types_registry: StringSerializableRegistry = None,
41+
dict_keys_regex: List[Union[Pattern, str]] = None,
42+
dict_keys_fields: List[str] = None
43+
):
44+
"""
45+
46+
:param str_types_registry: StringSerializableRegistry instance. The default registry will be used if None is passed.
47+
:param dict_keys_regex: List of regular expressions (compiled or not).
48+
If all keys of a dict match one of them, then that dict will be marked as a dict field
49+
rather than a nested model.
50+
:param dict_keys_fields: List of model field names that will be marked as dict fields
51+
"""
3452
self.str_types_registry = str_types_registry if str_types_registry is not None else registry
53+
self.dict_keys_regex = [re.compile(r) for r in dict_keys_regex] if dict_keys_regex else []
54+
self.dict_keys_fields = set(dict_keys_fields or ())
3555

3656
def generate(self, *data_variants: dict) -> dict:
3757
"""
@@ -51,7 +71,10 @@ def _convert(self, data: dict):
5171
# ! The _detect_type function can crash on some complex data sets if a value is a unicode string containing certain characters (maybe German)
5272
# The crash does not produce any useful logs and can occur at any time after the bad string was processed
5373
# It can be reproduced on real_apis tests (openlibrary API)
54-
fields[key] = self._detect_type(value if not isinstance(value, str) else unidecode(value))
74+
convert_dict = key not in self.dict_keys_fields
75+
if key in keywords_set:
76+
key += "_"
77+
fields[key] = self._detect_type(value if not isinstance(value, str) else unidecode(value), convert_dict)
5578
return fields
5679

5780
def _detect_type(self, value, convert_dict=True) -> MetaData:
@@ -69,10 +92,7 @@ def _detect_type(self, value, convert_dict=True) -> MetaData:
6992
# List trying to yield nested type
7093
elif isinstance(value, list):
7194
if value:
72-
types = []
73-
for item in value:
74-
t = self._detect_type(item, convert_dict)
75-
types.append(t)
95+
types = [self._detect_type(item) for item in value]
7696
if len(types) > 1:
7797
union = DUnion(*types)
7898
if len(union.types) == 1:
@@ -85,10 +105,24 @@ def _detect_type(self, value, convert_dict=True) -> MetaData:
85105

86106
# Dict should be processed as another model if convert_dict is enabled
87107
elif isinstance(value, dict):
108+
for reg in self.dict_keys_regex:
109+
if all(map(reg.match, value.keys())):
110+
convert_dict = False
111+
break
112+
88113
if convert_dict:
89114
return self._convert(value)
90115
else:
91-
return dict
116+
types = [self._detect_type(item) for item in value.values()]
117+
if len(types) > 1:
118+
union = DUnion(*types)
119+
if len(union.types) == 1:
120+
return DDict(*union.types)
121+
return DDict(union)
122+
elif types:
123+
return DDict(*types)
124+
else:
125+
return DDict(Unknown)
92126

93127
# null is interpreted as-is and will be processed later at the Union merge stage
94128
elif value is None:

0 commit comments

Comments
 (0)