1+ import keyword
2+ import re
13from collections import OrderedDict
24from enum import Enum
3- from typing import Any , Callable , List , Optional , Union
5+ from typing import Any , Callable , List , Optional , Pattern , Union
46
57from unidecode import unidecode
68
7- from .dynamic_typing import (ComplexType , DList , DOptional , DUnion , MetaData , ModelPtr , NoneType , SingleType ,
9+ from .dynamic_typing import (ComplexType , DDict , DList , DOptional , DUnion , MetaData , ModelPtr , NoneType , SingleType ,
810 StringSerializable , StringSerializableRegistry , Unknown , registry )
911
1012
@@ -27,11 +29,29 @@ def __str__(self):
2729 return self .value
2830
2931
# Reserved Python words; a field name that collides with one of them cannot be
# used as an identifier as-is. Kept as a set for constant-time membership tests.
keywords_set = {*keyword.kwlist}
33+
34+
3035class MetadataGenerator :
3136 CONVERTER_TYPE = Optional [Callable [[str ], Any ]]
3237
def __init__(
        self,
        str_types_registry: Optional[StringSerializableRegistry] = None,
        dict_keys_regex: Optional[List[Union[Pattern, str]]] = None,
        dict_keys_fields: Optional[List[str]] = None
):
    """
    Set up the generator's string-type registry and dict-field detection rules.

    :param str_types_registry: StringSerializableRegistry instance. The default registry is used if None is passed.
    :param dict_keys_regex: List of regular expressions (compiled or not).
        If all keys of some dict match one of them, that dict is marked as a dict field
        rather than a nested model.
    :param dict_keys_fields: List of model field names that are marked as dict fields.
    """
    self.str_types_registry = str_types_registry if str_types_registry is not None else registry
    # re.compile accepts pattern strings as well as already-compiled patterns,
    # so both kinds of list items end up as compiled patterns here.
    self.dict_keys_regex = [re.compile(r) for r in dict_keys_regex] if dict_keys_regex else []
    # Stored as a set because field names are checked for membership per key.
    self.dict_keys_fields = set(dict_keys_fields or ())
3555
3656 def generate (self , * data_variants : dict ) -> dict :
3757 """
@@ -51,7 +71,10 @@ def _convert(self, data: dict):
5171 # ! The _detect_type function can crash on some complex data sets if a value is a unicode string containing certain characters (maybe German)
5272 # The crash does not produce any useful logs and can occur at any time after the bad string has been processed
5373 # It can be reproduced on real_apis tests (openlibrary API)
54- fields [key ] = self ._detect_type (value if not isinstance (value , str ) else unidecode (value ))
74+ convert_dict = key not in self .dict_keys_fields
75+ if key in keywords_set :
76+ key += "_"
77+ fields [key ] = self ._detect_type (value if not isinstance (value , str ) else unidecode (value ), convert_dict )
5578 return fields
5679
5780 def _detect_type (self , value , convert_dict = True ) -> MetaData :
@@ -69,10 +92,7 @@ def _detect_type(self, value, convert_dict=True) -> MetaData:
6992 # List trying to yield nested type
7093 elif isinstance (value , list ):
7194 if value :
72- types = []
73- for item in value :
74- t = self ._detect_type (item , convert_dict )
75- types .append (t )
95+ types = [self ._detect_type (item ) for item in value ]
7696 if len (types ) > 1 :
7797 union = DUnion (* types )
7898 if len (union .types ) == 1 :
@@ -85,10 +105,24 @@ def _detect_type(self, value, convert_dict=True) -> MetaData:
85105
86106 # Dict should be processed as another model if convert_dict is enabled
87107 elif isinstance (value , dict ):
108+ for reg in self .dict_keys_regex :
109+ if all (map (reg .match , value .keys ())):
110+ convert_dict = False
111+ break
112+
88113 if convert_dict :
89114 return self ._convert (value )
90115 else :
91- return dict
116+ types = [self ._detect_type (item ) for item in value .values ()]
117+ if len (types ) > 1 :
118+ union = DUnion (* types )
119+ if len (union .types ) == 1 :
120+ return DDict (* union .types )
121+ return DDict (union )
122+ elif types :
123+ return DDict (* types )
124+ else :
125+ return DDict (Unknown )
92126
93127 # null interpreted as is and will be processed later on Union merge stage
94128 elif value is None :
0 commit comments