Skip to content

Commit 19c4e45

Browse files
committed
create json schemas now uses format attribute, url and date validation rules deprecated
1 parent 34cbb04 commit 19c4e45

File tree

4 files changed

+150
-47
lines changed

4 files changed

+150
-47
lines changed

synapseclient/extensions/curator/schema_generation.py

Lines changed: 84 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ class ListColumnType(ColumnType):
114114
]
115115

116116

117-
class Format(Enum):
117+
class JSONSchemaFormat(Enum):
118118
"""
119119
Allowed formats by the JSON Schema validator used by Synapse: https://github.com/everit-org/json-schema#format-validators
120120
For descriptions see: https://json-schema.org/understanding-json-schema/reference/type#format
@@ -150,6 +150,9 @@ class ValidationRuleName(Enum):
150150
# TODO: remove list:
151151
# https://sagebionetworks.jira.com/browse/SYNPY-1692
152152
LIST = "list"
153+
# url and date rules are deprecated for adding format keyword
154+
# TODO: remove url and date
155+
# https://sagebionetworks.jira.com/browse/SYNPY-1685
153156
DATE = "date"
154157
URL = "url"
155158
REGEX = "regex"
@@ -187,6 +190,9 @@ class ValidationRule:
187190
name=ValidationRuleName.LIST,
188191
incompatible_rules=[],
189192
),
193+
# url and date rules are deprecated for adding format keyword
194+
# TODO: remove url and date
195+
# https://sagebionetworks.jira.com/browse/SYNPY-1685
190196
"date": ValidationRule(
191197
name=ValidationRuleName.DATE,
192198
incompatible_rules=[
@@ -236,13 +242,6 @@ def __post_init__(self) -> None:
236242
self.display_name = str(self.fields["displayName"])
237243

238244

239-
class JSONSchemaFormat(Enum):
240-
"""This enum is the currently supported JSON Schema formats"""
241-
242-
DATE = "date"
243-
URI = "uri"
244-
245-
246245
def load_json(file_path: str) -> Any:
247246
"""Load json document from file path or url
248247
@@ -1763,7 +1762,7 @@ def get_node_column_type(
17631762

17641763
def get_node_format(
17651764
self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
1766-
) -> Optional[ColumnType]:
1765+
) -> Optional[JSONSchemaFormat]:
17671766
"""Gets the format of the node
17681767
17691768
Args:
@@ -1783,11 +1782,11 @@ def get_node_format(
17831782
return format_value
17841783
format_string = str(format_value).lower()
17851784
try:
1786-
column_type = Format(format_string)
1785+
column_type = JSONSchemaFormat(format_string)
17871786
except ValueError as exc:
17881787
msg = (
17891788
f"Node: '{node_label}' had illegal format value: '{format_value}'. "
1790-
f"Allowed values are: [{[member.value for member in Format]}]"
1789+
f"Allowed values are: [{[member.value for member in JSONSchemaFormat]}]"
17911790
)
17921791
raise ValueError(msg) from exc
17931792
return column_type
@@ -2918,7 +2917,7 @@ def define_data_model_relationships(self) -> dict:
29182917
"required_header": False,
29192918
"edge_rel": False,
29202919
"node_attr_dict": {"default": None},
2921-
"allowed_values": [member.value for member in Format],
2920+
"allowed_values": [member.value for member in JSONSchemaFormat],
29222921
},
29232922
}
29242923

@@ -4384,6 +4383,7 @@ def _get_rules_by_names(names: list[str]) -> list[ValidationRule]:
43844383
def _get_validation_rule_based_fields(
43854384
validation_rules: list[str],
43864385
explicit_is_array: Optional[bool],
4386+
explicit_format: Optional[JSONSchemaFormat],
43874387
name: str,
43884388
column_type: Optional[ColumnType],
43894389
logger: Logger,
@@ -4428,7 +4428,7 @@ def _get_validation_rule_based_fields(
44284428
- js_pattern: If the type is string the JSON Schema pattern
44294429
"""
44304430
js_is_array = False
4431-
js_format = None
4431+
js_format = explicit_format
44324432
js_minimum = None
44334433
js_maximum = None
44344434
js_pattern = None
@@ -4492,10 +4492,51 @@ def _get_validation_rule_based_fields(
44924492
)
44934493
logger.warning(msg)
44944494

4495-
if ValidationRuleName.URL in validation_rule_names:
4496-
js_format = JSONSchemaFormat.URI
4497-
elif ValidationRuleName.DATE in validation_rule_names:
4498-
js_format = JSONSchemaFormat.DATE
4495+
if explicit_format:
4496+
if (
4497+
ValidationRuleName.DATE in validation_rule_names
4498+
and explicit_format == JSONSchemaFormat.URI
4499+
):
4500+
msg = (
4501+
f"For property: {name}, the format is uri, "
4502+
"but the validation rule date is present. "
4503+
"The format will be set to uri."
4504+
)
4505+
logger.warning(msg)
4506+
elif (
4507+
ValidationRuleName.URL in validation_rule_names
4508+
and explicit_format == JSONSchemaFormat.DATE
4509+
):
4510+
msg = (
4511+
f"For property: {name}, the format is date, "
4512+
"but the validation rule url is present. "
4513+
"The format will be set to date."
4514+
)
4515+
logger.warning(msg)
4516+
4517+
# url and date rules are deprecated for adding format keyword
4518+
# TODO: remove the if/else block below
4519+
# https://sagebionetworks.jira.com/browse/SYNPY-1685
4520+
4521+
else:
4522+
if ValidationRuleName.URL in validation_rule_names:
4523+
js_format = JSONSchemaFormat.URI
4524+
msg = (
4525+
f"A url validation rule is set for property: {name}, but the format is not set. "
4526+
"The format will be set to uri, but this behavior is deprecated and validation "
4527+
"rules will no longer be used in the future."
4528+
"Please explicitly set the format to uri in the data model."
4529+
)
4530+
logger.warning(msg)
4531+
elif ValidationRuleName.DATE in validation_rule_names:
4532+
js_format = JSONSchemaFormat.DATE
4533+
msg = (
4534+
f"A date validation rule is set for property: {name}, but the format is not set. "
4535+
"The format will be set to date, but this behavior is deprecated and validation "
4536+
"rules will no longer be used in the future."
4537+
"Please explicitly set the format to uri in the data model."
4538+
)
4539+
logger.warning(msg)
44994540

45004541
in_range_rule = get_rule_from_inputted_rules(
45014542
ValidationRuleName.IN_RANGE, validation_rules
@@ -4511,7 +4552,6 @@ def _get_validation_rule_based_fields(
45114552
if regex_rule:
45124553
js_pattern = get_regex_parameters_from_inputted_rule(regex_rule)
45134554

4514-
print(js_is_array)
45154555
return (
45164556
js_is_array,
45174557
js_format,
@@ -4590,7 +4630,6 @@ def __post_init__(self) -> None:
45904630
column_type = self.dmge.get_node_column_type(
45914631
node_display_name=self.display_name
45924632
)
4593-
45944633
# list validation rule is been deprecated for use in deciding type
45954634
# TODO: set self.is_array here instead of return from _get_validation_rule_based_fields
45964635
# https://sagebionetworks.jira.com/browse/SYNPY-1692
@@ -4603,6 +4642,25 @@ def __post_init__(self) -> None:
46034642
else:
46044643
self.type = None
46054644
explicit_is_array = None
4645+
4646+
# url and date rules are deprecated for adding format keyword
4647+
# TODO: set self.format here instead of passing it to get_validation_rule_based_fields
4648+
# https://sagebionetworks.jira.com/browse/SYNPY-1685
4649+
explicit_format = self.dmge.get_node_format(node_display_name=self.display_name)
4650+
if explicit_format:
4651+
if (
4652+
column_type != ListColumnType.STRING_LIST
4653+
and column_type != AtomicColumnType.STRING
4654+
):
4655+
msg = (
4656+
f"A format value (current value: {explicit_format.value}) "
4657+
f"is set for property: {self.name}, but columnType is not a string type "
4658+
f"(current value: {column_type.value}). "
4659+
"To use a format value the columnType must be set to one of: "
4660+
"[string, string_list] "
4661+
)
4662+
raise ValueError(msg)
4663+
46064664
(
46074665
self.is_array,
46084666
self.format,
@@ -4612,6 +4670,7 @@ def __post_init__(self) -> None:
46124670
) = _get_validation_rule_based_fields(
46134671
validation_rules=validation_rules,
46144672
explicit_is_array=explicit_is_array,
4673+
explicit_format=explicit_format,
46154674
name=self.name,
46164675
column_type=self.type,
46174676
logger=self.logger,
@@ -4990,7 +5049,7 @@ def _set_conditional_dependencies(
49905049

49915050

49925051
def _create_enum_array_property(
4993-
node: Node, use_valid_value_display_names: bool = True
5052+
node: TraversalNode, use_valid_value_display_names: bool = True
49945053
) -> Property:
49955054
"""
49965055
Creates a JSON Schema property array with enum items
@@ -5024,7 +5083,7 @@ def _create_enum_array_property(
50245083
return array_property
50255084

50265085

5027-
def _create_array_property(node: Node) -> Property:
5086+
def _create_array_property(node: TraversalNode) -> Property:
50285087
"""
50295088
Creates a JSON Schema property array
50305089
@@ -5056,7 +5115,7 @@ def _create_array_property(node: Node) -> Property:
50565115

50575116

50585117
def _create_enum_property(
5059-
node: Node, use_valid_value_display_names: bool = True
5118+
node: TraversalNode, use_valid_value_display_names: bool = True
50605119
) -> Property:
50615120
"""
50625121
Creates a JSON Schema property enum
@@ -5089,7 +5148,7 @@ def _create_enum_property(
50895148
return enum_property
50905149

50915150

5092-
def _create_simple_property(node: Node) -> Property:
5151+
def _create_simple_property(node: TraversalNode) -> Property:
50935152
"""
50945153
Creates a JSON Schema property
50955154
@@ -5125,7 +5184,7 @@ def _create_simple_property(node: Node) -> Property:
51255184
return prop
51265185

51275186

5128-
def _set_type_specific_keywords(schema: dict[str, Any], node: Node) -> None:
5187+
def _set_type_specific_keywords(schema: dict[str, Any], node: TraversalNode) -> None:
51295188
"""Sets JSON Schema keywords that are allowed if type has been set
51305189
51315190
Arguments:
@@ -5143,7 +5202,7 @@ def _set_type_specific_keywords(schema: dict[str, Any], node: Node) -> None:
51435202

51445203
def _set_property(
51455204
json_schema: JSONSchema,
5146-
node: Node,
5205+
node: TraversalNode,
51475206
use_property_display_names: bool = True,
51485207
use_valid_value_display_names: bool = True,
51495208
) -> None:

tests/unit/synapseclient/extensions/schema_files/example.model.csv

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ Check Regex Single,,,,,TRUE,DataProperty,,,regex search [a-f],,
3333
Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f],,
3434
Check Regex Integer,,,,,TRUE,DataProperty,,,regex search ^\d+$,,
3535
Check Num,,,,,TRUE,DataProperty,,,num error,,
36-
Check Float,,,,,TRUE,DataProperty,,,float error,,
36+
Check Float,,,,,TRUE,DataProperty,,,float error,number,
3737
Check Int,,,,,TRUE,DataProperty,,,int error,,
3838
Check String,,,,,TRUE,DataProperty,,,str error,,
3939
Check URL,,,,,TRUE,DataProperty,,,url,string,uri

tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,12 @@
1313
"CheckDate": {
1414
"description": "TBD",
1515
"format": "date",
16-
"not": {
17-
"type": "null"
18-
},
16+
"type": "string",
1917
"title": "Check Date"
2018
},
2119
"CheckFloat": {
2220
"description": "TBD",
23-
"not": {
24-
"type": "null"
25-
},
21+
"type": "number",
2622
"title": "Check Float"
2723
},
2824
"CheckInt": {
@@ -207,9 +203,7 @@
207203
"CheckURL": {
208204
"description": "TBD",
209205
"format": "uri",
210-
"not": {
211-
"type": "null"
212-
},
206+
"type": "string",
213207
"title": "Check URL"
214208
},
215209
"CheckUnique": {

0 commit comments

Comments
 (0)