@@ -122,13 +122,37 @@ class ListColumnType(ColumnType):
122122}
123123
124124
class JSONSchemaFormat(Enum):
    """Values accepted for the JSON Schema ``format`` keyword.

    The members correspond to the format validators of the JSON Schema
    validator used by Synapse:
    https://github.com/everit-org/json-schema#format-validators

    A description of each format is available at:
    https://json-schema.org/understanding-json-schema/reference/type#format
    """

    # NOTE: keep the declaration order stable. The members are iterated to
    # build the list of allowed "format" values and the error message shown
    # for an illegal format, so reordering changes that output.
    DATE_TIME = "date-time"
    EMAIL = "email"
    HOSTNAME = "hostname"
    IPV4 = "ipv4"
    IPV6 = "ipv6"
    URI = "uri"
    URI_REFERENCE = "uri-reference"
    URI_TEMPLATE = "uri-template"
    JSON_POINTER = "json-pointer"
    DATE = "date"
    TIME = "time"
    REGEX = "regex"
    RELATIVE_JSON_POINTER = "relative-json-pointer"
144+
145+
125146class ValidationRuleName (Enum ):
126147 """Names of validation rules that are used to create JSON Schema"""
127148
128149 # list validation rule has been deprecated for use in deciding type
129150 # TODO: remove list:
130151 # https://sagebionetworks.jira.com/browse/SYNPY-1692
131152 LIST = "list"
153+ # url and date rules are deprecated for adding format keyword
154+ # TODO: remove url and date
155+ # https://sagebionetworks.jira.com/browse/SYNPY-1685
132156 DATE = "date"
133157 URL = "url"
134158 REGEX = "regex"
@@ -166,6 +190,9 @@ class ValidationRule:
166190 name = ValidationRuleName .LIST ,
167191 incompatible_rules = [],
168192 ),
193+ # url and date rules are deprecated for adding format keyword
194+ # TODO: remove url and date
195+ # https://sagebionetworks.jira.com/browse/SYNPY-1685
169196 "date" : ValidationRule (
170197 name = ValidationRuleName .DATE ,
171198 incompatible_rules = [
@@ -215,13 +242,6 @@ def __post_init__(self) -> None:
215242 self .display_name = str (self .fields ["displayName" ])
216243
217244
218- class JSONSchemaFormat (Enum ):
219- """This enum is the currently supported JSON Schema formats"""
220-
221- DATE = "date"
222- URI = "uri"
223-
224-
225245def load_json (file_path : str ) -> Any :
226246 """Load json document from file path or url
227247
@@ -635,7 +655,10 @@ def gather_csv_attributes_relationships(
635655
636656 # get attributes from Attribute column
637657 attributes = model_df .to_dict ("records" )
658+
659+ # Check for presence of optional columns
638660 model_includes_column_type = "columnType" in model_df .columns
661+ model_includes_format = "Format" in model_df .columns
639662
640663 # Build attribute/relationship dictionary
641664 relationship_types = self .required_headers
@@ -659,6 +682,9 @@ def gather_csv_attributes_relationships(
659682 attr_rel_dictionary [attribute_name ]["Relationships" ].update (
660683 column_type_dict
661684 )
685+ if model_includes_format :
686+ format_dict = self .parse_format (attr )
687+ attr_rel_dictionary [attribute_name ]["Relationships" ].update (format_dict )
662688 return attr_rel_dictionary
663689
664690 def parse_column_type (self , attr : dict ) -> dict :
@@ -691,6 +717,34 @@ def parse_column_type(self, attr: dict) -> dict:
691717
692718 return {"ColumnType" : column_type }
693719
def parse_format(self, attribute_dict: dict) -> dict[str, str]:
    """Extract the normalized ``Format`` value of one attribute row.

    The raw value is read from the ``"Format"`` key, converted to a string,
    stripped of surrounding whitespace and lower-cased. It is then passed to
    ``check_allowed_values`` for the ``"format"`` relationship.

    Args:
        attribute_dict: The attribute dictionary (one row of the data model).

    Returns:
        ``{"Format": <normalized value>}`` when a format is present,
        otherwise an empty dict (key missing, ``None`` or NaN).
    """
    from pandas import isna

    raw_format = attribute_dict.get("Format")

    # A missing key yields None; an empty CSV cell is presumably NaN.
    # Neither should add a "Format" entry to the relationships.
    if isna(raw_format):
        return {}

    normalized_format = str(raw_format).strip().lower()

    # NOTE(review): entry_id receives the raw Format cell rather than an
    # identifier of the attribute - confirm this is what
    # check_allowed_values expects for its messages.
    check_allowed_values(
        self.dmr,
        entry_id=attribute_dict["Format"],
        value=normalized_format,
        relationship="format",
    )

    return {"Format": normalized_format}
747+
694748 def parse_csv_model (
695749 self ,
696750 path_to_data_model : str ,
@@ -1708,6 +1762,37 @@ def get_node_column_type(
17081762 raise ValueError (msg )
17091763 return column_type
17101764
def get_node_format(
    self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
) -> Optional[JSONSchemaFormat]:
    """Gets the format of the node

    The node is resolved via ``_get_node_label`` from either its label or
    its display name. The stored format value is lower-cased before it is
    converted to a ``JSONSchemaFormat`` member.

    Args:
        node_label: The label of the node to get the format from
        node_display_name: The display name of the node to get the format from

    Raises:
        ValueError: If the value from the node is not allowed

    Returns:
        The format of the node if it has one, otherwise None
    """
    node_label = self._get_node_label(node_label, node_display_name)
    # Name of the node attribute under which the "format" relationship is stored.
    rel_node_label = self.dmr.get_relationship_value("format", "node_label")
    format_value = self.graph.nodes[node_label][rel_node_label]
    if format_value is None:
        return None

    format_string = str(format_value).lower()
    try:
        return JSONSchemaFormat(format_string)
    except ValueError as exc:
        # The list repr already supplies the surrounding brackets, so no
        # literal "[...]" is added around it (previously rendered "[[...]]").
        allowed_values = [member.value for member in JSONSchemaFormat]
        msg = (
            f"Node: '{node_label}' had illegal format value: '{format_value}'. "
            f"Allowed values are: {allowed_values}"
        )
        raise ValueError(msg) from exc
1795+
17111796 def _get_node_label (
17121797 self , node_label : Optional [str ] = None , node_display_name : Optional [str ] = None
17131798 ) -> str :
@@ -2826,6 +2911,16 @@ def define_data_model_relationships(self) -> dict:
28262911 "node_attr_dict" : {"default" : None },
28272912 "allowed_values" : ALL_COLUMN_TYPE_VALUES ,
28282913 },
2914+ "format" : {
2915+ "jsonld_key" : "sms:format" ,
2916+ "csv_header" : "Format" ,
2917+ "node_label" : "format" ,
2918+ "type" : str ,
2919+ "required_header" : False ,
2920+ "edge_rel" : False ,
2921+ "node_attr_dict" : {"default" : None },
2922+ "allowed_values" : [member .value for member in JSONSchemaFormat ],
2923+ },
28292924 }
28302925
28312926 return map_data_model_relationships
@@ -4290,6 +4385,7 @@ def _get_rules_by_names(names: list[str]) -> list[ValidationRule]:
42904385def _get_validation_rule_based_fields (
42914386 validation_rules : list [str ],
42924387 explicit_is_array : Optional [bool ],
4388+ explicit_format : Optional [JSONSchemaFormat ],
42934389 name : str ,
42944390 column_type : Optional [ColumnType ],
42954391 logger : Logger ,
@@ -4334,7 +4430,7 @@ def _get_validation_rule_based_fields(
43344430 - js_pattern: If the type is string the JSON Schema pattern
43354431 """
43364432 js_is_array = False
4337- js_format = None
4433+ js_format = explicit_format
43384434 js_minimum = None
43394435 js_maximum = None
43404436 js_pattern = None
@@ -4398,10 +4494,51 @@ def _get_validation_rule_based_fields(
43984494 )
43994495 logger .warning (msg )
44004496
4401- if ValidationRuleName .URL in validation_rule_names :
4402- js_format = JSONSchemaFormat .URI
4403- elif ValidationRuleName .DATE in validation_rule_names :
4404- js_format = JSONSchemaFormat .DATE
4497+ # url and date rules are deprecated for adding format keyword
4498+ # TODO: remove the if/else block below
4499+ # https://sagebionetworks.jira.com/browse/SYNPY-1685
4500+
4501+ if explicit_format :
4502+ if (
4503+ ValidationRuleName .DATE in validation_rule_names
4504+ and explicit_format == JSONSchemaFormat .URI
4505+ ):
4506+ msg = (
4507+ f"For property: { name } , the format is uri, "
4508+ "but the validation rule date is present. "
4509+ "The format will be set to uri."
4510+ )
4511+ logger .warning (msg )
4512+ elif (
4513+ ValidationRuleName .URL in validation_rule_names
4514+ and explicit_format == JSONSchemaFormat .DATE
4515+ ):
4516+ msg = (
4517+ f"For property: { name } , the format is date, "
4518+ "but the validation rule url is present. "
4519+ "The format will be set to date."
4520+ )
4521+ logger .warning (msg )
4522+
4523+ else :
4524+ if ValidationRuleName .URL in validation_rule_names :
4525+ js_format = JSONSchemaFormat .URI
4526+ msg = (
4527+ f"A url validation rule is set for property: { name } , but the format is not set. "
4528+ "The format will be set to uri, but this behavior is deprecated and validation "
4529+ "rules will no longer be used in the future."
4530+ "Please explicitly set the format to uri in the data model."
4531+ )
4532+ logger .warning (msg )
4533+ elif ValidationRuleName .DATE in validation_rule_names :
4534+ js_format = JSONSchemaFormat .DATE
4535+ msg = (
4536+ f"A date validation rule is set for property: { name } , but the format is not set. "
4537+ "The format will be set to date, but this behavior is deprecated and validation "
4538+ "rules will no longer be used in the future."
4539+ "Please explicitly set the format to uri in the data model."
4540+ )
4541+ logger .warning (msg )
44054542
44064543 in_range_rule = get_rule_from_inputted_rules (
44074544 ValidationRuleName .IN_RANGE , validation_rules
@@ -4417,7 +4554,6 @@ def _get_validation_rule_based_fields(
44174554 if regex_rule :
44184555 js_pattern = get_regex_parameters_from_inputted_rule (regex_rule )
44194556
4420- print (js_is_array )
44214557 return (
44224558 js_is_array ,
44234559 js_format ,
@@ -4496,7 +4632,6 @@ def __post_init__(self) -> None:
44964632 column_type = self .dmge .get_node_column_type (
44974633 node_display_name = self .display_name
44984634 )
4499-
45004635 # list validation rule has been deprecated for use in deciding type
45014636 # TODO: set self.is_array here instead of return from _get_validation_rule_based_fields
45024637 # https://sagebionetworks.jira.com/browse/SYNPY-1692
@@ -4509,6 +4644,22 @@ def __post_init__(self) -> None:
45094644 else :
45104645 self .type = None
45114646 explicit_is_array = None
4647+
4648+ # url and date rules are deprecated for adding format keyword
4649+ # TODO: set self.format here instead of passing it to get_validation_rule_based_fields
4650+ # https://sagebionetworks.jira.com/browse/SYNPY-1685
4651+ explicit_format = self .dmge .get_node_format (node_display_name = self .display_name )
4652+ if explicit_format :
4653+ if column_type not in (ListColumnType .STRING_LIST , AtomicColumnType .STRING ):
4654+ msg = (
4655+ f"A format value (current value: { explicit_format .value } ) "
4656+ f"is set for property: { self .name } , but columnType is not a string type "
4657+ f"(current value: { column_type .value } ). "
4658+ "To use a format value the columnType must be set to one of: "
4659+ "[string, string_list] "
4660+ )
4661+ raise ValueError (msg )
4662+
45124663 (
45134664 self .is_array ,
45144665 self .format ,
@@ -4518,6 +4669,7 @@ def __post_init__(self) -> None:
45184669 ) = _get_validation_rule_based_fields (
45194670 validation_rules = validation_rules ,
45204671 explicit_is_array = explicit_is_array ,
4672+ explicit_format = explicit_format ,
45214673 name = self .name ,
45224674 column_type = self .type ,
45234675 logger = self .logger ,
@@ -4896,7 +5048,7 @@ def _set_conditional_dependencies(
48965048
48975049
48985050def _create_enum_array_property (
4899- node : Node , use_valid_value_display_names : bool = True
5051+ node : TraversalNode , use_valid_value_display_names : bool = True
49005052) -> Property :
49015053 """
49025054 Creates a JSON Schema property array with enum items
@@ -4930,7 +5082,7 @@ def _create_enum_array_property(
49305082 return array_property
49315083
49325084
4933- def _create_array_property (node : Node ) -> Property :
5085+ def _create_array_property (node : TraversalNode ) -> Property :
49345086 """
49355087 Creates a JSON Schema property array
49365088
@@ -4962,7 +5114,7 @@ def _create_array_property(node: Node) -> Property:
49625114
49635115
49645116def _create_enum_property (
4965- node : Node , use_valid_value_display_names : bool = True
5117+ node : TraversalNode , use_valid_value_display_names : bool = True
49665118) -> Property :
49675119 """
49685120 Creates a JSON Schema property enum
@@ -4995,7 +5147,7 @@ def _create_enum_property(
49955147 return enum_property
49965148
49975149
4998- def _create_simple_property (node : Node ) -> Property :
5150+ def _create_simple_property (node : TraversalNode ) -> Property :
49995151 """
50005152 Creates a JSON Schema property
50015153
@@ -5031,7 +5183,7 @@ def _create_simple_property(node: Node) -> Property:
50315183 return prop
50325184
50335185
5034- def _set_type_specific_keywords (schema : dict [str , Any ], node : Node ) -> None :
5186+ def _set_type_specific_keywords (schema : dict [str , Any ], node : TraversalNode ) -> None :
50355187 """Sets JSON Schema keywords that are allowed if type has been set
50365188
50375189 Arguments:
@@ -5049,7 +5201,7 @@ def _set_type_specific_keywords(schema: dict[str, Any], node: Node) -> None:
50495201
50505202def _set_property (
50515203 json_schema : JSONSchema ,
5052- node : Node ,
5204+ node : TraversalNode ,
50535205 use_property_display_names : bool = True ,
50545206 use_valid_value_display_names : bool = True ,
50555207) -> None :
0 commit comments