-
Notifications
You must be signed in to change notification settings - Fork 73
[SYNPY-1686] Support Pattern column in data model #1285
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
ea0a85f
00aeeee
34b8ff6
e1d0ebe
e4b915e
da2e667
5ed6e0b
074322f
186c2be
7b8f716
d622b53
12e118c
857cf78
e286ddb
2c04290
8d73871
c389b3e
581797b
269e8c3
599129b
60159d6
b4b27a7
23dc7f2
4be1e6a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -659,6 +659,7 @@ def gather_csv_attributes_relationships( | |
| # Check for presence of optional columns | ||
| model_includes_column_type = "columnType" in model_df.columns | ||
| model_includes_format = "Format" in model_df.columns | ||
| model_includes_pattern = "Pattern" in model_df.columns | ||
|
|
||
| # Build attribute/relationship dictionary | ||
| relationship_types = self.required_headers | ||
|
|
@@ -697,6 +698,12 @@ def gather_csv_attributes_relationships( | |
| attr_rel_dictionary[attribute_name]["Relationships"].update( | ||
| maximum_dict | ||
| ) | ||
|
|
||
| if model_includes_pattern: | ||
| pattern_dict = self.parse_pattern(attr) | ||
| attr_rel_dictionary[attribute_name]["Relationships"].update( | ||
| pattern_dict | ||
| ) | ||
| return attr_rel_dictionary | ||
|
|
||
| def parse_column_type(self, attr: dict) -> dict: | ||
|
|
@@ -798,6 +805,26 @@ def parse_format(self, attribute_dict: dict) -> dict[str, str]: | |
|
|
||
| return {"Format": format_string} | ||
|
|
||
| def parse_pattern(self, attribute_dict: dict) -> dict[str, str]: | ||
| """Finds the pattern value if it exists and returns it as a dictionary. | ||
|
|
||
| Args: | ||
| attribute_dict: The attribute dictionary. | ||
| Returns: | ||
| A dictionary containing the pattern value if it exists | ||
| else an empty dict | ||
| """ | ||
| from pandas import isna | ||
|
|
||
| pattern_value = attribute_dict.get("Pattern") | ||
|
|
||
| if isna(pattern_value): | ||
| return {} | ||
|
|
||
| pattern_string = str(pattern_value).strip() | ||
|
|
||
| return {"Pattern": pattern_string} | ||
|
|
||
| def parse_csv_model( | ||
| self, | ||
| path_to_data_model: str, | ||
|
|
@@ -1815,6 +1842,27 @@ def get_node_column_type( | |
| raise ValueError(msg) | ||
| return column_type | ||
|
|
||
| def get_node_column_pattern( | ||
| self, node_label: Optional[str] = None, node_display_name: Optional[str] = None | ||
| ) -> Optional[str]: | ||
| """Gets the regex pattern of the node | ||
|
|
||
| Args: | ||
| node_label: The label of the node to get the pattern from | ||
| node_display_name: The display name of the node to get the pattern from | ||
|
|
||
| Raises: | ||
| ValueError: If the value from the node is not allowed | ||
|
|
||
| Returns: | ||
| The column pattern of the node if it has one, otherwise None | ||
| """ | ||
| node_label = self._get_node_label(node_label, node_display_name) | ||
| rel_node_label = self.dmr.get_relationship_value("pattern", "node_label") | ||
| pattern = self.graph.nodes[node_label][rel_node_label] | ||
|
|
||
| return pattern | ||
|
|
||
| def get_node_format( | ||
| self, node_label: Optional[str] = None, node_display_name: Optional[str] = None | ||
| ) -> Optional[JSONSchemaFormat]: | ||
|
|
@@ -1942,6 +1990,9 @@ class PropertyTemplate: | |
| magic_validationRules: list = field( | ||
| default_factory=list, metadata=config(field_name="sms:validationRules") | ||
| ) | ||
| magic_pattern: list = field( | ||
SageGJ marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| default_factory=list, metadata=config(field_name="sms:pattern") | ||
| ) | ||
|
|
||
|
|
||
| @dataclass_json | ||
|
|
@@ -2841,6 +2892,7 @@ def define_data_model_relationships(self) -> dict: | |
| allowed_values: A list of values the entry must be one of | ||
| edge_dir: str, 'in'/'out' is the edge an in or out edge. Define for edge relationships | ||
| jsonld_dir: str, 'in'/'out' is the direction in or out in the JSONLD. | ||
| pattern: regex pattern that the entry must match | ||
| """ | ||
| map_data_model_relationships = { | ||
| "displayName": { | ||
|
|
@@ -3016,6 +3068,15 @@ def define_data_model_relationships(self) -> dict: | |
| "edge_rel": False, | ||
| "node_attr_dict": {"default": None}, | ||
| }, | ||
| "pattern": { | ||
| "jsonld_key": "sms:pattern", | ||
| "csv_header": "Pattern", | ||
| "node_label": "pattern", | ||
| "type": str, | ||
| "required_header": False, | ||
| "edge_rel": False, | ||
| "node_attr_dict": {"default": None}, | ||
| }, | ||
| } | ||
|
|
||
| return map_data_model_relationships | ||
|
|
@@ -4741,6 +4802,9 @@ def __post_init__(self) -> None: | |
| relationship_value="minimum", node_display_name=self.display_name | ||
| ) | ||
|
|
||
| column_pattern = self.dmge.get_node_column_pattern( | ||
| node_display_name=self.display_name | ||
| ) | ||
| # list validation rule has been deprecated for use in deciding type | ||
| # TODO: set self.is_array here instead of return from _get_validation_rule_based_fields | ||
| # https://sagebionetworks.jira.com/browse/SYNPY-1692 | ||
|
|
@@ -4771,7 +4835,7 @@ def __post_init__(self) -> None: | |
| self.format, | ||
| implicit_minimum, | ||
| implicit_maximum, | ||
| self.pattern, | ||
| rule_pattern, | ||
| ) = _get_validation_rule_based_fields( | ||
| validation_rules=validation_rules, | ||
| explicit_is_array=explicit_is_array, | ||
|
|
@@ -4790,6 +4854,30 @@ def __post_init__(self) -> None: | |
| explicit_maximum if explicit_maximum is not None else implicit_maximum | ||
| ) | ||
|
|
||
| if column_pattern and column_type and column_type.value != "string": | ||
| raise ValueError( | ||
| "Column type must be set to 'string' to use column pattern specification for regex validation." | ||
| ) | ||
|
|
||
| self.pattern = column_pattern if column_pattern else rule_pattern | ||
SageGJ marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| if rule_pattern and not column_pattern: | ||
| msg = ( | ||
| f"A regex validation rule is set for property: {self.name}, but the pattern is not set in the data model. " | ||
| f"The regex pattern will be set to {self.pattern}, but the regex rule is deprecated and validation " | ||
| "rules will no longer be used in the future." | ||
| "Please explicitly set the regex pattern in the 'Pattern' column in the data model." | ||
| ) | ||
| self.logger.warning(msg) | ||
|
|
||
| if self.pattern: | ||
SageGJ marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| try: | ||
| re.compile(self.pattern) | ||
| except re.error as e: | ||
| raise SyntaxError( | ||
| f"The regex pattern '{self.pattern}' for property '{self.name}' is invalid." | ||
| ) from e | ||
|
|
||
| def _determine_type_and_array( | ||
| self, column_type: Optional[ColumnType] | ||
| ) -> tuple[Optional[AtomicColumnType], Optional[bool]]: | ||
|
|
@@ -5369,14 +5457,17 @@ def _set_type_specific_keywords(schema: dict[str, Any], node: TraversalNode) -> | |
| schema: The schema to set keywords on | ||
| node (Node): The node the corresponds to the property which is being set in the JSON Schema | ||
| """ | ||
| for attr in ["minimum", "maximum", "pattern"]: | ||
| for attr in ["minimum", "maximum"]: | ||
| value = getattr(node, attr) | ||
| if value is not None: | ||
| schema[attr] = value | ||
|
|
||
| if node.format is not None: | ||
| schema["format"] = node.format.value | ||
|
|
||
| if hasattr(node, "pattern") and node.pattern is not None: | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in the loop starting on line
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you elaborate @SageGJ ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if no pattern is specified for the node then the |
||
| schema["pattern"] = node.pattern | ||
|
|
||
|
|
||
| def _set_property( | ||
| json_schema: JSONSchema, | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.