Skip to content

Commit 34b8ff6

Browse files
committed
update pattern extraction
1 parent 00aeeee commit 34b8ff6

File tree

1 file changed

+52
-2
lines changed

1 file changed

+52
-2
lines changed

synapseclient/extensions/curator/schema_generation.py

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1840,6 +1840,27 @@ def get_node_column_type(
18401840
raise ValueError(msg)
18411841
return column_type
18421842

1843+
def get_node_column_pattern(
1844+
self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
1845+
) -> Optional[ColumnType]:
1846+
"""Gets the regex pattern of the node
1847+
1848+
Args:
1849+
node_label: The label of the node to get the pattern from
1850+
node_display_name: The display name of the node to get the pattern from
1851+
1852+
Raises:
1853+
ValueError: If the value from the node is not allowed
1854+
1855+
Returns:
1856+
The column pattern of the node if it has one, otherwise None
1857+
"""
1858+
node_label = self._get_node_label(node_label, node_display_name)
1859+
rel_node_label = self.dmr.get_relationship_value("pattern", "node_label")
1860+
pattern = self.graph.nodes[node_label][rel_node_label]
1861+
1862+
return pattern
1863+
18431864
def get_node_format(
18441865
self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
18451866
) -> Optional[JSONSchemaFormat]:
@@ -1967,6 +1988,9 @@ class PropertyTemplate:
19671988
magic_validationRules: list = field(
19681989
default_factory=list, metadata=config(field_name="sms:validationRules")
19691990
)
1991+
magic_pattern: list = field(
1992+
default_factory=list, metadata=config(field_name="sms:pattern")
1993+
)
19701994

19711995

19721996
@dataclass_json
@@ -2001,6 +2025,9 @@ class ClassTemplate:
20012025
magic_validationRules: list = field(
20022026
default_factory=list, metadata=config(field_name="sms:validationRules")
20032027
)
2028+
magic_pattern: list = field(
2029+
default_factory=list, metadata=config(field_name="sms:pattern")
2030+
)
20042031

20052032

20062033
class DataModelJsonLD:
@@ -4771,6 +4798,9 @@ def __post_init__(self) -> None:
47714798
relationship_value="minimum", node_display_name=self.display_name
47724799
)
47734800

4801+
column_pattern = self.dmge.get_node_column_pattern(
4802+
node_display_name=self.display_name
4803+
)
47744804
# list validation rule has been deprecated for use in deciding type
47754805
# TODO: set self.is_array here instead of return from _get_validation_rule_based_fields
47764806
# https://sagebionetworks.jira.com/browse/SYNPY-1692
@@ -4801,7 +4831,7 @@ def __post_init__(self) -> None:
48014831
self.format,
48024832
implicit_minimum,
48034833
implicit_maximum,
4804-
self.pattern,
4834+
rule_pattern,
48054835
) = _get_validation_rule_based_fields(
48064836
validation_rules=validation_rules,
48074837
explicit_is_array=explicit_is_array,
@@ -4819,6 +4849,22 @@ def __post_init__(self) -> None:
48194849
self.maximum = (
48204850
explicit_maximum if explicit_maximum is not None else implicit_maximum
48214851
)
4852+
4853+
if column_pattern and column_type.value != "string":
4854+
raise ValueError(
4855+
"Column type must be set to 'string' to use column pattern specification for regex validation."
4856+
)
4857+
4858+
self.pattern = column_pattern if column_pattern else rule_pattern
4859+
4860+
if rule_pattern:
4861+
msg = (
4862+
f"A regex validation rule is set for property: {self.name}, but the pattern is not set in the data model. "
4863+
f"The regex pattern will be set to {self.pattern}, but the regex rule is deprecated and validation "
4864+
"rules will no longer be used in the future."
4865+
"Please explicitly set the regex pattern in the 'Pattern' column in the data model."
4866+
)
4867+
self.logger.warning(msg)
48224868

48234869
def _determine_type_and_array(
48244870
self, column_type: Optional[ColumnType]
@@ -5121,6 +5167,7 @@ class JSONSchema: # pylint: disable=too-many-instance-attributes
51215167
properties: dict[str, Property] = field(default_factory=dict)
51225168
required: list[str] = field(default_factory=list)
51235169
all_of: list[AllOf] = field(default_factory=list)
5170+
pattern: str = ""
51245171

51255172
def as_json_schema_dict(
51265173
self,
@@ -5399,14 +5446,17 @@ def _set_type_specific_keywords(schema: dict[str, Any], node: TraversalNode) ->
53995446
schema: The schema to set keywords on
54005447
node (Node): The node that corresponds to the property which is being set in the JSON Schema
54015448
"""
5402-
for attr in ["minimum", "maximum", "pattern"]:
5449+
for attr in ["minimum", "maximum"]:
54035450
value = getattr(node, attr)
54045451
if value is not None:
54055452
schema[attr] = value
54065453

54075454
if node.format is not None:
54085455
schema["format"] = node.format.value
54095456

5457+
if hasattr(node, "pattern") and node.pattern is not None:
5458+
schema["pattern"] = node.pattern
5459+
54105460

54115461
def _set_property(
54125462
json_schema: JSONSchema,

0 commit comments

Comments
 (0)