95 changes: 93 additions & 2 deletions synapseclient/extensions/curator/schema_generation.py
@@ -659,6 +659,7 @@ def gather_csv_attributes_relationships(
# Check for presence of optional columns
model_includes_column_type = "columnType" in model_df.columns
model_includes_format = "Format" in model_df.columns
model_includes_pattern = "Pattern" in model_df.columns

# Build attribute/relationship dictionary
relationship_types = self.required_headers
@@ -697,6 +698,12 @@ def gather_csv_attributes_relationships(
attr_rel_dictionary[attribute_name]["Relationships"].update(
maximum_dict
)

if model_includes_pattern:
pattern_dict = self.parse_pattern(attr)
attr_rel_dictionary[attribute_name]["Relationships"].update(
pattern_dict
)
return attr_rel_dictionary

def parse_column_type(self, attr: dict) -> dict:
@@ -798,6 +805,26 @@ def parse_format(self, attribute_dict: dict) -> dict[str, str]:

return {"Format": format_string}

def parse_pattern(self, attribute_dict: dict) -> dict[str, str]:
"""Finds the pattern value if it exists and returns it as a dictionary.

Args:
attribute_dict: The attribute dictionary.
Returns:
A dictionary containing the pattern value if it exists, otherwise an empty dict.
"""
from pandas import isna

pattern_value = attribute_dict.get("Pattern")

if isna(pattern_value):
return {}

pattern_string = str(pattern_value).strip()

return {"Pattern": pattern_string}

def parse_csv_model(
self,
path_to_data_model: str,
@@ -1815,6 +1842,27 @@ def get_node_column_type(
raise ValueError(msg)
return column_type

def get_node_column_pattern(
self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
) -> Optional[str]:
"""Gets the regex pattern of the node

Args:
node_label: The label of the node to get the type from
node_display_name: The display name of the node to get the type from

Raises:
ValueError: If the value from the node is not allowed

Returns:
The column pattern of the node if it has one, otherwise None
"""
node_label = self._get_node_label(node_label, node_display_name)
rel_node_label = self.dmr.get_relationship_value("pattern", "node_label")
pattern = self.graph.nodes[node_label][rel_node_label]

return pattern

def get_node_format(
self, node_label: Optional[str] = None, node_display_name: Optional[str] = None
) -> Optional[JSONSchemaFormat]:
@@ -1942,6 +1990,9 @@ class PropertyTemplate:
magic_validationRules: list = field(
default_factory=list, metadata=config(field_name="sms:validationRules")
)
magic_pattern: list = field(
default_factory=list, metadata=config(field_name="sms:pattern")
)


@dataclass_json
@@ -2841,6 +2892,7 @@ def define_data_model_relationships(self) -> dict:
allowed_values: A list of values the entry must be one of
edge_dir: str, 'in'/'out' is the edge an in or out edge. Define for edge relationships
jsonld_dir: str, 'in'/out is the direction in or out in the JSONLD.
pattern: regex pattern that the entry must match
"""
map_data_model_relationships = {
"displayName": {
@@ -3016,6 +3068,15 @@ def define_data_model_relationships(self) -> dict:
"edge_rel": False,
"node_attr_dict": {"default": None},
},
"pattern": {
"jsonld_key": "sms:pattern",
"csv_header": "Pattern",
"node_label": "pattern",
"type": str,
"required_header": False,
"edge_rel": False,
"node_attr_dict": {"default": None},
},
}

return map_data_model_relationships
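# --- Illustrative sketch (not part of the diff): how the new "pattern" entry ties the
# CSV header, graph node attribute, and JSON-LD key together, assuming a relationships
# map shaped like the one returned above. Variable names are hypothetical.
relationships = {
    "pattern": {
        "jsonld_key": "sms:pattern",
        "csv_header": "Pattern",
        "node_label": "pattern",
        "type": str,
        "required_header": False,
        "edge_rel": False,
        "node_attr_dict": {"default": None},
    },
}

entry = relationships["pattern"]
csv_row = {"Attribute": "SampleID", "Pattern": r"^GSM\d+$"}

# CSV header -> graph node attribute, falling back to the declared default when absent.
node_attrs = {entry["node_label"]: csv_row.get(entry["csv_header"], entry["node_attr_dict"]["default"])}

# Graph node attribute -> JSON-LD key when the model is serialized.
jsonld_fragment = {entry["jsonld_key"]: node_attrs[entry["node_label"]]}

assert jsonld_fragment == {"sms:pattern": r"^GSM\d+$"}
# --- end sketch ---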
@@ -4741,6 +4802,9 @@ def __post_init__(self) -> None:
relationship_value="minimum", node_display_name=self.display_name
)

column_pattern = self.dmge.get_node_column_pattern(
node_display_name=self.display_name
)
# list validation rule has been deprecated for use in deciding type
# TODO: set self.is_array here instead of return from _get_validation_rule_based_fields
# https://sagebionetworks.jira.com/browse/SYNPY-1692
@@ -4771,7 +4835,7 @@ def __post_init__(self) -> None:
self.format,
implicit_minimum,
implicit_maximum,
self.pattern,
rule_pattern,
) = _get_validation_rule_based_fields(
validation_rules=validation_rules,
explicit_is_array=explicit_is_array,
@@ -4790,6 +4854,30 @@ def __post_init__(self) -> None:
explicit_maximum if explicit_maximum is not None else implicit_maximum
)

if column_pattern and column_type and column_type.value != "string":
raise ValueError(
"Column type must be set to 'string' to use column pattern specification for regex validation."
)

self.pattern = column_pattern if column_pattern else rule_pattern

if rule_pattern and not column_pattern:
msg = (
f"A regex validation rule is set for property: {self.name}, but the pattern is not set in the data model. "
f"The regex pattern will be set to {self.pattern}, but the regex rule is deprecated and validation "
"rules will no longer be used in the future."
"Please explicitly set the regex pattern in the 'Pattern' column in the data model."
)
self.logger.warning(msg)

if self.pattern:
try:
re.compile(self.pattern)
except re.error as e:
raise SyntaxError(
f"The regex pattern '{self.pattern}' for property '{self.name}' is invalid."
) from e
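# --- Illustrative sketch (not part of the diff): a standalone reproduction of the
# precedence and validation applied above. An explicit 'Pattern' column value wins over
# a deprecated regex validation rule, and the chosen pattern must compile. Names are
# hypothetical; the real logic lives in __post_init__ and also emits the deprecation warning.
import re
from typing import Optional


def _sketch_resolve_pattern(
    column_pattern: Optional[str],
    rule_pattern: Optional[str],
    column_type: Optional[str],
    name: str,
) -> Optional[str]:
    if column_pattern and column_type and column_type != "string":
        raise ValueError(
            "Column type must be set to 'string' to use column pattern specification for regex validation."
        )
    pattern = column_pattern if column_pattern else rule_pattern
    if pattern:
        try:
            re.compile(pattern)
        except re.error as e:
            raise SyntaxError(f"The regex pattern '{pattern}' for property '{name}' is invalid.") from e
    return pattern


assert _sketch_resolve_pattern(r"^GSM\d+$", r"^\d+$", "string", "SampleID") == r"^GSM\d+$"
assert _sketch_resolve_pattern(None, r"^\d+$", "string", "SampleID") == r"^\d+$"
# --- end sketch ---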

def _determine_type_and_array(
self, column_type: Optional[ColumnType]
) -> tuple[Optional[AtomicColumnType], Optional[bool]]:
@@ -5369,14 +5457,17 @@ def _set_type_specific_keywords(schema: dict[str, Any], node: TraversalNode) ->
schema: The schema to set keywords on
node (TraversalNode): The node that corresponds to the property being set in the JSON Schema
"""
for attr in ["minimum", "maximum", "pattern"]:
for attr in ["minimum", "maximum"]:
value = getattr(node, attr)
if value is not None:
schema[attr] = value

if node.format is not None:
schema["format"] = node.format.value

if hasattr(node, "pattern") and node.pattern is not None:
Contributor:
Why if hasattr(node, "pattern")?

Contributor (Author):
In the loop starting on line 5280, assuming the node has the pattern attribute raises an exception when it is not present. @andrewelamb

Contributor:
Could you elaborate, @SageGJ? In any case, the node here is an instance of the TraversalNode dataclass, and one of its attributes is pattern, so you shouldn't ever have to check that the attribute exists via hasattr, right?

Contributor (Author):
If no pattern is specified for the node, then the TraversalNode instance of that node will not have a pattern attribute.

schema["pattern"] = node.pattern


def _set_property(
json_schema: JSONSchema,