From ea0a85fa492bbfa4a7b8807149da4526bcf709e0 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Mon, 24 Nov 2025 16:56:31 -0700 Subject: [PATCH 01/29] add pattern parsing --- .../extensions/curator/schema_generation.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 172bd8c1c..ee9cf5c7b 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -659,6 +659,7 @@ def gather_csv_attributes_relationships( # Check for presence of optional columns model_includes_column_type = "columnType" in model_df.columns model_includes_format = "Format" in model_df.columns + model_includes_pattern = "Pattern" in model_df.columns # Build attribute/relationship dictionary relationship_types = self.required_headers @@ -696,6 +697,10 @@ def gather_csv_attributes_relationships( maximum_dict = self.parse_minimum_maximum(attr, "Maximum") attr_rel_dictionary[attribute_name]["Relationships"].update( maximum_dict + if model_includes_pattern: + pattern_dict = self.parse_pattern(attr) + attr_rel_dictionary[attribute_name]["Relationships"].update( + pattern_dict ) return attr_rel_dictionary @@ -798,6 +803,26 @@ def parse_format(self, attribute_dict: dict) -> dict[str, str]: return {"Format": format_string} + def parse_pattern(self, attribute_dict: dict) -> dict[str, str]: + """Finds the pattern value if it exists and returns it as a dictionary. + + Args: + attribute_dict: The attribute dictionary. 
+ Returns: + A dictionary containing the pattern value if it exists + else an empty dict + """ + from pandas import isna + + pattern_value = attribute_dict.get("Pattern") + + if isna(pattern_value): + return {} + + pattern_string = str(pattern_value).strip() + + return {"Pattern": pattern_string} + def parse_csv_model( self, path_to_data_model: str, @@ -3012,6 +3037,11 @@ def define_data_model_relationships(self) -> dict: "csv_header": "Minimum", "node_label": "minimum", "type": Union[float, int], + "pattern": { + "jsonld_key": "sms:pattern", + "csv_header": "Pattern", + "node_label": "pattern", + "type": str, "required_header": False, "edge_rel": False, "node_attr_dict": {"default": None}, From 00aeeee13569d5f6cdc06d3a4557aaabb0a94573 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Mon, 24 Nov 2025 16:57:26 -0700 Subject: [PATCH 02/29] add pattern column --- .../data_models/example.model.csv | 182 +++++++++--------- 1 file changed, 91 insertions(+), 91 deletions(-) diff --git a/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv b/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv index f5aee46fb..30933c453 100644 --- a/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv +++ b/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv @@ -1,92 +1,92 @@ -Attribute,Description,Valid Values,DependsOn,Properties,Required,Parent,DependsOn Component,Source,Validation Rules,columnType,Format,Maximum,Minimum -Component,,,,,TRUE,,,,,,,, -Patient,,,"Patient ID, Sex, Year of Birth, Diagnosis, Component",,FALSE,DataType,,,,,,, +Attribute,Description,Valid Values,DependsOn,Properties,Required,Parent,DependsOn Component,Source,Validation Rules,columnType,Format,Maximum,Minimum,Pattern +Component,,,,,TRUE,,,,,,,,, +Patient,,,"Patient ID, Sex, Year of Birth, Diagnosis, Component",,FALSE,DataType,,,,,,,, Patient ID,,,,,TRUE,DataProperty,,,#Patient unique warning^^#Biospecimen 
unique error,,,, -Sex,,"Female, Male, Other",,,TRUE,DataProperty,,,,,,, -Year of Birth,,,,,FALSE,DataProperty,,,,,,, -Diagnosis,,"Healthy, Cancer",,,TRUE,DataProperty,,,,,,, -Cancer,,,"Cancer Type, Family History",,FALSE,ValidValue,,,,,,, -Cancer Type,,"Breast, Colorectal, Lung, Prostate, Skin",,,TRUE,DataProperty,,,,,,, -Family History,,"Breast, Colorectal, Lung, Prostate, Skin",,,TRUE,DataProperty,,,list strict,,,, -Biospecimen,,,"Sample ID, Patient ID, Tissue Status, Component",,FALSE,DataType,Patient,,,,,, -Sample ID,,,,,TRUE,DataProperty,,,,,,, -Tissue Status,,"Healthy, Malignant, None",,,TRUE,DataProperty,,,,,,, -Bulk RNA-seq Assay,,,"Filename, Sample ID, File Format, Component",,FALSE,DataType,Biospecimen,,,,,, -Filename,,,,,TRUE,DataProperty,,,#MockFilename filenameExists^^,,,, -File Format,,"FASTQ, BAM, CRAM, CSV/TSV",,,TRUE,DataProperty,,,,,,, -BAM,,,Genome Build,,FALSE,ValidValue,,,,,,, -CRAM,,,"Genome Build, Genome FASTA",,FALSE,ValidValue,,,,,,, -CSV/TSV,,,Genome Build,,FALSE,ValidValue,,,,,,, -Genome Build,,"GRCh37, GRCh38, GRCm38, GRCm39",,,TRUE,DataProperty,,,,,,, -Genome FASTA,,,,,TRUE,DataProperty,,,,,,, -MockComponent,Component to hold mock attributes for testing all validation rules,,"Component, Check List, Check List Enum, Check List Like, Check List Like Enum, Check List Strict, Check List Enum Strict, Check Regex List, Check Regex List Like, Check Regex List Strict, Check Regex Single, Check Regex Format, Check Regex Integer, Check Num, Check Float, Check Int, Check String, Check URL,Check Match at Least, Check Match at Least values, Check Match Exactly, Check Match Exactly values, Check Match None, Check Match None values, Check Recommended, Check Ages, Check Unique, Check Range, Check Date, Check NA",,FALSE,DataType,,,,,,, -Check List,,,,,TRUE,DataProperty,,,list,,,, -Check List Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list,,,, -Check List Like,,,,,TRUE,DataProperty,,,list like,,,, -Check List Like Enum,,"ab, cd, ef, 
gh",,,TRUE,DataProperty,,,list like,,,, -Check List Strict,,,,,TRUE,DataProperty,,,list strict,,,, -Check List Enum Strict,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list strict,,,, -Check Regex List,,,,,TRUE,DataProperty,,,list::regex match [a-f],,,, -Check Regex List Strict,,,,,TRUE,DataProperty,,,list strict::regex match [a-f],,,, -Check Regex List Like,,,,,TRUE,DataProperty,,,list like::regex match [a-f],,,, -Check Regex Single,,,,,TRUE,DataProperty,,,regex search [a-f],,,, -Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f],,,, -Check Regex Integer,,,,,TRUE,DataProperty,,,regex search ^\d+$,,,, -Check Num,,,,,TRUE,DataProperty,,,num error,,,, -Check Float,,,,,TRUE,DataProperty,,,float error,number,,, -Check Int,,,,,TRUE,DataProperty,,,int error,,,, -Check String,,,,,TRUE,DataProperty,,,str error,,,, -Check URL,,,,,TRUE,DataProperty,,,url,string,uri,, -Check Match at Least,,,,,TRUE,DataProperty,,,matchAtLeastOne Patient.PatientID set,,,, -Check Match Exactly,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactly set,,,, -Check Match None,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNone set error,,,, -Check Match at Least values,,,,,TRUE,DataProperty,,,matchAtLeastOne MockComponent.checkMatchatLeastvalues value,,,, -Check Match Exactly values,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactlyvalues value,,,, -Check Match None values,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNonevalues value error,,,, -Check Recommended,,,,,FALSE,DataProperty,,,recommended,,,, -Check Ages,,,,,TRUE,DataProperty,,,protectAges,,,, -Check Unique,,,,,TRUE,DataProperty,,,unique error,,,, -Check Range,,,,,TRUE,DataProperty,,,inRange 50 100 error,,,, -Check Date,,,,,TRUE,DataProperty,,,date,string,date,, -Check NA,,,,,TRUE,DataProperty,,,int::IsNA,,,, -MockRDB,,,"Component, MockRDB_id, SourceManifest",,FALSE,DataType,,,,,,, -MockRDB_id,,,,,TRUE,DataProperty,,,int,,,, -SourceManifest,,,,,TRUE,DataProperty,,,,,,, 
-MockFilename,,,"Component, Filename",,FALSE,DataType,,,,,,, -JSONSchemaComponent,Component to hold attributes for testing JSON Schemas,,"Component, No Rules, No Rules Not Required, String, String Not Required, Enum, Enum Not Required, Date, URL, InRange, Regex, List, List Not Required, List Enum, List Enum Not Required, List Boolean, List, Integer, List InRange",,FALSE,DataType,,,,,,, -No Rules,,,,,TRUE,DataProperty,,,,,,, -No Rules Not Required,,,,,FALSE,DataProperty,,,,,,, -String,,,,,TRUE,DataProperty,,,,string,,, -String Not Required,,,,,FALSE,DataProperty,,,,string,,, -Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,,string,,, -Enum Not Required,,"ab, cd, ef, gh",,,FALSE,DataProperty,,,,string,,, -Date,,,,,TRUE,DataProperty,,,date,string,date,, -URL,,,,,TRUE,DataProperty,,,url,string,uri,, -InRange,,,,,TRUE,DataProperty,,,inRange 50 100,number,,, -Regex,,,,,TRUE,DataProperty,,,regex search [a-f],string,,, -List,,,,,TRUE,DataProperty,,,,string_list,,, -List Not Required,,,,,FALSE,DataProperty,,,,string_list,,, -List Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,,string_list,,, -List Enum Not Required,,"ab, cd, ef, gh",,,FALSE,DataProperty,,,,string_list,,, -List Boolean,,,,,TRUE,DataProperty,,,,boolean_list,,, -List Integer,,,,,TRUE,DataProperty,,,,integer_list,,, -List InRange,,,,,TRUE,DataProperty,,,inRange 50 100,integer_list,,, -TypeDefinitionComponent,Component to check type specification,,"Component, String type, String type caps, Int type, Int type caps, Num type, Num type caps, Nan type, Missing type, Boolean type, Boolean type caps",,FALSE,DataType,,,,,,, -String type,,,,,TRUE,DataProperty,,,,string,,, -String type caps,,,,,TRUE,DataProperty,,,,STRING,,, -Int type,,,,,TRUE,DataProperty,,,,integer,,, -Int type caps,,,,,TRUE,DataProperty,,,,INTEGER,,, -Num type,,,,,TRUE,DataProperty,,,,number,,, -Num type caps,,,,,TRUE,DataProperty,,,,NUMBER,,, -Nan type,,,,,TRUE,DataProperty,,,,nan,,, -Missing type,,,,,TRUE,DataProperty,,,,,,, -Boolean 
type,,,,,TRUE,DataProperty,,,,boolean,,, -Boolean type caps,,,,,TRUE,DataProperty,,,,BOOLEAN,,, -RangeComponent,Component to ensure maximum and minimum can be set correctly,,"Component, Maximum Integer, Minimum Integer, Maximum Float, Minimum Float, Maximum Minimum, Maximum Minimum Integer List, Maximum Minimum Validation Rule",,FALSE,DataType,,,,,,, -Maximum Integer,,,,,TRUE,DataProperty,,,,integer,,100, -Minimum Integer,,,,,TRUE,DataProperty,,,,integer,,,10 -Maximum Float,,,,,TRUE,DataProperty,,,,number,,100.5, -Minimum Float,,,,,TRUE,DataProperty,,,,number,,,10.8 -Maximum Minimum,,,,,TRUE,DataProperty,,,,integer,,100,10 -Maximum Minimum Integer List,,,,,TRUE,DataProperty,,,,integer_list,,100,10 -Maximum Minimum Validation Rule,,,,,TRUE,DataProperty,,,inRange 50 100,integer,,200,10 +Sex,,"Female, Male, Other",,,TRUE,DataProperty,,,,,,,, +Year of Birth,,,,,FALSE,DataProperty,,,,,,,, +Diagnosis,,"Healthy, Cancer",,,TRUE,DataProperty,,,,,,,, +Cancer,,,"Cancer Type, Family History",,FALSE,ValidValue,,,,,,,, +Cancer Type,,"Breast, Colorectal, Lung, Prostate, Skin",,,TRUE,DataProperty,,,,,,,, +Family History,,"Breast, Colorectal, Lung, Prostate, Skin",,,TRUE,DataProperty,,,list strict,,,,, +Biospecimen,,,"Sample ID, Patient ID, Tissue Status, Component",,FALSE,DataType,Patient,,,,,,, +Sample ID,,,,,TRUE,DataProperty,,,,,,,, +Tissue Status,,"Healthy, Malignant, None",,,TRUE,DataProperty,,,,,,,, +Bulk RNA-seq Assay,,,"Filename, Sample ID, File Format, Component",,FALSE,DataType,Biospecimen,,,,,,, +Filename,,,,,TRUE,DataProperty,,,#MockFilename filenameExists^^,,,,, +File Format,,"FASTQ, BAM, CRAM, CSV/TSV",,,TRUE,DataProperty,,,,,,,, +BAM,,,Genome Build,,FALSE,ValidValue,,,,,,,, +CRAM,,,"Genome Build, Genome FASTA",,FALSE,ValidValue,,,,,,,, +CSV/TSV,,,Genome Build,,FALSE,ValidValue,,,,,,,, +Genome Build,,"GRCh37, GRCh38, GRCm38, GRCm39",,,TRUE,DataProperty,,,,,,,, +Genome FASTA,,,,,TRUE,DataProperty,,,,,,,, +MockComponent,Component to hold mock attributes for testing all 
validation rules,,"Component, Check List, Check List Enum, Check List Like, Check List Like Enum, Check List Strict, Check List Enum Strict, Check Regex List, Check Regex List Like, Check Regex List Strict, Check Regex Single, Check Regex Format, Check Regex Integer, Check Num, Check Float, Check Int, Check String, Check URL,Check Match at Least, Check Match at Least values, Check Match Exactly, Check Match Exactly values, Check Match None, Check Match None values, Check Recommended, Check Ages, Check Unique, Check Range, Check Date, Check NA",,FALSE,DataType,,,,,,,, +Check List,,,,,TRUE,DataProperty,,,list,,,,, +Check List Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list,,,,, +Check List Like,,,,,TRUE,DataProperty,,,list like,,,,, +Check List Like Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list like,,,,, +Check List Strict,,,,,TRUE,DataProperty,,,list strict,,,,, +Check List Enum Strict,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list strict,,,,, +Check Regex List,,,,,TRUE,DataProperty,,,list::regex match [a-f],,,,, +Check Regex List Strict,,,,,TRUE,DataProperty,,,list strict::regex match [a-f],,,,, +Check Regex List Like,,,,,TRUE,DataProperty,,,list like::regex match [a-f],,,,, +Check Regex Single,,,,,TRUE,DataProperty,,,regex search [a-f],,,,[a-f] +Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f],,,,[a-f] +Check Regex Integer,,,,,TRUE,DataProperty,,,regex search ^\d+$,,,,, +Check Num,,,,,TRUE,DataProperty,,,num error,,,,, +Check Float,,,,,TRUE,DataProperty,,,float error,number,,,, +Check Int,,,,,TRUE,DataProperty,,,int error,,,,, +Check String,,,,,TRUE,DataProperty,,,str error,,,,, +Check URL,,,,,TRUE,DataProperty,,,url,string,uri,,, +Check Match at Least,,,,,TRUE,DataProperty,,,matchAtLeastOne Patient.PatientID set,,,,, +Check Match Exactly,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactly set,,,,, +Check Match None,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNone set error,,,,, +Check Match at Least 
values,,,,,TRUE,DataProperty,,,matchAtLeastOne MockComponent.checkMatchatLeastvalues value,,,,, +Check Match Exactly values,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactlyvalues value,,,,, +Check Match None values,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNonevalues value error,,,,, +Check Recommended,,,,,FALSE,DataProperty,,,recommended,,,,, +Check Ages,,,,,TRUE,DataProperty,,,protectAges,,,,, +Check Unique,,,,,TRUE,DataProperty,,,unique error,,,,, +Check Range,,,,,TRUE,DataProperty,,,inRange 50 100 error,,,,, +Check Date,,,,,TRUE,DataProperty,,,date,string,date,,, +Check NA,,,,,TRUE,DataProperty,,,int::IsNA,,,,, +MockRDB,,,"Component, MockRDB_id, SourceManifest",,FALSE,DataType,,,,,,,, +MockRDB_id,,,,,TRUE,DataProperty,,,int,,,,, +SourceManifest,,,,,TRUE,DataProperty,,,,,,,, +MockFilename,,,"Component, Filename",,FALSE,DataType,,,,,,,, +JSONSchemaComponent,Component to hold attributes for testing JSON Schemas,,"Component, No Rules, No Rules Not Required, String, String Not Required, Enum, Enum Not Required, Date, URL, InRange, Regex, List, List Not Required, List Enum, List Enum Not Required, List Boolean, List, Integer, List InRange",,FALSE,DataType,,,,,,,, +No Rules,,,,,TRUE,DataProperty,,,,,,,, +No Rules Not Required,,,,,FALSE,DataProperty,,,,,,,, +String,,,,,TRUE,DataProperty,,,,string,,,, +String Not Required,,,,,FALSE,DataProperty,,,,string,,,, +Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,,string,,,, +Enum Not Required,,"ab, cd, ef, gh",,,FALSE,DataProperty,,,,string,,,, +Date,,,,,TRUE,DataProperty,,,date,string,date,,, +URL,,,,,TRUE,DataProperty,,,url,string,uri,,, +InRange,,,,,TRUE,DataProperty,,,inRange 50 100,number,,,, +Regex,,,,,TRUE,DataProperty,,,regex search [a-f],string,,,, +List,,,,,TRUE,DataProperty,,,,string_list,,,, +List Not Required,,,,,FALSE,DataProperty,,,,string_list,,,, +List Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,,string_list,,,, +List Enum Not Required,,"ab, cd, ef, 
gh",,,FALSE,DataProperty,,,,string_list,,,, +List Boolean,,,,,TRUE,DataProperty,,,,boolean_list,,,, +List Integer,,,,,TRUE,DataProperty,,,,integer_list,,,, +List InRange,,,,,TRUE,DataProperty,,,inRange 50 100,integer_list,,,, +TypeDefinitionComponent,Component to check type specification,,"Component, String type, String type caps, Int type, Int type caps, Num type, Num type caps, Nan type, Missing type, Boolean type, Boolean type caps",,FALSE,DataType,,,,,,,, +String type,,,,,TRUE,DataProperty,,,,string,,,, +String type caps,,,,,TRUE,DataProperty,,,,STRING,,,, +Int type,,,,,TRUE,DataProperty,,,,integer,,,, +Int type caps,,,,,TRUE,DataProperty,,,,INTEGER,,,, +Num type,,,,,TRUE,DataProperty,,,,number,,,, +Num type caps,,,,,TRUE,DataProperty,,,,NUMBER,,,, +Nan type,,,,,TRUE,DataProperty,,,,nan,,,, +Missing type,,,,,TRUE,DataProperty,,,,,,,, +Boolean type,,,,,TRUE,DataProperty,,,,boolean,,,, +Boolean type caps,,,,,TRUE,DataProperty,,,,BOOLEAN,,,, +RangeComponent,Component to ensure maximum and minimum can be set correctly,,"Component, Maximum Integer, Minimum Integer, Maximum Float, Minimum Float, Maximum Minimum, Maximum Minimum Integer List, Maximum Minimum Validation Rule",,FALSE,DataType,,,,,,,, +Maximum Integer,,,,,TRUE,DataProperty,,,,integer,,100,, +Minimum Integer,,,,,TRUE,DataProperty,,,,integer,,,10, +Maximum Float,,,,,TRUE,DataProperty,,,,number,,100.5,, +Minimum Float,,,,,TRUE,DataProperty,,,,number,,,10.8, +Maximum Minimum,,,,,TRUE,DataProperty,,,,integer,,100,10, +Maximum Minimum Integer List,,,,,TRUE,DataProperty,,,,integer_list,,100,10, +Maximum Minimum Validation Rule,,,,,TRUE,DataProperty,,,inRange 50 100,integer,,200,10, From 34b8ff67046524892bae744accd8b7f9d3ee8df6 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Tue, 25 Nov 2025 16:48:36 -0700 Subject: [PATCH 03/29] update pattern extraction --- .../extensions/curator/schema_generation.py | 54 ++++++++++++++++++- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git 
a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index ee9cf5c7b..99b23ec9a 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -1840,6 +1840,27 @@ def get_node_column_type( raise ValueError(msg) return column_type + def get_node_column_pattern( + self, node_label: Optional[str] = None, node_display_name: Optional[str] = None + ) -> Optional[ColumnType]: + """Gets the regex pattern of the node + + Args: + node_label: The label of the node to get the type from + node_display_name: The display name of the node to get the type from + + Raises: + ValueError: If the value from the node is not allowed + + Returns: + The column pattern of the node if it has one, otherwise None + """ + node_label = self._get_node_label(node_label, node_display_name) + rel_node_label = self.dmr.get_relationship_value("pattern", "node_label") + pattern = self.graph.nodes[node_label][rel_node_label] + + return pattern + def get_node_format( self, node_label: Optional[str] = None, node_display_name: Optional[str] = None ) -> Optional[JSONSchemaFormat]: @@ -1967,6 +1988,9 @@ class PropertyTemplate: magic_validationRules: list = field( default_factory=list, metadata=config(field_name="sms:validationRules") ) + magic_pattern: list = field( + default_factory=list, metadata=config(field_name="sms:pattern") + ) @dataclass_json @@ -2001,6 +2025,9 @@ class ClassTemplate: magic_validationRules: list = field( default_factory=list, metadata=config(field_name="sms:validationRules") ) + magic_pattern: list = field( + default_factory=list, metadata=config(field_name="sms:pattern") + ) class DataModelJsonLD: @@ -4771,6 +4798,9 @@ def __post_init__(self) -> None: relationship_value="minimum", node_display_name=self.display_name ) + column_pattern = self.dmge.get_node_column_pattern( + node_display_name=self.display_name + ) # list validation rule is been deprecated for 
use in deciding type # TODO: set self.is_array here instead of return from _get_validation_rule_based_fields # https://sagebionetworks.jira.com/browse/SYNPY-1692 @@ -4801,7 +4831,7 @@ def __post_init__(self) -> None: self.format, implicit_minimum, implicit_maximum, - self.pattern, + rule_pattern, ) = _get_validation_rule_based_fields( validation_rules=validation_rules, explicit_is_array=explicit_is_array, @@ -4819,6 +4849,22 @@ def __post_init__(self) -> None: self.maximum = ( explicit_maximum if explicit_maximum is not None else implicit_maximum ) + + if column_pattern and column_type.value != "string": + raise ValueError( + "Column type must be set to 'string' to use column pattern specification for regex validation." + ) + + self.pattern = column_pattern if column_pattern else rule_pattern + + if rule_pattern: + msg = ( + f"A regex validation rule is set for property: {self.name}, but the pattern is not set in the data model. " + f"The regex pattern will be set to {self.pattern}, but the regex rule is deprecated and validation " + "rules will no longer be used in the future." + "Please explicitly set the regex pattern in the 'Pattern' column in the data model." 
+ ) + self.logger.warning(msg) def _determine_type_and_array( self, column_type: Optional[ColumnType] @@ -5121,6 +5167,7 @@ class JSONSchema: # pylint: disable=too-many-instance-attributes properties: dict[str, Property] = field(default_factory=dict) required: list[str] = field(default_factory=list) all_of: list[AllOf] = field(default_factory=list) + pattern: str = "" def as_json_schema_dict( self, @@ -5399,7 +5446,7 @@ def _set_type_specific_keywords(schema: dict[str, Any], node: TraversalNode) -> schema: The schema to set keywords on node (Node): The node the corresponds to the property which is being set in the JSON Schema """ - for attr in ["minimum", "maximum", "pattern"]: + for attr in ["minimum", "maximum"]: value = getattr(node, attr) if value is not None: schema[attr] = value @@ -5407,6 +5454,9 @@ def _set_type_specific_keywords(schema: dict[str, Any], node: TraversalNode) -> if node.format is not None: schema["format"] = node.format.value + if hasattr(node, "pattern") and node.pattern is not None: + schema["pattern"] = node.pattern + def _set_property( json_schema: JSONSchema, From e1d0ebead55dc3f8cd68b38dd48068fc2e42cc6a Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Tue, 25 Nov 2025 16:49:07 -0700 Subject: [PATCH 04/29] update data model --- .../extensions/schema_files/data_models/example.model.csv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv b/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv index 30933c453..ceeeba712 100644 --- a/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv +++ b/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv @@ -1,7 +1,7 @@ Attribute,Description,Valid Values,DependsOn,Properties,Required,Parent,DependsOn Component,Source,Validation Rules,columnType,Format,Maximum,Minimum,Pattern Component,,,,,TRUE,,,,,,,,, Patient,,,"Patient ID, Sex, 
Year of Birth, Diagnosis, Component",,FALSE,DataType,,,,,,,, -Patient ID,,,,,TRUE,DataProperty,,,#Patient unique warning^^#Biospecimen unique error,,,, +Patient ID,,,,,TRUE,DataProperty,,,#Patient unique warning^^#Biospecimen unique error,,,,, Sex,,"Female, Male, Other",,,TRUE,DataProperty,,,,,,,, Year of Birth,,,,,FALSE,DataProperty,,,,,,,, Diagnosis,,"Healthy, Cancer",,,TRUE,DataProperty,,,,,,,, @@ -29,8 +29,8 @@ Check List Enum Strict,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list strict,,,,, Check Regex List,,,,,TRUE,DataProperty,,,list::regex match [a-f],,,,, Check Regex List Strict,,,,,TRUE,DataProperty,,,list strict::regex match [a-f],,,,, Check Regex List Like,,,,,TRUE,DataProperty,,,list like::regex match [a-f],,,,, -Check Regex Single,,,,,TRUE,DataProperty,,,regex search [a-f],,,,[a-f] -Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f],,,,[a-f] +Check Regex Single,,,,,TRUE,DataProperty,,,,,,,[a-b], +Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f],,,,^[a-b], Check Regex Integer,,,,,TRUE,DataProperty,,,regex search ^\d+$,,,,, Check Num,,,,,TRUE,DataProperty,,,num error,,,,, Check Float,,,,,TRUE,DataProperty,,,float error,number,,,, From e4b915e5e5f2f20ad3659e8637b7828801ee8f9f Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Tue, 25 Nov 2025 16:49:37 -0700 Subject: [PATCH 05/29] add tests --- .../unit_test_create_json_schema.py | 65 +++++++++++++------ 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 9f5faa418..30f0ebb11 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -7,6 +7,7 @@ import logging import os from typing import Any, Optional +from unittest import mock from unittest.mock import Mock import pytest @@ -101,6 +102,8 @@ def fixture_test_nodes( "EnumNotRequired", 
"InRange", "Regex", + "CheckRegexSingle", + "CheckRegexFormat", "Date", "URL", "List", @@ -327,6 +330,18 @@ def test_update_property(self) -> None: ("InRange", AtomicColumnType.NUMBER, False, 50, 100, None, None), # Node with "regex search [a-f]" validation rule and columnType "string" - pattern is set, type is STRING ("Regex", AtomicColumnType.STRING, False, None, None, "[a-f]", None), + # Node with "[a-f]" pattern column specification and columnType "string" - pattern is set, type is STRING + ("CheckRegexSingle", AtomicColumnType.STRING, False, None, None, "[a-b]", None), + # Node with "regex search [a-f]" validation rule, "^[a-b]" pattern column specification, and columnType "string" - pattern is set, type is STRING + ( + "CheckRegexFormat", + AtomicColumnType.STRING, + False, + None, + None, + "^[a-b]", + None, + ), # Node with "date" validation rule and columnType "string" - format is set to DATE, type is STRING ( "Date", @@ -350,6 +365,8 @@ def test_update_property(self) -> None: "List", "InRange", "Regex", + "CheckRegexSingle", + "CheckRegexFormat", "Date", "URI", "ListBoolean", @@ -365,6 +382,7 @@ def test_node_init( expected_pattern: Optional[str], expected_format: Optional[JSONSchemaFormat], test_nodes: dict[str, TraversalNode], + caplog, ) -> None: """ Tests for TraversalNode class initialization. @@ -377,6 +395,7 @@ def test_node_init( The type property comes from the columnType field, while constraints come from parsing validation rules like "str", "inRange", "regex", etc. """ + node = test_nodes[node_name] assert node.type == expected_type assert node.format == expected_format @@ -384,6 +403,9 @@ def test_node_init( assert node.minimum == expected_min assert node.maximum == expected_max assert node.pattern == expected_pattern + if node_name == "Regex": + warning_message = "A regex validation rule is set for property: Regex, but the pattern is not set in the data model." 
+ assert warning_message in test_nodes[node_name].logger.mock_calls[0][1][0] @pytest.mark.parametrize( @@ -542,26 +564,29 @@ def test_get_validation_rule_based_fields( Test for _get_validation_rule_based_fields Tests that output is expected based on the input validation rules """ - logger = Mock() - ( - is_array, - property_format, - minimum, - maximum, - pattern, - ) = _get_validation_rule_based_fields( - validation_rules, - explicit_is_array=explicit_is_array, - explicit_format=explicit_format, - name="name", - column_type=AtomicColumnType.STRING, - logger=logger, - ) - assert is_array == expected_is_array - assert property_format == expected_format - assert minimum == expected_min - assert maximum == expected_max - assert pattern == expected_pattern + logger = logging.getLogger("synapseclient.extensions.curator.schema_generation") + with mock.patch.object(logger, "warning") as mock_logger: + ( + is_array, + property_format, + minimum, + maximum, + pattern, + ) = _get_validation_rule_based_fields( + validation_rules, + explicit_is_array=explicit_is_array, + explicit_format=explicit_format, + name="name", + column_type=AtomicColumnType.STRING, + logger=mock_logger, + ) + assert is_array == expected_is_array + assert property_format == expected_format + assert minimum == expected_min + assert maximum == expected_max + assert pattern == expected_pattern + if expected_pattern: + print(expected_pattern) def test_get_validation_rule_based_fields_inrange_warning(caplog) -> None: From da2e6679c7fa44b3089b17b8c864a6ab6d4d3c36 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Wed, 26 Nov 2025 11:35:21 -0700 Subject: [PATCH 06/29] update tests --- .../unit_test_create_json_schema.py | 47 +++++++++++++++++++ .../unit_test_data_model_graph_explorer.py | 14 ++++++ .../extensions/unit_test_data_model_parser.py | 23 +++++++-- .../unit_test_data_model_relationships.py | 1 + 4 files changed, 82 insertions(+), 3 deletions(-) diff --git 
a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 30f0ebb11..c358bb95d 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -6,10 +6,13 @@ import json import logging import os +import tempfile +from time import sleep from typing import Any, Optional from unittest import mock from unittest.mock import Mock +import pandas as pd import pytest from jsonschema import Draft7Validator from jsonschema.exceptions import ValidationError @@ -17,7 +20,9 @@ from synapseclient.extensions.curator.schema_generation import ( AtomicColumnType, ColumnType, + DataModelGraph, DataModelGraphExplorer, + DataModelParser, GraphTraversalState, JSONSchema, JSONSchemaFormat, @@ -408,6 +413,48 @@ def test_node_init( assert warning_message in test_nodes[node_name].logger.mock_calls[0][1][0] +def test_invalid_regex_columntype_traversalnode( + helpers, +) -> None: + """ + Tests for TraversalNode class initialization. + + Verifies that when TransversalNode objects are initialized with a patter specified and an incompatible column type, a ValueError is raised. 
+ """ + node = "Check Regex Single" + + path_to_data_model = helpers.get_schema_file_path("data_models/example.model.csv") + + fullpath = helpers.get_schema_file_path(path_to_data_model) + + # Instantiate DataModelParser + data_model_parser = DataModelParser(path_to_data_model=fullpath, logger=Mock()) + + # Parse Model + parsed_data_model = data_model_parser.parse_model() + + # Change column type to imcompatible type + parsed_data_model[node]["Relationships"]["ColumnType"] = "integer" + + # Instantiate DataModelGraph + data_model_grapher = DataModelGraph( + parsed_data_model, data_model_labels="class_label", logger=Mock() + ) + + # Generate graph + graph_data_model = data_model_grapher.graph + + # Instantiate DataModelGraphExplorer + dmge = DataModelGraphExplorer(graph_data_model, logger=Mock()) + + # A value error should be raised when using pattern specification with non-string column type + error_message = "Column type must be set to 'string' to use column pattern specification for regex validation." 
+ with pytest.raises(ValueError, match=error_message): + node = TraversalNode( + node.replace(" ", ""), "JSONSchemaComponent", dmge, logger=Mock() + ) + + @pytest.mark.parametrize( "node_name, expected_node_type, expected_max, expected_min", [ diff --git a/tests/unit/synapseclient/extensions/unit_test_data_model_graph_explorer.py b/tests/unit/synapseclient/extensions/unit_test_data_model_graph_explorer.py index 867237b27..6a8a46f32 100644 --- a/tests/unit/synapseclient/extensions/unit_test_data_model_graph_explorer.py +++ b/tests/unit/synapseclient/extensions/unit_test_data_model_graph_explorer.py @@ -69,6 +69,20 @@ def test_get_node_column_type( assert dmge.get_node_column_type(node_label) == column_type +@pytest.mark.parametrize( + "node_label, pattern", + [ + ("CheckRegexSingle", "[a-b]"), + ("CheckRegexFormat", "^[a-b]"), + ("CheckRegexInteger", None), + ], +) +def test_get_node_pattern( + dmge: DataModelGraphExplorer, node_label: str, pattern: str +) -> None: + assert dmge.get_node_column_pattern(node_label) == pattern + + @pytest.mark.parametrize( "node_label, column_type", [("String", None), ("Date", JSONSchemaFormat.DATE), ("URL", JSONSchemaFormat.URI)], diff --git a/tests/unit/synapseclient/extensions/unit_test_data_model_parser.py b/tests/unit/synapseclient/extensions/unit_test_data_model_parser.py index b65a099d8..7a529ce37 100644 --- a/tests/unit/synapseclient/extensions/unit_test_data_model_parser.py +++ b/tests/unit/synapseclient/extensions/unit_test_data_model_parser.py @@ -1,6 +1,9 @@ +import tempfile +from time import sleep from typing import Any, Union import numpy as np +import pandas as pd import pytest from synapseclient.extensions.curator.schema_generation import ( @@ -87,6 +90,23 @@ def test_parse_format( ) -> None: assert csv_dmp.parse_format(attribute_dict) == expected_dict + @pytest.mark.parametrize( + "attribute_dict, expected_dict", + [ + ({}, {}), + ({"Pattern": np.nan}, {}), + ({"Pattern": "^[a-b]"}, {"Pattern": "^[a-b]"}), + 
({"Pattern": " [a-b] "}, {"Pattern": "[a-b]"}), + ], + ) + def test_parse_regex_pattern( + self, + csv_dmp: DataModelCSVParser, + attribute_dict: dict[str, Any], + expected_dict: dict[str, str], + ) -> None: + assert csv_dmp.parse_pattern(attribute_dict) == expected_dict + @pytest.mark.parametrize( "attribute_dict, relationship, expected_dict", [ @@ -132,7 +152,6 @@ def test_parse_minimum_maximum( == expected_dict ) - class TestDataModelJsonLdParser: def test_gather_jsonld_attributes_relationships( self, @@ -143,8 +162,6 @@ def test_gather_jsonld_attributes_relationships( assert csv_dmp.parse_minimum_maximum(attribute_dict, "Minimum") == expected_dict assert csv_dmp.parse_minimum_maximum(attribute_dict, "Maximum") == expected_dict - -class TestDataModelJsonLdParser: def test_gather_jsonld_attributes_relationships( self, helpers, diff --git a/tests/unit/synapseclient/extensions/unit_test_data_model_relationships.py b/tests/unit/synapseclient/extensions/unit_test_data_model_relationships.py index bbc3eca96..c5fb2250c 100644 --- a/tests/unit/synapseclient/extensions/unit_test_data_model_relationships.py +++ b/tests/unit/synapseclient/extensions/unit_test_data_model_relationships.py @@ -71,6 +71,7 @@ def test_retrieve_rel_headers_dict(self, dmr: DataModelRelationships, edge: bool "id": "Source", "maximum": "Maximum", "minimum": "Minimum", + "pattern": "Pattern", } def test_get_relationship_value(self, dmr: DataModelRelationships) -> None: From 5ed6e0b01dda6e90efc6f241103b58d1a3700123 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Wed, 26 Nov 2025 11:35:52 -0700 Subject: [PATCH 07/29] update pattern extraction --- synapseclient/extensions/curator/schema_generation.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 99b23ec9a..11f3dccbe 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ 
b/synapseclient/extensions/curator/schema_generation.py @@ -2893,6 +2893,7 @@ def define_data_model_relationships(self) -> dict: allowed_values: A list of values the entry must be one of edge_dir: str, 'in'/'out' is the edge an in or out edge. Define for edge relationships jsonld_dir: str, 'in'/out is the direction in or out in the JSONLD. + pattern: regex pattern that the entry must match """ map_data_model_relationships = { "displayName": { @@ -4866,6 +4867,14 @@ def __post_init__(self) -> None: ) self.logger.warning(msg) + if self.pattern: + try: + re.compile(self.pattern) + except re.error as e: + raise ValueError( + f"The regex pattern '{self.pattern}' for property '{self.name}' is invalid." + ) from e + def _determine_type_and_array( self, column_type: Optional[ColumnType] ) -> tuple[Optional[AtomicColumnType], Optional[bool]]: From 074322f1e22f5051ef741b00a182c01d20aba071 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Wed, 26 Nov 2025 11:42:08 -0700 Subject: [PATCH 08/29] update expected jsonschemas --- .../expected.Biospecimen.schema.json | 1 + ...BulkRNA-seqAssay.display_names_schema.json | 1 + .../expected.BulkRNA-seqAssay.schema.json | 1 + ...NSchemaComponent.display_names_schema.json | 212 ++++++++++++++++++ .../expected.JSONSchemaComponent.schema.json | 1 + .../expected.MockComponent.schema.json | 5 +- .../expected.MockFilename.schema.json | 1 + .../expected.MockRDB.schema.json | 1 + ...expected.Patient.display_names_schema.json | 1 + .../expected.Patient.schema.json | 1 + 10 files changed, 223 insertions(+), 2 deletions(-) create mode 100644 tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Biospecimen.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Biospecimen.schema.json index 41097740c..315f7e37e 100644 --- 
a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Biospecimen.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Biospecimen.schema.json @@ -2,6 +2,7 @@ "$id": "http://example.com/Biospecimen_validation", "$schema": "http://json-schema.org/draft-07/schema#", "description": "TBD", + "pattern": "", "properties": { "Component": { "description": "TBD", diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json index c7d8cf158..ddd66fe46 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json @@ -96,6 +96,7 @@ } ], "description": "TBD", + "pattern": "", "properties": { "Component": { "description": "TBD", diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json index 872888213..9c0022df1 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json @@ -96,6 +96,7 @@ } ], "description": "TBD", + "pattern": "", "properties": { "Component": { "description": "TBD", diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json new file mode 100644 index 
000000000..2dc5431fb --- /dev/null +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json @@ -0,0 +1,212 @@ +{ + "$id": "http://example.com/JSONSchemaComponent_validation", + "$schema": "http://json-schema.org/draft-07/schema#", + "description": "Component to hold attributes for testing JSON Schemas", + "pattern": "", + "properties": { + "Component": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "Component" + }, + "Date": { + "description": "TBD", + "format": "date", + "title": "Date", + "type": "string" + }, + "Enum": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ], + "title": "enum" + } + ], + "title": "Enum" + }, + "EnumNotRequired": { + "description": "TBD", + "oneOf": [ + { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ], + "title": "enum" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "Enum Not Required" + }, + "InRange": { + "description": "TBD", + "maximum": 100.0, + "minimum": 50.0, + "title": "InRange", + "type": "number" + }, + "List": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + "type": "array" + } + ], + "title": "List" + }, + "ListEnum": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + } + ], + "title": "List Enum" + }, + "ListEnumNotRequired": { + "description": "TBD", + "oneOf": [ + { + "items": { + "enum": [ + "ab", + "cd", + "ef", + "gh" + ] + }, + "title": "array", + "type": "array" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "List Enum Not Required" + }, + "ListInRange": { + "description": "TBD", + "oneOf": [ + { + "items": { + "maximum": 100.0, + "minimum": 50.0, + "type": "number" + }, + "title": "array", + "type": "array" + } + ], + "title": "List InRange" + }, + "ListNotRequired": { + "description": "TBD", + "oneOf": [ + { + "title": "array", + 
"type": "array" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "List Not Required" + }, + "ListString": { + "description": "TBD", + "oneOf": [ + { + "items": { + "type": "string" + }, + "title": "array", + "type": "array" + } + ], + "title": "List String" + }, + "NoRules": { + "description": "TBD", + "not": { + "type": "null" + }, + "title": "No Rules" + }, + "NoRulesNotRequired": { + "description": "TBD", + "title": "No Rules Not Required" + }, + "Regex": { + "description": "TBD", + "pattern": "[a-f]", + "title": "Regex", + "type": "string" + }, + "String": { + "description": "TBD", + "title": "String", + "type": "string" + }, + "StringNotRequired": { + "description": "TBD", + "oneOf": [ + { + "title": "string", + "type": "string" + }, + { + "title": "null", + "type": "null" + } + ], + "title": "String Not Required" + }, + "URL": { + "description": "TBD", + "format": "uri", + "title": "URL", + "type": "string" + } + }, + "required": [ + "Component", + "Date", + "Enum", + "InRange", + "List", + "ListEnum", + "ListInRange", + "ListString", + "NoRules", + "Regex", + "String", + "URL" + ], + "title": "JSONSchemaComponent_validation", + "type": "object" +} diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json index 4aca7da53..5a8b7137e 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json @@ -2,6 +2,7 @@ "$id": "http://example.com/JSONSchemaComponent_validation", "$schema": "http://json-schema.org/draft-07/schema#", "description": "Component to hold attributes for testing JSON Schemas", + "pattern": "", "properties": { "Component": { "description": "TBD", diff --git 
a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json index ad384ec1a..eafff46e6 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json @@ -2,6 +2,7 @@ "$id": "http://example.com/MockComponent_validation", "$schema": "http://json-schema.org/draft-07/schema#", "description": "Component to hold mock attributes for testing all validation rules", + "pattern": "", "properties": { "CheckAges": { "description": "TBD", @@ -159,7 +160,7 @@ "not": { "type": "null" }, - "pattern": "^[a-f]", + "pattern": "^[a-b]", "title": "Check Regex Format" }, "CheckRegexInteger": { @@ -190,7 +191,7 @@ "not": { "type": "null" }, - "pattern": "[a-f]", + "pattern": "[a-b]", "title": "Check Regex Single" }, "CheckString": { diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockFilename.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockFilename.schema.json index 0fe609256..d92affbac 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockFilename.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockFilename.schema.json @@ -2,6 +2,7 @@ "$id": "http://example.com/MockFilename_validation", "$schema": "http://json-schema.org/draft-07/schema#", "description": "TBD", + "pattern": "", "properties": { "Component": { "description": "TBD", diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockRDB.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockRDB.schema.json index 003865f8e..daf84a472 100644 --- 
a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockRDB.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockRDB.schema.json @@ -2,6 +2,7 @@ "$id": "http://example.com/MockRDB_validation", "$schema": "http://json-schema.org/draft-07/schema#", "description": "TBD", + "pattern": "", "properties": { "Component": { "description": "TBD", diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.display_names_schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.display_names_schema.json index 80042a52c..fa9f63898 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.display_names_schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.display_names_schema.json @@ -50,6 +50,7 @@ } ], "description": "TBD", + "pattern": "", "properties": { "Cancer Type": { "description": "TBD", diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.schema.json index d83d3cfa9..2ca4209cd 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.schema.json @@ -50,6 +50,7 @@ } ], "description": "TBD", + "pattern": "", "properties": { "CancerType": { "description": "TBD", From 186c2beb611ab2e8f755d3cddfc1c79e6719e72d Mon Sep 17 00:00:00 2001 From: SageGJ Date: Wed, 26 Nov 2025 11:50:37 -0700 Subject: [PATCH 09/29] update model jsonld --- .../schema_files/data_models_jsonld/example.model.jsonld | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git 
a/tests/unit/synapseclient/extensions/schema_files/data_models_jsonld/example.model.jsonld b/tests/unit/synapseclient/extensions/schema_files/data_models_jsonld/example.model.jsonld index 94972828b..bd6ae21e1 100644 --- a/tests/unit/synapseclient/extensions/schema_files/data_models_jsonld/example.model.jsonld +++ b/tests/unit/synapseclient/extensions/schema_files/data_models_jsonld/example.model.jsonld @@ -1218,11 +1218,11 @@ "schema:isPartOf": { "@id": "http://schema.biothings.io" }, + "sms:columnType": "string", "sms:displayName": "Check Regex Single", + "sms:pattern": "[a-b]", "sms:required": "sms:true", - "sms:validationRules": [ - "regex search [a-f]" - ] + "sms:validationRules": [] }, { "@id": "bts:CheckRegexFormat", @@ -1237,7 +1237,9 @@ "schema:isPartOf": { "@id": "http://schema.biothings.io" }, + "sms:columnType": "string", "sms:displayName": "Check Regex Format", + "sms:pattern": "^[a-b]", "sms:required": "sms:true", "sms:validationRules": [ "regex match [a-f]" From 7b8f7167706451322833664bbbd19f1c39be6b28 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Wed, 26 Nov 2025 12:00:28 -0700 Subject: [PATCH 10/29] update expected schema template --- .../synapseclient/extensions/unit_test_create_json_schema.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index c358bb95d..103f4002e 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -271,6 +271,7 @@ def test_as_json_schema_dict(self) -> None: "$id": "", "$schema": "http://json-schema.org/draft-07/schema#", "description": "TBD", + "pattern": "", "properties": {}, "required": [], "title": "", From d622b534ab7b5fb1e16a1e411e948f016f07892b Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Wed, 26 Nov 2025 12:14:19 -0700 Subject: [PATCH 11/29] update expected schema --- 
.../expected_jsonschemas/expected.MockComponent.schema.json | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json index eafff46e6..f4382d397 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json @@ -161,7 +161,8 @@ "type": "null" }, "pattern": "^[a-b]", - "title": "Check Regex Format" + "title": "Check Regex Format", + "type": "string" }, "CheckRegexInteger": { "description": "TBD", @@ -192,7 +193,8 @@ "type": "null" }, "pattern": "[a-b]", - "title": "Check Regex Single" + "title": "Check Regex Single", + "type": "string" }, "CheckString": { "description": "TBD", From 12e118cfd273d0c7a65183a83eb12b34dc63c309 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Wed, 26 Nov 2025 13:47:43 -0700 Subject: [PATCH 12/29] add test --- .../unit_test_create_json_schema.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 103f4002e..ac0a1d28a 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -456,6 +456,48 @@ def test_invalid_regex_columntype_traversalnode( ) +def test_invalid_regex_traversalnode( + helpers, +) -> None: + """ + Tests for TraversalNode class initialization. + + Verifies that when TransversalNode objects are initialized with a patter specified and an incompatible column type, a ValueError is raised. 
+ """ + node = "Check Regex Single" + + path_to_data_model = helpers.get_schema_file_path("data_models/example.model.csv") + + fullpath = helpers.get_schema_file_path(path_to_data_model) + + # Instantiate DataModelParser + data_model_parser = DataModelParser(path_to_data_model=fullpath, logger=Mock()) + + # Parse Model + parsed_data_model = data_model_parser.parse_model() + + # Change column type to imcompatible type + parsed_data_model[node]["Relationships"]["Pattern"] = "\\u" + + # Instantiate DataModelGraph + data_model_grapher = DataModelGraph( + parsed_data_model, data_model_labels="class_label", logger=Mock() + ) + + # Generate graph + graph_data_model = data_model_grapher.graph + + # Instantiate DataModelGraphExplorer + dmge = DataModelGraphExplorer(graph_data_model, logger=Mock()) + + # A value error should be raised when using pattern specification with non-string column type + error_message = "Column type must be set to 'string' to use column pattern specification for regex validation." 
+ with pytest.raises(SyntaxError, match="The regex pattern.*is invalid"): + node = TraversalNode( + node.replace(" ", ""), "JSONSchemaComponent", dmge, logger=Mock() + ) + + @pytest.mark.parametrize( "node_name, expected_node_type, expected_max, expected_min", [ From 857cf78d33fa70979f35e8e1c6c9fbe8b40a8393 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Wed, 26 Nov 2025 13:47:57 -0700 Subject: [PATCH 13/29] update error --- synapseclient/extensions/curator/schema_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 11f3dccbe..9f3ac8ef6 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -4871,7 +4871,7 @@ def __post_init__(self) -> None: try: re.compile(self.pattern) except re.error as e: - raise ValueError( + raise SyntaxError( f"The regex pattern '{self.pattern}' for property '{self.name}' is invalid." 
) from e From e286ddb9e6c729329242ce5e7a0e235563e6030b Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Wed, 26 Nov 2025 13:51:41 -0700 Subject: [PATCH 14/29] revert temp changes --- .../unit_test_create_json_schema.py | 43 +++++++++---------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index ac0a1d28a..5cd369a0e 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -654,29 +654,26 @@ def test_get_validation_rule_based_fields( Test for _get_validation_rule_based_fields Tests that output is expected based on the input validation rules """ - logger = logging.getLogger("synapseclient.extensions.curator.schema_generation") - with mock.patch.object(logger, "warning") as mock_logger: - ( - is_array, - property_format, - minimum, - maximum, - pattern, - ) = _get_validation_rule_based_fields( - validation_rules, - explicit_is_array=explicit_is_array, - explicit_format=explicit_format, - name="name", - column_type=AtomicColumnType.STRING, - logger=mock_logger, - ) - assert is_array == expected_is_array - assert property_format == expected_format - assert minimum == expected_min - assert maximum == expected_max - assert pattern == expected_pattern - if expected_pattern: - print(expected_pattern) + logger = Mock() + ( + is_array, + property_format, + minimum, + maximum, + pattern, + ) = _get_validation_rule_based_fields( + validation_rules, + explicit_is_array=explicit_is_array, + explicit_format=explicit_format, + name="name", + column_type=AtomicColumnType.STRING, + logger=logger, + ) + assert is_array == expected_is_array + assert property_format == expected_format + assert minimum == expected_min + assert maximum == expected_max + assert pattern == expected_pattern def 
test_get_validation_rule_based_fields_inrange_warning(caplog) -> None: From 2c042900d418b11b8ae00c8621577f6558a83c8a Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Wed, 26 Nov 2025 13:55:47 -0700 Subject: [PATCH 15/29] update docstrings --- .../extensions/unit_test_create_json_schema.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index 5cd369a0e..a424d5a57 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -418,9 +418,9 @@ def test_invalid_regex_columntype_traversalnode( helpers, ) -> None: """ - Tests for TraversalNode class initialization. + Tests for matching pattern and columnType specification. - Verifies that when TransversalNode objects are initialized with a patter specified and an incompatible column type, a ValueError is raised. + Verifies that when TransversalNode objects are initialized with a pattern specified and an incompatible column type, a ValueError is raised. """ node = "Check Regex Single" @@ -460,9 +460,9 @@ def test_invalid_regex_traversalnode( helpers, ) -> None: """ - Tests for TraversalNode class initialization. + Tests for invalid regex pattern specification. - Verifies that when TransversalNode objects are initialized with a patter specified and an incompatible column type, a ValueError is raised. + Verifies that only valid regex patterns are specified. 
""" node = "Check Regex Single" From 8d738713599b338999c4852b179b9fef4bf93e4a Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Mon, 1 Dec 2025 09:44:07 -0700 Subject: [PATCH 16/29] update type hinting --- synapseclient/extensions/curator/schema_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 9f3ac8ef6..c42f672b3 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -1842,7 +1842,7 @@ def get_node_column_type( def get_node_column_pattern( self, node_label: Optional[str] = None, node_display_name: Optional[str] = None - ) -> Optional[ColumnType]: + ) -> Optional[str]: """Gets the regex pattern of the node Args: From c389b3e3319fab1a599b2a4ff4554d91d3e4c608 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Mon, 1 Dec 2025 11:35:42 -0700 Subject: [PATCH 17/29] rm pattern from data type spec --- synapseclient/extensions/curator/schema_generation.py | 4 ---- .../expected_jsonschemas/expected.Biospecimen.schema.json | 1 - .../expected.BulkRNA-seqAssay.display_names_schema.json | 1 - .../expected.BulkRNA-seqAssay.schema.json | 1 - .../expected.JSONSchemaComponent.display_names_schema.json | 1 - .../expected.JSONSchemaComponent.schema.json | 1 - .../expected_jsonschemas/expected.MockComponent.schema.json | 1 - .../expected_jsonschemas/expected.MockFilename.schema.json | 1 - .../expected_jsonschemas/expected.MockRDB.schema.json | 1 - .../synapseclient/extensions/unit_test_create_json_schema.py | 1 - 10 files changed, 13 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index c42f672b3..0247510a7 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -2025,9 +2025,6 @@ class ClassTemplate: 
magic_validationRules: list = field( default_factory=list, metadata=config(field_name="sms:validationRules") ) - magic_pattern: list = field( - default_factory=list, metadata=config(field_name="sms:pattern") - ) class DataModelJsonLD: @@ -5176,7 +5173,6 @@ class JSONSchema: # pylint: disable=too-many-instance-attributes properties: dict[str, Property] = field(default_factory=dict) required: list[str] = field(default_factory=list) all_of: list[AllOf] = field(default_factory=list) - pattern: str = "" def as_json_schema_dict( self, diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Biospecimen.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Biospecimen.schema.json index 315f7e37e..41097740c 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Biospecimen.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Biospecimen.schema.json @@ -2,7 +2,6 @@ "$id": "http://example.com/Biospecimen_validation", "$schema": "http://json-schema.org/draft-07/schema#", "description": "TBD", - "pattern": "", "properties": { "Component": { "description": "TBD", diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json index ddd66fe46..c7d8cf158 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.display_names_schema.json @@ -96,7 +96,6 @@ } ], "description": "TBD", - "pattern": "", "properties": { "Component": { "description": "TBD", diff --git 
a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json index 9c0022df1..872888213 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.BulkRNA-seqAssay.schema.json @@ -96,7 +96,6 @@ } ], "description": "TBD", - "pattern": "", "properties": { "Component": { "description": "TBD", diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json index 2dc5431fb..b378f46d2 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.display_names_schema.json @@ -2,7 +2,6 @@ "$id": "http://example.com/JSONSchemaComponent_validation", "$schema": "http://json-schema.org/draft-07/schema#", "description": "Component to hold attributes for testing JSON Schemas", - "pattern": "", "properties": { "Component": { "description": "TBD", diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json index 5a8b7137e..4aca7da53 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.JSONSchemaComponent.schema.json @@ -2,7 +2,6 @@ "$id": 
"http://example.com/JSONSchemaComponent_validation", "$schema": "http://json-schema.org/draft-07/schema#", "description": "Component to hold attributes for testing JSON Schemas", - "pattern": "", "properties": { "Component": { "description": "TBD", diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json index f4382d397..1ce06f5d3 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json @@ -2,7 +2,6 @@ "$id": "http://example.com/MockComponent_validation", "$schema": "http://json-schema.org/draft-07/schema#", "description": "Component to hold mock attributes for testing all validation rules", - "pattern": "", "properties": { "CheckAges": { "description": "TBD", diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockFilename.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockFilename.schema.json index d92affbac..0fe609256 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockFilename.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockFilename.schema.json @@ -2,7 +2,6 @@ "$id": "http://example.com/MockFilename_validation", "$schema": "http://json-schema.org/draft-07/schema#", "description": "TBD", - "pattern": "", "properties": { "Component": { "description": "TBD", diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockRDB.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockRDB.schema.json index daf84a472..003865f8e 100644 --- 
a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockRDB.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockRDB.schema.json @@ -2,7 +2,6 @@ "$id": "http://example.com/MockRDB_validation", "$schema": "http://json-schema.org/draft-07/schema#", "description": "TBD", - "pattern": "", "properties": { "Component": { "description": "TBD", diff --git a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py index a424d5a57..c84606a4a 100644 --- a/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py +++ b/tests/unit/synapseclient/extensions/unit_test_create_json_schema.py @@ -271,7 +271,6 @@ def test_as_json_schema_dict(self) -> None: "$id": "", "$schema": "http://json-schema.org/draft-07/schema#", "description": "TBD", - "pattern": "", "properties": {}, "required": [], "title": "", From 581797b503bf4e25950f8d519669138c423ec6d6 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Mon, 1 Dec 2025 11:41:31 -0700 Subject: [PATCH 18/29] change wanring trigger --- synapseclient/extensions/curator/schema_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 0247510a7..b0b05c216 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -4855,7 +4855,7 @@ def __post_init__(self) -> None: self.pattern = column_pattern if column_pattern else rule_pattern - if rule_pattern: + if rule_pattern and not column_pattern: msg = ( f"A regex validation rule is set for property: {self.name}, but the pattern is not set in the data model. 
" f"The regex pattern will be set to {self.pattern}, but the regex rule is deprecated and validation " From 269e8c37423de58d0d9e742a23ac8496143e24fb Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Thu, 4 Dec 2025 09:28:53 -0700 Subject: [PATCH 19/29] update expected jsonschema files --- .../expected_jsonschemas/expected.MockComponent.schema.json | 6 ------ .../expected.Patient.display_names_schema.json | 1 - .../expected_jsonschemas/expected.Patient.schema.json | 1 - 3 files changed, 8 deletions(-) diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json index 1ce06f5d3..ab88d4e1e 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json @@ -156,9 +156,6 @@ }, "CheckRegexFormat": { "description": "TBD", - "not": { - "type": "null" - }, "pattern": "^[a-b]", "title": "Check Regex Format", "type": "string" @@ -188,9 +185,6 @@ }, "CheckRegexSingle": { "description": "TBD", - "not": { - "type": "null" - }, "pattern": "[a-b]", "title": "Check Regex Single", "type": "string" diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.display_names_schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.display_names_schema.json index fa9f63898..80042a52c 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.display_names_schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.display_names_schema.json @@ -50,7 +50,6 @@ } ], "description": "TBD", - "pattern": "", "properties": { "Cancer Type": { "description": "TBD", diff --git 
a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.schema.json index 2ca4209cd..d83d3cfa9 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.Patient.schema.json @@ -50,7 +50,6 @@ } ], "description": "TBD", - "pattern": "", "properties": { "CancerType": { "description": "TBD", From 599129b8208fb27cb7db8c3bc8ed51525b49b63b Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Thu, 4 Dec 2025 11:35:08 -0700 Subject: [PATCH 20/29] fix precommit errors --- .../extensions/curator/schema_generation.py | 21 +++++++++++-------- .../extensions/unit_test_data_model_parser.py | 1 + 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index b0b05c216..721e54614 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -697,6 +697,8 @@ def gather_csv_attributes_relationships( maximum_dict = self.parse_minimum_maximum(attr, "Maximum") attr_rel_dictionary[attribute_name]["Relationships"].update( maximum_dict + ) + if model_includes_pattern: pattern_dict = self.parse_pattern(attr) attr_rel_dictionary[attribute_name]["Relationships"].update( @@ -3062,14 +3064,15 @@ def define_data_model_relationships(self) -> dict: "csv_header": "Minimum", "node_label": "minimum", "type": Union[float, int], - "pattern": { - "jsonld_key": "sms:pattern", - "csv_header": "Pattern", - "node_label": "pattern", - "type": str, - "required_header": False, - "edge_rel": False, - "node_attr_dict": {"default": None}, + "pattern": { + "jsonld_key": "sms:pattern", + "csv_header": "Pattern", + "node_label": "pattern", + "type": str, + 
"required_header": False, + "edge_rel": False, + "node_attr_dict": {"default": None}, + }, }, } @@ -4847,7 +4850,7 @@ def __post_init__(self) -> None: self.maximum = ( explicit_maximum if explicit_maximum is not None else implicit_maximum ) - + if column_pattern and column_type.value != "string": raise ValueError( "Column type must be set to 'string' to use column pattern specification for regex validation." diff --git a/tests/unit/synapseclient/extensions/unit_test_data_model_parser.py b/tests/unit/synapseclient/extensions/unit_test_data_model_parser.py index 7a529ce37..605da11d2 100644 --- a/tests/unit/synapseclient/extensions/unit_test_data_model_parser.py +++ b/tests/unit/synapseclient/extensions/unit_test_data_model_parser.py @@ -152,6 +152,7 @@ def test_parse_minimum_maximum( == expected_dict ) + class TestDataModelJsonLdParser: def test_gather_jsonld_attributes_relationships( self, From 60159d67f000870ba54724d949ad509e70b9c3f8 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Thu, 4 Dec 2025 13:22:19 -0700 Subject: [PATCH 21/29] fix example model --- .../extensions/schema_files/data_models/example.model.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv b/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv index ceeeba712..e8b7bd13b 100644 --- a/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv +++ b/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv @@ -29,8 +29,8 @@ Check List Enum Strict,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list strict,,,,, Check Regex List,,,,,TRUE,DataProperty,,,list::regex match [a-f],,,,, Check Regex List Strict,,,,,TRUE,DataProperty,,,list strict::regex match [a-f],,,,, Check Regex List Like,,,,,TRUE,DataProperty,,,list like::regex match [a-f],,,,, -Check Regex Single,,,,,TRUE,DataProperty,,,,,,,[a-b], -Check Regex Format,,,,,TRUE,DataProperty,,,regex 
match [a-f],,,,^[a-b], +Check Regex Single,,,,,TRUE,DataProperty,,,,,,,,[a-b] +Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f],,,,,^[a-b] Check Regex Integer,,,,,TRUE,DataProperty,,,regex search ^\d+$,,,,, Check Num,,,,,TRUE,DataProperty,,,num error,,,,, Check Float,,,,,TRUE,DataProperty,,,float error,number,,,, From b4b27a73e894acbeba52c94e9f047cc0a81d5c1b Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Thu, 4 Dec 2025 13:23:38 -0700 Subject: [PATCH 22/29] fix relationships dict --- .../extensions/curator/schema_generation.py | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 721e54614..ae3f57033 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -3064,15 +3064,18 @@ def define_data_model_relationships(self) -> dict: "csv_header": "Minimum", "node_label": "minimum", "type": Union[float, int], - "pattern": { - "jsonld_key": "sms:pattern", - "csv_header": "Pattern", - "node_label": "pattern", - "type": str, - "required_header": False, - "edge_rel": False, - "node_attr_dict": {"default": None}, - }, + "required_header": False, + "edge_rel": False, + "node_attr_dict": {"default": None}, + }, + "pattern": { + "jsonld_key": "sms:pattern", + "csv_header": "Pattern", + "node_label": "pattern", + "type": str, + "required_header": False, + "edge_rel": False, + "node_attr_dict": {"default": None}, }, } From 23dc7f24a4125606e16d502ee88f98c1ce82bfd2 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Thu, 4 Dec 2025 13:34:48 -0700 Subject: [PATCH 23/29] fix type logic --- synapseclient/extensions/curator/schema_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index ae3f57033..86692d64c 100644 --- 
a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -4854,7 +4854,7 @@ def __post_init__(self) -> None: explicit_maximum if explicit_maximum is not None else implicit_maximum ) - if column_pattern and column_type.value != "string": + if column_pattern and column_type and column_type.value != "string": raise ValueError( "Column type must be set to 'string' to use column pattern specification for regex validation." ) From 4be1e6abe7bbaa39a4e5efd0f385c83f9d4789b2 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Thu, 4 Dec 2025 13:34:58 -0700 Subject: [PATCH 24/29] specify column type --- .../extensions/schema_files/data_models/example.model.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv b/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv index e8b7bd13b..36c1bfec7 100644 --- a/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv +++ b/tests/unit/synapseclient/extensions/schema_files/data_models/example.model.csv @@ -29,8 +29,8 @@ Check List Enum Strict,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list strict,,,,, Check Regex List,,,,,TRUE,DataProperty,,,list::regex match [a-f],,,,, Check Regex List Strict,,,,,TRUE,DataProperty,,,list strict::regex match [a-f],,,,, Check Regex List Like,,,,,TRUE,DataProperty,,,list like::regex match [a-f],,,,, -Check Regex Single,,,,,TRUE,DataProperty,,,,,,,,[a-b] -Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f],,,,,^[a-b] +Check Regex Single,,,,,TRUE,DataProperty,,,,string,,,,[a-b] +Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f],string,,,,^[a-b] Check Regex Integer,,,,,TRUE,DataProperty,,,regex search ^\d+$,,,,, Check Num,,,,,TRUE,DataProperty,,,num error,,,,, Check Float,,,,,TRUE,DataProperty,,,float error,number,,,, From a2448eeb303c974003bc4b86bf8f88bf0956e2a0 Mon Sep 17 00:00:00 2001 
From: GiaJordan Date: Mon, 8 Dec 2025 10:09:04 -0700 Subject: [PATCH 25/29] reconsolidate pattern extraction --- synapseclient/extensions/curator/schema_generation.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 86692d64c..e2bb707b5 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -5457,7 +5457,7 @@ def _set_type_specific_keywords(schema: dict[str, Any], node: TraversalNode) -> schema: The schema to set keywords on node (Node): The node the corresponds to the property which is being set in the JSON Schema """ - for attr in ["minimum", "maximum"]: + for attr in ["minimum", "maximum", "pattern"]: value = getattr(node, attr) if value is not None: schema[attr] = value @@ -5465,9 +5465,6 @@ def _set_type_specific_keywords(schema: dict[str, Any], node: TraversalNode) -> if node.format is not None: schema["format"] = node.format.value - if hasattr(node, "pattern") and node.pattern is not None: - schema["pattern"] = node.pattern - def _set_property( json_schema: JSONSchema, From 0304a82be6c11ef987d00cb3728f80965a871d70 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Mon, 8 Dec 2025 11:12:14 -0700 Subject: [PATCH 26/29] add legacy example data models --- .../example.model_no_pattern_column.csv | 92 + .../example.model_no_pattern_column.jsonld | 2698 +++++++++++++++++ 2 files changed, 2790 insertions(+) create mode 100644 tests/unit/synapseclient/extensions/schema_files/data_models/example.model_no_pattern_column.csv create mode 100644 tests/unit/synapseclient/extensions/schema_files/data_models_jsonld/example.model_no_pattern_column.jsonld diff --git a/tests/unit/synapseclient/extensions/schema_files/data_models/example.model_no_pattern_column.csv b/tests/unit/synapseclient/extensions/schema_files/data_models/example.model_no_pattern_column.csv new file 
mode 100644 index 000000000..b1387d1a5 --- /dev/null +++ b/tests/unit/synapseclient/extensions/schema_files/data_models/example.model_no_pattern_column.csv @@ -0,0 +1,92 @@ +Attribute,Description,Valid Values,DependsOn,Properties,Required,Parent,DependsOn Component,Source,Validation Rules,columnType,Format,Maximum,Minimum +Component,,,,,TRUE,,,,,,,, +Patient,,,"Patient ID, Sex, Year of Birth, Diagnosis, Component",,FALSE,DataType,,,,,,, +Patient ID,,,,,TRUE,DataProperty,,,#Patient unique warning^^#Biospecimen unique error,,,, +Sex,,"Female, Male, Other",,,TRUE,DataProperty,,,,,,, +Year of Birth,,,,,FALSE,DataProperty,,,,,,, +Diagnosis,,"Healthy, Cancer",,,TRUE,DataProperty,,,,,,, +Cancer,,,"Cancer Type, Family History",,FALSE,ValidValue,,,,,,, +Cancer Type,,"Breast, Colorectal, Lung, Prostate, Skin",,,TRUE,DataProperty,,,,,,, +Family History,,"Breast, Colorectal, Lung, Prostate, Skin",,,TRUE,DataProperty,,,list strict,,,, +Biospecimen,,,"Sample ID, Patient ID, Tissue Status, Component",,FALSE,DataType,Patient,,,,,, +Sample ID,,,,,TRUE,DataProperty,,,,,,, +Tissue Status,,"Healthy, Malignant, None",,,TRUE,DataProperty,,,,,,, +Bulk RNA-seq Assay,,,"Filename, Sample ID, File Format, Component",,FALSE,DataType,Biospecimen,,,,,, +Filename,,,,,TRUE,DataProperty,,,#MockFilename filenameExists^^,,,, +File Format,,"FASTQ, BAM, CRAM, CSV/TSV",,,TRUE,DataProperty,,,,,,, +BAM,,,Genome Build,,FALSE,ValidValue,,,,,,, +CRAM,,,"Genome Build, Genome FASTA",,FALSE,ValidValue,,,,,,, +CSV/TSV,,,Genome Build,,FALSE,ValidValue,,,,,,, +Genome Build,,"GRCh37, GRCh38, GRCm38, GRCm39",,,TRUE,DataProperty,,,,,,, +Genome FASTA,,,,,TRUE,DataProperty,,,,,,, +MockComponent,Component to hold mock attributes for testing all validation rules,,"Component, Check List, Check List Enum, Check List Like, Check List Like Enum, Check List Strict, Check List Enum Strict, Check Regex List, Check Regex List Like, Check Regex List Strict, Check Regex Single, Check Regex Format, Check Regex Integer, Check Num, 
Check Float, Check Int, Check String, Check URL,Check Match at Least, Check Match at Least values, Check Match Exactly, Check Match Exactly values, Check Match None, Check Match None values, Check Recommended, Check Ages, Check Unique, Check Range, Check Date, Check NA",,FALSE,DataType,,,,,,, +Check List,,,,,TRUE,DataProperty,,,list,,,, +Check List Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list,,,, +Check List Like,,,,,TRUE,DataProperty,,,list like,,,, +Check List Like Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list like,,,, +Check List Strict,,,,,TRUE,DataProperty,,,list strict,,,, +Check List Enum Strict,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,list strict,,,, +Check Regex List,,,,,TRUE,DataProperty,,,list::regex match [a-f],,,, +Check Regex List Strict,,,,,TRUE,DataProperty,,,list strict::regex match [a-f],,,, +Check Regex List Like,,,,,TRUE,DataProperty,,,list like::regex match [a-f],,,, +Check Regex Single,,,,,TRUE,DataProperty,,,,string,,, +Check Regex Format,,,,,TRUE,DataProperty,,,regex match [a-f],string,,, +Check Regex Integer,,,,,TRUE,DataProperty,,,regex search ^\d+$,,,, +Check Num,,,,,TRUE,DataProperty,,,num error,,,, +Check Float,,,,,TRUE,DataProperty,,,float error,number,,, +Check Int,,,,,TRUE,DataProperty,,,int error,,,, +Check String,,,,,TRUE,DataProperty,,,str error,,,, +Check URL,,,,,TRUE,DataProperty,,,url,string,uri,, +Check Match at Least,,,,,TRUE,DataProperty,,,matchAtLeastOne Patient.PatientID set,,,, +Check Match Exactly,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactly set,,,, +Check Match None,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNone set error,,,, +Check Match at Least values,,,,,TRUE,DataProperty,,,matchAtLeastOne MockComponent.checkMatchatLeastvalues value,,,, +Check Match Exactly values,,,,,TRUE,DataProperty,,,matchExactlyOne MockComponent.checkMatchExactlyvalues value,,,, +Check Match None values,,,,,TRUE,DataProperty,,,matchNone MockComponent.checkMatchNonevalues value error,,,, +Check 
Recommended,,,,,FALSE,DataProperty,,,recommended,,,, +Check Ages,,,,,TRUE,DataProperty,,,protectAges,,,, +Check Unique,,,,,TRUE,DataProperty,,,unique error,,,, +Check Range,,,,,TRUE,DataProperty,,,inRange 50 100 error,,,, +Check Date,,,,,TRUE,DataProperty,,,date,string,date,, +Check NA,,,,,TRUE,DataProperty,,,int::IsNA,,,, +MockRDB,,,"Component, MockRDB_id, SourceManifest",,FALSE,DataType,,,,,,, +MockRDB_id,,,,,TRUE,DataProperty,,,int,,,, +SourceManifest,,,,,TRUE,DataProperty,,,,,,, +MockFilename,,,"Component, Filename",,FALSE,DataType,,,,,,, +JSONSchemaComponent,Component to hold attributes for testing JSON Schemas,,"Component, No Rules, No Rules Not Required, String, String Not Required, Enum, Enum Not Required, Date, URL, InRange, Regex, List, List Not Required, List Enum, List Enum Not Required, List Boolean, List, Integer, List InRange",,FALSE,DataType,,,,,,, +No Rules,,,,,TRUE,DataProperty,,,,,,, +No Rules Not Required,,,,,FALSE,DataProperty,,,,,,, +String,,,,,TRUE,DataProperty,,,,string,,, +String Not Required,,,,,FALSE,DataProperty,,,,string,,, +Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,,string,,, +Enum Not Required,,"ab, cd, ef, gh",,,FALSE,DataProperty,,,,string,,, +Date,,,,,TRUE,DataProperty,,,date,string,date,, +URL,,,,,TRUE,DataProperty,,,url,string,uri,, +InRange,,,,,TRUE,DataProperty,,,inRange 50 100,number,,, +Regex,,,,,TRUE,DataProperty,,,regex search [a-f],string,,, +List,,,,,TRUE,DataProperty,,,,string_list,,, +List Not Required,,,,,FALSE,DataProperty,,,,string_list,,, +List Enum,,"ab, cd, ef, gh",,,TRUE,DataProperty,,,,string_list,,, +List Enum Not Required,,"ab, cd, ef, gh",,,FALSE,DataProperty,,,,string_list,,, +List Boolean,,,,,TRUE,DataProperty,,,,boolean_list,,, +List Integer,,,,,TRUE,DataProperty,,,,integer_list,,, +List InRange,,,,,TRUE,DataProperty,,,inRange 50 100,integer_list,,, +TypeDefinitionComponent,Component to check type specification,,"Component, String type, String type caps, Int type, Int type caps, Num type, Num type caps, 
Nan type, Missing type, Boolean type, Boolean type caps",,FALSE,DataType,,,,,,, +String type,,,,,TRUE,DataProperty,,,,string,,, +String type caps,,,,,TRUE,DataProperty,,,,STRING,,, +Int type,,,,,TRUE,DataProperty,,,,integer,,, +Int type caps,,,,,TRUE,DataProperty,,,,INTEGER,,, +Num type,,,,,TRUE,DataProperty,,,,number,,, +Num type caps,,,,,TRUE,DataProperty,,,,NUMBER,,, +Nan type,,,,,TRUE,DataProperty,,,,nan,,, +Missing type,,,,,TRUE,DataProperty,,,,,,, +Boolean type,,,,,TRUE,DataProperty,,,,boolean,,, +Boolean type caps,,,,,TRUE,DataProperty,,,,BOOLEAN,,, +RangeComponent,Component to ensure maximum and minimum can be set correctly,,"Component, Maximum Integer, Minimum Integer, Maximum Float, Minimum Float, Maximum Minimum, Maximum Minimum Integer List, Maximum Minimum Validation Rule",,FALSE,DataType,,,,,,, +Maximum Integer,,,,,TRUE,DataProperty,,,,integer,,100, +Minimum Integer,,,,,TRUE,DataProperty,,,,integer,,,10 +Maximum Float,,,,,TRUE,DataProperty,,,,number,,100.5, +Minimum Float,,,,,TRUE,DataProperty,,,,number,,,10.8 +Maximum Minimum,,,,,TRUE,DataProperty,,,,integer,,100,10 +Maximum Minimum Integer List,,,,,TRUE,DataProperty,,,,integer_list,,100,10 +Maximum Minimum Validation Rule,,,,,TRUE,DataProperty,,,inRange 50 100,integer,,200,10 diff --git a/tests/unit/synapseclient/extensions/schema_files/data_models_jsonld/example.model_no_pattern_column.jsonld b/tests/unit/synapseclient/extensions/schema_files/data_models_jsonld/example.model_no_pattern_column.jsonld new file mode 100644 index 000000000..018fa1fcd --- /dev/null +++ b/tests/unit/synapseclient/extensions/schema_files/data_models_jsonld/example.model_no_pattern_column.jsonld @@ -0,0 +1,2698 @@ +{ + "@context": { + "bts": "http://schema.biothings.io/", + "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "schema": "http://schema.org/", + "xsd": "http://www.w3.org/2001/XMLSchema#" + }, + "@graph": [ + { + "@id": "bts:Component", + "@type": 
"rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Component", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Component", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Patient", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Patient", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Patient", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:PatientID" + }, + { + "@id": "bts:Sex" + }, + { + "@id": "bts:YearofBirth" + }, + { + "@id": "bts:Diagnosis" + }, + { + "@id": "bts:Component" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:PatientID", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "PatientID", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Patient ID", + "sms:required": "sms:true", + "sms:validationRules": { + "Biospecimen": "unique error", + "Patient": "unique warning" + } + }, + { + "@id": "bts:Sex", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Sex", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Female" + }, + { + "@id": "bts:Male" + }, + { + "@id": "bts:Other" + } + ], + "sms:displayName": "Sex", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:YearofBirth", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "YearofBirth", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Year of Birth", + "sms:required": "sms:false", + "sms:validationRules": [] + 
}, + { + "@id": "bts:Diagnosis", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Diagnosis", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Healthy" + }, + { + "@id": "bts:Cancer" + } + ], + "sms:displayName": "Diagnosis", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:DataType", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "DataType", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "DataType", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:DataProperty", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "DataProperty", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "DataProperty", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Female", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Female", + "rdfs:subClassOf": [ + { + "@id": "bts:Sex" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Female", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Male", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Male", + "rdfs:subClassOf": [ + { + "@id": "bts:Sex" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Male", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Other", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Other", + "rdfs:subClassOf": [ + { + "@id": "bts:Sex" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Other", + 
"sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Healthy", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Healthy", + "rdfs:subClassOf": [ + { + "@id": "bts:Diagnosis" + }, + { + "@id": "bts:TissueStatus" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Healthy", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Cancer", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Cancer", + "rdfs:subClassOf": [ + { + "@id": "bts:ValidValue" + }, + { + "@id": "bts:Diagnosis" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Cancer", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:CancerType", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CancerType", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Breast" + }, + { + "@id": "bts:Colorectal" + }, + { + "@id": "bts:Lung" + }, + { + "@id": "bts:Prostate" + }, + { + "@id": "bts:Skin" + } + ], + "sms:displayName": "Cancer Type", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:FamilyHistory", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "FamilyHistory", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Breast" + }, + { + "@id": "bts:Colorectal" + }, + { + "@id": "bts:Lung" + }, + { + "@id": "bts:Prostate" + }, + { + "@id": "bts:Skin" + } + ], + "sms:displayName": "Family History", + "sms:required": "sms:true", + "sms:validationRules": [ + "list strict" + ] + }, + { + "@id": 
"bts:ValidValue", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ValidValue", + "rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "ValidValue", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Breast", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Breast", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Breast", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Colorectal", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Colorectal", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Colorectal", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Lung", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Lung", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Lung", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Prostate", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Prostate", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Prostate", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Skin", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Skin", + "rdfs:subClassOf": [ + { + "@id": "bts:CancerType" + }, + { + "@id": "bts:FamilyHistory" + } + ], + 
"schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Skin", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Biospecimen", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Biospecimen", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Biospecimen", + "sms:required": "sms:false", + "sms:requiresComponent": [ + { + "@id": "bts:Patient" + } + ], + "sms:requiresDependency": [ + { + "@id": "bts:SampleID" + }, + { + "@id": "bts:PatientID" + }, + { + "@id": "bts:TissueStatus" + }, + { + "@id": "bts:Component" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:SampleID", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "SampleID", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Sample ID", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:TissueStatus", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "TissueStatus", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Healthy" + }, + { + "@id": "bts:Malignant" + }, + { + "@id": "bts:None" + } + ], + "sms:displayName": "Tissue Status", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Malignant", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Malignant", + "rdfs:subClassOf": [ + { + "@id": "bts:TissueStatus" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Malignant", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:None", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "None", + "rdfs:subClassOf": [ 
+ { + "@id": "bts:TissueStatus" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "None", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:BulkRNA-seqAssay", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "BulkRNA-seqAssay", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Bulk RNA-seq Assay", + "sms:required": "sms:false", + "sms:requiresComponent": [ + { + "@id": "bts:Biospecimen" + } + ], + "sms:requiresDependency": [ + { + "@id": "bts:Filename" + }, + { + "@id": "bts:SampleID" + }, + { + "@id": "bts:FileFormat" + }, + { + "@id": "bts:Component" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:Filename", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Filename", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Filename", + "sms:required": "sms:true", + "sms:validationRules": { + "MockFilename": "filenameExists" + } + }, + { + "@id": "bts:FileFormat", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "FileFormat", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:FASTQ" + }, + { + "@id": "bts:BAM" + }, + { + "@id": "bts:CRAM" + }, + { + "@id": "bts:CSV/TSV" + } + ], + "sms:displayName": "File Format", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:FASTQ", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "FASTQ", + "rdfs:subClassOf": [ + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "FASTQ", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": 
"bts:BAM", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "BAM", + "rdfs:subClassOf": [ + { + "@id": "bts:ValidValue" + }, + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "BAM", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:GenomeBuild" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:CRAM", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CRAM", + "rdfs:subClassOf": [ + { + "@id": "bts:ValidValue" + }, + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "CRAM", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:GenomeBuild" + }, + { + "@id": "bts:GenomeFASTA" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:CSV/TSV", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CSV/TSV", + "rdfs:subClassOf": [ + { + "@id": "bts:ValidValue" + }, + { + "@id": "bts:FileFormat" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "CSV/TSV", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:GenomeBuild" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:GenomeBuild", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GenomeBuild", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:GRCh37" + }, + { + "@id": "bts:GRCh38" + }, + { + "@id": "bts:GRCm38" + }, + { + "@id": "bts:GRCm39" + } + ], + "sms:displayName": "Genome Build", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:GenomeFASTA", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GenomeFASTA", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + 
"schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Genome FASTA", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:GRCh37", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GRCh37", + "rdfs:subClassOf": [ + { + "@id": "bts:GenomeBuild" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "GRCh37", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:GRCh38", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GRCh38", + "rdfs:subClassOf": [ + { + "@id": "bts:GenomeBuild" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "GRCh38", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:GRCm38", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GRCm38", + "rdfs:subClassOf": [ + { + "@id": "bts:GenomeBuild" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "GRCm38", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:GRCm39", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "GRCm39", + "rdfs:subClassOf": [ + { + "@id": "bts:GenomeBuild" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "GRCm39", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:MockComponent", + "@type": "rdfs:Class", + "rdfs:comment": "Component to hold mock attributes for testing all validation rules", + "rdfs:label": "MockComponent", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "MockComponent", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:CheckList" + }, + { + "@id": "bts:CheckListEnum" + }, + { + "@id": 
"bts:CheckListLike" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListStrict" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:CheckRegexList" + }, + { + "@id": "bts:CheckRegexListLike" + }, + { + "@id": "bts:CheckRegexListStrict" + }, + { + "@id": "bts:CheckRegexSingle" + }, + { + "@id": "bts:CheckRegexFormat" + }, + { + "@id": "bts:CheckRegexInteger" + }, + { + "@id": "bts:CheckNum" + }, + { + "@id": "bts:CheckFloat" + }, + { + "@id": "bts:CheckInt" + }, + { + "@id": "bts:CheckString" + }, + { + "@id": "bts:CheckURL" + }, + { + "@id": "bts:CheckMatchatLeast" + }, + { + "@id": "bts:CheckMatchatLeastvalues" + }, + { + "@id": "bts:CheckMatchExactly" + }, + { + "@id": "bts:CheckMatchExactlyvalues" + }, + { + "@id": "bts:CheckMatchNone" + }, + { + "@id": "bts:CheckMatchNonevalues" + }, + { + "@id": "bts:CheckRecommended" + }, + { + "@id": "bts:CheckAges" + }, + { + "@id": "bts:CheckUnique" + }, + { + "@id": "bts:CheckRange" + }, + { + "@id": "bts:CheckDate" + }, + { + "@id": "bts:CheckNA" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:CheckList", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckList", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check List", + "sms:required": "sms:true", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:CheckListEnum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListEnum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "Check List Enum", + "sms:required": "sms:true", + "sms:validationRules": [ + "list" + ] + }, + { + "@id": "bts:CheckListLike", + "@type": "rdfs:Class", + 
"rdfs:comment": "TBD", + "rdfs:label": "CheckListLike", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check List Like", + "sms:required": "sms:true", + "sms:validationRules": [ + "list like" + ] + }, + { + "@id": "bts:CheckListLikeEnum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListLikeEnum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "Check List Like Enum", + "sms:required": "sms:true", + "sms:validationRules": [ + "list like" + ] + }, + { + "@id": "bts:CheckListStrict", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListStrict", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check List Strict", + "sms:required": "sms:true", + "sms:validationRules": [ + "list strict" + ] + }, + { + "@id": "bts:CheckListEnumStrict", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckListEnumStrict", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:displayName": "Check List Enum Strict", + "sms:required": "sms:true", + "sms:validationRules": [ + "list strict" + ] + }, + { + "@id": "bts:CheckRegexList", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexList", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": 
"Check Regex List", + "sms:required": "sms:true", + "sms:validationRules": [ + "list", + "regex match [a-f]" + ] + }, + { + "@id": "bts:CheckRegexListLike", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexListLike", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex List Like", + "sms:required": "sms:true", + "sms:validationRules": [ + "list like", + "regex match [a-f]" + ] + }, + { + "@id": "bts:CheckRegexListStrict", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexListStrict", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Regex List Strict", + "sms:required": "sms:true", + "sms:validationRules": [ + "list strict", + "regex match [a-f]" + ] + }, + { + "@id": "bts:CheckRegexSingle", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexSingle", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "string", + "sms:displayName": "Check Regex Single", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:CheckRegexFormat", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexFormat", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "string", + "sms:displayName": "Check Regex Format", + "sms:required": "sms:true", + "sms:validationRules": [ + "regex match [a-f]" + ] + }, + { + "@id": "bts:CheckRegexInteger", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRegexInteger", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + 
"sms:displayName": "Check Regex Integer", + "sms:required": "sms:true", + "sms:validationRules": [ + "regex search ^\\d+$" + ] + }, + { + "@id": "bts:CheckNum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckNum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Num", + "sms:required": "sms:true", + "sms:validationRules": [ + "num error" + ] + }, + { + "@id": "bts:CheckFloat", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckFloat", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "number", + "sms:displayName": "Check Float", + "sms:required": "sms:true", + "sms:validationRules": [ + "float error" + ] + }, + { + "@id": "bts:CheckInt", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckInt", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Int", + "sms:required": "sms:true", + "sms:validationRules": [ + "int error" + ] + }, + { + "@id": "bts:CheckString", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckString", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check String", + "sms:required": "sms:true", + "sms:validationRules": [ + "str error" + ] + }, + { + "@id": "bts:CheckURL", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckURL", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "string", + "sms:displayName": "Check URL", + "sms:format": "uri", + "sms:required": "sms:true", + "sms:validationRules": [ + "url" + ] + }, + { + "@id": 
"bts:CheckMatchatLeast", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchatLeast", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match at Least", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchAtLeastOne Patient.PatientID set" + ] + }, + { + "@id": "bts:CheckMatchatLeastvalues", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchatLeastvalues", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match at Least values", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchAtLeastOne MockComponent.checkMatchatLeastvalues value" + ] + }, + { + "@id": "bts:CheckMatchExactly", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchExactly", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match Exactly", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchExactlyOne MockComponent.checkMatchExactly set" + ] + }, + { + "@id": "bts:CheckMatchExactlyvalues", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchExactlyvalues", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match Exactly values", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchExactlyOne MockComponent.checkMatchExactlyvalues value" + ] + }, + { + "@id": "bts:CheckMatchNone", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchNone", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match 
None", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchNone MockComponent.checkMatchNone set error" + ] + }, + { + "@id": "bts:CheckMatchNonevalues", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckMatchNonevalues", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Match None values", + "sms:required": "sms:true", + "sms:validationRules": [ + "matchNone MockComponent.checkMatchNonevalues value error" + ] + }, + { + "@id": "bts:CheckRecommended", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRecommended", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Recommended", + "sms:required": "sms:false", + "sms:validationRules": [ + "recommended" + ] + }, + { + "@id": "bts:CheckAges", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckAges", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Ages", + "sms:required": "sms:true", + "sms:validationRules": [ + "protectAges" + ] + }, + { + "@id": "bts:CheckUnique", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckUnique", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Unique", + "sms:required": "sms:true", + "sms:validationRules": [ + "unique error" + ] + }, + { + "@id": "bts:CheckRange", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckRange", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check Range", + "sms:required": "sms:true", + "sms:validationRules": [ + 
"inRange 50 100 error" + ] + }, + { + "@id": "bts:CheckDate", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckDate", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "string", + "sms:displayName": "Check Date", + "sms:format": "date", + "sms:required": "sms:true", + "sms:validationRules": [ + "date" + ] + }, + { + "@id": "bts:CheckNA", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "CheckNA", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Check NA", + "sms:required": "sms:true", + "sms:validationRules": [ + "int", + "IsNA" + ] + }, + { + "@id": "bts:Ab", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Ab", + "rdfs:subClassOf": [ + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "ab", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Cd", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Cd", + "rdfs:subClassOf": [ + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "cd", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Ef", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Ef", + 
"rdfs:subClassOf": [ + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "ef", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Gh", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Gh", + "rdfs:subClassOf": [ + { + "@id": "bts:CheckListEnum" + }, + { + "@id": "bts:CheckListLikeEnum" + }, + { + "@id": "bts:CheckListEnumStrict" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "gh", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:MockRDB", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MockRDB", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "MockRDB", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:MockRDBId" + }, + { + "@id": "bts:SourceManifest" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:MockRDBId", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MockRDBId", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "MockRDB_id", + "sms:required": "sms:true", + "sms:validationRules": [ + "int" + ] + }, + { + "@id": "bts:SourceManifest", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "SourceManifest", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], 
+ "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "SourceManifest", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:MockFilename", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MockFilename", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "MockFilename", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:Filename" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:JSONSchemaComponent", + "@type": "rdfs:Class", + "rdfs:comment": "Component to hold attributes for testing JSON Schemas", + "rdfs:label": "JSONSchemaComponent", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "JSONSchemaComponent", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:NoRules" + }, + { + "@id": "bts:NoRulesNotRequired" + }, + { + "@id": "bts:String" + }, + { + "@id": "bts:StringNotRequired" + }, + { + "@id": "bts:Enum" + }, + { + "@id": "bts:EnumNotRequired" + }, + { + "@id": "bts:Date" + }, + { + "@id": "bts:URL" + }, + { + "@id": "bts:InRange" + }, + { + "@id": "bts:Regex" + }, + { + "@id": "bts:List" + }, + { + "@id": "bts:ListNotRequired" + }, + { + "@id": "bts:ListEnum" + }, + { + "@id": "bts:ListEnumNotRequired" + }, + { + "@id": "bts:ListBoolean" + }, + { + "@id": "bts:Integer" + }, + { + "@id": "bts:ListInRange" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:NoRules", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "NoRules", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "No Rules", + "sms:required": "sms:true", + 
"sms:validationRules": [] + }, + { + "@id": "bts:NoRulesNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "NoRulesNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "No Rules Not Required", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:String", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "String", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "string", + "sms:displayName": "String", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:StringNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "StringNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "string", + "sms:displayName": "String Not Required", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Enum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Enum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:columnType": "string", + "sms:displayName": "Enum", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:EnumNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "EnumNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, 
+ { + "@id": "bts:Gh" + } + ], + "sms:columnType": "string", + "sms:displayName": "Enum Not Required", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:Date", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Date", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "string", + "sms:displayName": "Date", + "sms:format": "date", + "sms:required": "sms:true", + "sms:validationRules": [ + "date" + ] + }, + { + "@id": "bts:URL", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "URL", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "string", + "sms:displayName": "URL", + "sms:format": "uri", + "sms:required": "sms:true", + "sms:validationRules": [ + "url" + ] + }, + { + "@id": "bts:InRange", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "InRange", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "number", + "sms:displayName": "InRange", + "sms:required": "sms:true", + "sms:validationRules": [ + "inRange 50 100" + ] + }, + { + "@id": "bts:Regex", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Regex", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "string", + "sms:displayName": "Regex", + "sms:required": "sms:true", + "sms:validationRules": [ + "regex search [a-f]" + ] + }, + { + "@id": "bts:List", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "List", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "string_list", + "sms:displayName": "List", + 
"sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:ListNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "string_list", + "sms:displayName": "List Not Required", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:ListEnum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListEnum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:columnType": "string_list", + "sms:displayName": "List Enum", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:ListEnumNotRequired", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListEnumNotRequired", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "schema:rangeIncludes": [ + { + "@id": "bts:Ab" + }, + { + "@id": "bts:Cd" + }, + { + "@id": "bts:Ef" + }, + { + "@id": "bts:Gh" + } + ], + "sms:columnType": "string_list", + "sms:displayName": "List Enum Not Required", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:ListBoolean", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListBoolean", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "boolean_list", + "sms:displayName": "List Boolean", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Integer", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Integer", + 
"rdfs:subClassOf": [ + { + "@id": "bts:Thing" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Integer", + "sms:required": "sms:false", + "sms:validationRules": [] + }, + { + "@id": "bts:ListInRange", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListInRange", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "integer_list", + "sms:displayName": "List InRange", + "sms:required": "sms:true", + "sms:validationRules": [ + "inRange 50 100" + ] + }, + { + "@id": "bts:ListInteger", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "ListInteger", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "integer_list", + "sms:displayName": "List Integer", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:TypeDefinitionComponent", + "@type": "rdfs:Class", + "rdfs:comment": "Component to check type specification", + "rdfs:label": "TypeDefinitionComponent", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "TypeDefinitionComponent", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:Stringtype" + }, + { + "@id": "bts:Stringtypecaps" + }, + { + "@id": "bts:Inttype" + }, + { + "@id": "bts:Inttypecaps" + }, + { + "@id": "bts:Numtype" + }, + { + "@id": "bts:Numtypecaps" + }, + { + "@id": "bts:Nantype" + }, + { + "@id": "bts:Missingtype" + }, + { + "@id": "bts:Booleantype" + }, + { + "@id": "bts:Booleantypecaps" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:Stringtype", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Stringtype", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + 
], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "string", + "sms:displayName": "String type", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Stringtypecaps", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Stringtypecaps", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "string", + "sms:displayName": "String type caps", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Inttype", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Inttype", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "integer", + "sms:displayName": "Int type", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Inttypecaps", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Inttypecaps", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "integer", + "sms:displayName": "Int type caps", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Numtype", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Numtype", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "number", + "sms:displayName": "Num type", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Numtypecaps", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Numtypecaps", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "number", + "sms:displayName": "Num type caps", + 
"sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Nantype", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Nantype", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Nan type", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Missingtype", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Missingtype", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "Missing type", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Booleantype", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Booleantype", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "boolean", + "sms:displayName": "Boolean type", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:Booleantypecaps", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "Booleantypecaps", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "boolean", + "sms:displayName": "Boolean type caps", + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:RangeComponent", + "@type": "rdfs:Class", + "rdfs:comment": "Component to ensure maximum and minimum can be set correctly", + "rdfs:label": "RangeComponent", + "rdfs:subClassOf": [ + { + "@id": "bts:DataType" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:displayName": "RangeComponent", + "sms:required": "sms:false", + "sms:requiresDependency": [ + { + "@id": "bts:Component" + }, + { + "@id": "bts:MaximumInteger" + }, + { + "@id": 
"bts:MinimumInteger" + }, + { + "@id": "bts:MaximumFloat" + }, + { + "@id": "bts:MinimumFloat" + }, + { + "@id": "bts:MaximumMinimum" + }, + { + "@id": "bts:MaximumMinimumIntegerList" + }, + { + "@id": "bts:MaximumMinimumValidationRule" + } + ], + "sms:validationRules": [] + }, + { + "@id": "bts:MaximumInteger", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MaximumInteger", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "integer", + "sms:displayName": "Maximum Integer", + "sms:maximum": 100.0, + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:MinimumInteger", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MinimumInteger", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "integer", + "sms:displayName": "Minimum Integer", + "sms:minimum": 10.0, + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:MaximumFloat", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MaximumFloat", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "number", + "sms:displayName": "Maximum Float", + "sms:maximum": 100.5, + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:MinimumFloat", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MinimumFloat", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "number", + "sms:displayName": "Minimum Float", + "sms:minimum": 10.8, + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:MaximumMinimum", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": 
"MaximumMinimum", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "integer", + "sms:displayName": "Maximum Minimum", + "sms:maximum": 100.0, + "sms:minimum": 10.0, + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:MaximumMinimumIntegerList", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MaximumMinimumIntegerList", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "integer_list", + "sms:displayName": "Maximum Minimum Integer List", + "sms:maximum": 100.0, + "sms:minimum": 10.0, + "sms:required": "sms:true", + "sms:validationRules": [] + }, + { + "@id": "bts:MaximumMinimumValidationRule", + "@type": "rdfs:Class", + "rdfs:comment": "TBD", + "rdfs:label": "MaximumMinimumValidationRule", + "rdfs:subClassOf": [ + { + "@id": "bts:DataProperty" + } + ], + "schema:isPartOf": { + "@id": "http://schema.biothings.io" + }, + "sms:columnType": "integer", + "sms:displayName": "Maximum Minimum Validation Rule", + "sms:maximum": 200.0, + "sms:minimum": 10.0, + "sms:required": "sms:true", + "sms:validationRules": [ + "inRange 50 100" + ] + } + ], + "@id": "http://schema.biothings.io/#0.1" +} From 7f6efe4f657269f5dd5428c5c122c371ae2ccf04 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Mon, 8 Dec 2025 11:14:39 -0700 Subject: [PATCH 27/29] update pattern propagation --- synapseclient/extensions/curator/schema_generation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index e2bb707b5..219538785 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -4865,7 +4865,7 @@ def __post_init__(self) -> None: msg = ( f"A regex validation rule 
is set for property: {self.name}, but the pattern is not set in the data model. " f"The regex pattern will be set to {self.pattern}, but the regex rule is deprecated and validation " - "rules will no longer be used in the future." + "rules will no longer be used in the future. " "Please explicitly set the regex pattern in the 'Pattern' column in the data model." ) self.logger.warning(msg) @@ -5500,6 +5500,9 @@ def _set_property( else: prop = _create_simple_property(node) + if node.pattern: + prop["pattern"] = node.pattern + prop["description"] = node.description prop["title"] = node.display_name schema_property = {node_name: prop} From 92cb4d3ef1fd43bf258c92c22be683bd6177f586 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Mon, 8 Dec 2025 11:38:10 -0700 Subject: [PATCH 28/29] update column type checking --- .../extensions/curator/schema_generation.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/synapseclient/extensions/curator/schema_generation.py b/synapseclient/extensions/curator/schema_generation.py index 219538785..b6328c35e 100644 --- a/synapseclient/extensions/curator/schema_generation.py +++ b/synapseclient/extensions/curator/schema_generation.py @@ -4812,7 +4812,9 @@ def __post_init__(self) -> None: # Validate column type compatibility with min/max constraints self._validate_column_type_compatibility( - explicit_maximum=explicit_maximum, explicit_minimum=explicit_minimum + explicit_maximum=explicit_maximum, + explicit_minimum=explicit_minimum, + column_pattern=column_pattern, ) # url and date rules are deprecated for adding format keyword @@ -4854,11 +4856,6 @@ def __post_init__(self) -> None: explicit_maximum if explicit_maximum is not None else implicit_maximum ) - if column_pattern and column_type and column_type.value != "string": - raise ValueError( - "Column type must be set to 'string' to use column pattern specification for regex validation." 
- ) - self.pattern = column_pattern if column_pattern else rule_pattern if rule_pattern and not column_pattern: @@ -4900,6 +4897,7 @@ def _validate_column_type_compatibility( self, explicit_maximum: Union[int, float, None], explicit_minimum: Union[int, float, None], + column_pattern: Optional[str] = None, ) -> None: """Validate that columnType is compatible with Maximum/Minimum constraints. @@ -4922,7 +4920,7 @@ def _validate_column_type_compatibility( None: This method performs validation only and doesn't return a value. It raises ValueError if validation fails. """ - if not explicit_maximum and not explicit_minimum: + if not explicit_maximum and not explicit_minimum and not column_pattern: return if not self.type: raise ValueError( @@ -4930,8 +4928,14 @@ def _validate_column_type_compatibility( f"(min: {explicit_minimum}, max: {explicit_maximum}) are specified, " f"but columnType is not set. Please set columnType to 'number', 'integer' or 'integer_list'." ) + + if column_pattern and self.type and self.type.value != "string": + raise ValueError( + "Column type must be set to 'string' to use column pattern specification for regex validation." 
+ ) + # If type is specified but not numeric, raise error - if self.type not in ( + if (explicit_maximum or explicit_minimum) and self.type not in ( AtomicColumnType.NUMBER, AtomicColumnType.INTEGER, ListColumnType.INTEGER_LIST, From 688b1e168961e177d905a7b8b33dbba9888c2db8 Mon Sep 17 00:00:00 2001 From: GiaJordan Date: Mon, 8 Dec 2025 11:45:21 -0700 Subject: [PATCH 29/29] update expected jsonschema --- .../expected_jsonschemas/expected.MockComponent.schema.json | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json index ab88d4e1e..5236a424c 100644 --- a/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json +++ b/tests/unit/synapseclient/extensions/schema_files/expected_jsonschemas/expected.MockComponent.schema.json @@ -170,16 +170,19 @@ }, "CheckRegexList": { "description": "TBD", + "pattern": "^[a-f]", "title": "Check Regex List", "type": "array" }, "CheckRegexListLike": { "description": "TBD", + "pattern": "^[a-f]", "title": "Check Regex List Like", "type": "array" }, "CheckRegexListStrict": { "description": "TBD", + "pattern": "^[a-f]", "title": "Check Regex List Strict", "type": "array" },