From 8647237e16876f603e749c90ec1df7db072727d8 Mon Sep 17 00:00:00 2001 From: eesa456 Date: Thu, 22 May 2025 01:33:18 +0100 Subject: [PATCH 1/3] NRL-1114 for nicip types allow multiple categories --- ...test_search_document_reference_consumer.py | 120 ++++++++++++++++++ layer/nrlf/core/constants.py | 10 +- layer/nrlf/core/dynamodb/repository.py | 93 +++++++++----- layer/nrlf/core/tests/test_type_categories.py | 20 ++- layer/nrlf/core/tests/test_validators.py | 21 ++- layer/nrlf/core/validators.py | 33 +++-- 6 files changed, 240 insertions(+), 57 deletions(-) diff --git a/api/consumer/searchDocumentReference/tests/test_search_document_reference_consumer.py b/api/consumer/searchDocumentReference/tests/test_search_document_reference_consumer.py index 9643dad18..10e6e9899 100644 --- a/api/consumer/searchDocumentReference/tests/test_search_document_reference_consumer.py +++ b/api/consumer/searchDocumentReference/tests/test_search_document_reference_consumer.py @@ -351,6 +351,126 @@ def test_search_document_reference_happy_path_with_nicip_type( } +@mock_aws +@mock_repository +def test_search_document_reference_happy_path_with_nicip_type_for_both_categories( + repository: DocumentPointerRepository, +): + doc_ref = load_document_reference("Y05868-736253002-Valid") + doc_ref.type.coding[0].code = "MAULR" + doc_ref.type.coding[0].system = "https://nicip.nhs.uk" + doc_ref.type.coding[0].display = "MRA Upper Limb Rt" + doc_ref.category[0].coding[0].code = "721981007" + doc_ref.category[0].coding[0].display = "Diagnostic Studies Report" + doc_pointer = DocumentPointer.from_document_reference(doc_ref) + + repository.create(doc_pointer) + + doc_ref2 = load_document_reference("Y05868-736253002-Valid") + doc_ref2.type.coding[0].code = "MAULR" + doc_ref2.type.coding[0].system = "https://nicip.nhs.uk" + doc_ref2.type.coding[0].display = "MRA Upper Limb Rt" + doc_ref2.id = "Y05868-736253002-Valid2" + doc_ref2.category[0].coding[0].code = "103693007" + doc_ref2.category[0].coding[0].display = "Diagnostic procedure" + doc_pointer2 = DocumentPointer.from_document_reference(doc_ref2) + + repository.create(doc_pointer2) + + event = create_test_api_gateway_event( + headers=create_headers(), + query_string_parameters={ + "subject:identifier": "https://fhir.nhs.uk/Id/nhs-number|6700028191", + "type": "https://nicip.nhs.uk|MAULR", + }, + ) + + result = handler(event, create_mock_context()) + body = result.pop("body") + + assert result == { + "statusCode": "200", + "headers": default_response_headers(), + "isBase64Encoded": False, + } + + parsed_body = json.loads(body) + assert parsed_body == { + "resourceType": "Bundle", + "type": "searchset", + "link": [ + { + "relation": "self", + "url": "https://pytest.api.service.nhs.uk/record-locator/consumer/FHIR/R4/DocumentReference?subject:identifier=https://fhir.nhs.uk/Id/nhs-number|6700028191&type=https://nicip.nhs.uk|MAULR", + } + ], + "total": 2, + "entry": [ + {"resource": doc_ref2.model_dump(exclude_none=True)}, + {"resource": doc_ref.model_dump(exclude_none=True)}, + ], + } + + +@mock_aws +@mock_repository +def test_search_document_reference_happy_path_with_nicip_type_for_one_category( + repository: DocumentPointerRepository, +): + doc_ref = load_document_reference("Y05868-736253002-Valid") + doc_ref.type.coding[0].code = "MAULR" + doc_ref.type.coding[0].system = "https://nicip.nhs.uk" + doc_ref.type.coding[0].display = "MRA Upper Limb Rt" + doc_ref.category[0].coding[0].code = "721981007" + doc_ref.category[0].coding[0].display = "Diagnostic Studies Report" + doc_pointer = DocumentPointer.from_document_reference(doc_ref) + + repository.create(doc_pointer) + + doc_ref2 = load_document_reference("Y05868-736253002-Valid") + doc_ref2.type.coding[0].code = "MAULR" + doc_ref2.type.coding[0].system = "https://nicip.nhs.uk" + doc_ref2.type.coding[0].display = "MRA Upper Limb Rt" + doc_ref2.id = "Y05868-736253002-Valid2" + doc_ref2.category[0].coding[0].code = "103693007" + doc_ref2.category[0].coding[0].display = "Diagnostic procedure" + doc_pointer2 = DocumentPointer.from_document_reference(doc_ref2) + + repository.create(doc_pointer2) + + event = create_test_api_gateway_event( + headers=create_headers(), + query_string_parameters={ + "subject:identifier": "https://fhir.nhs.uk/Id/nhs-number|6700028191", + "type": "https://nicip.nhs.uk|MAULR", + "category": "http://snomed.info/sct|721981007", + }, + ) + + result = handler(event, create_mock_context()) + body = result.pop("body") + + assert result == { + "statusCode": "200", + "headers": default_response_headers(), + "isBase64Encoded": False, + } + + parsed_body = json.loads(body) + assert parsed_body == { + "resourceType": "Bundle", + "type": "searchset", + "link": [ + { + "relation": "self", + "url": "https://pytest.api.service.nhs.uk/record-locator/consumer/FHIR/R4/DocumentReference?subject:identifier=https://fhir.nhs.uk/Id/nhs-number|6700028191&type=https://nicip.nhs.uk|MAULR&category=http://snomed.info/sct|721981007", + } + ], + "total": 1, + "entry": [{"resource": doc_ref.model_dump(exclude_none=True)}], + } + + @mock_aws @mock_repository def test_search_document_reference_no_results(repository: DocumentPointerRepository): diff --git a/layer/nrlf/core/constants.py b/layer/nrlf/core/constants.py index d7b94f366..43a98071b 100644 --- a/layer/nrlf/core/constants.py +++ b/layer/nrlf/core/constants.py @@ -180,8 +180,14 @@ def coding_value(self): PointerTypes.SUMMARY_RECORD.value: Categories.CLINICAL_NOTE.value, # # Imaging - PointerTypes.MRA_UPPER_LIMB_ARTERY.value: Categories.DIAGNOSTIC_STUDIES_REPORT.value, - PointerTypes.MRI_AXILLA_BOTH.value: Categories.DIAGNOSTIC_PROCEDURE.value, + PointerTypes.MRA_UPPER_LIMB_ARTERY.value: { + Categories.DIAGNOSTIC_STUDIES_REPORT.value, + Categories.DIAGNOSTIC_PROCEDURE.value, + }, + PointerTypes.MRI_AXILLA_BOTH.value: { + Categories.DIAGNOSTIC_PROCEDURE.value, + Categories.DIAGNOSTIC_STUDIES_REPORT.value, + }, } PRACTICE_SETTING_VALUE_SET_URL = ( diff --git a/layer/nrlf/core/dynamodb/repository.py b/layer/nrlf/core/dynamodb/repository.py index a65931ccb..2682ee4ed 100644 --- a/layer/nrlf/core/dynamodb/repository.py +++ b/layer/nrlf/core/dynamodb/repository.py @@ -15,11 +15,16 @@ RepositoryModel = TypeVar("RepositoryModel", bound=DynamoDBModel) -def _get_sk_ids_for_type(pointer_type: str) -> tuple: +def _get_sk_ids_for_type(pointer_type: str) -> tuple[str, str]: if pointer_type not in TYPE_CATEGORIES: raise ValueError(f"Cannot find category for pointer type: {pointer_type}") - category = TYPE_CATEGORIES[pointer_type] + categories = TYPE_CATEGORIES[pointer_type] + if isinstance(categories, str): + category = categories + else: + category = next(iter(categories)) + category_system, category_code = category.split("|") if category_system not in SYSTEM_SHORT_IDS: raise ValueError(f"Unknown system for category: {category_system}") @@ -158,7 +163,7 @@ def count_by_nhs_number( if len(pointer_types) == 1: # Optimisation for single pointer type - category_id, type_id = _get_sk_ids_for_type(pointer_types[0]) + category_id, type_id = _get_sk_ids_for_type(pointer_types[0])[0] patient_sort = f"C#{category_id}#T#{type_id}" key_conditions.append("begins_with(patient_sort, :patient_sort)") expression_values[":patient_sort"] = patient_sort @@ -220,12 +225,12 @@ def search( pointer_types: Optional[List[str]] = [], categories: Optional[List[str]] = [], ) -> Iterator[DocumentPointer]: - """""" logger.log( LogReference.REPOSITORY020, nhs_number=nhs_number, custodian=custodian, pointer_types=pointer_types, + categories=categories, ) key_conditions = ["patient_key = :patient_key"] @@ -233,43 +238,67 @@ def search( expression_names = {} expression_values = {":patient_key": f"P#{nhs_number}"} - if len(pointer_types) == 1: - # Optimisation for single pointer type - category_id, type_id = _get_sk_ids_for_type(pointer_types[0]) - patient_sort = f"C#{category_id}#T#{type_id}" - key_conditions.append("begins_with(patient_sort, :patient_sort)") - expression_values[":patient_sort"] = patient_sort - else: - # Handle single/multiple categories and pointer types with filter expressions - if len(categories) == 1: - split_category = categories[0].split("|") - category_id = ( - SYSTEM_SHORT_IDS[split_category[0]] + "-" + split_category[1] - ) - patient_sort = f"C#{category_id}" - key_conditions.append("begins_with(patient_sort, :patient_sort)") - expression_values[":patient_sort"] = patient_sort - - if len(categories) > 1: - expression_names["#category"] = "category" - category_filters = [ - f"#category = :category_{i}" for i in range(len(categories)) - ] - category_filter_values = { - f":category_{i}": categories[i] for i in range(len(categories)) - } - filter_expressions.append(f"({' OR '.join(category_filters)})") - expression_values.update(category_filter_values) - + # If both categories and pointer_types are provided, filter on both + if pointer_types and categories: expression_names["#pointer_type"] = "type" + expression_names["#category"] = "category" types_filters = [ f"#pointer_type = :type_{i}" for i in range(len(pointer_types)) ] types_filter_values = { f":type_{i}": pointer_types[i] for i in range(len(pointer_types)) } + category_filters = [ + f"#category = :category_{i}" for i in range(len(categories)) + ] + category_filter_values = { + f":category_{i}": categories[i] for i in range(len(categories)) + } filter_expressions.append(f"({' OR '.join(types_filters)})") + filter_expressions.append(f"({' OR '.join(category_filters)})") expression_values.update(types_filter_values) + expression_values.update(category_filter_values) + + # If only pointer_types are provided, retrieve all categories for each type and filter on both + elif pointer_types and not categories: + expression_names["#pointer_type"] = "type" + expression_names["#category"] = "category" + types_filters = [] + category_filters = [] + types_filter_values = {} + category_filter_values = {} + category_set = set() + for i, pointer_type in enumerate(pointer_types): + types_filters.append(f"#pointer_type = :type_{i}") + types_filter_values[f":type_{i}"] = pointer_type + # Get all categories for this type, handling both set and string + if pointer_type in TYPE_CATEGORIES: + cats = TYPE_CATEGORIES[pointer_type] + if isinstance(cats, str): + category_set.add(cats) + else: + category_set.update(cats) + for j, cat in enumerate(category_set): + category_filters.append(f"#category = :category_{j}") + category_filter_values[f":category_{j}"] = cat + if types_filters: + filter_expressions.append(f"({' OR '.join(types_filters)})") + if category_filters: + filter_expressions.append(f"({' OR '.join(category_filters)})") + expression_values.update(types_filter_values) + expression_values.update(category_filter_values) + + # If only categories are provided, filter on categories + elif categories and not pointer_types: + expression_names["#category"] = "category" + category_filters = [ + f"#category = :category_{i}" for i in range(len(categories)) + ] + category_filter_values = { + f":category_{i}": categories[i] for i in range(len(categories)) + } + filter_expressions.append(f"({' OR '.join(category_filters)})") + expression_values.update(category_filter_values) if custodian: logger.log( diff --git a/layer/nrlf/core/tests/test_type_categories.py b/layer/nrlf/core/tests/test_type_categories.py index 8271861e7..33147bf9c 100644 --- a/layer/nrlf/core/tests/test_type_categories.py +++ b/layer/nrlf/core/tests/test_type_categories.py @@ -25,8 +25,8 @@ def test_pointer_types(pointer_type): @pytest.mark.parametrize("category", Categories) def test_pointer_category_has_types(category): - assert ( - category.value in TYPE_CATEGORIES.values() + assert any( + category.value in cat_set for cat_set in TYPE_CATEGORIES.values() ), f"Pointer category {category.value} is not used by any type" @@ -42,11 +42,17 @@ def test_type_category_type_is_known(type): assert type in PointerTypes.list(), f"Unknown type {type} used in TYPE_CATEGORIES" -@pytest.mark.parametrize("category", TYPE_CATEGORIES.values()) -def test_type_category_category_is_known(category): - assert ( - category in Categories.list() - ), f"Unknown category {category} used in TYPE_CATEGORIES" +@pytest.mark.parametrize("cat_set", TYPE_CATEGORIES.values()) +def test_type_category_category_is_known(cat_set): + if isinstance(cat_set, (set, list, tuple)): + for category in cat_set: + assert ( + category in Categories.list() + ), f"Unknown category {category} used in TYPE_CATEGORIES" + else: + assert ( + cat_set in Categories.list() + ), f"Unknown category {cat_set} used in TYPE_CATEGORIES" @pytest.mark.parametrize("category", Categories) diff --git a/layer/nrlf/core/tests/test_validators.py b/layer/nrlf/core/tests/test_validators.py index d1440f429..e7ac3ad59 100644 --- a/layer/nrlf/core/tests/test_validators.py +++ b/layer/nrlf/core/tests/test_validators.py @@ -397,8 +397,11 @@ def test_validate_category_coding_display_mismatch( matching_type_str = next( ( type_str - for type_str in TYPE_CATEGORIES - if TYPE_CATEGORIES[type_str] == category_str + for type_str, cat_val in TYPE_CATEGORIES.items() + if ( + (isinstance(cat_val, set) and category_str in cat_val) + or (isinstance(cat_val, str) and cat_val == category_str) + ) ), None, ) @@ -417,11 +420,12 @@ def test_validate_category_coding_display_mismatch( } result = validator.validate(document_ref_data) - + expected_issues = 2 if type_code == "https://nicip.nhs.uk|" else 1 + issue_number = 1 if type_code == "https://nicip.nhs.uk|" else 0 assert result.is_valid is False assert result.resource.id == "Y05868-99999-99999-999999" - assert len(result.issues) == 1 - assert result.issues[0].model_dump(exclude_none=True) == { + assert len(result.issues) == expected_issues + assert result.issues[issue_number].model_dump(exclude_none=True) == { "severity": "error", "code": "business-rule", "details": { @@ -603,10 +607,15 @@ def test_validate_type_coding_display_mismatch(type_str: str, display: str): } # Find the category string that matches the category code to avoid that error - category_str = TYPE_CATEGORIES[type_str] + category_val = TYPE_CATEGORIES[type_str] + if isinstance(category_val, set): + category_str = next(iter(category_val)) + else: + category_str = category_val category_parts = category_str.split("|") category_system = category_parts[0] category_code = category_parts[1] + document_ref_data["category"][0] = { "coding": [ { diff --git a/layer/nrlf/core/validators.py b/layer/nrlf/core/validators.py index a52a14d63..65b2468ed 100644 --- a/layer/nrlf/core/validators.py +++ b/layer/nrlf/core/validators.py @@ -434,7 +434,7 @@ def _validate_category(self, model: DocumentReference): def _validate_type_category_mapping(self, model: DocumentReference): """ - Validate the type field matches the expected category + Validate the type field matches one of the expected categories. """ logger.log(LogReference.VALIDATOR001, step="type_category_mapping") @@ -444,16 +444,29 @@ def _validate_type_category_mapping(self, model: DocumentReference): category_id = f"{category_coding.system}|{category_coding.code}" if type_id not in PointerTypes.list() or category_id not in Categories.list(): - return # No point mapping to an unexisting/unsupported type/category + return - type_category = TYPE_CATEGORIES.get(type_id) - if type_category != category_id: - self.result.add_error( - issue_code="business-rule", - error_code="UNPROCESSABLE_ENTITY", - diagnostics=f"The Category code of the provided document '{category_id}' must match the allowed category for pointer type '{type_id}' with a category value of '{type_category}'", - field="category.coding[0].code", - ) + if type_id.startswith("https://nicip.nhs.uk|"): + allowed_categories = TYPE_CATEGORIES.get(type_id, set()) + if category_id not in allowed_categories: + self.result.add_error( + issue_code="business-rule", + error_code="UNPROCESSABLE_ENTITY", + diagnostics=( + f"The Category code of the provided document '{category_id}' must match " + f"one of the allowed categories for pointer type '{type_id}': {allowed_categories}" + ), + field="category.coding[0].code", + ) + else: + type_category = TYPE_CATEGORIES.get(type_id) + if type_category != category_id: + self.result.add_error( + issue_code="business-rule", + error_code="UNPROCESSABLE_ENTITY", + diagnostics=f"The Category code of the provided document '{category_id}' must match the allowed category for pointer type '{type_id}' with a category value of '{type_category}'", + field="category.coding[0].code", + ) def _validate_content_format(self, model: DocumentReference): """ From 177301ce2c97dfecff277cf43b8f86a5760e82d3 Mon Sep 17 00:00:00 2001 From: eesa456 Date: Thu, 22 May 2025 01:45:54 +0100 Subject: [PATCH 2/3] NRL-1114 add comments back --- layer/nrlf/core/dynamodb/repository.py | 1 + layer/nrlf/core/validators.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/layer/nrlf/core/dynamodb/repository.py b/layer/nrlf/core/dynamodb/repository.py index 2682ee4ed..e92d2550c 100644 --- a/layer/nrlf/core/dynamodb/repository.py +++ b/layer/nrlf/core/dynamodb/repository.py @@ -225,6 +225,7 @@ def search( pointer_types: Optional[List[str]] = [], categories: Optional[List[str]] = [], ) -> Iterator[DocumentPointer]: + """""" logger.log( LogReference.REPOSITORY020, nhs_number=nhs_number, diff --git a/layer/nrlf/core/validators.py b/layer/nrlf/core/validators.py index 65b2468ed..04962c50f 100644 --- a/layer/nrlf/core/validators.py +++ b/layer/nrlf/core/validators.py @@ -444,7 +444,7 @@ def _validate_type_category_mapping(self, model: DocumentReference): category_id = f"{category_coding.system}|{category_coding.code}" if type_id not in PointerTypes.list() or category_id not in Categories.list(): - return + return # No point mapping to an unexisting/unsupported type/category if type_id.startswith("https://nicip.nhs.uk|"): allowed_categories = TYPE_CATEGORIES.get(type_id, set()) From 912abccf34c1452d7d409b673bcadc0ea6580ad4 Mon Sep 17 00:00:00 2001 From: eesa456 Date: Thu, 22 May 2025 02:03:02 +0100 Subject: [PATCH 3/3] NRL-1114 reduce complexity and fix count by nhs num --- layer/nrlf/core/dynamodb/repository.py | 116 ++++++++++++------------- 1 file changed, 56 insertions(+), 60 deletions(-) diff --git a/layer/nrlf/core/dynamodb/repository.py b/layer/nrlf/core/dynamodb/repository.py index e92d2550c..a607fdccd 100644 --- a/layer/nrlf/core/dynamodb/repository.py +++ b/layer/nrlf/core/dynamodb/repository.py @@ -38,6 +38,47 @@ def _get_sk_ids_for_type(pointer_type: str) -> tuple[str, str]: return category_id, type_id +def _get_categories_for_pointer_types(pointer_types: list[str]) -> set[str]: + """Return all unique categories for the given pointer types.""" + category_set = set() + for pointer_type in pointer_types: + if pointer_type in TYPE_CATEGORIES: + cats = TYPE_CATEGORIES[pointer_type] + if isinstance(cats, str): + category_set.add(cats) + else: + category_set.update(cats) + return category_set + + +def _build_filter_expressions( + pointer_types, categories, expression_names, expression_values +): + """Build DynamoDB filter expressions for pointer_types and categories.""" + filter_expressions = [] + if pointer_types: + expression_names["#pointer_type"] = "type" + types_filters = [ + f"#pointer_type = :type_{i}" for i in range(len(pointer_types)) + ] + types_filter_values = { + f":type_{i}": pointer_types[i] for i in range(len(pointer_types)) + } + filter_expressions.append(f"({' OR '.join(types_filters)})") + expression_values.update(types_filter_values) + if categories: + expression_names["#category"] = "category" + category_filters = [ + f"#category = :category_{i}" for i in range(len(categories)) + ] + category_filter_values = { + f":category_{i}": categories[i] for i in range(len(categories)) + } + filter_expressions.append(f"({' OR '.join(category_filters)})") + expression_values.update(category_filter_values) + return filter_expressions + + class Repository(ABC, Generic[RepositoryModel]): ITEM_TYPE: Type[RepositoryModel] @@ -163,7 +204,7 @@ def count_by_nhs_number( if len(pointer_types) == 1: # Optimisation for single pointer type - category_id, type_id = _get_sk_ids_for_type(pointer_types[0])[0] + category_id, type_id = _get_sk_ids_for_type(pointer_types[0]) patient_sort = f"C#{category_id}#T#{type_id}" key_conditions.append("begins_with(patient_sort, :patient_sort)") expression_values[":patient_sort"] = patient_sort @@ -225,7 +266,6 @@ def search( pointer_types: Optional[List[str]] = [], categories: Optional[List[str]] = [], ) -> Iterator[DocumentPointer]: - """""" logger.log( LogReference.REPOSITORY020, nhs_number=nhs_number, @@ -239,67 +279,23 @@ def search( expression_names = {} expression_values = {":patient_key": f"P#{nhs_number}"} - # If both categories and pointer_types are provided, filter on both + # Determine which filters to apply if pointer_types and categories: - expression_names["#pointer_type"] = "type" - expression_names["#category"] = "category" - types_filters = [ - f"#pointer_type = :type_{i}" for i in range(len(pointer_types)) - ] - types_filter_values = { - f":type_{i}": pointer_types[i] for i in range(len(pointer_types)) - } - category_filters = [ - f"#category = :category_{i}" for i in range(len(categories)) - ] - category_filter_values = { - f":category_{i}": categories[i] for i in range(len(categories)) - } - filter_expressions.append(f"({' OR '.join(types_filters)})") - filter_expressions.append(f"({' OR '.join(category_filters)})") - expression_values.update(types_filter_values) - expression_values.update(category_filter_values) - - # If only pointer_types are provided, retrieve all categories for each type and filter on both + # Use both pointer_types and categories as filters + filter_expressions = _build_filter_expressions( + pointer_types, categories, expression_names, expression_values + ) elif pointer_types and not categories: - expression_names["#pointer_type"] = "type" - expression_names["#category"] = "category" - types_filters = [] - category_filters = [] - types_filter_values = {} - category_filter_values = {} - category_set = set() - for i, pointer_type in enumerate(pointer_types): - types_filters.append(f"#pointer_type = :type_{i}") - types_filter_values[f":type_{i}"] = pointer_type - # Get all categories for this type, handling both set and string - if pointer_type in TYPE_CATEGORIES: - cats = TYPE_CATEGORIES[pointer_type] - if isinstance(cats, str): - category_set.add(cats) - else: - category_set.update(cats) - for j, cat in enumerate(category_set): - category_filters.append(f"#category = :category_{j}") - category_filter_values[f":category_{j}"] = cat - if types_filters: - filter_expressions.append(f"({' OR '.join(types_filters)})") - if category_filters: - filter_expressions.append(f"({' OR '.join(category_filters)})") - expression_values.update(types_filter_values) - expression_values.update(category_filter_values) - - # If only categories are provided, filter on categories + # Get all categories for these pointer_types + all_categories = list(_get_categories_for_pointer_types(pointer_types)) + filter_expressions = _build_filter_expressions( + pointer_types, all_categories, expression_names, expression_values + ) elif categories and not pointer_types: - expression_names["#category"] = "category" - category_filters = [ - f"#category = :category_{i}" for i in range(len(categories)) - ] - category_filter_values = { - f":category_{i}": categories[i] for i in range(len(categories)) - } - filter_expressions.append(f"({' OR '.join(category_filters)})") - expression_values.update(category_filter_values) + # Only categories provided + filter_expressions = _build_filter_expressions( + [], categories, expression_names, expression_values + ) if custodian: logger.log(