From ffc287b98cf9b7cf1d18453f0797b2e5f4e1667f Mon Sep 17 00:00:00 2001 From: Anjali Trace Date: Thu, 18 Dec 2025 10:57:25 +0000 Subject: [PATCH 1/5] NRL-1666 Point the perftest environment at the baseline table to conduct baseline performance tests --- .../test/dynamodb__pointers-table.tf | 2 +- terraform/infrastructure/etc/perftest.tfvars | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/account-wide-infrastructure/test/dynamodb__pointers-table.tf b/terraform/account-wide-infrastructure/test/dynamodb__pointers-table.tf index 98c7bae25..cdbe7dbc7 100644 --- a/terraform/account-wide-infrastructure/test/dynamodb__pointers-table.tf +++ b/terraform/account-wide-infrastructure/test/dynamodb__pointers-table.tf @@ -33,5 +33,5 @@ module "ref-pointers-table" { module "perftest-pointers-table" { source = "../modules/pointers-table" - name_prefix = "nhsd-nrlf--perftest" + name_prefix = "nhsd-nrlf--perftest-baseline" } diff --git a/terraform/infrastructure/etc/perftest.tfvars b/terraform/infrastructure/etc/perftest.tfvars index 2ddb7ecc5..56478f9fe 100644 --- a/terraform/infrastructure/etc/perftest.tfvars +++ b/terraform/infrastructure/etc/perftest.tfvars @@ -1,7 +1,7 @@ account_name = "perftest" aws_account_name = "test" -dynamodb_pointers_table_prefix = "nhsd-nrlf--perftest" +dynamodb_pointers_table_prefix = "nhsd-nrlf--perftest-baseline" domain = "perftest.record-locator.national.nhs.uk" public_domain = "perftest.api.service.nhs.uk" From 511d0125c621055d4dc09a11d16adcd7381e9eb0 Mon Sep 17 00:00:00 2001 From: Anjali Trace Date: Thu, 18 Dec 2025 11:53:20 +0000 Subject: [PATCH 2/5] NRL-1666 Just deploy one change at a time --- .../account-wide-infrastructure/modules/pointers-table/vars.tf | 2 +- terraform/infrastructure/etc/perftest.tfvars | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/terraform/account-wide-infrastructure/modules/pointers-table/vars.tf b/terraform/account-wide-infrastructure/modules/pointers-table/vars.tf 
index 29d04b60e..d0f2d36e3 100644 --- a/terraform/account-wide-infrastructure/modules/pointers-table/vars.tf +++ b/terraform/account-wide-infrastructure/modules/pointers-table/vars.tf @@ -23,6 +23,6 @@ variable "kms_deletion_window_in_days" { variable "enable_backups" { type = bool - description = "Enable AwS cloud backup" + description = "Enable AWS cloud backup" default = false } diff --git a/terraform/infrastructure/etc/perftest.tfvars b/terraform/infrastructure/etc/perftest.tfvars index 56478f9fe..2ddb7ecc5 100644 --- a/terraform/infrastructure/etc/perftest.tfvars +++ b/terraform/infrastructure/etc/perftest.tfvars @@ -1,7 +1,7 @@ account_name = "perftest" aws_account_name = "test" -dynamodb_pointers_table_prefix = "nhsd-nrlf--perftest-baseline" +dynamodb_pointers_table_prefix = "nhsd-nrlf--perftest" domain = "perftest.record-locator.national.nhs.uk" public_domain = "perftest.api.service.nhs.uk" From 78378bd5dfd699a4a4e5d7dc2d7fbc04a27ceaa6 Mon Sep 17 00:00:00 2001 From: Anjali Trace Date: Thu, 18 Dec 2025 14:17:28 +0000 Subject: [PATCH 3/5] NRL-1666 Back to both changes for teardown --- terraform/infrastructure/etc/perftest.tfvars | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/infrastructure/etc/perftest.tfvars b/terraform/infrastructure/etc/perftest.tfvars index 2ddb7ecc5..56478f9fe 100644 --- a/terraform/infrastructure/etc/perftest.tfvars +++ b/terraform/infrastructure/etc/perftest.tfvars @@ -1,7 +1,7 @@ account_name = "perftest" aws_account_name = "test" -dynamodb_pointers_table_prefix = "nhsd-nrlf--perftest" +dynamodb_pointers_table_prefix = "nhsd-nrlf--perftest-baseline" domain = "perftest.record-locator.national.nhs.uk" public_domain = "perftest.api.service.nhs.uk" From 9c8b30e3e32e5ee023675c3f7df2ad1e993944e8 Mon Sep 17 00:00:00 2001 From: Anjali Trace Date: Mon, 22 Dec 2025 12:55:25 +0000 Subject: [PATCH 4/5] NRL-1666 Please let me deploy --- tests/performance/producer/seed_nft_tables.py | 209 +++++++++++++++++- 1 file 
changed, 208 insertions(+), 1 deletion(-) diff --git a/tests/performance/producer/seed_nft_tables.py b/tests/performance/producer/seed_nft_tables.py index f59a85af1..f5805adb5 100644 --- a/tests/performance/producer/seed_nft_tables.py +++ b/tests/performance/producer/seed_nft_tables.py @@ -1,10 +1,32 @@ +import csv +from datetime import datetime, timedelta, timezone +from itertools import cycle +from math import gcd +from random import shuffle +from typing import Any, Iterator + import boto3 +import fire + +# import json +import numpy as np + +from nrlf.core.constants import ( + CATEGORY_ATTRIBUTES, + SNOMED_SYSTEM_URL, + TYPE_ATTRIBUTES, + TYPE_CATEGORIES, +) +from nrlf.core.dynamodb.model import DocumentPointer +from nrlf.core.logger import logger +from nrlf.tests.data import load_document_reference dynamodb = boto3.client("dynamodb") resource = boto3.resource("dynamodb") +logger.setLevel("ERROR") -# DOC_REF_TEMPLATE = load_document_reference("NFT-template") +DOC_REF_TEMPLATE = load_document_reference("NFT-template") CHECKSUM_WEIGHTS = [i for i in range(10, 1, -1)] @@ -66,3 +88,188 @@ "TRXT": 1, }, # summary record currently has only one supplier } + +DEFAULT_COUNT_DISTRIBUTIONS = {"1": 91, "2": 8, "3": 1} + + +class TestNhsNumbersIterator: + def __iter__(self): + self.first9 = 900000000 + return self + + def __next__(self): + if self.first9 > 999999999: + raise StopIteration + checksum = 10 + while checksum == 10: + self.first9 += 1 + nhs_no_digits = list(map(int, str(self.first9))) + checksum = ( + sum( + weight * digit + for weight, digit in zip(CHECKSUM_WEIGHTS, nhs_no_digits) + ) + * -1 + % 11 + ) + nhs_no = str(self.first9) + str(checksum) + return nhs_no + + +def _make_seed_pointer( + type_code: str, custodian: str, nhs_number: str, counter: int +) -> DocumentPointer: + """ + Populates the example pointer template with test data to create a valid NRL 3.0 pointer + """ + doc_ref = DOC_REF_TEMPLATE + doc_ref.id = f"{custodian}-{str(counter).zfill(12)}" # 
deterministic to aid perftest script retrieval + doc_ref.subject.identifier.value = nhs_number + doc_ref.custodian.identifier.value = custodian + doc_ref.author[0].identifier.value = "X26NFT" + doc_ref.type.coding[0].code = type_code + doc_ref.type.coding[0].display = TYPE_ATTRIBUTES.get( + f"{SNOMED_SYSTEM_URL}|{type_code}" + ).get("display") + type_url = f"{SNOMED_SYSTEM_URL}|{type_code}" + category = TYPE_CATEGORIES.get(type_url) + doc_ref.category[0].coding[0].code = category.split("|")[-1] + doc_ref.category[0].coding[0].display = CATEGORY_ATTRIBUTES.get(category).get( + "display" + ) + nft_pointer = DocumentPointer.from_document_reference(doc_ref, source="NFT-SEED") + return nft_pointer + + +def _populate_seed_table( + table_name: str, + px_with_pointers: int, + pointers_per_px: float = 1.0, + type_dists: dict[str, int] = DEFAULT_TYPE_DISTRIBUTIONS, + custodian_dists: dict[str, dict[str, int]] = DEFAULT_CUSTODIAN_DISTRIBUTIONS, +): + """ + Seeds a table with example data for non-functional testing. + """ + if pointers_per_px < 1.0: + raise ValueError("Cannot populate table with patients with zero pointers") + # set up iterations + type_iter = _set_up_cyclical_iterator(type_dists) + custodian_iters = _set_up_custodian_iterators(custodian_dists) + # count_iter = _set_up_cyclical_iterator(DEFAULT_COUNT_DISTRIBUTIONS) + count_iter = _get_pointer_count_poisson_distributions( + px_with_pointers, pointers_per_px + ) + # count_iter = _get_pointer_count_negbinom_distributions(px_with_pointers, pointers_per_px) + testnum_cls = TestNhsNumbersIterator() + testnum_iter = iter(testnum_cls) + + px_counter = 0 + doc_ref_target = int(pointers_per_px * px_with_pointers) + logger.log( + f"Will upsert ~{doc_ref_target} test pointers for {px_with_pointers} patients." 
+ ) + doc_ref_counter = 0 + batch_counter = 0 + unprocessed_count = 0 + + pointer_data: list[list[str]] = [] + + start_time = datetime.now(tz=timezone.utc) + + batch_upsert_items: list[dict[str, Any]] = [] + while px_counter < px_with_pointers: + pointers_for_px = int(next(count_iter)) + + if batch_counter + pointers_for_px > 25 or px_counter == px_with_pointers: + response = resource.batch_write_item( + RequestItems={table_name: batch_upsert_items} + ) + + if response.get("UnprocessedItems"): + unprocessed_count += len( + response.get("UnprocessedItems").get(table_name, []) + ) + + batch_upsert_items = [] + batch_counter = 0 + + new_px = next(testnum_iter) + for _ in range(pointers_for_px): + new_type = next(type_iter) + new_custodian = next(custodian_iters[new_type]) + doc_ref_counter += 1 + batch_counter += 1 + + pointer = _make_seed_pointer( + new_type, new_custodian, new_px, doc_ref_counter + ) + put_req = {"PutRequest": {"Item": pointer.model_dump()}} + batch_upsert_items.append(put_req) + pointer_data.append( + [ + pointer.id, + pointer.type, + pointer.custodian, + pointer.nhs_number, + ] + ) + px_counter += 1 + + if px_counter % 1000 == 0: + logger.log(".", end="", flush=True) + if px_counter % 100000 == 0: + logger.log( + f" {px_counter} patients processed ({doc_ref_counter} pointers)." + ) + + logger.log("Done.") + + end_time = datetime.now(tz=timezone.utc) + logger.log( + f"Created {doc_ref_counter} pointers in {timedelta.total_seconds(end_time - start_time)} seconds (unprocessed: {unprocessed_count})." 
+ ) + + with open("./dist/seed-nft-pointers.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(["pointer_id", "pointer_type", "custodian", "nhs_number"]) + writer.writerows(pointer_data) + logger.log(f"Pointer data saved to ./dist/seed-nft-pointers.csv") # noqa + + +def _set_up_cyclical_iterator(dists: dict[str, int]) -> Iterator[str]: + """ + Given a dict of values and their relative frequencies, + returns an iterator that will cycle through the reduced and shuffled set of values. + This should result in more live-like data than e.g. creating a bulk amount of each pointer type/custodian in series. + It also means each batch will contain a representative sample of the distribution. + """ + d = gcd(*dists.values()) + value_list: list[str] = [] + for entry in dists: + value_list.extend([entry] * (dists[entry] // d)) + shuffle(value_list) + return cycle(value_list) + + +def _get_pointer_count_poisson_distributions( + num_of_patients: int, pointers_per_px: float +) -> Iterator[int]: + p_count_distr = np.random.poisson(lam=pointers_per_px - 1, size=num_of_patients) + 1 + p_count_distr = np.clip(p_count_distr, a_min=1, a_max=4) + return cycle(p_count_distr) + + +def _set_up_custodian_iterators( + custodian_dists: dict[str, dict[str, int]], +) -> dict[str, Iterator[str]]: + custodian_iters: dict[str, Iterator[str]] = {} + for pointer_type in custodian_dists: + custodian_iters[pointer_type] = _set_up_cyclical_iterator( + custodian_dists[pointer_type] + ) + return custodian_iters + + +if __name__ == "__main__": + fire.Fire(_populate_seed_table) From ab5cab721842ea2dd698ad4bcb08949642552f2d Mon Sep 17 00:00:00 2001 From: Anjali Trace Date: Wed, 24 Dec 2025 13:04:57 +0000 Subject: [PATCH 5/5] NRL-1866 Update performance test readme with all prep steps --- terraform/infrastructure/README.md | 2 ++ tests/performance/README.md | 50 +++++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/terraform/infrastructure/README.md 
b/terraform/infrastructure/README.md index 6b9eec9b4..65c3a4437 100644 --- a/terraform/infrastructure/README.md +++ b/terraform/infrastructure/README.md @@ -116,6 +116,8 @@ replacing `{ENV_NAME}` with the environment name (e.g. `dev`, `qa`, `qa-sandbox` To tear down the infrastructure, you need to use Terraform to destroy the resources in your Terraform workspace. +First `make build-artifacts`. Then assume management and run `make get-s3-perms ENV={ENV_NAME}` in the project root. + To teardown the infrastructure, do the following: ``` diff --git a/tests/performance/README.md b/tests/performance/README.md index 6c01cc1a5..15fe7145c 100644 --- a/tests/performance/README.md +++ b/tests/performance/README.md @@ -1,20 +1,44 @@ # Performance Testing -some high level context short +We have performance tests which give us a benchmark of how NRLF performs under load for consumers and producers. -## Run perf tests +## Run performance tests ### Prep the environment Perf tests are generally conducted in the perftest env. There's a selection of tables in the perftest env representing different pointer volume scenarios e.g. perftest-baseline vs perftest-1million (todo: update with real names!). -To reset this table to the expected state for perftests, restore the table from a backup. +#### Point perftest at a different pointers table -In the steps below, make sure the table name is the table your environment is pointing at. You might need to redeploy NRLF lambdas to point at the desired table. +We (will) have multiple tables representing different states of NRLF in the future e.g. all patients receiving an IPS (International Patient Summary), onboarding particular high-volume suppliers. + +In order to run performance tests to get figures for these different states, we can point the perftest environment at one of these tables. + +Currently, this requires tearing down the existing environment and restoring from scratch: + +1. 
Follow instructions in terraform/infrastructure/README.md to tear down the perf test environment. + - Do **not** tear down shared account-wide infrastructure +2. Update `perftest-pointers-table.name_prefix` in `terraform/account-wide-infrastructure/test/dynamodb__pointers-table.tf` to be the table name you want, minus "-pointers-table" + - e.g. to use the baseline table `nhsd-nrlf--perftest-baseline-pointers-table`, set `name_prefix = "nhsd-nrlf--perftest-baseline"` +3. Update `dynamodb_pointers_table_prefix` in `terraform/infrastructure/etc/perftest.tfvars` in the same way as above. + - e.g. to use the baseline table, set `dynamodb_pointers_table_prefix = "nhsd-nrlf--perftest-baseline"` +4. Commit changes to a branch & push +5. Run the [Deploy Account-wide infrastructure](https://github.com/NHSDigital/NRLF/actions/workflows/deploy-account-wide-infra.yml) workflow against your branch & `account-test`. + - If you get a Terraform failure like "tried to create table but it already exists", you will need to do some finagling: + 1. Make sure there is a backup of your chosen table or create one if not. In the AWS console: dynamodb > tables > your perftest table > backups > create backup > Create on-demand backup > leave all settings as defaults > create backup. This might take up to an hour to complete. + 2. Once backed up, delete your table. In the AWS console: dynamodb > tables > your perftest table > actions > delete table + 3. Rerun the Deploy Account-wide infrastructure action. + 4. Terraform will create an empty table with the correct name & (most importantly!) read/write IAM policies. + 5. Delete the empty table created by Terraform and restore from the backup, specifying the same table name you've defined in code. +6. Run the [Persistent Environment Deploy](https://github.com/NHSDigital/NRLF/actions/workflows/persistent-environment.yml) workflow against your branch & `perftest` to restore the environment with lambdas pointed at your chosen table. +7. 
You can check this has been successful by checking the table name in the lambdas. + - In the AWS console: Lambda > functions > pick any perftest-1 lambda > Configuration > Environment variables > `TABLE_NAME` should be your desired pointer table e.g. `nhsd-nrlf--perftest-baseline-pointers-table` + +If you've followed these steps, you will also need to [generate permissions](#generate-permissions) as the organisation permissions will have been lost when the environment was torn down. ### Prepare to run tests -#### Pull certs for env +#### Pull certs for perftest ```sh assume management @@ -26,14 +50,14 @@ make truststore-pull-all ENV=perftest You will need to generate pointer permissions the first time performance tests are run in an environment e.g. if the perftest environment is destroyed & recreated. ```sh -make generate permissions # makes a bunch of json permission files +make generate permissions # makes a bunch of json permission files for test organisations make build # will take all permissions & create nrlf_permissions.zip file # apply this new permissions zip file to your environment cd ./terraform/infrastructure -assume test # needed? +assume test make init TF_WORKSPACE_NAME=perftest-1 ENV=perftest -tf apply +make ENV=perftest USE_SHARED_RESOURCES=true apply ``` #### Generate input files @@ -49,3 +73,13 @@ make perftest-prepare PERFTEST_TABLE_NAME=perftest-baseline make perftest-consumer ENV_TYPE=perftest PERFTEST_HOST=perftest-1.perftest.record-locator.national.nhs.uk make perftest-producer ENV_TYPE=perftest PERFTEST_HOST=perftest-1.perftest.record-locator.national.nhs.uk ``` + +## Assumptions / Caveats + +- Run performance tests in the perftest environment only\* +- Both producer & consumer tests are repeatable +- These tests work on the assumption that all nhs numbers in the test data are serial and lie within a fixed range i.e. picking any number between NHS_NUMBER_MINIMUM & NHS_NUMBER_MAXIMUM will yield a patient with pointer(s). 
+- Configure scenarios in the `consumer/perftest.config.json` & `producer/perftest.config.json` files. This does not alter the number of stages per scenario; that's fixed in `perftest.js`. +- Consider running these tests multiple times to get figures for a warm environment - perftest, unlike prod, is not well-used so you will get cold-start figures on your first run + +\*These performance tests are tightly coupled to the seed scripts that populate test data. This means these tests can only be run in an environment containing solely test data created by the seed data scripts. `perftest` is a dedicated environment to do this in, but in theory any environment could be populated with the seed data and used.