From 0722cc00ab86de8438c2a21721431932f3e3893b Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Fri, 22 Nov 2024 16:58:09 +0000 Subject: [PATCH 1/4] Put the msoa bug back in --- analysis/ehrql_dataset_definition_london_adults.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/analysis/ehrql_dataset_definition_london_adults.py b/analysis/ehrql_dataset_definition_london_adults.py index 830697f..5c0ad55 100644 --- a/analysis/ehrql_dataset_definition_london_adults.py +++ b/analysis/ehrql_dataset_definition_london_adults.py @@ -13,7 +13,7 @@ london_msoa = ( addresses - .where(addresses.msoa_code == "E02000001") + .where((addresses.msoa_code == "E02000001").is_not_null()) .sort_by(addresses.start_date) .last_for_patient() ) From 80bd1e55ab34eda32b875d73147251ad37b6147e Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Fri, 22 Nov 2024 16:58:43 +0000 Subject: [PATCH 2/4] Add first event date to the basic dummy dataset already --- analysis/ehrql_dataset_definition.py | 4 ++- docs_src/dummy_data_in_opensafely.md | 44 +++++++++++++++------------- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/analysis/ehrql_dataset_definition.py b/analysis/ehrql_dataset_definition.py index c2dc867..2d8dd5f 100644 --- a/analysis/ehrql_dataset_definition.py +++ b/analysis/ehrql_dataset_definition.py @@ -1,10 +1,12 @@ from ehrql import create_dataset -from ehrql.tables.core import patients +from ehrql.tables.core import patients, clinical_events dataset = create_dataset() age = patients.age_on("2020-03-31") +first_event_date = clinical_events.sort_by(clinical_events.date).first_for_patient().date dataset.define_population((age > 18) & (age < 80)) dataset.age = age dataset.sex = patients.sex +dataset.first_event_date = first_event_date diff --git a/docs_src/dummy_data_in_opensafely.md b/docs_src/dummy_data_in_opensafely.md index 710486f..d73e574 100644 --- a/docs_src/dummy_data_in_opensafely.md +++ b/docs_src/dummy_data_in_opensafely.md @@ -20,19 +20,22 @@ There 
are 3 ways to use dummy data: :fontawesome-solid-code: `analysis/ehrql_dataset_definition.py` This example is a very minimal dataset definition, which finds patients between -18 and 80, and adds their age and sex to the output dataset: +18 and 80, and adds their age, sex and date of their first clinical event to the output +dataset: ```py from ehrql import create_dataset -from ehrql.tables.core import patients +from ehrql.tables.core import patients, clinical_events dataset = create_dataset() age = patients.age_on("2020-03-31") +first_event_date = clinical_events.sort_by(clinical_events.date).first_for_patient().date dataset.define_population((age > 18) & (age < 80)) dataset.age = age dataset.sex = patients.sex +dataset.first_event_date = first_event_date ``` :octicons-terminal-16: Try generating a dummy dataset. In the terminal, run: @@ -51,17 +54,17 @@ opensafely exec ehrql:v1 generate-dataset analysis/ehrql_dataset_definition.py By default, this generates 10 patients and will print them to the terminal: ```bash -patient_id,age,sex -1,29,unknown -2,24,male -3,58,unknown -6,29,unknown -9,61,intersex -10,57,male -11,69,female -14,76,unknown -15,23,male -17,25,unknown +patient_id,age,sex,first_event_date +1,29,unknown,2022-10-02 +2,24,male,2022-07-30 +3,58,unknown,2018-10-02 +6,29,unknown,2007-12-12 +9,61,intersex,1963-04-24 +10,57,male,1967-12-20 +11,69,female,2023-09-02 +14,76,unknown,2007-10-16 +15,23,male,1999-06-27 +17,25,unknown,2019-08-14 ``` Note that all 10 patients have been generated with ages within the expected range (18-80, as defined in the dataset definition) and sex in one of the 4 possible values. @@ -111,13 +114,14 @@ Dummy data produced from a dataset definition is: ```py from ehrql.tables.core import patients, clinical_events ... 
- events = clinical_events.sort_by(clinical_events.date).first_for_patient() - dataset.event_date = events.date - dataset.after_dob = events.date > patients.date_of_birth - dataset.before_dod = (events.date < patients.date_of_death) | patients.date_of_death.is_null() + dataset.after_dob = first_event_date > patients.date_of_birth + dataset.before_dod = (first_event_date < patients.date_of_death) | patients.date_of_death.is_null() ``` - Generate the dataset again and confirm that event dates are always after the patients + These two additional variables will return True (T) if the clinical event is within + the patient's lifespan. + + Generate the dataset again and confirm that event dates are always after the patient's date of birth and before their date of death. :octicons-terminal-16: @@ -125,7 +129,7 @@ Dummy data produced from a dataset definition is: opensafely exec ehrql:v1 generate-dataset analysis/ehrql_dataset_definition.py ... - patient_id,age,sex,event_date,after_dob,before_dod + patient_id,age,sex,first_event_date,after_dob,before_dod 1,29,unknown,2022-10-02,T,T 2,24,male,2022-07-30,T,T 3,58,unknown,2018-10-02,T,T @@ -149,7 +153,7 @@ in the OpenSAFELY documentation for a guide to the available tables and columns. However, setting up lots of dummy tables can be tedious. ehrQL generates dummy datasets by first creating dummy tables, and then running the dataset definition on them. We can have ehrQL output those dummy tables directly, rather than the dummy dataset. 
-:octicons-terminal-16: Using our updated dataset definition file, create dummy tables and write them to a local folder called `dummy_tables` +:octicons-terminal-16: Using the previous dataset definition file, create dummy tables and write them to a local folder called `dummy_tables` ```sh opensafely exec ehrql:v1 create-dummy-tables analysis/ehrql_dataset_definition.py dummy_tables From e5fa8095696f93c655b9706f43673a0592445b59 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Fri, 22 Nov 2024 17:02:05 +0000 Subject: [PATCH 3/4] Make the debug demo an offline exercise --- docs_src/dummy_data_in_opensafely.md | 33 ++++++++++++++-------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/docs_src/dummy_data_in_opensafely.md b/docs_src/dummy_data_in_opensafely.md index d73e574..a9a8d4d 100644 --- a/docs_src/dummy_data_in_opensafely.md +++ b/docs_src/dummy_data_in_opensafely.md @@ -165,28 +165,29 @@ csv files corresponding to the two tables that this dataset definition uses. ![dummy tables](images/dummy_tables_folder.png) -Now that we have some dummy tables, we can take advantage of a new feature, the ehrQL debug command from the new OpenSAFELY VSCode extension. +???+ example "Try out the ehrQL `debug()` feature" + Now that we have some dummy tables, we can take advantage of a new feature, the ehrQL `debug` command from the new OpenSAFELY VSCode extension. -:fontawesome-solid-code: Update the dataset definition to add a debug statement before and after our -definition of events. Here we can have a look at the date column from the full (dummy) clinical events -table, and then the column after we've filtered to just the first event for each patient. + :fontawesome-solid-code: Update the dataset definition to add a `debug` statement before and after our + definition of events. Here we can have a look at the date column from the full (dummy) clinical events + table, and then the column after we've filtered to just the first event for each patient. 
-```py -from ehrql import create_dataset, debug -... -debug(clinical_events.date) -events = clinical_events.sort_by(clinical_events.date).first_for_patient() -debug(events.date) -... -``` + ```py + from ehrql import create_dataset, debug + ... + debug(clinical_events.date) + events = clinical_events.sort_by(clinical_events.date).first_for_patient() + debug(events.date) + ... + ``` -Click on the "Debug ehrQL" button in the bottom right. + Click on the "Debug ehrQL" button in the bottom right. -![ehrQL debug button](images/debug_ehrql_btn.png) + ![ehrQL debug button](images/debug_ehrql_btn.png) -This will open a new panel and display the columns we asked to debug. + This will open a new panel and display the columns we asked to debug. -![ehrQL debug output](images/debug_output.png) + ![ehrQL debug output](images/debug_output.png) Once you've created some dummy tables, you can then use those tables as the input when you run your dataset definition again locally. Or you can use them as a starting point to generate more data, or to test your From e741f8e7e2b953b93ce113581bfca05cb4437408 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Fri, 22 Nov 2024 17:09:08 +0000 Subject: [PATCH 4/4] Moved hospitalised dataset definition to the exercises subfolder --- .../ehrql_dataset_definition_hospitalised_deaths.py | 0 analysis/{ => exercises}/supporting_data/icd10_codes.py | 0 docs_src/dummy_data_in_opensafely.md | 6 +++--- 3 files changed, 3 insertions(+), 3 deletions(-) rename analysis/{ => exercises}/ehrql_dataset_definition_hospitalised_deaths.py (100%) rename analysis/{ => exercises}/supporting_data/icd10_codes.py (100%) diff --git a/analysis/ehrql_dataset_definition_hospitalised_deaths.py b/analysis/exercises/ehrql_dataset_definition_hospitalised_deaths.py similarity index 100% rename from analysis/ehrql_dataset_definition_hospitalised_deaths.py rename to analysis/exercises/ehrql_dataset_definition_hospitalised_deaths.py diff --git a/analysis/supporting_data/icd10_codes.py 
b/analysis/exercises/supporting_data/icd10_codes.py similarity index 100% rename from analysis/supporting_data/icd10_codes.py rename to analysis/exercises/supporting_data/icd10_codes.py diff --git a/docs_src/dummy_data_in_opensafely.md b/docs_src/dummy_data_in_opensafely.md index a9a8d4d..c07c7a9 100644 --- a/docs_src/dummy_data_in_opensafely.md +++ b/docs_src/dummy_data_in_opensafely.md @@ -457,7 +457,7 @@ patient_id,age,msoa ???+ example "Generating dummy tables: prevalence of death from heart failure for hospitalised patients" - `analysis/ehrql_dataset_definition_hospitalised_deaths.py` is a dataset definition that identifies + `analysis/exercises/ehrql_dataset_definition_hospitalised_deaths.py` is a dataset definition that identifies patients who have an ONS death record with an underlying cause of death recorded, and who were hospitalised 6 months before their death. @@ -474,7 +474,7 @@ patient_id,age,msoa by ICD-10 codes representing cause of death. Write a dummy tables definition and use it to create dummy tables that can be used as an input to - the dataset definition at `analysis/ehrql_dataset_definition_hospitalised_deaths.py` to produce + the dataset definition at `analysis/exercises/ehrql_dataset_definition_hospitalised_deaths.py` to produce a dataset with valid hospitalisation dates and ICD-10 codes. Your dummy tables should include a range of ICD-10 codes, in order to allow downstream analyses @@ -484,7 +484,7 @@ patient_id,age,msoa Note that the original dataset definition sdoes not need to be modified in any way. - (:bulb: Hint: You may want to make use of the data at `analysis/supporting_data/icd10_codes.py`) + (:bulb: Hint: You may want to make use of the data at `analysis/exercises/supporting_data/icd10_codes.py`) ## Limitations of native OpenSAFELY dummy data