Skip to content

Commit 177a7df

Browse files
authored
Merge pull request #120 from WDGPH/bug/replace-unspecified
Refactor preprocessing logic to replace unspecified agents and update…
2 parents ecce168 + 4e40f42 commit 177a7df

File tree

8 files changed

+27
-27
lines changed

8 files changed

+27
-27
lines changed

pipeline/orchestrator.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ def run_step_2_preprocess(
236236

237237
# Build preprocessing result
238238
result = preprocess.build_preprocess_result(
239-
df, language, vaccine_reference, preprocess.IGNORE_AGENTS
239+
df, language, vaccine_reference, preprocess.REPLACE_UNSPECIFIED
240240
)
241241

242242
# Write artifact

pipeline/preprocess.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070

7171
_FORMATTER = Formatter()
7272

73-
IGNORE_AGENTS = [
73+
REPLACE_UNSPECIFIED = [
7474
"-unspecified",
7575
"unspecified",
7676
"Not Specified",
@@ -613,7 +613,7 @@ def process_vaccines_due(vaccines_due: Any, language: str) -> str:
613613

614614

615615
def process_received_agents(
616-
received_agents: Any, ignore_agents: List[str]
616+
received_agents: Any, replace_unspecified: List[str]
617617
) -> List[Dict[str, Any]]:
618618
"""Extract and normalize vaccination history from received_agents string."""
619619
if not isinstance(received_agents, str) or not received_agents.strip():
@@ -626,7 +626,7 @@ def process_received_agents(
626626
for match in matches:
627627
date_str, vaccine = match.split(" - ", maxsplit=1)
628628
vaccine = vaccine.strip()
629-
if vaccine in ignore_agents:
629+
if vaccine in replace_unspecified:
630630
continue
631631
date_iso = convert_date_iso(date_str.strip())
632632
rows.append({"date_given": date_iso, "vaccine": vaccine})
@@ -716,7 +716,7 @@ def build_preprocess_result(
716716
df: pd.DataFrame,
717717
language: str,
718718
vaccine_reference: Dict[str, Any],
719-
ignore_agents: List[str],
719+
replace_unspecified: List[str],
720720
) -> PreprocessResult:
721721
"""Process and normalize client data into structured artifact.
722722
@@ -788,7 +788,7 @@ def build_preprocess_result(
788788
vaccines_due_list = [
789789
item.strip() for item in vaccines_due.split(",") if item.strip()
790790
]
791-
received_grouped = process_received_agents(row.IMMS_GIVEN, ignore_agents) # type: ignore[attr-defined]
791+
received_grouped = process_received_agents(row.IMMS_GIVEN, replace_unspecified) # type: ignore[attr-defined]
792792
received = enrich_grouped_records(
793793
received_grouped, vaccine_reference, language, chart_diseases_header
794794
)

templates/en_template.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
5050
#v(0.2cm)
5151
52-
#conf.header_info_cim("__LOGO_PATH__", black, 16pt, "Request for Immunization Record")
52+
#conf.header_info_cim("__LOGO_PATH__", 6cm, black, 16pt, "Request for Immunization Record")
5353
5454
#v(0.2cm)
5555

templates/fr_template.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
5151
#v(0.2cm)
5252
53-
#conf.header_info_cim("__LOGO_PATH__", black, 16pt, "Demande de dossier d'immunisation")
53+
#conf.header_info_cim("__LOGO_PATH__", 6cm, black, 16pt, "Demande de dossier d'immunisation")
5454
5555
#v(0.2cm)
5656

tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ def default_config(tmp_output_structure: Dict[str, Path]) -> Dict[str, Any]:
148148
"Mumps",
149149
"Rubella",
150150
],
151-
"ignore_agents": [],
151+
"replace_unspecified": [],
152152
"typst": {
153153
"bin": "typst",
154154
},

tests/fixtures/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ def default_config(tmp_output_structure: Dict[str, Path]) -> Dict[str, Any]:
144144
"Mumps",
145145
"Rubella",
146146
],
147-
"ignore_agents": [],
147+
"replace_unspecified": [],
148148
}
149149

150150

tests/integration/test_config_driven_behavior.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,15 +106,15 @@ def test_chart_diseases_header_configuration(
106106
assert isinstance(default_config["chart_diseases_header"], list)
107107
assert len(default_config["chart_diseases_header"]) > 0
108108

109-
def test_ignore_agents_configuration(self, default_config: Dict[str, Any]) -> None:
110-
"""Verify ignore_agents list is configurable.
109+
def test_replace_unspecified_configuration(self, default_config: Dict[str, Any]) -> None:
110+
"""Verify replace_unspecified list is configurable.
111111
112112
Real-world significance:
113113
- Received vaccine agents may be recorded with placeholder names (e.g. "Not Specified")
114114
- Config exposes the list of agent labels that are treated as unspecified
115115
"""
116-
assert "ignore_agents" in default_config
117-
assert isinstance(default_config["ignore_agents"], list)
116+
assert "replace_unspecified" in default_config
117+
assert isinstance(default_config["replace_unspecified"], list)
118118

119119

120120
@pytest.mark.integration

tests/unit/test_preprocess.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -483,7 +483,7 @@ def test_build_result_generates_clients_with_sequences(
483483
normalized,
484484
language="en",
485485
vaccine_reference=default_vaccine_reference,
486-
ignore_agents=[],
486+
replace_unspecified=[],
487487
)
488488

489489
assert len(result.clients) == 3
@@ -508,14 +508,14 @@ def test_build_result_sorts_clients_deterministically(
508508
normalized,
509509
language="en",
510510
vaccine_reference=default_vaccine_reference,
511-
ignore_agents=[],
511+
replace_unspecified=[],
512512
)
513513

514514
result2 = preprocess.build_preprocess_result(
515515
normalized,
516516
language="en",
517517
vaccine_reference=default_vaccine_reference,
518-
ignore_agents=[],
518+
replace_unspecified=[],
519519
)
520520

521521
ids1 = [c.client_id for c in result1.clients]
@@ -569,7 +569,7 @@ def test_build_result_sorts_by_school_then_name(
569569
normalized,
570570
language="en",
571571
vaccine_reference=default_vaccine_reference,
572-
ignore_agents=[],
572+
replace_unspecified=[],
573573
)
574574

575575
# Expected order: Apple/Chloe/Jones, Apple/Diana/Jones, Zebra/Alice/Smith, Zebra/Bob/Smith
@@ -595,7 +595,7 @@ def test_build_result_maps_vaccines_correctly(
595595
normalized,
596596
language="en",
597597
vaccine_reference=default_vaccine_reference,
598-
ignore_agents=[],
598+
replace_unspecified=[],
599599
)
600600

601601
# Should have DTaP expanded to component diseases
@@ -637,7 +637,7 @@ def test_build_result_handles_missing_board_name_with_warning(
637637
normalized,
638638
language="en",
639639
vaccine_reference=default_vaccine_reference,
640-
ignore_agents=[],
640+
replace_unspecified=[],
641641
)
642642

643643
# Should still process - at least one client
@@ -660,16 +660,16 @@ def test_build_result_french_language_support(
660660
normalized,
661661
language="fr",
662662
vaccine_reference=default_vaccine_reference,
663-
ignore_agents=[],
663+
replace_unspecified=[],
664664
)
665665

666666
assert len(result.clients) == 1
667667
assert result.clients[0].language == "fr"
668668

669-
def test_build_result_handles_ignore_agents(
669+
def test_build_result_handles_replace_unspecified(
670670
self, default_vaccine_reference
671671
) -> None:
672-
"""Verify ignore_agents filters out unspecified vaccines.
672+
"""Verify replace_unspecified filters out unspecified vaccines.
673673
674674
Real-world significance:
675675
- Input may contain "Not Specified" vaccine agents
@@ -682,7 +682,7 @@ def test_build_result_handles_ignore_agents(
682682
normalized,
683683
language="en",
684684
vaccine_reference=default_vaccine_reference,
685-
ignore_agents=["Not Specified", "unspecified"],
685+
replace_unspecified=["Not Specified", "unspecified"],
686686
)
687687

688688
assert len(result.clients) == 1
@@ -708,7 +708,7 @@ def test_build_result_detects_duplicate_client_ids(
708708
normalized,
709709
language="en",
710710
vaccine_reference=default_vaccine_reference,
711-
ignore_agents=[],
711+
replace_unspecified=[],
712712
)
713713

714714
# Should have 2 clients (no deduplication)
@@ -744,7 +744,7 @@ def test_build_result_detects_multiple_duplicate_client_ids(
744744
normalized,
745745
language="en",
746746
vaccine_reference=default_vaccine_reference,
747-
ignore_agents=[],
747+
replace_unspecified=[],
748748
)
749749

750750
# Should have 5 clients (no deduplication)
@@ -776,7 +776,7 @@ def test_build_result_no_warning_for_unique_client_ids(
776776
normalized,
777777
language="en",
778778
vaccine_reference=default_vaccine_reference,
779-
ignore_agents=[],
779+
replace_unspecified=[],
780780
)
781781

782782
# Should have 3 unique clients

0 commit comments

Comments
 (0)