diff --git a/cdk/constructs/DpsStacItemGenerator/runtime/pyproject.toml b/cdk/constructs/DpsStacItemGenerator/runtime/pyproject.toml
index f8d953d..fedcdac 100644
--- a/cdk/constructs/DpsStacItemGenerator/runtime/pyproject.toml
+++ b/cdk/constructs/DpsStacItemGenerator/runtime/pyproject.toml
@@ -11,6 +11,7 @@ dependencies = [
     "pydantic>=2.11.0",
     "pystac[validation]>=1.13.0",
     "stac-pydantic>=3.2.0",
+    "python-slugify==8.0.4",
 ]
 
 [dependency-groups]
diff --git a/cdk/constructs/DpsStacItemGenerator/runtime/src/dps_stac_item_generator/item.py b/cdk/constructs/DpsStacItemGenerator/runtime/src/dps_stac_item_generator/item.py
index a16a8de..24ffba4 100644
--- a/cdk/constructs/DpsStacItemGenerator/runtime/src/dps_stac_item_generator/item.py
+++ b/cdk/constructs/DpsStacItemGenerator/runtime/src/dps_stac_item_generator/item.py
@@ -10,6 +10,7 @@
 from pystac import Link
 from pystac.stac_io import DefaultStacIO, StacIO
 from stac_pydantic.item import Item
+from slugify import slugify
 
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
@@ -94,7 +95,11 @@ def get_stac_items(catalog_json_key: str) -> Generator[Item, Any, Any]:
             f"could not locate the .met.json file with the DPS job outputs in {job_output_prefix}"
         )
 
-    collection_id = COLLECTION_ID_FORMAT.format(**job_metadata)
+    # Sanitize the formatted ID: each run of "/", "?", "#", "%", "&" or spaces
+    # is replaced by slugify's default "-" separator.
+    # NOTE(review): slugify() also lowercases its input by default
+    # (lowercase=True) -- confirm that is acceptable for existing collection IDs.
+    collection_id = slugify(COLLECTION_ID_FORMAT.format(**job_metadata), regex_pattern=r'[/\?#%& ]+')
 
     catalog = pystac.Catalog.from_file(catalog_json_key)
     catalog.make_all_asset_hrefs_absolute()
diff --git a/cdk/constructs/DpsStacItemGenerator/runtime/tests/test_item.py b/cdk/constructs/DpsStacItemGenerator/runtime/tests/test_item.py
index 9e61cc6..77e505a 100644
--- a/cdk/constructs/DpsStacItemGenerator/runtime/tests/test_item.py
+++ b/cdk/constructs/DpsStacItemGenerator/runtime/tests/test_item.py
@@ -300,3 +300,27 @@ def test_get_stac_items_invalid_catalog_json(self, mock_job_metadata):
             Exception, match="Failed to parse catalog.json: invalid format"
         ):
             list(get_stac_items(catalog_s3_key))
+
+    def test_sanitize_collection_id(self, mock_catalog, mock_job_metadata):
+        """Test that "/" and "?" in job metadata become "-" in the collection ID."""
+        catalog_s3_key = "s3://test-bucket/2023/01/15/10/30/45/123456/catalog.json"
+        mock_job_metadata["username"] = "user/name"
+        mock_job_metadata["algorithm_name"] = "algo?name"
+        expected_collection_id = "user-name__algo-name__0.1__test"
+
+        with (
+            patch(
+                "dps_stac_item_generator.item.pystac.Catalog.from_file",
+                return_value=mock_catalog,
+            ),
+            patch(
+                "dps_stac_item_generator.item.load_met_json",
+                return_value=mock_job_metadata,
+            ),
+        ):
+            items = list(get_stac_items(catalog_s3_key))
+
+        # Fail loudly if nothing was generated; the loop alone would pass vacuously.
+        assert items
+        for item in items:
+            assert item.collection == expected_collection_id
diff --git a/cdk/constructs/DpsStacItemGenerator/runtime/uv.lock b/cdk/constructs/DpsStacItemGenerator/runtime/uv.lock
index 85170db..b8dfd5b 100644
--- a/cdk/constructs/DpsStacItemGenerator/runtime/uv.lock
+++ b/cdk/constructs/DpsStacItemGenerator/runtime/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.12"
 
 [[package]]
@@ -72,6 +72,7 @@ dependencies = [
     { name = "obstore" },
     { name = "pydantic" },
     { name = "pystac", extra = ["validation"] },
+    { name = "python-slugify" },
     { name = "stac-pydantic" },
 ]
 
@@ -87,6 +88,7 @@ requires-dist = [
     { name = "obstore", specifier = ">=0.7.0" },
     { name = "pydantic", specifier = ">=2.11.0" },
     { name = "pystac", extras = ["validation"], specifier = ">=1.13.0" },
+    { name = "python-slugify", specifier = "==8.0.4" },
     { name = "stac-pydantic", specifier = ">=3.2.0" },
 ]
 
@@ -402,6 +404,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
 ]
 
+[[package]]
+name = "python-slugify"
+version = "8.0.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "text-unidecode" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/87/c7/5e1547c44e31da50a460df93af11a535ace568ef89d7a811069ead340c4a/python-slugify-8.0.4.tar.gz", hash = "sha256:59202371d1d05b54a9e7720c5e038f928f45daaffe41dd10822f3907b937c856", size = 10921, upload-time = "2024-02-08T18:32:45.488Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a4/62/02da182e544a51a5c3ccf4b03ab79df279f9c60c5e82d5e8bec7ca26ac11/python_slugify-8.0.4-py2.py3-none-any.whl", hash = "sha256:276540b79961052b66b7d116620b36518847f52d5fd9e3a70164fc8c50faa6b8", size = 10051, upload-time = "2024-02-08T18:32:43.911Z" },
+]
+
 [[package]]
 name = "referencing"
 version = "0.36.2"
@@ -529,6 +543,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/27/c7/8ee49430a0a745559dab4205ad9f946a264e793061fe5d1456a4b7cd2f27/stac_pydantic-3.4.0-py3-none-any.whl", hash = "sha256:ce2e7b377db078abbb164f378e18d54b53cda2953e44b643cdfa3adc831ca1c8", size = 24851, upload-time = "2025-07-17T11:17:27.966Z" },
 ]
 
+[[package]]
+name = "text-unidecode"
+version = "1.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ab/e2/e9a00f0ccb71718418230718b3d900e71a5d16e701a3dae079a21e9cd8f8/text-unidecode-1.3.tar.gz", hash = "sha256:bad6603bb14d279193107714b288be206cac565dfa49aa5b105294dd5c4aab93", size = 76885, upload-time = "2019-08-30T21:36:45.405Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a6/a5/c0b6468d3824fe3fde30dbb5e1f687b291608f9473681bbf7dabbf5a87d7/text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8", size = 78154, upload-time = "2019-08-30T21:37:03.543Z" },
+]
+
 [[package]]
 name = "typing-extensions"
 version = "4.15.0"