From 59b36b87a118bd07c4e093f0785947a7710bcf27 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Mon, 1 Dec 2025 13:10:53 +0100 Subject: [PATCH 1/4] feat: add support for custom Elasticsearch mappings and dynamic mapping configuration --- README.md | 88 ++++++ .../stac_fastapi/sfeos_helpers/mappings.py | 116 +++++++- stac_fastapi/tests/sfeos_helpers/__init__.py | 1 + .../tests/sfeos_helpers/test_mappings.py | 265 ++++++++++++++++++ 4 files changed, 468 insertions(+), 2 deletions(-) create mode 100644 stac_fastapi/tests/sfeos_helpers/__init__.py create mode 100644 stac_fastapi/tests/sfeos_helpers/test_mappings.py diff --git a/README.md b/README.md index f2a7f498..583e5cc9 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI - [Ingesting Sample Data CLI Tool](#ingesting-sample-data-cli-tool) - [Redis for navigation](#redis-for-navigation) - [Elasticsearch Mappings](#elasticsearch-mappings) + - [Custom Index Mappings](#custom-index-mappings) - [Managing Elasticsearch Indices](#managing-elasticsearch-indices) - [Snapshots](#snapshots) - [Reindexing](#reindexing) @@ -369,6 +370,8 @@ You can customize additional settings in your `.env` file: | `USE_DATETIME_NANOS` | Enables nanosecond precision handling for `datetime` field searches as per the `date_nanos` type. When `False`, it uses 3 millisecond precision as per the type `date`. | `true` | Optional | | `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional | | `EXCLUDED_FROM_ITEMS` | Specifies fields to exclude from STAC item responses. Supports comma-separated field names and dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). 
| `None` | Optional | +| `STAC_FASTAPI_ES_CUSTOM_MAPPINGS` | JSON string of custom Elasticsearch/OpenSearch property mappings to merge with defaults. See [Custom Index Mappings](#custom-index-mappings). | `None` | Optional | +| `STAC_FASTAPI_ES_DYNAMIC_MAPPING` | Controls dynamic mapping behavior for item indices. Values: `true` (default), `false`, or `strict`. See [Custom Index Mappings](#custom-index-mappings). | `true` | Optional | > [!NOTE] @@ -693,6 +696,91 @@ pip install stac-fastapi-elasticsearch[redis] - The `sfeos_helpers` package contains shared mapping definitions used by both Elasticsearch and OpenSearch backends - **Customization**: Custom mappings can be defined by extending the base mapping templates. +## Custom Index Mappings + +SFEOS provides environment variables to customize Elasticsearch/OpenSearch index mappings without modifying source code. This is useful for: + +- Adding STAC extension fields (SAR, Cube, etc.) with proper types +- Optimizing performance by controlling which fields are indexed +- Ensuring correct field types instead of relying on dynamic mapping inference + +### Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `STAC_FASTAPI_ES_CUSTOM_MAPPINGS` | JSON string of property mappings to merge with defaults | None | +| `STAC_FASTAPI_ES_DYNAMIC_MAPPING` | Controls dynamic mapping: `true`, `false`, or `strict` | `true` | + +### Custom Mappings (`STAC_FASTAPI_ES_CUSTOM_MAPPINGS`) + +Accepts a JSON string representing a properties dictionary that will be merged into the default item mappings. Custom mappings will overwrite defaults if keys collide. 
+ +**Example - Adding SAR Extension Fields:** + +```bash +export STAC_FASTAPI_ES_CUSTOM_MAPPINGS='{ + "properties": { + "properties": { + "sar:frequency_band": {"type": "keyword"}, + "sar:center_frequency": {"type": "float"}, + "sar:polarizations": {"type": "keyword"}, + "sar:product_type": {"type": "keyword"} + } + } +}' +``` + +**Example - Adding Cube Extension Fields:** + +```bash +export STAC_FASTAPI_ES_CUSTOM_MAPPINGS='{ + "properties": { + "properties": { + "cube:dimensions": {"type": "object", "enabled": false}, + "cube:variables": {"type": "object", "enabled": false} + } + } +}' +``` + +### Dynamic Mapping Control (`STAC_FASTAPI_ES_DYNAMIC_MAPPING`) + +Controls how Elasticsearch/OpenSearch handles fields not defined in the mapping: + +| Value | Behavior | +|-------|----------| +| `true` (default) | New fields are automatically added to the mapping. Maintains backward compatibility. | +| `false` | New fields are ignored and not indexed. Documents can still contain these fields, but they won't be searchable. | +| `strict` | Documents with unmapped fields are rejected. | + +### Combining Both Variables for Performance Optimization + +For large datasets with extensive metadata that isn't queried, you can disable dynamic mapping and define only the fields you need: + +```bash +# Disable dynamic mapping +export STAC_FASTAPI_ES_DYNAMIC_MAPPING=false + +# Define only queryable fields +export STAC_FASTAPI_ES_CUSTOM_MAPPINGS='{ + "properties": { + "properties": { + "platform": {"type": "keyword"}, + "eo:cloud_cover": {"type": "float"}, + "view:sun_elevation": {"type": "float"} + } + } +}' +``` + +This prevents Elasticsearch from creating mappings for unused metadata fields, reducing index size and improving ingestion performance. + +> [!NOTE] +> These environment variables apply to both Elasticsearch and OpenSearch backends. Changes only affect newly created indices. 
For existing indices, you'll need to reindex using [SFEOS-tools](https://github.com/Healy-Hyperspatial/sfeos-tools). + +> [!WARNING] +> Use caution when overriding core fields like `geometry`, `datetime`, or `id`. Incorrect types may cause search failures or data loss. + ## Managing Elasticsearch Indices ### Snapshots diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py index 129194da..d5da5f85 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py @@ -25,11 +25,119 @@ - Parameter names should be consistent across similar functions """ +import copy +import json +import logging import os -from typing import Any, Dict, Literal, Protocol +from typing import Any, Dict, Literal, Optional, Protocol, Union from stac_fastapi.core.utilities import get_bool_env +logger = logging.getLogger(__name__) + + +def merge_mappings(base: Dict[str, Any], custom: Dict[str, Any]) -> None: + """Recursively merge custom mappings into base mappings. + + Custom mappings will overwrite base mappings if keys collide. + Nested dictionaries are merged recursively. + + Args: + base: The base mapping dictionary to merge into (modified in place). + custom: The custom mapping dictionary to merge from. + """ + for key, value in custom.items(): + if key in base and isinstance(base[key], dict) and isinstance(value, dict): + merge_mappings(base[key], value) + else: + base[key] = value + + +def parse_dynamic_mapping_config( + config_value: Optional[str], +) -> Union[bool, str]: + """Parse the dynamic mapping configuration value. + + Args: + config_value: The configuration value from environment variable. + Can be "true", "false", "strict", or None. + + Returns: + True for "true" (default), False for "false", or the string value + for other settings like "strict". 
+ """ + if config_value is None: + return True + config_lower = config_value.lower() + if config_lower == "true": + return True + elif config_lower == "false": + return False + else: + return config_lower + + +def apply_custom_mappings( + mappings: Dict[str, Any], custom_mappings_json: Optional[str] +) -> None: + """Apply custom mappings from a JSON string to the mappings dictionary. + + Args: + mappings: The mappings dictionary to modify (modified in place). + custom_mappings_json: JSON string containing custom property mappings. + + Raises: + Logs error if JSON parsing or merging fails. + """ + if not custom_mappings_json: + return + + try: + custom_mappings = json.loads(custom_mappings_json) + merge_mappings(mappings["properties"], custom_mappings) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse STAC_FASTAPI_ES_CUSTOM_MAPPINGS JSON: {e}") + except Exception as e: + logger.error(f"Failed to merge STAC_FASTAPI_ES_CUSTOM_MAPPINGS: {e}") + + +def get_items_mappings( + dynamic_mapping: Optional[str] = None, custom_mappings: Optional[str] = None +) -> Dict[str, Any]: + """Get the ES_ITEMS_MAPPINGS with optional dynamic mapping and custom mappings applied. + + This function creates a fresh copy of the base mappings and applies the + specified configuration. Useful for testing or programmatic configuration. + + Args: + dynamic_mapping: Override for STAC_FASTAPI_ES_DYNAMIC_MAPPING. + If None, reads from environment variable. + custom_mappings: Override for STAC_FASTAPI_ES_CUSTOM_MAPPINGS. + If None, reads from environment variable. + + Returns: + A new dictionary containing the configured mappings. 
+ """ + mappings = copy.deepcopy(_BASE_ITEMS_MAPPINGS) + + # Apply dynamic mapping configuration + dynamic_config = ( + dynamic_mapping + if dynamic_mapping is not None + else os.getenv("STAC_FASTAPI_ES_DYNAMIC_MAPPING", "true") + ) + mappings["dynamic"] = parse_dynamic_mapping_config(dynamic_config) + + # Apply custom mappings + custom_config = ( + custom_mappings + if custom_mappings is not None + else os.getenv("STAC_FASTAPI_ES_CUSTOM_MAPPINGS") + ) + apply_custom_mappings(mappings, custom_config) + + return mappings + # stac_pydantic classes extend _GeometryBase, which doesn't have a type field, # So create our own Protocol for typing @@ -129,7 +237,8 @@ class Geometry(Protocol): # noqa }, ] -ES_ITEMS_MAPPINGS = { +# Base items mappings without dynamic configuration applied +_BASE_ITEMS_MAPPINGS = { "numeric_detection": False, "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, "properties": { @@ -155,6 +264,9 @@ class Geometry(Protocol): # noqa }, } +# ES_ITEMS_MAPPINGS with environment-based configuration applied at module load time +ES_ITEMS_MAPPINGS = get_items_mappings() + ES_COLLECTIONS_MAPPINGS = { "numeric_detection": False, "dynamic_templates": ES_MAPPINGS_DYNAMIC_TEMPLATES, diff --git a/stac_fastapi/tests/sfeos_helpers/__init__.py b/stac_fastapi/tests/sfeos_helpers/__init__.py new file mode 100644 index 00000000..7df7bb4e --- /dev/null +++ b/stac_fastapi/tests/sfeos_helpers/__init__.py @@ -0,0 +1 @@ +"""Tests for sfeos_helpers module.""" diff --git a/stac_fastapi/tests/sfeos_helpers/test_mappings.py b/stac_fastapi/tests/sfeos_helpers/test_mappings.py new file mode 100644 index 00000000..6ead3aa8 --- /dev/null +++ b/stac_fastapi/tests/sfeos_helpers/test_mappings.py @@ -0,0 +1,265 @@ +"""Tests for custom mappings configuration. + +These tests verify the STAC_FASTAPI_ES_CUSTOM_MAPPINGS and +STAC_FASTAPI_ES_DYNAMIC_MAPPING environment variable functionality. 
+""" + +import json + +import pytest + +from stac_fastapi.sfeos_helpers.mappings import ( + apply_custom_mappings, + get_items_mappings, + merge_mappings, + parse_dynamic_mapping_config, +) + + +class TestMergeMappings: + """Tests for the merge_mappings function.""" + + def test_recursive_merge_preserves_existing_and_adds_new(self): + """Test recursive merging preserves existing keys and adds new ones at all levels.""" + base = { + "properties": { + "properties": { + "datetime": {"type": "date_nanos"}, + "created": {"type": "date"}, + } + } + } + custom = {"properties": {"properties": {"custom_field": {"type": "keyword"}}}} + merge_mappings(base, custom) + + # Existing fields preserved + assert base["properties"]["properties"]["datetime"] == {"type": "date_nanos"} + assert base["properties"]["properties"]["created"] == {"type": "date"} + # New field added + assert base["properties"]["properties"]["custom_field"] == {"type": "keyword"} + + def test_custom_overwrites_on_key_collision(self): + """Test that custom values overwrite base values when keys collide.""" + base = {"level1": {"a": {"type": "date_nanos"}}} + custom = {"level1": {"a": {"type": "date"}}} + merge_mappings(base, custom) + assert base["level1"]["a"] == {"type": "date"} + + @pytest.mark.parametrize( + "base,custom,expected", + [ + # Dict replaces non-dict + ({"a": "string"}, {"a": {"nested": "dict"}}, {"a": {"nested": "dict"}}), + # Non-dict replaces dict + ({"a": {"nested": "dict"}}, {"a": "string"}, {"a": "string"}), + ], + ids=["dict_replaces_non_dict", "non_dict_replaces_dict"], + ) + def test_type_replacement(self, base, custom, expected): + """Test that values are replaced when types don't match for merging.""" + merge_mappings(base, custom) + assert base == expected + + +class TestParseDynamicMappingConfig: + """Tests for the parse_dynamic_mapping_config function.""" + + @pytest.mark.parametrize( + "input_value,expected", + [ + (None, True), + ("true", True), + ("TRUE", True), + ("True", 
True), + ("false", False), + ("FALSE", False), + ("False", False), + ("strict", "strict"), + ("STRICT", "strict"), + ("runtime", "runtime"), + ], + ids=[ + "none_defaults_true", + "true_lowercase", + "true_uppercase", + "true_mixed", + "false_lowercase", + "false_uppercase", + "false_mixed", + "strict_lowercase", + "strict_uppercase", + "other_value", + ], + ) + def test_parse_dynamic_mapping_config(self, input_value, expected): + """Test dynamic mapping config parsing for various inputs.""" + assert parse_dynamic_mapping_config(input_value) == expected + + +class TestApplyCustomMappings: + """Tests for the apply_custom_mappings function.""" + + @pytest.mark.parametrize( + "custom_json", + [None, ""], + ids=["none", "empty_string"], + ) + def test_no_op_for_empty_input(self, custom_json): + """Test that None or empty string leaves mappings unchanged.""" + mappings = {"properties": {"id": {"type": "keyword"}}} + original = {"properties": {"id": {"type": "keyword"}}} + apply_custom_mappings(mappings, custom_json) + assert mappings == original + + def test_merges_valid_json(self): + """Test that valid JSON custom mappings are merged into properties.""" + mappings = { + "properties": { + "properties": {"properties": {"datetime": {"type": "date_nanos"}}} + } + } + custom_json = json.dumps( + {"properties": {"properties": {"sar:frequency_band": {"type": "keyword"}}}} + ) + apply_custom_mappings(mappings, custom_json) + + assert mappings["properties"]["properties"]["properties"]["datetime"] == { + "type": "date_nanos" + } + assert mappings["properties"]["properties"]["properties"][ + "sar:frequency_band" + ] == {"type": "keyword"} + + def test_invalid_json_logs_error_and_preserves_mappings(self, caplog): + """Test that invalid JSON logs an error and doesn't modify mappings.""" + mappings = {"properties": {"id": {"type": "keyword"}}} + original = {"properties": {"id": {"type": "keyword"}}} + apply_custom_mappings(mappings, "not valid json") + assert mappings == original + 
assert "Failed to parse STAC_FASTAPI_ES_CUSTOM_MAPPINGS JSON" in caplog.text + + +class TestGetItemsMappings: + """Tests for the get_items_mappings function.""" + + @pytest.mark.parametrize( + "dynamic_mapping,expected", + [ + ("true", True), + ("false", False), + ("strict", "strict"), + ], + ids=["dynamic_true", "dynamic_false", "dynamic_strict"], + ) + def test_dynamic_mapping_values(self, dynamic_mapping, expected): + """Test dynamic mapping configuration with various values.""" + mappings = get_items_mappings(dynamic_mapping=dynamic_mapping) + assert mappings["dynamic"] == expected + + def test_custom_mappings_merged_preserving_defaults(self): + """Test that custom mappings are merged while preserving default fields.""" + custom = json.dumps( + {"properties": {"properties": {"custom:field": {"type": "keyword"}}}} + ) + mappings = get_items_mappings(custom_mappings=custom) + + # Custom field added + assert mappings["properties"]["properties"]["properties"]["custom:field"] == { + "type": "keyword" + } + # Default fields preserved + assert mappings["properties"]["id"] == {"type": "keyword"} + assert mappings["properties"]["geometry"] == {"type": "geo_shape"} + assert mappings["properties"]["properties"]["properties"]["datetime"] == { + "type": "date_nanos" + } + + def test_custom_can_override_defaults(self): + """Test that custom mappings can override default field types.""" + custom = json.dumps( + {"properties": {"properties": {"datetime": {"type": "date"}}}} + ) + mappings = get_items_mappings(custom_mappings=custom) + assert mappings["properties"]["properties"]["properties"]["datetime"] == { + "type": "date" + } + + def test_returns_independent_copies(self): + """Test that each call returns a new independent copy of mappings.""" + mappings1 = get_items_mappings() + mappings2 = get_items_mappings() + mappings1["properties"]["test"] = "value" + assert "test" not in mappings2["properties"] + + def test_has_required_base_structure(self): + """Test that returned 
mappings have required base structure.""" + mappings = get_items_mappings() + assert "numeric_detection" in mappings + assert "dynamic_templates" in mappings + assert all( + key in mappings["properties"] for key in ["id", "collection", "geometry"] + ) + + +class TestSTACExtensionUseCases: + """Integration tests for real-world STAC extension use cases.""" + + @pytest.mark.parametrize( + "extension_name,custom_fields", + [ + ( + "sar", + { + "properties": { + "properties": { + "sar:frequency_band": {"type": "keyword"}, + "sar:center_frequency": {"type": "float"}, + "sar:polarizations": {"type": "keyword"}, + } + } + }, + ), + ( + "cube", + { + "properties": { + "properties": { + "cube:dimensions": {"type": "object", "enabled": False}, + "cube:variables": {"type": "object", "enabled": False}, + } + } + }, + ), + ], + ids=["sar_extension", "cube_extension"], + ) + def test_add_extension_fields(self, extension_name, custom_fields): + """Test adding STAC extension fields via custom mappings.""" + mappings = get_items_mappings(custom_mappings=json.dumps(custom_fields)) + + props = mappings["properties"]["properties"]["properties"] + for field_name, field_config in custom_fields["properties"][ + "properties" + ].items(): + assert props[field_name] == field_config + # Default fields still present + assert props["datetime"] == {"type": "date_nanos"} + + def test_performance_optimization_with_disabled_dynamic_mapping(self): + """Test disabling dynamic mapping with selective field indexing.""" + query_fields = { + "properties": { + "properties": { + "platform": {"type": "keyword"}, + "eo:cloud_cover": {"type": "float"}, + } + } + } + mappings = get_items_mappings( + dynamic_mapping="false", custom_mappings=json.dumps(query_fields) + ) + + assert mappings["dynamic"] is False + props = mappings["properties"]["properties"]["properties"] + assert props["platform"] == {"type": "keyword"} + assert props["eo:cloud_cover"] == {"type": "float"} From 
ca0002bb9a3e369fc09b3b9845bdd6d9a25a4109 Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Tue, 2 Dec 2025 10:31:24 +0100 Subject: [PATCH 2/4] feat: enhance custom mappings support with detailed merge behavior and examples --- README.md | 85 ++++++++++++++++--- .../stac_fastapi/sfeos_helpers/mappings.py | 8 +- .../tests/sfeos_helpers/test_mappings.py | 76 +++++++++++++---- 3 files changed, 142 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index 583e5cc9..6a61ceda 100644 --- a/README.md +++ b/README.md @@ -713,7 +713,56 @@ SFEOS provides environment variables to customize Elasticsearch/OpenSearch index ### Custom Mappings (`STAC_FASTAPI_ES_CUSTOM_MAPPINGS`) -Accepts a JSON string representing a properties dictionary that will be merged into the default item mappings. Custom mappings will overwrite defaults if keys collide. +Accepts a JSON string with the same structure as the default ES mappings. The custom mappings are **recursively merged** with the defaults at the root level. + +#### Merge Behavior + +The merge follows these rules: + +| Scenario | Result | +|----------|--------| +| Key only in defaults | Preserved | +| Key only in custom | Added | +| Key in both, both are dicts | Recursively merged | +| Key in both, values are not both dicts | **Custom overwrites default** | + +**Example - Adding new properties (merged):** + +```json +// Default has: {"geometry": {"type": "geo_shape"}} +// Custom has: {"geometry": {"ignore_malformed": true}} +// Result: {"geometry": {"type": "geo_shape", "ignore_malformed": true}} +``` + +**Example - Overriding a value (replaced):** + +```json +// Default has: {"properties": {"datetime": {"type": "date_nanos"}}} +// Custom has: {"properties": {"datetime": {"type": "date"}}} +// Result: {"properties": {"datetime": {"type": "date"}}} +``` + +#### JSON Structure + +The custom JSON should mirror the structure of the default mappings. 
For STAC item properties, the path is `properties.properties.properties`: + +``` +{ + "numeric_detection": false, + "dynamic_templates": [...], + "properties": { # Top-level ES mapping properties + "id": {...}, + "geometry": {...}, + "properties": { # STAC item "properties" field + "type": "object", + "properties": { # Nested properties within STAC properties + "datetime": {...}, + "sar:frequency_band": {...} # <-- Custom extension fields go here + } + } + } +} +``` **Example - Adding SAR Extension Fields:** @@ -721,10 +770,12 @@ Accepts a JSON string representing a properties dictionary that will be merged i export STAC_FASTAPI_ES_CUSTOM_MAPPINGS='{ "properties": { "properties": { - "sar:frequency_band": {"type": "keyword"}, - "sar:center_frequency": {"type": "float"}, - "sar:polarizations": {"type": "keyword"}, - "sar:product_type": {"type": "keyword"} + "properties": { + "sar:frequency_band": {"type": "keyword"}, + "sar:center_frequency": {"type": "float"}, + "sar:polarizations": {"type": "keyword"}, + "sar:product_type": {"type": "keyword"} + } } } }' @@ -736,13 +787,25 @@ export STAC_FASTAPI_ES_CUSTOM_MAPPINGS='{ export STAC_FASTAPI_ES_CUSTOM_MAPPINGS='{ "properties": { "properties": { - "cube:dimensions": {"type": "object", "enabled": false}, - "cube:variables": {"type": "object", "enabled": false} + "properties": { + "cube:dimensions": {"type": "object", "enabled": false}, + "cube:variables": {"type": "object", "enabled": false} + } } } }' ``` +**Example - Adding geometry options:** + +```bash +export STAC_FASTAPI_ES_CUSTOM_MAPPINGS='{ + "properties": { + "geometry": {"ignore_malformed": true} + } +}' +``` + ### Dynamic Mapping Control (`STAC_FASTAPI_ES_DYNAMIC_MAPPING`) Controls how Elasticsearch/OpenSearch handles fields not defined in the mapping: @@ -765,9 +828,11 @@ export STAC_FASTAPI_ES_DYNAMIC_MAPPING=false export STAC_FASTAPI_ES_CUSTOM_MAPPINGS='{ "properties": { "properties": { - "platform": {"type": "keyword"}, - "eo:cloud_cover": {"type": 
"float"}, - "view:sun_elevation": {"type": "float"} + "properties": { + "platform": {"type": "keyword"}, + "eo:cloud_cover": {"type": "float"}, + "view:sun_elevation": {"type": "float"} + } } } }' diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py index d5da5f85..e5d16e05 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py @@ -82,9 +82,13 @@ def apply_custom_mappings( ) -> None: """Apply custom mappings from a JSON string to the mappings dictionary. + The custom mappings JSON should have the same structure as ES_ITEMS_MAPPINGS. + It will be recursively merged at the root level, allowing users to override + any part of the mapping including properties, dynamic_templates, etc. + Args: mappings: The mappings dictionary to modify (modified in place). - custom_mappings_json: JSON string containing custom property mappings. + custom_mappings_json: JSON string containing custom mappings. Raises: Logs error if JSON parsing or merging fails. 
@@ -94,7 +98,7 @@ def apply_custom_mappings( try: custom_mappings = json.loads(custom_mappings_json) - merge_mappings(mappings["properties"], custom_mappings) + merge_mappings(mappings, custom_mappings) except json.JSONDecodeError as e: logger.error(f"Failed to parse STAC_FASTAPI_ES_CUSTOM_MAPPINGS JSON: {e}") except Exception as e: diff --git a/stac_fastapi/tests/sfeos_helpers/test_mappings.py b/stac_fastapi/tests/sfeos_helpers/test_mappings.py index 6ead3aa8..0695658c 100644 --- a/stac_fastapi/tests/sfeos_helpers/test_mappings.py +++ b/stac_fastapi/tests/sfeos_helpers/test_mappings.py @@ -45,6 +45,13 @@ def test_custom_overwrites_on_key_collision(self): merge_mappings(base, custom) assert base["level1"]["a"] == {"type": "date"} + def test_merge_adds_properties_to_existing_nested_dict(self): + """Test that merging adds new properties to existing nested dicts.""" + base = {"geometry": {"type": "geo_shape"}} + custom = {"geometry": {"ignore_malformed": True}} + merge_mappings(base, custom) + assert base == {"geometry": {"type": "geo_shape", "ignore_malformed": True}} + @pytest.mark.parametrize( "base,custom,expected", [ @@ -111,24 +118,49 @@ def test_no_op_for_empty_input(self, custom_json): apply_custom_mappings(mappings, custom_json) assert mappings == original - def test_merges_valid_json(self): - """Test that valid JSON custom mappings are merged into properties.""" + def test_merges_at_root_level(self): + """Test that custom mappings are merged at the root level.""" mappings = { + "numeric_detection": False, "properties": { - "properties": {"properties": {"datetime": {"type": "date_nanos"}}} - } + "id": {"type": "keyword"}, + "properties": {"properties": {"datetime": {"type": "date_nanos"}}}, + }, } custom_json = json.dumps( - {"properties": {"properties": {"sar:frequency_band": {"type": "keyword"}}}} + { + "properties": { + "properties": { + "properties": {"sar:frequency_band": {"type": "keyword"}} + }, + "bbox": {"type": "object", "enabled": False}, + } + } ) 
apply_custom_mappings(mappings, custom_json) + # Existing fields preserved + assert mappings["properties"]["id"] == {"type": "keyword"} assert mappings["properties"]["properties"]["properties"]["datetime"] == { "type": "date_nanos" } + # New fields added assert mappings["properties"]["properties"]["properties"][ "sar:frequency_band" ] == {"type": "keyword"} + assert mappings["properties"]["bbox"] == {"type": "object", "enabled": False} + + def test_can_override_dynamic_templates(self): + """Test that dynamic_templates can be overridden via custom mappings.""" + mappings = { + "dynamic_templates": [{"old": "template"}], + "properties": {"id": {"type": "keyword"}}, + } + custom_json = json.dumps({"dynamic_templates": [{"new": "template"}]}) + apply_custom_mappings(mappings, custom_json) + + assert mappings["dynamic_templates"] == [{"new": "template"}] + assert mappings["properties"]["id"] == {"type": "keyword"} def test_invalid_json_logs_error_and_preserves_mappings(self, caplog): """Test that invalid JSON logs an error and doesn't modify mappings.""" @@ -159,7 +191,11 @@ def test_dynamic_mapping_values(self, dynamic_mapping, expected): def test_custom_mappings_merged_preserving_defaults(self): """Test that custom mappings are merged while preserving default fields.""" custom = json.dumps( - {"properties": {"properties": {"custom:field": {"type": "keyword"}}}} + { + "properties": { + "properties": {"properties": {"custom:field": {"type": "keyword"}}} + } + } ) mappings = get_items_mappings(custom_mappings=custom) @@ -177,7 +213,11 @@ def test_custom_mappings_merged_preserving_defaults(self): def test_custom_can_override_defaults(self): """Test that custom mappings can override default field types.""" custom = json.dumps( - {"properties": {"properties": {"datetime": {"type": "date"}}}} + { + "properties": { + "properties": {"properties": {"datetime": {"type": "date"}}} + } + } ) mappings = get_items_mappings(custom_mappings=custom) assert 
mappings["properties"]["properties"]["properties"]["datetime"] == { @@ -212,9 +252,11 @@ class TestSTACExtensionUseCases: { "properties": { "properties": { - "sar:frequency_band": {"type": "keyword"}, - "sar:center_frequency": {"type": "float"}, - "sar:polarizations": {"type": "keyword"}, + "properties": { + "sar:frequency_band": {"type": "keyword"}, + "sar:center_frequency": {"type": "float"}, + "sar:polarizations": {"type": "keyword"}, + } } } }, @@ -224,8 +266,10 @@ class TestSTACExtensionUseCases: { "properties": { "properties": { - "cube:dimensions": {"type": "object", "enabled": False}, - "cube:variables": {"type": "object", "enabled": False}, + "properties": { + "cube:dimensions": {"type": "object", "enabled": False}, + "cube:variables": {"type": "object", "enabled": False}, + } } } }, @@ -238,7 +282,7 @@ def test_add_extension_fields(self, extension_name, custom_fields): mappings = get_items_mappings(custom_mappings=json.dumps(custom_fields)) props = mappings["properties"]["properties"]["properties"] - for field_name, field_config in custom_fields["properties"][ + for field_name, field_config in custom_fields["properties"]["properties"][ "properties" ].items(): assert props[field_name] == field_config @@ -250,8 +294,10 @@ def test_performance_optimization_with_disabled_dynamic_mapping(self): query_fields = { "properties": { "properties": { - "platform": {"type": "keyword"}, - "eo:cloud_cover": {"type": "float"}, + "properties": { + "platform": {"type": "keyword"}, + "eo:cloud_cover": {"type": "float"}, + } } } } From d1ae14786a725a32b4b7427ae3e76d061d45192f Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Tue, 2 Dec 2025 10:44:15 +0100 Subject: [PATCH 3/4] chore: updated CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a0d1a0c..2c2a821c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
## [Unreleased] ### Added +- Added configuration-based support for extending Elasticsearch/OpenSearch index mappings via environment variables, allowing users to customize field mappings without code changes through the `STAC_FASTAPI_ES_CUSTOM_MAPPINGS` environment variable. Also added the `STAC_FASTAPI_ES_DYNAMIC_MAPPING` variable to control dynamic mapping behavior. [#546](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/546) ### Changed From 81ee57d95137d7013263f0e24fd51dfa896f88ad Mon Sep 17 00:00:00 2001 From: Andrzej Pijanowski Date: Mon, 8 Dec 2025 10:38:39 +0100 Subject: [PATCH 4/4] feat: add STAC_FASTAPI_ES_MAPPINGS_FILE for file-based custom mappings configuration --- CHANGELOG.md | 1 + README.md | 91 ++++++++++++++- .../stac_fastapi/sfeos_helpers/mappings.py | 12 ++ .../tests/sfeos_helpers/test_mappings_file.py | 106 ++++++++++++++++++ 4 files changed, 208 insertions(+), 2 deletions(-) create mode 100644 stac_fastapi/tests/sfeos_helpers/test_mappings_file.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 27ea61bf..69681762 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
## [Unreleased] ### Added +- Added `STAC_FASTAPI_ES_MAPPINGS_FILE` environment variable to support file-based custom mappings configuration. - Added configuration-based support for extending Elasticsearch/OpenSearch index mappings via environment variables, allowing users to customize field mappings without code changes through the `STAC_FASTAPI_ES_CUSTOM_MAPPINGS` environment variable. Also added the `STAC_FASTAPI_ES_DYNAMIC_MAPPING` variable to control dynamic mapping behavior. [#546](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/546) ### Changed diff --git a/README.md b/README.md index 6a61ceda..14db2d2a 100644 --- a/README.md +++ b/README.md @@ -371,6 +371,7 @@ You can customize additional settings in your `.env` file: | `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional | | `EXCLUDED_FROM_ITEMS` | Specifies fields to exclude from STAC item responses. Supports comma-separated field names and dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). | `None` | Optional | | `STAC_FASTAPI_ES_CUSTOM_MAPPINGS` | JSON string of custom Elasticsearch/OpenSearch property mappings to merge with defaults. See [Custom Index Mappings](#custom-index-mappings). | `None` | Optional | +| `STAC_FASTAPI_ES_MAPPINGS_FILE` | Path to a JSON file containing custom Elasticsearch/OpenSearch property mappings to merge with defaults. See [Custom Index Mappings](#custom-index-mappings). | `None` | Optional | | `STAC_FASTAPI_ES_DYNAMIC_MAPPING` | Controls dynamic mapping behavior for item indices. Values: `true` (default), `false`, or `strict`. See [Custom Index Mappings](#custom-index-mappings). 
| `true` | Optional | @@ -709,11 +710,17 @@ SFEOS provides environment variables to customize Elasticsearch/OpenSearch index | Variable | Description | Default | |----------|-------------|---------| | `STAC_FASTAPI_ES_CUSTOM_MAPPINGS` | JSON string of property mappings to merge with defaults | None | +| `STAC_FASTAPI_ES_MAPPINGS_FILE` | Path to a JSON file containing property mappings to merge with defaults | None | | `STAC_FASTAPI_ES_DYNAMIC_MAPPING` | Controls dynamic mapping: `true`, `false`, or `strict` | `true` | -### Custom Mappings (`STAC_FASTAPI_ES_CUSTOM_MAPPINGS`) +### Custom Mappings -Accepts a JSON string with the same structure as the default ES mappings. The custom mappings are **recursively merged** with the defaults at the root level. +You can customize the Elasticsearch/OpenSearch mappings by providing a JSON configuration. This can be done via: + +1. `STAC_FASTAPI_ES_CUSTOM_MAPPINGS` environment variable (takes precedence) +2. `STAC_FASTAPI_ES_MAPPINGS_FILE` environment variable (file path) + +The configuration should have the same structure as the default ES mappings. The custom mappings are **recursively merged** with the defaults at the root level. #### Merge Behavior @@ -806,6 +813,86 @@ export STAC_FASTAPI_ES_CUSTOM_MAPPINGS='{ }' ``` +**Example - Using a mappings file (recommended for complex configurations):** + +Instead of passing large JSON blobs via environment variables, you can use a file: + +```bash +# Create a mappings file +cat > custom-mappings.json <<EOF +{ + "properties": { + "properties": { + "properties": { + "platform": {"type": "keyword"}, + "eo:cloud_cover": {"type": "float"} + } + } + } +} +EOF + +# Point SFEOS at the file +export STAC_FASTAPI_ES_MAPPINGS_FILE=/path/to/custom-mappings.json +``` + +> [!TIP] +> If both `STAC_FASTAPI_ES_CUSTOM_MAPPINGS` and `STAC_FASTAPI_ES_MAPPINGS_FILE` are set, the environment variable takes precedence, allowing quick overrides during testing or troubleshooting. 
+ ### Dynamic Mapping Control (`STAC_FASTAPI_ES_DYNAMIC_MAPPING`) Controls how Elasticsearch/OpenSearch handles fields not defined in the mapping: diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py index e5d16e05..f5a55b07 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py @@ -138,6 +138,18 @@ def get_items_mappings( if custom_mappings is not None else os.getenv("STAC_FASTAPI_ES_CUSTOM_MAPPINGS") ) + + if custom_config is None: + mappings_file = os.getenv("STAC_FASTAPI_ES_MAPPINGS_FILE") + if mappings_file: + try: + with open(mappings_file, "r") as f: + custom_config = f.read() + except Exception as e: + logger.error( + f"Failed to read STAC_FASTAPI_ES_MAPPINGS_FILE at {mappings_file}: {e}" + ) + apply_custom_mappings(mappings, custom_config) return mappings diff --git a/stac_fastapi/tests/sfeos_helpers/test_mappings_file.py b/stac_fastapi/tests/sfeos_helpers/test_mappings_file.py new file mode 100644 index 00000000..428e3938 --- /dev/null +++ b/stac_fastapi/tests/sfeos_helpers/test_mappings_file.py @@ -0,0 +1,106 @@ +import json + +from stac_fastapi.sfeos_helpers.mappings import get_items_mappings + + +class TestMappingsFile: + def test_mappings_file_applied(self, monkeypatch, tmp_path): + """Test that mappings are read from file when env var is set.""" + custom_mappings = { + "properties": {"properties": {"file_field": {"type": "keyword"}}} + } + mappings_file = tmp_path / "mappings.json" + mappings_file.write_text(json.dumps(custom_mappings)) + + monkeypatch.setenv("STAC_FASTAPI_ES_MAPPINGS_FILE", str(mappings_file)) + monkeypatch.delenv("STAC_FASTAPI_ES_CUSTOM_MAPPINGS", raising=False) + + mappings = get_items_mappings() + + assert mappings["properties"]["properties"]["file_field"] == {"type": "keyword"} + + def test_env_var_precedence(self, monkeypatch, tmp_path): + 
"""Test that STAC_FASTAPI_ES_CUSTOM_MAPPINGS takes precedence over file.""" + file_mappings = { + "properties": {"properties": {"shared_field": {"type": "keyword"}}} + } + mappings_file = tmp_path / "mappings.json" + mappings_file.write_text(json.dumps(file_mappings)) + + env_mappings = { + "properties": {"properties": {"shared_field": {"type": "text"}}} + } + + monkeypatch.setenv("STAC_FASTAPI_ES_MAPPINGS_FILE", str(mappings_file)) + monkeypatch.setenv("STAC_FASTAPI_ES_CUSTOM_MAPPINGS", json.dumps(env_mappings)) + + mappings = get_items_mappings() + + assert mappings["properties"]["properties"]["shared_field"] == {"type": "text"} + + def test_missing_file_handled_gracefully(self, monkeypatch, caplog): + """Test that missing file is logged and ignored.""" + monkeypatch.setenv("STAC_FASTAPI_ES_MAPPINGS_FILE", "/non/existent/file.json") + monkeypatch.delenv("STAC_FASTAPI_ES_CUSTOM_MAPPINGS", raising=False) + + get_items_mappings() + + assert "Failed to read STAC_FASTAPI_ES_MAPPINGS_FILE" in caplog.text + + def test_invalid_json_in_file(self, monkeypatch, tmp_path, caplog): + """Test that invalid JSON in file is logged and ignored.""" + mappings_file = tmp_path / "invalid.json" + mappings_file.write_text("{this is not valid json}") + + monkeypatch.setenv("STAC_FASTAPI_ES_MAPPINGS_FILE", str(mappings_file)) + monkeypatch.delenv("STAC_FASTAPI_ES_CUSTOM_MAPPINGS", raising=False) + + get_items_mappings() + + assert "Failed to parse STAC_FASTAPI_ES_CUSTOM_MAPPINGS JSON" in caplog.text + + def test_file_and_env_var_both_set(self, monkeypatch, tmp_path): + """Test that env var completely overrides file when both are set.""" + file_mappings = { + "properties": { + "properties": { + "file_only_field": {"type": "keyword"}, + "shared_field": {"type": "text"}, + } + } + } + mappings_file = tmp_path / "mappings.json" + mappings_file.write_text(json.dumps(file_mappings)) + + env_mappings = { + "properties": { + "properties": { + "env_only_field": {"type": "keyword"}, + 
"shared_field": {"type": "integer"}, + } + } + } + + monkeypatch.setenv("STAC_FASTAPI_ES_MAPPINGS_FILE", str(mappings_file)) + monkeypatch.setenv("STAC_FASTAPI_ES_CUSTOM_MAPPINGS", json.dumps(env_mappings)) + + mappings = get_items_mappings() + + # Only env var fields should be present + assert "env_only_field" in mappings["properties"]["properties"] + assert "file_only_field" not in mappings["properties"]["properties"] + assert mappings["properties"]["properties"]["shared_field"] == { + "type": "integer" + } + + def test_empty_file_handled_gracefully(self, monkeypatch, tmp_path): + """Test that empty file is handled without error.""" + mappings_file = tmp_path / "empty.json" + mappings_file.write_text("") + + monkeypatch.setenv("STAC_FASTAPI_ES_MAPPINGS_FILE", str(mappings_file)) + monkeypatch.delenv("STAC_FASTAPI_ES_CUSTOM_MAPPINGS", raising=False) + + # Should not raise, just use default mappings + mappings = get_items_mappings() + assert "properties" in mappings