diff --git a/CHANGELOG.md b/CHANGELOG.md index aa2b21810..8b07d8982 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,14 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added +- Environment variable `VALIDATE_QUERYABLES` to enable/disable validation of queryables in search/filter requests. When set to `true`, search requests will be validated against the defined queryables, returning an error for any unsupported fields. Defaults to `false` for backward compatibility. [#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532) + +- Environment variable `QUERYABLES_CACHE_TTL` to configure the TTL (in seconds) for caching queryables. Default is `1800` seconds (30 minutes) to balance performance and freshness of queryables data. [#532](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/532) + - Added optional `/catalogs` route support to enable federated hierarchical catalog browsing and navigation. [#547](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/547) + - Added DELETE `/catalogs/{catalog_id}/collections/{collection_id}` endpoint to support removing collections from catalogs. When a collection belongs to multiple catalogs, it removes only the specified catalog from the collection's parent_ids. When a collection belongs to only one catalog, the collection is deleted entirely. [#554](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/554) + - Added `parent_ids` internal field to collections to support multi-catalog hierarchies. Collections can now belong to multiple catalogs, with parent catalog IDs stored in this field for efficient querying and management. 
[#554](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/554) ### Changed diff --git a/README.md b/README.md index 7744b7f41..7d165bf84 100644 --- a/README.md +++ b/README.md @@ -469,8 +469,10 @@ You can customize additional settings in your `.env` file: | `STAC_INDEX_ASSETS` | Controls if Assets are indexed when added to Elasticsearch/Opensearch. This allows asset fields to be included in search queries. | `false` | Optional | | `USE_DATETIME` | Configures the datetime search behavior in SFEOS. When enabled, searches both datetime field and falls back to start_datetime/end_datetime range for items with null datetime. When disabled, searches only by start_datetime/end_datetime range. | `true` | Optional | | `USE_DATETIME_NANOS` | Enables nanosecond precision handling for `datetime` field searches as per the `date_nanos` type. When `False`, it uses 3 millisecond precision as per the type `date`. | `true` | Optional | -| `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. | None | Optional | +| `EXCLUDED_FROM_QUERYABLES` | Comma-separated list of fully qualified field names to exclude from the queryables endpoint and filtering. Use full paths like `properties.auth:schemes,properties.storage:schemes`. Excluded fields and their nested children will not be exposed in queryables. If `VALIDATE_QUERYABLES` is enabled, these fields will also be considered invalid for filtering. | None | Optional | | `EXCLUDED_FROM_ITEMS` | Specifies fields to exclude from STAC item responses. Supports comma-separated field names and dot notation for nested fields (e.g., `private_data,properties.confidential,assets.internal`). 
| `None` | Optional | +| `VALIDATE_QUERYABLES` | Enable validation of query parameters against the collection's queryables. If set to `true`, the API will reject queries containing fields that are not defined in the collection's queryables. | `false` | Optional | +| `QUERYABLES_CACHE_TTL` | Time-to-live (in seconds) for the queryables cache. Used when `VALIDATE_QUERYABLES` is enabled. | `1800` | Optional | > [!NOTE] @@ -526,6 +528,29 @@ EXCLUDED_FROM_QUERYABLES="properties.auth:schemes,properties.storage:schemes,pro - Excluded fields and their nested children will be skipped during field traversal - Both the field itself and any nested properties will be excluded +## Queryables Validation + +SFEOS supports validating query parameters against the collection's defined queryables. This ensures that users only query fields that are explicitly exposed and indexed. + +**Configuration:** + +To enable queryables validation, set the following environment variables: + +```bash +VALIDATE_QUERYABLES=true +QUERYABLES_CACHE_TTL=1800 # Optional, defaults to 1800 seconds (30 minutes) +``` + +**Behavior:** + +- When enabled, the API maintains a cache of all queryable fields across all collections. +- Search requests (both GET and POST) are checked against this cache. +- If a request contains a query parameter or filter field that is not in the list of allowed queryables, the API returns a `400 Bad Request` error with a message indicating the invalid field(s). +- The cache is automatically refreshed based on the `QUERYABLES_CACHE_TTL` setting. +- **Interaction with `EXCLUDED_FROM_QUERYABLES`**: If `VALIDATE_QUERYABLES` is enabled, fields listed in `EXCLUDED_FROM_QUERYABLES` will also be considered invalid for filtering. This effectively enforces the exclusion of these fields from search queries. + +This feature helps prevent queries on non-queryable fields which could lead to unnecessary load on the database. 
+ ## Datetime-Based Index Management ### Overview diff --git a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py index 4f5434535..9ade962ba 100644 --- a/stac_fastapi/core/stac_fastapi/core/base_database_logic.py +++ b/stac_fastapi/core/stac_fastapi/core/base_database_logic.py @@ -140,6 +140,10 @@ async def delete_collection( pass @abc.abstractmethod + async def get_queryables_mapping(self, collection_id: str = "*") -> Dict[str, Any]: + """Retrieve mapping of Queryables for search.""" + pass + async def get_all_catalogs( self, token: Optional[str], diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index d04b66062..18bea46f4 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -24,6 +24,10 @@ from stac_fastapi.core.base_settings import ApiBaseSettings from stac_fastapi.core.datetime_utils import format_datetime_range from stac_fastapi.core.models.links import PagingLinks +from stac_fastapi.core.queryables import ( + QueryablesCache, + get_properties_from_cql2_filter, +) from stac_fastapi.core.serializers import ( CatalogSerializer, CollectionSerializer, @@ -92,6 +96,10 @@ class CoreClient(AsyncBaseCoreClient): title: str = attr.ib(default="stac-fastapi") description: str = attr.ib(default="stac-fastapi") + def __attrs_post_init__(self): + """Initialize the queryables cache.""" + self.queryables_cache = QueryablesCache(self.database) + def extension_is_enabled(self, extension_name: str) -> bool: """Check if an extension is enabled by checking self.extensions. 
class QueryablesCache:
    """A time-based cache of queryable field names used to validate searches.

    Cache refreshes are serialized with an ``asyncio.Lock``, so coroutines that
    hit an expired cache at the same time wait for a single refresh instead of
    each querying the database. The lock is an asyncio primitive: it does not
    make the cache safe to share between OS threads.

    Settings are read from the environment by :meth:`reload_settings`:

    * ``VALIDATE_QUERYABLES`` -- ``"true"`` (case-insensitive) enables
      validation; any other value disables it, in which case
      :meth:`get_all_queryables` returns an empty set and :meth:`validate`
      accepts everything without touching the database.
    * ``QUERYABLES_CACHE_TTL`` -- cache lifetime in seconds (default ``1800``).
    """

    def __init__(self, database_logic: Any):
        """
        Initialize the QueryablesCache.

        Args:
            database_logic: An instance of a class with an awaitable
                `get_queryables_mapping` method whose result is a mapping keyed
                by queryable field name.
        """
        self._db_logic = database_logic
        self._cache: Dict[str, List[str]] = {}
        self._all_queryables: Set[str] = set()
        self._last_updated: float = 0
        self._lock = asyncio.Lock()
        self.validation_enabled: bool = False
        self.cache_ttl: int = 1800  # How often to refresh cache (in seconds)
        self.reload_settings()

    def reload_settings(self):
        """Reload settings from environment variables.

        A ``QUERYABLES_CACHE_TTL`` value that is not an integer is reported with
        a warning and replaced by the default of 1800 seconds, so a typo in the
        environment cannot prevent the owning client from being constructed.
        """
        self.validation_enabled = (
            os.getenv("VALIDATE_QUERYABLES", "false").lower() == "true"
        )

        default_ttl = 1800
        raw_ttl = os.getenv("QUERYABLES_CACHE_TTL", str(default_ttl))
        try:
            self.cache_ttl = int(raw_ttl)
        except ValueError:
            # Imported here so this block does not depend on a new file-level import.
            import logging

            logging.getLogger(__name__).warning(
                "Invalid QUERYABLES_CACHE_TTL value %r; using %d seconds instead.",
                raw_ttl,
                default_ttl,
            )
            self.cache_ttl = default_ttl

    def _is_stale(self) -> bool:
        """Return True when the cache is empty or older than ``cache_ttl``."""
        return not self._cache or (
            time.time() - self._last_updated >= self.cache_ttl
        )

    async def _update_cache(self):
        """Update the cache with the latest queryables from the database."""
        if not self.validation_enabled:
            return

        async with self._lock:
            # Re-check under the lock: another coroutine may have refreshed the
            # cache while this one was waiting to acquire it.
            if not self._is_stale():
                return

            queryables_mapping = await self._db_logic.get_queryables_mapping()
            all_queryables_set = set(queryables_mapping.keys())

            self._all_queryables = all_queryables_set
            self._cache = {"*": list(all_queryables_set)}
            self._last_updated = time.time()

    async def get_all_queryables(self) -> Set[str]:
        """
        Return a set of all queryable attributes across all collections.

        This method will update the cache if it's stale or has been cleared.
        When validation is disabled an empty set is returned and the database
        is not queried.
        """
        if not self.validation_enabled:
            return set()

        if self._is_stale():
            await self._update_cache()
        return self._all_queryables

    async def validate(self, fields: Set[str]) -> None:
        """
        Validate if the provided fields are queryable.

        Args:
            fields: Field names referenced by the request. Any iterable of
                strings is accepted; it is converted to a set before comparison.

        Raises:
            HTTPException: With status 400 when at least one field is not among
                the cached queryables. Never raised while validation is disabled.
        """
        if not self.validation_enabled:
            return

        allowed_fields = await self.get_all_queryables()
        invalid_fields = set(fields) - allowed_fields
        if invalid_fields:
            raise HTTPException(
                status_code=400,
                detail=f"Invalid query fields: {', '.join(sorted(invalid_fields))}. "
                "These fields are not defined in the collection's queryables. "
                "Use the /queryables endpoint to see available fields.",
            )
def get_properties_from_cql2_filter(cql2_filter: Dict[str, Any]) -> Set[str]:
    """Extract every property name referenced in a CQL2-JSON filter.

    The filter is walked iteratively, descending into every nested dict and
    list. This finds property references that are direct operands in ``args``
    as well as those wrapped in other constructs, such as the members of an
    ``{"interval": [...]}`` operand or the entries of a list operand.

    Property names are normalized by stripping the 'properties.' prefix
    if present, to match queryables stored without the prefix.

    Args:
        cql2_filter: The filter expression as a parsed JSON structure.

    Returns:
        The set of referenced property names. Empty when the filter contains no
        property reference or is not a dict/list structure at all.
    """
    props: Set[str] = set()
    pending: List[Any] = [cql2_filter]

    while pending:
        node = pending.pop()

        if isinstance(node, (list, tuple)):
            pending.extend(node)
            continue
        if not isinstance(node, dict):
            # Scalar literal (number, string, bool, None): nothing to collect.
            continue
        if "coordinates" in node:
            # GeoJSON geometry literal: holds only coordinates, and may be large.
            continue

        prop_name = node.get("property")
        if isinstance(prop_name, str):
            props.add(prop_name.removeprefix("properties."))
            continue

        pending.extend(node.values())

    return props
def _get_excluded_from_queryables() -> Set[str]:
    """Get fields to exclude from queryables endpoint and filtering.

    Reads from EXCLUDED_FROM_QUERYABLES environment variable.
    Supports comma-separated list of field names.

    For each exclusion pattern, both the original and the version with/without
    'properties.' prefix are included. This ensures fields are excluded regardless
    of whether they appear at the top level or under 'properties' in the mapping.

    Example:
        EXCLUDED_FROM_QUERYABLES="properties.auth:schemes,storage:schemes"

        This will exclude:
        - properties.auth:schemes (and children like properties.auth:schemes.s3.type)
        - auth:schemes (and children like auth:schemes.s3.type)
        - storage:schemes (and children)
        - properties.storage:schemes (and children)

    Returns:
        Set[str]: Set of field names to exclude from queryables
    """
    excluded = os.getenv("EXCLUDED_FROM_QUERYABLES", "")
    if not excluded:
        return set()

    result = set()
    for field in excluded.split(","):
        field = field.strip()
        if not field:
            continue

        result.add(field)

        # Register the counterpart spelling so the pattern matches wherever the
        # field sits in the mapping.
        if field.startswith("properties."):
            result.add(field.removeprefix("properties."))
        else:
            result.add(f"properties.{field}")

    return result


async def get_queryables_mapping_shared(
    mappings: Dict[str, Dict[str, Any]],
    collection_id: str = "*",
) -> Dict[str, str]:
    """Retrieve mapping of Queryables for search.

    Every index mapping is traversed breadth-first. A field becomes a queryable
    when it declares a ``type``; object fields without a type only contribute
    their children. A field that is disabled (``"enabled": False``) or listed in
    the EXCLUDED_FROM_QUERYABLES environment variable is skipped together with
    its children, at any nesting level including the top level.

    Args:
        mappings (Dict[str, Dict[str, Any]]): The mapping information returned from
            Elasticsearch/OpenSearch client's indices.get_mapping() method, keyed
            by index name.
        collection_id (str, optional): Not used by this function; the caller
            decides which indices are present in ``mappings``. Defaults to "*".

    Returns:
        Dict[str, str]: A dictionary containing the Queryables mappings, where keys are
            field names (with 'properties.' prefix removed) and values are the
            corresponding paths in the Elasticsearch/OpenSearch document structure.
            When two paths reduce to the same key, the one visited last wins.
    """
    queryables_mapping: Dict[str, str] = {}
    excluded = _get_excluded_from_queryables()

    def is_excluded(path: str) -> bool:
        """Check if the path equals, or is nested under, an excluded prefix."""
        return any(
            path == prefix or path.startswith(prefix + ".") for prefix in excluded
        )

    for mapping in mappings.values():
        mapping_properties = mapping["mappings"].get("properties", {})

        stack: deque = deque(mapping_properties.items())

        while stack:
            field_fqn, field_def = stack.popleft()

            # Filter on dequeue rather than on enqueue, so the same rule applies
            # to top-level fields (queued unfiltered above) and nested ones.
            if not field_def.get("enabled", True) or is_excluded(field_fqn):
                continue

            nested_properties = field_def.get("properties")
            if nested_properties:
                stack.extend(
                    (f"{field_fqn}.{k}", v) for k, v in nested_properties.items()
                )

            # Pure containers have no type of their own and are not queryable.
            if not field_def.get("type"):
                continue

            field_name = field_fqn.removeprefix("properties.")
            queryables_mapping[field_name] = field_fqn

    return queryables_mapping
@staticmethod
def _is_excluded(field_fqn: str, excluded: set[str]) -> bool:
    """Tell whether a field is covered by one of the exclusion patterns.

    A pattern covers the field when it is identical to the field's fully
    qualified name, or when the name continues with a dot right after the
    pattern (the field is nested under the excluded one).

    Args:
        field_fqn: Fully qualified field name (e.g., "properties.auth:schemes.s3.type")
        excluded: Set of exclusion patterns

    Returns:
        True if field should be excluded, False otherwise
    """
    return any(
        field_fqn == pattern or field_fqn.startswith(pattern + ".")
        for pattern in excluded
    )
def to_es_field(queryables_mapping: Dict[str, Any], field: str) -> str:
    """Resolve a queryable field name to its Elasticsearch/OpenSearch field path.

    The name is looked up as given first. If that fails and the name carries a
    'properties.' prefix, the lookup is retried without it, so that
    'properties.eo:cloud_cover' resolves through a mapping keyed by
    'eo:cloud_cover'. A name found in neither form is returned unchanged.

    Args:
        queryables_mapping (Dict[str, Any]): Mapping from queryable names to
            document field paths.
        field (str): The field name used in the query.

    Returns:
        str: The mapped field name suitable for Elasticsearch queries.
    """
    if field in queryables_mapping:
        return queryables_mapping[field]

    # removeprefix() leaves names without the prefix untouched, in which case
    # this second lookup misses again and falls through to the original name.
    return queryables_mapping.get(field.removeprefix("properties."), field)
cache_ttl_seconds: int = 3600): + def __init__(self, cache_ttl_seconds: int = 1800): """Initialize the cache manager. Args: diff --git a/stac_fastapi/tests/api/test_api_query_validation.py b/stac_fastapi/tests/api/test_api_query_validation.py new file mode 100644 index 000000000..2638f871a --- /dev/null +++ b/stac_fastapi/tests/api/test_api_query_validation.py @@ -0,0 +1,171 @@ +import json +import os +from unittest import mock + +import pytest + +if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch": + from stac_fastapi.opensearch.app import app_config +else: + from stac_fastapi.elasticsearch.app import app_config + + +def get_core_client(): + if os.getenv("BACKEND", "elasticsearch").lower() == "opensearch": + from stac_fastapi.opensearch.app import app_config + else: + from stac_fastapi.elasticsearch.app import app_config + return app_config["client"] + + +def reload_queryables_settings(): + client = get_core_client() + if hasattr(client, "queryables_cache"): + client.queryables_cache.reload_settings() + + +@pytest.fixture(autouse=True) +def enable_validation(): + + client = app_config["client"] + with mock.patch.dict(os.environ, {"VALIDATE_QUERYABLES": "true"}): + client.queryables_cache.reload_settings() + client.queryables_cache._cache = {} + client.queryables_cache._last_updated = 0 + yield + client.queryables_cache.reload_settings() + + +@pytest.mark.asyncio +async def test_search_post_query_valid_param(app_client, ctx): + """Test POST /search with a valid query parameter""" + query = {"query": {"eo:cloud_cover": {"lt": 10}}} + resp = await app_client.post("/search", json=query) + assert resp.status_code == 200 + + +@pytest.mark.asyncio +async def test_search_post_query_invalid_param(app_client, ctx): + """Test POST /search with an invalid query parameter""" + query = {"query": {"invalid_param": {"eq": "test"}}} + resp = await app_client.post("/search", json=query) + assert resp.status_code == 400 + resp_json = resp.json() + assert "Invalid query 
fields: invalid_param" in resp_json["detail"] + + +@pytest.mark.asyncio +async def test_item_collection_get_filter_valid_param(app_client, ctx): + """Test GET /collections/{collection_id}/items with a valid filter parameter""" + collection_id = ctx.item["collection"] + filter_body = { + "op": "<", + "args": [{"property": "eo:cloud_cover"}, 10], + } + params = { + "filter-lang": "cql2-json", + "filter": json.dumps(filter_body), + } + resp = await app_client.get(f"/collections/{collection_id}/items", params=params) + assert resp.status_code == 200 + + +@pytest.mark.asyncio +async def test_item_collection_get_filter_invalid_param(app_client, ctx): + """Test GET /collections/{collection_id}/items with an invalid filter parameter""" + collection_id = ctx.item["collection"] + filter_body = { + "op": "=", + "args": [{"property": "invalid_param"}, "test"], + } + params = { + "filter-lang": "cql2-json", + "filter": json.dumps(filter_body), + } + resp = await app_client.get(f"/collections/{collection_id}/items", params=params) + assert resp.status_code == 400 + resp_json = resp.json() + assert "Invalid query fields: invalid_param" in resp_json["detail"] + + +@pytest.mark.asyncio +async def test_validate_queryables_excluded(app_client, ctx): + """Test that excluded queryables are rejected when validation is enabled.""" + + excluded_field = "eo:cloud_cover" + client = app_config["client"] + + with mock.patch.dict( + os.environ, + { + "VALIDATE_QUERYABLES": "true", + "EXCLUDED_FROM_QUERYABLES": excluded_field, + "QUERYABLES_CACHE_TTL": "0", + }, + ): + client.queryables_cache.reload_settings() + + query = {"query": {excluded_field: {"lt": 10}}} + resp = await app_client.post("/search", json=query) + assert resp.status_code == 400 + assert "Invalid query fields" in resp.json()["detail"] + assert excluded_field in resp.json()["detail"] + + query = {"query": {"id": {"eq": "test-item"}}} + resp = await app_client.post("/search", json=query) + assert resp.status_code == 200 + + 
client.queryables_cache.reload_settings() + + +@pytest.mark.asyncio +async def test_search_get_cql2_text_invalid_param(app_client, ctx): + """Test GET /search with an invalid cql2-text filter parameter.""" + params = { + "filter-lang": "cql2-text", + "filter": "properties.invalid_param < 5", + } + resp = await app_client.get("/search", params=params) + assert resp.status_code == 400 + resp_json = resp.json() + assert "Invalid query fields: invalid_param" in resp_json["detail"] + + +@pytest.mark.asyncio +async def test_search_get_cql2_text_valid_param(app_client, ctx): + """Test GET /search with a valid cql2-text filter parameter.""" + params = { + "filter-lang": "cql2-text", + "filter": "eo:cloud_cover < 10", + } + resp = await app_client.get("/search", params=params) + assert resp.status_code == 200 + + +@pytest.mark.asyncio +async def test_item_collection_get_cql2_text_invalid_param(app_client, ctx): + """Test GET /collections/{collection_id}/items with invalid cql2-text filter.""" + collection_id = ctx.item["collection"] + params = { + "filter-lang": "cql2-text", + "filter": "properties.invalid_param < 5", + } + resp = await app_client.get(f"/collections/{collection_id}/items", params=params) + assert resp.status_code == 400 + resp_json = resp.json() + assert "Invalid query fields: invalid_param" in resp_json["detail"] + + +@pytest.mark.asyncio +async def test_search_get_cql2_text_with_properties_prefix(app_client, ctx): + """Test GET /search with a valid cql2-text filter using properties. prefix. + + This tests the case where users specify 'properties.eo:cloud_cover' instead of + just 'eo:cloud_cover'. Both formats should work correctly. 
+ """ + params = { + "filter-lang": "cql2-text", + "filter": "properties.eo:cloud_cover < 10", + } + resp = await app_client.get("/search", params=params) + assert resp.status_code == 200 diff --git a/stac_fastapi/tests/core/test_queryables.py b/stac_fastapi/tests/core/test_queryables.py new file mode 100644 index 000000000..d65792748 --- /dev/null +++ b/stac_fastapi/tests/core/test_queryables.py @@ -0,0 +1,137 @@ +import os +import time +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest +from fastapi import HTTPException + +from stac_fastapi.core.queryables import ( + QueryablesCache, + get_properties_from_cql2_filter, +) + + +class TestQueryablesCache: + @pytest.fixture + def mock_db_logic(self): + db_logic = MagicMock() + db_logic.get_queryables_mapping = AsyncMock( + return_value={"prop1": "type1", "prop2": "type2"} + ) + return db_logic + + @pytest.fixture + def queryables_cache(self, mock_db_logic): + with patch.dict( + os.environ, {"VALIDATE_QUERYABLES": "true", "QUERYABLES_CACHE_TTL": "60"} + ): + cache = QueryablesCache(mock_db_logic) + return cache + + def test_init(self, mock_db_logic): + with patch.dict( + os.environ, {"VALIDATE_QUERYABLES": "true", "QUERYABLES_CACHE_TTL": "120"} + ): + cache = QueryablesCache(mock_db_logic) + assert cache.validation_enabled is True + assert cache.cache_ttl == 120 + + def test_reload_settings(self, queryables_cache): + with patch.dict( + os.environ, {"VALIDATE_QUERYABLES": "false", "QUERYABLES_CACHE_TTL": "300"} + ): + queryables_cache.reload_settings() + assert queryables_cache.validation_enabled is False + assert queryables_cache.cache_ttl == 300 + + @pytest.mark.asyncio + async def test_get_all_queryables_updates_cache( + self, queryables_cache, mock_db_logic + ): + queryables = await queryables_cache.get_all_queryables() + assert queryables == {"prop1", "prop2"} + mock_db_logic.get_queryables_mapping.assert_called_once() + + @pytest.mark.asyncio + async def 
test_get_all_queryables_uses_cache(self, queryables_cache, mock_db_logic): + await queryables_cache.get_all_queryables() + mock_db_logic.get_queryables_mapping.assert_called_once() + + # Should use cache now + await queryables_cache.get_all_queryables() + mock_db_logic.get_queryables_mapping.assert_called_once() + + @pytest.mark.asyncio + async def test_get_all_queryables_refresh_after_ttl( + self, queryables_cache, mock_db_logic + ): + await queryables_cache.get_all_queryables() + mock_db_logic.get_queryables_mapping.assert_called_once() + + # Simulate time passing + queryables_cache._last_updated = time.time() - queryables_cache.cache_ttl - 1 + + await queryables_cache.get_all_queryables() + assert mock_db_logic.get_queryables_mapping.call_count == 2 + + @pytest.mark.asyncio + async def test_get_all_queryables_disabled(self, queryables_cache): + queryables_cache.validation_enabled = False + queryables = await queryables_cache.get_all_queryables() + assert queryables == set() + + @pytest.mark.asyncio + async def test_validate_valid_fields(self, queryables_cache): + await queryables_cache.validate({"prop1"}) + + @pytest.mark.asyncio + async def test_validate_invalid_fields(self, queryables_cache): + with pytest.raises(HTTPException) as excinfo: + await queryables_cache.validate({"invalid_prop"}) + assert excinfo.value.status_code == 400 + assert "Invalid query fields: invalid_prop" in str(excinfo.value.detail) + + @pytest.mark.asyncio + async def test_validate_disabled(self, queryables_cache): + queryables_cache.validation_enabled = False + await queryables_cache.validate({"invalid_prop"}) + + +def test_get_properties_from_cql2_filter(): + # Simple prop + cql2 = {"op": "=", "args": [{"property": "prop1"}, "value"]} + props = get_properties_from_cql2_filter(cql2) + assert props == {"prop1"} + + # Nested props + cql2_nested = { + "op": "and", + "args": [ + {"op": "=", "args": [{"property": "prop1"}, "v1"]}, + {"op": "<", "args": [{"property": "prop2"}, 10]}, + ], + } 
+ props = get_properties_from_cql2_filter(cql2_nested) + assert props == {"prop1", "prop2"} + + # Empty/invalid + assert get_properties_from_cql2_filter({}) == set() + + +def test_get_properties_from_cql2_filter_strips_properties_prefix(): + """Test that 'properties.' prefix is stripped from property names.""" + # Single property with prefix + cql2 = {"op": "<", "args": [{"property": "properties.none"}, 5]} + props = get_properties_from_cql2_filter(cql2) + assert props == {"none"} + + # Mixed with and without prefix + cql2_nested = { + "op": "and", + "args": [ + {"op": "=", "args": [{"property": "properties.test"}, "v1"]}, + {"op": "<", "args": [{"property": "eo:cloud_cover"}, 10]}, + ], + } + props = get_properties_from_cql2_filter(cql2_nested) + assert props == {"test", "eo:cloud_cover"} diff --git a/stac_fastapi/tests/data/test_collection.json b/stac_fastapi/tests/data/test_collection.json index 32a7d254b..dda5b8a0e 100644 --- a/stac_fastapi/tests/data/test_collection.json +++ b/stac_fastapi/tests/data/test_collection.json @@ -6,6 +6,11 @@ "type": "Collection", "description": "Landat 8 imagery radiometrically calibrated and orthorectified using gound points and Digital Elevation Model (DEM) data to correct relief displacement.", "stac_version": "1.0.0", + "queryables": { + "eo:cloud_cover": { + "$ref": "https://stac-extensions.github.io/eo/v1.0.0/schema.json#/definitions/fields/properties/eo:cloud_cover" + } + }, "license": "PDDL-1.0", "summaries": { "platform": [ diff --git a/stac_fastapi/tests/database/test_mapping.py b/stac_fastapi/tests/database/test_mapping.py new file mode 100644 index 000000000..ebf869c06 --- /dev/null +++ b/stac_fastapi/tests/database/test_mapping.py @@ -0,0 +1,382 @@ +"""Tests for the mapping module.""" + +import pytest + +from stac_fastapi.sfeos_helpers.database.mapping import get_queryables_mapping_shared + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_simple(): + """Test basic mapping extraction.""" + mappings 
= { + "test_index": { + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "collection": {"type": "keyword"}, + "properties": { + "properties": { + "datetime": {"type": "date"}, + "eo:cloud_cover": {"type": "float"}, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + assert "id" in result + assert result["id"] == "id" + assert "collection" in result + assert result["collection"] == "collection" + assert "datetime" in result + assert result["datetime"] == "properties.datetime" + assert "eo:cloud_cover" in result + assert result["eo:cloud_cover"] == "properties.eo:cloud_cover" + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_nested_properties(): + """Test that nested properties are properly traversed. + + This tests the case where a property like 'processing:software.eometadatatool' + exists, which is represented as a nested object in Elasticsearch/OpenSearch. + """ + mappings = { + "test_index": { + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "properties": { + "properties": { + "processing:software": { + "properties": { + "eometadatatool": {"type": "keyword"}, + "version": {"type": "keyword"}, + } + }, + "eo:cloud_cover": {"type": "float"}, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + # Check that nested properties are properly traversed + assert "processing:software.eometadatatool" in result + assert ( + result["processing:software.eometadatatool"] + == "properties.processing:software.eometadatatool" + ) + assert "processing:software.version" in result + assert ( + result["processing:software.version"] + == "properties.processing:software.version" + ) + + # Regular properties should still work + assert "eo:cloud_cover" in result + assert result["eo:cloud_cover"] == "properties.eo:cloud_cover" + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_deeply_nested(): + """Test deeply nested properties.""" + mappings = { 
+ "test_index": { + "mappings": { + "properties": { + "properties": { + "properties": { + "level1": { + "properties": { + "level2": { + "properties": {"level3": {"type": "keyword"}} + } + } + }, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + assert "level1.level2.level3" in result + assert result["level1.level2.level3"] == "properties.level1.level2.level3" + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_disabled_fields(): + """Test that disabled fields are excluded.""" + mappings = { + "test_index": { + "mappings": { + "properties": { + "properties": { + "properties": { + "enabled_field": {"type": "keyword"}, + "disabled_field": {"type": "keyword", "enabled": False}, + "parent": { + "properties": { + "enabled_nested": {"type": "keyword"}, + "disabled_nested": { + "type": "keyword", + "enabled": False, + }, + } + }, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + assert "enabled_field" in result + assert "disabled_field" not in result + assert "parent.enabled_nested" in result + assert "parent.disabled_nested" not in result + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_container_fields(): + """Test that container fields (without type) are not included but their children are.""" + mappings = { + "test_index": { + "mappings": { + "properties": { + "properties": { + "properties": { + # This is a container field with no type + "container": { + "properties": { + "child1": {"type": "keyword"}, + "child2": {"type": "float"}, + } + }, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + # Container field should not be in results (no type) + assert "container" not in result + # But its children should be + assert "container.child1" in result + assert "container.child2" in result + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_multiple_indices(): + """Test mapping from multiple indices are 
merged.""" + mappings = { + "index1": { + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "properties": { + "properties": { + "field1": {"type": "keyword"}, + } + }, + } + } + }, + "index2": { + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "properties": { + "properties": { + "field2": {"type": "float"}, + } + }, + } + } + }, + } + + result = await get_queryables_mapping_shared(mappings) + + assert "field1" in result + assert "field2" in result + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_excluded_fields(monkeypatch): + """Test that fields in EXCLUDED_FROM_QUERYABLES are excluded.""" + # Set the environment variable to exclude fields + monkeypatch.setenv( + "EXCLUDED_FROM_QUERYABLES", + "properties.auth:schemes,properties.storage:schemes", + ) + + mappings = { + "test_index": { + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "properties": { + "properties": { + "eo:cloud_cover": {"type": "float"}, + "auth:schemes": { + "properties": { + "s3": { + "properties": { + "type": {"type": "keyword"}, + "requester_pays": {"type": "boolean"}, + } + }, + "http": { + "properties": { + "type": {"type": "keyword"}, + } + }, + } + }, + "storage:schemes": { + "properties": { + "s3": { + "properties": { + "platform": {"type": "keyword"}, + } + }, + } + }, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + # Regular fields should be present + assert "id" in result + assert "eo:cloud_cover" in result + + # Excluded fields and their children should NOT be present + assert "auth:schemes" not in result + assert "auth:schemes.s3" not in result + assert "auth:schemes.s3.type" not in result + assert "auth:schemes.s3.requester_pays" not in result + assert "auth:schemes.http" not in result + assert "auth:schemes.http.type" not in result + assert "storage:schemes" not in result + assert "storage:schemes.s3" not in result + assert "storage:schemes.s3.platform" not in result + + 
+@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_excluded_fields_top_level(monkeypatch): + """Test that exclusions work for fields at top level (no properties. prefix in path). + + Some indices (like EOPF) have auth:schemes at the top level, resulting in + field paths like 'auth:schemes.s3.type' instead of 'properties.auth:schemes.s3.type'. + The exclusion should work for both cases. + """ + # Set the environment variable to exclude fields with properties. prefix + monkeypatch.setenv( + "EXCLUDED_FROM_QUERYABLES", + "properties.auth:schemes,properties.storage:schemes", + ) + + # Mapping where auth:schemes is at the TOP level (not under properties.properties) + mappings = { + "test_index": { + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "eo:cloud_cover": {"type": "float"}, + "auth:schemes": { + "properties": { + "s3": { + "properties": { + "type": {"type": "keyword"}, + "requester_pays": {"type": "boolean"}, + } + }, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + # Regular fields should be present + assert "id" in result + assert "eo:cloud_cover" in result + + # Excluded fields should NOT be present even without properties. prefix in path + assert "auth:schemes" not in result + assert "auth:schemes.s3" not in result + assert "auth:schemes.s3.type" not in result + assert "auth:schemes.s3.requester_pays" not in result + + +@pytest.mark.asyncio +async def test_get_queryables_mapping_shared_excluded_fields_no_prefix_config( + monkeypatch, +): + """Test that exclusions work when configured WITHOUT properties. prefix. + + If user sets EXCLUDED_FROM_QUERYABLES='auth:schemes', it should also + exclude 'properties.auth:schemes' and vice versa. + """ + # Set the environment variable WITHOUT properties. 
prefix + monkeypatch.setenv( + "EXCLUDED_FROM_QUERYABLES", + "auth:schemes", + ) + + mappings = { + "test_index": { + "mappings": { + "properties": { + "id": {"type": "keyword"}, + "properties": { + "properties": { + "eo:cloud_cover": {"type": "float"}, + "auth:schemes": { + "properties": { + "s3": { + "properties": { + "type": {"type": "keyword"}, + } + }, + } + }, + } + }, + } + } + } + } + + result = await get_queryables_mapping_shared(mappings) + + # Regular fields should be present + assert "id" in result + assert "eo:cloud_cover" in result + + # Excluded fields should NOT be present (properties. prefix auto-added) + assert "auth:schemes" not in result + assert "auth:schemes.s3" not in result + assert "auth:schemes.s3.type" not in result