From 7fbe2f83d48916b8b91080deb062f075cf5e2459 Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Thu, 27 Nov 2025 23:11:58 -0800 Subject: [PATCH 1/8] Add CQL2 filters --- .../montandon-eoapi/cql2_filters.py | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 applications/argocd/staging/applications/montandon-eoapi/cql2_filters.py diff --git a/applications/argocd/staging/applications/montandon-eoapi/cql2_filters.py b/applications/argocd/staging/applications/montandon-eoapi/cql2_filters.py new file mode 100644 index 0000000..8991e71 --- /dev/null +++ b/applications/argocd/staging/applications/montandon-eoapi/cql2_filters.py @@ -0,0 +1,125 @@ +""" +Logic for generating CQL2 filters based on JWT. +""" + +import dataclasses +import os +import time +from functools import wraps +from typing import Any, Callable, Optional + +import httpx + + +PUBLIC_COLLECTIONS_FILTER = "private IS NULL OR private = false" + + +def async_ttl_cache(default_ttl: int = 30): + """ + Decorator to cache async method results with a time-to-live. + + Args: + ttl: Either an integer (seconds) or a string attribute name to read from the instance + """ + + def decorator(func: Callable): + cache_attr = f"_cache_{func.__name__}" + expiry_attr = f"_cache_expiry_{func.__name__}" + + @wraps(func) + async def wrapper(self, *args, **kwargs): + # Get TTL value (either from instance attribute or fallback to default) + ttl_value = getattr(self, "cache_ttl", default_ttl) + + # Check if cache exists and is valid + if ( + hasattr(self, cache_attr) + and hasattr(self, expiry_attr) + and time.time() < getattr(self, expiry_attr) + ): + return getattr(self, cache_attr) + + # Call the function and cache result + result = await func(self, *args, **kwargs) + setattr(self, cache_attr, result) + setattr(self, expiry_attr, time.time() + ttl_value) + return result + + return wrapper + + return decorator + + +@dataclasses.dataclass +class CollectionsFilter: + collections_claim: str = "collections" + + async def __call__(self, context: dict[str, Any]) -> str: + """ + CQL2 filter for collections based on JWT permissions. + """ + jwt_payload = context.get("payload", {}) + permitted_collections = jwt_payload.get(self.collections_claim, []) + + return " OR ".join( + [ + # Include public collections + PUBLIC_COLLECTIONS_FILTER, + # Include permitted collections + *[f"id = '{collection_id}'" for collection_id in permitted_collections], + ] + ) + + +@dataclasses.dataclass +class ItemsFilter: + collections_claim: str = "collections" + client: httpx.AsyncClient = dataclasses.field(init=False) + cache_ttl: Optional[int] = None + + def __post_init__(self): + self.client = httpx.AsyncClient(base_url=os.environ["UPSTREAM_URL"]) + + @async_ttl_cache( + default_ttl=300 + ) # Read TTL from instance attribute, can be overridden with cache_ttl class param + async def get_public_collections_ids(self) -> list[str]: + ids = [] + + # First request uses params dict + url: Optional[str] = "/collections" + params: Optional[dict[str, Any]] = { + "filter": PUBLIC_COLLECTIONS_FILTER, + "limit": 100, + } + + while url: + response = await self.client.get(url, params=params) + response.raise_for_status() + data = response.json() + + ids.extend([collection["id"] for collection in data["collections"]]) + + # Subsequent requests use the "next" link URL directly (already has params) + url = next( + (link["href"] for link in data["links"] if link["rel"] == "next"), + None, + ) + params = None # Clear params after first request + + return ids + + async def __call__(self, context: dict[str, Any]) -> str: + """ + CQL2 filter for items based on JWT permissions. + """ + jwt_payload = context.get("payload", {}) + permitted_collections = set( + # Get IDs of public collections (we can't lookup Items by a property of their parent Collection) + await self.get_public_collections_ids() + # Include permitted collections + + jwt_payload.get(self.collections_claim, []) + ) + return " OR ".join( + f"collection = '{collection_id}'" for collection_id in permitted_collections + ) From 120733654fa21f30ad20a2ed52a941be04c1f606 Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Fri, 28 Nov 2025 08:46:59 -0800 Subject: [PATCH 2/8] Rework filters for clarity --- .../montandon-eoapi/cql2_filters.py | 144 +++++++++--------- 1 file changed, 76 insertions(+), 68 deletions(-) diff --git a/applications/argocd/staging/applications/montandon-eoapi/cql2_filters.py b/applications/argocd/staging/applications/montandon-eoapi/cql2_filters.py index 8991e71..a1d1a82 100644 --- a/applications/argocd/staging/applications/montandon-eoapi/cql2_filters.py +++ b/applications/argocd/staging/applications/montandon-eoapi/cql2_filters.py @@ -5,66 +5,39 @@ import dataclasses import os import time -from functools import wraps -from typing import Any, Callable, Optional +import logging +from typing import Any, Optional import httpx +logger = logging.getLogger(__name__) -PUBLIC_COLLECTIONS_FILTER = "private IS NULL OR private = false" - -def async_ttl_cache(default_ttl: int = 30): +@dataclasses.dataclass +class CollectionsFilter: """ - Decorator to cache async method results with a time-to-live. - - Args: - ttl: Either an integer (seconds) or a string attribute name to read from the instance + CQL2 filter factory for collections based on JWT permissions. """ - def decorator(func: Callable): - cache_attr = f"_cache_{func.__name__}" - expiry_attr = f"_cache_expiry_{func.__name__}" - - @wraps(func) - async def wrapper(self, *args, **kwargs): - # Get TTL value (either from instance attribute or fallback to default) - ttl_value = getattr(self, "cache_ttl", default_ttl) - - # Check if cache exists and is valid - if ( - hasattr(self, cache_attr) - and hasattr(self, expiry_attr) - and time.time() < getattr(self, expiry_attr) - ): - return getattr(self, cache_attr) - - # Call the function and cache result - result = await func(self, *args, **kwargs) - setattr(self, cache_attr, result) - setattr(self, expiry_attr, time.time() + ttl_value) - return result - - return wrapper - - return decorator - - -@dataclasses.dataclass -class CollectionsFilter: - collections_claim: str = "collections" + collections_claim: str = "collections" # JWT claim with allowed collection IDs + admin_claim: str = "superuser" # JWT claim indicating superuser status + public_collections_filter: str = "private IS NULL OR private = false" async def __call__(self, context: dict[str, Any]) -> str: - """ - CQL2 filter for collections based on JWT permissions. - """ jwt_payload = context.get("payload", {}) - permitted_collections = jwt_payload.get(self.collections_claim, []) + if jwt_payload.get(self.admin_claim): + logger.info( + f"Superuser detected for sub {jwt_payload.get('sub')}, " + "no filter applied for collections" + ) + return "1=1" # No filter for superusers + # Allowed to access collections in specified collections + permitted_collections = jwt_payload.get(self.collections_claim, []) return " OR ".join( [ # Include public collections - PUBLIC_COLLECTIONS_FILTER, + self.public_collections_filter, # Include permitted collections *[f"id = '{collection_id}'" for collection_id in permitted_collections], ] @@ -73,32 +46,62 @@ async def __call__(self, context: dict[str, Any]) -> str: @dataclasses.dataclass class ItemsFilter: - collections_claim: str = "collections" - client: httpx.AsyncClient = dataclasses.field(init=False) - cache_ttl: Optional[int] = None + """ + CQL2 filter factory for items based on JWT permissions. + """ - def __post_init__(self): - self.client = httpx.AsyncClient(base_url=os.environ["UPSTREAM_URL"]) + collections_claim: str = "collections" # JWT claim with allowed collection IDs + admin_claim: str = "superuser" # JWT claim indicating superuser status + public_collections_filter: str = "private IS NULL OR private = false" + + cache_ttl: int = 30 # TTL for caching public collections, in seconds + _client: httpx.AsyncClient = dataclasses.field( + init=False, + repr=False, + default_factory=lambda: httpx.AsyncClient(base_url=os.environ["UPSTREAM_URL"]), + ) + _public_collections_cache: Optional[list[str]] = dataclasses.field( + init=False, default=None, repr=False + ) + _cache_expiry: float = dataclasses.field(init=False, default=0, repr=False) + + @property + def _cached_public_collections(self) -> Optional[list[str]]: + """Return cached public collections if still valid, otherwise None.""" + if time.time() < self._cache_expiry: + return self._public_collections_cache + return None + + @_cached_public_collections.setter + def _cached_public_collections(self, value: list[str]) -> None: + """Set the cache with a new value and expiry time.""" + self._public_collections_cache = value + self._cache_expiry = time.time() + self.cache_ttl + + async def _get_public_collections_ids(self) -> list[str]: + """ + Retrieve IDs of public collections from the upstream API. + """ + # Return cached value if still valid + if (cached := self._cached_public_collections) is not None: + logger.debug("Using cached public collections") + return cached - @async_ttl_cache( - default_ttl=300 - ) # Read TTL from instance attribute, can be overridden with cache_ttl class param - async def get_public_collections_ids(self) -> list[str]: - ids = [] + logger.info("Fetching public collections from upstream API") # First request uses params dict url: Optional[str] = "/collections" params: Optional[dict[str, Any]] = { - "filter": PUBLIC_COLLECTIONS_FILTER, + "filter": self.public_collections_filter, "limit": 100, } + ids = [] while url: - response = await self.client.get(url, params=params) + response = await self._client.get(url, params=params) response.raise_for_status() data = response.json() - - ids.extend([collection["id"] for collection in data["collections"]]) + ids.extend(collection["id"] for collection in data["collections"]) # Subsequent requests use the "next" link URL directly (already has params) url = next( @@ -107,19 +110,24 @@ async def get_public_collections_ids(self) -> list[str]: ) params = None # Clear params after first request + # Update cache + self._cached_public_collections = ids return ids async def __call__(self, context: dict[str, Any]) -> str: - """ - CQL2 filter for items based on JWT permissions. - """ jwt_payload = context.get("payload", {}) - permitted_collections = set( - # Get IDs of public collections (we can't lookup Items by a property of their parent Collection) - await self.get_public_collections_ids() - # Include permitted collections - + jwt_payload.get(self.collections_claim, []) - ) + if jwt_payload.get(self.admin_claim): + logger.info( + f"Superuser detected for sub {jwt_payload.get('sub')}, " + "no filter applied for items" + ) + return "1=1" # No filter for superusers + + # Allowed to access items in specified collections + permitted_collections = set(jwt_payload.get(self.collections_claim, [])) + # Allowed to access items in public collections + permitted_collections.update(await self._get_public_collections_ids()) + return " OR ".join( f"collection = '{collection_id}'" for collection_id in permitted_collections ) From 1c897edc0c1cb68391a663b79850e17f0f1bd29a Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Fri, 28 Nov 2025 09:56:29 -0800 Subject: [PATCH 3/8] Add tooling to mount filters into runtime --- .../staging/applications/montandon-eoapi.yaml | 10 ++++++ .../montandon-eoapi/kustomization.yaml | 13 ++++++++ .../stac-auth-proxy-filters/__init__.py | 0 .../montandon_filters.py} | 32 ++++++++++++------- 4 files changed, 44 insertions(+), 11 deletions(-) create mode 100644 applications/argocd/staging/applications/montandon-eoapi/kustomization.yaml create mode 100644 applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy-filters/__init__.py rename applications/argocd/staging/applications/montandon-eoapi/{cql2_filters.py => stac-auth-proxy-filters/montandon_filters.py} (80%) diff --git a/applications/argocd/staging/applications/montandon-eoapi.yaml b/applications/argocd/staging/applications/montandon-eoapi.yaml index 2112f4f..3610199 100644 --- a/applications/argocd/staging/applications/montandon-eoapi.yaml +++ b/applications/argocd/staging/applications/montandon-eoapi.yaml @@ -142,6 +142,8 @@ spec: # UPSTREAM_URL: "https://montandon-eoapi-stage.ifrc.org/stac" OIDC_DISCOVERY_URL: "https://goadmin-stage.ifrc.org/o/.well-known/openid-configuration" OVERRIDE_HOST: "0" + COLLECTIONS_FILTER_CLS: stac_auth_proxy.contrib.montandon_filters:CollectionsFilter + ITEMS_FILTER_CLS: stac_auth_proxy.contrib.montandon_filters:ItemsFilter ingress: enabled: "true" host: "stac-test.whydidweevendothis.com" @@ -152,6 +154,14 @@ spec: enabled: "true" secretName: "stac-auth-proxy-tls-cert" replicaCount: 1 + extraVolumes: + - name: filters + configMap: + name: stac-auth-proxy-filters + extraVolumeMounts: + - name: filters + mountPath: /app/src/stac_auth_proxy/contrib + readOnly: true destination: server: https://kubernetes.default.svc namespace: montandon-eoapi diff --git a/applications/argocd/staging/applications/montandon-eoapi/kustomization.yaml b/applications/argocd/staging/applications/montandon-eoapi/kustomization.yaml new file mode 100644 index 0000000..75a71f5 --- /dev/null +++ b/applications/argocd/staging/applications/montandon-eoapi/kustomization.yaml @@ -0,0 +1,13 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: montandon-eoapi + +resources: + - montandon-eoapi-spc.yaml + +configMapGenerator: + - name: stac-auth-proxy-filters + files: + - __init__.py=stac-auth-proxy-filters/__init__.py + - montandon_filters.py=stac-auth-proxy-filters/montandon_filters.py diff --git a/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy-filters/__init__.py b/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy-filters/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/applications/argocd/staging/applications/montandon-eoapi/cql2_filters.py b/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy-filters/montandon_filters.py similarity index 80% rename from applications/argocd/staging/applications/montandon-eoapi/cql2_filters.py rename to applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy-filters/montandon_filters.py index a1d1a82..0edf017 100644 --- a/applications/argocd/staging/applications/montandon-eoapi/cql2_filters.py +++ b/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy-filters/montandon_filters.py @@ -21,10 +21,16 @@ class CollectionsFilter: collections_claim: str = "collections" # JWT claim with allowed collection IDs admin_claim: str = "superuser" # JWT claim indicating superuser status - public_collections_filter: str = "private IS NULL OR private = false" + public_collections_filter: str = "(private IS NULL OR private = false)" async def __call__(self, context: dict[str, Any]) -> str: - jwt_payload = context.get("payload", {}) + jwt_payload: Optional[dict[str, Any]] = context.get("payload") + + # Anonymous: only public collections + if not jwt_payload: + return self.public_collections_filter + + # Superuser: no filter if jwt_payload.get(self.admin_claim): logger.info( f"Superuser detected for sub {jwt_payload.get('sub')}, " @@ -32,7 +38,7 @@ async def __call__(self, context: dict[str, Any]) -> str: ) return "1=1" # No filter for superusers - # Allowed to access collections in specified collections + # Authenticated user: Allowed to access collections mentioned in JWT permitted_collections = jwt_payload.get(self.collections_claim, []) return " OR ".join( [ @@ -52,7 +58,7 @@ class ItemsFilter: collections_claim: str = "collections" # JWT claim with allowed collection IDs admin_claim: str = "superuser" # JWT claim indicating superuser status - public_collections_filter: str = "private IS NULL OR private = false" + public_collections_filter: str = "(private IS NULL OR private = false)" cache_ttl: int = 30 # TTL for caching public collections, in seconds _client: httpx.AsyncClient = dataclasses.field( @@ -115,18 +121,22 @@ async def _get_public_collections_ids(self) -> list[str]: return ids async def __call__(self, context: dict[str, Any]) -> str: - jwt_payload = context.get("payload", {}) - if jwt_payload.get(self.admin_claim): + jwt_payload: Optional[dict[str, Any]] = context.get("payload") + + # Superuser: no filter + if jwt_payload and jwt_payload.get(self.admin_claim): logger.info( f"Superuser detected for sub {jwt_payload.get('sub')}, " "no filter applied for items" ) - return "1=1" # No filter for superusers + return "1=1" + + # Everyone: Allowed access to items in public collections + permitted_collections = set(await self._get_public_collections_ids()) - # Allowed to access items in specified collections - permitted_collections = set(jwt_payload.get(self.collections_claim, [])) - # Allowed to access items in public collections - permitted_collections.update(await self._get_public_collections_ids()) + # Authenticated user: Allowed to access items in collections mentioned in JWT + if jwt_payload: + permitted_collections.update(jwt_payload.get(self.collections_claim, [])) return " OR ".join( f"collection = '{collection_id}'" for collection_id in permitted_collections From 667f5ba614da9547d81f1a373db7bd3e5bdf9a22 Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Fri, 28 Nov 2025 10:09:04 -0800 Subject: [PATCH 4/8] Use single file rather than contrib submodule --- .../argocd/staging/applications/montandon-eoapi.yaml | 7 ++++--- .../applications/montandon-eoapi/kustomization.yaml | 3 +-- .../montandon-eoapi/stac-auth-proxy-filters/__init__.py | 0 .../montandon_filters.py | 0 4 files changed, 5 insertions(+), 5 deletions(-) delete mode 100644 applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy-filters/__init__.py rename applications/argocd/staging/applications/montandon-eoapi/{stac-auth-proxy-filters => stac-auth-proxy}/montandon_filters.py (100%) diff --git a/applications/argocd/staging/applications/montandon-eoapi.yaml b/applications/argocd/staging/applications/montandon-eoapi.yaml index 3610199..97756c0 100644 --- a/applications/argocd/staging/applications/montandon-eoapi.yaml +++ b/applications/argocd/staging/applications/montandon-eoapi.yaml @@ -142,8 +142,8 @@ spec: # UPSTREAM_URL: "https://montandon-eoapi-stage.ifrc.org/stac" OIDC_DISCOVERY_URL: "https://goadmin-stage.ifrc.org/o/.well-known/openid-configuration" OVERRIDE_HOST: "0" - COLLECTIONS_FILTER_CLS: stac_auth_proxy.contrib.montandon_filters:CollectionsFilter - ITEMS_FILTER_CLS: stac_auth_proxy.contrib.montandon_filters:ItemsFilter + COLLECTIONS_FILTER_CLS: stac_auth_proxy.montandon_filters:CollectionsFilter + ITEMS_FILTER_CLS: stac_auth_proxy.montandon_filters:ItemsFilter ingress: enabled: "true" host: "stac-test.whydidweevendothis.com" @@ -160,7 +160,8 @@ spec: name: stac-auth-proxy-filters extraVolumeMounts: - name: filters - mountPath: /app/src/stac_auth_proxy/contrib + mountPath: /app/src/stac_auth_proxy/montandon_filters.py + subPath: montandon_filters.py readOnly: true destination: server: https://kubernetes.default.svc diff --git a/applications/argocd/staging/applications/montandon-eoapi/kustomization.yaml b/applications/argocd/staging/applications/montandon-eoapi/kustomization.yaml index 75a71f5..131245e 100644 --- a/applications/argocd/staging/applications/montandon-eoapi/kustomization.yaml +++ b/applications/argocd/staging/applications/montandon-eoapi/kustomization.yaml @@ -9,5 +9,4 @@ resources: configMapGenerator: - name: stac-auth-proxy-filters files: - - __init__.py=stac-auth-proxy-filters/__init__.py - - montandon_filters.py=stac-auth-proxy-filters/montandon_filters.py + - montandon_filters.py=stac-auth-proxy/montandon_filters.py diff --git a/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy-filters/__init__.py b/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy-filters/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy-filters/montandon_filters.py b/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py similarity index 100% rename from applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy-filters/montandon_filters.py rename to applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py From d145fd6098d575ab63e82da5aaeeccd9872a4f1e Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Fri, 28 Nov 2025 22:38:53 -0800 Subject: [PATCH 5/8] Harden code --- .../stac-auth-proxy/montandon_filters.py | 121 ++++++++++++------ 1 file changed, 82 insertions(+), 39 deletions(-) diff --git a/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py b/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py index 0edf017..d3f943f 100644 --- a/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py +++ b/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py @@ -1,17 +1,44 @@ """ -Logic for generating CQL2 filters based on JWT. +CQL2 filter factories. + +These classes will be initialized at the startup of the STAC Auth Proxy service and will +be called for each request to collections/items endpoints in order to generate CQL2 +filters based on the JWT permissions. + +docs: https://developmentseed.org/stac-auth-proxy/user-guide/record-level-auth/ """ +import asyncio import dataclasses import os import time import logging -from typing import Any, Optional +from typing import Any, Literal, Optional, Sequence import httpx logger = logging.getLogger(__name__) +if not (UPSTREAM_URL := os.environ.get("UPSTREAM_URL")) + raise ValueError("Failed to retrieve upstream URL") + + +def cql2_in_query( + variable: Literal["collection", "id"], collection_ids: Sequence[str] +) -> str: + """ + Generate CQL2 query to see if value of variable matches any element of sequence of + strings. Due to CQL2 syntax ambiguities around single element arrays with the "in" + operator, we use a direct comparison when there's only one permitted collection. + """ + if not collection_ids: + return "1=0" + + if len(collection_ids) == 1: + return f"{variable} = " + repr(list(collection_ids)[0]) + + return f"{variable} IN ({','.join(repr(c_id) for c_id in collection_ids)})" + @dataclasses.dataclass class CollectionsFilter: @@ -32,7 +59,7 @@ async def __call__(self, context: dict[str, Any]) -> str: # Superuser: no filter if jwt_payload.get(self.admin_claim): - logger.info( + logger.debug( f"Superuser detected for sub {jwt_payload.get('sub')}, " "no filter applied for collections" ) @@ -42,10 +69,8 @@ async def __call__(self, context: dict[str, Any]) -> str: permitted_collections = jwt_payload.get(self.collections_claim, []) return " OR ".join( [ - # Include public collections self.public_collections_filter, - # Include permitted collections - *[f"id = '{collection_id}'" for collection_id in permitted_collections], + cql2_in_query("id", permitted_collections), ] ) @@ -64,12 +89,15 @@ class ItemsFilter: _client: httpx.AsyncClient = dataclasses.field( init=False, repr=False, - default_factory=lambda: httpx.AsyncClient(base_url=os.environ["UPSTREAM_URL"]), + default_factory=lambda: httpx.AsyncClient(base_url=UPSTREAM_URL), ) _public_collections_cache: Optional[list[str]] = dataclasses.field( init=False, default=None, repr=False ) _cache_expiry: float = dataclasses.field(init=False, default=0, repr=False) + _cache_lock: asyncio.Lock = dataclasses.field( + init=False, repr=False, default_factory=asyncio.Lock + ) @property def _cached_public_collections(self) -> Optional[list[str]]: @@ -87,57 +115,72 @@ def _cached_public_collections(self, value: list[str]) -> None: async def _get_public_collections_ids(self) -> list[str]: """ Retrieve IDs of public collections from the upstream API. + Uses a lock to prevent concurrent requests from fetching the same data. """ - # Return cached value if still valid + # Return cached value if still valid (fast path without lock) if (cached := self._cached_public_collections) is not None: logger.debug("Using cached public collections") return cached - logger.info("Fetching public collections from upstream API") - - # First request uses params dict - url: Optional[str] = "/collections" - params: Optional[dict[str, Any]] = { - "filter": self.public_collections_filter, - "limit": 100, - } - - ids = [] - while url: - response = await self._client.get(url, params=params) - response.raise_for_status() - data = response.json() - ids.extend(collection["id"] for collection in data["collections"]) - - # Subsequent requests use the "next" link URL directly (already has params) - url = next( - (link["href"] for link in data["links"] if link["rel"] == "next"), - None, - ) - params = None # Clear params after first request - - # Update cache - self._cached_public_collections = ids - return ids + # Acquire lock to prevent concurrent fetches + async with self._cache_lock: + # Double-check cache after acquiring lock + # Another coroutine might have populated it while we waited + if (cached := self._cached_public_collections) is not None: + logger.debug("Using cached public collections (after lock)") + return cached + + logger.debug("Fetching public collections from upstream API") + + # First request uses params dict + url: Optional[str] = "/collections" + params: Optional[dict[str, Any]] = { + "filter": self.public_collections_filter, + "limit": 100, + } + + ids = [] + while url: + try: + response = await self._client.get(url, params=params) + response.raise_for_status() + data = response.json() + except httpx.HTTPError: + logger.exception(f"Failed to fetch {url!r}.") + raise + ids.extend(collection["id"] for collection in data["collections"]) + + # Subsequent requests use the "next" link URL directly (already has params) + url = next( + (link["href"] for link in data["links"] if link["rel"] == "next"), + None, + ) + params = None # Clear params after first request + + # Update cache + self._cached_public_collections = ids + return ids async def __call__(self, context: dict[str, Any]) -> str: jwt_payload: Optional[dict[str, Any]] = context.get("payload") # Superuser: no filter if jwt_payload and jwt_payload.get(self.admin_claim): - logger.info( + logger.debug( f"Superuser detected for sub {jwt_payload.get('sub')}, " "no filter applied for items" ) return "1=1" # Everyone: Allowed access to items in public collections - permitted_collections = set(await self._get_public_collections_ids()) + try: + permitted_collections = set(await self._get_public_collections_ids()) + except httpx.HTTPError: + logger.warning("Failed to fetch public collections.") + permitted_collections = set() # Authenticated user: Allowed to access items in collections mentioned in JWT if jwt_payload: permitted_collections.update(jwt_payload.get(self.collections_claim, [])) - return " OR ".join( - f"collection = '{collection_id}'" for collection_id in permitted_collections - ) + return cql2_in_query("collection", permitted_collections) From 261fab9a4b1df926e608acea027b31233c8e758b Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Mon, 1 Dec 2025 12:39:53 -0800 Subject: [PATCH 6/8] require superuser claim to be 'true' --- .../montandon-eoapi/stac-auth-proxy/montandon_filters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py b/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py index d3f943f..405c6c4 100644 --- a/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py +++ b/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py @@ -58,7 +58,7 @@ async def __call__(self, context: dict[str, Any]) -> str: return self.public_collections_filter # Superuser: no filter - if jwt_payload.get(self.admin_claim): + if jwt_payload.get(self.admin_claim) == 'true': logger.debug( f"Superuser detected for sub {jwt_payload.get('sub')}, " "no filter applied for collections" @@ -165,7 +165,7 @@ async def __call__(self, context: dict[str, Any]) -> str: jwt_payload: Optional[dict[str, Any]] = context.get("payload") # Superuser: no filter - if jwt_payload and jwt_payload.get(self.admin_claim): + if jwt_payload and jwt_payload.get(self.admin_claim) == 'true': logger.debug( f"Superuser detected for sub {jwt_payload.get('sub')}, " "no filter applied for items" From f7a6b74800279b9fc0553eb2137957bfd3047567 Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Mon, 1 Dec 2025 20:56:44 -0800 Subject: [PATCH 7/8] fix missing semicolon --- .../montandon-eoapi/stac-auth-proxy/montandon_filters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py b/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py index 405c6c4..16766f7 100644 --- a/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py +++ b/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) -if not (UPSTREAM_URL := os.environ.get("UPSTREAM_URL")) +if not (UPSTREAM_URL := os.environ.get("UPSTREAM_URL")): raise ValueError("Failed to retrieve upstream URL") From dd210d9d4255928ed1ff3b99fefc7765b8c54bea Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Mon, 1 Dec 2025 20:57:04 -0800 Subject: [PATCH 8/8] ensure anonymous access is prevented --- .../stac-auth-proxy/montandon_filters.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py b/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py index 16766f7..46000e2 100644 --- a/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py +++ b/applications/argocd/staging/applications/montandon-eoapi/stac-auth-proxy/montandon_filters.py @@ -53,12 +53,13 @@ class CollectionsFilter: async def __call__(self, context: dict[str, Any]) -> str: jwt_payload: Optional[dict[str, Any]] = context.get("payload") - # Anonymous: only public collections + # Anonymous: no data if not jwt_payload: - return self.public_collections_filter + logger.debug("Anonymous user, no collections permitted to be viewed") + return "1=0" - # Superuser: no filter - if jwt_payload.get(self.admin_claim) == 'true': + # Superuser: all data + if jwt_payload.get(self.admin_claim) == "true": logger.debug( f"Superuser detected for sub {jwt_payload.get('sub')}, " "no filter applied for collections" @@ -164,8 +165,13 @@ async def _get_public_collections_ids(self) -> list[str]: async def __call__(self, context: dict[str, Any]) -> str: jwt_payload: Optional[dict[str, Any]] = context.get("payload") - # Superuser: no filter - if jwt_payload and jwt_payload.get(self.admin_claim) == 'true': + # Anonymous: no data + if not jwt_payload: + logger.debug("Anonymous user, no items permitted to be viewed") + return "1=0" + + # Superuser: all data + if jwt_payload.get(self.admin_claim) == "true": logger.debug( f"Superuser detected for sub {jwt_payload.get('sub')}, " "no filter applied for items"