diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index e0db5c03..7448fb57 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -117,3 +117,19 @@ services: depends_on: - stac - tiler + + funcs-dev: + image: pc-apis-funcs-dev + platform: linux/amd64 + build: + context: . + dockerfile: pcfuncs/Dockerfile.dev + env_file: ${PC_FUNCS_ENV_FILE:-./pc-funcs.dev.env} + ports: + - "8083:80" + volumes: + - .:/opt/src + command: > + /bin/bash + depends_on: + - funcs diff --git a/pcfuncs/Dockerfile.dev b/pcfuncs/Dockerfile.dev new file mode 100644 index 00000000..de8460fd --- /dev/null +++ b/pcfuncs/Dockerfile.dev @@ -0,0 +1,4 @@ +FROM pc-apis-funcs + +COPY pcfuncs/requirements-dev.txt requirements-dev.txt +RUN pip install -r requirements-dev.txt diff --git a/pcfuncs/funclib/raster.py b/pcfuncs/funclib/raster.py index bfdc9607..0cdb1b7b 100644 --- a/pcfuncs/funclib/raster.py +++ b/pcfuncs/funclib/raster.py @@ -8,6 +8,7 @@ import mercantile from PIL.Image import Image as PILImage from pyproj import CRS, Transformer +from rio_tiler.models import ImageData T = TypeVar("T", bound="Raster") @@ -170,5 +171,56 @@ def mask(self, geom: Dict[str, Any]) -> "PILRaster": class GDALRaster(Raster): - # TODO: Implement - ... + def __init__(self, extent: RasterExtent, image: ImageData) -> None: + self.image = image + super().__init__(extent) + + def to_bytes(self, format: str = ExportFormats.PNG) -> io.BytesIO: + img_bytes = self.image.render( + add_mask=True, + img_format=format.upper(), + ) + return io.BytesIO(img_bytes) + + def crop(self, bbox: Bbox) -> "GDALRaster": + # Web mercator of user bbox + if ( + not bbox.crs == self.extent.bbox.crs + and bbox.crs is not None + and self.extent.bbox.crs is not None + ): + bbox = bbox.reproject(self.extent.bbox.crs) + + col_min, row_min = self.extent.map_to_grid(bbox.xmin, bbox.ymax) + col_max, row_max = self.extent.map_to_grid(bbox.xmax, bbox.ymin) + + data = self.image.data[:, row_min:row_max, col_min:col_max] + mask = self.image.mask[row_min:row_max, col_min:col_max] + cropped = ImageData( + data, + mask, + assets=self.image.assets, + crs=self.image.crs, + bounds=bbox.to_list(), + band_names=self.image.band_names, + metadata=self.image.metadata, + dataset_statistics=self.image.dataset_statistics, + ) + + return GDALRaster( + extent=RasterExtent( + bbox, + cols=col_max - col_min, + rows=row_max - row_min, + ), + image=cropped, + ) + + def resample(self, cols: int, rows: int) -> "GDALRaster": + return GDALRaster( + extent=RasterExtent(bbox=self.extent.bbox, cols=cols, rows=rows), + image=self.image.resize(rows, cols), # type: ignore + ) + + def mask(self, geom: Dict[str, Any]) -> "GDALRaster": + raise NotImplementedError("GDALRaster does not support masking") diff --git a/pcfuncs/funclib/tiles.py b/pcfuncs/funclib/tiles.py index e695b239..23da4800 100644 --- a/pcfuncs/funclib/tiles.py +++ b/pcfuncs/funclib/tiles.py @@ -3,11 +3,12 @@ import logging from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Any, Dict, Generic, List, Optional, Type, TypeVar, Union +from typing import Any, Dict, Generic, List, Optional, Tuple, Type, TypeVar, Union from urllib.parse import urlsplit import aiohttp import mercantile +import numpy from funclib.models import RenderOptions from funclib.raster import ( Bbox, @@ -20,6 +21,7 @@ from funclib.settings import BaseExporterSettings from mercantile import Tile from PIL import Image +from rio_tiler.models import ImageData from pccommon.backoff import BackoffStrategy, with_backoff_async @@ -57,6 +59,46 @@ def get_tileset_dimensions(tiles: List[Tile], tile_size: int) -> TileSetDimensio ) +def paste_into_image( + from_image: ImageData, + to_image: ImageData, + box: Optional[ + Union[ + Tuple[int, int], + Tuple[int, int, int, int], + ] + ], +) -> None: + """Paste image data from one to the other.""" + if from_image.count != to_image.count: + raise Exception("Cannot merge 2 images with different band number") + + if from_image.data.dtype != to_image.data.dtype: + raise Exception("Cannot merge 2 images with different datatype") + + # Pastes another image into this image. + # The box argument is either a 2-tuple giving the upper left corner, + # a 4-tuple defining the left, upper, right, and lower pixel coordinate, + # or None (same as (0, 0)). See Coordinate System. If a 4-tuple is given, + # the size of the pasted image must match the size of the region. + if box is None: + box = (0, 0) + + if len(box) == 2: + size = (from_image.width, from_image.height) + box += (box[0] + size[0], box[1] + size[1]) # type: ignore + minx, maxy, maxx, miny = box # type: ignore + elif len(box) == 4: + # TODO add more size tests + minx, maxy, maxx, miny = box # type: ignore + + else: + raise Exception("Invalid box format") + + to_image.data[:, maxy:miny, minx:maxx] = from_image.data + to_image.mask[maxy:miny, minx:maxx] = from_image.mask + + class TileSet(ABC, Generic[T]): def __init__( self, @@ -152,8 +194,89 @@ async def create( class GDALTileSet(TileSet[GDALRaster]): + async def _get_tile( + self, + url: str, + ) -> Union[ImageData, None]: + async def _f() -> ImageData: + async with aiohttp.ClientSession() as session: + async with self._async_limit: + # We set Accept-Encoding to make sure the response is compressed + async with session.get( + url, headers={"Accept-Encoding": "gzip"} + ) as resp: + if resp.status == 200: + return ImageData.from_bytes(await resp.read()) + + else: + raise TilerError( + f"Error downloading tile: {url}", resp=resp + ) + + try: + return await with_backoff_async( + _f, + is_throttle=lambda e: isinstance(e, TilerError), + strategy=BackoffStrategy(waits=[0.2, 0.5, 0.75, 1, 2]), + ) + except Exception: + logger.warning(f"Tile request failed with backoff: {url}") + return None + async def get_mosaic(self, tiles: List[Tile]) -> GDALRaster: - raise NotImplementedError() + tasks: List[asyncio.Future[Union[ImageData, None]]] = [] + for tile in tiles: + url = self.get_tile_url(tile.z, tile.x, tile.y) + print(f"Downloading {url}") + tasks.append(asyncio.ensure_future(self._get_tile(url))) + + tile_images: List[Union[ImageData, None]] = list(await asyncio.gather(*tasks)) + + tileset_dimensions = get_tileset_dimensions(tiles, self.tile_size) + + # By default if no tiles where return we create an + # empty mosaic with 3 bands and uint8 + count: int = 3 + dtype: str = "uint8" + for im in tile_images: + if im: + count = im.count + dtype = im.data.dtype + break # Get Count / datatype from the first valid tile_images + + mosaic = ImageData( # type: ignore + numpy.zeros( + (count, tileset_dimensions.total_rows, tileset_dimensions.total_cols), + dtype=dtype, + ) + ) + + x = 0 + y = 0 + for i, img in enumerate(tile_images): + if not img: + continue + + paste_into_image( + img, + mosaic, + (x * self.tile_size, y * self.tile_size), + ) + + # Increment the row/col position for subsequent tiles + if (i + 1) % tileset_dimensions.tile_rows == 0: + y = 0 + x += 1 + else: + y += 1 + + raster_extent = RasterExtent( + bbox=Bbox.from_tiles(tiles), + cols=tileset_dimensions.total_cols, + rows=tileset_dimensions.total_rows, + ) + + return GDALRaster(raster_extent, mosaic) class PILTileSet(TileSet[PILRaster]): diff --git a/pcfuncs/requirements-dev.txt b/pcfuncs/requirements-dev.txt new file mode 100644 index 00000000..f193b621 --- /dev/null +++ b/pcfuncs/requirements-dev.txt @@ -0,0 +1 @@ +uvicorn diff --git a/pcfuncs/requirements.txt b/pcfuncs/requirements.txt index a5650fd2..9c1172bc 100644 --- a/pcfuncs/requirements.txt +++ b/pcfuncs/requirements.txt @@ -14,6 +14,7 @@ pillow==9.3.0 pyproj==3.3.1 pydantic>=1.9,<2.0.0 rasterio==1.3.* +rio-tiler>=4.1.7 # to make sure we have ImageData.from_bytes # Deployment needs to copy the local code into # the app code directory, so requires a separate diff --git a/pcfuncs/statistics/__init__.py b/pcfuncs/statistics/__init__.py new file mode 100644 index 00000000..9fe93ce6 --- /dev/null +++ b/pcfuncs/statistics/__init__.py @@ -0,0 +1,70 @@ +import json +import logging +from typing import Dict + +import azure.functions as func +from funclib.errors import BBoxTooLargeError +from pydantic import ValidationError + +from .models import StatisticsRequest +from .settings import StatisticsSettings +from .statistics import PcMosaicImage + + +async def main(req: func.HttpRequest) -> func.HttpResponse: + try: + body = req.get_json() + except ValueError: + return func.HttpResponse( + status_code=400, + mimetype="application/text", + body="Error: Invalid JSON", + ) + + try: + parsed_request = StatisticsRequest(**body) + except ValidationError as e: + return func.HttpResponse( + status_code=400, + mimetype="application/json", + body=e.json(), + ) + + try: + response = await handle_request(parsed_request) + + return func.HttpResponse( + status_code=200, + mimetype="application/json", + body=json.dumps(response), + ) + except BBoxTooLargeError as e: + logging.exception(e) + return func.HttpResponse( + status_code=400, + mimetype="application/json", + body=json.dumps({"error": str(e)}), + ) + except Exception as e: + logging.exception(e) + return func.HttpResponse( + status_code=500, + mimetype="application/json", + ) + + +async def handle_request(req: StatisticsRequest) -> Dict: + settings = StatisticsSettings.get() + + mosaic_image = PcMosaicImage( + bbox=req.bbox, + zoom=req.zoom, + cql=req.cql, + render_options=req.get_render_options(), + settings=settings, + data_api_url_override=req.data_api_url, + ) + + img = await mosaic_image.get() + + return {k: v.dict() for k, v in img.statistics().items()} diff --git a/pcfuncs/statistics/constants.py b/pcfuncs/statistics/constants.py new file mode 100644 index 00000000..9a549b19 --- /dev/null +++ b/pcfuncs/statistics/constants.py @@ -0,0 +1,6 @@ +STATISTICS_SETTINGS_PREFIX = "STATISTICS_" + +DEFAULT_STATISTICS_CONTAINER_URL = "https://pcexplorer.blob.core.windows.net/statistics" +MAX_TILE_COUNT = 16 + +DEFAULT_CONCURRENCY = 10 diff --git a/pcfuncs/statistics/function.json b/pcfuncs/statistics/function.json new file mode 100644 index 00000000..3736a679 --- /dev/null +++ b/pcfuncs/statistics/function.json @@ -0,0 +1,17 @@ +{ + "scriptFile": "__init__.py", + "bindings": [ + { + "authLevel": "anonymous", + "type": "httpTrigger", + "direction": "in", + "name": "req", + "methods": ["post"] + }, + { + "type": "http", + "direction": "out", + "name": "$return" + } + ] +} diff --git a/pcfuncs/statistics/models.py b/pcfuncs/statistics/models.py new file mode 100644 index 00000000..8c682c6d --- /dev/null +++ b/pcfuncs/statistics/models.py @@ -0,0 +1,36 @@ +from typing import Any, Dict, List, Optional + +from funclib.models import RenderOptions +from pydantic import BaseModel, validator + + +def _get_render_options(render_params: str) -> Dict[str, List[str]]: + result: Dict[str, List[str]] = {} + for p in render_params.split("&"): + k, v = p.split("=") + if k not in result: + result[k] = [] + result[k].append(v) + return result + + +class StatisticsRequest(BaseModel): + bbox: List[float] + zoom: int + cql: Dict[str, Any] + render_params: str + + data_api_url: Optional[str] = None + """Override for the data API URL. Useful for testing.""" + + @validator("render_params") + def _validate_render_params(cls, v: str) -> str: + RenderOptions.from_query_params(v) + return v + + def get_render_options(self) -> RenderOptions: + return RenderOptions.from_query_params(self.render_params) + + def get_collection(self) -> str: + render_options = _get_render_options(self.render_params) + return render_options["collection"][0] diff --git a/pcfuncs/statistics/settings.py b/pcfuncs/statistics/settings.py new file mode 100644 index 00000000..41e52308 --- /dev/null +++ b/pcfuncs/statistics/settings.py @@ -0,0 +1,31 @@ +import logging + +from cachetools import Cache, LRUCache, cachedmethod +from funclib.settings import BaseExporterSettings + +from .constants import ( + DEFAULT_CONCURRENCY, + DEFAULT_STATISTICS_CONTAINER_URL, + STATISTICS_SETTINGS_PREFIX, +) + +logger = logging.getLogger(__name__) + + +class StatisticsSettings(BaseExporterSettings): + _cache: Cache = LRUCache(maxsize=100) + + output_storage_url: str = DEFAULT_STATISTICS_CONTAINER_URL + tile_request_concurrency: int = DEFAULT_CONCURRENCY + + class Config: + env_prefix = STATISTICS_SETTINGS_PREFIX + env_nested_delimiter = "__" + + @classmethod + @cachedmethod(lambda cls: cls._cache) + def get(cls) -> "StatisticsSettings": + settings = StatisticsSettings() # type: ignore + logger.info(f"API URL: {settings.api_root_url}") + logger.info(f"Concurrency limit: {settings.tile_request_concurrency}") + return settings diff --git a/pcfuncs/statistics/statistics.py b/pcfuncs/statistics/statistics.py new file mode 100644 index 00000000..5ad4cb94 --- /dev/null +++ b/pcfuncs/statistics/statistics.py @@ -0,0 +1,57 @@ +import asyncio +from typing import Any, Dict, List, Optional + +from funclib.errors import BBoxTooLargeError +from funclib.models import RenderOptions +from funclib.raster import Bbox +from funclib.tiles import GDALTileSet +from mercantile import Tile, tiles +from pyproj import CRS +from rio_tiler.models import ImageData + +from .constants import MAX_TILE_COUNT +from .settings import StatisticsSettings + + +class PcMosaicImage: + def __init__( + self, + bbox: List[float], + zoom: int, + cql: Dict[str, Any], + render_options: RenderOptions, + settings: StatisticsSettings, + data_api_url_override: Optional[str] = None, + ): + self.bbox = Bbox(bbox[0], bbox[1], bbox[2], bbox[3], crs=CRS.from_epsg(4326)) + self.zoom = zoom + self.cql = cql + self.render_options = render_options + self.settings = settings + self.data_api_url_override = data_api_url_override + + tiles_args: List[Any] = bbox + [zoom] + self.tiles: List[Tile] = list(tiles(*tiles_args)) + self.tile_size = 512 + + settings = StatisticsSettings.get() + self.registerUrl = f"{settings.api_root_url}/mosaic/register/" + self.async_limit = asyncio.Semaphore(settings.tile_request_concurrency) + + if len(self.tiles) > MAX_TILE_COUNT: + raise BBoxTooLargeError( + f"Export area is too large, please draw a smaller area or zoom out." + f" ({len(self.tiles)} of {MAX_TILE_COUNT} max tiles requested)" + ) + + async def get(self) -> ImageData: + tile_set = await GDALTileSet.create( + self.cql["filter"], + self.render_options, + self.settings, + self.data_api_url_override, + ) + + raster = await tile_set.get_mosaic(self.tiles) + image = raster.crop(self.bbox).image + return image diff --git a/pcfuncs/tests/data-files/cog.tif b/pcfuncs/tests/data-files/cog.tif new file mode 100644 index 00000000..54c83c75 Binary files /dev/null and b/pcfuncs/tests/data-files/cog.tif differ diff --git a/pcfuncs/tests/funclib/test_gdal_tileset.py b/pcfuncs/tests/funclib/test_gdal_tileset.py new file mode 100644 index 00000000..3a24490f --- /dev/null +++ b/pcfuncs/tests/funclib/test_gdal_tileset.py @@ -0,0 +1,141 @@ +import contextlib +import pathlib +import threading +import time +from enum import Enum +from types import DynamicClassAttribute +from typing import Dict, Generator + +import pytest +import uvicorn +from fastapi import FastAPI, Path, Query +from funclib.models import RenderOptions +from funclib.tiles import GDALTileSet +from mercantile import Tile +from rio_tiler.io import Reader +from rio_tiler.profiles import img_profiles +from starlette.responses import Response + +HERE = pathlib.Path(__file__).parent +DATA_FILES = HERE / ".." / "data-files" + +cog_file = HERE / ".." / "data-files" / "cog.tif" + + +class ImageDriver(str, Enum): + """Supported output GDAL drivers.""" + + jpg = "JPEG" + png = "PNG" + tif = "GTiff" + + +class MediaType(str, Enum): + """Responses Media types formerly known as MIME types.""" + + tif = "image/tiff; application=geotiff" + png = "image/png" + jpeg = "image/jpeg" + + +class ImageType(str, Enum): + """Available Output image type.""" + + png = "png" + tif = "tif" + jpg = "jpg" + + @DynamicClassAttribute + def profile(self) -> Dict: + """Return rio-tiler image default profile.""" + return img_profiles.get(self._name_, {}) + + @DynamicClassAttribute + def driver(self) -> str: + """Return rio-tiler image default profile.""" + return ImageDriver[self._name_].value + + @DynamicClassAttribute + def mediatype(self) -> str: + """Return image media type.""" + return MediaType[self._name_].value + + +class Server(uvicorn.Server): + """Uvicorn Server.""" + + def install_signal_handlers(self) -> None: + """install handlers.""" + pass + + @contextlib.contextmanager + def run_in_thread(self) -> Generator: + """run in thread.""" + thread = threading.Thread(target=self.run) + thread.start() + try: + while not self.started: + time.sleep(1e-3) + yield + finally: + self.should_exit = True + thread.join() + + +@pytest.fixture(scope="session") +def application() -> Generator: + """Run app in Thread.""" + app = FastAPI() + + @app.get("/{z}/{x}/{y}.{format}", response_class=Response) + def tiler( + z: int = Path(...), + x: int = Path(...), + y: int = Path(...), + format: ImageType = Path(...), + collection: str = Query(...), + tile_scale: int = Query( + 1, gt=0, lt=4, description="Tile size scale. 1=256x256, 2=512x512..." + ), + ) -> Response: + with Reader(collection) as src: + image = src.tile(x, y, z, tilesize=tile_scale * 256) + + content = image.render( + img_format=format.driver, + **format.profile, + ) + return Response(content, media_type=format.mediatype) + + config = uvicorn.Config( + app, host="127.0.0.1", port=5000, log_level="info", loop="asyncio" + ) + server = Server(config=config) + with server.run_in_thread(): + yield "http://127.0.0.1:5000" + + +async def test_app(application: str) -> None: + """Test GDAL Tileset application.""" + tileset = GDALTileSet( + f"{application}/{{z}}/{{x}}/{{y}}.tif", + RenderOptions( + collection=str(cog_file), + ), + ) + expect = f"http://127.0.0.1:5000/0/1/2.tif?collection={cog_file}&tile_scale=2" + assert tileset.get_tile_url(0, 1, 2) == expect + + # Test one Tile + url = tileset.get_tile_url(7, 44, 25) + im = await tileset._get_tile(url) + assert im + assert im.width == 512 + assert im.height == 512 + + # Test Mosaic + mosaic = await tileset.get_mosaic([Tile(44, 25, 7), Tile(45, 25, 7)]) + assert mosaic.image.width == 1024 + assert mosaic.image.height == 512 + assert mosaic.image.count == 1 # same as cog_file + assert mosaic.image.data.dtype == "uint16" # same as cog_file diff --git a/pcfuncs/tests/funclib/test_raster.py b/pcfuncs/tests/funclib/test_raster.py index 3c2d90bc..2bef3c8b 100644 --- a/pcfuncs/tests/funclib/test_raster.py +++ b/pcfuncs/tests/funclib/test_raster.py @@ -1,7 +1,8 @@ from pathlib import Path -from funclib.raster import Bbox, PILRaster, RasterExtent +from funclib.raster import Bbox, GDALRaster, PILRaster, RasterExtent from PIL import Image +from rio_tiler.models import ImageData HERE = Path(__file__).parent DATA_FILES = HERE / ".." / "data-files" @@ -35,3 +36,22 @@ def test_raster_extent_map_to_grid() -> None: assert x == 5 assert y == 5 + + +def test_rio_crop() -> None: + with open(DATA_FILES / "s2.png", "rb") as src: + img = ImageData.from_bytes(src.read()) + + raster = GDALRaster( + extent=RasterExtent( + bbox=Bbox(0, 0, 10, 10), + cols=img.width, + rows=img.height, + ), + image=img, + ) + + cropped = raster.crop(Bbox(0, 0, 5, 5)) + + assert abs(cropped.extent.cols - (img.width / 2)) < 1 + assert abs(cropped.extent.rows - (img.height / 2)) < 1 diff --git a/scripts/test b/scripts/test index e7bc3cd4..422d5d40 100755 --- a/scripts/test +++ b/scripts/test @@ -87,8 +87,9 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then docker-compose \ -f docker-compose.yml \ + -f docker-compose.dev.yml \ run --rm \ - funcs /bin/bash -c "cd /opt/src && scripts/bin/test-funcs" + funcs-dev /bin/bash -c "cd /opt/src && scripts/bin/test-funcs" fi fi