Skip to content

Commit e3b0635

Browse files
authored
revert shortcut for ids limit, add tests (#212)
1 parent 318fa2d commit e3b0635

File tree

8 files changed

+204
-29
lines changed

8 files changed

+204
-29
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
66

77
## [unreleased]
88

9+
### Fixed
10+
11+
- Revert an optimisation which limited the number of results from a search query to the number of item IDs specified in the query.
12+
This fixes an issue where items with the same ID that are in multiple collections could be left out of search results.
13+
914
### Changed
1015

1116
- update `pydantic` requirement to `~=2.0`
@@ -16,6 +21,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
1621
- Add private jsonb column to items and collections table to hold private metadata that should not be returned as part of a stac item
1722
- Add generated columns to collections with the bounding box as a geometry and the datetime and end_datetime from the extents (this is to help with forthcoming work on collections search)
1823
- Add PLRust to the Docker postgres image for forthcoming work to add optional PLRust functions for expensive json manipulation (including hydration)
24+
- Remove default queryable for eo:cloud_cover
1925

2026
## [v0.7.10]
2127

docker/pypgstac/bin/test

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,15 @@ ALTER DATABASE pgstac_test_db_template SET CLIENT_MIN_MESSAGES TO WARNING;
2626
ALTER DATABASE pgstac_test_db_template SET SEARCH_PATH to pgstac, public;
2727
\connect pgstac_test_db_template;
2828
\i pgstac.sql
29+
DO \$\$
30+
BEGIN
31+
INSERT INTO queryables (name, definition, property_wrapper, property_index_type) VALUES
32+
('eo:cloud_cover','{"$ref": "https://stac-extensions.github.io/eo/v1.0.0/schema.json#/definitions/fieldsproperties/eo:cloud_cover"}','to_int','BTREE')
33+
ON CONFLICT DO NOTHING;
34+
EXCEPTION WHEN unique_violation THEN
35+
RAISE NOTICE '%', SQLERRM USING ERRCODE = SQLSTATE;
36+
END
37+
\$\$;
2938
EOSQL
3039
}
3140

@@ -61,6 +70,7 @@ psql -X -q -v ON_ERROR_STOP=1 <<EOSQL
6170
DROP DATABASE IF EXISTS pgstac_test_pgtap WITH (force);
6271
CREATE DATABASE pgstac_test_pgtap TEMPLATE $TEMPLATEDB;
6372
ALTER DATABASE pgstac_test_pgtap SET client_min_messages to $CLIENTMESSAGES;
73+
6474
EOSQL
6575
TESTOUTPUT=$(psql -X -q -v ON_ERROR_STOP=1 -f $PGSTACDIR/tests/pgtap.sql pgstac_test_pgtap)
6676
psql -X -q -v ON_ERROR_STOP=1 <<EOSQL

src/pgstac/migrations/pgstac.0.7.10-unreleased.sql

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,165 @@ AS $function$
257257
$function$
258258
;
259259

260+
CREATE OR REPLACE FUNCTION pgstac.search(_search jsonb DEFAULT '{}'::jsonb)
261+
RETURNS jsonb
262+
LANGUAGE plpgsql
263+
AS $function$
264+
DECLARE
265+
searches searches%ROWTYPE;
266+
_where text;
267+
orderby text;
268+
search_where search_wheres%ROWTYPE;
269+
total_count bigint;
270+
token record;
271+
token_prev boolean;
272+
token_item items%ROWTYPE;
273+
token_where text;
274+
full_where text;
275+
init_ts timestamptz := clock_timestamp();
276+
timer timestamptz := clock_timestamp();
277+
hydrate bool := NOT (_search->'conf'->>'nohydrate' IS NOT NULL AND (_search->'conf'->>'nohydrate')::boolean = true);
278+
prev text;
279+
next text;
280+
context jsonb;
281+
collection jsonb;
282+
out_records jsonb;
283+
out_len int;
284+
_limit int := coalesce((_search->>'limit')::int, 10);
285+
_querylimit int;
286+
_fields jsonb := coalesce(_search->'fields', '{}'::jsonb);
287+
has_prev boolean := FALSE;
288+
has_next boolean := FALSE;
289+
BEGIN
290+
searches := search_query(_search);
291+
_where := searches._where;
292+
orderby := searches.orderby;
293+
search_where := where_stats(_where);
294+
total_count := coalesce(search_where.total_count, search_where.estimated_count);
295+
RAISE NOTICE 'SEARCH:TOKEN: %', _search->>'token';
296+
token := get_token_record(_search->>'token');
297+
RAISE NOTICE '***TOKEN: %', token;
298+
_querylimit := _limit + 1;
299+
IF token IS NOT NULL THEN
300+
token_prev := token.prev;
301+
token_item := token.item;
302+
token_where := get_token_filter(_search->'sortby', token_item, token_prev, FALSE);
303+
RAISE LOG 'TOKEN_WHERE: % (%ms from search start)', token_where, age_ms(timer);
304+
IF token_prev THEN -- if we are using a prev token, we know has_next is true
305+
RAISE LOG 'There is a previous token, so automatically setting has_next to true';
306+
has_next := TRUE;
307+
orderby := sort_sqlorderby(_search, TRUE);
308+
ELSE
309+
RAISE LOG 'There is a next token, so automatically setting has_prev to true';
310+
has_prev := TRUE;
311+
312+
END IF;
313+
ELSE -- if there was no token, we know there is no prev
314+
RAISE LOG 'There is no token, so we know there is no prev. setting has_prev to false';
315+
has_prev := FALSE;
316+
END IF;
317+
318+
full_where := concat_ws(' AND ', _where, token_where);
319+
RAISE NOTICE 'FULL WHERE CLAUSE: %', full_where;
320+
RAISE NOTICE 'Time to get counts and build query %', age_ms(timer);
321+
timer := clock_timestamp();
322+
323+
IF hydrate THEN
324+
RAISE NOTICE 'Getting hydrated data.';
325+
ELSE
326+
RAISE NOTICE 'Getting non-hydrated data.';
327+
END IF;
328+
RAISE NOTICE 'CACHE SET TO %', get_setting_bool('format_cache');
329+
RAISE NOTICE 'Time to set hydration/formatting %', age_ms(timer);
330+
timer := clock_timestamp();
331+
SELECT jsonb_agg(format_item(i, _fields, hydrate)) INTO out_records
332+
FROM search_rows(
333+
full_where,
334+
orderby,
335+
search_where.partitions,
336+
_querylimit
337+
) as i;
338+
339+
RAISE NOTICE 'Time to fetch rows %', age_ms(timer);
340+
timer := clock_timestamp();
341+
342+
343+
IF token_prev THEN
344+
out_records := flip_jsonb_array(out_records);
345+
END IF;
346+
347+
RAISE NOTICE 'Query returned % records.', jsonb_array_length(out_records);
348+
RAISE LOG 'TOKEN: % %', token_item.id, token_item.collection;
349+
RAISE LOG 'RECORD_1: % %', out_records->0->>'id', out_records->0->>'collection';
350+
RAISE LOG 'RECORD-1: % %', out_records->-1->>'id', out_records->-1->>'collection';
351+
352+
-- REMOVE records that were from our token
353+
IF out_records->0->>'id' = token_item.id AND out_records->0->>'collection' = token_item.collection THEN
354+
out_records := out_records - 0;
355+
ELSIF out_records->-1->>'id' = token_item.id AND out_records->-1->>'collection' = token_item.collection THEN
356+
out_records := out_records - -1;
357+
END IF;
358+
359+
out_len := jsonb_array_length(out_records);
360+
361+
IF out_len = _limit + 1 THEN
362+
IF token_prev THEN
363+
has_prev := TRUE;
364+
out_records := out_records - 0;
365+
ELSE
366+
has_next := TRUE;
367+
out_records := out_records - -1;
368+
END IF;
369+
END IF;
370+
371+
IF has_next THEN
372+
next := concat(out_records->-1->>'collection', ':', out_records->-1->>'id');
373+
RAISE NOTICE 'HAS NEXT | %', next;
374+
END IF;
375+
376+
IF has_prev THEN
377+
prev := concat(out_records->0->>'collection', ':', out_records->0->>'id');
378+
RAISE NOTICE 'HAS PREV | %', prev;
379+
END IF;
380+
381+
RAISE NOTICE 'Time to get prev/next %', age_ms(timer);
382+
timer := clock_timestamp();
383+
384+
IF context(_search->'conf') != 'off' THEN
385+
context := jsonb_strip_nulls(jsonb_build_object(
386+
'limit', _limit,
387+
'matched', total_count,
388+
'returned', coalesce(jsonb_array_length(out_records), 0)
389+
));
390+
ELSE
391+
context := jsonb_strip_nulls(jsonb_build_object(
392+
'limit', _limit,
393+
'returned', coalesce(jsonb_array_length(out_records), 0)
394+
));
395+
END IF;
396+
397+
collection := jsonb_build_object(
398+
'type', 'FeatureCollection',
399+
'features', coalesce(out_records, '[]'::jsonb),
400+
'next', next,
401+
'prev', prev,
402+
'context', context
403+
);
404+
405+
IF get_setting_bool('timing', _search->'conf') THEN
406+
collection = collection || jsonb_build_object('timing', age_ms(init_ts));
407+
END IF;
408+
409+
RAISE NOTICE 'Time to build final json %', age_ms(timer);
410+
timer := clock_timestamp();
411+
412+
RAISE NOTICE 'Total Time: %', age_ms(current_timestamp);
413+
RAISE NOTICE 'RETURNING % records. NEXT: %. PREV: %', collection->'context'->>'returned', collection->>'next', collection->>'prev';
414+
RETURN collection;
415+
END;
416+
$function$
417+
;
418+
260419

261420
-- END migra calculated SQL
262421
DO $$

src/pgstac/migrations/pgstac.unreleased.sql

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3596,22 +3596,11 @@ DECLARE
35963596
out_records jsonb;
35973597
out_len int;
35983598
_limit int := coalesce((_search->>'limit')::int, 10);
3599-
init_limit int := _limit;
36003599
_querylimit int;
36013600
_fields jsonb := coalesce(_search->'fields', '{}'::jsonb);
36023601
has_prev boolean := FALSE;
36033602
has_next boolean := FALSE;
3604-
items_cnt int := coalesce(jsonb_array_length(_search->'ids'),0);
36053603
BEGIN
3606-
RAISE NOTICE 'Items Count: %', items_cnt;
3607-
IF items_cnt > 0 THEN
3608-
IF items_cnt <= _limit THEN
3609-
_limit := items_cnt - 1;
3610-
ELSE
3611-
_limit := items_cnt;
3612-
END IF;
3613-
RAISE NOTICE 'Items is set. Changing limit to %', items_cnt;
3614-
END IF;
36153604
searches := search_query(_search);
36163605
_where := searches._where;
36173606
orderby := searches.orderby;
@@ -3683,7 +3672,7 @@ BEGIN
36833672

36843673
out_len := jsonb_array_length(out_records);
36853674

3686-
IF out_len = init_limit + 1 THEN
3675+
IF out_len = _limit + 1 THEN
36873676
IF token_prev THEN
36883677
has_prev := TRUE;
36893678
out_records := out_records - 0;
@@ -3708,13 +3697,13 @@ BEGIN
37083697

37093698
IF context(_search->'conf') != 'off' THEN
37103699
context := jsonb_strip_nulls(jsonb_build_object(
3711-
'limit', init_limit,
3700+
'limit', _limit,
37123701
'matched', total_count,
37133702
'returned', coalesce(jsonb_array_length(out_records), 0)
37143703
));
37153704
ELSE
37163705
context := jsonb_strip_nulls(jsonb_build_object(
3717-
'limit', init_limit,
3706+
'limit', _limit,
37183707
'returned', coalesce(jsonb_array_length(out_records), 0)
37193708
));
37203709
END IF;

src/pgstac/sql/004_search.sql

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -779,22 +779,11 @@ DECLARE
779779
out_records jsonb;
780780
out_len int;
781781
_limit int := coalesce((_search->>'limit')::int, 10);
782-
init_limit int := _limit;
783782
_querylimit int;
784783
_fields jsonb := coalesce(_search->'fields', '{}'::jsonb);
785784
has_prev boolean := FALSE;
786785
has_next boolean := FALSE;
787-
items_cnt int := coalesce(jsonb_array_length(_search->'ids'),0);
788786
BEGIN
789-
RAISE NOTICE 'Items Count: %', items_cnt;
790-
IF items_cnt > 0 THEN
791-
IF items_cnt <= _limit THEN
792-
_limit := items_cnt - 1;
793-
ELSE
794-
_limit := items_cnt;
795-
END IF;
796-
RAISE NOTICE 'Items is set. Changing limit to %', items_cnt;
797-
END IF;
798787
searches := search_query(_search);
799788
_where := searches._where;
800789
orderby := searches.orderby;
@@ -866,7 +855,7 @@ BEGIN
866855

867856
out_len := jsonb_array_length(out_records);
868857

869-
IF out_len = init_limit + 1 THEN
858+
IF out_len = _limit + 1 THEN
870859
IF token_prev THEN
871860
has_prev := TRUE;
872861
out_records := out_records - 0;
@@ -891,13 +880,13 @@ BEGIN
891880

892881
IF context(_search->'conf') != 'off' THEN
893882
context := jsonb_strip_nulls(jsonb_build_object(
894-
'limit', init_limit,
883+
'limit', _limit,
895884
'matched', total_count,
896885
'returned', coalesce(jsonb_array_length(out_records), 0)
897886
));
898887
ELSE
899888
context := jsonb_strip_nulls(jsonb_build_object(
900-
'limit', init_limit,
889+
'limit', _limit,
901890
'returned', coalesce(jsonb_array_length(out_records), 0)
902891
));
903892
END IF;

src/pgstac/tests/pgtap.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ CREATE EXTENSION IF NOT EXISTS pgtap;
1717
SET SEARCH_PATH TO pgstac, pgtap, public;
1818

1919
-- Plan the tests.
20-
SELECT plan(191);
20+
SELECT plan(194);
2121
--SELECT * FROM no_plan();
2222

2323
-- Run the tests.

src/pgstac/tests/pgtap/004_search.sql

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,3 +650,23 @@ SELECT * FROM pg_temp.testpaging('asc','asc');
650650
SELECT * FROM pg_temp.testpaging('asc','desc');
651651
SELECT * FROM pg_temp.testpaging('desc','desc');
652652
SELECT * FROM pg_temp.testpaging('desc','asc');
653+
654+
\copy items_staging (content) FROM 'tests/testdata/items_duplicate_ids.ndjson'
655+
656+
SELECT is(
657+
(SELECT jsonb_array_length(search('{"ids": ["pgstac-test-item-duplicated"]}')->'features')),
658+
'2',
659+
'Make sure all matching items are returned when items with the same ID are in multiple collections, no collections specified. #192'
660+
);
661+
662+
SELECT is(
663+
(SELECT jsonb_array_length(search('{"ids": ["pgstac-test-item-duplicated"], "collections": ["pgstac-test-collection"]}')->'features')),
664+
'1',
665+
'Make sure all matching items are returned when items with the same ID are in multiple collections, some collections specified. #192'
666+
);
667+
668+
SELECT is(
669+
(SELECT jsonb_array_length(search('{"ids": ["pgstac-test-item-duplicated"], "collections": ["pgstac-test-collection", "pgstac-test-collection2"]}')->'features')),
670+
'2',
671+
'Make sure all matching items are returned when items with the same ID are in multiple collections, all collections specified. #192'
672+
);
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"id": "pgstac-test-item-duplicated", "bbox": [-87.816179, 30.496894, -87.74637, 30.565604], "type": "Feature", "links": [], "assets": {"image": {"href": "https://naipeuwest.blob.core.windows.net/naip/v002/al/2011/al_100cm_2011/30087/m_3008726_se_16_1_20110731.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "roles": ["data"], "title": "RGBIR COG tile", "eo:bands": [{"name": "Red", "common_name": "red"}, {"name": "Green", "common_name": "green"}, {"name": "Blue", "common_name": "blue"}, {"name": "NIR", "common_name": "nir", "description": "near-infrared"}]}, "metadata": {"href": "https://naipeuwest.blob.core.windows.net/naip/v002/al/2011/al_fgdc_2011/30087/m_3008726_se_16_1_20110731.txt", "type": "text/plain", "roles": ["metadata"], "title": "FGDC Metdata"}, "thumbnail": {"href": "https://naipeuwest.blob.core.windows.net/naip/v002/al/2011/al_100cm_2011/30087/m_3008726_se_16_1_20110731.200.jpg", "type": "image/jpeg", "roles": ["thumbnail"], "title": "Thumbnail"}}, "geometry": {"type": "Polygon", "coordinates": [[[-87.74637, 30.497308], [-87.746892, 30.565604], [-87.816179, 30.565188], [-87.815608, 30.496894], [-87.74637, 30.497308]]]}, "collection": "pgstac-test-collection", "properties": {"gsd": 1, "datetime": "2011-07-31T00:00:00Z", "naip:year": 2013, "proj:bbox": [421730, 3374130, 428375, 3381699], "proj:epsg": 26916, "providers": [{"url": "https://www.fsa.usda.gov/programs-and-services/aerial-photography/imagery-programs/naip-imagery/", "name": "USDA Farm Service Agency", "roles": ["producer", "licensor"]}], "naip:state": "zz", "proj:shape": [7569, 6645], "eo:cloud_cover": 50, "proj:transform": [1, 0, 421730, 0, -1, 3381699, 0, 0, 1]}, "stac_version": "1.0.0-beta.2", "stac_extensions": ["eo", "projection"]}
2+
{"id": "pgstac-test-item-duplicated", "bbox": [-87.816179, 30.496894, -87.74637, 30.565604], "type": "Feature", "links": [], "assets": {"image": {"href": "https://naipeuwest.blob.core.windows.net/naip/v002/al/2011/al_100cm_2011/30087/m_3008726_se_16_1_20110731.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "roles": ["data"], "title": "RGBIR COG tile", "eo:bands": [{"name": "Red", "common_name": "red"}, {"name": "Green", "common_name": "green"}, {"name": "Blue", "common_name": "blue"}, {"name": "NIR", "common_name": "nir", "description": "near-infrared"}]}, "metadata": {"href": "https://naipeuwest.blob.core.windows.net/naip/v002/al/2011/al_fgdc_2011/30087/m_3008726_se_16_1_20110731.txt", "type": "text/plain", "roles": ["metadata"], "title": "FGDC Metdata"}, "thumbnail": {"href": "https://naipeuwest.blob.core.windows.net/naip/v002/al/2011/al_100cm_2011/30087/m_3008726_se_16_1_20110731.200.jpg", "type": "image/jpeg", "roles": ["thumbnail"], "title": "Thumbnail"}}, "geometry": {"type": "Polygon", "coordinates": [[[-87.74637, 30.497308], [-87.746892, 30.565604], [-87.816179, 30.565188], [-87.815608, 30.496894], [-87.74637, 30.497308]]]}, "collection": "pgstac-test-collection2", "properties": {"gsd": 1, "datetime": "2011-07-31T00:00:00Z", "naip:year": 2013, "proj:bbox": [421730, 3374130, 428375, 3381699], "proj:epsg": 26916, "providers": [{"url": "https://www.fsa.usda.gov/programs-and-services/aerial-photography/imagery-programs/naip-imagery/", "name": "USDA Farm Service Agency", "roles": ["producer", "licensor"]}], "naip:state": "zz", "proj:shape": [7569, 6645], "eo:cloud_cover": 50, "proj:transform": [1, 0, 421730, 0, -1, 3381699, 0, 0, 1]}, "stac_version": "1.0.0-beta.2", "stac_extensions": ["eo", "projection"]}

0 commit comments

Comments
 (0)