Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 66 additions & 1 deletion docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ There are several API endpoints:

- [`GET /`](#index): check if API is running (just returns `It works!` message)
- [`POST /datapoints`](#insert-datapoints): insert datapoints into DP³
- [`GET /entity/<entity_type>`](#list-entities): list current snapshots of all entities of given type
- ~~[`GET /entity/<entity_type>`](#list-entities): list current snapshots of all entities of given type~~
- [`GET /entity/<entity_type>/get`](#get-entities): get current snapshots of entities of entity type
- [`GET /entity/<entity_type>/count`](#count-entities): get total document count for query of entity type
- [`GET /entity/<entity_type>/<entity_id>`](#get-eid-data): get data of entity with given entity id
- [`GET /entity/<entity_type>/<entity_id>/get/<attr_id>`](#get-attr-value): get attribute value
- [`GET /entity/<entity_type>/<entity_id>/set/<attr_id>`](#set-attr-value): set attribute value
Expand Down Expand Up @@ -198,8 +200,46 @@ v -> some_embedded_dict_field

## List entities

!!! warning "Deprecated"

This endpoint is deprecated and will be removed in the future.
Use [`GET /entity/<entity_type>/get`](#get-entities) to get paged documents and
[`GET /entity/<entity_type>/count`](#count-entities) to get total document count for query.

List latest snapshots of all ids present in database under entity type,
filtered by `generic_filter` and `fulltext_filters`.
Contains only the latest snapshot per entity.

Counts all results for given query.

### Request

`GET /entity/<entity_type>`

**Optional query parameters:**

- skip: how many entities to skip (default: 0)
- limit: how many entities to return (default: 20)
- fulltext_filters: dictionary of fulltext filters (default: no filters)
- generic_filter: dictionary of generic filters (default: no filters)

### Response

```json
{
"time_created": "2023-07-04T12:10:38.827Z",
"data": [
{}
]
}
```

---

## Get entities

Get a list of latest snapshots of all ids present in database under entity type,
filtered by `generic_filter` and `fulltext_filters`.
Contains only the latest snapshot per entity.

Uses pagination: the default `limit` is 20, and setting `limit` to 0 returns all results.
Expand Down Expand Up @@ -244,6 +284,31 @@ Generic and fulltext filters are merged - fulltext overrides conflicting keys.

---

## Count entities

Count latest snapshots of all ids present in database under entity type,
filtered by `generic_filter` and `fulltext_filters`.
See [`GET /entity/<entity_type>/get`](#get-entities) for details on filter format.

### Request

`GET /entity/<entity_type>/count`

**Optional query parameters:**

- fulltext_filters: dictionary of fulltext filters (default: no filters)
- generic_filter: dictionary of generic filters (default: no filters)

### Response

```json
{
"total_count": 0
}
```

---

## Get Eid data

Get data of entity type's eid.
Expand Down
6 changes: 6 additions & 0 deletions dp3/api/internal/entity_response_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ class EntityEidList(BaseModel):
data: EntityEidSnapshots


class EntityEidCount(BaseModel):
    """Total count of documents available under specified filter."""

    # Number of latest snapshots matching the requested filters.
    total_count: int


class EntityEidData(BaseModel):
"""Data of entity eid

Expand Down
112 changes: 93 additions & 19 deletions dp3/api/routers/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from dp3.api.internal.entity_response_models import (
EntityEidAttrValue,
EntityEidAttrValueOrHistory,
EntityEidCount,
EntityEidData,
EntityEidList,
EntityEidMasterRecord,
Expand Down Expand Up @@ -76,8 +77,29 @@ def get_eid_snapshots_handler(
router = APIRouter(dependencies=[Depends(check_etype)])


def _validate_snapshot_filters(fulltext_filters, generic_filter):
if not fulltext_filters:
fulltext_filters = {}
if not isinstance(fulltext_filters, dict):
raise HTTPException(status_code=400, detail="Fulltext filter is invalid")

if not generic_filter:
generic_filter = {}
if not isinstance(generic_filter, dict):
raise HTTPException(status_code=400, detail="Generic filter is invalid")

for attr in fulltext_filters:
ftr = fulltext_filters[attr]
if not isinstance(ftr, str):
raise HTTPException(status_code=400, detail=f"Filter '{ftr}' is not string")

return fulltext_filters, generic_filter


@router.get(
"/{etype}", responses={400: {"description": "Query can't be processed", "model": ErrorResponse}}
"/{etype}",
responses={400: {"description": "Query can't be processed", "model": ErrorResponse}},
deprecated=True,
)
async def list_entity_type_eids(
etype: str,
Expand All @@ -88,7 +110,47 @@ async def list_entity_type_eids(
) -> EntityEidList:
"""List latest snapshots of all `id`s present in database under `etype`.

Deprecated in favor of `/entity/{etype}/get` and `/entity/{etype}/count` endpoints,
which provide more flexibility and better performance.

See `/entity/{etype}/get` for more information.
"""
fulltext_filters, generic_filter = _validate_snapshot_filters(fulltext_filters, generic_filter)

try:
cursor, total_count = DB.snapshots.get_latest(etype, fulltext_filters, generic_filter)
cursor_page = cursor.skip(skip).limit(limit)
except DatabaseError as e:
raise HTTPException(status_code=400, detail=str(e)) from e

time_created = None

# Remove _id field
result = [r["last"] for r in cursor_page]
for r in result:
time_created = r["_time_created"]
del r["_time_created"]

return EntityEidList(
time_created=time_created, count=len(result), total_count=total_count, data=result
)


@router.get(
"/{etype}/get",
responses={400: {"description": "Query can't be processed", "model": ErrorResponse}},
)
async def get_entity_type_eids(
etype: str,
fulltext_filters: Json = None,
generic_filter: Json = None,
skip: NonNegativeInt = 0,
limit: NonNegativeInt = 20,
) -> EntityEidList:
"""List latest snapshots of all `id`s present in database under `etype`.

Contains only latest snapshot.
The `total_count` returned is always 0, use `/entity/{etype}/count` to get total count.

Uses pagination.
Setting `limit` to 0 is interpreted as no limit (return all results).
Expand Down Expand Up @@ -164,23 +226,10 @@ async def list_entity_type_eids(

Generic and fulltext filters are merged - fulltext overrides conflicting keys.
"""
if not fulltext_filters:
fulltext_filters = {}
if not isinstance(fulltext_filters, dict):
raise HTTPException(status_code=400, detail="Fulltext filter is invalid")

if not generic_filter:
generic_filter = {}
if not isinstance(generic_filter, dict):
raise HTTPException(status_code=400, detail="Generic filter is invalid")

for attr in fulltext_filters:
ftr = fulltext_filters[attr]
if not isinstance(ftr, str):
raise HTTPException(status_code=400, detail=f"Filter '{ftr}' is not string")
fulltext_filters, generic_filter = _validate_snapshot_filters(fulltext_filters, generic_filter)

try:
cursor, total_count = DB.snapshots.get_latest(etype, fulltext_filters, generic_filter)
cursor = DB.snapshots.find_latest(etype, fulltext_filters, generic_filter)
cursor_page = cursor.skip(skip).limit(limit)
except DatabaseError as e:
raise HTTPException(status_code=400, detail=str(e)) from e
Expand All @@ -193,9 +242,34 @@ async def list_entity_type_eids(
time_created = r["_time_created"]
del r["_time_created"]

return EntityEidList(
time_created=time_created, count=len(result), total_count=total_count, data=result
)
return EntityEidList(time_created=time_created, count=len(result), total_count=0, data=result)


@router.get(
    "/{etype}/count",
    responses={400: {"description": "Query can't be processed", "model": ErrorResponse}},
)
async def count_entity_type_eids(
    etype: str,
    fulltext_filters: Json = None,
    generic_filter: Json = None,
) -> EntityEidCount:
    """Count latest snapshots of all `id`s present in database under `etype`.

    Returns only count of documents matching `generic_filter` and `fulltext_filters`,
    see `/entity/{etype}/get` documentation for details.

    Note that responses from this endpoint may take much longer than `/entity/{etype}/get`
    for large datasets.
    """
    # Apply defaults and reject malformed filters (HTTP 400) before querying.
    fulltext, generic = _validate_snapshot_filters(fulltext_filters, generic_filter)

    try:
        total = DB.snapshots.count_latest(etype, fulltext, generic)
    except DatabaseError as err:
        # Database-level errors are reported to the client as a bad request.
        raise HTTPException(status_code=400, detail=str(err)) from err
    else:
        return EntityEidCount(total_count=total)


@router.get("/{etype}/{eid}")
Expand Down
95 changes: 85 additions & 10 deletions dp3/database/snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,13 +185,59 @@ def get_latest(
May raise `SnapshotCollectionError` if query is invalid.
"""
snapshot_col = self._col()
query = self._prepare_latest_query(fulltext_filters or {}, generic_filter or {})

if not fulltext_filters:
fulltext_filters = {}
try:
return snapshot_col.find(query, {"last": 1}).sort(
[("_id", pymongo.ASCENDING)]
), snapshot_col.count_documents(query)
except OperationFailure as e:
raise SnapshotCollectionError(f"Query is invalid: {e}") from e

def find_latest(
    self,
    fulltext_filters: Optional[dict[str, str]] = None,
    generic_filter: Optional[dict[str, Any]] = None,
) -> Cursor:
    """Find latest snapshots of this collection's entity type, without counting.

    See [`get_latest`][dp3.database.snapshots.SnapshotCollectionContainer.get_latest]
    for more information on the filter format.

    Args:
        fulltext_filters: Fulltext filters; a falsy value means no filters.
        generic_filter: Generic filter; a falsy value means no filter.

    Returns:
        Cursor over documents matching both filters, projected to the
        `last` field and sorted by `_id` in ascending order.

    Raises:
        SnapshotCollectionError: If an `OperationFailure` is raised while
            the cursor is being built.
    """
    fulltext = fulltext_filters or {}
    generic = generic_filter or {}
    query = self._prepare_latest_query(fulltext, generic)
    try:
        # NOTE(review): pymongo cursors are presumably lazy, so an invalid query
        # may only raise OperationFailure once the caller iterates the cursor,
        # i.e. outside this handler - confirm against callers.
        cursor = self._col().find(query, {"last": 1})
        return cursor.sort([("_id", pymongo.ASCENDING)])
    except OperationFailure as err:
        raise SnapshotCollectionError(f"Query is invalid: {err}") from err

if not generic_filter:
generic_filter = {}
def count_latest(
    self,
    fulltext_filters: Optional[dict[str, str]] = None,
    generic_filter: Optional[dict[str, Any]] = None,
) -> int:
    """Count latest snapshots of this collection's entity type.

    See [`get_latest`][dp3.database.snapshots.SnapshotCollectionContainer.get_latest]
    for more information on the filter format.

    Note that this method may take much longer than `find_latest` on larger
    databases, as it counts every matching document instead of returning a cursor.

    Args:
        fulltext_filters: Fulltext filters; a falsy value means no filters.
        generic_filter: Generic filter; a falsy value means no filter.

    Returns:
        Number of documents matching both filters.

    Raises:
        SnapshotCollectionError: If the count fails with `OperationFailure`.
    """
    fulltext = fulltext_filters or {}
    generic = generic_filter or {}
    query = self._prepare_latest_query(fulltext, generic)
    try:
        collection = self._col()
        return collection.count_documents(query)
    except OperationFailure as err:
        raise SnapshotCollectionError(f"Query is invalid: {err}") from err

def _prepare_latest_query(
self, fulltext_filters: dict[str, str], generic_filter: dict[str, Any]
):
"""Prepare query for get_latest method."""
# Create base of query
try:
query = search_and_replace(generic_filter)
Expand Down Expand Up @@ -222,12 +268,7 @@ def get_latest(
else:
query["last." + attr] = fulltext_filter

try:
return snapshot_col.find(query, {"last": 1}).sort(
[("_id", pymongo.ASCENDING)]
), snapshot_col.count_documents(query)
except OperationFailure as e:
raise SnapshotCollectionError(f"Query is invalid: {e}") from e
return query

def get_by_eid(
self, eid: AnyEidT, t1: Optional[datetime] = None, t2: Optional[datetime] = None
Expand Down Expand Up @@ -778,6 +819,40 @@ def get_latest(
"""
return self[entity_type].get_latest(fulltext_filters, generic_filter)

def find_latest(
    self,
    entity_type: str,
    fulltext_filters: Optional[dict[str, str]] = None,
    generic_filter: Optional[dict[str, Any]] = None,
) -> Cursor:
    """Find latest snapshots of the given `entity_type`, without counting.

    See [`get_latest`][dp3.database.snapshots.SnapshotCollectionContainer.get_latest]
    for more information on the filter format.

    Args:
        entity_type: Key used to look up the per-entity collection on `self`.
        fulltext_filters: Fulltext filters, passed through unchanged.
        generic_filter: Generic filter, passed through unchanged.

    Returns:
        Whatever the per-entity collection's `find_latest` returns.
    """
    # Delegate to the collection registered for this entity type.
    collection = self[entity_type]
    return collection.find_latest(fulltext_filters, generic_filter)

def count_latest(
    self,
    entity_type: str,
    fulltext_filters: Optional[dict[str, str]] = None,
    generic_filter: Optional[dict[str, Any]] = None,
) -> int:
    """Count latest snapshots of the given `entity_type`.

    See [`get_latest`][dp3.database.snapshots.SnapshotCollectionContainer.get_latest]
    for more information on the filter format.

    Note that this method may take much longer than `find_latest` on larger
    databases, as it counts every matching document instead of returning a cursor.

    Args:
        entity_type: Key used to look up the per-entity collection on `self`.
        fulltext_filters: Fulltext filters, passed through unchanged.
        generic_filter: Generic filter, passed through unchanged.

    Returns:
        Whatever the per-entity collection's `count_latest` returns.
    """
    # Delegate to the collection registered for this entity type.
    collection = self[entity_type]
    return collection.count_latest(fulltext_filters, generic_filter)

def get_by_eid(
self,
entity_type: str,
Expand Down
2 changes: 2 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ markdown_extensions:
# Additional attribute lists (used e.g. for image size)
- attr_list
- md_in_html
# Strike-through
- pymdownx.tilde

plugins:
# Default search bar
Expand Down