From 225dab7cba2a7f04a55c820e50b2fa0b6ef2f13b Mon Sep 17 00:00:00 2001 From: Oleh Paduchak Date: Thu, 15 May 2025 15:29:19 +0300 Subject: [PATCH 1/3] Reference Zenodo imp --- addon_imps/link/zenodo.py | 203 +++++++++++++++++++++++++++++ addon_service/common/known_imps.py | 4 +- 2 files changed, 206 insertions(+), 1 deletion(-) create mode 100644 addon_imps/link/zenodo.py diff --git a/addon_imps/link/zenodo.py b/addon_imps/link/zenodo.py new file mode 100644 index 00000000..dfbb411b --- /dev/null +++ b/addon_imps/link/zenodo.py @@ -0,0 +1,203 @@ +from __future__ import annotations + +import re +from dataclasses import dataclass +from enum import Enum +from http import HTTPStatus + +from django.core.exceptions import ValidationError + +from addon_toolkit.interfaces.link import ( + ItemResult, + ItemSampleResult, + ItemType, + LinkAddonHttpRequestorImp, + SupportedResourceTypes, +) + + +DEPOSITION_REGEX = re.compile(r"^deposition/(?P\d+)$") +FILE_REGEX = re.compile(r"^file/(?P\d+)/(?P\d+)$") + + +class _ResourceTypeMapping(Enum): + PUBLICATION = SupportedResourceTypes.Text + POSTER = SupportedResourceTypes.Text + PRESENTATION = SupportedResourceTypes.Text + DATASET = SupportedResourceTypes.Dataset + IMAGE = SupportedResourceTypes.Image + VIDEO = SupportedResourceTypes.Audiovisual + SOFTWARE = SupportedResourceTypes.Software + LESSON = SupportedResourceTypes.Text + PHYSICALOBJECT = SupportedResourceTypes.PhysicalObject + OTHER = SupportedResourceTypes.Other + + +@dataclass +class ZenodoLinkImp(LinkAddonHttpRequestorImp): + """Storage on Zenodo + + See https://developers.zenodo.org/ for API documentation + """ + + async def build_url_for_id(self, item_id: str) -> str: + if match := DEPOSITION_REGEX.match(item_id): + return self._build_deposition_url(match["id"]) + elif match := FILE_REGEX.match(item_id): + return self._build_file_url(match["deposition_id"], match["file_id"]) + else: + raise ValidationError(f"Invalid {item_id=}") + + def _build_file_url(self, deposition_id: str, file_id: str): + return f"{self.config.external_web_url}/record/{deposition_id}/files/{file_id}" + + def _build_deposition_url(self, deposition_id: str): + return f"{self.config.external_web_url}/deposition/{deposition_id}" + + async def get_external_account_id(self, _: dict[str, str]) -> str: + try: + async with self.network.GET("api/deposit/depositions") as response: + if not response.http_status.is_success: + raise ValidationError( + "Could not get Zenodo account id, check your API Token" + ) + # Zenodo doesn't have a specific endpoint for user info + # Using the first deposition's owner as the account ID + content = await response.json_content() + if content and len(content) > 0: + return str(content[0].get("owner", "")) + return "zenodo_user" # Fallback if no depositions found + except ValueError as exc: + if "relative url may not alter the base url" in str(exc).lower(): + raise ValidationError( + "Invalid host URL. Please check your Zenodo base URL." + ) + raise + + async def list_root_items(self, page_cursor: str = "") -> ItemSampleResult: + page = 1 + if page_cursor: + try: + page = int(page_cursor) + except ValueError: + pass + + async with self.network.GET( + "api/deposit/depositions", + query=[("page", page), ("size", 10)], + ) as response: + content = await response.json_content() + return self._parse_depositions_list(content, page) + + def _parse_depositions_list( + self, raw_content: list[dict], current_page: int + ) -> ItemSampleResult: + """Parse the response from the depositions endpoint. + + The depositions endpoint returns a list of depositions directly, not wrapped in a hits object. + """ + items = [] + for deposition in raw_content: + parsed = self._parse_deposition(deposition) + if parsed.doi: + items.append(parsed) + + next_page = current_page + 1 if len(raw_content) == 10 else None + prev_page = current_page - 1 if current_page > 1 else None + + return ItemSampleResult( + items=items, + total_count=len(items), + this_sample_cursor=str(current_page), + next_sample_cursor=str(next_page) if next_page else None, + prev_sample_cursor=str(prev_page) if prev_page else None, + first_sample_cursor="1", + ) + + async def get_item_info(self, item_id: str) -> ItemResult: + if not item_id: + return ItemResult(item_id="", item_name="", item_type=ItemType.FOLDER) + elif match := DEPOSITION_REGEX.match(item_id): + return await self._fetch_deposition(match["id"]) + elif match := FILE_REGEX.match(item_id): + return await self._fetch_file(match["record_id"], match["file_id"]) + else: + raise ValueError(f"Invalid item id: {item_id}") + + async def list_child_items( + self, + item_id: str, + page_cursor: str = "", + item_type: ItemType | None = None, + ) -> ItemSampleResult: + if not item_id: + return await self.list_root_items(page_cursor) + elif match := DEPOSITION_REGEX.match(item_id): + files = await self._fetch_record_files(match["id"]) + return ItemSampleResult( + items=files, + total_count=len(files), + ) + else: + return ItemSampleResult(items=[], total_count=0) + + async def _fetch_record_files(self, record_id: str) -> list[ItemResult]: + async with self.network.GET( + f"api/deposit/depositions/{record_id}/files" + ) as response: + if response.http_status == HTTPStatus.NOT_FOUND: + return [] + files = await response.json_content() + return [self._parse_file(file, record_id) for file in files] + + async def _fetch_file(self, record_id: str, file_id: str) -> ItemResult: + async with self.network.GET( + f"api/deposit/depositions/{record_id}/files/{file_id}" + ) as response: + if response.http_status == HTTPStatus.NOT_FOUND: + raise ValueError(f"Record not found: {record_id}") + + file = await response.json_content() + + return self._parse_file(file, record_id) + + def _parse_file(self, file, record_id): + return ItemResult( + item_id=f"file/{record_id}/{file.get('id')}", + item_name=file.get("filename"), + item_type=ItemType.RESOURCE, + item_link=f"{self.config.external_web_url}/record/{record_id}/files/{file.get('filename')}", + ) + + async def _fetch_deposition(self, deposition_id: str) -> ItemResult: + async with self.network.GET( + f"api/deposit/depositions/{deposition_id}" + ) as response: + if response.http_status == HTTPStatus.NOT_FOUND: + raise ValueError(f"Record not found: {deposition_id}") + content = await response.json_content() + return self._parse_deposition(content) + + def _parse_deposition(self, raw): + deposition_id = raw.get("id") + + metadata = raw.get("metadata", {}) + + resource_type = self._map_resource_type( + metadata.get("resource_type", {}).get("type", "") + ) + + return ItemResult( + item_id=f"deposition/{deposition_id}", + item_name=metadata.get("title", f"Deposition {deposition_id}"), + item_type=ItemType.FOLDER, + resource_type=resource_type, + item_link=f"{self.config.external_web_url}/record/{deposition_id}", + doi=metadata.get("doi", ""), + ) + + def _map_resource_type(self, zenodo_type: str) -> SupportedResourceTypes | None: + """Map Zenodo resource types to SupportedResourceTypes""" + if zenodo_type: + return _ResourceTypeMapping[zenodo_type.upper()] + return None diff --git a/addon_service/common/known_imps.py b/addon_service/common/known_imps.py index 8692e99e..04c86f2f 100644 --- a/addon_service/common/known_imps.py +++ b/addon_service/common/known_imps.py @@ -10,7 +10,7 @@ zotero_org, ) from addon_imps.computing import boa -from addon_imps.link import dataverse as link_dataverse +from addon_imps.link import dataverse as link_dataverse, zenodo from addon_imps.storage import ( bitbucket, box_dot_com, @@ -99,6 +99,7 @@ class KnownAddonImps(enum.Enum): # Type: Link LINK_DATAVERSE = link_dataverse.DataverseLinkImp + ZENODO = zenodo.ZenodoLinkImp if __debug__: BLARG = my_blarg.MyBlargStorage @@ -136,6 +137,7 @@ class AddonImpNumbers(enum.Enum): # Type: Link LINK_DATAVERSE = 1030 + ZENODO = 1031 if __debug__: BLARG = -7 From b2fd8438bc6aaf943160a6938011e4372c489fb4 Mon Sep 17 00:00:00 2001 From: Oleh Paduchak Date: Mon, 19 May 2025 13:50:11 +0300 Subject: [PATCH 2/3] added docstrings to each method of imp to make it more comprehensible by external developers --- addon_imps/link/zenodo.py | 68 ++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/addon_imps/link/zenodo.py b/addon_imps/link/zenodo.py index dfbb411b..a7c67510 100644 --- a/addon_imps/link/zenodo.py +++ b/addon_imps/link/zenodo.py @@ -41,6 +41,10 @@ class ZenodoLinkImp(LinkAddonHttpRequestorImp): """ async def build_url_for_id(self, item_id: str) -> str: + """ + This method is used by ConfiguredLinkAddon to construct user-facing url, + which will take them to the configured resource web page + """ if match := DEPOSITION_REGEX.match(item_id): return self._build_deposition_url(match["id"]) elif match := FILE_REGEX.match(item_id): @@ -55,26 +59,30 @@ def _build_deposition_url(self, deposition_id: str): return f"{self.config.external_web_url}/deposition/{deposition_id}" async def get_external_account_id(self, _: dict[str, str]) -> str: - try: - async with self.network.GET("api/deposit/depositions") as response: - if not response.http_status.is_success: - raise ValidationError( - "Could not get Zenodo account id, check your API Token" - ) - # Zenodo doesn't have a specific endpoint for user info - # Using the first deposition's owner as the account ID - content = await response.json_content() - if content and len(content) > 0: - return str(content[0].get("owner", "")) - return "zenodo_user" # Fallback if no depositions found - except ValueError as exc: - if "relative url may not alter the base url" in str(exc).lower(): + """ + This method fetches external account's id on the provider side, and is meant for use internally. + + For Zenodo it has to fetch user's depositions and take owner id form it, + as Zenodo API does not provide direct way to fetch user's internal id + """ + async with self.network.GET("api/deposit/depositions") as response: + if not response.http_status.is_success: raise ValidationError( - "Invalid host URL. Please check your Zenodo base URL." + "Could not get Zenodo account id, check your API Token" ) - raise + content = await response.json_content() + if content and len(content) > 0: + return str(content[0].get("owner", "")) + return "zenodo_user" # Fallback if no depositions found async def list_root_items(self, page_cursor: str = "") -> ItemSampleResult: + """ + This method lists root (top level ) from external provider. + Please note that it must return only public items. + + For Zenodo it fetches user's depositions and filters them to be public on client's side + (as the API does not such filtering) + """ page = 1 if page_cursor: try: @@ -95,6 +103,7 @@ def _parse_depositions_list( """Parse the response from the depositions endpoint. The depositions endpoint returns a list of depositions directly, not wrapped in a hits object. + Also filter only public (only published) depositions. """ items = [] for deposition in raw_content: @@ -115,6 +124,9 @@ def _parse_depositions_list( ) async def get_item_info(self, item_id: str) -> ItemResult: + """ + This method fetches desired item from an API + """ if not item_id: return ItemResult(item_id="", item_name="", item_type=ItemType.FOLDER) elif match := DEPOSITION_REGEX.match(item_id): @@ -130,6 +142,11 @@ async def list_child_items( page_cursor: str = "", item_type: ItemType | None = None, ) -> ItemSampleResult: + """ + This method lists children items of requested id. Used to navigate item tree (if there is one). + + For Zenodo it fetches files for given depositions, as there is no apparent item tree + """ if not item_id: return await self.list_root_items(page_cursor) elif match := DEPOSITION_REGEX.match(item_id): @@ -142,6 +159,9 @@ async def list_child_items( return ItemSampleResult(items=[], total_count=0) async def _fetch_record_files(self, record_id: str) -> list[ItemResult]: + """ + Helper used to fetch deposition's files + """ async with self.network.GET( f"api/deposit/depositions/{record_id}/files" ) as response: @@ -151,6 +171,9 @@ async def _fetch_record_files(self, record_id: str) -> list[ItemResult]: return [self._parse_file(file, record_id) for file in files] async def _fetch_file(self, record_id: str, file_id: str) -> ItemResult: + """ + Helper used to fetch deposition's file + """ async with self.network.GET( f"api/deposit/depositions/{record_id}/files/{file_id}" ) as response: @@ -161,7 +184,10 @@ async def _fetch_file(self, record_id: str, file_id: str) -> ItemResult: return self._parse_file(file, record_id) - def _parse_file(self, file, record_id): + def _parse_file(self, file: dict, record_id: str) -> ItemResult: + """ + Helper which parses file response into ItemResult + """ return ItemResult( item_id=f"file/{record_id}/{file.get('id')}", item_name=file.get("filename"), @@ -170,6 +196,9 @@ def _parse_file(self, file, record_id): ) async def _fetch_deposition(self, deposition_id: str) -> ItemResult: + """ + Helper used to fetch deposition + """ async with self.network.GET( f"api/deposit/depositions/{deposition_id}" ) as response: @@ -178,7 +207,10 @@ async def _fetch_deposition(self, deposition_id: str) -> ItemResult: content = await response.json_content() return self._parse_deposition(content) - def _parse_deposition(self, raw): + def _parse_deposition(self, raw: dict) -> ItemResult: + """ + Helper which parses deposition response into ItemResult + """ deposition_id = raw.get("id") metadata = raw.get("metadata", {}) From 83383db6f0570e93ae17fdeb7f8e2b6d5d48aced Mon Sep 17 00:00:00 2001 From: Oleh Paduchak Date: Mon, 26 May 2025 12:41:22 +0300 Subject: [PATCH 3/3] fixed pre-commit after rebase --- addon_service/common/known_imps.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/addon_service/common/known_imps.py b/addon_service/common/known_imps.py index 04c86f2f..f4e903db 100644 --- a/addon_service/common/known_imps.py +++ b/addon_service/common/known_imps.py @@ -10,7 +10,8 @@ zotero_org, ) from addon_imps.computing import boa -from addon_imps.link import dataverse as link_dataverse, zenodo +from addon_imps.link import dataverse as link_dataverse +from addon_imps.link import zenodo from addon_imps.storage import ( bitbucket, box_dot_com,