Skip to content

Commit e3c99cb

Browse files
committed
Design some draft API
Signed-off-by: Philippe Ombredanne <pombredanne@aboutcode.org>
1 parent e52181c commit e3c99cb

File tree

1 file changed

+56
-20
lines changed

1 file changed

+56
-20
lines changed

aboutcode/federated/__init__.py

Lines changed: 56 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -497,8 +497,8 @@ def config_repo(self) -> "GitRepo":
497497
def from_dict(
498498
cls,
499499
data: dict,
500-
local_root_dir: Path = None,
501-
remote_root_url: str = None,
500+
local_root_dir: Path=None,
501+
remote_root_url: str=None,
502502
) -> "DataFederation":
503503
"""
504504
Return a DataFederation from a configuration mapping.
@@ -532,7 +532,7 @@ def from_dict(
532532
)
533533

534534
@classmethod
535-
def load(cls, name: str, local_root_dir: Path, remote_root_url: str = None) -> "DataFederation":
535+
def load(cls, name: str, local_root_dir: Path, remote_root_url: str=None) -> "DataFederation":
536536
"""
537537
Return an existing DataFederation loaded from ``local_root_dir`` using
538538
the existing configuration file at its conventional location.
@@ -551,7 +551,7 @@ def from_url(
551551
cls,
552552
name: str,
553553
remote_root_url: str,
554-
local_root_dir: Path = None,
554+
local_root_dir: Path=None,
555555
) -> "DataFederation":
556556
"""
557557
Return a DataFederation loaded from a remote configuration file.
@@ -578,8 +578,8 @@ def from_yaml_config(
578578
cls,
579579
name: str,
580580
text: str,
581-
local_root_dir: Path = None,
582-
remote_root_url: str = None,
581+
local_root_dir: Path=None,
582+
remote_root_url: str=None,
583583
) -> "DataFederation":
584584
"""
585585
Return a DataFederation loaded from a YAML configuration text.
@@ -666,6 +666,28 @@ def get_cluster(self, data_kind: str) -> "DataCluster":
666666
"""
667667
return self._data_clusters_by_data_kind.get(data_kind)
668668

669+
def get_datafile_download_url(self, data_kind: str, purl: Union[str, PackageURL]) -> Path:
670+
"""
671+
Return the direct download URL to the data file for a data kind given a
672+
PURL, or None.
673+
"""
674+
cluster = self.get_cluster(data_kind=data_kind)
675+
return cluster.get_datafile_download_url(purl=purl)
676+
677+
def get_local_datafile(self, data_kind: str, purl: Union[str, PackageURL]) -> "LocalDataFile":
678+
"""
679+
Return a LocalDataFile for a data kind given a PURL, or None.
680+
"""
681+
cluster = self.get_cluster(data_kind=data_kind)
682+
return cluster.get_datafile_local_path(purl=purl)
683+
684+
685+
@dataclass
686+
class LocalDataFile:
687+
"""A local data file stored optionally in a GitRepo"""
688+
path: Path
689+
git_repo: "GitRepo" = None
690+
669691

670692
@dataclass(order=True)
671693
class DataCluster:
@@ -783,6 +805,8 @@ def get_datafile_download_url(self, purl: Union[str, PackageURL]) -> str:
783805
Return the direct download URL to the data file of the data kind stored
784806
in this cluster given a PURL.
785807
"""
808+
raise NotImplementedError()
809+
786810
purl = as_purl(purl)
787811
# FIXME: create as member
788812
purl_type_config_by_type = {ptc.purl_type: ptc for ptc in self.purl_type_configs}
@@ -796,11 +820,12 @@ def get_datafile_download_url(self, purl: Union[str, PackageURL]) -> str:
796820
# construct a URL
797821
return direct_url
798822

799-
def get_datafile_local_path(self, purl: Union[str, PackageURL]) -> str:
823+
def get_local_datafile(self, purl: Union[str, PackageURL]) -> LocalDataFile:
800824
"""
801-
Return the direct download URL to the data file of the data kind stored
802-
in this cluster given a PURL.
825+
Return a LocalDataFile of the data kind stored in this cluster given a
826+
PURL, or None.
803827
"""
828+
raise NotImplementedError()
804829

805830

806831
@dataclass
@@ -880,7 +905,7 @@ def get_repos(self, data_kind: str) -> Iterable["DataRepository"]:
880905
hashids = self.hashids
881906

882907
for i in range(0, self.number_of_dirs, dirs_per_repo):
883-
hashids_of_repo = hashids[i : i + dirs_per_repo]
908+
hashids_of_repo = hashids[i: i + dirs_per_repo]
884909
yield DataRepository.from_hashids(
885910
data_kind=data_kind,
886911
purl_type=purl_type,
@@ -1188,7 +1213,7 @@ def cluster_preset():
11881213
@dataclass
11891214
class DataRepository:
11901215
"""
1191-
A Git Data Repository in a DataCluster
1216+
A Data Repository (Git repo or local plain dir) in a DataCluster
11921217
"""
11931218

11941219
data_kind: str
@@ -1199,7 +1224,7 @@ class DataRepository:
11991224
default_factory=list,
12001225
repr=False,
12011226
)
1202-
1227+
12031228
@property
12041229
def name(self):
12051230
return f"{self.data_kind}-{self.purl_type}-{self.start_hashid}"
@@ -1229,6 +1254,17 @@ def from_hashids(
12291254
data_directories=data_directories,
12301255
)
12311256

1257+
@property
1258+
def git_repo(self) -> "GitRepo":
1259+
"""
1260+
Return the GitRepo that contains the data for this DataRepository.
1261+
"""
1262+
return GitRepo(
1263+
name=self.name,
1264+
local_root_dir=self.local_root_dir,
1265+
remote_root_url=self.remote_root_url,
1266+
)
1267+
12321268

12331269
@dataclass
12341270
class DataDirectory:
@@ -1334,7 +1370,7 @@ def build_raw_download_url(
13341370
root_url: str,
13351371
repo: str,
13361372
path: str,
1337-
branch: str = "main",
1373+
branch: str="main",
13381374
builder=None,
13391375
):
13401376
"""
@@ -1356,7 +1392,7 @@ def build_raw_download_url_github(
13561392
root_url: str,
13571393
repo: str,
13581394
path: str,
1359-
branch: str = "main",
1395+
branch: str="main",
13601396
):
13611397
"""
13621398
Return a direct access raw URL to a file in a github repo.
@@ -1370,7 +1406,7 @@ def build_raw_download_url_gitlab(
13701406
root_url: str,
13711407
repo: str,
13721408
path: str,
1373-
branch: str = "main",
1409+
branch: str="main",
13741410
):
13751411
"""
13761412
Return a direct access raw URL to a file in a gitlab repo.
@@ -1383,15 +1419,15 @@ def build_raw_download_url_codeberg(
13831419
root_url: str,
13841420
repo: str,
13851421
path: str,
1386-
branch: str = "main",
1422+
branch: str="main",
13871423
):
13881424
"""
13891425
Return a direct access raw URL to a file in a codeberg repo.
13901426
"""
13911427
return "/".join([root_url, repo, "raw/branch", branch, path])
13921428

13931429

1394-
def compute_purl_hash(purl: Union[PackageURL, str], max_value: int = 1024) -> str:
1430+
def compute_purl_hash(purl: Union[PackageURL, str], max_value: int=1024) -> str:
13951431
"""
13961432
Return a hash string from a ``purl`` string or object.
13971433
@@ -1445,7 +1481,7 @@ def compute_purl_hash(purl: Union[PackageURL, str], max_value: int = 1024) -> st
14451481
return _compute_hash(core_purl=core_purl, max_value=max_value)
14461482

14471483

1448-
def _compute_hash(core_purl: str, max_value: int = 1024) -> str:
1484+
def _compute_hash(core_purl: str, max_value: int=1024) -> str:
14491485
"""
14501486
Return a hash string from a ``core_purl`` string. The core purl string
14511487
must be computed ahead
@@ -1474,7 +1510,7 @@ def _compute_hash(core_purl: str, max_value: int = 1024) -> str:
14741510
return f"{short_int:04}"
14751511

14761512

1477-
def is_valid_power_of_two(n: int, max_value: int = 1024):
1513+
def is_valid_power_of_two(n: int, max_value: int=1024):
14781514
"""
14791515
Return True if ``n`` is a power of two between 1 and ``max_value``.
14801516
Use bit manipulations.
@@ -1537,7 +1573,7 @@ def get_core_purl(purl: Union[PackageURL, str]):
15371573

15381574
def package_path_elements(
15391575
purl: Union[PackageURL, str],
1540-
max_value: int = 1024,
1576+
max_value: int=1024,
15411577
):
15421578
"""
15431579
Return a 4-tuple of POSIX path strings from the ``purl`` string or object.

0 commit comments

Comments
 (0)