@@ -497,8 +497,8 @@ def config_repo(self) -> "GitRepo":
497497 def from_dict (
498498 cls ,
499499 data : dict ,
500- local_root_dir : Path = None ,
501- remote_root_url : str = None ,
500+ local_root_dir : Path = None ,
501+ remote_root_url : str = None ,
502502 ) -> "DataFederation" :
503503 """
504504 Return a DataFederation from a configuration mapping.
@@ -532,7 +532,7 @@ def from_dict(
532532 )
533533
534534 @classmethod
535- def load (cls , name : str , local_root_dir : Path , remote_root_url : str = None ) -> "DataFederation" :
535+ def load (cls , name : str , local_root_dir : Path , remote_root_url : str = None ) -> "DataFederation" :
536536 """
537537 Return an existing DataFederation loaded from ``local_root_dir`` using
538538 the existing configuration file at its conventional location.
@@ -551,7 +551,7 @@ def from_url(
551551 cls ,
552552 name : str ,
553553 remote_root_url : str ,
554- local_root_dir : Path = None ,
554+ local_root_dir : Path = None ,
555555 ) -> "DataFederation" :
556556 """
557557 Return a DataFederation loaded from a remote configuration file.
@@ -578,8 +578,8 @@ def from_yaml_config(
578578 cls ,
579579 name : str ,
580580 text : str ,
581- local_root_dir : Path = None ,
582- remote_root_url : str = None ,
581+ local_root_dir : Path = None ,
582+ remote_root_url : str = None ,
583583 ) -> "DataFederation" :
584584 """
585585 Return a DataFederation loaded from a YAML configuration text.
@@ -666,6 +666,28 @@ def get_cluster(self, data_kind: str) -> "DataCluster":
666666 """
667667 return self ._data_clusters_by_data_kind .get (data_kind )
668668
669+ def get_datafile_download_url (self , data_kind : str , purl : Union [str , PackageURL ]) -> Path :
670+ """
671+ Return the direct download URL to the data file for a data kind given a
672+ PURL, or None.
673+ """
674+ cluster = self .get_cluster (data_kind = data_kind )
675+ return cluster .get_datafile_download_url (purl = purl )
676+
677+ def get_local_datafile (self , data_kind : str , purl : Union [str , PackageURL ]) -> "LocalDataFile" :
678+ """
679+ Return a LocalDataFile for a data kind given a PURL, or None.
680+ """
681+ cluster = self .get_cluster (data_kind = data_kind )
682+ return cluster .get_datafile_local_path (purl = purl )
683+
684+
@dataclass
class LocalDataFile:
    """
    A local data file, optionally stored in a GitRepo.
    """

    # Location of the data file on the local filesystem.
    path: Path
    # GitRepo that stores this file. Defaults to None; presumably None means
    # the file lives in a plain directory outside of any Git repo -- confirm.
    git_repo: Union["GitRepo", None] = None
690+
669691
670692@dataclass (order = True )
671693class DataCluster :
@@ -783,6 +805,8 @@ def get_datafile_download_url(self, purl: Union[str, PackageURL]) -> str:
783805 Return the direct download URL to the data file of the data kind stored
784806 in this cluster given a PURL.
785807 """
808+ raise NotImplementedError ()
809+
786810 purl = as_purl (purl )
787811 # FIXME: create as member
788812 purl_type_config_by_type = {ptc .purl_type : ptc for ptc in self .purl_type_configs }
@@ -796,11 +820,12 @@ def get_datafile_download_url(self, purl: Union[str, PackageURL]) -> str:
796820 # construct a URL
797821 return direct_url
798822
799- def get_datafile_local_path (self , purl : Union [str , PackageURL ]) -> str :
823+ def get_local_datafile (self , purl : Union [str , PackageURL ]) -> LocalDataFile :
800824 """
801- Return the direct download URL to the data file of the data kind stored
802- in this cluster given a PURL.
825+ Return a LocalDataFile of the data kind stored in this cluster given a
826+ PURL, or None
803827 """
828+ raise NotImplementedError ()
804829
805830
806831@dataclass
@@ -880,7 +905,7 @@ def get_repos(self, data_kind: str) -> Iterable["DataRepository"]:
880905 hashids = self .hashids
881906
882907 for i in range (0 , self .number_of_dirs , dirs_per_repo ):
883- hashids_of_repo = hashids [i : i + dirs_per_repo ]
908+ hashids_of_repo = hashids [i : i + dirs_per_repo ]
884909 yield DataRepository .from_hashids (
885910 data_kind = data_kind ,
886911 purl_type = purl_type ,
@@ -1188,7 +1213,7 @@ def cluster_preset():
11881213@dataclass
11891214class DataRepository :
11901215 """
1191- A Git Data Repository in a DataCluster
1216+ A Data Repository (Git repo or local plain dir) in a DataCluster
11921217 """
11931218
11941219 data_kind : str
@@ -1199,7 +1224,7 @@ class DataRepository:
11991224 default_factory = list ,
12001225 repr = False ,
12011226 )
1202-
1227+
12031228 @property
12041229 def name (self ):
12051230 return f"{ self .data_kind } -{ self .purl_type } -{ self .start_hashid } "
@@ -1229,6 +1254,17 @@ def from_hashids(
12291254 data_directories = data_directories ,
12301255 )
12311256
1257+ @property
1258+ def git_repo (self ) -> "GitRepo" :
1259+ """
1260+ Return the GitRepo that contains the data for this DataRepository.
1261+ """
1262+ return GitRepo (
1263+ name = self .name ,
1264+ local_root_dir = self .local_root_dir ,
1265+ remote_root_url = self .remote_root_url ,
1266+ )
1267+
12321268
12331269@dataclass
12341270class DataDirectory :
@@ -1334,7 +1370,7 @@ def build_raw_download_url(
13341370 root_url : str ,
13351371 repo : str ,
13361372 path : str ,
1337- branch : str = "main" ,
1373+ branch : str = "main" ,
13381374 builder = None ,
13391375):
13401376 """
@@ -1356,7 +1392,7 @@ def build_raw_download_url_github(
13561392 root_url : str ,
13571393 repo : str ,
13581394 path : str ,
1359- branch : str = "main" ,
1395+ branch : str = "main" ,
13601396):
13611397 """
13621398 Return a direct access raw URL to a file in a github repo.
@@ -1370,7 +1406,7 @@ def build_raw_download_url_gitlab(
13701406 root_url : str ,
13711407 repo : str ,
13721408 path : str ,
1373- branch : str = "main" ,
1409+ branch : str = "main" ,
13741410):
13751411 """
13761412 Return a direct access raw URL to a file in a gitlab repo.
@@ -1383,15 +1419,15 @@ def build_raw_download_url_codeberg(
13831419 root_url : str ,
13841420 repo : str ,
13851421 path : str ,
1386- branch : str = "main" ,
1422+ branch : str = "main" ,
13871423):
13881424 """
13891425 Return a direct access raw URL to a file in a codeberg repo.
13901426 """
13911427 return "/" .join ([root_url , repo , "raw/branch" , branch , path ])
13921428
13931429
1394- def compute_purl_hash (purl : Union [PackageURL , str ], max_value : int = 1024 ) -> str :
1430+ def compute_purl_hash (purl : Union [PackageURL , str ], max_value : int = 1024 ) -> str :
13951431 """
13961432 Return a hash string from a ``purl`` string or object.
13971433
@@ -1445,7 +1481,7 @@ def compute_purl_hash(purl: Union[PackageURL, str], max_value: int = 1024) -> st
14451481 return _compute_hash (core_purl = core_purl , max_value = max_value )
14461482
14471483
1448- def _compute_hash (core_purl : str , max_value : int = 1024 ) -> str :
1484+ def _compute_hash (core_purl : str , max_value : int = 1024 ) -> str :
14491485 """
14501486 Return a hash string from a ``core_purl`` string. The core purl string
14511487 must be computed ahead
@@ -1474,7 +1510,7 @@ def _compute_hash(core_purl: str, max_value: int = 1024) -> str:
14741510 return f"{ short_int :04} "
14751511
14761512
1477- def is_valid_power_of_two (n : int , max_value : int = 1024 ):
1513+ def is_valid_power_of_two (n : int , max_value : int = 1024 ):
14781514 """
14791515 Return True if ``n`` is a power of two between 1 and ``max_value``.
14801516 Use bit manipulations.
@@ -1537,7 +1573,7 @@ def get_core_purl(purl: Union[PackageURL, str]):
15371573
15381574def package_path_elements (
15391575 purl : Union [PackageURL , str ],
1540- max_value : int = 1024 ,
1576+ max_value : int = 1024 ,
15411577):
15421578 """
15431579 Return a 4-tuple of POSIX path strings from the ``purl`` string or object.
0 commit comments