diff --git a/minecode_pipelines/pipelines/mine_pypi.py b/minecode_pipelines/pipelines/mine_pypi.py index 354c084a..3f30edb2 100644 --- a/minecode_pipelines/pipelines/mine_pypi.py +++ b/minecode_pipelines/pipelines/mine_pypi.py @@ -98,6 +98,7 @@ def mine_and_publish_packageurls(self): def update_state_and_checkpoints(self): pypi.update_state_and_checkpoints( config_repo=self.config_repo, + state=self.state, last_serial=self.last_serial, logger=self.log, ) diff --git a/minecode_pipelines/pipes/npm.py b/minecode_pipelines/pipes/npm.py index a09a8e6a..1fc013d1 100644 --- a/minecode_pipelines/pipes/npm.py +++ b/minecode_pipelines/pipes/npm.py @@ -59,11 +59,6 @@ COMPRESSED_NPM_REPLICATE_CHECKPOINT_PATH = "npm/" + COMPRESSED_PACKAGE_FILE_NAME NPM_CHECKPOINT_PATH = "npm/checkpoints.json" NPM_PACKAGES_CHECKPOINT_PATH = "npm/packages_checkpoint.json" - -# We are testing and storing mined packageURLs in one single repo per ecosystem for now -MINECODE_DATA_NPM_REPO = "https://github.com/aboutcode-data/minecode-data-npm-test" - - PACKAGE_BATCH_SIZE = 700 diff --git a/minecode_pipelines/pipes/pypi.py b/minecode_pipelines/pipes/pypi.py index beb5d08b..bb056b9b 100644 --- a/minecode_pipelines/pipes/pypi.py +++ b/minecode_pipelines/pipes/pypi.py @@ -24,10 +24,10 @@ from minecode_pipelines.pipes import fetch_checkpoint_from_github from minecode_pipelines.pipes import update_checkpoints_in_github +from minecode_pipelines.pipes import update_checkpoint_state from minecode_pipelines.pipes import get_mined_packages_from_checkpoint from minecode_pipelines.pipes import update_mined_packages_in_checkpoint from minecode_pipelines.pipes import get_packages_file_from_checkpoint -from minecode_pipelines.pipes import update_checkpoint_state from minecode_pipelines.pipes import write_packages_json from minecode_pipelines.pipes import MINECODE_PIPELINES_CONFIG_REPO from minecode_pipelines.pipes import INITIAL_SYNC_STATE @@ -58,10 +58,6 @@ PYPI_PACKAGES_CHECKPOINT_PATH = "pypi/packages_checkpoint.json" -# We are testing and storing mined packageURLs in one single repo per ecosystem for now -MINECODE_DATA_PYPI_REPO = "https://github.com/aboutcode-data/minecode-data-pypi-test" - - def mine_pypi_packages(logger=None): """ Mine pypi package names from pypi simple and save to checkpoints, @@ -256,7 +252,7 @@ def save_mined_packages_in_checkpoint(packages_mined, config_repo, logger=None): ) -def update_state_and_checkpoints(state, config_repo, last_serial, logger=None): +def update_state_and_checkpoints(config_repo, state, last_serial, logger=None): # If we are finshed mining all the packages in the intial sync, we can now # periodically sync the packages from latest if state == INITIAL_SYNC_STATE: