From 66d104c8e346767a8e0132b862236d37b52648de Mon Sep 17 00:00:00 2001 From: "baogorek@gmail.com" Date: Wed, 7 Jan 2026 14:19:13 -0500 Subject: [PATCH 1/9] Add Modal integration for CI/CD workflows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace self-hosted GCP runners with Modal serverless compute for data builds and local area publishing workflows. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .github/workflows/local_area_publish.yaml | 44 ++------ .github/workflows/reusable_test.yaml | 63 ++++------- changelog_entry.yaml | 7 ++ modal_app/__init__.py | 1 + modal_app/data_build.py | 125 ++++++++++++++++++++++ modal_app/local_area.py | 72 +++++++++++++ uv.lock | 2 +- 7 files changed, 232 insertions(+), 82 deletions(-) create mode 100644 modal_app/__init__.py create mode 100644 modal_app/data_build.py create mode 100644 modal_app/local_area.py diff --git a/.github/workflows/local_area_publish.yaml b/.github/workflows/local_area_publish.yaml index 08fcdd5b..44a51ffd 100644 --- a/.github/workflows/local_area_publish.yaml +++ b/.github/workflows/local_area_publish.yaml @@ -6,6 +6,7 @@ on: paths: - 'policyengine_us_data/datasets/cps/local_area_calibration/**' - '.github/workflows/local_area_publish.yaml' + - 'modal_app/**' repository_dispatch: types: [calibration-updated] workflow_dispatch: @@ -17,54 +18,25 @@ on: jobs: publish-local-area: - runs-on: self-hosted + runs-on: ubuntu-latest permissions: contents: read - id-token: write env: HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }} + MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }} + MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }} steps: - name: Checkout repo uses: actions/checkout@v4 - - name: Install uv - uses: astral-sh/setup-uv@v5 - - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.13' - - name: Authenticate to Google Cloud - uses: google-github-actions/auth@v2 - with: - workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider" - service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com" - - - name: Install package - run: uv sync --dev - - - name: Download checkpoint (if exists) - continue-on-error: true - run: | - gsutil cp gs://policyengine-us-data/checkpoints/completed_states.txt . || true - gsutil cp gs://policyengine-us-data/checkpoints/completed_districts.txt . || true - gsutil cp gs://policyengine-us-data/checkpoints/completed_cities.txt . || true - - - name: Build and publish local area H5 files - run: uv run make publish-local-area - - - name: Upload checkpoint - if: always() - run: | - gsutil cp completed_states.txt gs://policyengine-us-data/checkpoints/ || true - gsutil cp completed_districts.txt gs://policyengine-us-data/checkpoints/ || true - gsutil cp completed_cities.txt gs://policyengine-us-data/checkpoints/ || true + - name: Install Modal CLI + run: pip install modal - - name: Clean up checkpoints on success - if: success() - run: | - gsutil rm gs://policyengine-us-data/checkpoints/completed_states.txt || true - gsutil rm gs://policyengine-us-data/checkpoints/completed_districts.txt || true - gsutil rm gs://policyengine-us-data/checkpoints/completed_cities.txt || true + - name: Run local area publishing on Modal + run: modal run modal_app/local_area.py --branch=${{ github.ref_name }} diff --git a/.github/workflows/reusable_test.yaml b/.github/workflows/reusable_test.yaml index ff147899..7b4ece1d 100644 --- a/.github/workflows/reusable_test.yaml +++ b/.github/workflows/reusable_test.yaml @@ -23,16 +23,22 @@ on: required: false POLICYENGINE_US_DATA_GITHUB_TOKEN: required: false + MODAL_TOKEN_ID: + required: false + MODAL_TOKEN_SECRET: + required: false jobs: test: - runs-on: self-hosted + runs-on: ubuntu-latest permissions: - contents: write # Required for GitHub Pages deploy - id-token: write # Required for GCP auth + contents: write + id-token: write env: HUGGING_FACE_TOKEN: ${{ secrets.HUGGING_FACE_TOKEN }} POLICYENGINE_US_DATA_GITHUB_TOKEN: ${{ secrets.POLICYENGINE_US_DATA_GITHUB_TOKEN }} + MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }} + MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }} steps: - name: Checkout repo uses: actions/checkout@v4 @@ -50,57 +56,24 @@ jobs: with: node-version: '24' - - uses: "google-github-actions/auth@v2" - if: inputs.upload_data - with: - workload_identity_provider: "projects/322898545428/locations/global/workloadIdentityPools/policyengine-research-id-pool/providers/prod-github-provider" - service_account: "policyengine-research@policyengine-research.iam.gserviceaccount.com" - - - name: Install package - run: uv sync --dev - - - name: Download data inputs - if: inputs.full_suite - run: uv run make download - - # Temporarily disabled - database target causing issues - # - name: Create and load calibration targets database - # if: inputs.full_suite - # run: make database - - - name: Build datasets + - name: Install Modal CLI if: inputs.full_suite - run: uv run make data - env: - TEST_LITE: ${{ !inputs.upload_data }} - PYTHON_LOG_LEVEL: INFO + run: pip install modal - - name: Build datasets for local area calibration + - name: Run data build on Modal if: inputs.full_suite run: | - LOCAL_AREA_CALIBRATION=true uv run python policyengine_us_data/datasets/cps/cps.py - LOCAL_AREA_CALIBRATION=true uv run python policyengine_us_data/datasets/puf/puf.py - LOCAL_AREA_CALIBRATION=true uv run python policyengine_us_data/datasets/cps/extended_cps.py - uv run python policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py 10500 + modal run modal_app/data_build.py \ + --upload=${{ inputs.upload_data }} \ + --branch=${{ github.ref_name }} \ + --test-lite=${{ !inputs.upload_data }} - - name: Run local area calibration tests - if: inputs.full_suite - run: uv run pytest policyengine_us_data/tests/test_local_area_calibration/ -v - - - name: Save calibration log - if: inputs.full_suite - uses: actions/upload-artifact@v4 - with: - name: calibration_log.csv - path: calibration_log.csv + - name: Install package + run: uv sync --dev - name: Run tests run: uv run pytest - - name: Upload data - if: inputs.upload_data - run: uv run make upload - - name: Test documentation builds run: uv run make documentation env: diff --git a/changelog_entry.yaml b/changelog_entry.yaml index e69de29b..40dcae30 100644 --- a/changelog_entry.yaml +++ b/changelog_entry.yaml @@ -0,0 +1,7 @@ +- bump: minor + changes: + added: + - Modal integration for CI/CD workflows, replacing self-hosted GCP runners + changed: + - Updated reusable_test.yaml to trigger data builds on Modal + - Updated local_area_publish.yaml to run on Modal diff --git a/modal_app/__init__.py b/modal_app/__init__.py new file mode 100644 index 00000000..d8bc5ccc --- /dev/null +++ b/modal_app/__init__.py @@ -0,0 +1 @@ +# Modal application for policyengine-us-data CI/CD diff --git a/modal_app/data_build.py b/modal_app/data_build.py new file mode 100644 index 00000000..de09920d --- /dev/null +++ b/modal_app/data_build.py @@ -0,0 +1,125 @@ +import os +import subprocess +import modal + +app = modal.App("policyengine-us-data") + +hf_secret = modal.Secret.from_name("huggingface-token") +gcp_secret = modal.Secret.from_name("gcp-credentials") + +data_volume = modal.Volume.from_name( + "policyengine-data", create_if_missing=True +) + +image = ( + modal.Image.debian_slim(python_version="3.13") + .apt_install("git") + .pip_install( + "policyengine-us>=1.353.0", + "policyengine-core>=3.19.0", + "pandas>=2.3.1", + "requests>=2.25.0", + "tqdm>=4.60.0", + "microdf_python>=1.0.0", + "microimpute>=1.1.4", + "google-cloud-storage>=2.0.0", + "google-auth>=2.0.0", + "scipy>=1.15.3", + "statsmodels>=0.14.5", + "openpyxl>=3.1.5", + "tables>=3.10.2", + "torch>=2.7.1", + "us>=2.0.0", + "sqlalchemy>=2.0.41", + "sqlmodel>=0.0.24", + "xlrd>=2.0.2", + "huggingface_hub", + ) +) + +REPO_URL = "https://github.com/PolicyEngine/policyengine-us-data.git" + + +@app.function( + image=image, + secrets=[hf_secret, gcp_secret], + volumes={"/data": data_volume}, + memory=32768, + cpu=8.0, + timeout=7200, +) +def build_datasets( + upload: bool = False, + branch: str = "main", + test_lite: bool = False, +): + os.chdir("/root") + subprocess.run(["git", "clone", "-b", branch, REPO_URL], check=True) + os.chdir("policyengine-us-data") + subprocess.run(["pip", "install", "-e", "."], check=True) + + env = os.environ.copy() + if test_lite: + env["TEST_LITE"] = "true" + + subprocess.run( + [ + "python", + "policyengine_us_data/storage/download_private_prerequisites.py", + ], + check=True, + env=env, + ) + + scripts = [ + "policyengine_us_data/utils/uprating.py", + "policyengine_us_data/datasets/acs/acs.py", + "policyengine_us_data/datasets/cps/cps.py", + "policyengine_us_data/datasets/puf/irs_puf.py", + "policyengine_us_data/datasets/puf/puf.py", + "policyengine_us_data/datasets/cps/extended_cps.py", + "policyengine_us_data/datasets/cps/enhanced_cps.py", + "policyengine_us_data/datasets/cps/small_enhanced_cps.py", + ] + for script in scripts: + print(f"Running {script}...") + subprocess.run(["python", script], check=True, env=env) + + os.rename( + "policyengine_us_data/storage/enhanced_cps_2024.h5", + "policyengine_us_data/storage/dense_enhanced_cps_2024.h5", + ) + subprocess.run( + [ + "cp", + "policyengine_us_data/storage/sparse_enhanced_cps_2024.h5", + "policyengine_us_data/storage/enhanced_cps_2024.h5", + ], + check=True, + ) + + if upload: + subprocess.run( + [ + "python", + "policyengine_us_data/storage/upload_completed_datasets.py", + ], + check=True, + env=env, + ) + + return "Data build completed successfully" + + +@app.local_entrypoint() +def main( + upload: bool = False, + branch: str = "main", + test_lite: bool = False, +): + result = build_datasets.remote( + upload=upload, + branch=branch, + test_lite=test_lite, + ) + print(result) diff --git a/modal_app/local_area.py b/modal_app/local_area.py new file mode 100644 index 00000000..b9f793c4 --- /dev/null +++ b/modal_app/local_area.py @@ -0,0 +1,72 @@ +import os +import subprocess +import modal + +app = modal.App("policyengine-us-data-local-area") + +hf_secret = modal.Secret.from_name("huggingface-token") +gcp_secret = modal.Secret.from_name("gcp-credentials") + +data_volume = modal.Volume.from_name( + "policyengine-data", create_if_missing=True +) + +image = ( + modal.Image.debian_slim(python_version="3.13") + .apt_install("git") + .pip_install( + "policyengine-us>=1.353.0", + "policyengine-core>=3.19.0", + "pandas>=2.3.1", + "requests>=2.25.0", + "tqdm>=4.60.0", + "microdf_python>=1.0.0", + "microimpute>=1.1.4", + "google-cloud-storage>=2.0.0", + "google-auth>=2.0.0", + "scipy>=1.15.3", + "statsmodels>=0.14.5", + "openpyxl>=3.1.5", + "tables>=3.10.2", + "torch>=2.7.1", + "us>=2.0.0", + "sqlalchemy>=2.0.41", + "sqlmodel>=0.0.24", + "xlrd>=2.0.2", + "huggingface_hub", + ) +) + +REPO_URL = "https://github.com/PolicyEngine/policyengine-us-data.git" + + +@app.function( + image=image, + secrets=[hf_secret, gcp_secret], + volumes={"/data": data_volume}, + memory=8192, + cpu=4.0, + timeout=86400, +) +def publish_all_local_areas(branch: str = "main"): + os.chdir("/root") + subprocess.run(["git", "clone", "-b", branch, REPO_URL], check=True) + os.chdir("policyengine-us-data") + subprocess.run(["pip", "install", "-e", "."], check=True) + + subprocess.run( + [ + "python", + "policyengine_us_data/datasets/cps/local_area_calibration/publish_local_area.py", + ], + check=True, + env=os.environ.copy(), + ) + + return "Local area publishing completed successfully" + + +@app.local_entrypoint() +def main(branch: str = "main"): + result = publish_all_local_areas.remote(branch=branch) + print(result) diff --git a/uv.lock b/uv.lock index 90a0d1d7..b4d056fa 100644 --- a/uv.lock +++ b/uv.lock @@ -1854,7 +1854,7 @@ wheels = [ [[package]] name = "policyengine-us-data" -version = "1.51.0" +version = "1.51.1" source = { editable = "." } dependencies = [ { name = "google-auth" }, From ea37f44c553b7f341b03dd629c183265e81bfd5d Mon Sep 17 00:00:00 2001 From: "baogorek@gmail.com" Date: Wed, 7 Jan 2026 14:22:00 -0500 Subject: [PATCH 2/9] Update uv.lock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- uv.lock | 88 +++++++++++++++++++++++++-------------------------------- 1 file changed, 39 insertions(+), 49 deletions(-) diff --git a/uv.lock b/uv.lock index b4d056fa..7f2e4e5f 100644 --- a/uv.lock +++ b/uv.lock @@ -52,15 +52,15 @@ wheels = [ [[package]] name = "anyio" -version = "4.12.0" +version = "4.12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "idna" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/16/ce/8a777047513153587e5434fd752e89334ac33e379aa3497db860eeb60377/anyio-4.12.0.tar.gz", hash = "sha256:73c693b567b0c55130c104d0b43a9baf3aa6a31fc6110116509f27bf75e21ec0", size = 228266, upload-time = "2025-11-28T23:37:38.911Z" } +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/9c/36c5c37947ebfb8c7f22e0eb6e4d188ee2d53aa3880f3f2744fb894f0cb1/anyio-4.12.0-py3-none-any.whl", hash = "sha256:dad2376a628f98eeca4881fc56cd06affd18f659b17a747d3ff0307ced94b1bb", size = 113362, upload-time = "2025-11-28T23:36:57.897Z" }, + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, ] [[package]] @@ -252,34 +252,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/8c/2b30c12155ad8de0cf641d76a8b396a16d2c36bc6d50b621a62b7c4567c1/build-1.3.0-py3-none-any.whl", hash = "sha256:7145f0b5061ba90a1500d60bd1b13ca0a8a4cebdd0cc16ed8adf1c0e739f43b4", size = 23382, upload-time = "2025-08-01T21:27:07.844Z" }, ] -[[package]] -name = "cachetools" -version = "6.2.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/bc/1d/ede8680603f6016887c062a2cf4fc8fdba905866a3ab8831aa8aa651320c/cachetools-6.2.4.tar.gz", hash = "sha256:82c5c05585e70b6ba2d3ae09ea60b79548872185d2f24ae1f2709d37299fd607", size = 31731, upload-time = "2025-12-15T18:24:53.744Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/fc/1d7b80d0eb7b714984ce40efc78859c022cd930e402f599d8ca9e39c78a4/cachetools-6.2.4-py3-none-any.whl", hash = "sha256:69a7a52634fed8b8bf6e24a050fb60bff1c9bd8f6d24572b99c32d4e71e62a51", size = 11551, upload-time = "2025-12-15T18:24:52.332Z" }, -] - [[package]] name = "census" -version = "0.8.24" +version = "0.8.25" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "requests" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/07/cc/5161b96b309331e54e3acb06b67ac1d2a98f52cc9d0e27627abb527115f4/census-0.8.24.tar.gz", hash = "sha256:5c6b789652f9a3ae2eb5762367405ae7ca04be7e0f3416700ddc300fc9fe7768", size = 13048, upload-time = "2025-04-08T15:52:14.664Z" } +sdist = { url = "https://files.pythonhosted.org/packages/98/8f/cdb1b8a7c210b4e2991d09e460cf8b2a36532c7e911d65bc8a6ba5dba8a0/census-0.8.25.tar.gz", hash = "sha256:433d3c280728d9c10ebfbf97df06c5911b6443a4ab5aa9a4e572af11e6d1a17c", size = 13074, upload-time = "2026-01-07T16:35:55.386Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/99/73/3868a695f082f379dce20f19b55451fef4c3f4337824f0991dc1a228301b/census-0.8.24-py3-none-any.whl", hash = "sha256:9ac2c2adca6a7c074d0e6551e1e3bd819724e5d309f0b0d72285247436b58089", size = 11401, upload-time = "2025-04-08T15:52:13.449Z" }, + { url = "https://files.pythonhosted.org/packages/9d/be/29054ec18c2dc99363f1e5a07bd3cee1b31cf04e2ca736a3b4926e96f00f/census-0.8.25-py3-none-any.whl", hash = "sha256:8396e71c92faa003b999c4a4f5996736047a148d34225b5347c47e255e81f344", size = 11421, upload-time = "2026-01-07T16:35:54.215Z" }, ] [[package]] name = "certifi" -version = "2025.11.12" +version = "2026.1.4" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/8c/58f469717fa48465e4a50c014a0400602d3c437d7c0c468e17ada824da3a/certifi-2025.11.12.tar.gz", hash = "sha256:d8ab5478f2ecd78af242878415affce761ca6bc54a22a27e026d7c25357c3316", size = 160538, upload-time = "2025-11-12T02:54:51.517Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/2d/a891ca51311197f6ad14a7ef42e2399f36cf2f9bd44752b3dc4eab60fdc5/certifi-2026.1.4.tar.gz", hash = "sha256:ac726dd470482006e014ad384921ed6438c457018f4b3d204aea4281258b2120", size = 154268, upload-time = "2026-01-04T02:42:41.825Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438, upload-time = "2025-11-12T02:54:49.735Z" }, + { url = "https://files.pythonhosted.org/packages/e6/ad/3cc14f097111b4de0040c83a525973216457bbeeb63739ef1ed275c1c021/certifi-2026.1.4-py3-none-any.whl", hash = "sha256:9943707519e4add1115f44c2bc244f782c0249876bf51b6599fee1ffbedd685c", size = 152900, upload-time = "2026-01-04T02:42:40.15Z" }, ] [[package]] @@ -495,11 +486,11 @@ wheels = [ [[package]] name = "filelock" -version = "3.20.1" +version = "3.20.2" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a7/23/ce7a1126827cedeb958fc043d61745754464eb56c5937c35bbf2b8e26f34/filelock-3.20.1.tar.gz", hash = "sha256:b8360948b351b80f420878d8516519a2204b07aefcdcfd24912a5d33127f188c", size = 19476, upload-time = "2025-12-15T23:54:28.027Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c1/e0/a75dbe4bca1e7d41307323dad5ea2efdd95408f74ab2de8bd7dba9b51a1a/filelock-3.20.2.tar.gz", hash = "sha256:a2241ff4ddde2a7cebddf78e39832509cb045d18ec1a09d7248d6bfc6bfbbe64", size = 19510, upload-time = "2026-01-02T15:33:32.582Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e3/7f/a1a97644e39e7316d850784c642093c99df1290a460df4ede27659056834/filelock-3.20.1-py3-none-any.whl", hash = "sha256:15d9e9a67306188a44baa72f569d2bfd803076269365fdea0934385da4dc361a", size = 16666, upload-time = "2025-12-15T23:54:26.874Z" }, + { url = "https://files.pythonhosted.org/packages/9a/30/ab407e2ec752aa541704ed8f93c11e2a5d92c168b8a755d818b74a3c5c2d/filelock-3.20.2-py3-none-any.whl", hash = "sha256:fbba7237d6ea277175a32c54bb71ef814a8546d8601269e1bfc388de333974e8", size = 16697, upload-time = "2026-01-02T15:33:31.133Z" }, ] [[package]] @@ -554,16 +545,15 @@ wheels = [ [[package]] name = "google-auth" -version = "2.45.0" +version = "2.47.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "cachetools" }, { name = "pyasn1-modules" }, { name = "rsa" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/e5/00/3c794502a8b892c404b2dea5b3650eb21bfc7069612fbfd15c7f17c1cb0d/google_auth-2.45.0.tar.gz", hash = "sha256:90d3f41b6b72ea72dd9811e765699ee491ab24139f34ebf1ca2b9cc0c38708f3", size = 320708, upload-time = "2025-12-15T22:58:42.889Z" } +sdist = { url = "https://files.pythonhosted.org/packages/60/3c/ec64b9a275ca22fa1cd3b6e77fefcf837b0732c890aa32d2bd21313d9b33/google_auth-2.47.0.tar.gz", hash = "sha256:833229070a9dfee1a353ae9877dcd2dec069a8281a4e72e72f77d4a70ff945da", size = 323719, upload-time = "2026-01-06T21:55:31.045Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/97/451d55e05487a5cd6279a01a7e34921858b16f7dc8aa38a2c684743cd2b3/google_auth-2.45.0-py2.py3-none-any.whl", hash = "sha256:82344e86dc00410ef5382d99be677c6043d72e502b625aa4f4afa0bdacca0f36", size = 233312, upload-time = "2025-12-15T22:58:40.777Z" }, + { url = "https://files.pythonhosted.org/packages/db/18/79e9008530b79527e0d5f79e7eef08d3b179b7f851cfd3a2f27822fbdfa9/google_auth-2.47.0-py3-none-any.whl", hash = "sha256:c516d68336bfde7cf0da26aab674a36fedcf04b37ac4edd59c597178760c3498", size = 234867, upload-time = "2026-01-06T21:55:28.6Z" }, ] [[package]] @@ -750,7 +740,7 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "1.2.3" +version = "1.2.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -764,9 +754,9 @@ dependencies = [ { name = "typer-slim" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/a7/c8/9cd2fcb670ba0e708bfdf95a1177b34ca62de2d3821df0773bc30559af80/huggingface_hub-1.2.3.tar.gz", hash = "sha256:4ba57f17004fd27bb176a6b7107df579865d4cde015112db59184c51f5602ba7", size = 614605, upload-time = "2025-12-12T15:31:42.161Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fb/94/42ed2ff780f4bc58acbe4b8cb98eb4574310ad6feba12f76a820e7546120/huggingface_hub-1.2.4.tar.gz", hash = "sha256:7a1d9ec4802e64372d1d152d69fb8e26d943f15a2289096fbc8e09e7b90c21a5", size = 614771, upload-time = "2026-01-06T11:01:29.828Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/df/8d/7ca723a884d55751b70479b8710f06a317296b1fa1c1dec01d0420d13e43/huggingface_hub-1.2.3-py3-none-any.whl", hash = "sha256:c9b7a91a9eedaa2149cdc12bdd8f5a11780e10de1f1024718becf9e41e5a4642", size = 520953, upload-time = "2025-12-12T15:31:40.339Z" }, + { url = "https://files.pythonhosted.org/packages/dd/b0/113c4a688e7af9f0b92f5585cb425e71134e04c83a0a4a1e62db90edee20/huggingface_hub-1.2.4-py3-none-any.whl", hash = "sha256:2db69b91877d9d34825f5cd2a63b94f259011a77dcf761b437bf510fbe9522e9", size = 520980, upload-time = "2026-01-06T11:01:27.789Z" }, ] [[package]] @@ -822,7 +812,7 @@ wheels = [ [[package]] name = "ipython" -version = "8.37.0" +version = "8.38.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, @@ -835,9 +825,9 @@ dependencies = [ { name = "stack-data" }, { name = "traitlets" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/85/31/10ac88f3357fc276dc8a64e8880c82e80e7459326ae1d0a211b40abf6665/ipython-8.37.0.tar.gz", hash = "sha256:ca815841e1a41a1e6b73a0b08f3038af9b2252564d01fc405356d34033012216", size = 5606088, upload-time = "2025-05-31T16:39:09.613Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e5/61/1810830e8b93c72dcd3c0f150c80a00c3deb229562d9423807ec92c3a539/ipython-8.38.0.tar.gz", hash = "sha256:9cfea8c903ce0867cc2f23199ed8545eb741f3a69420bfcf3743ad1cec856d39", size = 5513996, upload-time = "2026-01-05T10:59:06.901Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/91/d0/274fbf7b0b12643cbbc001ce13e6a5b1607ac4929d1b11c72460152c9fc3/ipython-8.37.0-py3-none-any.whl", hash = "sha256:ed87326596b878932dbcb171e3e698845434d8c61b8d8cd474bf663041a9dcf2", size = 831864, upload-time = "2025-05-31T16:39:06.38Z" }, + { url = "https://files.pythonhosted.org/packages/9f/df/db59624f4c71b39717c423409950ac3f2c8b2ce4b0aac843112c7fb3f721/ipython-8.38.0-py3-none-any.whl", hash = "sha256:750162629d800ac65bb3b543a14e7a74b0e88063eac9b92124d4b2aa3f6d8e86", size = 831813, upload-time = "2026-01-05T10:59:04.239Z" }, ] [[package]] @@ -942,7 +932,7 @@ wheels = [ [[package]] name = "jsonschema" -version = "4.25.1" +version = "4.26.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "attrs" }, @@ -950,9 +940,9 @@ dependencies = [ { name = "referencing" }, { name = "rpds-py" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/74/69/f7185de793a29082a9f3c7728268ffb31cb5095131a9c139a74078e27336/jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85", size = 357342, upload-time = "2025-08-18T17:03:50.038Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bf/9c/8c95d856233c1f82500c2450b8c68576b4cf1c871db3afac5c34ff84e6fd/jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63", size = 90040, upload-time = "2025-08-18T17:03:48.373Z" }, + { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, ] [package.optional-dependencies] @@ -1171,15 +1161,15 @@ wheels = [ [[package]] name = "microdf-python" -version = "1.1.1" +version = "1.1.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy" }, { name = "pandas" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/31/e9/7f77ee19b2d3dcdb561ceaeb915ef6a944eb315412d0e8065c12817b7ff4/microdf_python-1.1.1.tar.gz", hash = "sha256:97dd0cb8562d98a32bbd35861719ae565544143e2ad8a462d6f6261058d9c469", size = 17290, upload-time = "2025-12-01T14:13:57.131Z" } +sdist = { url = "https://files.pythonhosted.org/packages/88/9b/e41192b8bdd7e9017f5b940f29db00f926b06b93aea1e03de76c808b7c9e/microdf_python-1.1.2.tar.gz", hash = "sha256:34ce6542dbc56013de812f5355a144363f427972af45135329d43a5c85921d4b", size = 17731, upload-time = "2026-01-07T12:06:02.113Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/8d/53/a6b3474d1f46bbab9ffa8a6d5727662e440e7689337316c90f4c3b8c038f/microdf_python-1.1.1-py3-none-any.whl", hash = "sha256:f06ec231a58dcf7ab1c6fa6a8cf25632a34ebec0f67b9067c7c0364d4c0aeb4f", size = 18096, upload-time = "2025-12-01T14:13:56.056Z" }, + { url = "https://files.pythonhosted.org/packages/8a/bc/32478cfc2b88123d50fe137e65033eb670945b0e5ca35c135f9e141344c2/microdf_python-1.1.2-py3-none-any.whl", hash = "sha256:d5c534ac0a4615a39fa820301f630fc1dda17702b278be7906507b53e2c0e254", size = 18583, upload-time = "2026-01-07T12:06:00.957Z" }, ] [[package]] @@ -1697,11 +1687,11 @@ wheels = [ [[package]] name = "pathspec" -version = "0.12.1" +version = "1.0.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } +sdist = { url = "https://files.pythonhosted.org/packages/28/2e/83722ece0f6ee24387d6cb830dd562ddbcd6ce0b9d76072c6849670c31b4/pathspec-1.0.1.tar.gz", hash = "sha256:e2769b508d0dd47b09af6ee2c75b2744a2cb1f474ae4b1494fd6a1b7a841613c", size = 129791, upload-time = "2026-01-06T13:02:55.15Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, + { url = "https://files.pythonhosted.org/packages/d2/fe/2257c71721aeab6a6e8aa1f00d01f2a20f58547d249a6c8fef5791f559fc/pathspec-1.0.1-py3-none-any.whl", hash = "sha256:8870061f22c58e6d83463cfce9a7dd6eca0512c772c1001fb09ac64091816721", size = 54584, upload-time = "2026-01-06T13:02:53.601Z" }, ] [[package]] @@ -1840,16 +1830,16 @@ wheels = [ [[package]] name = "policyengine-us" -version = "1.485.0" +version = "1.497.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "microdf-python" }, { name = "policyengine-core" }, { name = "tqdm" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/02/0d/2e69a55b0ebc89e70fa89778210383a62c101dedeca60ee4bdd3de085d6c/policyengine_us-1.485.0.tar.gz", hash = "sha256:e4c39e1a8a1f53ffd8fe42b02ecc5cd4f12b8a36dd99aafcc1a8af5ffbbdfa46", size = 8364044, upload-time = "2025-12-29T22:39:50.435Z" } +sdist = { url = "https://files.pythonhosted.org/packages/fd/51/9df605ac6939ccbd8a93f5fe8a23d08b4b97e3806ea509c022c603e44266/policyengine_us-1.497.1.tar.gz", hash = "sha256:2f5eb011c8c8c205b3d313f42aa52b8356266921f46611ac9346bc04361eff61", size = 8449641, upload-time = "2026-01-06T15:19:16.995Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/51/63e2619e44b0e035d0aa5d081ec06285570625d2018f2dc95763ee3fef0f/policyengine_us-1.485.0-py3-none-any.whl", hash = "sha256:38aa7bfcd3798b7dd0f3a45aa906037c1870505f420de19c0501ab829275ae31", size = 6899839, upload-time = "2025-12-29T22:39:47.987Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6d/c877b3e438ae3a8d509161e7439c399629e85647d9238a9d168b06dce21d/policyengine_us-1.497.1-py3-none-any.whl", hash = "sha256:b589e060545f6e38099b0e6233a2ba94195e5c15d53b5aaa8c1efa97b025cd9f", size = 7139280, upload-time = "2026-01-06T15:19:14.666Z" }, ] [[package]] @@ -3028,15 +3018,15 @@ wheels = [ [[package]] name = "typer-slim" -version = "0.21.0" +version = "0.21.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f9/3b/2f60ce16f578b1db5b8816d37d6a4d9786b33b76407fc8c13b0b86312c31/typer_slim-0.21.0.tar.gz", hash = "sha256:f2dbd150cfa0fead2242e21fa9f654dfc64773763ddf07c6be9a49ad34f79557", size = 106841, upload-time = "2025-12-25T09:54:55.998Z" } +sdist = { url = "https://files.pythonhosted.org/packages/17/d4/064570dec6358aa9049d4708e4a10407d74c99258f8b2136bb8702303f1a/typer_slim-0.21.1.tar.gz", hash = "sha256:73495dd08c2d0940d611c5a8c04e91c2a0a98600cbd4ee19192255a233b6dbfd", size = 110478, upload-time = "2026-01-06T11:21:11.176Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/84/e97abf10e4a699194ff07fd586ec7f4cf867d9d04bead559a65f9e7aff84/typer_slim-0.21.0-py3-none-any.whl", hash = "sha256:92aee2188ac6fc2b2924bd75bb61a340b78bd8cd51fd9735533ce5a856812c8e", size = 47174, upload-time = "2025-12-25T09:54:54.609Z" }, + { url = "https://files.pythonhosted.org/packages/c8/0a/4aca634faf693e33004796b6cee0ae2e1dba375a800c16ab8d3eff4bb800/typer_slim-0.21.1-py3-none-any.whl", hash = "sha256:6e6c31047f171ac93cc5a973c9e617dbc5ab2bddc4d0a3135dc161b4e2020e0d", size = 47444, upload-time = "2026-01-06T11:21:12.441Z" }, ] [[package]] @@ -3080,11 +3070,11 @@ wheels = [ [[package]] name = "urllib3" -version = "2.6.2" +version = "2.6.3" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1e/24/a2a2ed9addd907787d7aa0355ba36a6cadf1768b934c652ea78acbd59dcd/urllib3-2.6.2.tar.gz", hash = "sha256:016f9c98bb7e98085cb2b4b17b87d2c702975664e4f060c6532e64d1c1a5e797", size = 432930, upload-time = "2025-12-11T15:56:40.252Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/6d/b9/4095b668ea3678bf6a0af005527f39de12fb026516fb3df17495a733b7f8/urllib3-2.6.2-py3-none-any.whl", hash = "sha256:ec21cddfe7724fc7cb4ba4bea7aa8e2ef36f607a4bab81aa6ce42a13dc3f03dd", size = 131182, upload-time = "2025-12-11T15:56:38.584Z" }, + { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" }, ] [[package]] From b77aa6dd1fbfd2a960bee67ff62e3650f29406f8 Mon Sep 17 00:00:00 2001 From: "baogorek@gmail.com" Date: Wed, 7 Jan 2026 14:28:32 -0500 Subject: [PATCH 3/9] Fix uv.lock freshness check to use --locked MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The --upgrade flag causes platform-specific wheel differences between local and CI environments. Using --locked just validates consistency. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .github/workflows/pr_code_changes.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pr_code_changes.yaml b/.github/workflows/pr_code_changes.yaml index d8f2e7bf..cf335694 100644 --- a/.github/workflows/pr_code_changes.yaml +++ b/.github/workflows/pr_code_changes.yaml @@ -44,9 +44,8 @@ jobs: uses: astral-sh/setup-uv@v5 - name: Check lock file is up-to-date run: | - uv lock --upgrade - git diff --exit-code uv.lock || { - echo "::error::uv.lock is outdated. Run 'uv lock --upgrade' and commit the changes." + uv lock --locked || { + echo "::error::uv.lock is outdated. Run 'uv lock' and commit the changes." exit 1 } From 5894194b63b139f61ca8428c6c9f8ddc678c2304 Mon Sep 17 00:00:00 2001 From: "baogorek@gmail.com" Date: Wed, 7 Jan 2026 14:32:30 -0500 Subject: [PATCH 4/9] Fix Modal CLI flag syntax in workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modal CLI uses --flag/--no-flag syntax, not --flag=value. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .github/workflows/reusable_test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/reusable_test.yaml b/.github/workflows/reusable_test.yaml index 7b4ece1d..e11dffdd 100644 --- a/.github/workflows/reusable_test.yaml +++ b/.github/workflows/reusable_test.yaml @@ -64,9 +64,9 @@ jobs: if: inputs.full_suite run: | modal run modal_app/data_build.py \ - --upload=${{ inputs.upload_data }} \ + ${{ inputs.upload_data && '--upload' || '--no-upload' }} \ --branch=${{ github.ref_name }} \ - --test-lite=${{ !inputs.upload_data }} + ${{ inputs.upload_data && '--no-test-lite' || '--test-lite' }} - name: Install package run: uv sync --dev From 945bf25dc26ca4631eb384d7fca9f4a8c7f61e99 Mon Sep 17 00:00:00 2001 From: "baogorek@gmail.com" Date: Wed, 7 Jan 2026 14:35:51 -0500 Subject: [PATCH 5/9] Fix branch reference for PR builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use github.head_ref for PRs (falls back to github.ref_name for pushes). github.ref_name returns '465/merge' for PRs which isn't a valid branch. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .github/workflows/local_area_publish.yaml | 2 +- .github/workflows/reusable_test.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/local_area_publish.yaml b/.github/workflows/local_area_publish.yaml index 44a51ffd..e23468a6 100644 --- a/.github/workflows/local_area_publish.yaml +++ b/.github/workflows/local_area_publish.yaml @@ -39,4 +39,4 @@ jobs: run: pip install modal - name: Run local area publishing on Modal - run: modal run modal_app/local_area.py --branch=${{ github.ref_name }} + run: modal run modal_app/local_area.py --branch=${{ github.head_ref || github.ref_name }} diff --git a/.github/workflows/reusable_test.yaml b/.github/workflows/reusable_test.yaml index e11dffdd..0a8dd370 100644 --- a/.github/workflows/reusable_test.yaml +++ b/.github/workflows/reusable_test.yaml @@ -65,7 +65,7 @@ jobs: run: | modal run modal_app/data_build.py \ ${{ inputs.upload_data && '--upload' || '--no-upload' }} \ - --branch=${{ github.ref_name }} \ + --branch=${{ github.head_ref || github.ref_name }} \ ${{ inputs.upload_data && '--no-test-lite' || '--test-lite' }} - name: Install package From 0d3640d20ff3b990797a7784659877a3f4ea3964 Mon Sep 17 00:00:00 2001 From: "baogorek@gmail.com" Date: Wed, 7 Jan 2026 14:40:47 -0500 Subject: [PATCH 6/9] Fix GCP credentials for upload and double timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Write GOOGLE_APPLICATION_CREDENTIALS_JSON to temp file for google.auth.default() - Increase data build timeout from 2h to 4h 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- modal_app/data_build.py | 16 +++++++++++++++- modal_app/local_area.py | 14 ++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/modal_app/data_build.py b/modal_app/data_build.py index de09920d..56a9c3e9 100644 --- a/modal_app/data_build.py +++ b/modal_app/data_build.py @@ -40,19 +40,33 @@ REPO_URL = "https://github.com/PolicyEngine/policyengine-us-data.git" +def setup_gcp_credentials(): + """Write GCP credentials JSON to a temp file for google.auth.default().""" + creds_json = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS_JSON") + if creds_json: + creds_path = "/tmp/gcp-credentials.json" + with open(creds_path, "w") as f: + f.write(creds_json) + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = creds_path + return creds_path + return None + + @app.function( image=image, secrets=[hf_secret, gcp_secret], volumes={"/data": data_volume}, memory=32768, cpu=8.0, - timeout=7200, + timeout=14400, ) def build_datasets( upload: bool = False, branch: str = "main", test_lite: bool = False, ): + setup_gcp_credentials() + os.chdir("/root") subprocess.run(["git", "clone", "-b", branch, REPO_URL], check=True) os.chdir("policyengine-us-data") diff --git a/modal_app/local_area.py b/modal_app/local_area.py index b9f793c4..c28a0fa5 100644 --- a/modal_app/local_area.py +++ b/modal_app/local_area.py @@ -40,6 +40,18 @@ REPO_URL = "https://github.com/PolicyEngine/policyengine-us-data.git" +def setup_gcp_credentials(): + """Write GCP credentials JSON to a temp file for google.auth.default().""" + creds_json = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS_JSON") + if creds_json: + creds_path = "/tmp/gcp-credentials.json" + with open(creds_path, "w") as f: + f.write(creds_json) + os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = creds_path + return creds_path + return None + + @app.function( image=image, secrets=[hf_secret, gcp_secret], @@ -49,6 +61,8 @@ timeout=86400, ) def publish_all_local_areas(branch: str = "main"): + setup_gcp_credentials() + os.chdir("/root") subprocess.run(["git", "clone", "-b", branch, REPO_URL], check=True) os.chdir("policyengine-us-data") From be1de667dab9918219ae711e53da8a4aa9447ed7 Mon Sep 17 00:00:00 2001 From: "baogorek@gmail.com" Date: Wed, 7 Jan 2026 15:40:11 -0500 Subject: [PATCH 7/9] Add tests to Modal build, fix workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add local area calibration dataset builds to Modal - Add local area calibration tests to Modal - Add main pytest suite to Modal - Only run basic tests on GitHub runner when full_suite=false - Install dev dependencies for pytest 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .github/workflows/reusable_test.yaml | 5 +-- modal_app/data_build.py | 54 ++++++++++++++++++++++++++-- 2 files changed, 55 insertions(+), 4 deletions(-) diff --git a/.github/workflows/reusable_test.yaml b/.github/workflows/reusable_test.yaml index 0a8dd370..95d55281 100644 --- a/.github/workflows/reusable_test.yaml +++ b/.github/workflows/reusable_test.yaml @@ -60,7 +60,7 @@ jobs: if: inputs.full_suite run: pip install modal - - name: Run data build on Modal + - name: Run data build and tests on Modal if: inputs.full_suite run: | modal run modal_app/data_build.py \ @@ -71,7 +71,8 @@ jobs: - name: Install package run: uv sync --dev - - name: Run tests + - name: Run basic tests + if: ${{ !inputs.full_suite }} run: uv run pytest - name: Test documentation builds diff --git a/modal_app/data_build.py b/modal_app/data_build.py index 56a9c3e9..cc47faf4 100644 --- a/modal_app/data_build.py +++ b/modal_app/data_build.py @@ -34,6 +34,7 @@ "sqlmodel>=0.0.24", "xlrd>=2.0.2", "huggingface_hub", + "pytest", ) ) @@ -70,12 +71,13 @@ def build_datasets( os.chdir("/root") subprocess.run(["git", "clone", "-b", branch, REPO_URL], check=True) os.chdir("policyengine-us-data") - subprocess.run(["pip", "install", "-e", "."], check=True) + subprocess.run(["pip", "install", "-e", ".[dev]"], check=True) env = os.environ.copy() if test_lite: env["TEST_LITE"] = "true" + # Download prerequisites subprocess.run( [ "python", @@ -85,6 +87,7 @@ def build_datasets( env=env, ) + # Build main datasets scripts = [ "policyengine_us_data/utils/uprating.py", "policyengine_us_data/datasets/acs/acs.py", @@ -112,6 +115,53 @@ def build_datasets( check=True, ) + # Build local area calibration datasets + print("Building local area calibration datasets...") + local_area_env = env.copy() + local_area_env["LOCAL_AREA_CALIBRATION"] = "true" + + subprocess.run( + ["python", "policyengine_us_data/datasets/cps/cps.py"], + check=True, + env=local_area_env, + ) + subprocess.run( + ["python", "policyengine_us_data/datasets/puf/puf.py"], + check=True, + env=local_area_env, + ) + subprocess.run( + ["python", "policyengine_us_data/datasets/cps/extended_cps.py"], + check=True, + env=local_area_env, + ) + subprocess.run( + [ + "python", + "policyengine_us_data/datasets/cps/local_area_calibration/create_stratified_cps.py", + "10500", + ], + check=True, + env=env, + ) + + # Run local area calibration tests + print("Running local area calibration tests...") + subprocess.run( + [ + "pytest", + "policyengine_us_data/tests/test_local_area_calibration/", + "-v", + ], + check=True, + env=env, + ) + + # Run main test suite + print("Running main test suite...") + subprocess.run(["pytest"], check=True, env=env) + + # Upload if requested if upload: subprocess.run( [ @@ -122,7 +172,7 @@ def build_datasets( env=env, ) - return "Data build completed successfully" + return "Data build and tests completed successfully" @app.local_entrypoint() From 62c94dfdbe3208c4fd907d906f3d209e85cd8793 Mon Sep 17 00:00:00 2001 From: "baogorek@gmail.com" Date: Wed, 7 Jan 2026 16:17:16 -0500 Subject: [PATCH 8/9] Fix TEST_LITE env leak to local area calibration builds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The local area calibration scripts should run WITHOUT TEST_LITE, matching the original workflow where only the main data build had the TEST_LITE env var set. This fixes the size mismatch error where main CPS was built with TEST_LITE (28k tax units) but local area expected full size (76k tax units). 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- modal_app/data_build.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modal_app/data_build.py b/modal_app/data_build.py index cc47faf4..2cde7c94 100644 --- a/modal_app/data_build.py +++ b/modal_app/data_build.py @@ -115,9 +115,9 @@ def build_datasets( check=True, ) - # Build local area calibration datasets + # Build local area calibration datasets (without TEST_LITE - must match full dataset) print("Building local area calibration datasets...") - local_area_env = env.copy() + local_area_env = os.environ.copy() local_area_env["LOCAL_AREA_CALIBRATION"] = "true" subprocess.run( @@ -142,7 +142,7 @@ def build_datasets( "10500", ], check=True, - env=env, + env=local_area_env, ) # Run local area calibration tests @@ -154,7 +154,7 @@ def build_datasets( "-v", ], check=True, - env=env, + env=local_area_env, ) # Run main test suite From a3557d1862592752ccca4e549b2ebd1d9a957e31 Mon Sep 17 00:00:00 2001 From: "baogorek@gmail.com" Date: Wed, 7 Jan 2026 19:32:26 -0500 Subject: [PATCH 9/9] Remove unused data_volume, add timeout comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove data_volume which was mounted at /data but never used - Document 24h timeout in local_area.py (processes all states/districts) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- modal_app/data_build.py | 5 ----- modal_app/local_area.py | 7 +------ 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/modal_app/data_build.py b/modal_app/data_build.py index 2cde7c94..5492e586 100644 --- a/modal_app/data_build.py +++ b/modal_app/data_build.py @@ -7,10 +7,6 @@ hf_secret = modal.Secret.from_name("huggingface-token") gcp_secret = modal.Secret.from_name("gcp-credentials") -data_volume = modal.Volume.from_name( - "policyengine-data", create_if_missing=True -) - image = ( modal.Image.debian_slim(python_version="3.13") .apt_install("git") @@ -56,7 +52,6 @@ def setup_gcp_credentials(): @app.function( image=image, secrets=[hf_secret, gcp_secret], - volumes={"/data": data_volume}, memory=32768, cpu=8.0, timeout=14400, diff --git a/modal_app/local_area.py b/modal_app/local_area.py index c28a0fa5..3f8f903b 100644 --- a/modal_app/local_area.py +++ b/modal_app/local_area.py @@ -7,10 +7,6 @@ hf_secret = modal.Secret.from_name("huggingface-token") gcp_secret = modal.Secret.from_name("gcp-credentials") -data_volume = modal.Volume.from_name( - "policyengine-data", create_if_missing=True -) - image = ( modal.Image.debian_slim(python_version="3.13") .apt_install("git") @@ -55,10 +51,9 @@ def setup_gcp_credentials(): @app.function( image=image, secrets=[hf_secret, gcp_secret], - volumes={"/data": data_volume}, memory=8192, cpu=4.0, - timeout=86400, + timeout=86400, # 24h: processes 50 states + 435 districts with checkpointing ) def publish_all_local_areas(branch: str = "main"): setup_gcp_credentials()