From a24b36d6fd1ccd176d4e1147bf8065c34ec39a45 Mon Sep 17 00:00:00 2001 From: byczong Date: Wed, 19 Feb 2025 14:35:59 +0100 Subject: [PATCH 1/2] do not use auth in the s3 downloader --- namegraph/download_from_s3.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/namegraph/download_from_s3.py b/namegraph/download_from_s3.py index ff934f37..cddbdbf9 100644 --- a/namegraph/download_from_s3.py +++ b/namegraph/download_from_s3.py @@ -4,30 +4,23 @@ import tarfile import boto3 +import botocore import hydra -from dotenv import load_dotenv from omegaconf import DictConfig class S3Downloader: def __init__(self): self.s3_client = None + self.region_name = 'us-east-1' self.bucket = 'prod-name-generator-namegeneratori-inputss3bucket-c26jqo3twfxy' def get_client(self): if self.s3_client is None: - load_dotenv() - - S3_ACCESS_KEY_ID = os.getenv('S3_ACCESS_KEY_ID') - S3_SECRET_ACCESS_KEY = os.getenv('S3_SECRET_ACCESS_KEY') - REGION_NAME = 'us-east-1' - self.s3_client = boto3.client('s3', - aws_access_key_id=S3_ACCESS_KEY_ID, - aws_secret_access_key=S3_SECRET_ACCESS_KEY, - region_name=REGION_NAME - ) - + self.s3_client = boto3.client( + 's3', region_name=self.region_name, config=botocore.config.Config(signature_version=botocore.UNSIGNED) + ) return self.s3_client def download_file(self, url, path, override=True): From 871da9be7a61fd1676c5c07a815d70c53ad4aca3 Mon Sep 17 00:00:00 2001 From: byczong Date: Wed, 19 Feb 2025 16:18:55 +0100 Subject: [PATCH 2/2] remove s3 keys from ci and dockerfile --- .github/workflows/build.yml | 2 -- .github/workflows/ci.yml | 3 --- Dockerfile | 3 --- 3 files changed, 8 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 35c7e0a8..c24fda59 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -24,8 +24,6 @@ jobs: - name: build image run: | docker build \ - --build-arg AWS_SECRET_ACCESS_KEY \ - --build-arg AWS_ACCESS_KEY_ID \ -t ${ECR_REPOSITORY}:commit_$(git rev-parse --short "$GITHUB_SHA") \ -t ${ECR_REPOSITORY}:${PROD_IMAGE_TAG} \ . diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dfa70628..9742a039 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -39,9 +39,6 @@ jobs: key: cache-v1.1 - name: Download data - env: - S3_ACCESS_KEY_ID: ${{ secrets.S3_ACCESS_KEY_ID }} - S3_SECRET_ACCESS_KEY: ${{ secrets.S3_SECRET_ACCESS_KEY }} run: | ln -s ../tests/data/wikipedia2vec.pkl data/wikipedia2vec.pkl touch data/wikipedia2vec.pkl.vectors.npy diff --git a/Dockerfile b/Dockerfile index a7689721..c6f811d1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,9 +17,6 @@ RUN poetry install --only main --no-root --no-interaction --no-ansi RUN poetry self add poetry-plugin-export RUN poetry export -f requirements.txt -o requirements.txt -ARG AWS_SECRET_ACCESS_KEY -ARG AWS_ACCESS_KEY_ID - COPY data/ data RUN mkdir namegraph