From bf02e6a9bdbb7c77960989ae28461b80bacbc7f7 Mon Sep 17 00:00:00 2001
From: "C.J. Collier"
Date: Wed, 1 Oct 2025 02:43:31 +0000
Subject: [PATCH] Fix(gcloud): Correct service account email format and improve creation

This commit addresses issues in the create-dpgce script related to service
account handling, particularly for domain-scoped projects.

- lib/env.sh:
  - Correctly formats the service account email (GSA) for domain-scoped
    projects (e.g., c9h.org:project-id) by including the domain in the
    email address
    (e.g., sa-name@project-id.c9h.org.iam.gserviceaccount.com).

- lib/shared-functions.sh:
  - Replaced `gcloud iam service-accounts describe` with `list --filter`
    for a more reliable existence check, as describe was failing to find
    existing service accounts.
  - Added a 10-second wait after creating a new service account, before any
    roles are bound to it.
  - Added a retry loop (up to 5 attempts, 10 seconds apart) for
    `gcloud projects add-iam-policy-binding` to handle potential IAM
    propagation delays after service account creation.
  - Ensured the script exits if service account creation fails or if a role
    binding still fails after all retries.
  - Consolidated the per-role binding commands into a single loop over a
    ROLES array. The loop also binds roles/bigquery.dataEditor, which was
    not among the bindings it replaces, and passes `--condition=None` to
    every binding.

These changes ensure the script can reliably create and configure the
necessary service account and its IAM roles, unblocking cluster creation.
--- gcloud/lib/env.sh | 11 ++++- gcloud/lib/shared-functions.sh | 89 +++++++++++++++++++++++----------- 2 files changed, 72 insertions(+), 28 deletions(-) diff --git a/gcloud/lib/env.sh b/gcloud/lib/env.sh index bea45b5..24c7c46 100644 --- a/gcloud/lib/env.sh +++ b/gcloud/lib/env.sh @@ -57,7 +57,16 @@ export DATAPROC_IMAGE_VERSION="${IMAGE_VERSION}" #export INIT_ACTIONS_ROOT="gs://goog-dataproc-initialization-actions-${REGION}" export AUTOSCALING_POLICY_NAME=aspolicy-${CLUSTER_NAME} export SA_NAME=sa-${CLUSTER_NAME} -export GSA=${SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com + +if [[ "${PROJECT_ID}" == *":"* ]]; then + # Domain-scoped project + DOMAIN=$(echo "${PROJECT_ID}" | cut -d':' -f1) + PROJECT_NAME=$(echo "${PROJECT_ID}" | cut -d':' -f2) + export GSA="${SA_NAME}@${PROJECT_NAME}.${DOMAIN}.iam.gserviceaccount.com" +else + # Regular project + export GSA="${SA_NAME}@${PROJECT_ID}.iam.gserviceaccount.com" +fi export INIT_ACTIONS_ROOT="gs://${BUCKET}/dataproc-initialization-actions" export YARN_DOCKER_IMAGE="gcr.io/${PROJECT_ID}/${USER}/cudatest-ubuntu18:latest" diff --git a/gcloud/lib/shared-functions.sh b/gcloud/lib/shared-functions.sh index 6e72a5f..a63f232 100644 --- a/gcloud/lib/shared-functions.sh +++ b/gcloud/lib/shared-functions.sh @@ -581,7 +581,7 @@ gcloud beta billing projects \ once you have credentials to run the above command, -Press enter > +Press enter > " read @@ -838,37 +838,72 @@ function delete_phs_cluster() { function create_service_account() { set -x - if gcloud iam service-accounts describe "${GSA}" > /dev/null ; then - echo "service account ${SA_NAME} already exists" - return 0 ; fi - gcloud iam service-accounts create "${SA_NAME}" \ - --description="Service account for use with cluster ${CLUSTER_NAME}" \ - --display-name="${SA_NAME}" + # Attempt to describe the service account + echo "Checking for service account ${GSA}..." 
+ # Use list with a filter on the SA NAME part of the email + SA_EXISTS=$(gcloud iam service-accounts list \ + --project="${PROJECT_ID}" \ + --filter="email=${GSA}" \ + --format="value(email)") - gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ - --member="serviceAccount:${GSA}" \ - --role=roles/dataproc.worker - - gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ - --member="serviceAccount:${GSA}" \ - --role=roles/storage.objectCreator - - gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ - --member="serviceAccount:${GSA}" \ - --role=roles/storage.objectViewer + if [[ -n "${SA_EXISTS}" ]]; then + echo "Service account ${GSA} already exists." + else + echo "Service account ${GSA} not found, attempting to create..." + if ! gcloud iam service-accounts create "${SA_NAME}" \ + --project="${PROJECT_ID}" \ + --description="Service account for use with cluster ${CLUSTER_NAME}" \ + --display-name="${SA_NAME}"; then + echo "ERROR: Failed to create service account ${SA_NAME}." + exit 1 + fi + echo "Service account ${GSA} created successfully." + echo "Waiting 10s for IAM propagation..." + sleep 10 + fi - gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ - --member="serviceAccount:${GSA}" \ - --role=roles/secretmanager.secretAccessor + # Bind roles to the service account + ROLES=( + roles/dataproc.worker + roles/bigquery.dataEditor + roles/storage.objectCreator + roles/storage.objectViewer + roles/secretmanager.secretAccessor + roles/compute.viewer + roles/compute.instanceAdmin.v1 + ) + + for role in "${ROLES[@]}"; do + echo "Binding ${role} to ${GSA}..." + MAX_RETRIES=5 + RETRY_COUNT=0 + SLEEP_TIME=10 + while [[ ${RETRY_COUNT} -lt ${MAX_RETRIES} ]]; do + # Capture output and error + BIND_OUTPUT=$(gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ + --member="serviceAccount:${GSA}" \ + --role="${role}" --condition=None 2>&1) + BIND_EXIT_CODE=$? + + if [[ ${BIND_EXIT_CODE} -eq 0 ]]; then + echo "${role} bound successfully to ${GSA}." 
+ break # Exit the while loop on success + fi - gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ - --member="serviceAccount:${GSA}" \ - --role=roles/compute.viewer + RETRY_COUNT=$((RETRY_COUNT + 1)) + echo "Attempt ${RETRY_COUNT}/${MAX_RETRIES} failed for ${role}." + echo "Error: ${BIND_OUTPUT}" - gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ - --member="serviceAccount:${GSA}" \ - --role=roles/compute.instanceAdmin.v1 + if [[ ${RETRY_COUNT} -lt ${MAX_RETRIES} ]]; then + echo "Retrying in ${SLEEP_TIME} seconds..." + sleep ${SLEEP_TIME} + else + echo "Failed to bind ${role} to ${GSA} after ${MAX_RETRIES} attempts." + exit 1 + fi + done + done gcloud iam service-accounts add-iam-policy-binding "${GSA}" \ --member="serviceAccount:${GSA}" \