|
| 1 | +from gitxray.include import gx_definitions, gh_api, gh_time |
| 2 | +from collections import defaultdict |
| 3 | +import base64, re |
| 4 | + |
| 5 | + |
def run(gx_context, gx_output):
    """Xray entry point: audit a repository's GitHub Actions workflows and artifacts.

    Findings are reported through gx_output: repository-level findings via
    r_log() and per-contributor findings via c_log().

    Args:
        gx_context: provides the repository under analysis, its contributors,
            and verbosity settings.
        gx_output: logging sink for xray results.

    Returns:
        True always (the xray ran to completion).
    """
    print("\rRunning verifications on existing Workflows..."+" "*50)
    repository = gx_context.getRepository()
    # NOTE(review): the return value is never used in this module; the call is
    # kept in case gx_context caches contributors as a side effect -- confirm
    # before removing.
    contributors = gx_context.getContributors()

    _xray_workflows(repository, gx_context, gx_output)
    _xray_artifacts(repository, gx_output)

    print()
    return True


def _xray_workflows(repository, gx_context, gx_output):
    """Enumerate Actions workflows; analyze each one's run history and contents."""
    print(f"\rQuerying for repository action workflows.."+" "*50, end="")
    workflows = gh_api.fetch_repository_actions_workflows(repository)
    # Defaulting total_count to 0 avoids a TypeError on an unexpected payload.
    if workflows is None or workflows.get('total_count', 0) <= 0:
        return

    gx_output.r_log(f"{workflows.get('total_count')} Workflows available at: [{repository.get('url')}/actions/workflows]", rtype="workflows")
    for workflow in workflows.get('workflows'):
        workflow_file = workflow.get('path').split('/')[-1]
        gx_output.r_log(f"Workflow [{workflow.get('name')}] created [{workflow.get('created_at')}], updated [{workflow.get('updated_at')}]: [{workflow.get('html_url')}]", rtype="workflows")

        _xray_workflow_runs(repository, workflow, workflow_file, gx_context, gx_output)
        _xray_workflow_contents(repository, workflow, gx_output)


def _xray_workflow_runs(repository, workflow, workflow_file, gx_context, gx_output):
    """Analyze a workflow's run history: deleted runs, and who triggered runs."""
    runs = gh_api.fetch_repository_actions_runs(repository, workflow_file=workflow_file)
    if runs is None or runs.get('total_count', 0) <= 0:
        return

    run_contributors = defaultdict(int)
    run_non_contributors = defaultdict(int)
    run_actors = defaultdict(int)
    run_numbers = []
    for wf_run in runs.get('workflow_runs'):
        run_numbers.append(wf_run.get('run_number', -1))
        run_actors[wf_run.get('actor').get('login')] += 1

    # Gaps in the run_number sequence mean runs were deleted -- possibly an
    # attacker covering tracks, possibly routine cleanup.
    if len(run_numbers) > 0:
        min_run = min(run_numbers)
        max_run = max(run_numbers)
        missing_numbers = sorted(set(range(min_run, max_run+1)) - set(run_numbers))
        if len(missing_numbers) > 0:
            gx_output.r_log(f"Workflow [{workflow.get('name')}] has [{len(missing_numbers)}] missing or deleted runs. This could have been an attacker erasing their tracks, or legitimate cleanup. {gx_context.verboseLegend()}", rtype="workflows")
            if gx_context.verboseEnabled():
                gx_output.r_log(f"Missing run numbers for Workflow [{workflow.get('name')}]: {missing_numbers}", rtype="v_workflows")

    total_runs = int(runs.get('total_count'))
    for actor, actor_runs in run_actors.items():
        percentage_runs = (actor_runs / total_runs) * 100
        if gx_context.isContributor(actor):
            run_contributors[actor] += 1
            message = f"Contributor [{actor}] ran {actor_runs} [{percentage_runs:.2f}%] times workflow [{workflow.get('name')}] - See them at: [{repository.get('html_url')}/actions?query=actor%3A{actor}]"
        else:
            # Runs triggered by non-contributors are worth flagging separately.
            run_non_contributors[actor] += 1
            message = f"{actor} is NOT a contributor and ran {actor_runs} [{percentage_runs:.2f}%] times workflow [{workflow.get('name')}] - See them at: [{repository.get('html_url')}/actions?query=actor%3A{actor}]"

        gx_output.c_log(message, rtype="v_workflows", contributor=actor)
        gx_output.r_log(message, rtype="v_workflows")

    if len(run_non_contributors) > 0 or len(run_contributors) > 0:
        all_non_c_runners = len(run_non_contributors.keys())
        all_non_c_runs = sum(run_non_contributors.values())
        all_c_runners = len(run_contributors.keys())
        all_c_runs = sum(run_contributors.values())
        gx_output.r_log(f"Workflow [{workflow.get('name')}] was run by [{all_non_c_runners}] NON-contributors [{all_non_c_runs}] times and by [{all_c_runners}] contributors [{all_c_runs}] times. {gx_context.verboseLegend()}[{repository.get('html_url')}/actions/workflows/{workflow_file}]", rtype="workflows")


def _xray_workflow_contents(repository, workflow, gx_output):
    """Inspect the workflow YAML for risky patterns.

    Checks for: self-hosted runners, triggers abusable by external users,
    ACTIONS_ALLOW_UNSECURE_COMMANDS, secrets usage, and untrusted user input.
    """
    contents = gh_api.fetch_repository_file_contents(repository, workflow.get('path'))
    # Guard the fetch result like every other API call in this module.
    if contents is None or contents.get('content') is None:
        return

    # Contents arrive base64-encoded; lowercase once so every substring and
    # regex check below is effectively case-insensitive.
    encoded_content = contents.get('content')
    decoded_content = base64.b64decode(encoded_content).decode('utf-8').lower()

    # https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners
    if "self-hosted" in decoded_content:
        # BUGFIX: was gx_output.rlog(), which raised AttributeError whenever a
        # self-hosted runner was actually detected; the method is r_log().
        gx_output.r_log(f"Workflow [{workflow.get('name')}] appears to be executing in a self-hosted runner: [{workflow.get('html_url')}]", rtype="workflows")

    # https://securitylab.github.com/resources/github-actions-preventing-pwn-requests/
    if any(a in decoded_content for a in ["pull_request_target","workflow_run","issue_comment","issue:"]):
        gx_output.r_log(f"Workflow [{workflow.get('name')}] may be triggered by an event that might be misused by attackers. See more at https://gitxray.com/vulnerable_workflows", rtype="workflows")

    # https://github.com/actions/toolkit/issues/641
    # BUGFIX: the needle must be lowercase -- decoded_content was lowercased
    # above, so the original mixed-case comparison could never match.
    if "actions_allow_unsecure_commands: true" in decoded_content:
        gx_output.r_log(f"Workflow [{workflow.get('name')}] sets ACTIONS_ALLOW_UNSECURE_COMMANDS.", rtype="workflows")

    if "secrets." in decoded_content:
        # Hyphen placed last in the class for clarity; same set as the
        # original [A-Za-z-_0-9] (where the mid-class '-' was literal).
        secrets = re.findall(r"secrets\.[A-Za-z0-9_-]*", decoded_content)
        gx_output.r_log(f"Workflow [{workflow.get('name')}] makes use of Secrets: {secrets}: [{workflow.get('html_url')}]", rtype="workflows")

    # https://securitylab.github.com/resources/github-actions-untrusted-input/
    user_inputs = [
        input_label
        for input_label, pattern in gx_definitions.WORKFLOWS_USER_INPUT.items()
        if re.search(pattern, decoded_content)
    ]
    if len(user_inputs) > 0:
        gx_output.r_log(f"Workflow [{workflow.get('name')}] handles user input via: {user_inputs}", rtype="workflows")


def _xray_artifacts(repository, gx_output):
    """Enumerate Actions artifacts and flag any updated after creation."""
    print(f"\rQuerying for repository workflow artifacts.."+" "*30, end="")
    artifacts = gh_api.fetch_repository_actions_artifacts(repository)
    if artifacts is None or artifacts.get('total_count', 0) <= 0:
        return

    gx_output.r_log(f"{artifacts.get('total_count')} Artifacts available at: [{repository.get('url')}/actions/artifacts]", rtype="artifacts")
    for artifact in artifacts.get('artifacts'):
        # There are normally multiple artifacts hence we keep them under verbose.
        gx_output.r_log(f"Artifact [{artifact.get('name')}] created [{artifact.get('created_at')}], updated [{artifact.get('updated_at')}]: {artifact.get('url')}", rtype="v_artifacts")
        created_at = artifact.get('created_at')
        created_at_ts = gh_time.parse_date(created_at)
        updated_at = artifact.get('updated_at')
        updated_at_ts = gh_time.parse_date(updated_at)
        # This shouldn't happen but we still run a check; artifacts can't be updated but instead completely overwritten
        # More data here: https://github.com/actions/upload-artifact#overwriting-an-artifact
        if (updated_at_ts-created_at_ts).days > 0:
            gx_output.r_log(f"WARNING: An artifact [{artifact.get('name')}] was updated {(updated_at_ts-created_at_ts).days} days after being created: {artifact.get('url')}", rtype="artifacts")
0 commit comments