Skip to content

Commit 7a4f979

Browse files
committed
Added parsing and simple analysis of commit times, improved console output and added additional validation
1 parent 6691402 commit 7a4f979

File tree

1 file changed

+26
-5
lines changed

1 file changed

+26
-5
lines changed

src/gitxray/xrays/contributors_xray.py

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def run(gx_context, gx_output):
2121
c_anon = []
2222

2323
c_len = len(gx_context.getContributors())
24-
print(f"\rIdentified {c_len} contributors.." + ' '*40, flush=True)
24+
print(f"\rIdentified {c_len} contributors.." + ' '*70, flush=True)
2525

2626
# If focused on a contributor, let's first make sure the contributor exists in the repository
2727
if contributor_scope != None:
@@ -61,12 +61,12 @@ def run(gx_context, gx_output):
6161

6262
c_users_index = 1
6363
for contributor in c_users:
64+
if contributor is None: continue
6465
unique_pgp_keyids = []
6566
contributor_emails = []
6667
contributor_login = contributor.get('login')
6768
c_started_at = datetime.now()
6869
gx_output.c_log(f"X-Ray on contributor started at {c_started_at}", contributor=contributor_login, rtype="metrics")
69-
commits = gh_api.fetch_commits(repository, author=contributor.get('login'))
7070

7171
print(f"\r[{c_users_index}/{len(c_users)}] Analyzing Profile data for {contributor.get('login')}"+' '*40, end = '', flush=True)
7272
gx_output.c_log(f"Contributor URL: {contributor.get('html_url')}", rtype="urls")
@@ -113,7 +113,8 @@ def run(gx_context, gx_output):
113113
if contributor.get('site_admin') != False:
114114
gx_output.c_log(f"The account may be an administrator. It has 'site_admin' set to True", rtype="profiling")
115115

116-
if len(commits) > 0:
116+
commits = gh_api.fetch_commits(repository, author=contributor.get('login'))
117+
if commits != None and len(commits) > 0:
117118
commits_message = f", at {commits[0]['commit']['author']['date']}."
118119
oldest_commit = commits[-1]['commit']['author']['date']
119120
if len(commits) > 1:
@@ -124,6 +125,7 @@ def run(gx_context, gx_output):
124125
failed_verifications = []
125126
signature_attributes = []
126127
dates_mismatch_commits = []
128+
commit_times = defaultdict(int)
127129
print(f"\r[{c_users_index}/{len(c_users)}] Analyzing {len(commits)} commits and any signing keys for {contributor.get('login')}"+' '*40, end = '', flush=True)
128130
for commit in commits:
129131
c = commit["commit"]
@@ -168,14 +170,33 @@ def run(gx_context, gx_output):
168170
contributor_emails.append(c["author"]["email"])
169171
gx_context.linkIdentifier("EMAIL", [c["author"]["email"]], contributor_login)
170172

171-
if gh_time.parse_date(c['author']['date']) < contributor_created_at_time:
173+
commit_date = gh_time.parse_date(c['author']['date'])
174+
if commit_date < contributor_created_at_time:
172175
dates_mismatch_commits.append(c)
173176

177+
# Let's group commits by the hour they were made; the distribution may give us an insight.
178+
commit_times[commit_date.hour] += 1
179+
174180
if len(dates_mismatch_commits) > 0:
175181
gx_output.c_log(f"WARNING: UNRELIABLE DATES (Older than Account) in {len(dates_mismatch_commits)} commits by [{contributor_login}]. Potential tampering, account re-use, or Rebase. List at: {repository.get('html_url')}/commits/?author={contributor_login}&until={contributor.get('created_at')}", rtype="commits")
176182
gx_output.c_log(f"View commits with unreliable DATES here: {repository.get('html_url')}/commits/?author={contributor_login}&until={contributor.get('created_at')}", rtype="commits")
177183
gx_context.linkIdentifier("DATE_MISMATCH_COMMITS", [len(dates_mismatch_commits)], contributor_login)
178184

185+
if len(commit_times) > 0:
186+
# Let's link these commit hours to this contributor, and we'll do extra analysis in the associations X-Ray
187+
gx_context.linkIdentifier("COMMIT_HOURS", commit_times, contributor_login)
188+
189+
total_commits = len(commits)
190+
formatted_output = f"Commit Hours for [{total_commits}] commits:"
191+
sorted_commit_times = sorted(commit_times.items(), key=lambda item: item[1], reverse=True)
192+
193+
for commit_hour, count in sorted_commit_times:
194+
percentage = (count / total_commits) * 100
195+
range_label = gx_definitions.COMMIT_HOURS[commit_hour]
196+
formatted_output += f" [{range_label}: {count} ({percentage:.2f}%)]"
197+
198+
gx_output.c_log(formatted_output, rtype="commits")
199+
179200
# PGP Signature attributes: We have precise Key IDs used in signatures + details on signature creation time and algorithm
180201
unique_pgp_pka = set(attribute.get('pgp_publicKeyAlgorithm') for attribute in signature_attributes if attribute.get('pgp_pulicKeyAlgorithm') is not None)
181202
unique_pgp_st = set(attribute.get('pgp_sig_type') for attribute in signature_attributes if attribute.get('pgp_sig_type') is not None)
@@ -284,7 +305,7 @@ def run(gx_context, gx_output):
284305
# SSH Signing keys
285306
# https://docs.github.com/en/rest/users/ssh-signing-keys?apiVersion=2022-11-28#list-ssh-signing-keys-for-a-user
286307
ssh_signing_keys = gh_api.fetch_ssh_signing_keys(contributor_login)
287-
if len(ssh_signing_keys) > 0:
308+
if ssh_signing_keys != None and len(ssh_signing_keys) > 0:
288309
gx_output.c_log(f"{len(ssh_signing_keys)} SSH Keys used for Signatures in this contributor's profile", rtype="keys")
289310
gx_output.c_log(f"SSH Signing Keys: https://api.github.com/users/{contributor_login}/ssh_signing_keys", rtype="keys")
290311

0 commit comments

Comments
 (0)