Skip to content

Commit 1fa387d

Browse files
committed
New check between commit dates and repository creation time.
Also added a second attempt at pulling commits when filtering by author, using their GH email.
1 parent 8eea0d7 commit 1fa387d

File tree

2 files changed

+32
-14
lines changed

2 files changed

+32
-14
lines changed

src/gitxray/xrays/contributors_xray.py

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ def run(gx_context, gx_output, gh_api):
4040
gx_output.stdout(f"IMPORTANT: The repository has 500+ contributors. GitHub states > 500 contributors will appear as Anonymous")
4141
gx_output.stdout(f"More information at: https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repository-contributors")
4242

43+
# We will use this created_at_time for the repository in one or two loops below.
44+
repository_created_at_time = gh_time.parse_date(repository.get('created_at'))
45+
4346
for i, c in enumerate(gx_context.getContributors()):
4447
if contributor_scope != None and c.get('login') not in contributor_scope: continue
4548
gx_output.stdout('\rFetching repository contributor details [{}/{}]'.format(i+1, c_len), end='', flush=True)
@@ -116,17 +119,24 @@ def run(gx_context, gx_output, gh_api):
116119
gx_output.c_log(f"The account may be an administrator. It has 'site_admin' set to True", rtype="profiling")
117120

118121
commits = gh_api.fetch_commits(repository, author=contributor.get('login'))
119-
if commits != None and len(commits) > 0:
120-
commits_message = f", at {commits[0]['commit']['author']['date']}."
121-
oldest_commit = commits[-1]['commit']['author']['date']
122-
if len(commits) > 1:
123-
commits_message = f", first one at {oldest_commit} and last one at {commits[0]['commit']['author']['date']}."
124-
gx_output.c_log(f'Made (to this repo) {len(commits)} commits{commits_message}', rtype="commits")
122+
# The REST API does not always work reliably when filtering commits by author.
123+
# We build a username@users.noreply.github.com address to use as an alternative author.
124+
if commits != None:
125+
if len(commits) == 0:
126+
commits = gh_api.fetch_commits(repository, author=contributor.get('login')+"@users.noreply.github.com")
127+
128+
if len(commits) > 0:
129+
commits_message = f", at {commits[0]['commit']['author']['date']}."
130+
oldest_commit = commits[-1]['commit']['author']['date']
131+
if len(commits) > 1:
132+
commits_message = f", first one at {oldest_commit} and last one at {commits[0]['commit']['author']['date']}."
133+
gx_output.c_log(f'Made (to this repo) {len(commits)} commits{commits_message}', rtype="commits")
125134

126135
signed_commits = []
127136
failed_verifications = []
128137
signature_attributes = []
129-
dates_mismatch_commits = []
138+
dates_mismatch_commits_account = []
139+
dates_mismatch_commits_repository = []
130140
commit_times = defaultdict(int)
131141
gx_output.stdout(f"\r[{c_users_index}/{len(c_users)}] Analyzing {len(commits)} commits and any signing keys for {contributor.get('login')}"+' '*40, end = '', flush=True)
132142
for commit in commits:
@@ -174,15 +184,22 @@ def run(gx_context, gx_output, gh_api):
174184

175185
commit_date = gh_time.parse_date(c['author']['date'])
176186
if commit_date < contributor_created_at_time:
177-
dates_mismatch_commits.append(c)
187+
dates_mismatch_commits_account.append(c)
188+
189+
if commit_date < repository_created_at_time:
190+
dates_mismatch_commits_repository.append(c)
178191

179192
# Let's group by commit hour, we may have an insight here.
180193
commit_times[commit_date.hour] += 1
181194

182-
if len(dates_mismatch_commits) > 0:
183-
gx_output.c_log(f"WARNING: UNRELIABLE DATES (Older than Account) in {len(dates_mismatch_commits)} commits by [{contributor_login}]. Potential tampering, account re-use, or Rebase. List at: {repository.get('html_url')}/commits/?author={contributor_login}&until={contributor.get('created_at')}", rtype="commits")
195+
if len(dates_mismatch_commits_account) > 0:
196+
gx_output.c_log(f"WARNING: UNRELIABLE COMMIT DATES (Older than Account, which was created on {contributor.get('created_at')}) in {len(dates_mismatch_commits_account)} commits by [{contributor_login}]. Potential tampering, account re-use, or Rebase. List at: {repository.get('html_url')}/commits/?author={contributor_login}&until={contributor.get('created_at')}", rtype="commits")
184197
gx_output.c_log(f"View commits with unreliable DATES here: {repository.get('html_url')}/commits/?author={contributor_login}&until={contributor.get('created_at')}", rtype="commits")
185-
gx_context.linkIdentifier("DATE_MISMATCH_COMMITS", [len(dates_mismatch_commits)], contributor_login)
198+
gx_context.linkIdentifier("DATE_MISMATCH_COMMITS_ACCOUNT", [len(dates_mismatch_commits_account)], contributor_login)
199+
200+
if len(dates_mismatch_commits_repository) > 0:
201+
gx_output.c_log(f"WARNING: UNRELIABLE COMMIT DATES (Older than Repository, which was created on {repository.get('created_at')}) in {len(dates_mismatch_commits_repository)} commits by [{contributor_login}]. Potential tampering, account re-use, or Rebase. List at: {repository.get('html_url')}/commits/?author={contributor_login}&until={contributor.get('created_at')}", rtype="commits")
202+
gx_context.linkIdentifier("DATE_MISMATCH_COMMITS_REPOSITORY", [len(dates_mismatch_commits_repository)], contributor_login)
186203

187204
if len(commit_times) > 0:
188205
# Let's link these commit hours to this contributor, and we'll do extra analysis in the associations X-Ray

src/gitxray/xrays/repository_xray.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -481,9 +481,10 @@ def run(gx_context, gx_output, gh_api):
481481
gx_output.c_log(f"The user submitted {details['submitted']} Pull Requests out of which {details['open']} remain open.", rtype="profiling", contributor=user)
482482

483483
# Check if there were any users with mismatches in commits dates in the repository.
484-
for user, dates_mismatch_commits in gx_context.getIdentifierValues("DATE_MISMATCH_COMMITS").items():
485-
gx_output.r_log(f"WARNING: UNRELIABLE DATES (Older than Account) in {dates_mismatch_commits} commits by [{user}]. Potential tampering, account re-use, or Rebase.", rtype="commits")
486-
484+
for user, dates_mismatch_commits in gx_context.getIdentifierValues("DATE_MISMATCH_COMMITS_ACCOUNT").items():
485+
gx_output.r_log(f"UNRELIABLE COMMIT DATES in {dates_mismatch_commits} commits by [{user}]. They are dated earlier than the account creation time. Potential tampering, account re-use, or Rebase.", rtype="commits")
486+
for user, dates_mismatch_commits in gx_context.getIdentifierValues("DATE_MISMATCH_COMMITS_REPOSITORY").items():
487+
gx_output.r_log(f"UNRELIABLE COMMIT DATES in {dates_mismatch_commits} commits by [{user}]. They are dated earlier than the repository creation time. Potential tampering, account re-use, or Rebase.", rtype="commits")
487488

488489
# Get any Hosts and analyze them
489490
if gx_context.usingToken():

0 commit comments

Comments
 (0)