Skip to content

Commit 2253c64

Browse files
committed
Added new checks for similar repository names, moved code out to the Workflows X-Ray, and made some minor fixes
1 parent 7a4f979 commit 2253c64

File tree

1 file changed

+30
-63
lines changed

1 file changed

+30
-63
lines changed

src/gitxray/xrays/repository_xray.py

Lines changed: 30 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,25 @@ def run(gx_context, gx_output):
99
repository = gx_context.getRepository()
1010
contributors = gx_context.getContributors()
1111

12+
print(f"Checking for similar repository names in GitHub.."+" "*40, end="")
13+
# This gets all repository names matching our repository name, sorted first by highest rating
14+
similar_names = gh_api.search_repositories_by_name(repository.get('name'), limit=10)
15+
if similar_names != None and similar_names.get('total_count') != None and similar_names.get('total_count') > 0:
16+
most_rated = similar_names.get('items')[0]
17+
search_url = f"https://github.com/search?q={repository.get('name')}%20in:name&type=repositories&s=stars&o=desc"
18+
if most_rated.get('full_name') == repository.get('full_name'):
19+
gx_output.r_log(f"This is the highest rating repository with name [{repository.get('name')}]", rtype="profiling")
20+
else:
21+
gx_output.r_log(f"WARNING: This is NOT the highest rating repository with name [{repository.get('name')}]", rtype="profiling")
22+
23+
if similar_names.get('total_count') > 1:
24+
gx_output.r_log(f'{similar_names.get("total_count")} repositories with a similar name were discovered - See them here: {search_url}', 'profiling')
25+
26+
stargazers_message = f"Stars count: [{repository.get('stargazers_count')}]"
27+
if repository.get('stargazers_count') > 0:
28+
stargazers_message += f" List at: {repository.get('stargazers_url')}"
29+
gx_output.r_log(stargazers_message, rtype="profiling")
30+
1231
if repository.get('owner'):
1332
gx_output.r_log(f"Repository owner account is [{repository.get('owner').get('login')}]: {repository.get('owner').get('html_url')}", rtype="profiling")
1433

@@ -20,7 +39,7 @@ def run(gx_context, gx_output):
2039

2140
# These go in the repository xray and not contributors because the REST API returns all per repository
2241
# https://api.github.com/repos/infobyte/faraday/issues/comments - and won't allow filtering in a helpful (to us) way
23-
print(f"Getting all repository comments on commits.."+" "*40, end="")
42+
print(f"\rGetting all repository comments on commits.."+" "*40, end="")
2443
commit_comments = gh_api.fetch_repository_commit_comments(repository)
2544
if len(commit_comments) > 0:
2645
total_comments = defaultdict(int)
@@ -45,7 +64,7 @@ def run(gx_context, gx_output):
4564
login_tmp = f"{login} [NOT a contributor]"
4665
else:
4766
login_tmp = login
48-
gx_output.c_log(f"User {login_tmp} added {ccount} Comments to Commits. Full breakdown of comments available in Verbose mode.", rtype="comments", contributor=login)
67+
gx_output.c_log(f"User {login_tmp} added {ccount} Comments to Commits. {gx_context.verboseLegend()}", rtype="comments", contributor=login)
4968
#gx_output.c_log(f"{ccount} Comments added to Commits by [{login}] available at: {repository.get('url')}/comments", rtype="comments")
5069

5170
# Not adding much value
@@ -65,7 +84,7 @@ def run(gx_context, gx_output):
6584

6685
print(f"\rGetting all repository comments on issues.."+" "*30, end="")
6786
issues_comments = gh_api.fetch_repository_issues_comments(repository)
68-
if len(issues_comments) > 0:
87+
if issues_comments != None and len(issues_comments) > 0:
6988
total_comments = defaultdict(int)
7089
positive_reactions = defaultdict(int)
7190
negative_reactions = defaultdict(int)
@@ -88,7 +107,7 @@ def run(gx_context, gx_output):
88107
login_tmp = f"{login} [NOT a contributor]"
89108
else:
90109
login_tmp = login
91-
gx_output.c_log(f"User {login_tmp} added {ccount} Comments to Issues. Full breakdown of comments available in Verbose mode.", rtype="comments", contributor=login)
110+
gx_output.c_log(f"User {login_tmp} added {ccount} Comments to Issues. {gx_context.verboseLegend()}", rtype="comments", contributor=login)
92111
#gx_output.c_log(f"{ccount} Comments added to Issues by [{login}] available at: {repository.get('url')}/issues/comments", rtype="comments")
93112

94113
gx_output.r_log(f"{len(issues_comments)} Comments in issues available at: {repository.get('url')}/issues/comments", rtype="comments")
@@ -106,7 +125,7 @@ def run(gx_context, gx_output):
106125

107126
print(f"\rGetting all repository comments on pull requests.."+" "*30, end="")
108127
pulls_comments = gh_api.fetch_repository_pulls_comments(repository)
109-
if len(pulls_comments) > 0:
128+
if pulls_comments != None and len(pulls_comments) > 0:
110129
total_comments = defaultdict(int)
111130
positive_reactions = defaultdict(int)
112131
negative_reactions = defaultdict(int)
@@ -129,7 +148,7 @@ def run(gx_context, gx_output):
129148
login_tmp = f"{login} [NOT a contributor]"
130149
else:
131150
login_tmp = login
132-
gx_output.c_log(f"User {login_tmp} added {ccount} Comments to PRs. Full breakdown of comments available in Verbose mode.", rtype="comments", contributor=login)
151+
gx_output.c_log(f"User {login_tmp} added {ccount} Comments to PRs. {gx_context.verboseLegend()}", rtype="comments", contributor=login)
133152
#gx_output.c_log(f"{ccount} Comments added to PRs by [{login}] available at: {repository.get('url')}/pulls/comments", rtype="comments")
134153

135154
gx_output.r_log(f"{len(pulls_comments)} Comments in pulls available at: {repository.get('url')}/pulls/comments", rtype="comments")
@@ -163,55 +182,9 @@ def run(gx_context, gx_output):
163182
if repository.get('forks_count') > 0:
164183
gx_output.r_log(f"Repository has {repository.get('forks_count')} forks: {repository.get('forks_url')}", rtype="profiling")
165184

166-
print(f"\rQuerying for repository action workflows.."+" "*30, end="")
167-
workflows = gh_api.fetch_repository_actions_workflows(repository)
168-
if workflows != None and workflows.get('total_count') > 0:
169-
gx_output.r_log(f"{workflows.get('total_count')} Workflows available at: [{repository.get('url')}/actions/workflows]", rtype="workflows")
170-
for workflow in workflows.get('workflows'):
171-
gx_output.r_log(f"Workflow [{workflow.get('name')}] created [{workflow.get('created_at')}], updated [{workflow.get('updated_at')}]: {workflow.get('html_url')}", rtype="workflows")
172-
173-
print(f"\rAnalyzing repository action workflow runs (Analysis capped to 5000 max).."+" "*30, end="")
174-
# Some repositories have dozens of thousands of runs, which we could analyze Buuuut, it would take forever.
175-
# Therefore we prioritize medium size repositories with < 5000 runs (as of today at least)
176-
runs = gh_api.fetch_repository_actions_runs(repository, limit=5000)
177-
if runs != None and runs.get('total_count') > 0:
178-
gx_output.r_log(f"{workflows.get('total_count')} Workflows were run {runs.get('total_count')} times: [{repository.get('url')}/actions/runs]", rtype="workflows")
179-
# Pending adding more functionality here to analyse workflow runs, although capped.
180-
run_actors = defaultdict(int)
181-
for run in runs.get('workflow_runs'):
182-
run_actors[run.get('actor').get('login')] += 1
183-
184-
total_runs = int(runs.get('total_count'))
185-
for actor, actor_runs in run_actors.items():
186-
percentage_runs = (actor_runs / total_runs) * 100
187-
if gx_context.isContributor(actor):
188-
message = f"User {actor} triggered {actor_runs} workflow runs [{percentage_runs:.2f}%] - See them at: [{repository.get('html_url')}/actions?query=actor%3A{actor}]"
189-
else:
190-
message = f"WARNING: {actor} is NOT a contributor to this repository and yet triggered {actor_runs} workflow runs [{percentage_runs:.2f}%] - See them at: [{repository.get('html_url')}/actions?query=actor%3A{actor}]"
191-
192-
gx_output.c_log(message, rtype="workflows", contributor=actor)
193-
gx_output.r_log(message, rtype="workflows")
194-
195-
196-
print(f"\rQuerying for repository action artifacts (Analysis capped to 5000 max).."+" "*30, end="")
197-
artifacts = gh_api.fetch_repository_actions_artifacts(repository, limit=5000)
198-
if artifacts != None and artifacts.get('total_count') > 0:
199-
gx_output.r_log(f"{artifacts.get('total_count')} Artifacts available at: [{repository.get('url')}/actions/artifacts]", rtype="artifacts")
200-
for artifact in artifacts.get('artifacts'):
201-
# There are normally multiple artifacts hence we keep them under verbose.
202-
gx_output.r_log(f"Artifact [{artifact.get('name')}] created [{artifact.get('created_at')}], updated [{artifact.get('updated_at')}]: {artifact.get('url')}", rtype="v_artifacts")
203-
created_at = artifact.get('created_at')
204-
created_at_ts = gh_time.parse_date(created_at)
205-
updated_at = artifact.get('updated_at')
206-
updated_at_ts = gh_time.parse_date(updated_at)
207-
# This shouldn't happen but we still run a check; artifacts can't be updated but instead completely overwritten
208-
# More data here: https://github.com/actions/upload-artifact#overwriting-an-artifact
209-
if (updated_at_ts-created_at_ts).days > 0:
210-
gx_output.r_log(f"WARNING: An artifact [{artifact.get('name')}] was updated {(updated_at_ts-created_at_ts).days} days after being created: {artifact.get('url')}", rtype="artifacts")
211-
212185
print(f"\rInspecting repository branches.."+" "*40, end="")
213186
branches = gh_api.fetch_repository_branches(repository)
214-
if len(branches) > 0:
187+
if branches != None and len(branches) > 0:
215188
gx_output.r_log(f"{len(branches)} Branches available at: [{repository.get('html_url')}/branches]", rtype="branches")
216189
unprotected_branches = []
217190
protected_branches = []
@@ -226,15 +199,15 @@ def run(gx_context, gx_output):
226199

227200
print(f"\rInspecting repository labels.."+" "*40, end="")
228201
labels = gh_api.fetch_repository_labels(repository)
229-
if len(labels) > 0:
202+
if labels != None and len(labels) > 0:
230203
gx_output.r_log(f"{len(labels)} Labels available at: [{repository.get('html_url')}/labels]", rtype="labels")
231204
non_default_labels = [label.get('name') for label in labels if label.get('default') == False]
232205
if len(non_default_labels) > 0:
233206
gx_output.r_log(f"{len(non_default_labels)} Non-default Labels: {non_default_labels} available at: [{repository.get('html_url')}/labels]", rtype="labels")
234207

235208
print(f"\rInspecting repository tags.."+" "*40, end="")
236209
tags = gh_api.fetch_repository_tags(repository)
237-
if len(tags) > 0: gx_output.r_log(f"{len(tags)} Tags available at: [{repository.get('html_url')}/tags]", rtype="tags")
210+
if tags != None and len(tags) > 0: gx_output.r_log(f"{len(tags)} Tags available at: [{repository.get('html_url')}/tags]", rtype="tags")
238211
tag_taggers = defaultdict(int)
239212

240213
""" A bit shameful here because we can't really get too much data out of tags because of the way the GH API is implemented.
@@ -256,7 +229,7 @@ def run(gx_context, gx_output):
256229
gx_output.r_log(message, rtype="tags")
257230

258231

259-
print(f"\rInspecting repository releases.."+" "*40)
232+
print(f"\rInspecting repository releases.."+" "*40, end="")
260233
releases = gh_api.fetch_repository_releases(repository)
261234
if len(releases) > 0: gx_output.r_log(f"{len(releases)} Releases available at: [{repository.get('html_url')}/releases]", rtype="releases")
262235

@@ -361,11 +334,6 @@ def run(gx_context, gx_output):
361334
watchers_message += f" List at: {repository.get('subscribers_url')}"
362335
gx_output.r_log(watchers_message, rtype="profiling")
363336

364-
stargazers_message = f"Stars count: [{repository.get('stargazers_count')}]"
365-
if repository.get('stargazers_count') > 0:
366-
stargazers_message += f" List at: {repository.get('stargazers_url')}"
367-
gx_output.r_log(stargazers_message, rtype="profiling")
368-
369337
if repository.get('open_issues_count') > 0:
370338
gx_output.r_log(f"Repository has {repository.get('open_issues_count')} Open Issues: {repository.get('html_url')}/issues", rtype="profiling")
371339

@@ -378,7 +346,7 @@ def run(gx_context, gx_output):
378346
if repository.get('fork') != False:
379347
parent = repository.get('parent').get('full_name')
380348
source = repository.get('source').get('full_name')
381-
print(f"Repository is a FORK of a parent named: {repository.get('parent').get('full_name')}: {repository.get('parent')['html_url']}")
349+
print(f"\rRepository is a FORK of a parent named: {repository.get('parent').get('full_name')}: {repository.get('parent')['html_url']}")
382350
gx_output.r_log(f"Repository is a FORK of repo: {repository.get('parent')['html_url']}", rtype="fork")
383351
print(f"This also means that GitHub will return ALL contributors (might be a LOT) up to the source repository")
384352
if parent != source:
@@ -507,5 +475,4 @@ def run(gx_context, gx_output):
507475
gx_output.r_log(f"The repository has no record of Issues or Pull Requests.", rtype="profiling")
508476
"""
509477

510-
print(f"\rRepository has been analyzed.." + " "*40)
511478
return True

0 commit comments

Comments
 (0)