Commit 86cf130

Improved handling of paging: progress is now printed as a percentage with a time ETA, and a long fetch can be skipped by hitting CTRL+C.
Added new GitHub REST API calls.
1 parent ce1ccb8 commit 86cf130

1 file changed (+113, −53)


src/gitxray/include/gh_api.py

Lines changed: 113 additions & 53 deletions
@@ -1,5 +1,5 @@
-import os, requests, base64, re, time
-from . import gx_definitions
+import os, requests, base64, re, time, urllib.parse
+from . import gx_definitions, gx_output
 
 # GitHub API URL
 GITHUB_API_BASE_URL = "https://api.github.com"
@@ -18,6 +18,34 @@ def make_request(url, headers, params):
     links = response.headers.get('Link', '')
     return data, links, rate_limit_remaining, rate_limit_reset
 
+def get_total_pages_from_link_header(links):
+    if not links:
+        return None
+
+    # Parse the Link header to find the "last" page
+    for link in links.split(','):
+        if 'rel="last"' in link:
+            last_page_url = link.split(';')[0].strip('<> ')
+            # Extract the page number from the URL
+            if 'page=' in last_page_url:
+                try:
+                    return int(last_page_url.split('page=')[-1].split('&')[0])
+                except ValueError:
+                    pass
+    return None
+
+def get_last_two_path_segments(url):
+    parsed_url = urllib.parse.urlparse(url)
+    path = parsed_url.path
+    parts = [part for part in path.split("/") if part]
+    if len(parts) >= 2:
+        return f"{parts[-2]}/{parts[-1]}"
+    elif len(parts) == 1:
+        return parts[-1]
+    else:
+        return ""
+
+
 def github_request_json(url, params=None, limit_results=None):
     # https://docs.github.com/en/rest/about-the-rest-api/api-versions?apiVersion=2022-11-28
     headers = {"X-GitHub-Api-Version":"2022-11-28"}
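
Note: a quick sanity sketch of the two new helpers above, run against an illustrative Link header in the format GitHub documents (the URLs below are made up):

# Sketch: exercising the new pagination helpers with sample inputs.
sample_links = ('<https://api.github.com/repos/a/b/commits?page=2>; rel="next", '
                '<https://api.github.com/repos/a/b/commits?page=14>; rel="last"')
print(get_total_pages_from_link_header(sample_links))  # 14
print(get_total_pages_from_link_header(''))            # None: no paging info
print(get_last_two_path_segments("https://api.github.com/repos/a/b/commits"))  # b/commits
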
@@ -31,57 +59,82 @@ def github_request_json(url, params=None, limit_results=None):
     all_results = None
     next_url = url
     remaining = -1
+    pages_fetched = 0
+    total_pages = None
+    start_time = time.time()
 
     while next_url:
 
         try:
-            data, links, remaining, reset = make_request(next_url, headers, params)
-        except Exception as ex:
-            print(ex)
-            print(f"Failed to talk to the GitHub API when fetching URL: {next_url} - Quitting.")
-            exit(-1)
-
-        if remaining == 0:
-            # Calculate how long to sleep, then sleep
-            sleep_time = reset - time.time()
-            if sleep_time > 0:
-                hours, remainder = divmod(int(sleep_time), 3600)
-                minutes, seconds = divmod(remainder, 60)
-                message = f"GitHub Rate limit reached. Sleeping for {hours} hours, {minutes} minutes, and {seconds} seconds. You may go and make coffee.."
-                print(f"\r\n\033[33m{message}\033[0m", flush=True)
-                if GITHUB_TOKEN == None:
-                    message = f"You should try using a Github Access Token, improves the experience significantly and it's easy!"
-                    print(f"\033[33m{message}\033[0m", flush=True)
-                    print("For information on how to create a GitHub API Access Token refer to: ")
-                    print("https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens")
-                print("For information on GitHub Rate Limits refer to: ")
-                print("https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api")
-
-            time.sleep(sleep_time + 1) # Sleep until the reset time, plus a little buffer
-            continue # and restart the loop
-
-        if all_results is None:
-            all_results = data
-        # if we come from all_results being a list, then we're extending it.
-        elif isinstance(all_results, list):
-            all_results.extend(data)
-        elif isinstance(all_results, dict) and data.get('total_count') != None:
-            all_results[list(all_results.keys())[1]].extend(list(data.values())[1])
-        else:
-            all_results.update(data)
-
-        # Reset next_url
-        next_url = None
-
-        # Using "limit" we can cap the amount of results in order to prevent huge amounts of requests.
-        if limit_results == None or \
-            ((isinstance(all_results, list) and len(all_results) < limit_results) \
-            or (isinstance(all_results, dict) and all_results.get('total_count') != None and len(list(all_results.values())[1]) < limit_results)):
-            if 'rel="next"' in links:
-                for link in links.split(','):
-                    if 'rel="next"' in link:
-                        next_url = link.split(';')[0].strip('<> ')
-                        break
+
+            try:
+                data, links, remaining, reset = make_request(next_url, headers, params)
+            except Exception as ex:
+                print(ex)
+                print(f"Failed to talk to the GitHub API when fetching URL: {next_url} - Quitting.")
+                exit(-1)
+
+            if remaining == 0:
+                # Calculate how long to sleep, then sleep
+                sleep_time = reset - time.time()
+                if sleep_time > 0:
+                    hours, remainder = divmod(int(sleep_time), 3600)
+                    minutes, seconds = divmod(remainder, 60)
+                    message = f"GitHub Rate limit reached. Sleeping for {hours} hours, {minutes} minutes, and {seconds} seconds. You may go and make coffee.."
+                    print(f"\r\n\033[33m{message}\033[0m", flush=True)
+                    if GITHUB_TOKEN == None:
+                        message = "You should try using a GitHub Access Token; it improves the experience significantly and it's easy!"
+                        print(f"\033[33m{message}\033[0m", flush=True)
+                        print("For information on how to create a GitHub API Access Token refer to: ")
+                        print("https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens")
+                    print("For information on GitHub Rate Limits refer to: ")
+                    print("https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api")
+
+                time.sleep(sleep_time + 1) # Sleep until the reset time, plus a little buffer
+                continue # and restart the loop
+
+            if all_results is None:
+                all_results = data
+            # if we come from all_results being a list, then we're extending it.
+            elif isinstance(all_results, list):
+                all_results.extend(data)
+            elif isinstance(all_results, dict) and data.get('total_count') != None:
+                all_results[list(all_results.keys())[-1]].extend(list(data.values())[-1])
+            else:
+                all_results.update(data)
+
+            pages_fetched += 1
+            if total_pages is None:
+                total_pages = get_total_pages_from_link_header(links)
+
+            # Print progress if total pages is known
+            if total_pages:
+                progress = (pages_fetched / total_pages) * 100
+                elapsed_time = time.time() - start_time
+                avg_time_per_page = elapsed_time / pages_fetched
+                remaining_pages = total_pages - pages_fetched
+                estimated_time_left = remaining_pages * avg_time_per_page
+                time_estimate = f": {estimated_time_left:.0f} seconds left."
+                urlpath = get_last_two_path_segments(url)
+                print(f"\rFetching {urlpath} [Hit CTRL+C to skip]: ({progress:.2f}%) {time_estimate}" + " " * 30, flush=True, end="")
+
+            # Reset next_url
+            next_url = None
+
+            # Using "limit" we can cap the amount of results in order to prevent huge amounts of requests.
+            if limit_results == None or \
+                ((isinstance(all_results, list) and len(all_results) < limit_results) \
+                or (isinstance(all_results, dict) and all_results.get('total_count') != None and len(list(all_results.values())[-1]) < limit_results)):
+                if 'rel="next"' in links:
+                    for link in links.split(','):
+                        if 'rel="next"' in link:
+                            next_url = link.split(';')[0].strip('<> ')
+                            break
+
+        except KeyboardInterrupt:
+            print("\r\n\033[33mReceived CTRL+C - Skipping..\033[0m")
+            next_url = None
+
 
     return all_results
 
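
Note: the ETA printed by the loop above is a plain linear extrapolation, average time per fetched page multiplied by the pages still to go. An illustration with made-up numbers:

# Illustrative ETA math: 12 of 48 pages fetched in 30 seconds.
pages_fetched, total_pages, elapsed_time = 12, 48, 30.0
progress = (pages_fetched / total_pages) * 100                           # 25.00 %
avg_time_per_page = elapsed_time / pages_fetched                         # 2.5 s per page
estimated_time_left = (total_pages - pages_fetched) * avg_time_per_page  # 90 s
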
@@ -108,8 +161,11 @@ def fetch_repositories_for_org(org_url):
     org = org_url.strip('/').split('/')[-1]
     return github_request_json(f"{GITHUB_API_BASE_URL}/orgs/{org}/repos")
 
-def fetch_commits(repo, author=None, per_page=10):
-    return github_request_json(repo.get('commits_url').replace("{/sha}", f'?per_page={per_page}&author={author}' if author != None else ""))
+def fetch_repository_file_contents(repository, path):
+    return github_request_json(f"{GITHUB_API_BASE_URL}/repos/{repository.get('full_name')}/contents/{path}")
+
+def fetch_commits(repo, author=None):
+    return github_request_json(repo.get('commits_url').replace("{/sha}", f'?author={author}' if author != None else ""))
 
 def fetch_ssh_signing_keys(login):
     return github_request_json(f"{GITHUB_API_BASE_URL}/users/{login}/ssh_signing_keys")
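
Note: a hypothetical call to the new contents helper. Per the GitHub contents API, file bodies come back base64-encoded, which is why the module imports base64; the repository dict below is illustrative:

# Hypothetical usage of fetch_repository_file_contents.
repo = {"full_name": "octocat/Hello-World"}  # illustrative repository object
result = fetch_repository_file_contents(repo, "README.md")
if result and result.get("content"):
    # GitHub returns file content base64-encoded.
    text = base64.b64decode(result["content"]).decode("utf-8", errors="replace")
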
@@ -141,10 +197,12 @@ def fetch_repository_pulls_comments(repo):
 def fetch_repository_actions_workflows(repo):
     return github_request_json(f"{GITHUB_API_BASE_URL}/repos/{repo.get('full_name')}/actions/workflows")
 
-def fetch_repository_actions_artifacts(repo, limit):
+def fetch_repository_actions_artifacts(repo, limit=None):
     return github_request_json(f"{GITHUB_API_BASE_URL}/repos/{repo.get('full_name')}/actions/artifacts", limit_results=limit)
 
-def fetch_repository_actions_runs(repo, limit):
+def fetch_repository_actions_runs(repo, workflow_file=None, limit=None):
+    if workflow_file != None:
+        return github_request_json(f"{GITHUB_API_BASE_URL}/repos/{repo.get('full_name')}/actions/workflows/{workflow_file}/runs", limit_results=limit)
     return github_request_json(f"{GITHUB_API_BASE_URL}/repos/{repo.get('full_name')}/actions/runs", limit_results=limit)
 
 def fetch_repository_releases(repo):
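
Note: a hypothetical example of the extended runs helper; passing workflow_file scopes the query to a single workflow's runs instead of every run in the repository (names below are illustrative):

# Hypothetical usage of fetch_repository_actions_runs.
repo = {"full_name": "octocat/Hello-World"}  # illustrative repository object
all_runs = fetch_repository_actions_runs(repo, limit=200)
ci_runs = fetch_repository_actions_runs(repo, workflow_file="ci.yml", limit=200)
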
@@ -186,3 +244,5 @@ def fetch_contributor_contributions(repo, contributor_obj):
 def fetch_contributor_events(contributor_obj):
     return github_request_json(contributor_obj.get('events_url').replace("{/privacy}", ""))
 
+def search_repositories_by_name(name, limit):
+    return github_request_json(f"{GITHUB_API_BASE_URL}/search/repositories", {'q':name, 'sort':'stars', 'order':'desc'}, limit_results=limit)
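
Note: search responses are dicts of the form {'total_count': ..., 'incomplete_results': ..., 'items': [...]}, which is why the paging loop above switched from index [1] to [-1] when merging pages: 'items' is the last key. A hypothetical call (query and result cap are illustrative):

# Hypothetical usage of search_repositories_by_name.
results = search_repositories_by_name("gitxray", limit=50)
if results:
    for item in results.get("items", [])[:5]:
        print(item.get("full_name"), item.get("stargazers_count"))
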
