-import os, requests, base64, re, time
-from . import gx_definitions
+import os, requests, base64, re, time, urllib.parse
+from . import gx_definitions, gx_output

 # GitHub API URL
 GITHUB_API_BASE_URL = "https://api.github.com"
@@ -18,6 +18,34 @@ def make_request(url, headers, params):
     links = response.headers.get('Link', '')
     return data, links, rate_limit_remaining, rate_limit_reset

+def get_total_pages_from_link_header(links):
+    if not links:
+        return None
+
+    # Parse the Link header to find the "last" page
+    for link in links.split(','):
+        if 'rel="last"' in link:
+            last_page_url = link.split(';')[0].strip('<> ')
+            # Extract the page number from the URL
+            if 'page=' in last_page_url:
+                try:
+                    return int(last_page_url.split('page=')[-1].split('&')[0])
+                except ValueError:
+                    pass
+    return None
+
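+# Illustrative example: given a GitHub pagination header such as
+#   '<https://api.github.com/user/repos?page=3>; rel="next", <https://api.github.com/user/repos?page=50>; rel="last"'
+# get_total_pages_from_link_header(links) returns 50; without a rel="last" entry it returns None.
+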
+def get_last_two_path_segments(url):
+    parsed_url = urllib.parse.urlparse(url)
+    path = parsed_url.path
+    parts = [part for part in path.split("/") if part]
+    if len(parts) >= 2:
+        return f"{parts[-2]}/{parts[-1]}"
+    elif len(parts) == 1:
+        return parts[-1]
+    else:
+        return ""
+
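+# Example of the mapping, assuming typical GitHub API URLs ("owner"/"repo" are placeholders):
+#   "https://api.github.com/repos/owner/repo/commits" -> "repo/commits"
+#   "https://api.github.com/users/octocat"            -> "users/octocat"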
+
 def github_request_json(url, params=None, limit_results=None):
     # https://docs.github.com/en/rest/about-the-rest-api/api-versions?apiVersion=2022-11-28
     headers = {"X-GitHub-Api-Version": "2022-11-28"}
@@ -31,57 +59,82 @@ def github_request_json(url, params=None, limit_results=None):
     all_results = None
     next_url = url
     remaining = -1
+    pages_fetched = 0
+    total_pages = None
+    start_time = time.time()

     while next_url:

         try:
-            data, links, remaining, reset = make_request(next_url, headers, params)
-        except Exception as ex:
-            print(ex)
-            print(f"Failed to talk to the GitHub API when fetching URL: {next_url} - Quitting.")
-            exit(-1)
-
-        if remaining == 0:
-            # Calculate how long to sleep, then sleep
-            sleep_time = reset - time.time()
-            if sleep_time > 0:
-                hours, remainder = divmod(int(sleep_time), 3600)
-                minutes, seconds = divmod(remainder, 60)
-                message = f"GitHub Rate limit reached. Sleeping for {hours} hours, {minutes} minutes, and {seconds} seconds. You may go and make coffee.."
-                print(f"\r\n\033[33m{message}\033[0m", flush=True)
-                if GITHUB_TOKEN == None:
-                    message = "You should try using a GitHub Access Token; it improves the experience significantly and it's easy!"
-                    print(f"\033[33m{message}\033[0m", flush=True)
-                    print("For information on how to create a GitHub API Access Token refer to: ")
-                    print("https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens")
-                    print("For information on GitHub Rate Limits refer to: ")
-                    print("https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api")
-
-                time.sleep(sleep_time + 1)  # Sleep until the reset time, plus a little buffer
-                continue  # and restart the loop
-
-        if all_results is None:
-            all_results = data
-        # if all_results is already a list, extend it.
-        elif isinstance(all_results, list):
-            all_results.extend(data)
-        elif isinstance(all_results, dict) and data.get('total_count') != None:
-            all_results[list(all_results.keys())[1]].extend(list(data.values())[1])
-        else:
-            all_results.update(data)
-
-        # Reset next_url
-        next_url = None
-
-        # Using "limit" we can cap the amount of results in order to prevent huge amounts of requests.
-        if limit_results == None or \
-            ((isinstance(all_results, list) and len(all_results) < limit_results) \
-            or (isinstance(all_results, dict) and all_results.get('total_count') != None and len(list(all_results.values())[1]) < limit_results)):
-            if 'rel="next"' in links:
-                for link in links.split(','):
-                    if 'rel="next"' in link:
-                        next_url = link.split(';')[0].strip('<> ')
-                        break
+
+            try:
+                data, links, remaining, reset = make_request(next_url, headers, params)
+            except Exception as ex:
+                print(ex)
+                print(f"Failed to talk to the GitHub API when fetching URL: {next_url} - Quitting.")
+                exit(-1)
+
+            if remaining == 0:
+                # Calculate how long to sleep, then sleep
+                sleep_time = reset - time.time()
+                if sleep_time > 0:
+                    hours, remainder = divmod(int(sleep_time), 3600)
+                    minutes, seconds = divmod(remainder, 60)
+                    message = f"GitHub Rate limit reached. Sleeping for {hours} hours, {minutes} minutes, and {seconds} seconds. You may go and make coffee.."
+                    print(f"\r\n\033[33m{message}\033[0m", flush=True)
+                    if GITHUB_TOKEN == None:
+                        message = "You should try using a GitHub Access Token; it improves the experience significantly and it's easy!"
+                        print(f"\033[33m{message}\033[0m", flush=True)
+                        print("For information on how to create a GitHub API Access Token refer to: ")
+                        print("https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens")
+                        print("For information on GitHub Rate Limits refer to: ")
+                        print("https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api")
+
+                    time.sleep(sleep_time + 1)  # Sleep until the reset time, plus a little buffer
+                    continue  # and restart the loop
+
+            if all_results is None:
+                all_results = data
+            # if all_results is already a list, extend it.
+            elif isinstance(all_results, list):
+                all_results.extend(data)
+            elif isinstance(all_results, dict) and data.get('total_count') != None:
+                all_results[list(all_results.keys())[-1]].extend(list(data.values())[-1])
+            else:
+                all_results.update(data)
+
+            pages_fetched += 1
+            if total_pages is None:
+                total_pages = get_total_pages_from_link_header(links)
+
+            # Print progress if the total page count is known
+            if total_pages:
+                progress = (pages_fetched / total_pages) * 100
+                elapsed_time = time.time() - start_time
+                avg_time_per_page = elapsed_time / pages_fetched
+                remaining_pages = total_pages - pages_fetched
+                estimated_time_left = remaining_pages * avg_time_per_page
+                time_estimate = f": {estimated_time_left:.0f} seconds left."
+                urlpath = get_last_two_path_segments(url)
+                print(f"\rFetching {urlpath} [Hit CTRL^C to skip]: ({progress:.2f}%) {time_estimate}" + " " * 30, flush=True, end="")
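+                # Worked example with illustrative numbers: 10 pages fetched in 5s gives
+                # avg_time_per_page = 0.5s; with total_pages = 50, 40 pages remain, so
+                # estimated_time_left = 40 * 0.5 = 20 seconds.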
+
+            # Reset next_url
+            next_url = None
+
+            # Using "limit" we can cap the amount of results in order to prevent huge amounts of requests.
+            if limit_results == None or \
+                ((isinstance(all_results, list) and len(all_results) < limit_results) \
+                or (isinstance(all_results, dict) and all_results.get('total_count') != None and len(list(all_results.values())[-1]) < limit_results)):
+                if 'rel="next"' in links:
+                    for link in links.split(','):
+                        if 'rel="next"' in link:
+                            next_url = link.split(';')[0].strip('<> ')
+                            break
+
+        except KeyboardInterrupt:
+            print("\r\n\033[33mReceived CTRL+C - Skipping..\033[0m")
+            next_url = None
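+            # Note: CTRL+C here aborts pagination only; results accumulated so far
+            # are still returned to the caller below.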
+

     return all_results

@@ -108,8 +161,11 @@ def fetch_repositories_for_org(org_url):
     org = org_url.strip('/').split('/')[-1]
     return github_request_json(f"{GITHUB_API_BASE_URL}/orgs/{org}/repos")

-def fetch_commits(repo, author=None, per_page=10):
-    return github_request_json(repo.get('commits_url').replace("{/sha}", f'?per_page={per_page}&author={author}' if author != None else ""))
+def fetch_repository_file_contents(repository, path):
+    return github_request_json(f"{GITHUB_API_BASE_URL}/repos/{repository.get('full_name')}/contents/{path}")
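+# Note: the contents endpoint returns file data base64-encoded in a 'content' field;
+# callers can decode it with base64.b64decode(result.get('content')).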
+
+def fetch_commits(repo, author=None):
+    return github_request_json(repo.get('commits_url').replace("{/sha}", f'?author={author}' if author != None else ""))
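+# Example: a repository's commits_url is a URI template like
+#   "https://api.github.com/repos/owner/repo/commits{/sha}";
+# with author="octocat", "{/sha}" is replaced to yield ".../commits?author=octocat".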

 def fetch_ssh_signing_keys(login):
     return github_request_json(f"{GITHUB_API_BASE_URL}/users/{login}/ssh_signing_keys")
@@ -141,10 +197,12 @@ def fetch_repository_pulls_comments(repo):
 def fetch_repository_actions_workflows(repo):
     return github_request_json(f"{GITHUB_API_BASE_URL}/repos/{repo.get('full_name')}/actions/workflows")

-def fetch_repository_actions_artifacts(repo, limit):
+def fetch_repository_actions_artifacts(repo, limit=None):
     return github_request_json(f"{GITHUB_API_BASE_URL}/repos/{repo.get('full_name')}/actions/artifacts", limit_results=limit)

-def fetch_repository_actions_runs(repo, limit):
+def fetch_repository_actions_runs(repo, workflow_file=None, limit=None):
+    if workflow_file != None:
+        return github_request_json(f"{GITHUB_API_BASE_URL}/repos/{repo.get('full_name')}/actions/workflows/{workflow_file}/runs", limit_results=limit)
     return github_request_json(f"{GITHUB_API_BASE_URL}/repos/{repo.get('full_name')}/actions/runs", limit_results=limit)
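+# Example usage (hypothetical workflow file name):
+#   fetch_repository_actions_runs(repo, workflow_file="ci.yml", limit=500)
+# fetches runs for that workflow only; omitting workflow_file fetches all runs.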

 def fetch_repository_releases(repo):
@@ -186,3 +244,5 @@ def fetch_contributor_contributions(repo, contributor_obj):
 def fetch_contributor_events(contributor_obj):
     return github_request_json(contributor_obj.get('events_url').replace("{/privacy}", ""))

+def search_repositories_by_name(name, limit):
+    return github_request_json(f"{GITHUB_API_BASE_URL}/search/repositories", {'q': name, 'sort': 'stars', 'order': 'desc'}, limit_results=limit)
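+# Example usage (illustrative query): search_repositories_by_name("gitxray", limit=100)
+# queries the /search/repositories endpoint for "gitxray", ranked by stars in descending order.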