_GITHUB_GRAPHQL_URL = "https://api.github.com/graphql"

# One page of pull requests (all states, newest first) with the fields that
# are copied into PullRequestData.
_PULL_REQUESTS_QUERY = """
query GetPRData($owner: String!, $repo: String!, $first: Int!, $after: String) {
  repository(owner: $owner, name: $repo) {
    nameWithOwner
    pullRequests(first: $first, after: $after, states: [OPEN, CLOSED, MERGED], orderBy: {field: CREATED_AT, direction: DESC}) {
      totalCount
      pageInfo {
        hasNextPage
        endCursor
      }
      nodes {
        title
        number
        state
        createdAt

        author {
          login
          ... on User {
            name
            email
          }
        }

        baseRef {
          name
          target {
            oid
          }
        }

        headRef {
          name
          target {
            oid
          }
        }

        changedFiles
        additions
        deletions

        mergedAt
        mergedBy {
          login
          ... on User {
            name
            email
          }
        }

        assignees(first: 10) {
          nodes {
            login
            name
          }
        }

        labels(first: 20) {
          nodes {
            name
            color
          }
        }
      }
    }
  }
}
"""


def _dig(node, *keys):
    """Follow *keys* through nested dicts; return None once a level is empty.

    The response contains ``null`` for a deleted author, a deleted branch or
    an unmerged pull request, and the ``... on User`` fragments leave ``name``
    and ``email`` out for non-user actors, so every level may be absent.
    """
    for key in keys:
        if not node:
            return None
        node = node.get(key)
    return node


def _build_pull_request_data(repository_name, pr):
    """Convert one ``pullRequests.nodes`` entry into a PullRequestData row.

    Fields the query does not request (comments, assignee story, related
    issues, milestone) are set to None.
    """
    # A null label connection or null entries would otherwise break the join.
    label_nodes = _dig(pr, "labels", "nodes") or []
    return PullRequestData(
        repository_name=repository_name,
        title=pr["title"],
        id=pr["number"],
        state=str(pr["state"]).lower(),
        commit_into=_dig(pr, "baseRef", "target", "oid"),
        commit_from=_dig(pr, "headRef", "target", "oid"),
        created_at=pr["createdAt"],
        creator_name=_dig(pr, "author", "name"),
        creator_login=_dig(pr, "author", "login"),
        creator_email=_dig(pr, "author", "email"),
        changed_files=pr["changedFiles"],
        comment_body=None,
        comment_created_at=None,
        comment_author_name=None,
        comment_author_login=None,
        comment_author_email=None,
        merger_name=_dig(pr, "mergedBy", "name"),
        merger_login=_dig(pr, "mergedBy", "login"),
        merger_email=_dig(pr, "mergedBy", "email"),
        source_branch=_dig(pr, "headRef", "name"),
        target_branch=_dig(pr, "baseRef", "name"),
        assignee_story=None,
        related_issues=None,
        labels=", ".join(label["name"] for label in label_nodes if label),
        milestone=None,
    )


def _fetch_repository_page(headers, variables, max_retries, timeout):
    """POST the pull-request query and return the ``repository`` payload.

    A non-200 status or a body containing ``errors`` is logged and retried
    after sleeping ``100 * TIMEDELTA``. After ``max_retries`` retries the
    function gives up and returns None; ``max_retries=None`` retries without
    limit. None is also returned when the response carries no repository.
    Exceptions raised by ``requests`` (including timeouts) propagate.
    """
    retry_delay = 100 * TIMEDELTA
    failures = 0

    while True:
        response = requests.post(
            _GITHUB_GRAPHQL_URL,
            headers=headers,
            json={"query": _PULL_REQUESTS_QUERY, "variables": variables},
            timeout=timeout,
        )

        if response.status_code != 200:
            failure = (
                f"GraphQL request failed: {response.status_code} - {response.text}"
            )
        else:
            graphql_data = response.json()
            if "errors" not in graphql_data:
                return _dig(graphql_data, "data", "repository")
            failure = f"GraphQL errors: {graphql_data['errors']}"

        logger.log_error(failure)
        failures += 1
        # Permanent failures (bad token, unknown repository) never recover,
        # so the retry loop must be bounded.
        if max_retries is not None and failures > max_retries:
            return None

        logger.log_to_stdout(f"Sleep to {retry_delay} and retry")
        sleep(retry_delay)


def log_repositories_pr_by_graphql(
    owner, repo_name, token, csv_name, first_n=100, max_retries=5, timeout=60
):
    """Log every pull request of one repository to CSV via the GraphQL API.

    Pages through ``repository.pullRequests`` (open, closed and merged,
    newest first) and, for each pull request, writes one PullRequestData row
    with ``logger.log_to_csv`` and echoes it with ``logger.log_to_stdout``.

    Args:
        owner: Login of the repository owner.
        repo_name: Repository name without the owner prefix.
        token: Token sent as ``Authorization: Bearer <token>``.
        csv_name: Target passed to ``logger.log_to_csv``.
        first_n: Page size requested from the API.
        max_retries: Retries allowed for a failing page before the repository
            is abandoned; None retries indefinitely.
        timeout: Seconds passed to ``requests.post`` as its timeout.

    Raises:
        requests.RequestException: If the HTTP request itself fails.
    """
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    variables = {
        "owner": owner,
        "repo": repo_name,
        "first": first_n,
        "after": None,
    }

    processed_count = 0
    has_next_page = True

    while has_next_page:
        repo_data = _fetch_repository_page(headers, variables, max_retries, timeout)
        if repo_data is None:
            logger.log_error(
                f"No pull request data received for {owner}/{repo_name}, skipping"
            )
            return

        pull_requests = repo_data["pullRequests"]
        page_info = pull_requests["pageInfo"]
        has_next_page = page_info["hasNextPage"]
        variables["after"] = page_info["endCursor"]

        prs = pull_requests["nodes"]
        processed_count += len(prs)
        # Pulled into a local: reusing the enclosing quote character inside
        # an f-string replacement field is a SyntaxError before Python 3.12.
        total_count = pull_requests["totalCount"]
        logger.log_to_stdout(f"Processing {processed_count} / {total_count}")

        for pr in prs:
            pr_info = asdict(_build_pull_request_data(repo_data["nameWithOwner"], pr))
            logger.log_to_csv(csv_name, list(pr_info.keys()), pr_info)
            logger.log_to_stdout(pr_info)


def log_pull_requests_by_graphql(
    binded_repos: Generator[tuple[IRepositoryAPI, Repository, str], None, None],
    csv_name: str,
):
    """Log pull requests of every bound repository through the GraphQL API.

    First calls ``logger.log_to_csv`` with only the PullRequestData field
    names (presumably to write the header row; confirm against the logger),
    then processes each repository in turn, sleeping ``100 * TIMEDELTA``
    after each one.

    Args:
        binded_repos: Yields ``(client, repository, token)`` tuples; the
            client is not used by this code path.
        csv_name: Target passed to ``logger.log_to_csv``.
    """
    info = asdict(PullRequestData())
    logger.log_to_csv(csv_name, list(info.keys()))

    for _, repo, token in binded_repos:
        logger.log_title(repo.name)
        log_repositories_pr_by_graphql(
            owner=repo.owner.login, repo_name=repo.name, csv_name=csv_name, token=token
        )
        sleep(100 * TIMEDELTA)
index 8a80a30..fdfae89 100644 --- a/src/pull_requests_parser.py +++ b/src/pull_requests_parser.py @@ -11,35 +11,8 @@ from src.git_logger import get_assignee_story from src.interface_wrapper import IRepositoryAPI, Repository from src.utils import logger - - -@dataclass(kw_only=True, frozen=True) -class PullRequestData: - repository_name: str = '' - title: str = '' - id: int = 0 - state: str = '' - commit_into: str = '' - commit_from: str = '' - created_at: str = '' - creator_name: str = '' - creator_login: str = '' - creator_email: str = '' - changed_files: str = '' - comment_body: str = '' - comment_created_at: str = '' - comment_author_name: str = '' - comment_author_login: str = '' - comment_author_email: str = '' - merger_name: str | None = None - merger_login: str | None = None - merger_email: str | None = None - source_branch: str = '' - target_branch: str = '' - assignee_story: str = '' - related_issues: str = '' - labels: str = '' - milestone: str = '' +from src.graphql.pull_request_parser import log_pull_requests_by_graphql # for using in main.py +from src.repo_dataclasses import PullRequestData def get_related_issues(pull_request_number, repo_owner, repo_name, token): @@ -106,6 +79,9 @@ def get_info(obj, attr): return EMPTY_FIELD if obj is None else getattr(obj, attr) +# -----------GithubAPI block-------------- + + def log_repositories_pr( client: IRepositoryAPI, repository: Repository, diff --git a/src/repo_dataclasses.py b/src/repo_dataclasses.py new file mode 100644 index 0000000..4163267 --- /dev/null +++ b/src/repo_dataclasses.py @@ -0,0 +1,30 @@ +from dataclasses import dataclass + + +@dataclass(kw_only=True, frozen=True) +class PullRequestData: + repository_name: str = '' + title: str = '' + id: int = 0 + state: str = '' + commit_into: str = '' + commit_from: str = '' + created_at: str = '' + creator_name: str = '' + creator_login: str = '' + creator_email: str = '' + changed_files: str = '' + comment_body: str = '' + comment_created_at: str = '' + 
comment_author_name: str = '' + comment_author_login: str = '' + comment_author_email: str = '' + merger_name: str | None = None + merger_login: str | None = None + merger_email: str | None = None + source_branch: str = '' + target_branch: str = '' + assignee_story: str = '' + related_issues: str = '' + labels: str = '' + milestone: str = '' \ No newline at end of file