Skip to content

Commit bafb28c

Browse files
authored
Track Accurate PR cycle timing (#661)
* Implement GitHub PR timeline event handling and related data structures
* Refactor GitHub PR timeline event handling: rename types, update event structure, and improve data processing logic, Add Ready_for_review in calculation
* Add tests for cycle time and first response time using ready_for_review events
* Refactor GitHub event handling and improve code readability:
  - Clean up imports and remove unused code
  - Enhance formatting and consistency in function definitions
  - Update timeline event processing logic for better clarity
  - Adjust test cases for cycle time and first response time calculations
* Refactor error handling to maintain exception context and update type hint for verification field
* Refactor GitHub timeline event handling: replace GithubPRTimelineEvent with GithubPullRequestTimelineEvents, update related imports, and improve event processing logic
* Refactor GithubPullRequestTimelineEvents: consolidate event type mapping into a class attribute and improve type hinting for raw_data property
* Optimized the code for maintainability and scalability
* Refactor GithubPullRequestTimelineEvents: streamline configuration initialization for improved readability
* Refactor GitHub API service: rename and modularize timeline event fetching methods for improved clarity and error handling
* Refactor GithubApiService: improve formatting of _fetch_timeline_events method for better readability
1 parent f76bd6a commit bafb28c

File tree

12 files changed

+746
-59
lines changed

12 files changed

+746
-59
lines changed

backend/analytics_server/mhq/exapi/github.py

Lines changed: 111 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,23 @@
11
import contextlib
22
from datetime import datetime
33
from http import HTTPStatus
4-
from typing import Optional, Dict, Tuple, List
4+
from typing import Optional, Dict, Tuple, List, cast
55

66
import requests
7+
78
from github import Github, UnknownObjectException
89
from github.GithubException import GithubException
910
from github.Organization import Organization as GithubOrganization
1011
from github.PaginatedList import PaginatedList as GithubPaginatedList
1112
from github.PullRequest import PullRequest as GithubPullRequest
1213
from github.Repository import Repository as GithubRepository
1314

15+
from mhq.exapi.schemas.timeline import (
16+
GitHubPullTimelineEvent,
17+
GitHubPrTimelineEventsDict,
18+
)
1419
from mhq.exapi.models.github import GitHubContributor
20+
from mhq.exapi.models.github_timeline import GithubPullRequestTimelineEvents
1521
from mhq.utils.log import LOG
1622

1723
PAGE_SIZE = 100
@@ -271,3 +277,107 @@ def _fetch_workflow_runs(page: int = 1):
271277
page += 1
272278
data = _fetch_workflow_runs(page=page)
273279
return repo_workflows
280+
281+
def _fetch_timeline_events(
    self, repo_name: str, pr_number: int, page: int = 1, timeout: float = 30.0
) -> List[Dict]:
    """Fetch a single page of timeline events for one pull request.

    Sends a GET request to
    ``{base_url}/repos/{repo_name}/issues/{pr_number}/timeline`` asking for
    ``PAGE_SIZE`` entries of the requested page and returns the decoded
    JSON body.

    Args:
        repo_name: Repository path segment interpolated into the URL
            (presumably ``owner/repo`` -- confirm against callers).
        pr_number: Number of the pull request whose timeline is read.
        page: Page number to request; defaults to 1.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The decoded JSON payload of the response.

    Raises:
        GithubException: with SERVICE_UNAVAILABLE on a network error or
            timeout, NOT_FOUND on a 404, the response status on any other
            non-200 answer, and INTERNAL_SERVER_ERROR when the body is not
            valid JSON.
        GithubRateLimitExceeded: on a 403 response.
    """
    github_url = f"{self.base_url}/repos/{repo_name}/issues/{pr_number}/timeline"
    query_params = {"per_page": PAGE_SIZE, "page": page}

    try:
        # Without a timeout a stalled connection would block forever.
        # requests.Timeout derives from RequestException, so a timeout is
        # reported through the same handler as any other network failure.
        response = requests.get(
            github_url,
            headers=self.headers,
            params=query_params,
            timeout=timeout,
        )
    except requests.RequestException as e:
        raise GithubException(
            HTTPStatus.SERVICE_UNAVAILABLE, f"Network error: {str(e)}"
        ) from e

    if response.status_code == HTTPStatus.NOT_FOUND:
        raise GithubException(
            HTTPStatus.NOT_FOUND,
            f"PR {pr_number} not found for repo {repo_name}",
        )

    # NOTE(review): every 403 is reported as a rate-limit hit; GitHub can
    # also answer 403 for other reasons -- confirm this is intended.
    if response.status_code == HTTPStatus.FORBIDDEN:
        raise GithubRateLimitExceeded("GitHub API rate limit exceeded")

    if response.status_code != HTTPStatus.OK:
        raise GithubException(
            response.status_code,
            f"Failed to fetch timeline events: {response.text}",
        )

    try:
        return response.json()
    except ValueError as e:
        raise GithubException(
            HTTPStatus.INTERNAL_SERVER_ERROR, f"Invalid JSON response: {str(e)}"
        ) from e
318+
def _create_timeline_event(self, event_data: Dict) -> GitHubPrTimelineEventsDict:
    """Pair a raw timeline payload with the event name it carries."""
    # A payload without an "event" key is recorded with an empty name.
    event_name = event_data.get("event", "")
    typed_payload = cast(GitHubPullTimelineEvent, event_data)
    return GitHubPrTimelineEventsDict(event=event_name, data=typed_payload)
323+
324+
def get_pr_timeline_events(
    self, repo_name: str, pr_number: int
) -> List[GithubPullRequestTimelineEvents]:
    """Collect all timeline events of a pull request, page by page.

    Pages are requested starting at 1 until one comes back empty or holds
    fewer than ``PAGE_SIZE`` entries. Every raw entry is wrapped with
    ``_create_timeline_event`` and the full collection is then passed to
    ``_adapt_github_timeline_events``.

    Raises:
        GithubException: re-raised unchanged when the fetch raises one;
            any other exception is wrapped with INTERNAL_SERVER_ERROR.
    """
    collected: List[GitHubPrTimelineEventsDict] = []
    current_page = 1

    try:
        batch = self._fetch_timeline_events(repo_name, pr_number, current_page)
        while batch:
            for raw_event in batch:
                collected.append(self._create_timeline_event(raw_event))

            # A short page means nothing follows it.
            if len(batch) < PAGE_SIZE:
                break

            current_page += 1
            batch = self._fetch_timeline_events(
                repo_name, pr_number, current_page
            )
    except GithubException:
        raise
    except Exception as err:
        # NOTE(review): if GithubRateLimitExceeded is not a GithubException
        # subclass it is rewrapped here as a generic 500 -- confirm intended.
        raise GithubException(
            HTTPStatus.INTERNAL_SERVER_ERROR, f"Unexpected error: {str(err)}"
        ) from err

    return self._adapt_github_timeline_events(collected)
358+
359+
@staticmethod
def _adapt_github_timeline_events(
    timeline_events: List[GitHubPrTimelineEventsDict],
) -> List[GithubPullRequestTimelineEvents]:
    """Turn wrapped timeline entries into event objects, dropping partial ones.

    Entries with a falsy ``data`` or ``event`` value are skipped silently.
    The rest are kept only when timestamp, type, id and user are all
    truthy; otherwise a warning is logged and the entry is dropped.
    """
    usable: List[GithubPullRequestTimelineEvents] = []

    for entry in timeline_events:
        payload = entry.get("data")
        kind = entry.get("event")
        if not (payload and kind):
            continue

        event = GithubPullRequestTimelineEvents(kind, payload)

        # All four properties are read up front (no short-circuit), in the
        # same order, because reading `user` may itself emit a log line.
        required = [event.timestamp, event.type, event.id, event.user]
        if not all(required):
            LOG.warning(
                f"Skipping incomplete timeline event: {kind} with id: {event.id}"
            )
            continue

        usable.append(event)

    return usable
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
from datetime import datetime
2+
from dataclasses import dataclass
3+
from typing import Any, Optional, Dict, cast
4+
5+
6+
from mhq.exapi.schemas.timeline import (
7+
GitHubPullTimelineEvent,
8+
)
9+
from mhq.store.models.code.enums import PullRequestEventType
10+
from mhq.utils.log import LOG
11+
from mhq.utils.time import dt_from_iso_time_string
12+
13+
14+
@dataclass(frozen=True)
class GithubPullRequestTimelineEventConfig:
    """Lookup paths used to read actor, timestamp and id from an event payload.

    Each value is a dot-separated key path (for example ``"author.date"``).
    Instances are frozen so that a configuration shared between many events
    cannot be modified by accident.
    """

    # Path to the actor entry of the payload.
    actor_path: str
    # Path to the timestamp value of the payload.
    timestamp_field: str
    # Path to the identifier value of the payload.
    id_path: str
19+
20+
21+
@dataclass
class GithubPullRequestTimelineEvents:
    """A single GitHub pull-request timeline event with typed accessors.

    Wraps the raw payload of one timeline entry together with its event name
    and exposes the acting user, timestamp, id and mapped event type. Where
    each value lives in the payload depends on the event name and is looked
    up in ``EVENT_CONFIG``; names without an entry use the ``"default"``
    configuration.

    Attributes:
        event_type: Event name as found in the payload (e.g. ``"reviewed"``).
        data: Raw payload of the timeline entry.
    """

    # Declared as real dataclass fields so the generated __init__, __repr__
    # and __eq__ cover them. With no annotated fields, @dataclass produced an
    # __eq__ under which any two instances compared equal.
    event_type: str
    data: GitHubPullTimelineEvent

    REVIEWED_CONFIG = GithubPullRequestTimelineEventConfig(
        actor_path="user", timestamp_field="submitted_at", id_path="id"
    )

    READY_FOR_REVIEW_CONFIG = GithubPullRequestTimelineEventConfig(
        actor_path="actor", timestamp_field="created_at", id_path="id"
    )

    COMMENTED_CONFIG = GithubPullRequestTimelineEventConfig(
        actor_path="user", timestamp_field="created_at", id_path="id"
    )

    COMMITTED_CONFIG = GithubPullRequestTimelineEventConfig(
        actor_path="author.name", timestamp_field="author.date", id_path="sha"
    )

    DEFAULT_CONFIG = GithubPullRequestTimelineEventConfig(
        actor_path="actor", timestamp_field="created_at", id_path="id"
    )

    # Per-event lookup paths; "default" is the fallback for unlisted names.
    EVENT_CONFIG = {
        "reviewed": REVIEWED_CONFIG,
        "ready_for_review": READY_FOR_REVIEW_CONFIG,
        "commented": COMMENTED_CONFIG,
        "committed": COMMITTED_CONFIG,
        "default": DEFAULT_CONFIG,
    }

    # Event name -> internal event type enum.
    EVENT_TYPE_MAPPING = {
        "assigned": PullRequestEventType.ASSIGNED,
        "closed": PullRequestEventType.CLOSED,
        "commented": PullRequestEventType.COMMENTED,
        "committed": PullRequestEventType.COMMITTED,
        "convert_to_draft": PullRequestEventType.CONVERT_TO_DRAFT,
        "head_ref_deleted": PullRequestEventType.HEAD_REF_DELETED,
        "head_ref_force_pushed": PullRequestEventType.HEAD_REF_FORCE_PUSHED,
        "labeled": PullRequestEventType.LABELED,
        "locked": PullRequestEventType.LOCKED,
        "merged": PullRequestEventType.MERGED,
        "ready_for_review": PullRequestEventType.READY_FOR_REVIEW,
        "referenced": PullRequestEventType.REFERENCED,
        "reopened": PullRequestEventType.REOPENED,
        "review_dismissed": PullRequestEventType.REVIEW_DISMISSED,
        "review_requested": PullRequestEventType.REVIEW_REQUESTED,
        "review_request_removed": PullRequestEventType.REVIEW_REQUEST_REMOVED,
        "reviewed": PullRequestEventType.REVIEW,
        "unassigned": PullRequestEventType.UNASSIGNED,
        "unlabeled": PullRequestEventType.UNLABELED,
        "unlocked": PullRequestEventType.UNLOCKED,
    }

    @property
    def _config(self) -> GithubPullRequestTimelineEventConfig:
        """Return the lookup configuration for this event's name."""
        return self.EVENT_CONFIG.get(self.event_type, self.EVENT_CONFIG["default"])

    def _get_nested_value(self, path: str) -> Optional[Any]:
        """Follow a dot-separated key path through the payload.

        Returns None as soon as a step is not a dict or lacks the key.
        """
        current = self.data

        for key in path.split("."):
            if isinstance(current, dict) and key in current:
                current = current[key]
            else:
                return None
        return current

    @property
    def user(self) -> Optional[str]:
        """Return the acting user, or None when it cannot be determined.

        For "committed" events the value at the actor path is returned as
        is. For every other event the value's ``login`` is returned, read
        either as a dict key or as an attribute.
        """
        actor_path = self._config.actor_path

        if not actor_path:
            return None

        if self.event_type == "committed":
            return self._get_nested_value(actor_path)

        user_data = self._get_nested_value(actor_path)
        if not user_data:
            return None
        if isinstance(user_data, dict) and "login" in user_data:
            return user_data["login"]
        if hasattr(user_data, "login"):
            return user_data.login

        LOG.warning(
            f"User data does not contain login field for event type: {self.event_type}"
        )
        return None

    @property
    def timestamp(self) -> Optional[datetime]:
        """Return the event time parsed from its ISO string, or None if absent."""
        timestamp_value = self._get_nested_value(self._config.timestamp_field)

        if timestamp_value:
            return dt_from_iso_time_string(str(timestamp_value))
        return None

    @property
    def raw_data(self) -> Dict:
        """Return the raw payload typed as a plain dict."""
        return cast(Dict[str, Any], self.data)

    @property
    def id(self) -> Optional[str]:
        """Return the event identifier as a string, or None when missing."""
        id_value = self._get_nested_value(self._config.id_path)
        return str(id_value) if id_value is not None else None

    @property
    def type(self) -> Optional[PullRequestEventType]:
        """Return the mapped event type; unmapped names give UNKNOWN."""
        return self.EVENT_TYPE_MAPPING.get(
            self.event_type, PullRequestEventType.UNKNOWN
        )

backend/analytics_server/mhq/exapi/schemas/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)