Skip to content

Commit 7d243a7

Browse files
committed
refactor llm issue detection task for EAPTrace
1 parent 2233b18 commit 7d243a7

File tree

4 files changed

+259
-333
lines changed

4 files changed

+259
-333
lines changed

src/sentry/seer/sentry_data_models.py

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,6 @@ class Span(BaseModel):
2222
span_description: str | None
2323

2424

25-
class EvidenceSpan(BaseModel):
26-
span_id: str | None = None
27-
parent_span_id: str | None = None
28-
timestamp: float | None = None
29-
op: str | None = None
30-
description: str | None = None
31-
exclusive_time: float | None = None # duration in milliseconds
32-
data: dict[str, Any] | None = None
33-
34-
3525
class TraceData(BaseModel):
3626
trace_id: str
3727
project_id: int
@@ -40,12 +30,9 @@ class TraceData(BaseModel):
4030
spans: list[Span]
4131

4232

43-
class EvidenceTraceData(BaseModel):
33+
class EvidenceTraceData(BaseModel): # hate this name
4434
trace_id: str
45-
project_id: int
4635
transaction_name: str
47-
total_spans: int
48-
spans: list[EvidenceSpan]
4936

5037

5138
class EAPTrace(BaseModel):

src/sentry/tasks/llm_issue_detection/detection.py

Lines changed: 75 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@
1616
from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka
1717
from sentry.models.project import Project
1818
from sentry.net.http import connection_from_url
19-
from sentry.seer.explorer.index_data import get_transactions_for_project
2019
from sentry.seer.models import SeerApiError
21-
from sentry.seer.sentry_data_models import EvidenceTraceData
2220
from sentry.seer.signed_seer_api import make_signed_seer_api_request
2321
from sentry.tasks.base import instrumented_task
24-
from sentry.tasks.llm_issue_detection.trace_data import get_evidence_trace_for_llm_detection
22+
from sentry.tasks.llm_issue_detection.trace_data import (
23+
get_project_top_transaction_traces_for_llm_detection,
24+
)
2525
from sentry.taskworker.namespaces import issues_tasks
2626
from sentry.utils import json
2727

@@ -30,10 +30,7 @@
3030
SEER_ANALYZE_ISSUE_ENDPOINT_PATH = "/v1/automation/issue-detection/analyze"
3131
SEER_TIMEOUT_S = 120
3232
SEER_RETRIES = 1
33-
34-
NUM_TRANSACTIONS_TO_PROCESS = 20
35-
LOWER_SPAN_LIMIT = 20
36-
UPPER_SPAN_LIMIT = 500
33+
START_TIME_DELTA_MINUTES = 30
3734

3835

3936
seer_issue_detection_connection_pool = connection_from_url(
@@ -45,11 +42,15 @@
4542

4643

4744
class DetectedIssue(BaseModel):
45+
# LLM generated fields
4846
explanation: str
4947
impact: str
5048
evidence: str
5149
missing_telemetry: str | None = None
5250
title: str
51+
# context fields, not LLM generated
52+
trace_id: str
53+
transaction_name: str
5354

5455

5556
class IssueDetectionResponse(BaseModel):
@@ -62,13 +63,13 @@ def __init__(
6263
message: str,
6364
status: int,
6465
project_id: int | None = None,
65-
trace_id: str | None = None,
66+
organization_id: int | None = None,
6667
response_data: str | None = None,
6768
error_message: str | None = None,
6869
):
6970
super().__init__(message, status)
7071
self.project_id = project_id
71-
self.trace_id = trace_id
72+
self.organization_id = organization_id
7273
self.response_data = response_data
7374
self.error_message = error_message
7475

@@ -99,9 +100,7 @@ def get_base_platform(platform: str | None) -> str | None:
99100

100101
def create_issue_occurrence_from_detection(
101102
detected_issue: DetectedIssue,
102-
trace: EvidenceTraceData,
103103
project_id: int,
104-
transaction_name: str,
105104
) -> None:
106105
"""
107106
Create and produce an IssueOccurrence from an LLM-detected issue.
@@ -110,11 +109,13 @@ def create_issue_occurrence_from_detection(
110109
occurrence_id = uuid4().hex
111110
detection_time = datetime.now(UTC)
112111
project = Project.objects.get_from_cache(id=project_id)
112+
trace_id = detected_issue.trace_id
113+
transaction_name = detected_issue.transaction_name
113114
title = detected_issue.title.lower().replace(" ", "-")
114115
fingerprint = [f"llm-detected-{title}-{transaction_name}"]
115116

116117
evidence_data = {
117-
"trace_id": trace.trace_id,
118+
"trace_id": trace_id,
118119
"transaction": transaction_name,
119120
"explanation": detected_issue.explanation,
120121
"impact": detected_issue.impact,
@@ -155,7 +156,7 @@ def create_issue_occurrence_from_detection(
155156
"transaction": transaction_name,
156157
"contexts": {
157158
"trace": {
158-
"trace_id": trace.trace_id,
159+
"trace_id": trace_id,
159160
"type": "trace",
160161
}
161162
},
@@ -206,6 +207,11 @@ def run_llm_issue_detection() -> None:
206207
def detect_llm_issues_for_project(project_id: int) -> None:
207208
"""
208209
Process a single project for LLM issue detection.
210+
211+
Gets the project's top 50 transaction spans from the last START_TIME_DELTA_MINUTES minutes, sorted by sum(span.duration) in descending order.
212+
Deduplicates those 50 longest transactions by normalized transaction_name.
213+
For each deduplicated transaction, gets the first trace_id from the start of the time window, which has a small random variation.
214+
Sends these trace_ids to Seer, which uses get_trace_waterfall to construct an EAPTrace for analysis.
209215
"""
210216
project = Project.objects.get_from_cache(id=project_id)
211217
organization = project.organization
@@ -217,99 +223,67 @@ def detect_llm_issues_for_project(project_id: int) -> None:
217223
if not has_access:
218224
return
219225

220-
transactions = get_transactions_for_project(
221-
project_id, limit=100, start_time_delta={"minutes": 30}
226+
evidence_traces = get_project_top_transaction_traces_for_llm_detection(
227+
project_id, limit=100, start_time_delta_minutes=START_TIME_DELTA_MINUTES
222228
)
223-
if not transactions:
229+
if not evidence_traces:
224230
return
225231

226-
# Shuffle transactions to randomize order
227-
random.shuffle(transactions)
232+
# Shuffle to randomize order
233+
random.shuffle(evidence_traces)
228234

229-
processed_count = 0
230-
for transaction in transactions:
231-
if processed_count >= NUM_TRANSACTIONS_TO_PROCESS:
232-
break
235+
seer_request = {
236+
"telemetry": [{**trace.dict(), "kind": "trace"} for trace in evidence_traces],
237+
"organization_id": organization_id,
238+
"project_id": project_id,
239+
}
240+
response = make_signed_seer_api_request(
241+
connection_pool=seer_issue_detection_connection_pool,
242+
path=SEER_ANALYZE_ISSUE_ENDPOINT_PATH,
243+
body=json.dumps(seer_request).encode("utf-8"),
244+
)
233245

246+
if response.status < 200 or response.status >= 300:
247+
raise LLMIssueDetectionError(
248+
message="Seer HTTP error",
249+
status=response.status,
250+
project_id=project_id,
251+
organization_id=organization_id,
252+
response_data=response.data.decode("utf-8"),
253+
)
254+
255+
try:
256+
raw_response_data = response.json()
257+
response_data = IssueDetectionResponse.parse_obj(raw_response_data)
258+
except (ValueError, TypeError) as e:
259+
raise LLMIssueDetectionError(
260+
message="Seer response parsing error",
261+
status=response.status,
262+
project_id=project_id,
263+
organization_id=organization_id,
264+
response_data=response.data.decode("utf-8"),
265+
error_message=str(e),
266+
)
267+
268+
n_found_issues = len(response_data.issues)
269+
logger.info(
270+
"Seer issue detection success",
271+
extra={
272+
"num_traces": len(evidence_traces),
273+
"num_issues": n_found_issues,
274+
"organization_id": organization_id,
275+
"project_id": project_id,
276+
"titles": (
277+
[issue.title for issue in response_data.issues] if n_found_issues > 0 else None
278+
),
279+
},
280+
)
281+
for detected_issue in response_data.issues:
234282
try:
235-
trace = get_evidence_trace_for_llm_detection(transaction.name, transaction.project_id)
236-
237-
if (
238-
not trace
239-
or trace.total_spans < LOWER_SPAN_LIMIT
240-
or trace.total_spans > UPPER_SPAN_LIMIT
241-
):
242-
continue
243-
244-
processed_count += 1
245-
logger.info(
246-
"Found trace for LLM issue detection",
247-
extra={
248-
"trace_id": trace.trace_id,
249-
"project_id": project_id,
250-
"total_spans": trace.total_spans,
251-
"transaction_name": trace.transaction_name,
252-
},
253-
)
254-
255-
seer_request = {
256-
"telemetry": [{**trace.dict(), "kind": "trace"}],
257-
"organization_id": organization_id,
258-
"project_id": project_id,
259-
}
260-
response = make_signed_seer_api_request(
261-
connection_pool=seer_issue_detection_connection_pool,
262-
path=SEER_ANALYZE_ISSUE_ENDPOINT_PATH,
263-
body=json.dumps(seer_request).encode("utf-8"),
264-
)
265-
266-
if response.status < 200 or response.status >= 300:
267-
raise LLMIssueDetectionError(
268-
message="Seer HTTP error",
269-
status=response.status,
270-
project_id=project_id,
271-
trace_id=trace.trace_id,
272-
response_data=response.data.decode("utf-8"),
273-
)
274-
275-
try:
276-
raw_response_data = response.json()
277-
response_data = IssueDetectionResponse.parse_obj(raw_response_data)
278-
except (ValueError, TypeError) as e:
279-
raise LLMIssueDetectionError(
280-
message="Seer response parsing error",
281-
status=response.status,
282-
project_id=project_id,
283-
trace_id=trace.trace_id,
284-
response_data=response.data.decode("utf-8"),
285-
error_message=str(e),
286-
)
287-
288-
n_found_issues = len(response_data.issues)
289-
logger.info(
290-
"Seer issue detection success",
291-
extra={
292-
"num_issues": n_found_issues,
293-
"trace_id": trace.trace_id,
294-
"project_id": project_id,
295-
"titles": (
296-
[issue.title for issue in response_data.issues]
297-
if n_found_issues > 0
298-
else None
299-
),
300-
},
283+
create_issue_occurrence_from_detection(
284+
detected_issue=detected_issue,
285+
project_id=project_id,
301286
)
302-
for detected_issue in response_data.issues:
303-
try:
304-
create_issue_occurrence_from_detection(
305-
detected_issue=detected_issue,
306-
trace=trace,
307-
project_id=project_id,
308-
transaction_name=transaction.name,
309-
)
310-
311-
except Exception as e:
312-
sentry_sdk.capture_exception(e)
313-
except LLMIssueDetectionError as e:
287+
except Exception as e:
314288
sentry_sdk.capture_exception(e)
315-
continue # if one transaction encounters an error, don't block processing of the others
289+
continue

0 commit comments

Comments
 (0)