From e4368252928581300b4047af3637218f5a62c080 Mon Sep 17 00:00:00 2001 From: Nora Shapiro Date: Fri, 5 Dec 2025 14:15:39 -0800 Subject: [PATCH 1/4] refactor llm issue detection task for EAPTrace --- src/sentry/seer/sentry_data_models.py | 15 +- .../tasks/llm_issue_detection/detection.py | 176 ++++++------- .../tasks/llm_issue_detection/trace_data.py | 154 +++++------ .../sentry/tasks/test_llm_issue_detection.py | 247 +++++++++--------- 4 files changed, 259 insertions(+), 333 deletions(-) diff --git a/src/sentry/seer/sentry_data_models.py b/src/sentry/seer/sentry_data_models.py index 490684db1283d7..d7336f9cb1252d 100644 --- a/src/sentry/seer/sentry_data_models.py +++ b/src/sentry/seer/sentry_data_models.py @@ -22,16 +22,6 @@ class Span(BaseModel): span_description: str | None -class EvidenceSpan(BaseModel): - span_id: str | None = None - parent_span_id: str | None = None - timestamp: float | None = None - op: str | None = None - description: str | None = None - exclusive_time: float | None = None # duration in milliseconds - data: dict[str, Any] | None = None - - class TraceData(BaseModel): trace_id: str project_id: int @@ -40,12 +30,9 @@ class TraceData(BaseModel): spans: list[Span] -class EvidenceTraceData(BaseModel): +class EvidenceTraceData(BaseModel): # hate this name trace_id: str - project_id: int transaction_name: str - total_spans: int - spans: list[EvidenceSpan] class EAPTrace(BaseModel): diff --git a/src/sentry/tasks/llm_issue_detection/detection.py b/src/sentry/tasks/llm_issue_detection/detection.py index 7740cd8fb2e7a7..513b127faaa5f8 100644 --- a/src/sentry/tasks/llm_issue_detection/detection.py +++ b/src/sentry/tasks/llm_issue_detection/detection.py @@ -16,12 +16,12 @@ from sentry.issues.producer import PayloadType, produce_occurrence_to_kafka from sentry.models.project import Project from sentry.net.http import connection_from_url -from sentry.seer.explorer.index_data import get_transactions_for_project from sentry.seer.models import SeerApiError -from sentry.seer.sentry_data_models import EvidenceTraceData from sentry.seer.signed_seer_api import make_signed_seer_api_request from sentry.tasks.base import instrumented_task -from sentry.tasks.llm_issue_detection.trace_data import get_evidence_trace_for_llm_detection +from sentry.tasks.llm_issue_detection.trace_data import ( + get_project_top_transaction_traces_for_llm_detection, +) from sentry.taskworker.namespaces import issues_tasks from sentry.utils import json @@ -30,10 +30,7 @@ SEER_ANALYZE_ISSUE_ENDPOINT_PATH = "/v1/automation/issue-detection/analyze" SEER_TIMEOUT_S = 120 SEER_RETRIES = 1 - -NUM_TRANSACTIONS_TO_PROCESS = 20 -LOWER_SPAN_LIMIT = 20 -UPPER_SPAN_LIMIT = 500 +START_TIME_DELTA_MINUTES = 30 seer_issue_detection_connection_pool = connection_from_url( @@ -45,11 +42,15 @@ class DetectedIssue(BaseModel): + # LLM generated fields explanation: str impact: str evidence: str missing_telemetry: str | None = None title: str + # context fields, not LLM generated + trace_id: str + transaction_name: str class IssueDetectionResponse(BaseModel): @@ -62,13 +63,13 @@ def __init__( message: str, status: int, project_id: int | None = None, - trace_id: str | None = None, + organization_id: int | None = None, response_data: str | None = None, error_message: str | None = None, ): super().__init__(message, status) self.project_id = project_id - self.trace_id = trace_id + self.organization_id = organization_id self.response_data = response_data self.error_message = error_message @@ -99,9 +100,7 @@ def get_base_platform(platform: str | None) -> str | None: def create_issue_occurrence_from_detection( detected_issue: DetectedIssue, - trace: EvidenceTraceData, project_id: int, - transaction_name: str, ) -> None: """ Create and produce an IssueOccurrence from an LLM-detected issue. @@ -110,11 +109,13 @@ def create_issue_occurrence_from_detection( occurrence_id = uuid4().hex detection_time = datetime.now(UTC) project = Project.objects.get_from_cache(id=project_id) + trace_id = detected_issue.trace_id + transaction_name = detected_issue.transaction_name title = detected_issue.title.lower().replace(" ", "-") fingerprint = [f"llm-detected-{title}-{transaction_name}"] evidence_data = { - "trace_id": trace.trace_id, + "trace_id": trace_id, "transaction": transaction_name, "explanation": detected_issue.explanation, "impact": detected_issue.impact, @@ -155,7 +156,7 @@ def create_issue_occurrence_from_detection( "transaction": transaction_name, "contexts": { "trace": { - "trace_id": trace.trace_id, + "trace_id": trace_id, "type": "trace", } }, @@ -206,6 +207,11 @@ def run_llm_issue_detection() -> None: def detect_llm_issues_for_project(project_id: int) -> None: """ Process a single project for LLM issue detection. + + Gets the project's top 50 transaction spans from the last START_TIME_DELTA_MINUTES, sorted by -sum(span.duration). + From the 50 longest transactions, dedupes on normalized transaction_name. + For each deduped transaction, gets first trace_id from the start of time window, which has small random variation. + Sends these trace_ids to seer, which uses get_trace_waterfall to construct an EAPTrace to analyze. """ project = Project.objects.get_from_cache(id=project_id) organization = project.organization @@ -217,99 +223,67 @@ def detect_llm_issues_for_project(project_id: int) -> None: if not has_access: return - transactions = get_transactions_for_project( - project_id, limit=100, start_time_delta={"minutes": 30} + evidence_traces = get_project_top_transaction_traces_for_llm_detection( + project_id, limit=100, start_time_delta_minutes=START_TIME_DELTA_MINUTES ) - if not transactions: + if not evidence_traces: return - # Shuffle transactions to randomize order - random.shuffle(transactions) + # Shuffle to randomize order + random.shuffle(evidence_traces) - processed_count = 0 - for transaction in transactions: - if processed_count >= NUM_TRANSACTIONS_TO_PROCESS: - break + seer_request = { + "telemetry": [{**trace.dict(), "kind": "trace"} for trace in evidence_traces], + "organization_id": organization_id, + "project_id": project_id, + } + response = make_signed_seer_api_request( + connection_pool=seer_issue_detection_connection_pool, + path=SEER_ANALYZE_ISSUE_ENDPOINT_PATH, + body=json.dumps(seer_request).encode("utf-8"), + ) + if response.status < 200 or response.status >= 300: + raise LLMIssueDetectionError( + message="Seer HTTP error", + status=response.status, + project_id=project_id, + organization_id=organization_id, + response_data=response.data.decode("utf-8"), + ) + + try: + raw_response_data = response.json() + response_data = IssueDetectionResponse.parse_obj(raw_response_data) + except (ValueError, TypeError) as e: + raise LLMIssueDetectionError( + message="Seer response parsing error", + status=response.status, + project_id=project_id, + organization_id=organization_id, + response_data=response.data.decode("utf-8"), + error_message=str(e), + ) + + n_found_issues = len(response_data.issues) + logger.info( + "Seer issue detection success", + extra={ + "num_traces": len(evidence_traces), + "num_issues": n_found_issues, + "organization_id": organization_id, + "project_id": project_id, + "titles": ( + [issue.title for issue in response_data.issues] if n_found_issues > 0 else None + ), + }, + ) + for detected_issue in response_data.issues: try: - trace = get_evidence_trace_for_llm_detection(transaction.name, transaction.project_id) - - if ( - not trace - or trace.total_spans < LOWER_SPAN_LIMIT - or trace.total_spans > UPPER_SPAN_LIMIT - ): - continue - - processed_count += 1 - logger.info( - "Found trace for LLM issue detection", - extra={ - "trace_id": trace.trace_id, - "project_id": project_id, - "total_spans": trace.total_spans, - "transaction_name": trace.transaction_name, - }, - ) - - seer_request = { - "telemetry": [{**trace.dict(), "kind": "trace"}], - "organization_id": organization_id, - "project_id": project_id, - } - response = make_signed_seer_api_request( - connection_pool=seer_issue_detection_connection_pool, - path=SEER_ANALYZE_ISSUE_ENDPOINT_PATH, - body=json.dumps(seer_request).encode("utf-8"), - ) - - if response.status < 200 or response.status >= 300: - raise LLMIssueDetectionError( - message="Seer HTTP error", - status=response.status, - project_id=project_id, - trace_id=trace.trace_id, - response_data=response.data.decode("utf-8"), - ) - - try: - raw_response_data = response.json() - response_data = IssueDetectionResponse.parse_obj(raw_response_data) - except (ValueError, TypeError) as e: - raise LLMIssueDetectionError( - message="Seer response parsing error", - status=response.status, - project_id=project_id, - trace_id=trace.trace_id, - response_data=response.data.decode("utf-8"), - error_message=str(e), - ) - - n_found_issues = len(response_data.issues) - logger.info( - "Seer issue detection success", - extra={ - "num_issues": n_found_issues, - "trace_id": trace.trace_id, - "project_id": project_id, - "titles": ( - [issue.title for issue in response_data.issues] - if n_found_issues > 0 - else None - ), - }, + create_issue_occurrence_from_detection( + detected_issue=detected_issue, + project_id=project_id, ) - for detected_issue in response_data.issues: - try: - create_issue_occurrence_from_detection( - detected_issue=detected_issue, - trace=trace, - project_id=project_id, - transaction_name=transaction.name, - ) - - except Exception as e: - sentry_sdk.capture_exception(e) - except LLMIssueDetectionError as e: + except Exception as e: sentry_sdk.capture_exception(e) - continue # if one transaction encounters an error, don't block processing of the others + continue diff --git a/src/sentry/tasks/llm_issue_detection/trace_data.py b/src/sentry/tasks/llm_issue_detection/trace_data.py index bda63fc8e77d9c..1dfa35e03227ac 100644 --- a/src/sentry/tasks/llm_issue_detection/trace_data.py +++ b/src/sentry/tasks/llm_issue_detection/trace_data.py @@ -1,7 +1,3 @@ -""" -Functions for fetching trace data optimized for LLM issue detection. -""" - from __future__ import annotations import logging @@ -12,7 +8,8 @@ from sentry.models.project import Project from sentry.search.eap.types import SearchResolverConfig from sentry.search.events.types import SnubaParams -from sentry.seer.sentry_data_models import EvidenceSpan, EvidenceTraceData +from sentry.seer.explorer.utils import normalize_description +from sentry.seer.sentry_data_models import EvidenceTraceData from sentry.snuba.referrer import Referrer from sentry.snuba.spans_rpc import Spans @@ -22,123 +19,90 @@ UNESCAPED_QUOTE_RE = re.compile('(? EvidenceTraceData | None: +def get_project_top_transaction_traces_for_llm_detection( + project_id: int, + limit: int, + start_time_delta_minutes: int, +) -> list[EvidenceTraceData]: """ - Get trace data with performance metrics for LLM issue detection. - - Args: - transaction_name: The name of the transaction to find traces for - project_id: The ID of the project - - Returns: - EvidenceTraceData with spans including performance metrics, or None if no traces found + Get top transactions by total time spent, return one semi-randomly chosen trace per transaction. """ try: project = Project.objects.get(id=project_id) except Project.DoesNotExist: - logger.exception( - "Project does not exist; cannot fetch traces for LLM detection", - extra={"project_id": project_id, "transaction_name": transaction_name}, - ) - return None + logger.exception("Project does not exist", extra={"project_id": project_id}) + return [] random_offset = random.randint(1, 8) end_time = datetime.now(UTC) - start_time = end_time - timedelta(minutes=30 - random_offset) + start_time = end_time - timedelta(minutes=start_time_delta_minutes) + # use for both queries to ensure they are searching the same time window snuba_params = SnubaParams( start=start_time, end=end_time, projects=[project], organization=project.organization, ) - config = SearchResolverConfig( - auto_fields=True, - ) + config = SearchResolverConfig(auto_fields=True) - escaped_transaction_name = UNESCAPED_QUOTE_RE.sub('\\"', transaction_name) - traces_result = Spans.run_table_query( + # Step 1: Get top transactions by total time in time window + transactions_result = Spans.run_table_query( params=snuba_params, - query_string=f'transaction:"{escaped_transaction_name}" project.id:{project_id}', + query_string="is_transaction:true", selected_columns=[ - "trace", - "precise.start_ts", + "transaction", + "sum(span.duration)", ], - orderby=["precise.start_ts"], + orderby=["-sum(span.duration)"], offset=0, - limit=1, + limit=limit, referrer=Referrer.SEER_RPC, config=config, sampling_mode="NORMAL", ) - trace_id = None - for row in traces_result.get("data", []): - trace_id = row.get("trace") - if trace_id: - break - - if not trace_id: - logger.info( - "No traces found for transaction (LLM detection)", - extra={"transaction_name": transaction_name, "project_id": project_id}, + evidence_traces = [] + seen_names = set() + + for row in transactions_result.get("data", []): + transaction_name = row.get("transaction") + if not transaction_name: + continue + + normalized_name = normalize_description(transaction_name) + if normalized_name in seen_names: + continue + + # Step 2: Get ONE trace for this transaction from THE SAME time window + escaped_transaction_name = UNESCAPED_QUOTE_RE.sub('\\"', transaction_name) + trace_result = Spans.run_table_query( + params=snuba_params, + query_string=f'is_transaction:true transaction:"{escaped_transaction_name}"', + selected_columns=["trace", "precise.start_ts"], + orderby=["precise.start_ts"], # First trace in the window + offset=0, + limit=1, + referrer=Referrer.SEER_RPC, + config=config, + sampling_mode="NORMAL", ) - return None - spans_result = Spans.run_table_query( - params=snuba_params, - query_string=f"trace:{trace_id}", - selected_columns=[ - "span_id", - "parent_span", - "span.op", - "span.description", - "precise.start_ts", - "span.self_time", - "span.duration", - "span.status", - ], - orderby=["precise.start_ts"], - offset=0, - limit=1000, - referrer=Referrer.SEER_RPC, - config=config, - sampling_mode="NORMAL", - ) + # Get the first (and only) result + data = trace_result.get("data", []) + if not data: + continue + + trace_id = data[0].get("trace") + if not trace_id: + continue - evidence_spans: list[EvidenceSpan] = [] - for row in spans_result.get("data", []): - span_id = row.get("span_id") - parent_span_id = row.get("parent_span") - span_op = row.get("span.op") - span_description = row.get("span.description") - span_exclusive_time = row.get("span.self_time") - span_duration = row.get("span.duration") - span_status = row.get("span.status") - span_timestamp = row.get("precise.start_ts") - - if span_id: - evidence_spans.append( - EvidenceSpan( - span_id=span_id, - parent_span_id=parent_span_id, - op=span_op, - description=span_description or "", - exclusive_time=span_exclusive_time, - timestamp=span_timestamp, - data={ - "duration": span_duration, - "status": span_status, - }, - ) + evidence_traces.append( + EvidenceTraceData( + trace_id=trace_id, + transaction_name=normalized_name, ) + ) + seen_names.add(normalized_name) - return EvidenceTraceData( - trace_id=trace_id, - project_id=project_id, - transaction_name=transaction_name, - total_spans=len(evidence_spans), - spans=evidence_spans, - ) + return evidence_traces diff --git a/tests/sentry/tasks/test_llm_issue_detection.py b/tests/sentry/tasks/test_llm_issue_detection.py index 226791857a631e..3c9e60f4882ede 100644 --- a/tests/sentry/tasks/test_llm_issue_detection.py +++ b/tests/sentry/tasks/test_llm_issue_detection.py @@ -3,14 +3,16 @@ from unittest.mock import Mock, patch from sentry.issues.grouptype import LLMDetectedExperimentalGroupType -from sentry.seer.sentry_data_models import EvidenceSpan, EvidenceTraceData from sentry.tasks.llm_issue_detection import ( DetectedIssue, create_issue_occurrence_from_detection, detect_llm_issues_for_project, run_llm_issue_detection, ) -from sentry.tasks.llm_issue_detection.trace_data import get_evidence_trace_for_llm_detection +from sentry.tasks.llm_issue_detection.detection import START_TIME_DELTA_MINUTES +from sentry.tasks.llm_issue_detection.trace_data import ( + get_project_top_transaction_traces_for_llm_detection, +) from sentry.testutils.cases import APITransactionTestCase, SnubaTestCase, SpanTestCase, TestCase from sentry.testutils.helpers.datetime import before_now from sentry.testutils.helpers.features import with_feature @@ -35,33 +37,39 @@ def test_run_detection_dispatches_sub_tasks(self, mock_delay): assert mock_delay.call_args[0][0] == project.id @with_feature("organizations:gen-ai-features") - @patch("sentry.tasks.llm_issue_detection.detection.get_transactions_for_project") - def test_detect_llm_issues_no_transactions(self, mock_get_transactions): + @patch("sentry.tasks.llm_issue_detection.detection.make_signed_seer_api_request") + @patch( + "sentry.tasks.llm_issue_detection.detection.get_project_top_transaction_traces_for_llm_detection" + ) + def test_detect_llm_issues_no_transactions(self, mock_get_transactions, mock_seer_request): """Test that the task returns early when there are no transactions.""" mock_get_transactions.return_value = [] detect_llm_issues_for_project(self.project.id) mock_get_transactions.assert_called_once_with( - self.project.id, limit=100, start_time_delta={"minutes": 30} + self.project.id, limit=100, start_time_delta_minutes=START_TIME_DELTA_MINUTES ) + mock_seer_request.assert_not_called() @with_feature("organizations:gen-ai-features") - @patch("sentry.tasks.llm_issue_detection.detection.get_evidence_trace_for_llm_detection") - @patch("sentry.tasks.llm_issue_detection.detection.get_transactions_for_project") - @patch("sentry.tasks.llm_issue_detection.detection.random.shuffle") - def test_detect_llm_issues_no_traces(self, mock_shuffle, mock_get_transactions, mock_get_trace): - """Test that the task continues gracefully when traces can't be fetched.""" - mock_transaction = Mock() - mock_transaction.name = "test_tx" - mock_transaction.project_id = self.project.id - mock_get_transactions.return_value = [mock_transaction] - mock_shuffle.return_value = None # shuffle modifies in place - mock_get_trace.return_value = None + @patch("sentry.tasks.llm_issue_detection.trace_data.Spans.run_table_query") + @patch("sentry.tasks.llm_issue_detection.detection.make_signed_seer_api_request") + def test_detect_llm_issues_no_traces(self, mock_seer_request, mock_spans_query): + """Test that the task returns early when traces can't be fetched for top transactions.""" + mock_spans_query.side_effect = [ + # First call: Return a transaction + { + "data": [{"transaction": "transaction_name", "sum(span.duration)": 1000}], + "meta": {}, + }, + # Second call (trace query): return empty + {"data": [], "meta": {}}, + ] detect_llm_issues_for_project(self.project.id) - mock_get_trace.assert_called_once_with(mock_transaction.name, mock_transaction.project_id) + mock_seer_request.assert_not_called() @patch("sentry.tasks.llm_issue_detection.detection.produce_occurrence_to_kafka") def test_create_issue_occurrence_from_detection(self, mock_produce_occurrence): @@ -71,16 +79,13 @@ def test_create_issue_occurrence_from_detection(self, mock_produce_occurrence): impact="High - may cause request failures", evidence="Connection pool at 95% capacity", missing_telemetry="Database connection metrics", + trace_id="abc123xyz", + transaction_name="test_transaction", ) - mock_trace = Mock() - mock_trace.trace_id = "abc123xyz" - create_issue_occurrence_from_detection( detected_issue=detected_issue, - trace=mock_trace, project_id=self.project.id, - transaction_name="test_transaction", ) assert mock_produce_occurrence.called @@ -137,16 +142,13 @@ def test_create_issue_occurrence_without_missing_telemetry(self, mock_produce_oc explanation="API calls taking too long", impact="Medium", evidence="Response time > 2s", + trace_id="xyz789", + transaction_name="api_endpoint", ) - mock_trace = Mock() - mock_trace.trace_id = "xyz789" - create_issue_occurrence_from_detection( detected_issue=detected_issue, - trace=mock_trace, project_id=self.project.id, - transaction_name="api_endpoint", ) occurrence = mock_produce_occurrence.call_args.kwargs["occurrence"] @@ -159,44 +161,42 @@ def test_create_issue_occurrence_without_missing_telemetry(self, mock_produce_oc @with_feature("organizations:gen-ai-features") @patch("sentry.tasks.llm_issue_detection.detection.produce_occurrence_to_kafka") @patch("sentry.tasks.llm_issue_detection.detection.make_signed_seer_api_request") - @patch("sentry.tasks.llm_issue_detection.detection.get_evidence_trace_for_llm_detection") - @patch("sentry.tasks.llm_issue_detection.detection.get_transactions_for_project") + @patch("sentry.tasks.llm_issue_detection.trace_data.Spans.run_table_query") @patch("sentry.tasks.llm_issue_detection.detection.random.shuffle") def test_detect_llm_issues_full_flow( self, mock_shuffle, - mock_get_transactions, - mock_get_trace, + mock_spans_query, mock_seer_request, mock_produce_occurrence, ): """Test the full detect_llm_issues_for_project flow with Seer API interaction.""" - mock_transaction = Mock() - mock_transaction.name = "api/users/list" - mock_transaction.project_id = self.project.id - mock_get_transactions.return_value = [mock_transaction] - mock_shuffle.return_value = None - - mock_span = EvidenceSpan( - span_id="span123", - parent_span_id=None, - op="db.query", - description="SELECT * FROM users", - exclusive_time=150.5, - data={ - "duration": 200.0, - "status": "ok", - }, - ) + mock_shuffle.return_value = None # shuffles in-place, mock to block from changing order - mock_trace = EvidenceTraceData( - trace_id="trace-abc-123", - project_id=self.project.id, - transaction_name="api/users/list", - total_spans=100, - spans=[mock_span], - ) - mock_get_trace.return_value = mock_trace + mock_spans_query.side_effect = [ + # First call: transaction spans + { + "data": [ + {"transaction": "POST /some/thing", "sum(span.duration)": 1007}, + {"transaction": "GET /another/", "sum(span.duration)": 1003}, + ], + "meta": {}, + }, + # Second call: trace for transaction 1 + { + "data": [ + {"trace": "trace_id_1", "precise.start_ts": 1234}, + ], + "meta": {}, + }, + # Third call: trace for transaction 2 + { + "data": [ + {"trace": "trace_id_2", "precise.start_ts": 1234}, + ], + "meta": {}, + }, + ] seer_response_data = { "issues": [ @@ -206,6 +206,8 @@ def test_detect_llm_issues_full_flow( "impact": "High - causes performance degradation", "evidence": "15 queries executed sequentially", "missing_telemetry": "Database query attribution", + "trace_id": "trace_id_1", + "transaction_name": "POST /some/thing", }, { "title": "Memory Leak Risk", @@ -213,6 +215,8 @@ def test_detect_llm_issues_full_flow( "impact": "Medium - may cause OOM", "evidence": "Objects not released after use", "missing_telemetry": None, + "trace_id": "trace_id_2", + "transaction_name": "GET /another/", }, ] } @@ -224,6 +228,7 @@ def test_detect_llm_issues_full_flow( detect_llm_issues_for_project(self.project.id) + assert mock_spans_query.call_count == 3 # 1 for transactions, 2 for traces assert mock_seer_request.called seer_call_kwargs = mock_seer_request.call_args.kwargs assert seer_call_kwargs["path"] == "/v1/automation/issue-detection/analyze" @@ -231,16 +236,20 @@ def test_detect_llm_issues_full_flow( request_body = json.loads(seer_call_kwargs["body"].decode("utf-8")) assert request_body["project_id"] == self.project.id assert request_body["organization_id"] == self.project.organization_id - assert len(request_body["telemetry"]) == 1 + assert len(request_body["telemetry"]) == 2 assert request_body["telemetry"][0]["kind"] == "trace" - assert request_body["telemetry"][0]["trace_id"] == "trace-abc-123" + assert request_body["telemetry"][0]["trace_id"] == "trace_id_1" + assert request_body["telemetry"][0]["transaction_name"] == "POST /some/thing" + assert request_body["telemetry"][1]["kind"] == "trace" + assert request_body["telemetry"][1]["trace_id"] == "trace_id_2" + assert request_body["telemetry"][1]["transaction_name"] == "GET /another/" assert mock_produce_occurrence.call_count == 2 first_occurrence = mock_produce_occurrence.call_args_list[0].kwargs["occurrence"] assert first_occurrence.type == LLMDetectedExperimentalGroupType assert first_occurrence.issue_title == "N+1 Query Detected" - assert first_occurrence.culprit == "api/users/list" + assert first_occurrence.culprit == "POST /some/thing" assert first_occurrence.project_id == self.project.id assert len(first_occurrence.evidence_display) == 3 @@ -249,73 +258,65 @@ def test_detect_llm_issues_full_flow( assert len(second_occurrence.evidence_display) == 3 -class TestGetEvidenceTraceForLLMDetection(APITransactionTestCase, SnubaTestCase, SpanTestCase): +class TestGetProjectTopTransactionTracesForLLMDetection( + APITransactionTestCase, SnubaTestCase, SpanTestCase +): def setUp(self) -> None: super().setUp() self.ten_mins_ago = before_now(minutes=10) - def test_get_evidence_trace_for_llm_detection(self) -> None: - transaction_name = "api/users/profile" + def test_returns_deduped_transaction_traces(self) -> None: + trace_id_1 = uuid.uuid4().hex + span1 = self.create_span( + { + "description": "GET /api/users/123456", # will dedupe + "sentry_tags": {"transaction": "GET /api/users/123456"}, + "trace_id": trace_id_1, + "is_segment": True, + "exclusive_time_ms": 100, + "duration_ms": 100, + }, + start_ts=self.ten_mins_ago, + ) - # Create multiple traces with different span counts - traces_data = [ - (5, "trace-medium", 0), - (2, "trace-small", 10), - (8, "trace-large", 20), - ] + trace_id_2 = uuid.uuid4().hex + span2 = self.create_span( + { + "description": "GET /api/users/789012", # will dedupe + "sentry_tags": {"transaction": "GET /api/users/789012"}, + "trace_id": trace_id_2, + "is_segment": True, + "exclusive_time_ms": 200, + "duration_ms": 200, # will return before span1 in transaction query + }, + start_ts=self.ten_mins_ago + timedelta(seconds=1), + ) + + trace_id_3 = uuid.uuid4().hex + span3 = self.create_span( + { + "description": "POST /api/orders", + "sentry_tags": {"transaction": "POST /api/orders"}, + "trace_id": trace_id_3, + "is_segment": True, + "exclusive_time_ms": 150, + "duration_ms": 150, + }, + start_ts=self.ten_mins_ago + timedelta(seconds=2), + ) + + self.store_spans([span1, span2, span3], is_eap=True) + + evidence_traces = get_project_top_transaction_traces_for_llm_detection( + self.project.id, limit=50, start_time_delta_minutes=30 + ) + + assert len(evidence_traces) == 2 + + assert ( + evidence_traces[0].trace_id == trace_id_2 + ) # prevails over trace_id_1 because transaction span duration was higher + assert evidence_traces[0].transaction_name == "GET /api/users/" - spans = [] - trace_ids = [] - expected_trace_id = None - - for span_count, trace_suffix, start_offset_minutes in traces_data: - trace_id = uuid.uuid4().hex - trace_ids.append(trace_id) - if trace_suffix == "trace-medium": - expected_trace_id = trace_id - - for i in range(span_count): - span = self.create_span( - { - "description": f"span-{i}-{trace_suffix}", - "sentry_tags": {"transaction": transaction_name}, - "trace_id": trace_id, - "parent_span_id": None if i == 0 else f"parent-{i-1}", - "is_segment": i == 0, - }, - start_ts=self.ten_mins_ago + timedelta(minutes=start_offset_minutes + i), - ) - spans.append(span) - - self.store_spans(spans, is_eap=True) - - # Call the LLM detection function - result = get_evidence_trace_for_llm_detection(transaction_name, self.project.id) - - # Verify basic structure - assert result is not None - assert result.transaction_name == transaction_name - assert result.project_id == self.project.id - assert result.trace_id in trace_ids - assert result.trace_id == expected_trace_id - assert result.total_spans == 5 - assert len(result.spans) == 5 - - # Verify it's EvidenceTraceData with EvidenceSpan objects - assert isinstance(result, EvidenceTraceData) - for result_span in result.spans: - assert isinstance(result_span, EvidenceSpan) - assert result_span.span_id is not None - assert result_span.description is not None - assert result_span.description.startswith("span-") - assert "trace-medium" in result_span.description - assert hasattr(result_span, "op") - assert hasattr(result_span, "exclusive_time") - assert hasattr(result_span, "data") - assert result_span.data is not None - assert "duration" in result_span.data - assert "status" in result_span.data - - # Verify parent-child relationships are preserved - root_spans = [s for s in result.spans if s.parent_span_id is None] - assert len(root_spans) == 1 + assert evidence_traces[1].trace_id == trace_id_3 + assert evidence_traces[1].transaction_name == "POST /api/orders" From 7c869491c817e2df93a01727bd07fc49af3a6538 Mon Sep 17 00:00:00 2001 From: Nora Shapiro Date: Fri, 5 Dec 2025 14:25:05 -0800 Subject: [PATCH 2/4] increase limit, introduce randomness --- src/sentry/seer/sentry_data_models.py | 2 +- .../tasks/llm_issue_detection/detection.py | 7 +++-- .../tasks/llm_issue_detection/trace_data.py | 30 +++++++++++-------- .../sentry/tasks/test_llm_issue_detection.py | 11 +++++-- 4 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/sentry/seer/sentry_data_models.py b/src/sentry/seer/sentry_data_models.py index d7336f9cb1252d..b1216664ae7163 100644 --- a/src/sentry/seer/sentry_data_models.py +++ b/src/sentry/seer/sentry_data_models.py @@ -30,7 +30,7 @@ class TraceData(BaseModel): spans: list[Span] -class EvidenceTraceData(BaseModel): # hate this name +class EvidenceTraceData(BaseModel): trace_id: str transaction_name: str diff --git a/src/sentry/tasks/llm_issue_detection/detection.py b/src/sentry/tasks/llm_issue_detection/detection.py index 513b127faaa5f8..0f7f3a10ff522f 100644 --- a/src/sentry/tasks/llm_issue_detection/detection.py +++ b/src/sentry/tasks/llm_issue_detection/detection.py @@ -31,6 +31,7 @@ SEER_TIMEOUT_S = 120 SEER_RETRIES = 1 START_TIME_DELTA_MINUTES = 30 +TRANSACTION_BATCH_SIZE = 100 seer_issue_detection_connection_pool = connection_from_url( @@ -208,8 +209,8 @@ def detect_llm_issues_for_project(project_id: int) -> None: """ Process a single project for LLM issue detection. - Gets the project's top 50 transaction spans from the last START_TIME_DELTA_MINUTES, sorted by -sum(span.duration). - From the 50 longest transactions, dedupes on normalized transaction_name. + Gets the project's top TRANSACTION_BATCH_SIZE transaction spans from the last START_TIME_DELTA_MINUTES, sorted by -sum(span.duration). + From those transactions, dedupes on normalized transaction_name. For each deduped transaction, gets first trace_id from the start of time window, which has small random variation. Sends these trace_ids to seer, which uses get_trace_waterfall to construct an EAPTrace to analyze. """ @@ -224,7 +225,7 @@ def detect_llm_issues_for_project(project_id: int) -> None: return evidence_traces = get_project_top_transaction_traces_for_llm_detection( - project_id, limit=100, start_time_delta_minutes=START_TIME_DELTA_MINUTES + project_id, limit=TRANSACTION_BATCH_SIZE, start_time_delta_minutes=START_TIME_DELTA_MINUTES ) if not evidence_traces: return diff --git a/src/sentry/tasks/llm_issue_detection/trace_data.py b/src/sentry/tasks/llm_issue_detection/trace_data.py index 1dfa35e03227ac..c670a5b0890126 100644 --- a/src/sentry/tasks/llm_issue_detection/trace_data.py +++ b/src/sentry/tasks/llm_issue_detection/trace_data.py @@ -33,22 +33,27 @@ def get_project_top_transaction_traces_for_llm_detection( logger.exception("Project does not exist", extra={"project_id": project_id}) return [] - random_offset = random.randint(1, 8) end_time = datetime.now(UTC) start_time = end_time - timedelta(minutes=start_time_delta_minutes) - - # use for both queries to ensure they are searching the same time window - snuba_params = SnubaParams( - start=start_time, - end=end_time, - projects=[project], - organization=project.organization, - ) config = SearchResolverConfig(auto_fields=True) - # Step 1: Get top transactions by total time in time window + def _build_snuba_params(start: datetime) -> SnubaParams: + """ + Both queries have different start times and the same end time. + """ + return SnubaParams( + start=start, + end=end_time, + projects=[project], + organization=project.organization, + ) + + transaction_snuba_params = _build_snuba_params(start_time) + random_offset = random.randint(1, 8) + trace_snuba_params = _build_snuba_params(start_time + timedelta(minutes=random_offset)) + transactions_result = Spans.run_table_query( - params=snuba_params, + params=transaction_snuba_params, query_string="is_transaction:true", selected_columns=[ "transaction", @@ -74,10 +79,9 @@ def get_project_top_transaction_traces_for_llm_detection( if normalized_name in seen_names: continue - # Step 2: Get ONE trace for this transaction from THE SAME time window escaped_transaction_name = UNESCAPED_QUOTE_RE.sub('\\"', transaction_name) trace_result = Spans.run_table_query( - params=snuba_params, + params=trace_snuba_params, query_string=f'is_transaction:true transaction:"{escaped_transaction_name}"', selected_columns=["trace", "precise.start_ts"], orderby=["precise.start_ts"], # First trace in the window diff --git a/tests/sentry/tasks/test_llm_issue_detection.py b/tests/sentry/tasks/test_llm_issue_detection.py index 3c9e60f4882ede..0bd03129e8f34c 100644 --- a/tests/sentry/tasks/test_llm_issue_detection.py +++ b/tests/sentry/tasks/test_llm_issue_detection.py @@ -9,7 +9,10 @@ detect_llm_issues_for_project, run_llm_issue_detection, ) -from sentry.tasks.llm_issue_detection.detection import START_TIME_DELTA_MINUTES +from sentry.tasks.llm_issue_detection.detection import ( + START_TIME_DELTA_MINUTES, + TRANSACTION_BATCH_SIZE, +) from sentry.tasks.llm_issue_detection.trace_data import ( get_project_top_transaction_traces_for_llm_detection, ) @@ -48,7 +51,9 @@ def test_detect_llm_issues_no_transactions(self, mock_get_transactions, mock_see detect_llm_issues_for_project(self.project.id) mock_get_transactions.assert_called_once_with( - self.project.id, limit=100, start_time_delta_minutes=START_TIME_DELTA_MINUTES + self.project.id, + limit=TRANSACTION_BATCH_SIZE, + start_time_delta_minutes=START_TIME_DELTA_MINUTES, ) mock_seer_request.assert_not_called() @@ -308,7 +313,7 @@ def test_returns_deduped_transaction_traces(self) -> None: self.store_spans([span1, span2, span3], is_eap=True) evidence_traces = get_project_top_transaction_traces_for_llm_detection( - self.project.id, limit=50, start_time_delta_minutes=30 + self.project.id, limit=TRANSACTION_BATCH_SIZE, start_time_delta_minutes=30 ) assert len(evidence_traces) == 2 From ae4fc8a97529b6478ca1d7305d436556e0b9cef9 Mon Sep 17 00:00:00 2001 From: Nora Shapiro Date: Mon, 8 Dec 2025 16:18:43 -0800 Subject: [PATCH 3/4] update naming --- src/sentry/seer/sentry_data_models.py | 2 +- .../tasks/llm_issue_detection/detection.py | 18 +++++++++++++----- .../tasks/llm_issue_detection/trace_data.py | 12 ++++++------ tests/sentry/tasks/test_llm_issue_detection.py | 12 +++++------- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/sentry/seer/sentry_data_models.py b/src/sentry/seer/sentry_data_models.py index b1216664ae7163..f341fb55c27a99 100644 --- a/src/sentry/seer/sentry_data_models.py +++ b/src/sentry/seer/sentry_data_models.py @@ -30,7 +30,7 @@ class TraceData(BaseModel): spans: list[Span] -class EvidenceTraceData(BaseModel): +class TraceMetadata(BaseModel): trace_id: str transaction_name: str diff --git a/src/sentry/tasks/llm_issue_detection/detection.py b/src/sentry/tasks/llm_issue_detection/detection.py index 0f7f3a10ff522f..4fd66bf9fec1b6 100644 --- a/src/sentry/tasks/llm_issue_detection/detection.py +++ b/src/sentry/tasks/llm_issue_detection/detection.py @@ -17,6 +17,7 @@ from sentry.models.project import Project from sentry.net.http import connection_from_url from sentry.seer.models import SeerApiError +from sentry.seer.sentry_data_models import TraceMetadata from sentry.seer.signed_seer_api import make_signed_seer_api_request from sentry.tasks.base import instrumented_task from sentry.tasks.llm_issue_detection.trace_data import ( @@ -58,6 +59,12 @@ class IssueDetectionResponse(BaseModel): issues: list[DetectedIssue] +class IssueDetectionRequest(BaseModel): + traces: list[TraceMetadata] + organization_id: int + project_id: int + + class LLMIssueDetectionError(SeerApiError): def __init__( self, @@ -233,11 +240,12 @@ def detect_llm_issues_for_project(project_id: int) -> None: # Shuffle to randomize order random.shuffle(evidence_traces) - seer_request = { - "telemetry": [{**trace.dict(), "kind": "trace"} for trace in evidence_traces], - "organization_id": organization_id, - "project_id": project_id, - } + seer_request = IssueDetectionRequest( + traces=evidence_traces, + organization_id=organization_id, + project_id=project_id, + ) + response = make_signed_seer_api_request( connection_pool=seer_issue_detection_connection_pool, path=SEER_ANALYZE_ISSUE_ENDPOINT_PATH, diff --git a/src/sentry/tasks/llm_issue_detection/trace_data.py b/src/sentry/tasks/llm_issue_detection/trace_data.py index c670a5b0890126..bccb5e9eef1405 100644 --- a/src/sentry/tasks/llm_issue_detection/trace_data.py +++ b/src/sentry/tasks/llm_issue_detection/trace_data.py @@ -9,7 +9,7 @@ from sentry.search.eap.types import SearchResolverConfig from sentry.search.events.types import SnubaParams from sentry.seer.explorer.utils import normalize_description -from sentry.seer.sentry_data_models import EvidenceTraceData +from sentry.seer.sentry_data_models import TraceMetadata from sentry.snuba.referrer import Referrer from sentry.snuba.spans_rpc import Spans @@ -23,7 +23,7 @@ def get_project_top_transaction_traces_for_llm_detection( project_id: int, limit: int, start_time_delta_minutes: int, -) -> list[EvidenceTraceData]: +) -> list[TraceMetadata]: """ Get top transactions by total time spent, return one semi-randomly chosen trace per transaction. """ @@ -67,7 +67,7 @@ def _build_snuba_params(start: datetime) -> SnubaParams: sampling_mode="NORMAL", ) - evidence_traces = [] + trace_metadata = [] seen_names = set() for row in transactions_result.get("data", []): @@ -101,12 +101,12 @@ def _build_snuba_params(start: datetime) -> SnubaParams: if not trace_id: continue - evidence_traces.append( - EvidenceTraceData( + trace_metadata.append( + TraceMetadata( trace_id=trace_id, transaction_name=normalized_name, ) ) seen_names.add(normalized_name) - return evidence_traces + return trace_metadata diff --git a/tests/sentry/tasks/test_llm_issue_detection.py b/tests/sentry/tasks/test_llm_issue_detection.py index 0bd03129e8f34c..47f7dfd3a35d9d 100644 --- a/tests/sentry/tasks/test_llm_issue_detection.py +++ b/tests/sentry/tasks/test_llm_issue_detection.py @@ -241,13 +241,11 @@ def test_detect_llm_issues_full_flow( request_body = json.loads(seer_call_kwargs["body"].decode("utf-8")) assert request_body["project_id"] == self.project.id assert request_body["organization_id"] == self.project.organization_id - assert len(request_body["telemetry"]) == 2 - assert request_body["telemetry"][0]["kind"] == "trace" - assert request_body["telemetry"][0]["trace_id"] == "trace_id_1" - assert request_body["telemetry"][0]["transaction_name"] == "POST /some/thing" - assert request_body["telemetry"][1]["kind"] == "trace" - assert request_body["telemetry"][1]["trace_id"] == "trace_id_2" - assert request_body["telemetry"][1]["transaction_name"] == "GET /another/" + assert len(request_body["traces"]) == 2 + assert request_body["traces"][0]["trace_id"] == "trace_id_1" + assert request_body["traces"][0]["transaction_name"] == "POST /some/thing" + assert request_body["traces"][1]["trace_id"] == "trace_id_2" + assert request_body["traces"][1]["transaction_name"] == "GET /another/" assert mock_produce_occurrence.call_count == 2 From ffc52e9a27ba810fc4189a879f93ab8cbd2baca6 Mon Sep 17 00:00:00 2001 From: Nora Shapiro Date: Tue, 9 Dec 2025 10:54:48 -0800 Subject: [PATCH 4/4] fix exception handling --- src/sentry/tasks/llm_issue_detection/detection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sentry/tasks/llm_issue_detection/detection.py b/src/sentry/tasks/llm_issue_detection/detection.py index 4fd66bf9fec1b6..9785ddad9fd7d8 100644 --- a/src/sentry/tasks/llm_issue_detection/detection.py +++ b/src/sentry/tasks/llm_issue_detection/detection.py @@ -7,7 +7,7 @@ import sentry_sdk from django.conf import settings -from pydantic import BaseModel +from pydantic import BaseModel, ValidationError from sentry import features, options from sentry.constants import VALID_PLATFORMS @@ -249,7 +249,7 @@ def detect_llm_issues_for_project(project_id: int) -> None: response = make_signed_seer_api_request( connection_pool=seer_issue_detection_connection_pool, path=SEER_ANALYZE_ISSUE_ENDPOINT_PATH, - body=json.dumps(seer_request).encode("utf-8"), + body=json.dumps(seer_request.dict()).encode("utf-8"), ) if response.status < 200 or response.status >= 300: @@ -264,7 +264,7 @@ def detect_llm_issues_for_project(project_id: int) -> None: try: raw_response_data = response.json() response_data = IssueDetectionResponse.parse_obj(raw_response_data) - except (ValueError, TypeError) as e: + except (ValueError, TypeError, ValidationError) as e: raise LLMIssueDetectionError( message="Seer response parsing error", status=response.status,