From 19a72911223c44543fe5b5bf5c9cb44118a45848 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 7 Oct 2025 10:57:04 -0700 Subject: [PATCH 01/34] Add response token count logic to Gemini instrumentation. (#1486) * Add response token count logic to Gemini instrumentation. * Update token counting util functions. * Linting * Add response token count logic to Gemini instrumentation. * Update token counting util functions. * [MegaLinter] Apply linters fixes * Bump tests. --------- Co-authored-by: Tim Pansino --- newrelic/hooks/mlmodel_gemini.py | 152 ++++++++++++------ tests/mlmodel_gemini/test_embeddings.py | 6 +- tests/mlmodel_gemini/test_embeddings_error.py | 62 +------ tests/mlmodel_gemini/test_text_generation.py | 12 +- .../test_text_generation_error.py | 81 +--------- tests/testing_support/ml_testing_utils.py | 19 +++ 6 files changed, 139 insertions(+), 193 deletions(-) diff --git a/newrelic/hooks/mlmodel_gemini.py b/newrelic/hooks/mlmodel_gemini.py index 8aeb1355d0..6f61c11125 100644 --- a/newrelic/hooks/mlmodel_gemini.py +++ b/newrelic/hooks/mlmodel_gemini.py @@ -175,20 +175,24 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg embedding_content = str(embedding_content) request_model = kwargs.get("model") + embedding_token_count = ( + settings.ai_monitoring.llm_token_count_callback(request_model, embedding_content) + if settings.ai_monitoring.llm_token_count_callback + else None + ) + full_embedding_response_dict = { "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(request_model, embedding_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "request.model": request_model, "duration": ft.duration * 1000, "vendor": "gemini", "ingest_source": "Python", } + if embedding_token_count: + full_embedding_response_dict["response.usage.total_tokens"] = embedding_token_count + if settings.ai_monitoring.record_content.enabled: full_embedding_response_dict["input"] = embedding_content @@ -300,15 +304,13 @@ def _record_generation_error(transaction, linking_metadata, completion_id, kwarg "Unable to parse input message to Gemini LLM. Message content and role will be omitted from " "corresponding LlmChatCompletionMessage event. 
" ) + # Extract the input message content and role from the input message if it exists + input_message_content, input_role = _parse_input_message(input_message) if input_message else (None, None) - generation_config = kwargs.get("config") - if generation_config: - request_temperature = getattr(generation_config, "temperature", None) - request_max_tokens = getattr(generation_config, "max_output_tokens", None) - else: - request_temperature = None - request_max_tokens = None + # Extract data from generation config object + request_temperature, request_max_tokens = _extract_generation_config(kwargs) + # Prepare error attributes notice_error_attributes = { "http.statusCode": getattr(exc, "code", None), "error.message": getattr(exc, "message", None), @@ -348,15 +350,17 @@ def _record_generation_error(transaction, linking_metadata, completion_id, kwarg create_chat_completion_message_event( transaction, - input_message, + input_message_content, + input_role, completion_id, span_id, trace_id, # Passing the request model as the response model here since we do not have access to a response model request_model, - request_model, llm_metadata, output_message_list, + # We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run + all_token_counts=True, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) @@ -377,6 +381,7 @@ def _handle_generation_success(transaction, linking_metadata, completion_id, kwa def _record_generation_success(transaction, linking_metadata, completion_id, kwargs, ft, response): + settings = transaction.settings or global_settings() span_id = linking_metadata.get("span.id") trace_id = linking_metadata.get("trace.id") try: @@ -385,12 +390,14 @@ def _record_generation_success(transaction, linking_metadata, completion_id, kwa # finish_reason is an enum, so grab just the stringified value from it to report finish_reason = response.get("candidates")[0].get("finish_reason").value output_message_list = [response.get("candidates")[0].get("content")] + token_usage = response.get("usage_metadata") or {} else: # Set all values to NoneTypes since we cannot access them through kwargs or another method that doesn't # require the response object response_model = None output_message_list = [] finish_reason = None + token_usage = {} request_model = kwargs.get("model") @@ -412,13 +419,44 @@ def _record_generation_success(transaction, linking_metadata, completion_id, kwa "corresponding LlmChatCompletionMessage event. 
" ) - generation_config = kwargs.get("config") - if generation_config: - request_temperature = getattr(generation_config, "temperature", None) - request_max_tokens = getattr(generation_config, "max_output_tokens", None) + input_message_content, input_role = _parse_input_message(input_message) if input_message else (None, None) + + # Parse output message content + # This list should have a length of 1 to represent the output message + # Parse the message text out to pass to any registered token counting callback + output_message_content = output_message_list[0].get("parts")[0].get("text") if output_message_list else None + + # Extract token counts from response object + if token_usage: + response_prompt_tokens = token_usage.get("prompt_token_count") + response_completion_tokens = token_usage.get("candidates_token_count") + response_total_tokens = token_usage.get("total_token_count") + else: - request_temperature = None - request_max_tokens = None + response_prompt_tokens = None + response_completion_tokens = None + response_total_tokens = None + + # Calculate token counts by checking if a callback is registered and if we have the necessary content to pass + # to it. If not, then we use the token counts provided in the response object + prompt_tokens = ( + settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content) + if settings.ai_monitoring.llm_token_count_callback and input_message_content + else response_prompt_tokens + ) + completion_tokens = ( + settings.ai_monitoring.llm_token_count_callback(response_model, output_message_content) + if settings.ai_monitoring.llm_token_count_callback and output_message_content + else response_completion_tokens + ) + total_tokens = ( + prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens + ) + + all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens) + + # Extract generation config + request_temperature, request_max_tokens = _extract_generation_config(kwargs) full_chat_completion_summary_dict = { "id": completion_id, @@ -438,66 +476,78 @@ def _record_generation_success(transaction, linking_metadata, completion_id, kwa "response.number_of_messages": 1 + len(output_message_list), } + if all_token_counts: + full_chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens + full_chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens + full_chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens + llm_metadata = _get_llm_attributes(transaction) full_chat_completion_summary_dict.update(llm_metadata) transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict) create_chat_completion_message_event( transaction, - input_message, + input_message_content, + input_role, completion_id, span_id, trace_id, response_model, - request_model, llm_metadata, output_message_list, + all_token_counts, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) +def _parse_input_message(input_message): + # The input_message will be a string if generate_content was called directly. 
In this case, we don't have + # access to the role, so we default to user since this was an input message + if isinstance(input_message, str): + return input_message, "user" + # The input_message will be a Google Content type if send_message was called, so we parse out the message + # text and role (which should be "user") + elif isinstance(input_message, google.genai.types.Content): + return input_message.parts[0].text, input_message.role + else: + return None, None + + +def _extract_generation_config(kwargs): + generation_config = kwargs.get("config") + if generation_config: + request_temperature = getattr(generation_config, "temperature", None) + request_max_tokens = getattr(generation_config, "max_output_tokens", None) + else: + request_temperature = None + request_max_tokens = None + + return request_temperature, request_max_tokens + + def create_chat_completion_message_event( transaction, - input_message, + input_message_content, + input_role, chat_completion_id, span_id, trace_id, response_model, - request_model, llm_metadata, output_message_list, + all_token_counts, ): try: settings = transaction.settings or global_settings() - if input_message: - # The input_message will be a string if generate_content was called directly. In this case, we don't have - # access to the role, so we default to user since this was an input message - if isinstance(input_message, str): - input_message_content = input_message - input_role = "user" - # The input_message will be a Google Content type if send_message was called, so we parse out the message - # text and role (which should be "user") - elif isinstance(input_message, google.genai.types.Content): - input_message_content = input_message.parts[0].text - input_role = input_message.role - # Set input data to NoneTypes to ensure token_count callback is not called - else: - input_message_content = None - input_role = None - + if input_message_content: message_id = str(uuid.uuid4()) chat_completion_input_message_dict = { "id": message_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content) - if settings.ai_monitoring.llm_token_count_callback and input_message_content - else None - ), "role": input_role, "completion_id": chat_completion_id, # The input message will always be the first message in our request/ response sequence so this will @@ -507,6 +557,8 @@ def create_chat_completion_message_event( "vendor": "gemini", "ingest_source": "Python", } + if all_token_counts: + chat_completion_input_message_dict["token_count"] = 0 if settings.ai_monitoring.record_content.enabled: chat_completion_input_message_dict["content"] = input_message_content @@ -523,7 +575,7 @@ def create_chat_completion_message_event( # Add one to the index to account for the single input message so our sequence value is accurate for # the output message - if input_message: + if input_message_content: index += 1 message_id = str(uuid.uuid4()) @@ -532,11 +584,6 @@ def create_chat_completion_message_event( "id": message_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -546,6 +593,9 @@ def create_chat_completion_message_event( "is_response": True, } + if all_token_counts: + chat_completion_output_message_dict["token_count"] = 0 + if 
settings.ai_monitoring.record_content.enabled: chat_completion_output_message_dict["content"] = message_content diff --git a/tests/mlmodel_gemini/test_embeddings.py b/tests/mlmodel_gemini/test_embeddings.py index 0fc92897b6..5b4e30f860 100644 --- a/tests/mlmodel_gemini/test_embeddings.py +++ b/tests/mlmodel_gemini/test_embeddings.py @@ -15,7 +15,7 @@ import google.genai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -93,7 +93,7 @@ def test_gemini_embedding_sync_no_content(gemini_dev_client, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_gemini_embedding_sync_with_token_count", @@ -177,7 +177,7 @@ def test_gemini_embedding_async_no_content(gemini_dev_client, loop, set_trace_in @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_gemini_embedding_async_with_token_count", diff --git a/tests/mlmodel_gemini/test_embeddings_error.py b/tests/mlmodel_gemini/test_embeddings_error.py index a65a6c2c6f..f0e7aac58a 100644 --- a/tests/mlmodel_gemini/test_embeddings_error.py +++ b/tests/mlmodel_gemini/test_embeddings_error.py @@ -16,12 +16,10 @@ import google.genai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -159,34 +157,6 @@ def test_embeddings_invalid_request_error_invalid_model(gemini_dev_client, set_t gemini_dev_client.models.embed_content(contents="Embedded: Model does not exist.", model="does-not-exist") -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for embedContent. Call ListModels to see the list of available models and their supported methods." 
- } -) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - rollup_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - custom_metrics=[(f"Supportability/Python/ML/Gemini/{google.genai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count(gemini_dev_client, set_trace_info): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - gemini_dev_client.models.embed_content(contents="Embedded: Model does not exist.", model="does-not-exist") - - embedding_invalid_key_error_events = [ ( {"type": "LlmEmbedding"}, @@ -326,36 +296,6 @@ def test_embeddings_async_invalid_request_error_invalid_model(gemini_dev_client, ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for embedContent. Call ListModels to see the list of available models and their supported methods." - } -) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_async_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - rollup_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - custom_metrics=[(f"Supportability/Python/ML/Gemini/{google.genai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_async_invalid_request_error_invalid_model_with_token_count(gemini_dev_client, loop, set_trace_info): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - loop.run_until_complete( - gemini_dev_client.models.embed_content(contents="Embedded: Model does not exist.", model="does-not-exist") - ) - - # Wrong api_key provided @dt_enabled @reset_core_stats_engine() diff --git a/tests/mlmodel_gemini/test_text_generation.py b/tests/mlmodel_gemini/test_text_generation.py index faec66aa75..3da978e777 100644 --- a/tests/mlmodel_gemini/test_text_generation.py +++ b/tests/mlmodel_gemini/test_text_generation.py @@ -15,7 +15,7 @@ import google.genai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -50,6 +50,9 @@ "vendor": "gemini", "ingest_source": "Python", "response.number_of_messages": 2, + "response.usage.prompt_tokens": 9, + "response.usage.completion_tokens": 13, + "response.usage.total_tokens": 22, }, ), ( @@ -60,6 +63,7 @@ "llm.foo": "bar", "span_id": None, "trace_id": "trace-id", + "token_count": 0, "content": "How many letters are in the word Python?", "role": "user", "completion_id": None, @@ -77,6 +81,7 @@ "llm.foo": "bar", "span_id": None, "trace_id": "trace-id", + 
"token_count": 0, "content": 'There are **6** letters in the word "Python".\n', "role": "model", "completion_id": None, @@ -183,7 +188,8 @@ def test_gemini_text_generation_sync_no_content(gemini_dev_client, set_trace_inf @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(text_generation_recorded_events)) +# Ensure LLM callback is invoked and response token counts are overridden +@validate_custom_events(add_token_counts_to_chat_events(text_generation_recorded_events)) @validate_custom_event_count(count=3) @validate_transaction_metrics( name="test_text_generation:test_gemini_text_generation_sync_with_token_count", @@ -324,7 +330,7 @@ def test_gemini_text_generation_async_no_content(gemini_dev_client, loop, set_tr @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(text_generation_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(text_generation_recorded_events)) @validate_custom_event_count(count=3) @validate_transaction_metrics( name="test_text_generation:test_gemini_text_generation_async_with_token_count", diff --git a/tests/mlmodel_gemini/test_text_generation_error.py b/tests/mlmodel_gemini/test_text_generation_error.py index 5e6f1c04de..c92e1a2d45 100644 --- a/tests/mlmodel_gemini/test_text_generation_error.py +++ b/tests/mlmodel_gemini/test_text_generation_error.py @@ -17,13 +17,11 @@ import google.genai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -63,6 +61,7 @@ "trace_id": "trace-id", "content": "How many letters are in the word Python?", "role": "user", + "token_count": 0, "completion_id": None, "sequence": 0, "vendor": "gemini", @@ -167,6 +166,7 @@ def _test(): "trace_id": "trace-id", "content": "Model does not exist.", "role": "user", + "token_count": 0, "completion_id": None, "response.model": "does-not-exist", "sequence": 0, @@ -179,39 +179,6 @@ def _test(): @dt_enabled @reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods." 
- } -) -@validate_transaction_metrics( - "test_text_generation_error:test_text_generation_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/Gemini/generate_content", 1)], - rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_text_generation_invalid_request_error_invalid_model_with_token_count(gemini_dev_client, set_trace_info): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - gemini_dev_client.models.generate_content( - model="does-not-exist", - contents=["Model does not exist."], - config=google.genai.types.GenerateContentConfig(max_output_tokens=100, temperature=0.7), - ) - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) @validate_error_trace_attributes( callable_name(google.genai.errors.ClientError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, @@ -227,7 +194,7 @@ def test_text_generation_invalid_request_error_invalid_model_with_token_count(ge rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], background_task=True, ) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) +@validate_custom_events(expected_events_on_invalid_model_error) @validate_custom_event_count(count=2) @background_task() def test_text_generation_invalid_request_error_invalid_model_chat(gemini_dev_client, set_trace_info): @@ -266,6 +233,7 @@ def test_text_generation_invalid_request_error_invalid_model_chat(gemini_dev_cli "trace_id": "trace-id", "content": "Invalid API key.", "role": "user", + "token_count": 0, "response.model": "gemini-flash-2.0", "completion_id": None, "sequence": 0, @@ -377,43 +345,6 @@ def _test(): @dt_enabled @reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods." 
- } -) -@validate_transaction_metrics( - "test_text_generation_error:test_text_generation_async_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/Gemini/generate_content", 1)], - rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_text_generation_async_invalid_request_error_invalid_model_with_token_count( - gemini_dev_client, loop, set_trace_info -): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - gemini_dev_client.models.generate_content( - model="does-not-exist", - contents=["Model does not exist."], - config=google.genai.types.GenerateContentConfig(max_output_tokens=100, temperature=0.7), - ) - ) - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) @validate_error_trace_attributes( callable_name(google.genai.errors.ClientError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, @@ -429,7 +360,7 @@ def test_text_generation_async_invalid_request_error_invalid_model_with_token_co rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], background_task=True, ) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) +@validate_custom_events(expected_events_on_invalid_model_error) @validate_custom_event_count(count=2) @background_task() def test_text_generation_async_invalid_request_error_invalid_model_chat(gemini_dev_client, loop, set_trace_info): diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py index 4ff70c7ed4..55dbd08105 100644 --- a/tests/testing_support/ml_testing_utils.py +++ b/tests/testing_support/ml_testing_utils.py @@ -29,6 +29,7 @@ def llm_token_count_callback(model, content): return 105 +# This will be removed once all LLM instrumentations have been converted to use new token count design def add_token_count_to_events(expected_events): events = copy.deepcopy(expected_events) for event in events: @@ -37,6 +38,24 @@ def add_token_count_to_events(expected_events): return events +def add_token_count_to_embedding_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmEmbedding": + event[1]["response.usage.total_tokens"] = 105 + return events + + +def add_token_counts_to_chat_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionSummary": + event[1]["response.usage.prompt_tokens"] = 105 + event[1]["response.usage.completion_tokens"] = 105 + event[1]["response.usage.total_tokens"] = 210 + return events + + def events_sans_content(event): new_event = copy.deepcopy(event) for _event in new_event: From 7d5adac309d46d8d97a29b94c3c3bb51fbf8fe81 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 7 Oct 2025 13:14:56 -0700 Subject: [PATCH 02/34] Add response token count logic to OpenAI instrumentation. (#1498) * Add OpenAI token counts. * Add token counts to langchain + openai tests. * Remove unused expected events. * Linting * Add OpenAI token counts. * Add token counts to langchain + openai tests. * Remove unused expected events. 
* [MegaLinter] Apply linters fixes --------- Co-authored-by: Tim Pansino --- newrelic/hooks/mlmodel_openai.py | 87 ++++++++--- tests/mlmodel_langchain/test_chain.py | 8 + tests/mlmodel_openai/test_chat_completion.py | 12 +- .../test_chat_completion_error.py | 71 +-------- .../test_chat_completion_error_v1.py | 142 +----------------- .../test_chat_completion_stream.py | 101 ++++++++++++- .../test_chat_completion_stream_error.py | 75 +-------- .../test_chat_completion_stream_error_v1.py | 80 +--------- .../test_chat_completion_stream_v1.py | 11 +- .../mlmodel_openai/test_chat_completion_v1.py | 12 +- tests/mlmodel_openai/test_embeddings.py | 7 +- .../test_embeddings_error_v1.py | 120 +-------------- tests/mlmodel_openai/test_embeddings_v1.py | 7 +- tests/testing_support/ml_testing_utils.py | 8 + 14 files changed, 241 insertions(+), 500 deletions(-) diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index c3f7960b6e..3484762951 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -129,11 +129,11 @@ def create_chat_completion_message_event( span_id, trace_id, response_model, - request_model, response_id, request_id, llm_metadata, output_message_list, + all_token_counts, ): settings = transaction.settings if transaction.settings is not None else global_settings() @@ -153,11 +153,6 @@ def create_chat_completion_message_event( "request_id": request_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(request_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -166,6 +161,9 @@ def create_chat_completion_message_event( "ingest_source": "Python", } + if all_token_counts: + chat_completion_input_message_dict["token_count"] = 0 + if settings.ai_monitoring.record_content.enabled: chat_completion_input_message_dict["content"] = message_content @@ -193,11 +191,6 @@ def create_chat_completion_message_event( "request_id": request_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -207,6 +200,9 @@ def create_chat_completion_message_event( "is_response": True, } + if all_token_counts: + chat_completion_output_message_dict["token_count"] = 0 + if settings.ai_monitoring.record_content.enabled: chat_completion_output_message_dict["content"] = message_content @@ -280,15 +276,18 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg else getattr(attribute_response, "organization", None) ) + response_total_tokens = attribute_response.get("usage", {}).get("total_tokens") if response else None + + total_tokens = ( + settings.ai_monitoring.llm_token_count_callback(response_model, input_) + if settings.ai_monitoring.llm_token_count_callback and input_ + else response_total_tokens + ) + full_embedding_response_dict = { "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, input_) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "request.model": kwargs.get("model") or kwargs.get("engine"), "request_id": request_id, "duration": ft.duration * 1000, @@ -313,6 +312,7 @@ def 
_record_embedding_success(transaction, embedding_id, linking_metadata, kwarg "response.headers.ratelimitRemainingRequests": check_rate_limit_header( response_headers, "x-ratelimit-remaining-requests", True ), + "response.usage.total_tokens": total_tokens, "vendor": "openai", "ingest_source": "Python", } @@ -475,12 +475,15 @@ def _handle_completion_success(transaction, linking_metadata, completion_id, kwa def _record_completion_success(transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response): + settings = transaction.settings if transaction.settings is not None else global_settings() span_id = linking_metadata.get("span.id") trace_id = linking_metadata.get("trace.id") + try: if response: response_model = response.get("model") response_id = response.get("id") + token_usage = response.get("usage") or {} output_message_list = [] finish_reason = None choices = response.get("choices") or [] @@ -494,6 +497,7 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa else: response_model = kwargs.get("response.model") response_id = kwargs.get("id") + token_usage = {} output_message_list = [] finish_reason = kwargs.get("finish_reason") if "content" in kwargs: @@ -505,10 +509,44 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa output_message_list = [] request_model = kwargs.get("model") or kwargs.get("engine") - request_id = response_headers.get("x-request-id") - organization = response_headers.get("openai-organization") or getattr(response, "organization", None) messages = kwargs.get("messages") or [{"content": kwargs.get("prompt"), "role": "user"}] input_message_list = list(messages) + + # Extract token counts from response object + if token_usage: + response_prompt_tokens = token_usage.get("prompt_tokens") + response_completion_tokens = token_usage.get("completion_tokens") + response_total_tokens = token_usage.get("total_tokens") + + else: + response_prompt_tokens = None + response_completion_tokens = None + response_total_tokens = None + + # Calculate token counts by checking if a callback is registered and if we have the necessary content to pass + # to it. 
If not, then we use the token counts provided in the response object + input_message_content = " ".join([msg.get("content", "") for msg in input_message_list if msg.get("content")]) + prompt_tokens = ( + settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content) + if settings.ai_monitoring.llm_token_count_callback and input_message_content + else response_prompt_tokens + ) + output_message_content = " ".join([msg.get("content", "") for msg in output_message_list if msg.get("content")]) + completion_tokens = ( + settings.ai_monitoring.llm_token_count_callback(response_model, output_message_content) + if settings.ai_monitoring.llm_token_count_callback and output_message_content + else response_completion_tokens + ) + + total_tokens = ( + prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens + ) + + all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens) + + request_id = response_headers.get("x-request-id") + organization = response_headers.get("openai-organization") or getattr(response, "organization", None) + full_chat_completion_summary_dict = { "id": completion_id, "span_id": span_id, @@ -553,6 +591,12 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa ), "response.number_of_messages": len(input_message_list) + len(output_message_list), } + + if all_token_counts: + full_chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens + full_chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens + full_chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens + llm_metadata = _get_llm_attributes(transaction) full_chat_completion_summary_dict.update(llm_metadata) transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict) @@ -564,11 +608,11 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa span_id, trace_id, response_model, - request_model, response_id, request_id, llm_metadata, output_message_list, + all_token_counts, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) @@ -579,6 +623,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg trace_id = linking_metadata.get("trace.id") request_message_list = kwargs.get("messages", None) or [] notice_error_attributes = {} + try: if OPENAI_V1: response = getattr(exc, "response", None) @@ -643,6 +688,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg output_message_list = [] if "content" in kwargs: output_message_list = [{"content": kwargs.get("content"), "role": kwargs.get("role")}] + create_chat_completion_message_event( transaction, request_message_list, @@ -650,11 +696,12 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg span_id, trace_id, kwargs.get("response.model"), - request_model, response_id, request_id, llm_metadata, output_message_list, + # We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run + all_token_counts=True, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) diff --git a/tests/mlmodel_langchain/test_chain.py b/tests/mlmodel_langchain/test_chain.py index 1a3cbbfd76..abf52efe09 100644 --- a/tests/mlmodel_langchain/test_chain.py +++ b/tests/mlmodel_langchain/test_chain.py @@ -359,6 
+359,7 @@ "response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999992, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 8, "vendor": "openai", "ingest_source": "Python", "input": "[[3923, 374, 220, 17, 489, 220, 19, 30]]", @@ -382,6 +383,7 @@ "response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999998, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 1, "vendor": "openai", "ingest_source": "Python", "input": "[[10590]]", @@ -452,6 +454,9 @@ "response.headers.ratelimitResetRequests": "8.64s", "response.headers.ratelimitRemainingTokens": 199912, "response.headers.ratelimitRemainingRequests": 9999, + "response.usage.prompt_tokens": 73, + "response.usage.completion_tokens": 375, + "response.usage.total_tokens": 448, "response.number_of_messages": 3, }, ], @@ -467,6 +472,7 @@ "sequence": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4?", }, @@ -483,6 +489,7 @@ "sequence": 1, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "math", }, @@ -499,6 +506,7 @@ "sequence": 2, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "is_response": True, "content": "```html\n\n\n\n Math Quiz\n\n\n

<h2>Math Quiz Questions</h2>\n    <ol>\n        <li>What is the result of 5 + 3?</li>\n            <ul>\n                <li>A) 7</li>\n                <li>B) 8</li>\n                <li>C) 9</li>\n                <li>D) 10</li>\n            </ul>\n        <li>What is the product of 6 x 7?</li>\n            <ul>\n                <li>A) 36</li>\n                <li>B) 42</li>\n                <li>C) 48</li>\n                <li>D) 56</li>\n            </ul>\n        <li>What is the square root of 64?</li>\n            <ul>\n                <li>A) 6</li>\n                <li>B) 7</li>\n                <li>C) 8</li>\n                <li>D) 9</li>\n            </ul>\n        <li>What is the result of 12 / 4?</li>\n            <ul>\n                <li>A) 2</li>\n                <li>B) 3</li>\n                <li>C) 4</li>\n                <li>D) 5</li>\n            </ul>\n        <li>What is the sum of 15 + 9?</li>\n            <ul>\n                <li>A) 22</li>\n                <li>B) 23</li>\n                <li>C) 24</li>\n                <li>D) 25</li>\n            </ul>
\n\n\n```", diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py index 1f8cf1cb74..5e4d209ed7 100644 --- a/tests/mlmodel_openai/test_chat_completion.py +++ b/tests/mlmodel_openai/test_chat_completion.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -55,6 +55,9 @@ "response.organization": "new-relic-nkmd8b", "request.temperature": 0.7, "request.max_tokens": 100, + "response.usage.completion_tokens": 11, + "response.usage.total_tokens": 64, + "response.usage.prompt_tokens": 53, "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 200, @@ -81,6 +84,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -99,6 +103,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -117,6 +122,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "is_response": True, @@ -172,7 +178,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -343,7 +349,7 @@ def test_openai_chat_completion_async_no_content(loop, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_chat_completion_error.py b/tests/mlmodel_openai/test_chat_completion_error.py index bfb2267a33..97a4dd8793 100644 --- a/tests/mlmodel_openai/test_chat_completion_error.py +++ b/tests/mlmodel_openai/test_chat_completion_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +66,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": 
"Python", }, @@ -83,6 +82,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -186,6 +186,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -193,36 +194,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() @@ -281,6 +252,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -296,6 +268,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -360,6 +333,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -471,37 +445,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with 
pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() diff --git a/tests/mlmodel_openai/test_chat_completion_error_v1.py b/tests/mlmodel_openai/test_chat_completion_error_v1.py index 9be9fcab9c..5af1598847 100644 --- a/tests/mlmodel_openai/test_chat_completion_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_error_v1.py @@ -14,13 +14,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -67,6 +65,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -82,6 +81,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -229,6 +229,7 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -266,37 +267,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -329,41 +299,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", 
"http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - expected_events_on_wrong_api_key_error = [ ( {"type": "LlmChatCompletionSummary"}, @@ -391,6 +326,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count_as "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -610,39 +546,6 @@ def test_chat_completion_invalid_request_error_invalid_model_with_raw_response(s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response( - set_trace_info, sync_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -677,41 +580,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async_with_raw_resp ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - 
"test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py index ad89d6f260..8019c0b6a9 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream.py +++ b/tests/mlmodel_openai/test_chat_completion_stream.py @@ -15,7 +15,8 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -184,9 +185,101 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): assert resp +chat_completion_recorded_token_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.model": "gpt-3.5-turbo-0613", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "stop", + "response.headers.llmVersion": "2020-10-01", + "response.headers.ratelimitLimitRequests": 200, + "response.headers.ratelimitLimitTokens": 40000, + "response.headers.ratelimitResetTokens": "90ms", + "response.headers.ratelimitResetRequests": "7m12s", + "response.headers.ratelimitRemainingTokens": 39940, + "response.headers.ratelimitRemainingRequests": 199, + "vendor": "openai", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + 
"trace_id": "trace-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "is_response": True, + "ingest_source": "Python", + }, + ), +] + + @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -378,7 +471,9 @@ async def consumer(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_chat_completion_stream:test_openai_chat_completion_async_with_token_count", diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index eebb5ee8fb..e8e55426e9 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +66,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -83,6 +82,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -191,6 +191,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -198,38 +199,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 
404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - generator = openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -290,6 +259,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -305,6 +275,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -374,6 +345,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -488,38 +460,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -649,6 +589,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py 
b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py index 5f769ea0e6..64798300fc 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py @@ -12,16 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. - import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +65,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -83,6 +81,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -243,6 +242,7 @@ async def consumer(): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -281,77 +281,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn assert resp -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - generator = sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_async_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) 
-@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_async_with_token_count( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - async def consumer(): - generator = await async_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - async for resp in generator: - assert resp - - loop.run_until_complete(consumer()) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -414,6 +343,7 @@ async def consumer(): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_v1.py index 796404012b..c88e8b1df6 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_v1.py @@ -17,7 +17,8 @@ from conftest import get_openai_version from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -300,7 +301,9 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -622,7 +625,9 @@ async def consumer(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant # @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py index 817db35d8e..007effcb17 100644 --- a/tests/mlmodel_openai/test_chat_completion_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_v1.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -54,6 +54,9 @@ "response.organization": "new-relic-nkmd8b", "request.temperature": 0.7, "request.max_tokens": 100, + "response.usage.completion_tokens": 75, + 
"response.usage.total_tokens": 101, + "response.usage.prompt_tokens": 26, "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 10000, @@ -80,6 +83,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "ingest_source": "Python", @@ -98,6 +102,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "ingest_source": "Python", @@ -116,6 +121,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "is_response": True, @@ -193,7 +199,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -389,7 +395,7 @@ def test_openai_chat_completion_async_with_llm_metadata_no_content(loop, set_tra @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py index c3c3e7c429..935db04fe0 100644 --- a/tests/mlmodel_openai/test_embeddings.py +++ b/tests/mlmodel_openai/test_embeddings.py @@ -19,7 +19,7 @@ validate_attributes, ) from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -55,6 +55,7 @@ "response.headers.ratelimitResetRequests": "19m45.394s", "response.headers.ratelimitRemainingTokens": 149994, "response.headers.ratelimitRemainingRequests": 197, + "response.usage.total_tokens": 6, "vendor": "openai", "ingest_source": "Python", }, @@ -107,7 +108,7 @@ def test_openai_embedding_sync_no_content(set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_openai_embedding_sync_with_token_count", @@ -191,7 +192,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_openai_embedding_async_with_token_count", diff --git 
a/tests/mlmodel_openai/test_embeddings_error_v1.py b/tests/mlmodel_openai/test_embeddings_error_v1.py index fd29236122..499f96893b 100644 --- a/tests/mlmodel_openai/test_embeddings_error_v1.py +++ b/tests/mlmodel_openai/test_embeddings_error_v1.py @@ -16,12 +16,10 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -149,32 +147,6 @@ def test_embeddings_invalid_request_error_no_model_async(set_trace_info, async_o ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - sync_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist") - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -255,36 +227,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content(set_tra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_async_with_token_count( - set_trace_info, async_openai_client, loop -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - loop.run_until_complete( - async_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist") - ) - - embedding_invalid_key_error_events = [ ( {"type": "LlmEmbedding"}, @@ 
-449,34 +391,6 @@ def test_embeddings_invalid_request_error_no_model_async_with_raw_response(set_t ) # no model provided -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response( - set_trace_info, sync_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - sync_openai_client.embeddings.with_raw_response.create(input="Model does not exist.", model="does-not-exist") - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -566,38 +480,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content_with_ra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response( - set_trace_info, async_openai_client, loop -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - loop.run_until_complete( - async_openai_client.embeddings.with_raw_response.create( - input="Model does not exist.", model="does-not-exist" - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( diff --git a/tests/mlmodel_openai/test_embeddings_v1.py b/tests/mlmodel_openai/test_embeddings_v1.py index 405a2a9e5f..3801d3639c 100644 --- a/tests/mlmodel_openai/test_embeddings_v1.py +++ b/tests/mlmodel_openai/test_embeddings_v1.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -48,6 +48,7 @@ 
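The surviving `_with_token_count` variants exercise the agent's token-count callback path: when `ai_monitoring.llm_token_count_callback` is registered, its return value supplies the token attributes on the recorded events in place of provider-reported usage. A minimal sketch of such a callback registered application-side (the test fixture instead swaps one in per-test through `override_llm_token_callback_settings`, and the callback body here is purely illustrative):

import newrelic.agent


def count_tokens(model, content):
    # Crude stand-in for a real tokenizer; return a deterministic count so
    # expected events can assert on it.
    return len(content.split())


newrelic.agent.set_llm_token_count_callback(count_tokens)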
"response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999994, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 6, "vendor": "openai", "ingest_source": "Python", }, @@ -111,7 +112,7 @@ def test_openai_embedding_sync_no_content(set_trace_info, sync_openai_client): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_sync_with_token_count", @@ -206,7 +207,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info, async_openai_cl @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_async_with_token_count", diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py index 55dbd08105..8c2c0444f0 100644 --- a/tests/testing_support/ml_testing_utils.py +++ b/tests/testing_support/ml_testing_utils.py @@ -46,6 +46,14 @@ def add_token_count_to_embedding_events(expected_events): return events +def add_token_count_streaming_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionMessage": + event[1]["token_count"] = 0 + return events + + def add_token_counts_to_chat_events(expected_events): events = copy.deepcopy(expected_events) for event in events: From b18104dc7cf70a3c9292d90514c0d09ec45c737c Mon Sep 17 00:00:00 2001 From: Timothy Pansino <11214426+TimPansino@users.noreply.github.com> Date: Thu, 23 Oct 2025 14:32:16 -0700 Subject: [PATCH 03/34] Strands Mock Model (#1551) * Add strands to tox.ini * Add mock models for strands testing * Add simple test file to validate strands mocking --- tests/mlmodel_strands/_mock_model_provider.py | 99 ++++++++++++ tests/mlmodel_strands/conftest.py | 144 ++++++++++++++++++ tests/mlmodel_strands/test_simple.py | 36 +++++ tox.ini | 12 +- 4 files changed, 287 insertions(+), 4 deletions(-) create mode 100644 tests/mlmodel_strands/_mock_model_provider.py create mode 100644 tests/mlmodel_strands/conftest.py create mode 100644 tests/mlmodel_strands/test_simple.py diff --git a/tests/mlmodel_strands/_mock_model_provider.py b/tests/mlmodel_strands/_mock_model_provider.py new file mode 100644 index 0000000000..e4c9e79930 --- /dev/null +++ b/tests/mlmodel_strands/_mock_model_provider.py @@ -0,0 +1,99 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Test setup derived from: https://github.com/strands-agents/sdk-python/blob/main/tests/fixtures/mocked_model_provider.py +# strands Apache 2.0 license: https://github.com/strands-agents/sdk-python/blob/main/LICENSE + +import json +from typing import TypedDict + +from strands.models import Model + + +class RedactionMessage(TypedDict): + redactedUserContent: str + redactedAssistantContent: str + + +class MockedModelProvider(Model): + """A mock implementation of the Model interface for testing purposes. + + This class simulates a model provider by returning pre-defined agent responses + in sequence. It implements the Model interface methods and provides functionality + to stream mock responses as events. + """ + + def __init__(self, agent_responses): + self.agent_responses = agent_responses + self.index = 0 + + def format_chunk(self, event): + return event + + def format_request(self, messages, tool_specs=None, system_prompt=None): + return None + + def get_config(self): + pass + + def update_config(self, **model_config): + pass + + async def structured_output(self, output_model, prompt, system_prompt=None, **kwargs): + pass + + async def stream(self, messages, tool_specs=None, system_prompt=None): + events = self.map_agent_message_to_events(self.agent_responses[self.index]) + for event in events: + yield event + + self.index += 1 + + def map_agent_message_to_events(self, agent_message): + stop_reason = "end_turn" + yield {"messageStart": {"role": "assistant"}} + if agent_message.get("redactedAssistantContent"): + yield {"redactContent": {"redactUserContentMessage": agent_message["redactedUserContent"]}} + yield {"contentBlockStart": {"start": {}}} + yield {"contentBlockDelta": {"delta": {"text": agent_message["redactedAssistantContent"]}}} + yield {"contentBlockStop": {}} + stop_reason = "guardrail_intervened" + else: + for content in agent_message["content"]: + if "reasoningContent" in content: + yield {"contentBlockStart": {"start": {}}} + yield {"contentBlockDelta": {"delta": {"reasoningContent": content["reasoningContent"]}}} + yield {"contentBlockStop": {}} + if "text" in content: + yield {"contentBlockStart": {"start": {}}} + yield {"contentBlockDelta": {"delta": {"text": content["text"]}}} + yield {"contentBlockStop": {}} + if "toolUse" in content: + stop_reason = "tool_use" + yield { + "contentBlockStart": { + "start": { + "toolUse": { + "name": content["toolUse"]["name"], + "toolUseId": content["toolUse"]["toolUseId"], + } + } + } + } + yield { + "contentBlockDelta": {"delta": {"toolUse": {"input": json.dumps(content["toolUse"]["input"])}}} + } + yield {"contentBlockStop": {}} + + yield {"messageStop": {"stopReason": stop_reason}} diff --git a/tests/mlmodel_strands/conftest.py b/tests/mlmodel_strands/conftest.py new file mode 100644 index 0000000000..b810161f6a --- /dev/null +++ b/tests/mlmodel_strands/conftest.py @@ -0,0 +1,144 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
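MockedModelProvider replays canned agent messages as the streaming events a Strands model provider is expected to emit, which is what lets these tests drive a Strands `Agent` without a real model backend. For a plain text reply, `map_agent_message_to_events` yields the following sequence (a small illustration of the generator defined above):

from _mock_model_provider import MockedModelProvider

provider = MockedModelProvider([{"role": "assistant", "content": [{"text": "Success!"}]}])

events = list(provider.map_agent_message_to_events(provider.agent_responses[0]))
assert events == [
    {"messageStart": {"role": "assistant"}},
    {"contentBlockStart": {"start": {}}},
    {"contentBlockDelta": {"delta": {"text": "Success!"}}},
    {"contentBlockStop": {}},
    {"messageStop": {"stopReason": "end_turn"}},
]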
+ +import pytest +from _mock_model_provider import MockedModelProvider +from testing_support.fixtures import collector_agent_registration_fixture, collector_available_fixture +from testing_support.ml_testing_utils import set_trace_info + +_default_settings = { + "package_reporting.enabled": False, # Turn off package reporting for testing as it causes slowdowns. + "transaction_tracer.explain_threshold": 0.0, + "transaction_tracer.transaction_threshold": 0.0, + "transaction_tracer.stack_trace_threshold": 0.0, + "debug.log_data_collector_payloads": True, + "debug.record_transaction_failure": True, + "ai_monitoring.enabled": True, +} + +collector_agent_registration = collector_agent_registration_fixture( + app_name="Python Agent Test (mlmodel_strands)", default_settings=_default_settings +) + + +@pytest.fixture +def single_tool_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def single_tool_model_error(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + # Set arguments to an invalid type to trigger error in tool + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": 12}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def multi_tool_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123, "b": 2}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def multi_tool_model_error(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + # Set insufficient arguments to trigger error in tool + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model diff --git a/tests/mlmodel_strands/test_simple.py b/tests/mlmodel_strands/test_simple.py new file mode 100644 
index 0000000000..ae24003fab --- /dev/null +++ b/tests/mlmodel_strands/test_simple.py @@ -0,0 +1,36 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from strands import Agent, tool + +from newrelic.api.background_task import background_task + + +# Example tool for testing purposes +@tool +def add_exclamation(message: str) -> str: + return f"{message}!" + + +# TODO: Remove this file once all real tests are in place + + +@background_task() +def test_simple_run_agent(set_trace_info, single_tool_model): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + response = my_agent("Run the tools.") + assert response.message["content"][0]["text"] == "Success!" + assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 diff --git a/tox.ini b/tox.ini index 39148b657f..ace7839db3 100644 --- a/tox.ini +++ b/tox.ini @@ -182,6 +182,7 @@ envlist = python-logger_structlog-{py38,py39,py310,py311,py312,py313,py314,pypy311}-structloglatest, python-mlmodel_autogen-{py310,py311,py312,py313,py314,pypy311}-autogen061, python-mlmodel_autogen-{py310,py311,py312,py313,py314,pypy311}-autogenlatest, + python-mlmodel_strands-{py310,py311,py312,py313}-strandslatest, python-mlmodel_gemini-{py39,py310,py311,py312,py313,py314}, python-mlmodel_langchain-{py39,py310,py311,py312,py313}, ;; Package not ready for Python 3.14 (type annotations not updated) @@ -440,6 +441,8 @@ deps = mlmodel_langchain: faiss-cpu mlmodel_langchain: mock mlmodel_langchain: asyncio + mlmodel_strands: strands-agents[openai] + mlmodel_strands: strands-agents-tools logger_loguru-logurulatest: loguru logger_structlog-structloglatest: structlog messagebroker_pika-pikalatest: pika @@ -510,6 +513,7 @@ changedir = application_celery: tests/application_celery component_djangorestframework: tests/component_djangorestframework component_flask_rest: tests/component_flask_rest + component_graphenedjango: tests/component_graphenedjango component_graphqlserver: tests/component_graphqlserver component_tastypie: tests/component_tastypie coroutines_asyncio: tests/coroutines_asyncio @@ -521,17 +525,17 @@ changedir = datastore_cassandradriver: tests/datastore_cassandradriver datastore_elasticsearch: tests/datastore_elasticsearch datastore_firestore: tests/datastore_firestore - datastore_oracledb: tests/datastore_oracledb datastore_memcache: tests/datastore_memcache + datastore_motor: tests/datastore_motor datastore_mysql: tests/datastore_mysql datastore_mysqldb: tests/datastore_mysqldb + datastore_oracledb: tests/datastore_oracledb datastore_postgresql: tests/datastore_postgresql datastore_psycopg: tests/datastore_psycopg datastore_psycopg2: tests/datastore_psycopg2 datastore_psycopg2cffi: tests/datastore_psycopg2cffi datastore_pylibmc: tests/datastore_pylibmc datastore_pymemcache: tests/datastore_pymemcache - datastore_motor: tests/datastore_motor datastore_pymongo: tests/datastore_pymongo datastore_pymssql: tests/datastore_pymssql datastore_pymysql: 
tests/datastore_pymysql @@ -539,8 +543,8 @@ changedir = datastore_pysolr: tests/datastore_pysolr datastore_redis: tests/datastore_redis datastore_rediscluster: tests/datastore_rediscluster - datastore_valkey: tests/datastore_valkey datastore_sqlite: tests/datastore_sqlite + datastore_valkey: tests/datastore_valkey external_aiobotocore: tests/external_aiobotocore external_botocore: tests/external_botocore external_feedparser: tests/external_feedparser @@ -561,7 +565,6 @@ changedir = framework_fastapi: tests/framework_fastapi framework_flask: tests/framework_flask framework_graphene: tests/framework_graphene - component_graphenedjango: tests/component_graphenedjango framework_graphql: tests/framework_graphql framework_grpc: tests/framework_grpc framework_pyramid: tests/framework_pyramid @@ -581,6 +584,7 @@ changedir = mlmodel_langchain: tests/mlmodel_langchain mlmodel_openai: tests/mlmodel_openai mlmodel_sklearn: tests/mlmodel_sklearn + mlmodel_strands: tests/mlmodel_strands template_genshi: tests/template_genshi template_jinja2: tests/template_jinja2 template_mako: tests/template_mako From c19207b273607e5346ac6639b9be6307ff862d6e Mon Sep 17 00:00:00 2001 From: Timothy Pansino <11214426+TimPansino@users.noreply.github.com> Date: Thu, 23 Oct 2025 14:32:16 -0700 Subject: [PATCH 04/34] Strands Mock Model (#1551) * Add strands to tox.ini * Add mock models for strands testing * Add simple test file to validate strands mocking --- tests/mlmodel_strands/_mock_model_provider.py | 99 ++++++++++++ tests/mlmodel_strands/conftest.py | 144 ++++++++++++++++++ tests/mlmodel_strands/test_simple.py | 36 +++++ tox.ini | 12 +- 4 files changed, 287 insertions(+), 4 deletions(-) create mode 100644 tests/mlmodel_strands/_mock_model_provider.py create mode 100644 tests/mlmodel_strands/conftest.py create mode 100644 tests/mlmodel_strands/test_simple.py diff --git a/tests/mlmodel_strands/_mock_model_provider.py b/tests/mlmodel_strands/_mock_model_provider.py new file mode 100644 index 0000000000..e4c9e79930 --- /dev/null +++ b/tests/mlmodel_strands/_mock_model_provider.py @@ -0,0 +1,99 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test setup derived from: https://github.com/strands-agents/sdk-python/blob/main/tests/fixtures/mocked_model_provider.py +# strands Apache 2.0 license: https://github.com/strands-agents/sdk-python/blob/main/LICENSE + +import json +from typing import TypedDict + +from strands.models import Model + + +class RedactionMessage(TypedDict): + redactedUserContent: str + redactedAssistantContent: str + + +class MockedModelProvider(Model): + """A mock implementation of the Model interface for testing purposes. + + This class simulates a model provider by returning pre-defined agent responses + in sequence. It implements the Model interface methods and provides functionality + to stream mock responses as events. 
+ """ + + def __init__(self, agent_responses): + self.agent_responses = agent_responses + self.index = 0 + + def format_chunk(self, event): + return event + + def format_request(self, messages, tool_specs=None, system_prompt=None): + return None + + def get_config(self): + pass + + def update_config(self, **model_config): + pass + + async def structured_output(self, output_model, prompt, system_prompt=None, **kwargs): + pass + + async def stream(self, messages, tool_specs=None, system_prompt=None): + events = self.map_agent_message_to_events(self.agent_responses[self.index]) + for event in events: + yield event + + self.index += 1 + + def map_agent_message_to_events(self, agent_message): + stop_reason = "end_turn" + yield {"messageStart": {"role": "assistant"}} + if agent_message.get("redactedAssistantContent"): + yield {"redactContent": {"redactUserContentMessage": agent_message["redactedUserContent"]}} + yield {"contentBlockStart": {"start": {}}} + yield {"contentBlockDelta": {"delta": {"text": agent_message["redactedAssistantContent"]}}} + yield {"contentBlockStop": {}} + stop_reason = "guardrail_intervened" + else: + for content in agent_message["content"]: + if "reasoningContent" in content: + yield {"contentBlockStart": {"start": {}}} + yield {"contentBlockDelta": {"delta": {"reasoningContent": content["reasoningContent"]}}} + yield {"contentBlockStop": {}} + if "text" in content: + yield {"contentBlockStart": {"start": {}}} + yield {"contentBlockDelta": {"delta": {"text": content["text"]}}} + yield {"contentBlockStop": {}} + if "toolUse" in content: + stop_reason = "tool_use" + yield { + "contentBlockStart": { + "start": { + "toolUse": { + "name": content["toolUse"]["name"], + "toolUseId": content["toolUse"]["toolUseId"], + } + } + } + } + yield { + "contentBlockDelta": {"delta": {"toolUse": {"input": json.dumps(content["toolUse"]["input"])}}} + } + yield {"contentBlockStop": {}} + + yield {"messageStop": {"stopReason": stop_reason}} diff --git a/tests/mlmodel_strands/conftest.py b/tests/mlmodel_strands/conftest.py new file mode 100644 index 0000000000..b810161f6a --- /dev/null +++ b/tests/mlmodel_strands/conftest.py @@ -0,0 +1,144 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from _mock_model_provider import MockedModelProvider +from testing_support.fixtures import collector_agent_registration_fixture, collector_available_fixture +from testing_support.ml_testing_utils import set_trace_info + +_default_settings = { + "package_reporting.enabled": False, # Turn off package reporting for testing as it causes slowdowns. 
+ "transaction_tracer.explain_threshold": 0.0, + "transaction_tracer.transaction_threshold": 0.0, + "transaction_tracer.stack_trace_threshold": 0.0, + "debug.log_data_collector_payloads": True, + "debug.record_transaction_failure": True, + "ai_monitoring.enabled": True, +} + +collector_agent_registration = collector_agent_registration_fixture( + app_name="Python Agent Test (mlmodel_strands)", default_settings=_default_settings +) + + +@pytest.fixture +def single_tool_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def single_tool_model_error(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + # Set arguments to an invalid type to trigger error in tool + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": 12}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def multi_tool_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123, "b": 2}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def multi_tool_model_error(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + # Set insufficient arguments to trigger error in tool + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model diff --git a/tests/mlmodel_strands/test_simple.py b/tests/mlmodel_strands/test_simple.py new file mode 100644 index 0000000000..ae24003fab --- /dev/null +++ b/tests/mlmodel_strands/test_simple.py @@ -0,0 +1,36 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from strands import Agent, tool + +from newrelic.api.background_task import background_task + + +# Example tool for testing purposes +@tool +def add_exclamation(message: str) -> str: + return f"{message}!" + + +# TODO: Remove this file once all real tests are in place + + +@background_task() +def test_simple_run_agent(set_trace_info, single_tool_model): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + response = my_agent("Run the tools.") + assert response.message["content"][0]["text"] == "Success!" + assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 diff --git a/tox.ini b/tox.ini index 39148b657f..ace7839db3 100644 --- a/tox.ini +++ b/tox.ini @@ -182,6 +182,7 @@ envlist = python-logger_structlog-{py38,py39,py310,py311,py312,py313,py314,pypy311}-structloglatest, python-mlmodel_autogen-{py310,py311,py312,py313,py314,pypy311}-autogen061, python-mlmodel_autogen-{py310,py311,py312,py313,py314,pypy311}-autogenlatest, + python-mlmodel_strands-{py310,py311,py312,py313}-strandslatest, python-mlmodel_gemini-{py39,py310,py311,py312,py313,py314}, python-mlmodel_langchain-{py39,py310,py311,py312,py313}, ;; Package not ready for Python 3.14 (type annotations not updated) @@ -440,6 +441,8 @@ deps = mlmodel_langchain: faiss-cpu mlmodel_langchain: mock mlmodel_langchain: asyncio + mlmodel_strands: strands-agents[openai] + mlmodel_strands: strands-agents-tools logger_loguru-logurulatest: loguru logger_structlog-structloglatest: structlog messagebroker_pika-pikalatest: pika @@ -510,6 +513,7 @@ changedir = application_celery: tests/application_celery component_djangorestframework: tests/component_djangorestframework component_flask_rest: tests/component_flask_rest + component_graphenedjango: tests/component_graphenedjango component_graphqlserver: tests/component_graphqlserver component_tastypie: tests/component_tastypie coroutines_asyncio: tests/coroutines_asyncio @@ -521,17 +525,17 @@ changedir = datastore_cassandradriver: tests/datastore_cassandradriver datastore_elasticsearch: tests/datastore_elasticsearch datastore_firestore: tests/datastore_firestore - datastore_oracledb: tests/datastore_oracledb datastore_memcache: tests/datastore_memcache + datastore_motor: tests/datastore_motor datastore_mysql: tests/datastore_mysql datastore_mysqldb: tests/datastore_mysqldb + datastore_oracledb: tests/datastore_oracledb datastore_postgresql: tests/datastore_postgresql datastore_psycopg: tests/datastore_psycopg datastore_psycopg2: tests/datastore_psycopg2 datastore_psycopg2cffi: tests/datastore_psycopg2cffi datastore_pylibmc: tests/datastore_pylibmc datastore_pymemcache: tests/datastore_pymemcache - datastore_motor: tests/datastore_motor datastore_pymongo: tests/datastore_pymongo datastore_pymssql: tests/datastore_pymssql datastore_pymysql: tests/datastore_pymysql @@ -539,8 +543,8 @@ changedir = datastore_pysolr: tests/datastore_pysolr datastore_redis: tests/datastore_redis datastore_rediscluster: tests/datastore_rediscluster - datastore_valkey: tests/datastore_valkey datastore_sqlite: tests/datastore_sqlite + 
datastore_valkey: tests/datastore_valkey external_aiobotocore: tests/external_aiobotocore external_botocore: tests/external_botocore external_feedparser: tests/external_feedparser @@ -561,7 +565,6 @@ changedir = framework_fastapi: tests/framework_fastapi framework_flask: tests/framework_flask framework_graphene: tests/framework_graphene - component_graphenedjango: tests/component_graphenedjango framework_graphql: tests/framework_graphql framework_grpc: tests/framework_grpc framework_pyramid: tests/framework_pyramid @@ -581,6 +584,7 @@ changedir = mlmodel_langchain: tests/mlmodel_langchain mlmodel_openai: tests/mlmodel_openai mlmodel_sklearn: tests/mlmodel_sklearn + mlmodel_strands: tests/mlmodel_strands template_genshi: tests/template_genshi template_jinja2: tests/template_jinja2 template_mako: tests/template_mako From 630f4093150c89f84e8661875c676ce4f750b38e Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Thu, 13 Nov 2025 14:53:41 -0800 Subject: [PATCH 05/34] Add response token count logic to Bedrock instrumentation. (#1504) * Add bedrock token counting. * [MegaLinter] Apply linters fixes * Add bedrock token counting. * Add safeguards when grabbing token counts. * Remove extra None defaults. * Cleanup default None checks. --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- newrelic/hooks/external_botocore.py | 260 ++++++++++++++---- .../test_bedrock_chat_completion_converse.py | 53 +--- ...st_bedrock_chat_completion_invoke_model.py | 102 +------ .../test_bedrock_embeddings.py | 43 +-- .../_test_bedrock_chat_completion.py | 30 ++ .../_test_bedrock_embeddings.py | 2 + ...st_bedrock_chat_completion_invoke_model.py | 150 ++++------ .../test_bedrock_embeddings.py | 43 +-- .../test_chat_completion_converse.py | 64 +---- tests/mlmodel_openai/test_embeddings_error.py | 57 +--- 10 files changed, 319 insertions(+), 485 deletions(-) diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py index d8c18b49db..a3da091284 100644 --- a/newrelic/hooks/external_botocore.py +++ b/newrelic/hooks/external_botocore.py @@ -192,6 +192,7 @@ def create_chat_completion_message_event( request_model, request_id, llm_metadata_dict, + all_token_counts, response_id=None, ): if not transaction: @@ -224,6 +225,8 @@ def create_chat_completion_message_event( "vendor": "bedrock", "ingest_source": "Python", } + if all_token_counts: + chat_completion_message_dict["token_count"] = 0 if settings.ai_monitoring.record_content.enabled: chat_completion_message_dict["content"] = content @@ -263,6 +266,8 @@ def create_chat_completion_message_event( "ingest_source": "Python", "is_response": True, } + if all_token_counts: + chat_completion_message_dict["token_count"] = 0 if settings.ai_monitoring.record_content.enabled: chat_completion_message_dict["content"] = content @@ -272,24 +277,21 @@ def create_chat_completion_message_event( transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict) -def extract_bedrock_titan_text_model_request(request_body, bedrock_attrs): +def extract_bedrock_titan_embedding_model_request(request_body, bedrock_attrs): request_body = json.loads(request_body) - request_config = request_body.get("textGenerationConfig", {}) - input_message_list = [{"role": "user", "content": request_body.get("inputText")}] - - bedrock_attrs["input_message_list"] = input_message_list - bedrock_attrs["request.max_tokens"] = request_config.get("maxTokenCount") - bedrock_attrs["request.temperature"] = request_config.get("temperature") 
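The reshuffled Titan extractors in this hunk split embedding handling out of text generation: the embedding request extractor records the prompt text, and the matching response extractor takes the total token count directly from the response body's `inputTextTokenCount`. Roughly, with payloads shaped the way these extractors expect (values illustrative):

import json

bedrock_attrs = {}
extract_bedrock_titan_embedding_model_request(
    json.dumps({"inputText": "This is an embedding test."}), bedrock_attrs
)
extract_bedrock_titan_embedding_model_response(
    json.dumps({"inputTextTokenCount": 6, "embedding": [0.1, 0.2, 0.3]}), bedrock_attrs
)
assert bedrock_attrs == {
    "input": "This is an embedding test.",
    "response.usage.total_tokens": 6,
}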
+ bedrock_attrs["input"] = request_body.get("inputText") return bedrock_attrs -def extract_bedrock_mistral_text_model_request(request_body, bedrock_attrs): - request_body = json.loads(request_body) - bedrock_attrs["input_message_list"] = [{"role": "user", "content": request_body.get("prompt")}] - bedrock_attrs["request.max_tokens"] = request_body.get("max_tokens") - bedrock_attrs["request.temperature"] = request_body.get("temperature") +def extract_bedrock_titan_embedding_model_response(response_body, bedrock_attrs): + if response_body: + response_body = json.loads(response_body) + + input_tokens = response_body.get("inputTextTokenCount", 0) + bedrock_attrs["response.usage.total_tokens"] = input_tokens + return bedrock_attrs @@ -297,16 +299,31 @@ def extract_bedrock_titan_text_model_response(response_body, bedrock_attrs): if response_body: response_body = json.loads(response_body) + input_tokens = response_body.get("inputTextTokenCount", 0) + completion_tokens = sum(result.get("tokenCount", 0) for result in response_body.get("results", [])) + total_tokens = input_tokens + completion_tokens + output_message_list = [ - {"role": "assistant", "content": result["outputText"]} for result in response_body.get("results", []) + {"role": "assistant", "content": result.get("outputText")} for result in response_body.get("results", []) ] bedrock_attrs["response.choices.finish_reason"] = response_body["results"][0]["completionReason"] + bedrock_attrs["response.usage.completion_tokens"] = completion_tokens + bedrock_attrs["response.usage.prompt_tokens"] = input_tokens + bedrock_attrs["response.usage.total_tokens"] = total_tokens bedrock_attrs["output_message_list"] = output_message_list return bedrock_attrs +def extract_bedrock_mistral_text_model_request(request_body, bedrock_attrs): + request_body = json.loads(request_body) + bedrock_attrs["input_message_list"] = [{"role": "user", "content": request_body.get("prompt")}] + bedrock_attrs["request.max_tokens"] = request_body.get("max_tokens") + bedrock_attrs["request.temperature"] = request_body.get("temperature") + return bedrock_attrs + + def extract_bedrock_mistral_text_model_response(response_body, bedrock_attrs): if response_body: response_body = json.loads(response_body) @@ -319,17 +336,6 @@ def extract_bedrock_mistral_text_model_response(response_body, bedrock_attrs): return bedrock_attrs -def extract_bedrock_titan_text_model_streaming_response(response_body, bedrock_attrs): - if response_body: - if "outputText" in response_body: - bedrock_attrs["output_message_list"] = messages = bedrock_attrs.get("output_message_list", []) - messages.append({"role": "assistant", "content": response_body["outputText"]}) - - bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason", None) - - return bedrock_attrs - - def extract_bedrock_mistral_text_model_streaming_response(response_body, bedrock_attrs): if response_body: outputs = response_body.get("outputs") @@ -338,14 +344,46 @@ def extract_bedrock_mistral_text_model_streaming_response(response_body, bedrock "output_message_list", [{"role": "assistant", "content": ""}] ) bedrock_attrs["output_message_list"][0]["content"] += outputs[0].get("text", "") - bedrock_attrs["response.choices.finish_reason"] = outputs[0].get("stop_reason", None) + bedrock_attrs["response.choices.finish_reason"] = outputs[0].get("stop_reason") return bedrock_attrs -def extract_bedrock_titan_embedding_model_request(request_body, bedrock_attrs): +def extract_bedrock_titan_text_model_request(request_body, 
bedrock_attrs): request_body = json.loads(request_body) + request_config = request_body.get("textGenerationConfig", {}) - bedrock_attrs["input"] = request_body.get("inputText") + input_message_list = [{"role": "user", "content": request_body.get("inputText")}] + + bedrock_attrs["input_message_list"] = input_message_list + bedrock_attrs["request.max_tokens"] = request_config.get("maxTokenCount") + bedrock_attrs["request.temperature"] = request_config.get("temperature") + + return bedrock_attrs + + +def extract_bedrock_titan_text_model_streaming_response(response_body, bedrock_attrs): + if response_body: + if "outputText" in response_body: + bedrock_attrs["output_message_list"] = messages = bedrock_attrs.get("output_message_list", []) + messages.append({"role": "assistant", "content": response_body["outputText"]}) + + bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason") + + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) return bedrock_attrs @@ -409,6 +447,17 @@ def extract_bedrock_claude_model_response(response_body, bedrock_attrs): output_message_list = [{"role": role, "content": content}] bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") bedrock_attrs["output_message_list"] = output_message_list + bedrock_attrs[""] = str(response_body.get("id")) + + # Extract token information + token_usage = response_body.get("usage", {}) + if token_usage: + prompt_tokens = token_usage.get("input_tokens", 0) + completion_tokens = token_usage.get("output_tokens", 0) + total_tokens = prompt_tokens + completion_tokens + bedrock_attrs["response.usage.prompt_tokens"] = prompt_tokens + bedrock_attrs["response.usage.completion_tokens"] = completion_tokens + bedrock_attrs["response.usage.total_tokens"] = total_tokens return bedrock_attrs @@ -420,6 +469,23 @@ def extract_bedrock_claude_model_streaming_response(response_body, bedrock_attrs bedrock_attrs["output_message_list"] = [{"role": "assistant", "content": ""}] bedrock_attrs["output_message_list"][0]["content"] += content bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") + + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) + return bedrock_attrs @@ -440,6 +506,13 @@ def 
extract_bedrock_llama_model_response(response_body, bedrock_attrs): response_body = json.loads(response_body) output_message_list = [{"role": "assistant", "content": response_body.get("generation")}] + prompt_tokens = response_body.get("prompt_token_count", 0) + completion_tokens = response_body.get("generation_token_count", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = completion_tokens + bedrock_attrs["response.usage.prompt_tokens"] = prompt_tokens + bedrock_attrs["response.usage.total_tokens"] = total_tokens bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") bedrock_attrs["output_message_list"] = output_message_list @@ -453,6 +526,22 @@ def extract_bedrock_llama_model_streaming_response(response_body, bedrock_attrs) bedrock_attrs["output_message_list"] = [{"role": "assistant", "content": ""}] bedrock_attrs["output_message_list"][0]["content"] += content bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") + + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) return bedrock_attrs @@ -493,12 +582,33 @@ def extract_bedrock_cohere_model_streaming_response(response_body, bedrock_attrs bedrock_attrs["response.choices.finish_reason"] = response_body["generations"][0]["finish_reason"] bedrock_attrs["response_id"] = str(response_body.get("id")) + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) + return bedrock_attrs NULL_EXTRACTOR = lambda *args: {} # noqa: E731 # Empty extractor that returns nothing MODEL_EXTRACTORS = [ # Order is important here, avoiding dictionaries - ("amazon.titan-embed", extract_bedrock_titan_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR), + ( + "amazon.titan-embed", + extract_bedrock_titan_embedding_model_request, + extract_bedrock_titan_embedding_model_response, + NULL_EXTRACTOR, + ), ("cohere.embed", extract_bedrock_cohere_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR), ( "amazon.titan", @@ -550,8 +660,8 @@ def handle_bedrock_exception( input_message_list = [] bedrock_attrs["input_message_list"] = input_message_list - bedrock_attrs["request.max_tokens"] = kwargs.get("inferenceConfig", {}).get("maxTokens", None) - bedrock_attrs["request.temperature"] = kwargs.get("inferenceConfig", 
{}).get("temperature", None) + bedrock_attrs["request.max_tokens"] = kwargs.get("inferenceConfig", {}).get("maxTokens") + bedrock_attrs["request.temperature"] = kwargs.get("inferenceConfig", {}).get("temperature") try: request_extractor(request_body, bedrock_attrs) @@ -801,6 +911,7 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs): try: # For aioboto3 clients, this will call make_api_call instrumentation in external_aiobotocore response = wrapped(*args, **kwargs) + except Exception as exc: handle_bedrock_exception( exc, False, model, span_id, trace_id, request_extractor, {}, ft, transaction, kwargs, is_converse=True @@ -848,6 +959,10 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, sp for result in response.get("output").get("message").get("content", []) ] + response_prompt_tokens = response.get("usage", {}).get("inputTokens") if response else None + response_completion_tokens = response.get("usage", {}).get("outputTokens") if response else None + response_total_tokens = response.get("usage", {}).get("totalTokens") if response else None + bedrock_attrs = { "request_id": response_headers.get("x-amzn-requestid"), "model": model, @@ -855,9 +970,12 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, sp "trace_id": trace_id, "response.choices.finish_reason": response.get("stopReason"), "output_message_list": output_message_list, - "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens", None), - "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature", None), + "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens"), + "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature"), "input_message_list": input_message_list, + "response.usage.prompt_tokens": response_prompt_tokens, + "response.usage.completion_tokens": response_completion_tokens, + "response.usage.total_tokens": response_total_tokens, } return bedrock_attrs @@ -1008,29 +1126,34 @@ def handle_embedding_event(transaction, bedrock_attrs): custom_attrs_dict = transaction._custom_params llm_metadata_dict = {key: value for key, value in custom_attrs_dict.items() if key.startswith("llm.")} - span_id = bedrock_attrs.get("span_id", None) - trace_id = bedrock_attrs.get("trace_id", None) - request_id = bedrock_attrs.get("request_id", None) - model = bedrock_attrs.get("model", None) + span_id = bedrock_attrs.get("span_id") + trace_id = bedrock_attrs.get("trace_id") + request_id = bedrock_attrs.get("request_id") + model = bedrock_attrs.get("model") input_ = bedrock_attrs.get("input") + response_total_tokens = bedrock_attrs.get("response.usage.total_tokens") + + total_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, input_) + if settings.ai_monitoring.llm_token_count_callback and input_ + else response_total_tokens + ) + embedding_dict = { "vendor": "bedrock", "ingest_source": "Python", "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(model, input_) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "request_id": request_id, - "duration": bedrock_attrs.get("duration", None), + "duration": bedrock_attrs.get("duration"), "request.model": model, "response.model": model, - "error": bedrock_attrs.get("error", None), + "response.usage.total_tokens": total_tokens, + "error": bedrock_attrs.get("error"), } + embedding_dict.update(llm_metadata_dict) if 
settings.ai_monitoring.record_content.enabled: @@ -1041,6 +1164,7 @@ def handle_embedding_event(transaction, bedrock_attrs): def handle_chat_completion_event(transaction, bedrock_attrs): + settings = transaction.settings or global_settings() chat_completion_id = str(uuid.uuid4()) # Grab LLM-related custom attributes off of the transaction to store as metadata on LLM events custom_attrs_dict = transaction._custom_params @@ -1049,11 +1173,15 @@ def handle_chat_completion_event(transaction, bedrock_attrs): llm_context_attrs = getattr(transaction, "_llm_context_attrs", None) if llm_context_attrs: llm_metadata_dict.update(llm_context_attrs) - span_id = bedrock_attrs.get("span_id", None) - trace_id = bedrock_attrs.get("trace_id", None) - request_id = bedrock_attrs.get("request_id", None) - response_id = bedrock_attrs.get("response_id", None) - model = bedrock_attrs.get("model", None) + span_id = bedrock_attrs.get("span_id") + trace_id = bedrock_attrs.get("trace_id") + request_id = bedrock_attrs.get("request_id") + response_id = bedrock_attrs.get("response_id") + model = bedrock_attrs.get("model") + + response_prompt_tokens = bedrock_attrs.get("response.usage.prompt_tokens") + response_completion_tokens = bedrock_attrs.get("response.usage.completion_tokens") + response_total_tokens = bedrock_attrs.get("response.usage.total_tokens") input_message_list = bedrock_attrs.get("input_message_list", []) output_message_list = bedrock_attrs.get("output_message_list", []) @@ -1061,6 +1189,25 @@ def handle_chat_completion_event(transaction, bedrock_attrs): len(input_message_list) + len(output_message_list) ) or None # If 0, attribute will be set to None and removed + input_message_content = " ".join([msg.get("content") for msg in input_message_list if msg.get("content")]) + prompt_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, input_message_content) + if settings.ai_monitoring.llm_token_count_callback and input_message_content + else response_prompt_tokens + ) + + output_message_content = " ".join([msg.get("content") for msg in output_message_list if msg.get("content")]) + completion_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, output_message_content) + if settings.ai_monitoring.llm_token_count_callback and output_message_content + else response_completion_tokens + ) + total_tokens = ( + prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens + ) + + all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens) + chat_completion_summary_dict = { "vendor": "bedrock", "ingest_source": "Python", @@ -1069,15 +1216,21 @@ def handle_chat_completion_event(transaction, bedrock_attrs): "trace_id": trace_id, "request_id": request_id, "response_id": response_id, - "duration": bedrock_attrs.get("duration", None), - "request.max_tokens": bedrock_attrs.get("request.max_tokens", None), - "request.temperature": bedrock_attrs.get("request.temperature", None), + "duration": bedrock_attrs.get("duration"), + "request.max_tokens": bedrock_attrs.get("request.max_tokens"), + "request.temperature": bedrock_attrs.get("request.temperature"), "request.model": model, "response.model": model, # Duplicate data required by the UI "response.number_of_messages": number_of_messages, - "response.choices.finish_reason": bedrock_attrs.get("response.choices.finish_reason", None), - "error": bedrock_attrs.get("error", None), + "response.choices.finish_reason": bedrock_attrs.get("response.choices.finish_reason"), + "error": 
bedrock_attrs.get("error"), } + + if all_token_counts: + chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens + chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens + chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens + chat_completion_summary_dict.update(llm_metadata_dict) chat_completion_summary_dict = {k: v for k, v in chat_completion_summary_dict.items() if v is not None} transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) @@ -1092,6 +1245,7 @@ def handle_chat_completion_event(transaction, bedrock_attrs): request_model=model, request_id=request_id, llm_metadata_dict=llm_metadata_dict, + all_token_counts=all_token_counts, response_id=response_id, ) diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py index da9c5818e7..87dfa1f1b6 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py @@ -17,7 +17,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -49,6 +49,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.usage.prompt_tokens": 26, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 126, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "max_tokens", @@ -70,6 +73,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -88,6 +92,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -106,6 +111,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -189,7 +195,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model): - @validate_custom_events(add_token_count_to_events(chat_completion_expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events)) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -476,46 +482,3 @@ def _test(): converse_invalid_model(loop, bedrock_converse_server) _test() - - -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_converse_server, loop, set_trace_info -): - """ - A request is made to the server with invalid credentials. 
botocore will reach out to the server and receive an - UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer - events. The error response can also be parsed, and will be included as attributes on the recorded exception. - """ - - @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") - def _test(): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - converse_incorrect_access_key(loop, bedrock_converse_server, monkeypatch) - - _test() diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py index 65cb276c77..e3a897d0c8 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py @@ -34,7 +34,8 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -206,7 +207,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_events, expected_metrics): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(add_token_count_streaming_events(expected_events))) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=3) @validate_transaction_metrics( @@ -455,51 +456,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token( - monkeypatch, - bedrock_server, - exercise_model, - set_trace_info, - expected_invalid_access_key_error_events, - expected_metrics, -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=expected_metrics, - 
rollup_metrics=expected_metrics, - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) - - _test() - - def invoke_model_malformed_request_body(loop, bedrock_server, response_streaming): async def _coro(): with pytest.raises(_client_error): @@ -798,58 +754,6 @@ async def _test(): loop.run_until_complete(_test()) -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events)) -@validate_custom_event_count(count=2) -@validate_error_trace_attributes( - _event_stream_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "error.message": "Malformed input request, please reformat your input and try again.", - "error.code": "ValidationException", - }, - }, - forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")}, -) -@validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, -) -@background_task(name="test_bedrock_chat_completion") -def test_bedrock_chat_completion_error_streaming_exception_with_token_count(loop, bedrock_server, set_trace_info): - """ - Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set. - - See the original test for a description of the error case. 
- """ - - async def _test(): - with pytest.raises(_event_stream_error): - model = "amazon.titan-text-express-v1" - body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8") - - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - response = await bedrock_server.invoke_model_with_response_stream( - body=body, modelId=model, accept="application/json", contentType="application/json" - ) - - body = response.get("body") - async for resp in body: - assert resp - - loop.run_until_complete(_test()) - - def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server): assert bedrock_server._nr_wrapped diff --git a/tests/external_aiobotocore/test_bedrock_embeddings.py b/tests/external_aiobotocore/test_bedrock_embeddings.py index 96b930feb5..dacfbb4eed 100644 --- a/tests/external_aiobotocore/test_bedrock_embeddings.py +++ b/tests/external_aiobotocore/test_bedrock_embeddings.py @@ -27,7 +27,7 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -164,7 +164,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_count_to_embedding_events(expected_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_bedrock_embedding", @@ -289,45 +289,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_embedding_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_embedding", - scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_embedding") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token") - - _test() - - @reset_core_stats_engine() @validate_custom_events(embedding_expected_malformed_request_body_events) @validate_custom_event_count(count=1) diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py 
b/tests/external_botocore/_test_bedrock_chat_completion.py index 155b6c993c..6b65af8cb2 100644 --- a/tests/external_botocore/_test_bedrock_chat_completion.py +++ b/tests/external_botocore/_test_bedrock_chat_completion.py @@ -97,6 +97,9 @@ "duration": None, # Response time varies each test run "request.model": "amazon.titan-text-express-v1", "response.model": "amazon.titan-text-express-v1", + "response.usage.completion_tokens": 32, + "response.usage.total_tokens": 44, + "response.usage.prompt_tokens": 12, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "FINISH", @@ -118,6 +121,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -136,6 +140,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -335,6 +340,9 @@ "duration": None, # Response time varies each test run "request.model": "meta.llama2-13b-chat-v1", "response.model": "meta.llama2-13b-chat-v1", + "response.usage.prompt_tokens": 17, + "response.usage.completion_tokens": 69, + "response.usage.total_tokens": 86, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "stop", @@ -356,6 +364,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -374,6 +383,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -919,6 +929,9 @@ "duration": None, # Response time varies each test run "request.model": "amazon.titan-text-express-v1", "response.model": "amazon.titan-text-express-v1", + "response.usage.completion_tokens": 35, + "response.usage.total_tokens": 47, + "response.usage.prompt_tokens": 12, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "FINISH", @@ -940,6 +953,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -958,6 +972,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -978,6 +993,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-instant-v1", "response.model": "anthropic.claude-instant-v1", + "response.usage.completion_tokens": 99, + "response.usage.prompt_tokens": 19, + "response.usage.total_tokens": 118, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "stop_sequence", @@ -999,6 +1017,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-instant-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1017,6 +1036,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "anthropic.claude-instant-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1038,6 +1058,9 @@ "duration": None, # Response time varies each test run "request.model": "cohere.command-text-v14", "response.model": "cohere.command-text-v14", + "response.usage.completion_tokens": 91, + 
"response.usage.total_tokens": 100, + "response.usage.prompt_tokens": 9, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "COMPLETE", @@ -1059,6 +1082,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "cohere.command-text-v14", "vendor": "bedrock", "ingest_source": "Python", @@ -1077,6 +1101,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "cohere.command-text-v14", "vendor": "bedrock", "ingest_source": "Python", @@ -1097,6 +1122,9 @@ "duration": None, # Response time varies each test run "request.model": "meta.llama2-13b-chat-v1", "response.model": "meta.llama2-13b-chat-v1", + "response.usage.prompt_tokens": 17, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 117, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "length", @@ -1118,6 +1146,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1136,6 +1165,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", diff --git a/tests/external_botocore/_test_bedrock_embeddings.py b/tests/external_botocore/_test_bedrock_embeddings.py index f5c227b9c3..af544af001 100644 --- a/tests/external_botocore/_test_bedrock_embeddings.py +++ b/tests/external_botocore/_test_bedrock_embeddings.py @@ -33,6 +33,7 @@ "response.model": "amazon.titan-embed-text-v1", "request.model": "amazon.titan-embed-text-v1", "request_id": "11233989-07e8-4ecb-9ba6-79601ba6d8cc", + "response.usage.total_tokens": 6, "vendor": "bedrock", "ingest_source": "Python", }, @@ -52,6 +53,7 @@ "response.model": "amazon.titan-embed-g1-text-02", "request.model": "amazon.titan-embed-g1-text-02", "request_id": "b10ac895-eae3-4f07-b926-10b2866c55ed", + "response.usage.total_tokens": 6, "vendor": "bedrock", "ingest_source": "Python", }, diff --git a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py index 94a88e7a56..7a471b950e 100644 --- a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ import json import os from io import BytesIO @@ -35,7 +36,8 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -129,6 +131,14 @@ def expected_events(model_id, response_streaming): return chat_completion_expected_events[model_id] +@pytest.fixture(scope="module") +def expected_events(model_id, response_streaming): + if response_streaming: + return chat_completion_streaming_expected_events[model_id] + else: + return chat_completion_expected_events[model_id] + + @pytest.fixture(scope="module") def expected_metrics(response_streaming): if response_streaming: @@ -200,7 +210,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_events, expected_metrics): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(add_token_count_streaming_events(expected_events))) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=3) @validate_transaction_metrics( @@ -438,49 +448,50 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token( - monkeypatch, - bedrock_server, - exercise_model, - set_trace_info, - expected_invalid_access_key_error_events, - expected_metrics, -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=expected_metrics, - rollup_metrics=expected_metrics, - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) - - _test() +# +# @reset_core_stats_engine() +# @override_llm_token_callback_settings(llm_token_count_callback) +# def test_bedrock_chat_completion_error_incorrect_access_key_with_token( +# monkeypatch, +# bedrock_server, +# exercise_model, +# set_trace_info, +# expected_invalid_access_key_error_events, +# expected_metrics, +# ): +# @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) +# @validate_error_trace_attributes( +# _client_error_name, +# exact_attrs={ +# "agent": {}, +# "intrinsic": {}, 
+# "user": { +# "http.statusCode": 403, +# "error.message": "The security token included in the request is invalid.", +# "error.code": "UnrecognizedClientException", +# }, +# }, +# ) +# @validate_transaction_metrics( +# name="test_bedrock_chat_completion", +# scoped_metrics=expected_metrics, +# rollup_metrics=expected_metrics, +# custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], +# background_task=True, +# ) +# @background_task(name="test_bedrock_chat_completion") +# def _test(): +# monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") +# +# with pytest.raises(_client_error): # not sure where this exception actually comes from +# set_trace_info() +# add_custom_attribute("llm.conversation_id", "my-awesome-id") +# add_custom_attribute("llm.foo", "bar") +# add_custom_attribute("non_llm_attr", "python-agent") +# +# exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) +# +# _test() @reset_core_stats_engine() @@ -762,55 +773,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_streaming_exception_with_token_count(bedrock_server, set_trace_info): - """ - Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set. - - See the original test for a description of the error case. - """ - - @validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events)) - @validate_custom_event_count(count=2) - @validate_error_trace_attributes( - _event_stream_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "error.message": "Malformed input request, please reformat your input and try again.", - "error.code": "ValidationException", - }, - }, - forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")}, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - with pytest.raises(_event_stream_error): - model = "amazon.titan-text-express-v1" - body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8") - - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - response = bedrock_server.invoke_model_with_response_stream( - body=body, modelId=model, accept="application/json", contentType="application/json" - ) - list(response["body"]) # Iterate - - _test() - - def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server): assert bedrock_server._nr_wrapped diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py index 417e24b2d9..de2cb201e7 100644 --- a/tests/external_botocore/test_bedrock_embeddings.py +++ b/tests/external_botocore/test_bedrock_embeddings.py @@ -28,7 +28,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - 
add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -161,7 +161,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_count_to_embedding_events(expected_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_bedrock_embedding", @@ -286,45 +286,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_embedding_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_embedding", - scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_embedding") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token") - - _test() - - @reset_core_stats_engine() def test_bedrock_embedding_error_malformed_request_body(bedrock_server, set_trace_info): """ diff --git a/tests/external_botocore/test_chat_completion_converse.py b/tests/external_botocore/test_chat_completion_converse.py index 96ead41dd7..2d38d6b4a4 100644 --- a/tests/external_botocore/test_chat_completion_converse.py +++ b/tests/external_botocore/test_chat_completion_converse.py @@ -17,7 +17,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -49,6 +49,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.usage.prompt_tokens": 26, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 126, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "max_tokens", @@ -70,6 +73,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -88,6 +92,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 
0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -106,6 +111,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -185,7 +191,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model): - @validate_custom_events(add_token_count_to_events(chat_completion_expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events)) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -468,57 +474,3 @@ def _test(): assert response _test() - - -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_converse_server, exercise_model, set_trace_info -): - """ - A request is made to the server with invalid credentials. botocore will reach out to the server and receive an - UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer - events. The error response can also be parsed, and will be included as attributes on the recorded exception. - """ - - @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") - def _test(): - monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - message = [{"role": "user", "content": [{"text": "Invalid Token"}]}] - - response = bedrock_converse_server.converse( - modelId="anthropic.claude-3-sonnet-20240229-v1:0", - messages=message, - inferenceConfig={"temperature": 0.7, "maxTokens": 100}, - ) - - assert response - - _test() diff --git a/tests/mlmodel_openai/test_embeddings_error.py b/tests/mlmodel_openai/test_embeddings_error.py index a8e46bf23a..f80e6ff41d 100644 --- a/tests/mlmodel_openai/test_embeddings_error.py +++ b/tests/mlmodel_openai/test_embeddings_error.py @@ -14,12 +14,10 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, 
disabled_ai_monitoring_record_content_settings, events_sans_content, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -128,35 +126,6 @@ def test_embeddings_invalid_request_error_no_model_no_content(set_trace_info): ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "The model `does-not-exist` does not exist" - # "http.statusCode": 404, - } -) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - openai.Embedding.create(input="Model does not exist.", model="does-not-exist") - - # Invalid model provided @dt_enabled @reset_core_stats_engine() @@ -348,30 +317,6 @@ def test_embeddings_invalid_request_error_no_model_async_no_content(loop, set_tr ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/embedding/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/acreate", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count_async(set_trace_info, loop): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - loop.run_until_complete(openai.Embedding.acreate(input="Model does not exist.", model="does-not-exist")) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() From e3b91062ecc3d380e157de229b524b7706c9709d Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Thu, 13 Nov 2025 14:54:05 -0800 Subject: [PATCH 06/34] Add Strands tools and agents instrumentation. (#1563) * Add baseline instrumentation. * Add tool and agent instrumentation. * Add tests file. * Cleanup instrumentation. * Cleanup. Co-authored-by: Tim Pansino * [MegaLinter] Apply linters fixes * Strands Mock Model (#1551) * Add strands to tox.ini * Add mock models for strands testing * Add simple test file to validate strands mocking * Add baseline instrumentation. * Add tool and agent instrumentation. * Add tests file. * Cleanup instrumentation. * Cleanup. Co-authored-by: Tim Pansino * Handle additional args in mock model. 
* Add test to force exception and exercise _handle_tool_streaming_completion_error. * Strands Mock Model (#1551) * Add strands to tox.ini * Add mock models for strands testing * Add simple test file to validate strands mocking * Add baseline instrumentation. * Add tool and agent instrumentation. * Add tests file. * Cleanup instrumentation. * Cleanup. Co-authored-by: Tim Pansino * Handle additional args in mock model. * Strands Mock Model (#1551) * Add strands to tox.ini * Add mock models for strands testing * Add simple test file to validate strands mocking * Add baseline instrumentation. * Add tool and agent instrumentation. * Cleanup. Co-authored-by: Tim Pansino * [MegaLinter] Apply linters fixes * Add test to force exception and exercise _handle_tool_streaming_completion_error. * Implement strands context passing instrumentation. * Address review feedback. * [MegaLinter] Apply linters fixes * Remove test_simple.py file. --------- Co-authored-by: Tim Pansino Co-authored-by: Timothy Pansino <11214426+TimPansino@users.noreply.github.com> Co-authored-by: Tim Pansino --- newrelic/api/error_trace.py | 29 +- newrelic/common/llm_utils.py | 24 + newrelic/config.py | 7 + newrelic/hooks/mlmodel_strands.py | 492 ++++++++++++++++++ tests/mlmodel_strands/_mock_model_provider.py | 4 +- tests/mlmodel_strands/conftest.py | 25 +- tests/mlmodel_strands/test_agent.py | 427 +++++++++++++++ tests/mlmodel_strands/test_simple.py | 36 -- tests/testing_support/fixtures.py | 2 +- .../validators/validate_custom_event.py | 4 +- .../validate_error_event_collector_json.py | 2 +- .../validate_transaction_error_event_count.py | 4 +- 12 files changed, 1004 insertions(+), 52 deletions(-) create mode 100644 newrelic/common/llm_utils.py create mode 100644 newrelic/hooks/mlmodel_strands.py create mode 100644 tests/mlmodel_strands/test_agent.py delete mode 100644 tests/mlmodel_strands/test_simple.py diff --git a/newrelic/api/error_trace.py b/newrelic/api/error_trace.py index db63c54316..aaa12b50e3 100644 --- a/newrelic/api/error_trace.py +++ b/newrelic/api/error_trace.py @@ -15,6 +15,7 @@ import functools from newrelic.api.time_trace import current_trace, notice_error +from newrelic.common.async_wrapper import async_wrapper as get_async_wrapper from newrelic.common.object_wrapper import FunctionWrapper, wrap_object @@ -43,17 +44,31 @@ def __exit__(self, exc, value, tb): ) -def ErrorTraceWrapper(wrapped, ignore=None, expected=None, status_code=None): - def wrapper(wrapped, instance, args, kwargs): - parent = current_trace() +def ErrorTraceWrapper(wrapped, ignore=None, expected=None, status_code=None, async_wrapper=None): + def literal_wrapper(wrapped, instance, args, kwargs): + # Determine if the wrapped function is async or sync + wrapper = async_wrapper if async_wrapper is not None else get_async_wrapper(wrapped) + # Sync function path + if not wrapper: + parent = current_trace() + if not parent: + # No active tracing context so just call the wrapped function directly + return wrapped(*args, **kwargs) + # Async function path + else: + # For async functions, the async wrapper will handle trace context propagation + parent = None - if parent is None: - return wrapped(*args, **kwargs) + trace = ErrorTrace(ignore, expected, status_code, parent=parent) + + if wrapper: + # The async wrapper handles the context management for us + return wrapper(wrapped, trace)(*args, **kwargs) - with ErrorTrace(ignore, expected, status_code, parent=parent): + with trace: return wrapped(*args, **kwargs) - return FunctionWrapper(wrapped, wrapper) + 
return FunctionWrapper(wrapped, literal_wrapper) def error_trace(ignore=None, expected=None, status_code=None): diff --git a/newrelic/common/llm_utils.py b/newrelic/common/llm_utils.py new file mode 100644 index 0000000000..eebdacfc7f --- /dev/null +++ b/newrelic/common/llm_utils.py @@ -0,0 +1,24 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def _get_llm_metadata(transaction): + # Grab LLM-related custom attributes off of the transaction to store as metadata on LLM events + custom_attrs_dict = transaction._custom_params + llm_metadata_dict = {key: value for key, value in custom_attrs_dict.items() if key.startswith("llm.")} + llm_context_attrs = getattr(transaction, "_llm_context_attrs", None) + if llm_context_attrs: + llm_metadata_dict.update(llm_context_attrs) + + return llm_metadata_dict diff --git a/newrelic/config.py b/newrelic/config.py index 21ce996f6c..ff2d85e359 100644 --- a/newrelic/config.py +++ b/newrelic/config.py @@ -2946,6 +2946,13 @@ def _process_module_builtin_defaults(): "newrelic.hooks.mlmodel_autogen", "instrument_autogen_agentchat_agents__assistant_agent", ) + _process_module_definition("strands.agent.agent", "newrelic.hooks.mlmodel_strands", "instrument_agent_agent") + _process_module_definition( + "strands.tools.executors._executor", "newrelic.hooks.mlmodel_strands", "instrument_tools_executors__executor" + ) + _process_module_definition("strands.tools.registry", "newrelic.hooks.mlmodel_strands", "instrument_tools_registry") + _process_module_definition("strands.models.bedrock", "newrelic.hooks.mlmodel_strands", "instrument_models_bedrock") + _process_module_definition("mcp.client.session", "newrelic.hooks.adapter_mcp", "instrument_mcp_client_session") _process_module_definition( "mcp.server.fastmcp.tools.tool_manager", diff --git a/newrelic/hooks/mlmodel_strands.py b/newrelic/hooks/mlmodel_strands.py new file mode 100644 index 0000000000..bf849fd717 --- /dev/null +++ b/newrelic/hooks/mlmodel_strands.py @@ -0,0 +1,492 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import logging +import sys +import uuid + +from newrelic.api.error_trace import ErrorTraceWrapper +from newrelic.api.function_trace import FunctionTrace +from newrelic.api.time_trace import current_trace, get_trace_linking_metadata +from newrelic.api.transaction import current_transaction +from newrelic.common.llm_utils import _get_llm_metadata +from newrelic.common.object_names import callable_name +from newrelic.common.object_wrapper import ObjectProxy, wrap_function_wrapper +from newrelic.common.package_version_utils import get_package_version +from newrelic.common.signature import bind_args +from newrelic.core.config import global_settings +from newrelic.core.context import ContextOf + +_logger = logging.getLogger(__name__) +STRANDS_VERSION = get_package_version("strands-agents") + +RECORD_EVENTS_FAILURE_LOG_MESSAGE = "Exception occurred in Strands instrumentation: Failed to record LLM events. Please report this issue to New Relic Support." +TOOL_OUTPUT_FAILURE_LOG_MESSAGE = "Exception occurred in Strands instrumentation: Failed to record output of tool call. Please report this issue to New Relic Support." +AGENT_EVENT_FAILURE_LOG_MESSAGE = "Exception occurred in Strands instrumentation: Failed to record agent data. Please report this issue to New Relic Support." +TOOL_EXTRACTOR_FAILURE_LOG_MESSAGE = "Exception occurred in Strands instrumentation: Failed to extract tool information. If the issue persists, report this issue to New Relic support.\n" + + +def wrap_agent__call__(wrapped, instance, args, kwargs): + trace = current_trace() + if not trace: + return wrapped(*args, **kwargs) + + try: + bound_args = bind_args(wrapped, args, kwargs) + # Make a copy of the invocation state before we mutate it + if "invocation_state" in bound_args: + invocation_state = bound_args["invocation_state"] = dict(bound_args["invocation_state"] or {}) + + # Attempt to save the current transaction context into the invocation state dictionary + invocation_state["_nr_transaction"] = trace + except Exception: + return wrapped(*args, **kwargs) + else: + return wrapped(**bound_args) + + +async def wrap_agent_invoke_async(wrapped, instance, args, kwargs): + # If there's already a transaction, don't propagate anything here + if current_transaction(): + return await wrapped(*args, **kwargs) + + try: + # Grab the trace context we should be running under and pass it to ContextOf + bound_args = bind_args(wrapped, args, kwargs) + invocation_state = bound_args["invocation_state"] or {} + trace = invocation_state.pop("_nr_transaction", None) + except Exception: + return await wrapped(*args, **kwargs) + + # If we find a transaction to propagate, use it. Otherwise, just call wrapped. 
+ if trace: + with ContextOf(trace=trace): + return await wrapped(*args, **kwargs) + else: + return await wrapped(*args, **kwargs) + + +def wrap_stream_async(wrapped, instance, args, kwargs): + transaction = current_transaction() + if not transaction: + return wrapped(*args, **kwargs) + + settings = transaction.settings or global_settings() + if not settings.ai_monitoring.enabled: + return wrapped(*args, **kwargs) + + # Framework metric also used for entity tagging in the UI + transaction.add_ml_model_info("Strands", STRANDS_VERSION) + transaction._add_agent_attribute("llm", True) + + func_name = callable_name(wrapped) + agent_name = getattr(instance, "name", "agent") + function_trace_name = f"{func_name}/{agent_name}" + + ft = FunctionTrace(name=function_trace_name, group="Llm/agent/Strands") + ft.__enter__() + linking_metadata = get_trace_linking_metadata() + agent_id = str(uuid.uuid4()) + + try: + return_val = wrapped(*args, **kwargs) + except Exception: + ft.__exit__(*sys.exc_info()) + raise + + # For streaming responses, wrap with proxy and attach metadata + try: + proxied_return_val = AsyncGeneratorProxy( + return_val, _record_agent_event_on_stop_iteration, _handle_agent_streaming_completion_error + ) + proxied_return_val._nr_ft = ft + proxied_return_val._nr_metadata = linking_metadata + proxied_return_val._nr_strands_attrs = {"agent_name": agent_name, "agent_id": agent_id} + return proxied_return_val + except Exception: + # If proxy creation fails, clean up the function trace and return original value + ft.__exit__(*sys.exc_info()) + return return_val + + +def _record_agent_event_on_stop_iteration(self, transaction): + if hasattr(self, "_nr_ft"): + # Use saved linking metadata to maintain correct span association + linking_metadata = self._nr_metadata or get_trace_linking_metadata() + self._nr_ft.__exit__(None, None, None) + + try: + strands_attrs = getattr(self, "_nr_strands_attrs", {}) + + # If there are no strands attrs exit early as there's no data to record. + if not strands_attrs: + return + + agent_name = strands_attrs.get("agent_name", "agent") + agent_id = strands_attrs.get("agent_id") + agent_event_dict = _construct_base_agent_event_dict(agent_name, agent_id, transaction, linking_metadata) + agent_event_dict["duration"] = self._nr_ft.duration * 1000 + transaction.record_custom_event("LlmAgent", agent_event_dict) + + except Exception: + _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) + finally: + # Clear cached data to prevent memory leaks and duplicate reporting + if hasattr(self, "_nr_strands_attrs"): + self._nr_strands_attrs.clear() + + +def _record_tool_event_on_stop_iteration(self, transaction): + if hasattr(self, "_nr_ft"): + # Use saved linking metadata to maintain correct span association + linking_metadata = self._nr_metadata or get_trace_linking_metadata() + self._nr_ft.__exit__(None, None, None) + + try: + strands_attrs = getattr(self, "_nr_strands_attrs", {}) + + # If there are no strands attrs exit early as there's no data to record. 
+ if not strands_attrs: + return + + try: + tool_results = strands_attrs.get("tool_results", []) + except Exception: + tool_results = None + _logger.warning(TOOL_OUTPUT_FAILURE_LOG_MESSAGE, exc_info=True) + + tool_event_dict = _construct_base_tool_event_dict( + strands_attrs, tool_results, transaction, linking_metadata + ) + tool_event_dict["duration"] = self._nr_ft.duration * 1000 + transaction.record_custom_event("LlmTool", tool_event_dict) + + except Exception: + _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) + finally: + # Clear cached data to prevent memory leaks and duplicate reporting + if hasattr(self, "_nr_strands_attrs"): + self._nr_strands_attrs.clear() + + +def _construct_base_tool_event_dict(strands_attrs, tool_results, transaction, linking_metadata): + try: + try: + tool_output = tool_results[-1]["content"][0] if tool_results else None + error = tool_results[-1]["status"] == "error" + except Exception: + tool_output = None + error = False + _logger.warning(TOOL_OUTPUT_FAILURE_LOG_MESSAGE, exc_info=True) + + tool_name = strands_attrs.get("tool_name", "tool") + tool_id = strands_attrs.get("tool_id") + run_id = strands_attrs.get("run_id") + tool_input = strands_attrs.get("tool_input") + agent_name = strands_attrs.get("agent_name", "agent") + settings = transaction.settings or global_settings() + + tool_event_dict = { + "id": tool_id, + "run_id": run_id, + "name": tool_name, + "span_id": linking_metadata.get("span.id"), + "trace_id": linking_metadata.get("trace.id"), + "agent_name": agent_name, + "vendor": "strands", + "ingest_source": "Python", + } + # Set error flag if the status shows an error was caught, + # it will be reported further down in the instrumentation. + if error: + tool_event_dict["error"] = True + + if settings.ai_monitoring.record_content.enabled: + tool_event_dict["input"] = tool_input + # In error cases, the output will hold the error message + tool_event_dict["output"] = tool_output + tool_event_dict.update(_get_llm_metadata(transaction)) + except Exception: + tool_event_dict = {} + _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) + + return tool_event_dict + + +def _construct_base_agent_event_dict(agent_name, agent_id, transaction, linking_metadata): + try: + agent_event_dict = { + "id": agent_id, + "name": agent_name, + "span_id": linking_metadata.get("span.id"), + "trace_id": linking_metadata.get("trace.id"), + "vendor": "strands", + "ingest_source": "Python", + } + agent_event_dict.update(_get_llm_metadata(transaction)) + except Exception: + _logger.warning(AGENT_EVENT_FAILURE_LOG_MESSAGE, exc_info=True) + agent_event_dict = {} + + return agent_event_dict + + +def _handle_agent_streaming_completion_error(self, transaction): + if hasattr(self, "_nr_ft"): + strands_attrs = getattr(self, "_nr_strands_attrs", {}) + + # If there are no strands attrs exit early as there's no data to record. 
+ if not strands_attrs: + self._nr_ft.__exit__(*sys.exc_info()) + return + + # Use saved linking metadata to maintain correct span association + linking_metadata = self._nr_metadata or get_trace_linking_metadata() + + try: + agent_name = strands_attrs.get("agent_name", "agent") + agent_id = strands_attrs.get("agent_id") + + # Notice the error on the function trace + self._nr_ft.notice_error(attributes={"agent_id": agent_id}) + self._nr_ft.__exit__(*sys.exc_info()) + + # Create error event + agent_event_dict = _construct_base_agent_event_dict(agent_name, agent_id, transaction, linking_metadata) + agent_event_dict.update({"duration": self._nr_ft.duration * 1000, "error": True}) + transaction.record_custom_event("LlmAgent", agent_event_dict) + + except Exception: + _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) + finally: + # Clear cached data to prevent memory leaks + if hasattr(self, "_nr_strands_attrs"): + self._nr_strands_attrs.clear() + + +def _handle_tool_streaming_completion_error(self, transaction): + if hasattr(self, "_nr_ft"): + strands_attrs = getattr(self, "_nr_strands_attrs", {}) + + # If there are no strands attrs exit early as there's no data to record. + if not strands_attrs: + self._nr_ft.__exit__(*sys.exc_info()) + return + + # Use saved linking metadata to maintain correct span association + linking_metadata = self._nr_metadata or get_trace_linking_metadata() + + try: + tool_id = strands_attrs.get("tool_id") + + # We expect this to never have any output since this is an error case, + # but if it does we will report it. + try: + tool_results = strands_attrs.get("tool_results", []) + except Exception: + tool_results = None + _logger.warning(TOOL_OUTPUT_FAILURE_LOG_MESSAGE, exc_info=True) + + # Notice the error on the function trace + self._nr_ft.notice_error(attributes={"tool_id": tool_id}) + self._nr_ft.__exit__(*sys.exc_info()) + + # Create error event + tool_event_dict = _construct_base_tool_event_dict( + strands_attrs, tool_results, transaction, linking_metadata + ) + tool_event_dict["duration"] = self._nr_ft.duration * 1000 + # Ensure error flag is set to True in case the tool_results did not indicate an error + if "error" not in tool_event_dict: + tool_event_dict["error"] = True + + transaction.record_custom_event("LlmTool", tool_event_dict) + + except Exception: + _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) + finally: + # Clear cached data to prevent memory leaks + if hasattr(self, "_nr_strands_attrs"): + self._nr_strands_attrs.clear() + + +def wrap_tool_executor__stream(wrapped, instance, args, kwargs): + transaction = current_transaction() + if not transaction: + return wrapped(*args, **kwargs) + + settings = transaction.settings or global_settings() + if not settings.ai_monitoring.enabled: + return wrapped(*args, **kwargs) + + # Framework metric also used for entity tagging in the UI + transaction.add_ml_model_info("Strands", STRANDS_VERSION) + transaction._add_agent_attribute("llm", True) + + # Grab tool data + try: + bound_args = bind_args(wrapped, args, kwargs) + agent_name = getattr(bound_args.get("agent"), "name", "agent") + tool_use = bound_args.get("tool_use", {}) + + run_id = tool_use.get("toolUseId", "") + tool_name = tool_use.get("name", "tool") + _input = tool_use.get("input") + tool_input = str(_input) if _input else None + tool_results = bound_args.get("tool_results", []) + except Exception: + tool_name = "tool" + _logger.warning(TOOL_EXTRACTOR_FAILURE_LOG_MESSAGE, exc_info=True) + + func_name = 
callable_name(wrapped) + function_trace_name = f"{func_name}/{tool_name}" + + ft = FunctionTrace(name=function_trace_name, group="Llm/tool/Strands") + ft.__enter__() + linking_metadata = get_trace_linking_metadata() + tool_id = str(uuid.uuid4()) + + try: + return_val = wrapped(*args, **kwargs) + except Exception: + raise + + try: + # Wrap return value with proxy and attach metadata for later access + proxied_return_val = AsyncGeneratorProxy( + return_val, _record_tool_event_on_stop_iteration, _handle_tool_streaming_completion_error + ) + proxied_return_val._nr_ft = ft + proxied_return_val._nr_metadata = linking_metadata + proxied_return_val._nr_strands_attrs = { + "tool_results": tool_results, + "tool_name": tool_name, + "tool_id": tool_id, + "run_id": run_id, + "tool_input": tool_input, + "agent_name": agent_name, + } + return proxied_return_val + except Exception: + # If proxy creation fails, clean up the function trace and return original value + ft.__exit__(*sys.exc_info()) + return return_val + + +class AsyncGeneratorProxy(ObjectProxy): + def __init__(self, wrapped, on_stop_iteration, on_error): + super().__init__(wrapped) + self._nr_on_stop_iteration = on_stop_iteration + self._nr_on_error = on_error + + def __aiter__(self): + self._nr_wrapped_iter = self.__wrapped__.__aiter__() + return self + + async def __anext__(self): + transaction = current_transaction() + if not transaction: + return await self._nr_wrapped_iter.__anext__() + + return_val = None + try: + return_val = await self._nr_wrapped_iter.__anext__() + except StopAsyncIteration: + self._nr_on_stop_iteration(self, transaction) + raise + except Exception: + self._nr_on_error(self, transaction) + raise + return return_val + + async def aclose(self): + return await super().aclose() + + +def wrap_ToolRegister_register_tool(wrapped, instance, args, kwargs): + bound_args = bind_args(wrapped, args, kwargs) + bound_args["tool"]._tool_func = ErrorTraceWrapper(bound_args["tool"]._tool_func) + return wrapped(*args, **kwargs) + + +def wrap_bedrock_model_stream(wrapped, instance, args, kwargs): + """Stores trace context on the messages argument to be retrieved by the _stream() instrumentation.""" + trace = current_trace() + if not trace: + return wrapped(*args, **kwargs) + + settings = trace.settings or global_settings() + if not settings.ai_monitoring.enabled: + return wrapped(*args, **kwargs) + + try: + bound_args = bind_args(wrapped, args, kwargs) + except Exception: + return wrapped(*args, **kwargs) + + if "messages" in bound_args and isinstance(bound_args["messages"], list): + bound_args["messages"].append({"newrelic_trace": trace}) + + return wrapped(*args, **kwargs) + + +def wrap_bedrock_model__stream(wrapped, instance, args, kwargs): + """Retrieves trace context stored on the messages argument and propagates it to the new thread.""" + try: + bound_args = bind_args(wrapped, args, kwargs) + except Exception: + return wrapped(*args, **kwargs) + + if ( + "messages" in bound_args + and isinstance(bound_args["messages"], list) + and bound_args["messages"] # non-empty list + and "newrelic_trace" in bound_args["messages"][-1] + ): + trace_message = bound_args["messages"].pop() + with ContextOf(trace=trace_message["newrelic_trace"]): + return wrapped(*args, **kwargs) + + return wrapped(*args, **kwargs) + + +def instrument_agent_agent(module): + if hasattr(module, "Agent"): + if hasattr(module.Agent, "__call__"): # noqa: B004 + wrap_function_wrapper(module, "Agent.__call__", wrap_agent__call__) + if hasattr(module.Agent, "invoke_async"): 
+ wrap_function_wrapper(module, "Agent.invoke_async", wrap_agent_invoke_async) + if hasattr(module.Agent, "stream_async"): + wrap_function_wrapper(module, "Agent.stream_async", wrap_stream_async) + + +def instrument_tools_executors__executor(module): + if hasattr(module, "ToolExecutor"): + if hasattr(module.ToolExecutor, "_stream"): + wrap_function_wrapper(module, "ToolExecutor._stream", wrap_tool_executor__stream) + + +def instrument_tools_registry(module): + if hasattr(module, "ToolRegistry"): + if hasattr(module.ToolRegistry, "register_tool"): + wrap_function_wrapper(module, "ToolRegistry.register_tool", wrap_ToolRegister_register_tool) + + +def instrument_models_bedrock(module): + # This instrumentation only exists to pass trace context due to bedrock models using a separate thread. + if hasattr(module, "BedrockModel"): + if hasattr(module.BedrockModel, "stream"): + wrap_function_wrapper(module, "BedrockModel.stream", wrap_bedrock_model_stream) + if hasattr(module.BedrockModel, "_stream"): + wrap_function_wrapper(module, "BedrockModel._stream", wrap_bedrock_model__stream) diff --git a/tests/mlmodel_strands/_mock_model_provider.py b/tests/mlmodel_strands/_mock_model_provider.py index e4c9e79930..ef60e13bad 100644 --- a/tests/mlmodel_strands/_mock_model_provider.py +++ b/tests/mlmodel_strands/_mock_model_provider.py @@ -41,7 +41,7 @@ def __init__(self, agent_responses): def format_chunk(self, event): return event - def format_request(self, messages, tool_specs=None, system_prompt=None): + def format_request(self, messages, tool_specs=None, system_prompt=None, **kwargs): return None def get_config(self): @@ -53,7 +53,7 @@ def update_config(self, **model_config): async def structured_output(self, output_model, prompt, system_prompt=None, **kwargs): pass - async def stream(self, messages, tool_specs=None, system_prompt=None): + async def stream(self, messages, tool_specs=None, system_prompt=None, **kwargs): events = self.map_agent_message_to_events(self.agent_responses[self.index]) for event in events: yield event diff --git a/tests/mlmodel_strands/conftest.py b/tests/mlmodel_strands/conftest.py index b810161f6a..a2ad9b8dd0 100644 --- a/tests/mlmodel_strands/conftest.py +++ b/tests/mlmodel_strands/conftest.py @@ -14,6 +14,7 @@ import pytest from _mock_model_provider import MockedModelProvider +from testing_support.fixture.event_loop import event_loop as loop from testing_support.fixtures import collector_agent_registration_fixture, collector_available_fixture from testing_support.ml_testing_utils import set_trace_info @@ -50,15 +51,33 @@ def single_tool_model(): @pytest.fixture -def single_tool_model_error(): +def single_tool_model_runtime_error_coro(): model = MockedModelProvider( [ { "role": "assistant", "content": [ - {"text": "Calling add_exclamation tool"}, + {"text": "Calling throw_exception_coro tool"}, + # Set arguments to an invalid type to trigger error in tool + {"toolUse": {"name": "throw_exception_coro", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def single_tool_model_runtime_error_agen(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling throw_exception_agen tool"}, # Set arguments to an invalid type to trigger error in tool - {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": 12}}}, + {"toolUse": {"name": "throw_exception_agen", "toolUseId": "123", "input": {"message": 
"Hello"}}}, ], }, {"role": "assistant", "content": [{"text": "Success!"}]}, diff --git a/tests/mlmodel_strands/test_agent.py b/tests/mlmodel_strands/test_agent.py new file mode 100644 index 0000000000..af685668ad --- /dev/null +++ b/tests/mlmodel_strands/test_agent.py @@ -0,0 +1,427 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from strands import Agent, tool +from testing_support.fixtures import reset_core_stats_engine, validate_attributes +from testing_support.ml_testing_utils import ( + disabled_ai_monitoring_record_content_settings, + disabled_ai_monitoring_settings, + events_with_context_attrs, + tool_events_sans_content, +) +from testing_support.validators.validate_custom_event import validate_custom_event_count +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_error_trace_attributes import validate_error_trace_attributes +from testing_support.validators.validate_transaction_error_event_count import validate_transaction_error_event_count +from testing_support.validators.validate_transaction_metrics import validate_transaction_metrics + +from newrelic.api.background_task import background_task +from newrelic.api.llm_custom_attributes import WithLlmCustomAttributes +from newrelic.common.object_names import callable_name +from newrelic.common.object_wrapper import transient_function_wrapper + +tool_recorded_event = [ + ( + {"type": "LlmTool"}, + { + "id": None, + "run_id": "123", + "output": "{'text': 'Hello!'}", + "name": "add_exclamation", + "agent_name": "my_agent", + "span_id": None, + "trace_id": "trace-id", + "input": "{'message': 'Hello'}", + "vendor": "strands", + "ingest_source": "Python", + "duration": None, + }, + ) +] + +tool_recorded_event_forced_internal_error = [ + ( + {"type": "LlmTool"}, + { + "id": None, + "run_id": "123", + "name": "add_exclamation", + "agent_name": "my_agent", + "span_id": None, + "trace_id": "trace-id", + "input": "{'message': 'Hello'}", + "vendor": "strands", + "ingest_source": "Python", + "duration": None, + "error": True, + }, + ) +] + +tool_recorded_event_error_coro = [ + ( + {"type": "LlmTool"}, + { + "id": None, + "run_id": "123", + "name": "throw_exception_coro", + "agent_name": "my_agent", + "span_id": None, + "trace_id": "trace-id", + "input": "{'message': 'Hello'}", + "vendor": "strands", + "ingest_source": "Python", + "error": True, + "output": "{'text': 'Error: RuntimeError - Oops'}", + "duration": None, + }, + ) +] + + +tool_recorded_event_error_agen = [ + ( + {"type": "LlmTool"}, + { + "id": None, + "run_id": "123", + "name": "throw_exception_agen", + "agent_name": "my_agent", + "span_id": None, + "trace_id": "trace-id", + "input": "{'message': 'Hello'}", + "vendor": "strands", + "ingest_source": "Python", + "error": True, + "output": "{'text': 'Error: RuntimeError - Oops'}", + "duration": None, + }, + ) +] + + +agent_recorded_event = [ + ( + {"type": "LlmAgent"}, + { + "id": None, + "name": "my_agent", + 
"span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + "ingest_source": "Python", + "duration": None, + }, + ) +] + +agent_recorded_event_error = [ + ( + {"type": "LlmAgent"}, + { + "id": None, + "name": "my_agent", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + "ingest_source": "Python", + "error": True, + "duration": None, + }, + ) +] + + +# Example tool for testing purposes +@tool +async def add_exclamation(message: str) -> str: + return f"{message}!" + + +@tool +async def throw_exception_coro(message: str) -> str: + raise RuntimeError("Oops") + + +@tool +async def throw_exception_agen(message: str) -> str: + raise RuntimeError("Oops") + yield + + +@reset_core_stats_engine() +@validate_custom_events(events_with_context_attrs(tool_recorded_event)) +@validate_custom_events(events_with_context_attrs(agent_recorded_event)) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_invoke", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_invoke(set_trace_info, single_tool_model): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + with WithLlmCustomAttributes({"context": "attr"}): + response = my_agent('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" + assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 + + +@reset_core_stats_engine() +@validate_custom_events(tool_recorded_event) +@validate_custom_events(agent_recorded_event) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_invoke_async", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_invoke_async(loop, set_trace_info, single_tool_model): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + async def _test(): + response = await my_agent.invoke_async('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" 
+ assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 + + loop.run_until_complete(_test()) + + +@reset_core_stats_engine() +@validate_custom_events(tool_recorded_event) +@validate_custom_events(agent_recorded_event) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_stream_async", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_stream_async(loop, set_trace_info, single_tool_model): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + async def _test(): + response = my_agent.stream_async('Add an exclamation to the word "Hello"') + messages = [event["message"]["content"] async for event in response if "message" in event] + + assert len(messages) == 3 + assert messages[0][0]["text"] == "Calling add_exclamation tool" + assert messages[0][1]["toolUse"]["name"] == "add_exclamation" + assert messages[1][0]["toolResult"]["content"][0]["text"] == "Hello!" + assert messages[2][0]["text"] == "Success!" + + loop.run_until_complete(_test()) + + +@reset_core_stats_engine() +@disabled_ai_monitoring_record_content_settings +@validate_custom_events(agent_recorded_event) +@validate_custom_events(tool_events_sans_content(tool_recorded_event)) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_invoke_no_content", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_invoke_no_content(set_trace_info, single_tool_model): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + response = my_agent('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" + assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 + + +@disabled_ai_monitoring_settings +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +@background_task() +def test_agent_invoke_disabled_ai_monitoring_events(set_trace_info, single_tool_model): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + response = my_agent('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" 
+ assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 + + +@reset_core_stats_engine() +@validate_transaction_error_event_count(1) +@validate_error_trace_attributes(callable_name(ValueError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {}}) +@validate_custom_events(agent_recorded_event_error) +@validate_custom_event_count(count=1) +@validate_transaction_metrics( + "test_agent:test_agent_invoke_error", + scoped_metrics=[("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1)], + rollup_metrics=[("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1)], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_invoke_error(set_trace_info, single_tool_model): + # Add a wrapper to intentionally force an error in the Agent code + @transient_function_wrapper("strands.agent.agent", "Agent._convert_prompt_to_messages") + def _wrap_convert_prompt_to_messages(wrapped, instance, args, kwargs): + raise ValueError("Oops") + + @_wrap_convert_prompt_to_messages + def _test(): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + my_agent('Add an exclamation to the word "Hello"') # raises ValueError + + with pytest.raises(ValueError): + _test() + + +@reset_core_stats_engine() +@validate_transaction_error_event_count(1) +@validate_error_trace_attributes(callable_name(RuntimeError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {}}) +@validate_custom_events(tool_recorded_event_error_coro) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_invoke_tool_coro_runtime_error", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/throw_exception_coro", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/throw_exception_coro", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_invoke_tool_coro_runtime_error(set_trace_info, single_tool_model_runtime_error_coro): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model_runtime_error_coro, tools=[throw_exception_coro]) + + response = my_agent('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" 
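+    # The tool's RuntimeError does not abort the run: the mocked model still returns "Success!",
+    # while the failure is surfaced through error_count and the LlmTool error event validated above.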
+ assert response.metrics.tool_metrics["throw_exception_coro"].error_count == 1 + + +@reset_core_stats_engine() +@validate_transaction_error_event_count(1) +@validate_error_trace_attributes(callable_name(RuntimeError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {}}) +@validate_custom_events(tool_recorded_event_error_agen) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_invoke_tool_agen_runtime_error", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/throw_exception_agen", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/throw_exception_agen", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_invoke_tool_agen_runtime_error(set_trace_info, single_tool_model_runtime_error_agen): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model_runtime_error_agen, tools=[throw_exception_agen]) + + response = my_agent('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" + assert response.metrics.tool_metrics["throw_exception_agen"].error_count == 1 + + +@reset_core_stats_engine() +@validate_transaction_error_event_count(1) +@validate_error_trace_attributes(callable_name(ValueError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {}}) +@validate_custom_events(agent_recorded_event) +@validate_custom_events(tool_recorded_event_forced_internal_error) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_tool_forced_exception", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_tool_forced_exception(set_trace_info, single_tool_model): + # Add a wrapper to intentionally force an error in the ToolExecutor._stream code to hit the exception path in + # the AsyncGeneratorProxy + @transient_function_wrapper("strands.hooks.events", "BeforeToolCallEvent.__init__") + def _wrap_BeforeToolCallEvent_init(wrapped, instance, args, kwargs): + raise ValueError("Oops") + + @_wrap_BeforeToolCallEvent_init + def _test(): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + my_agent('Add an exclamation to the word "Hello"') + + # This will not explicitly raise a ValueError when running the test but we are still able to capture it in the error trace + _test() + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_agent_invoke_outside_txn(single_tool_model): + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + response = my_agent('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" 
+ assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 diff --git a/tests/mlmodel_strands/test_simple.py b/tests/mlmodel_strands/test_simple.py deleted file mode 100644 index ae24003fab..0000000000 --- a/tests/mlmodel_strands/test_simple.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2010 New Relic, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from strands import Agent, tool - -from newrelic.api.background_task import background_task - - -# Example tool for testing purposes -@tool -def add_exclamation(message: str) -> str: - return f"{message}!" - - -# TODO: Remove this file once all real tests are in place - - -@background_task() -def test_simple_run_agent(set_trace_info, single_tool_model): - set_trace_info() - my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) - - response = my_agent("Run the tools.") - assert response.message["content"][0]["text"] == "Success!" - assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 diff --git a/tests/testing_support/fixtures.py b/tests/testing_support/fixtures.py index 3d93e06e30..540e44f70c 100644 --- a/tests/testing_support/fixtures.py +++ b/tests/testing_support/fixtures.py @@ -797,7 +797,7 @@ def _bind_params(transaction, *args, **kwargs): transaction = _bind_params(*args, **kwargs) error_events = transaction.error_events(instance.stats_table) - assert len(error_events) == num_errors + assert len(error_events) == num_errors, f"Expected: {num_errors}, Got: {len(error_events)}" for sample in error_events: assert isinstance(sample, list) assert len(sample) == 3 diff --git a/tests/testing_support/validators/validate_custom_event.py b/tests/testing_support/validators/validate_custom_event.py index deeef7fb25..c3cf78032a 100644 --- a/tests/testing_support/validators/validate_custom_event.py +++ b/tests/testing_support/validators/validate_custom_event.py @@ -61,7 +61,9 @@ def _validate_custom_event_count(wrapped, instance, args, kwargs): raise else: stats = core_application_stats_engine(None) - assert stats.custom_events.num_samples == count + assert stats.custom_events.num_samples == count, ( + f"Expected: {count}, Got: {stats.custom_events.num_samples}. 
Events: {list(stats.custom_events)}" + ) return result diff --git a/tests/testing_support/validators/validate_error_event_collector_json.py b/tests/testing_support/validators/validate_error_event_collector_json.py index d1cec3a558..27ea76f3a3 100644 --- a/tests/testing_support/validators/validate_error_event_collector_json.py +++ b/tests/testing_support/validators/validate_error_event_collector_json.py @@ -52,7 +52,7 @@ def _validate_error_event_collector_json(wrapped, instance, args, kwargs): error_events = decoded_json[2] - assert len(error_events) == num_errors + assert len(error_events) == num_errors, f"Expected: {num_errors}, Got: {len(error_events)}" for event in error_events: # event is an array containing intrinsics, user-attributes, # and agent-attributes diff --git a/tests/testing_support/validators/validate_transaction_error_event_count.py b/tests/testing_support/validators/validate_transaction_error_event_count.py index b41a52330f..f5e8c0b206 100644 --- a/tests/testing_support/validators/validate_transaction_error_event_count.py +++ b/tests/testing_support/validators/validate_transaction_error_event_count.py @@ -28,7 +28,9 @@ def _validate_error_event_on_stats_engine(wrapped, instance, args, kwargs): raise else: error_events = list(instance.error_events) - assert len(error_events) == num_errors + assert len(error_events) == num_errors, ( + f"Expected: {num_errors}, Got: {len(error_events)}. Errors: {error_events}" + ) return result From bea210070f93e576a4c10d00ea6ac0b65514b507 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 7 Oct 2025 10:34:31 -0700 Subject: [PATCH 07/34] Bump tests. From 491dd98553d9d65c933b43d9310ea3bd919fe2f8 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 7 Oct 2025 10:57:04 -0700 Subject: [PATCH 08/34] Add response token count logic to Gemini instrumentation. (#1486) * Add response token count logic to Gemini instrumentation. * Update token counting util functions. * Linting * Add response token count logic to Gemini instrumentation. * Update token counting util functions. * [MegaLinter] Apply linters fixes * Bump tests. 
--------- Co-authored-by: Tim Pansino --- newrelic/hooks/mlmodel_gemini.py | 152 ++++++++++++------ tests/mlmodel_gemini/test_embeddings.py | 6 +- tests/mlmodel_gemini/test_embeddings_error.py | 62 +------ tests/mlmodel_gemini/test_text_generation.py | 12 +- .../test_text_generation_error.py | 81 +--------- tests/testing_support/ml_testing_utils.py | 19 +++ 6 files changed, 139 insertions(+), 193 deletions(-) diff --git a/newrelic/hooks/mlmodel_gemini.py b/newrelic/hooks/mlmodel_gemini.py index 8aeb1355d0..6f61c11125 100644 --- a/newrelic/hooks/mlmodel_gemini.py +++ b/newrelic/hooks/mlmodel_gemini.py @@ -175,20 +175,24 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg embedding_content = str(embedding_content) request_model = kwargs.get("model") + embedding_token_count = ( + settings.ai_monitoring.llm_token_count_callback(request_model, embedding_content) + if settings.ai_monitoring.llm_token_count_callback + else None + ) + full_embedding_response_dict = { "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(request_model, embedding_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "request.model": request_model, "duration": ft.duration * 1000, "vendor": "gemini", "ingest_source": "Python", } + if embedding_token_count: + full_embedding_response_dict["response.usage.total_tokens"] = embedding_token_count + if settings.ai_monitoring.record_content.enabled: full_embedding_response_dict["input"] = embedding_content @@ -300,15 +304,13 @@ def _record_generation_error(transaction, linking_metadata, completion_id, kwarg "Unable to parse input message to Gemini LLM. Message content and role will be omitted from " "corresponding LlmChatCompletionMessage event. 
" ) + # Extract the input message content and role from the input message if it exists + input_message_content, input_role = _parse_input_message(input_message) if input_message else (None, None) - generation_config = kwargs.get("config") - if generation_config: - request_temperature = getattr(generation_config, "temperature", None) - request_max_tokens = getattr(generation_config, "max_output_tokens", None) - else: - request_temperature = None - request_max_tokens = None + # Extract data from generation config object + request_temperature, request_max_tokens = _extract_generation_config(kwargs) + # Prepare error attributes notice_error_attributes = { "http.statusCode": getattr(exc, "code", None), "error.message": getattr(exc, "message", None), @@ -348,15 +350,17 @@ def _record_generation_error(transaction, linking_metadata, completion_id, kwarg create_chat_completion_message_event( transaction, - input_message, + input_message_content, + input_role, completion_id, span_id, trace_id, # Passing the request model as the response model here since we do not have access to a response model request_model, - request_model, llm_metadata, output_message_list, + # We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run + all_token_counts=True, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) @@ -377,6 +381,7 @@ def _handle_generation_success(transaction, linking_metadata, completion_id, kwa def _record_generation_success(transaction, linking_metadata, completion_id, kwargs, ft, response): + settings = transaction.settings or global_settings() span_id = linking_metadata.get("span.id") trace_id = linking_metadata.get("trace.id") try: @@ -385,12 +390,14 @@ def _record_generation_success(transaction, linking_metadata, completion_id, kwa # finish_reason is an enum, so grab just the stringified value from it to report finish_reason = response.get("candidates")[0].get("finish_reason").value output_message_list = [response.get("candidates")[0].get("content")] + token_usage = response.get("usage_metadata") or {} else: # Set all values to NoneTypes since we cannot access them through kwargs or another method that doesn't # require the response object response_model = None output_message_list = [] finish_reason = None + token_usage = {} request_model = kwargs.get("model") @@ -412,13 +419,44 @@ def _record_generation_success(transaction, linking_metadata, completion_id, kwa "corresponding LlmChatCompletionMessage event. 
" ) - generation_config = kwargs.get("config") - if generation_config: - request_temperature = getattr(generation_config, "temperature", None) - request_max_tokens = getattr(generation_config, "max_output_tokens", None) + input_message_content, input_role = _parse_input_message(input_message) if input_message else (None, None) + + # Parse output message content + # This list should have a length of 1 to represent the output message + # Parse the message text out to pass to any registered token counting callback + output_message_content = output_message_list[0].get("parts")[0].get("text") if output_message_list else None + + # Extract token counts from response object + if token_usage: + response_prompt_tokens = token_usage.get("prompt_token_count") + response_completion_tokens = token_usage.get("candidates_token_count") + response_total_tokens = token_usage.get("total_token_count") + else: - request_temperature = None - request_max_tokens = None + response_prompt_tokens = None + response_completion_tokens = None + response_total_tokens = None + + # Calculate token counts by checking if a callback is registered and if we have the necessary content to pass + # to it. If not, then we use the token counts provided in the response object + prompt_tokens = ( + settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content) + if settings.ai_monitoring.llm_token_count_callback and input_message_content + else response_prompt_tokens + ) + completion_tokens = ( + settings.ai_monitoring.llm_token_count_callback(response_model, output_message_content) + if settings.ai_monitoring.llm_token_count_callback and output_message_content + else response_completion_tokens + ) + total_tokens = ( + prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens + ) + + all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens) + + # Extract generation config + request_temperature, request_max_tokens = _extract_generation_config(kwargs) full_chat_completion_summary_dict = { "id": completion_id, @@ -438,66 +476,78 @@ def _record_generation_success(transaction, linking_metadata, completion_id, kwa "response.number_of_messages": 1 + len(output_message_list), } + if all_token_counts: + full_chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens + full_chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens + full_chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens + llm_metadata = _get_llm_attributes(transaction) full_chat_completion_summary_dict.update(llm_metadata) transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict) create_chat_completion_message_event( transaction, - input_message, + input_message_content, + input_role, completion_id, span_id, trace_id, response_model, - request_model, llm_metadata, output_message_list, + all_token_counts, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) +def _parse_input_message(input_message): + # The input_message will be a string if generate_content was called directly. 
In this case, we don't have + # access to the role, so we default to user since this was an input message + if isinstance(input_message, str): + return input_message, "user" + # The input_message will be a Google Content type if send_message was called, so we parse out the message + # text and role (which should be "user") + elif isinstance(input_message, google.genai.types.Content): + return input_message.parts[0].text, input_message.role + else: + return None, None + + +def _extract_generation_config(kwargs): + generation_config = kwargs.get("config") + if generation_config: + request_temperature = getattr(generation_config, "temperature", None) + request_max_tokens = getattr(generation_config, "max_output_tokens", None) + else: + request_temperature = None + request_max_tokens = None + + return request_temperature, request_max_tokens + + def create_chat_completion_message_event( transaction, - input_message, + input_message_content, + input_role, chat_completion_id, span_id, trace_id, response_model, - request_model, llm_metadata, output_message_list, + all_token_counts, ): try: settings = transaction.settings or global_settings() - if input_message: - # The input_message will be a string if generate_content was called directly. In this case, we don't have - # access to the role, so we default to user since this was an input message - if isinstance(input_message, str): - input_message_content = input_message - input_role = "user" - # The input_message will be a Google Content type if send_message was called, so we parse out the message - # text and role (which should be "user") - elif isinstance(input_message, google.genai.types.Content): - input_message_content = input_message.parts[0].text - input_role = input_message.role - # Set input data to NoneTypes to ensure token_count callback is not called - else: - input_message_content = None - input_role = None - + if input_message_content: message_id = str(uuid.uuid4()) chat_completion_input_message_dict = { "id": message_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content) - if settings.ai_monitoring.llm_token_count_callback and input_message_content - else None - ), "role": input_role, "completion_id": chat_completion_id, # The input message will always be the first message in our request/ response sequence so this will @@ -507,6 +557,8 @@ def create_chat_completion_message_event( "vendor": "gemini", "ingest_source": "Python", } + if all_token_counts: + chat_completion_input_message_dict["token_count"] = 0 if settings.ai_monitoring.record_content.enabled: chat_completion_input_message_dict["content"] = input_message_content @@ -523,7 +575,7 @@ def create_chat_completion_message_event( # Add one to the index to account for the single input message so our sequence value is accurate for # the output message - if input_message: + if input_message_content: index += 1 message_id = str(uuid.uuid4()) @@ -532,11 +584,6 @@ def create_chat_completion_message_event( "id": message_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -546,6 +593,9 @@ def create_chat_completion_message_event( "is_response": True, } + if all_token_counts: + chat_completion_output_message_dict["token_count"] = 0 + if 
settings.ai_monitoring.record_content.enabled: chat_completion_output_message_dict["content"] = message_content diff --git a/tests/mlmodel_gemini/test_embeddings.py b/tests/mlmodel_gemini/test_embeddings.py index 0fc92897b6..5b4e30f860 100644 --- a/tests/mlmodel_gemini/test_embeddings.py +++ b/tests/mlmodel_gemini/test_embeddings.py @@ -15,7 +15,7 @@ import google.genai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -93,7 +93,7 @@ def test_gemini_embedding_sync_no_content(gemini_dev_client, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_gemini_embedding_sync_with_token_count", @@ -177,7 +177,7 @@ def test_gemini_embedding_async_no_content(gemini_dev_client, loop, set_trace_in @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_gemini_embedding_async_with_token_count", diff --git a/tests/mlmodel_gemini/test_embeddings_error.py b/tests/mlmodel_gemini/test_embeddings_error.py index a65a6c2c6f..f0e7aac58a 100644 --- a/tests/mlmodel_gemini/test_embeddings_error.py +++ b/tests/mlmodel_gemini/test_embeddings_error.py @@ -16,12 +16,10 @@ import google.genai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -159,34 +157,6 @@ def test_embeddings_invalid_request_error_invalid_model(gemini_dev_client, set_t gemini_dev_client.models.embed_content(contents="Embedded: Model does not exist.", model="does-not-exist") -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for embedContent. Call ListModels to see the list of available models and their supported methods." 
- } -) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - rollup_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - custom_metrics=[(f"Supportability/Python/ML/Gemini/{google.genai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count(gemini_dev_client, set_trace_info): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - gemini_dev_client.models.embed_content(contents="Embedded: Model does not exist.", model="does-not-exist") - - embedding_invalid_key_error_events = [ ( {"type": "LlmEmbedding"}, @@ -326,36 +296,6 @@ def test_embeddings_async_invalid_request_error_invalid_model(gemini_dev_client, ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for embedContent. Call ListModels to see the list of available models and their supported methods." - } -) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_async_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - rollup_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - custom_metrics=[(f"Supportability/Python/ML/Gemini/{google.genai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_async_invalid_request_error_invalid_model_with_token_count(gemini_dev_client, loop, set_trace_info): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - loop.run_until_complete( - gemini_dev_client.models.embed_content(contents="Embedded: Model does not exist.", model="does-not-exist") - ) - - # Wrong api_key provided @dt_enabled @reset_core_stats_engine() diff --git a/tests/mlmodel_gemini/test_text_generation.py b/tests/mlmodel_gemini/test_text_generation.py index faec66aa75..3da978e777 100644 --- a/tests/mlmodel_gemini/test_text_generation.py +++ b/tests/mlmodel_gemini/test_text_generation.py @@ -15,7 +15,7 @@ import google.genai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -50,6 +50,9 @@ "vendor": "gemini", "ingest_source": "Python", "response.number_of_messages": 2, + "response.usage.prompt_tokens": 9, + "response.usage.completion_tokens": 13, + "response.usage.total_tokens": 22, }, ), ( @@ -60,6 +63,7 @@ "llm.foo": "bar", "span_id": None, "trace_id": "trace-id", + "token_count": 0, "content": "How many letters are in the word Python?", "role": "user", "completion_id": None, @@ -77,6 +81,7 @@ "llm.foo": "bar", "span_id": None, "trace_id": "trace-id", + 
"token_count": 0, "content": 'There are **6** letters in the word "Python".\n', "role": "model", "completion_id": None, @@ -183,7 +188,8 @@ def test_gemini_text_generation_sync_no_content(gemini_dev_client, set_trace_inf @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(text_generation_recorded_events)) +# Ensure LLM callback is invoked and response token counts are overridden +@validate_custom_events(add_token_counts_to_chat_events(text_generation_recorded_events)) @validate_custom_event_count(count=3) @validate_transaction_metrics( name="test_text_generation:test_gemini_text_generation_sync_with_token_count", @@ -324,7 +330,7 @@ def test_gemini_text_generation_async_no_content(gemini_dev_client, loop, set_tr @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(text_generation_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(text_generation_recorded_events)) @validate_custom_event_count(count=3) @validate_transaction_metrics( name="test_text_generation:test_gemini_text_generation_async_with_token_count", diff --git a/tests/mlmodel_gemini/test_text_generation_error.py b/tests/mlmodel_gemini/test_text_generation_error.py index 5e6f1c04de..c92e1a2d45 100644 --- a/tests/mlmodel_gemini/test_text_generation_error.py +++ b/tests/mlmodel_gemini/test_text_generation_error.py @@ -17,13 +17,11 @@ import google.genai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -63,6 +61,7 @@ "trace_id": "trace-id", "content": "How many letters are in the word Python?", "role": "user", + "token_count": 0, "completion_id": None, "sequence": 0, "vendor": "gemini", @@ -167,6 +166,7 @@ def _test(): "trace_id": "trace-id", "content": "Model does not exist.", "role": "user", + "token_count": 0, "completion_id": None, "response.model": "does-not-exist", "sequence": 0, @@ -179,39 +179,6 @@ def _test(): @dt_enabled @reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods." 
- } -) -@validate_transaction_metrics( - "test_text_generation_error:test_text_generation_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/Gemini/generate_content", 1)], - rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_text_generation_invalid_request_error_invalid_model_with_token_count(gemini_dev_client, set_trace_info): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - gemini_dev_client.models.generate_content( - model="does-not-exist", - contents=["Model does not exist."], - config=google.genai.types.GenerateContentConfig(max_output_tokens=100, temperature=0.7), - ) - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) @validate_error_trace_attributes( callable_name(google.genai.errors.ClientError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, @@ -227,7 +194,7 @@ def test_text_generation_invalid_request_error_invalid_model_with_token_count(ge rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], background_task=True, ) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) +@validate_custom_events(expected_events_on_invalid_model_error) @validate_custom_event_count(count=2) @background_task() def test_text_generation_invalid_request_error_invalid_model_chat(gemini_dev_client, set_trace_info): @@ -266,6 +233,7 @@ def test_text_generation_invalid_request_error_invalid_model_chat(gemini_dev_cli "trace_id": "trace-id", "content": "Invalid API key.", "role": "user", + "token_count": 0, "response.model": "gemini-flash-2.0", "completion_id": None, "sequence": 0, @@ -377,43 +345,6 @@ def _test(): @dt_enabled @reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods." 
- } -) -@validate_transaction_metrics( - "test_text_generation_error:test_text_generation_async_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/Gemini/generate_content", 1)], - rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_text_generation_async_invalid_request_error_invalid_model_with_token_count( - gemini_dev_client, loop, set_trace_info -): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - gemini_dev_client.models.generate_content( - model="does-not-exist", - contents=["Model does not exist."], - config=google.genai.types.GenerateContentConfig(max_output_tokens=100, temperature=0.7), - ) - ) - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) @validate_error_trace_attributes( callable_name(google.genai.errors.ClientError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, @@ -429,7 +360,7 @@ def test_text_generation_async_invalid_request_error_invalid_model_with_token_co rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], background_task=True, ) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) +@validate_custom_events(expected_events_on_invalid_model_error) @validate_custom_event_count(count=2) @background_task() def test_text_generation_async_invalid_request_error_invalid_model_chat(gemini_dev_client, loop, set_trace_info): diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py index 4ff70c7ed4..55dbd08105 100644 --- a/tests/testing_support/ml_testing_utils.py +++ b/tests/testing_support/ml_testing_utils.py @@ -29,6 +29,7 @@ def llm_token_count_callback(model, content): return 105 +# This will be removed once all LLM instrumentations have been converted to use new token count design def add_token_count_to_events(expected_events): events = copy.deepcopy(expected_events) for event in events: @@ -37,6 +38,24 @@ def add_token_count_to_events(expected_events): return events +def add_token_count_to_embedding_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmEmbedding": + event[1]["response.usage.total_tokens"] = 105 + return events + + +def add_token_counts_to_chat_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionSummary": + event[1]["response.usage.prompt_tokens"] = 105 + event[1]["response.usage.completion_tokens"] = 105 + event[1]["response.usage.total_tokens"] = 210 + return events + + def events_sans_content(event): new_event = copy.deepcopy(event) for _event in new_event: From b266594910afccc7daf785cf26137694b8951307 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 7 Oct 2025 13:14:56 -0700 Subject: [PATCH 09/34] Add response token count logic to OpenAI instrumentation. (#1498) * Add OpenAI token counts. * Add token counts to langchain + openai tests. * Remove unused expected events. * Linting * Add OpenAI token counts. * Add token counts to langchain + openai tests. * Remove unused expected events. 
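As context for the fixtures above: `llm_token_count_callback` simply returns 105 for any model and content, which is why `add_token_counts_to_chat_events` pins the expected summary usage to 105 prompt, 105 completion, and 210 total tokens. Outside the test suite, such a callback would normally be registered through the agent's public API. A minimal sketch, assuming `newrelic.agent.set_llm_token_count_callback` accepts a `(model, content)` callable, and using a crude length heuristic in place of a real tokenizer:

import newrelic.agent


def count_llm_tokens(model, content):
    # Placeholder heuristic (roughly four characters per token); a real
    # application would delegate to a model-aware tokenizer here.
    if not content:
        return 0
    return max(1, len(content) // 4)


newrelic.agent.set_llm_token_count_callback(count_llm_tokens)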
* [MegaLinter] Apply linters fixes --------- Co-authored-by: Tim Pansino --- newrelic/hooks/mlmodel_openai.py | 87 ++++++++--- tests/mlmodel_langchain/test_chain.py | 8 + tests/mlmodel_openai/test_chat_completion.py | 12 +- .../test_chat_completion_error.py | 71 +-------- .../test_chat_completion_error_v1.py | 142 +----------------- .../test_chat_completion_stream.py | 101 ++++++++++++- .../test_chat_completion_stream_error.py | 75 +-------- .../test_chat_completion_stream_error_v1.py | 80 +--------- .../test_chat_completion_stream_v1.py | 11 +- .../mlmodel_openai/test_chat_completion_v1.py | 12 +- tests/mlmodel_openai/test_embeddings.py | 7 +- .../test_embeddings_error_v1.py | 120 +-------------- tests/mlmodel_openai/test_embeddings_v1.py | 7 +- tests/testing_support/ml_testing_utils.py | 8 + 14 files changed, 241 insertions(+), 500 deletions(-) diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index c3f7960b6e..3484762951 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -129,11 +129,11 @@ def create_chat_completion_message_event( span_id, trace_id, response_model, - request_model, response_id, request_id, llm_metadata, output_message_list, + all_token_counts, ): settings = transaction.settings if transaction.settings is not None else global_settings() @@ -153,11 +153,6 @@ def create_chat_completion_message_event( "request_id": request_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(request_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -166,6 +161,9 @@ def create_chat_completion_message_event( "ingest_source": "Python", } + if all_token_counts: + chat_completion_input_message_dict["token_count"] = 0 + if settings.ai_monitoring.record_content.enabled: chat_completion_input_message_dict["content"] = message_content @@ -193,11 +191,6 @@ def create_chat_completion_message_event( "request_id": request_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -207,6 +200,9 @@ def create_chat_completion_message_event( "is_response": True, } + if all_token_counts: + chat_completion_output_message_dict["token_count"] = 0 + if settings.ai_monitoring.record_content.enabled: chat_completion_output_message_dict["content"] = message_content @@ -280,15 +276,18 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg else getattr(attribute_response, "organization", None) ) + response_total_tokens = attribute_response.get("usage", {}).get("total_tokens") if response else None + + total_tokens = ( + settings.ai_monitoring.llm_token_count_callback(response_model, input_) + if settings.ai_monitoring.llm_token_count_callback and input_ + else response_total_tokens + ) + full_embedding_response_dict = { "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, input_) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "request.model": kwargs.get("model") or kwargs.get("engine"), "request_id": request_id, "duration": ft.duration * 1000, @@ -313,6 +312,7 @@ def 
_record_embedding_success(transaction, embedding_id, linking_metadata, kwarg "response.headers.ratelimitRemainingRequests": check_rate_limit_header( response_headers, "x-ratelimit-remaining-requests", True ), + "response.usage.total_tokens": total_tokens, "vendor": "openai", "ingest_source": "Python", } @@ -475,12 +475,15 @@ def _handle_completion_success(transaction, linking_metadata, completion_id, kwa def _record_completion_success(transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response): + settings = transaction.settings if transaction.settings is not None else global_settings() span_id = linking_metadata.get("span.id") trace_id = linking_metadata.get("trace.id") + try: if response: response_model = response.get("model") response_id = response.get("id") + token_usage = response.get("usage") or {} output_message_list = [] finish_reason = None choices = response.get("choices") or [] @@ -494,6 +497,7 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa else: response_model = kwargs.get("response.model") response_id = kwargs.get("id") + token_usage = {} output_message_list = [] finish_reason = kwargs.get("finish_reason") if "content" in kwargs: @@ -505,10 +509,44 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa output_message_list = [] request_model = kwargs.get("model") or kwargs.get("engine") - request_id = response_headers.get("x-request-id") - organization = response_headers.get("openai-organization") or getattr(response, "organization", None) messages = kwargs.get("messages") or [{"content": kwargs.get("prompt"), "role": "user"}] input_message_list = list(messages) + + # Extract token counts from response object + if token_usage: + response_prompt_tokens = token_usage.get("prompt_tokens") + response_completion_tokens = token_usage.get("completion_tokens") + response_total_tokens = token_usage.get("total_tokens") + + else: + response_prompt_tokens = None + response_completion_tokens = None + response_total_tokens = None + + # Calculate token counts by checking if a callback is registered and if we have the necessary content to pass + # to it. 
If not, then we use the token counts provided in the response object + input_message_content = " ".join([msg.get("content", "") for msg in input_message_list if msg.get("content")]) + prompt_tokens = ( + settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content) + if settings.ai_monitoring.llm_token_count_callback and input_message_content + else response_prompt_tokens + ) + output_message_content = " ".join([msg.get("content", "") for msg in output_message_list if msg.get("content")]) + completion_tokens = ( + settings.ai_monitoring.llm_token_count_callback(response_model, output_message_content) + if settings.ai_monitoring.llm_token_count_callback and output_message_content + else response_completion_tokens + ) + + total_tokens = ( + prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens + ) + + all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens) + + request_id = response_headers.get("x-request-id") + organization = response_headers.get("openai-organization") or getattr(response, "organization", None) + full_chat_completion_summary_dict = { "id": completion_id, "span_id": span_id, @@ -553,6 +591,12 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa ), "response.number_of_messages": len(input_message_list) + len(output_message_list), } + + if all_token_counts: + full_chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens + full_chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens + full_chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens + llm_metadata = _get_llm_attributes(transaction) full_chat_completion_summary_dict.update(llm_metadata) transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict) @@ -564,11 +608,11 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa span_id, trace_id, response_model, - request_model, response_id, request_id, llm_metadata, output_message_list, + all_token_counts, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) @@ -579,6 +623,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg trace_id = linking_metadata.get("trace.id") request_message_list = kwargs.get("messages", None) or [] notice_error_attributes = {} + try: if OPENAI_V1: response = getattr(exc, "response", None) @@ -643,6 +688,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg output_message_list = [] if "content" in kwargs: output_message_list = [{"content": kwargs.get("content"), "role": kwargs.get("role")}] + create_chat_completion_message_event( transaction, request_message_list, @@ -650,11 +696,12 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg span_id, trace_id, kwargs.get("response.model"), - request_model, response_id, request_id, llm_metadata, output_message_list, + # We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run + all_token_counts=True, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) diff --git a/tests/mlmodel_langchain/test_chain.py b/tests/mlmodel_langchain/test_chain.py index a6b7470a9a..5d7586ffb9 100644 --- a/tests/mlmodel_langchain/test_chain.py +++ b/tests/mlmodel_langchain/test_chain.py @@ -371,6 
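Condensing the hook changes above into a standalone sketch: a registered callback, given non-empty content, wins over the provider-reported usage block, and the summary-level usage attributes are only attached when every count resolves. The function and variable names below are illustrative, not the agent's:

def chat_token_counts(callback, request_model, response_model, prompt_text, completion_text, usage):
    # usage is the provider-reported usage dict ({} when absent).
    prompt_tokens = (
        callback(request_model, prompt_text) if callback and prompt_text else usage.get("prompt_tokens")
    )
    completion_tokens = (
        callback(response_model, completion_text) if callback and completion_text else usage.get("completion_tokens")
    )
    total_tokens = (
        prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else usage.get("total_tokens")
    )
    all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens)
    return prompt_tokens, completion_tokens, total_tokens, all_token_counts


def embedding_total_tokens(callback, response_model, input_text, usage):
    # Embeddings report a single total: callback first, then the response usage.
    return callback(response_model, input_text) if callback and input_text else usage.get("total_tokens")

Error paths bypass this entirely: `all_token_counts` is forced to True, so message events are written with `token_count` set to 0 and no usage attributes are attached to the summary.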
+371,7 @@ "response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999992, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 8, "vendor": "openai", "ingest_source": "Python", "input": "[[3923, 374, 220, 17, 489, 220, 19, 30]]", @@ -394,6 +395,7 @@ "response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999998, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 1, "vendor": "openai", "ingest_source": "Python", "input": "[[10590]]", @@ -464,6 +466,9 @@ "response.headers.ratelimitResetRequests": "8.64s", "response.headers.ratelimitRemainingTokens": 199912, "response.headers.ratelimitRemainingRequests": 9999, + "response.usage.prompt_tokens": 73, + "response.usage.completion_tokens": 375, + "response.usage.total_tokens": 448, "response.number_of_messages": 3, }, ], @@ -479,6 +484,7 @@ "sequence": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4?", }, @@ -495,6 +501,7 @@ "sequence": 1, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "math", }, @@ -511,6 +518,7 @@ "sequence": 2, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "is_response": True, "content": "```html\n\n\n\n Math Quiz\n\n\n

    Math Quiz Questions\n\n  1. What is the result of 5 + 3?\n    A) 7\n    B) 8\n    C) 9\n    D) 10\n  2. What is the product of 6 x 7?\n    A) 36\n    B) 42\n    C) 48\n    D) 56\n  3. What is the square root of 64?\n    A) 6\n    B) 7\n    C) 8\n    D) 9\n  4. What is the result of 12 / 4?\n    A) 2\n    B) 3\n    C) 4\n    D) 5\n  5. What is the sum of 15 + 9?\n    A) 22\n    B) 23\n    C) 24\n    D) 25
\n\n\n```", diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py index 1f8cf1cb74..5e4d209ed7 100644 --- a/tests/mlmodel_openai/test_chat_completion.py +++ b/tests/mlmodel_openai/test_chat_completion.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -55,6 +55,9 @@ "response.organization": "new-relic-nkmd8b", "request.temperature": 0.7, "request.max_tokens": 100, + "response.usage.completion_tokens": 11, + "response.usage.total_tokens": 64, + "response.usage.prompt_tokens": 53, "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 200, @@ -81,6 +84,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -99,6 +103,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -117,6 +122,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "is_response": True, @@ -172,7 +178,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -343,7 +349,7 @@ def test_openai_chat_completion_async_no_content(loop, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_chat_completion_error.py b/tests/mlmodel_openai/test_chat_completion_error.py index bfb2267a33..97a4dd8793 100644 --- a/tests/mlmodel_openai/test_chat_completion_error.py +++ b/tests/mlmodel_openai/test_chat_completion_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +66,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": 
"Python", }, @@ -83,6 +82,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -186,6 +186,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -193,36 +194,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() @@ -281,6 +252,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -296,6 +268,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -360,6 +333,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -471,37 +445,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with 
pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() diff --git a/tests/mlmodel_openai/test_chat_completion_error_v1.py b/tests/mlmodel_openai/test_chat_completion_error_v1.py index 9be9fcab9c..5af1598847 100644 --- a/tests/mlmodel_openai/test_chat_completion_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_error_v1.py @@ -14,13 +14,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -67,6 +65,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -82,6 +81,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -229,6 +229,7 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -266,37 +267,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -329,41 +299,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", 
"http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - expected_events_on_wrong_api_key_error = [ ( {"type": "LlmChatCompletionSummary"}, @@ -391,6 +326,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count_as "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -610,39 +546,6 @@ def test_chat_completion_invalid_request_error_invalid_model_with_raw_response(s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response( - set_trace_info, sync_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -677,41 +580,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async_with_raw_resp ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - 
"test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py index ad89d6f260..8019c0b6a9 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream.py +++ b/tests/mlmodel_openai/test_chat_completion_stream.py @@ -15,7 +15,8 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -184,9 +185,101 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): assert resp +chat_completion_recorded_token_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.model": "gpt-3.5-turbo-0613", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "stop", + "response.headers.llmVersion": "2020-10-01", + "response.headers.ratelimitLimitRequests": 200, + "response.headers.ratelimitLimitTokens": 40000, + "response.headers.ratelimitResetTokens": "90ms", + "response.headers.ratelimitResetRequests": "7m12s", + "response.headers.ratelimitRemainingTokens": 39940, + "response.headers.ratelimitRemainingRequests": 199, + "vendor": "openai", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + 
"trace_id": "trace-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "is_response": True, + "ingest_source": "Python", + }, + ), +] + + @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -378,7 +471,9 @@ async def consumer(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_chat_completion_stream:test_openai_chat_completion_async_with_token_count", diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index eebb5ee8fb..e8e55426e9 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +66,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -83,6 +82,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -191,6 +191,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -198,38 +199,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 
404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - generator = openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -290,6 +259,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -305,6 +275,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -374,6 +345,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -488,38 +460,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -649,6 +589,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py 
b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py index 5f769ea0e6..64798300fc 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py @@ -12,16 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. - import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +65,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -83,6 +81,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -243,6 +242,7 @@ async def consumer(): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -281,77 +281,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn assert resp -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - generator = sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_async_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) 
-@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_async_with_token_count( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - async def consumer(): - generator = await async_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - async for resp in generator: - assert resp - - loop.run_until_complete(consumer()) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -414,6 +343,7 @@ async def consumer(): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_v1.py index 796404012b..c88e8b1df6 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_v1.py @@ -17,7 +17,8 @@ from conftest import get_openai_version from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -300,7 +301,9 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -622,7 +625,9 @@ async def consumer(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant # @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py index 817db35d8e..007effcb17 100644 --- a/tests/mlmodel_openai/test_chat_completion_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_v1.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -54,6 +54,9 @@ "response.organization": "new-relic-nkmd8b", "request.temperature": 0.7, "request.max_tokens": 100, + "response.usage.completion_tokens": 75, + 
"response.usage.total_tokens": 101, + "response.usage.prompt_tokens": 26, "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 10000, @@ -80,6 +83,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "ingest_source": "Python", @@ -98,6 +102,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "ingest_source": "Python", @@ -116,6 +121,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "is_response": True, @@ -193,7 +199,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -389,7 +395,7 @@ def test_openai_chat_completion_async_with_llm_metadata_no_content(loop, set_tra @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py index c3c3e7c429..935db04fe0 100644 --- a/tests/mlmodel_openai/test_embeddings.py +++ b/tests/mlmodel_openai/test_embeddings.py @@ -19,7 +19,7 @@ validate_attributes, ) from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -55,6 +55,7 @@ "response.headers.ratelimitResetRequests": "19m45.394s", "response.headers.ratelimitRemainingTokens": 149994, "response.headers.ratelimitRemainingRequests": 197, + "response.usage.total_tokens": 6, "vendor": "openai", "ingest_source": "Python", }, @@ -107,7 +108,7 @@ def test_openai_embedding_sync_no_content(set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_openai_embedding_sync_with_token_count", @@ -191,7 +192,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_openai_embedding_async_with_token_count", diff --git 
a/tests/mlmodel_openai/test_embeddings_error_v1.py b/tests/mlmodel_openai/test_embeddings_error_v1.py index fd29236122..499f96893b 100644 --- a/tests/mlmodel_openai/test_embeddings_error_v1.py +++ b/tests/mlmodel_openai/test_embeddings_error_v1.py @@ -16,12 +16,10 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -149,32 +147,6 @@ def test_embeddings_invalid_request_error_no_model_async(set_trace_info, async_o ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - sync_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist") - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -255,36 +227,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content(set_tra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_async_with_token_count( - set_trace_info, async_openai_client, loop -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - loop.run_until_complete( - async_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist") - ) - - embedding_invalid_key_error_events = [ ( {"type": "LlmEmbedding"}, @@ 
-449,34 +391,6 @@ def test_embeddings_invalid_request_error_no_model_async_with_raw_response(set_t ) # no model provided -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response( - set_trace_info, sync_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - sync_openai_client.embeddings.with_raw_response.create(input="Model does not exist.", model="does-not-exist") - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -566,38 +480,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content_with_ra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response( - set_trace_info, async_openai_client, loop -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - loop.run_until_complete( - async_openai_client.embeddings.with_raw_response.create( - input="Model does not exist.", model="does-not-exist" - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( diff --git a/tests/mlmodel_openai/test_embeddings_v1.py b/tests/mlmodel_openai/test_embeddings_v1.py index 405a2a9e5f..3801d3639c 100644 --- a/tests/mlmodel_openai/test_embeddings_v1.py +++ b/tests/mlmodel_openai/test_embeddings_v1.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -48,6 +48,7 @@ 
"response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999994, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 6, "vendor": "openai", "ingest_source": "Python", }, @@ -111,7 +112,7 @@ def test_openai_embedding_sync_no_content(set_trace_info, sync_openai_client): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_sync_with_token_count", @@ -206,7 +207,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info, async_openai_cl @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_async_with_token_count", diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py index 55dbd08105..8c2c0444f0 100644 --- a/tests/testing_support/ml_testing_utils.py +++ b/tests/testing_support/ml_testing_utils.py @@ -46,6 +46,14 @@ def add_token_count_to_embedding_events(expected_events): return events +def add_token_count_streaming_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionMessage": + event[1]["token_count"] = 0 + return events + + def add_token_counts_to_chat_events(expected_events): events = copy.deepcopy(expected_events) for event in events: From 41939912cdde3bd4f6506542e1333678ec4885eb Mon Sep 17 00:00:00 2001 From: Timothy Pansino <11214426+TimPansino@users.noreply.github.com> Date: Mon, 17 Nov 2025 14:54:25 -0800 Subject: [PATCH 10/34] Fix instability in CI caused by health check tests (#1584) --- .../test_agent_control_health_check.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/tests/agent_features/test_agent_control_health_check.py b/tests/agent_features/test_agent_control_health_check.py index e12f3a07f0..84058a1b28 100644 --- a/tests/agent_features/test_agent_control_health_check.py +++ b/tests/agent_features/test_agent_control_health_check.py @@ -38,7 +38,7 @@ def get_health_file_contents(tmp_path): return contents -@pytest.fixture(scope="module", autouse=True) +@pytest.fixture(autouse=True) def restore_settings_fixture(): # Backup settings from before this test file runs original_settings = global_settings() @@ -51,6 +51,10 @@ def restore_settings_fixture(): original_settings.__dict__.clear() original_settings.__dict__.update(backup) + # Re-initialize the agent to restore the settings + _reset_configuration_done() + initialize() + @pytest.mark.parametrize("file_uri", ["", "file://", "/test/dir", "foo:/test/dir"]) def test_invalid_file_directory_supplied(monkeypatch, file_uri): @@ -155,10 +159,18 @@ def test_no_override_on_unhealthy_shutdown(monkeypatch, tmp_path): def test_health_check_running_threads(monkeypatch, tmp_path): - running_threads = threading.enumerate() - # Only the main thread should be running since not agent control env vars are set - assert len(running_threads) == 1 + # If the Activate-Session 
thread is still active, give it time to close before we proceed + timeout = 30.0 + while len(threading.enumerate()) != 1 and timeout > 0: + time.sleep(0.1) + timeout -= 0.1 + # Only the main thread should be running since no agent control env vars are set + assert len(threading.enumerate()) == 1, ( + f"Expected only the main thread to be running before the test starts. Got: {threading.enumerate()}" + ) + + # Setup expected env vars to run agent control health check monkeypatch.setenv("NEW_RELIC_AGENT_CONTROL_ENABLED", "True") file_path = tmp_path.as_uri() monkeypatch.setenv("NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION", file_path) @@ -180,6 +192,7 @@ def test_proxy_error_status(monkeypatch, tmp_path): file_path = tmp_path.as_uri() monkeypatch.setenv("NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION", file_path) + # Re-initialize the agent to allow the health check thread to start _reset_configuration_done() initialize() @@ -209,6 +222,7 @@ def test_multiple_activations_running_threads(monkeypatch, tmp_path): file_path = tmp_path.as_uri() monkeypatch.setenv("NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION", file_path) + # Re-initialize the agent to allow the health check thread to start and assert that it did _reset_configuration_done() initialize() From 32215b90c906ee43ae5e03379cd87a3e9a80ff8b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 17 Nov 2025 16:20:03 -0800 Subject: [PATCH 11/34] Bump the github_actions group across 1 directory with 5 updates (#1582) Bumps the github_actions group with 5 updates in the / directory: | Package | From | To | | --- | --- | --- | | [actions/checkout](https://github.com/actions/checkout) | `5.0.0` | `5.0.1` | | [docker/metadata-action](https://github.com/docker/metadata-action) | `5.8.0` | `5.9.0` | | [docker/setup-qemu-action](https://github.com/docker/setup-qemu-action) | `3.6.0` | `3.7.0` | | [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) | `7.1.2` | `7.1.3` | | [github/codeql-action](https://github.com/github/codeql-action) | `4.31.2` | `4.31.3` | Updates `actions/checkout` from 5.0.0 to 5.0.1 - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/08c6903cd8c0fde910a37f88322edcfb5dd907a8...93cb6efe18208431cddfb8368fd83d5badbf9bfd) Updates `docker/metadata-action` from 5.8.0 to 5.9.0 - [Release notes](https://github.com/docker/metadata-action/releases) - [Commits](https://github.com/docker/metadata-action/compare/c1e51972afc2121e065aed6d45c65596fe445f3f...318604b99e75e41977312d83839a89be02ca4893) Updates `docker/setup-qemu-action` from 3.6.0 to 3.7.0 - [Release notes](https://github.com/docker/setup-qemu-action/releases) - [Commits](https://github.com/docker/setup-qemu-action/compare/29109295f81e9208d7d86ff1c6c12d2833863392...c7c53464625b32c7a7e944ae62b3e17d2b600130) Updates `astral-sh/setup-uv` from 7.1.2 to 7.1.3 - [Release notes](https://github.com/astral-sh/setup-uv/releases) - [Commits](https://github.com/astral-sh/setup-uv/compare/85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41...5a7eac68fb9809dea845d802897dc5c723910fa3) Updates `github/codeql-action` from 4.31.2 to 4.31.3 - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - 
[Commits](https://github.com/github/codeql-action/compare/0499de31b99561a6d14a36a5f662c2a54f91beee...014f16e7ab1402f30e7c3329d33797e7948572db) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: 5.0.1 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: github_actions - dependency-name: docker/metadata-action dependency-version: 5.9.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: github_actions - dependency-name: docker/setup-qemu-action dependency-version: 3.7.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: github_actions - dependency-name: astral-sh/setup-uv dependency-version: 7.1.3 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: github_actions - dependency-name: github/codeql-action dependency-version: 4.31.3 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: github_actions ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- .github/workflows/addlicense.yml | 2 +- .github/workflows/benchmarks.yml | 2 +- .github/workflows/build-ci-image.yml | 6 +-- .github/workflows/deploy.yml | 6 +-- .github/workflows/mega-linter.yml | 2 +- .github/workflows/tests.yml | 58 ++++++++++++++-------------- .github/workflows/trivy.yml | 4 +- 7 files changed, 40 insertions(+), 40 deletions(-) diff --git a/.github/workflows/addlicense.yml b/.github/workflows/addlicense.yml index 8d66691ff7..83e5b29ef4 100644 --- a/.github/workflows/addlicense.yml +++ b/.github/workflows/addlicense.yml @@ -39,7 +39,7 @@ jobs: permissions: contents: read steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 513e467f29..a65695e7c4 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -38,7 +38,7 @@ jobs: BASE_SHA: ${{ github.event.pull_request.base.sha }} steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 with: fetch-depth: 0 diff --git a/.github/workflows/build-ci-image.yml b/.github/workflows/build-ci-image.yml index ab183f48a2..061233b6dd 100644 --- a/.github/workflows/build-ci-image.yml +++ b/.github/workflows/build-ci-image.yml @@ -43,7 +43,7 @@ jobs: name: Docker Build ${{ matrix.platform }} steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 with: persist-credentials: false fetch-depth: 0 @@ -60,7 +60,7 @@ jobs: - name: Generate Docker Metadata (Tags and Labels) id: meta - uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # 5.8.0 + uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # 5.9.0 with: images: ghcr.io/${{ steps.image-name.outputs.IMAGE_NAME }} flavor: | @@ -139,7 +139,7 @@ jobs: - name: Generate Docker Metadata (Tags and Labels) id: meta - uses: docker/metadata-action@c1e51972afc2121e065aed6d45c65596fe445f3f # 5.8.0 + uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # 5.9.0 with: images: 
ghcr.io/${{ steps.image-name.outputs.IMAGE_NAME }} flavor: | diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 8b469eaacb..af4739f2a3 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -69,14 +69,14 @@ jobs: runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 with: persist-credentials: false fetch-depth: 0 - name: Setup QEMU if: runner.os == 'Linux' - uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # 3.6.0 + uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # 3.7.0 with: platforms: arm64 @@ -109,7 +109,7 @@ jobs: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 with: persist-credentials: false fetch-depth: 0 diff --git a/.github/workflows/mega-linter.yml b/.github/workflows/mega-linter.yml index 8f74866d43..0f869f3b58 100644 --- a/.github/workflows/mega-linter.yml +++ b/.github/workflows/mega-linter.yml @@ -45,7 +45,7 @@ jobs: steps: # Git Checkout - name: Checkout Code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 with: token: ${{ secrets.PAT || secrets.GITHUB_TOKEN }} fetch-depth: 0 # Required for pushing commits to PRs diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index e9ef7b2d4e..9e47302bd4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -93,7 +93,7 @@ jobs: - tests steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # 6.0.0 with: python-version: "3.13" @@ -127,7 +127,7 @@ jobs: - tests steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # 6.0.0 with: python-version: "3.13" @@ -166,7 +166,7 @@ jobs: --add-host=host.docker.internal:host-gateway timeout-minutes: 30 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -231,7 +231,7 @@ jobs: --add-host=host.docker.internal:host-gateway timeout-minutes: 30 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -294,14 +294,14 @@ jobs: runs-on: windows-2025 timeout-minutes: 30 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | git fetch --tags origin - name: Install uv - uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # 7.1.2 + uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # 7.1.3 - name: Install Python run: | @@ -363,14 +363,14 @@ jobs: runs-on: windows-11-arm timeout-minutes: 30 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 
5.0.1 - name: Fetch git tags run: | git fetch --tags origin - name: Install uv - uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # 7.1.2 + uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # 7.1.3 - name: Install Python run: | @@ -443,7 +443,7 @@ jobs: --add-host=host.docker.internal:host-gateway timeout-minutes: 30 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -526,7 +526,7 @@ jobs: --health-retries 10 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -606,7 +606,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -687,7 +687,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -772,7 +772,7 @@ jobs: # from every being executed as bash commands. steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -837,7 +837,7 @@ jobs: --add-host=host.docker.internal:host-gateway timeout-minutes: 30 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -927,7 +927,7 @@ jobs: KAFKA_CFG_INTER_BROKER_LISTENER_NAME: L3 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -1005,7 +1005,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -1083,7 +1083,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -1161,7 +1161,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -1244,7 +1244,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -1327,7 +1327,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -1406,7 +1406,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -1487,7 +1487,7 @@ jobs: --health-retries 5 steps: - - uses: 
actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -1567,7 +1567,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -1647,7 +1647,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -1726,7 +1726,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -1804,7 +1804,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -1923,7 +1923,7 @@ jobs: --add-host=host.docker.internal:host-gateway steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -2003,7 +2003,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | @@ -2081,7 +2081,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 - name: Fetch git tags run: | diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index c373a38bb1..e4b0e38c9c 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -32,7 +32,7 @@ jobs: steps: # Git Checkout - name: Checkout Code - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 with: token: ${{ secrets.PAT || secrets.GITHUB_TOKEN }} fetch-depth: 0 @@ -61,6 +61,6 @@ jobs: - name: Upload Trivy scan results to GitHub Security tab if: ${{ github.event_name == 'schedule' }} - uses: github/codeql-action/upload-sarif@0499de31b99561a6d14a36a5f662c2a54f91beee # 4.31.2 + uses: github/codeql-action/upload-sarif@014f16e7ab1402f30e7c3329d33797e7948572db # 4.31.3 with: sarif_file: "trivy-results.sarif" From f59f52cd5c4856c57670ff8c4db40d723553ed96 Mon Sep 17 00:00:00 2001 From: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com> Date: Mon, 17 Nov 2025 16:54:54 -0800 Subject: [PATCH 12/34] Asyncio loop_factory fix (#1576) * Runner instrumentation in asyncio * Clean up asyncio instrumentation * Add asyncio tests for loop_factory * Modify uvicorn test for loop_factory * Fix linter errors * [MegaLinter] Apply linters fixes * Apply suggestions from code review --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Tim Pansino --- newrelic/config.py | 6 +- newrelic/hooks/coroutines_asyncio.py | 61 +++++++-- tests/adapter_uvicorn/test_uvicorn.py | 6 +- .../test_context_propagation.py | 119 +++++++++++++++++- tox.ini | 8 +- 5 files changed, 176 insertions(+), 24 deletions(-) diff --git a/newrelic/config.py b/newrelic/config.py index 
21ce996f6c..c2b7b5c2d6 100644 --- a/newrelic/config.py +++ b/newrelic/config.py @@ -2084,6 +2084,10 @@ def _process_module_builtin_defaults(): "asyncio.base_events", "newrelic.hooks.coroutines_asyncio", "instrument_asyncio_base_events" ) + _process_module_definition("asyncio.events", "newrelic.hooks.coroutines_asyncio", "instrument_asyncio_events") + + _process_module_definition("asyncio.runners", "newrelic.hooks.coroutines_asyncio", "instrument_asyncio_runners") + _process_module_definition( "langchain_core.runnables.base", "newrelic.hooks.mlmodel_langchain", @@ -2671,8 +2675,6 @@ def _process_module_builtin_defaults(): "langchain_core.callbacks.manager", "newrelic.hooks.mlmodel_langchain", "instrument_langchain_callbacks_manager" ) - _process_module_definition("asyncio.events", "newrelic.hooks.coroutines_asyncio", "instrument_asyncio_events") - _process_module_definition("asgiref.sync", "newrelic.hooks.adapter_asgiref", "instrument_asgiref_sync") _process_module_definition( diff --git a/newrelic/hooks/coroutines_asyncio.py b/newrelic/hooks/coroutines_asyncio.py index 41fc776595..6f862d52dd 100644 --- a/newrelic/hooks/coroutines_asyncio.py +++ b/newrelic/hooks/coroutines_asyncio.py @@ -16,36 +16,73 @@ from newrelic.core.trace_cache import trace_cache -def remove_from_cache(task): +def remove_from_cache_callback(task): cache = trace_cache() cache.task_stop(task) -def propagate_task_context(task): +def wrap_create_task(task): trace_cache().task_start(task) - task.add_done_callback(remove_from_cache) + task.add_done_callback(remove_from_cache_callback) return task -def _bind_loop(loop, *args, **kwargs): +def _instrument_event_loop(loop): + if loop and hasattr(loop, "create_task") and not hasattr(loop.create_task, "__wrapped__"): + wrap_out_function(loop, "create_task", wrap_create_task) + + +def _bind_set_event_loop(loop, *args, **kwargs): return loop -def wrap_create_task(wrapped, instance, args, kwargs): - loop = _bind_loop(*args, **kwargs) +def wrap_set_event_loop(wrapped, instance, args, kwargs): + loop = _bind_set_event_loop(*args, **kwargs) - if loop and not hasattr(loop.create_task, "__wrapped__"): - wrap_out_function(loop, "create_task", propagate_task_context) + _instrument_event_loop(loop) return wrapped(*args, **kwargs) +def wrap__lazy_init(wrapped, instance, args, kwargs): + result = wrapped(*args, **kwargs) + # This logic can be used for uvloop, but should + # work for any valid custom loop factory. + + # A custom loop_factory will be used to create + # a new event loop instance. It will then run + # the main() coroutine on this event loop. Once + # this coroutine is complete, the event loop will + # be stopped and closed. + + # The new loop that is created and set as the + # running loop of the duration of the run() call. + # When the coroutine starts, it runs in the context + # that was active when run() was called. Any tasks + # created within this coroutine on this new event + # loop will inherit that context. + + # Note: The loop created by loop_factory is never + # set as the global current loop for the thread, + # even while it is running. 
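For readers unfamiliar with the loop_factory mechanics described in the comment above, the following standalone sketch (standard library only, Python 3.11+) shows the same behaviour outside the agent: the loop returned by loop_factory stays private to the Runner and is never registered as the thread's current loop, so a hook on set_event_loop never sees it, and create_task has to be patched on the loop instance itself. The instrument_loop helper below is purely illustrative and is not the agent's wrap_out_function-based instrumentation.

    import asyncio

    def instrument_loop(loop):
        # Patch create_task on this one loop instance so every task it spawns
        # can be observed (the agent does this to propagate trace context).
        original_create_task = loop.create_task

        def create_task(coro, **kwargs):
            task = original_create_task(coro, **kwargs)
            print(f"observed task: {task.get_name()}")
            return task

        loop.create_task = create_task
        return loop

    async def main():
        # asyncio.get_event_loop() in other code would not return this loop;
        # it was never set as the current loop for the thread.
        await asyncio.create_task(asyncio.sleep(0))

    with asyncio.Runner(loop_factory=lambda: instrument_loop(asyncio.new_event_loop())) as runner:
        runner.run(main())

Both the task wrapping main() and the asyncio.sleep(0) task created inside it go through the patched create_task, which is why hooking Runner._lazy_init covers loops built from any loop_factory, including uvloop.new_event_loop.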
+ loop = instance._loop + _instrument_event_loop(loop) + + return result + + def instrument_asyncio_base_events(module): - wrap_out_function(module, "BaseEventLoop.create_task", propagate_task_context) + wrap_out_function(module, "BaseEventLoop.create_task", wrap_create_task) def instrument_asyncio_events(module): if hasattr(module, "_BaseDefaultEventLoopPolicy"): # Python >= 3.14 - wrap_function_wrapper(module, "_BaseDefaultEventLoopPolicy.set_event_loop", wrap_create_task) - else: # Python <= 3.13 - wrap_function_wrapper(module, "BaseDefaultEventLoopPolicy.set_event_loop", wrap_create_task) + wrap_function_wrapper(module, "_BaseDefaultEventLoopPolicy.set_event_loop", wrap_set_event_loop) + elif hasattr(module, "BaseDefaultEventLoopPolicy"): # Python <= 3.13 + wrap_function_wrapper(module, "BaseDefaultEventLoopPolicy.set_event_loop", wrap_set_event_loop) + + +# For Python >= 3.11 +def instrument_asyncio_runners(module): + if hasattr(module, "Runner") and hasattr(module.Runner, "_lazy_init"): + wrap_function_wrapper(module, "Runner._lazy_init", wrap__lazy_init) diff --git a/tests/adapter_uvicorn/test_uvicorn.py b/tests/adapter_uvicorn/test_uvicorn.py index 0084be3e46..d5db2d6ca6 100644 --- a/tests/adapter_uvicorn/test_uvicorn.py +++ b/tests/adapter_uvicorn/test_uvicorn.py @@ -56,8 +56,8 @@ def app(request): return request.param -@pytest.fixture -def port(app): +@pytest.fixture(params=["asyncio", "uvloop", "none"], ids=["asyncio", "uvloop", "none"]) +def port(app, request): port = get_open_port() loops = [] @@ -72,7 +72,7 @@ def on_tick_sync(): async def on_tick(): on_tick_sync() - config = Config(app, host="127.0.0.1", port=port, loop="asyncio") + config = Config(app, host="127.0.0.1", port=port, loop=request.param) config.callback_notify = on_tick config.log_config = {"version": 1} config.disable_lifespan = True diff --git a/tests/coroutines_asyncio/test_context_propagation.py b/tests/coroutines_asyncio/test_context_propagation.py index b338b6ec3e..eb5c358745 100644 --- a/tests/coroutines_asyncio/test_context_propagation.py +++ b/tests/coroutines_asyncio/test_context_propagation.py @@ -36,16 +36,31 @@ import uvloop loop_policies = (pytest.param(None, id="asyncio"), pytest.param(uvloop.EventLoopPolicy(), id="uvloop")) + uvloop_factory = (pytest.param(uvloop.new_event_loop, id="uvloop"), pytest.param(None, id="None")) except ImportError: loop_policies = (pytest.param(None, id="asyncio"),) + uvloop_factory = (pytest.param(None, id="None"),) + + +def loop_factories(): + import asyncio + + if sys.platform == "win32": + return (pytest.param(asyncio.ProactorEventLoop, id="asyncio.ProactorEventLoop"), *uvloop_factory) + else: + return (pytest.param(asyncio.SelectorEventLoop, id="asyncio.SelectorEventLoop"), *uvloop_factory) @pytest.fixture(autouse=True) def reset_event_loop(): - from asyncio import set_event_loop, set_event_loop_policy + try: + from asyncio import set_event_loop, set_event_loop_policy + + # Remove the loop policy to avoid side effects + set_event_loop_policy(None) + except ImportError: + from asyncio import set_event_loop - # Remove the loop policy to avoid side effects - set_event_loop_policy(None) set_event_loop(None) @@ -102,6 +117,7 @@ async def _test(asyncio, schedule, nr_enabled=True): return trace +@pytest.mark.skipif(sys.version_info >= (3, 16), reason="loop_policy is not available") @pytest.mark.parametrize("loop_policy", loop_policies) @pytest.mark.parametrize("schedule", ("create_task", "ensure_future")) @validate_transaction_metrics( @@ -166,10 +182,12 @@ def 
handle_exception(loop, context): memcache_trace("cmd"), ], ) -def test_two_transactions(event_loop, trace): +def test_two_transactions_with_global_event_loop(event_loop, trace): """ Instantiate a coroutine in one transaction and await it in another. This should not cause any errors. + This uses the global event loop policy, which has been deprecated + since Python 3.11 and is scheduled for removal in Python 3.16. """ import asyncio @@ -211,6 +229,99 @@ async def await_task(): event_loop.run_until_complete(asyncio.gather(afut, bfut)) +@pytest.mark.skipif(sys.version_info < (3, 11), reason="asyncio.Runner is not available") +@validate_transaction_metrics("await_task", background_task=True) +@validate_transaction_metrics("create_coro", background_task=True, index=-2) +@pytest.mark.parametrize("loop_factory", loop_factories()) +@pytest.mark.parametrize( + "trace", + [ + function_trace(name="simple_gen"), + external_trace(library="lib", url="http://foo.com"), + database_trace("select * from foo"), + datastore_trace("lib", "foo", "bar"), + message_trace("lib", "op", "typ", "name"), + memcache_trace("cmd"), + ], +) +def test_two_transactions_with_loop_factory(trace, loop_factory): + """ + Instantiate a coroutine in one transaction and await it in + another. This should not cause any errors. + Starting in Python 3.11, the asyncio.Runner class was added + as well as the loop_factory parameter. The loop_factory + parameter provides a replacement for loop policies (which + are scheduled for removal in Python 3.16). + """ + import asyncio + + @trace + async def task(): + pass + + @background_task(name="create_coro") + async def create_coro(): + return asyncio.create_task(task()) + + @background_task(name="await_task") + async def await_task(task_to_await): + return await task_to_await + + async def _main(): + _task = await create_coro() + return await await_task(_task) + + with asyncio.Runner(loop_factory=loop_factory) as runner: + runner.run(_main()) + + +@pytest.mark.skipif(sys.version_info < (3, 11), reason="loop_factory/asyncio.Runner is not available") +@pytest.mark.parametrize("loop_factory", loop_factories()) +@validate_transaction_metrics( + "test_context_propagation:test_context_propagation_with_loop_factory", + background_task=True, + scoped_metrics=(("Function/waiter2", 2), ("Function/waiter3", 2)), +) +@background_task() +def test_context_propagation_with_loop_factory(loop_factory): + import asyncio + + exceptions = [] + + def handle_exception(loop, context): + exceptions.append(context) + + # Call default handler for standard logging + loop.default_exception_handler(context) + + async def subtask(): + with FunctionTrace(name="waiter2", terminal=True): + pass + + await child() + + async def _task(trace): + assert current_trace() == trace + + await subtask() + + trace = current_trace() + + with asyncio.Runner(loop_factory=loop_factory) as runner: + assert trace == current_trace() + runner._loop.set_exception_handler(handle_exception) + runner.run(_task(trace)) + runner.run(_task(trace)) + + # The agent should have removed all traces from the cache since + # run_until_complete has terminated (all callbacks scheduled inside the + # task have run) + assert len(trace_cache()) == 1 # Sentinel is all that remains + + # # Assert that no exceptions have occurred + assert not exceptions, exceptions + + # Sentinel left in cache transaction exited async def sentinel_in_cache_txn_exited(asyncio, bg): event = asyncio.Event() diff --git a/tox.ini b/tox.ini index e27ce2ef83..98cea6ee29 100644 --- 
a/tox.ini +++ b/tox.ini @@ -116,8 +116,8 @@ envlist = python-adapter_hypercorn-{py310,py311,py312,py313,py314}-hypercornlatest, python-adapter_hypercorn-{py38,py39}-hypercorn{0010,0011,0012,0013}, python-adapter_mcp-{py310,py311,py312,py313,py314}, - python-adapter_uvicorn-{py38,py39,py310,py311,py312,py313,py314}-uvicornlatest, - python-adapter_uvicorn-py38-uvicorn014, + python-adapter_uvicorn-{py39,py310,py311,py312,py313,py314}-uvicornlatest, + python-adapter_uvicorn-py38-uvicorn020, python-adapter_waitress-{py38,py39,py310,py311,py312,py313,py314}-waitresslatest, python-application_celery-{py38,py39,py310,py311,py312,py313,py314,pypy311}-celerylatest, python-application_celery-py311-celery{0504,0503,0502}, @@ -239,9 +239,11 @@ deps = adapter_hypercorn-hypercorn0010: hypercorn[h3]<0.11 adapter_hypercorn: niquests adapter_mcp: fastmcp - adapter_uvicorn-uvicorn014: uvicorn<0.15 + adapter_uvicorn-uvicorn020: uvicorn<0.21 + adapter_uvicorn-uvicorn020: uvloop<0.20 adapter_uvicorn-uvicornlatest: uvicorn adapter_uvicorn: typing-extensions + adapter_uvicorn: uvloop adapter_waitress: WSGIProxy2 adapter_waitress-waitresslatest: waitress agent_features: beautifulsoup4 From f1815857622a7f4e47c5e3051b24ae755349d7fd Mon Sep 17 00:00:00 2001 From: Timothy Pansino <11214426+TimPansino@users.noreply.github.com> Date: Tue, 18 Nov 2025 10:45:06 -0800 Subject: [PATCH 13/34] Fix issue in ASGI header consumption (#1578) * Correct code for Sanic instrumentation * Correct handling of headers in ASGIWebTransaction * Correct handling of headers in ASGIBrowserMiddleware * Add regression test for ASGI headers issues --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- newrelic/api/asgi_application.py | 20 ++++++++++++++-- newrelic/hooks/framework_sanic.py | 2 +- tests/agent_features/test_asgi_transaction.py | 24 +++++++++++++++++++ tests/testing_support/asgi_testing.py | 2 +- .../sample_asgi_applications.py | 17 +++++++++++++ 5 files changed, 61 insertions(+), 4 deletions(-) diff --git a/newrelic/api/asgi_application.py b/newrelic/api/asgi_application.py index 669d3e6db5..6b9a31130e 100644 --- a/newrelic/api/asgi_application.py +++ b/newrelic/api/asgi_application.py @@ -132,10 +132,20 @@ async def send_inject_browser_agent(self, message): message_type = message["type"] if message_type == "http.response.start" and not self.initial_message: - headers = list(message.get("headers", ())) + # message["headers"] may be a generator, and consuming it via process_response will leave the original + # application with no headers. Fix this by preserving them in a list before consuming them. + if "headers" in message: + message["headers"] = headers = list(message["headers"]) + else: + headers = [] + + # Check if we should insert the HTML snippet based on the headers. + # Currently if there are no headers this will always be False, but call the function + # anyway in case this logic changes in the future. if not self.should_insert_html(headers): await self.abort() return + message["headers"] = headers self.initial_message = message elif message_type == "http.response.body" and self.initial_message: @@ -232,7 +242,13 @@ async def send(self, event): finally: self.__exit__(*sys.exc_info()) elif event["type"] == "http.response.start": - self.process_response(event["status"], event.get("headers", ())) + # event["headers"] may be a generator, and consuming it via process_response will leave the original + # ASGI application with no headers. 
Fix this by preserving them in a list before consuming them. + if "headers" in event: + event["headers"] = headers = list(event["headers"]) + else: + headers = [] + self.process_response(event["status"], headers) return await self._send(event) diff --git a/newrelic/hooks/framework_sanic.py b/newrelic/hooks/framework_sanic.py index 14077eb6d9..74d8ab678e 100644 --- a/newrelic/hooks/framework_sanic.py +++ b/newrelic/hooks/framework_sanic.py @@ -183,7 +183,7 @@ async def _nr_sanic_response_send(wrapped, instance, args, kwargs): transaction = current_transaction() result = wrapped(*args, **kwargs) if isawaitable(result): - await result + result = await result if transaction is None: return result diff --git a/tests/agent_features/test_asgi_transaction.py b/tests/agent_features/test_asgi_transaction.py index e70ec95901..ac774689bd 100644 --- a/tests/agent_features/test_asgi_transaction.py +++ b/tests/agent_features/test_asgi_transaction.py @@ -19,6 +19,7 @@ from testing_support.fixtures import override_application_settings from testing_support.sample_asgi_applications import ( AppWithDescriptor, + asgi_application_generator_headers, simple_app_v2, simple_app_v2_init_exc, simple_app_v2_raw, @@ -37,6 +38,7 @@ simple_app_v3_wrapped = AsgiTest(simple_app_v3) simple_app_v2_wrapped = AsgiTest(simple_app_v2) simple_app_v2_init_exc = AsgiTest(simple_app_v2_init_exc) +asgi_application_generator_headers = AsgiTest(asgi_application_generator_headers) # Test naming scheme logic and ASGIApplicationWrapper for a single callable @@ -85,6 +87,28 @@ def test_double_callable_raw(): assert response.body == b"" +# Ensure headers object is preserved +@pytest.mark.parametrize("browser_monitoring", [True, False]) +@validate_transaction_metrics(name="", group="Uri") +def test_generator_headers(browser_monitoring): + """ + Both ASGIApplicationWrapper and ASGIBrowserMiddleware can cause headers to be lost if generators are + not handled properly. + + Ensure neither destroys headers by testing with and without the ASGIBrowserMiddleware, to make sure whichever + receives headers first properly preserves them in a list. 
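The failure mode the docstring above describes is plain generator exhaustion. A minimal illustration outside of ASGI (the names here are illustrative only):

    def headers():
        yield (b"x-my-header", b"myvalue")

    gen = headers()
    assert list(gen) == [(b"x-my-header", b"myvalue")]  # the first consumer sees the headers
    assert list(gen) == []                              # a second pass over the same generator gets nothing

    # Materializing the generator once keeps the headers available for every
    # later consumer, which is what the fix does with message["headers"].
    preserved = list(headers())
    assert list(preserved) == [(b"x-my-header", b"myvalue")]
    assert list(preserved) == [(b"x-my-header", b"myvalue")]

If the transaction wrapper or the browser middleware iterates a generator of headers while recording response attributes, the application's http.response.start message would otherwise be forwarded downstream with the generator already empty.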
+ """ + + @override_application_settings({"browser_monitoring.enabled": browser_monitoring}) + def _test(): + response = asgi_application_generator_headers.make_request("GET", "/") + assert response.status == 200 + assert response.headers == {"x-my-header": "myvalue"} + assert response.body == b"" + + _test() + + # Test asgi_application decorator with parameters passed in on a single callable @pytest.mark.parametrize("name, group", ((None, "group"), ("name", "group"), ("", "group"))) def test_asgi_application_decorator_single_callable(name, group): diff --git a/tests/testing_support/asgi_testing.py b/tests/testing_support/asgi_testing.py index 821a20fe96..5c97be8860 100644 --- a/tests/testing_support/asgi_testing.py +++ b/tests/testing_support/asgi_testing.py @@ -106,7 +106,7 @@ def process_output(self): if self.response_state is ResponseState.NOT_STARTED: assert message["type"] == "http.response.start" response_status = message["status"] - response_headers = message.get("headers", response_headers) + response_headers = list(message.get("headers", response_headers)) self.response_state = ResponseState.BODY elif self.response_state is ResponseState.BODY: assert message["type"] == "http.response.body" diff --git a/tests/testing_support/sample_asgi_applications.py b/tests/testing_support/sample_asgi_applications.py index c1ef860763..e281a7cbf2 100644 --- a/tests/testing_support/sample_asgi_applications.py +++ b/tests/testing_support/sample_asgi_applications.py @@ -114,6 +114,23 @@ async def normal_asgi_application(scope, receive, send): await send({"type": "http.response.body", "body": output}) +@ASGIApplicationWrapper +async def asgi_application_generator_headers(scope, receive, send): + if scope["type"] == "lifespan": + return await handle_lifespan(scope, receive, send) + + if scope["type"] != "http": + raise ValueError("unsupported") + + def headers(): + yield (b"x-my-header", b"myvalue") + + await send({"type": "http.response.start", "status": 200, "headers": headers()}) + await send({"type": "http.response.body"}) + + assert current_transaction() is None + + async def handle_lifespan(scope, receive, send): """Handle lifespan protocol with no-ops to allow more compatibility.""" while True: From 060ddbdc1b5cdb12bce69510bd1d3ef4a898224a Mon Sep 17 00:00:00 2001 From: Timothy Pansino <11214426+TimPansino@users.noreply.github.com> Date: Tue, 18 Nov 2025 14:25:23 -0800 Subject: [PATCH 14/34] Bedrock Converse Streaming Support (#1565) * Add more formatting to custom event validatators * Add streamed responses to converse mock server * Add streaming fixtures for testing for converse * Rename other bedrock test files * Add tests for converse streaming * Instrument converse streaming * Move GeneratorProxy adjacent functions to mixin * Fix checking of supported models * Reorganize converse error tests * Port new converse botocore tests to aiobotocore * Instrument response streaming in aiobotocore converse * Fix suggestions from code review * Port in converse changes from strands PR * Delete commented code --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- newrelic/hooks/external_aiobotocore.py | 11 + newrelic/hooks/external_botocore.py | 206 +++++---- .../test_bedrock_chat_completion_converse.py | 388 ++++++----------- ...st_bedrock_chat_completion_invoke_model.py | 19 +- .../test_bedrock_embeddings.py | 16 +- .../_mock_external_bedrock_server_converse.py | 137 +++++- .../_test_bedrock_chat_completion_converse.py | 253 +++++++++++ 
...t_bedrock_chat_completion_invoke_model.py} | 0 ... test_bedrock_chat_completion_converse.py} | 401 ++++++------------ ...st_bedrock_chat_completion_invoke_model.py | 19 +- ...t_bedrock_chat_completion_via_langchain.py | 2 +- .../test_bedrock_embeddings.py | 16 +- .../validators/validate_custom_event.py | 5 +- .../validators/validate_custom_events.py | 5 +- 14 files changed, 837 insertions(+), 641 deletions(-) create mode 100644 tests/external_botocore/_test_bedrock_chat_completion_converse.py rename tests/external_botocore/{_test_bedrock_chat_completion.py => _test_bedrock_chat_completion_invoke_model.py} (100%) rename tests/external_botocore/{test_chat_completion_converse.py => test_bedrock_chat_completion_converse.py} (54%) diff --git a/newrelic/hooks/external_aiobotocore.py b/newrelic/hooks/external_aiobotocore.py index ddb9d4d056..15daa7bd6d 100644 --- a/newrelic/hooks/external_aiobotocore.py +++ b/newrelic/hooks/external_aiobotocore.py @@ -149,6 +149,17 @@ async def wrap_client__make_api_call(wrapped, instance, args, kwargs): bedrock_attrs = extract_bedrock_converse_attrs( args[1], response, response_headers, model, span_id, trace_id ) + + if response_streaming: + # Wrap EventStream object here to intercept __iter__ method instead of instrumenting class. + # This class is used in numerous other services in botocore, and would cause conflicts. + response["stream"] = stream = AsyncEventStreamWrapper(response["stream"]) + stream._nr_ft = ft or None + stream._nr_bedrock_attrs = bedrock_attrs or {} + stream._nr_model_extractor = stream_extractor or None + stream._nr_is_converse = True + return response + else: bedrock_attrs = { "request_id": response_headers.get("x-amzn-requestid"), diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py index 39317ea752..e00e50b770 100644 --- a/newrelic/hooks/external_botocore.py +++ b/newrelic/hooks/external_botocore.py @@ -576,9 +576,9 @@ def handle_bedrock_exception( } if is_embedding: - notice_error_attributes.update({"embedding_id": str(uuid.uuid4())}) + notice_error_attributes["embedding_id"] = str(uuid.uuid4()) else: - notice_error_attributes.update({"completion_id": str(uuid.uuid4())}) + notice_error_attributes["completion_id"] = str(uuid.uuid4()) if ft: ft.notice_error(attributes=notice_error_attributes) @@ -766,7 +766,7 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs): if not transaction: return wrapped(*args, **kwargs) - settings = transaction.settings or global_settings + settings = transaction.settings or global_settings() if not settings.ai_monitoring.enabled: return wrapped(*args, **kwargs) @@ -826,6 +826,16 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs): bedrock_attrs = extract_bedrock_converse_attrs(kwargs, response, response_headers, model, span_id, trace_id) try: + if response_streaming: + # Wrap EventStream object here to intercept __iter__ method instead of instrumenting class. + # This class is used in numerous other services in botocore, and would cause conflicts. 
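The comment above captures the design choice: wrap the one EventStream instance returned for this call rather than patching the EventStream class, which botocore shares across many services. Below is a generic sketch of that idea using the standalone wrapt package; the event shapes are simplified stand-ins modelled on a Converse streaming response, and this is not the agent's GeneratorProxy implementation.

    import wrapt

    class ConverseStreamWatcher(wrapt.ObjectProxy):
        # Proxy a single stream instance; attribute access falls through to the
        # wrapped object and only iteration is intercepted, so the shared class
        # used by other botocore services is never modified.
        def __iter__(self):
            attrs = {"content": "", "finish_reason": None}
            for event in self.__wrapped__:
                if "contentBlockDelta" in event:
                    attrs["content"] += event["contentBlockDelta"]["delta"].get("text", "")
                elif "messageStop" in event:
                    attrs["finish_reason"] = event["messageStop"].get("stopReason")
                yield event
            print(attrs)  # the real hook records an LLM event here instead of printing

    fake_stream = iter([
        {"contentBlockDelta": {"delta": {"text": "100 degrees "}}},
        {"contentBlockDelta": {"delta": {"text": "Celsius"}}},
        {"messageStop": {"stopReason": "end_turn"}},
    ])
    for _ in ConverseStreamWatcher(fake_stream):
        pass  # prints {'content': '100 degrees Celsius', 'finish_reason': 'end_turn'}

Because the proxy only changes behaviour for the object it wraps, the caller still receives something that behaves like the original stream, and consuming it drives the bookkeeping as a side effect.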
+ response["stream"] = stream = EventStreamWrapper(response["stream"]) + stream._nr_ft = ft + stream._nr_bedrock_attrs = bedrock_attrs + stream._nr_model_extractor = stream_extractor + stream._nr_is_converse = True + return response + ft.__exit__(None, None, None) bedrock_attrs["duration"] = ft.duration * 1000 run_bedrock_response_extractor(response_extractor, {}, bedrock_attrs, False, transaction) @@ -846,14 +856,19 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, sp # kwargs["messages"] can hold multiple requests and responses to maintain conversation history # We grab the last message (the newest request) in the list each time, so we don't duplicate recorded data + _input_messages = kwargs.get("messages", []) + _input_messages = _input_messages and (_input_messages[-1] or {}) + _input_messages = _input_messages.get("content", []) input_message_list.extend( - [{"role": "user", "content": result["text"]} for result in kwargs["messages"][-1].get("content", [])] + [{"role": "user", "content": result["text"]} for result in _input_messages if "text" in result] ) - output_message_list = [ - {"role": "assistant", "content": result["text"]} - for result in response.get("output").get("message").get("content", []) - ] + output_message_list = None + if "output" in response: + output_message_list = [ + {"role": "assistant", "content": result["text"]} + for result in response.get("output").get("message").get("content", []) + ] bedrock_attrs = { "request_id": response_headers.get("x-amzn-requestid"), @@ -861,24 +876,112 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, sp "span_id": span_id, "trace_id": trace_id, "response.choices.finish_reason": response.get("stopReason"), - "output_message_list": output_message_list, "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens", None), "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature", None), "input_message_list": input_message_list, } + + if output_message_list is not None: + bedrock_attrs["output_message_list"] = output_message_list + return bedrock_attrs +class BedrockRecordEventMixin: + def record_events_on_stop_iteration(self, transaction): + if hasattr(self, "_nr_ft"): + bedrock_attrs = getattr(self, "_nr_bedrock_attrs", {}) + self._nr_ft.__exit__(None, None, None) + + # If there are no bedrock attrs exit early as there's no data to record. + if not bedrock_attrs: + return + + try: + bedrock_attrs["duration"] = self._nr_ft.duration * 1000 + handle_chat_completion_event(transaction, bedrock_attrs) + except Exception: + _logger.warning(RESPONSE_PROCESSING_FAILURE_LOG_MESSAGE, exc_info=True) + + # Clear cached data as this can be very large. + self._nr_bedrock_attrs.clear() + + def record_error(self, transaction, exc): + if hasattr(self, "_nr_ft"): + try: + ft = self._nr_ft + error_attributes = getattr(self, "_nr_bedrock_attrs", {}) + + # If there are no bedrock attrs exit early as there's no data to record. 
+ if not error_attributes: + return + + error_attributes = bedrock_error_attributes(exc, error_attributes) + notice_error_attributes = { + "http.statusCode": error_attributes.get("http.statusCode"), + "error.message": error_attributes.get("error.message"), + "error.code": error_attributes.get("error.code"), + } + notice_error_attributes["completion_id"] = str(uuid.uuid4()) + + ft.notice_error(attributes=notice_error_attributes) + + ft.__exit__(*sys.exc_info()) + error_attributes["duration"] = ft.duration * 1000 + + handle_chat_completion_event(transaction, error_attributes) + + # Clear cached data as this can be very large. + error_attributes.clear() + except Exception: + _logger.warning(EXCEPTION_HANDLING_FAILURE_LOG_MESSAGE, exc_info=True) + + def record_stream_chunk(self, event, transaction): + if event: + try: + if getattr(self, "_nr_is_converse", False): + return self.converse_record_stream_chunk(event, transaction) + else: + return self.invoke_record_stream_chunk(event, transaction) + except Exception: + _logger.warning(RESPONSE_EXTRACTOR_FAILURE_LOG_MESSAGE, exc_info=True) + + def invoke_record_stream_chunk(self, event, transaction): + bedrock_attrs = getattr(self, "_nr_bedrock_attrs", {}) + chunk = json.loads(event["chunk"]["bytes"].decode("utf-8")) + self._nr_model_extractor(chunk, bedrock_attrs) + # In Langchain, the bedrock iterator exits early if type is "content_block_stop". + # So we need to call the record events here since stop iteration will not be raised. + _type = chunk.get("type") + if _type == "content_block_stop": + self.record_events_on_stop_iteration(transaction) + + def converse_record_stream_chunk(self, event, transaction): + bedrock_attrs = getattr(self, "_nr_bedrock_attrs", {}) + if "contentBlockDelta" in event: + if not bedrock_attrs: + return + + content = ((event.get("contentBlockDelta") or {}).get("delta") or {}).get("text", "") + if "output_message_list" not in bedrock_attrs: + bedrock_attrs["output_message_list"] = [{"role": "assistant", "content": ""}] + bedrock_attrs["output_message_list"][0]["content"] += content + + if "messageStop" in event: + bedrock_attrs["response.choices.finish_reason"] = (event.get("messageStop") or {}).get("stopReason", "") + + class EventStreamWrapper(ObjectProxy): def __iter__(self): g = GeneratorProxy(self.__wrapped__.__iter__()) g._nr_ft = getattr(self, "_nr_ft", None) g._nr_bedrock_attrs = getattr(self, "_nr_bedrock_attrs", {}) g._nr_model_extractor = getattr(self, "_nr_model_extractor", NULL_EXTRACTOR) + g._nr_is_converse = getattr(self, "_nr_is_converse", False) return g -class GeneratorProxy(ObjectProxy): +class GeneratorProxy(BedrockRecordEventMixin, ObjectProxy): def __init__(self, wrapped): super().__init__(wrapped) @@ -893,12 +996,12 @@ def __next__(self): return_val = None try: return_val = self.__wrapped__.__next__() - record_stream_chunk(self, return_val, transaction) + self.record_stream_chunk(return_val, transaction) except StopIteration: - record_events_on_stop_iteration(self, transaction) + self.record_events_on_stop_iteration(transaction) raise except Exception as exc: - record_error(self, transaction, exc) + self.record_error(transaction, exc) raise return return_val @@ -912,13 +1015,11 @@ def __aiter__(self): g._nr_ft = getattr(self, "_nr_ft", None) g._nr_bedrock_attrs = getattr(self, "_nr_bedrock_attrs", {}) g._nr_model_extractor = getattr(self, "_nr_model_extractor", NULL_EXTRACTOR) + g._nr_is_converse = getattr(self, "_nr_is_converse", False) return g -class AsyncGeneratorProxy(ObjectProxy): - def 
__init__(self, wrapped): - super().__init__(wrapped) - +class AsyncGeneratorProxy(BedrockRecordEventMixin, ObjectProxy): def __aiter__(self): return self @@ -929,12 +1030,12 @@ async def __anext__(self): return_val = None try: return_val = await self.__wrapped__.__anext__() - record_stream_chunk(self, return_val, transaction) + self.record_stream_chunk(return_val, transaction) except StopAsyncIteration: - record_events_on_stop_iteration(self, transaction) + self.record_events_on_stop_iteration(transaction) raise except Exception as exc: - record_error(self, transaction, exc) + self.record_error(transaction, exc) raise return return_val @@ -942,70 +1043,6 @@ async def aclose(self): return await super().aclose() -def record_stream_chunk(self, return_val, transaction): - if return_val: - try: - chunk = json.loads(return_val["chunk"]["bytes"].decode("utf-8")) - self._nr_model_extractor(chunk, self._nr_bedrock_attrs) - # In Langchain, the bedrock iterator exits early if type is "content_block_stop". - # So we need to call the record events here since stop iteration will not be raised. - _type = chunk.get("type") - if _type == "content_block_stop": - record_events_on_stop_iteration(self, transaction) - except Exception: - _logger.warning(RESPONSE_EXTRACTOR_FAILURE_LOG_MESSAGE, exc_info=True) - - -def record_events_on_stop_iteration(self, transaction): - if hasattr(self, "_nr_ft"): - bedrock_attrs = getattr(self, "_nr_bedrock_attrs", {}) - self._nr_ft.__exit__(None, None, None) - - # If there are no bedrock attrs exit early as there's no data to record. - if not bedrock_attrs: - return - - try: - bedrock_attrs["duration"] = self._nr_ft.duration * 1000 - handle_chat_completion_event(transaction, bedrock_attrs) - except Exception: - _logger.warning(RESPONSE_PROCESSING_FAILURE_LOG_MESSAGE, exc_info=True) - - # Clear cached data as this can be very large. - self._nr_bedrock_attrs.clear() - - -def record_error(self, transaction, exc): - if hasattr(self, "_nr_ft"): - try: - ft = self._nr_ft - error_attributes = getattr(self, "_nr_bedrock_attrs", {}) - - # If there are no bedrock attrs exit early as there's no data to record. - if not error_attributes: - return - - error_attributes = bedrock_error_attributes(exc, error_attributes) - notice_error_attributes = { - "http.statusCode": error_attributes.get("http.statusCode"), - "error.message": error_attributes.get("error.message"), - "error.code": error_attributes.get("error.code"), - } - notice_error_attributes.update({"completion_id": str(uuid.uuid4())}) - - ft.notice_error(attributes=notice_error_attributes) - - ft.__exit__(*sys.exc_info()) - error_attributes["duration"] = ft.duration * 1000 - - handle_chat_completion_event(transaction, error_attributes) - - # Clear cached data as this can be very large. 
- error_attributes.clear() - except Exception: - _logger.warning(EXCEPTION_HANDLING_FAILURE_LOG_MESSAGE, exc_info=True) - - def handle_embedding_event(transaction, bedrock_attrs): embedding_id = str(uuid.uuid4()) @@ -1551,6 +1588,7 @@ def wrap_serialize_to_request(wrapped, instance, args, kwargs): response_streaming=True ), ("bedrock-runtime", "converse"): wrap_bedrock_runtime_converse(response_streaming=False), + ("bedrock-runtime", "converse_stream"): wrap_bedrock_runtime_converse(response_streaming=True), } diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py index da9c5818e7..55843b832c 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py @@ -15,6 +15,12 @@ import botocore.exceptions import pytest from conftest import BOTOCORE_VERSION +from external_botocore._test_bedrock_chat_completion_converse import ( + chat_completion_expected_events, + chat_completion_expected_streaming_events, + chat_completion_invalid_access_key_error_events, + chat_completion_invalid_model_error_events, +) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( add_token_count_to_events, @@ -36,113 +42,65 @@ from newrelic.api.transaction import add_custom_attribute from newrelic.common.object_names import callable_name -chat_completion_expected_events = [ - ( - {"type": "LlmChatCompletionSummary"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "span_id": None, - "trace_id": "trace-id", - "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", - "duration": None, # Response time varies each test run - "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "request.temperature": 0.7, - "request.max_tokens": 100, - "response.choices.finish_reason": "max_tokens", - "vendor": "bedrock", - "ingest_source": "Python", - "response.number_of_messages": 3, - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", - "span_id": None, - "trace_id": "trace-id", - "content": "You are a scientist.", - "role": "system", - "completion_id": None, - "sequence": 0, - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "vendor": "bedrock", - "ingest_source": "Python", - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", - "span_id": None, - "trace_id": "trace-id", - "content": "What is 212 degrees Fahrenheit converted to Celsius?", - "role": "user", - "completion_id": None, - "sequence": 1, - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "vendor": "bedrock", - "ingest_source": "Python", - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", - "span_id": None, - "trace_id": "trace-id", - "content": "To convert 212°F to Celsius, we can use the formula:\n\nC = (F - 32) × 
5/9\n\nWhere:\nC is the temperature in Celsius\nF is the temperature in Fahrenheit\n\nPlugging in 212°F, we get:\n\nC = (212 - 32) × 5/9\nC = 180 × 5/9\nC = 100\n\nTherefore, 212°", # noqa: RUF001 - "role": "assistant", - "completion_id": None, - "sequence": 2, - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "vendor": "bedrock", - "ingest_source": "Python", - "is_response": True, - }, - ), -] + +@pytest.fixture(scope="session", params=[False, True], ids=["ResponseStandard", "ResponseStreaming"]) +def response_streaming(request): + return request.param + + +@pytest.fixture(scope="session") +def expected_metric(response_streaming): + return ("Llm/completion/Bedrock/converse" + ("_stream" if response_streaming else ""), 1) + + +@pytest.fixture(scope="session") +def expected_events(response_streaming): + return chat_completion_expected_streaming_events if response_streaming else chat_completion_expected_events @pytest.fixture(scope="module") -def exercise_model(loop, bedrock_converse_server): +def exercise_model(loop, bedrock_converse_server, response_streaming): def _exercise_model(message): async def coro(): inference_config = {"temperature": 0.7, "maxTokens": 100} - response = await bedrock_converse_server.converse( + _response = await bedrock_converse_server.converse( modelId="anthropic.claude-3-sonnet-20240229-v1:0", messages=message, system=[{"text": "You are a scientist."}], inferenceConfig=inference_config, ) - assert response return loop.run_until_complete(coro()) - return _exercise_model + def _exercise_model_streaming(message): + async def coro(): + inference_config = {"temperature": 0.7, "maxTokens": 100} + + response = await bedrock_converse_server.converse_stream( + modelId="anthropic.claude-3-sonnet-20240229-v1:0", + messages=message, + system=[{"text": "You are a scientist."}], + inferenceConfig=inference_config, + ) + _responses = [r async for r in response["stream"]] # Consume the response stream + + return loop.run_until_complete(coro()) + + return _exercise_model_streaming if response_streaming else _exercise_model @reset_core_stats_engine() -def test_bedrock_chat_completion_in_txn_with_llm_metadata(set_trace_info, exercise_model): - @validate_custom_events(events_with_context_attrs(chat_completion_expected_events)) - # One summary event, one user message, and one response message from the assistant +def test_bedrock_chat_completion_in_txn_with_llm_metadata( + set_trace_info, exercise_model, expected_metric, expected_events +): + @validate_custom_events(events_with_context_attrs(expected_events)) + # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_bedrock_chat_completion_in_txn_with_llm_metadata", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) @@ -162,14 +120,14 @@ def _test(): @disabled_ai_monitoring_record_content_settings @reset_core_stats_engine() -def test_bedrock_chat_completion_no_content(set_trace_info, exercise_model): - @validate_custom_events(events_sans_content(chat_completion_expected_events)) +def test_bedrock_chat_completion_no_content(set_trace_info, exercise_model, expected_metric, expected_events): + @validate_custom_events(events_sans_content(expected_events)) # One 
summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_bedrock_chat_completion_no_content", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) @@ -188,14 +146,14 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model): - @validate_custom_events(add_token_count_to_events(chat_completion_expected_events)) +def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_metric, expected_events): + @validate_custom_events(add_token_count_to_events(expected_events)) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_bedrock_chat_completion_with_token_count", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) @@ -213,13 +171,13 @@ def _test(): @reset_core_stats_engine() -def test_bedrock_chat_completion_no_llm_metadata(set_trace_info, exercise_model): - @validate_custom_events(events_sans_llm_metadata(chat_completion_expected_events)) +def test_bedrock_chat_completion_no_llm_metadata(set_trace_info, exercise_model, expected_metric, expected_events): + @validate_custom_events(events_sans_llm_metadata(expected_events)) @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_bedrock_chat_completion_in_txn_no_llm_metadata", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) @@ -250,54 +208,37 @@ def test_bedrock_chat_completion_disabled_ai_monitoring_settings(set_trace_info, exercise_model(message) -chat_completion_invalid_access_key_error_events = [ - ( - {"type": "LlmChatCompletionSummary"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "span_id": None, - "trace_id": "trace-id", - "request_id": "e1206e19-2318-4a9d-be98-017c73f06118", - "duration": None, # Response time varies each test run - "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "request.temperature": 0.7, - "request.max_tokens": 100, - "vendor": "bedrock", - "ingest_source": "Python", - "response.number_of_messages": 1, - "error": True, - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "e1206e19-2318-4a9d-be98-017c73f06118", - "span_id": None, - "trace_id": "trace-id", - "content": "Invalid Token", - "role": "user", - "completion_id": None, - "sequence": 0, - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "vendor": "bedrock", - "ingest_source": 
"Python", - }, - ), -] - _client_error = botocore.exceptions.ClientError _client_error_name = callable_name(_client_error) +@pytest.fixture +def exercise_converse_incorrect_access_key(loop, bedrock_converse_server, response_streaming, monkeypatch): + def _exercise_converse_incorrect_access_key(): + async def _coro(): + monkeypatch.setattr( + bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY" + ) + + message = [{"role": "user", "content": [{"text": "Invalid Token"}]}] + request = ( + bedrock_converse_server.converse_stream if response_streaming else bedrock_converse_server.converse + ) + with pytest.raises(_client_error): + await request( + modelId="anthropic.claude-3-sonnet-20240229-v1:0", + messages=message, + inferenceConfig={"temperature": 0.7, "maxTokens": 100}, + ) + + loop.run_until_complete(_coro()) + + return _exercise_converse_incorrect_access_key + + @reset_core_stats_engine() def test_bedrock_chat_completion_error_incorrect_access_key( - loop, monkeypatch, bedrock_converse_server, exercise_model, set_trace_info + exercise_converse_incorrect_access_key, set_trace_info, expected_metric ): """ A request is made to the server with invalid credentials. botocore will reach out to the server and receive an @@ -320,8 +261,8 @@ def test_bedrock_chat_completion_error_incorrect_access_key( ) @validate_transaction_metrics( name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) @@ -332,121 +273,79 @@ def _test(): add_custom_attribute("llm.foo", "bar") add_custom_attribute("non_llm_attr", "python-agent") - converse_incorrect_access_key(loop, bedrock_converse_server, monkeypatch) + exercise_converse_incorrect_access_key() _test() -def converse_incorrect_access_key(loop, bedrock_converse_server, monkeypatch): - async def _coro(): - monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): - message = [{"role": "user", "content": [{"text": "Invalid Token"}]}] - response = await bedrock_converse_server.converse( - modelId="anthropic.claude-3-sonnet-20240229-v1:0", - messages=message, - inferenceConfig={"temperature": 0.7, "maxTokens": 100}, - ) - assert response - - loop.run_until_complete(_coro()) - - -chat_completion_invalid_model_error_events = [ - ( - {"type": "LlmChatCompletionSummary"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "f4908827-3db9-4742-9103-2bbc34578b03", - "span_id": None, - "trace_id": "trace-id", - "duration": None, # Response time varies each test run - "request.model": "does-not-exist", - "response.model": "does-not-exist", - "request.temperature": 0.7, - "request.max_tokens": 100, - "response.number_of_messages": 1, - "vendor": "bedrock", - "ingest_source": "Python", - "error": True, - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "span_id": None, - "trace_id": "trace-id", - "request_id": "f4908827-3db9-4742-9103-2bbc34578b03", - "content": "Model does not exist.", - "role": "user", - "completion_id": None, - "response.model": "does-not-exist", - "sequence": 0, - "vendor": "bedrock", - "ingest_source": 
"Python", - }, - ), -] - - @reset_core_stats_engine() -def test_bedrock_chat_completion_error_invalid_model(loop, bedrock_converse_server, set_trace_info): - @validate_custom_events(chat_completion_invalid_model_error_events) +@override_llm_token_callback_settings(llm_token_count_callback) +def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( + exercise_converse_incorrect_access_key, set_trace_info, expected_metric +): + """ + A request is made to the server with invalid credentials. botocore will reach out to the server and receive an + UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer + events. The error response can also be parsed, and will be included as attributes on the recorded exception. + """ + + @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) @validate_error_trace_attributes( - "botocore.errorfactory:ValidationException", + _client_error_name, exact_attrs={ "agent": {}, "intrinsic": {}, "user": { - "http.statusCode": 400, - "error.message": "The provided model identifier is invalid.", - "error.code": "ValidationException", + "http.statusCode": 403, + "error.message": "The security token included in the request is invalid.", + "error.code": "UnrecognizedClientException", }, }, ) @validate_transaction_metrics( - name="test_bedrock_chat_completion_error_invalid_model", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) - @background_task(name="test_bedrock_chat_completion_error_invalid_model") + @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") def _test(): set_trace_info() add_custom_attribute("llm.conversation_id", "my-awesome-id") add_custom_attribute("llm.foo", "bar") add_custom_attribute("non_llm_attr", "python-agent") - converse_invalid_model(loop, bedrock_converse_server) + exercise_converse_incorrect_access_key() _test() -def converse_invalid_model(loop, bedrock_converse_server): - async def _coro(): - with pytest.raises(_client_error): - message = [{"role": "user", "content": [{"text": "Model does not exist."}]}] +@pytest.fixture +def exercise_converse_invalid_model(loop, bedrock_converse_server, response_streaming, monkeypatch): + def _exercise_converse_invalid_model(): + async def _coro(): + monkeypatch.setattr( + bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY" + ) - response = await bedrock_converse_server.converse( - modelId="does-not-exist", messages=message, inferenceConfig={"temperature": 0.7, "maxTokens": 100} + message = [{"role": "user", "content": [{"text": "Model does not exist."}]}] + request = ( + bedrock_converse_server.converse_stream if response_streaming else bedrock_converse_server.converse ) + with pytest.raises(_client_error): + await request( + modelId="does-not-exist", messages=message, inferenceConfig={"temperature": 0.7, "maxTokens": 100} + ) - assert response + loop.run_until_complete(_coro()) - loop.run_until_complete(_coro()) + return _exercise_converse_invalid_model @reset_core_stats_engine() -@disabled_ai_monitoring_record_content_settings -def test_bedrock_chat_completion_error_invalid_model_no_content(loop, 
bedrock_converse_server, set_trace_info): - @validate_custom_events(events_sans_content(chat_completion_invalid_model_error_events)) +def test_bedrock_chat_completion_error_invalid_model(exercise_converse_invalid_model, set_trace_info, expected_metric): + @validate_custom_events(events_with_context_attrs(chat_completion_invalid_model_error_events)) @validate_error_trace_attributes( "botocore.errorfactory:ValidationException", exact_attrs={ @@ -460,62 +359,57 @@ def test_bedrock_chat_completion_error_invalid_model_no_content(loop, bedrock_co }, ) @validate_transaction_metrics( - name="test_bedrock_chat_completion_error_invalid_model_no_content", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + name="test_bedrock_chat_completion_error_invalid_model", + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) - @background_task(name="test_bedrock_chat_completion_error_invalid_model_no_content") + @background_task(name="test_bedrock_chat_completion_error_invalid_model") def _test(): set_trace_info() add_custom_attribute("llm.conversation_id", "my-awesome-id") add_custom_attribute("llm.foo", "bar") add_custom_attribute("non_llm_attr", "python-agent") - converse_invalid_model(loop, bedrock_converse_server) + with WithLlmCustomAttributes({"context": "attr"}): + exercise_converse_invalid_model() _test() @reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_converse_server, loop, set_trace_info +@disabled_ai_monitoring_record_content_settings +def test_bedrock_chat_completion_error_invalid_model_no_content( + exercise_converse_invalid_model, set_trace_info, expected_metric ): - """ - A request is made to the server with invalid credentials. botocore will reach out to the server and receive an - UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer - events. The error response can also be parsed, and will be included as attributes on the recorded exception. 
- """ - - @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) + @validate_custom_events(events_sans_content(chat_completion_invalid_model_error_events)) @validate_error_trace_attributes( - _client_error_name, + "botocore.errorfactory:ValidationException", exact_attrs={ "agent": {}, "intrinsic": {}, "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", + "http.statusCode": 400, + "error.message": "The provided model identifier is invalid.", + "error.code": "ValidationException", }, }, ) @validate_transaction_metrics( - name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + name="test_bedrock_chat_completion_error_invalid_model_no_content", + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) - @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") + @background_task(name="test_bedrock_chat_completion_error_invalid_model_no_content") def _test(): set_trace_info() add_custom_attribute("llm.conversation_id", "my-awesome-id") add_custom_attribute("llm.foo", "bar") add_custom_attribute("non_llm_attr", "python-agent") - converse_incorrect_access_key(loop, bedrock_converse_server, monkeypatch) + exercise_converse_invalid_model() _test() diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py index e02cc5b543..207db7e31e 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py @@ -14,13 +14,13 @@ import json import os from io import BytesIO +from pprint import pformat -import botocore.errorfactory import botocore.eventstream import botocore.exceptions import pytest from conftest import BOTOCORE_VERSION -from external_botocore._test_bedrock_chat_completion import ( +from external_botocore._test_bedrock_chat_completion_invoke_model import ( chat_completion_expected_events, chat_completion_expected_malformed_request_body_events, chat_completion_expected_malformed_response_body_events, @@ -858,7 +858,12 @@ def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibili def test_chat_models_instrumented(loop): import aiobotocore - SUPPORTED_MODELS = [model for model, _, _, _ in MODEL_EXTRACTORS if "embed" not in model] + def _is_supported_model(model): + supported_models = [model for model, _, _, _ in MODEL_EXTRACTORS if "embed" not in model] + for supported_model in supported_models: + if supported_model in model: + return True + return False _id = os.environ.get("AWS_ACCESS_KEY_ID") key = os.environ.get("AWS_SECRET_ACCESS_KEY") @@ -871,12 +876,8 @@ def test_chat_models_instrumented(loop): try: response = loop.run_until_complete(client.list_foundation_models(byOutputModality="TEXT")) models = [model["modelId"] for model in response["modelSummaries"]] - not_supported = [] - for model in models: - is_supported = any(model.startswith(supported_model) for supported_model in SUPPORTED_MODELS) - if not is_supported: - not_supported.append(model) + not_supported = [model for model in models if not _is_supported_model(model)] - assert not 
not_supported, f"The following unsupported models were found: {not_supported}" + assert not not_supported, f"The following unsupported models were found: {pformat(not_supported)}" finally: loop.run_until_complete(client.__aexit__(None, None, None)) diff --git a/tests/external_aiobotocore/test_bedrock_embeddings.py b/tests/external_aiobotocore/test_bedrock_embeddings.py index 96b930feb5..b964122294 100644 --- a/tests/external_aiobotocore/test_bedrock_embeddings.py +++ b/tests/external_aiobotocore/test_bedrock_embeddings.py @@ -14,6 +14,7 @@ import json import os from io import BytesIO +from pprint import pformat import botocore.exceptions import pytest @@ -414,7 +415,12 @@ async def _test(): def test_embedding_models_instrumented(loop): import aiobotocore - SUPPORTED_MODELS = [model for model, _, _, _ in MODEL_EXTRACTORS if "embed" in model] + def _is_supported_model(model): + supported_models = [model for model, _, _, _ in MODEL_EXTRACTORS if "embed" in model] + for supported_model in supported_models: + if supported_model in model: + return True + return False _id = os.environ.get("AWS_ACCESS_KEY_ID") key = os.environ.get("AWS_SECRET_ACCESS_KEY") @@ -427,12 +433,8 @@ def test_embedding_models_instrumented(loop): try: response = client.list_foundation_models(byOutputModality="EMBEDDING") models = [model["modelId"] for model in response["modelSummaries"]] - not_supported = [] - for model in models: - is_supported = any(model.startswith(supported_model) for supported_model in SUPPORTED_MODELS) - if not is_supported: - not_supported.append(model) + not_supported = [model for model in models if not _is_supported_model(model)] - assert not not_supported, f"The following unsupported models were found: {not_supported}" + assert not not_supported, f"The following unsupported models were found: {pformat(not_supported)}" finally: loop.run_until_complete(client.__aexit__(None, None, None)) diff --git a/tests/external_botocore/_mock_external_bedrock_server_converse.py b/tests/external_botocore/_mock_external_bedrock_server_converse.py index aef6d52856..bc93c8b773 100644 --- a/tests/external_botocore/_mock_external_bedrock_server_converse.py +++ b/tests/external_botocore/_mock_external_bedrock_server_converse.py @@ -16,6 +16,105 @@ from testing_support.mock_external_http_server import MockExternalHTTPServer +STREAMED_RESPONSES = { + "What is 212 degrees Fahrenheit converted to Celsius?": [ + { + "Content-Type": "application/vnd.amazon.eventstream", + "x-amzn-RequestId": "f070b880-e0fb-4537-8093-796671c39239", + }, + 200, + [ + "000000b2000000528a40b4c50b3a6576656e742d7479706507000c6d65737361676553746172740d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a30222c22726f6c65223a22617373697374616e74227d40ff8268000000ae000000575f3a3ac90b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22546f227d2c2270223a226162636465666768696a6b6c6d6e6f70717273227d57b47eb0", + 
"000000b800000057b09a58eb0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220636f6e76657274227d2c2270223a226162636465666768696a6b6c6d6e6f7071727374757677227d7f921878", + "000000c600000057f67806450b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222046616872656e227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c227d725b3c0b", + "000000a800000057d07acf690b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2268656974227d2c2270223a226162636465666768696a6b227d926527fe", + "000000b400000057756ab5ea0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220746f227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778227d47f66bd8", + "000000a400000057158a22680b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222043656c73697573227d2c2270223a22616263227dc03a975f", + "000000c8000000574948b8240b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222c227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f5051525354227db2e3dafb", + "000000ad00000057189a40190b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220796f75227d2c2270223a226162636465666768696a6b6c6d6e6f70227d76c0e56b", + "000000c500000057b1d87c950b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220757365227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e227de3731476", + "000000cb000000570ee8c2f40b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220746865227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f5051525354227dd4810232", + 
"000000d3000000575e781eb70b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220666f726d756c61227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758227df6672f41", + "000000d00000005719d864670b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a223a227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a3031227dbd8afb45", + "000000b6000000570faae68a0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a225c6e5c6e43227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778227d088d049f", + "000000a700000057522a58b80b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22203d227d2c2270223a226162636465666768696a6b6c227d88e54236", + "000000b70000005732cacf3a0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222028227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142227de6ec1ebe", + "000000b400000057756ab5ea0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2246227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a227d02007761", + "000000c900000057742891940b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22202d227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f5051525354227d3b3f080c", + "000000ab0000005797dab5b90b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220227d2c2270223a226162636465666768696a6b6c6d6e6f7071227d5638cc83", + "0000009d00000057b9bbf89f0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a223332227d2c2270223a226162227dc02cb212", + 
"000000bc00000057451afe2b0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2229227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748227da0e9aee9", + "000000c700000057cb182ff50b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22202a227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152227d0e3821bb", + "000000b70000005732cacf3a0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a414243227d1daf3cc5", + "000000b400000057756ab5ea0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2235227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a227dada5d973", + "000000d10000005724b84dd70b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222f227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a303132227db97b8201", + "000000bc00000057451afe2b0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2239227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748227d99250da7", + "000000ad00000057189a40190b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a225c6e5c6e5768657265227d2c2270223a226162636465666768696a6b227d5f2ed4ef", + "0000009f00000057c37babff0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a223a227d2c2270223a226162636465227d85a07294", + "000000a900000057ed1ae6d90b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a225c6e43227d2c2270223a226162636465666768696a6b6c6d227d50fa22de", + 
"000000ce00000057c6084d840b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22206973227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758227dfe3dc5ac", + "000000c8000000574948b8240b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220746865227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f5051227d3f77fbbc", + "000000c1000000574458da550b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222074656d7065726174757265227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142227d402a7229", + "000000d200000057631837070b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220696e227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a3031227df5f66d94", + "000000d90000005714c806160b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222043656c73697573227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a30313233227d3daccf94", + "000000b500000057480a9c5a0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a225c6e46227d2c2270223a226162636465666768696a6b6c6d6e6f70717273747576777879227d5042c3ff", + "000000cf00000057fb6864340b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22206973227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f50515253545556575859227da79da7ad", + "000000bd00000057787ad79b0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220746865227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a414243444546227dbd3a0aec", + 
"000000b70000005732cacf3a0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222074656d7065726174757265227d2c2270223a226162636465666768696a6b6c6d6e6f707172227d1560b810", + "000000bf0000005702ba84fb0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220696e227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a414243444546474849227d40f78c16", + "000000ce00000057c6084d840b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222046616872656e227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f5051525354227d47b98626", + "000000a2000000579acad7c80b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2268656974227d2c2270223a226162636465227d54cc33be", + "000000da0000005753687cc60b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a225c6e5c6e506c7567227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a3031323334227d9eb4ac9a", + "000000bc00000057451afe2b0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2267696e67227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445227d3a11d9ac000000c500000057b1d87c950b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220696e227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f227d391bdff3", + "0000009e00000057fe1b824f0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220227d2c2270223a2261626364227da292de09", + "000000b70000005732cacf3a0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22323132227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a41227dbfd117db", + 
"000000c20000005703f8a0850b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22c2b0227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d227d1166f202", + "000000a100000057dd6aad180b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2246227d2c2270223a2261626364656667227dcba24fa6", + "000000b300000057c74a69fa0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220666f72227d2c2270223a226162636465666768696a6b6c6d6e6f70717273747576227dd306dee6", + "000000c700000057cb182ff50b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222046227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152227d3bdbedf1", + "000000c600000057f67806450b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a223a227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152227d71d79c49", + "000000ae000000575f3a3ac90b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a225c6e5c6e43227d2c2270223a226162636465666768696a6b6c6d6e6f70227d2d8a1cce", + "000000bf0000005702ba84fb0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22203d227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a227de81a06eb", + "000000b6000000570faae68a0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222028227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a41227dea662b27", + "000000d500000057d138eb170b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22323132227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a3031323334227da7888b21", + 
"000000d700000057abf8b8770b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a303132333435363738227d63107603", + "000000c0000000577938f3e50b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222d227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c227d9e32b6f5", + "000000c600000057f67806450b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152227db3145f6b", + "0000009f00000057c37babff0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a223332227d2c2270223a2261626364227d277c3f97", + "000000a300000057a7aafe780b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2229227d2c2270223a22616263646566676869227dd05f85ca", + "000000bc00000057451afe2b0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22202a227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a41424344454647227db0dfade1", + "000000aa00000057aaba9c090b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220227d2c2270223a226162636465666768696a6b6c6d6e6f70227da476449e", + "000000ac0000005725fa69a90b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2235227d2c2270223a226162636465666768696a6b6c6d6e6f707172227deedc54f0", + "000000ca000000573388eb440b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222f227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f50515253545556227d7abef087", + 
"000000d00000005719d864670b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2239227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a3031227de7c50a2e", + "0000009f00000057c37babff0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a225c6e43227d2c2270223a22616263227df88e9dc2", + "000000ac0000005725fa69a90b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22203d227d2c2270223a226162636465666768696a6b6c6d6e6f7071227d6f5c7d17", + "000000bd00000057787ad79b0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a414243444546474849227d1c650877", + "000000a400000057158a22680b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22313830227d2c2270223a226162636465666768227dba33e936", + "000000bb00000057f73a223b0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a41424344454647227df14100ef", + "000000a400000057158a22680b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222a227d2c2270223a226162636465666768696a227da79b0693", + "000000c700000057cb182ff50b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f50515253227de52ff51e", + "000000aa00000057aaba9c090b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2235227d2c2270223a226162636465666768696a6b6c6d6e6f70227df5cf9fcf", + 
"000000b9000000578dfa715b0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222f227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445227dc22fcb78", + "0000009d00000057b9bbf89f0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2239227d2c2270223a22616263227db33d112d", + "000000b9000000578dfa715b0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a225c6e43227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a414243227d6e135792", + "000000c20000005703f8a0850b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22203d227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d227d242e22f6", + "000000a000000057e00a84a80b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220227d2c2270223a22616263646566227d64c7e90b", + "000000a800000057d07acf690b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22313030227d2c2270223a226162636465666768696a6b6c227dee65d4c5", + "000000e200000057c2398f810b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a225c6e5c6e5468657265666f7265227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a3031323334353637227d43ae3a9e", + "000000c600000057f67806450b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222c227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152227df0760dea", + "000000a50000005728ea0bd80b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220227d2c2270223a226162636465666768696a6b227db714fc15", + 
"000000ab0000005797dab5b90b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a22323132227d2c2270223a226162636465666768696a6b6c6d6e6f227de9fc19df", + "000000be000000573fdaad4b0b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2220227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a227dd7107790", + "000000c600000057f67806450b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a2264656772656573227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c227d15374080", + "000000dd00000057e148a0d60b3a6576656e742d74797065070011636f6e74656e74426c6f636b44656c74610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2264656c7461223a7b2274657874223a222046616872656e227d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a303132333435363738227d8993e5c9", + "000000a800000056a77dffff0b3a6576656e742d74797065070010636f6e74656e74426c6f636b53746f700d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b22636f6e74656e74426c6f636b496e646578223a302c2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a227d1c361897", + "000000bd00000051911972ae0b3a6576656e742d7479706507000b6d65737361676553746f700d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b2270223a226162636465666768696a6b6c6d6e6f707172737475767778797a4142434445464748494a4b4c4d4e4f505152535455565758595a303132333435222c2273746f70526561736f6e223a226d61785f746f6b656e73227d2963d7e1", + "000000f00000004ebc72e3a30b3a6576656e742d747970650700086d657461646174610d3a636f6e74656e742d747970650700106170706c69636174696f6e2f6a736f6e0d3a6d6573736167652d747970650700056576656e747b226d657472696373223a7b226c6174656e63794d73223a323134397d2c2270223a226162636465666768696a6b6c6d6e6f707172737475767778222c227573616765223a7b22696e707574546f6b656e73223a32362c226f7574707574546f6b656e73223a3130302c22736572766572546f6f6c5573616765223a7b7d2c22746f74616c546f6b656e73223a3132367d7dd415e186", + ], + ] +} + RESPONSES = { "What is 212 degrees Fahrenheit converted to Celsius?": [ {"Content-Type": "application/json", "x-amzn-RequestId": "c20d345e-6878-4778-b674-6b187bae8ecf"}, @@ -65,6 +164,7 @@ def simple_get(self): except Exception: content = body + stream = self.path.endswith("converse-stream") prompt = extract_shortened_prompt_converse(content) if not prompt: self.send_response(500) @@ -73,11 +173,23 @@ def simple_get(self): return headers, status_code, response = ({}, 0, "") - - for k, v in RESPONSES.items(): - if prompt.startswith(k): - headers, status_code, response = v - break + if stream: + for k, v in STREAMED_RESPONSES.items(): + if prompt.startswith(k): + headers, status_code, response = v + break + if not response: + for k, v in 
RESPONSES.items(): + # Only look for error responses returned immediately instead of in a stream + if prompt.startswith(k) and v[1] >= 400: + headers, status_code, response = v + stream = False # Response will not be streamed + break + else: + for k, v in RESPONSES.items(): + if prompt.startswith(k): + headers, status_code, response = v + break if not response: # If no matches found @@ -94,10 +206,19 @@ def simple_get(self): self.send_header(k, v) self.end_headers() - # Send response body - response_body = json.dumps(response).encode("utf-8") + if stream: + # Send response body + for resp in response: + self.wfile.write(bytes.fromhex(resp)) + else: + # Send response body + response_body = json.dumps(response).encode("utf-8") + + if "Malformed Body" in prompt: + # Remove end of response to make invalid JSON + response_body = response_body[:-4] - self.wfile.write(response_body) + self.wfile.write(response_body) return diff --git a/tests/external_botocore/_test_bedrock_chat_completion_converse.py b/tests/external_botocore/_test_bedrock_chat_completion_converse.py new file mode 100644 index 0000000000..cdec652292 --- /dev/null +++ b/tests/external_botocore/_test_bedrock_chat_completion_converse.py @@ -0,0 +1,253 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
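+
+# NOTE: the expected events below mirror the canned payloads served by
+# _mock_external_bedrock_server_converse.py; the request IDs and message content are taken
+# directly from those mock responses, so the two files must stay in sync.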
+ +# Ignore unicode characters in this file from LLM responses +# ruff: noqa: RUF001 + +chat_completion_expected_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", + "duration": None, # Response time varies each test run + "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "max_tokens", + "vendor": "bedrock", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", + "span_id": None, + "trace_id": "trace-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", + "span_id": None, + "trace_id": "trace-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", + "span_id": None, + "trace_id": "trace-id", + "content": "To convert 212°F to Celsius, we can use the formula:\n\nC = (F - 32) × 5/9\n\nWhere:\nC is the temperature in Celsius\nF is the temperature in Fahrenheit\n\nPlugging in 212°F, we get:\n\nC = (212 - 32) × 5/9\nC = 180 × 5/9\nC = 100\n\nTherefore, 212°", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + "is_response": True, + }, + ), +] + +chat_completion_expected_streaming_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "f070b880-e0fb-4537-8093-796671c39239", + "duration": None, # Response time varies each test run + "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "max_tokens", + "vendor": "bedrock", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "f070b880-e0fb-4537-8093-796671c39239", + "span_id": None, + "trace_id": "trace-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + 
"sequence": 0, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "f070b880-e0fb-4537-8093-796671c39239", + "span_id": None, + "trace_id": "trace-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "f070b880-e0fb-4537-8093-796671c39239", + "span_id": None, + "trace_id": "trace-id", + "content": "To convert Fahrenheit to Celsius, you use the formula:\n\nC = (F - 32) * 5/9\n\nWhere:\nC is the temperature in Celsius\nF is the temperature in Fahrenheit\n\nPlugging in 212°F for F:\n\nC = (212 - 32) * 5/9\nC = 180 * 5/9\nC = 100\n\nTherefore, 212 degrees Fahren", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + "is_response": True, + }, + ), +] + +chat_completion_invalid_access_key_error_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "e1206e19-2318-4a9d-be98-017c73f06118", + "duration": None, # Response time varies each test run + "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "request.temperature": 0.7, + "request.max_tokens": 100, + "vendor": "bedrock", + "ingest_source": "Python", + "response.number_of_messages": 1, + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "e1206e19-2318-4a9d-be98-017c73f06118", + "span_id": None, + "trace_id": "trace-id", + "content": "Invalid Token", + "role": "user", + "completion_id": None, + "sequence": 0, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + }, + ), +] +chat_completion_invalid_model_error_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "f4908827-3db9-4742-9103-2bbc34578b03", + "span_id": None, + "trace_id": "trace-id", + "duration": None, # Response time varies each test run + "request.model": "does-not-exist", + "response.model": "does-not-exist", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 1, + "vendor": "bedrock", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "f4908827-3db9-4742-9103-2bbc34578b03", + "content": "Model does not exist.", + "role": "user", + "completion_id": None, + "response.model": "does-not-exist", + "sequence": 0, + "vendor": "bedrock", + 
"ingest_source": "Python", + }, + ), +] diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py b/tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py similarity index 100% rename from tests/external_botocore/_test_bedrock_chat_completion.py rename to tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py diff --git a/tests/external_botocore/test_chat_completion_converse.py b/tests/external_botocore/test_bedrock_chat_completion_converse.py similarity index 54% rename from tests/external_botocore/test_chat_completion_converse.py rename to tests/external_botocore/test_bedrock_chat_completion_converse.py index 96ead41dd7..e365b5163b 100644 --- a/tests/external_botocore/test_chat_completion_converse.py +++ b/tests/external_botocore/test_bedrock_chat_completion_converse.py @@ -14,6 +14,12 @@ import botocore.exceptions import pytest +from _test_bedrock_chat_completion_converse import ( + chat_completion_expected_events, + chat_completion_expected_streaming_events, + chat_completion_invalid_access_key_error_events, + chat_completion_invalid_model_error_events, +) from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( @@ -36,109 +42,59 @@ from newrelic.api.transaction import add_custom_attribute from newrelic.common.object_names import callable_name -chat_completion_expected_events = [ - ( - {"type": "LlmChatCompletionSummary"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "span_id": None, - "trace_id": "trace-id", - "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", - "duration": None, # Response time varies each test run - "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "request.temperature": 0.7, - "request.max_tokens": 100, - "response.choices.finish_reason": "max_tokens", - "vendor": "bedrock", - "ingest_source": "Python", - "response.number_of_messages": 3, - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", - "span_id": None, - "trace_id": "trace-id", - "content": "You are a scientist.", - "role": "system", - "completion_id": None, - "sequence": 0, - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "vendor": "bedrock", - "ingest_source": "Python", - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", - "span_id": None, - "trace_id": "trace-id", - "content": "What is 212 degrees Fahrenheit converted to Celsius?", - "role": "user", - "completion_id": None, - "sequence": 1, - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "vendor": "bedrock", - "ingest_source": "Python", - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", - "span_id": None, - "trace_id": "trace-id", - "content": "To convert 212°F to Celsius, we can use the formula:\n\nC = (F - 32) × 5/9\n\nWhere:\nC is the temperature in Celsius\nF is the temperature in 
Fahrenheit\n\nPlugging in 212°F, we get:\n\nC = (212 - 32) × 5/9\nC = 180 × 5/9\nC = 100\n\nTherefore, 212°", # noqa: RUF001 - "role": "assistant", - "completion_id": None, - "sequence": 2, - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "vendor": "bedrock", - "ingest_source": "Python", - "is_response": True, - }, - ), -] + +@pytest.fixture(scope="session", params=[False, True], ids=["ResponseStandard", "ResponseStreaming"]) +def response_streaming(request): + return request.param + + +@pytest.fixture(scope="session") +def expected_metric(response_streaming): + return ("Llm/completion/Bedrock/converse" + ("_stream" if response_streaming else ""), 1) + + +@pytest.fixture(scope="session") +def expected_events(response_streaming): + return chat_completion_expected_streaming_events if response_streaming else chat_completion_expected_events @pytest.fixture(scope="module") -def exercise_model(bedrock_converse_server): +def exercise_model(bedrock_converse_server, response_streaming): def _exercise_model(message): inference_config = {"temperature": 0.7, "maxTokens": 100} - response = bedrock_converse_server.converse( + _response = bedrock_converse_server.converse( + modelId="anthropic.claude-3-sonnet-20240229-v1:0", + messages=message, + system=[{"text": "You are a scientist."}], + inferenceConfig=inference_config, + ) + + def _exercise_model_streaming(message): + inference_config = {"temperature": 0.7, "maxTokens": 100} + + response = bedrock_converse_server.converse_stream( modelId="anthropic.claude-3-sonnet-20240229-v1:0", messages=message, system=[{"text": "You are a scientist."}], inferenceConfig=inference_config, ) + _responses = list(response["stream"]) # Consume the response stream - return _exercise_model + return _exercise_model_streaming if response_streaming else _exercise_model @reset_core_stats_engine() -def test_bedrock_chat_completion_in_txn_with_llm_metadata(set_trace_info, exercise_model): - @validate_custom_events(events_with_context_attrs(chat_completion_expected_events)) - # One summary event, one user message, and one response message from the assistant +def test_bedrock_chat_completion_in_txn_with_llm_metadata( + set_trace_info, exercise_model, expected_metric, expected_events +): + @validate_custom_events(events_with_context_attrs(expected_events)) + # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_bedrock_chat_completion_in_txn_with_llm_metadata", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) @@ -158,14 +114,14 @@ def _test(): @disabled_ai_monitoring_record_content_settings @reset_core_stats_engine() -def test_bedrock_chat_completion_no_content(set_trace_info, exercise_model): - @validate_custom_events(events_sans_content(chat_completion_expected_events)) +def test_bedrock_chat_completion_no_content(set_trace_info, exercise_model, expected_metric, expected_events): + @validate_custom_events(events_sans_content(expected_events)) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_bedrock_chat_completion_no_content", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - 
rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) @@ -184,14 +140,14 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model): - @validate_custom_events(add_token_count_to_events(chat_completion_expected_events)) +def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_metric, expected_events): + @validate_custom_events(add_token_count_to_events(expected_events)) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_bedrock_chat_completion_with_token_count", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) @@ -209,13 +165,13 @@ def _test(): @reset_core_stats_engine() -def test_bedrock_chat_completion_no_llm_metadata(set_trace_info, exercise_model): - @validate_custom_events(events_sans_llm_metadata(chat_completion_expected_events)) +def test_bedrock_chat_completion_no_llm_metadata(set_trace_info, exercise_model, expected_metric, expected_events): + @validate_custom_events(events_sans_llm_metadata(expected_events)) @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_bedrock_chat_completion_in_txn_no_llm_metadata", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) @@ -246,54 +202,30 @@ def test_bedrock_chat_completion_disabled_ai_monitoring_settings(set_trace_info, exercise_model(message) -chat_completion_invalid_access_key_error_events = [ - ( - {"type": "LlmChatCompletionSummary"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "span_id": None, - "trace_id": "trace-id", - "request_id": "e1206e19-2318-4a9d-be98-017c73f06118", - "duration": None, # Response time varies each test run - "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "request.temperature": 0.7, - "request.max_tokens": 100, - "vendor": "bedrock", - "ingest_source": "Python", - "response.number_of_messages": 1, - "error": True, - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "e1206e19-2318-4a9d-be98-017c73f06118", - "span_id": None, - "trace_id": "trace-id", - "content": "Invalid Token", - "role": "user", - "completion_id": None, - "sequence": 0, - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "vendor": "bedrock", - "ingest_source": "Python", - }, - ), -] - _client_error = botocore.exceptions.ClientError _client_error_name = callable_name(_client_error) +@pytest.fixture +def exercise_converse_incorrect_access_key(bedrock_converse_server, response_streaming, monkeypatch): + def 
_exercise_converse_incorrect_access_key(): + monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") + + message = [{"role": "user", "content": [{"text": "Invalid Token"}]}] + request = bedrock_converse_server.converse_stream if response_streaming else bedrock_converse_server.converse + with pytest.raises(_client_error): + request( + modelId="anthropic.claude-3-sonnet-20240229-v1:0", + messages=message, + inferenceConfig={"temperature": 0.7, "maxTokens": 100}, + ) + + return _exercise_converse_incorrect_access_key + + @reset_core_stats_engine() def test_bedrock_chat_completion_error_incorrect_access_key( - monkeypatch, bedrock_converse_server, exercise_model, set_trace_info + exercise_converse_incorrect_access_key, set_trace_info, expected_metric ): """ A request is made to the server with invalid credentials. botocore will reach out to the server and receive an @@ -316,122 +248,82 @@ def test_bedrock_chat_completion_error_incorrect_access_key( ) @validate_transaction_metrics( name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) @background_task(name="test_bedrock_chat_completion") def _test(): - monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - message = [{"role": "user", "content": [{"text": "Invalid Token"}]}] - - response = bedrock_converse_server.converse( - modelId="anthropic.claude-3-sonnet-20240229-v1:0", - messages=message, - inferenceConfig={"temperature": 0.7, "maxTokens": 100}, - ) + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") - assert response + exercise_converse_incorrect_access_key() _test() -chat_completion_invalid_model_error_events = [ - ( - {"type": "LlmChatCompletionSummary"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "f4908827-3db9-4742-9103-2bbc34578b03", - "span_id": None, - "trace_id": "trace-id", - "duration": None, # Response time varies each test run - "request.model": "does-not-exist", - "response.model": "does-not-exist", - "request.temperature": 0.7, - "request.max_tokens": 100, - "response.number_of_messages": 1, - "vendor": "bedrock", - "ingest_source": "Python", - "error": True, - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "span_id": None, - "trace_id": "trace-id", - "request_id": "f4908827-3db9-4742-9103-2bbc34578b03", - "content": "Model does not exist.", - "role": "user", - "completion_id": None, - "response.model": "does-not-exist", - "sequence": 0, - "vendor": "bedrock", - "ingest_source": "Python", - }, - ), -] - - @reset_core_stats_engine() -def test_bedrock_chat_completion_error_invalid_model(bedrock_converse_server, set_trace_info): - @validate_custom_events(events_with_context_attrs(chat_completion_invalid_model_error_events)) 
+@override_llm_token_callback_settings(llm_token_count_callback) +def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( + exercise_converse_incorrect_access_key, set_trace_info, expected_metric +): + """ + A request is made to the server with invalid credentials. botocore will reach out to the server and receive an + UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer + events. The error response can also be parsed, and will be included as attributes on the recorded exception. + """ + + @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) @validate_error_trace_attributes( - "botocore.errorfactory:ValidationException", + _client_error_name, exact_attrs={ "agent": {}, "intrinsic": {}, "user": { - "http.statusCode": 400, - "error.message": "The provided model identifier is invalid.", - "error.code": "ValidationException", + "http.statusCode": 403, + "error.message": "The security token included in the request is invalid.", + "error.code": "UnrecognizedClientException", }, }, ) @validate_transaction_metrics( - name="test_bedrock_chat_completion_error_invalid_model", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) - @background_task(name="test_bedrock_chat_completion_error_invalid_model") + @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") def _test(): set_trace_info() add_custom_attribute("llm.conversation_id", "my-awesome-id") add_custom_attribute("llm.foo", "bar") add_custom_attribute("non_llm_attr", "python-agent") - with pytest.raises(_client_error): - with WithLlmCustomAttributes({"context": "attr"}): - message = [{"role": "user", "content": [{"text": "Model does not exist."}]}] + exercise_converse_incorrect_access_key() - response = bedrock_converse_server.converse( - modelId="does-not-exist", messages=message, inferenceConfig={"temperature": 0.7, "maxTokens": 100} - ) + _test() - assert response - _test() +@pytest.fixture +def exercise_converse_invalid_model(bedrock_converse_server, response_streaming, monkeypatch): + def _exercise_converse_invalid_model(): + monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") + + message = [{"role": "user", "content": [{"text": "Model does not exist."}]}] + request = bedrock_converse_server.converse_stream if response_streaming else bedrock_converse_server.converse + with pytest.raises(_client_error): + request(modelId="does-not-exist", messages=message, inferenceConfig={"temperature": 0.7, "maxTokens": 100}) + + return _exercise_converse_invalid_model @reset_core_stats_engine() -@disabled_ai_monitoring_record_content_settings -def test_bedrock_chat_completion_error_invalid_model_no_content(bedrock_converse_server, set_trace_info): - @validate_custom_events(events_sans_content(chat_completion_invalid_model_error_events)) +def test_bedrock_chat_completion_error_invalid_model(exercise_converse_invalid_model, set_trace_info, expected_metric): + @validate_custom_events(events_with_context_attrs(chat_completion_invalid_model_error_events)) @validate_error_trace_attributes( 
"botocore.errorfactory:ValidationException", exact_attrs={ @@ -445,80 +337,57 @@ def test_bedrock_chat_completion_error_invalid_model_no_content(bedrock_converse }, ) @validate_transaction_metrics( - name="test_bedrock_chat_completion_error_invalid_model_no_content", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + name="test_bedrock_chat_completion_error_invalid_model", + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) - @background_task(name="test_bedrock_chat_completion_error_invalid_model_no_content") + @background_task(name="test_bedrock_chat_completion_error_invalid_model") def _test(): set_trace_info() add_custom_attribute("llm.conversation_id", "my-awesome-id") add_custom_attribute("llm.foo", "bar") add_custom_attribute("non_llm_attr", "python-agent") - with pytest.raises(_client_error): - message = [{"role": "user", "content": [{"text": "Model does not exist."}]}] - - response = bedrock_converse_server.converse( - modelId="does-not-exist", messages=message, inferenceConfig={"temperature": 0.7, "maxTokens": 100} - ) - - assert response + with WithLlmCustomAttributes({"context": "attr"}): + exercise_converse_invalid_model() _test() @reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_converse_server, exercise_model, set_trace_info +@disabled_ai_monitoring_record_content_settings +def test_bedrock_chat_completion_error_invalid_model_no_content( + exercise_converse_invalid_model, set_trace_info, expected_metric ): - """ - A request is made to the server with invalid credentials. botocore will reach out to the server and receive an - UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer - events. The error response can also be parsed, and will be included as attributes on the recorded exception. 
- """ - - @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) + @validate_custom_events(events_sans_content(chat_completion_invalid_model_error_events)) @validate_error_trace_attributes( - _client_error_name, + "botocore.errorfactory:ValidationException", exact_attrs={ "agent": {}, "intrinsic": {}, "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", + "http.statusCode": 400, + "error.message": "The provided model identifier is invalid.", + "error.code": "ValidationException", }, }, ) @validate_transaction_metrics( - name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + name="test_bedrock_chat_completion_error_invalid_model_no_content", + scoped_metrics=[expected_metric], + rollup_metrics=[expected_metric], custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], background_task=True, ) - @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") + @background_task(name="test_bedrock_chat_completion_error_invalid_model_no_content") def _test(): - monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - message = [{"role": "user", "content": [{"text": "Invalid Token"}]}] - - response = bedrock_converse_server.converse( - modelId="anthropic.claude-3-sonnet-20240229-v1:0", - messages=message, - inferenceConfig={"temperature": 0.7, "maxTokens": 100}, - ) + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") - assert response + exercise_converse_invalid_model() _test() diff --git a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py index 4422685b9f..9acb0e8ed2 100644 --- a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py @@ -14,13 +14,13 @@ import json import os from io import BytesIO +from pprint import pformat import boto3 -import botocore.errorfactory import botocore.eventstream import botocore.exceptions import pytest -from _test_bedrock_chat_completion import ( +from _test_bedrock_chat_completion_invoke_model import ( chat_completion_expected_events, chat_completion_expected_malformed_request_body_events, chat_completion_expected_malformed_response_body_events, @@ -816,7 +816,12 @@ def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibili def test_chat_models_instrumented(): - SUPPORTED_MODELS = [model for model, _, _, _ in MODEL_EXTRACTORS if "embed" not in model] + def _is_supported_model(model): + supported_models = [model for model, _, _, _ in MODEL_EXTRACTORS if "embed" not in model] + for supported_model in supported_models: + if supported_model in model: + return True + return False _id = os.environ.get("AWS_ACCESS_KEY_ID") key = os.environ.get("AWS_SECRET_ACCESS_KEY") @@ -826,10 +831,6 @@ def test_chat_models_instrumented(): client = 
boto3.client("bedrock", "us-east-1") response = client.list_foundation_models(byOutputModality="TEXT") models = [model["modelId"] for model in response["modelSummaries"]] - not_supported = [] - for model in models: - is_supported = any(model.startswith(supported_model) for supported_model in SUPPORTED_MODELS) - if not is_supported: - not_supported.append(model) + not_supported = [model for model in models if not _is_supported_model(model)] - assert not not_supported, f"The following unsupported models were found: {not_supported}" + assert not not_supported, f"The following unsupported models were found: {pformat(not_supported)}" diff --git a/tests/external_botocore/test_bedrock_chat_completion_via_langchain.py b/tests/external_botocore/test_bedrock_chat_completion_via_langchain.py index 82537cd10a..b25516cd5b 100644 --- a/tests/external_botocore/test_bedrock_chat_completion_via_langchain.py +++ b/tests/external_botocore/test_bedrock_chat_completion_via_langchain.py @@ -13,7 +13,7 @@ # limitations under the License. import pytest -from _test_bedrock_chat_completion import ( +from _test_bedrock_chat_completion_invoke_model import ( chat_completion_langchain_expected_events, chat_completion_langchain_expected_streaming_events, ) diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py index 417e24b2d9..36a5db6619 100644 --- a/tests/external_botocore/test_bedrock_embeddings.py +++ b/tests/external_botocore/test_bedrock_embeddings.py @@ -14,6 +14,7 @@ import json import os from io import BytesIO +from pprint import pformat import boto3 import botocore.exceptions @@ -409,7 +410,12 @@ def _test(): def test_embedding_models_instrumented(): - SUPPORTED_MODELS = [model for model, _, _, _ in MODEL_EXTRACTORS if "embed" in model] + def _is_supported_model(model): + supported_models = [model for model, _, _, _ in MODEL_EXTRACTORS if "embed" in model] + for supported_model in supported_models: + if supported_model in model: + return True + return False _id = os.environ.get("AWS_ACCESS_KEY_ID") key = os.environ.get("AWS_SECRET_ACCESS_KEY") @@ -419,10 +425,6 @@ def test_embedding_models_instrumented(): client = boto3.client("bedrock", "us-east-1") response = client.list_foundation_models(byOutputModality="EMBEDDING") models = [model["modelId"] for model in response["modelSummaries"]] - not_supported = [] - for model in models: - is_supported = any(model.startswith(supported_model) for supported_model in SUPPORTED_MODELS) - if not is_supported: - not_supported.append(model) + not_supported = [model for model in models if not _is_supported_model(model)] - assert not not_supported, f"The following unsupported models were found: {not_supported}" + assert not not_supported, f"The following unsupported models were found: {pformat(not_supported)}" diff --git a/tests/testing_support/validators/validate_custom_event.py b/tests/testing_support/validators/validate_custom_event.py index deeef7fb25..5e3eb65b74 100644 --- a/tests/testing_support/validators/validate_custom_event.py +++ b/tests/testing_support/validators/validate_custom_event.py @@ -13,6 +13,7 @@ # limitations under the License. 
import time +from pprint import pformat from newrelic.common.object_wrapper import function_wrapper from testing_support.fixtures import core_application_stats_engine @@ -61,7 +62,9 @@ def _validate_custom_event_count(wrapped, instance, args, kwargs): raise else: stats = core_application_stats_engine(None) - assert stats.custom_events.num_samples == count + assert stats.custom_events.num_samples == count, ( + f"Expected: {count}, Got: {stats.custom_events.num_samples}\nEvents: {pformat(list(stats.custom_events))}" + ) return result diff --git a/tests/testing_support/validators/validate_custom_events.py b/tests/testing_support/validators/validate_custom_events.py index 8a1bad4342..e3f1c1a15a 100644 --- a/tests/testing_support/validators/validate_custom_events.py +++ b/tests/testing_support/validators/validate_custom_events.py @@ -14,6 +14,7 @@ import copy import time +from pprint import pformat from newrelic.common.object_wrapper import function_wrapper, transient_function_wrapper from testing_support.fixtures import catch_background_exceptions @@ -100,8 +101,8 @@ def _check_event_attributes(expected, captured, mismatches): def _event_details(matching_custom_events, captured, mismatches): details = [ f"matching_custom_events={matching_custom_events}", - f"mismatches={mismatches}", - f"captured_events={captured}", + f"mismatches={pformat(mismatches)}", + f"captured_events={pformat(captured)}", ] return "\n".join(details) From fa7f3ca213aa71dfc7ed660ae3b8780f1c096caf Mon Sep 17 00:00:00 2001 From: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com> Date: Wed, 19 Nov 2025 15:07:14 -0800 Subject: [PATCH 15/34] Add new Redis methods (#1588) * Add new Redis methods * Add RedisCluster methods to ignore list --- newrelic/hooks/datastore_redis.py | 2 ++ .../test_uninstrumented_rediscluster_methods.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/newrelic/hooks/datastore_redis.py b/newrelic/hooks/datastore_redis.py index 0888f4a4b3..af14746692 100644 --- a/newrelic/hooks/datastore_redis.py +++ b/newrelic/hooks/datastore_redis.py @@ -278,6 +278,7 @@ "hsetnx", "hstrlen", "hvals", + "hybrid_search", "incr", "incrby", "incrbyfloat", @@ -325,6 +326,7 @@ "mrange", "mrevrange", "mset", + "msetex", "msetnx", "numincrby", "object_encoding", diff --git a/tests/datastore_rediscluster/test_uninstrumented_rediscluster_methods.py b/tests/datastore_rediscluster/test_uninstrumented_rediscluster_methods.py index 3f2a258355..c926a2ae21 100644 --- a/tests/datastore_rediscluster/test_uninstrumented_rediscluster_methods.py +++ b/tests/datastore_rediscluster/test_uninstrumented_rediscluster_methods.py @@ -118,10 +118,14 @@ "get_node", "get_node_from_key", "get_nodes", + "get_nodes_from_slot", "get_primaries", "get_random_node", + "get_random_primary_node", + "get_random_primary_or_all_nodes", "get_redis_connection", "get_replicas", + "get_special_nodes", "keyslot", "mget_nonatomic", "monitor", From 4f5ef0d5c030e53d907ac90b0fb7cb8f9b034dba Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 17:06:14 +0000 Subject: [PATCH 16/34] Bump the github_actions group with 3 updates (#1591) Bumps the github_actions group with 3 updates: [actions/checkout](https://github.com/actions/checkout), [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) and [github/codeql-action](https://github.com/github/codeql-action). 
Updates `actions/checkout` from 5.0.1 to 6.0.0 - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/93cb6efe18208431cddfb8368fd83d5badbf9bfd...1af3b93b6815bc44a9784bd300feb67ff0d1eeb3) Updates `astral-sh/setup-uv` from 7.1.3 to 7.1.4 - [Release notes](https://github.com/astral-sh/setup-uv/releases) - [Commits](https://github.com/astral-sh/setup-uv/compare/5a7eac68fb9809dea845d802897dc5c723910fa3...1e862dfacbd1d6d858c55d9b792c756523627244) Updates `github/codeql-action` from 4.31.3 to 4.31.5 - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/014f16e7ab1402f30e7c3329d33797e7948572db...fdbfb4d2750291e159f0156def62b853c2798ca2) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: 6.0.0 dependency-type: direct:production update-type: version-update:semver-major dependency-group: github_actions - dependency-name: astral-sh/setup-uv dependency-version: 7.1.4 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: github_actions - dependency-name: github/codeql-action dependency-version: 4.31.5 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: github_actions ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/addlicense.yml | 2 +- .github/workflows/benchmarks.yml | 2 +- .github/workflows/build-ci-image.yml | 2 +- .github/workflows/deploy.yml | 4 +- .github/workflows/mega-linter.yml | 2 +- .github/workflows/tests.yml | 58 ++++++++++++++-------------- .github/workflows/trivy.yml | 4 +- 7 files changed, 37 insertions(+), 37 deletions(-) diff --git a/.github/workflows/addlicense.yml b/.github/workflows/addlicense.yml index 83e5b29ef4..171cbf7f59 100644 --- a/.github/workflows/addlicense.yml +++ b/.github/workflows/addlicense.yml @@ -39,7 +39,7 @@ jobs: permissions: contents: read steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index a65695e7c4..77e0537925 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -38,7 +38,7 @@ jobs: BASE_SHA: ${{ github.event.pull_request.base.sha }} steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 with: fetch-depth: 0 diff --git a/.github/workflows/build-ci-image.yml b/.github/workflows/build-ci-image.yml index 061233b6dd..dd3833d79c 100644 --- a/.github/workflows/build-ci-image.yml +++ b/.github/workflows/build-ci-image.yml @@ -43,7 +43,7 @@ jobs: name: Docker Build ${{ matrix.platform }} steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 with: persist-credentials: false fetch-depth: 0 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index af4739f2a3..a91dae3061 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -69,7 +69,7 @@ jobs: runs-on: ${{ matrix.os }} steps: - 
- uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 with: persist-credentials: false fetch-depth: 0 @@ -109,7 +109,7 @@ jobs: runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 with: persist-credentials: false fetch-depth: 0 diff --git a/.github/workflows/mega-linter.yml b/.github/workflows/mega-linter.yml index 0f869f3b58..99b010c0d6 100644 --- a/.github/workflows/mega-linter.yml +++ b/.github/workflows/mega-linter.yml @@ -45,7 +45,7 @@ jobs: steps: # Git Checkout - name: Checkout Code - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 with: token: ${{ secrets.PAT || secrets.GITHUB_TOKEN }} fetch-depth: 0 # Required for pushing commits to PRs diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9e47302bd4..70bdc8c6c5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -93,7 +93,7 @@ jobs: - tests steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # 6.0.0 with: python-version: "3.13" @@ -127,7 +127,7 @@ jobs: - tests steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # 6.0.0 with: python-version: "3.13" @@ -166,7 +166,7 @@ jobs: --add-host=host.docker.internal:host-gateway timeout-minutes: 30 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -231,7 +231,7 @@ jobs: --add-host=host.docker.internal:host-gateway timeout-minutes: 30 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -294,14 +294,14 @@ jobs: runs-on: windows-2025 timeout-minutes: 30 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | git fetch --tags origin - name: Install uv - uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # 7.1.3 + uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # 7.1.4 - name: Install Python run: | @@ -363,14 +363,14 @@ jobs: runs-on: windows-11-arm timeout-minutes: 30 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | git fetch --tags origin - name: Install uv - uses: astral-sh/setup-uv@5a7eac68fb9809dea845d802897dc5c723910fa3 # 7.1.3 + uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # 7.1.4 - name: Install Python run: | @@ -443,7 +443,7 @@ jobs: --add-host=host.docker.internal:host-gateway timeout-minutes: 30 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags 
run: | @@ -526,7 +526,7 @@ jobs: --health-retries 10 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -606,7 +606,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -687,7 +687,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -772,7 +772,7 @@ jobs: # from every being executed as bash commands. steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -837,7 +837,7 @@ jobs: --add-host=host.docker.internal:host-gateway timeout-minutes: 30 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -927,7 +927,7 @@ jobs: KAFKA_CFG_INTER_BROKER_LISTENER_NAME: L3 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -1005,7 +1005,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -1083,7 +1083,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -1161,7 +1161,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -1244,7 +1244,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -1327,7 +1327,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -1406,7 +1406,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -1487,7 +1487,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -1567,7 +1567,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -1647,7 +1647,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: 
actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -1726,7 +1726,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -1804,7 +1804,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -1923,7 +1923,7 @@ jobs: --add-host=host.docker.internal:host-gateway steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -2003,7 +2003,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | @@ -2081,7 +2081,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - name: Fetch git tags run: | diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index e4b0e38c9c..614ec8903e 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -32,7 +32,7 @@ jobs: steps: # Git Checkout - name: Checkout Code - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # 5.0.1 + uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 with: token: ${{ secrets.PAT || secrets.GITHUB_TOKEN }} fetch-depth: 0 @@ -61,6 +61,6 @@ jobs: - name: Upload Trivy scan results to GitHub Security tab if: ${{ github.event_name == 'schedule' }} - uses: github/codeql-action/upload-sarif@014f16e7ab1402f30e7c3329d33797e7948572db # 4.31.3 + uses: github/codeql-action/upload-sarif@fdbfb4d2750291e159f0156def62b853c2798ca2 # 4.31.5 with: sarif_file: "trivy-results.sarif" From cb33db94e3e385e0fd36b7270b6d67d4e3e36247 Mon Sep 17 00:00:00 2001 From: sgoel-nr Date: Tue, 2 Dec 2025 02:55:50 +0530 Subject: [PATCH 17/34] LangChain: Fix message timestamps, add default role assignment, and Bedrock support (#1580) * Record the request message as the time the request started for LangChain. * Tracking the original timestamp of the request for input messages that are recorded as LlmChatCompletionMessage event types. * First pass at preserving LlmChatCompletionMessage timestamp for the request with Bedrock methods. * the `kwargs` was being mapped directly to the OpenAI client and having timestamp in there caused a problem. As a quick test, only add the request timestamp after the wrapped function has been invoked. * Moved the request timestamp to its own variable instead of part of kwargs. * OpenAI async request messages were not being assigned the correct timestamp. * Trying to improve the passing of the request timestamp through for Bedrock. * Passing too many parameters. * Set a default role on input/output messages within LangChain. 
* [MegaLinter] Apply linters fixes
* Fix request_timestamp for LlmChatCompletionSummary table
* [MegaLinter] Apply linters fixes
* Bedrock Converse Streaming Support (#1565)
* Add more formatting to custom event validators
* Add streamed responses to converse mock server
* Add streaming fixtures for testing for converse
* Rename other bedrock test files
* Add tests for converse streaming
* Instrument converse streaming
* Move GeneratorProxy adjacent functions to mixin
* Fix checking of supported models
* Reorganize converse error tests
* Port new converse botocore tests to aiobotocore
* Instrument response streaming in aiobotocore converse
* Fix suggestions from code review
* Port in converse changes from strands PR
* Delete commented code
---------
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
* [MegaLinter] Apply linters fixes
* request_timestamp is now passed across different methods
* Fixed Gemini model kwargs issue
* [MegaLinter] Apply linters fixes
* Update tests to validate presence of timestamp/role and fix bugs in instrumentation.
* Update aiobotocore instrumentation to receive request timestamp.
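For reference, the timestamp and default-role handling described above follows a pattern along these lines. This is an illustrative sketch only: `invoke_client` and `record_event` are hypothetical stand-ins rather than the agent's real hook functions, and the `"user"` fallback role is an assumption, not necessarily the default the instrumentation uses.

    import time

    def call_with_request_timestamp(invoke_client, record_event, messages, **kwargs):
        # Capture the request start time up front and keep it in a local variable,
        # so it is never forwarded to the wrapped client as an unexpected kwarg.
        request_timestamp = int(1000.0 * time.time())
        response = invoke_client(messages=messages, **kwargs)
        for index, message in enumerate(messages):
            record_event(
                {
                    "timestamp": request_timestamp,  # preserve the request-time timestamp
                    "sequence": index,
                    "role": message.get("role") or "user",  # assumed fallback role
                    "content": message.get("content"),
                }
            )
        return response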
--------- Co-authored-by: Josh Bonczkowski Co-authored-by: sgoel-nr <236423107+sgoel-nr@users.noreply.github.com> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> Co-authored-by: Timothy Pansino <11214426+TimPansino@users.noreply.github.com> Co-authored-by: Uma Annamalai --- newrelic/core/custom_event.py | 2 +- newrelic/hooks/external_aiobotocore.py | 6 +- newrelic/hooks/external_botocore.py | 101 ++++++++++++++---- newrelic/hooks/mlmodel_gemini.py | 34 ++++-- newrelic/hooks/mlmodel_langchain.py | 27 ++++- newrelic/hooks/mlmodel_openai.py | 64 ++++++++--- .../_test_bedrock_chat_completion_converse.py | 12 +++ ...st_bedrock_chat_completion_invoke_model.py | 93 ++++++++++++++++ tests/mlmodel_gemini/test_text_generation.py | 3 + .../test_text_generation_error.py | 6 ++ tests/mlmodel_langchain/test_chain.py | 54 ++++++++++ tests/mlmodel_openai/test_chat_completion.py | 4 + .../test_chat_completion_error.py | 10 ++ .../test_chat_completion_error_v1.py | 7 ++ .../test_chat_completion_stream.py | 4 + .../test_chat_completion_stream_error.py | 12 +++ .../test_chat_completion_stream_error_v1.py | 7 ++ .../test_chat_completion_stream_v1.py | 4 + .../mlmodel_openai/test_chat_completion_v1.py | 4 + 19 files changed, 405 insertions(+), 49 deletions(-) diff --git a/newrelic/core/custom_event.py b/newrelic/core/custom_event.py index 9bf5f75eda..c960a0afa2 100644 --- a/newrelic/core/custom_event.py +++ b/newrelic/core/custom_event.py @@ -141,7 +141,7 @@ def create_custom_event(event_type, params, settings=None, is_ml_event=False): ) return None - intrinsics = {"type": name, "timestamp": int(1000.0 * time.time())} + intrinsics = {"type": name, "timestamp": params.get("timestamp") or int(1000.0 * time.time())} event = [intrinsics, attributes] return event diff --git a/newrelic/hooks/external_aiobotocore.py b/newrelic/hooks/external_aiobotocore.py index 15daa7bd6d..1dbb2f2816 100644 --- a/newrelic/hooks/external_aiobotocore.py +++ b/newrelic/hooks/external_aiobotocore.py @@ -98,6 +98,7 @@ async def wrap_client__make_api_call(wrapped, instance, args, kwargs): response_extractor = getattr(instance, "_nr_response_extractor", None) stream_extractor = getattr(instance, "_nr_stream_extractor", None) response_streaming = getattr(instance, "_nr_response_streaming", False) + request_timestamp = getattr(instance, "_nr_request_timestamp", None) is_converse = getattr(instance, "_nr_is_converse", False) ft = getattr(instance, "_nr_ft", None) @@ -125,6 +126,7 @@ async def wrap_client__make_api_call(wrapped, instance, args, kwargs): transaction, bedrock_args, is_converse, + request_timestamp, ) raise @@ -187,7 +189,9 @@ async def wrap_client__make_api_call(wrapped, instance, args, kwargs): if ft: ft.__exit__(None, None, None) bedrock_attrs["duration"] = ft.duration * 1000 - run_bedrock_response_extractor(response_extractor, response_body, bedrock_attrs, is_embedding, transaction) + run_bedrock_response_extractor( + response_extractor, response_body, bedrock_attrs, is_embedding, transaction, request_timestamp + ) except Exception: _logger.warning(RESPONSE_PROCESSING_FAILURE_LOG_MESSAGE, exc_info=True) diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py index e00e50b770..d481ce8450 100644 --- a/newrelic/hooks/external_botocore.py +++ b/newrelic/hooks/external_botocore.py @@ -17,6 +17,7 @@ import logging import re import sys +import time import uuid from io import BytesIO @@ -193,6 +194,7 @@ def create_chat_completion_message_event( request_id, llm_metadata_dict, 
response_id=None, + request_timestamp=None, ): if not transaction: return @@ -227,6 +229,8 @@ def create_chat_completion_message_event( if settings.ai_monitoring.record_content.enabled: chat_completion_message_dict["content"] = content + if request_timestamp: + chat_completion_message_dict["timestamp"] = request_timestamp chat_completion_message_dict.update(llm_metadata_dict) @@ -266,6 +270,8 @@ def create_chat_completion_message_event( if settings.ai_monitoring.record_content.enabled: chat_completion_message_dict["content"] = content + if request_timestamp: + chat_completion_message_dict["timestamp"] = request_timestamp chat_completion_message_dict.update(llm_metadata_dict) @@ -542,10 +548,22 @@ def extract_bedrock_cohere_model_streaming_response(response_body, bedrock_attrs def handle_bedrock_exception( - exc, is_embedding, model, span_id, trace_id, request_extractor, request_body, ft, transaction, kwargs, is_converse + exc, + is_embedding, + model, + span_id, + trace_id, + request_extractor, + request_body, + ft, + transaction, + kwargs, + is_converse, + request_timestamp=None, ): try: bedrock_attrs = {"model": model, "span_id": span_id, "trace_id": trace_id} + if is_converse: try: input_message_list = [ @@ -589,12 +607,14 @@ def handle_bedrock_exception( if is_embedding: handle_embedding_event(transaction, error_attributes) else: - handle_chat_completion_event(transaction, error_attributes) + handle_chat_completion_event(transaction, error_attributes, request_timestamp) except Exception: _logger.warning(EXCEPTION_HANDLING_FAILURE_LOG_MESSAGE, exc_info=True) -def run_bedrock_response_extractor(response_extractor, response_body, bedrock_attrs, is_embedding, transaction): +def run_bedrock_response_extractor( + response_extractor, response_body, bedrock_attrs, is_embedding, transaction, request_timestamp=None +): # Run response extractor for non-streaming responses try: response_extractor(response_body, bedrock_attrs) @@ -604,7 +624,7 @@ def run_bedrock_response_extractor(response_extractor, response_body, bedrock_at if is_embedding: handle_embedding_event(transaction, bedrock_attrs) else: - handle_chat_completion_event(transaction, bedrock_attrs) + handle_chat_completion_event(transaction, bedrock_attrs, request_timestamp) def run_bedrock_request_extractor(request_extractor, request_body, bedrock_attrs): @@ -628,6 +648,8 @@ def _wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs): if not settings.ai_monitoring.enabled: return wrapped(*args, **kwargs) + request_timestamp = int(1000.0 * time.time()) + transaction.add_ml_model_info("Bedrock", BOTOCORE_VERSION) transaction._add_agent_attribute("llm", True) @@ -683,6 +705,7 @@ def _wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs): instance._nr_ft = ft instance._nr_response_streaming = response_streaming instance._nr_settings = settings + instance._nr_request_timestamp = request_timestamp # Add a bedrock flag to instance so we can determine when make_api_call instrumentation is hit from non-Bedrock paths and bypass it if so instance._nr_is_bedrock = True @@ -703,6 +726,7 @@ def _wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs): transaction, kwargs, is_converse=False, + request_timestamp=request_timestamp, ) raise @@ -733,6 +757,8 @@ def _wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs): run_bedrock_request_extractor(request_extractor, request_body, bedrock_attrs) try: + bedrock_attrs.pop("timestamp", None) # The request timestamp is only needed for request extraction + if 
response_streaming: # Wrap EventStream object here to intercept __iter__ method instead of instrumenting class. # This class is used in numerous other services in botocore, and would cause conflicts. @@ -748,7 +774,14 @@ def _wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs): bedrock_attrs["duration"] = ft.duration * 1000 response["body"] = StreamingBody(BytesIO(response_body), len(response_body)) - run_bedrock_response_extractor(response_extractor, response_body, bedrock_attrs, is_embedding, transaction) + run_bedrock_response_extractor( + response_extractor, + response_body, + bedrock_attrs, + is_embedding, + transaction, + request_timestamp=request_timestamp, + ) except Exception: _logger.warning(RESPONSE_PROCESSING_FAILURE_LOG_MESSAGE, exc_info=True) @@ -770,6 +803,8 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs): if not settings.ai_monitoring.enabled: return wrapped(*args, **kwargs) + request_timestamp = int(1000.0 * time.time()) + transaction.add_ml_model_info("Bedrock", BOTOCORE_VERSION) transaction._add_agent_attribute("llm", True) @@ -800,6 +835,7 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs): instance._nr_ft = ft instance._nr_response_streaming = response_streaming instance._nr_settings = settings + instance._nr_request_timestamp = request_timestamp instance._nr_is_converse = True # Add a bedrock flag to instance so we can determine when make_api_call instrumentation is hit from non-Bedrock paths and bypass it if so @@ -810,7 +846,18 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs): response = wrapped(*args, **kwargs) except Exception as exc: handle_bedrock_exception( - exc, False, model, span_id, trace_id, request_extractor, {}, ft, transaction, kwargs, is_converse=True + exc, + False, + model, + span_id, + trace_id, + request_extractor, + {}, + ft, + transaction, + kwargs, + is_converse=True, + request_timestamp=request_timestamp, ) raise @@ -824,6 +871,7 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs): response_headers = response.get("ResponseMetadata", {}).get("HTTPHeaders") or {} bedrock_attrs = extract_bedrock_converse_attrs(kwargs, response, response_headers, model, span_id, trace_id) + bedrock_attrs["timestamp"] = request_timestamp try: if response_streaming: @@ -838,7 +886,9 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs): ft.__exit__(None, None, None) bedrock_attrs["duration"] = ft.duration * 1000 - run_bedrock_response_extractor(response_extractor, {}, bedrock_attrs, False, transaction) + run_bedrock_response_extractor( + response_extractor, {}, bedrock_attrs, False, transaction, request_timestamp=request_timestamp + ) except Exception: _logger.warning(RESPONSE_PROCESSING_FAILURE_LOG_MESSAGE, exc_info=True) @@ -888,7 +938,7 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, sp class BedrockRecordEventMixin: - def record_events_on_stop_iteration(self, transaction): + def record_events_on_stop_iteration(self, transaction, request_timestamp=None): if hasattr(self, "_nr_ft"): bedrock_attrs = getattr(self, "_nr_bedrock_attrs", {}) self._nr_ft.__exit__(None, None, None) @@ -899,14 +949,14 @@ def record_events_on_stop_iteration(self, transaction): try: bedrock_attrs["duration"] = self._nr_ft.duration * 1000 - handle_chat_completion_event(transaction, bedrock_attrs) + handle_chat_completion_event(transaction, bedrock_attrs, request_timestamp) except Exception: _logger.warning(RESPONSE_PROCESSING_FAILURE_LOG_MESSAGE, 
exc_info=True) # Clear cached data as this can be very large. self._nr_bedrock_attrs.clear() - def record_error(self, transaction, exc): + def record_error(self, transaction, exc, request_timestamp=None): if hasattr(self, "_nr_ft"): try: ft = self._nr_ft @@ -929,24 +979,24 @@ def record_error(self, transaction, exc): ft.__exit__(*sys.exc_info()) error_attributes["duration"] = ft.duration * 1000 - handle_chat_completion_event(transaction, error_attributes) + handle_chat_completion_event(transaction, error_attributes, request_timestamp) # Clear cached data as this can be very large. error_attributes.clear() except Exception: _logger.warning(EXCEPTION_HANDLING_FAILURE_LOG_MESSAGE, exc_info=True) - def record_stream_chunk(self, event, transaction): + def record_stream_chunk(self, event, transaction, request_timestamp=None): if event: try: if getattr(self, "_nr_is_converse", False): return self.converse_record_stream_chunk(event, transaction) else: - return self.invoke_record_stream_chunk(event, transaction) + return self.invoke_record_stream_chunk(event, transaction, request_timestamp) except Exception: _logger.warning(RESPONSE_EXTRACTOR_FAILURE_LOG_MESSAGE, exc_info=True) - def invoke_record_stream_chunk(self, event, transaction): + def invoke_record_stream_chunk(self, event, transaction, request_timestamp=None): bedrock_attrs = getattr(self, "_nr_bedrock_attrs", {}) chunk = json.loads(event["chunk"]["bytes"].decode("utf-8")) self._nr_model_extractor(chunk, bedrock_attrs) @@ -954,7 +1004,7 @@ def invoke_record_stream_chunk(self, event, transaction): # So we need to call the record events here since stop iteration will not be raised. _type = chunk.get("type") if _type == "content_block_stop": - self.record_events_on_stop_iteration(transaction) + self.record_events_on_stop_iteration(transaction, request_timestamp) def converse_record_stream_chunk(self, event, transaction): bedrock_attrs = getattr(self, "_nr_bedrock_attrs", {}) @@ -984,6 +1034,7 @@ def __iter__(self): class GeneratorProxy(BedrockRecordEventMixin, ObjectProxy): def __init__(self, wrapped): super().__init__(wrapped) + self._nr_request_timestamp = int(1000.0 * time.time()) def __iter__(self): return self @@ -996,12 +1047,12 @@ def __next__(self): return_val = None try: return_val = self.__wrapped__.__next__() - self.record_stream_chunk(return_val, transaction) + self.record_stream_chunk(return_val, transaction, self._nr_request_timestamp) except StopIteration: - self.record_events_on_stop_iteration(transaction) + self.record_events_on_stop_iteration(transaction, self._nr_request_timestamp) raise except Exception as exc: - self.record_error(transaction, exc) + self.record_error(transaction, exc, self._nr_request_timestamp) raise return return_val @@ -1020,6 +1071,10 @@ def __aiter__(self): class AsyncGeneratorProxy(BedrockRecordEventMixin, ObjectProxy): + def __init__(self, wrapped): + super().__init__(wrapped) + self._nr_request_timestamp = int(1000.0 * time.time()) + def __aiter__(self): return self @@ -1030,12 +1085,12 @@ async def __anext__(self): return_val = None try: return_val = await self.__wrapped__.__anext__() - self.record_stream_chunk(return_val, transaction) + self.record_stream_chunk(return_val, transaction, self._nr_request_timestamp) except StopAsyncIteration: - self.record_events_on_stop_iteration(transaction) + self.record_events_on_stop_iteration(transaction, self._nr_request_timestamp) raise except Exception as exc: - self.record_error(transaction, exc) + self.record_error(transaction, exc, 
self._nr_request_timestamp) raise return return_val @@ -1084,7 +1139,7 @@ def handle_embedding_event(transaction, bedrock_attrs): transaction.record_custom_event("LlmEmbedding", embedding_dict) -def handle_chat_completion_event(transaction, bedrock_attrs): +def handle_chat_completion_event(transaction, bedrock_attrs, request_timestamp=None): chat_completion_id = str(uuid.uuid4()) # Grab LLM-related custom attributes off of the transaction to store as metadata on LLM events custom_attrs_dict = transaction._custom_params @@ -1128,6 +1183,7 @@ def handle_chat_completion_event(transaction, bedrock_attrs): "response.number_of_messages": number_of_messages, "response.choices.finish_reason": bedrock_attrs.get("response.choices.finish_reason", None), "error": bedrock_attrs.get("error", None), + "timestamp": request_timestamp or None, } chat_completion_summary_dict.update(llm_metadata_dict) chat_completion_summary_dict = {k: v for k, v in chat_completion_summary_dict.items() if v is not None} @@ -1144,6 +1200,7 @@ def handle_chat_completion_event(transaction, bedrock_attrs): request_id=request_id, llm_metadata_dict=llm_metadata_dict, response_id=response_id, + request_timestamp=request_timestamp, ) diff --git a/newrelic/hooks/mlmodel_gemini.py b/newrelic/hooks/mlmodel_gemini.py index 8aeb1355d0..6fffbebb47 100644 --- a/newrelic/hooks/mlmodel_gemini.py +++ b/newrelic/hooks/mlmodel_gemini.py @@ -14,6 +14,7 @@ import logging import sys +import time import uuid import google @@ -226,6 +227,7 @@ def wrap_generate_content_sync(wrapped, instance, args, kwargs): transaction._add_agent_attribute("llm", True) completion_id = str(uuid.uuid4()) + request_timestamp = int(1000.0 * time.time()) ft = FunctionTrace(name=wrapped.__name__, group="Llm/completion/Gemini") ft.__enter__() @@ -236,12 +238,12 @@ def wrap_generate_content_sync(wrapped, instance, args, kwargs): except Exception as exc: # In error cases, exit the function trace in _record_generation_error before recording the LLM error event so # that the duration is calculated correctly. - _record_generation_error(transaction, linking_metadata, completion_id, kwargs, ft, exc) + _record_generation_error(transaction, linking_metadata, completion_id, kwargs, ft, exc, request_timestamp) raise ft.__exit__(None, None, None) - _handle_generation_success(transaction, linking_metadata, completion_id, kwargs, ft, return_val) + _handle_generation_success(transaction, linking_metadata, completion_id, kwargs, ft, return_val, request_timestamp) return return_val @@ -260,6 +262,7 @@ async def wrap_generate_content_async(wrapped, instance, args, kwargs): transaction._add_agent_attribute("llm", True) completion_id = str(uuid.uuid4()) + request_timestamp = int(1000.0 * time.time()) ft = FunctionTrace(name=wrapped.__name__, group="Llm/completion/Gemini") ft.__enter__() @@ -269,17 +272,17 @@ async def wrap_generate_content_async(wrapped, instance, args, kwargs): except Exception as exc: # In error cases, exit the function trace in _record_generation_error before recording the LLM error event so # that the duration is calculated correctly. 
- _record_generation_error(transaction, linking_metadata, completion_id, kwargs, ft, exc) + _record_generation_error(transaction, linking_metadata, completion_id, kwargs, ft, exc, request_timestamp) raise ft.__exit__(None, None, None) - _handle_generation_success(transaction, linking_metadata, completion_id, kwargs, ft, return_val) + _handle_generation_success(transaction, linking_metadata, completion_id, kwargs, ft, return_val, request_timestamp) return return_val -def _record_generation_error(transaction, linking_metadata, completion_id, kwargs, ft, exc): +def _record_generation_error(transaction, linking_metadata, completion_id, kwargs, ft, exc, request_timestamp=None): span_id = linking_metadata.get("span.id") trace_id = linking_metadata.get("trace.id") @@ -339,6 +342,7 @@ def _record_generation_error(transaction, linking_metadata, completion_id, kwarg "ingest_source": "Python", "duration": ft.duration * 1000, "error": True, + "timestamp": request_timestamp, } llm_metadata = _get_llm_attributes(transaction) error_chat_completion_dict.update(llm_metadata) @@ -357,12 +361,15 @@ def _record_generation_error(transaction, linking_metadata, completion_id, kwarg request_model, llm_metadata, output_message_list, + request_timestamp, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) -def _handle_generation_success(transaction, linking_metadata, completion_id, kwargs, ft, return_val): +def _handle_generation_success( + transaction, linking_metadata, completion_id, kwargs, ft, return_val, request_timestamp=None +): if not return_val: return @@ -370,13 +377,17 @@ def _handle_generation_success(transaction, linking_metadata, completion_id, kwa # Response objects are pydantic models so this function call converts the response into a dict response = return_val.model_dump() if hasattr(return_val, "model_dump") else return_val - _record_generation_success(transaction, linking_metadata, completion_id, kwargs, ft, response) + _record_generation_success( + transaction, linking_metadata, completion_id, kwargs, ft, response, request_timestamp + ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) -def _record_generation_success(transaction, linking_metadata, completion_id, kwargs, ft, response): +def _record_generation_success( + transaction, linking_metadata, completion_id, kwargs, ft, response, request_timestamp=None +): span_id = linking_metadata.get("span.id") trace_id = linking_metadata.get("trace.id") try: @@ -436,6 +447,7 @@ def _record_generation_success(transaction, linking_metadata, completion_id, kwa # message This value should be 2 in almost all cases since we will report a summary event for each # separate request (every input and output from the LLM) "response.number_of_messages": 1 + len(output_message_list), + "timestamp": request_timestamp, } llm_metadata = _get_llm_attributes(transaction) @@ -452,6 +464,7 @@ def _record_generation_success(transaction, linking_metadata, completion_id, kwa request_model, llm_metadata, output_message_list, + request_timestamp, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) @@ -467,6 +480,7 @@ def create_chat_completion_message_event( request_model, llm_metadata, output_message_list, + request_timestamp=None, ): try: settings = transaction.settings or global_settings() @@ -510,6 +524,8 @@ def create_chat_completion_message_event( if settings.ai_monitoring.record_content.enabled: chat_completion_input_message_dict["content"] = input_message_content + if 
request_timestamp: + chat_completion_input_message_dict["timestamp"] = request_timestamp chat_completion_input_message_dict.update(llm_metadata) @@ -548,6 +564,8 @@ def create_chat_completion_message_event( if settings.ai_monitoring.record_content.enabled: chat_completion_output_message_dict["content"] = message_content + if request_timestamp: + chat_completion_output_message_dict["timestamp"] = request_timestamp chat_completion_output_message_dict.update(llm_metadata) diff --git a/newrelic/hooks/mlmodel_langchain.py b/newrelic/hooks/mlmodel_langchain.py index cfcc031e9d..318e1313a7 100644 --- a/newrelic/hooks/mlmodel_langchain.py +++ b/newrelic/hooks/mlmodel_langchain.py @@ -14,6 +14,7 @@ import logging import sys +import time import traceback import uuid @@ -549,6 +550,7 @@ async def wrap_chain_async_run(wrapped, instance, args, kwargs): transaction._add_agent_attribute("llm", True) run_args = bind_args(wrapped, args, kwargs) + run_args["timestamp"] = int(1000.0 * time.time()) completion_id = str(uuid.uuid4()) add_nr_completion_id(run_args, completion_id) # Check to see if launched from agent or directly from chain. @@ -593,6 +595,7 @@ def wrap_chain_sync_run(wrapped, instance, args, kwargs): transaction._add_agent_attribute("llm", True) run_args = bind_args(wrapped, args, kwargs) + run_args["timestamp"] = int(1000.0 * time.time()) completion_id = str(uuid.uuid4()) add_nr_completion_id(run_args, completion_id) # Check to see if launched from agent or directly from chain. @@ -658,12 +661,21 @@ def _create_error_chain_run_events(transaction, instance, run_args, completion_i "response.number_of_messages": len(input_message_list), "tags": tags, "error": True, + "timestamp": run_args.get("timestamp") or None, } ) full_chat_completion_summary_dict.update(llm_metadata_dict) transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict) create_chat_completion_message_event( - transaction, input_message_list, completion_id, span_id, trace_id, run_id, llm_metadata_dict, [] + transaction, + input_message_list, + completion_id, + span_id, + trace_id, + run_id, + llm_metadata_dict, + [], + run_args["timestamp"] or None, ) @@ -728,8 +740,13 @@ def _create_successful_chain_run_events( "duration": duration, "response.number_of_messages": len(input_message_list) + len(output_message_list), "tags": tags, + "timestamp": run_args.get("timestamp") or None, } ) + + if run_args.get("timestamp"): + full_chat_completion_summary_dict["timestamp"] = run_args.get("timestamp") + full_chat_completion_summary_dict.update(llm_metadata_dict) transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict) create_chat_completion_message_event( @@ -741,6 +758,7 @@ def _create_successful_chain_run_events( run_id, llm_metadata_dict, output_message_list, + run_args["timestamp"] or None, ) @@ -753,6 +771,7 @@ def create_chat_completion_message_event( run_id, llm_metadata_dict, output_message_list, + request_timestamp=None, ): settings = transaction.settings if transaction.settings is not None else global_settings() @@ -768,9 +787,12 @@ def create_chat_completion_message_event( "vendor": "langchain", "ingest_source": "Python", "virtual_llm": True, + "role": "user", # default role for input messages, overridden by values in llm_metadata_dict } if settings.ai_monitoring.record_content.enabled: chat_completion_input_message_dict["content"] = message + if request_timestamp: + chat_completion_input_message_dict["timestamp"] = request_timestamp 
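(For orientation, the hunks above and below all apply the same pattern: capture a millisecond epoch timestamp before the wrapped LLM call, thread it through the event-recording helpers, and set the `timestamp` attribute only when a value was captured. A minimal standalone sketch of that pattern follows; the `capture_request_timestamp` and `build_message_event` names and the attribute set are illustrative assumptions, not the agent's actual API.)

```python
import time
import uuid


def capture_request_timestamp():
    # Milliseconds since the epoch, matching int(1000.0 * time.time()) in the hooks.
    return int(1000.0 * time.time())


def build_message_event(content, request_timestamp=None, record_content=True):
    # Illustrative event dict; the real hooks attach many more attributes.
    event = {"id": str(uuid.uuid4()), "vendor": "example", "ingest_source": "Python"}
    if record_content:
        event["content"] = content
    # Only stamp the event when a timestamp was captured upstream.
    if request_timestamp:
        event["timestamp"] = request_timestamp
    return event


if __name__ == "__main__":
    ts = capture_request_timestamp()  # captured once, before the LLM call is made
    print(build_message_event("hello", request_timestamp=ts))
```

Guarding with `if request_timestamp:` rather than assigning unconditionally means call paths that do not pass the new parameter simply omit the attribute instead of recording a `timestamp` of `None`.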
chat_completion_input_message_dict.update(llm_metadata_dict) transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_input_message_dict) @@ -791,9 +813,12 @@ def create_chat_completion_message_event( "ingest_source": "Python", "is_response": True, "virtual_llm": True, + "role": "assistant", # default role for output messages, overridden by values in llm_metadata_dict } if settings.ai_monitoring.record_content.enabled: chat_completion_output_message_dict["content"] = message + if request_timestamp: + chat_completion_output_message_dict["timestamp"] = request_timestamp chat_completion_output_message_dict.update(llm_metadata_dict) transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_output_message_dict) diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index c3f7960b6e..59f7060394 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -15,6 +15,7 @@ import json import logging import sys +import time import traceback import uuid @@ -84,6 +85,8 @@ def wrap_chat_completion_sync(wrapped, instance, args, kwargs): if (kwargs.get("extra_headers") or {}).get("X-Stainless-Raw-Response") == "stream": return wrapped(*args, **kwargs) + request_timestamp = int(1000.0 * time.time()) + settings = transaction.settings if transaction.settings is not None else global_settings() if not settings.ai_monitoring.enabled: return wrapped(*args, **kwargs) @@ -100,9 +103,10 @@ def wrap_chat_completion_sync(wrapped, instance, args, kwargs): try: return_val = wrapped(*args, **kwargs) except Exception as exc: - _record_completion_error(transaction, linking_metadata, completion_id, kwargs, ft, exc) + _record_completion_error(transaction, linking_metadata, completion_id, kwargs, ft, exc, request_timestamp) raise - _handle_completion_success(transaction, linking_metadata, completion_id, kwargs, ft, return_val) + + _handle_completion_success(transaction, linking_metadata, completion_id, kwargs, ft, return_val, request_timestamp) return return_val @@ -134,6 +138,7 @@ def create_chat_completion_message_event( request_id, llm_metadata, output_message_list, + request_timestamp=None, ): settings = transaction.settings if transaction.settings is not None else global_settings() @@ -168,6 +173,8 @@ def create_chat_completion_message_event( if settings.ai_monitoring.record_content.enabled: chat_completion_input_message_dict["content"] = message_content + if request_timestamp: + chat_completion_input_message_dict["timestamp"] = request_timestamp chat_completion_input_message_dict.update(llm_metadata) @@ -209,6 +216,8 @@ def create_chat_completion_message_event( if settings.ai_monitoring.record_content.enabled: chat_completion_output_message_dict["content"] = message_content + if request_timestamp: + chat_completion_output_message_dict["timestamp"] = request_timestamp chat_completion_output_message_dict.update(llm_metadata) @@ -403,6 +412,8 @@ async def wrap_chat_completion_async(wrapped, instance, args, kwargs): if (kwargs.get("extra_headers") or {}).get("X-Stainless-Raw-Response") == "stream": return await wrapped(*args, **kwargs) + request_timestamp = int(1000.0 * time.time()) + settings = transaction.settings if transaction.settings is not None else global_settings() if not settings.ai_monitoring.enabled: return await wrapped(*args, **kwargs) @@ -419,14 +430,16 @@ async def wrap_chat_completion_async(wrapped, instance, args, kwargs): try: return_val = await wrapped(*args, **kwargs) except Exception as exc: - 
_record_completion_error(transaction, linking_metadata, completion_id, kwargs, ft, exc) + _record_completion_error(transaction, linking_metadata, completion_id, kwargs, ft, exc, request_timestamp) raise - _handle_completion_success(transaction, linking_metadata, completion_id, kwargs, ft, return_val) + _handle_completion_success(transaction, linking_metadata, completion_id, kwargs, ft, return_val, request_timestamp) return return_val -def _handle_completion_success(transaction, linking_metadata, completion_id, kwargs, ft, return_val): +def _handle_completion_success( + transaction, linking_metadata, completion_id, kwargs, ft, return_val, request_timestamp=None +): settings = transaction.settings if transaction.settings is not None else global_settings() stream = kwargs.get("stream", False) # Only if streaming and streaming monitoring is enabled and the response is not empty @@ -469,12 +482,16 @@ def _handle_completion_success(transaction, linking_metadata, completion_id, kwa # openai._legacy_response.LegacyAPIResponse response = json.loads(response.http_response.text.strip()) - _record_completion_success(transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response) + _record_completion_success( + transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response, request_timestamp + ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) -def _record_completion_success(transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response): +def _record_completion_success( + transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response, request_timestamp=None +): span_id = linking_metadata.get("span.id") trace_id = linking_metadata.get("trace.id") try: @@ -552,6 +569,7 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa response_headers, "x-ratelimit-remaining-tokens_usage_based", True ), "response.number_of_messages": len(input_message_list) + len(output_message_list), + "timestamp": request_timestamp, } llm_metadata = _get_llm_attributes(transaction) full_chat_completion_summary_dict.update(llm_metadata) @@ -569,12 +587,13 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa request_id, llm_metadata, output_message_list, + request_timestamp, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) -def _record_completion_error(transaction, linking_metadata, completion_id, kwargs, ft, exc): +def _record_completion_error(transaction, linking_metadata, completion_id, kwargs, ft, exc, request_timestamp=None): span_id = linking_metadata.get("span.id") trace_id = linking_metadata.get("trace.id") request_message_list = kwargs.get("messages", None) or [] @@ -635,6 +654,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg "response.organization": exc_organization, "duration": ft.duration * 1000, "error": True, + "timestamp": request_timestamp, } llm_metadata = _get_llm_attributes(transaction) error_chat_completion_dict.update(llm_metadata) @@ -655,6 +675,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg request_id, llm_metadata, output_message_list, + request_timestamp, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) @@ -719,6 +740,7 @@ async def wrap_base_client_process_response_async(wrapped, instance, 
args, kwarg class GeneratorProxy(ObjectProxy): def __init__(self, wrapped): super().__init__(wrapped) + self._nr_request_timestamp = int(1000.0 * time.time()) def __iter__(self): return self @@ -733,10 +755,10 @@ def __next__(self): return_val = self.__wrapped__.__next__() _record_stream_chunk(self, return_val) except StopIteration: - _record_events_on_stop_iteration(self, transaction) + _record_events_on_stop_iteration(self, transaction, self._nr_request_timestamp) raise except Exception as exc: - _handle_streaming_completion_error(self, transaction, exc) + _handle_streaming_completion_error(self, transaction, exc, self._nr_request_timestamp) raise return return_val @@ -770,7 +792,7 @@ def _record_stream_chunk(self, return_val): _logger.warning(STREAM_PARSING_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) -def _record_events_on_stop_iteration(self, transaction): +def _record_events_on_stop_iteration(self, transaction, request_timestamp=None): if hasattr(self, "_nr_ft"): # We first check for our saved linking metadata before making a new call to get_trace_linking_metadata # Directly calling get_trace_linking_metadata() causes the incorrect span ID to be captured and associated with the LLM call @@ -787,7 +809,14 @@ def _record_events_on_stop_iteration(self, transaction): completion_id = str(uuid.uuid4()) response_headers = openai_attrs.get("response_headers") or {} _record_completion_success( - transaction, linking_metadata, completion_id, openai_attrs, self._nr_ft, response_headers, None + transaction, + linking_metadata, + completion_id, + openai_attrs, + self._nr_ft, + response_headers, + None, + request_timestamp, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) @@ -802,7 +831,7 @@ def _record_events_on_stop_iteration(self, transaction): self._nr_openai_attrs.clear() -def _handle_streaming_completion_error(self, transaction, exc): +def _handle_streaming_completion_error(self, transaction, exc, request_timestamp=None): if hasattr(self, "_nr_ft"): openai_attrs = getattr(self, "_nr_openai_attrs", {}) @@ -812,12 +841,15 @@ def _handle_streaming_completion_error(self, transaction, exc): return linking_metadata = get_trace_linking_metadata() completion_id = str(uuid.uuid4()) - _record_completion_error(transaction, linking_metadata, completion_id, openai_attrs, self._nr_ft, exc) + _record_completion_error( + transaction, linking_metadata, completion_id, openai_attrs, self._nr_ft, exc, request_timestamp + ) class AsyncGeneratorProxy(ObjectProxy): def __init__(self, wrapped): super().__init__(wrapped) + self._nr_request_timestamp = int(1000.0 * time.time()) def __aiter__(self): self._nr_wrapped_iter = self.__wrapped__.__aiter__() @@ -833,10 +865,10 @@ async def __anext__(self): return_val = await self._nr_wrapped_iter.__anext__() _record_stream_chunk(self, return_val) except StopAsyncIteration: - _record_events_on_stop_iteration(self, transaction) + _record_events_on_stop_iteration(self, transaction, self._nr_request_timestamp) raise except Exception as exc: - _handle_streaming_completion_error(self, transaction, exc) + _handle_streaming_completion_error(self, transaction, exc, self._nr_request_timestamp) raise return return_val diff --git a/tests/external_botocore/_test_bedrock_chat_completion_converse.py b/tests/external_botocore/_test_bedrock_chat_completion_converse.py index cdec652292..7cde46faf8 100644 --- a/tests/external_botocore/_test_bedrock_chat_completion_converse.py +++ 
b/tests/external_botocore/_test_bedrock_chat_completion_converse.py @@ -20,6 +20,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -40,6 +41,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", @@ -58,6 +60,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", @@ -76,6 +79,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", @@ -98,6 +102,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -118,6 +123,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "f070b880-e0fb-4537-8093-796671c39239", @@ -136,6 +142,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "f070b880-e0fb-4537-8093-796671c39239", @@ -154,6 +161,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "f070b880-e0fb-4537-8093-796671c39239", @@ -176,6 +184,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -196,6 +205,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "e1206e19-2318-4a9d-be98-017c73f06118", @@ -216,6 +226,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "f4908827-3db9-4742-9103-2bbc34578b03", @@ -236,6 +247,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, diff --git a/tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py b/tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py index fd970b0603..f72b9fa583 100644 --- a/tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py @@ -31,6 +31,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -51,6 +52,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "48c7ee13-7790-461f-959f-04b0a4cf91c8", @@ -69,6 +71,7 @@ {"type": 
"LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "48c7ee13-7790-461f-959f-04b0a4cf91c8", @@ -90,6 +93,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -110,6 +114,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "81508a1c-33a8-4294-8743-f0c629af2f49", @@ -128,6 +133,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "81508a1c-33a8-4294-8743-f0c629af2f49", @@ -149,6 +155,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -170,6 +177,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": "1234-0", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "228ee63f-4eca-4b7d-b679-bc920de63525", @@ -188,6 +196,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": "1234-1", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "228ee63f-4eca-4b7d-b679-bc920de63525", @@ -209,6 +218,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -229,6 +239,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "6a886158-b39f-46ce-b214-97458ab76f2f", @@ -247,6 +258,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "6a886158-b39f-46ce-b214-97458ab76f2f", @@ -268,6 +280,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -288,6 +301,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "ab38295d-df9c-4141-8173-38221651bf46", @@ -306,6 +320,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "ab38295d-df9c-4141-8173-38221651bf46", @@ -327,6 +342,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -348,6 +364,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "12912a17-aa13-45f3-914c-cc82166f3601", @@ -366,6 +383,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "12912a17-aa13-45f3-914c-cc82166f3601", @@ -387,6 +405,7 @@ 
{"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -407,6 +426,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "a168214d-742d-4244-bd7f-62214ffa07df", @@ -425,6 +445,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "a168214d-742d-4244-bd7f-62214ffa07df", @@ -448,6 +469,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -468,6 +490,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "48c7ee13-7790-461f-959f-04b0a4cf91c8", @@ -486,6 +509,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "48c7ee13-7790-461f-959f-04b0a4cf91c8", @@ -507,6 +531,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -525,6 +550,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "884db5c9-18ab-4f27-8892-33656176a2e6", @@ -543,6 +569,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "884db5c9-18ab-4f27-8892-33656176a2e6", @@ -564,6 +591,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -581,6 +609,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "1a72a1f6-310f-469c-af1d-2c59eb600089", @@ -599,6 +628,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "1a72a1f6-310f-469c-af1d-2c59eb600089", @@ -620,6 +650,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -637,6 +668,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "e8fc1dd7-3d1e-42c6-9c58-535cae563bff", @@ -655,6 +687,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "e8fc1dd7-3d1e-42c6-9c58-535cae563bff", @@ -676,6 +709,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", 
"span_id": None, @@ -694,6 +728,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "cce6b34c-812c-4f97-8885-515829aa9639", @@ -712,6 +747,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "cce6b34c-812c-4f97-8885-515829aa9639", @@ -735,6 +771,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -755,6 +792,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "48c7ee13-7790-461f-959f-04b0a4cf91c8", @@ -773,6 +811,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "48c7ee13-7790-461f-959f-04b0a4cf91c8", @@ -794,6 +833,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -812,6 +852,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "884db5c9-18ab-4f27-8892-33656176a2e6", @@ -830,6 +871,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "884db5c9-18ab-4f27-8892-33656176a2e6", @@ -851,6 +893,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -869,6 +912,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "1a72a1f6-310f-469c-af1d-2c59eb600089", @@ -887,6 +931,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "1a72a1f6-310f-469c-af1d-2c59eb600089", @@ -908,6 +953,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -926,6 +972,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "96c7306d-2d60-4629-83e9-dbd6befb0e4e", @@ -944,6 +991,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "96c7306d-2d60-4629-83e9-dbd6befb0e4e", @@ -965,6 +1013,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -983,6 +1032,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": 
"my-awesome-id", "llm.foo": "bar", "request_id": "cce6b34c-812c-4f97-8885-515829aa9639", @@ -1001,6 +1051,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "cce6b34c-812c-4f97-8885-515829aa9639", @@ -1025,6 +1076,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1045,6 +1097,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "48c7ee13-7790-461f-959f-04b0a4cf91c8", @@ -1063,6 +1116,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "48c7ee13-7790-461f-959f-04b0a4cf91c8", @@ -1084,6 +1138,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1104,6 +1159,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "request_id": "b427270f-371a-458d-81b6-a05aafb2704c", "span_id": None, "trace_id": "trace-id", @@ -1122,6 +1178,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "request_id": "b427270f-371a-458d-81b6-a05aafb2704c", "span_id": None, "trace_id": "trace-id", @@ -1143,6 +1200,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1163,6 +1221,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "request_id": "a645548f-0b3a-47ce-a675-f51e6e9037de", "span_id": None, "trace_id": "trace-id", @@ -1181,6 +1240,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "request_id": "a645548f-0b3a-47ce-a675-f51e6e9037de", "span_id": None, "trace_id": "trace-id", @@ -1202,6 +1262,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1221,6 +1282,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "1efe6197-80f9-43a6-89a5-bb536c1b822f", @@ -1239,6 +1301,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "1efe6197-80f9-43a6-89a5-bb536c1b822f", @@ -1260,6 +1323,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1281,6 +1345,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "request_id": "4f8ab6c5-42d1-4e35-9573-30f9f41f821e", "span_id": None, "trace_id": "trace-id", @@ -1299,6 +1364,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, 
"request_id": "4f8ab6c5-42d1-4e35-9573-30f9f41f821e", "span_id": None, "trace_id": "trace-id", @@ -1320,6 +1386,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1340,6 +1407,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "request_id": "6dd99878-0919-4f92-850c-48f50f923b76", "span_id": None, "trace_id": "trace-id", @@ -1358,6 +1426,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "request_id": "6dd99878-0919-4f92-850c-48f50f923b76", "span_id": None, "trace_id": "trace-id", @@ -1381,6 +1450,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "f4908827-3db9-4742-9103-2bbc34578b03", @@ -1402,6 +1472,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1422,6 +1493,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "48c7ee13-7790-461f-959f-04b0a4cf91c8", @@ -1442,6 +1514,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1462,6 +1535,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "15b39c8b-8e85-42c9-9623-06720301bda3", @@ -1482,6 +1556,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1502,6 +1577,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "9021791d-3797-493d-9277-e33aa6f6d544", @@ -1522,6 +1598,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1542,6 +1619,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "37396f55-b721-4bae-9461-4c369f5a080d", @@ -1562,6 +1640,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1582,6 +1661,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "282ba076-576f-46aa-a2e6-680392132e87", @@ -1602,6 +1682,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1622,6 +1703,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", 
"request_id": "22476490-a0d6-42db-b5ea-32d0b8a7f751", @@ -1642,6 +1724,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1662,6 +1745,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "22476490-a0d6-42db-b5ea-32d0b8a7f751", @@ -1685,6 +1769,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1705,6 +1790,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1724,6 +1810,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "81508a1c-33a8-4294-8743-f0c629af2f49", @@ -1745,6 +1832,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1764,6 +1852,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "a5a8cebb-fd33-4437-8168-5667fbdfc1fb", @@ -1785,6 +1874,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1804,6 +1894,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "a5a8cebb-fd33-4437-8168-5667fbdfc1fb", @@ -1826,6 +1917,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -1845,6 +1937,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, diff --git a/tests/mlmodel_gemini/test_text_generation.py b/tests/mlmodel_gemini/test_text_generation.py index faec66aa75..1c789f8197 100644 --- a/tests/mlmodel_gemini/test_text_generation.py +++ b/tests/mlmodel_gemini/test_text_generation.py @@ -37,6 +37,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -56,6 +57,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -73,6 +75,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, diff --git a/tests/mlmodel_gemini/test_text_generation_error.py b/tests/mlmodel_gemini/test_text_generation_error.py index 5e6f1c04de..eb8aec950f 100644 --- a/tests/mlmodel_gemini/test_text_generation_error.py +++ b/tests/mlmodel_gemini/test_text_generation_error.py @@ -42,6 +42,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID 
that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -58,6 +59,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -145,6 +147,7 @@ def _test(): {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -162,6 +165,7 @@ def _test(): {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -246,6 +250,7 @@ def test_text_generation_invalid_request_error_invalid_model_chat(gemini_dev_cli {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "span_id": None, "trace_id": "trace-id", "duration": None, # Response time varies each test run @@ -262,6 +267,7 @@ def test_text_generation_invalid_request_error_invalid_model_chat(gemini_dev_cli {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "span_id": None, "trace_id": "trace-id", "content": "Invalid API key.", diff --git a/tests/mlmodel_langchain/test_chain.py b/tests/mlmodel_langchain/test_chain.py index a6b7470a9a..2f52f85504 100644 --- a/tests/mlmodel_langchain/test_chain.py +++ b/tests/mlmodel_langchain/test_chain.py @@ -65,6 +65,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -83,6 +84,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -93,6 +95,7 @@ "sequence": 0, "vendor": "langchain", "ingest_source": "Python", + "role": "user", "virtual_llm": True, }, ), @@ -103,6 +106,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -121,6 +125,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -131,6 +136,7 @@ "sequence": 0, "vendor": "langchain", "ingest_source": "Python", + "role": "user", "virtual_llm": True, }, ), @@ -140,6 +146,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -158,6 +165,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -168,6 +176,7 @@ "sequence": 0, "vendor": "langchain", "ingest_source": "Python", + "role": "user", "virtual_llm": True, }, ), @@ -175,6 +184,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -185,6 +195,7 @@ "sequence": 1, "vendor": "langchain", "ingest_source": "Python", + "role": "assistant", "is_response": True, "virtual_llm": True, }, @@ -195,6 +206,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -213,6 +225,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -223,6 +236,7 @@ 
"sequence": 0, "vendor": "langchain", "ingest_source": "Python", + "role": "user", "virtual_llm": True, }, ), @@ -230,6 +244,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -240,6 +255,7 @@ "sequence": 1, "vendor": "langchain", "ingest_source": "Python", + "role": "assistant", "is_response": True, "virtual_llm": True, }, @@ -250,6 +266,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -266,6 +283,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -276,6 +294,7 @@ "sequence": 0, "vendor": "langchain", "ingest_source": "Python", + "role": "user", "virtual_llm": True, }, ), @@ -283,6 +302,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -293,6 +313,7 @@ "sequence": 1, "vendor": "langchain", "ingest_source": "Python", + "role": "assistant", "is_response": True, "virtual_llm": True, }, @@ -303,6 +324,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -319,6 +341,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -329,6 +352,7 @@ "sequence": 0, "vendor": "langchain", "ingest_source": "Python", + "role": "user", "virtual_llm": True, }, ), @@ -336,6 +360,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -346,6 +371,7 @@ "sequence": 1, "vendor": "langchain", "ingest_source": "Python", + "role": "assistant", "is_response": True, "virtual_llm": True, }, @@ -430,6 +456,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "request_id": None, "span_id": None, "trace_id": "trace-id", @@ -438,6 +465,7 @@ "vendor": "langchain", "ingest_source": "Python", "is_response": True, + "role": "assistant", "virtual_llm": True, "content": "page_content='What is 2 + 4?'", }, @@ -446,6 +474,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, + "timestamp": None, "span_id": None, "trace_id": "trace-id", "request.model": "gpt-3.5-turbo", @@ -471,6 +500,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "request_id": None, "span_id": None, "trace_id": "trace-id", @@ -487,6 +517,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "request_id": None, "span_id": None, "trace_id": "trace-id", @@ -503,6 +534,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "request_id": None, "span_id": None, "trace_id": "trace-id", @@ -520,6 +552,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "request_id": None, "span_id": None, "trace_id": "trace-id", @@ -527,6 +560,7 @@ "sequence": 0, "vendor": "langchain", "ingest_source": "Python", + "role": "user", "virtual_llm": True, "content": "{'input': 'math', 'context': [Document(id='1234', metadata={}, page_content='What is 2 + 4?')]}", }, @@ -535,6 +569,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "request_id": None, "span_id": None, "trace_id": "trace-id", @@ -542,6 +577,7 @@ 
"sequence": 1, "vendor": "langchain", "ingest_source": "Python", + "role": "assistant", "is_response": True, "virtual_llm": True, "content": "```html\n\n\n\n Math Quiz\n\n\n

[escaped HTML "Math Quiz" page: a "Math Quiz Questions" heading and an ordered list of five multiple-choice questions (5 + 3, 6 x 7, square root of 64, 12 / 4, 15 + 9), each with answer options A) through D)]
\n\n\n```", @@ -551,6 +587,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "request_id": None, "span_id": None, "trace_id": "trace-id", @@ -558,6 +595,7 @@ "sequence": 1, "vendor": "langchain", "ingest_source": "Python", + "role": "assistant", "is_response": True, "virtual_llm": True, "content": "{'input': 'math', 'context': [Document(id='1234', metadata={}, page_content='What is 2 + 4?')], 'answer': '```html\\n\\n\\n\\n Math Quiz\\n\\n\\n

Math Quiz Questions

\\n
    \\n
  1. What is the result of 5 + 3?
  2. \\n
      \\n
    • A) 7
    • \\n
    • B) 8
    • \\n
    • C) 9
    • \\n
    • D) 10
    • \\n
    \\n
  3. What is the product of 6 x 7?
  4. \\n
      \\n
    • A) 36
    • \\n
    • B) 42
    • \\n
    • C) 48
    • \\n
    • D) 56
    • \\n
    \\n
  5. What is the square root of 64?
  6. \\n
      \\n
    • A) 6
    • \\n
    • B) 7
    • \\n
    • C) 8
    • \\n
    • D) 9
    • \\n
    \\n
  7. What is the result of 12 / 4?
  8. \\n
      \\n
    • A) 2
    • \\n
    • B) 3
    • \\n
    • C) 4
    • \\n
    • D) 5
    • \\n
    \\n
  9. What is the sum of 15 + 9?
  10. \\n
      \\n
    • A) 22
    • \\n
    • B) 23
    • \\n
    • C) 24
    • \\n
    • D) 25
    • \\n
    \\n
\\n\\n\\n```'}", @@ -570,6 +608,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -587,6 +626,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -597,6 +637,7 @@ "sequence": 0, "vendor": "langchain", "ingest_source": "Python", + "role": "user", "virtual_llm": True, }, ), @@ -604,6 +645,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -614,6 +656,7 @@ "sequence": 1, "vendor": "langchain", "ingest_source": "Python", + "role": "assistant", "is_response": True, "virtual_llm": True, }, @@ -624,6 +667,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -641,6 +685,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -651,6 +696,7 @@ "sequence": 0, "vendor": "langchain", "ingest_source": "Python", + "role": "user", "virtual_llm": True, }, ), @@ -658,6 +704,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -668,6 +715,7 @@ "sequence": 1, "vendor": "langchain", "ingest_source": "Python", + "role": "assistant", "is_response": True, "virtual_llm": True, }, @@ -679,6 +727,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -696,6 +745,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -706,6 +756,7 @@ "sequence": 0, "vendor": "langchain", "ingest_source": "Python", + "role": "user", "virtual_llm": True, }, ), @@ -716,6 +767,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -732,6 +784,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": None, @@ -742,6 +795,7 @@ "sequence": 0, "vendor": "langchain", "ingest_source": "Python", + "role": "user", "virtual_llm": True, }, ), diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py index 1f8cf1cb74..89208ab268 100644 --- a/tests/mlmodel_openai/test_chat_completion.py +++ b/tests/mlmodel_openai/test_chat_completion.py @@ -44,6 +44,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -72,6 +73,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "49dbbffbd3c3f4612aa48def69059ccd", @@ -90,6 +92,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "49dbbffbd3c3f4612aa48def69059ccd", @@ -108,6 +111,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": 
"chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "49dbbffbd3c3f4612aa48def69059ccd", diff --git a/tests/mlmodel_openai/test_chat_completion_error.py b/tests/mlmodel_openai/test_chat_completion_error.py index bfb2267a33..79cc79d6db 100644 --- a/tests/mlmodel_openai/test_chat_completion_error.py +++ b/tests/mlmodel_openai/test_chat_completion_error.py @@ -45,6 +45,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -61,6 +62,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -76,6 +78,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -162,6 +165,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -179,6 +183,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -257,6 +262,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -274,6 +280,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -289,6 +296,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -338,6 +346,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "span_id": None, "trace_id": "trace-id", "duration": None, # Response time varies each test run @@ -354,6 +363,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "span_id": None, "trace_id": "trace-id", "content": "Invalid API key.", diff --git a/tests/mlmodel_openai/test_chat_completion_error_v1.py b/tests/mlmodel_openai/test_chat_completion_error_v1.py index 9be9fcab9c..848ad57add 100644 --- a/tests/mlmodel_openai/test_chat_completion_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_error_v1.py @@ -44,6 +44,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -60,6 +61,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -75,6 +77,7 @@ {"type": 
"LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -205,6 +208,7 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -222,6 +226,7 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -369,6 +374,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count_as {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "span_id": None, "trace_id": "trace-id", "duration": None, # Response time varies each test run @@ -385,6 +391,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count_as {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "span_id": None, "trace_id": "trace-id", "content": "Invalid API key.", diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py index ad89d6f260..55e8e8fbdb 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream.py +++ b/tests/mlmodel_openai/test_chat_completion_stream.py @@ -45,6 +45,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -73,6 +74,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "49dbbffbd3c3f4612aa48def69059ccd", @@ -91,6 +93,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "49dbbffbd3c3f4612aa48def69059ccd", @@ -109,6 +112,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "49dbbffbd3c3f4612aa48def69059ccd", diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index eebb5ee8fb..0fb0d06867 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -45,6 +45,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -61,6 +62,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -76,6 +78,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -167,6 +170,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ 
-184,6 +188,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -266,6 +271,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -283,6 +289,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -298,6 +305,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -352,6 +360,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "span_id": None, "trace_id": "trace-id", "duration": None, # Response time varies each test run @@ -368,6 +377,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "span_id": None, "trace_id": "trace-id", "content": "Invalid API key.", @@ -626,6 +636,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "span_id": None, "trace_id": "trace-id", "duration": None, # Response time varies each test run @@ -643,6 +654,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "span_id": None, "trace_id": "trace-id", "content": "Stream parsing error.", diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py index 5f769ea0e6..5d06dc2a28 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py @@ -45,6 +45,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -61,6 +62,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -76,6 +78,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -219,6 +222,7 @@ async def consumer(): {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -236,6 +240,7 @@ async def consumer(): {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "llm.conversation_id": "my-awesome-id", "span_id": None, "trace_id": "trace-id", @@ -392,6 +397,7 @@ async def consumer(): {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + 
"timestamp": None, "span_id": None, "trace_id": "trace-id", "duration": None, # Response time varies each test run @@ -408,6 +414,7 @@ async def consumer(): {"type": "LlmChatCompletionMessage"}, { "id": None, + "timestamp": None, "span_id": None, "trace_id": "trace-id", "content": "Invalid API key.", diff --git a/tests/mlmodel_openai/test_chat_completion_stream_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_v1.py index 796404012b..6fc5d58f28 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_v1.py @@ -54,6 +54,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -83,6 +84,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": "chatcmpl-8TJ9dS50zgQM7XicE8PLnCyEihRug-0", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "f8d0f53b6881c5c0a3698e55f8f410ac", @@ -101,6 +103,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": "chatcmpl-8TJ9dS50zgQM7XicE8PLnCyEihRug-1", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "f8d0f53b6881c5c0a3698e55f8f410ac", @@ -119,6 +122,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": "chatcmpl-8TJ9dS50zgQM7XicE8PLnCyEihRug-2", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "f8d0f53b6881c5c0a3698e55f8f410ac", diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py index 817db35d8e..5a6793d955 100644 --- a/tests/mlmodel_openai/test_chat_completion_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_v1.py @@ -43,6 +43,7 @@ {"type": "LlmChatCompletionSummary"}, { "id": None, # UUID that varies with each run + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "span_id": None, @@ -71,6 +72,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": "chatcmpl-9NPYxI4Zk5ztxNwW5osYdpevgoiBQ-0", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "req_25be7e064e0c590cd65709c85385c796", @@ -89,6 +91,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": "chatcmpl-9NPYxI4Zk5ztxNwW5osYdpevgoiBQ-1", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "req_25be7e064e0c590cd65709c85385c796", @@ -107,6 +110,7 @@ {"type": "LlmChatCompletionMessage"}, { "id": "chatcmpl-9NPYxI4Zk5ztxNwW5osYdpevgoiBQ-2", + "timestamp": None, "llm.conversation_id": "my-awesome-id", "llm.foo": "bar", "request_id": "req_25be7e064e0c590cd65709c85385c796", From 1606479bd4b354e68aa2ef1d88b72497e523c95c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 2 Dec 2025 22:01:27 +0000 Subject: [PATCH 18/34] Bump the github_actions group with 4 updates (#1595) Bumps the github_actions group with 4 updates: [actions/setup-python](https://github.com/actions/setup-python), [docker/metadata-action](https://github.com/docker/metadata-action), [oxsecurity/megalinter](https://github.com/oxsecurity/megalinter) and [github/codeql-action](https://github.com/github/codeql-action). 
Updates `actions/setup-python` from 6.0.0 to 6.1.0 - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/e797f83bcb11b83ae66e0230d6156d7c80228e7c...83679a892e2d95755f2dac6acb0bfd1e9ac5d548) Updates `docker/metadata-action` from 5.9.0 to 5.10.0 - [Release notes](https://github.com/docker/metadata-action/releases) - [Commits](https://github.com/docker/metadata-action/compare/318604b99e75e41977312d83839a89be02ca4893...c299e40c65443455700f0fdfc63efafe5b349051) Updates `oxsecurity/megalinter` from 9.1.0 to 9.2.0 - [Release notes](https://github.com/oxsecurity/megalinter/releases) - [Changelog](https://github.com/oxsecurity/megalinter/blob/main/CHANGELOG.md) - [Commits](https://github.com/oxsecurity/megalinter/compare/62c799d895af9bcbca5eacfebca29d527f125a57...55a59b24a441e0e1943080d4a512d827710d4a9d) Updates `github/codeql-action` from 4.31.5 to 4.31.6 - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/fdbfb4d2750291e159f0156def62b853c2798ca2...fe4161a26a8629af62121b670040955b330f9af2) --- updated-dependencies: - dependency-name: actions/setup-python dependency-version: 6.1.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: github_actions - dependency-name: docker/metadata-action dependency-version: 5.10.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: github_actions - dependency-name: oxsecurity/megalinter dependency-version: 9.2.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: github_actions - dependency-name: github/codeql-action dependency-version: 4.31.6 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: github_actions ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- .github/workflows/benchmarks.yml | 2 +- .github/workflows/build-ci-image.yml | 4 ++-- .github/workflows/deploy.yml | 2 +- .github/workflows/mega-linter.yml | 2 +- .github/workflows/tests.yml | 4 ++-- .github/workflows/trivy.yml | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 77e0537925..d66254bd9e 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -42,7 +42,7 @@ jobs: with: fetch-depth: 0 - - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # 6.0.0 + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # 6.1.0 with: python-version: "${{ matrix.python }}" diff --git a/.github/workflows/build-ci-image.yml b/.github/workflows/build-ci-image.yml index dd3833d79c..ee867679ae 100644 --- a/.github/workflows/build-ci-image.yml +++ b/.github/workflows/build-ci-image.yml @@ -60,7 +60,7 @@ jobs: - name: Generate Docker Metadata (Tags and Labels) id: meta - uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # 5.9.0 + uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # 5.10.0 with: images: ghcr.io/${{ steps.image-name.outputs.IMAGE_NAME }} flavor: | @@ -139,7 +139,7 @@ jobs: - name: Generate Docker Metadata (Tags and Labels) id: meta - uses: docker/metadata-action@318604b99e75e41977312d83839a89be02ca4893 # 5.9.0 + uses: docker/metadata-action@c299e40c65443455700f0fdfc63efafe5b349051 # 5.10.0 with: images: ghcr.io/${{ steps.image-name.outputs.IMAGE_NAME }} flavor: | diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index a91dae3061..c82c1d0654 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -114,7 +114,7 @@ jobs: persist-credentials: false fetch-depth: 0 - - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # 6.0.0 + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # 6.1.0 with: python-version: "3.13" diff --git a/.github/workflows/mega-linter.yml b/.github/workflows/mega-linter.yml index 99b010c0d6..76f6ea74b4 100644 --- a/.github/workflows/mega-linter.yml +++ b/.github/workflows/mega-linter.yml @@ -53,7 +53,7 @@ jobs: # MegaLinter - name: MegaLinter id: ml - uses: oxsecurity/megalinter/flavors/python@62c799d895af9bcbca5eacfebca29d527f125a57 # 9.1.0 + uses: oxsecurity/megalinter/flavors/python@55a59b24a441e0e1943080d4a512d827710d4a9d # 9.2.0 env: # All available variables are described in documentation # https://megalinter.io/latest/configuration/ diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 70bdc8c6c5..fcb9289971 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -94,7 +94,7 @@ jobs: steps: - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # 6.0.0 + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # 6.1.0 with: python-version: "3.13" architecture: x64 @@ -128,7 +128,7 @@ jobs: steps: - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0 - - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # 6.0.0 + - uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # 6.1.0 with: python-version: 
"3.13" architecture: x64 diff --git a/.github/workflows/trivy.yml b/.github/workflows/trivy.yml index 614ec8903e..a485674e55 100644 --- a/.github/workflows/trivy.yml +++ b/.github/workflows/trivy.yml @@ -61,6 +61,6 @@ jobs: - name: Upload Trivy scan results to GitHub Security tab if: ${{ github.event_name == 'schedule' }} - uses: github/codeql-action/upload-sarif@fdbfb4d2750291e159f0156def62b853c2798ca2 # 4.31.5 + uses: github/codeql-action/upload-sarif@fe4161a26a8629af62121b670040955b330f9af2 # 4.31.6 with: sarif_file: "trivy-results.sarif" From 748bd5b60fd46412e313b2a721eeb737ee0b2405 Mon Sep 17 00:00:00 2001 From: Timothy Pansino <11214426+TimPansino@users.noreply.github.com> Date: Fri, 5 Dec 2025 09:14:46 -0800 Subject: [PATCH 19/34] Strands MultiAgent Instrumentation (#1590) * Rename strands instrument functions * Add instrumentation for strands multiagent * Reorganize strands tests * Strands multiagent tests * Remove timestamp from test expected events. --------- Co-authored-by: Uma Annamalai --- newrelic/config.py | 22 +- newrelic/hooks/mlmodel_strands.py | 24 +- tests/mlmodel_strands/__init__.py | 13 + tests/mlmodel_strands/_test_agent.py | 165 +++++++++++ .../mlmodel_strands/_test_multiagent_graph.py | 91 ++++++ .../mlmodel_strands/_test_multiagent_swarm.py | 108 ++++++++ tests/mlmodel_strands/conftest.py | 132 --------- tests/mlmodel_strands/test_agent.py | 46 ++-- .../mlmodel_strands/test_multiagent_graph.py | 233 ++++++++++++++++ .../mlmodel_strands/test_multiagent_swarm.py | 260 ++++++++++++++++++ 10 files changed, 928 insertions(+), 166 deletions(-) create mode 100644 tests/mlmodel_strands/__init__.py create mode 100644 tests/mlmodel_strands/_test_agent.py create mode 100644 tests/mlmodel_strands/_test_multiagent_graph.py create mode 100644 tests/mlmodel_strands/_test_multiagent_swarm.py create mode 100644 tests/mlmodel_strands/test_multiagent_graph.py create mode 100644 tests/mlmodel_strands/test_multiagent_swarm.py diff --git a/newrelic/config.py b/newrelic/config.py index 94955293d5..4b8627772d 100644 --- a/newrelic/config.py +++ b/newrelic/config.py @@ -2948,12 +2948,26 @@ def _process_module_builtin_defaults(): "newrelic.hooks.mlmodel_autogen", "instrument_autogen_agentchat_agents__assistant_agent", ) - _process_module_definition("strands.agent.agent", "newrelic.hooks.mlmodel_strands", "instrument_agent_agent") _process_module_definition( - "strands.tools.executors._executor", "newrelic.hooks.mlmodel_strands", "instrument_tools_executors__executor" + "strands.agent.agent", "newrelic.hooks.mlmodel_strands", "instrument_strands_agent_agent" + ) + _process_module_definition( + "strands.multiagent.graph", "newrelic.hooks.mlmodel_strands", "instrument_strands_multiagent_graph" + ) + _process_module_definition( + "strands.multiagent.swarm", "newrelic.hooks.mlmodel_strands", "instrument_strands_multiagent_swarm" + ) + _process_module_definition( + "strands.tools.executors._executor", + "newrelic.hooks.mlmodel_strands", + "instrument_strands_tools_executors__executor", + ) + _process_module_definition( + "strands.tools.registry", "newrelic.hooks.mlmodel_strands", "instrument_strands_tools_registry" + ) + _process_module_definition( + "strands.models.bedrock", "newrelic.hooks.mlmodel_strands", "instrument_strands_models_bedrock" ) - _process_module_definition("strands.tools.registry", "newrelic.hooks.mlmodel_strands", "instrument_tools_registry") - _process_module_definition("strands.models.bedrock", "newrelic.hooks.mlmodel_strands", "instrument_models_bedrock") 
_process_module_definition("mcp.client.session", "newrelic.hooks.adapter_mcp", "instrument_mcp_client_session") _process_module_definition( diff --git a/newrelic/hooks/mlmodel_strands.py b/newrelic/hooks/mlmodel_strands.py index bf849fd717..20317626da 100644 --- a/newrelic/hooks/mlmodel_strands.py +++ b/newrelic/hooks/mlmodel_strands.py @@ -461,7 +461,7 @@ def wrap_bedrock_model__stream(wrapped, instance, args, kwargs): return wrapped(*args, **kwargs) -def instrument_agent_agent(module): +def instrument_strands_agent_agent(module): if hasattr(module, "Agent"): if hasattr(module.Agent, "__call__"): # noqa: B004 wrap_function_wrapper(module, "Agent.__call__", wrap_agent__call__) @@ -471,19 +471,35 @@ def instrument_agent_agent(module): wrap_function_wrapper(module, "Agent.stream_async", wrap_stream_async) -def instrument_tools_executors__executor(module): +def instrument_strands_multiagent_graph(module): + if hasattr(module, "Graph"): + if hasattr(module.Graph, "__call__"): # noqa: B004 + wrap_function_wrapper(module, "Graph.__call__", wrap_agent__call__) + if hasattr(module.Graph, "invoke_async"): + wrap_function_wrapper(module, "Graph.invoke_async", wrap_agent_invoke_async) + + +def instrument_strands_multiagent_swarm(module): + if hasattr(module, "Swarm"): + if hasattr(module.Swarm, "__call__"): # noqa: B004 + wrap_function_wrapper(module, "Swarm.__call__", wrap_agent__call__) + if hasattr(module.Swarm, "invoke_async"): + wrap_function_wrapper(module, "Swarm.invoke_async", wrap_agent_invoke_async) + + +def instrument_strands_tools_executors__executor(module): if hasattr(module, "ToolExecutor"): if hasattr(module.ToolExecutor, "_stream"): wrap_function_wrapper(module, "ToolExecutor._stream", wrap_tool_executor__stream) -def instrument_tools_registry(module): +def instrument_strands_tools_registry(module): if hasattr(module, "ToolRegistry"): if hasattr(module.ToolRegistry, "register_tool"): wrap_function_wrapper(module, "ToolRegistry.register_tool", wrap_ToolRegister_register_tool) -def instrument_models_bedrock(module): +def instrument_strands_models_bedrock(module): # This instrumentation only exists to pass trace context due to bedrock models using a separate thread. if hasattr(module, "BedrockModel"): if hasattr(module.BedrockModel, "stream"): diff --git a/tests/mlmodel_strands/__init__.py b/tests/mlmodel_strands/__init__.py new file mode 100644 index 0000000000..8030baccf7 --- /dev/null +++ b/tests/mlmodel_strands/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/mlmodel_strands/_test_agent.py b/tests/mlmodel_strands/_test_agent.py new file mode 100644 index 0000000000..15aa79a5ac --- /dev/null +++ b/tests/mlmodel_strands/_test_agent.py @@ -0,0 +1,165 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from strands import tool + +from ._mock_model_provider import MockedModelProvider + + +# Example tool for testing purposes +@tool +async def add_exclamation(message: str) -> str: + return f"{message}!" + + +@tool +async def throw_exception_coro(message: str) -> str: + raise RuntimeError("Oops") + + +@tool +async def throw_exception_agen(message: str) -> str: + raise RuntimeError("Oops") + yield + + +@pytest.fixture +def single_tool_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def single_tool_model_runtime_error_coro(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling throw_exception_coro tool"}, + # Set arguments to an invalid type to trigger error in tool + {"toolUse": {"name": "throw_exception_coro", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def single_tool_model_runtime_error_agen(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling throw_exception_agen tool"}, + # Set arguments to an invalid type to trigger error in tool + {"toolUse": {"name": "throw_exception_agen", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def multi_tool_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123, "b": 2}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def multi_tool_model_error(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": 
{"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + # Set insufficient arguments to trigger error in tool + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model diff --git a/tests/mlmodel_strands/_test_multiagent_graph.py b/tests/mlmodel_strands/_test_multiagent_graph.py new file mode 100644 index 0000000000..73c1679701 --- /dev/null +++ b/tests/mlmodel_strands/_test_multiagent_graph.py @@ -0,0 +1,91 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from strands import Agent, tool +from strands.multiagent.graph import GraphBuilder + +from ._mock_model_provider import MockedModelProvider + + +@pytest.fixture +def math_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "I'll calculate the sum of 15 and 27 for you."}, + {"toolUse": {"name": "calculate_sum", "toolUseId": "123", "input": {"a": 15, "b": 27}}}, + ], + }, + {"role": "assistant", "content": [{"text": "The sum of 15 and 27 is 42."}]}, + ] + ) + return model + + +@pytest.fixture +def analysis_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "I'll validate the calculation result of 42 from the calculator."}, + {"toolUse": {"name": "analyze_result", "toolUseId": "456", "input": {"value": 42}}}, + ], + }, + { + "role": "assistant", + "content": [{"text": "The calculation is correct, and 42 is a positive integer result."}], + }, + ] + ) + return model + + +# Example tool for testing purposes +@tool +async def calculate_sum(a: int, b: int) -> int: + """Calculate the sum of two numbers.""" + return a + b + + +@tool +async def analyze_result(value: int) -> str: + """Analyze a numeric result.""" + return f"The result {value} is {'positive' if value > 0 else 'zero or negative'}" + + +@pytest.fixture +def math_agent(math_model): + return Agent(name="math_agent", model=math_model, tools=[calculate_sum]) + + +@pytest.fixture +def analysis_agent(analysis_model): + return Agent(name="analysis_agent", model=analysis_model, tools=[analyze_result]) + + +@pytest.fixture +def agent_graph(math_agent, analysis_agent): + # Build graph + builder = GraphBuilder() + builder.add_node(math_agent, "math") + builder.add_node(analysis_agent, "analysis") + builder.add_edge("math", "analysis") + builder.set_entry_point("math") + + return builder.build() diff --git a/tests/mlmodel_strands/_test_multiagent_swarm.py b/tests/mlmodel_strands/_test_multiagent_swarm.py new file mode 100644 index 0000000000..4b7916c27b --- /dev/null +++ b/tests/mlmodel_strands/_test_multiagent_swarm.py @@ -0,0 +1,108 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from strands import Agent, tool +from strands.multiagent.swarm import Swarm + +from ._mock_model_provider import MockedModelProvider + + +@pytest.fixture +def math_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "I'll calculate the sum of 15 and 27 for you."}, + {"toolUse": {"name": "calculate_sum", "toolUseId": "123", "input": {"a": 15, "b": 27}}}, + ], + }, + { + "role": "assistant", + "content": [ + { + "toolUse": { + "name": "handoff_to_agent", + "toolUseId": "789", + "input": { + "agent_name": "analysis_agent", + "message": "Analyze the result of the calculation done by the math_agent.", + "context": {"result": 42}, + }, + } + } + ], + }, + {"role": "assistant", "content": [{"text": "The sum of 15 and 27 is 42."}]}, + ] + ) + return model + + +@pytest.fixture +def analysis_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "I'll validate the calculation result of 42 from the calculator."}, + {"toolUse": {"name": "analyze_result", "toolUseId": "456", "input": {"value": 42}}}, + ], + }, + { + "role": "assistant", + "content": [{"text": "The calculation is correct, and 42 is a positive integer result."}], + }, + ] + ) + return model + + +# Example tool for testing purposes +@tool +async def calculate_sum(a: int, b: int) -> int: + """Calculate the sum of two numbers.""" + return a + b + + +@tool +async def analyze_result(value: int) -> str: + """Analyze a numeric result.""" + return f"The result {value} is {'positive' if value > 0 else 'zero or negative'}" + + +@pytest.fixture +def math_agent(math_model): + return Agent(name="math_agent", model=math_model, tools=[calculate_sum]) + + +@pytest.fixture +def analysis_agent(analysis_model): + return Agent(name="analysis_agent", model=analysis_model, tools=[analyze_result]) + + +@pytest.fixture +def agent_swarm(math_agent, analysis_agent): + # Build graph with conditional edge + return Swarm( + [math_agent, analysis_agent], + entry_point=math_agent, + execution_timeout=60, + node_timeout=30, + max_handoffs=5, + max_iterations=5, + ) diff --git a/tests/mlmodel_strands/conftest.py b/tests/mlmodel_strands/conftest.py index a2ad9b8dd0..abbc29b969 100644 --- a/tests/mlmodel_strands/conftest.py +++ b/tests/mlmodel_strands/conftest.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import pytest -from _mock_model_provider import MockedModelProvider from testing_support.fixture.event_loop import event_loop as loop from testing_support.fixtures import collector_agent_registration_fixture, collector_available_fixture from testing_support.ml_testing_utils import set_trace_info @@ -31,133 +29,3 @@ collector_agent_registration = collector_agent_registration_fixture( app_name="Python Agent Test (mlmodel_strands)", default_settings=_default_settings ) - - -@pytest.fixture -def single_tool_model(): - model = MockedModelProvider( - [ - { - "role": "assistant", - "content": [ - {"text": "Calling add_exclamation tool"}, - {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, - ], - }, - {"role": "assistant", "content": [{"text": "Success!"}]}, - ] - ) - return model - - -@pytest.fixture -def single_tool_model_runtime_error_coro(): - model = MockedModelProvider( - [ - { - "role": "assistant", - "content": [ - {"text": "Calling throw_exception_coro tool"}, - # Set arguments to an invalid type to trigger error in tool - {"toolUse": {"name": "throw_exception_coro", "toolUseId": "123", "input": {"message": "Hello"}}}, - ], - }, - {"role": "assistant", "content": [{"text": "Success!"}]}, - ] - ) - return model - - -@pytest.fixture -def single_tool_model_runtime_error_agen(): - model = MockedModelProvider( - [ - { - "role": "assistant", - "content": [ - {"text": "Calling throw_exception_agen tool"}, - # Set arguments to an invalid type to trigger error in tool - {"toolUse": {"name": "throw_exception_agen", "toolUseId": "123", "input": {"message": "Hello"}}}, - ], - }, - {"role": "assistant", "content": [{"text": "Success!"}]}, - ] - ) - return model - - -@pytest.fixture -def multi_tool_model(): - model = MockedModelProvider( - [ - { - "role": "assistant", - "content": [ - {"text": "Calling add_exclamation tool"}, - {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, - ], - }, - { - "role": "assistant", - "content": [ - {"text": "Calling compute_sum tool"}, - {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, - ], - }, - { - "role": "assistant", - "content": [ - {"text": "Calling add_exclamation tool"}, - {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, - ], - }, - { - "role": "assistant", - "content": [ - {"text": "Calling compute_sum tool"}, - {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123, "b": 2}}}, - ], - }, - {"role": "assistant", "content": [{"text": "Success!"}]}, - ] - ) - return model - - -@pytest.fixture -def multi_tool_model_error(): - model = MockedModelProvider( - [ - { - "role": "assistant", - "content": [ - {"text": "Calling add_exclamation tool"}, - {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, - ], - }, - { - "role": "assistant", - "content": [ - {"text": "Calling compute_sum tool"}, - {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, - ], - }, - { - "role": "assistant", - "content": [ - {"text": "Calling add_exclamation tool"}, - {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, - ], - }, - { - "role": "assistant", - "content": [ - {"text": "Calling compute_sum tool"}, - # Set insufficient arguments to trigger error in tool - {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123}}}, - ], - }, - {"role": "assistant", "content": [{"text": 
"Success!"}]}, - ] - ) - return model diff --git a/tests/mlmodel_strands/test_agent.py b/tests/mlmodel_strands/test_agent.py index af685668ad..6fa5e56a68 100644 --- a/tests/mlmodel_strands/test_agent.py +++ b/tests/mlmodel_strands/test_agent.py @@ -13,7 +13,7 @@ # limitations under the License. import pytest -from strands import Agent, tool +from strands import Agent from testing_support.fixtures import reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( disabled_ai_monitoring_record_content_settings, @@ -32,6 +32,17 @@ from newrelic.common.object_names import callable_name from newrelic.common.object_wrapper import transient_function_wrapper +from ._test_agent import ( + add_exclamation, + multi_tool_model, + multi_tool_model_error, + single_tool_model, + single_tool_model_runtime_error_agen, + single_tool_model_runtime_error_coro, + throw_exception_agen, + throw_exception_coro, +) + tool_recorded_event = [ ( {"type": "LlmTool"}, @@ -144,29 +155,12 @@ ] -# Example tool for testing purposes -@tool -async def add_exclamation(message: str) -> str: - return f"{message}!" - - -@tool -async def throw_exception_coro(message: str) -> str: - raise RuntimeError("Oops") - - -@tool -async def throw_exception_agen(message: str) -> str: - raise RuntimeError("Oops") - yield - - @reset_core_stats_engine() @validate_custom_events(events_with_context_attrs(tool_recorded_event)) @validate_custom_events(events_with_context_attrs(agent_recorded_event)) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_invoke", + "mlmodel_strands.test_agent:test_agent_invoke", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), @@ -194,7 +188,7 @@ def test_agent_invoke(set_trace_info, single_tool_model): @validate_custom_events(agent_recorded_event) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_invoke_async", + "mlmodel_strands.test_agent:test_agent_invoke_async", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), @@ -224,7 +218,7 @@ async def _test(): @validate_custom_events(agent_recorded_event) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_stream_async", + "mlmodel_strands.test_agent:test_agent_stream_async", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), @@ -260,7 +254,7 @@ async def _test(): @validate_custom_events(tool_events_sans_content(tool_recorded_event)) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_invoke_no_content", + "mlmodel_strands.test_agent:test_agent_invoke_no_content", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), @@ -301,7 +295,7 @@ def test_agent_invoke_disabled_ai_monitoring_events(set_trace_info, single_tool_ @validate_custom_events(agent_recorded_event_error) @validate_custom_event_count(count=1) @validate_transaction_metrics( - "test_agent:test_agent_invoke_error", + "mlmodel_strands.test_agent:test_agent_invoke_error", 
scoped_metrics=[("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1)], rollup_metrics=[("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1)], background_task=True, @@ -330,7 +324,7 @@ def _test(): @validate_custom_events(tool_recorded_event_error_coro) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_invoke_tool_coro_runtime_error", + "mlmodel_strands.test_agent:test_agent_invoke_tool_coro_runtime_error", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/throw_exception_coro", 1), @@ -358,7 +352,7 @@ def test_agent_invoke_tool_coro_runtime_error(set_trace_info, single_tool_model_ @validate_custom_events(tool_recorded_event_error_agen) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_invoke_tool_agen_runtime_error", + "mlmodel_strands.test_agent:test_agent_invoke_tool_agen_runtime_error", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/throw_exception_agen", 1), @@ -387,7 +381,7 @@ def test_agent_invoke_tool_agen_runtime_error(set_trace_info, single_tool_model_ @validate_custom_events(tool_recorded_event_forced_internal_error) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_tool_forced_exception", + "mlmodel_strands.test_agent:test_agent_tool_forced_exception", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), diff --git a/tests/mlmodel_strands/test_multiagent_graph.py b/tests/mlmodel_strands/test_multiagent_graph.py new file mode 100644 index 0000000000..7bd84fc901 --- /dev/null +++ b/tests/mlmodel_strands/test_multiagent_graph.py @@ -0,0 +1,233 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from testing_support.fixtures import reset_core_stats_engine, validate_attributes +from testing_support.ml_testing_utils import disabled_ai_monitoring_settings, events_with_context_attrs +from testing_support.validators.validate_custom_event import validate_custom_event_count +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_transaction_metrics import validate_transaction_metrics + +from newrelic.api.background_task import background_task +from newrelic.api.llm_custom_attributes import WithLlmCustomAttributes + +from ._test_multiagent_graph import agent_graph, analysis_agent, analysis_model, math_agent, math_model + +agent_recorded_events = [ + [ + {"type": "LlmAgent"}, + { + "duration": None, + "id": None, + "ingest_source": "Python", + "name": "math_agent", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], + [ + {"type": "LlmAgent"}, + { + "duration": None, + "id": None, + "ingest_source": "Python", + "name": "analysis_agent", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], +] + +tool_recorded_events = [ + [ + {"type": "LlmTool"}, + { + "agent_name": "math_agent", + "duration": None, + "id": None, + "ingest_source": "Python", + "input": "{'a': 15, 'b': 27}", + "name": "calculate_sum", + "output": "{'text': '42'}", + "run_id": "123", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], + [ + {"type": "LlmTool"}, + { + "agent_name": "analysis_agent", + "duration": None, + "id": None, + "ingest_source": "Python", + "input": "{'value': 42}", + "name": "analyze_result", + "output": "{'text': 'The result 42 is positive'}", + "run_id": "456", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], +] + + +@reset_core_stats_engine() +@validate_custom_events(events_with_context_attrs(tool_recorded_events)) +@validate_custom_events(events_with_context_attrs(agent_recorded_events)) +@validate_custom_event_count(count=4) # 2 LlmTool events, 2 LlmAgent events +@validate_transaction_metrics( + "mlmodel_strands.test_multiagent_graph:test_multiagent_graph_invoke", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_multiagent_graph_invoke(set_trace_info, agent_graph): + set_trace_info() + + with WithLlmCustomAttributes({"context": "attr"}): + response = agent_graph("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + assert not response.failed_nodes + assert response.results["math"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." 
+ ) + + +@reset_core_stats_engine() +@validate_custom_events(tool_recorded_events) +@validate_custom_events(agent_recorded_events) +@validate_custom_event_count(count=4) # 2 LlmTool events, 2 LlmAgent events +@validate_transaction_metrics( + "mlmodel_strands.test_multiagent_graph:test_multiagent_graph_invoke_async", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_multiagent_graph_invoke_async(loop, set_trace_info, agent_graph): + set_trace_info() + + async def _test(): + response = await agent_graph.invoke_async("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + assert not response.failed_nodes + assert response.results["math"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." + ) + + loop.run_until_complete(_test()) + + +@reset_core_stats_engine() +@validate_custom_events(tool_recorded_events) +@validate_custom_events(agent_recorded_events) +@validate_custom_event_count(count=4) # 2 LlmTool events, 2 LlmAgent events +@validate_transaction_metrics( + "mlmodel_strands.test_multiagent_graph:test_multiagent_graph_stream_async", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_multiagent_graph_stream_async(loop, set_trace_info, agent_graph): + set_trace_info() + + async def _test(): + response = agent_graph.stream_async("Calculate the sum of 15 and 27.") + messages = [ + event["node_result"].result.message async for event in response if event["type"] == "multiagent_node_stop" + ] + + assert len(messages) == 2 + + assert messages[0]["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert messages[1]["content"][0]["text"] == "The calculation is correct, and 42 is a positive integer result." 
+ + loop.run_until_complete(_test()) + + +@disabled_ai_monitoring_settings +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +@background_task() +def test_multiagent_graph_invoke_disabled_ai_monitoring_events(set_trace_info, agent_graph): + set_trace_info() + + response = agent_graph("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + assert not response.failed_nodes + assert response.results["math"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." + ) + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_multiagent_graph_invoke_outside_txn(agent_graph): + response = agent_graph("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + assert not response.failed_nodes + assert response.results["math"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." + ) diff --git a/tests/mlmodel_strands/test_multiagent_swarm.py b/tests/mlmodel_strands/test_multiagent_swarm.py new file mode 100644 index 0000000000..bbcbb3e27c --- /dev/null +++ b/tests/mlmodel_strands/test_multiagent_swarm.py @@ -0,0 +1,260 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
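The multi-agent graph tests above and the swarm tests that follow import their agents from `_test_multiagent_graph.py` and `_test_multiagent_swarm.py`, which are not included in this diff. A minimal sketch of how the node agents could be assembled on top of the MockedModelProvider used elsewhere in this series; the canned responses and tool bodies below are illustrative assumptions, not the actual fixture code:

    from _mock_model_provider import MockedModelProvider
    from strands import Agent, tool

    @tool
    def calculate_sum(a: int, b: int) -> str:
        return str(a + b)

    # First response asks for the tool, second response is the final answer.
    math_model = MockedModelProvider(
        [
            {
                "role": "assistant",
                "content": [
                    {"toolUse": {"name": "calculate_sum", "toolUseId": "123", "input": {"a": 15, "b": 27}}}
                ],
            },
            {"role": "assistant", "content": [{"text": "The sum of 15 and 27 is 42."}]},
        ]
    )
    math_agent = Agent(name="math_agent", model=math_model, tools=[calculate_sum])
    # analysis_agent would be built the same way, and the two agents composed into the
    # graph/swarm by the fixture module (the composition API is not shown in this excerpt).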
+ +from testing_support.fixtures import reset_core_stats_engine, validate_attributes +from testing_support.ml_testing_utils import disabled_ai_monitoring_settings, events_with_context_attrs +from testing_support.validators.validate_custom_event import validate_custom_event_count +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_transaction_metrics import validate_transaction_metrics + +from newrelic.api.background_task import background_task +from newrelic.api.llm_custom_attributes import WithLlmCustomAttributes + +from ._test_multiagent_swarm import agent_swarm, analysis_agent, analysis_model, math_agent, math_model + +agent_recorded_events = [ + [ + {"type": "LlmAgent"}, + { + "duration": None, + "id": None, + "ingest_source": "Python", + "name": "math_agent", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], + [ + {"type": "LlmAgent"}, + { + "duration": None, + "id": None, + "ingest_source": "Python", + "name": "analysis_agent", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], +] + +tool_recorded_events = [ + [ + {"type": "LlmTool"}, + { + "agent_name": "math_agent", + "duration": None, + "id": None, + "ingest_source": "Python", + "input": "{'a': 15, 'b': 27}", + "name": "calculate_sum", + "output": "{'text': '42'}", + "run_id": "123", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], + [ + {"type": "LlmTool"}, + { + "agent_name": "analysis_agent", + "duration": None, + "id": None, + "ingest_source": "Python", + "input": "{'value': 42}", + "name": "analyze_result", + "output": "{'text': 'The result 42 is positive'}", + "run_id": "456", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], +] + +handoff_recorded_event = [ + [ + {"type": "LlmTool"}, + { + "agent_name": "math_agent", + "duration": None, + "id": None, + "ingest_source": "Python", + # This is the output from math_agent being sent to the handoff_to_agent tool, which will then be input to the analysis_agent + "input": "{'agent_name': 'analysis_agent', 'message': 'Analyze the result of the calculation done by the math_agent.', 'context': {'result': 42}}", + "name": "handoff_to_agent", + "output": "{'text': 'Handing off to analysis_agent: Analyze the result of the calculation done by the math_agent.'}", + "run_id": "789", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ] +] + + +@reset_core_stats_engine() +@validate_custom_events(events_with_context_attrs(tool_recorded_events)) +@validate_custom_events(events_with_context_attrs(agent_recorded_events)) +@validate_custom_events(events_with_context_attrs(handoff_recorded_event)) +@validate_custom_event_count(count=5) # 2 LlmTool events, 2 LlmAgent events, 1 LlmTool Handoff event +@validate_transaction_metrics( + "mlmodel_strands.test_multiagent_swarm:test_multiagent_swarm_invoke", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + 
("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_multiagent_swarm_invoke(set_trace_info, agent_swarm): + set_trace_info() + + with WithLlmCustomAttributes({"context": "attr"}): + response = agent_swarm("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + node_history = [node.node_id for node in response.node_history] + assert node_history == ["math_agent", "analysis_agent"] + assert response.results["math_agent"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis_agent"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." + ) + + +@reset_core_stats_engine() +@validate_custom_events(tool_recorded_events) +@validate_custom_events(agent_recorded_events) +@validate_custom_events(handoff_recorded_event) +@validate_custom_event_count(count=5) # 2 LlmTool events, 2 LlmAgent events, 1 LlmTool Handoff event +@validate_transaction_metrics( + "mlmodel_strands.test_multiagent_swarm:test_multiagent_swarm_invoke_async", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_multiagent_swarm_invoke_async(loop, set_trace_info, agent_swarm): + set_trace_info() + + async def _test(): + response = await agent_swarm.invoke_async("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + node_history = [node.node_id for node in response.node_history] + assert node_history == ["math_agent", "analysis_agent"] + assert response.results["math_agent"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis_agent"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." 
+ ) + + loop.run_until_complete(_test()) + + +@reset_core_stats_engine() +@validate_custom_events(tool_recorded_events) +@validate_custom_events(agent_recorded_events) +@validate_custom_events(handoff_recorded_event) +@validate_custom_event_count(count=5) # 2 LlmTool events, 2 LlmAgent events, 1 LlmTool Handoff event +@validate_transaction_metrics( + "mlmodel_strands.test_multiagent_swarm:test_multiagent_swarm_stream_async", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_multiagent_swarm_stream_async(loop, set_trace_info, agent_swarm): + set_trace_info() + + async def _test(): + response = agent_swarm.stream_async("Calculate the sum of 15 and 27.") + messages = [ + event["node_result"].result.message async for event in response if event["type"] == "multiagent_node_stop" + ] + + assert len(messages) == 2 + + assert messages[0]["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert messages[1]["content"][0]["text"] == "The calculation is correct, and 42 is a positive integer result." + + loop.run_until_complete(_test()) + + +@disabled_ai_monitoring_settings +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +@background_task() +def test_multiagent_swarm_invoke_disabled_ai_monitoring_events(set_trace_info, agent_swarm): + set_trace_info() + + response = agent_swarm("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + node_history = [node.node_id for node in response.node_history] + assert node_history == ["math_agent", "analysis_agent"] + assert response.results["math_agent"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis_agent"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." + ) + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_multiagent_swarm_invoke_outside_txn(agent_swarm): + response = agent_swarm("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + node_history = [node.node_id for node in response.node_history] + assert node_history == ["math_agent", "analysis_agent"] + assert response.results["math_agent"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis_agent"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." 
+ ) From dcadeb103f2db611ab58df24e5caba121c5296b8 Mon Sep 17 00:00:00 2001 From: Timothy Pansino <11214426+TimPansino@users.noreply.github.com> Date: Thu, 23 Oct 2025 14:32:16 -0700 Subject: [PATCH 20/34] Strands Mock Model (#1551) * Add strands to tox.ini * Add mock models for strands testing * Add simple test file to validate strands mocking --- tests/mlmodel_strands/_mock_model_provider.py | 99 ++++++++++++ tests/mlmodel_strands/conftest.py | 144 ++++++++++++++++++ tests/mlmodel_strands/test_simple.py | 36 +++++ tox.ini | 12 +- 4 files changed, 287 insertions(+), 4 deletions(-) create mode 100644 tests/mlmodel_strands/_mock_model_provider.py create mode 100644 tests/mlmodel_strands/conftest.py create mode 100644 tests/mlmodel_strands/test_simple.py diff --git a/tests/mlmodel_strands/_mock_model_provider.py b/tests/mlmodel_strands/_mock_model_provider.py new file mode 100644 index 0000000000..e4c9e79930 --- /dev/null +++ b/tests/mlmodel_strands/_mock_model_provider.py @@ -0,0 +1,99 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test setup derived from: https://github.com/strands-agents/sdk-python/blob/main/tests/fixtures/mocked_model_provider.py +# strands Apache 2.0 license: https://github.com/strands-agents/sdk-python/blob/main/LICENSE + +import json +from typing import TypedDict + +from strands.models import Model + + +class RedactionMessage(TypedDict): + redactedUserContent: str + redactedAssistantContent: str + + +class MockedModelProvider(Model): + """A mock implementation of the Model interface for testing purposes. + + This class simulates a model provider by returning pre-defined agent responses + in sequence. It implements the Model interface methods and provides functionality + to stream mock responses as events. 
+ """ + + def __init__(self, agent_responses): + self.agent_responses = agent_responses + self.index = 0 + + def format_chunk(self, event): + return event + + def format_request(self, messages, tool_specs=None, system_prompt=None): + return None + + def get_config(self): + pass + + def update_config(self, **model_config): + pass + + async def structured_output(self, output_model, prompt, system_prompt=None, **kwargs): + pass + + async def stream(self, messages, tool_specs=None, system_prompt=None): + events = self.map_agent_message_to_events(self.agent_responses[self.index]) + for event in events: + yield event + + self.index += 1 + + def map_agent_message_to_events(self, agent_message): + stop_reason = "end_turn" + yield {"messageStart": {"role": "assistant"}} + if agent_message.get("redactedAssistantContent"): + yield {"redactContent": {"redactUserContentMessage": agent_message["redactedUserContent"]}} + yield {"contentBlockStart": {"start": {}}} + yield {"contentBlockDelta": {"delta": {"text": agent_message["redactedAssistantContent"]}}} + yield {"contentBlockStop": {}} + stop_reason = "guardrail_intervened" + else: + for content in agent_message["content"]: + if "reasoningContent" in content: + yield {"contentBlockStart": {"start": {}}} + yield {"contentBlockDelta": {"delta": {"reasoningContent": content["reasoningContent"]}}} + yield {"contentBlockStop": {}} + if "text" in content: + yield {"contentBlockStart": {"start": {}}} + yield {"contentBlockDelta": {"delta": {"text": content["text"]}}} + yield {"contentBlockStop": {}} + if "toolUse" in content: + stop_reason = "tool_use" + yield { + "contentBlockStart": { + "start": { + "toolUse": { + "name": content["toolUse"]["name"], + "toolUseId": content["toolUse"]["toolUseId"], + } + } + } + } + yield { + "contentBlockDelta": {"delta": {"toolUse": {"input": json.dumps(content["toolUse"]["input"])}}} + } + yield {"contentBlockStop": {}} + + yield {"messageStop": {"stopReason": stop_reason}} diff --git a/tests/mlmodel_strands/conftest.py b/tests/mlmodel_strands/conftest.py new file mode 100644 index 0000000000..b810161f6a --- /dev/null +++ b/tests/mlmodel_strands/conftest.py @@ -0,0 +1,144 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from _mock_model_provider import MockedModelProvider +from testing_support.fixtures import collector_agent_registration_fixture, collector_available_fixture +from testing_support.ml_testing_utils import set_trace_info + +_default_settings = { + "package_reporting.enabled": False, # Turn off package reporting for testing as it causes slowdowns. 
+ "transaction_tracer.explain_threshold": 0.0, + "transaction_tracer.transaction_threshold": 0.0, + "transaction_tracer.stack_trace_threshold": 0.0, + "debug.log_data_collector_payloads": True, + "debug.record_transaction_failure": True, + "ai_monitoring.enabled": True, +} + +collector_agent_registration = collector_agent_registration_fixture( + app_name="Python Agent Test (mlmodel_strands)", default_settings=_default_settings +) + + +@pytest.fixture +def single_tool_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def single_tool_model_error(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + # Set arguments to an invalid type to trigger error in tool + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": 12}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def multi_tool_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123, "b": 2}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def multi_tool_model_error(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + # Set insufficient arguments to trigger error in tool + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model diff --git a/tests/mlmodel_strands/test_simple.py b/tests/mlmodel_strands/test_simple.py new file mode 100644 index 0000000000..ae24003fab --- /dev/null +++ b/tests/mlmodel_strands/test_simple.py @@ -0,0 +1,36 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from strands import Agent, tool + +from newrelic.api.background_task import background_task + + +# Example tool for testing purposes +@tool +def add_exclamation(message: str) -> str: + return f"{message}!" + + +# TODO: Remove this file once all real tests are in place + + +@background_task() +def test_simple_run_agent(set_trace_info, single_tool_model): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + response = my_agent("Run the tools.") + assert response.message["content"][0]["text"] == "Success!" + assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 diff --git a/tox.ini b/tox.ini index 98cea6ee29..24bdb095e6 100644 --- a/tox.ini +++ b/tox.ini @@ -183,6 +183,7 @@ envlist = python-logger_structlog-{py38,py39,py310,py311,py312,py313,py314,pypy311}-structloglatest, python-mlmodel_autogen-{py310,py311,py312,py313,py314,pypy311}-autogen061, python-mlmodel_autogen-{py310,py311,py312,py313,py314,pypy311}-autogenlatest, + python-mlmodel_strands-{py310,py311,py312,py313}-strandslatest, python-mlmodel_gemini-{py39,py310,py311,py312,py313,py314}, python-mlmodel_langchain-{py39,py310,py311,py312,py313}, ;; Package not ready for Python 3.14 (type annotations not updated) @@ -443,6 +444,8 @@ deps = mlmodel_langchain: faiss-cpu mlmodel_langchain: mock mlmodel_langchain: asyncio + mlmodel_strands: strands-agents[openai] + mlmodel_strands: strands-agents-tools logger_loguru-logurulatest: loguru logger_structlog-structloglatest: structlog messagebroker_pika-pikalatest: pika @@ -513,6 +516,7 @@ changedir = application_celery: tests/application_celery component_djangorestframework: tests/component_djangorestframework component_flask_rest: tests/component_flask_rest + component_graphenedjango: tests/component_graphenedjango component_graphqlserver: tests/component_graphqlserver component_tastypie: tests/component_tastypie coroutines_asyncio: tests/coroutines_asyncio @@ -524,17 +528,17 @@ changedir = datastore_cassandradriver: tests/datastore_cassandradriver datastore_elasticsearch: tests/datastore_elasticsearch datastore_firestore: tests/datastore_firestore - datastore_oracledb: tests/datastore_oracledb datastore_memcache: tests/datastore_memcache + datastore_motor: tests/datastore_motor datastore_mysql: tests/datastore_mysql datastore_mysqldb: tests/datastore_mysqldb + datastore_oracledb: tests/datastore_oracledb datastore_postgresql: tests/datastore_postgresql datastore_psycopg: tests/datastore_psycopg datastore_psycopg2: tests/datastore_psycopg2 datastore_psycopg2cffi: tests/datastore_psycopg2cffi datastore_pylibmc: tests/datastore_pylibmc datastore_pymemcache: tests/datastore_pymemcache - datastore_motor: tests/datastore_motor datastore_pymongo: tests/datastore_pymongo datastore_pymssql: tests/datastore_pymssql datastore_pymysql: tests/datastore_pymysql @@ -542,8 +546,8 @@ changedir = datastore_pysolr: tests/datastore_pysolr datastore_redis: tests/datastore_redis datastore_rediscluster: tests/datastore_rediscluster - datastore_valkey: tests/datastore_valkey datastore_sqlite: tests/datastore_sqlite + 
datastore_valkey: tests/datastore_valkey external_aiobotocore: tests/external_aiobotocore external_botocore: tests/external_botocore external_feedparser: tests/external_feedparser @@ -564,7 +568,6 @@ changedir = framework_fastapi: tests/framework_fastapi framework_flask: tests/framework_flask framework_graphene: tests/framework_graphene - component_graphenedjango: tests/component_graphenedjango framework_graphql: tests/framework_graphql framework_grpc: tests/framework_grpc framework_pyramid: tests/framework_pyramid @@ -584,6 +587,7 @@ changedir = mlmodel_langchain: tests/mlmodel_langchain mlmodel_openai: tests/mlmodel_openai mlmodel_sklearn: tests/mlmodel_sklearn + mlmodel_strands: tests/mlmodel_strands template_genshi: tests/template_genshi template_jinja2: tests/template_jinja2 template_mako: tests/template_mako From e1a82c6193188e42c0e6a5fafcd6013d55c0de66 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Thu, 13 Nov 2025 14:54:05 -0800 Subject: [PATCH 21/34] Add Strands tools and agents instrumentation. (#1563) * Add baseline instrumentation. * Add tool and agent instrumentation. * Add tests file. * Cleanup instrumentation. * Cleanup. Co-authored-by: Tim Pansino * [MegaLinter] Apply linters fixes * Strands Mock Model (#1551) * Add strands to tox.ini * Add mock models for strands testing * Add simple test file to validate strands mocking * Add baseline instrumentation. * Add tool and agent instrumentation. * Add tests file. * Cleanup instrumentation. * Cleanup. Co-authored-by: Tim Pansino * Handle additional args in mock model. * Add test to force exception and exercise _handle_tool_streaming_completion_error. * Strands Mock Model (#1551) * Add strands to tox.ini * Add mock models for strands testing * Add simple test file to validate strands mocking * Add baseline instrumentation. * Add tool and agent instrumentation. * Add tests file. * Cleanup instrumentation. * Cleanup. Co-authored-by: Tim Pansino * Handle additional args in mock model. * Strands Mock Model (#1551) * Add strands to tox.ini * Add mock models for strands testing * Add simple test file to validate strands mocking * Add baseline instrumentation. * Add tool and agent instrumentation. * Cleanup. Co-authored-by: Tim Pansino * [MegaLinter] Apply linters fixes * Add test to force exception and exercise _handle_tool_streaming_completion_error. * Implement strands context passing instrumentation. * Address review feedback. * [MegaLinter] Apply linters fixes * Remove test_simple.py file. 
--------- Co-authored-by: Tim Pansino Co-authored-by: Timothy Pansino <11214426+TimPansino@users.noreply.github.com> Co-authored-by: Tim Pansino --- newrelic/api/error_trace.py | 29 +- newrelic/common/llm_utils.py | 24 + newrelic/config.py | 7 + newrelic/hooks/mlmodel_strands.py | 492 ++++++++++++++++++ tests/mlmodel_strands/_mock_model_provider.py | 4 +- tests/mlmodel_strands/conftest.py | 25 +- tests/mlmodel_strands/test_agent.py | 427 +++++++++++++++ tests/mlmodel_strands/test_simple.py | 36 -- tests/testing_support/fixtures.py | 2 +- .../validate_error_event_collector_json.py | 2 +- .../validate_transaction_error_event_count.py | 4 +- 11 files changed, 1001 insertions(+), 51 deletions(-) create mode 100644 newrelic/common/llm_utils.py create mode 100644 newrelic/hooks/mlmodel_strands.py create mode 100644 tests/mlmodel_strands/test_agent.py delete mode 100644 tests/mlmodel_strands/test_simple.py diff --git a/newrelic/api/error_trace.py b/newrelic/api/error_trace.py index db63c54316..aaa12b50e3 100644 --- a/newrelic/api/error_trace.py +++ b/newrelic/api/error_trace.py @@ -15,6 +15,7 @@ import functools from newrelic.api.time_trace import current_trace, notice_error +from newrelic.common.async_wrapper import async_wrapper as get_async_wrapper from newrelic.common.object_wrapper import FunctionWrapper, wrap_object @@ -43,17 +44,31 @@ def __exit__(self, exc, value, tb): ) -def ErrorTraceWrapper(wrapped, ignore=None, expected=None, status_code=None): - def wrapper(wrapped, instance, args, kwargs): - parent = current_trace() +def ErrorTraceWrapper(wrapped, ignore=None, expected=None, status_code=None, async_wrapper=None): + def literal_wrapper(wrapped, instance, args, kwargs): + # Determine if the wrapped function is async or sync + wrapper = async_wrapper if async_wrapper is not None else get_async_wrapper(wrapped) + # Sync function path + if not wrapper: + parent = current_trace() + if not parent: + # No active tracing context so just call the wrapped function directly + return wrapped(*args, **kwargs) + # Async function path + else: + # For async functions, the async wrapper will handle trace context propagation + parent = None - if parent is None: - return wrapped(*args, **kwargs) + trace = ErrorTrace(ignore, expected, status_code, parent=parent) + + if wrapper: + # The async wrapper handles the context management for us + return wrapper(wrapped, trace)(*args, **kwargs) - with ErrorTrace(ignore, expected, status_code, parent=parent): + with trace: return wrapped(*args, **kwargs) - return FunctionWrapper(wrapped, wrapper) + return FunctionWrapper(wrapped, literal_wrapper) def error_trace(ignore=None, expected=None, status_code=None): diff --git a/newrelic/common/llm_utils.py b/newrelic/common/llm_utils.py new file mode 100644 index 0000000000..eebdacfc7f --- /dev/null +++ b/newrelic/common/llm_utils.py @@ -0,0 +1,24 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def _get_llm_metadata(transaction): + # Grab LLM-related custom attributes off of the transaction to store as metadata on LLM events + custom_attrs_dict = transaction._custom_params + llm_metadata_dict = {key: value for key, value in custom_attrs_dict.items() if key.startswith("llm.")} + llm_context_attrs = getattr(transaction, "_llm_context_attrs", None) + if llm_context_attrs: + llm_metadata_dict.update(llm_context_attrs) + + return llm_metadata_dict diff --git a/newrelic/config.py b/newrelic/config.py index c2b7b5c2d6..94955293d5 100644 --- a/newrelic/config.py +++ b/newrelic/config.py @@ -2948,6 +2948,13 @@ def _process_module_builtin_defaults(): "newrelic.hooks.mlmodel_autogen", "instrument_autogen_agentchat_agents__assistant_agent", ) + _process_module_definition("strands.agent.agent", "newrelic.hooks.mlmodel_strands", "instrument_agent_agent") + _process_module_definition( + "strands.tools.executors._executor", "newrelic.hooks.mlmodel_strands", "instrument_tools_executors__executor" + ) + _process_module_definition("strands.tools.registry", "newrelic.hooks.mlmodel_strands", "instrument_tools_registry") + _process_module_definition("strands.models.bedrock", "newrelic.hooks.mlmodel_strands", "instrument_models_bedrock") + _process_module_definition("mcp.client.session", "newrelic.hooks.adapter_mcp", "instrument_mcp_client_session") _process_module_definition( "mcp.server.fastmcp.tools.tool_manager", diff --git a/newrelic/hooks/mlmodel_strands.py b/newrelic/hooks/mlmodel_strands.py new file mode 100644 index 0000000000..bf849fd717 --- /dev/null +++ b/newrelic/hooks/mlmodel_strands.py @@ -0,0 +1,492 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import sys +import uuid + +from newrelic.api.error_trace import ErrorTraceWrapper +from newrelic.api.function_trace import FunctionTrace +from newrelic.api.time_trace import current_trace, get_trace_linking_metadata +from newrelic.api.transaction import current_transaction +from newrelic.common.llm_utils import _get_llm_metadata +from newrelic.common.object_names import callable_name +from newrelic.common.object_wrapper import ObjectProxy, wrap_function_wrapper +from newrelic.common.package_version_utils import get_package_version +from newrelic.common.signature import bind_args +from newrelic.core.config import global_settings +from newrelic.core.context import ContextOf + +_logger = logging.getLogger(__name__) +STRANDS_VERSION = get_package_version("strands-agents") + +RECORD_EVENTS_FAILURE_LOG_MESSAGE = "Exception occurred in Strands instrumentation: Failed to record LLM events. Please report this issue to New Relic Support." +TOOL_OUTPUT_FAILURE_LOG_MESSAGE = "Exception occurred in Strands instrumentation: Failed to record output of tool call. Please report this issue to New Relic Support." +AGENT_EVENT_FAILURE_LOG_MESSAGE = "Exception occurred in Strands instrumentation: Failed to record agent data. Please report this issue to New Relic Support." 
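`_get_llm_metadata` above is what lets callers attach their own attributes to every LlmAgent and LlmTool event these hooks record: any transaction custom attribute whose key starts with `llm.`, plus anything set through `WithLlmCustomAttributes`, is copied onto the event. A minimal sketch of that usage, assuming `agent` is any instrumented Strands agent:

    from newrelic.api.background_task import background_task
    from newrelic.api.llm_custom_attributes import WithLlmCustomAttributes
    from newrelic.api.transaction import add_custom_attribute

    @background_task(name="llm_metadata_demo")
    def run_agent(agent):
        # Recorded as a transaction custom attribute; the "llm." prefix is what makes
        # _get_llm_metadata copy it onto the LlmAgent/LlmTool events.
        add_custom_attribute("llm.conversation_id", "abc-123")

        # Context attributes set here are stored on the transaction and merged in by
        # _get_llm_metadata for the duration of the block.
        with WithLlmCustomAttributes({"context": "attr"}):
            return agent("Run the tools.")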
+TOOL_EXTRACTOR_FAILURE_LOG_MESSAGE = "Exception occurred in Strands instrumentation: Failed to extract tool information. If the issue persists, report this issue to New Relic support.\n" + + +def wrap_agent__call__(wrapped, instance, args, kwargs): + trace = current_trace() + if not trace: + return wrapped(*args, **kwargs) + + try: + bound_args = bind_args(wrapped, args, kwargs) + # Make a copy of the invocation state before we mutate it + if "invocation_state" in bound_args: + invocation_state = bound_args["invocation_state"] = dict(bound_args["invocation_state"] or {}) + + # Attempt to save the current transaction context into the invocation state dictionary + invocation_state["_nr_transaction"] = trace + except Exception: + return wrapped(*args, **kwargs) + else: + return wrapped(**bound_args) + + +async def wrap_agent_invoke_async(wrapped, instance, args, kwargs): + # If there's already a transaction, don't propagate anything here + if current_transaction(): + return await wrapped(*args, **kwargs) + + try: + # Grab the trace context we should be running under and pass it to ContextOf + bound_args = bind_args(wrapped, args, kwargs) + invocation_state = bound_args["invocation_state"] or {} + trace = invocation_state.pop("_nr_transaction", None) + except Exception: + return await wrapped(*args, **kwargs) + + # If we find a transaction to propagate, use it. Otherwise, just call wrapped. + if trace: + with ContextOf(trace=trace): + return await wrapped(*args, **kwargs) + else: + return await wrapped(*args, **kwargs) + + +def wrap_stream_async(wrapped, instance, args, kwargs): + transaction = current_transaction() + if not transaction: + return wrapped(*args, **kwargs) + + settings = transaction.settings or global_settings() + if not settings.ai_monitoring.enabled: + return wrapped(*args, **kwargs) + + # Framework metric also used for entity tagging in the UI + transaction.add_ml_model_info("Strands", STRANDS_VERSION) + transaction._add_agent_attribute("llm", True) + + func_name = callable_name(wrapped) + agent_name = getattr(instance, "name", "agent") + function_trace_name = f"{func_name}/{agent_name}" + + ft = FunctionTrace(name=function_trace_name, group="Llm/agent/Strands") + ft.__enter__() + linking_metadata = get_trace_linking_metadata() + agent_id = str(uuid.uuid4()) + + try: + return_val = wrapped(*args, **kwargs) + except Exception: + raise + + # For streaming responses, wrap with proxy and attach metadata + try: + # For streaming responses, wrap with proxy and attach metadata + proxied_return_val = AsyncGeneratorProxy( + return_val, _record_agent_event_on_stop_iteration, _handle_agent_streaming_completion_error + ) + proxied_return_val._nr_ft = ft + proxied_return_val._nr_metadata = linking_metadata + proxied_return_val._nr_strands_attrs = {"agent_name": agent_name, "agent_id": agent_id} + return proxied_return_val + except Exception: + # If proxy creation fails, clean up the function trace and return original value + ft.__exit__(*sys.exc_info()) + return return_val + + +def _record_agent_event_on_stop_iteration(self, transaction): + if hasattr(self, "_nr_ft"): + # Use saved linking metadata to maintain correct span association + linking_metadata = self._nr_metadata or get_trace_linking_metadata() + self._nr_ft.__exit__(None, None, None) + + try: + strands_attrs = getattr(self, "_nr_strands_attrs", {}) + + # If there are no strands attrs exit early as there's no data to record. 
+ if not strands_attrs: + return + + agent_name = strands_attrs.get("agent_name", "agent") + agent_id = strands_attrs.get("agent_id") + agent_event_dict = _construct_base_agent_event_dict(agent_name, agent_id, transaction, linking_metadata) + agent_event_dict["duration"] = self._nr_ft.duration * 1000 + transaction.record_custom_event("LlmAgent", agent_event_dict) + + except Exception: + _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) + finally: + # Clear cached data to prevent memory leaks and duplicate reporting + if hasattr(self, "_nr_strands_attrs"): + self._nr_strands_attrs.clear() + + +def _record_tool_event_on_stop_iteration(self, transaction): + if hasattr(self, "_nr_ft"): + # Use saved linking metadata to maintain correct span association + linking_metadata = self._nr_metadata or get_trace_linking_metadata() + self._nr_ft.__exit__(None, None, None) + + try: + strands_attrs = getattr(self, "_nr_strands_attrs", {}) + + # If there are no strands attrs exit early as there's no data to record. + if not strands_attrs: + return + + try: + tool_results = strands_attrs.get("tool_results", []) + except Exception: + tool_results = None + _logger.warning(TOOL_OUTPUT_FAILURE_LOG_MESSAGE, exc_info=True) + + tool_event_dict = _construct_base_tool_event_dict( + strands_attrs, tool_results, transaction, linking_metadata + ) + tool_event_dict["duration"] = self._nr_ft.duration * 1000 + transaction.record_custom_event("LlmTool", tool_event_dict) + + except Exception: + _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) + finally: + # Clear cached data to prevent memory leaks and duplicate reporting + if hasattr(self, "_nr_strands_attrs"): + self._nr_strands_attrs.clear() + + +def _construct_base_tool_event_dict(strands_attrs, tool_results, transaction, linking_metadata): + try: + try: + tool_output = tool_results[-1]["content"][0] if tool_results else None + error = tool_results[-1]["status"] == "error" + except Exception: + tool_output = None + error = False + _logger.warning(TOOL_OUTPUT_FAILURE_LOG_MESSAGE, exc_info=True) + + tool_name = strands_attrs.get("tool_name", "tool") + tool_id = strands_attrs.get("tool_id") + run_id = strands_attrs.get("run_id") + tool_input = strands_attrs.get("tool_input") + agent_name = strands_attrs.get("agent_name", "agent") + settings = transaction.settings or global_settings() + + tool_event_dict = { + "id": tool_id, + "run_id": run_id, + "name": tool_name, + "span_id": linking_metadata.get("span.id"), + "trace_id": linking_metadata.get("trace.id"), + "agent_name": agent_name, + "vendor": "strands", + "ingest_source": "Python", + } + # Set error flag if the status shows an error was caught, + # it will be reported further down in the instrumentation. 
+ if error: + tool_event_dict["error"] = True + + if settings.ai_monitoring.record_content.enabled: + tool_event_dict["input"] = tool_input + # In error cases, the output will hold the error message + tool_event_dict["output"] = tool_output + tool_event_dict.update(_get_llm_metadata(transaction)) + except Exception: + tool_event_dict = {} + _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) + + return tool_event_dict + + +def _construct_base_agent_event_dict(agent_name, agent_id, transaction, linking_metadata): + try: + agent_event_dict = { + "id": agent_id, + "name": agent_name, + "span_id": linking_metadata.get("span.id"), + "trace_id": linking_metadata.get("trace.id"), + "vendor": "strands", + "ingest_source": "Python", + } + agent_event_dict.update(_get_llm_metadata(transaction)) + except Exception: + _logger.warning(AGENT_EVENT_FAILURE_LOG_MESSAGE, exc_info=True) + agent_event_dict = {} + + return agent_event_dict + + +def _handle_agent_streaming_completion_error(self, transaction): + if hasattr(self, "_nr_ft"): + strands_attrs = getattr(self, "_nr_strands_attrs", {}) + + # If there are no strands attrs exit early as there's no data to record. + if not strands_attrs: + self._nr_ft.__exit__(*sys.exc_info()) + return + + # Use saved linking metadata to maintain correct span association + linking_metadata = self._nr_metadata or get_trace_linking_metadata() + + try: + agent_name = strands_attrs.get("agent_name", "agent") + agent_id = strands_attrs.get("agent_id") + + # Notice the error on the function trace + self._nr_ft.notice_error(attributes={"agent_id": agent_id}) + self._nr_ft.__exit__(*sys.exc_info()) + + # Create error event + agent_event_dict = _construct_base_agent_event_dict(agent_name, agent_id, transaction, linking_metadata) + agent_event_dict.update({"duration": self._nr_ft.duration * 1000, "error": True}) + transaction.record_custom_event("LlmAgent", agent_event_dict) + + except Exception: + _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) + finally: + # Clear cached data to prevent memory leaks + if hasattr(self, "_nr_strands_attrs"): + self._nr_strands_attrs.clear() + + +def _handle_tool_streaming_completion_error(self, transaction): + if hasattr(self, "_nr_ft"): + strands_attrs = getattr(self, "_nr_strands_attrs", {}) + + # If there are no strands attrs exit early as there's no data to record. + if not strands_attrs: + self._nr_ft.__exit__(*sys.exc_info()) + return + + # Use saved linking metadata to maintain correct span association + linking_metadata = self._nr_metadata or get_trace_linking_metadata() + + try: + tool_id = strands_attrs.get("tool_id") + + # We expect this to never have any output since this is an error case, + # but if it does we will report it. 
+ try: + tool_results = strands_attrs.get("tool_results", []) + except Exception: + tool_results = None + _logger.warning(TOOL_OUTPUT_FAILURE_LOG_MESSAGE, exc_info=True) + + # Notice the error on the function trace + self._nr_ft.notice_error(attributes={"tool_id": tool_id}) + self._nr_ft.__exit__(*sys.exc_info()) + + # Create error event + tool_event_dict = _construct_base_tool_event_dict( + strands_attrs, tool_results, transaction, linking_metadata + ) + tool_event_dict["duration"] = self._nr_ft.duration * 1000 + # Ensure error flag is set to True in case the tool_results did not indicate an error + if "error" not in tool_event_dict: + tool_event_dict["error"] = True + + transaction.record_custom_event("LlmTool", tool_event_dict) + + except Exception: + _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) + finally: + # Clear cached data to prevent memory leaks + if hasattr(self, "_nr_strands_attrs"): + self._nr_strands_attrs.clear() + + +def wrap_tool_executor__stream(wrapped, instance, args, kwargs): + transaction = current_transaction() + if not transaction: + return wrapped(*args, **kwargs) + + settings = transaction.settings or global_settings() + if not settings.ai_monitoring.enabled: + return wrapped(*args, **kwargs) + + # Framework metric also used for entity tagging in the UI + transaction.add_ml_model_info("Strands", STRANDS_VERSION) + transaction._add_agent_attribute("llm", True) + + # Grab tool data + try: + bound_args = bind_args(wrapped, args, kwargs) + agent_name = getattr(bound_args.get("agent"), "name", "agent") + tool_use = bound_args.get("tool_use", {}) + + run_id = tool_use.get("toolUseId", "") + tool_name = tool_use.get("name", "tool") + _input = tool_use.get("input") + tool_input = str(_input) if _input else None + tool_results = bound_args.get("tool_results", []) + except Exception: + tool_name = "tool" + _logger.warning(TOOL_EXTRACTOR_FAILURE_LOG_MESSAGE, exc_info=True) + + func_name = callable_name(wrapped) + function_trace_name = f"{func_name}/{tool_name}" + + ft = FunctionTrace(name=function_trace_name, group="Llm/tool/Strands") + ft.__enter__() + linking_metadata = get_trace_linking_metadata() + tool_id = str(uuid.uuid4()) + + try: + return_val = wrapped(*args, **kwargs) + except Exception: + raise + + try: + # Wrap return value with proxy and attach metadata for later access + proxied_return_val = AsyncGeneratorProxy( + return_val, _record_tool_event_on_stop_iteration, _handle_tool_streaming_completion_error + ) + proxied_return_val._nr_ft = ft + proxied_return_val._nr_metadata = linking_metadata + proxied_return_val._nr_strands_attrs = { + "tool_results": tool_results, + "tool_name": tool_name, + "tool_id": tool_id, + "run_id": run_id, + "tool_input": tool_input, + "agent_name": agent_name, + } + return proxied_return_val + except Exception: + # If proxy creation fails, clean up the function trace and return original value + ft.__exit__(*sys.exc_info()) + return return_val + + +class AsyncGeneratorProxy(ObjectProxy): + def __init__(self, wrapped, on_stop_iteration, on_error): + super().__init__(wrapped) + self._nr_on_stop_iteration = on_stop_iteration + self._nr_on_error = on_error + + def __aiter__(self): + self._nr_wrapped_iter = self.__wrapped__.__aiter__() + return self + + async def __anext__(self): + transaction = current_transaction() + if not transaction: + return await self._nr_wrapped_iter.__anext__() + + return_val = None + try: + return_val = await self._nr_wrapped_iter.__anext__() + except StopAsyncIteration: + 
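+            # Stream exhausted: the stop callback closes the function trace and records the
+            # LlmAgent/LlmTool event, then we re-raise so the caller's async-for ends normally.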
self._nr_on_stop_iteration(self, transaction) + raise + except Exception: + self._nr_on_error(self, transaction) + raise + return return_val + + async def aclose(self): + return await super().aclose() + + +def wrap_ToolRegister_register_tool(wrapped, instance, args, kwargs): + bound_args = bind_args(wrapped, args, kwargs) + bound_args["tool"]._tool_func = ErrorTraceWrapper(bound_args["tool"]._tool_func) + return wrapped(*args, **kwargs) + + +def wrap_bedrock_model_stream(wrapped, instance, args, kwargs): + """Stores trace context on the messages argument to be retrieved by the _stream() instrumentation.""" + trace = current_trace() + if not trace: + return wrapped(*args, **kwargs) + + settings = trace.settings or global_settings() + if not settings.ai_monitoring.enabled: + return wrapped(*args, **kwargs) + + try: + bound_args = bind_args(wrapped, args, kwargs) + except Exception: + return wrapped(*args, **kwargs) + + if "messages" in bound_args and isinstance(bound_args["messages"], list): + bound_args["messages"].append({"newrelic_trace": trace}) + + return wrapped(*args, **kwargs) + + +def wrap_bedrock_model__stream(wrapped, instance, args, kwargs): + """Retrieves trace context stored on the messages argument and propagates it to the new thread.""" + try: + bound_args = bind_args(wrapped, args, kwargs) + except Exception: + return wrapped(*args, **kwargs) + + if ( + "messages" in bound_args + and isinstance(bound_args["messages"], list) + and bound_args["messages"] # non-empty list + and "newrelic_trace" in bound_args["messages"][-1] + ): + trace_message = bound_args["messages"].pop() + with ContextOf(trace=trace_message["newrelic_trace"]): + return wrapped(*args, **kwargs) + + return wrapped(*args, **kwargs) + + +def instrument_agent_agent(module): + if hasattr(module, "Agent"): + if hasattr(module.Agent, "__call__"): # noqa: B004 + wrap_function_wrapper(module, "Agent.__call__", wrap_agent__call__) + if hasattr(module.Agent, "invoke_async"): + wrap_function_wrapper(module, "Agent.invoke_async", wrap_agent_invoke_async) + if hasattr(module.Agent, "stream_async"): + wrap_function_wrapper(module, "Agent.stream_async", wrap_stream_async) + + +def instrument_tools_executors__executor(module): + if hasattr(module, "ToolExecutor"): + if hasattr(module.ToolExecutor, "_stream"): + wrap_function_wrapper(module, "ToolExecutor._stream", wrap_tool_executor__stream) + + +def instrument_tools_registry(module): + if hasattr(module, "ToolRegistry"): + if hasattr(module.ToolRegistry, "register_tool"): + wrap_function_wrapper(module, "ToolRegistry.register_tool", wrap_ToolRegister_register_tool) + + +def instrument_models_bedrock(module): + # This instrumentation only exists to pass trace context due to bedrock models using a separate thread. 
+ if hasattr(module, "BedrockModel"): + if hasattr(module.BedrockModel, "stream"): + wrap_function_wrapper(module, "BedrockModel.stream", wrap_bedrock_model_stream) + if hasattr(module.BedrockModel, "_stream"): + wrap_function_wrapper(module, "BedrockModel._stream", wrap_bedrock_model__stream) diff --git a/tests/mlmodel_strands/_mock_model_provider.py b/tests/mlmodel_strands/_mock_model_provider.py index e4c9e79930..ef60e13bad 100644 --- a/tests/mlmodel_strands/_mock_model_provider.py +++ b/tests/mlmodel_strands/_mock_model_provider.py @@ -41,7 +41,7 @@ def __init__(self, agent_responses): def format_chunk(self, event): return event - def format_request(self, messages, tool_specs=None, system_prompt=None): + def format_request(self, messages, tool_specs=None, system_prompt=None, **kwargs): return None def get_config(self): @@ -53,7 +53,7 @@ def update_config(self, **model_config): async def structured_output(self, output_model, prompt, system_prompt=None, **kwargs): pass - async def stream(self, messages, tool_specs=None, system_prompt=None): + async def stream(self, messages, tool_specs=None, system_prompt=None, **kwargs): events = self.map_agent_message_to_events(self.agent_responses[self.index]) for event in events: yield event diff --git a/tests/mlmodel_strands/conftest.py b/tests/mlmodel_strands/conftest.py index b810161f6a..a2ad9b8dd0 100644 --- a/tests/mlmodel_strands/conftest.py +++ b/tests/mlmodel_strands/conftest.py @@ -14,6 +14,7 @@ import pytest from _mock_model_provider import MockedModelProvider +from testing_support.fixture.event_loop import event_loop as loop from testing_support.fixtures import collector_agent_registration_fixture, collector_available_fixture from testing_support.ml_testing_utils import set_trace_info @@ -50,15 +51,33 @@ def single_tool_model(): @pytest.fixture -def single_tool_model_error(): +def single_tool_model_runtime_error_coro(): model = MockedModelProvider( [ { "role": "assistant", "content": [ - {"text": "Calling add_exclamation tool"}, + {"text": "Calling throw_exception_coro tool"}, + # Set arguments to an invalid type to trigger error in tool + {"toolUse": {"name": "throw_exception_coro", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def single_tool_model_runtime_error_agen(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling throw_exception_agen tool"}, # Set arguments to an invalid type to trigger error in tool - {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": 12}}}, + {"toolUse": {"name": "throw_exception_agen", "toolUseId": "123", "input": {"message": "Hello"}}}, ], }, {"role": "assistant", "content": [{"text": "Success!"}]}, diff --git a/tests/mlmodel_strands/test_agent.py b/tests/mlmodel_strands/test_agent.py new file mode 100644 index 0000000000..af685668ad --- /dev/null +++ b/tests/mlmodel_strands/test_agent.py @@ -0,0 +1,427 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from strands import Agent, tool +from testing_support.fixtures import reset_core_stats_engine, validate_attributes +from testing_support.ml_testing_utils import ( + disabled_ai_monitoring_record_content_settings, + disabled_ai_monitoring_settings, + events_with_context_attrs, + tool_events_sans_content, +) +from testing_support.validators.validate_custom_event import validate_custom_event_count +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_error_trace_attributes import validate_error_trace_attributes +from testing_support.validators.validate_transaction_error_event_count import validate_transaction_error_event_count +from testing_support.validators.validate_transaction_metrics import validate_transaction_metrics + +from newrelic.api.background_task import background_task +from newrelic.api.llm_custom_attributes import WithLlmCustomAttributes +from newrelic.common.object_names import callable_name +from newrelic.common.object_wrapper import transient_function_wrapper + +tool_recorded_event = [ + ( + {"type": "LlmTool"}, + { + "id": None, + "run_id": "123", + "output": "{'text': 'Hello!'}", + "name": "add_exclamation", + "agent_name": "my_agent", + "span_id": None, + "trace_id": "trace-id", + "input": "{'message': 'Hello'}", + "vendor": "strands", + "ingest_source": "Python", + "duration": None, + }, + ) +] + +tool_recorded_event_forced_internal_error = [ + ( + {"type": "LlmTool"}, + { + "id": None, + "run_id": "123", + "name": "add_exclamation", + "agent_name": "my_agent", + "span_id": None, + "trace_id": "trace-id", + "input": "{'message': 'Hello'}", + "vendor": "strands", + "ingest_source": "Python", + "duration": None, + "error": True, + }, + ) +] + +tool_recorded_event_error_coro = [ + ( + {"type": "LlmTool"}, + { + "id": None, + "run_id": "123", + "name": "throw_exception_coro", + "agent_name": "my_agent", + "span_id": None, + "trace_id": "trace-id", + "input": "{'message': 'Hello'}", + "vendor": "strands", + "ingest_source": "Python", + "error": True, + "output": "{'text': 'Error: RuntimeError - Oops'}", + "duration": None, + }, + ) +] + + +tool_recorded_event_error_agen = [ + ( + {"type": "LlmTool"}, + { + "id": None, + "run_id": "123", + "name": "throw_exception_agen", + "agent_name": "my_agent", + "span_id": None, + "trace_id": "trace-id", + "input": "{'message': 'Hello'}", + "vendor": "strands", + "ingest_source": "Python", + "error": True, + "output": "{'text': 'Error: RuntimeError - Oops'}", + "duration": None, + }, + ) +] + + +agent_recorded_event = [ + ( + {"type": "LlmAgent"}, + { + "id": None, + "name": "my_agent", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + "ingest_source": "Python", + "duration": None, + }, + ) +] + +agent_recorded_event_error = [ + ( + {"type": "LlmAgent"}, + { + "id": None, + "name": "my_agent", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + "ingest_source": "Python", + "error": True, + "duration": None, + }, + ) +] + + +# Example tool for testing purposes +@tool +async def add_exclamation(message: str) -> str: + return f"{message}!" 
+ + +@tool +async def throw_exception_coro(message: str) -> str: + raise RuntimeError("Oops") + + +@tool +async def throw_exception_agen(message: str) -> str: + raise RuntimeError("Oops") + yield + + +@reset_core_stats_engine() +@validate_custom_events(events_with_context_attrs(tool_recorded_event)) +@validate_custom_events(events_with_context_attrs(agent_recorded_event)) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_invoke", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_invoke(set_trace_info, single_tool_model): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + with WithLlmCustomAttributes({"context": "attr"}): + response = my_agent('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" + assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 + + +@reset_core_stats_engine() +@validate_custom_events(tool_recorded_event) +@validate_custom_events(agent_recorded_event) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_invoke_async", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_invoke_async(loop, set_trace_info, single_tool_model): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + async def _test(): + response = await my_agent.invoke_async('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" 
+ assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 + + loop.run_until_complete(_test()) + + +@reset_core_stats_engine() +@validate_custom_events(tool_recorded_event) +@validate_custom_events(agent_recorded_event) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_stream_async", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_stream_async(loop, set_trace_info, single_tool_model): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + async def _test(): + response = my_agent.stream_async('Add an exclamation to the word "Hello"') + messages = [event["message"]["content"] async for event in response if "message" in event] + + assert len(messages) == 3 + assert messages[0][0]["text"] == "Calling add_exclamation tool" + assert messages[0][1]["toolUse"]["name"] == "add_exclamation" + assert messages[1][0]["toolResult"]["content"][0]["text"] == "Hello!" + assert messages[2][0]["text"] == "Success!" + + loop.run_until_complete(_test()) + + +@reset_core_stats_engine() +@disabled_ai_monitoring_record_content_settings +@validate_custom_events(agent_recorded_event) +@validate_custom_events(tool_events_sans_content(tool_recorded_event)) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_invoke_no_content", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_invoke_no_content(set_trace_info, single_tool_model): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + response = my_agent('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" + assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 + + +@disabled_ai_monitoring_settings +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +@background_task() +def test_agent_invoke_disabled_ai_monitoring_events(set_trace_info, single_tool_model): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + response = my_agent('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" 
+ assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 + + +@reset_core_stats_engine() +@validate_transaction_error_event_count(1) +@validate_error_trace_attributes(callable_name(ValueError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {}}) +@validate_custom_events(agent_recorded_event_error) +@validate_custom_event_count(count=1) +@validate_transaction_metrics( + "test_agent:test_agent_invoke_error", + scoped_metrics=[("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1)], + rollup_metrics=[("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1)], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_invoke_error(set_trace_info, single_tool_model): + # Add a wrapper to intentionally force an error in the Agent code + @transient_function_wrapper("strands.agent.agent", "Agent._convert_prompt_to_messages") + def _wrap_convert_prompt_to_messages(wrapped, instance, args, kwargs): + raise ValueError("Oops") + + @_wrap_convert_prompt_to_messages + def _test(): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + my_agent('Add an exclamation to the word "Hello"') # raises ValueError + + with pytest.raises(ValueError): + _test() + + +@reset_core_stats_engine() +@validate_transaction_error_event_count(1) +@validate_error_trace_attributes(callable_name(RuntimeError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {}}) +@validate_custom_events(tool_recorded_event_error_coro) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_invoke_tool_coro_runtime_error", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/throw_exception_coro", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/throw_exception_coro", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_invoke_tool_coro_runtime_error(set_trace_info, single_tool_model_runtime_error_coro): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model_runtime_error_coro, tools=[throw_exception_coro]) + + response = my_agent('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" 
+ assert response.metrics.tool_metrics["throw_exception_coro"].error_count == 1 + + +@reset_core_stats_engine() +@validate_transaction_error_event_count(1) +@validate_error_trace_attributes(callable_name(RuntimeError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {}}) +@validate_custom_events(tool_recorded_event_error_agen) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_invoke_tool_agen_runtime_error", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/throw_exception_agen", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/throw_exception_agen", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_invoke_tool_agen_runtime_error(set_trace_info, single_tool_model_runtime_error_agen): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model_runtime_error_agen, tools=[throw_exception_agen]) + + response = my_agent('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" + assert response.metrics.tool_metrics["throw_exception_agen"].error_count == 1 + + +@reset_core_stats_engine() +@validate_transaction_error_event_count(1) +@validate_error_trace_attributes(callable_name(ValueError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {}}) +@validate_custom_events(agent_recorded_event) +@validate_custom_events(tool_recorded_event_forced_internal_error) +@validate_custom_event_count(count=2) +@validate_transaction_metrics( + "test_agent:test_agent_tool_forced_exception", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_agent_tool_forced_exception(set_trace_info, single_tool_model): + # Add a wrapper to intentionally force an error in the ToolExecutor._stream code to hit the exception path in + # the AsyncGeneratorProxy + @transient_function_wrapper("strands.hooks.events", "BeforeToolCallEvent.__init__") + def _wrap_BeforeToolCallEvent_init(wrapped, instance, args, kwargs): + raise ValueError("Oops") + + @_wrap_BeforeToolCallEvent_init + def _test(): + set_trace_info() + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + my_agent('Add an exclamation to the word "Hello"') + + # This will not explicitly raise a ValueError when running the test but we are still able to capture it in the error trace + _test() + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_agent_invoke_outside_txn(single_tool_model): + my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) + + response = my_agent('Add an exclamation to the word "Hello"') + assert response.message["content"][0]["text"] == "Success!" 
+ assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 diff --git a/tests/mlmodel_strands/test_simple.py b/tests/mlmodel_strands/test_simple.py deleted file mode 100644 index ae24003fab..0000000000 --- a/tests/mlmodel_strands/test_simple.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2010 New Relic, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from strands import Agent, tool - -from newrelic.api.background_task import background_task - - -# Example tool for testing purposes -@tool -def add_exclamation(message: str) -> str: - return f"{message}!" - - -# TODO: Remove this file once all real tests are in place - - -@background_task() -def test_simple_run_agent(set_trace_info, single_tool_model): - set_trace_info() - my_agent = Agent(name="my_agent", model=single_tool_model, tools=[add_exclamation]) - - response = my_agent("Run the tools.") - assert response.message["content"][0]["text"] == "Success!" - assert response.metrics.tool_metrics["add_exclamation"].success_count == 1 diff --git a/tests/testing_support/fixtures.py b/tests/testing_support/fixtures.py index 3d93e06e30..540e44f70c 100644 --- a/tests/testing_support/fixtures.py +++ b/tests/testing_support/fixtures.py @@ -797,7 +797,7 @@ def _bind_params(transaction, *args, **kwargs): transaction = _bind_params(*args, **kwargs) error_events = transaction.error_events(instance.stats_table) - assert len(error_events) == num_errors + assert len(error_events) == num_errors, f"Expected: {num_errors}, Got: {len(error_events)}" for sample in error_events: assert isinstance(sample, list) assert len(sample) == 3 diff --git a/tests/testing_support/validators/validate_error_event_collector_json.py b/tests/testing_support/validators/validate_error_event_collector_json.py index d1cec3a558..27ea76f3a3 100644 --- a/tests/testing_support/validators/validate_error_event_collector_json.py +++ b/tests/testing_support/validators/validate_error_event_collector_json.py @@ -52,7 +52,7 @@ def _validate_error_event_collector_json(wrapped, instance, args, kwargs): error_events = decoded_json[2] - assert len(error_events) == num_errors + assert len(error_events) == num_errors, f"Expected: {num_errors}, Got: {len(error_events)}" for event in error_events: # event is an array containing intrinsics, user-attributes, # and agent-attributes diff --git a/tests/testing_support/validators/validate_transaction_error_event_count.py b/tests/testing_support/validators/validate_transaction_error_event_count.py index b41a52330f..f5e8c0b206 100644 --- a/tests/testing_support/validators/validate_transaction_error_event_count.py +++ b/tests/testing_support/validators/validate_transaction_error_event_count.py @@ -28,7 +28,9 @@ def _validate_error_event_on_stats_engine(wrapped, instance, args, kwargs): raise else: error_events = list(instance.error_events) - assert len(error_events) == num_errors + assert len(error_events) == num_errors, ( + f"Expected: {num_errors}, Got: {len(error_events)}. 
Errors: {error_events}" + ) return result From 4d234e4213a04ce53c7cedb5773aa94f1ef34df4 Mon Sep 17 00:00:00 2001 From: Timothy Pansino <11214426+TimPansino@users.noreply.github.com> Date: Fri, 5 Dec 2025 09:14:46 -0800 Subject: [PATCH 22/34] Strands MultiAgent Instrumentation (#1590) * Rename strands instrument functions * Add instrumentation for strands multiagent * Reorganize strands tests * Strands multiagent tests * Remove timestamp from test expected events. --------- Co-authored-by: Uma Annamalai --- newrelic/config.py | 22 +- newrelic/hooks/mlmodel_strands.py | 24 +- tests/mlmodel_strands/__init__.py | 13 + tests/mlmodel_strands/_test_agent.py | 165 +++++++++++ .../mlmodel_strands/_test_multiagent_graph.py | 91 ++++++ .../mlmodel_strands/_test_multiagent_swarm.py | 108 ++++++++ tests/mlmodel_strands/conftest.py | 132 --------- tests/mlmodel_strands/test_agent.py | 46 ++-- .../mlmodel_strands/test_multiagent_graph.py | 233 ++++++++++++++++ .../mlmodel_strands/test_multiagent_swarm.py | 260 ++++++++++++++++++ 10 files changed, 928 insertions(+), 166 deletions(-) create mode 100644 tests/mlmodel_strands/__init__.py create mode 100644 tests/mlmodel_strands/_test_agent.py create mode 100644 tests/mlmodel_strands/_test_multiagent_graph.py create mode 100644 tests/mlmodel_strands/_test_multiagent_swarm.py create mode 100644 tests/mlmodel_strands/test_multiagent_graph.py create mode 100644 tests/mlmodel_strands/test_multiagent_swarm.py diff --git a/newrelic/config.py b/newrelic/config.py index 94955293d5..4b8627772d 100644 --- a/newrelic/config.py +++ b/newrelic/config.py @@ -2948,12 +2948,26 @@ def _process_module_builtin_defaults(): "newrelic.hooks.mlmodel_autogen", "instrument_autogen_agentchat_agents__assistant_agent", ) - _process_module_definition("strands.agent.agent", "newrelic.hooks.mlmodel_strands", "instrument_agent_agent") _process_module_definition( - "strands.tools.executors._executor", "newrelic.hooks.mlmodel_strands", "instrument_tools_executors__executor" + "strands.agent.agent", "newrelic.hooks.mlmodel_strands", "instrument_strands_agent_agent" + ) + _process_module_definition( + "strands.multiagent.graph", "newrelic.hooks.mlmodel_strands", "instrument_strands_multiagent_graph" + ) + _process_module_definition( + "strands.multiagent.swarm", "newrelic.hooks.mlmodel_strands", "instrument_strands_multiagent_swarm" + ) + _process_module_definition( + "strands.tools.executors._executor", + "newrelic.hooks.mlmodel_strands", + "instrument_strands_tools_executors__executor", + ) + _process_module_definition( + "strands.tools.registry", "newrelic.hooks.mlmodel_strands", "instrument_strands_tools_registry" + ) + _process_module_definition( + "strands.models.bedrock", "newrelic.hooks.mlmodel_strands", "instrument_strands_models_bedrock" ) - _process_module_definition("strands.tools.registry", "newrelic.hooks.mlmodel_strands", "instrument_tools_registry") - _process_module_definition("strands.models.bedrock", "newrelic.hooks.mlmodel_strands", "instrument_models_bedrock") _process_module_definition("mcp.client.session", "newrelic.hooks.adapter_mcp", "instrument_mcp_client_session") _process_module_definition( diff --git a/newrelic/hooks/mlmodel_strands.py b/newrelic/hooks/mlmodel_strands.py index bf849fd717..20317626da 100644 --- a/newrelic/hooks/mlmodel_strands.py +++ b/newrelic/hooks/mlmodel_strands.py @@ -461,7 +461,7 @@ def wrap_bedrock_model__stream(wrapped, instance, args, kwargs): return wrapped(*args, **kwargs) -def instrument_agent_agent(module): +def 
instrument_strands_agent_agent(module): if hasattr(module, "Agent"): if hasattr(module.Agent, "__call__"): # noqa: B004 wrap_function_wrapper(module, "Agent.__call__", wrap_agent__call__) @@ -471,19 +471,35 @@ def instrument_agent_agent(module): wrap_function_wrapper(module, "Agent.stream_async", wrap_stream_async) -def instrument_tools_executors__executor(module): +def instrument_strands_multiagent_graph(module): + if hasattr(module, "Graph"): + if hasattr(module.Graph, "__call__"): # noqa: B004 + wrap_function_wrapper(module, "Graph.__call__", wrap_agent__call__) + if hasattr(module.Graph, "invoke_async"): + wrap_function_wrapper(module, "Graph.invoke_async", wrap_agent_invoke_async) + + +def instrument_strands_multiagent_swarm(module): + if hasattr(module, "Swarm"): + if hasattr(module.Swarm, "__call__"): # noqa: B004 + wrap_function_wrapper(module, "Swarm.__call__", wrap_agent__call__) + if hasattr(module.Swarm, "invoke_async"): + wrap_function_wrapper(module, "Swarm.invoke_async", wrap_agent_invoke_async) + + +def instrument_strands_tools_executors__executor(module): if hasattr(module, "ToolExecutor"): if hasattr(module.ToolExecutor, "_stream"): wrap_function_wrapper(module, "ToolExecutor._stream", wrap_tool_executor__stream) -def instrument_tools_registry(module): +def instrument_strands_tools_registry(module): if hasattr(module, "ToolRegistry"): if hasattr(module.ToolRegistry, "register_tool"): wrap_function_wrapper(module, "ToolRegistry.register_tool", wrap_ToolRegister_register_tool) -def instrument_models_bedrock(module): +def instrument_strands_models_bedrock(module): # This instrumentation only exists to pass trace context due to bedrock models using a separate thread. if hasattr(module, "BedrockModel"): if hasattr(module.BedrockModel, "stream"): diff --git a/tests/mlmodel_strands/__init__.py b/tests/mlmodel_strands/__init__.py new file mode 100644 index 0000000000..8030baccf7 --- /dev/null +++ b/tests/mlmodel_strands/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/mlmodel_strands/_test_agent.py b/tests/mlmodel_strands/_test_agent.py new file mode 100644 index 0000000000..15aa79a5ac --- /dev/null +++ b/tests/mlmodel_strands/_test_agent.py @@ -0,0 +1,165 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest +from strands import tool + +from ._mock_model_provider import MockedModelProvider + + +# Example tool for testing purposes +@tool +async def add_exclamation(message: str) -> str: + return f"{message}!" + + +@tool +async def throw_exception_coro(message: str) -> str: + raise RuntimeError("Oops") + + +@tool +async def throw_exception_agen(message: str) -> str: + raise RuntimeError("Oops") + yield + + +@pytest.fixture +def single_tool_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def single_tool_model_runtime_error_coro(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling throw_exception_coro tool"}, + # Set arguments to an invalid type to trigger error in tool + {"toolUse": {"name": "throw_exception_coro", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def single_tool_model_runtime_error_agen(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling throw_exception_agen tool"}, + # Set arguments to an invalid type to trigger error in tool + {"toolUse": {"name": "throw_exception_agen", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def multi_tool_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123, "b": 2}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model + + +@pytest.fixture +def multi_tool_model_error(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling add_exclamation tool"}, + {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, + ], + }, + { + "role": "assistant", + "content": [ + {"text": "Calling compute_sum tool"}, + # Set insufficient arguments to trigger error in tool + {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123}}}, + ], + }, + {"role": "assistant", "content": [{"text": "Success!"}]}, + ] + ) + return model diff --git 
a/tests/mlmodel_strands/_test_multiagent_graph.py b/tests/mlmodel_strands/_test_multiagent_graph.py new file mode 100644 index 0000000000..73c1679701 --- /dev/null +++ b/tests/mlmodel_strands/_test_multiagent_graph.py @@ -0,0 +1,91 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest +from strands import Agent, tool +from strands.multiagent.graph import GraphBuilder + +from ._mock_model_provider import MockedModelProvider + + +@pytest.fixture +def math_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "I'll calculate the sum of 15 and 27 for you."}, + {"toolUse": {"name": "calculate_sum", "toolUseId": "123", "input": {"a": 15, "b": 27}}}, + ], + }, + {"role": "assistant", "content": [{"text": "The sum of 15 and 27 is 42."}]}, + ] + ) + return model + + +@pytest.fixture +def analysis_model(): + model = MockedModelProvider( + [ + { + "role": "assistant", + "content": [ + {"text": "I'll validate the calculation result of 42 from the calculator."}, + {"toolUse": {"name": "analyze_result", "toolUseId": "456", "input": {"value": 42}}}, + ], + }, + { + "role": "assistant", + "content": [{"text": "The calculation is correct, and 42 is a positive integer result."}], + }, + ] + ) + return model + + +# Example tool for testing purposes +@tool +async def calculate_sum(a: int, b: int) -> int: + """Calculate the sum of two numbers.""" + return a + b + + +@tool +async def analyze_result(value: int) -> str: + """Analyze a numeric result.""" + return f"The result {value} is {'positive' if value > 0 else 'zero or negative'}" + + +@pytest.fixture +def math_agent(math_model): + return Agent(name="math_agent", model=math_model, tools=[calculate_sum]) + + +@pytest.fixture +def analysis_agent(analysis_model): + return Agent(name="analysis_agent", model=analysis_model, tools=[analyze_result]) + + +@pytest.fixture +def agent_graph(math_agent, analysis_agent): + # Build graph + builder = GraphBuilder() + builder.add_node(math_agent, "math") + builder.add_node(analysis_agent, "analysis") + builder.add_edge("math", "analysis") + builder.set_entry_point("math") + + return builder.build() diff --git a/tests/mlmodel_strands/_test_multiagent_swarm.py b/tests/mlmodel_strands/_test_multiagent_swarm.py new file mode 100644 index 0000000000..4b7916c27b --- /dev/null +++ b/tests/mlmodel_strands/_test_multiagent_swarm.py @@ -0,0 +1,108 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import pytest
+from strands import Agent, tool
+from strands.multiagent.swarm import Swarm
+
+from ._mock_model_provider import MockedModelProvider
+
+
+@pytest.fixture
+def math_model():
+    model = MockedModelProvider(
+        [
+            {
+                "role": "assistant",
+                "content": [
+                    {"text": "I'll calculate the sum of 15 and 27 for you."},
+                    {"toolUse": {"name": "calculate_sum", "toolUseId": "123", "input": {"a": 15, "b": 27}}},
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": [
+                    {
+                        "toolUse": {
+                            "name": "handoff_to_agent",
+                            "toolUseId": "789",
+                            "input": {
+                                "agent_name": "analysis_agent",
+                                "message": "Analyze the result of the calculation done by the math_agent.",
+                                "context": {"result": 42},
+                            },
+                        }
+                    }
+                ],
+            },
+            {"role": "assistant", "content": [{"text": "The sum of 15 and 27 is 42."}]},
+        ]
+    )
+    return model
+
+
+@pytest.fixture
+def analysis_model():
+    model = MockedModelProvider(
+        [
+            {
+                "role": "assistant",
+                "content": [
+                    {"text": "I'll validate the calculation result of 42 from the calculator."},
+                    {"toolUse": {"name": "analyze_result", "toolUseId": "456", "input": {"value": 42}}},
+                ],
+            },
+            {
+                "role": "assistant",
+                "content": [{"text": "The calculation is correct, and 42 is a positive integer result."}],
+            },
+        ]
+    )
+    return model
+
+
+# Example tool for testing purposes
+@tool
+async def calculate_sum(a: int, b: int) -> int:
+    """Calculate the sum of two numbers."""
+    return a + b
+
+
+@tool
+async def analyze_result(value: int) -> str:
+    """Analyze a numeric result."""
+    return f"The result {value} is {'positive' if value > 0 else 'zero or negative'}"
+
+
+@pytest.fixture
+def math_agent(math_model):
+    return Agent(name="math_agent", model=math_model, tools=[calculate_sum])
+
+
+@pytest.fixture
+def analysis_agent(analysis_model):
+    return Agent(name="analysis_agent", model=analysis_model, tools=[analyze_result])
+
+
+@pytest.fixture
+def agent_swarm(math_agent, analysis_agent):
+    # Build swarm with both agents
+    return Swarm(
+        [math_agent, analysis_agent],
+        entry_point=math_agent,
+        execution_timeout=60,
+        node_timeout=30,
+        max_handoffs=5,
+        max_iterations=5,
+    )
diff --git a/tests/mlmodel_strands/conftest.py b/tests/mlmodel_strands/conftest.py
index a2ad9b8dd0..abbc29b969 100644
--- a/tests/mlmodel_strands/conftest.py
+++ b/tests/mlmodel_strands/conftest.py
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import pytest -from _mock_model_provider import MockedModelProvider from testing_support.fixture.event_loop import event_loop as loop from testing_support.fixtures import collector_agent_registration_fixture, collector_available_fixture from testing_support.ml_testing_utils import set_trace_info @@ -31,133 +29,3 @@ collector_agent_registration = collector_agent_registration_fixture( app_name="Python Agent Test (mlmodel_strands)", default_settings=_default_settings ) - - -@pytest.fixture -def single_tool_model(): - model = MockedModelProvider( - [ - { - "role": "assistant", - "content": [ - {"text": "Calling add_exclamation tool"}, - {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, - ], - }, - {"role": "assistant", "content": [{"text": "Success!"}]}, - ] - ) - return model - - -@pytest.fixture -def single_tool_model_runtime_error_coro(): - model = MockedModelProvider( - [ - { - "role": "assistant", - "content": [ - {"text": "Calling throw_exception_coro tool"}, - # Set arguments to an invalid type to trigger error in tool - {"toolUse": {"name": "throw_exception_coro", "toolUseId": "123", "input": {"message": "Hello"}}}, - ], - }, - {"role": "assistant", "content": [{"text": "Success!"}]}, - ] - ) - return model - - -@pytest.fixture -def single_tool_model_runtime_error_agen(): - model = MockedModelProvider( - [ - { - "role": "assistant", - "content": [ - {"text": "Calling throw_exception_agen tool"}, - # Set arguments to an invalid type to trigger error in tool - {"toolUse": {"name": "throw_exception_agen", "toolUseId": "123", "input": {"message": "Hello"}}}, - ], - }, - {"role": "assistant", "content": [{"text": "Success!"}]}, - ] - ) - return model - - -@pytest.fixture -def multi_tool_model(): - model = MockedModelProvider( - [ - { - "role": "assistant", - "content": [ - {"text": "Calling add_exclamation tool"}, - {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, - ], - }, - { - "role": "assistant", - "content": [ - {"text": "Calling compute_sum tool"}, - {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, - ], - }, - { - "role": "assistant", - "content": [ - {"text": "Calling add_exclamation tool"}, - {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, - ], - }, - { - "role": "assistant", - "content": [ - {"text": "Calling compute_sum tool"}, - {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123, "b": 2}}}, - ], - }, - {"role": "assistant", "content": [{"text": "Success!"}]}, - ] - ) - return model - - -@pytest.fixture -def multi_tool_model_error(): - model = MockedModelProvider( - [ - { - "role": "assistant", - "content": [ - {"text": "Calling add_exclamation tool"}, - {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Hello"}}}, - ], - }, - { - "role": "assistant", - "content": [ - {"text": "Calling compute_sum tool"}, - {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 5, "b": 3}}}, - ], - }, - { - "role": "assistant", - "content": [ - {"text": "Calling add_exclamation tool"}, - {"toolUse": {"name": "add_exclamation", "toolUseId": "123", "input": {"message": "Goodbye"}}}, - ], - }, - { - "role": "assistant", - "content": [ - {"text": "Calling compute_sum tool"}, - # Set insufficient arguments to trigger error in tool - {"toolUse": {"name": "compute_sum", "toolUseId": "123", "input": {"a": 123}}}, - ], - }, - {"role": "assistant", "content": [{"text": 
"Success!"}]}, - ] - ) - return model diff --git a/tests/mlmodel_strands/test_agent.py b/tests/mlmodel_strands/test_agent.py index af685668ad..6fa5e56a68 100644 --- a/tests/mlmodel_strands/test_agent.py +++ b/tests/mlmodel_strands/test_agent.py @@ -13,7 +13,7 @@ # limitations under the License. import pytest -from strands import Agent, tool +from strands import Agent from testing_support.fixtures import reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( disabled_ai_monitoring_record_content_settings, @@ -32,6 +32,17 @@ from newrelic.common.object_names import callable_name from newrelic.common.object_wrapper import transient_function_wrapper +from ._test_agent import ( + add_exclamation, + multi_tool_model, + multi_tool_model_error, + single_tool_model, + single_tool_model_runtime_error_agen, + single_tool_model_runtime_error_coro, + throw_exception_agen, + throw_exception_coro, +) + tool_recorded_event = [ ( {"type": "LlmTool"}, @@ -144,29 +155,12 @@ ] -# Example tool for testing purposes -@tool -async def add_exclamation(message: str) -> str: - return f"{message}!" - - -@tool -async def throw_exception_coro(message: str) -> str: - raise RuntimeError("Oops") - - -@tool -async def throw_exception_agen(message: str) -> str: - raise RuntimeError("Oops") - yield - - @reset_core_stats_engine() @validate_custom_events(events_with_context_attrs(tool_recorded_event)) @validate_custom_events(events_with_context_attrs(agent_recorded_event)) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_invoke", + "mlmodel_strands.test_agent:test_agent_invoke", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), @@ -194,7 +188,7 @@ def test_agent_invoke(set_trace_info, single_tool_model): @validate_custom_events(agent_recorded_event) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_invoke_async", + "mlmodel_strands.test_agent:test_agent_invoke_async", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), @@ -224,7 +218,7 @@ async def _test(): @validate_custom_events(agent_recorded_event) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_stream_async", + "mlmodel_strands.test_agent:test_agent_stream_async", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), @@ -260,7 +254,7 @@ async def _test(): @validate_custom_events(tool_events_sans_content(tool_recorded_event)) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_invoke_no_content", + "mlmodel_strands.test_agent:test_agent_invoke_no_content", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), @@ -301,7 +295,7 @@ def test_agent_invoke_disabled_ai_monitoring_events(set_trace_info, single_tool_ @validate_custom_events(agent_recorded_event_error) @validate_custom_event_count(count=1) @validate_transaction_metrics( - "test_agent:test_agent_invoke_error", + "mlmodel_strands.test_agent:test_agent_invoke_error", 
scoped_metrics=[("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1)], rollup_metrics=[("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1)], background_task=True, @@ -330,7 +324,7 @@ def _test(): @validate_custom_events(tool_recorded_event_error_coro) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_invoke_tool_coro_runtime_error", + "mlmodel_strands.test_agent:test_agent_invoke_tool_coro_runtime_error", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/throw_exception_coro", 1), @@ -358,7 +352,7 @@ def test_agent_invoke_tool_coro_runtime_error(set_trace_info, single_tool_model_ @validate_custom_events(tool_recorded_event_error_agen) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_invoke_tool_agen_runtime_error", + "mlmodel_strands.test_agent:test_agent_invoke_tool_agen_runtime_error", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/throw_exception_agen", 1), @@ -387,7 +381,7 @@ def test_agent_invoke_tool_agen_runtime_error(set_trace_info, single_tool_model_ @validate_custom_events(tool_recorded_event_forced_internal_error) @validate_custom_event_count(count=2) @validate_transaction_metrics( - "test_agent:test_agent_tool_forced_exception", + "mlmodel_strands.test_agent:test_agent_tool_forced_exception", scoped_metrics=[ ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/my_agent", 1), ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/add_exclamation", 1), diff --git a/tests/mlmodel_strands/test_multiagent_graph.py b/tests/mlmodel_strands/test_multiagent_graph.py new file mode 100644 index 0000000000..7bd84fc901 --- /dev/null +++ b/tests/mlmodel_strands/test_multiagent_graph.py @@ -0,0 +1,233 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from testing_support.fixtures import reset_core_stats_engine, validate_attributes +from testing_support.ml_testing_utils import disabled_ai_monitoring_settings, events_with_context_attrs +from testing_support.validators.validate_custom_event import validate_custom_event_count +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_transaction_metrics import validate_transaction_metrics + +from newrelic.api.background_task import background_task +from newrelic.api.llm_custom_attributes import WithLlmCustomAttributes + +from ._test_multiagent_graph import agent_graph, analysis_agent, analysis_model, math_agent, math_model + +agent_recorded_events = [ + [ + {"type": "LlmAgent"}, + { + "duration": None, + "id": None, + "ingest_source": "Python", + "name": "math_agent", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], + [ + {"type": "LlmAgent"}, + { + "duration": None, + "id": None, + "ingest_source": "Python", + "name": "analysis_agent", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], +] + +tool_recorded_events = [ + [ + {"type": "LlmTool"}, + { + "agent_name": "math_agent", + "duration": None, + "id": None, + "ingest_source": "Python", + "input": "{'a': 15, 'b': 27}", + "name": "calculate_sum", + "output": "{'text': '42'}", + "run_id": "123", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], + [ + {"type": "LlmTool"}, + { + "agent_name": "analysis_agent", + "duration": None, + "id": None, + "ingest_source": "Python", + "input": "{'value': 42}", + "name": "analyze_result", + "output": "{'text': 'The result 42 is positive'}", + "run_id": "456", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], +] + + +@reset_core_stats_engine() +@validate_custom_events(events_with_context_attrs(tool_recorded_events)) +@validate_custom_events(events_with_context_attrs(agent_recorded_events)) +@validate_custom_event_count(count=4) # 2 LlmTool events, 2 LlmAgent events +@validate_transaction_metrics( + "mlmodel_strands.test_multiagent_graph:test_multiagent_graph_invoke", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_multiagent_graph_invoke(set_trace_info, agent_graph): + set_trace_info() + + with WithLlmCustomAttributes({"context": "attr"}): + response = agent_graph("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + assert not response.failed_nodes + assert response.results["math"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." 
+ ) + + +@reset_core_stats_engine() +@validate_custom_events(tool_recorded_events) +@validate_custom_events(agent_recorded_events) +@validate_custom_event_count(count=4) # 2 LlmTool events, 2 LlmAgent events +@validate_transaction_metrics( + "mlmodel_strands.test_multiagent_graph:test_multiagent_graph_invoke_async", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_multiagent_graph_invoke_async(loop, set_trace_info, agent_graph): + set_trace_info() + + async def _test(): + response = await agent_graph.invoke_async("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + assert not response.failed_nodes + assert response.results["math"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." + ) + + loop.run_until_complete(_test()) + + +@reset_core_stats_engine() +@validate_custom_events(tool_recorded_events) +@validate_custom_events(agent_recorded_events) +@validate_custom_event_count(count=4) # 2 LlmTool events, 2 LlmAgent events +@validate_transaction_metrics( + "mlmodel_strands.test_multiagent_graph:test_multiagent_graph_stream_async", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_multiagent_graph_stream_async(loop, set_trace_info, agent_graph): + set_trace_info() + + async def _test(): + response = agent_graph.stream_async("Calculate the sum of 15 and 27.") + messages = [ + event["node_result"].result.message async for event in response if event["type"] == "multiagent_node_stop" + ] + + assert len(messages) == 2 + + assert messages[0]["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert messages[1]["content"][0]["text"] == "The calculation is correct, and 42 is a positive integer result." 
+ + loop.run_until_complete(_test()) + + +@disabled_ai_monitoring_settings +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +@background_task() +def test_multiagent_graph_invoke_disabled_ai_monitoring_events(set_trace_info, agent_graph): + set_trace_info() + + response = agent_graph("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + assert not response.failed_nodes + assert response.results["math"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." + ) + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_multiagent_graph_invoke_outside_txn(agent_graph): + response = agent_graph("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + assert not response.failed_nodes + assert response.results["math"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." + ) diff --git a/tests/mlmodel_strands/test_multiagent_swarm.py b/tests/mlmodel_strands/test_multiagent_swarm.py new file mode 100644 index 0000000000..bbcbb3e27c --- /dev/null +++ b/tests/mlmodel_strands/test_multiagent_swarm.py @@ -0,0 +1,260 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from testing_support.fixtures import reset_core_stats_engine, validate_attributes +from testing_support.ml_testing_utils import disabled_ai_monitoring_settings, events_with_context_attrs +from testing_support.validators.validate_custom_event import validate_custom_event_count +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_transaction_metrics import validate_transaction_metrics + +from newrelic.api.background_task import background_task +from newrelic.api.llm_custom_attributes import WithLlmCustomAttributes + +from ._test_multiagent_swarm import agent_swarm, analysis_agent, analysis_model, math_agent, math_model + +agent_recorded_events = [ + [ + {"type": "LlmAgent"}, + { + "duration": None, + "id": None, + "ingest_source": "Python", + "name": "math_agent", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], + [ + {"type": "LlmAgent"}, + { + "duration": None, + "id": None, + "ingest_source": "Python", + "name": "analysis_agent", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], +] + +tool_recorded_events = [ + [ + {"type": "LlmTool"}, + { + "agent_name": "math_agent", + "duration": None, + "id": None, + "ingest_source": "Python", + "input": "{'a': 15, 'b': 27}", + "name": "calculate_sum", + "output": "{'text': '42'}", + "run_id": "123", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], + [ + {"type": "LlmTool"}, + { + "agent_name": "analysis_agent", + "duration": None, + "id": None, + "ingest_source": "Python", + "input": "{'value': 42}", + "name": "analyze_result", + "output": "{'text': 'The result 42 is positive'}", + "run_id": "456", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ], +] + +handoff_recorded_event = [ + [ + {"type": "LlmTool"}, + { + "agent_name": "math_agent", + "duration": None, + "id": None, + "ingest_source": "Python", + # This is the output from math_agent being sent to the handoff_to_agent tool, which will then be input to the analysis_agent + "input": "{'agent_name': 'analysis_agent', 'message': 'Analyze the result of the calculation done by the math_agent.', 'context': {'result': 42}}", + "name": "handoff_to_agent", + "output": "{'text': 'Handing off to analysis_agent: Analyze the result of the calculation done by the math_agent.'}", + "run_id": "789", + "span_id": None, + "trace_id": "trace-id", + "vendor": "strands", + }, + ] +] + + +@reset_core_stats_engine() +@validate_custom_events(events_with_context_attrs(tool_recorded_events)) +@validate_custom_events(events_with_context_attrs(agent_recorded_events)) +@validate_custom_events(events_with_context_attrs(handoff_recorded_event)) +@validate_custom_event_count(count=5) # 2 LlmTool events, 2 LlmAgent events, 1 LlmTool Handoff event +@validate_transaction_metrics( + "mlmodel_strands.test_multiagent_swarm:test_multiagent_swarm_invoke", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + 
("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_multiagent_swarm_invoke(set_trace_info, agent_swarm): + set_trace_info() + + with WithLlmCustomAttributes({"context": "attr"}): + response = agent_swarm("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + node_history = [node.node_id for node in response.node_history] + assert node_history == ["math_agent", "analysis_agent"] + assert response.results["math_agent"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis_agent"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." + ) + + +@reset_core_stats_engine() +@validate_custom_events(tool_recorded_events) +@validate_custom_events(agent_recorded_events) +@validate_custom_events(handoff_recorded_event) +@validate_custom_event_count(count=5) # 2 LlmTool events, 2 LlmAgent events, 1 LlmTool Handoff event +@validate_transaction_metrics( + "mlmodel_strands.test_multiagent_swarm:test_multiagent_swarm_invoke_async", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_multiagent_swarm_invoke_async(loop, set_trace_info, agent_swarm): + set_trace_info() + + async def _test(): + response = await agent_swarm.invoke_async("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + node_history = [node.node_id for node in response.node_history] + assert node_history == ["math_agent", "analysis_agent"] + assert response.results["math_agent"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis_agent"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." 
+ ) + + loop.run_until_complete(_test()) + + +@reset_core_stats_engine() +@validate_custom_events(tool_recorded_events) +@validate_custom_events(agent_recorded_events) +@validate_custom_events(handoff_recorded_event) +@validate_custom_event_count(count=5) # 2 LlmTool events, 2 LlmAgent events, 1 LlmTool Handoff event +@validate_transaction_metrics( + "mlmodel_strands.test_multiagent_swarm:test_multiagent_swarm_stream_async", + scoped_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + rollup_metrics=[ + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/math_agent", 1), + ("Llm/agent/Strands/strands.agent.agent:Agent.stream_async/analysis_agent", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/calculate_sum", 1), + ("Llm/tool/Strands/strands.tools.executors._executor:ToolExecutor._stream/analyze_result", 1), + ], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_multiagent_swarm_stream_async(loop, set_trace_info, agent_swarm): + set_trace_info() + + async def _test(): + response = agent_swarm.stream_async("Calculate the sum of 15 and 27.") + messages = [ + event["node_result"].result.message async for event in response if event["type"] == "multiagent_node_stop" + ] + + assert len(messages) == 2 + + assert messages[0]["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert messages[1]["content"][0]["text"] == "The calculation is correct, and 42 is a positive integer result." + + loop.run_until_complete(_test()) + + +@disabled_ai_monitoring_settings +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +@background_task() +def test_multiagent_swarm_invoke_disabled_ai_monitoring_events(set_trace_info, agent_swarm): + set_trace_info() + + response = agent_swarm("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + node_history = [node.node_id for node in response.node_history] + assert node_history == ["math_agent", "analysis_agent"] + assert response.results["math_agent"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis_agent"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." + ) + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_multiagent_swarm_invoke_outside_txn(agent_swarm): + response = agent_swarm("Calculate the sum of 15 and 27.") + + assert response.execution_count == 2 + node_history = [node.node_id for node in response.node_history] + assert node_history == ["math_agent", "analysis_agent"] + assert response.results["math_agent"].result.message["content"][0]["text"] == "The sum of 15 and 27 is 42." + assert ( + response.results["analysis_agent"].result.message["content"][0]["text"] + == "The calculation is correct, and 42 is a positive integer result." 
+ ) From 2ac14945eb4188e4af64de2542395242f6d82ff0 Mon Sep 17 00:00:00 2001 From: Shubham Goel Date: Mon, 8 Dec 2025 12:37:14 +0530 Subject: [PATCH 23/34] Fixed tool type bug for strands --- newrelic/hooks/mlmodel_strands.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/newrelic/hooks/mlmodel_strands.py b/newrelic/hooks/mlmodel_strands.py index bf849fd717..cf1a76cf28 100644 --- a/newrelic/hooks/mlmodel_strands.py +++ b/newrelic/hooks/mlmodel_strands.py @@ -416,7 +416,10 @@ async def aclose(self): def wrap_ToolRegister_register_tool(wrapped, instance, args, kwargs): bound_args = bind_args(wrapped, args, kwargs) - bound_args["tool"]._tool_func = ErrorTraceWrapper(bound_args["tool"]._tool_func) + tool = bound_args.get("tool") + + if hasattr(tool, "_tool_func"): + tool._tool_func = ErrorTraceWrapper(tool._tool_func) return wrapped(*args, **kwargs) From 7bea863271df6e6ede9260eaa12e32e0c28381e3 Mon Sep 17 00:00:00 2001 From: Lalleh Rafeei <84813886+lrafeei@users.noreply.github.com> Date: Mon, 8 Dec 2025 13:57:10 -0800 Subject: [PATCH 24/34] Pin langchain & langchain_core (#1604) --- tox.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tox.ini b/tox.ini index 98cea6ee29..d3549c71a6 100644 --- a/tox.ini +++ b/tox.ini @@ -432,10 +432,10 @@ deps = mlmodel_openai-openailatest: openai[datalib] ; Required for openai testing mlmodel_openai: protobuf - ; Pinning to 0.1.16 while adding support for with_structured_output in chain tests - mlmodel_langchain: langchain + ; Pin to 1.1.0 temporarily + mlmodel_langchain: langchain<1.1.1 + mlmodel_langchain: langchain-core<1.1.1 mlmodel_langchain: langchain-community - mlmodel_langchain: langchain-core mlmodel_langchain: langchain-openai ; Required for langchain testing mlmodel_langchain: pypdf From 6860fc1c506054139eb1079ae73d2009bc00d648 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Mon, 8 Dec 2025 14:15:40 -0800 Subject: [PATCH 25/34] Add safeguarding to converse attr extraction. 
(#1603) Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- newrelic/hooks/external_botocore.py | 49 +++++++++++++++++++---------- 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py index d481ce8450..12dd4153f9 100644 --- a/newrelic/hooks/external_botocore.py +++ b/newrelic/hooks/external_botocore.py @@ -900,25 +900,40 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs): def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, span_id, trace_id): input_message_list = [] - # If a system message is supplied, it is under its own key in kwargs rather than with the other input messages - if "system" in kwargs.keys(): - input_message_list.extend({"role": "system", "content": result["text"]} for result in kwargs.get("system", [])) - - # kwargs["messages"] can hold multiple requests and responses to maintain conversation history - # We grab the last message (the newest request) in the list each time, so we don't duplicate recorded data - _input_messages = kwargs.get("messages", []) - _input_messages = _input_messages and (_input_messages[-1] or {}) - _input_messages = _input_messages.get("content", []) - input_message_list.extend( - [{"role": "user", "content": result["text"]} for result in _input_messages if "text" in result] - ) + try: + # If a system message is supplied, it is under its own key in kwargs rather than with the other input messages + if "system" in kwargs.keys(): + input_message_list.extend( + {"role": "system", "content": result["text"]} for result in kwargs.get("system", []) if "text" in result + ) + + # kwargs["messages"] can hold multiple requests and responses to maintain conversation history + # We grab the last message (the newest request) in the list each time, so we don't duplicate recorded data + _input_messages = kwargs.get("messages", []) + _input_messages = _input_messages and (_input_messages[-1] or {}) + _input_messages = _input_messages.get("content", []) + input_message_list.extend( + [{"role": "user", "content": result["text"]} for result in _input_messages if "text" in result] + ) + except Exception: + _logger.warning( + "Exception occurred in botocore instrumentation for AWS Bedrock: Failed to extract input messages from Converse request. Report this issue to New Relic Support.", + exc_info=True, + ) output_message_list = None - if "output" in response: - output_message_list = [ - {"role": "assistant", "content": result["text"]} - for result in response.get("output").get("message").get("content", []) - ] + try: + if "output" in response: + output_message_list = [ + {"role": "assistant", "content": result["text"]} + for result in response.get("output").get("message").get("content", []) + if "text" in result + ] + except Exception: + _logger.warning( + "Exception occurred in botocore instrumentation for AWS Bedrock: Failed to extract output messages from onverse response. Report this issue to New Relic Support.", + exc_info=True, + ) bedrock_attrs = { "request_id": response_headers.get("x-amzn-requestid"), From 38d7547314882c15c58d16a93f13c5fcd552d253 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 7 Oct 2025 10:34:31 -0700 Subject: [PATCH 26/34] Bump tests. From e6cb2bb1d3be80b080fd5526e95808f55ea59986 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 7 Oct 2025 10:57:04 -0700 Subject: [PATCH 27/34] Add response token count logic to Gemini instrumentation. 
(#1486) * Add response token count logic to Gemini instrumentation. * Update token counting util functions. * Linting * Add response token count logic to Gemini instrumentation. * Update token counting util functions. * [MegaLinter] Apply linters fixes * Bump tests. --------- Co-authored-by: Tim Pansino --- newrelic/hooks/mlmodel_gemini.py | 152 ++++++++++++------ tests/mlmodel_gemini/test_embeddings.py | 6 +- tests/mlmodel_gemini/test_embeddings_error.py | 62 +------ tests/mlmodel_gemini/test_text_generation.py | 12 +- .../test_text_generation_error.py | 81 +--------- tests/testing_support/ml_testing_utils.py | 19 +++ 6 files changed, 139 insertions(+), 193 deletions(-) diff --git a/newrelic/hooks/mlmodel_gemini.py b/newrelic/hooks/mlmodel_gemini.py index 6fffbebb47..d7585e0b60 100644 --- a/newrelic/hooks/mlmodel_gemini.py +++ b/newrelic/hooks/mlmodel_gemini.py @@ -176,20 +176,24 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg embedding_content = str(embedding_content) request_model = kwargs.get("model") + embedding_token_count = ( + settings.ai_monitoring.llm_token_count_callback(request_model, embedding_content) + if settings.ai_monitoring.llm_token_count_callback + else None + ) + full_embedding_response_dict = { "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(request_model, embedding_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "request.model": request_model, "duration": ft.duration * 1000, "vendor": "gemini", "ingest_source": "Python", } + if embedding_token_count: + full_embedding_response_dict["response.usage.total_tokens"] = embedding_token_count + if settings.ai_monitoring.record_content.enabled: full_embedding_response_dict["input"] = embedding_content @@ -303,15 +307,13 @@ def _record_generation_error(transaction, linking_metadata, completion_id, kwarg "Unable to parse input message to Gemini LLM. Message content and role will be omitted from " "corresponding LlmChatCompletionMessage event. 
" ) + # Extract the input message content and role from the input message if it exists + input_message_content, input_role = _parse_input_message(input_message) if input_message else (None, None) - generation_config = kwargs.get("config") - if generation_config: - request_temperature = getattr(generation_config, "temperature", None) - request_max_tokens = getattr(generation_config, "max_output_tokens", None) - else: - request_temperature = None - request_max_tokens = None + # Extract data from generation config object + request_temperature, request_max_tokens = _extract_generation_config(kwargs) + # Prepare error attributes notice_error_attributes = { "http.statusCode": getattr(exc, "code", None), "error.message": getattr(exc, "message", None), @@ -352,15 +354,17 @@ def _record_generation_error(transaction, linking_metadata, completion_id, kwarg create_chat_completion_message_event( transaction, - input_message, + input_message_content, + input_role, completion_id, span_id, trace_id, # Passing the request model as the response model here since we do not have access to a response model request_model, - request_model, llm_metadata, output_message_list, + # We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run + True, request_timestamp, ) except Exception: @@ -388,6 +392,7 @@ def _handle_generation_success( def _record_generation_success( transaction, linking_metadata, completion_id, kwargs, ft, response, request_timestamp=None ): + settings = transaction.settings or global_settings() span_id = linking_metadata.get("span.id") trace_id = linking_metadata.get("trace.id") try: @@ -396,12 +401,14 @@ def _record_generation_success( # finish_reason is an enum, so grab just the stringified value from it to report finish_reason = response.get("candidates")[0].get("finish_reason").value output_message_list = [response.get("candidates")[0].get("content")] + token_usage = response.get("usage_metadata") or {} else: # Set all values to NoneTypes since we cannot access them through kwargs or another method that doesn't # require the response object response_model = None output_message_list = [] finish_reason = None + token_usage = {} request_model = kwargs.get("model") @@ -423,13 +430,44 @@ def _record_generation_success( "corresponding LlmChatCompletionMessage event. " ) - generation_config = kwargs.get("config") - if generation_config: - request_temperature = getattr(generation_config, "temperature", None) - request_max_tokens = getattr(generation_config, "max_output_tokens", None) + input_message_content, input_role = _parse_input_message(input_message) if input_message else (None, None) + + # Parse output message content + # This list should have a length of 1 to represent the output message + # Parse the message text out to pass to any registered token counting callback + output_message_content = output_message_list[0].get("parts")[0].get("text") if output_message_list else None + + # Extract token counts from response object + if token_usage: + response_prompt_tokens = token_usage.get("prompt_token_count") + response_completion_tokens = token_usage.get("candidates_token_count") + response_total_tokens = token_usage.get("total_token_count") + else: - request_temperature = None - request_max_tokens = None + response_prompt_tokens = None + response_completion_tokens = None + response_total_tokens = None + + # Calculate token counts by checking if a callback is registered and if we have the necessary content to pass + # to it. 
If not, then we use the token counts provided in the response object + prompt_tokens = ( + settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content) + if settings.ai_monitoring.llm_token_count_callback and input_message_content + else response_prompt_tokens + ) + completion_tokens = ( + settings.ai_monitoring.llm_token_count_callback(response_model, output_message_content) + if settings.ai_monitoring.llm_token_count_callback and output_message_content + else response_completion_tokens + ) + total_tokens = ( + prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens + ) + + all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens) + + # Extract generation config + request_temperature, request_max_tokens = _extract_generation_config(kwargs) full_chat_completion_summary_dict = { "id": completion_id, @@ -450,68 +488,80 @@ def _record_generation_success( "timestamp": request_timestamp, } + if all_token_counts: + full_chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens + full_chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens + full_chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens + llm_metadata = _get_llm_attributes(transaction) full_chat_completion_summary_dict.update(llm_metadata) transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict) create_chat_completion_message_event( transaction, - input_message, + input_message_content, + input_role, completion_id, span_id, trace_id, response_model, - request_model, llm_metadata, output_message_list, + all_token_counts, request_timestamp, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, exc_info=True) +def _parse_input_message(input_message): + # The input_message will be a string if generate_content was called directly. In this case, we don't have + # access to the role, so we default to user since this was an input message + if isinstance(input_message, str): + return input_message, "user" + # The input_message will be a Google Content type if send_message was called, so we parse out the message + # text and role (which should be "user") + elif isinstance(input_message, google.genai.types.Content): + return input_message.parts[0].text, input_message.role + else: + return None, None + + +def _extract_generation_config(kwargs): + generation_config = kwargs.get("config") + if generation_config: + request_temperature = getattr(generation_config, "temperature", None) + request_max_tokens = getattr(generation_config, "max_output_tokens", None) + else: + request_temperature = None + request_max_tokens = None + + return request_temperature, request_max_tokens + + def create_chat_completion_message_event( transaction, - input_message, + input_message_content, + input_role, chat_completion_id, span_id, trace_id, response_model, - request_model, llm_metadata, output_message_list, + all_token_counts, request_timestamp=None, ): try: settings = transaction.settings or global_settings() - if input_message: - # The input_message will be a string if generate_content was called directly. 
In this case, we don't have - # access to the role, so we default to user since this was an input message - if isinstance(input_message, str): - input_message_content = input_message - input_role = "user" - # The input_message will be a Google Content type if send_message was called, so we parse out the message - # text and role (which should be "user") - elif isinstance(input_message, google.genai.types.Content): - input_message_content = input_message.parts[0].text - input_role = input_message.role - # Set input data to NoneTypes to ensure token_count callback is not called - else: - input_message_content = None - input_role = None - + if input_message_content: message_id = str(uuid.uuid4()) chat_completion_input_message_dict = { "id": message_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content) - if settings.ai_monitoring.llm_token_count_callback and input_message_content - else None - ), "role": input_role, "completion_id": chat_completion_id, # The input message will always be the first message in our request/ response sequence so this will @@ -521,6 +571,8 @@ def create_chat_completion_message_event( "vendor": "gemini", "ingest_source": "Python", } + if all_token_counts: + chat_completion_input_message_dict["token_count"] = 0 if settings.ai_monitoring.record_content.enabled: chat_completion_input_message_dict["content"] = input_message_content @@ -539,7 +591,7 @@ def create_chat_completion_message_event( # Add one to the index to account for the single input message so our sequence value is accurate for # the output message - if input_message: + if input_message_content: index += 1 message_id = str(uuid.uuid4()) @@ -548,11 +600,6 @@ def create_chat_completion_message_event( "id": message_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -562,6 +609,9 @@ def create_chat_completion_message_event( "is_response": True, } + if all_token_counts: + chat_completion_output_message_dict["token_count"] = 0 + if settings.ai_monitoring.record_content.enabled: chat_completion_output_message_dict["content"] = message_content if request_timestamp: diff --git a/tests/mlmodel_gemini/test_embeddings.py b/tests/mlmodel_gemini/test_embeddings.py index 0fc92897b6..5b4e30f860 100644 --- a/tests/mlmodel_gemini/test_embeddings.py +++ b/tests/mlmodel_gemini/test_embeddings.py @@ -15,7 +15,7 @@ import google.genai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -93,7 +93,7 @@ def test_gemini_embedding_sync_no_content(gemini_dev_client, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_gemini_embedding_sync_with_token_count", @@ -177,7 +177,7 @@ def 
test_gemini_embedding_async_no_content(gemini_dev_client, loop, set_trace_in @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_gemini_embedding_async_with_token_count", diff --git a/tests/mlmodel_gemini/test_embeddings_error.py b/tests/mlmodel_gemini/test_embeddings_error.py index a65a6c2c6f..f0e7aac58a 100644 --- a/tests/mlmodel_gemini/test_embeddings_error.py +++ b/tests/mlmodel_gemini/test_embeddings_error.py @@ -16,12 +16,10 @@ import google.genai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -159,34 +157,6 @@ def test_embeddings_invalid_request_error_invalid_model(gemini_dev_client, set_t gemini_dev_client.models.embed_content(contents="Embedded: Model does not exist.", model="does-not-exist") -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for embedContent. Call ListModels to see the list of available models and their supported methods." - } -) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - rollup_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - custom_metrics=[(f"Supportability/Python/ML/Gemini/{google.genai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count(gemini_dev_client, set_trace_info): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - gemini_dev_client.models.embed_content(contents="Embedded: Model does not exist.", model="does-not-exist") - - embedding_invalid_key_error_events = [ ( {"type": "LlmEmbedding"}, @@ -326,36 +296,6 @@ def test_embeddings_async_invalid_request_error_invalid_model(gemini_dev_client, ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for embedContent. Call ListModels to see the list of available models and their supported methods." 
- } -) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_async_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - rollup_metrics=[("Llm/embedding/Gemini/embed_content", 1)], - custom_metrics=[(f"Supportability/Python/ML/Gemini/{google.genai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_async_invalid_request_error_invalid_model_with_token_count(gemini_dev_client, loop, set_trace_info): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - loop.run_until_complete( - gemini_dev_client.models.embed_content(contents="Embedded: Model does not exist.", model="does-not-exist") - ) - - # Wrong api_key provided @dt_enabled @reset_core_stats_engine() diff --git a/tests/mlmodel_gemini/test_text_generation.py b/tests/mlmodel_gemini/test_text_generation.py index 1c789f8197..ad35024afe 100644 --- a/tests/mlmodel_gemini/test_text_generation.py +++ b/tests/mlmodel_gemini/test_text_generation.py @@ -15,7 +15,7 @@ import google.genai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -51,6 +51,9 @@ "vendor": "gemini", "ingest_source": "Python", "response.number_of_messages": 2, + "response.usage.prompt_tokens": 9, + "response.usage.completion_tokens": 13, + "response.usage.total_tokens": 22, }, ), ( @@ -62,6 +65,7 @@ "llm.foo": "bar", "span_id": None, "trace_id": "trace-id", + "token_count": 0, "content": "How many letters are in the word Python?", "role": "user", "completion_id": None, @@ -80,6 +84,7 @@ "llm.foo": "bar", "span_id": None, "trace_id": "trace-id", + "token_count": 0, "content": 'There are **6** letters in the word "Python".\n', "role": "model", "completion_id": None, @@ -186,7 +191,8 @@ def test_gemini_text_generation_sync_no_content(gemini_dev_client, set_trace_inf @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(text_generation_recorded_events)) +# Ensure LLM callback is invoked and response token counts are overridden +@validate_custom_events(add_token_counts_to_chat_events(text_generation_recorded_events)) @validate_custom_event_count(count=3) @validate_transaction_metrics( name="test_text_generation:test_gemini_text_generation_sync_with_token_count", @@ -327,7 +333,7 @@ def test_gemini_text_generation_async_no_content(gemini_dev_client, loop, set_tr @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(text_generation_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(text_generation_recorded_events)) @validate_custom_event_count(count=3) @validate_transaction_metrics( name="test_text_generation:test_gemini_text_generation_async_with_token_count", diff --git a/tests/mlmodel_gemini/test_text_generation_error.py b/tests/mlmodel_gemini/test_text_generation_error.py index eb8aec950f..37f5b06467 100644 --- a/tests/mlmodel_gemini/test_text_generation_error.py +++ b/tests/mlmodel_gemini/test_text_generation_error.py @@ -17,13 +17,11 @@ import 
google.genai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -65,6 +63,7 @@ "trace_id": "trace-id", "content": "How many letters are in the word Python?", "role": "user", + "token_count": 0, "completion_id": None, "sequence": 0, "vendor": "gemini", @@ -171,6 +170,7 @@ def _test(): "trace_id": "trace-id", "content": "Model does not exist.", "role": "user", + "token_count": 0, "completion_id": None, "response.model": "does-not-exist", "sequence": 0, @@ -183,39 +183,6 @@ def _test(): @dt_enabled @reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods." - } -) -@validate_transaction_metrics( - "test_text_generation_error:test_text_generation_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/Gemini/generate_content", 1)], - rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_text_generation_invalid_request_error_invalid_model_with_token_count(gemini_dev_client, set_trace_info): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - gemini_dev_client.models.generate_content( - model="does-not-exist", - contents=["Model does not exist."], - config=google.genai.types.GenerateContentConfig(max_output_tokens=100, temperature=0.7), - ) - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) @validate_error_trace_attributes( callable_name(google.genai.errors.ClientError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, @@ -231,7 +198,7 @@ def test_text_generation_invalid_request_error_invalid_model_with_token_count(ge rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], background_task=True, ) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) +@validate_custom_events(expected_events_on_invalid_model_error) @validate_custom_event_count(count=2) @background_task() def test_text_generation_invalid_request_error_invalid_model_chat(gemini_dev_client, set_trace_info): @@ -272,6 +239,7 @@ def test_text_generation_invalid_request_error_invalid_model_chat(gemini_dev_cli "trace_id": "trace-id", "content": "Invalid API key.", "role": "user", + "token_count": 0, "response.model": "gemini-flash-2.0", "completion_id": None, "sequence": 0, @@ -383,43 +351,6 @@ def _test(): @dt_enabled @reset_core_stats_engine() 
-@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(google.genai.errors.ClientError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "models/does-not-exist is not found for API version v1beta, or is not supported for generateContent. Call ListModels to see the list of available models and their supported methods." - } -) -@validate_transaction_metrics( - "test_text_generation_error:test_text_generation_async_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/Gemini/generate_content", 1)], - rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_text_generation_async_invalid_request_error_invalid_model_with_token_count( - gemini_dev_client, loop, set_trace_info -): - with pytest.raises(google.genai.errors.ClientError): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - gemini_dev_client.models.generate_content( - model="does-not-exist", - contents=["Model does not exist."], - config=google.genai.types.GenerateContentConfig(max_output_tokens=100, temperature=0.7), - ) - ) - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) @validate_error_trace_attributes( callable_name(google.genai.errors.ClientError), exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "NOT_FOUND", "http.statusCode": 404}}, @@ -435,7 +366,7 @@ def test_text_generation_async_invalid_request_error_invalid_model_with_token_co rollup_metrics=[("Llm/completion/Gemini/generate_content", 1)], background_task=True, ) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) +@validate_custom_events(expected_events_on_invalid_model_error) @validate_custom_event_count(count=2) @background_task() def test_text_generation_async_invalid_request_error_invalid_model_chat(gemini_dev_client, loop, set_trace_info): diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py index 4ff70c7ed4..55dbd08105 100644 --- a/tests/testing_support/ml_testing_utils.py +++ b/tests/testing_support/ml_testing_utils.py @@ -29,6 +29,7 @@ def llm_token_count_callback(model, content): return 105 +# This will be removed once all LLM instrumentations have been converted to use new token count design def add_token_count_to_events(expected_events): events = copy.deepcopy(expected_events) for event in events: @@ -37,6 +38,24 @@ def add_token_count_to_events(expected_events): return events +def add_token_count_to_embedding_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmEmbedding": + event[1]["response.usage.total_tokens"] = 105 + return events + + +def add_token_counts_to_chat_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionSummary": + event[1]["response.usage.prompt_tokens"] = 105 + event[1]["response.usage.completion_tokens"] = 105 + event[1]["response.usage.total_tokens"] = 210 + return events + + def events_sans_content(event): new_event = copy.deepcopy(event) for _event in new_event: From 
f4b9faaa688c981b4495959454c960cef17012ae Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 7 Oct 2025 13:14:56 -0700 Subject: [PATCH 28/34] Add response token count logic to OpenAI instrumentation. (#1498) * Add OpenAI token counts. * Add token counts to langchain + openai tests. * Remove unused expected events. * Linting * Add OpenAI token counts. * Add token counts to langchain + openai tests. * Remove unused expected events. * [MegaLinter] Apply linters fixes --------- Co-authored-by: Tim Pansino --- newrelic/hooks/mlmodel_openai.py | 87 ++++++++--- tests/mlmodel_langchain/test_chain.py | 8 + tests/mlmodel_openai/test_chat_completion.py | 12 +- .../test_chat_completion_error.py | 71 +-------- .../test_chat_completion_error_v1.py | 142 +----------------- .../test_chat_completion_stream.py | 101 ++++++++++++- .../test_chat_completion_stream_error.py | 75 +-------- .../test_chat_completion_stream_error_v1.py | 80 +--------- .../test_chat_completion_stream_v1.py | 11 +- .../mlmodel_openai/test_chat_completion_v1.py | 12 +- tests/mlmodel_openai/test_embeddings.py | 7 +- .../test_embeddings_error_v1.py | 120 +-------------- tests/mlmodel_openai/test_embeddings_v1.py | 7 +- tests/testing_support/ml_testing_utils.py | 8 + 14 files changed, 241 insertions(+), 500 deletions(-) diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index 59f7060394..26b51e52f9 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -133,11 +133,11 @@ def create_chat_completion_message_event( span_id, trace_id, response_model, - request_model, response_id, request_id, llm_metadata, output_message_list, + all_token_counts, request_timestamp=None, ): settings = transaction.settings if transaction.settings is not None else global_settings() @@ -158,11 +158,6 @@ def create_chat_completion_message_event( "request_id": request_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(request_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -171,6 +166,9 @@ def create_chat_completion_message_event( "ingest_source": "Python", } + if all_token_counts: + chat_completion_input_message_dict["token_count"] = 0 + if settings.ai_monitoring.record_content.enabled: chat_completion_input_message_dict["content"] = message_content if request_timestamp: @@ -200,11 +198,6 @@ def create_chat_completion_message_event( "request_id": request_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -214,6 +207,9 @@ def create_chat_completion_message_event( "is_response": True, } + if all_token_counts: + chat_completion_output_message_dict["token_count"] = 0 + if settings.ai_monitoring.record_content.enabled: chat_completion_output_message_dict["content"] = message_content if request_timestamp: @@ -289,15 +285,18 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg else getattr(attribute_response, "organization", None) ) + response_total_tokens = attribute_response.get("usage", {}).get("total_tokens") if response else None + + total_tokens = ( + settings.ai_monitoring.llm_token_count_callback(response_model, input_) + 
if settings.ai_monitoring.llm_token_count_callback and input_ + else response_total_tokens + ) + full_embedding_response_dict = { "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, input_) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "request.model": kwargs.get("model") or kwargs.get("engine"), "request_id": request_id, "duration": ft.duration * 1000, @@ -322,6 +321,7 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg "response.headers.ratelimitRemainingRequests": check_rate_limit_header( response_headers, "x-ratelimit-remaining-requests", True ), + "response.usage.total_tokens": total_tokens, "vendor": "openai", "ingest_source": "Python", } @@ -492,12 +492,15 @@ def _handle_completion_success( def _record_completion_success( transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response, request_timestamp=None ): + settings = transaction.settings if transaction.settings is not None else global_settings() span_id = linking_metadata.get("span.id") trace_id = linking_metadata.get("trace.id") + try: if response: response_model = response.get("model") response_id = response.get("id") + token_usage = response.get("usage") or {} output_message_list = [] finish_reason = None choices = response.get("choices") or [] @@ -511,6 +514,7 @@ def _record_completion_success( else: response_model = kwargs.get("response.model") response_id = kwargs.get("id") + token_usage = {} output_message_list = [] finish_reason = kwargs.get("finish_reason") if "content" in kwargs: @@ -522,10 +526,44 @@ def _record_completion_success( output_message_list = [] request_model = kwargs.get("model") or kwargs.get("engine") - request_id = response_headers.get("x-request-id") - organization = response_headers.get("openai-organization") or getattr(response, "organization", None) messages = kwargs.get("messages") or [{"content": kwargs.get("prompt"), "role": "user"}] input_message_list = list(messages) + + # Extract token counts from response object + if token_usage: + response_prompt_tokens = token_usage.get("prompt_tokens") + response_completion_tokens = token_usage.get("completion_tokens") + response_total_tokens = token_usage.get("total_tokens") + + else: + response_prompt_tokens = None + response_completion_tokens = None + response_total_tokens = None + + # Calculate token counts by checking if a callback is registered and if we have the necessary content to pass + # to it. 
If not, then we use the token counts provided in the response object + input_message_content = " ".join([msg.get("content", "") for msg in input_message_list if msg.get("content")]) + prompt_tokens = ( + settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content) + if settings.ai_monitoring.llm_token_count_callback and input_message_content + else response_prompt_tokens + ) + output_message_content = " ".join([msg.get("content", "") for msg in output_message_list if msg.get("content")]) + completion_tokens = ( + settings.ai_monitoring.llm_token_count_callback(response_model, output_message_content) + if settings.ai_monitoring.llm_token_count_callback and output_message_content + else response_completion_tokens + ) + + total_tokens = ( + prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens + ) + + all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens) + + request_id = response_headers.get("x-request-id") + organization = response_headers.get("openai-organization") or getattr(response, "organization", None) + full_chat_completion_summary_dict = { "id": completion_id, "span_id": span_id, @@ -571,6 +609,12 @@ def _record_completion_success( "response.number_of_messages": len(input_message_list) + len(output_message_list), "timestamp": request_timestamp, } + + if all_token_counts: + full_chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens + full_chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens + full_chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens + llm_metadata = _get_llm_attributes(transaction) full_chat_completion_summary_dict.update(llm_metadata) transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict) @@ -582,11 +626,11 @@ def _record_completion_success( span_id, trace_id, response_model, - request_model, response_id, request_id, llm_metadata, output_message_list, + all_token_counts, request_timestamp, ) except Exception: @@ -598,6 +642,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg trace_id = linking_metadata.get("trace.id") request_message_list = kwargs.get("messages", None) or [] notice_error_attributes = {} + try: if OPENAI_V1: response = getattr(exc, "response", None) @@ -663,6 +708,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg output_message_list = [] if "content" in kwargs: output_message_list = [{"content": kwargs.get("content"), "role": kwargs.get("role")}] + create_chat_completion_message_event( transaction, request_message_list, @@ -670,11 +716,12 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg span_id, trace_id, kwargs.get("response.model"), - request_model, response_id, request_id, llm_metadata, output_message_list, + # We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run + True, request_timestamp, ) except Exception: diff --git a/tests/mlmodel_langchain/test_chain.py b/tests/mlmodel_langchain/test_chain.py index 2f52f85504..d859b41c84 100644 --- a/tests/mlmodel_langchain/test_chain.py +++ b/tests/mlmodel_langchain/test_chain.py @@ -397,6 +397,7 @@ "response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999992, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 8, "vendor": "openai", "ingest_source": 
"Python", "input": "[[3923, 374, 220, 17, 489, 220, 19, 30]]", @@ -420,6 +421,7 @@ "response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999998, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 1, "vendor": "openai", "ingest_source": "Python", "input": "[[10590]]", @@ -493,6 +495,9 @@ "response.headers.ratelimitResetRequests": "8.64s", "response.headers.ratelimitRemainingTokens": 199912, "response.headers.ratelimitRemainingRequests": 9999, + "response.usage.prompt_tokens": 73, + "response.usage.completion_tokens": 375, + "response.usage.total_tokens": 448, "response.number_of_messages": 3, }, ], @@ -509,6 +514,7 @@ "sequence": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4?", }, @@ -526,6 +532,7 @@ "sequence": 1, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "math", }, @@ -543,6 +550,7 @@ "sequence": 2, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "is_response": True, "content": "```html\n\n\n\n Math Quiz\n\n\n

    <h2>Math Quiz Questions</h2>
    <ol>
        <li>What is the result of 5 + 3?
            <ul><li>A) 7</li><li>B) 8</li><li>C) 9</li><li>D) 10</li></ul>
        </li>
        <li>What is the product of 6 x 7?
            <ul><li>A) 36</li><li>B) 42</li><li>C) 48</li><li>D) 56</li></ul>
        </li>
        <li>What is the square root of 64?
            <ul><li>A) 6</li><li>B) 7</li><li>C) 8</li><li>D) 9</li></ul>
        </li>
        <li>What is the result of 12 / 4?
            <ul><li>A) 2</li><li>B) 3</li><li>C) 4</li><li>D) 5</li></ul>
        </li>
        <li>What is the sum of 15 + 9?
            <ul><li>A) 22</li><li>B) 23</li><li>C) 24</li><li>D) 25</li></ul>
        </li>
    </ol>
\n\n\n```", diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py index 89208ab268..9bb57e48b5 100644 --- a/tests/mlmodel_openai/test_chat_completion.py +++ b/tests/mlmodel_openai/test_chat_completion.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -56,6 +56,9 @@ "response.organization": "new-relic-nkmd8b", "request.temperature": 0.7, "request.max_tokens": 100, + "response.usage.completion_tokens": 11, + "response.usage.total_tokens": 64, + "response.usage.prompt_tokens": 53, "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 200, @@ -83,6 +86,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -102,6 +106,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -121,6 +126,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "is_response": True, @@ -176,7 +182,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -347,7 +353,7 @@ def test_openai_chat_completion_async_no_content(loop, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_chat_completion_error.py b/tests/mlmodel_openai/test_chat_completion_error.py index 79cc79d6db..042cdef31a 100644 --- a/tests/mlmodel_openai/test_chat_completion_error.py +++ b/tests/mlmodel_openai/test_chat_completion_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -70,6 +68,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": 
"Python", }, @@ -86,6 +85,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -191,6 +191,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -198,36 +199,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() @@ -288,6 +259,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -304,6 +276,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -370,6 +343,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -481,37 +455,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with 
pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() diff --git a/tests/mlmodel_openai/test_chat_completion_error_v1.py b/tests/mlmodel_openai/test_chat_completion_error_v1.py index 848ad57add..0f9b05c562 100644 --- a/tests/mlmodel_openai/test_chat_completion_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_error_v1.py @@ -14,13 +14,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -69,6 +67,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -85,6 +84,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -234,6 +234,7 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -271,37 +272,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -334,41 +304,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", 
"http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - expected_events_on_wrong_api_key_error = [ ( {"type": "LlmChatCompletionSummary"}, @@ -398,6 +333,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count_as "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -617,39 +553,6 @@ def test_chat_completion_invalid_request_error_invalid_model_with_raw_response(s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response( - set_trace_info, sync_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -684,41 +587,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async_with_raw_resp ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - 
"test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py index 55e8e8fbdb..ae62b88c4b 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream.py +++ b/tests/mlmodel_openai/test_chat_completion_stream.py @@ -15,7 +15,8 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -188,9 +189,101 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): assert resp +chat_completion_recorded_token_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.model": "gpt-3.5-turbo-0613", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "stop", + "response.headers.llmVersion": "2020-10-01", + "response.headers.ratelimitLimitRequests": 200, + "response.headers.ratelimitLimitTokens": 40000, + "response.headers.ratelimitResetTokens": "90ms", + "response.headers.ratelimitResetRequests": "7m12s", + "response.headers.ratelimitRemainingTokens": 39940, + "response.headers.ratelimitRemainingRequests": 199, + "vendor": "openai", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + 
"trace_id": "trace-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "is_response": True, + "ingest_source": "Python", + }, + ), +] + + @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -382,7 +475,9 @@ async def consumer(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_chat_completion_stream:test_openai_chat_completion_async_with_token_count", diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index 0fb0d06867..2b01813d9f 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -70,6 +68,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -86,6 +85,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -196,6 +196,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -203,38 +204,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 
404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - generator = openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -297,6 +266,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -313,6 +283,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -384,6 +355,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -498,38 +470,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -661,6 +601,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py 
b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py index 5d06dc2a28..987991d9f8 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py @@ -12,16 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. - import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -70,6 +67,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -86,6 +84,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -248,6 +247,7 @@ async def consumer(): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -286,77 +286,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn assert resp -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - generator = sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_async_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) 
-@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_async_with_token_count( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - async def consumer(): - generator = await async_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - async for resp in generator: - assert resp - - loop.run_until_complete(consumer()) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -421,6 +350,7 @@ async def consumer(): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_v1.py index 6fc5d58f28..2fb0c4950a 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_v1.py @@ -17,7 +17,8 @@ from conftest import get_openai_version from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -304,7 +305,9 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -626,7 +629,9 @@ async def consumer(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant # @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py index 5a6793d955..495bf5de93 100644 --- a/tests/mlmodel_openai/test_chat_completion_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_v1.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -55,6 +55,9 @@ "response.organization": "new-relic-nkmd8b", "request.temperature": 0.7, "request.max_tokens": 100, + "response.usage.completion_tokens": 75, + 
"response.usage.total_tokens": 101, + "response.usage.prompt_tokens": 26, "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 10000, @@ -82,6 +85,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "ingest_source": "Python", @@ -101,6 +105,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "ingest_source": "Python", @@ -120,6 +125,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "is_response": True, @@ -197,7 +203,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -393,7 +399,7 @@ def test_openai_chat_completion_async_with_llm_metadata_no_content(loop, set_tra @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py index c3c3e7c429..935db04fe0 100644 --- a/tests/mlmodel_openai/test_embeddings.py +++ b/tests/mlmodel_openai/test_embeddings.py @@ -19,7 +19,7 @@ validate_attributes, ) from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -55,6 +55,7 @@ "response.headers.ratelimitResetRequests": "19m45.394s", "response.headers.ratelimitRemainingTokens": 149994, "response.headers.ratelimitRemainingRequests": 197, + "response.usage.total_tokens": 6, "vendor": "openai", "ingest_source": "Python", }, @@ -107,7 +108,7 @@ def test_openai_embedding_sync_no_content(set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_openai_embedding_sync_with_token_count", @@ -191,7 +192,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_openai_embedding_async_with_token_count", diff --git 
a/tests/mlmodel_openai/test_embeddings_error_v1.py b/tests/mlmodel_openai/test_embeddings_error_v1.py index fd29236122..499f96893b 100644 --- a/tests/mlmodel_openai/test_embeddings_error_v1.py +++ b/tests/mlmodel_openai/test_embeddings_error_v1.py @@ -16,12 +16,10 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -149,32 +147,6 @@ def test_embeddings_invalid_request_error_no_model_async(set_trace_info, async_o ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - sync_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist") - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -255,36 +227,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content(set_tra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_async_with_token_count( - set_trace_info, async_openai_client, loop -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - loop.run_until_complete( - async_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist") - ) - - embedding_invalid_key_error_events = [ ( {"type": "LlmEmbedding"}, @@ 
-449,34 +391,6 @@ def test_embeddings_invalid_request_error_no_model_async_with_raw_response(set_t ) # no model provided -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response( - set_trace_info, sync_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - sync_openai_client.embeddings.with_raw_response.create(input="Model does not exist.", model="does-not-exist") - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -566,38 +480,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content_with_ra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response( - set_trace_info, async_openai_client, loop -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - loop.run_until_complete( - async_openai_client.embeddings.with_raw_response.create( - input="Model does not exist.", model="does-not-exist" - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( diff --git a/tests/mlmodel_openai/test_embeddings_v1.py b/tests/mlmodel_openai/test_embeddings_v1.py index 405a2a9e5f..3801d3639c 100644 --- a/tests/mlmodel_openai/test_embeddings_v1.py +++ b/tests/mlmodel_openai/test_embeddings_v1.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -48,6 +48,7 @@ 
"response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999994, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 6, "vendor": "openai", "ingest_source": "Python", }, @@ -111,7 +112,7 @@ def test_openai_embedding_sync_no_content(set_trace_info, sync_openai_client): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_sync_with_token_count", @@ -206,7 +207,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info, async_openai_cl @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_async_with_token_count", diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py index 55dbd08105..8c2c0444f0 100644 --- a/tests/testing_support/ml_testing_utils.py +++ b/tests/testing_support/ml_testing_utils.py @@ -46,6 +46,14 @@ def add_token_count_to_embedding_events(expected_events): return events +def add_token_count_streaming_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionMessage": + event[1]["token_count"] = 0 + return events + + def add_token_counts_to_chat_events(expected_events): events = copy.deepcopy(expected_events) for event in events: From 1b08062f92722818b6bd6d8288ccbd477e67911f Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 7 Oct 2025 10:57:04 -0700 Subject: [PATCH 29/34] Add response token count logic to Gemini instrumentation. (#1486) * Add response token count logic to Gemini instrumentation. * Update token counting util functions. * Linting * Add response token count logic to Gemini instrumentation. * Update token counting util functions. * [MegaLinter] Apply linters fixes * Bump tests. --------- Co-authored-by: Tim Pansino --- newrelic/hooks/mlmodel_gemini.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/newrelic/hooks/mlmodel_gemini.py b/newrelic/hooks/mlmodel_gemini.py index d7585e0b60..bc6f8b2340 100644 --- a/newrelic/hooks/mlmodel_gemini.py +++ b/newrelic/hooks/mlmodel_gemini.py @@ -364,7 +364,7 @@ def _record_generation_error(transaction, linking_metadata, completion_id, kwarg llm_metadata, output_message_list, # We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run - True, + True, request_timestamp, ) except Exception: From 055f4f3be74ba7bc989e5ccde79639ba41929450 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Tue, 7 Oct 2025 13:14:56 -0700 Subject: [PATCH 30/34] Add response token count logic to OpenAI instrumentation. (#1498) * Add OpenAI token counts. * Add token counts to langchain + openai tests. * Remove unused expected events. * Linting * Add OpenAI token counts. * Add token counts to langchain + openai tests. * Remove unused expected events. 
* [MegaLinter] Apply linters fixes --------- Co-authored-by: Tim Pansino --- newrelic/hooks/mlmodel_openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index 26b51e52f9..4dcdda8c11 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -649,7 +649,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg response_headers = getattr(response, "headers", None) or {} exc_organization = response_headers.get("openai-organization") # There appears to be a bug here in openai v1 where despite having code, - # param, etc in the error response, they are not populated on the exception + # param, etc. in the error response, they are not populated on the exception # object so grab them from the response body object instead. body = getattr(exc, "body", None) or {} notice_error_attributes = { From 509a4d70cad9b0b94c4c6b020e2f3cd997fa2127 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Thu, 13 Nov 2025 14:53:41 -0800 Subject: [PATCH 31/34] Add response token count logic to Bedrock instrumentation. (#1504) * Add bedrock token counting. * [MegaLinter] Apply linters fixes * Add bedrock token counting. * Add safeguards when grabbing token counts. * Remove extra None defaults. * Cleanup default None checks. --------- Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- newrelic/hooks/external_botocore.py | 255 ++++++++++++++---- .../test_bedrock_chat_completion_converse.py | 47 +--- ...st_bedrock_chat_completion_invoke_model.py | 102 +------ .../test_bedrock_embeddings.py | 43 +-- .../_test_bedrock_chat_completion_converse.py | 6 + ...st_bedrock_chat_completion_invoke_model.py | 30 +++ .../_test_bedrock_embeddings.py | 2 + .../test_bedrock_chat_completion_converse.py | 47 +--- ...st_bedrock_chat_completion_invoke_model.py | 150 ++++------- .../test_bedrock_embeddings.py | 43 +-- tests/mlmodel_openai/test_embeddings_error.py | 57 +--- 11 files changed, 310 insertions(+), 472 deletions(-) diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py index 12dd4153f9..86fa65e20f 100644 --- a/newrelic/hooks/external_botocore.py +++ b/newrelic/hooks/external_botocore.py @@ -193,6 +193,7 @@ def create_chat_completion_message_event( request_model, request_id, llm_metadata_dict, + all_token_counts, response_id=None, request_timestamp=None, ): @@ -226,6 +227,8 @@ def create_chat_completion_message_event( "vendor": "bedrock", "ingest_source": "Python", } + if all_token_counts: + chat_completion_message_dict["token_count"] = 0 if settings.ai_monitoring.record_content.enabled: chat_completion_message_dict["content"] = content @@ -267,6 +270,8 @@ def create_chat_completion_message_event( "ingest_source": "Python", "is_response": True, } + if all_token_counts: + chat_completion_message_dict["token_count"] = 0 if settings.ai_monitoring.record_content.enabled: chat_completion_message_dict["content"] = content @@ -278,24 +283,21 @@ def create_chat_completion_message_event( transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict) -def extract_bedrock_titan_text_model_request(request_body, bedrock_attrs): +def extract_bedrock_titan_embedding_model_request(request_body, bedrock_attrs): request_body = json.loads(request_body) - request_config = request_body.get("textGenerationConfig", {}) - input_message_list = [{"role": "user", "content": request_body.get("inputText")}] - - 
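# Illustrative sketch, not part of the patch: the Titan embedding response body shape
# consumed by the extract_bedrock_titan_embedding_model_response helper this patch adds,
# and the single usage attribute it derives. Field values are invented for the example.
import json

sample_embedding_body = json.dumps({"embedding": [0.1, 0.2, 0.3], "inputTextTokenCount": 6})
# extract_bedrock_titan_embedding_model_response(sample_embedding_body, {})
#   -> {"response.usage.total_tokens": 6}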
bedrock_attrs["input_message_list"] = input_message_list - bedrock_attrs["request.max_tokens"] = request_config.get("maxTokenCount") - bedrock_attrs["request.temperature"] = request_config.get("temperature") + bedrock_attrs["input"] = request_body.get("inputText") return bedrock_attrs -def extract_bedrock_mistral_text_model_request(request_body, bedrock_attrs): - request_body = json.loads(request_body) - bedrock_attrs["input_message_list"] = [{"role": "user", "content": request_body.get("prompt")}] - bedrock_attrs["request.max_tokens"] = request_body.get("max_tokens") - bedrock_attrs["request.temperature"] = request_body.get("temperature") +def extract_bedrock_titan_embedding_model_response(response_body, bedrock_attrs): + if response_body: + response_body = json.loads(response_body) + + input_tokens = response_body.get("inputTextTokenCount", 0) + bedrock_attrs["response.usage.total_tokens"] = input_tokens + return bedrock_attrs @@ -303,16 +305,31 @@ def extract_bedrock_titan_text_model_response(response_body, bedrock_attrs): if response_body: response_body = json.loads(response_body) + input_tokens = response_body.get("inputTextTokenCount", 0) + completion_tokens = sum(result.get("tokenCount", 0) for result in response_body.get("results", [])) + total_tokens = input_tokens + completion_tokens + output_message_list = [ - {"role": "assistant", "content": result["outputText"]} for result in response_body.get("results", []) + {"role": "assistant", "content": result.get("outputText")} for result in response_body.get("results", []) ] bedrock_attrs["response.choices.finish_reason"] = response_body["results"][0]["completionReason"] + bedrock_attrs["response.usage.completion_tokens"] = completion_tokens + bedrock_attrs["response.usage.prompt_tokens"] = input_tokens + bedrock_attrs["response.usage.total_tokens"] = total_tokens bedrock_attrs["output_message_list"] = output_message_list return bedrock_attrs +def extract_bedrock_mistral_text_model_request(request_body, bedrock_attrs): + request_body = json.loads(request_body) + bedrock_attrs["input_message_list"] = [{"role": "user", "content": request_body.get("prompt")}] + bedrock_attrs["request.max_tokens"] = request_body.get("max_tokens") + bedrock_attrs["request.temperature"] = request_body.get("temperature") + return bedrock_attrs + + def extract_bedrock_mistral_text_model_response(response_body, bedrock_attrs): if response_body: response_body = json.loads(response_body) @@ -325,17 +342,6 @@ def extract_bedrock_mistral_text_model_response(response_body, bedrock_attrs): return bedrock_attrs -def extract_bedrock_titan_text_model_streaming_response(response_body, bedrock_attrs): - if response_body: - if "outputText" in response_body: - bedrock_attrs["output_message_list"] = messages = bedrock_attrs.get("output_message_list", []) - messages.append({"role": "assistant", "content": response_body["outputText"]}) - - bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason", None) - - return bedrock_attrs - - def extract_bedrock_mistral_text_model_streaming_response(response_body, bedrock_attrs): if response_body: outputs = response_body.get("outputs") @@ -344,14 +350,46 @@ def extract_bedrock_mistral_text_model_streaming_response(response_body, bedrock "output_message_list", [{"role": "assistant", "content": ""}] ) bedrock_attrs["output_message_list"][0]["content"] += outputs[0].get("text", "") - bedrock_attrs["response.choices.finish_reason"] = outputs[0].get("stop_reason", None) + 
bedrock_attrs["response.choices.finish_reason"] = outputs[0].get("stop_reason") return bedrock_attrs -def extract_bedrock_titan_embedding_model_request(request_body, bedrock_attrs): +def extract_bedrock_titan_text_model_request(request_body, bedrock_attrs): request_body = json.loads(request_body) + request_config = request_body.get("textGenerationConfig", {}) - bedrock_attrs["input"] = request_body.get("inputText") + input_message_list = [{"role": "user", "content": request_body.get("inputText")}] + + bedrock_attrs["input_message_list"] = input_message_list + bedrock_attrs["request.max_tokens"] = request_config.get("maxTokenCount") + bedrock_attrs["request.temperature"] = request_config.get("temperature") + + return bedrock_attrs + + +def extract_bedrock_titan_text_model_streaming_response(response_body, bedrock_attrs): + if response_body: + if "outputText" in response_body: + bedrock_attrs["output_message_list"] = messages = bedrock_attrs.get("output_message_list", []) + messages.append({"role": "assistant", "content": response_body["outputText"]}) + + bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason") + + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) return bedrock_attrs @@ -421,6 +459,17 @@ def extract_bedrock_claude_model_response(response_body, bedrock_attrs): bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") bedrock_attrs["output_message_list"] = output_message_list + bedrock_attrs[""] = str(response_body.get("id")) + + # Extract token information + token_usage = response_body.get("usage", {}) + if token_usage: + prompt_tokens = token_usage.get("input_tokens", 0) + completion_tokens = token_usage.get("output_tokens", 0) + total_tokens = prompt_tokens + completion_tokens + bedrock_attrs["response.usage.prompt_tokens"] = prompt_tokens + bedrock_attrs["response.usage.completion_tokens"] = completion_tokens + bedrock_attrs["response.usage.total_tokens"] = total_tokens return bedrock_attrs @@ -433,6 +482,22 @@ def extract_bedrock_claude_model_streaming_response(response_body, bedrock_attrs bedrock_attrs["output_message_list"][0]["content"] += content bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + 
total_tokens + ) + return bedrock_attrs @@ -453,6 +518,13 @@ def extract_bedrock_llama_model_response(response_body, bedrock_attrs): response_body = json.loads(response_body) output_message_list = [{"role": "assistant", "content": response_body.get("generation")}] + prompt_tokens = response_body.get("prompt_token_count", 0) + completion_tokens = response_body.get("generation_token_count", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = completion_tokens + bedrock_attrs["response.usage.prompt_tokens"] = prompt_tokens + bedrock_attrs["response.usage.total_tokens"] = total_tokens bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") bedrock_attrs["output_message_list"] = output_message_list @@ -466,6 +538,22 @@ def extract_bedrock_llama_model_streaming_response(response_body, bedrock_attrs) bedrock_attrs["output_message_list"] = [{"role": "assistant", "content": ""}] bedrock_attrs["output_message_list"][0]["content"] += content bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason") + + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) return bedrock_attrs @@ -506,12 +594,33 @@ def extract_bedrock_cohere_model_streaming_response(response_body, bedrock_attrs bedrock_attrs["response.choices.finish_reason"] = response_body["generations"][0]["finish_reason"] bedrock_attrs["response_id"] = str(response_body.get("id")) + # Extract token information + invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {}) + prompt_tokens = invocation_metrics.get("inputTokenCount", 0) + completion_tokens = invocation_metrics.get("outputTokenCount", 0) + total_tokens = prompt_tokens + completion_tokens + + bedrock_attrs["response.usage.completion_tokens"] = ( + bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens + ) + bedrock_attrs["response.usage.prompt_tokens"] = ( + bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens + ) + bedrock_attrs["response.usage.total_tokens"] = ( + bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens + ) + return bedrock_attrs NULL_EXTRACTOR = lambda *args: {} # noqa: E731 # Empty extractor that returns nothing MODEL_EXTRACTORS = [ # Order is important here, avoiding dictionaries - ("amazon.titan-embed", extract_bedrock_titan_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR), + ( + "amazon.titan-embed", + extract_bedrock_titan_embedding_model_request, + extract_bedrock_titan_embedding_model_response, + NULL_EXTRACTOR, + ), ("cohere.embed", extract_bedrock_cohere_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR), ( "amazon.titan", @@ -575,8 +684,8 @@ def handle_bedrock_exception( input_message_list = [] bedrock_attrs["input_message_list"] = input_message_list - bedrock_attrs["request.max_tokens"] = kwargs.get("inferenceConfig", {}).get("maxTokens", None) - 
bedrock_attrs["request.temperature"] = kwargs.get("inferenceConfig", {}).get("temperature", None) + bedrock_attrs["request.max_tokens"] = kwargs.get("inferenceConfig", {}).get("maxTokens") + bedrock_attrs["request.temperature"] = kwargs.get("inferenceConfig", {}).get("temperature") try: request_extractor(request_body, bedrock_attrs) @@ -844,6 +953,7 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs): try: # For aioboto3 clients, this will call make_api_call instrumentation in external_aiobotocore response = wrapped(*args, **kwargs) + except Exception as exc: handle_bedrock_exception( exc, @@ -935,15 +1045,22 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, sp exc_info=True, ) + response_prompt_tokens = response.get("usage", {}).get("inputTokens") if response else None + response_completion_tokens = response.get("usage", {}).get("outputTokens") if response else None + response_total_tokens = response.get("usage", {}).get("totalTokens") if response else None + bedrock_attrs = { "request_id": response_headers.get("x-amzn-requestid"), "model": model, "span_id": span_id, "trace_id": trace_id, "response.choices.finish_reason": response.get("stopReason"), - "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens", None), - "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature", None), + "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens"), + "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature"), "input_message_list": input_message_list, + "response.usage.prompt_tokens": response_prompt_tokens, + "response.usage.completion_tokens": response_completion_tokens, + "response.usage.total_tokens": response_total_tokens, } if output_message_list is not None: @@ -1122,29 +1239,34 @@ def handle_embedding_event(transaction, bedrock_attrs): custom_attrs_dict = transaction._custom_params llm_metadata_dict = {key: value for key, value in custom_attrs_dict.items() if key.startswith("llm.")} - span_id = bedrock_attrs.get("span_id", None) - trace_id = bedrock_attrs.get("trace_id", None) - request_id = bedrock_attrs.get("request_id", None) - model = bedrock_attrs.get("model", None) + span_id = bedrock_attrs.get("span_id") + trace_id = bedrock_attrs.get("trace_id") + request_id = bedrock_attrs.get("request_id") + model = bedrock_attrs.get("model") input_ = bedrock_attrs.get("input") + response_total_tokens = bedrock_attrs.get("response.usage.total_tokens") + + total_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, input_) + if settings.ai_monitoring.llm_token_count_callback and input_ + else response_total_tokens + ) + embedding_dict = { "vendor": "bedrock", "ingest_source": "Python", "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(model, input_) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "request_id": request_id, - "duration": bedrock_attrs.get("duration", None), + "duration": bedrock_attrs.get("duration"), "request.model": model, "response.model": model, - "error": bedrock_attrs.get("error", None), + "response.usage.total_tokens": total_tokens, + "error": bedrock_attrs.get("error"), } + embedding_dict.update(llm_metadata_dict) if settings.ai_monitoring.record_content.enabled: @@ -1155,6 +1277,7 @@ def handle_embedding_event(transaction, bedrock_attrs): def handle_chat_completion_event(transaction, bedrock_attrs, request_timestamp=None): + settings = 
transaction.settings or global_settings() chat_completion_id = str(uuid.uuid4()) # Grab LLM-related custom attributes off of the transaction to store as metadata on LLM events custom_attrs_dict = transaction._custom_params @@ -1163,11 +1286,15 @@ def handle_chat_completion_event(transaction, bedrock_attrs, request_timestamp=N llm_context_attrs = getattr(transaction, "_llm_context_attrs", None) if llm_context_attrs: llm_metadata_dict.update(llm_context_attrs) - span_id = bedrock_attrs.get("span_id", None) - trace_id = bedrock_attrs.get("trace_id", None) - request_id = bedrock_attrs.get("request_id", None) - response_id = bedrock_attrs.get("response_id", None) - model = bedrock_attrs.get("model", None) + span_id = bedrock_attrs.get("span_id") + trace_id = bedrock_attrs.get("trace_id") + request_id = bedrock_attrs.get("request_id") + response_id = bedrock_attrs.get("response_id") + model = bedrock_attrs.get("model") + + response_prompt_tokens = bedrock_attrs.get("response.usage.prompt_tokens") + response_completion_tokens = bedrock_attrs.get("response.usage.completion_tokens") + response_total_tokens = bedrock_attrs.get("response.usage.total_tokens") input_message_list = bedrock_attrs.get("input_message_list", []) output_message_list = bedrock_attrs.get("output_message_list", []) @@ -1182,6 +1309,25 @@ def handle_chat_completion_event(transaction, bedrock_attrs, request_timestamp=N len(input_message_list) + len(output_message_list) ) or None # If 0, attribute will be set to None and removed + input_message_content = " ".join([msg.get("content") for msg in input_message_list if msg.get("content")]) + prompt_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, input_message_content) + if settings.ai_monitoring.llm_token_count_callback and input_message_content + else response_prompt_tokens + ) + + output_message_content = " ".join([msg.get("content") for msg in output_message_list if msg.get("content")]) + completion_tokens = ( + settings.ai_monitoring.llm_token_count_callback(model, output_message_content) + if settings.ai_monitoring.llm_token_count_callback and output_message_content + else response_completion_tokens + ) + total_tokens = ( + prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens + ) + + all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens) + chat_completion_summary_dict = { "vendor": "bedrock", "ingest_source": "Python", @@ -1190,9 +1336,9 @@ def handle_chat_completion_event(transaction, bedrock_attrs, request_timestamp=N "trace_id": trace_id, "request_id": request_id, "response_id": response_id, - "duration": bedrock_attrs.get("duration", None), - "request.max_tokens": bedrock_attrs.get("request.max_tokens", None), - "request.temperature": bedrock_attrs.get("request.temperature", None), + "duration": bedrock_attrs.get("duration"), + "request.max_tokens": bedrock_attrs.get("request.max_tokens"), + "request.temperature": bedrock_attrs.get("request.temperature"), "request.model": model, "response.model": model, # Duplicate data required by the UI "response.number_of_messages": number_of_messages, @@ -1200,6 +1346,12 @@ def handle_chat_completion_event(transaction, bedrock_attrs, request_timestamp=N "error": bedrock_attrs.get("error", None), "timestamp": request_timestamp or None, } + + if all_token_counts: + chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens + chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens + 
chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens + chat_completion_summary_dict.update(llm_metadata_dict) chat_completion_summary_dict = {k: v for k, v in chat_completion_summary_dict.items() if v is not None} transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) @@ -1214,6 +1366,7 @@ def handle_chat_completion_event(transaction, bedrock_attrs, request_timestamp=N request_model=model, request_id=request_id, llm_metadata_dict=llm_metadata_dict, + all_token_counts=all_token_counts, response_id=response_id, request_timestamp=request_timestamp, ) diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py index 55843b832c..f115fc3d90 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py @@ -23,7 +23,7 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -147,7 +147,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_metric, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events)) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -278,49 +278,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( - exercise_converse_incorrect_access_key, set_trace_info, expected_metric -): - """ - A request is made to the server with invalid credentials. botocore will reach out to the server and receive an - UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer - events. The error response can also be parsed, and will be included as attributes on the recorded exception. 
- """ - - @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", - scoped_metrics=[expected_metric], - rollup_metrics=[expected_metric], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") - def _test(): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_converse_incorrect_access_key() - - _test() - - @pytest.fixture def exercise_converse_invalid_model(loop, bedrock_converse_server, response_streaming, monkeypatch): def _exercise_converse_invalid_model(): diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py index 207db7e31e..40c21c35ee 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py @@ -34,7 +34,8 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -207,7 +208,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_events, expected_metrics): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(add_token_count_streaming_events(expected_events))) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=3) @validate_transaction_metrics( @@ -456,51 +457,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token( - monkeypatch, - bedrock_server, - exercise_model, - set_trace_info, - expected_invalid_access_key_error_events, - expected_metrics, -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=expected_metrics, - rollup_metrics=expected_metrics, - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") 
- def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) - - _test() - - def invoke_model_malformed_request_body(loop, bedrock_server, response_streaming): async def _coro(): with pytest.raises(_client_error): @@ -799,58 +755,6 @@ async def _test(): loop.run_until_complete(_test()) -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events)) -@validate_custom_event_count(count=2) -@validate_error_trace_attributes( - _event_stream_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "error.message": "Malformed input request, please reformat your input and try again.", - "error.code": "ValidationException", - }, - }, - forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")}, -) -@validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, -) -@background_task(name="test_bedrock_chat_completion") -def test_bedrock_chat_completion_error_streaming_exception_with_token_count(loop, bedrock_server, set_trace_info): - """ - Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set. - - See the original test for a description of the error case. 
- """ - - async def _test(): - with pytest.raises(_event_stream_error): - model = "amazon.titan-text-express-v1" - body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8") - - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - response = await bedrock_server.invoke_model_with_response_stream( - body=body, modelId=model, accept="application/json", contentType="application/json" - ) - - body = response.get("body") - async for resp in body: - assert resp - - loop.run_until_complete(_test()) - - def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server): assert bedrock_server._nr_wrapped diff --git a/tests/external_aiobotocore/test_bedrock_embeddings.py b/tests/external_aiobotocore/test_bedrock_embeddings.py index b964122294..1f9359934b 100644 --- a/tests/external_aiobotocore/test_bedrock_embeddings.py +++ b/tests/external_aiobotocore/test_bedrock_embeddings.py @@ -28,7 +28,7 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -165,7 +165,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_count_to_embedding_events(expected_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_bedrock_embedding", @@ -290,45 +290,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_embedding_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_embedding", - scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_embedding") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token") - - _test() - - @reset_core_stats_engine() @validate_custom_events(embedding_expected_malformed_request_body_events) @validate_custom_event_count(count=1) diff --git a/tests/external_botocore/_test_bedrock_chat_completion_converse.py 
b/tests/external_botocore/_test_bedrock_chat_completion_converse.py index 7cde46faf8..a3501ef27d 100644 --- a/tests/external_botocore/_test_bedrock_chat_completion_converse.py +++ b/tests/external_botocore/_test_bedrock_chat_completion_converse.py @@ -29,6 +29,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.usage.prompt_tokens": 26, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 126, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "max_tokens", @@ -51,6 +54,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -70,6 +74,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -89,6 +94,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", diff --git a/tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py b/tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py index f72b9fa583..63db603f78 100644 --- a/tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py @@ -102,6 +102,9 @@ "duration": None, # Response time varies each test run "request.model": "amazon.titan-text-express-v1", "response.model": "amazon.titan-text-express-v1", + "response.usage.completion_tokens": 32, + "response.usage.total_tokens": 44, + "response.usage.prompt_tokens": 12, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "FINISH", @@ -124,6 +127,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -143,6 +147,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -414,6 +419,9 @@ "duration": None, # Response time varies each test run "request.model": "meta.llama2-13b-chat-v1", "response.model": "meta.llama2-13b-chat-v1", + "response.usage.prompt_tokens": 17, + "response.usage.completion_tokens": 69, + "response.usage.total_tokens": 86, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "stop", @@ -436,6 +444,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -455,6 +464,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1147,6 +1157,9 @@ "duration": None, # Response time varies each test run "request.model": "amazon.titan-text-express-v1", "response.model": "amazon.titan-text-express-v1", + "response.usage.completion_tokens": 35, + "response.usage.total_tokens": 47, + "response.usage.prompt_tokens": 12, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "FINISH", @@ 
-1169,6 +1182,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1188,6 +1202,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "amazon.titan-text-express-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1209,6 +1224,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-instant-v1", "response.model": "anthropic.claude-instant-v1", + "response.usage.completion_tokens": 99, + "response.usage.prompt_tokens": 19, + "response.usage.total_tokens": 118, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "stop_sequence", @@ -1231,6 +1249,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-instant-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1250,6 +1269,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "anthropic.claude-instant-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1333,6 +1353,9 @@ "duration": None, # Response time varies each test run "request.model": "cohere.command-text-v14", "response.model": "cohere.command-text-v14", + "response.usage.completion_tokens": 91, + "response.usage.total_tokens": 100, + "response.usage.prompt_tokens": 9, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "COMPLETE", @@ -1355,6 +1378,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "cohere.command-text-v14", "vendor": "bedrock", "ingest_source": "Python", @@ -1374,6 +1398,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "cohere.command-text-v14", "vendor": "bedrock", "ingest_source": "Python", @@ -1395,6 +1420,9 @@ "duration": None, # Response time varies each test run "request.model": "meta.llama2-13b-chat-v1", "response.model": "meta.llama2-13b-chat-v1", + "response.usage.prompt_tokens": 17, + "response.usage.completion_tokens": 100, + "response.usage.total_tokens": 117, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "length", @@ -1417,6 +1445,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", @@ -1436,6 +1465,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "meta.llama2-13b-chat-v1", "vendor": "bedrock", "ingest_source": "Python", diff --git a/tests/external_botocore/_test_bedrock_embeddings.py b/tests/external_botocore/_test_bedrock_embeddings.py index f5c227b9c3..af544af001 100644 --- a/tests/external_botocore/_test_bedrock_embeddings.py +++ b/tests/external_botocore/_test_bedrock_embeddings.py @@ -33,6 +33,7 @@ "response.model": "amazon.titan-embed-text-v1", "request.model": "amazon.titan-embed-text-v1", "request_id": "11233989-07e8-4ecb-9ba6-79601ba6d8cc", + "response.usage.total_tokens": 6, "vendor": "bedrock", "ingest_source": "Python", }, @@ -52,6 +53,7 @@ "response.model": "amazon.titan-embed-g1-text-02", "request.model": "amazon.titan-embed-g1-text-02", "request_id": "b10ac895-eae3-4f07-b926-10b2866c55ed", + "response.usage.total_tokens": 6, "vendor": "bedrock", "ingest_source": "Python", }, diff --git 
a/tests/external_botocore/test_bedrock_chat_completion_converse.py b/tests/external_botocore/test_bedrock_chat_completion_converse.py index e365b5163b..ca6cfd9d7b 100644 --- a/tests/external_botocore/test_bedrock_chat_completion_converse.py +++ b/tests/external_botocore/test_bedrock_chat_completion_converse.py @@ -23,7 +23,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -141,7 +141,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_metric, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events)) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -265,49 +265,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( - exercise_converse_incorrect_access_key, set_trace_info, expected_metric -): - """ - A request is made to the server with invalid credentials. botocore will reach out to the server and receive an - UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer - events. The error response can also be parsed, and will be included as attributes on the recorded exception. - """ - - @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", - scoped_metrics=[expected_metric], - rollup_metrics=[expected_metric], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") - def _test(): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_converse_incorrect_access_key() - - _test() - - @pytest.fixture def exercise_converse_invalid_model(bedrock_converse_server, response_streaming, monkeypatch): def _exercise_converse_invalid_model(): diff --git a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py index 9acb0e8ed2..ac72e458fb 100644 --- a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. + import json import os from io import BytesIO @@ -35,7 +36,8 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -129,6 +131,14 @@ def expected_events(model_id, response_streaming): return chat_completion_expected_events[model_id] +@pytest.fixture(scope="module") +def expected_events(model_id, response_streaming): + if response_streaming: + return chat_completion_streaming_expected_events[model_id] + else: + return chat_completion_expected_events[model_id] + + @pytest.fixture(scope="module") def expected_metrics(response_streaming): if response_streaming: @@ -200,7 +210,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_events, expected_metrics): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_counts_to_chat_events(add_token_count_streaming_events(expected_events))) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=3) @validate_transaction_metrics( @@ -438,49 +448,50 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_incorrect_access_key_with_token( - monkeypatch, - bedrock_server, - exercise_model, - set_trace_info, - expected_invalid_access_key_error_events, - expected_metrics, -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=expected_metrics, - rollup_metrics=expected_metrics, - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) - - _test() +# +# @reset_core_stats_engine() +# @override_llm_token_callback_settings(llm_token_count_callback) +# def test_bedrock_chat_completion_error_incorrect_access_key_with_token( +# monkeypatch, +# bedrock_server, +# exercise_model, +# set_trace_info, +# expected_invalid_access_key_error_events, +# expected_metrics, +# ): +# @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) +# 
@validate_error_trace_attributes( +# _client_error_name, +# exact_attrs={ +# "agent": {}, +# "intrinsic": {}, +# "user": { +# "http.statusCode": 403, +# "error.message": "The security token included in the request is invalid.", +# "error.code": "UnrecognizedClientException", +# }, +# }, +# ) +# @validate_transaction_metrics( +# name="test_bedrock_chat_completion", +# scoped_metrics=expected_metrics, +# rollup_metrics=expected_metrics, +# custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], +# background_task=True, +# ) +# @background_task(name="test_bedrock_chat_completion") +# def _test(): +# monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") +# +# with pytest.raises(_client_error): # not sure where this exception actually comes from +# set_trace_info() +# add_custom_attribute("llm.conversation_id", "my-awesome-id") +# add_custom_attribute("llm.foo", "bar") +# add_custom_attribute("non_llm_attr", "python-agent") +# +# exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) +# +# _test() @reset_core_stats_engine() @@ -762,55 +773,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_error_streaming_exception_with_token_count(bedrock_server, set_trace_info): - """ - Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set. - - See the original test for a description of the error case. - """ - - @validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events)) - @validate_custom_event_count(count=2) - @validate_error_trace_attributes( - _event_stream_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "error.message": "Malformed input request, please reformat your input and try again.", - "error.code": "ValidationException", - }, - }, - forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")}, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - with pytest.raises(_event_stream_error): - model = "amazon.titan-text-express-v1" - body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8") - - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - response = bedrock_server.invoke_model_with_response_stream( - body=body, modelId=model, accept="application/json", contentType="application/json" - ) - list(response["body"]) # Iterate - - _test() - - def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server): assert bedrock_server._nr_wrapped diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py index 36a5db6619..f28308354a 100644 --- a/tests/external_botocore/test_bedrock_embeddings.py +++ b/tests/external_botocore/test_bedrock_embeddings.py @@ -29,7 +29,7 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import 
override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -162,7 +162,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_count_to_embedding_events(expected_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_bedrock_embedding", @@ -287,45 +287,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_embedding_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_embedding", - scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_embedding") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token") - - _test() - - @reset_core_stats_engine() def test_bedrock_embedding_error_malformed_request_body(bedrock_server, set_trace_info): """ diff --git a/tests/mlmodel_openai/test_embeddings_error.py b/tests/mlmodel_openai/test_embeddings_error.py index a8e46bf23a..f80e6ff41d 100644 --- a/tests/mlmodel_openai/test_embeddings_error.py +++ b/tests/mlmodel_openai/test_embeddings_error.py @@ -14,12 +14,10 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -128,35 +126,6 @@ def test_embeddings_invalid_request_error_no_model_no_content(set_trace_info): ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "The model `does-not-exist` does not exist" - # 
"http.statusCode": 404, - } -) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - openai.Embedding.create(input="Model does not exist.", model="does-not-exist") - - # Invalid model provided @dt_enabled @reset_core_stats_engine() @@ -348,30 +317,6 @@ def test_embeddings_invalid_request_error_no_model_async_no_content(loop, set_tr ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/embedding/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/acreate", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count_async(set_trace_info, loop): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - loop.run_until_complete(openai.Embedding.acreate(input="Model does not exist.", model="does-not-exist")) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() From bf80674de84467aa4ad2e9b2710efe8759d8e933 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Wed, 10 Dec 2025 19:02:25 -0800 Subject: [PATCH 32/34] Reconcile changes from main and token counting branch. 
--- .../test_bedrock_chat_completion_converse.py | 6 +++++- .../_test_bedrock_chat_completion_invoke_model.py | 5 +++++ .../test_bedrock_chat_completion_converse.py | 9 +++++++-- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py index f115fc3d90..765d21f790 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py @@ -24,6 +24,7 @@ from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( add_token_counts_to_chat_events, + add_token_count_streaming_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -147,7 +148,10 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_metric, expected_events): - @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events)) + expected_events = add_token_counts_to_chat_events(expected_events) + if response_streaming: + expected_events = add_token_count_streaming_events(expected_events) + @validate_custom_events(expected_events) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py b/tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py index 63db603f78..e7b1844922 100644 --- a/tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py +++ b/tests/external_botocore/_test_bedrock_chat_completion_invoke_model.py @@ -294,6 +294,9 @@ "duration": None, # Response time varies each test run "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.usage.completion_tokens": 31, + "response.usage.prompt_tokens": 21, + "response.usage.total_tokens": 52, "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "end_turn", @@ -316,6 +319,7 @@ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", @@ -335,6 +339,7 @@ "role": "assistant", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", "vendor": "bedrock", "ingest_source": "Python", diff --git a/tests/external_botocore/test_bedrock_chat_completion_converse.py b/tests/external_botocore/test_bedrock_chat_completion_converse.py index ca6cfd9d7b..273a2626f5 100644 --- a/tests/external_botocore/test_bedrock_chat_completion_converse.py +++ b/tests/external_botocore/test_bedrock_chat_completion_converse.py @@ -24,6 +24,7 @@ from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( add_token_counts_to_chat_events, + add_token_count_streaming_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -140,8 +141,12 @@ def _test(): @reset_core_stats_engine() 
@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_metric, expected_events): - @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events)) +def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_metric, expected_events, response_streaming): + expected_events = add_token_counts_to_chat_events(expected_events) + if response_streaming: + expected_events = add_token_count_streaming_events(expected_events) + + @validate_custom_events(expected_events) # One summary event, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( From 4ea432ae26581cb0e165849156d89ae21caf5e64 Mon Sep 17 00:00:00 2001 From: umaannamalai <19895951+umaannamalai@users.noreply.github.com> Date: Thu, 11 Dec 2025 03:10:37 +0000 Subject: [PATCH 33/34] [MegaLinter] Apply linters fixes --- .../test_bedrock_chat_completion_converse.py | 2 +- .../test_bedrock_chat_completion_converse.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py index 5fedb34550..516787c2e5 100644 --- a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py @@ -23,8 +23,8 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_counts_to_chat_events, add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, diff --git a/tests/external_botocore/test_bedrock_chat_completion_converse.py b/tests/external_botocore/test_bedrock_chat_completion_converse.py index 273a2626f5..b613b6c3a8 100644 --- a/tests/external_botocore/test_bedrock_chat_completion_converse.py +++ b/tests/external_botocore/test_bedrock_chat_completion_converse.py @@ -23,8 +23,8 @@ from conftest import BOTOCORE_VERSION from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_counts_to_chat_events, add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -141,7 +141,9 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_metric, expected_events, response_streaming): +def test_bedrock_chat_completion_with_token_count( + set_trace_info, exercise_model, expected_metric, expected_events, response_streaming +): expected_events = add_token_counts_to_chat_events(expected_events) if response_streaming: expected_events = add_token_count_streaming_events(expected_events) From 4897665750cb5e307c24c6542a91ccb4803d3346 Mon Sep 17 00:00:00 2001 From: Uma Annamalai Date: Wed, 10 Dec 2025 20:04:18 -0800 Subject: [PATCH 34/34] Remove outdated converse testing file. 
--- .../test_chat_completion_converse.py | 476 ------------------ 1 file changed, 476 deletions(-) delete mode 100644 tests/external_botocore/test_chat_completion_converse.py diff --git a/tests/external_botocore/test_chat_completion_converse.py b/tests/external_botocore/test_chat_completion_converse.py deleted file mode 100644 index 2d38d6b4a4..0000000000 --- a/tests/external_botocore/test_chat_completion_converse.py +++ /dev/null @@ -1,476 +0,0 @@ -# Copyright 2010 New Relic, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import botocore.exceptions -import pytest -from conftest import BOTOCORE_VERSION -from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes -from testing_support.ml_testing_utils import ( - add_token_counts_to_chat_events, - disabled_ai_monitoring_record_content_settings, - disabled_ai_monitoring_settings, - events_sans_content, - events_sans_llm_metadata, - events_with_context_attrs, - llm_token_count_callback, - set_trace_info, -) -from testing_support.validators.validate_custom_event import validate_custom_event_count -from testing_support.validators.validate_custom_events import validate_custom_events -from testing_support.validators.validate_error_trace_attributes import validate_error_trace_attributes -from testing_support.validators.validate_transaction_metrics import validate_transaction_metrics - -from newrelic.api.background_task import background_task -from newrelic.api.llm_custom_attributes import WithLlmCustomAttributes -from newrelic.api.transaction import add_custom_attribute -from newrelic.common.object_names import callable_name - -chat_completion_expected_events = [ - ( - {"type": "LlmChatCompletionSummary"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "span_id": None, - "trace_id": "trace-id", - "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", - "duration": None, # Response time varies each test run - "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "response.usage.prompt_tokens": 26, - "response.usage.completion_tokens": 100, - "response.usage.total_tokens": 126, - "request.temperature": 0.7, - "request.max_tokens": 100, - "response.choices.finish_reason": "max_tokens", - "vendor": "bedrock", - "ingest_source": "Python", - "response.number_of_messages": 3, - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", - "span_id": None, - "trace_id": "trace-id", - "content": "You are a scientist.", - "role": "system", - "completion_id": None, - "sequence": 0, - "token_count": 0, - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "vendor": "bedrock", - "ingest_source": "Python", - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, # UUID that 
varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", - "span_id": None, - "trace_id": "trace-id", - "content": "What is 212 degrees Fahrenheit converted to Celsius?", - "role": "user", - "completion_id": None, - "sequence": 1, - "token_count": 0, - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "vendor": "bedrock", - "ingest_source": "Python", - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", - "span_id": None, - "trace_id": "trace-id", - "content": "To convert 212°F to Celsius, we can use the formula:\n\nC = (F - 32) × 5/9\n\nWhere:\nC is the temperature in Celsius\nF is the temperature in Fahrenheit\n\nPlugging in 212°F, we get:\n\nC = (212 - 32) × 5/9\nC = 180 × 5/9\nC = 100\n\nTherefore, 212°", # noqa: RUF001 - "role": "assistant", - "completion_id": None, - "sequence": 2, - "token_count": 0, - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "vendor": "bedrock", - "ingest_source": "Python", - "is_response": True, - }, - ), -] - - -@pytest.fixture(scope="module") -def exercise_model(bedrock_converse_server): - def _exercise_model(message): - inference_config = {"temperature": 0.7, "maxTokens": 100} - - response = bedrock_converse_server.converse( - modelId="anthropic.claude-3-sonnet-20240229-v1:0", - messages=message, - system=[{"text": "You are a scientist."}], - inferenceConfig=inference_config, - ) - - return _exercise_model - - -@reset_core_stats_engine() -def test_bedrock_chat_completion_in_txn_with_llm_metadata(set_trace_info, exercise_model): - @validate_custom_events(events_with_context_attrs(chat_completion_expected_events)) - # One summary event, one user message, and one response message from the assistant - @validate_custom_event_count(count=4) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_in_txn_with_llm_metadata", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @validate_attributes("agent", ["llm"]) - @background_task(name="test_bedrock_chat_completion_in_txn_with_llm_metadata") - def _test(): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - with WithLlmCustomAttributes({"context": "attr"}): - message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] - exercise_model(message) - - _test() - - -@disabled_ai_monitoring_record_content_settings -@reset_core_stats_engine() -def test_bedrock_chat_completion_no_content(set_trace_info, exercise_model): - @validate_custom_events(events_sans_content(chat_completion_expected_events)) - # One summary event, one user message, and one response message from the assistant - @validate_custom_event_count(count=4) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_no_content", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @validate_attributes("agent", ["llm"]) - 
@background_task(name="test_bedrock_chat_completion_no_content") - def _test(): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] - exercise_model(message) - - _test() - - -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model): - @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events)) - # One summary event, one user message, and one response message from the assistant - @validate_custom_event_count(count=4) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_with_token_count", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @validate_attributes("agent", ["llm"]) - @background_task(name="test_bedrock_chat_completion_with_token_count") - def _test(): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] - exercise_model(message) - - _test() - - -@reset_core_stats_engine() -def test_bedrock_chat_completion_no_llm_metadata(set_trace_info, exercise_model): - @validate_custom_events(events_sans_llm_metadata(chat_completion_expected_events)) - @validate_custom_event_count(count=4) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_in_txn_no_llm_metadata", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion_in_txn_no_llm_metadata") - def _test(): - set_trace_info() - message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] - exercise_model(message) - - _test() - - -@reset_core_stats_engine() -@validate_custom_event_count(count=0) -def test_bedrock_chat_completion_outside_txn(exercise_model): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] - exercise_model(message) - - -@disabled_ai_monitoring_settings -@reset_core_stats_engine() -@validate_custom_event_count(count=0) -@background_task(name="test_bedrock_chat_completion_disabled_ai_monitoring_settings") -def test_bedrock_chat_completion_disabled_ai_monitoring_settings(set_trace_info, exercise_model): - set_trace_info() - message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] - exercise_model(message) - - -chat_completion_invalid_access_key_error_events = [ - ( - {"type": "LlmChatCompletionSummary"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "span_id": None, - "trace_id": "trace-id", - "request_id": "e1206e19-2318-4a9d-be98-017c73f06118", - "duration": None, # Response time varies each test run - 
"request.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "request.temperature": 0.7, - "request.max_tokens": 100, - "vendor": "bedrock", - "ingest_source": "Python", - "response.number_of_messages": 1, - "error": True, - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "e1206e19-2318-4a9d-be98-017c73f06118", - "span_id": None, - "trace_id": "trace-id", - "content": "Invalid Token", - "role": "user", - "completion_id": None, - "sequence": 0, - "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", - "vendor": "bedrock", - "ingest_source": "Python", - }, - ), -] - -_client_error = botocore.exceptions.ClientError -_client_error_name = callable_name(_client_error) - - -@reset_core_stats_engine() -def test_bedrock_chat_completion_error_incorrect_access_key( - monkeypatch, bedrock_converse_server, exercise_model, set_trace_info -): - """ - A request is made to the server with invalid credentials. botocore will reach out to the server and receive an - UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer - events. The error response can also be parsed, and will be included as attributes on the recorded exception. - """ - - @validate_custom_events(chat_completion_invalid_access_key_error_events) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion") - def _test(): - monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - message = [{"role": "user", "content": [{"text": "Invalid Token"}]}] - - response = bedrock_converse_server.converse( - modelId="anthropic.claude-3-sonnet-20240229-v1:0", - messages=message, - inferenceConfig={"temperature": 0.7, "maxTokens": 100}, - ) - - assert response - - _test() - - -chat_completion_invalid_model_error_events = [ - ( - {"type": "LlmChatCompletionSummary"}, - { - "id": None, # UUID that varies with each run - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "request_id": "f4908827-3db9-4742-9103-2bbc34578b03", - "span_id": None, - "trace_id": "trace-id", - "duration": None, # Response time varies each test run - "request.model": "does-not-exist", - "response.model": "does-not-exist", - "request.temperature": 0.7, - "request.max_tokens": 100, - "response.number_of_messages": 1, - "vendor": "bedrock", - "ingest_source": "Python", - "error": True, - }, - ), - ( - {"type": "LlmChatCompletionMessage"}, - { - "id": None, - "llm.conversation_id": "my-awesome-id", - "llm.foo": "bar", - "span_id": None, - "trace_id": "trace-id", - "request_id": 
"f4908827-3db9-4742-9103-2bbc34578b03", - "content": "Model does not exist.", - "role": "user", - "completion_id": None, - "response.model": "does-not-exist", - "sequence": 0, - "vendor": "bedrock", - "ingest_source": "Python", - }, - ), -] - - -@reset_core_stats_engine() -def test_bedrock_chat_completion_error_invalid_model(bedrock_converse_server, set_trace_info): - @validate_custom_events(events_with_context_attrs(chat_completion_invalid_model_error_events)) - @validate_error_trace_attributes( - "botocore.errorfactory:ValidationException", - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 400, - "error.message": "The provided model identifier is invalid.", - "error.code": "ValidationException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_error_invalid_model", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion_error_invalid_model") - def _test(): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - with pytest.raises(_client_error): - with WithLlmCustomAttributes({"context": "attr"}): - message = [{"role": "user", "content": [{"text": "Model does not exist."}]}] - - response = bedrock_converse_server.converse( - modelId="does-not-exist", messages=message, inferenceConfig={"temperature": 0.7, "maxTokens": 100} - ) - - assert response - - _test() - - -@reset_core_stats_engine() -@disabled_ai_monitoring_record_content_settings -def test_bedrock_chat_completion_error_invalid_model_no_content(bedrock_converse_server, set_trace_info): - @validate_custom_events(events_sans_content(chat_completion_invalid_model_error_events)) - @validate_error_trace_attributes( - "botocore.errorfactory:ValidationException", - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 400, - "error.message": "The provided model identifier is invalid.", - "error.code": "ValidationException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_chat_completion_error_invalid_model_no_content", - scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], - rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_chat_completion_error_invalid_model_no_content") - def _test(): - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - with pytest.raises(_client_error): - message = [{"role": "user", "content": [{"text": "Model does not exist."}]}] - - response = bedrock_converse_server.converse( - modelId="does-not-exist", messages=message, inferenceConfig={"temperature": 0.7, "maxTokens": 100} - ) - - assert response - - _test()