
Commit 1b97fed

Sample code for score model grader with image (#44239)
1 parent baba115 commit 1b97fed

2 files changed: +206 −0

data_folder/sample_evaluations_score_model_grader_with_image.jpg (43.5 KB image, not shown)

sample_evaluations_score_model_grader_with_image.py: 206 additions & 0 deletions
@@ -0,0 +1,206 @@
# pylint: disable=line-too-long,useless-suppression
# ------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# ------------------------------------

"""
DESCRIPTION:
    Given an AIProjectClient, this sample demonstrates how to use the synchronous
    `openai.evals.*` methods to create an evaluation that uses a score model grader
    with an image input, create an eval run against it, poll until the run finishes,
    and list the run's output items.

USAGE:
    python sample_evaluations_score_model_grader_with_image.py

    Before running the sample:

    pip install "azure-ai-projects>=2.0.0b2" azure-identity python-dotenv Pillow

    Set these environment variables with your own values:
    1) AZURE_AI_PROJECT_ENDPOINT - Required. The Azure AI Project endpoint, as found in the overview page of your
       Microsoft Foundry project. It has the form: https://<account_name>.services.ai.azure.com/api/projects/<project_name>.
    2) AZURE_AI_MODEL_DEPLOYMENT_NAME - Required. The name of the model deployment to use for evaluation.
"""
import os
import base64
from PIL import Image
from io import BytesIO

from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
import time
from pprint import pprint
from openai.types.evals.create_eval_completions_run_data_source_param import (
    CreateEvalCompletionsRunDataSourceParam,
    SourceFileContent,
    SourceFileContentContent,
    InputMessagesTemplate,
    InputMessagesTemplateTemplateEvalItem,
    InputMessagesTemplateTemplateEvalItemContentInputImage,
)
from openai.types.responses import EasyInputMessageParam
from openai.types.eval_create_params import DataSourceConfigCustom
from dotenv import load_dotenv


load_dotenv()
file_path = os.path.abspath(__file__)
folder_path = os.path.dirname(file_path)

endpoint = os.environ["AZURE_AI_PROJECT_ENDPOINT"]
model_deployment_name = os.environ.get("AZURE_AI_MODEL_DEPLOYMENT_NAME", "")
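
# Convert a local image into a base64 data URI so it can be passed inline as an
# "input_image" value, without having to host the file anywhere.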
def image_to_data_uri(image_path: str) -> str:
    with Image.open(image_path) as img:
        buffered = BytesIO()
        img.save(buffered, format=img.format or 'PNG')
        img_str = base64.b64encode(buffered.getvalue()).decode()
        mime_type = f"image/{img.format.lower()}" if img.format else "image/png"
        return f"data:{mime_type};base64,{img_str}"
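
# DefaultAzureCredential authenticates against the Foundry project; the OpenAI client
# returned by the project is what exposes the `evals` API used throughout this sample.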
with (
    DefaultAzureCredential() as credential,
    AIProjectClient(endpoint=endpoint, credential=credential) as project_client,
    project_client.get_openai_client() as client,
):
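
    # The custom data source config declares the shape of each test item. The
    # "image_url" and "caption" fields defined here are what the grader and the
    # input-message template reference below as {{item.image_url}} and {{item.caption}}.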
    data_source_config = DataSourceConfigCustom(
        {
            "type": "custom",
            "item_schema": {
                "type": "object",
                "properties": {
                    "image_url": {
                        "type": "string",
                        "description": "The URL of the image to be evaluated."
                    },
                    "caption": {
                        "type": "string",
                        "description": "The caption describing the image."
                    },
                },
                "required": [
                    "image_url",
                    "caption",
                ],
            },
            "include_sample_schema": True,
        }
    )
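
    # A "score_model" grader asks the grading model to judge each sample: here it
    # compares the generated response ({{sample.output_text}}) and the reference
    # caption against the image and returns a score in [0.0, 1.0], with
    # pass_threshold marking the minimum passing score.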
    testing_criteria = [
        {
            "type": "score_model",
            "name": "score_grader",
            "model": model_deployment_name,
            "input": [
                {
                    "role": "system",
                    "content": "You are an expert grader. Judge how well the model response {{sample.output_text}} describes the image as well as matches the caption {{item.caption}}. Output a score of 1 if it's an excellent match with both. If it's somewhat compatible, output a score around 0.5. Otherwise, give a score of 0."
                },
                {
                    "role": "user",
                    "content":
                    {
                        "type": "input_image",
                        "image_url": "{{item.image_url}}",
                        "detail": "auto",
                    }
                }
            ],
            "range": [
                0.0,
                1.0
            ],
            "pass_threshold": 0.5,
        },
    ]
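
    # The eval object couples the item schema with the grading criteria; runs are
    # created against it afterwards.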
print("Creating evaluation")
123+
eval_object = client.evals.create(
124+
name="OpenAI graders test",
125+
data_source_config=data_source_config,
126+
testing_criteria=testing_criteria, # type: ignore
127+
)
128+
print(f"Evaluation created (id: {eval_object.id}, name: {eval_object.name})")
129+
130+
print("Get evaluation by Id")
131+
eval_object_response = client.evals.retrieve(eval_object.id)
132+
print("Evaluation Response:")
133+
pprint(eval_object_response)
134+
135+
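
    # Inline test data: two items matching the item_schema above. The first uses a
    # local image converted to a data URI; the second points at a public image URL.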
    image_path = os.path.join(folder_path, "data_folder/sample_evaluations_score_model_grader_with_image.jpg")
    source_file_content_content1 = SourceFileContentContent(
        item={
            "image_url": image_to_data_uri(image_path),
            "caption": "industrial plants in the distance at night",
        },
    )
    source_file_content_content2 = SourceFileContentContent(
        item={
            "image_url": "https://ep1.pinkbike.org/p4pb6973204/p4pb6973204.jpg",
            "caption": "all shots by by person and rider shots can be found on his website.",
        },
    )
    source_file_content = SourceFileContent(
        type="file_content",
        content=[source_file_content_content1, source_file_content_content2],
    )
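
    # The input-message template produces the responses that get graded: the model
    # under evaluation is asked to caption each item's image, with {{item.image_url}}
    # filled in per row.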
    input_messages = InputMessagesTemplate(
        type="template",
        template=[
            EasyInputMessageParam(
                role="system",
                content="You are an assistant that analyzes images and provides captions that accurately describe the content of the image.",
            ),
            InputMessagesTemplateTemplateEvalItem(
                role="user",
                type="message",
                content=InputMessagesTemplateTemplateEvalItemContentInputImage(
                    type="input_image",
                    image_url="{{item.image_url}}",
                    detail="auto",
                )
            ),
        ],
    )
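
    # The run's "completions" data source ties everything together: the file content
    # supplies the items, input_messages builds the prompt per item, and the named
    # model deployment generates the responses that the grader then scores.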
print("Creating Eval Run")
172+
eval_run_object = client.evals.runs.create(
173+
eval_id=eval_object.id,
174+
name="Eval",
175+
metadata={"team": "eval-exp", "scenario": "notifications-v1"},
176+
data_source=CreateEvalCompletionsRunDataSourceParam(
177+
type="completions",
178+
source=source_file_content,
179+
model=model_deployment_name,
180+
input_messages=input_messages,
181+
sampling_params={
182+
"temperature": 0.8,
183+
}
184+
),
185+
)
186+
print(f"Eval Run created (id: {eval_run_object.id}, name: {eval_run_object.name})")
187+
pprint(eval_run_object)
188+
189+
print("Get Eval Run by Id")
190+
eval_run_response = client.evals.runs.retrieve(run_id=eval_run_object.id, eval_id=eval_object.id)
191+
print("Eval Run Response:")
192+
pprint(eval_run_response)
193+
194+
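
    # Poll the run until it completes or fails, then dump its per-item results and
    # the report URL.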
    while True:
        run = client.evals.runs.retrieve(run_id=eval_run_response.id, eval_id=eval_object.id)
        if run.status == "completed" or run.status == "failed":
            output_items = list(client.evals.runs.output_items.list(run_id=run.id, eval_id=eval_object.id))
            pprint(output_items)
            print(f"Eval Run Report URL: {run.report_url}")

            break
        time.sleep(5)
        print("Waiting for eval run to complete...")
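
    # Clean up the evaluation created by this sample.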
    client.evals.delete(eval_id=eval_object.id)
    print("Evaluation deleted")
