@@ -32,6 +32,30 @@
# Centralizing here avoids magic strings sprinkled through schema/content generation code.
WRAPPER_KEY = "item"

# Keys that must remain at the top level (outside the wrapper) when we
# normalize flat JSONL rows into the canonical `item` structure.
_RESERVED_ROOT_KEYS: Set[str] = {"sample"}


def _normalize_row_for_item_wrapper(row: Dict[str, Any]) -> Dict[str, Any]:
"""Ensure every row exposes an `item` object without losing reserved keys."""

wrapper = row.get(WRAPPER_KEY)
if isinstance(wrapper, dict):
return row

normalized: Dict[str, Any] = {}
item_payload: Dict[str, Any] = {}

for key, value in row.items():
if key in _RESERVED_ROOT_KEYS:
normalized[key] = value
elif key != WRAPPER_KEY:
item_payload[key] = value

normalized[WRAPPER_KEY] = item_payload
return normalized
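# Illustrative sketch (hypothetical values) of the normalization above:
#   _normalize_row_for_item_wrapper({"query": "hi", "sample": {"output_text": "x"}})
#   -> {"sample": {"output_text": "x"}, "item": {"query": "hi"}}
# Reserved keys stay at the root; everything else moves under the wrapper.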


class OAIEvalRunCreationInfo(TypedDict, total=True):
"""Configuration for an evaluator"""
@@ -146,7 +170,6 @@ def _begin_single_aoai_evaluation(
that maps the user-supplied evaluators to the names of the graders as generated by the OAI service.
:rtype: Tuple[str, str, Dict[str, str]]
"""

# Format data for eval group creation
LOGGER.info(f"AOAI: Preparing evaluation for {len(graders)} grader(s): {list(graders.keys())}")
grader_name_list = []
@@ -637,7 +660,6 @@ def to_schema(node: Dict[str, Any]) -> Dict[str, Any]:
required = []
for name, child in children.items():
props[name] = to_schema(child)
required.append(name)
return {
"type": "object",
"properties": props,
@@ -785,25 +807,37 @@ def _get_data_source(input_data_df: pd.DataFrame, column_mapping: Dict[str, str]
:rtype: Dict[str, Any]
"""

def _convert_value_to_string(val: Any) -> str:
"""Convert a value to string representation for AOAI evaluation."""
def _convert_value(val: Any) -> Any:
"""Convert to AOAI-friendly representation while preserving structure when useful."""
if val is None:
return ""
elif isinstance(val, (str, int, float, bool)):
if isinstance(val, str):
return val
if isinstance(val, bool):
return val
# Align numerics with legacy text-only JSONL payloads by turning them into strings.
if isinstance(val, (int, float)):
return str(val)
else:
try: # Attempt to JSON serialize lists/dicts
return json.dumps(val, ensure_ascii=False)
except (TypeError, ValueError):
# Fallback for unserializable objects
return str(val)
if isinstance(val, (dict, list)):
return val
return str(val)
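# Sketch of the conversion rules above (hypothetical inputs):
#   _convert_value(None) -> ""        _convert_value("a") -> "a"
#   _convert_value(True) -> True      _convert_value(3.5) -> "3.5"
#   _convert_value({"k": 1}) -> {"k": 1}  (dicts/lists keep their structure)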

def _get_value_from_path(normalized_row: Dict[str, Any], path: str) -> Any:
cursor: Any = normalized_row
for segment in path.split("."):
if not isinstance(cursor, dict):
return None
cursor = cursor.get(segment)
if cursor is None:
return None
return cursor
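# e.g. (hypothetical row) _get_value_from_path({"item": {"test": {"t": "hi"}}}, "item.test.t")
# returns "hi"; a missing segment or a non-dict intermediate returns None.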

LOGGER.info(
f"AOAI: Building data source from {len(input_data_df)} rows with {len(column_mapping)} column mappings..."
)
# Gather path specs: each entry is a dict with source_path (dotted path into the
# normalized row, or None for run outputs), relative_parts (path under the wrapper,
# so schema + content align), dataframe_col, and is_run_output.
path_specs: List[Tuple[str, List[str], str]] = []
path_specs: List[Dict[str, Any]] = []

for name, formatted_entry in column_mapping.items():
if not (
@@ -842,30 +876,53 @@ def _convert_value_to_string(val: Any) -> str:
if not relative_parts:
continue

path_specs.append((formatted_entry, relative_parts, dataframe_col))
path_specs.append(
{
"source_path": source_path,
"relative_parts": relative_parts,
"dataframe_col": dataframe_col,
"is_run_output": False,
}
)

elif pieces[0] == "run" and len(pieces) >= 3 and pieces[1] == "outputs":
# Target / run outputs become __outputs.<rest> columns
run_col = "__outputs." + ".".join(pieces[2:])
leaf_name = pieces[-1]
path_specs.append((formatted_entry, [leaf_name], run_col))
path_specs.append(
{
"source_path": None,
"relative_parts": [leaf_name],
"dataframe_col": run_col,
"is_run_output": True,
}
)
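# Sketch of the spec appended above for a hypothetical mapping "${run.outputs.score}":
#   {"source_path": None, "relative_parts": ["score"],
#    "dataframe_col": "__outputs.score", "is_run_output": True}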

LOGGER.info(f"AOAI: Processed {len(path_specs)} path specifications from column mappings.")
content: List[Dict[str, Any]] = []

for _, row in input_data_df.iterrows():
normalized_row = _normalize_row_for_item_wrapper(row.to_dict())
item_root: Dict[str, Any] = {}

# Track which dataframe columns have been processed via column_mapping
processed_cols: Set[str] = set()
# Track which top-level keys under the wrapper have been populated via mappings
processed_root_keys: Set[str] = set()

for spec in path_specs:
rel_parts = spec["relative_parts"]
if not rel_parts:
continue

if spec["is_run_output"]:
val = row.get(spec["dataframe_col"], None)
else:
source_path = cast(str, spec["source_path"])
val = _get_value_from_path(normalized_row, source_path)
if val is None:
val = row.get(spec["dataframe_col"], None)

for _, rel_parts, df_col in path_specs:
# Safely fetch value
val = row.get(df_col, None)
# Convert value to string to match schema's "type": "string" leaves.
str_val = _convert_value_to_string(val)
norm_val = _convert_value(val)

# Insert into nested dict
cursor = item_root
for seg in rel_parts[:-1]:
nxt = cursor.get(seg)
@@ -874,19 +931,27 @@ def _convert_value_to_string(val: Any) -> str:
cursor[seg] = nxt
cursor = nxt
leaf_key = rel_parts[-1]
cursor[leaf_key] = str_val
cursor[leaf_key] = norm_val

# Mark this dataframe column as processed
processed_cols.add(df_col)
processed_root_keys.add(rel_parts[0])

# Add any unmapped dataframe columns directly to item_root
for col_name in input_data_df.columns:
if col_name not in processed_cols:
val = row.get(col_name, None)
str_val = _convert_value_to_string(val)
item_root[col_name] = str_val
# Pull through any wrapper entries that were never explicitly mapped
wrapper_view = normalized_row.get(WRAPPER_KEY, {})
if isinstance(wrapper_view, dict):
for key, raw_val in wrapper_view.items():
if key in processed_root_keys:
continue
if key in item_root:
continue
item_root[key] = _convert_value(raw_val)

content.append({WRAPPER_KEY: item_root})
content_row: Dict[str, Any] = {}
for root_key in _RESERVED_ROOT_KEYS:
if root_key in normalized_row:
content_row[root_key] = _convert_value(normalized_row[root_key])

content_row[WRAPPER_KEY] = item_root
content.append(content_row)
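# Illustrative shape of one emitted row (hypothetical values):
#   {"sample": {"output_text": "x"}, "item": {"query": "hi", "test": {"test_string": "yum"}}}
# Reserved root keys ride alongside the wrapper; all other fields live under "item".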

LOGGER.info(f"AOAI: Generated {len(content)} content items for data source.")
return {
Expand Down Expand Up @@ -926,7 +991,6 @@ def _begin_eval_run(

LOGGER.info(f"AOAI: Creating eval run '{run_name}' for eval group {eval_group_id}...")
data_source = _get_data_source(input_data_df, column_mapping)

if data_source_params is not None:
data_source.update(data_source_params)

flat_sample_output.jsonl
@@ -0,0 +1 @@
{"query":"how can i help someone be a good person","context":"you can teach them stuff","ground_truth":"teach stuff","response":"I can help you with this query. Give me more details","test":{"test_string":"baking cakes is fun!"},"sample.output_text":"someoutput"}
nested_item_keyword.jsonl
@@ -0,0 +1 @@
{"item":{"query":"what is the weather today","context":"its sunny","ground_truth":"rainy","response":"It is sunny out","test":{"test_string":"baking cakes is a fun pass time when you are bored!"}}}
@@ -84,6 +84,18 @@ def wrapped_flat_test_data_file():
return _get_file("wrapped_flat_test_data.jsonl")


@pytest.fixture
def nested_item_keyword_data():
"""Fixture for data that already contains an 'item' wrapper column."""
return pd.read_json(_get_file("nested_item_keyword.jsonl"), lines=True)


@pytest.fixture
def flat_sample_output_data():
"""Fixture for flat data that includes dotted sample metadata (e.g. sample.output_text)."""
return pd.read_json(_get_file("flat_sample_output.jsonl"), lines=True)


@pytest.mark.unittest
class TestBuildSchemaTreeFromPaths:
"""Test suite for the _build_schema_tree_from_paths helper function."""
@@ -98,7 +110,6 @@ def test_single_level_paths(self):
assert "required" in schema
assert set(schema["properties"].keys()) == {"query", "response", "ground_truth"}
assert all(prop["type"] == "string" for prop in schema["properties"].values())
assert set(schema["required"]) == {"query", "response", "ground_truth"}

def test_nested_paths(self):
"""Test building schema with nested paths."""
@@ -395,6 +406,49 @@ def test_data_source_with_none_values(self, flat_test_data):
# None should be converted to empty string
assert content[1][WRAPPER_KEY]["response"] == ""

def test_data_source_with_item_column_and_nested_values(self, nested_item_keyword_data):
"""Ensure rows that already have an 'item' column keep nested dicts intact."""

column_mapping = {
"query": "${data.item.query}",
"response": "${data.item.response}",
"test_string": "${data.item.test.test_string}",
}

data_source = _get_data_source(nested_item_keyword_data, column_mapping)
content = data_source["source"]["content"]

assert len(content) == len(nested_item_keyword_data)
first_row = content[0]
assert WRAPPER_KEY in first_row

item_payload = first_row[WRAPPER_KEY]
assert item_payload["query"] == "what is the weather today"
assert item_payload["response"] == "It is sunny out"
assert item_payload["test"]["test_string"] == ("baking cakes is a fun pass time when you are bored!")
# Ensure we did not accidentally nest another 'item' key inside the wrapper
assert "item" not in item_payload

def test_data_source_with_sample_output_metadata(self, flat_sample_output_data):
"""Ensure flat rows that include dotted sample metadata remain accessible."""

column_mapping = {
"query": "${data.item.query}",
"response": "${data.item.response}",
"test_string": "${data.item.test.test_string}",
}

data_source = _get_data_source(flat_sample_output_data, column_mapping)
content = data_source["source"]["content"]

assert len(content) == len(flat_sample_output_data)
row = content[0][WRAPPER_KEY]

assert row["query"] == "how can i help someone be a good person"
assert row["test"]["test_string"] == "baking cakes is fun!"
# sample.output_text should survive normalization and stay reachable under the wrapper
assert row["sample.output_text"] == "someoutput"

def test_data_source_with_numeric_values(self, flat_test_data):
"""Test data source generation converts numeric values to strings."""
flat_test_data["score"] = [95, 87, 92]