@@ -32,6 +32,30 @@
# Centralizing here avoids magic strings sprinkled through schema/content generation code.
WRAPPER_KEY = "item"

# Keys that must remain at the top level (outside the wrapper) when we
# normalize flat JSONL rows into the canonical `item` structure.
_RESERVED_ROOT_KEYS: Set[str] = {"sample"}


def _normalize_row_for_item_wrapper(row: Dict[str, Any]) -> Dict[str, Any]:
"""Ensure every row exposes an `item` object without losing reserved keys."""

wrapper = row.get(WRAPPER_KEY)
if isinstance(wrapper, dict):
return row

normalized: Dict[str, Any] = {}
item_payload: Dict[str, Any] = {}

for key, value in row.items():
if key in _RESERVED_ROOT_KEYS:
normalized[key] = value
elif key != WRAPPER_KEY:
item_payload[key] = value

normalized[WRAPPER_KEY] = item_payload
return normalized
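# Illustrative sketch (hypothetical values) of the normalization above:
#   _normalize_row_for_item_wrapper({"query": "hi", "sample": {"output_text": "x"}})
#   -> {"sample": {"output_text": "x"}, "item": {"query": "hi"}}
# Reserved keys stay at the root; everything else moves under the wrapper.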


class OAIEvalRunCreationInfo(TypedDict, total=True):
"""Configuration for an evaluator"""
@@ -146,7 +170,6 @@ def _begin_single_aoai_evaluation(
that maps the user-supplied evaluators to the names of the graders as generated by the OAI service.
:rtype: Tuple[str, str, Dict[str, str]]
"""

# Format data for eval group creation
LOGGER.info(f"AOAI: Preparing evaluation for {len(graders)} grader(s): {list(graders.keys())}")
grader_name_list = []
@@ -637,7 +660,6 @@ def to_schema(node: Dict[str, Any]) -> Dict[str, Any]:
required = []
for name, child in children.items():
props[name] = to_schema(child)
required.append(name)
return {
"type": "object",
"properties": props,
@@ -785,25 +807,37 @@ def _get_data_source(input_data_df: pd.DataFrame, column_mapping: Dict[str, str]
:rtype: Dict[str, Any]
"""

def _convert_value_to_string(val: Any) -> str:
"""Convert a value to string representation for AOAI evaluation."""
def _convert_value(val: Any) -> Any:
"""Convert to AOAI-friendly representation while preserving structure when useful."""
if val is None:
return ""
elif isinstance(val, (str, int, float, bool)):
if isinstance(val, str):
return val
if isinstance(val, bool):
return val
# Align numerics with legacy text-only JSONL payloads by turning them into strings.
if isinstance(val, (int, float)):
return str(val)
else:
try: # Attempt to JSON serialize lists/dicts
return json.dumps(val, ensure_ascii=False)
except (TypeError, ValueError):
# Fallback for unserializable objects
return str(val)
if isinstance(val, (dict, list)):
return val
return str(val)
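# Sketch of the conversion rules above (hypothetical inputs):
#   _convert_value(None) -> ""        _convert_value("a") -> "a"
#   _convert_value(True) -> True      _convert_value(3.5) -> "3.5"
#   _convert_value({"k": 1}) -> {"k": 1}  (dicts/lists keep their structure)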

def _get_value_from_path(normalized_row: Dict[str, Any], path: str) -> Any:
cursor: Any = normalized_row
for segment in path.split("."):
if not isinstance(cursor, dict):
return None
cursor = cursor.get(segment)
if cursor is None:
return None
return cursor
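# e.g. (hypothetical row) _get_value_from_path({"item": {"test": {"t": "hi"}}}, "item.test.t")
# returns "hi"; a missing segment or a non-dict intermediate returns None.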

LOGGER.info(
f"AOAI: Building data source from {len(input_data_df)} rows with {len(column_mapping)} column mappings..."
)
# Gather path specs: each entry is a dict with source_path (dotted path into the
# normalized row, or None for run outputs), relative_parts (path under the wrapper,
# so schema + content align), dataframe_col, and is_run_output.
path_specs: List[Tuple[str, List[str], str]] = []
path_specs: List[Dict[str, Any]] = []

for name, formatted_entry in column_mapping.items():
if not (
@@ -842,30 +876,53 @@ def _convert_value_to_string(val: Any) -> str:
if not relative_parts:
continue

path_specs.append((formatted_entry, relative_parts, dataframe_col))
path_specs.append(
{
"source_path": source_path,
"relative_parts": relative_parts,
"dataframe_col": dataframe_col,
"is_run_output": False,
}
)

elif pieces[0] == "run" and len(pieces) >= 3 and pieces[1] == "outputs":
# Target / run outputs become __outputs.<rest> columns
run_col = "__outputs." + ".".join(pieces[2:])
leaf_name = pieces[-1]
path_specs.append((formatted_entry, [leaf_name], run_col))
path_specs.append(
{
"source_path": None,
"relative_parts": [leaf_name],
"dataframe_col": run_col,
"is_run_output": True,
}
)
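# Sketch of the spec appended above for a hypothetical mapping "${run.outputs.score}":
#   {"source_path": None, "relative_parts": ["score"],
#    "dataframe_col": "__outputs.score", "is_run_output": True}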

LOGGER.info(f"AOAI: Processed {len(path_specs)} path specifications from column mappings.")
content: List[Dict[str, Any]] = []

for _, row in input_data_df.iterrows():
normalized_row = _normalize_row_for_item_wrapper(row.to_dict())
item_root: Dict[str, Any] = {}

# Track which dataframe columns have been processed via column_mapping
processed_cols: Set[str] = set()
# Track which top-level keys under the wrapper have been populated via mappings
processed_root_keys: Set[str] = set()

for spec in path_specs:
rel_parts = spec["relative_parts"]
if not rel_parts:
continue

if spec["is_run_output"]:
val = row.get(spec["dataframe_col"], None)
else:
source_path = cast(str, spec["source_path"])
val = _get_value_from_path(normalized_row, source_path)
if val is None:
val = row.get(spec["dataframe_col"], None)

for _, rel_parts, df_col in path_specs:
# Safely fetch value
val = row.get(df_col, None)
# Convert value to string to match schema's "type": "string" leaves.
str_val = _convert_value_to_string(val)
norm_val = _convert_value(val)

# Insert into nested dict
cursor = item_root
for seg in rel_parts[:-1]:
nxt = cursor.get(seg)
@@ -874,19 +931,27 @@ def _convert_value_to_string(val: Any) -> str:
cursor[seg] = nxt
cursor = nxt
leaf_key = rel_parts[-1]
cursor[leaf_key] = str_val
cursor[leaf_key] = norm_val

# Mark this dataframe column as processed
processed_cols.add(df_col)
processed_root_keys.add(rel_parts[0])

# Add any unmapped dataframe columns directly to item_root
for col_name in input_data_df.columns:
if col_name not in processed_cols:
val = row.get(col_name, None)
str_val = _convert_value_to_string(val)
item_root[col_name] = str_val
# Pull through any wrapper entries that were never explicitly mapped
wrapper_view = normalized_row.get(WRAPPER_KEY, {})
if isinstance(wrapper_view, dict):
for key, raw_val in wrapper_view.items():
if key in processed_root_keys:
continue
if key in item_root:
continue
item_root[key] = _convert_value(raw_val)

content.append({WRAPPER_KEY: item_root})
content_row: Dict[str, Any] = {}
for root_key in _RESERVED_ROOT_KEYS:
if root_key in normalized_row:
content_row[root_key] = _convert_value(normalized_row[root_key])

content_row[WRAPPER_KEY] = item_root
content.append(content_row)
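# Illustrative shape of one emitted row (hypothetical values):
#   {"sample": {"output_text": "x"}, "item": {"query": "hi", "test": {"test_string": "yum"}}}
# Reserved root keys ride alongside the wrapper; all other fields live under "item".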

LOGGER.info(f"AOAI: Generated {len(content)} content items for data source.")
return {
Expand Down Expand Up @@ -926,7 +991,6 @@ def _begin_eval_run(

LOGGER.info(f"AOAI: Creating eval run '{run_name}' for eval group {eval_group_id}...")
data_source = _get_data_source(input_data_df, column_mapping)

if data_source_params is not None:
data_source.update(data_source_params)

flat_sample_output.jsonl
@@ -0,0 +1 @@
{"query":"how can i help someone be a good person","context":"you can teach them stuff","ground_truth":"teach stuff","response":"I can help you with this query. Give me more details","test":{"test_string":"baking cakes is fun!"},"sample.output_text":"someoutput"}
nested_item_keyword.jsonl
@@ -0,0 +1 @@
{"item":{"query":"what is the weather today","context":"its sunny","ground_truth":"rainy","response":"It is sunny out","test":{"test_string":"baking cakes is a fun pass time when you are bored!"}}}
@@ -84,6 +84,18 @@ def wrapped_flat_test_data_file():
return _get_file("wrapped_flat_test_data.jsonl")


@pytest.fixture
def nested_item_keyword_data():
"""Fixture for data that already contains an 'item' wrapper column."""
return pd.read_json(_get_file("nested_item_keyword.jsonl"), lines=True)


@pytest.fixture
def flat_sample_output_data():
"""Fixture for flat data that includes dotted sample metadata (e.g. sample.output_text)."""
return pd.read_json(_get_file("flat_sample_output.jsonl"), lines=True)


@pytest.mark.unittest
class TestBuildSchemaTreeFromPaths:
"""Test suite for the _build_schema_tree_from_paths helper function."""
@@ -98,7 +110,6 @@ def test_single_level_paths(self):
assert "required" in schema
assert set(schema["properties"].keys()) == {"query", "response", "ground_truth"}
assert all(prop["type"] == "string" for prop in schema["properties"].values())
assert set(schema["required"]) == {"query", "response", "ground_truth"}

def test_nested_paths(self):
"""Test building schema with nested paths."""
@@ -395,6 +406,49 @@ def test_data_source_with_none_values(self, flat_test_data):
# None should be converted to empty string
assert content[1][WRAPPER_KEY]["response"] == ""

def test_data_source_with_item_column_and_nested_values(self, nested_item_keyword_data):
"""Ensure rows that already have an 'item' column keep nested dicts intact."""

column_mapping = {
"query": "${data.item.query}",
"response": "${data.item.response}",
"test_string": "${data.item.test.test_string}",
}

data_source = _get_data_source(nested_item_keyword_data, column_mapping)
content = data_source["source"]["content"]

assert len(content) == len(nested_item_keyword_data)
first_row = content[0]
assert WRAPPER_KEY in first_row

item_payload = first_row[WRAPPER_KEY]
assert item_payload["query"] == "what is the weather today"
assert item_payload["response"] == "It is sunny out"
assert item_payload["test"]["test_string"] == ("baking cakes is a fun pass time when you are bored!")
# Ensure we did not accidentally nest another 'item' key inside the wrapper
assert "item" not in item_payload

def test_data_source_with_sample_output_metadata(self, flat_sample_output_data):
"""Ensure flat rows that include dotted sample metadata remain accessible."""

column_mapping = {
"query": "${data.item.query}",
"response": "${data.item.response}",
"test_string": "${data.item.test.test_string}",
}

data_source = _get_data_source(flat_sample_output_data, column_mapping)
content = data_source["source"]["content"]

assert len(content) == len(flat_sample_output_data)
row = content[0][WRAPPER_KEY]

assert row["query"] == "how can i help someone be a good person"
assert row["test"]["test_string"] == "baking cakes is fun!"
# sample.output_text should survive normalization and stay reachable under the wrapper
assert row["sample.output_text"] == "someoutput"

def test_data_source_with_numeric_values(self, flat_test_data):
"""Test data source generation converts numeric values to strings."""
flat_test_data["score"] = [95, 87, 92]