41 changes: 36 additions & 5 deletions pandas/core/sorting.py
@@ -680,14 +680,45 @@ def compress_group_index(
     space can be huge, so this function compresses it, by computing offsets
     (comp_ids) into the list of unique labels (obs_group_ids).
     """
-    if len(group_index) and np.all(group_index[1:] >= group_index[:-1]):
+    import sys
+
+    # Use numpy-based approach for Python 3.14+ to avoid hashtable issues
+    is_sorted = len(group_index) and np.all(
+        group_index[1:] >= group_index[:-1]
+    )
+    if sys.version_info >= (3, 14) or is_sorted:
         # GH 53806: fast path for sorted group_index
+        # GH 63314: also use for Python 3.14+ due to hashtable behavior changes
+        if len(group_index) == 0:
+            empty_arr = np.array([], dtype=np.int64)
+            return ensure_int64(empty_arr), ensure_int64(empty_arr)
+
+        # Sort if needed
+        if not np.all(group_index[1:] >= group_index[:-1]):
+            sorted_idx = np.argsort(group_index, kind="stable")
+            sorted_group_index = group_index[sorted_idx]
+            unsort_idx = np.empty_like(sorted_idx)
+            unsort_idx[sorted_idx] = np.arange(len(sorted_idx))
+        else:
+            sorted_group_index = group_index
+            unsort_idx = None
+
         unique_mask = np.concatenate(
-            [group_index[:1] > -1, group_index[1:] != group_index[:-1]]
+            [
+                sorted_group_index[:1] > -1,
+                sorted_group_index[1:] != sorted_group_index[:-1],
+            ]
         )
-        comp_ids = unique_mask.cumsum()
-        comp_ids -= 1
-        obs_group_ids = group_index[unique_mask]
+        comp_ids_sorted = unique_mask.cumsum() - 1
+        obs_group_ids = sorted_group_index[unique_mask]
+
+        if unsort_idx is not None:
+            comp_ids = comp_ids_sorted[unsort_idx]
+        else:
+            comp_ids = comp_ids_sorted
+
+        if sort and not np.all(obs_group_ids[1:] >= obs_group_ids[:-1]):
+            obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids)
     else:
         size_hint = len(group_index)
         table = hashtable.Int64HashTable(size_hint)
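The new branch swaps the `Int64HashTable` factorization for a pure-NumPy one: stable-sort the labels so equal values form contiguous runs, mark the first element of each run, number the runs with a cumulative sum, and scatter the group ids back through the inverse permutation. A minimal standalone sketch of that technique (plain NumPy; `compress_labels` is a hypothetical name, not the pandas API; `-1` is treated as a missing label, as in the diff):

```python
import numpy as np

def compress_labels(group_index: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """Sketch of the numpy fast path: factorize sorted labels, then unsort."""
    if len(group_index) == 0:
        empty = np.array([], dtype=np.int64)
        return empty, empty

    # Stable sort so equal labels form contiguous runs.
    sorted_idx = np.argsort(group_index, kind="stable")
    sorted_labels = group_index[sorted_idx]

    # Inverse permutation: where each original element landed in sorted order.
    unsort_idx = np.empty_like(sorted_idx)
    unsort_idx[sorted_idx] = np.arange(len(sorted_idx))

    # True at the start of every new run; a leading -1 (missing) run stays False.
    unique_mask = np.concatenate(
        [sorted_labels[:1] > -1, sorted_labels[1:] != sorted_labels[:-1]]
    )
    comp_ids_sorted = unique_mask.cumsum() - 1  # group number per sorted element
    obs_group_ids = sorted_labels[unique_mask]  # one label per observed group

    return comp_ids_sorted[unsort_idx], obs_group_ids

labels = np.array([30, 10, 30, -1, 20], dtype=np.int64)
comp_ids, obs = compress_labels(labels)
print(comp_ids)  # [ 2  0  2 -1  1]
print(obs)       # [10 20 30]
```

One design note: the stable sort makes this path O(n log n), whereas the hashtable path is expected O(n), which is presumably why the existing fast path was only taken for already-sorted input.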
39 changes: 39 additions & 0 deletions pandas/tests/reshape/test_pivot.py
@@ -2959,3 +2959,42 @@ def test_pivot_empty_dataframe_period_dtype(self, freq):
         )
 
         tm.assert_frame_equal(result, expected)
+
+    def test_pivot_table_large_dataset_no_duplicates(self):
+        # GH 63314: pivot_table with large datasets should not produce
+        # duplicate indices. This test ensures the Python 3.14 fix works.
+        n_indices = 10000
+        metrics = ["apple", "banana", "coconut"]
+
+        data = [
+            {"idx": f"id_{i}", "metric": metric, "value": i * 10 + len(metric)}
+            for i in range(n_indices)
+            for metric in metrics
+        ]
+
+        df = DataFrame(data)
+
+        result = df.pivot_table(
+            index=["idx"],
+            columns="metric",
+            values="value",
+            aggfunc="first",
+        )
+
+        # Verify no duplicate indices in the result
+        n_unique = len(result.index.unique())
+        assert len(result.index) == n_unique, (
+            f"Expected {n_unique} unique indices, got {len(result.index)}"
+        )
+
+        # Verify we have the expected number of rows
+        assert len(result) == n_indices, (
+            f"Expected {n_indices} rows, got {len(result)}"
+        )
+
+        # Verify all expected indices are present
+        expected_indices = {f"id_{i}" for i in range(n_indices)}
+        actual_indices = set(result.index)
+        assert expected_indices == actual_indices, (
+            "Result indices don't match expected indices"
+        )
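The test guards the invariant at the DataFrame level: each `idx` label yields exactly one pivoted row. At the array level, the property `compress_group_index` must uphold is that of a factorization: every observed group id appears exactly once, and expanding `comp_ids` back through `obs_group_ids` reproduces the original labels. A self-contained round-trip check in plain NumPy (an illustration, not part of the PR; for non-negative labels, `np.unique(..., return_inverse=True)` computes the same pair as the sorted fast path):

```python
import numpy as np

# Property check mirroring the pivot test at array level: compressing
# labels and expanding them back must round-trip exactly.
rng = np.random.default_rng(0)
labels = rng.integers(0, 10_000, size=30_000).astype(np.int64)

# For non-negative labels this matches the fast path's (obs, comp) output.
obs_group_ids, comp_ids = np.unique(labels, return_inverse=True)

assert len(np.unique(obs_group_ids)) == len(obs_group_ids)  # no duplicate groups
np.testing.assert_array_equal(obs_group_ids[comp_ids], labels)  # round-trip holds
```

If the compression step ever emitted the same group twice, the duplicate would surface exactly as the repeated pivot index labels that GH 63314 reports.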