perf: Merge duplicate annotations (#4646)

simonswine · web-flow · commit c5389c76c5d2 · 2025-11-27T15:58:49.000Z
* perf: Merge duplicate annotations

As part of a time series merge this will remove duplicate annotations.

If we think this information is valuable, we could add a field recording
how often each annotation was repeated, but I don't think that data is
that valuable.

* Remove outdated comment
diff --git a/pkg/model/time_series_merger.go b/pkg/model/time_series_merger.go
@@ -4,6 +4,7 @@ import (
 	"cmp"
 	"slices"
 	"sort"
+	"strings"
 	"sync"
 
 	typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
@@ -132,16 +133,48 @@ func (m *TimeSeriesMerger) mergePoints(points []*typesv1.Point) int {
 		}
 		if m.sum {
 			points[j].Value += points[i].Value
-			// Duplicate annotations are semantically correct and provide useful information.
-			// Users of the data can decide whether to discard or make use of duplicates.
-			points[j].Annotations = append(points[j].Annotations, points[i].Annotations...)
-
+			points[j].Annotations = mergeAnnotations(points[j].Annotations, points[i].Annotations)
 			points[j].Exemplars = mergeExemplars(points[j].Exemplars, points[i].Exemplars)
 		}
 	}
 	return j + 1
 }
 
+// compareAnnotations orders annotations by Key, breaking ties by Value.
+func compareAnnotations(a, b *typesv1.ProfileAnnotation) int {
+	if byKey := strings.Compare(a.Key, b.Key); byKey != 0 {
+		return byKey
+	}
+	return strings.Compare(a.Value, b.Value)
+}
+
+// mergeAnnotations combines a and b into one list sorted by key, then value,
+// in which every distinct key/value pair appears exactly once.
+//
+// That holds only when both lists are non-empty: if either is empty the
+// other one is returned as is, neither sorted nor deduplicated. Otherwise b
+// is appended to a and sorted in place, so a's backing array may be reused
+// and reordered; callers must use the returned slice rather than a.
+func mergeAnnotations(a, b []*typesv1.ProfileAnnotation) []*typesv1.ProfileAnnotation {
+	if len(a) == 0 {
+		return b
+	}
+	if len(b) == 0 {
+		return a
+	}
+
+	merged := append(a, b...)
+	slices.SortFunc(merged, compareAnnotations)
+
+	// Sorting puts equal pairs next to each other, so collapsing adjacent
+	// repeats removes every duplicate. CompactFunc keeps the first entry of
+	// each run and, on Go 1.22+, nils out the vacated tail so the dropped
+	// annotations are not kept alive by the backing array.
+	return slices.CompactFunc(merged, func(x, y *typesv1.ProfileAnnotation) bool {
+		return x.Key == y.Key && x.Value == y.Value
+	})
+}
+
 // mergeExemplars combines two exemplar lists.
 // For exemplars with the same profileID, it keeps the highest value and intersects labels.
 func mergeExemplars(a, b []*typesv1.Exemplar) []*typesv1.Exemplar {
diff --git a/pkg/model/time_series_merger_test.go b/pkg/model/time_series_merger_test.go
@@ -56,6 +56,263 @@ func Test_SeriesMerger(t *testing.T) {
 	}
 }
 
+// Test_SeriesMerger_Annotations verifies the annotations MergeSeries produces
+// for a point merged from two series that share the same labels and
+// timestamp. Every case also expects the point values to be summed.
+func Test_SeriesMerger_Annotations(t *testing.T) {
+	testCases := []struct {
+		name     string
+		inputs   [][]*typesv1.Series
+		expected []*typesv1.Series
+	}{
+		{
+			// The same key carrying two different values keeps both entries.
+			name: "merge two distinct annotations",
+			inputs: [][]*typesv1.Series{
+				{
+					{
+						Labels: LabelsFromStrings("foo", "bar"),
+						Points: []*typesv1.Point{
+							{
+								Timestamp:   1,
+								Value:       1,
+								Annotations: []*typesv1.ProfileAnnotation{{Key: "key1", Value: "value1"}},
+							},
+						},
+					},
+				},
+				{
+					{
+						Labels: LabelsFromStrings("foo", "bar"),
+						Points: []*typesv1.Point{
+							{
+								Timestamp:   1,
+								Value:       2,
+								Annotations: []*typesv1.ProfileAnnotation{{Key: "key1", Value: "value2"}},
+							},
+						},
+					},
+				},
+			},
+			expected: []*typesv1.Series{
+				{
+					Labels: LabelsFromStrings("foo", "bar"),
+					Points: []*typesv1.Point{
+						{
+							Timestamp: 1,
+							Value:     3,
+							Annotations: []*typesv1.ProfileAnnotation{
+								{Key: "key1", Value: "value1"},
+								{Key: "key1", Value: "value2"},
+							},
+						},
+					},
+				},
+			},
+		},
+		{
+			// key1=value1 is present on both sides and expected only once.
+			name: "merge duplicate annotations",
+			inputs: [][]*typesv1.Series{
+				{
+					{
+						Labels: LabelsFromStrings("foo", "bar"),
+						Points: []*typesv1.Point{
+							{
+								Timestamp: 1,
+								Value:     1,
+								Annotations: []*typesv1.ProfileAnnotation{
+									{Key: "key1", Value: "value1"},
+									{Key: "key2", Value: "value2"},
+								},
+							},
+						},
+					},
+				},
+				{
+					{
+						Labels: LabelsFromStrings("foo", "bar"),
+						Points: []*typesv1.Point{
+							{
+								Timestamp: 1,
+								Value:     2,
+								Annotations: []*typesv1.ProfileAnnotation{
+									{Key: "key1", Value: "value1"},
+									{Key: "key3", Value: "value3"},
+								},
+							},
+						},
+					},
+				},
+			},
+			expected: []*typesv1.Series{
+				{
+					Labels: LabelsFromStrings("foo", "bar"),
+					Points: []*typesv1.Point{
+						{
+							Timestamp: 1,
+							Value:     3,
+							Annotations: []*typesv1.ProfileAnnotation{
+								{Key: "key1", Value: "value1"},
+								{Key: "key2", Value: "value2"},
+								{Key: "key3", Value: "value3"},
+							},
+						},
+					},
+				},
+			},
+		},
+		{
+			// Both sides carry the identical annotation set; it is expected once.
+			name: "merge all duplicate annotations",
+			inputs: [][]*typesv1.Series{
+				{
+					{
+						Labels: LabelsFromStrings("foo", "bar"),
+						Points: []*typesv1.Point{
+							{
+								Timestamp: 1,
+								Value:     1,
+								Annotations: []*typesv1.ProfileAnnotation{
+									{Key: "key1", Value: "value1"},
+									{Key: "key2", Value: "value2"},
+								},
+							},
+						},
+					},
+				},
+				{
+					{
+						Labels: LabelsFromStrings("foo", "bar"),
+						Points: []*typesv1.Point{
+							{
+								Timestamp: 1,
+								Value:     2,
+								Annotations: []*typesv1.ProfileAnnotation{
+									{Key: "key1", Value: "value1"},
+									{Key: "key2", Value: "value2"},
+								},
+							},
+						},
+					},
+				},
+			},
+			expected: []*typesv1.Series{
+				{
+					Labels: LabelsFromStrings("foo", "bar"),
+					Points: []*typesv1.Point{
+						{
+							Timestamp: 1,
+							Value:     3,
+							Annotations: []*typesv1.ProfileAnnotation{
+								{Key: "key1", Value: "value1"},
+								{Key: "key2", Value: "value2"},
+							},
+						},
+					},
+				},
+			},
+		},
+		{
+			// Inputs arrive as z, a and m; the expected order is a, m, z.
+			name: "annotations sorted by key then value",
+			inputs: [][]*typesv1.Series{
+				{
+					{
+						Labels: LabelsFromStrings("foo", "bar"),
+						Points: []*typesv1.Point{
+							{
+								Timestamp: 1,
+								Value:     1,
+								Annotations: []*typesv1.ProfileAnnotation{
+									{Key: "z", Value: "last"},
+									{Key: "a", Value: "first"},
+								},
+							},
+						},
+					},
+				},
+				{
+					{
+						Labels: LabelsFromStrings("foo", "bar"),
+						Points: []*typesv1.Point{
+							{
+								Timestamp:   1,
+								Value:       2,
+								Annotations: []*typesv1.ProfileAnnotation{{Key: "m", Value: "middle"}},
+							},
+						},
+					},
+				},
+			},
+			expected: []*typesv1.Series{
+				{
+					Labels: LabelsFromStrings("foo", "bar"),
+					Points: []*typesv1.Point{
+						{
+							Timestamp: 1,
+							Value:     3,
+							Annotations: []*typesv1.ProfileAnnotation{
+								{Key: "a", Value: "first"},
+								{Key: "m", Value: "middle"},
+								{Key: "z", Value: "last"},
+							},
+						},
+					},
+				},
+			},
+		},
+		{
+			// One side has an empty list; the other side's annotation is kept.
+			name: "empty annotations on one side",
+			inputs: [][]*typesv1.Series{
+				{
+					{
+						Labels: LabelsFromStrings("foo", "bar"),
+						Points: []*typesv1.Point{
+							{
+								Timestamp:   1,
+								Value:       1,
+								Annotations: []*typesv1.ProfileAnnotation{},
+							},
+						},
+					},
+				},
+				{
+					{
+						Labels: LabelsFromStrings("foo", "bar"),
+						Points: []*typesv1.Point{
+							{
+								Timestamp:   1,
+								Value:       2,
+								Annotations: []*typesv1.ProfileAnnotation{{Key: "key1", Value: "value1"}},
+							},
+						},
+					},
+				},
+			},
+			expected: []*typesv1.Series{
+				{
+					Labels: LabelsFromStrings("foo", "bar"),
+					Points: []*typesv1.Point{
+						{
+							Timestamp:   1,
+							Value:       3,
+							Annotations: []*typesv1.ProfileAnnotation{{Key: "key1", Value: "value1"}},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			testhelper.EqualProto(t, tc.expected, MergeSeries(nil, tc.inputs...))
+		})
+	}
+}
+
 func Test_SeriesMerger_Overlap_Sum(t *testing.T) {
 	for _, tc := range []struct {
 		name string