Skip to content

Commit c5389c7

Browse files
authored
perf: Merge duplicate annotations (#4646)
* perf: Merge duplicate annotations As part of a time series merge, this removes duplicate annotations. If we think this information is valuable, we could add a field recording how often each annotation was repeated, but I don't think that data is that valuable. * Remove outdated comment
1 parent ffe5822 commit c5389c7

File tree

2 files changed

+294
-4
lines changed

2 files changed

+294
-4
lines changed

pkg/model/time_series_merger.go

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"cmp"
55
"slices"
66
"sort"
7+
"strings"
78
"sync"
89

910
typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1"
@@ -132,16 +133,48 @@ func (m *TimeSeriesMerger) mergePoints(points []*typesv1.Point) int {
132133
}
133134
if m.sum {
134135
points[j].Value += points[i].Value
135-
// Duplicate annotations are semantically correct and provide useful information.
136-
// Users of the data can decide whether to discard or make use of duplicates.
137-
points[j].Annotations = append(points[j].Annotations, points[i].Annotations...)
138-
136+
points[j].Annotations = mergeAnnotations(points[j].Annotations, points[i].Annotations)
139137
points[j].Exemplars = mergeExemplars(points[j].Exemplars, points[i].Exemplars)
140138
}
141139
}
142140
return j + 1
143141
}
144142

143+
func compareAnnotations(a, b *typesv1.ProfileAnnotation) int {
144+
if r := strings.Compare(a.Key, b.Key); r != 0 {
145+
return r
146+
}
147+
return strings.Compare(a.Value, b.Value)
148+
}
149+
150+
func mergeAnnotations(a, b []*typesv1.ProfileAnnotation) []*typesv1.ProfileAnnotation {
151+
if len(a) == 0 {
152+
return b
153+
}
154+
if len(b) == 0 {
155+
return a
156+
}
157+
158+
// Merge into a single slice
159+
merged := append(a, b...)
160+
161+
// Sort by key and value
162+
slices.SortFunc(merged, compareAnnotations)
163+
164+
// Remove duplicates in-place
165+
j := 0
166+
for i := 1; i < len(merged); i++ {
167+
// Only keep if different from the current unique element
168+
if merged[j].Key != merged[i].Key || merged[j].Value != merged[i].Value {
169+
j++
170+
merged[j] = merged[i]
171+
}
172+
}
173+
174+
// Return the slice with only unique elements
175+
return merged[:j+1]
176+
}
177+
145178
// mergeExemplars combines two exemplar lists.
146179
// For exemplars with the same profileID, it keeps the highest value and intersects labels.
147180
func mergeExemplars(a, b []*typesv1.Exemplar) []*typesv1.Exemplar {

pkg/model/time_series_merger_test.go

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,263 @@ func Test_SeriesMerger(t *testing.T) {
5656
}
5757
}
5858

59+
func Test_SeriesMerger_Annotations(t *testing.T) {
60+
for _, tc := range []struct {
61+
name string
62+
in [][]*typesv1.Series
63+
out []*typesv1.Series
64+
}{
65+
{
66+
name: "merge two distinct annotations",
67+
in: [][]*typesv1.Series{
68+
{
69+
{
70+
Labels: LabelsFromStrings("foo", "bar"),
71+
Points: []*typesv1.Point{
72+
{
73+
Timestamp: 1,
74+
Value: 1,
75+
Annotations: []*typesv1.ProfileAnnotation{
76+
{Key: "key1", Value: "value1"},
77+
},
78+
},
79+
},
80+
},
81+
},
82+
{
83+
{
84+
Labels: LabelsFromStrings("foo", "bar"),
85+
Points: []*typesv1.Point{
86+
{
87+
Timestamp: 1,
88+
Value: 2,
89+
Annotations: []*typesv1.ProfileAnnotation{
90+
{Key: "key1", Value: "value2"},
91+
},
92+
},
93+
},
94+
},
95+
},
96+
},
97+
out: []*typesv1.Series{
98+
{
99+
Labels: LabelsFromStrings("foo", "bar"),
100+
Points: []*typesv1.Point{
101+
{
102+
Timestamp: 1,
103+
Value: 3,
104+
Annotations: []*typesv1.ProfileAnnotation{
105+
{Key: "key1", Value: "value1"},
106+
{Key: "key1", Value: "value2"},
107+
},
108+
},
109+
},
110+
},
111+
},
112+
},
113+
{
114+
name: "merge duplicate annotations",
115+
in: [][]*typesv1.Series{
116+
{
117+
{
118+
Labels: LabelsFromStrings("foo", "bar"),
119+
Points: []*typesv1.Point{
120+
{
121+
Timestamp: 1,
122+
Value: 1,
123+
Annotations: []*typesv1.ProfileAnnotation{
124+
{Key: "key1", Value: "value1"},
125+
{Key: "key2", Value: "value2"},
126+
},
127+
},
128+
},
129+
},
130+
},
131+
{
132+
{
133+
Labels: LabelsFromStrings("foo", "bar"),
134+
Points: []*typesv1.Point{
135+
{
136+
Timestamp: 1,
137+
Value: 2,
138+
Annotations: []*typesv1.ProfileAnnotation{
139+
{Key: "key1", Value: "value1"},
140+
{Key: "key3", Value: "value3"},
141+
},
142+
},
143+
},
144+
},
145+
},
146+
},
147+
out: []*typesv1.Series{
148+
{
149+
Labels: LabelsFromStrings("foo", "bar"),
150+
Points: []*typesv1.Point{
151+
{
152+
Timestamp: 1,
153+
Value: 3,
154+
Annotations: []*typesv1.ProfileAnnotation{
155+
{Key: "key1", Value: "value1"},
156+
{Key: "key2", Value: "value2"},
157+
{Key: "key3", Value: "value3"},
158+
},
159+
},
160+
},
161+
},
162+
},
163+
},
164+
{
165+
name: "merge all duplicate annotations",
166+
in: [][]*typesv1.Series{
167+
{
168+
{
169+
Labels: LabelsFromStrings("foo", "bar"),
170+
Points: []*typesv1.Point{
171+
{
172+
Timestamp: 1,
173+
Value: 1,
174+
Annotations: []*typesv1.ProfileAnnotation{
175+
{Key: "key1", Value: "value1"},
176+
{Key: "key2", Value: "value2"},
177+
},
178+
},
179+
},
180+
},
181+
},
182+
{
183+
{
184+
Labels: LabelsFromStrings("foo", "bar"),
185+
Points: []*typesv1.Point{
186+
{
187+
Timestamp: 1,
188+
Value: 2,
189+
Annotations: []*typesv1.ProfileAnnotation{
190+
{Key: "key1", Value: "value1"},
191+
{Key: "key2", Value: "value2"},
192+
},
193+
},
194+
},
195+
},
196+
},
197+
},
198+
out: []*typesv1.Series{
199+
{
200+
Labels: LabelsFromStrings("foo", "bar"),
201+
Points: []*typesv1.Point{
202+
{
203+
Timestamp: 1,
204+
Value: 3,
205+
Annotations: []*typesv1.ProfileAnnotation{
206+
{Key: "key1", Value: "value1"},
207+
{Key: "key2", Value: "value2"},
208+
},
209+
},
210+
},
211+
},
212+
},
213+
},
214+
{
215+
name: "annotations sorted by key then value",
216+
in: [][]*typesv1.Series{
217+
{
218+
{
219+
Labels: LabelsFromStrings("foo", "bar"),
220+
Points: []*typesv1.Point{
221+
{
222+
Timestamp: 1,
223+
Value: 1,
224+
Annotations: []*typesv1.ProfileAnnotation{
225+
{Key: "z", Value: "last"},
226+
{Key: "a", Value: "first"},
227+
},
228+
},
229+
},
230+
},
231+
},
232+
{
233+
{
234+
Labels: LabelsFromStrings("foo", "bar"),
235+
Points: []*typesv1.Point{
236+
{
237+
Timestamp: 1,
238+
Value: 2,
239+
Annotations: []*typesv1.ProfileAnnotation{
240+
{Key: "m", Value: "middle"},
241+
},
242+
},
243+
},
244+
},
245+
},
246+
},
247+
out: []*typesv1.Series{
248+
{
249+
Labels: LabelsFromStrings("foo", "bar"),
250+
Points: []*typesv1.Point{
251+
{
252+
Timestamp: 1,
253+
Value: 3,
254+
Annotations: []*typesv1.ProfileAnnotation{
255+
{Key: "a", Value: "first"},
256+
{Key: "m", Value: "middle"},
257+
{Key: "z", Value: "last"},
258+
},
259+
},
260+
},
261+
},
262+
},
263+
},
264+
{
265+
name: "empty annotations on one side",
266+
in: [][]*typesv1.Series{
267+
{
268+
{
269+
Labels: LabelsFromStrings("foo", "bar"),
270+
Points: []*typesv1.Point{
271+
{
272+
Timestamp: 1,
273+
Value: 1,
274+
Annotations: []*typesv1.ProfileAnnotation{},
275+
},
276+
},
277+
},
278+
},
279+
{
280+
{
281+
Labels: LabelsFromStrings("foo", "bar"),
282+
Points: []*typesv1.Point{
283+
{
284+
Timestamp: 1,
285+
Value: 2,
286+
Annotations: []*typesv1.ProfileAnnotation{
287+
{Key: "key1", Value: "value1"},
288+
},
289+
},
290+
},
291+
},
292+
},
293+
},
294+
out: []*typesv1.Series{
295+
{
296+
Labels: LabelsFromStrings("foo", "bar"),
297+
Points: []*typesv1.Point{
298+
{
299+
Timestamp: 1,
300+
Value: 3,
301+
Annotations: []*typesv1.ProfileAnnotation{
302+
{Key: "key1", Value: "value1"},
303+
},
304+
},
305+
},
306+
},
307+
},
308+
},
309+
} {
310+
t.Run(tc.name, func(t *testing.T) {
311+
testhelper.EqualProto(t, tc.out, MergeSeries(nil, tc.in...))
312+
})
313+
}
314+
}
315+
59316
func Test_SeriesMerger_Overlap_Sum(t *testing.T) {
60317
for _, tc := range []struct {
61318
name string

0 commit comments

Comments
 (0)