Skip to content

Commit 928a414

Browse files
committed
pping: Add RTT-based sampling
Add an option (-R, --rtt-rate) to adapt the sampling rate to the RTT of the flow. The minimum interval between samples will be C * RTT, where C is a configurable constant (e.g. 1.0 to get one sample every RTT), and RTT is either the current minimum RTT (default) or the smoothed RTT of the flow (chosen via the -t or --rtt-type option). The smoothed RTT (sRTT) is updated for each calculated RTT, in a similar manner to srtt in the kernel's TCP stack. The sRTT is a moving average of all RTTs, calculated according to the formula: srtt = 7/8 * prev_srtt + 1/8 * rtt. To allow the user to pass a non-integer C (e.g. 0.1 to get 10 RTT samples for every RTT-period), fixed-point arithmetic is used in the eBPF programs (due to the lack of support for floats). The maximum value for C has been limited to 10000 to make it unlikely that the C * RTT calculation will overflow (with C = 10000, overflow will only occur if RTT > 28 seconds). Signed-off-by: Simon Sundberg <simon.sundberg@kau.se>
1 parent c79c4e8 commit 928a414

File tree

3 files changed

+68
-8
lines changed

3 files changed

+68
-8
lines changed

pping/pping.c

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ static const struct option long_options[] = {
100100
{ "help", no_argument, NULL, 'h' },
101101
{ "interface", required_argument, NULL, 'i' }, // Name of interface to run on
102102
{ "rate-limit", required_argument, NULL, 'r' }, // Sampling rate-limit in ms
103+
{ "rtt-rate", required_argument, NULL, 'R' }, // Sampling rate in terms of flow-RTT (ex 1 sample per RTT-interval)
104+
{ "rtt-type", required_argument, NULL, 't' }, // What type of RTT the RTT-rate should be applied to ("min" or "smoothed"), only relevant if rtt-rate is provided
103105
{ "force", no_argument, NULL, 'f' }, // Overwrite any existing XDP program on interface, remove qdisc on cleanup
104106
{ "cleanup-interval", required_argument, NULL, 'c' }, // Map cleaning interval in s, 0 to disable
105107
{ "format", required_argument, NULL, 'F' }, // Which format to output in (standard/json/ppviz)
@@ -167,14 +169,14 @@ static int parse_bounded_double(double *res, const char *str, double low,
167169
static int parse_arguments(int argc, char *argv[], struct pping_config *config)
168170
{
169171
int err, opt;
170-
double rate_limit_ms, cleanup_interval_s;
172+
double rate_limit_ms, cleanup_interval_s, rtt_rate;
171173

172174
config->ifindex = 0;
173175
config->force = false;
174176
config->bpf_config.track_tcp = false;
175177
config->bpf_config.track_icmp = false;
176178

177-
while ((opt = getopt_long(argc, argv, "hfTCi:r:c:F:I:", long_options,
179+
while ((opt = getopt_long(argc, argv, "hfTCi:r:R:t:c:F:I:", long_options,
178180
NULL)) != -1) {
179181
switch (opt) {
180182
case 'i':
@@ -203,6 +205,26 @@ static int parse_arguments(int argc, char *argv[], struct pping_config *config)
203205
config->bpf_config.rate_limit =
204206
rate_limit_ms * NS_PER_MS;
205207
break;
208+
case 'R':
209+
err = parse_bounded_double(&rtt_rate, optarg, 0, 10000,
210+
"rtt-rate");
211+
if (err)
212+
return -EINVAL;
213+
config->bpf_config.rtt_rate =
214+
DOUBLE_TO_FIXPOINT(rtt_rate);
215+
break;
216+
case 't':
217+
if (strcmp(optarg, "min") == 0) {
218+
config->bpf_config.use_srtt = false;
219+
}
220+
else if (strcmp(optarg, "smoothed") == 0) {
221+
config->bpf_config.use_srtt = true;
222+
} else {
223+
fprintf(stderr,
224+
"rtt-type must be \"min\" or \"smoothed\"\n");
225+
return -EINVAL;
226+
}
227+
break;
206228
case 'c':
207229
err = parse_bounded_double(&cleanup_interval_s, optarg,
208230
0, 7 * S_PER_DAY,
@@ -482,7 +504,7 @@ static bool flow_timeout(void *key_ptr, void *val_ptr, __u64 now)
482504
if (print_event_func) {
483505
fe.event_type = EVENT_TYPE_FLOW;
484506
fe.timestamp = now;
485-
memcpy(&fe.flow, key_ptr, sizeof(struct network_tuple));
507+
fe.flow = *(struct network_tuple *)key_ptr;
486508
fe.event_info.event = FLOW_EVENT_CLOSING;
487509
fe.event_info.reason = EVENT_REASON_FLOW_TIMEOUT;
488510
fe.source = EVENT_SOURCE_USERSPACE;
@@ -976,7 +998,9 @@ int main(int argc, char *argv[])
976998
DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_egress_opts);
977999

9781000
struct pping_config config = {
979-
.bpf_config = { .rate_limit = 100 * NS_PER_MS },
1001+
.bpf_config = { .rate_limit = 100 * NS_PER_MS,
1002+
.rtt_rate = 0,
1003+
.use_srtt = false },
9801004
.cleanup_interval = 1 * NS_PER_SECOND,
9811005
.object_path = "pping_kern.o",
9821006
.ingress_prog = "pping_xdp_ingress",

pping/pping.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
#include <linux/in6.h>
77
#include <stdbool.h>
88

9+
/*
 * Unsigned 64-bit fixed-point number: the low FIXPOINT_SHIFT bits hold the
 * fractional part, the remaining high bits hold the integer part.
 */
typedef __u64 fixpoint64;
10+
/* Number of fractional bits in a fixpoint64 */
#define FIXPOINT_SHIFT 16
11+
/*
 * Convert a double to fixpoint64 by scaling with 2^FIXPOINT_SHIFT and
 * truncating. X must be non-negative and small enough to fit, as an
 * out-of-range floating-point to integer conversion is undefined in C.
 */
#define DOUBLE_TO_FIXPOINT(X) ((fixpoint64)((X) * (1UL << FIXPOINT_SHIFT)))
12+
/* Integer part of a fixpoint64 (the fractional bits are discarded) */
#define FIXPOINT_TO_UINT(X) ((X) >> FIXPOINT_SHIFT)
13+
914
/* For the event_type members of rtt_event and flow_event */
1015
#define EVENT_TYPE_FLOW 1
1116
#define EVENT_TYPE_RTT 2
@@ -34,9 +39,11 @@ enum __attribute__((__packed__)) flow_event_source {
3439

3540
struct bpf_config {
3641
__u64 rate_limit;
42+
fixpoint64 rtt_rate;
43+
bool use_srtt;
3744
bool track_tcp;
3845
bool track_icmp;
39-
__u8 reserved[6];
46+
__u8 reserved[5];
4047
};
4148

4249
/*
@@ -67,6 +74,7 @@ struct network_tuple {
6774

6875
struct flow_state {
6976
__u64 min_rtt;
77+
__u64 srtt;
7078
__u64 last_timestamp;
7179
__u64 sent_pkts;
7280
__u64 sent_bytes;

pping/pping_kern.c

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,33 @@ static __u32 remaining_pkt_payload(struct parsing_context *ctx)
324324
return parsed_bytes < ctx->pkt_len ? ctx->pkt_len - parsed_bytes : 0;
325325
}
326326

327+
/*
328+
* Calculate a smooted rtt similar to how TCP stack does it in
329+
* net/ipv4/tcp_input.c/tcp_rtt_estimator().
330+
*
331+
* NOTE: Will cause roundoff errors, but if RTTs > 1000ns errors should be small
332+
*/
333+
static __u64 calculate_srtt(__u64 prev_srtt, __u64 rtt)
334+
{
335+
if (!prev_srtt)
336+
return rtt;
337+
// srtt = 7/8*prev_srtt + 1/8*rtt
338+
return prev_srtt - (prev_srtt >> 3) + (rtt >> 3);
339+
}
340+
341+
/*
 * Decide if a flow is currently rate limited, i.e. if too little time has
 * passed since its last sample for a new one to be taken.
 *
 * now:     the current timestamp
 * last_ts: timestamp of the previous sample for the flow
 * rtt:     RTT of the flow that the RTT-based limit is scaled by, 0 if
 *          no RTT is available
 * (all three are presumably in ns - confirm against the callers)
 *
 * If config.rtt_rate is non-zero and an RTT is available, the minimum
 * interval between samples is rtt_rate * rtt. Otherwise the static
 * config.rate_limit is used.
 *
 * Returns true if the flow is rate limited.
 */
static bool is_rate_limited(__u64 now, __u64 last_ts, __u64 rtt)
{
	__u64 elapsed, rate_int, rate_frac, min_interval;

	// Time appears to have gone backwards, treat as rate limited
	if (now < last_ts)
		return true;

	elapsed = now - last_ts;

	// Static rate limit
	if (!config.rtt_rate || !rtt)
		return elapsed < config.rate_limit;

	/*
	 * RTT-based rate limit
	 *
	 * Multiply the integer and fractional part of the fixed-point rate
	 * separately. This gives exactly the same result as
	 * FIXPOINT_TO_UINT(config.rtt_rate * rtt), but the intermediate
	 * products do not wrap until rtt >= 2^48 (assuming rtt_rate <= 10000
	 * as enforced by user space), whereas the single multiplication
	 * already wraps for rtt > ~2.8e10 at the maximum rate.
	 */
	rate_int = FIXPOINT_TO_UINT(config.rtt_rate);
	rate_frac = config.rtt_rate - (rate_int << FIXPOINT_SHIFT);
	min_interval = rate_int * rtt + FIXPOINT_TO_UINT(rate_frac * rtt);

	return elapsed < min_interval;
}
327354
/*
328355
* Fills in event_type, timestamp, flow, source and reserved.
329356
* Does not fill in the flow_info.
@@ -403,8 +430,9 @@ static void pping_egress(void *ctx, struct parsing_context *pctx)
403430
f_state->last_id = p_id.identifier;
404431

405432
// Check rate-limit
406-
if (!new_flow && (now < f_state->last_timestamp ||
407-
now - f_state->last_timestamp < config.rate_limit))
433+
if (!new_flow &&
434+
is_rate_limited(now, f_state->last_timestamp,
435+
config.use_srtt ? f_state->srtt : f_state->min_rtt))
408436
return;
409437

410438
/*
@@ -449,12 +477,12 @@ static void pping_ingress(void *ctx, struct parsing_context *pctx)
449477
goto validflow_out;
450478

451479
re.rtt = now - *p_ts;
452-
453480
// Delete timestamp entry as soon as RTT is calculated
454481
bpf_map_delete_elem(&packet_ts, &p_id);
455482

456483
if (f_state->min_rtt == 0 || re.rtt < f_state->min_rtt)
457484
f_state->min_rtt = re.rtt;
485+
f_state->srtt = calculate_srtt(f_state->srtt, re.rtt);
458486

459487
// Fill event and push to perf-buffer
460488
re.event_type = EVENT_TYPE_RTT;

0 commit comments

Comments
 (0)