Skip to content

Commit 8e91b84

Browse files
author
CKI KWF Bot
committed
Merge: tcp: allow to control max RTO
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/7381 Adding support for controlling the TCP max RTO, through a sysctl or a sockopt. Tested using a packetdrill script (see the linked issue). JIRA: https://issues.redhat.com/browse/RHEL-115191 Signed-off-by: Antoine Tenart <atenart@redhat.com> Approved-by: Paolo Abeni <pabeni@redhat.com> Approved-by: Florian Westphal <fwestpha@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: CKI GitLab Kmaint Pipeline Bot <26919896-cki-kmaint-pipeline-bot@users.noreply.gitlab.com>
2 parents 2d3bdc2 + 01ccd4f commit 8e91b84

File tree

14 files changed

+95
-48
lines changed

14 files changed

+95
-48
lines changed

Documentation/networking/ip-sysctl.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,8 @@ tcp_retries2 - INTEGER
697697
seconds and is a lower bound for the effective timeout.
698698
TCP will effectively time out at the first RTO which exceeds the
699699
hypothetical timeout.
700+
If tcp_rto_max_ms is decreased, it is recommended to also
701+
change tcp_retries2.
700702

701703
RFC 1122 recommends at least 100 seconds for the timeout,
702704
which corresponds to a value of at least 8.
@@ -1041,6 +1043,17 @@ tcp_challenge_ack_limit - INTEGER
10411043
in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks)
10421044
Default: 1000
10431045

1046+
tcp_rto_max_ms - INTEGER
1047+
Maximal TCP retransmission timeout (in ms).
1048+
Note that the TCP_RTO_MAX_MS socket option takes precedence over this sysctl.
1049+
1050+
When changing tcp_rto_max_ms, it is important to understand
1051+
that tcp_retries2 might need a change.
1052+
1053+
Possible Values: 1000 - 120,000
1054+
1055+
Default: 120,000
1056+
10441057
UDP variables
10451058
=============
10461059

Documentation/networking/net_cachelines/inet_connection_sock.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ struct_timer_list icsk_retransmit_timer read_mostly -
1616
struct_timer_list icsk_delack_timer read_mostly - inet_csk_reset_xmit_timer,tcp_connect
1717
u32 icsk_rto read_write - tcp_cwnd_validate,tcp_schedule_loss_probe,tcp_connect_init,tcp_connect,tcp_write_xmit,tcp_push_one
1818
u32 icsk_rto_min - -
19+
u32 icsk_rto_max read_mostly - tcp_reset_xmit_timer
1920
u32 icsk_delack_max - -
2021
u32 icsk_pmtu_cookie read_write - tcp_sync_mss,tcp_current_mss,tcp_send_syn_data,tcp_connect_init,tcp_connect
2122
struct_tcp_congestion_ops icsk_ca_ops read_write - tcp_cwnd_validate,tcp_tso_segs,tcp_ca_dst_init,tcp_connect_init,tcp_connect,tcp_write_xmit

Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ u8 sysctl_tcp_sack -
8484
u8 sysctl_tcp_window_scaling - - tcp_syn_options,tcp_parse_options
8585
u8 sysctl_tcp_timestamps
8686
u8 sysctl_tcp_early_retrans read_mostly - tcp_schedule_loss_probe(tcp_write_xmit)
87+
int sysctl_tcp_rto_max_ms - -
8788
u8 sysctl_tcp_recovery - - tcp_fastretrans_alert
8889
u8 sysctl_tcp_thin_linear_timeouts - - tcp_retrans_timer(on_thin_streams)
8990
u8 sysctl_tcp_slow_start_after_idle - - unlikely(tcp_cwnd_validate-network-not-starved)

include/net/inet_connection_sock.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ struct inet_connection_sock {
8888
struct timer_list icsk_delack_timer;
8989
__u32 icsk_rto;
9090
__u32 icsk_rto_min;
91+
u32 icsk_rto_max;
9192
__u32 icsk_delack_max;
9293
__u32 icsk_pmtu_cookie;
9394
const struct tcp_congestion_ops *icsk_ca_ops;

include/net/netns/ipv4.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ struct netns_ipv4 {
171171
u8 sysctl_tcp_sack;
172172
u8 sysctl_tcp_window_scaling;
173173
u8 sysctl_tcp_timestamps;
174+
int sysctl_tcp_rto_max_ms;
174175
u8 sysctl_tcp_recovery;
175176
u8 sysctl_tcp_thin_linear_timeouts;
176177
u8 sysctl_tcp_slow_start_after_idle;

include/net/tcp.h

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,8 +141,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
141141
#define TCP_DELACK_MIN 4U
142142
#define TCP_ATO_MIN 4U
143143
#endif
144-
#define TCP_RTO_MAX ((unsigned)(120*HZ))
145-
#define TCP_RTO_MIN ((unsigned)(HZ/5))
144+
#define TCP_RTO_MAX_SEC 120
145+
#define TCP_RTO_MAX ((unsigned)(TCP_RTO_MAX_SEC * HZ))
146+
#define TCP_RTO_MIN ((unsigned)(HZ / 5))
146147
#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */
147148

148149
#define TCP_TIMEOUT_MIN_US (2*USEC_PER_MSEC) /* Min TCP timeout in microsecs */
@@ -711,10 +712,14 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu);
711712
int tcp_mss_to_mtu(struct sock *sk, int mss);
712713
void tcp_mtup_init(struct sock *sk);
713714

715+
static inline unsigned int tcp_rto_max(const struct sock *sk)
716+
{
717+
return READ_ONCE(inet_csk(sk)->icsk_rto_max);
718+
}
719+
714720
static inline void tcp_bound_rto(const struct sock *sk)
715721
{
716-
if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX)
717-
inet_csk(sk)->icsk_rto = TCP_RTO_MAX;
722+
inet_csk(sk)->icsk_rto = min(inet_csk(sk)->icsk_rto, tcp_rto_max(sk));
718723
}
719724

720725
static inline u32 __tcp_set_rto(const struct tcp_sock *tp)
@@ -1355,10 +1360,12 @@ static inline unsigned long tcp_pacing_delay(const struct sock *sk)
13551360
static inline void tcp_reset_xmit_timer(struct sock *sk,
13561361
const int what,
13571362
unsigned long when,
1358-
const unsigned long max_when)
1363+
bool pace_delay)
13591364
{
1360-
inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk),
1361-
max_when);
1365+
if (pace_delay)
1366+
when += tcp_pacing_delay(sk);
1367+
inet_csk_reset_xmit_timer(sk, what, when,
1368+
tcp_rto_max(sk));
13621369
}
13631370

13641371
/* Something is really bad, we could not queue an additional packet,
@@ -1387,7 +1394,7 @@ static inline void tcp_check_probe_timer(struct sock *sk)
13871394
{
13881395
if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
13891396
tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
1390-
tcp_probe0_base(sk), TCP_RTO_MAX);
1397+
tcp_probe0_base(sk), true);
13911398
}
13921399

13931400
static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)

include/uapi/linux/tcp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ enum {
129129

130130
#define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */
131131

132+
#define TCP_RTO_MAX_MS 44 /* max rto time in ms */
132133

133134
#define TCP_REPAIR_ON 1
134135
#define TCP_REPAIR_OFF 0

net/ipv4/sysctl_net_ipv4.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ static int tcp_adv_win_scale_max = 31;
4141
static int tcp_app_win_max = 31;
4242
static int tcp_min_snd_mss_min = TCP_MIN_SND_MSS;
4343
static int tcp_min_snd_mss_max = 65535;
44+
static int tcp_rto_max_max = TCP_RTO_MAX_SEC * MSEC_PER_SEC;
4445
static int ip_privileged_port_min;
4546
static int ip_privileged_port_max = 65535;
4647
static int ip_ttl_min = 1;
@@ -1425,6 +1426,15 @@ static struct ctl_table ipv4_net_table[] = {
14251426
.extra1 = SYSCTL_ZERO,
14261427
.extra2 = SYSCTL_ONE,
14271428
},
1429+
{
1430+
.procname = "tcp_rto_max_ms",
1431+
.data = &init_net.ipv4.sysctl_tcp_rto_max_ms,
1432+
.maxlen = sizeof(int),
1433+
.mode = 0644,
1434+
.proc_handler = proc_dointvec_minmax,
1435+
.extra1 = SYSCTL_ONE_THOUSAND,
1436+
.extra2 = &tcp_rto_max_max,
1437+
},
14281438
{ }
14291439
};
14301440

net/ipv4/tcp.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,7 @@ void tcp_init_sock(struct sock *sk)
417417
{
418418
struct inet_connection_sock *icsk = inet_csk(sk);
419419
struct tcp_sock *tp = tcp_sk(sk);
420+
int rto_max_ms;
420421

421422
tp->out_of_order_queue = RB_ROOT;
422423
sk->tcp_rtx_queue = RB_ROOT;
@@ -425,7 +426,12 @@ void tcp_init_sock(struct sock *sk)
425426
INIT_LIST_HEAD(&tp->tsorted_sent_queue);
426427

427428
icsk->icsk_rto = TCP_TIMEOUT_INIT;
429+
430+
rto_max_ms = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_max_ms);
431+
icsk->icsk_rto_max = msecs_to_jiffies(rto_max_ms);
432+
428433
icsk->icsk_rto_min = TCP_RTO_MIN;
434+
429435
icsk->icsk_delack_max = TCP_DELACK_MAX;
430436
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
431437
minmax_reset(&tp->rtt_min, tcp_jiffies32, ~0U);
@@ -3669,6 +3675,11 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
36693675
secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
36703676
TCP_RTO_MAX / HZ));
36713677
return 0;
3678+
case TCP_RTO_MAX_MS:
3679+
if (val < MSEC_PER_SEC || val > TCP_RTO_MAX_SEC * MSEC_PER_SEC)
3680+
return -EINVAL;
3681+
WRITE_ONCE(inet_csk(sk)->icsk_rto_max, msecs_to_jiffies(val));
3682+
return 0;
36723683
}
36733684

36743685
sockopt_lock_sock(sk);
@@ -4429,6 +4440,9 @@ int do_tcp_getsockopt(struct sock *sk, int level,
44294440
return err;
44304441
}
44314442
#endif
4443+
case TCP_RTO_MAX_MS:
4444+
val = jiffies_to_msecs(tcp_rto_max(sk));
4445+
break;
44324446
default:
44334447
return -ENOPROTOOPT;
44344448
}

net/ipv4/tcp_fastopen.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,8 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
289289
* The request socket is not added to the ehash
290290
* because it's been added to the accept queue directly.
291291
*/
292-
inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
293-
TCP_TIMEOUT_INIT, TCP_RTO_MAX);
292+
tcp_reset_xmit_timer(child, ICSK_TIME_RETRANS,
293+
TCP_TIMEOUT_INIT, false);
294294

295295
refcount_set(&req->rsk_refcnt, 2);
296296

0 commit comments

Comments
 (0)