FD.io VPP  v20.09-64-g4f7b92f0a
Vector Packet Processing
tcp.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef _vnet_tcp_h_
17 #define _vnet_tcp_h_
18 
19 #include <vnet/vnet.h>
20 #include <vnet/ip/ip.h>
21 #include <vnet/session/session.h>
22 #include <vnet/tcp/tcp_types.h>
23 #include <vnet/tcp/tcp_timer.h>
24 #include <vnet/tcp/tcp_debug.h>
25 #include <vnet/tcp/tcp_sack.h>
26 #include <vnet/tcp/tcp_bt.h>
27 #include <vnet/tcp/tcp_cc.h>
28 
30 
34 
/**
 * TCP error codes.
 *
 * The enumerators are generated with an X-macro: while tcp_error (n, s) is
 * defined as below, every tcp_error entry pulled in from
 * <vnet/tcp/tcp_error.def> expands to an enumerator named TCP_ERROR_<n>.
 * The second argument (s) is not used by this expansion.
 *
 * NOTE(review): this listing jumps from original line 39 to 41, so one line
 * of the enum body is not shown here; confirm against the real header.
 */
35 typedef enum _tcp_error
36 {
37 #define tcp_error(n,s) TCP_ERROR_##n,
38 #include <vnet/tcp/tcp_error.def>
39 #undef tcp_error
41 } tcp_error_t;
42 
43 typedef struct _tcp_lookup_dispatch
44 {
45  u8 next, error;
47 
/**
 * X-macro list of per-worker TCP statistics counters.
 *
 * Each entry has the form _(name, type, description). The list expands to
 * nothing useful on its own: the user defines _ before invoking
 * foreach_tcp_wrk_stat and undefines it afterwards. The tcp_wrk_stats_
 * struct in this header, for example, defines _(name, type, str) as
 * "type name;".
 */
48 #define foreach_tcp_wrk_stat \
49  _(timer_expirations, u64, "timer expirations") \
50  _(rxt_segs, u64, "segments retransmitted") \
51  _(tr_events, u32, "timer retransmit events") \
52  _(to_closewait, u32, "timeout close-wait") \
53  _(to_closewait2, u32, "timeout close-wait w/data") \
54  _(to_finwait1, u32, "timeout fin-wait-1") \
55  _(to_finwait2, u32, "timeout fin-wait-2") \
56  _(to_lastack, u32, "timeout last-ack") \
57  _(to_closing, u32, "timeout closing") \
58  _(tr_abort, u32, "timer retransmit abort") \
59  _(rst_unread, u32, "reset on close due to unread data") \
60  _(no_buffer, u32, "out of buffers") \
61 
62 typedef struct tcp_wrk_stats_
63 {
64 #define _(name, type, str) type name;
66 #undef _
68 
69 typedef struct tcp_free_req_
70 {
74 
75 typedef struct tcp_worker_ctx_
76 {
77  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
78 
79  /** worker's pool of connections */
81 
82  /** vector of pending ack dequeues */
84 
85  /** vector of pending disconnect notifications */
87 
88  /** vector of pending reset notifications */
90 
91  /** convenience pointer to this thread's vlib main */
93 
94  /** Time measured in @ref TCP_TSTAMP_TICK used for time stamps */
96 
97  /* Max timers to be handled per dispatch loop */
99 
100  /** Session layer edge indices to tcp output */
101  u32 tco_next_node[2];
102 
103  /* Fifo of pending timer expirations */
105 
106  CLIB_CACHE_LINE_ALIGN_MARK (cacheline1);
107 
108  /** cached 'on the wire' options for bursts */
109  u8 cached_opts[40];
110 
111  /** tx buffer free list */
113 
114  /* fifo of pending free requests */
116 
117  /** worker timer wheel */
119 
120  CLIB_CACHE_LINE_ALIGN_MARK (cacheline2);
121 
124 
/**
 * Add _val to the statistics counter _stat of a worker.
 *
 * @param _wrk  pointer expression; the pointee must have a `stats` member
 * @param _stat name of the counter field inside `stats`
 * @param _val  amount to add
 *
 * The pointer and value arguments are parenthesized so that compound
 * expressions such as `wrk + 1` or `&wrk_obj` bind as the caller intends,
 * and the whole expansion is parenthesized so it behaves as a single
 * expression.
 */
#define tcp_worker_stats_inc(_wrk,_stat,_val) \
  ((_wrk)->stats._stat += (_val))
127 
128 typedef struct tcp_iss_seed_
129 {
133 
134 typedef struct tcp_configuration_
135 {
136  /** Max rx fifo size for a session (in bytes). It is used to compute the
137  * rfc 7323 window scaling factor */
139 
140  /** Min rx fifo for a session (in bytes) */
142 
143  /** Default MTU to be used when establishing connections */
145 
146  /** Initial CWND multiplier, which multiplies MSS to determine initial CWND.
147  * Set 0 to determine the initial CWND by another way */
149 
150  /** Enable tx pacing for new connections */
152 
153  /** Allow use of TSO whenever available */
155 
156  /** Set if csum offloading is enabled */
158 
159  /** Default congestion control algorithm type */
161 
162  /** Min rwnd, as number of snd_mss segments, for update ack to be sent after
163  * a zero rwnd advertisement */
165 
166  /** Delayed ack time (disabled) */
168 
169  /** Timer ticks to wait for close from app */
171 
172  /** Timer ticks to wait in time-wait. Also known as 2MSL */
174 
175  /** Timer ticks to wait in fin-wait1 to send fin and rcv fin-ack */
177 
178  /** Timer ticks to wait in last ack for ack */
180 
181  /** Timer ticks to wait in fin-wait2 for fin */
183 
184  /** Timer ticks to wait in closing for fin ack */
186 
187  /** Time to wait (sec) before cleaning up the connection */
189 
190  /** Number of preallocated connections */
192 
193  /** Number of preallocated half-open connections */
195 
196  /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
198  ip6_address_t *ip6_src_addrs;
199 
200  /** Fault-injection. Debug only */
203 
/**
 * Global TCP state: per-worker contexts, listener and half-open connection
 * pools, the state/flags dispatch table, registered congestion control
 * algorithms, and stack-wide flags and source address rotors.
 *
 * NOTE(review): this listing jumps from original line 253 to 255, so the
 * member that follows the "Protocol configuration" comment is not shown
 * here. The tcp_cfg macro in this header expands to tcp_main.cfg, so that
 * member is presumably named cfg; confirm against the real header.
 */
204 typedef struct _tcp_main
205 {
206  /** per-worker context */
207  tcp_worker_ctx_t *wrk_ctx;
208 
209  /** Pool of listeners */
210  tcp_connection_t *listener_pool;
211 
212  /** vlib buffer size */
213  u32 bytes_per_buffer;
214 
215  /** Session layer edge indices to ip lookup (syns, rst) */
216  u32 ipl_next_node[2];
217 
218  /** Dispatch table by state and flags */
219  tcp_lookup_dispatch_t dispatch_table[TCP_N_STATES][64];
220 
     /** Spinlock. NOTE(review): presumably serializes access to
      * half_open_connections below; confirm against the users in tcp.c */
221  clib_spinlock_t half_open_lock;
222 
223  /** Pool of half-open connections on which we've sent a SYN */
224  tcp_connection_t *half_open_connections;
225 
226  /** Seed used to generate random iss */
227  tcp_iss_seed_t iss_seed;
228 
229  /** Congestion control algorithms registered */
230  tcp_cc_algorithm_t *cc_algos;
231 
232  /** Hash table of cc algorithms by name */
233  uword *cc_algo_by_name;
234 
235  /** Last cc algo registered */
236  tcp_cc_algorithm_type_e cc_last_type;
237 
238  /** Flag that indicates if stack is on or off */
239  u8 is_enabled;
240 
241  /** Flag that indicates if v4 punting is enabled */
242  u8 punt_unknown4;
243 
244  /** Flag that indicates if v6 punting is enabled */
245  u8 punt_unknown6;
246 
247  /** Rotor for v4 source addresses */
248  u32 last_v4_addr_rotor;
249 
250  /** Rotor for v6 source addresses */
251  u32 last_v6_addr_rotor;
252 
253  /** Protocol configuration */
255 } tcp_main_t;
256 
257 extern tcp_main_t tcp_main;
270 
/** Expands to the cfg member of the global tcp_main. */
271 #define tcp_cfg tcp_main.cfg
/**
 * Select a tcp graph node index by address family.
 *
 * Token-pastes node_id into tcp4_<node_id>_node and tcp6_<node_id>_node and
 * evaluates to the .index member of the tcp4 variant when is_ip4 is
 * non-zero, of the tcp6 variant otherwise. Both symbols must be visible at
 * the point of use, since both appear in the expansion.
 */
272 #define tcp_node_index(node_id, is_ip4) \
273  ((is_ip4) ? tcp4_##node_id##_node.index : tcp6_##node_id##_node.index)
274 
277 {
278  return &tcp_main;
279 }
280 
282 tcp_get_worker (u32 thread_index)
283 {
284  ASSERT (thread_index < vec_len (tcp_main.wrk_ctx));
285  return &tcp_main.wrk_ctx[thread_index];
286 }
287 
/**
 * Buffer trajectory tracing hook.
 *
 * When VLIB_BUFFER_TRACE_TRAJECTORY is non-zero the macro calls
 * (*vlib_buffer_trace_trajectory_cb) (b, start). Otherwise it expands to
 * nothing, so its arguments are not evaluated at all.
 *
 * NOTE(review): the enabled variant expands to a bare { } block rather than
 * do { } while (0), so "if (c) tcp_trajectory_add_start (b, s); else ..."
 * would fail to compile in tracing builds only.
 */
288 #if (VLIB_BUFFER_TRACE_TRAJECTORY)
289 #define tcp_trajectory_add_start(b, start) \
290 { \
291  (*vlib_buffer_trace_trajectory_cb) (b, start); \
292 }
293 #else
294 #define tcp_trajectory_add_start(b, start)
295 #endif
296 
299  tcp_connection_t * base);
305 
307  u32 thread_index, u8 is_ip4);
308 void tcp_send_reset (tcp_connection_t * tc);
309 void tcp_send_syn (tcp_connection_t * tc);
311 void tcp_send_fin (tcp_connection_t * tc);
312 void tcp_send_ack (tcp_connection_t * tc);
314 
318 
322 void tcp_reschedule (tcp_connection_t * tc);
325  vlib_buffer_t * b);
326 int tcp_session_custom_tx (void *conn, transport_send_params_t * sp);
327 
334  u32 start_bucket);
336 
337 void tcp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
339  ip4_address_t * start,
342  ip6_address_t * start,
343  ip6_address_t * end, u32 table_id);
344 
346 
353 
/**
 * Debug check on the amount of data available for transmission.
 *
 * Asserts that either the connection is not in TCP_STATE_ESTABLISHED, or
 * transport_max_tx_dequeue () on its transport connection returns at least
 * _a.
 *
 * @param _tc pointer expression to a connection with `state` and
 *            `connection` members
 * @param _a  minimum value expected from transport_max_tx_dequeue ()
 *
 * Both arguments are parenthesized so that compound expressions (for
 * example `tc + 1` or a conditional expression for _a) are evaluated as the
 * caller wrote them.
 */
#define tcp_validate_txf_size(_tc, _a) \
  ASSERT((_tc)->state != TCP_STATE_ESTABLISHED \
	 || transport_max_tx_dequeue (&(_tc)->connection) >= (_a))
357 
358 #endif /* _vnet_tcp_h_ */
359 
360 /*
361  * fd.io coding-style-patch-verification: ON
362  *
363  * Local Variables:
364  * eval: (c-set-style "gnu")
365  * End:
366  */
u32 * pending_timers
Definition: tcp.h:104
tcp_main_t tcp_main
Definition: tcp.c:28
ip6_address_t * ip6_src_addrs
Definition: tcp.h:198
vlib_node_registration_t tcp6_listen_node
(constructor) VLIB_REGISTER_NODE (tcp6_listen_node)
Definition: tcp_input.c:2696
u8 allow_tso
Allow use of TSO whenever available.
Definition: tcp.h:154
void tcp_program_retransmit(tcp_connection_t *tc)
Definition: tcp_output.c:1057
#define CLIB_CACHE_LINE_ALIGN_MARK(mark)
Definition: cache.h:60
void tcp_send_window_update_ack(tcp_connection_t *tc)
Send window update ack.
Definition: tcp_output.c:1074
f64 clib_time_type_t
Definition: time.h:203
tcp_cleanup_req_t * pending_cleanups
Definition: tcp.h:115
u32 * pending_disconnects
vector of pending disconnect notifications
Definition: tcp.h:86
void tcp_connection_timers_reset(tcp_connection_t *tc)
Stop all connection timers.
Definition: tcp.c:493
#define foreach_tcp_wrk_stat
Definition: tcp.h:48
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
Definition: tcp_output.c:2303
unsigned long u64
Definition: types.h:89
void tcp_connection_del(tcp_connection_t *tc)
Connection removal.
Definition: tcp.c:289
u32 tcp_snd_space(tcp_connection_t *tc)
Definition: tcp.c:948
struct _tcp_main tcp_main_t
struct _tcp_connection tcp_connection_t
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
Definition: tcp_output.c:2323
timer_expiration_handler tcp_timer_retransmit_handler
u32 tcp_session_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
Definition: tcp_output.c:986
struct _tcp_lookup_dispatch tcp_lookup_dispatch_t
u32 * pending_resets
vector of pending reset notifications
Definition: tcp.h:89
void tcp_connection_tx_pacer_update(tcp_connection_t *tc)
Definition: tcp.c:1193
void tcp_update_burst_snd_vars(tcp_connection_t *tc)
Update burst send vars.
Definition: tcp_output.c:298
vlib_main_t * vm
Definition: in2out_ed.c:1582
void tcp_connection_cleanup(tcp_connection_t *tc)
Cleans up connection state.
Definition: tcp.c:242
format_function_t format_tcp_flags
Definition: tcp.h:348
ip4_address_t * ip4_src_addrs
Vectors of src addresses.
Definition: tcp.h:197
u32 * pending_deq_acked
vector of pending ack dequeues
Definition: tcp.h:83
unsigned char u8
Definition: types.h:56
double f64
Definition: types.h:142
u8 *() format_function_t(u8 *s, va_list *args)
Definition: format.h:48
tcp_cc_algorithm_type_e cc_algo
Default congestion control algorithm type.
Definition: tcp.h:160
tcp_connection_t * connections
worker's pool of connections
Definition: tcp.h:80
struct _tcp_cc_algorithm tcp_cc_algorithm_t
Definition: tcp_types.h:256
vlib_node_registration_t tcp4_syn_sent_node
(constructor) VLIB_REGISTER_NODE (tcp4_syn_sent_node)
Definition: tcp_input.c:2059
struct tcp_wrk_stats_ tcp_wrk_stats_t
format_function_t format_tcp_connection
Definition: tcp.h:351
u32 min_rx_fifo
Min rx fifo for a session (in bytes)
Definition: tcp.h:141
void() timer_expiration_handler(tcp_connection_t *tc)
Definition: tcp.h:29
u8 enable_tx_pacing
Enable tx pacing for new connections.
Definition: tcp.h:151
timer_expiration_handler tcp_timer_retransmit_syn_handler
unsigned int u32
Definition: types.h:88
u16 closing_time
Timer ticks to wait in closing for fin ack.
Definition: tcp.h:185
f64 buffer_fail_fraction
Fault-injection.
Definition: tcp.h:201
struct tcp_worker_ctx_ tcp_worker_ctx_t
vlib_main_t * vm
convenience pointer to this thread's vlib main
Definition: tcp.h:92
timer_expiration_handler tcp_timer_persist_handler
void tcp_send_ack(tcp_connection_t *tc)
Definition: tcp_output.c:1015
vlib_node_registration_t tcp6_syn_sent_node
(constructor) VLIB_REGISTER_NODE (tcp6_syn_sent_node)
Definition: tcp_input.c:2078
int tcp_fastrecovery_prr_snd_space(tcp_connection_t *tc)
Estimate send space using proportional rate reduction (RFC6937)
Definition: tcp_output.c:1653
u32 max_rx_fifo
Max rx fifo size for a session (in bytes).
Definition: tcp.h:138
struct tcp_free_req_ tcp_cleanup_req_t
format_function_t format_tcp_connection_id
Definition: tcp.h:352
clib_error_t * vnet_tcp_enable_disable(vlib_main_t *vm, u8 is_en)
Definition: tcp.c:1387
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
Definition: tcp_output.c:799
unsigned short u16
Definition: types.h:57
tw_timer_wheel_16t_2w_512sl_t tcp_timer_wheel_t
Definition: tcp_types.h:451
u32 preallocated_half_open_connections
Number of preallocated half-open connections.
Definition: tcp.h:194
tcp_timer_wheel_t timer_wheel
worker timer wheel
Definition: tcp.h:118
#define always_inline
Definition: ipsec.h:28
tcp_connection_t * tcp_connection_alloc(u8 thread_index)
Definition: tcp.c:296
void tcp_connection_tx_pacer_reset(tcp_connection_t *tc, u32 window, u32 start_bucket)
Definition: tcp.c:1206
u8 csum_offload
Set if csum offloading is enabled.
Definition: tcp.h:157
u16 finwait1_time
Timer ticks to wait in fin-wait1 to send fin and rcv fin-ack.
Definition: tcp.h:176
vlib_node_registration_t tcp4_listen_node
(constructor) VLIB_REGISTER_NODE (tcp4_listen_node)
Definition: tcp_input.c:2677
enum _tcp_error tcp_error_t
int tcp_configure_v4_source_address_range(vlib_main_t *vm, ip4_address_t *start, ip4_address_t *end, u32 table_id)
Configure an ipv4 source address range.
Definition: tcp_cli.c:377
void tcp_program_cleanup(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
Definition: tcp.c:335
void tcp_program_dupack(tcp_connection_t *tc)
Definition: tcp_output.c:1045
void tcp_send_reset(tcp_connection_t *tc)
Build and set reset packet for connection.
Definition: tcp_output.c:740
void tcp_punt_unknown(vlib_main_t *vm, u8 is_ip4, u8 is_add)
Definition: tcp.c:1405
format_function_t format_tcp_state
Definition: tcp.h:347
u64 second
Definition: tcp.h:131
struct _transport_connection transport_connection_t
int tcp_half_open_connection_cleanup(tcp_connection_t *tc)
Try to cleanup half-open connection.
Definition: tcp.c:209
u32 fib_node_index_t
A typedef of a node index.
Definition: fib_types.h:30
void tcp_connection_timers_init(tcp_connection_t *tc)
Initialize all connection timers as invalid.
Definition: tcp.c:476
format_function_t format_tcp_rcv_sacks
Definition: tcp.h:350
vlib_node_registration_t tcp6_input_node
(constructor) VLIB_REGISTER_NODE (tcp6_input_node)
Definition: tcp_input.c:3021
fib_node_index_t tcp_lookup_rmt_in_fib(tcp_connection_t *tc)
u16 default_mtu
Default MTU to be used when establishing connections.
Definition: tcp.h:144
void tcp_send_synack(tcp_connection_t *tc)
Definition: tcp_output.c:835
#define ASSERT(truth)
u16 closewait_time
Timer ticks to wait for close from app.
Definition: tcp.h:170
u16 initial_cwnd_multiplier
Initial CWND multiplier, which multiplies MSS to determine initial CWND.
Definition: tcp.h:148
u16 lastack_time
Timer ticks to wait in last ack for ack.
Definition: tcp.h:179
u32 max_timers_per_loop
Definition: tcp.h:98
void tcp_connection_init_vars(tcp_connection_t *tc)
Initialize tcp connection variables.
Definition: tcp.c:704
clib_time_type_t free_time
Definition: tcp.h:71
vlib_node_registration_t tcp4_rcv_process_node
(constructor) VLIB_REGISTER_NODE (tcp4_rcv_process_node)
Definition: tcp_input.c:2502
vlib_node_registration_t tcp6_established_node
(constructor) VLIB_REGISTER_NODE (tcp6_established_node)
Definition: tcp_input.c:1594
float f32
Definition: types.h:143
struct _vlib_node_registration vlib_node_registration_t
struct tcp_iss_seed_ tcp_iss_seed_t
u64 first
Definition: tcp.h:130
void tcp_send_reset_w_pkt(tcp_connection_t *tc, vlib_buffer_t *pkt, u32 thread_index, u8 is_ip4)
Send reset without reusing existing buffer.
Definition: tcp_output.c:652
u32 rwnd_min_update_ack
Min rwnd, as number of snd_mss segments, for update ack to be sent after a zero rwnd advertisement...
Definition: tcp.h:164
vlib_node_registration_t tcp4_input_node
(constructor) VLIB_REGISTER_NODE (tcp4_input_node)
Definition: tcp_input.c:3001
vlib_node_registration_t tcp6_rcv_process_node
(constructor) VLIB_REGISTER_NODE (tcp6_rcv_process_node)
Definition: tcp_input.c:2521
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
Definition: tcp_output.c:863
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
tcp_wrk_stats_t stats
Definition: tcp.h:122
static tcp_worker_ctx_t * tcp_get_worker(u32 thread_index)
Definition: tcp.h:282
VLIB buffer representation.
Definition: buffer.h:102
f32 cleanup_time
Time to wait (sec) before cleaning up the connection.
Definition: tcp.h:188
u64 uword
Definition: types.h:112
u32 time_now
Time measured in TCP_TSTAMP_TICK used for time stamps.
Definition: tcp.h:95
u32 table_id
Definition: wireguard.api:100
void tcp_init_snd_vars(tcp_connection_t *tc)
Initialize connection send variables.
Definition: tcp.c:669
struct tcp_configuration_ tcp_configuration_t
void tcp_connection_close(tcp_connection_t *tc)
Begin connection closing procedure.
Definition: tcp.c:360
enum _tcp_cc_algorithm_type tcp_cc_algorithm_type_e
u16 delack_time
Delayed ack time (disabled)
Definition: tcp.h:167
void tcp_reschedule(tcp_connection_t *tc)
Definition: tcp.c:1217
format_function_t format_tcp_sacks
Definition: tcp.h:349
f64 end
end of the time range
Definition: mactime.api:44
void tcp_program_ack(tcp_connection_t *tc)
Definition: tcp_output.c:1035
int tcp_configure_v6_source_address_range(vlib_main_t *vm, ip6_address_t *start, ip6_address_t *end, u32 table_id)
Configure an ipv6 source address range.
Definition: tcp_cli.c:464
static tcp_main_t * vnet_get_tcp_main()
Definition: tcp.h:276
u32 preallocated_connections
Number of preallocated connections.
Definition: tcp.h:191
void tcp_connection_free(tcp_connection_t *tc)
Definition: tcp.c:322
u16 timewait_time
Timer ticks to wait in time-wait.
Definition: tcp.h:173
tcp_connection_t * tcp_connection_alloc_w_base(u8 thread_index, tcp_connection_t *base)
Definition: tcp.c:309
u16 finwait2_time
Timer ticks to wait in fin-wait2 for fin.
Definition: tcp.h:182
int tcp_session_custom_tx(void *conn, transport_send_params_t *sp)
Definition: tcp_output.c:2013
u32 * tx_buffers
tx buffer free list
Definition: tcp.h:112
vlib_node_registration_t tcp4_established_node
(constructor) VLIB_REGISTER_NODE (tcp4_established_node)
Definition: tcp_input.c:1575
u32 connection_index
Definition: tcp.h:72