27 #define TCP_TICK 0.001 28 #define THZ (u32) (1/TCP_TICK) 29 #define TCP_TSTAMP_RESOLUTION TCP_TICK 30 #define TCP_PAWS_IDLE 24 * 24 * 60 * 60 * THZ 31 #define TCP_FIB_RECHECK_PERIOD 1 * THZ 32 #define TCP_MAX_OPTION_SPACE 40 34 #define TCP_DUPACK_THRESHOLD 3 35 #define TCP_MAX_RX_FIFO_SIZE 4 << 20 36 #define TCP_IW_N_SEGMENTS 10 37 #define TCP_ALWAYS_ACK 1 38 #define TCP_USE_SACKS 1 41 #define foreach_tcp_fsm_state \ 44 _(SYN_SENT, "SYN_SENT") \ 45 _(SYN_RCVD, "SYN_RCVD") \ 46 _(ESTABLISHED, "ESTABLISHED") \ 47 _(CLOSE_WAIT, "CLOSE_WAIT") \ 48 _(FIN_WAIT_1, "FIN_WAIT_1") \ 49 _(LAST_ACK, "LAST_ACK") \ 50 _(CLOSING, "CLOSING") \ 51 _(FIN_WAIT_2, "FIN_WAIT_2") \ 52 _(TIME_WAIT, "TIME_WAIT") 54 typedef enum _tcp_state
56 #define _(sym, str) TCP_STATE_##sym, 67 #define foreach_tcp_timer \ 68 _(RETRANSMIT, "RETRANSMIT") \ 69 _(DELACK, "DELAYED ACK") \ 70 _(PERSIST, "PERSIST") \ 72 _(WAITCLOSE, "WAIT CLOSE") \ 73 _(RETRANSMIT_SYN, "RETRANSMIT SYN") \ 74 _(ESTABLISH, "ESTABLISH") 76 typedef enum _tcp_timers
78 #define _(sym, str) TCP_TIMER_##sym, 91 #define TCP_TIMER_HANDLE_INVALID ((u32) ~0) 94 #define TCP_TO_TIMER_TICK TCP_TICK*10 96 #define TCP_DELACK_TIME 1 97 #define TCP_ESTABLISH_TIME 750 98 #define TCP_2MSL_TIME 300 99 #define TCP_CLOSEWAIT_TIME 20 100 #define TCP_CLEANUP_TIME 5 101 #define TCP_TIMER_PERSIST_MIN 2 103 #define TCP_RTO_MAX 60 * THZ 104 #define TCP_RTO_MIN 0.2 * THZ 105 #define TCP_RTT_MAX 30 * THZ 106 #define TCP_RTO_SYN_RETRIES 3 107 #define TCP_RTO_INIT 1 * THZ 110 #define foreach_tcp_connection_flag \ 111 _(SNDACK, "Send ACK") \ 112 _(FINSNT, "FIN sent") \ 113 _(SENT_RCV_WND0, "Sent 0 receive window") \ 114 _(RECOVERY, "Recovery on") \ 115 _(FAST_RECOVERY, "Fast Recovery on") \ 116 _(FR_1_SMSS, "Sent 1 SMSS") 118 typedef enum _tcp_connection_flag_bits
120 #define _(sym, str) TCP_CONN_##sym##_BIT, 126 typedef enum _tcp_connection_flag
128 #define _(sym, str) TCP_CONN_##sym = 1 << TCP_CONN_##sym##_BIT, 135 #define foreach_tcp_buf_flag \ 141 #define _(f) TCP_BUF_BIT_##f, 149 #define _(f) TCP_BUF_FLAG_##f = 1 << TCP_BUF_BIT_##f, 154 #define TCP_MAX_SACK_BLOCKS 15 155 #define TCP_INVALID_SACK_HOLE_INDEX ((u32)~0) 157 typedef struct _sack_scoreboard_hole
166 typedef struct _sack_scoreboard
172 u32 last_sacked_bytes;
173 u32 last_bytes_delivered;
182 typedef enum _tcp_cc_algorithm_type
189 typedef enum _tcp_cc_ack_t
196 typedef struct _tcp_connection
230 u32 tsval_recent_age;
259 u32 limited_transmit;
263 struct _tcp_cc_algorithm
272 #define tcp_fastrecovery_on(tc) (tc)->flags |= TCP_CONN_FAST_RECOVERY 273 #define tcp_fastrecovery_off(tc) (tc)->flags &= ~TCP_CONN_FAST_RECOVERY 274 #define tcp_recovery_on(tc) (tc)->flags |= TCP_CONN_RECOVERY 275 #define tcp_recovery_off(tc) (tc)->flags &= ~TCP_CONN_RECOVERY 276 #define tcp_in_fastrecovery(tc) ((tc)->flags & TCP_CONN_FAST_RECOVERY) 277 #define tcp_in_recovery(tc) ((tc)->flags & (TCP_CONN_RECOVERY)) 278 #define tcp_in_slowstart(tc) (tc->cwnd < tc->ssthresh) 279 #define tcp_fastrecovery_sent_1_smss(tc) ((tc)->flags & TCP_CONN_FR_1_SMSS) 280 #define tcp_fastrecovery_1_smss_on(tc) ((tc)->flags |= TCP_CONN_FR_1_SMSS) 281 #define tcp_fastrecovery_1_smss_off(tc) ((tc)->flags &= ~TCP_CONN_FR_1_SMSS) 283 #define tcp_in_cong_recovery(tc) ((tc)->flags & \ 284 (TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY)) 289 tc->flags &= ~(TCP_CONN_FAST_RECOVERY | TCP_CONN_RECOVERY);
300 typedef enum _tcp_error
302 #define tcp_error(n,s) TCP_ERROR_##n, 308 typedef struct _tcp_lookup_dispatch
313 typedef struct _tcp_main
324 u8 log2_tstamp_clocks_per_tick;
325 f64 tstamp_ticks_per_clock;
331 tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
391 if (
tcp_main.connections[thread_index] == 0)
436 #define seq_lt(_s1, _s2) ((i32)((_s1)-(_s2)) < 0) 437 #define seq_leq(_s1, _s2) ((i32)((_s1)-(_s2)) <= 0) 438 #define seq_gt(_s1, _s2) ((i32)((_s1)-(_s2)) > 0) 439 #define seq_geq(_s1, _s2) ((i32)((_s1)-(_s2)) >= 0) 440 #define seq_max(_s1, _s2) (seq_gt((_s1), (_s2)) ? (_s1) : (_s2)) 443 #define timestamp_lt(_t1, _t2) ((i32)((_t1)-(_t2)) < 0) 444 #define timestamp_leq(_t1, _t2) ((i32)((_t1)-(_t2)) <= 0) 453 return tc->sack_sb.sacked_bytes + tc->sack_sb.lost_bytes;
455 return tc->rcv_dupacks * tc->snd_mss;
466 flight_size = (int) (tc->snd_una_max - tc->snd_una) -
tcp_bytes_out (tc)
473 (
"Negative: %u %u %u dupacks %u sacked bytes %u flags %d",
475 tc->snd_rxt_bytes, tc->rcv_dupacks, tc->sack_sb.sacked_bytes,
489 if (tc->snd_mss > 2190)
490 return 2 * tc->snd_mss;
491 else if (tc->snd_mss > 1095)
492 return 3 * tc->snd_mss;
494 return 4 * tc->snd_mss;
506 return clib_min (tc->cwnd, tc->snd_wnd);
515 if (available_wnd <= flight_size)
518 return available_wnd - flight_size;
547 tw_timer_expire_timers_16t_2w_512sl (&
tcp_main.timer_wheels[thread_index],
565 tc->flags = TCP_CONN_SNDACK;
566 vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK;
573 = tw_timer_start_16t_2w_512sl (&
tcp_main.timer_wheels[tc->c_thread_index],
574 tc->c_c_index, timer_id, interval);
583 tw_timer_stop_16t_2w_512sl (&
tcp_main.timer_wheels[tc->c_thread_index],
584 tc->timers[timer_id]);
592 tw_timer_stop_16t_2w_512sl (&
tcp_main.timer_wheels[tc->c_thread_index],
593 tc->timers[timer_id]);
594 tc->timers[timer_id] =
595 tw_timer_start_16t_2w_512sl (&
tcp_main.timer_wheels[tc->c_thread_index],
596 tc->c_c_index, timer_id, interval);
646 if (tc->snd_una == tc->snd_una_max)
649 if (tc->snd_wnd < tc->snd_mss)
663 #define tcp_validate_txf_size(_tc, _a) \ 664 ASSERT(_tc->state != TCP_STATE_ESTABLISHED \ 665 || stream_session_tx_fifo_max_dequeue (&_tc->connection) >= _a) 672 u32 prev_index,
u32 start,
676 start,
u8 have_sent_1_smss,
730 sb->sacked_bytes = 0;
731 sb->last_sacked_bytes = 0;
732 sb->last_bytes_delivered = 0;
743 return hole->end - hole->start;
749 return hole - sb->holes;
769 tm->cc_algos[type] = *vft;
776 return &tm->cc_algos[type];
807 th->seq_number = seq;
808 th->ack_number = ack;
809 th->data_offset_and_reserved = (tcp_hdr_opts_len >> 2) << 4;
813 th->urgent_pointer = 0;
836 clib_host_to_net_u32 (seq),
837 clib_host_to_net_u32 (ack),
838 tcp_hdr_opts_len,
flags,
839 clib_host_to_net_u16 (wnd));
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
#define tcp_fastrecovery_1_smss_off(tc)
void scoreboard_init_high_rxt(sack_scoreboard_t *sb)
static void tcp_retransmit_timer_set(tcp_connection_t *tc)
static u32 tcp_available_wnd(const tcp_connection_t *tc)
void tcp_make_fin(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to FIN-ACK.
void tcp_send_reset(vlib_buffer_t *pkt, u8 is_ip4)
Send reset without reusing existing buffer.
struct _sack_block sack_block_t
void tcp_cc_init_congestion(tcp_connection_t *tc)
void tcp_connection_timers_reset(tcp_connection_t *tc)
Stop all connection timers.
struct _transport_connection transport_connection_t
#define TCP_TO_TIMER_TICK
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
void tcp_fast_retransmit(tcp_connection_t *tc)
Do fast retransmit.
static u32 tcp_bytes_out(const tcp_connection_t *tc)
Our estimate of the number of bytes that have left the network.
sack_scoreboard_hole_t * scoreboard_insert_hole(sack_scoreboard_t *sb, u32 prev_index, u32 start, u32 end)
static tcp_connection_t * tcp_connection_get_if_valid(u32 conn_index, u32 thread_index)
void tcp_connection_del(tcp_connection_t *tc)
Connection removal.
struct _sack_scoreboard sack_scoreboard_t
void tcp_update_sack_list(tcp_connection_t *tc, u32 start, u32 end)
Build SACK list as per RFC2018.
u32 tcp_snd_space(tcp_connection_t *tc)
Compute tx window session is allowed to fill.
static tcp_connection_t * tcp_half_open_connection_get(u32 conn_index)
#define VLIB_BUFFER_PRE_DATA_SIZE
void scoreboard_update_lost(tcp_connection_t *tc, sack_scoreboard_t *sb)
struct _tcp_main tcp_main_t
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
static u64 clib_cpu_time_now(void)
timer_expiration_handler tcp_timer_retransmit_handler
static void scoreboard_init(sack_scoreboard_t *sb)
u8 * format_tcp_scoreboard(u8 *s, va_list *args)
struct _vlib_node_registration vlib_node_registration_t
static sack_scoreboard_hole_t * scoreboard_prev_hole(sack_scoreboard_t *sb, sack_scoreboard_hole_t *hole)
struct _tcp_lookup_dispatch tcp_lookup_dispatch_t
u32 tcp_prepare_retransmit_segment(tcp_connection_t *tc, vlib_buffer_t *b, u32 offset, u32 max_bytes)
Build a retransmit segment.
struct _tcp_connection tcp_connection_t
void tcp_fast_retransmit_sack(tcp_connection_t *tc)
Do fast retransmit with SACKs.
void tcp_connection_cleanup(tcp_connection_t *tc)
Cleans up connection state.
static void scoreboard_clear(sack_scoreboard_t *sb)
format_function_t format_tcp_flags
struct _tcp_header tcp_header_t
static u32 tcp_available_snd_space(const tcp_connection_t *tc)
void tcp_connection_reset(tcp_connection_t *tc)
Notify session that connection has been reset.
struct _sack_scoreboard_hole sack_scoreboard_hole_t
static u32 scoreboard_hole_index(sack_scoreboard_t *sb, sack_scoreboard_hole_t *hole)
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
i32 tcp_rcv_wnd_available(tcp_connection_t *tc)
static tcp_header_t * tcp_buffer_hdr(vlib_buffer_t *b)
static timer_callback_t * timers
enum _tcp_state tcp_state_t
timer_expiration_handler tcp_timer_retransmit_syn_handler
static u32 tcp_time_now(void)
static tcp_cc_algorithm_t * tcp_cc_algo_get(tcp_cc_algorithm_type_e type)
static u32 scoreboard_hole_bytes(sack_scoreboard_hole_t *hole)
sack_scoreboard_hole_t * scoreboard_next_rxt_hole(sack_scoreboard_t *sb, sack_scoreboard_hole_t *start, u8 have_sent_1_smss, u8 *can_rescue, u8 *snd_limited)
Figure out the next hole to retransmit.
static void tcp_timer_set(tcp_connection_t *tc, u8 timer_id, u32 interval)
#define TCP_INVALID_SACK_HOLE_INDEX
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
timer_expiration_handler tcp_timer_persist_handler
static void * vlib_buffer_push_tcp_net_order(vlib_buffer_t *b, u16 sp, u16 dp, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
void tcp_retransmit_first_unacked(tcp_connection_t *tc)
Retransmit first unacked segment.
void tcp_rcv_sacks(tcp_connection_t *tc, u32 ack)
clib_error_t * vnet_tcp_enable_disable(vlib_main_t *vm, u8 is_en)
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
static sack_scoreboard_hole_t * scoreboard_next_hole(sack_scoreboard_t *sb, sack_scoreboard_hole_t *hole)
#define TCP_TIMER_HANDLE_INVALID
void tcp_fast_retransmit_no_sack(tcp_connection_t *tc)
Fast retransmit without SACK info.
static u32 tcp_flight_size(const tcp_connection_t *tc)
Our estimate of the number of bytes in flight (pipe size)
u32 tcp_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
void( timer_expiration_handler)(u32 index)
static sack_scoreboard_hole_t * scoreboard_last_hole(sack_scoreboard_t *sb)
enum _tcp_cc_ack_t tcp_cc_ack_t
static void tcp_timer_reset(tcp_connection_t *tc, u8 timer_id)
static sack_scoreboard_hole_t * scoreboard_first_hole(sack_scoreboard_t *sb)
enum _tcp_error tcp_error_t
static void * vlib_buffer_push_tcp(vlib_buffer_t *b, u16 sp_net, u16 dp_net, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
void tcp_cc_init(tcp_connection_t *tc)
clib_bihash_24_8_t transport_endpoint_table_t
format_function_t format_tcp_state
#define clib_warning(format, args...)
enum _tcp_timers tcp_timers_e
static void tcp_connection_force_ack(tcp_connection_t *tc, vlib_buffer_t *b)
u32 fib_node_index_t
A typedef of a node index.
void tcp_connection_timers_init(tcp_connection_t *tc)
Initialize all connection timers as invalid.
void tcp_make_synack(tcp_connection_t *ts, vlib_buffer_t *b)
Convert buffer to SYN-ACK.
#define pool_is_free_index(P, I)
Use free bitmap to query whether given index is free.
u8 * format_tcp_connection_id(u8 *s, va_list *args)
vlib_node_registration_t tcp6_input_node
(constructor) VLIB_REGISTER_NODE (tcp6_input_node)
fib_node_index_t tcp_lookup_rmt_in_fib(tcp_connection_t *tc)
void tcp_make_ack(tcp_connection_t *ts, vlib_buffer_t *b)
Convert buffer to ACK.
static void tcp_timer_update(tcp_connection_t *tc, u8 timer_id, u32 interval)
int vlib_main(vlib_main_t *volatile vm, unformat_input_t *input)
enum _tcp_cc_algorithm_type tcp_cc_algorithm_type_e
enum _tcp_connection_flag_bits tcp_connection_flag_bits_e
vhost_vring_state_t state
void tcp_connection_init_vars(tcp_connection_t *tc)
Initialize tcp connection variables.
u8 * format_tcp_connection(u8 *s, va_list *args)
static u32 tcp_end_seq(tcp_header_t *th, u32 len)
struct _tcp_cc_algorithm tcp_cc_algorithm_t
void scoreboard_remove_hole(sack_scoreboard_t *sb, sack_scoreboard_hole_t *hole)
template key/value backing page structure
void tcp_update_snd_mss(tcp_connection_t *tc)
Update snd_mss to reflect the effective segment size that we can send by taking into account all TCP ...
static void tcp_persist_timer_update(tcp_connection_t *tc)
static sack_scoreboard_hole_t * scoreboard_get_hole(sack_scoreboard_t *sb, u32 index)
static void * vlib_buffer_push_uninit(vlib_buffer_t *b, u8 size)
Prepend uninitialized data to buffer.
vlib_node_registration_t tcp4_input_node
(constructor) VLIB_REGISTER_NODE (tcp4_input_node)
static u32 tcp_initial_cwnd(const tcp_connection_t *tc)
Initial cwnd as per RFC5681.
#define foreach_tcp_fsm_state
TCP FSM state definitions as per RFC793.
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
#define foreach_tcp_connection_flag
TCP connection flags.
#define foreach_tcp_timer
TCP timers.
static void tcp_update_time(f64 now, u32 thread_index)
static void tcp_retransmit_timer_update(tcp_connection_t *tc)
struct _transport_endpoint transport_endpoint_t
void tcp_connection_close(tcp_connection_t *tc)
Begin connection closing procedure.
static void tcp_retransmit_timer_force_update(tcp_connection_t *tc)
static tcp_connection_t * tcp_connection_get(u32 conn_index, u32 thread_index)
ip4_main_t ip4_main
Global ip4 main structure.
void tcp_update_rto(tcp_connection_t *tc)
void tcp_init_mss(tcp_connection_t *tc)
static void tcp_cc_algo_register(tcp_cc_algorithm_type_e type, const tcp_cc_algorithm_t *vft)
#define foreach_tcp_buf_flag
TCP buffer flags.
enum _tcp_connection_flag tcp_connection_flags_e
format_function_t format_tcp_sacks
#define TCP_TIMER_PERSIST_MIN
#define tcp_opts_sack_permitted(_to)
static u32 tcp_loss_wnd(const tcp_connection_t *tc)
static void tcp_persist_timer_set(tcp_connection_t *tc)
static tcp_main_t * vnet_get_tcp_main()
static void tcp_cong_recovery_off(tcp_connection_t *tc)
timer_expiration_handler tcp_timer_delack_handler
static void tcp_retransmit_timer_reset(tcp_connection_t *tc)
void tcp_cc_fastrecovery_exit(tcp_connection_t *tc)
void tcp_update_rcv_wnd(tcp_connection_t *tc)
static u8 tcp_timer_is_active(tcp_connection_t *tc, tcp_timers_e timer)
int tcp_cc_recover(tcp_connection_t *tc)
static tcp_connection_t * tcp_listener_get(u32 tli)
static void tcp_persist_timer_reset(tcp_connection_t *tc)