26 u16 port_host_byte_order,
u8 is_ip4)
31 pool_get (tm->listener_pool, listener);
32 memset (listener, 0,
sizeof (*listener));
34 listener->c_c_index = listener - tm->listener_pool;
35 listener->c_lcl_port = clib_host_to_net_u16 (port_host_byte_order);
39 listener->c_lcl_ip4.as_u32 = ip->ip4.as_u32;
40 listener->c_is_ip4 = 1;
41 listener->c_proto = SESSION_TYPE_IP4_TCP;
46 listener->c_proto = SESSION_TYPE_IP6_TCP;
49 listener->c_s_index = session_index;
50 listener->state = TCP_STATE_LISTEN;
56 return listener->c_c_index;
61 u16 port_host_byte_order)
68 u16 port_host_byte_order)
95 return &tc->connection;
119 pool_put (tm->local_endpoints, tep);
126 if (tc->state == TCP_STATE_SYN_SENT)
127 pool_put (tm->half_open_connections, tc);
129 pool_put (tm->connections[tc->c_thread_index], tc);
156 case TCP_STATE_SYN_RCVD:
161 case TCP_STATE_SYN_SENT:
162 case TCP_STATE_ESTABLISHED:
163 case TCP_STATE_CLOSE_WAIT:
164 case TCP_STATE_FIN_WAIT_1:
165 case TCP_STATE_FIN_WAIT_2:
166 case TCP_STATE_CLOSING:
167 tc->state = TCP_STATE_CLOSED;
174 case TCP_STATE_CLOSED:
199 if (tc->state == TCP_STATE_ESTABLISHED
200 || tc->state == TCP_STATE_SYN_RCVD || tc->state == TCP_STATE_CLOSE_WAIT)
204 if (tc->state == TCP_STATE_ESTABLISHED || tc->state == TCP_STATE_SYN_RCVD)
205 tc->state = TCP_STATE_FIN_WAIT_1;
206 else if (tc->state == TCP_STATE_SYN_SENT)
207 tc->state = TCP_STATE_CLOSED;
208 else if (tc->state == TCP_STATE_CLOSE_WAIT)
209 tc->state = TCP_STATE_LAST_ACK;
213 && tc->state == TCP_STATE_CLOSED)
232 tc->state = TCP_STATE_CLOSED;
265 #define PORT_MASK ((1 << 16)- 1) 275 u16 min = 1024, max = 65535;
282 pool_get (tm->local_endpoints, tep);
286 for (; tries >= 0; tries--)
307 tep - tm->local_endpoints);
312 pool_put (tm->local_endpoints, tep);
370 memset (&hdr, 0,
sizeof (hdr));
374 hdr.
tcp.src_port = tc->c_lcl_port;
375 hdr.
tcp.dst_port = tc->c_rmt_port;
381 memset (&hdr, 0,
sizeof (hdr));
387 hdr.
tcp.src_port = tc->c_lcl_port;
388 hdr.
tcp.dst_port = tc->c_rmt_port;
402 prefix.
fp_len = tc->c_is_ip4 ? 32 : 128;
410 u32 output_node_index;
461 ip46_address_t lcl_addr;
467 memset (&lcl_addr, 0,
sizeof (lcl_addr));
472 prefix.
fp_len = is_ip4 ? 32 : 128;
482 if (sw_if_index == (
u32) ~ 0)
489 lcl_addr.ip4.as_u32 = ip4->
as_u32;
510 pool_get (tm->half_open_connections, tc);
511 memset (tc, 0,
sizeof (*tc));
513 clib_memcpy (&tc->c_rmt_ip, rmt_addr, sizeof (ip46_address_t));
514 clib_memcpy (&tc->c_lcl_ip, &lcl_addr, sizeof (ip46_address_t));
515 tc->c_rmt_port = clib_host_to_net_u16 (rmt_port);
516 tc->c_lcl_port = clib_host_to_net_u16 (lcl_port);
517 tc->c_c_index = tc - tm->half_open_connections;
518 tc->c_is_ip4 = is_ip4;
519 tc->c_proto = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
526 tc->state = TCP_STATE_SYN_SENT;
530 return tc->c_c_index;
546 #define _(sym, str) str, 552 #define _(sym, str) str, 565 s =
format (s,
"UNKNOWN (%d (0x%x))", state, state);
570 #define _(sym, str) str, 586 for (i = 0; i <
last; i++)
605 s =
format (s,
"recovery");
607 s =
format (s,
"fastrecovery");
617 s =
format (s,
" snd_una %u snd_nxt %u snd_una_max %u\n",
618 tc->snd_una - tc->iss, tc->snd_nxt - tc->iss,
619 tc->snd_una_max - tc->iss);
620 s =
format (s,
" rcv_nxt %u rcv_las %u\n",
621 tc->rcv_nxt - tc->irs, tc->rcv_las - tc->irs);
622 s =
format (s,
" snd_wnd %u rcv_wnd %u snd_wl1 %u snd_wl2 %u\n",
623 tc->snd_wnd, tc->rcv_wnd, tc->snd_wl1 - tc->irs,
624 tc->snd_wl2 - tc->iss);
625 s =
format (s,
" flight size %u send space %u rcv_wnd_av %d\n",
629 s =
format (s,
"cwnd %u ssthresh %u rtx_bytes %u bytes_acked %u\n",
630 tc->cwnd, tc->ssthresh, tc->snd_rxt_bytes, tc->bytes_acked);
631 s =
format (s,
" prev_ssthresh %u snd_congestion %u dupack %u\n",
632 tc->prev_ssthresh, tc->snd_congestion - tc->iss,
634 s =
format (s,
" rto %u rto_boff %u srtt %u rttvar %u rtt_ts %u ", tc->rto,
635 tc->rto_boff, tc->srtt, tc->rttvar, tc->rtt_ts);
636 s =
format (s,
"rtt_seq %u\n", tc->rtt_seq);
652 s =
format (s,
"[#%d][%s] %U:%d->%U:%d", tc->c_thread_index,
"T",
655 &tc->c_rmt_ip4, clib_net_to_host_u16 (tc->c_rmt_port));
659 s =
format (s,
"[#%d][%s] %U:%d->%U:%d", tc->c_thread_index,
"T",
662 &tc->c_rmt_ip6, clib_net_to_host_u16 (tc->c_rmt_port));
672 u32 verbose = va_arg (*args,
u32);
687 u32 tci = va_arg (*args,
u32);
688 u32 thread_index = va_arg (*args,
u32);
689 u32 verbose = va_arg (*args,
u32);
703 u32 tci = va_arg (*args,
u32);
711 u32 tci = va_arg (*args,
u32);
724 s =
format (s,
" start %u end %u\n", block->start - tc->irs,
725 block->end - tc->irs);
734 s =
format (s,
"[%u, %u]", hole->start, hole->end);
743 s =
format (s,
"sacked_bytes %u last_sacked_bytes %u lost_bytes %u\n",
744 sb->sacked_bytes, sb->last_sacked_bytes, sb->lost_bytes);
745 s =
format (s,
" last_bytes_delivered %u high_sacked %u snd_una_adv %u\n",
746 sb->last_bytes_delivered, sb->high_sacked, sb->snd_una_adv);
747 s =
format (s,
" cur_rxt_hole %u high_rxt %u rescue_rxt %u",
748 sb->cur_rxt_hole, sb->high_rxt, sb->rescue_rxt);
752 s =
format (s,
"\n head %u tail %u holes:\n", sb->head, sb->tail);
766 return &tc->connection;
773 return &tc->connection;
799 if (tc->snd_wnd < tc->snd_mss)
801 return tc->snd_wnd <= snd_space ? tc->snd_wnd : 0;
805 if (snd_space < tc->snd_mss)
809 return snd_space - (snd_space % tc->snd_mss);
825 int snd_space, snt_limited;
835 && tc->sack_sb.last_sacked_bytes == 0)))
837 if (tc->rcv_dupacks == 1 && tc->limited_transmit != tc->snd_nxt)
838 tc->limited_transmit = tc->snd_nxt;
841 snt_limited = tc->snd_nxt - tc->limited_transmit;
842 snd_space =
clib_max (2 * tc->snd_mss - snt_limited, 0);
849 tc->snd_nxt = tc->snd_una_max;
851 - (tc->snd_una_max - tc->snd_congestion);
852 if (snd_space <= 0 || (tc->snd_una_max - tc->snd_una) >= tc->snd_wnd)
878 return (
i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
889 return (tc->snd_nxt - tc->snd_una);
951 ASSERT (tc->state == TCP_STATE_SYN_SENT);
953 sst = tc->c_is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
970 if (tc->state == TCP_STATE_CLOSE_WAIT)
972 if (tc->flags & TCP_CONN_FINSNT)
974 clib_warning (
"FIN was sent and still in CLOSE WAIT. Weird!");
978 tc->state = TCP_STATE_LAST_ACK;
1007 u32 connection_index, timer_id;
1009 for (i = 0; i <
vec_len (expired_timers); i++)
1012 connection_index = expired_timers[
i] & 0x0FFFFFFF;
1013 timer_id = expired_timers[
i] >> 28;
1015 TCP_EVT_DBG (TCP_EVT_TIMER_POP, connection_index, timer_id);
1025 tw_timer_wheel_16t_2w_512sl_t *tw;
1028 tw = &tm->timer_wheels[ii];
1089 clib_bihash_init_24_8 (&tm->local_endpoints_table,
"local endpoint table",
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
#define foreach_ip_interface_address(lm, a, sw_if_index, loop, body)
#define tcp_in_cong_recovery(tc)
void dpo_stack_from_node(u32 child_node_index, dpo_id_t *dpo, const dpo_id_t *parent)
Stack one DPO object on another, and thus establish a child parent relationship.
fib_protocol_t fp_proto
protocol type
void tcp_timer_keep_handler(u32 conn_index)
sll srl srl sll sra u16x4 i
const char * tcp_fsm_states[]
#define tcp_in_recovery(tc)
u8 * format_tcp_vars(u8 *s, va_list *args)
#define seq_leq(_s1, _s2)
static u32 tcp_available_wnd(const tcp_connection_t *tc)
struct _sack_block sack_block_t
void * ip_interface_get_first_ip(u32 sw_if_index, u8 is_ip4)
u8 * format_tcp_sacks(u8 *s, va_list *args)
vnet_main_t * vnet_get_main(void)
struct _transport_connection transport_connection_t
fib_node_index_t tcp_lookup_rmt_in_fib(tcp_connection_t *tc)
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
u32 tcp_session_unbind(u32 listener_index)
static u32 ip4_compute_flow_hash(const ip4_header_t *ip, flow_hash_config_t flow_hash_config)
struct _sack_scoreboard sack_scoreboard_t
static f64 vlib_time_now(vlib_main_t *vm)
static tcp_connection_t * tcp_half_open_connection_get(u32 conn_index)
static void tcp_connection_fib_attach(tcp_connection_t *tc)
Stack tcp connection on peer's fib entry.
flow_hash_config_t lb_hash_config
the hash config to use when selecting a bucket.
struct _tcp_main tcp_main_t
void tcp_connection_timers_reset(tcp_connection_t *tc)
Stop all connection timers.
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
void dpo_copy(dpo_id_t *dst, const dpo_id_t *src)
atomic copy a data-plane object.
timer_expiration_handler tcp_timer_retransmit_handler
static void scoreboard_init(sack_scoreboard_t *sb)
static heap_elt_t * last(heap_header_t *h)
ip_lookup_main_t lookup_main
struct _tcp_connection tcp_connection_t
transport_connection_t * tcp_half_open_session_get_transport(u32 conn_index)
u16 tcp_allocate_local_port(tcp_main_t *tm, ip46_address_t *ip)
Allocate a local port and, if successful, add an entry to the local endpoint table to mark the pair as used...
void tcp_timer_establish_handler(u32 conn_index)
void ip4_register_protocol(u32 protocol, u32 node_index)
struct ip4_tcp_hdr ip4_tcp_hdr_t
#define pool_get(P, E)
Allocate an object E from a pool P (unspecified alignment).
struct _tcp_header tcp_header_t
static u32 tcp_available_snd_space(const tcp_connection_t *tc)
struct _sack_scoreboard_hole sack_scoreboard_hole_t
u32 tcp_session_tx_fifo_offset(transport_connection_t *trans_conn)
unformat_function_t * unformat_pg_edit
u8 * format_tcp_connection_id(u8 *s, va_list *args)
#define VLIB_INIT_FUNCTION(x)
u16 lb_n_buckets_minus_1
number of buckets in the load-balance - 1.
void tcp_connection_timers_init(tcp_connection_t *tc)
Initialize all connection timers as invalid.
struct ip6_tcp_hdr ip6_tcp_hdr_t
void transport_endpoint_table_del(transport_endpoint_table_t *ht, transport_endpoint_t *te)
static void tcp_connection_unbind(u32 listener_index)
clib_error_t * tcp_main_enable(vlib_main_t *vm)
Aggregate type for a prefix.
#define clib_error_return(e, args...)
void stream_session_delete_notify(transport_connection_t *tc)
Notification from transport that connection is being deleted.
timer_expiration_handler tcp_timer_retransmit_syn_handler
static u32 tcp_time_now(void)
u32 tcp_snd_space(tcp_connection_t *tc)
Compute tx window session is allowed to fill.
u16 fp_len
The mask length.
#define vlib_call_init_function(vm, x)
u32 tcp_session_bind_ip4(u32 session_index, ip46_address_t *ip, u16 port_host_byte_order)
int stream_session_connect_notify(transport_connection_t *tc, u8 sst, u8 is_fail)
#define TCP_EVT_DBG(_evt, _args...)
fib_node_index_t fib_table_lookup(u32 fib_index, const fib_prefix_t *prefix)
Perform a longest prefix match in the non-forwarding table.
struct _transport_proto_vft transport_proto_vft_t
#define TRANSPORT_ENDPOINT_INVALID_INDEX
static void tcp_timer_set(tcp_connection_t *tc, u8 timer_id, u32 interval)
static timer_expiration_handler * timer_expiration_handlers[TCP_N_TIMERS]
The identity of a DPO is a combination of its type and its instance number/index of objects of that t...
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
transport_connection_t * tcp_session_get_listener(u32 listener_index)
static void tcp_connection_select_lb_bucket(tcp_connection_t *tc, const dpo_id_t *dpo, dpo_id_t *result)
timer_expiration_handler tcp_timer_persist_handler
static ip_protocol_info_t * ip_get_protocol_info(ip_main_t *im, u32 protocol)
#define tcp_in_fastrecovery(tc)
ip46_address_t fp_addr
The address type is not derivable from the fp_addr member.
dpo_type_t dpoi_type
the type
format_function_t * format_header
static const dpo_id_t * load_balance_get_bucket_i(const load_balance_t *lb, u32 bucket)
void tcp_session_cleanup(u32 conn_index, u32 thread_index)
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
static const transport_proto_vft_t tcp4_proto
static sack_scoreboard_hole_t * scoreboard_next_hole(sack_scoreboard_t *sb, sack_scoreboard_hole_t *hole)
load-balancing over a choice of [un]equal cost paths
static u32 ip6_compute_flow_hash(const ip6_header_t *ip, flow_hash_config_t flow_hash_config)
#define pool_put(P, E)
Free an object E in pool P.
static void tcp_expired_timers_dispatch(u32 *expired_timers)
#define TCP_TIMER_HANDLE_INVALID
void tcp_timer_waitclose_handler(u32 conn_index)
static u32 tcp_flight_size(const tcp_connection_t *tc)
Our estimate of the number of bytes in flight (pipe size)
u8 * format_tcp_congestion_status(u8 *s, va_list *args)
u8 * format_tcp_scoreboard(u8 *s, va_list *args)
clib_error_t * vnet_tcp_enable_disable(vlib_main_t *vm, u8 is_en)
u32 tcp_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
void transport_endpoint_table_add(transport_endpoint_table_t *ht, transport_endpoint_t *te, u32 value)
void tcp_session_close(u32 conn_index, u32 thread_index)
void( timer_expiration_handler)(u32 index)
#define foreach_vlib_main(body)
static u32 tcp_connection_bind(u32 session_index, ip46_address_t *ip, u16 port_host_byte_order, u8 is_ip4)
u32 fib_entry_get_resolving_interface(fib_node_index_t entry_index)
static void tcp_timer_reset(tcp_connection_t *tc, u8 timer_id)
static sack_scoreboard_hole_t * scoreboard_first_hole(sack_scoreboard_t *sb)
#define tcp_fastrecovery_sent_1_smss(tc)
u8 * format_tcp_half_open_session(u8 *s, va_list *args)
clib_error_t * ip_main_init(vlib_main_t *vm)
static_always_inline uword vlib_get_thread_index(void)
void tcp_cc_init(tcp_connection_t *tc)
void tcp_connection_close(tcp_connection_t *tc)
Begin connection closing procedure.
#define clib_warning(format, args...)
#define clib_memcpy(a, b, c)
u8 * format_tcp_session(u8 *s, va_list *args)
int tcp_session_open_ip6(ip46_address_t *addr, u16 port)
const char * tcp_dbg_evt_str[]
u32 fib_node_index_t
A typedef of a node index.
int tcp_connection_open(ip46_address_t *rmt_addr, u16 rmt_port, u8 is_ip4)
u16 tcp_session_send_mss(transport_connection_t *trans_conn)
Compute maximum segment size for session layer.
fib_entry_t * fib_entry_get(fib_node_index_t index)
static void tcp_timer_update(tcp_connection_t *tc, u8 timer_id, u32 interval)
#define pool_put_index(p, i)
Free pool element with given index.
ip_lookup_main_t lookup_main
static load_balance_t * load_balance_get(index_t lbi)
#define seq_geq(_s1, _s2)
vhost_vring_state_t state
u32 transport_endpoint_lookup(transport_endpoint_table_t *ht, ip46_address_t *ip, u16 port)
void stream_session_reset_notify(transport_connection_t *tc)
Notify application that connection has been reset.
i32 tcp_rcv_wnd_available(tcp_connection_t *tc)
clib_error_t * ip4_lookup_init(vlib_main_t *vm)
void session_register_transport(u8 type, const transport_proto_vft_t *vft)
u8 * format_tcp_timers(u8 *s, va_list *args)
dpo_id_t fe_lb
The load-balance used for forwarding.
void tcp_update_snd_mss(tcp_connection_t *tc)
Update snd_mss to reflect the effective segment size that we can send by taking into account all TCP ...
#define tcp_fastrecovery_1_smss_on(tc)
void tcp_connection_reset(tcp_connection_t *tc)
Notify session that connection has been reset.
vlib_node_registration_t tcp4_input_node
(constructor) VLIB_REGISTER_NODE (tcp4_input_node)
index_t dpoi_index
the index of objects of that type
#define FIB_NODE_INDEX_INVALID
int tcp_session_open_ip4(ip46_address_t *addr, u16 port)
#define foreach_tcp_fsm_state
TCP FSM state definitions as per RFC793.
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
#define foreach_tcp_timer
TCP timers.
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
static u32 tcp_round_snd_space(tcp_connection_t *tc, u32 snd_space)
struct _transport_endpoint transport_endpoint_t
void tcp_connection_init_vars(tcp_connection_t *tc)
Initialize tcp connection variables.
#define DPO_INVALID
An initialiser for DPOs declared on the stack.
const char * tcp_conn_timers[]
clib_error_t * tcp_init(vlib_main_t *vm)
u8 * format_tcp_connection(u8 *s, va_list *args)
static tcp_connection_t * tcp_connection_get(u32 conn_index, u32 thread_index)
static u32 random_u32(u32 *seed)
32-bit random number generator
ip4_main_t ip4_main
Global ip4 main structure.
static int tcp_connection_stack_on_fib_entry(tcp_connection_t *tc)
static vlib_thread_main_t * vlib_get_thread_main()
void tcp_connection_cleanup(tcp_connection_t *tc)
Cleans up connection state.
void tcp_connection_del(tcp_connection_t *tc)
Connection removal.
void tcp_init_mss(tcp_connection_t *tc)
#define vec_foreach(var, vec)
Vector iterator.
u8 * format_tcp_sack_hole(u8 *s, va_list *args)
#define TCP_TSTAMP_RESOLUTION
Time stamp resolution.
#define tcp_opts_sack_permitted(_to)
static void * ip_interface_address_get_address(ip_lookup_main_t *lm, ip_interface_address_t *a)
static const transport_proto_vft_t tcp6_proto
u8 * format_tcp_state(u8 *s, va_list *args)
u32 tcp_session_send_space(transport_connection_t *trans_conn)
static clib_error_t * ip6_lookup_init(vlib_main_t *vm)
static tcp_main_t * vnet_get_tcp_main()
timer_expiration_handler tcp_timer_delack_handler
u32 tcp_session_bind_ip6(u32 session_index, ip46_address_t *ip, u16 port_host_byte_order)
transport_connection_t * tcp_session_get_transport(u32 conn_index, u32 thread_index)
u8 * format_tcp_listener_session(u8 *s, va_list *args)
void tcp_initialize_timer_wheels(tcp_main_t *tm)
static tcp_connection_t * tcp_listener_get(u32 tli)