typedef enum _tcp_output_next

#define foreach_tcp4_output_next \
  _ (DROP, "error-drop") \
  _ (IP_LOOKUP, "ip4-lookup") \
  _ (IP_REWRITE, "ip4-rewrite") \

#define foreach_tcp6_output_next \
  _ (DROP, "error-drop") \
  _ (IP_LOOKUP, "ip6-lookup") \
  _ (IP_REWRITE, "ip6-rewrite") \
  _ (IP_ARP, "ip6-discover-neighbor")

#define tcp_error(n,s) s,

#ifndef CLIB_MARCH_VARIANT

if (tc->state != TCP_STATE_SYN_RCVD || tcp_opts_wscale (&tc->rcv_opts))
u32 available_space, wnd;
observed_wnd = (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
TCP_EVT (TCP_EVT_RCV_WND_SHRUNK, tc, observed_wnd, available_space);

if (state < TCP_STATE_ESTABLISHED)
return tc->rcv_wnd >> tc->rcv_wscale;
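The last two lines are the heart of window advertisement: once the connection is established, the receive window is advertised as rcv_wnd scaled down by the negotiated rcv_wscale (RFC 1323). A minimal sketch, assuming only the connection fields that appear in these fragments (the wrapper name is illustrative):

/* Sketch only: scale the receive window before it is written to the header. */
static inline u32
example_scaled_rcv_wnd (tcp_connection_t *tc)
{
  /* Before the connection is established the initial window is advertised;
   * afterwards rcv_wnd is shifted by the negotiated window-scale factor. */
  return tc->rcv_wnd >> tc->rcv_wscale;
}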
opts->flags |= TCP_OPTS_FLAG_MSS;

opts->flags |= TCP_OPTS_FLAG_WSCALE;
opts->wscale = tc->rcv_wscale;

opts->flags |= TCP_OPTS_FLAG_TSTAMP;

opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;

opts->flags |= TCP_OPTS_FLAG_MSS;

opts->flags |= TCP_OPTS_FLAG_WSCALE;
opts->wscale = tc->rcv_wscale;

opts->flags |= TCP_OPTS_FLAG_TSTAMP;
opts->tsecr = tc->tsval_recent;

opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;

opts->flags |= TCP_OPTS_FLAG_TSTAMP;
opts->tsecr = tc->tsval_recent;

opts->flags |= TCP_OPTS_FLAG_SACK;
if (tc->snd_sack_pos >= vec_len (tc->snd_sacks))
  tc->snd_sack_pos = 0;
opts->sacks = &tc->snd_sacks[tc->snd_sack_pos];
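Taken together, these assignments show how the option-building helpers fill a tcp_options_t before the header is written: MSS, window scale and SACK-permitted on SYN/SYN-ACK, timestamps and SACK blocks once established. A hedged sketch of the established-connection case, reusing the flag names, fields and helpers visible in this file; the function name and the exact length accounting are illustrative:

/* Illustrative only: mirrors the flag/field assignments above. */
static int
example_established_options (tcp_connection_t *tc, tcp_options_t *opts)
{
  u8 len = 0;

  opts->flags = 0;
  if (tcp_opts_tstamp (&tc->rcv_opts))
    {
      /* Peer negotiated timestamps: echo the most recent peer tsval. */
      opts->flags |= TCP_OPTS_FLAG_TSTAMP;
      opts->tsecr = tc->tsval_recent;
      len += TCP_OPTION_LEN_TIMESTAMP;
    }
  if (tcp_opts_sack_permitted (&tc->rcv_opts) && vec_len (tc->snd_sacks))
    {
      opts->flags |= TCP_OPTS_FLAG_SACK;
      if (tc->snd_sack_pos >= vec_len (tc->snd_sacks))
        tc->snd_sack_pos = 0;
      opts->sacks = &tc->snd_sacks[tc->snd_sack_pos];
    }
  return len;
}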
case TCP_STATE_ESTABLISHED:
case TCP_STATE_CLOSE_WAIT:
case TCP_STATE_FIN_WAIT_1:
case TCP_STATE_LAST_ACK:
case TCP_STATE_CLOSING:
case TCP_STATE_FIN_WAIT_2:
case TCP_STATE_TIME_WAIT:
case TCP_STATE_CLOSED:
case TCP_STATE_SYN_RCVD:
case TCP_STATE_SYN_SENT:

TCP_STATE_ESTABLISHED);
tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;

if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
if (tc->snd_una == tc->snd_nxt)
if (tc->flags & TCP_CONN_PSH_PENDING)
tc->psh_seq = tc->snd_una + max_deq - 1;
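The effective send MSS is the smaller of the local and peer-advertised MSS, minus whatever the negotiated options consume in each segment. A small worked example, assuming a 1460-byte MSS on both sides and 12 bytes of options per segment:

/* Example values only: snd_mss = min(local mss, peer mss) - options length. */
u16 local_mss = 1460, peer_mss = 1460, snd_opts_len = 12;
u16 snd_mss = clib_min (local_mss, peer_mss) - snd_opts_len;  /* 1448 bytes of payload per segment */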
if (b->flags & VLIB_BUFFER_NEXT_PRESENT)

b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;

#ifndef CLIB_MARCH_VARIANT

ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
ip46_address_t *src, ip46_address_t *dst)

u16 payload_length_host_byte_order;
clib_host_to_net_u16 (IP_PROTOCOL_TCP);
for (i = 0; i < ARRAY_LEN (src->ip6.as_uword); i++)
payload_length_host_byte_order, NULL, 0,

ip46_address_t *src, ip46_address_t *dst)

u32 payload_length_host_byte_order;
clib_host_to_net_u32 (payload_length_host_byte_order + (IP_PROTOCOL_TCP << 16));
payload_length_host_byte_order, NULL, 0,

if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))

(vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
(vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);

b->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
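These fragments belong to the checksum path: when checksum offload is disabled in the connection configuration, the TCP checksum is computed in software over the IPv4 or IPv6 pseudo-header, otherwise the buffer is only marked for hardware offload. A hedged sketch of that decision, using the helper signatures listed further down; the c_is_ip4 test and the wrapper name are assumptions:

/* Sketch: choose software checksum vs. hardware offload. */
static u16
example_compute_checksum (vlib_main_t *vm, tcp_connection_t *tc, vlib_buffer_t *b)
{
  u16 checksum = 0;

  if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
    {
      tcp_header_t *th = tcp_buffer_hdr (b);
      if (tc->c_is_ip4)  /* assumption: is-ip4 flag lives on the connection */
        checksum = ip4_tcp_compute_checksum_custom (vm, b, &tc->c_lcl_ip,
                                                    &tc->c_rmt_ip);
      else
        checksum = ip6_tcp_compute_checksum_custom (vm, b, &tc->c_lcl_ip,
                                                    &tc->c_rmt_ip);
      th->checksum = checksum;
    }
  else
    {
      /* Leave the checksum to hardware; just mark the buffer. */
      b->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
    }
  return checksum;
}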
u8 tcp_opts_len, tcp_hdr_opts_len;
tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
TCP_EVT (TCP_EVT_ACK_SENT, tc);
tc->rcv_las = tc->rcv_nxt;

u8 tcp_hdr_opts_len, tcp_opts_len;
tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;

u8 tcp_opts_len, tcp_hdr_opts_len;
tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
tc->rcv_nxt, tcp_hdr_opts_len,
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
u8 is_ip4, u32 fib_index)

b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;

tm->ipl_next_node[!is_ip4]);

b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
ip6_address_t src_ip6, dst_ip6;

src_port = th->src_port;
dst_port = th->dst_port;

seq = th->ack_number;

tmp = clib_net_to_host_u32 (th->seq_number);
ack = clib_host_to_net_u32 (tmp + len);

#ifndef CLIB_MARCH_VARIANT

u32 thread_index, u8 is_ip4)

seq = pkt_th->ack_number;
ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;

ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);

seq, ack, tcp_hdr_len, flags, 0);

tc->ipv6_flow_label);

TCP_EVT (TCP_EVT_RST_SENT, tc);
TCP_ERROR_RST_SENT, 1);
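The reset path chooses sequence and acknowledgement numbers from the segment that triggered the reset, in the RFC 793 style visible above: if the offending segment carried an ACK, that ACK number becomes the reset's SEQ; otherwise SEQ is zero and the reset acknowledges the sequence space the segment consumed. A hedged sketch (byte-order handling simplified, helper name illustrative):

/* Illustrative: pick seq/ack for an outgoing RST. */
static void
example_reset_seq_ack (tcp_connection_t *tc, vlib_buffer_t *pkt,
                       tcp_header_t *pkt_th, u32 *seq, u32 *ack)
{
  if (pkt_th->flags & TCP_FLAG_ACK)
    {
      /* Offending segment carried an ACK: the reset uses it as SEQ. */
      *seq = pkt_th->ack_number;
      *ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
    }
  else
    {
      /* No ACK: SEQ is zero and we ACK the sequence space just consumed. */
      *seq = 0;
      *ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);
    }
}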
u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len;

tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
tc->rcv_nxt, tcp_hdr_opts_len, flags,
ASSERT (opts_write_len == tc->snd_opts_len);
vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
TCP_EVT (TCP_EVT_RST_SENT, tc);
TCP_ERROR_RST_SENT, 1);

IP_PROTOCOL_TCP, tc->ipv6_flow_label);

tc->rtt_seq = tc->snd_nxt;
TCP_EVT (TCP_EVT_SYN_SENT, tc);

ASSERT (tc->snd_una != tc->snd_nxt);
TCP_EVT (TCP_EVT_SYNACK_SENT, tc);
fin_snt = tc->flags & TCP_CONN_FINSNT;

tc->flags |= TCP_CONN_FINSNT;

if ((tc->flags & TCP_CONN_SNDACK) && !tc->pending_dupacks)
  tc->flags &= ~TCP_CONN_SNDACK;

TCP_EVT (TCP_EVT_FIN_SENT, tc);

tc->flags |= TCP_CONN_FINSNT;
tc->flags &= ~TCP_CONN_FINPNDG;
u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)

vnet_buffer (b)->tcp.connection_index = tc->c_c_index;

tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;

if (seq_geq (tc->psh_seq, snd_nxt) && seq_lt (tc->psh_seq, snd_nxt + data_len))
  flags |= TCP_FLAG_PSH;

tc->rcv_nxt, tcp_hdr_opts_len, flags,
tm->wrk_ctx[tc->c_thread_index].cached_opts,
ASSERT (len == tc->snd_opts_len);

tc->rcv_las = tc->rcv_nxt;
tc->data_segs_out += 1;
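These lines come from the header-push path: the precomputed options are written right after the fixed header, PSH is set when the segment covers the recorded push point, and per-connection counters are updated. A hedged sketch of the core sequence, using vlib_buffer_push_tcp and tcp_options_write as listed below; the port field names and the omission of length/flag bookkeeping are simplifications:

/* Sketch of pushing a data segment header; not the full tcp_push_hdr_i. */
static void
example_push_hdr (tcp_connection_t *tc, vlib_buffer_t *b, u32 snd_nxt, u32 data_len)
{
  u8 flags = TCP_FLAG_ACK;
  u8 tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
  u16 advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
  tcp_header_t *th;

  /* Set PSH when the segment covers the recorded push point. */
  if (seq_geq (tc->psh_seq, snd_nxt) && seq_lt (tc->psh_seq, snd_nxt + data_len))
    flags |= TCP_FLAG_PSH;

  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, snd_nxt,
                             tc->rcv_nxt, tcp_hdr_opts_len, flags, advertise_wnd);
  tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);

  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
  tc->data_segs_out += 1;
}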
if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)

tc->rtt_seq = tc->snd_nxt;

if (!(tc->flags & TCP_CONN_SNDACK))
tc->flags |= TCP_CONN_SNDACK;

if (!(tc->flags & TCP_CONN_SNDACK))
tc->flags |= TCP_CONN_SNDACK;
if (tc->pending_dupacks < 255)
  tc->pending_dupacks += 1;

if (!(tc->flags & TCP_CONN_RXT_PENDING))
tc->flags |= TCP_CONN_RXT_PENDING;

if (tc->rcv_wnd >= tcp_cfg.rwnd_min_update_ack * tc->snd_mss)
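The tcp_program_ack / tcp_program_dupack / tcp_program_retransmit fragments all follow one pattern: set a pending flag on the connection and queue a self-directed custom tx event, so the actual send happens later in tcp_session_custom_tx on the owning thread. A hedged sketch of that pattern (the assumption here is that the embedded transport connection member is named connection):

/* Illustrative: defer work to the custom-tx path instead of sending inline. */
static void
example_program_dupack (tcp_connection_t *tc)
{
  if (!(tc->flags & TCP_CONN_SNDACK))
    {
      session_add_self_custom_tx_evt (&tc->connection, 1 /* has_prio */);
      tc->flags |= TCP_CONN_SNDACK;
    }
  /* Cap the counter so it fits the field used to track pending dupacks. */
  if (tc->pending_dupacks < 255)
    tc->pending_dupacks += 1;
}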
ASSERT (n_bytes == max_deq_bytes);

u32 chain_bi = ~0, n_bufs_per_seg, n_bufs;
u16 n_peeked, len_to_deq;

n_bufs_per_seg = ceil ((double) seg_size / bytes_per_buffer);

TRANSPORT_MAX_HDRS_LEN);

b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;

max_deq_bytes -= n_bytes;

for (i = 1; i < n_bufs_per_seg; i++)

len_to_deq = clib_min (max_deq_bytes, bytes_per_buffer);

ASSERT (n_peeked == len_to_deq);
n_bytes += n_peeked;

prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;

max_deq_bytes -= n_peeked;

ASSERT (((*b)->current_data + (*b)->current_length) <= bytes_per_buffer);

u32 start, available_bytes;

ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
ASSERT (max_deq_bytes != 0);

ASSERT (available_bytes >= offset);
available_bytes -= offset;
if (!available_bytes)

max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
max_deq_bytes = clib_min (available_bytes, max_deq_bytes);

start = tc->snd_una + offset;

tc->snd_rxt_bytes += n_bytes;

if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)

tc->bytes_retrans += n_bytes;
tc->segs_retrans += 1;

TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes);
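tcp_prepare_retransmit_segment first clamps the requested byte count to one MSS and to what is actually queued past the given offset, then derives the absolute start sequence from snd_una. A hedged sketch of that clamping arithmetic (wrapper name illustrative, connection member name assumed):

/* Clamp a retransmit request; names follow the fragments above. */
static u32
example_clamp_rxt (tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, u32 *start)
{
  u32 available_bytes = transport_max_tx_dequeue (&tc->connection);

  ASSERT (available_bytes >= offset);
  available_bytes -= offset;

  max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
  max_deq_bytes = clib_min (available_bytes, max_deq_bytes);
  *start = tc->snd_una + offset;  /* absolute sequence number of the first byte */
  return max_deq_bytes;
}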
if (!sb->is_reneging && (!hole || hole->start == tc->snd_una))

TCP_EVT (TCP_EVT_CC_EVT, tc, 6);

tc->prev_ssthresh = tc->ssthresh;
tc->prev_cwnd = tc->cwnd;

tc->cwnd_acc_bytes = 0;
tc->tr_occurences += 1;
if (tc->state == TCP_STATE_CLOSED)
if (tc->state >= TCP_STATE_ESTABLISHED)

TCP_EVT (TCP_EVT_CC_EVT, tc, 2);

if (tc->flags & TCP_CONN_FINSNT)
if (tc->snd_una == tc->snd_nxt)

tc->snd_congestion = tc->snd_nxt;
n_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);

if (tc->rto_boff == 1)
else if (tc->state == TCP_STATE_SYN_RCVD)

TCP_EVT (TCP_EVT_CC_EVT, tc, 2);

ASSERT (tc->snd_una != tc->snd_nxt);
TCP_EVT (TCP_EVT_SYN_RXT, tc, 1);

ASSERT (tc->state == TCP_STATE_CLOSED);

if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
TCP_DBG ("could not remove half-open connection");

TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
TCP_EVT (TCP_EVT_SYN_RXT, tc, 0);
u32 bi, max_snd_bytes, available_bytes, offset;

if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
    || (tc->flags & TCP_CONN_FINSNT))
  goto update_scheduler;

offset = tc->snd_nxt - tc->snd_una;

if (!available_bytes)
if (available_bytes <= offset)
  goto update_scheduler;

max_snd_bytes = clib_min (tc->snd_mss,
|| tc->snd_una == tc->snd_nxt || tc->rto_boff > 1));

if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)

TCP_EVT (TCP_EVT_CC_EVT, tc, 1);
u32 offset, n_segs = 0, n_written, bi, available_wnd;

offset = tc->snd_nxt - tc->snd_una;
available_wnd = tc->snd_wnd - offset;
burst_size = clib_min (burst_size, available_wnd / tc->snd_mss);

if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)

while (n_segs < burst_size)

offset += n_written;

if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)

tc->snd_nxt += n_written;
prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion);

if (pipe > tc->ssthresh)

space = ((int) tc->prr_delivered * ((f64) tc->ssthresh / tc->prev_cwnd))

limit = clib_max ((int) (tc->prr_delivered - prr_out), 0) + tc->snd_mss;
space = clib_min (tc->ssthresh - pipe, limit);

space = clib_max (space, prr_out ? 0 : tc->snd_mss);

u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion;
f64 rr = (f64) tc->ssthresh / tc->prev_cwnd;

return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr);

- (tc->snd_nxt - tc->snd_una));
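These lines implement the proportional rate reduction estimate of RFC 6937: while the flight size is still above ssthresh, the sender may emit roughly prr_delivered * ssthresh / prior_cwnd minus what was already sent in recovery; once the pipe drops below ssthresh it falls back to a packet-conservation bound. A hedged sketch of that computation, assembled from the fragments above (the subtraction of prr_out in the first branch is inferred from the RFC):

/* RFC 6937 send-space estimate; a sketch of the fragments above. */
static int
example_prr_snd_space (tcp_connection_t *tc)
{
  int space, limit;
  u32 pipe = tcp_flight_size (tc);
  u32 prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion);

  if (pipe > tc->ssthresh)
    {
      /* Proportional reduction: delivered * ssthresh / prior cwnd, minus
       * what was already sent during recovery. */
      space = ((int) tc->prr_delivered * ((f64) tc->ssthresh / tc->prev_cwnd)) - prr_out;
    }
  else
    {
      /* Below ssthresh: packet conservation with a limited-transmit style bound. */
      limit = clib_max ((int) (tc->prr_delivered - prr_out), 0) + tc->snd_mss;
      space = clib_min (tc->ssthresh - pipe, limit);
    }
  space = clib_max (space, prr_out ? 0 : tc->snd_mss);
  return space;
}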
#define scoreboard_rescue_rxt_valid(_sb, _tc) \
  (seq_geq (_sb->rescue_rxt, _tc->snd_una) \
   && seq_leq (_sb->rescue_rxt, _tc->snd_congestion))

u32 n_written = 0, offset, max_bytes, n_segs = 0;
u8 snd_limited = 0, can_rescue = 0;
u32 bi, max_deq, burst_bytes;

burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);

if (snd_space < tc->snd_mss)

&& seq_gt (sb->high_sacked, tc->snd_congestion)
&& tc->rxt_head != tc->snd_una

max_bytes = clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una);

tc->rxt_head = tc->snd_una;
tc->rxt_delivered += n_written;
tc->prr_delivered += n_written;
ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes);

TCP_EVT (TCP_EVT_CC_EVT, tc, 0);

max_deq -= tc->snd_nxt - tc->snd_una;

while (snd_space > 0 && n_segs < burst_size)

if (max_deq > tc->snd_mss)

av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
av_wnd = clib_max (av_wnd - tc->snd_mss, 0);
snd_space = clib_min (snd_space, av_wnd);
snd_space = clib_min (max_deq, snd_space);
burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss);

if (max_deq > n_segs_new * tc->snd_mss)

n_segs += n_segs_new;

max_bytes = clib_min (tc->snd_mss, hole->end - hole->start);
max_bytes = clib_min (max_bytes, snd_space);
offset = hole->end - tc->snd_una - max_bytes;

sb->rescue_rxt = tc->snd_congestion;

max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;

offset = sb->high_rxt - tc->snd_una;

ASSERT (n_written <= snd_space);

sb->high_rxt += n_written;
snd_space -= n_written;
u32 n_written = 0, offset = 0, bi, max_deq, n_segs_now, max_bytes;
u32 burst_bytes, sent_bytes;

int snd_space, n_segs = 0;

TCP_EVT (TCP_EVT_CC_EVT, tc, 0);

burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);

cc_limited = snd_space < burst_bytes;

while (snd_space > 0 && n_segs < burst_size)

tc->snd_congestion - tc->snd_una - offset);

snd_space -= n_written;

if (n_segs == burst_size)

if (snd_space < tc->snd_mss || tc->snd_mss == 0)

max_deq -= tc->snd_nxt - tc->snd_una;

snd_space = clib_min (max_deq, snd_space);
burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss);

if (n_segs_now && max_deq > n_segs_now * tc->snd_mss)

n_segs += n_segs_now;

sent_bytes = clib_min (n_segs * tc->snd_mss, burst_bytes);
sent_bytes = cc_limited ? burst_bytes : sent_bytes;
if (!tc->pending_dupacks)

|| tc->state != TCP_STATE_ESTABLISHED)

tc->dupacks_out += 1;
tc->pending_dupacks = 0;

tc->snd_sack_pos = 0;

n_acks = clib_min (n_acks, tc->pending_dupacks);

for (j = 0; j < clib_min (n_acks, max_burst_size); j++)

if (n_acks < max_burst_size)

tc->pending_dupacks = 0;
tc->snd_sack_pos = 0;
tc->dupacks_out += n_acks;

TCP_DBG ("constrained by burst size");
tc->pending_dupacks = n_acks - max_burst_size;
tc->dupacks_out += max_burst_size;

return max_burst_size;
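tcp_send_acks bounds the number of duplicate ACKs emitted in one dispatch to the allowed burst size and carries any remainder over to the next run. A hedged sketch of that bookkeeping (function name and the n_acks parameter are illustrative):

/* Bound dup-ACK bursts; mirrors the accounting in the fragments above. */
static int
example_send_dupacks (tcp_connection_t *tc, u32 max_burst_size, u32 n_acks)
{
  u32 j;

  n_acks = clib_min (n_acks, tc->pending_dupacks);
  for (j = 0; j < clib_min (n_acks, max_burst_size); j++)
    tcp_send_ack (tc);

  if (n_acks < max_burst_size)
    {
      tc->pending_dupacks = 0;
      tc->snd_sack_pos = 0;
      tc->dupacks_out += n_acks;
      return n_acks;
    }

  /* Burst exhausted: remember what is still owed for the next dispatch. */
  TCP_DBG ("constrained by burst size");
  tc->pending_dupacks = n_acks - max_burst_size;
  tc->dupacks_out += max_burst_size;
  return max_burst_size;
}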
tc->flags &= ~TCP_CONN_RXT_PENDING;

if (!(tc->flags & TCP_CONN_SNDACK))
tc->flags &= ~TCP_CONN_SNDACK;

if (n_segs && !tc->pending_dupacks)
u16 * next0, u32 * error0)

*error0 = TCP_ERROR_LINK_LOCAL_RW;
*error0 = TCP_ERROR_LINK_LOCAL_RW;

u32 * to_next, u32 n_bufs)

for (i = 0; i < n_bufs; i++)
if (!(b->flags & VLIB_BUFFER_IS_TRACED))

IP_PROTOCOL_TCP, tc0->ipv6_flow_label);

ASSERT ((b->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
ASSERT ((b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
b->flags |= VNET_BUFFER_F_GSO;

if (tc0->next_node_index)
*next0 = tc0->next_node_index;
vnet_buffer (b0)->tcp.next_node_opaque = tc0->next_node_opaque;
while (n_left_from >= 4)

b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
b[1]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];

while (n_left_from > 0)
if (n_left_from > 1)

b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
.name = "tcp4-output",
.vector_size = sizeof (u32),

#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,

.name = "tcp6-output",
.vector_size = sizeof (u32),

#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,

typedef enum _tcp_reset_next

#define foreach_tcp4_reset_next \
  _(DROP, "error-drop") \
  _(IP_LOOKUP, "ip4-lookup")

#define foreach_tcp6_reset_next \
  _(DROP, "error-drop") \
  _(IP_LOOKUP, "ip6-lookup")

u32 n_left_from, next_index, *from, *to_next;
while (n_left_from > 0)
while (n_left_from > 0 && n_left_to_next > 0)

n_left_to_next -= 1;

b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;

n_left_to_next, bi0, next0);
.name = "tcp4-reset",
.vector_size = sizeof (u32),

#define _(s,n) [TCP_RESET_NEXT_##s] = n,

.name = "tcp6-reset",
.vector_size = sizeof (u32),

#define _(s,n) [TCP_RESET_NEXT_##s] = n,
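The registrations above wire the output and reset nodes into the VLIB graph, with the foreach_* macros expanding into the next-node tables. A hedged sketch of how one such registration is typically assembled from these pieces; the n_next_nodes count is an assumption and other fields (error strings, trace formatter) are omitted:

/* Sketch of a VLIB node registration built from the fragments above. */
VLIB_REGISTER_NODE (tcp4_reset_node) =
{
  .name = "tcp4-reset",
  .vector_size = sizeof (u32),
  .n_next_nodes = TCP_RESET_N_NEXT,  /* assumption: terminator of the _tcp_reset_next enum */
  .next_nodes = {
#define _(s, n) [TCP_RESET_NEXT_##s] = n,
    foreach_tcp4_reset_next
#undef _
  },
};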
Symbols referenced in the listing above:

u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store the free-list index; VLIB_BUFFER_IS_TRACED: trace this buffer.
static void tcp_check_if_gso(tcp_connection_t *tc, vlib_buffer_t *b)
static void tcp_check_sack_reneging(tcp_connection_t *tc)
#define TCP_RXT_MAX_BURST
static int tcp_send_acks(tcp_connection_t *tc, u32 max_burst_size)
u16 ip4_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
static void tcp_persist_timer_set(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
#define TCP_OPTION_LEN_SACK_PERMITTED
static u32 tcp_time_now(void)
void tcp_timer_persist_handler(tcp_connection_t *tc)
Got 0 snd_wnd from peer, try to do something about it.
vl_api_wireguard_peer_flags_t flags
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers. Frees the entire buffer chain for each buffer.
static tcp_connection_t * tcp_connection_get(u32 conn_index, u32 thread_index)
#define tcp_node_index(node_id, is_ip4)
int session_tx_fifo_peek_bytes(transport_connection_t *tc, u8 *buffer, u32 offset, u32 max_bytes)
void session_queue_run_on_main_thread(vlib_main_t *vm)
static void tcp_make_synack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN-ACK.
#define tcp_opts_tstamp(_to)
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
vl_api_ip_port_and_mask_t dst_port
void tcp_timer_retransmit_syn_handler(tcp_connection_t *tc)
SYN retransmit timer handler.
#define clib_memcpy_fast(a, b, c)
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
static int tcp_do_retransmit(tcp_connection_t *tc, u32 max_burst_size)
u32 fib_table_get_index_for_sw_if_index(fib_protocol_t proto, u32 sw_if_index)
Get the index of the FIB bound to the interface.
#define tcp_fastrecovery_first_off(tc)
static u32 tcp_options_write(u8 *data, tcp_options_t *opts)
Write TCP options to segment.
void session_add_self_custom_tx_evt(transport_connection_t *tc, u8 has_prio)
void tcp_timer_retransmit_handler(tcp_connection_t *tc)
struct _tcp_main tcp_main_t
static void * vlib_buffer_push_tcp_net_order(vlib_buffer_t *b, u16 sp, u16 dp, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
static sack_scoreboard_hole_t * scoreboard_get_hole(sack_scoreboard_t *sb, u32 index)
void tcp_connection_timers_reset(tcp_connection_t *tc)
Stop all connection timers.
This packet is to be rewritten and forwarded to the next processing node.
u16 current_length
Nbytes between current data and the end of this buffer.
static int tcp_transmit_unsent(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
struct _tcp_connection tcp_connection_t
static u32 tcp_set_time_now(tcp_worker_ctx_t *wrk)
static u32 tcp_initial_wnd_unscaled(tcp_connection_t *tc)
TCP's initial window.
enum _tcp_output_next tcp_output_next_t
static void tcp_cc_congestion(tcp_connection_t *tc)
static ip_csum_t ip_csum_with_carry(ip_csum_t sum, ip_csum_t x)
#define TCP_RTO_SYN_RETRIES
#define VLIB_NODE_FN(node)
static void tcp_push_ip_hdr(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, vlib_buffer_t *b)
static void * vlib_buffer_push_ip6_custom(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto, u32 flow_label)
Push IPv6 header to buffer.
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
static uword tcp46_send_reset_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame, u8 is_ip4)
vlib_error_t * errors
Vector of errors for this node.
static uword vlib_buffer_length_in_chain(vlib_main_t *vm, vlib_buffer_t *b)
Get length in bytes of the buffer chain.
static u8 tcp_is_descheduled(tcp_connection_t *tc)
u8 n_sack_blocks
Number of SACK blocks.
struct _sack_scoreboard sack_scoreboard_t
struct _tcp_header tcp_header_t
int tcp_half_open_connection_cleanup(tcp_connection_t *tc)
Try to cleanup half-open connection.
#define scoreboard_rescue_rxt_valid(_sb, _tc)
#define tcp_in_cong_recovery(tc)
u8 wscale
Option flags, see above.
enum fib_protocol_t_ fib_protocol_t
Protocol Type.
#define TCP_OPTS_MAX_SACK_BLOCKS
#define foreach_tcp4_reset_next
static u32 tcp_prepare_retransmit_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Build a retransmit segment.
static u16 ip_calculate_l4_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip_csum_t sum0, u32 payload_length, u8 *iph, u32 ip_header_size, u8 *l4h)
int tcp_session_custom_tx(void *conn, transport_send_params_t *sp)
void session_transport_closing_notify(transport_connection_t *tc)
Notification from transport that connection is being closed.
static uword tcp46_output_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, int is_ip4)
static void * tcp_init_buffer(vlib_main_t *vm, vlib_buffer_t *b)
static ip_adjacency_t * adj_get(adj_index_t adj_index)
Get a pointer to an adjacency object from its index.
void tcp_make_syn(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN.
static int tcp_prepare_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Allocate a new buffer and build a new tcp segment.
static void tcp_make_fin(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to FIN-ACK.
#define TCP_OPTION_LEN_SACK_BLOCK
#define seq_leq(_s1, _s2)
#define vlib_prefetch_buffer_header(b, type)
Prefetch buffer metadata.
static void * ip4_next_header(ip4_header_t *i)
#define tcp_zero_rwnd_sent(tc)
sack_block_t * sacks
SACK blocks.
static sack_scoreboard_hole_t * scoreboard_first_hole(sack_scoreboard_t *sb)
static tcp_header_t * tcp_buffer_hdr(vlib_buffer_t *b)
static void tcp46_output_trace_frame(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *to_next, u32 n_bufs)
#define tcp_validate_txf_size(_tc, _a)
static void tcp_push_hdr_i(tcp_connection_t *tc, vlib_buffer_t *b, u32 snd_nxt, u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
Push TCP header and update connection variables.
#define tcp_in_fastrecovery(tc)
#define tcp_csum_offload(tc)
static u32 vlib_get_buffer_index(vlib_main_t *vm, void *p)
Translate buffer pointer into buffer index.
static void tcp_retransmit_timer_update(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
u32 tcp_session_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
static void * vlib_buffer_push_ip6(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto)
Push IPv6 header to buffer.
#define TCP_OPTION_LEN_WINDOW_SCALE
vlib_node_registration_t tcp6_reset_node
(constructor) VLIB_REGISTER_NODE (tcp6_reset_node)
void scoreboard_clear_reneging(sack_scoreboard_t *sb, u32 start, u32 end)
vlib_error_t error
Error code for buffers to be enqueued to error handler.
#define tcp_trajectory_add_start(b, start)
#define TRANSPORT_MAX_HDRS_LEN
static void tcp_retransmit_timer_set(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
vlib_main_t * vm
convenience pointer to this thread's vlib main
void tcp_send_reset(tcp_connection_t *tc)
Build and set reset packet for connection.
void tcp_send_synack(tcp_connection_t *tc)
#define ADJ_INDEX_INVALID
Invalid ADJ index - used when no adj is known likewise blazoned capitals INVALID speak volumes where ...
static int tcp_make_synack_options(tcp_connection_t *tc, tcp_options_t *opts)
static int tcp_make_syn_options(tcp_connection_t *tc, tcp_options_t *opts)
static void * vlib_buffer_make_headroom(vlib_buffer_t *b, u8 size)
Make head room, typically for packet headers.
static int tcp_retransmit_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Do retransmit with SACKs.
void tcp_connection_tx_pacer_reset(tcp_connection_t *tc, u32 window, u32 start_bucket)
static void tcp_cc_loss(tcp_connection_t *tc)
format_function_t format_tcp_connection_id
static __clib_warn_unused_result u32 vlib_buffer_alloc(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Allocate buffers into supplied array.
#define TCP_DUPACK_THRESHOLD
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
#define TCP_ESTABLISH_TIME
static void tcp_output_handle_link_local(tcp_connection_t *tc0, vlib_buffer_t *b0, u16 *next0, u32 *error0)
tcp_timer_wheel_t timer_wheel
worker timer wheel
#define foreach_tcp6_output_next
void tcp_program_dupack(tcp_connection_t *tc)
int tcp_fastrecovery_prr_snd_space(tcp_connection_t *tc)
Estimate send space using proportional rate reduction (RFC6937)
static u8 tcp_window_compute_scale(u32 window)
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
#define TCP_OPTION_LEN_TIMESTAMP
#define foreach_tcp4_output_next
static void tcp_enqueue_to_ip_lookup(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4, u32 fib_index)
static void vlib_node_increment_counter(vlib_main_t *vm, u32 node_index, u32 counter_index, u64 increment)
#define TCP_DBG(_fmt, _args...)
#define TCP_MAX_WND_SCALE
void tcp_program_cleanup(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
static void tcp_output_handle_packet(tcp_connection_t *tc0, vlib_buffer_t *b0, vlib_node_runtime_t *error_node, u16 *next0, u8 is_ip4)
vl_api_ip_port_and_mask_t src_port
static void tcp_cc_event(tcp_connection_t *tc, tcp_cc_event_t evt)
void transport_connection_reschedule(transport_connection_t *tc)
static u32 tcp_flight_size(const tcp_connection_t *tc)
Our estimate of the number of bytes in flight (pipe size)
This packet matches an "incomplete adjacency" and packets need to be passed to ARP to find rewrite st...
void tcp_bt_track_tx(tcp_connection_t *tc, u32 len)
Track a tcp tx burst.
#define VLIB_REGISTER_NODE(x,...)
static sack_scoreboard_hole_t * scoreboard_last_hole(sack_scoreboard_t *sb)
#define CLIB_PREFETCH(addr, size, type)
static_always_inline void vlib_buffer_enqueue_to_next(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 *nexts, uword count)
void tcp_send_window_update_ack(tcp_connection_t *tc)
Send window update ack.
void tcp_program_retransmit(tcp_connection_t *tc)
void tcp_send_reset_w_pkt(tcp_connection_t *tc, vlib_buffer_t *pkt, u32 thread_index, u8 is_ip4)
Send reset without reusing existing buffer.
format_function_t format_tcp_state
static void tcp_update_rto(tcp_connection_t *tc)
#define clib_warning(format, args...)
#define tcp_in_recovery(tc)
struct _transport_connection transport_connection_t
#define TCP_TO_TIMER_TICK
Factor for converting ticks to timer ticks.
static u32 tcp_window_to_advertise(tcp_connection_t *tc, tcp_state_t state)
Compute and return window to advertise, scaled as per RFC1323.
u32 adj_index_t
An index for adjacencies.
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
#define tcp_zero_rwnd_sent_on(tc)
u16 mss
Maximum segment size advertised.
static u32 tcp_available_cc_snd_space(const tcp_connection_t *tc)
Estimate of how many bytes we can still push into the network.
static void * ip6_next_header(ip6_header_t *i)
static int tcp_retransmit_no_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Fast retransmit without SACK info.
vlib_main_t vlib_node_runtime_t * node
static void * vlib_buffer_push_tcp(vlib_buffer_t *b, u16 sp_net, u16 dp_net, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
static void tcp_make_ack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to ACK.
static u32 transport_max_tx_dequeue(transport_connection_t *tc)
#define seq_geq(_s1, _s2)
u16 ip6_tcp_udp_icmp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip6_header_t *ip0, int *bogus_lengthp)
static int tcp_make_established_options(tcp_connection_t *tc, tcp_options_t *opts)
void tcp_bt_check_app_limited(tcp_connection_t *tc)
Check if sample to be generated is app limited.
u16 cached_next_index
Next frame index that vector arguments were last enqueued to last time this node ran.
static void tcp_cc_init_rxt_timeout(tcp_connection_t *tc)
Reset congestion control, switch cwnd to loss window and try again.
static void tcp_output_push_ip(vlib_main_t *vm, vlib_buffer_t *b0, tcp_connection_t *tc0, u8 is_ip4)
#define tcp_recovery_on(tc)
static u8 * format_tcp_tx_trace(u8 *s, va_list *args)
u16 ip4_tcp_udp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip4_header_t *ip0)
void transport_connection_tx_pacer_reset_bucket(transport_connection_t *tc, u32 bucket)
Reset tx pacer bucket.
void tcp_bt_track_rxt(tcp_connection_t *tc, u32 start, u32 end)
Track a tcp retransmission.
void tcp_update_burst_snd_vars(tcp_connection_t *tc)
Update burst send vars.
#define TRANSPORT_PACER_MIN_BURST
static uword ip6_address_is_link_local_unicast(const ip6_address_t *a)
#define clib_mem_unaligned(pointer, type)
static void tcp_update_rcv_wnd(tcp_connection_t *tc)
struct _sack_scoreboard_hole sack_scoreboard_hole_t
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
void tcp_send_ack(tcp_connection_t *tc)
void transport_connection_tx_pacer_update_bytes(transport_connection_t *tc, u32 bytes)
int tcp_retransmit_first_unacked(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
Retransmit first unacked segment.
#define tcp_opts_wscale(_to)
u32 tsval
Timestamp value.
static void tcp_timer_update(tcp_timer_wheel_t *tw, tcp_connection_t *tc, u8 timer_id, u32 interval)
u32 tsecr
Echoed/reflected time stamp.
#define tcp_fastrecovery_first(tc)
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
static u8 tcp_max_tx_deq(tcp_connection_t *tc)
ip_lookup_next_t lookup_next_index
Next hop after ip4-lookup.
u32 next_buffer
Next buffer for this linked-list of buffers.
vlib_main_t vlib_node_runtime_t vlib_frame_t * frame
#define foreach_tcp6_reset_next
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
static tcp_worker_ctx_t * tcp_get_worker(u32 thread_index)
void session_transport_closed_notify(transport_connection_t *tc)
Notification from transport that it is closed.
VLIB buffer representation.
int session_stream_connect_notify(transport_connection_t *tc, session_error_t err)
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
static void tcp_make_ack_i(tcp_connection_t *tc, vlib_buffer_t *b, tcp_state_t state, u8 flags)
Prepare ACK.
static int tcp_make_reset_in_place(vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4)
#define TCP_OPTION_LEN_MSS
sack_scoreboard_hole_t * scoreboard_next_rxt_hole(sack_scoreboard_t *sb, sack_scoreboard_hole_t *start, u8 have_unsent, u8 *can_rescue, u8 *snd_limited)
Figure out the next hole to retransmit.
u16 ip6_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
static f64 tcp_time_now_us(u32 thread_index)
void scoreboard_init_rxt(sack_scoreboard_t *sb, u32 snd_una)
static void tcp_connection_set_state(tcp_connection_t *tc, tcp_state_t state)
struct clib_bihash_value offset
template key/value backing page structure
u32 tcp_initial_window_to_advertise(tcp_connection_t *tc)
Compute initial window and scale factor.
#define TCP_USE_SACKS
Disable only for testing.
vl_api_dhcp_client_state_t state
static u32 vlib_num_workers()
void tcp_connection_cleanup(tcp_connection_t *tc)
Cleans up connection state.
static u32 tcp_buffer_len(vlib_buffer_t *b)
static u8 tcp_retransmit_should_retry_head(tcp_connection_t *tc, sack_scoreboard_t *sb)
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
u16 flags
Copy of main node flags.
void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
static u8 tcp_timer_is_active(tcp_connection_t *tc, tcp_timers_e timer)
static u16 tcp_compute_checksum(tcp_connection_t *tc, vlib_buffer_t *b)
enum _tcp_reset_next tcp_reset_next_t
static u32 transport_max_rx_enqueue(transport_connection_t *tc)
#define tcp_opts_sack_permitted(_to)
static void vlib_buffer_free_one(vlib_main_t *vm, u32 buffer_index)
Free one buffer. Shorthand to free a single buffer chain.
tcp_connection_t tcp_connection
static u32 tcp_tstamp(tcp_connection_t *tc)
Generate timestamp for tcp connection.
void tcp_program_ack(tcp_connection_t *tc)
static void * tcp_reuse_buffer(vlib_main_t *vm, vlib_buffer_t *b)
static_always_inline void vlib_get_buffers(vlib_main_t *vm, u32 *bi, vlib_buffer_t **b, int count)
Translate array of buffer indices into buffer pointers.
vlib_node_registration_t tcp4_reset_node
(constructor) VLIB_REGISTER_NODE (tcp4_reset_node)
#define VLIB_NODE_FLAG_TRACE
static uword round_down_pow2(uword x, uword pow2)
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
#define CLIB_CACHE_LINE_BYTES
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
static void * vlib_buffer_push_ip4(vlib_main_t *vm, vlib_buffer_t *b, ip4_address_t *src, ip4_address_t *dst, int proto, u8 csum_offload)
Push IPv4 header to buffer.
static void tcp_enqueue_to_output(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4)
static tcp_main_t * vnet_get_tcp_main()
u32 tco_next_node[2]
Session layer edge indices to tcp output.
static char * tcp_error_strings[]
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
enum _tcp_state tcp_state_t
u32 transport_connection_tx_pacer_burst(transport_connection_t *tc)
Get tx pacer max burst.
vl_api_interface_index_t sw_if_index
#define tcp_worker_stats_inc(_wrk, _stat, _val)
#define tcp_zero_rwnd_sent_off(tc)
u32 * tx_buffers
tx buffer free list
static void session_add_pending_tx_buffer(u32 thread_index, u32 bi, u32 next_node)
Add session node pending buffer with custom node.
adj_index_t adj_nbr_find(fib_protocol_t nh_proto, vnet_link_t link_type, const ip46_address_t *nh_addr, u32 sw_if_index)
Lookup neighbor adjacency.
#define TCP_EVT(_evt, _args...)
static int tcp_make_options(tcp_connection_t *tc, tcp_options_t *opts, tcp_state_t state)
static uword pool_elts(void *v)
Number of active elements in a pool.