typedef enum _tcp_output_next

#define foreach_tcp4_output_next                \
  _ (DROP, "error-drop")                        \
  _ (IP_LOOKUP, "ip4-lookup")                   \
  _ (IP_REWRITE, "ip4-rewrite")                 \
  _ (IP_ARP, "ip4-arp")

#define foreach_tcp6_output_next                \
  _ (DROP, "error-drop")                        \
  _ (IP_LOOKUP, "ip6-lookup")                   \
  _ (IP_REWRITE, "ip6-rewrite")                 \
  _ (IP_ARP, "ip6-discover-neighbor")

#define tcp_error(n,s) s,

#ifndef CLIB_MARCH_VARIANT

  if (tc->state != TCP_STATE_SYN_RCVD || tcp_opts_wscale (&tc->rcv_opts))
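/* Receive window bookkeeping: tcp_update_rcv_wnd() recomputes the window we
 * can advertise without shrinking it below what the peer has already
 * observed, and tcp_window_to_advertise() returns that window scaled as per
 * RFC 1323; states before ESTABLISHED advertise the initial, unscaled
 * window. */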
  u32 available_space, wnd;

  observed_wnd = (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);

  TCP_EVT (TCP_EVT_RCV_WND_SHRUNK, tc, observed_wnd, available_space);

  if (state < TCP_STATE_ESTABLISHED)

  return tc->rcv_wnd >> tc->rcv_wscale;
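/* Option builders: tcp_make_syn_options(), tcp_make_synack_options() and
 * tcp_make_established_options() fill a tcp_options_t (MSS, window scale,
 * timestamps, SACK-permitted / SACK blocks) and return the options length
 * that tcp_options_write() later serializes into the segment. */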
  opts->flags |= TCP_OPTS_FLAG_MSS;

  opts->flags |= TCP_OPTS_FLAG_WSCALE;
  opts->wscale = tc->rcv_wscale;

  opts->flags |= TCP_OPTS_FLAG_TSTAMP;

  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
  opts->flags |= TCP_OPTS_FLAG_MSS;

  opts->flags |= TCP_OPTS_FLAG_WSCALE;
  opts->wscale = tc->rcv_wscale;

  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
  opts->tsecr = tc->tsval_recent;

  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
  opts->tsecr = tc->tsval_recent;

  opts->flags |= TCP_OPTS_FLAG_SACK;
  if (tc->snd_sack_pos >= vec_len (tc->snd_sacks))
    tc->snd_sack_pos = 0;
  opts->sacks = &tc->snd_sacks[tc->snd_sack_pos];
    case TCP_STATE_ESTABLISHED:
    case TCP_STATE_CLOSE_WAIT:
    case TCP_STATE_FIN_WAIT_1:
    case TCP_STATE_LAST_ACK:
    case TCP_STATE_CLOSING:
    case TCP_STATE_FIN_WAIT_2:
    case TCP_STATE_TIME_WAIT:
    case TCP_STATE_CLOSED:
      return tcp_make_established_options (tc, opts);
    case TCP_STATE_SYN_RCVD:
      return tcp_make_synack_options (tc, opts);
    case TCP_STATE_SYN_SENT:
      return tcp_make_syn_options (tc, opts);
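/* tcp_update_burst_snd_vars(): update burst send vars - rebuild the options
 * for the ESTABLISHED state, recompute snd_mss from the smaller of our mss
 * and the peer's advertised mss minus the options length, and refresh rate
 * sampling / pacing state when enabled. */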
  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts,
                                       TCP_STATE_ESTABLISHED);

  tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;

  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)

  if (tc->snd_una == tc->snd_nxt)
  if (b->flags & VLIB_BUFFER_NEXT_PRESENT)

  b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;

#ifndef CLIB_MARCH_VARIANT

  ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
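/* Checksum helpers: ip6_tcp_compute_checksum_custom() and
 * ip4_tcp_compute_checksum_custom() build the pseudo-header sum from the
 * connection's ip46 addresses and pass it on to ip_calculate_l4_checksum();
 * tcp_compute_checksum() computes the checksum in software only when
 * TCP_CFG_F_NO_CSUM_OFFLOAD is set, otherwise it requests offload via
 * VNET_BUFFER_F_OFFLOAD_TCP_CKSUM. */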
u16
ip6_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
                                 ip46_address_t * src, ip46_address_t * dst)

  u16 payload_length_host_byte_order;

    clib_host_to_net_u16 (IP_PROTOCOL_TCP);

  for (i = 0; i < ARRAY_LEN (src->ip6.as_uword); i++)

                                   payload_length_host_byte_order, NULL, 0,

u16
ip4_tcp_compute_checksum_custom (vlib_main_t * vm, vlib_buffer_t * p0,
                                 ip46_address_t * src, ip46_address_t * dst)

  u32 payload_length_host_byte_order;

    clib_host_to_net_u32 (payload_length_host_byte_order +
                          (IP_PROTOCOL_TCP << 16));

                                   payload_length_host_byte_order, NULL, 0,

  if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))

        (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);

        (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);

  b->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
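/* Segment constructors: tcp_make_ack_i() prepares an ACK, tcp_make_syn() and
 * tcp_make_synack() convert a buffer to SYN / SYN-ACK; each writes the
 * computed options and pushes the TCP header with vlib_buffer_push_tcp(). */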
  u8 tcp_opts_len, tcp_hdr_opts_len;

  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);

                             tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);

  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;

  TCP_EVT (TCP_EVT_ACK_SENT, tc);
  tc->rcv_las = tc->rcv_nxt;
  u8 tcp_hdr_opts_len, tcp_opts_len;

  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);

  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;

  u8 tcp_opts_len, tcp_hdr_opts_len;

  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);

                             tc->rcv_nxt, tcp_hdr_opts_len,

  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
                          u8 is_ip4, u32 fib_index)

  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;

                                  tm->ipl_next_node[!is_ip4]);

  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
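/* Reset helpers: tcp_make_reset_in_place() rewrites a received packet into a
 * RST in the same buffer, tcp_send_reset_w_pkt() sends a reset without
 * reusing the existing buffer (used when there is no established
 * connection), and tcp_send_reset() builds and sends a RST for a
 * connection. */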
  ip6_address_t src_ip6, dst_ip6;

  src_port = th->src_port;
  dst_port = th->dst_port;

  seq = th->ack_number;

  tmp = clib_net_to_host_u32 (th->seq_number);

  ack = clib_host_to_net_u32 (tmp + len);
#ifndef CLIB_MARCH_VARIANT

                      u32 thread_index, u8 is_ip4)

  seq = pkt_th->ack_number;
  ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;

  ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);

                             seq, ack, tcp_hdr_len, flags, 0);

                             tc->ipv6_flow_label);

  TCP_EVT (TCP_EVT_RST_SENT, tc);

                               TCP_ERROR_RST_SENT, 1);
  u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len;

  tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
  advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;

                             tc->rcv_nxt, tcp_hdr_opts_len, flags,

  ASSERT (opts_write_len == tc->snd_opts_len);
  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;

  TCP_EVT (TCP_EVT_RST_SENT, tc);

                               TCP_ERROR_RST_SENT, 1);

                                      IP_PROTOCOL_TCP, tc->ipv6_flow_label);
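/* Establishment and teardown senders: tcp_send_syn() and tcp_send_synack()
 * stamp rtt_seq and push SYN / SYN-ACK segments; tcp_send_fin() converts a
 * buffer to FIN-ACK, sets TCP_CONN_FINSNT and clears TCP_CONN_FINPNDG. */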
  tc->rtt_seq = tc->snd_nxt;

  TCP_EVT (TCP_EVT_SYN_SENT, tc);

  TCP_EVT (TCP_EVT_SYNACK_SENT, tc);

  fin_snt = tc->flags & TCP_CONN_FINSNT;

  tc->flags |= TCP_CONN_FINSNT;

  if ((tc->flags & TCP_CONN_SNDACK) && !tc->pending_dupacks)
    tc->flags &= ~TCP_CONN_SNDACK;

  TCP_EVT (TCP_EVT_FIN_SENT, tc);

  tc->flags |= TCP_CONN_FINSNT;
  tc->flags &= ~TCP_CONN_FINPNDG;
  tc->snd_una_max = seq_max (tc->snd_una_max, tc->snd_nxt);
                u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)

  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;

  tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);

  advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;

  if (seq_geq (tc->psh_seq, snd_nxt)
      && seq_lt (tc->psh_seq, snd_nxt + data_len))
    flags |= TCP_FLAG_PSH;

                             tc->rcv_nxt, tcp_hdr_opts_len, flags,

                         tm->wrk_ctx[tc->c_thread_index].cached_opts,

      ASSERT (len == tc->snd_opts_len);

      tc->rcv_las = tc->rcv_nxt;

  tc->data_segs_out += 1;
  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)

  tc->snd_una_max = seq_max (tc->snd_nxt, tc->snd_una_max);

      tc->rtt_seq = tc->snd_nxt;
  if (!(tc->flags & TCP_CONN_SNDACK))

  tc->flags |= TCP_CONN_SNDACK;

  if (!(tc->flags & TCP_CONN_SNDACK))

  tc->flags |= TCP_CONN_SNDACK;

  if (tc->pending_dupacks < 255)
    tc->pending_dupacks += 1;

  if (!(tc->flags & TCP_CONN_RXT_PENDING))

  tc->flags |= TCP_CONN_RXT_PENDING;

  if (tc->rcv_wnd >= tcp_cfg.rwnd_min_update_ack * tc->snd_mss)
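/* Segment preparation: tcp_prepare_segment() allocates buffers and peeks up
 * to max_deq_bytes from the session tx fifo, chaining buffers when a segment
 * spans more than one; tcp_prepare_retransmit_segment() builds a retransmit
 * segment starting at snd_una + offset, capped by snd_mss and the bytes
 * still available, and updates the retransmit counters. */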
      ASSERT (n_bytes == max_deq_bytes);

      u32 chain_bi = ~0, n_bufs_per_seg, n_bufs;
      u16 n_peeked, len_to_deq;

      n_bufs_per_seg = ceil ((double) seg_size / bytes_per_buffer);

                                                 TRANSPORT_MAX_HDRS_LEN);

      b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;

      max_deq_bytes -= n_bytes;

      for (i = 1; i < n_bufs_per_seg; i++)

          len_to_deq = clib_min (max_deq_bytes, bytes_per_buffer);

          ASSERT (n_peeked == len_to_deq);
          n_bytes += n_peeked;

          prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;

          max_deq_bytes -= n_peeked;

  ASSERT (((*b)->current_data + (*b)->current_length) <= bytes_per_buffer);
  u32 start, available_bytes;

  ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
  ASSERT (max_deq_bytes != 0);

  ASSERT (available_bytes >= offset);
  available_bytes -= offset;
  if (!available_bytes)

  max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
  max_deq_bytes = clib_min (available_bytes, max_deq_bytes);

  start = tc->snd_una + offset;

  tc->snd_rxt_bytes += n_bytes;

  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)

  tc->bytes_retrans += n_bytes;
  tc->segs_retrans += 1;

  TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes);
  if (!sb->is_reneging && (!hole || hole->start == tc->snd_una))

  TCP_EVT (TCP_EVT_CC_EVT, tc, 6);

  tc->prev_ssthresh = tc->ssthresh;
  tc->prev_cwnd = tc->cwnd;

  tc->cwnd_acc_bytes = 0;
  tc->tr_occurences += 1;
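/* Retransmit timer handlers: for connections at or above ESTABLISHED,
 * congestion control is reset towards the loss window
 * (tcp_cc_init_rxt_timeout) and the first unacked segment is resent with
 * exponential backoff (rto_boff); SYN_RCVD connections retransmit their
 * SYN-ACK. The SYN retransmit handler re-sends the SYN for half-open
 * connections and cleans them up once they are done. */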
  if (tc->state == TCP_STATE_CLOSED)

  if (tc->state >= TCP_STATE_ESTABLISHED)

      TCP_EVT (TCP_EVT_CC_EVT, tc, 2);

      if (tc->flags & TCP_CONN_FINSNT)

      if ((tc->rto_boff == 0 && tc->snd_una == tc->snd_nxt)
          || (tc->rto_boff > 0 && seq_geq (tc->snd_una, tc->snd_congestion)

          tc->snd_congestion = tc->snd_nxt;

      n_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);

      if (tc->rto_boff == 1)

  else if (tc->state == TCP_STATE_SYN_RCVD)

      TCP_EVT (TCP_EVT_CC_EVT, tc, 2);

      TCP_EVT (TCP_EVT_SYN_RXT, tc, 1);
  ASSERT (tc->state == TCP_STATE_CLOSED);

  if (tc->flags & TCP_CONN_HALF_OPEN_DONE)

      TCP_DBG ("could not remove half-open connection");

  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);

  TCP_EVT (TCP_EVT_SYN_RXT, tc, 0);
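/* Persist timer handler: the peer advertised a zero send window, so try to
 * make progress by sending a small probe segment from the tx fifo (bounded
 * by snd_mss and buffer space), unless the connection is closed, a FIN was
 * sent, or the window has already reopened. */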
  u32 bi, max_snd_bytes, available_bytes, offset;

  if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
      || (tc->flags & TCP_CONN_FINSNT))
    goto update_scheduler;

  offset = tc->snd_nxt - tc->snd_una;

  if (!available_bytes)

  if (available_bytes <= offset)
    goto update_scheduler;

  max_snd_bytes = clib_min (tc->snd_mss,

           || tc->snd_nxt == tc->snd_una_max
           || tc->rto_boff > 1));

  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)

  tc->snd_una_max = seq_max (tc->snd_nxt, tc->snd_una_max);
  TCP_EVT (TCP_EVT_CC_EVT, tc, 1);

  u32 offset, n_segs = 0, n_written, bi, available_wnd;

  offset = tc->snd_nxt - tc->snd_una;
  available_wnd = tc->snd_wnd - offset;
  burst_size = clib_min (burst_size, available_wnd / tc->snd_mss);

  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)

  while (n_segs < burst_size)

      offset += n_written;

  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)

  tc->snd_nxt += n_written;
  tc->snd_una_max = seq_max (tc->snd_nxt, tc->snd_una_max);
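/* tcp_fastrecovery_prr_snd_space(): estimate send space during fast recovery
 * using proportional rate reduction (RFC 6937). While pipe > ssthresh, space
 * is prr_delivered scaled by ssthresh/prev_cwnd minus prr_out; otherwise it
 * is limited so the flight size converges back to ssthresh, with at least
 * one snd_mss allowed when nothing has been sent yet. */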
  prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion);

  if (pipe > tc->ssthresh)

      space = ((int) tc->prr_delivered * ((f64) tc->ssthresh / tc->prev_cwnd))

      limit = clib_max ((int) (tc->prr_delivered - prr_out), 0) + tc->snd_mss;
      space = clib_min (tc->ssthresh - pipe, limit);

  space = clib_max (space, prr_out ? 0 : tc->snd_mss);

  u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion;
  f64 rr = (f64) tc->ssthresh / tc->prev_cwnd;

  return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr);

          - (tc->snd_nxt - tc->snd_una));
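/* tcp_retransmit_sack(): do retransmit with SACKs. Walk the scoreboard holes
 * (scoreboard_next_rxt_hole), optionally retry the head of the window, send
 * a rescue retransmit when scoreboard_rescue_rxt_valid() allows it, and fall
 * back to transmitting unsent data when the holes are exhausted, all within
 * the pacer burst and PRR send space. */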
#define scoreboard_rescue_rxt_valid(_sb, _tc)                   \
  (seq_geq (_sb->rescue_rxt, _tc->snd_una)                      \
   && seq_leq (_sb->rescue_rxt, _tc->snd_congestion))

  u32 n_written = 0, offset, max_bytes, n_segs = 0;
  u8 snd_limited = 0, can_rescue = 0;
  u32 bi, max_deq, burst_bytes;

  burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);

  if (snd_space < tc->snd_mss)

      && seq_gt (sb->high_sacked, tc->snd_congestion)
      && tc->rxt_head != tc->snd_una

      max_bytes = clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una);

      tc->rxt_head = tc->snd_una;
      tc->rxt_delivered += n_written;
      tc->prr_delivered += n_written;
      ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes);

  TCP_EVT (TCP_EVT_CC_EVT, tc, 0);

  max_deq -= tc->snd_nxt - tc->snd_una;

  while (snd_space > 0 && n_segs < burst_size)

          if (max_deq > tc->snd_mss)

              av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
              av_wnd = clib_max (av_wnd - tc->snd_mss, 0);
              snd_space = clib_min (snd_space, av_wnd);
              snd_space = clib_min (max_deq, snd_space);
              burst_size = clib_min (burst_size - n_segs,
                                     snd_space / tc->snd_mss);

              if (max_deq > n_segs_new * tc->snd_mss)

              n_segs += n_segs_new;

          max_bytes = clib_min (tc->snd_mss, hole->end - hole->start);
          max_bytes = clib_min (max_bytes, snd_space);
          offset = hole->end - tc->snd_una - max_bytes;

              sb->rescue_rxt = tc->snd_congestion;

          max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
          max_bytes =
            snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;

          offset = sb->high_rxt - tc->snd_una;

      ASSERT (n_written <= snd_space);

      sb->high_rxt += n_written;

      snd_space -= n_written;
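/* tcp_retransmit_no_sack(): fast retransmit without SACK info. Resend data
 * between snd_una and snd_congestion one snd_mss at a time and, if burst
 * budget and send space remain, transmit new unsent data afterwards. */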
  u32 n_written = 0, offset = 0, bi, max_deq, n_segs_now, max_bytes;
  u32 burst_bytes, sent_bytes;

  int snd_space, n_segs = 0;

  TCP_EVT (TCP_EVT_CC_EVT, tc, 0);

  burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);

  cc_limited = snd_space < burst_bytes;

  while (snd_space > 0 && n_segs < burst_size)

                            tc->snd_congestion - tc->snd_una - offset);

      snd_space -= n_written;

  if (n_segs == burst_size)

  if (snd_space < tc->snd_mss || tc->snd_mss == 0)

  max_deq -= tc->snd_nxt - tc->snd_una;

      snd_space = clib_min (max_deq, snd_space);
      burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss);

      if (n_segs_now && max_deq > n_segs_now * tc->snd_mss)

      n_segs += n_segs_now;

  sent_bytes = clib_min (n_segs * tc->snd_mss, burst_bytes);
  sent_bytes = cc_limited ? burst_bytes : sent_bytes;
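/* tcp_send_acks(): flush pending (dup)acks for a connection, bounded by
 * max_burst_size. Plain acks are sent directly; for dupacks the SACK block
 * position is rotated and dupacks_out / pending_dupacks are updated, with
 * any remainder carried over to the next dispatch round. */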
  if (!tc->pending_dupacks)

      || tc->state != TCP_STATE_ESTABLISHED)

      tc->dupacks_out += 1;
      tc->pending_dupacks = 0;

      tc->snd_sack_pos = 0;

  n_acks = clib_min (n_acks, tc->pending_dupacks);

  for (j = 0; j < clib_min (n_acks, max_burst_size); j++)

  if (n_acks < max_burst_size)

      tc->pending_dupacks = 0;
      tc->snd_sack_pos = 0;
      tc->dupacks_out += n_acks;

      TCP_DBG ("constrained by burst size");
      tc->pending_dupacks = n_acks - max_burst_size;
      tc->dupacks_out += max_burst_size;

      return max_burst_size;
      tc->flags &= ~TCP_CONN_RXT_PENDING;

  if (!(tc->flags & TCP_CONN_SNDACK))

  tc->flags &= ~TCP_CONN_SNDACK;

  if (n_segs && !tc->pending_dupacks)
                              u16 * next0, u32 * error0)

      *error0 = TCP_ERROR_LINK_LOCAL_RW;

      *error0 = TCP_ERROR_LINK_LOCAL_RW;

                          u32 * to_next, u32 n_bufs)

  for (i = 0; i < n_bufs; i++)

      if (!(b->flags & VLIB_BUFFER_IS_TRACED))
                            IP_PROTOCOL_TCP, tc0->ipv6_flow_label);

  ASSERT ((b->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
  ASSERT ((b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
  b->flags |= VNET_BUFFER_F_GSO;

  if (tc0->next_node_index)

      *next0 = tc0->next_node_index;
      vnet_buffer (b0)->tcp.next_node_opaque = tc0->next_node_opaque;
  while (n_left_from >= 4)

          b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];

          b[1]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];

  while (n_left_from > 0)

      if (n_left_from > 1)

          b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
  .name = "tcp4-output",
  .vector_size = sizeof (u32),

#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,

  .name = "tcp6-output",
  .vector_size = sizeof (u32),

#define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,

typedef enum _tcp_reset_next
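/* tcp4-reset / tcp6-reset: tcp46_send_reset_inline() rewrites each buffer
 * into a RST in place, marks it VNET_BUFFER_F_LOCALLY_ORIGINATED and
 * enqueues it to ip4/ip6-lookup or error-drop, as listed in
 * foreach_tcp4_reset_next / foreach_tcp6_reset_next. */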
#define foreach_tcp4_reset_next         \
  _(DROP, "error-drop")                 \
  _(IP_LOOKUP, "ip4-lookup")

#define foreach_tcp6_reset_next         \
  _(DROP, "error-drop")                 \
  _(IP_LOOKUP, "ip6-lookup")

  u32 n_left_from, next_index, *from, *to_next;

  while (n_left_from > 0)

      while (n_left_from > 0 && n_left_to_next > 0)

          n_left_to_next -= 1;

          b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;

                                           n_left_to_next, bi0, next0);
  .name = "tcp4-reset",
  .vector_size = sizeof (u32),

#define _(s,n) [TCP_RESET_NEXT_##s] = n,

  .name = "tcp6-reset",
  .vector_size = sizeof (u32),

#define _(s,n) [TCP_RESET_NEXT_##s] = n,