20 typedef enum _tcp_output_next
29 #define foreach_tcp4_output_next \ 30 _ (DROP, "error-drop") \ 31 _ (IP_LOOKUP, "ip4-lookup") \ 32 _ (IP_REWRITE, "ip4-rewrite") \ 35 #define foreach_tcp6_output_next \ 36 _ (DROP, "error-drop") \ 37 _ (IP_LOOKUP, "ip6-lookup") \ 38 _ (IP_REWRITE, "ip6-rewrite") \ 39 _ (IP_ARP, "ip6-discover-neighbor") 42 #define tcp_error(n,s) s, 69 #ifndef CLIB_MARCH_VARIANT 105 if (tc->state != TCP_STATE_SYN_RCVD ||
tcp_opts_wscale (&tc->rcv_opts))
116 u32 available_space, wnd;
133 observed_wnd = (
i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
139 TCP_EVT (TCP_EVT_RCV_WND_SHRUNK, tc, observed_wnd, available_space);
143 wnd = available_space;
149 if (wnd && tc->rcv_wscale)
161 if (state < TCP_STATE_ESTABLISHED)
165 return tc->rcv_wnd >> tc->rcv_wscale;
173 opts->
flags |= TCP_OPTS_FLAG_MSS;
177 opts->
flags |= TCP_OPTS_FLAG_WSCALE;
178 opts->
wscale = tc->rcv_wscale;
181 opts->
flags |= TCP_OPTS_FLAG_TSTAMP;
188 opts->
flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
202 opts->
flags |= TCP_OPTS_FLAG_MSS;
208 opts->
flags |= TCP_OPTS_FLAG_WSCALE;
209 opts->
wscale = tc->rcv_wscale;
215 opts->
flags |= TCP_OPTS_FLAG_TSTAMP;
217 opts->
tsecr = tc->tsval_recent;
223 opts->
flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
241 opts->
flags |= TCP_OPTS_FLAG_TSTAMP;
243 opts->
tsecr = tc->tsval_recent;
250 opts->
flags |= TCP_OPTS_FLAG_SACK;
251 if (tc->snd_sack_pos >=
vec_len (tc->snd_sacks))
252 tc->snd_sack_pos = 0;
253 opts->
sacks = &tc->snd_sacks[tc->snd_sack_pos];
273 case TCP_STATE_ESTABLISHED:
274 case TCP_STATE_CLOSE_WAIT:
275 case TCP_STATE_FIN_WAIT_1:
276 case TCP_STATE_LAST_ACK:
277 case TCP_STATE_CLOSING:
278 case TCP_STATE_FIN_WAIT_2:
279 case TCP_STATE_TIME_WAIT:
280 case TCP_STATE_CLOSED:
282 case TCP_STATE_SYN_RCVD:
284 case TCP_STATE_SYN_SENT:
310 TCP_STATE_ESTABLISHED);
313 tc->snd_mss =
clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
321 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
324 if (tc->snd_una == tc->snd_nxt)
336 if (b->
flags & VLIB_BUFFER_NEXT_PRESENT)
339 b->
flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
349 #ifndef CLIB_MARCH_VARIANT 353 ASSERT ((b->
flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
354 b->
flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
367 ip46_address_t *
src, ip46_address_t *
dst)
370 u16 payload_length_host_byte_order;
375 clib_host_to_net_u16 (IP_PROTOCOL_TCP);
378 for (i = 0; i <
ARRAY_LEN (src->ip6.as_uword); i++)
387 payload_length_host_byte_order, NULL, 0,
393 ip46_address_t *
src, ip46_address_t *
dst)
396 u32 payload_length_host_byte_order;
400 clib_host_to_net_u32 (payload_length_host_byte_order +
401 (IP_PROTOCOL_TCP << 16));
407 payload_length_host_byte_order, NULL, 0,
415 if (
PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
422 (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
425 (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
429 b->
flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
442 u8 tcp_opts_len, tcp_hdr_opts_len;
450 tcp_hdr_opts_len = tcp_opts_len +
sizeof (
tcp_header_t);
453 tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
459 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
474 TCP_EVT (TCP_EVT_ACK_SENT, tc);
475 tc->rcv_las = tc->rcv_nxt;
493 u8 tcp_hdr_opts_len, tcp_opts_len;
503 tcp_hdr_opts_len = tcp_opts_len +
sizeof (
tcp_header_t);
508 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
520 u8 tcp_opts_len, tcp_hdr_opts_len;
527 tcp_hdr_opts_len = tcp_opts_len +
sizeof (
tcp_header_t);
530 tc->rcv_nxt, tcp_hdr_opts_len,
534 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
540 u8 is_ip4,
u32 fib_index)
545 b->
flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
554 tm->ipl_next_node[!is_ip4]);
564 b->
flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
580 ip6_address_t src_ip6, dst_ip6;
604 src_port = th->src_port;
605 dst_port = th->dst_port;
616 seq = th->ack_number;
622 tmp = clib_net_to_host_u32 (th->seq_number);
624 ack = clib_host_to_net_u32 (tmp + len);
650 #ifndef CLIB_MARCH_VARIANT 658 u32 thread_index,
u8 is_ip4)
697 seq = pkt_th->ack_number;
698 ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
704 ack = clib_host_to_net_u32 (
vnet_buffer (pkt)->tcp.seq_end);
708 seq, ack, tcp_hdr_len, flags, 0);
727 tc->ipv6_flow_label);
733 TCP_EVT (TCP_EVT_RST_SENT, tc);
735 TCP_ERROR_RST_SENT, 1);
749 u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len;
758 tcp_hdr_opts_len = tc->snd_opts_len +
sizeof (
tcp_header_t);
759 advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
762 tc->rcv_nxt, tcp_hdr_opts_len, flags,
766 ASSERT (opts_write_len == tc->snd_opts_len);
767 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
769 TCP_EVT (TCP_EVT_RST_SENT, tc);
771 TCP_ERROR_RST_SENT, 1);
786 IP_PROTOCOL_TCP, tc->ipv6_flow_label);
824 tc->rtt_seq = tc->snd_nxt;
829 TCP_EVT (TCP_EVT_SYN_SENT, tc);
853 TCP_EVT (TCP_EVT_SYNACK_SENT, tc);
868 fin_snt = tc->
flags & TCP_CONN_FINSNT;
880 tc->flags |= TCP_CONN_FINSNT;
885 if ((tc->flags & TCP_CONN_SNDACK) && !tc->pending_dupacks)
886 tc->flags &= ~TCP_CONN_SNDACK;
893 TCP_EVT (TCP_EVT_FIN_SENT, tc);
898 tc->flags |= TCP_CONN_FINSNT;
899 tc->flags &= ~TCP_CONN_FINPNDG;
900 tc->snd_una_max =
seq_max (tc->snd_una_max, tc->snd_nxt);
910 u8 compute_opts,
u8 maybe_burst,
u8 update_snd_nxt)
913 u32 advertise_wnd, data_len;
922 vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
927 tcp_hdr_opts_len = tc->snd_opts_len +
sizeof (
tcp_header_t);
930 advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
936 if (
seq_geq (tc->psh_seq, snd_nxt)
937 &&
seq_lt (tc->psh_seq, snd_nxt + data_len))
938 flags |= TCP_FLAG_PSH;
941 tc->rcv_nxt, tcp_hdr_opts_len, flags,
947 tm->wrk_ctx[tc->c_thread_index].cached_opts,
953 ASSERT (len == tc->snd_opts_len);
961 tc->snd_nxt += data_len;
962 tc->rcv_las = tc->rcv_nxt;
964 tc->bytes_out += data_len;
965 tc->data_segs_out += 1;
986 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
992 tc->snd_una_max =
seq_max (tc->snd_nxt, tc->snd_una_max);
998 tc->rtt_seq = tc->snd_nxt;
1032 if (!(tc->flags & TCP_CONN_SNDACK))
1035 tc->flags |= TCP_CONN_SNDACK;
1042 if (!(tc->flags & TCP_CONN_SNDACK))
1045 tc->flags |= TCP_CONN_SNDACK;
1047 if (tc->pending_dupacks < 255)
1048 tc->pending_dupacks += 1;
1054 if (!(tc->flags & TCP_CONN_RXT_PENDING))
1057 tc->flags |= TCP_CONN_RXT_PENDING;
1085 if (tc->rcv_wnd >=
tcp_cfg.rwnd_min_update_ack * tc->snd_mss)
1135 ASSERT (n_bytes == max_deq_bytes);
1143 u32 chain_bi = ~0, n_bufs_per_seg, n_bufs;
1144 u16 n_peeked, len_to_deq;
1149 n_bufs_per_seg = ceil ((
double) seg_size / bytes_per_buffer);
1164 TRANSPORT_MAX_HDRS_LEN);
1166 b[0]->
flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
1168 max_deq_bytes -= n_bytes;
1171 for (i = 1; i < n_bufs_per_seg; i++)
1174 len_to_deq =
clib_min (max_deq_bytes, bytes_per_buffer);
1182 ASSERT (n_peeked == len_to_deq);
1183 n_bytes += n_peeked;
1189 prev_b->
flags |= VLIB_BUFFER_NEXT_PRESENT;
1191 max_deq_bytes -= n_peeked;
1206 ASSERT (((*b)->current_data + (*b)->current_length) <= bytes_per_buffer);
1222 u32 start, available_bytes;
1225 ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
1226 ASSERT (max_deq_bytes != 0);
1232 ASSERT (available_bytes >= offset);
1233 available_bytes -=
offset;
1234 if (!available_bytes)
1237 max_deq_bytes =
clib_min (tc->snd_mss, max_deq_bytes);
1238 max_deq_bytes =
clib_min (available_bytes, max_deq_bytes);
1240 start = tc->snd_una +
offset;
1247 tc->snd_rxt_bytes += n_bytes;
1249 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1252 tc->bytes_retrans += n_bytes;
1253 tc->segs_retrans += 1;
1255 TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes);
1267 if (!sb->is_reneging && (!hole || hole->start == tc->snd_una))
1279 TCP_EVT (TCP_EVT_CC_EVT, tc, 6);
1281 tc->prev_ssthresh = tc->ssthresh;
1282 tc->prev_cwnd = tc->cwnd;
1293 tc->cwnd_acc_bytes = 0;
1294 tc->tr_occurences += 1;
1313 if (tc->state == TCP_STATE_CLOSED)
1316 if (tc->state >= TCP_STATE_ESTABLISHED)
1318 TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1321 if (tc->flags & TCP_CONN_FINSNT)
1331 if ((tc->rto_boff == 0 && tc->snd_una == tc->snd_nxt)
1332 || (tc->rto_boff > 0 &&
seq_geq (tc->snd_una, tc->snd_congestion)
1367 tc->snd_congestion = tc->snd_nxt;
1371 n_bytes =
clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
1386 if (tc->rto_boff == 1)
1399 else if (tc->state == TCP_STATE_SYN_RCVD)
1401 TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1430 TCP_EVT (TCP_EVT_SYN_RXT, tc, 1);
1437 ASSERT (tc->state == TCP_STATE_CLOSED);
1460 if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
1463 TCP_DBG (
"could not remove half-open connection");
1467 TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1494 TCP_EVT (TCP_EVT_SYN_RXT, tc, 0);
1512 u32 bi, max_snd_bytes, available_bytes,
offset;
1520 if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
1521 || (tc->flags & TCP_CONN_FINSNT))
1522 goto update_scheduler;
1525 offset = tc->snd_nxt - tc->snd_una;
1529 if (!available_bytes)
1535 if (available_bytes <= offset)
1536 goto update_scheduler;
1556 max_snd_bytes =
clib_min (tc->snd_mss,
1562 || tc->snd_nxt == tc->snd_una_max
1563 || tc->rto_boff > 1));
1565 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1573 tc->snd_una_max =
seq_max (tc->snd_nxt, tc->snd_una_max);
1598 TCP_EVT (TCP_EVT_CC_EVT, tc, 1);
1614 u32 offset, n_segs = 0, n_written, bi, available_wnd;
1618 offset = tc->snd_nxt - tc->snd_una;
1619 available_wnd = tc->snd_wnd -
offset;
1620 burst_size =
clib_min (burst_size, available_wnd / tc->snd_mss);
1622 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1625 while (n_segs < burst_size)
1633 offset += n_written;
1636 if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1639 tc->snd_nxt += n_written;
1640 tc->snd_una_max =
seq_max (tc->snd_nxt, tc->snd_una_max);
1657 prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion);
1659 if (pipe > tc->ssthresh)
1661 space = ((int) tc->prr_delivered * ((
f64) tc->ssthresh / tc->prev_cwnd))
1667 limit =
clib_max ((
int) (tc->prr_delivered - prr_out), 0) + tc->snd_mss;
1668 space =
clib_min (tc->ssthresh - pipe, limit);
1670 space =
clib_max (space, prr_out ? 0 : tc->snd_mss);
1678 u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion;
1679 f64 rr = (
f64) tc->ssthresh / tc->prev_cwnd;
1684 return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr);
1691 - (tc->snd_nxt - tc->snd_una));
1694 #define scoreboard_rescue_rxt_valid(_sb, _tc) \ 1695 (seq_geq (_sb->rescue_rxt, _tc->snd_una) \ 1696 && seq_leq (_sb->rescue_rxt, _tc->snd_congestion)) 1705 u32 n_written = 0,
offset, max_bytes, n_segs = 0;
1706 u8 snd_limited = 0, can_rescue = 0;
1707 u32 bi, max_deq, burst_bytes;
1717 burst_size =
clib_min (burst_size, burst_bytes / tc->snd_mss);
1729 if (snd_space < tc->snd_mss)
1736 &&
seq_gt (sb->high_sacked, tc->snd_congestion)
1737 && tc->rxt_head != tc->snd_una
1740 max_bytes =
clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una);
1751 tc->rxt_head = tc->snd_una;
1752 tc->rxt_delivered += n_written;
1753 tc->prr_delivered += n_written;
1754 ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes);
1759 TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
1763 max_deq -= tc->snd_nxt - tc->snd_una;
1765 while (snd_space > 0 && n_segs < burst_size)
1772 if (max_deq > tc->snd_mss)
1779 av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
1780 av_wnd =
clib_max (av_wnd - tc->snd_mss, 0);
1781 snd_space =
clib_min (snd_space, av_wnd);
1782 snd_space =
clib_min (max_deq, snd_space);
1783 burst_size =
clib_min (burst_size - n_segs,
1784 snd_space / tc->snd_mss);
1787 if (max_deq > n_segs_new * tc->snd_mss)
1790 n_segs += n_segs_new;
1804 max_bytes =
clib_min (tc->snd_mss, hole->end - hole->start);
1805 max_bytes =
clib_min (max_bytes, snd_space);
1806 offset = hole->end - tc->snd_una - max_bytes;
1812 sb->rescue_rxt = tc->snd_congestion;
1819 max_bytes =
clib_min (hole->end - sb->high_rxt, snd_space);
1820 max_bytes = snd_limited ?
clib_min (max_bytes, tc->snd_mss) : max_bytes;
1824 offset = sb->high_rxt - tc->snd_una;
1827 ASSERT (n_written <= snd_space);
1836 sb->high_rxt += n_written;
1839 snd_space -= n_written;
1859 u32 n_written = 0,
offset = 0, bi, max_deq, n_segs_now, max_bytes;
1860 u32 burst_bytes, sent_bytes;
1862 int snd_space, n_segs = 0;
1867 TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
1870 burst_size =
clib_min (burst_size, burst_bytes / tc->snd_mss);
1878 cc_limited = snd_space < burst_bytes;
1885 while (snd_space > 0 && n_segs < burst_size)
1888 tc->snd_congestion - tc->snd_una -
offset);
1900 snd_space -= n_written;
1905 if (n_segs == burst_size)
1911 if (snd_space < tc->snd_mss || tc->snd_mss == 0)
1915 max_deq -= tc->snd_nxt - tc->snd_una;
1918 snd_space =
clib_min (max_deq, snd_space);
1919 burst_size =
clib_min (burst_size - n_segs, snd_space / tc->snd_mss);
1921 if (n_segs_now && max_deq > n_segs_now * tc->snd_mss)
1923 n_segs += n_segs_now;
1929 sent_bytes =
clib_min (n_segs * tc->snd_mss, burst_bytes);
1930 sent_bytes = cc_limited ? burst_bytes : sent_bytes;
1941 if (!tc->pending_dupacks)
1944 || tc->state != TCP_STATE_ESTABLISHED)
1957 tc->dupacks_out += 1;
1958 tc->pending_dupacks = 0;
1963 tc->snd_sack_pos = 0;
1969 n_acks =
clib_min (n_acks, tc->pending_dupacks);
1971 for (j = 0; j <
clib_min (n_acks, max_burst_size); j++)
1974 if (n_acks < max_burst_size)
1976 tc->pending_dupacks = 0;
1977 tc->snd_sack_pos = 0;
1978 tc->dupacks_out += n_acks;
1983 TCP_DBG (
"constrained by burst size");
1984 tc->pending_dupacks = n_acks - max_burst_size;
1985 tc->dupacks_out += max_burst_size;
1987 return max_burst_size;
2018 tc->flags &= ~TCP_CONN_RXT_PENDING;
2022 if (!(tc->flags & TCP_CONN_SNDACK))
2025 tc->flags &= ~TCP_CONN_SNDACK;
2028 if (n_segs && !tc->pending_dupacks)
2045 u16 * next0,
u32 * error0)
2058 *error0 = TCP_ERROR_LINK_LOCAL_RW;
2070 *error0 = TCP_ERROR_LINK_LOCAL_RW;
2077 u32 * to_next,
u32 n_bufs)
2085 for (i = 0; i < n_bufs; i++)
2088 if (!(b->
flags & VLIB_BUFFER_IS_TRACED))
2112 IP_PROTOCOL_TCP, tc0->ipv6_flow_label);
2130 ASSERT ((b->
flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
2131 ASSERT ((b->
flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
2132 b->
flags |= VNET_BUFFER_F_GSO;
2145 if (tc0->next_node_index)
2147 *next0 = tc0->next_node_index;
2148 vnet_buffer (b0)->tcp.next_node_opaque = tc0->next_node_opaque;
2194 while (n_left_from >= 4)
2232 b[0]->
error = node->
errors[TCP_ERROR_INVALID_CONNECTION];
2243 b[1]->
error = node->
errors[TCP_ERROR_INVALID_CONNECTION];
2252 while (n_left_from > 0)
2256 if (n_left_from > 1)
2273 b[0]->
error = node->
errors[TCP_ERROR_INVALID_CONNECTION];
2303 .name =
"tcp4-output",
2305 .vector_size =
sizeof (
u32),
2311 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, 2323 .name =
"tcp6-output",
2325 .vector_size =
sizeof (
u32),
2331 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n, 2340 typedef enum _tcp_reset_next
2347 #define foreach_tcp4_reset_next \ 2348 _(DROP, "error-drop") \ 2349 _(IP_LOOKUP, "ip4-lookup") 2351 #define foreach_tcp6_reset_next \ 2352 _(DROP, "error-drop") \ 2353 _(IP_LOOKUP, "ip6-lookup") 2360 u32 n_left_from, next_index, *from, *to_next;
2367 while (n_left_from > 0)
2373 while (n_left_from > 0 && n_left_to_next > 0)
2385 n_left_to_next -= 1;
2394 b0->
flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
2408 n_left_to_next, bi0, next0);
2429 .name =
"tcp4-reset",
2430 .vector_size =
sizeof (
u32),
2435 #define _(s,n) [TCP_RESET_NEXT_##s] = n, 2445 .name =
"tcp6-reset",
2446 .vector_size =
sizeof (
u32),
2451 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, VLIB_BUFFER_IS_TRACED: trace this buffer.
static void tcp_check_if_gso(tcp_connection_t *tc, vlib_buffer_t *b)
static void tcp_check_sack_reneging(tcp_connection_t *tc)
#define TCP_RXT_MAX_BURST
static int tcp_send_acks(tcp_connection_t *tc, u32 max_burst_size)
u16 ip4_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
static void tcp_persist_timer_set(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
#define TCP_OPTION_LEN_SACK_PERMITTED
static u32 tcp_time_now(void)
void tcp_timer_persist_handler(tcp_connection_t *tc)
Got 0 snd_wnd from peer, try to do something about it.
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers. Frees the entire buffer chain for each buffer.
static tcp_connection_t * tcp_connection_get(u32 conn_index, u32 thread_index)
#define tcp_node_index(node_id, is_ip4)
int session_tx_fifo_peek_bytes(transport_connection_t *tc, u8 *buffer, u32 offset, u32 max_bytes)
void session_queue_run_on_main_thread(vlib_main_t *vm)
static void tcp_make_synack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN-ACK.
#define tcp_opts_tstamp(_to)
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
void tcp_timer_retransmit_syn_handler(tcp_connection_t *tc)
SYN retransmit timer handler.
#define clib_memcpy_fast(a, b, c)
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
static int tcp_do_retransmit(tcp_connection_t *tc, u32 max_burst_size)
u32 fib_table_get_index_for_sw_if_index(fib_protocol_t proto, u32 sw_if_index)
Get the index of the FIB bound to the interface.
#define tcp_fastrecovery_first_off(tc)
static u32 tcp_options_write(u8 *data, tcp_options_t *opts)
Write TCP options to segment.
void session_add_self_custom_tx_evt(transport_connection_t *tc, u8 has_prio)
void tcp_timer_retransmit_handler(tcp_connection_t *tc)
struct _tcp_main tcp_main_t
static void * vlib_buffer_push_tcp_net_order(vlib_buffer_t *b, u16 sp, u16 dp, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
static sack_scoreboard_hole_t * scoreboard_get_hole(sack_scoreboard_t *sb, u32 index)
void tcp_connection_timers_reset(tcp_connection_t *tc)
Stop all connection timers.
This packet is to be rewritten and forwarded to the next processing node.
u16 current_length
Number of bytes between current data and the end of this buffer.
static int tcp_transmit_unsent(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
struct _tcp_connection tcp_connection_t
static u32 tcp_set_time_now(tcp_worker_ctx_t *wrk)
static u32 tcp_initial_wnd_unscaled(tcp_connection_t *tc)
TCP's initial window.
enum _tcp_output_next tcp_output_next_t
static void tcp_cc_congestion(tcp_connection_t *tc)
static ip_csum_t ip_csum_with_carry(ip_csum_t sum, ip_csum_t x)
#define TCP_RTO_SYN_RETRIES
#define VLIB_NODE_FN(node)
static void tcp_push_ip_hdr(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, vlib_buffer_t *b)
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
static uword tcp46_send_reset_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame, u8 is_ip4)
vlib_error_t * errors
Vector of errors for this node.
static uword vlib_buffer_length_in_chain(vlib_main_t *vm, vlib_buffer_t *b)
Get length in bytes of the buffer chain.
static u8 tcp_is_descheduled(tcp_connection_t *tc)
u8 n_sack_blocks
Number of SACK blocks.
struct _sack_scoreboard sack_scoreboard_t
struct _tcp_header tcp_header_t
int tcp_half_open_connection_cleanup(tcp_connection_t *tc)
Try to cleanup half-open connection.
#define scoreboard_rescue_rxt_valid(_sb, _tc)
#define tcp_in_cong_recovery(tc)
u8 wscale
Option flags, see above.
enum fib_protocol_t_ fib_protocol_t
Protocol Type.
#define TCP_OPTS_MAX_SACK_BLOCKS
#define foreach_tcp4_reset_next
static u32 tcp_prepare_retransmit_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Build a retransmit segment.
static u16 ip_calculate_l4_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip_csum_t sum0, u32 payload_length, u8 *iph, u32 ip_header_size, u8 *l4h)
int tcp_session_custom_tx(void *conn, transport_send_params_t *sp)
void session_transport_closing_notify(transport_connection_t *tc)
Notification from transport that connection is being closed.
static uword tcp46_output_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, int is_ip4)
static void * tcp_init_buffer(vlib_main_t *vm, vlib_buffer_t *b)
static ip_adjacency_t * adj_get(adj_index_t adj_index)
Get a pointer to an adjacency object from its index.
void tcp_make_syn(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN.
static int tcp_prepare_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Allocate a new buffer and build a new tcp segment.
static void tcp_make_fin(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to FIN-ACK.
vl_api_interface_index_t sw_if_index
#define TCP_OPTION_LEN_SACK_BLOCK
#define seq_leq(_s1, _s2)
#define vlib_prefetch_buffer_header(b, type)
Prefetch buffer metadata.
static void tcp_retransmit_timer_force_update(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
static void * ip4_next_header(ip4_header_t *i)
#define tcp_zero_rwnd_sent(tc)
sack_block_t * sacks
SACK blocks.
static sack_scoreboard_hole_t * scoreboard_first_hole(sack_scoreboard_t *sb)
static tcp_header_t * tcp_buffer_hdr(vlib_buffer_t *b)
static void tcp46_output_trace_frame(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *to_next, u32 n_bufs)
#define tcp_validate_txf_size(_tc, _a)
static void tcp_push_hdr_i(tcp_connection_t *tc, vlib_buffer_t *b, u32 snd_nxt, u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
Push TCP header and update connection variables.
#define tcp_in_fastrecovery(tc)
#define tcp_csum_offload(tc)
static u32 vlib_get_buffer_index(vlib_main_t *vm, void *p)
Translate buffer pointer into buffer index.
static void tcp_retransmit_timer_update(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
u32 tcp_session_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
#define TCP_OPTION_LEN_WINDOW_SCALE
vlib_node_registration_t tcp6_reset_node
(constructor) VLIB_REGISTER_NODE (tcp6_reset_node)
void scoreboard_clear_reneging(sack_scoreboard_t *sb, u32 start, u32 end)
vlib_error_t error
Error code for buffers to be enqueued to error handler.
#define tcp_trajectory_add_start(b, start)
#define TRANSPORT_MAX_HDRS_LEN
static void tcp_retransmit_timer_set(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
vlib_main_t * vm
convenience pointer to this thread's vlib main
void tcp_send_reset(tcp_connection_t *tc)
Build and set reset packet for connection.
void tcp_send_synack(tcp_connection_t *tc)
#define ADJ_INDEX_INVALID
Invalid ADJ index - used when no adj is known likewise blazoned capitals INVALID speak volumes where ...
static int tcp_make_synack_options(tcp_connection_t *tc, tcp_options_t *opts)
static int tcp_make_syn_options(tcp_connection_t *tc, tcp_options_t *opts)
static void * vlib_buffer_make_headroom(vlib_buffer_t *b, u8 size)
Make head room, typically for packet headers.
static int tcp_retransmit_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Do retransmit with SACKs.
void tcp_connection_tx_pacer_reset(tcp_connection_t *tc, u32 window, u32 start_bucket)
static void tcp_cc_loss(tcp_connection_t *tc)
format_function_t format_tcp_connection_id
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
#define TCP_ESTABLISH_TIME
static void tcp_output_handle_link_local(tcp_connection_t *tc0, vlib_buffer_t *b0, u16 *next0, u32 *error0)
tcp_timer_wheel_t timer_wheel
worker timer wheel
#define foreach_tcp6_output_next
void tcp_program_dupack(tcp_connection_t *tc)
int tcp_fastrecovery_prr_snd_space(tcp_connection_t *tc)
Estimate send space using proportional rate reduction (RFC6937)
static u8 tcp_window_compute_scale(u32 window)
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
#define TCP_OPTION_LEN_TIMESTAMP
#define foreach_tcp4_output_next
static void tcp_enqueue_to_ip_lookup(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4, u32 fib_index)
static void vlib_node_increment_counter(vlib_main_t *vm, u32 node_index, u32 counter_index, u64 increment)
#define TCP_DBG(_fmt, _args...)
#define TCP_MAX_WND_SCALE
void tcp_program_cleanup(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
static void tcp_output_handle_packet(tcp_connection_t *tc0, vlib_buffer_t *b0, vlib_node_runtime_t *error_node, u16 *next0, u8 is_ip4)
static void tcp_cc_event(tcp_connection_t *tc, tcp_cc_event_t evt)
void transport_connection_reschedule(transport_connection_t *tc)
static u32 tcp_flight_size(const tcp_connection_t *tc)
Our estimate of the number of bytes in flight (pipe size)
This packet matches an "incomplete adjacency" and packets need to be passed to ARP to find rewrite st...
void tcp_bt_track_tx(tcp_connection_t *tc, u32 len)
Track a tcp tx burst.
#define VLIB_REGISTER_NODE(x,...)
static sack_scoreboard_hole_t * scoreboard_last_hole(sack_scoreboard_t *sb)
#define seq_max(_s1, _s2)
#define CLIB_PREFETCH(addr, size, type)
static_always_inline void vlib_buffer_enqueue_to_next(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 *nexts, uword count)
void tcp_send_window_update_ack(tcp_connection_t *tc)
Send window update ack.
sll srl srl sll sra u16x4 i
void tcp_program_retransmit(tcp_connection_t *tc)
void tcp_send_reset_w_pkt(tcp_connection_t *tc, vlib_buffer_t *pkt, u32 thread_index, u8 is_ip4)
Send reset without reusing existing buffer.
format_function_t format_tcp_state
static void tcp_update_rto(tcp_connection_t *tc)
#define clib_warning(format, args...)
#define tcp_in_recovery(tc)
void tcp_timer_delack_handler(tcp_connection_t *tc)
Delayed ack timer handler.
struct _transport_connection transport_connection_t
#define TCP_TO_TIMER_TICK
Factor for converting ticks to timer ticks.
static u32 tcp_window_to_advertise(tcp_connection_t *tc, tcp_state_t state)
Compute and return window to advertise, scaled as per RFC1323.
u32 adj_index_t
An index for adjacencies.
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
#define tcp_zero_rwnd_sent_on(tc)
static uword round_pow2(uword x, uword pow2)
u16 mss
Maximum segment size advertised.
static u32 tcp_available_cc_snd_space(const tcp_connection_t *tc)
Estimate of how many bytes we can still push into the network.
vlib_main_t vlib_node_runtime_t * node
static void * ip6_next_header(ip6_header_t *i)
static int tcp_retransmit_no_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Fast retransmit without SACK info.
static void * vlib_buffer_push_tcp(vlib_buffer_t *b, u16 sp_net, u16 dp_net, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
static void tcp_make_ack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to ACK.
static u32 transport_max_tx_dequeue(transport_connection_t *tc)
#define seq_geq(_s1, _s2)
u16 ip6_tcp_udp_icmp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip6_header_t *ip0, int *bogus_lengthp)
static int tcp_make_established_options(tcp_connection_t *tc, tcp_options_t *opts)
void tcp_bt_check_app_limited(tcp_connection_t *tc)
Check if sample to be generated is app limited.
u16 cached_next_index
Next frame index that vector arguments were last enqueued to last time this node ran.
static void tcp_cc_init_rxt_timeout(tcp_connection_t *tc)
Reset congestion control, switch cwnd to loss window and try again.
static void tcp_output_push_ip(vlib_main_t *vm, vlib_buffer_t *b0, tcp_connection_t *tc0, u8 is_ip4)
#define tcp_recovery_on(tc)
static u8 * format_tcp_tx_trace(u8 *s, va_list *args)
u16 ip4_tcp_udp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip4_header_t *ip0)
void transport_connection_tx_pacer_reset_bucket(transport_connection_t *tc, u32 bucket)
Reset tx pacer bucket.
void tcp_bt_track_rxt(tcp_connection_t *tc, u32 start, u32 end)
Track a tcp retransmission.
void tcp_update_burst_snd_vars(tcp_connection_t *tc)
Update burst send vars.
#define TRANSPORT_PACER_MIN_BURST
static uword ip6_address_is_link_local_unicast(const ip6_address_t *a)
#define clib_mem_unaligned(pointer, type)
static void tcp_update_rcv_wnd(tcp_connection_t *tc)
struct _sack_scoreboard_hole sack_scoreboard_hole_t
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
void tcp_send_ack(tcp_connection_t *tc)
static void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
void transport_connection_tx_pacer_update_bytes(transport_connection_t *tc, u32 bytes)
int tcp_retransmit_first_unacked(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
Retransmit first unacked segment.
template key/value backing page structure
#define tcp_opts_wscale(_to)
u32 tsval
Timestamp value.
static void tcp_timer_update(tcp_timer_wheel_t *tw, tcp_connection_t *tc, u8 timer_id, u32 interval)
u32 tsecr
Echoed/reflected time stamp.
static void * vlib_buffer_push_ip6(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto)
Push IPv6 header to buffer.
#define tcp_fastrecovery_first(tc)
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
static u8 tcp_max_tx_deq(tcp_connection_t *tc)
ip_lookup_next_t lookup_next_index
Next hop after ip4-lookup.
u32 next_buffer
Next buffer for this linked-list of buffers.
#define foreach_tcp6_reset_next
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
static tcp_worker_ctx_t * tcp_get_worker(u32 thread_index)
void session_transport_closed_notify(transport_connection_t *tc)
Notification from transport that it is closed.
VLIB buffer representation.
int session_stream_connect_notify(transport_connection_t *tc, session_error_t err)
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
static void tcp_make_ack_i(tcp_connection_t *tc, vlib_buffer_t *b, tcp_state_t state, u8 flags)
Prepare ACK.
static int tcp_make_reset_in_place(vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4)
#define TCP_OPTION_LEN_MSS
sack_scoreboard_hole_t * scoreboard_next_rxt_hole(sack_scoreboard_t *sb, sack_scoreboard_hole_t *start, u8 have_unsent, u8 *can_rescue, u8 *snd_limited)
Figure out the next hole to retransmit.
u16 ip6_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
static f64 tcp_time_now_us(u32 thread_index)
void scoreboard_init_rxt(sack_scoreboard_t *sb, u32 snd_una)
static void tcp_connection_set_state(tcp_connection_t *tc, tcp_state_t state)
struct clib_bihash_value offset
template key/value backing page structure
u32 tcp_initial_window_to_advertise(tcp_connection_t *tc)
Compute initial window and scale factor.
#define TCP_USE_SACKS
Disable only for testing.
vl_api_dhcp_client_state_t state
static u32 vlib_num_workers()
void tcp_connection_cleanup(tcp_connection_t *tc)
Cleans up connection state.
static u32 tcp_buffer_len(vlib_buffer_t *b)
static u8 tcp_retransmit_should_retry_head(tcp_connection_t *tc, sack_scoreboard_t *sb)
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
vlib_main_t vlib_node_runtime_t vlib_frame_t * frame
u16 flags
Copy of main node flags.
static u8 tcp_timer_is_active(tcp_connection_t *tc, tcp_timers_e timer)
static u16 tcp_compute_checksum(tcp_connection_t *tc, vlib_buffer_t *b)
enum _tcp_reset_next tcp_reset_next_t
static u32 transport_max_rx_enqueue(transport_connection_t *tc)
#define tcp_opts_sack_permitted(_to)
static void vlib_buffer_free_one(vlib_main_t *vm, u32 buffer_index)
Free one buffer. Shorthand to free a single buffer chain.
tcp_connection_t tcp_connection
static u32 tcp_tstamp(tcp_connection_t *tc)
Generate timestamp for tcp connection.
void tcp_program_ack(tcp_connection_t *tc)
static void * vlib_buffer_push_ip6_custom(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto, u32 flow_label)
Push IPv6 header to buffer.
static void * tcp_reuse_buffer(vlib_main_t *vm, vlib_buffer_t *b)
static_always_inline void vlib_get_buffers(vlib_main_t *vm, u32 *bi, vlib_buffer_t **b, int count)
Translate array of buffer indices into buffer pointers.
vlib_node_registration_t tcp4_reset_node
(constructor) VLIB_REGISTER_NODE (tcp4_reset_node)
#define VLIB_NODE_FLAG_TRACE
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
#define CLIB_CACHE_LINE_BYTES
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
static void tcp_enqueue_to_output(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4)
static u32 vlib_buffer_alloc(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Allocate buffers into supplied array.
static tcp_main_t * vnet_get_tcp_main()
u32 tco_next_node[2]
Session layer edge indices to tcp output.
static char * tcp_error_strings[]
static void * vlib_buffer_push_ip4(vlib_main_t *vm, vlib_buffer_t *b, ip4_address_t *src, ip4_address_t *dst, int proto, u8 csum_offload)
Push IPv4 header to buffer.
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
enum _tcp_state tcp_state_t
u32 transport_connection_tx_pacer_burst(transport_connection_t *tc)
Get tx pacer max burst.
#define tcp_worker_stats_inc(_wrk, _stat, _val)
#define tcp_zero_rwnd_sent_off(tc)
u32 * tx_buffers
tx buffer free list
static void session_add_pending_tx_buffer(u32 thread_index, u32 bi, u32 next_node)
Add session node pending buffer with custom node.
adj_index_t adj_nbr_find(fib_protocol_t nh_proto, vnet_link_t link_type, const ip46_address_t *nh_addr, u32 sw_if_index)
Lookup neighbor adjacency.
#define TCP_EVT(_evt, _args...)
static int tcp_make_options(tcp_connection_t *tc, tcp_options_t *opts, tcp_state_t state)
static uword pool_elts(void *v)
Number of active elements in a pool.