FD.io VPP  v21.06-3-gbb25fbf28
Vector Packet Processing
tcp_output.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <vnet/tcp/tcp.h>
17 #include <vnet/tcp/tcp_inlines.h>
18 #include <math.h>
19 #include <vnet/ip/ip4_inlines.h>
20 #include <vnet/ip/ip6_inlines.h>
21 
22 typedef enum _tcp_output_next
23 {
30 
/* X-macro table of (symbol, node-name string) pairs for the tcp4 output
 * "next" entries. The expansion of `_` is supplied by whoever uses the
 * list (no use is visible in this listing). */
31 #define foreach_tcp4_output_next \
32  _ (DROP, "error-drop") \
33  _ (IP_LOOKUP, "ip4-lookup") \
34  _ (IP_REWRITE, "ip4-rewrite") \
35  _ (IP_ARP, "ip4-arp")
36 
/* X-macro table of (symbol, node-name string) pairs for the tcp6 output
 * "next" entries; same symbols as the tcp4 list, with ip6 node names. */
37 #define foreach_tcp6_output_next \
38  _ (DROP, "error-drop") \
39  _ (IP_LOOKUP, "ip6-lookup") \
40  _ (IP_REWRITE, "ip6-rewrite") \
41  _ (IP_ARP, "ip6-discover-neighbor")
42 
/* String table generated by including tcp_error.def with tcp_error(n,s)
 * temporarily defined to emit only the string argument `s`, so there is
 * one entry per tcp_error line in that file. */
43 static char *tcp_error_strings[] = {
44 #define tcp_error(n,s) s,
45 #include <vnet/tcp/tcp_error.def>
46 #undef tcp_error
47 };
48 
49 typedef struct
50 {
54 
/*
 * Format callback for a TCP tx trace record.
 *
 * Consumes three va_list arguments in order: a vlib_main_t * and a
 * vlib_node_t * (both marked unused) and the tcp_tx_trace_t * to print.
 * Appends the connection id and state, then a newline, white space of
 * width format_get_indent (s) and the traced TCP header.
 *
 * @return the extended format string
 *
 * NOTE(review): `tc` is used below but its declaration (listing line 61)
 * is missing from this listing.
 */
55 static u8 *
56 format_tcp_tx_trace (u8 * s, va_list * args)
57 {
58  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
60  tcp_tx_trace_t *t = va_arg (*args, tcp_tx_trace_t *);
62  u32 indent = format_get_indent (s);
63 
64  s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
65  format_tcp_state, tc->state, format_white_space, indent,
66  format_tcp_header, &t->tcp_header, 128);
67 
68  return s;
69 }
70 
71 #ifndef CLIB_MARCH_VARIANT
/*
 * Returns the smallest shift count, capped at TCP_MAX_WND_SCALE, for which
 * `window >> shift` no longer exceeds TCP_WND_MAX.
 *
 * NOTE(review): the signature line (listing line 73) is missing from this
 * listing; `window` is presumably the only parameter.
 */
72 static u8
74 {
75  u8 wnd_scale = 0;
76  while (wnd_scale < TCP_MAX_WND_SCALE && (window >> wnd_scale) > TCP_WND_MAX)
77  wnd_scale++;
78  return wnd_scale;
79 }
80 
81 /**
82  * TCP's initial window
83  */
/*
 * The visible body unconditionally returns tcp_cfg.min_rx_fifo; the
 * mss-based computation is kept only as a commented-out remnant.
 *
 * NOTE(review): the return type and signature (listing lines 84-85) are
 * missing from this listing.
 */
86 {
87  /* RFC 6928 recommends the value lower. However at the time our connections
88  * are initialized, fifos may not be allocated. Therefore, advertise the
89  * smallest possible unscaled window size and update once fifos are
90  * assigned to the session.
91  */
92  /*
93  tcp_update_rcv_mss (tc);
94  TCP_IW_N_SEGMENTS * tc->mss;
95  */
96  return tcp_cfg.min_rx_fifo;
97 }
98 
99 /**
100  * Compute initial window and scale factor. As per RFC1323, window field in
101  * SYN and SYN-ACK segments is never scaled.
102  */
/*
 * Side effects: may set tc->rcv_wscale (derived from tcp_cfg.max_rx_fifo)
 * and always sets tc->rcv_wnd to tcp_initial_wnd_unscaled (tc).
 *
 * @return tc->rcv_wnd clamped to TCP_WND_MAX
 *
 * NOTE(review): the signature line (listing line 104) is missing here.
 */
103 u32
105 {
106  /* Compute rcv wscale only if peer advertised support for it */
/* i.e. skipped only when in SYN_RCVD and the peer's options lack wscale */
107  if (tc->state != TCP_STATE_SYN_RCVD || tcp_opts_wscale (&tc->rcv_opts))
108  tc->rcv_wscale = tcp_window_compute_scale (tcp_cfg.max_rx_fifo);
109 
110  tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
111 
112  return clib_min (tc->rcv_wnd, TCP_WND_MAX);
113 }
114 
/*
 * Recompute tc->rcv_wnd from the space currently available for rx enqueue.
 *
 * The window is never set below what is still outstanding from the
 * previous advertisement (observed_wnd), is rounded down to a multiple of
 * 1 << rcv_wscale, is forced to 0 if it is smaller than rcv_opts.mss, and
 * is capped at TCP_WND_MAX << rcv_wscale.
 *
 * NOTE(review): the signature line (listing line 116) is missing here.
 */
115 static inline void
117 {
118  u32 available_space, wnd;
119  i32 observed_wnd;
120 
121  /*
122  * Figure out how much space we have available
123  */
124  available_space = transport_max_rx_enqueue (&tc->connection);
125 
126  /*
127  * Use the above and what we know about what we've previously advertised
128  * to compute the new window
129  */
/* last stored window minus the (rcv_nxt - rcv_las) delta */
130  observed_wnd = (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
131 
132  /* Check if we are about to retract the window. Do the comparison before
133  * rounding to avoid errors. Per RFC7323 sec. 2.4 we could remove this */
134  if (PREDICT_FALSE ((i32) available_space < observed_wnd))
135  {
/* keep the larger, already-advertised value (clamped at 0) */
136  wnd = round_down_pow2 (clib_max (observed_wnd, 0), 1 << tc->rcv_wscale);
137  TCP_EVT (TCP_EVT_RCV_WND_SHRUNK, tc, observed_wnd, available_space);
138  }
139  else
140  {
141  /* Make sure we have a multiple of 1 << rcv_wscale. We round down to
142  * avoid advertising a window larger than what can be buffered */
143  wnd = round_down_pow2 (available_space, 1 << tc->rcv_wscale);
144  }
145 
/* anything smaller than rcv_opts.mss is stored as a zero window */
146  if (PREDICT_FALSE (wnd < tc->rcv_opts.mss))
147  wnd = 0;
148 
149  tc->rcv_wnd = clib_min (wnd, TCP_WND_MAX << tc->rcv_wscale);
150 }
151 
152 /**
153  * Compute and return window to advertise, scaled as per RFC1323
154  */
/*
 * For states at or past ESTABLISHED: refreshes tc->rcv_wnd through
 * tcp_update_rcv_wnd () and returns it shifted right by tc->rcv_wscale.
 *
 * NOTE(review): the signature (listing line 156) and the statement
 * executed when state < TCP_STATE_ESTABLISHED (listing line 159) are
 * missing from this listing.
 */
155 static inline u32
157 {
158  if (state < TCP_STATE_ESTABLISHED)
160 
161  tcp_update_rcv_wnd (tc);
162  return tc->rcv_wnd >> tc->rcv_wscale;
163 }
164 
/*
 * Fill `opts` with the options used for a SYN: MSS (tc->mss), window scale
 * (tc->rcv_wscale), timestamp (tsval from tcp_time_tstamp, tsecr 0) and,
 * when TCP_USE_SACKS is set, SACK-permitted.
 *
 * @return `len`
 *
 * NOTE(review): the signature (listing line 166) and every statement that
 * updates `len` (listing lines 172, 176, 181, 186, 190) are missing from
 * this listing; only its initialisation to 0 and the return are visible.
 */
165 static int
167 {
168  u8 len = 0;
169 
170  opts->flags |= TCP_OPTS_FLAG_MSS;
171  opts->mss = tc->mss;
173 
174  opts->flags |= TCP_OPTS_FLAG_WSCALE;
175  opts->wscale = tc->rcv_wscale;
177 
178  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
179  opts->tsval = tcp_time_tstamp (tc->c_thread_index);
180  opts->tsecr = 0;
182 
183  if (TCP_USE_SACKS)
184  {
185  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
187  }
188 
189  /* Align to needed boundary */
191  return len;
192 }
193 
/*
 * Fill `opts` with the options used for a SYN-ACK. MSS is always set;
 * window scale, timestamp and SACK-permitted are set only if the matching
 * option is present in tc->rcv_opts. The timestamp echoes
 * tc->tsval_recent in tsecr.
 *
 * @return `len`
 *
 * NOTE(review): the signature (listing line 195) and the statements that
 * update `len` (listing lines 201, 207, 215, 221, 225) are missing from
 * this listing.
 */
194 static int
196 {
197  u8 len = 0;
198 
199  opts->flags |= TCP_OPTS_FLAG_MSS;
200  opts->mss = tc->mss;
202 
203  if (tcp_opts_wscale (&tc->rcv_opts))
204  {
205  opts->flags |= TCP_OPTS_FLAG_WSCALE;
206  opts->wscale = tc->rcv_wscale;
208  }
209 
210  if (tcp_opts_tstamp (&tc->rcv_opts))
211  {
212  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
213  opts->tsval = tcp_time_tstamp (tc->c_thread_index);
214  opts->tsecr = tc->tsval_recent;
216  }
217 
218  if (tcp_opts_sack_permitted (&tc->rcv_opts))
219  {
220  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
222  }
223 
224  /* Align to needed boundary */
226  return len;
227 }
228 
/*
 * Fill `opts` for a segment on an established connection. Flags are reset
 * first. A timestamp is added if tc->rcv_opts has one. If SACK is
 * permitted and tc->snd_sacks is non-empty, `opts` is pointed at a slice
 * of the SACK blocks starting at tc->snd_sack_pos, and the position is
 * advanced so that later calls start further into the list (wrapping to 0
 * once it reaches the end).
 *
 * @return `len`
 *
 * NOTE(review): the signature (listing line 230), the statements that
 * update `len` (listing lines 241, 255, 260) and the second argument of
 * the clib_min call (listing line 253) are missing from this listing.
 */
229 static int
231 {
232  u8 len = 0;
233 
234  opts->flags = 0;
235 
236  if (tcp_opts_tstamp (&tc->rcv_opts))
237  {
238  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
239  opts->tsval = tcp_tstamp (tc);
240  opts->tsecr = tc->tsval_recent;
242  }
243  if (tcp_opts_sack_permitted (&tc->rcv_opts))
244  {
245  if (vec_len (tc->snd_sacks))
246  {
247  opts->flags |= TCP_OPTS_FLAG_SACK;
/* wrap the cursor once it has run past the end of the vector */
248  if (tc->snd_sack_pos >= vec_len (tc->snd_sacks))
249  tc->snd_sack_pos = 0;
250  opts->sacks = &tc->snd_sacks[tc->snd_sack_pos];
251  opts->n_sack_blocks = vec_len (tc->snd_sacks) - tc->snd_sack_pos;
252  opts->n_sack_blocks = clib_min (opts->n_sack_blocks,
254  tc->snd_sack_pos += opts->n_sack_blocks;
256  }
257  }
258 
259  /* Align to needed boundary */
261  return len;
262 }
263 
/*
 * Dispatch on `state` to the matching option builder:
 *  - SYN_SENT -> tcp_make_syn_options
 *  - SYN_RCVD -> tcp_make_synack_options
 *  - ESTABLISHED and all the later/closing states listed below, including
 *    CLOSED -> tcp_make_established_options
 * Any other state logs a warning and yields 0.
 *
 * @return the value returned by the selected builder, or 0
 *
 * NOTE(review): the signature (listing lines 265-266) is missing here.
 */
264 always_inline int
267 {
268  switch (state)
269  {
270  case TCP_STATE_ESTABLISHED:
271  case TCP_STATE_CLOSE_WAIT:
272  case TCP_STATE_FIN_WAIT_1:
273  case TCP_STATE_LAST_ACK:
274  case TCP_STATE_CLOSING:
275  case TCP_STATE_FIN_WAIT_2:
276  case TCP_STATE_TIME_WAIT:
277  case TCP_STATE_CLOSED:
278  return tcp_make_established_options (tc, opts);
279  case TCP_STATE_SYN_RCVD:
280  return tcp_make_synack_options (tc, opts);
281  case TCP_STATE_SYN_SENT:
282  return tcp_make_syn_options (tc, opts);
283  default:
284  clib_warning ("State not handled! %d", state);
285  return 0;
286  }
287 }
288 
289 /**
290  * Update burst send vars
291  *
292  * - Updates snd_mss to reflect the effective segment size that we can send
293  * by taking into account all TCP options, including SACKs.
294  * - Cache 'on the wire' options for reuse
295  * - Updates receive window which can be reused for a burst.
296  *
297  * This should *only* be called when doing bursts
298  */
/*
 * NOTE(review): the signature (listing line 300), the statement guarded
 * by TCP_CFG_F_RATE_SAMPLE (listing line 319) and the body of the
 * `snd_una == snd_nxt` branch (listing lines 323-324) are missing from
 * this listing.
 */
299 void
301 {
302  tcp_main_t *tm = &tcp_main;
303 
304  /* Compute options to be used for connection. These may be reused when
305  * sending data or to compute the effective mss (snd_mss) */
306  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts,
307  TCP_STATE_ESTABLISHED);
308 
309  /* XXX check if MTU has been updated */
/* effective payload size: smaller of local and peer mss, minus options */
310  tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
311  ASSERT (tc->snd_mss > 0);
312 
/* serialise the options into this thread's cached_opts buffer */
313  tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts,
314  &tc->snd_opts);
315 
316  tcp_update_rcv_wnd (tc);
317 
318  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
320 
321  if (tc->snd_una == tc->snd_nxt)
322  {
325  }
326 
327  if (tc->flags & TCP_CONN_PSH_PENDING)
328  {
329  u32 max_deq = transport_max_tx_dequeue (&tc->connection);
330  /* Last byte marked for push */
331  tc->psh_seq = tc->snd_una + max_deq - 1;
332  }
333 }
334 
335 #endif /* CLIB_MARCH_VARIANT */
336 
/*
 * Reset a buffer's metadata so that it can be reused: masks b->flags down
 * to the bits below VLIB_BUFFER_NEXT_PRESENT, zeroes current_data and
 * current_length and clears the tcp flags in the buffer's vnet metadata.
 *
 * NOTE(review): the signature (listing line 338), the statement executed
 * when VLIB_BUFFER_NEXT_PRESENT is set (341), two further statements
 * (346, 348) and the return statement (350) are missing from this listing.
 */
337 static void *
339 {
340  if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
342  /* Zero all flags but free list index and trace flag */
343  b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
344  b->current_data = 0;
345  b->current_length = 0;
347  vnet_buffer (b)->tcp.flags = 0;
349  /* Leave enough space for headers */
351 }
352 
353 #ifndef CLIB_MARCH_VARIANT
/*
 * Initialise a freshly allocated, unchained buffer for TCP use: asserts
 * that no next buffer is present, marks it locally originated, and zeroes
 * current_data and the tcp flags in the buffer's vnet metadata.
 *
 * NOTE(review): the signature (listing line 355), one statement (359) and
 * the return statement (363) are missing from this listing.
 */
354 static void *
356 {
357  ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
358  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
360  b->current_data = 0;
361  vnet_buffer (b)->tcp.flags = 0;
362  /* Leave enough space for headers */
364 }
365 
366 
367 /* Compute TCP checksum in software when offloading is disabled for a connection */
/*
 * IPv6 variant: seeds the sum with the buffer-chain length and
 * IP_PROTOCOL_TCP (each converted with clib_host_to_net_u16), folds in the
 * source and destination ip6 addresses one uword at a time, then hands
 * off to ip_calculate_l4_checksum for the buffer contents.
 *
 * @param src source address (read through src->ip6)
 * @param dst destination address (read through dst->ip6)
 * @return value returned by ip_calculate_l4_checksum
 *
 * NOTE(review): the first signature line (listing line 369) is missing;
 * `vm` and `p0` are presumably the leading parameters.
 * NOTE(review): payload_length_host_byte_order is a u16 here but a u32 in
 * the ip4 variant; confirm chain lengths cannot exceed 65535.
 */
368 u16
370  ip46_address_t * src, ip46_address_t * dst)
371 {
372  ip_csum_t sum0;
373  u16 payload_length_host_byte_order;
374  u32 i;
375 
376  /* Initialize checksum with ip header. */
377  sum0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0)) +
378  clib_host_to_net_u16 (IP_PROTOCOL_TCP);
379  payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
380 
/* addresses are read unaligned, one uword of each per iteration */
381  for (i = 0; i < ARRAY_LEN (src->ip6.as_uword); i++)
382  {
383  sum0 = ip_csum_with_carry
384  (sum0, clib_mem_unaligned (&src->ip6.as_uword[i], uword));
385  sum0 = ip_csum_with_carry
386  (sum0, clib_mem_unaligned (&dst->ip6.as_uword[i], uword));
387  }
388 
389  return ip_calculate_l4_checksum (vm, p0, sum0,
390  payload_length_host_byte_order, NULL, 0,
391  NULL);
392 }
393 
/*
 * IPv4 software TCP checksum: seeds the sum with the buffer-chain length
 * plus (IP_PROTOCOL_TCP << 16) converted with clib_host_to_net_u32, folds
 * in the 32-bit source and destination addresses, then hands off to
 * ip_calculate_l4_checksum for the buffer contents.
 *
 * @param src source address (read through src->ip4)
 * @param dst destination address (read through dst->ip4)
 * @return value returned by ip_calculate_l4_checksum
 *
 * NOTE(review): the first signature line (listing line 395) is missing;
 * `vm` and `p0` are presumably the leading parameters.
 */
394 u16
396  ip46_address_t * src, ip46_address_t * dst)
397 {
398  ip_csum_t sum0;
399  u32 payload_length_host_byte_order;
400 
401  payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
402  sum0 =
403  clib_host_to_net_u32 (payload_length_host_byte_order +
404  (IP_PROTOCOL_TCP << 16));
405 
406  sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&src->ip4, u32));
407  sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&dst->ip4, u32));
408 
409  return ip_calculate_l4_checksum (vm, p0, sum0,
410  payload_length_host_byte_order, NULL, 0,
411  NULL);
412 }
413 
/*
 * Select between software checksum and hardware offload for buffer `b`.
 *
 * With TCP_CFG_F_NO_CSUM_OFFLOAD set on the connection, a per-address-
 * family helper is invoked with the local and remote addresses. Otherwise
 * the buffer is flagged for TCP checksum offload and `checksum` keeps its
 * initial value 0.
 *
 * @return `checksum`
 *
 * NOTE(review): the signature (listing line 415) and the lines naming the
 * ip4/ip6 helpers and assigning their result (424, 427) are missing from
 * this listing.
 */
414 static inline u16
416 {
417  u16 checksum = 0;
418  if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
419  {
420  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
421  vlib_main_t *vm = wrk->vm;
422 
423  if (tc->c_is_ip4)
425  (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
426  else
428  (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
429  }
430  else
431  {
432  vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM);
433  }
434  return checksum;
435 }
436 
437 /**
438  * Prepare ACK
439  */
/*
 * Pushes a TCP header with the given `flags` onto `b`, using snd_nxt as
 * sequence number, rcv_nxt as ack number and the window obtained from
 * tcp_window_to_advertise (tc, state). Options always come from
 * tcp_make_established_options and are written right after the header,
 * before the checksum is computed. When the advertised window is 0, a
 * dequeue notification is requested on the rx fifo.
 *
 * NOTE(review): the first signature line (listing line 441) and the
 * statements at listing lines 467 and 470 (the second being the whole
 * else branch) are missing from this listing.
 */
440 static inline void
442  u8 flags)
443 {
444  tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
445  u8 tcp_opts_len, tcp_hdr_opts_len;
446  tcp_header_t *th;
447  u16 wnd;
448 
449  wnd = tcp_window_to_advertise (tc, state);
450 
451  /* Make and write options */
452  tcp_opts_len = tcp_make_established_options (tc, snd_opts);
453  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
454 
455  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
456  tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
457 
458  tcp_options_write ((u8 *) (th + 1), snd_opts);
459 
460  th->checksum = tcp_compute_checksum (tc, b);
461 
462  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
463 
464  if (wnd == 0)
465  {
466  transport_rx_fifo_req_deq_ntf (&tc->connection);
468  }
469  else
471 }
472 
473 /**
474  * Convert buffer to ACK
475  */
/*
 * Builds a plain ACK via tcp_make_ack_i (ESTABLISHED state, ACK flag) and
 * then sets tc->rcv_las to tc->rcv_nxt.
 *
 * NOTE(review): the signature line (listing line 477) is missing here.
 */
476 static inline void
478 {
479  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK);
480  TCP_EVT (TCP_EVT_ACK_SENT, tc);
481  tc->rcv_las = tc->rcv_nxt;
482 }
483 
484 /**
485  * Convert buffer to FIN-ACK
486  */
/*
 * Thin wrapper: tcp_make_ack_i with ESTABLISHED state and FIN|ACK flags.
 * Unlike the plain ACK builder it does not touch tc->rcv_las.
 *
 * NOTE(review): the signature line (listing line 488) is missing here.
 */
487 static void
489 {
490  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK);
491 }
492 
493 /**
494  * Convert buffer to SYN
495  */
/*
 * Pushes a TCP header with the SYN flag onto `b`, using tc->iss as
 * sequence number and the unscaled initial window, writes the SYN options
 * after the header and computes the checksum last.
 *
 * NOTE(review): the signature line (listing line 497) is missing here.
 */
496 void
498 {
499  u8 tcp_hdr_opts_len, tcp_opts_len;
500  tcp_header_t *th;
501  u16 initial_wnd;
502  tcp_options_t snd_opts;
503 
504  initial_wnd = tcp_initial_window_to_advertise (tc);
505 
506  /* Make and write options */
/* zero first: the SYN option builder only ORs into opts->flags */
507  clib_memset (&snd_opts, 0, sizeof (snd_opts));
508  tcp_opts_len = tcp_make_syn_options (tc, &snd_opts);
509  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
510 
511  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
512  tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN,
513  initial_wnd);
514  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
515  tcp_options_write ((u8 *) (th + 1), &snd_opts);
516  th->checksum = tcp_compute_checksum (tc, b);
517 }
518 
519 /**
520  * Convert buffer to SYN-ACK
521  */
/*
 * Pushes a TCP header with SYN|ACK onto `b`, using tc->iss as sequence
 * number, tc->rcv_nxt as ack number and the unscaled initial window,
 * writes the SYN-ACK options after the header and computes the checksum
 * last.
 *
 * NOTE(review): the signature line (listing line 523) is missing here.
 */
522 static void
524 {
525  tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
526  u8 tcp_opts_len, tcp_hdr_opts_len;
527  tcp_header_t *th;
528  u16 initial_wnd;
529 
/* zero first: the SYN-ACK option builder only ORs into opts->flags */
530  clib_memset (snd_opts, 0, sizeof (*snd_opts));
531  initial_wnd = tcp_initial_window_to_advertise (tc);
532  tcp_opts_len = tcp_make_synack_options (tc, snd_opts);
533  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
534 
535  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
536  tc->rcv_nxt, tcp_hdr_opts_len,
537  TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd);
538  tcp_options_write ((u8 *) (th + 1), snd_opts);
539 
540  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
541  th->checksum = tcp_compute_checksum (tc, b);
542 }
543 
/*
 * Prepare `b` for hand-off using tm->ipl_next_node[!is_ip4]: marks it
 * locally originated, clears its error, stores fib_index in the VLIB_TX
 * sw_if_index slot and 0 in the VLIB_RX slot.
 *
 * NOTE(review): the first signature line (listing line 545), the start of
 * the enqueue call (557) and the statement executed when running on
 * thread 0 with workers present (561) are missing from this listing.
 */
544 static void
546  u8 is_ip4, u32 fib_index)
547 {
548  tcp_main_t *tm = &tcp_main;
549  vlib_main_t *vm = wrk->vm;
550 
551  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
552  b->error = 0;
553 
554  vnet_buffer (b)->sw_if_index[VLIB_TX] = fib_index;
555  vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
556 
558  tm->ipl_next_node[!is_ip4]);
559 
560  if (vm->thread_index == 0 && vlib_num_workers ())
562 }
563 
/*
 * Prepare `b` for hand-off using wrk->tco_next_node[!is_ip4]: marks it
 * locally originated and clears its error.
 *
 * NOTE(review): the first signature line (listing line 565) and the start
 * of the enqueue call (571) are missing from this listing.
 */
564 static void
566  u8 is_ip4)
567 {
568  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
569  b->error = 0;
570 
572  wrk->tco_next_node[!is_ip4]);
573 }
574 
575 #endif /* CLIB_MARCH_VARIANT */
576 
/*
 * Turn a received packet's buffer into a reset aimed back at its sender.
 *
 * Saves the packet's addresses, ports and seq/ack fields, reuses the
 * buffer, then pushes a new IP header with source and destination swapped
 * and computes the TCP checksum in software.
 *
 * @return 0 on the visible paths
 *
 * NOTE(review): the signature (listing line 578), the declaration of
 * src_port/dst_port (585), the initial assignment of `flags` (610) and
 * the start of the TCP header push (633) are missing from this listing.
 */
577 static int
579 {
580  ip4_header_t *ih4;
581  ip6_header_t *ih6;
582  tcp_header_t *th;
583  ip4_address_t src_ip4, dst_ip4;
584  ip6_address_t src_ip6, dst_ip6;
586  u32 tmp, len, seq, ack;
587  u8 flags;
588 
589  /* Find IP and TCP headers */
590  th = tcp_buffer_hdr (b);
591 
592  /* Save src and dst ip */
593  if (is_ip4)
594  {
595  ih4 = vlib_buffer_get_current (b);
596  ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40);
597  src_ip4.as_u32 = ih4->src_address.as_u32;
598  dst_ip4.as_u32 = ih4->dst_address.as_u32;
599  }
600  else
601  {
602  ih6 = vlib_buffer_get_current (b);
603  ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60);
604  clib_memcpy_fast (&src_ip6, &ih6->src_address, sizeof (ip6_address_t));
605  clib_memcpy_fast (&dst_ip6, &ih6->dst_address, sizeof (ip6_address_t));
606  }
607 
608  src_port = th->src_port;
609  dst_port = th->dst_port;
611 
612  /*
613  * RFC 793. If the ACK bit is off, sequence number zero is used,
614  * <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK>
615  * If the ACK bit is on,
616  * <SEQ=SEG.ACK><CTL=RST>
617  */
618  if (tcp_ack (th))
619  {
/* ack_number is copied as found in the header, without conversion */
620  seq = th->ack_number;
621  ack = 0;
622  }
623  else
624  {
625  flags |= TCP_FLAG_ACK;
626  tmp = clib_net_to_host_u32 (th->seq_number);
/* SYN and FIN each add one to the segment length */
627  len = vnet_buffer (b)->tcp.data_len + tcp_is_syn (th) + tcp_is_fin (th);
628  ack = clib_host_to_net_u32 (tmp + len);
629  seq = 0;
630  }
631 
632  tcp_reuse_buffer (vm, b);
634  sizeof (tcp_header_t), flags, 0);
635 
636  if (is_ip4)
637  {
/* saved dst becomes the new source and saved src the new destination */
638  ih4 = vlib_buffer_push_ip4 (vm, b, &dst_ip4, &src_ip4,
639  IP_PROTOCOL_TCP, 1);
640  th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
641  }
642  else
643  {
644  int bogus = ~0;
645  ih6 = vlib_buffer_push_ip6 (vm, b, &dst_ip6, &src_ip6, IP_PROTOCOL_TCP);
646  th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
647  ASSERT (!bogus);
648  }
649 
650  return 0;
651 }
652 
653 #ifndef CLIB_MARCH_VARIANT
654 /**
655  * Send reset without reusing existing buffer
656  *
657  * It extracts connection info out of original packet
658  */
/*
 * Allocates a new buffer and returns silently, after bumping the
 * no_buffer stat, if none is available. Builds a TCP header with the
 * packet's ports swapped, prepends an IP header with the packet's
 * addresses swapped and hands the buffer to tcp_enqueue_to_ip_lookup with
 * the fib index derived from the packet's RX sw_if_index.
 *
 * NOTE(review): the first signature line (listing line 660), the
 * declaration of `wrk` (663) and the assignments to `flags` in both
 * branches (702, 708) are missing from this listing.
 */
659 void
661  u32 thread_index, u8 is_ip4)
662 {
664  vlib_main_t *vm = wrk->vm;
665  vlib_buffer_t *b;
666  u32 bi, sw_if_index, fib_index;
667  u8 tcp_hdr_len, flags = 0;
668  tcp_header_t *th, *pkt_th;
669  u32 seq, ack;
670  ip4_header_t *ih4, *pkt_ih4;
671  ip6_header_t *ih6, *pkt_ih6;
672  fib_protocol_t fib_proto;
673 
674  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
675  {
676  tcp_worker_stats_inc (wrk, no_buffer, 1);
677  return;
678  }
679 
680  b = vlib_get_buffer (vm, bi);
681  sw_if_index = vnet_buffer (pkt)->sw_if_index[VLIB_RX];
682  fib_proto = is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
683  fib_index = fib_table_get_index_for_sw_if_index (fib_proto, sw_if_index);
684  tcp_init_buffer (vm, b);
685 
686  /* Make and write options */
/* no options are added: the header length is the bare TCP header */
687  tcp_hdr_len = sizeof (tcp_header_t);
688 
689  if (is_ip4)
690  {
691  pkt_ih4 = vlib_buffer_get_current (pkt);
692  pkt_th = ip4_next_header (pkt_ih4);
693  }
694  else
695  {
696  pkt_ih6 = vlib_buffer_get_current (pkt);
697  pkt_th = ip6_next_header (pkt_ih6);
698  }
699 
700  if (tcp_ack (pkt_th))
701  {
703  seq = pkt_th->ack_number;
704  ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
705  }
706  else
707  {
709  seq = 0;
710  ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);
711  }
712 
713  th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
714  seq, ack, tcp_hdr_len, flags, 0);
715 
716  /* Swap src and dst ip */
717  if (is_ip4)
718  {
719  ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
720  ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
721  &pkt_ih4->src_address, IP_PROTOCOL_TCP,
722  tcp_csum_offload (tc));
723  th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
724  }
725  else
726  {
727  int bogus = ~0;
728  ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
729  0x60);
730  ih6 = vlib_buffer_push_ip6_custom (vm, b, &pkt_ih6->dst_address,
731  &pkt_ih6->src_address,
732  IP_PROTOCOL_TCP,
733  tc->ipv6_flow_label);
734  th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
735  ASSERT (!bogus);
736  }
737 
738  tcp_enqueue_to_ip_lookup (wrk, b, bi, is_ip4, fib_index);
739  TCP_EVT (TCP_EVT_RST_SENT, tc);
740  vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
741  TCP_ERROR_RST_SENT, 1);
742 }
743 
744 /**
745  * Build and set reset packet for connection
746  */
/*
 * Allocates a buffer and returns silently, after bumping the no_buffer
 * stat, if none is available. Builds a TCP header from the connection's
 * current state (snd_nxt, rcv_nxt, stored rcv_wnd scaled down by
 * rcv_wscale) with options chosen for tc->state, then enqueues the buffer
 * with tcp_enqueue_to_output and counts TCP_ERROR_RST_SENT.
 *
 * NOTE(review): the signature (listing line 748) and the assignment to
 * `flags` (769) are missing from this listing.
 */
747 void
749 {
750  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
751  vlib_main_t *vm = wrk->vm;
752  vlib_buffer_t *b;
753  u32 bi;
754  tcp_header_t *th;
755  u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len;
756  u8 flags;
757 
758  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
759  {
760  tcp_worker_stats_inc (wrk, no_buffer, 1);
761  return;
762  }
763  b = vlib_get_buffer (vm, bi);
764  tcp_init_buffer (vm, b);
765 
766  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
767  tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
/* uses the stored window as is; it is not recomputed here */
768  advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
770  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
771  tc->rcv_nxt, tcp_hdr_opts_len, flags,
772  advertise_wnd);
773  opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
774  th->checksum = tcp_compute_checksum (tc, b);
775  ASSERT (opts_write_len == tc->snd_opts_len);
776  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
777  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
778  TCP_EVT (TCP_EVT_RST_SENT, tc);
779  vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
780  TCP_ERROR_RST_SENT, 1);
781 }
782 
/*
 * Prepend the IP header for connection `tc` to `b`: an ip4 header built
 * from the connection's local/remote ip4 addresses (last argument taken
 * from tcp_csum_offload (tc)), or an ip6 header carrying
 * tc->ipv6_flow_label.
 *
 * NOTE(review): the first signature line (listing line 784) is missing.
 */
783 static void
785  vlib_buffer_t * b)
786 {
787  if (tc->c_is_ip4)
788  {
789  vlib_buffer_push_ip4 (wrk->vm, b, &tc->c_lcl_ip4, &tc->c_rmt_ip4,
790  IP_PROTOCOL_TCP, tcp_csum_offload (tc));
791  }
792  else
793  {
794  vlib_buffer_push_ip6_custom (wrk->vm, b, &tc->c_lcl_ip6, &tc->c_rmt_ip6,
795  IP_PROTOCOL_TCP, tc->ipv6_flow_label);
796  }
797 }
798 
799 /**
800  * Send SYN
801  *
802  * Builds a SYN packet for a half-open connection and sends it to ipx_lookup.
803  * The packet is not forwarded through tcpx_output to avoid doing lookups
804  * in the half_open pool.
805  */
/*
 * On buffer allocation failure the SYN retransmit timer is re-armed with
 * a value of 1 and the function returns without sending.
 *
 * NOTE(review): the signature line (listing line 807) is missing here.
 */
806 void
808 {
809  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
810  vlib_main_t *vm = wrk->vm;
811  vlib_buffer_t *b;
812  u32 bi;
813 
814  /*
815  * Setup retransmit and establish timers before requesting buffer
816  * such that we can return if we've run out.
817  */
818  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
819  tc->rto * TCP_TO_TIMER_TICK);
820 
821  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
822  {
823  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN, 1);
824  tcp_worker_stats_inc (wrk, no_buffer, 1);
825  return;
826  }
827 
828  b = vlib_get_buffer (vm, bi);
829  tcp_init_buffer (vm, b);
830  tcp_make_syn (tc, b);
831 
832  /* Measure RTT with this */
/* NOTE(review): the time index is 1 when workers exist, else 0, rather
 * than tc->c_thread_index as used elsewhere in this listing; presumably
 * deliberate for half-open connections, confirm */
833  tc->rtt_ts = tcp_time_now_us (vlib_num_workers ()? 1 : 0);
834  tc->rtt_seq = tc->snd_nxt;
835  tc->rto_boff = 0;
836 
837  tcp_push_ip_hdr (wrk, tc, b);
838  tcp_enqueue_to_ip_lookup (wrk, b, bi, tc->c_is_ip4, tc->c_fib_index);
839  TCP_EVT (TCP_EVT_SYN_SENT, tc);
840 }
841 
/*
 * Build a SYN-ACK for `tc` and enqueue it with tcp_enqueue_to_output.
 *
 * The retransmit timer is updated before the buffer is requested. If
 * allocation fails, the retransmit timer is re-armed with a value of 1
 * and nothing is sent. tc->rtt_ts is stamped just before the segment is
 * built.
 *
 * NOTE(review): the signature line (listing line 843) is missing here.
 */
842 void
844 {
845  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
846  vlib_main_t *vm = wrk->vm;
847  vlib_buffer_t *b;
848  u32 bi;
849 
850  ASSERT (tc->snd_una != tc->snd_nxt);
851  tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
852 
853  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
854  {
855  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT, 1);
856  tcp_worker_stats_inc (wrk, no_buffer, 1);
857  return;
858  }
859 
860  tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
861  b = vlib_get_buffer (vm, bi);
862  tcp_init_buffer (vm, b);
863  tcp_make_synack (tc, b);
864  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
865  TCP_EVT (TCP_EVT_SYNACK_SENT, tc);
866 }
867 
868 /**
869  * Send FIN
870  */
/*
 * If a FIN was already sent (TCP_CONN_FINSNT), snd_nxt is first moved
 * back by one so that the rebuilt FIN reuses the same sequence number; it
 * is advanced by one again after the segment is enqueued. On the first
 * send, FINSNT is set and FINPNDG cleared.
 *
 * On buffer allocation failure the retransmit timer is armed with a
 * value of 1, snd_nxt is restored if it was moved, and FINSNT is set so
 * that the retransmit path retries the FIN.
 *
 * NOTE(review): the signature line (listing line 872) is missing here.
 */
871 void
873 {
874  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
875  vlib_main_t *vm = wrk->vm;
876  vlib_buffer_t *b;
877  u32 bi;
878  u8 fin_snt = 0;
879 
880  fin_snt = tc->flags & TCP_CONN_FINSNT;
881  if (fin_snt)
882  tc->snd_nxt -= 1;
883 
884  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
885  {
886  /* Out of buffers so program fin retransmit ASAP */
887  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT, 1);
888  if (fin_snt)
889  tc->snd_nxt += 1;
890  else
891  /* Make sure retransmit retries a fin not data */
892  tc->flags |= TCP_CONN_FINSNT;
893  tcp_worker_stats_inc (wrk, no_buffer, 1);
894  return;
895  }
896 
897  /* If we have non-dupacks programmed, no need to send them */
898  if ((tc->flags & TCP_CONN_SNDACK) && !tc->pending_dupacks)
899  tc->flags &= ~TCP_CONN_SNDACK;
900 
901  b = vlib_get_buffer (vm, bi);
902  tcp_init_buffer (vm, b);
903  tcp_make_fin (tc, b);
904  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
905  TCP_EVT (TCP_EVT_FIN_SENT, tc);
906  /* Account for the FIN */
907  tc->snd_nxt += 1;
908  tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
909  if (!fin_snt)
910  {
911  tc->flags |= TCP_CONN_FINSNT;
912  tc->flags &= ~TCP_CONN_FINPNDG;
913  }
914 }
915 
916 /**
917  * Push TCP header and update connection variables. Should only be called
918  * for segments with data, not for 'control' packets.
919  */
/*
 * @param compute_opts   when set, options are rebuilt for tc->state;
 *                       otherwise the existing tc->snd_opts and
 *                       tc->snd_opts_len are used
 * @param maybe_burst    when set, the stored rcv_wnd is advertised as is
 *                       and the options are copied from the worker's
 *                       cached_opts; otherwise the window is recomputed
 *                       and the options are serialised from tc->snd_opts
 * @param update_snd_nxt when set, tc->snd_nxt advances by data_len
 *
 * PSH is added when tc->psh_seq falls within
 * [snd_nxt, snd_nxt + data_len). rcv_las, bytes_out and data_segs_out are
 * always updated.
 *
 * NOTE(review): the first signature line (listing line 921) and the
 * statements that compute data_len (929, 931) are missing from this
 * listing.
 */
920 always_inline void
922  u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
923 {
924  u8 tcp_hdr_opts_len, flags = TCP_FLAG_ACK;
925  u32 advertise_wnd, data_len;
926  tcp_main_t *tm = &tcp_main;
927  tcp_header_t *th;
928 
930  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
932 
933  vnet_buffer (b)->tcp.flags = 0;
934  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
935 
936  if (compute_opts)
937  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
938 
939  tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
940 
941  if (maybe_burst)
942  advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
943  else
944  advertise_wnd = tcp_window_to_advertise (tc, TCP_STATE_ESTABLISHED);
945 
946  if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
947  {
948  if (seq_geq (tc->psh_seq, snd_nxt)
949  && seq_lt (tc->psh_seq, snd_nxt + data_len))
950  flags |= TCP_FLAG_PSH;
951  }
952  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, snd_nxt,
953  tc->rcv_nxt, tcp_hdr_opts_len, flags,
954  advertise_wnd);
955 
956  if (maybe_burst)
957  {
958  clib_memcpy_fast ((u8 *) (th + 1),
959  tm->wrk_ctx[tc->c_thread_index].cached_opts,
960  tc->snd_opts_len);
961  }
962  else
963  {
964  u8 len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
965  ASSERT (len == tc->snd_opts_len);
966  }
967 
968  /*
969  * Update connection variables
970  */
971 
972  if (update_snd_nxt)
973  tc->snd_nxt += data_len;
974  tc->rcv_las = tc->rcv_nxt;
975 
976  tc->bytes_out += data_len;
977  tc->data_segs_out += 1;
978 
979  th->checksum = tcp_compute_checksum (tc, b);
980 
981  TCP_EVT (TCP_EVT_PKTIZE, tc);
982 }
983 
/*
 * Returns `data_len`, with a separate step for buffers that have
 * VLIB_BUFFER_NEXT_PRESENT set.
 *
 * NOTE(review): most of this function is missing from this listing: the
 * return type and signature (listing lines 984-985), the initialisation
 * of data_len (987) and the statement executed for chained buffers (989).
 */
986 {
988  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
990  return data_len;
991 }
992 
/*
 * Push a TCP header onto data buffer `b` for the connection behind
 * `tconn`, in burst mode: options are not recomputed and snd_nxt is
 * advanced.
 *
 * After pushing, RTT tracking is started if none is in progress
 * (rtt_ts == 0) and the connection is not in congestion recovery. The
 * retransmit timer is set, and rto_boff cleared, if the timer was not
 * already active.
 *
 * @return always 0
 *
 * NOTE(review): the signature (listing line 994) and the statement
 * guarded by TCP_CFG_F_RATE_SAMPLE (999) are missing from this listing.
 */
993 u32
995 {
996  tcp_connection_t *tc = (tcp_connection_t *) tconn;
997 
998  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1000 
1001  tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0, /* burst */ 1,
1002  /* update_snd_nxt */ 1);
1003 
1004  tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
1005  /* If not tracking an ACK, start tracking */
1006  if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
1007  {
1008  tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
1009  tc->rtt_seq = tc->snd_nxt;
1010  }
1011  if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)))
1012  {
1013  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1014  tcp_retransmit_timer_set (&wrk->timer_wheel, tc);
1015  tc->rto_boff = 0;
1016  }
1017  return 0;
1018 }
1019 
/*
 * Allocate a buffer, build an ACK for `tc` and enqueue it with
 * tcp_enqueue_to_output.
 *
 * If no buffer is available, only the receive window is refreshed and the
 * no_buffer stat incremented; no ACK is sent.
 *
 * NOTE(review): the signature line (listing line 1021) is missing here.
 */
1020 void
1022 {
1023  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1024  vlib_main_t *vm = wrk->vm;
1025  vlib_buffer_t *b;
1026  u32 bi;
1027 
1028  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1029  {
1030  tcp_update_rcv_wnd (tc);
1031  tcp_worker_stats_inc (wrk, no_buffer, 1);
1032  return;
1033  }
1034  b = vlib_get_buffer (vm, bi);
1035  tcp_init_buffer (vm, b);
1036  tcp_make_ack (tc, b);
1037  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1038 }
1039 
/*
 * Request a deferred ACK: unless TCP_CONN_SNDACK is already set, adds a
 * custom tx event for the connection (second argument 1) and sets the
 * flag, so repeated calls do not add further events.
 *
 * NOTE(review): the signature line (listing line 1041) is missing here.
 */
1040 void
1042 {
1043  if (!(tc->flags & TCP_CONN_SNDACK))
1044  {
1045  session_add_self_custom_tx_evt (&tc->connection, 1);
1046  tc->flags |= TCP_CONN_SNDACK;
1047  }
1048 }
1049 
/*
 * Same event/flag handling as the plain deferred-ACK request, and in
 * addition counts one more pending duplicate ACK. The counter saturates
 * at 255.
 *
 * NOTE(review): the signature line (listing line 1051) is missing here.
 */
1050 void
1052 {
1053  if (!(tc->flags & TCP_CONN_SNDACK))
1054  {
1055  session_add_self_custom_tx_evt (&tc->connection, 1);
1056  tc->flags |= TCP_CONN_SNDACK;
1057  }
1058  if (tc->pending_dupacks < 255)
1059  tc->pending_dupacks += 1;
1060 }
1061 
/*
 * Request a deferred retransmit: unless TCP_CONN_RXT_PENDING is already
 * set, adds a custom tx event for the connection (second argument 0) and
 * sets the flag.
 *
 * NOTE(review): the signature line (listing line 1063) is missing here.
 */
1062 void
1064 {
1065  if (!(tc->flags & TCP_CONN_RXT_PENDING))
1066  {
1067  session_add_self_custom_tx_evt (&tc->connection, 0);
1068  tc->flags |= TCP_CONN_RXT_PENDING;
1069  }
1070 }
1071 
1072 /**
1073  * Send window update ack
1074  *
1075  * Ensures that it will be sent only once, after a zero rwnd has been
1076  * advertised in a previous ack, and only if rwnd has grown beyond a
1077  * configurable value.
1078  */
/*
 * The threshold is tcp_cfg.rwnd_min_update_ack * tc->snd_mss; the ACK is
 * programmed (deferred), not sent inline.
 *
 * NOTE(review): the signature (listing line 1080) and the statement
 * preceding tcp_program_ack (1087) are missing from this listing.
 */
1079 void
1081 {
1082  if (tcp_zero_rwnd_sent (tc))
1083  {
1084  tcp_update_rcv_wnd (tc);
1085  if (tc->rcv_wnd >= tcp_cfg.rwnd_min_update_ack * tc->snd_mss)
1086  {
1088  tcp_program_ack (tc);
1089  }
1090  }
1091 }
1092 
1093 /**
1094  * Allocate a new buffer and build a new tcp segment
1095  *
1096  * @param wrk tcp worker
1097  * @param tc connection for which the segment will be allocated
1098  * @param offset offset of the first byte in the tx fifo
1099  * @param max_deq_byte segment size
1100  * @param[out] b pointer to buffer allocated
1101  *
1102  * @return the number of bytes in the segment or 0 if buffer cannot be
1103  * allocated or no data available
1104  */
/*
 * Two paths: if payload plus TRANSPORT_MAX_HDRS_LEN fits in one buffer, a
 * single buffer is filled; otherwise a chain of buffers is allocated in
 * one call and filled piece by piece. Data is peeked from the tx fifo. In
 * both paths the header is pushed with sequence number snd_una + offset
 * and without advancing snd_nxt.
 *
 * NOTE(review): the first signature line (listing line 1106) and
 * listing lines 1154, 1168, 1171 and 1196 are missing from this listing.
 */
1105 static int
1107  u32 offset, u32 max_deq_bytes, vlib_buffer_t ** b)
1108 {
1109  u32 bytes_per_buffer = vnet_get_tcp_main ()->bytes_per_buffer;
1110  vlib_main_t *vm = wrk->vm;
1111  u32 bi, seg_size;
1112  int n_bytes = 0;
1113  u8 *data;
1114 
1115  seg_size = max_deq_bytes + TRANSPORT_MAX_HDRS_LEN;
1116 
1117  /*
1118  * Prepare options
1119  */
1120  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
1121 
1122  /*
1123  * Allocate and fill in buffer(s)
1124  */
1125 
1126  /* Easy case, buffer size greater than mss */
1127  if (PREDICT_TRUE (seg_size <= bytes_per_buffer))
1128  {
1129  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1130  {
1131  tcp_worker_stats_inc (wrk, no_buffer, 1);
1132  return 0;
1133  }
1134  *b = vlib_get_buffer (vm, bi);
1135  data = tcp_init_buffer (vm, *b);
1136  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1137  max_deq_bytes);
1138  ASSERT (n_bytes == max_deq_bytes);
1139  b[0]->current_length = n_bytes;
1140  tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
1141  /* burst */ 0, /* update_snd_nxt */ 0);
1142  }
1143  /* Split mss into multiple buffers */
1144  else
1145  {
1146  u32 chain_bi = ~0, n_bufs_per_seg, n_bufs;
1147  u16 n_peeked, len_to_deq;
1148  vlib_buffer_t *chain_b, *prev_b;
1149  int i;
1150 
1151  /* Make sure we have enough buffers */
1152  n_bufs_per_seg = ceil ((double) seg_size / bytes_per_buffer);
1153  vec_validate_aligned (wrk->tx_buffers, n_bufs_per_seg - 1,
1155  n_bufs = vlib_buffer_alloc (vm, wrk->tx_buffers, n_bufs_per_seg);
/* all-or-nothing: a partial allocation is freed again */
1156  if (PREDICT_FALSE (n_bufs != n_bufs_per_seg))
1157  {
1158  if (n_bufs)
1159  vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
1160  tcp_worker_stats_inc (wrk, no_buffer, 1);
1161  return 0;
1162  }
1163 
/* buffers are consumed from the end of tx_buffers towards index 0 */
1164  *b = vlib_get_buffer (vm, wrk->tx_buffers[--n_bufs]);
1165  data = tcp_init_buffer (vm, *b);
1166  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1167  bytes_per_buffer -
1169  b[0]->current_length = n_bytes;
1170  b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
1172  max_deq_bytes -= n_bytes;
1173 
1174  chain_b = *b;
1175  for (i = 1; i < n_bufs_per_seg; i++)
1176  {
1177  prev_b = chain_b;
1178  len_to_deq = clib_min (max_deq_bytes, bytes_per_buffer);
1179  chain_bi = wrk->tx_buffers[--n_bufs];
1180  chain_b = vlib_get_buffer (vm, chain_bi);
1181  chain_b->current_data = 0;
1182  data = vlib_buffer_get_current (chain_b);
1183  n_peeked = session_tx_fifo_peek_bytes (&tc->connection, data,
1184  offset + n_bytes,
1185  len_to_deq);
1186  ASSERT (n_peeked == len_to_deq);
1187  n_bytes += n_peeked;
1188  chain_b->current_length = n_peeked;
1189  chain_b->next_buffer = 0;
1190 
1191  /* update previous buffer */
1192  prev_b->next_buffer = chain_bi;
1193  prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
1194 
1195  max_deq_bytes -= n_peeked;
1197  }
1198 
1199  tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
1200  /* burst */ 0, /* update_snd_nxt */ 0);
1201 
/* any allocated buffers left unchained are returned with a warning */
1202  if (PREDICT_FALSE (n_bufs))
1203  {
1204  clib_warning ("not all buffers consumed");
1205  vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
1206  }
1207  }
1208 
1209  ASSERT (n_bytes > 0);
1210  ASSERT (((*b)->current_data + (*b)->current_length) <= bytes_per_buffer);
1211 
1212  return n_bytes;
1213 }
1214 
1215 /**
1216  * Build a retransmit segment
1217  *
1218  * @return the number of bytes in the segment or 0 if there's nothing to
1219  * retransmit
1220  */
/*
 * The segment is limited to the smallest of max_deq_bytes, tc->snd_mss
 * and the bytes available in the tx fifo past `offset`. It starts at
 * sequence number snd_una + offset. On success the retransmit counters
 * (snd_rxt_bytes, bytes_retrans, segs_retrans, worker rxt_segs) are
 * updated, and the range is passed to tcp_bt_track_rxt when
 * TCP_CFG_F_RATE_SAMPLE is set. 0 is also returned when
 * tcp_prepare_segment returns 0.
 *
 * NOTE(review): the first signature line (listing line 1222) is missing.
 */
1221 static u32
1223  tcp_connection_t * tc, u32 offset,
1224  u32 max_deq_bytes, vlib_buffer_t ** b)
1225 {
1226  u32 start, available_bytes;
1227  int n_bytes = 0;
1228 
1229  ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
1230  ASSERT (max_deq_bytes != 0);
1231 
1232  /*
1233  * Make sure we can retransmit something
1234  */
1235  available_bytes = transport_max_tx_dequeue (&tc->connection);
1236  ASSERT (available_bytes >= offset);
1237  available_bytes -= offset;
1238  if (!available_bytes)
1239  return 0;
1240 
1241  max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
1242  max_deq_bytes = clib_min (available_bytes, max_deq_bytes);
1243 
1244  start = tc->snd_una + offset;
1245  ASSERT (seq_leq (start + max_deq_bytes, tc->snd_nxt));
1246 
1247  n_bytes = tcp_prepare_segment (wrk, tc, offset, max_deq_bytes, b);
1248  if (!n_bytes)
1249  return 0;
1250 
1251  tc->snd_rxt_bytes += n_bytes;
1252 
1253  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1254  tcp_bt_track_rxt (tc, start, start + n_bytes);
1255 
1256  tc->bytes_retrans += n_bytes;
1257  tc->segs_retrans += 1;
1258  tcp_worker_stats_inc (wrk, rxt_segs, 1);
1259  TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes);
1260 
1261  return n_bytes;
1262 }
1263 
1264 static void
1266 {
1267  sack_scoreboard_t *sb = &tc->sack_sb;
1268  sack_scoreboard_hole_t *hole;
1269 
1270  hole = scoreboard_first_hole (sb);
1271  if (!sb->is_reneging && (!hole || hole->start == tc->snd_una))
1272  return;
1273 
1274  scoreboard_clear_reneging (sb, tc->snd_una, tc->snd_nxt);
1275 }
1276 
1277 /**
1278  * Reset congestion control, switch cwnd to loss window and try again.
1279  */
/* NOTE(review): the declarator line is missing from this listing; the body
 * only uses a connection pointer named tc. */
1280 static void
1282 {
1283  TCP_EVT (TCP_EVT_CC_EVT, tc, 6);
1284 
/* Save the current ssthresh and cwnd before the cc algorithm changes them */
1285  tc->prev_ssthresh = tc->ssthresh;
1286  tc->prev_cwnd = tc->cwnd;
1287 
1288  /* If we entered loss without fast recovery, notify cc algo of the
1289  * congestion event such that it can update ssthresh and its state */
1290  if (!tcp_in_fastrecovery (tc))
1291  tcp_cc_congestion (tc);
1292 
1293  /* Let cc algo decide loss cwnd and ssthresh post unrecovered loss */
1294  tcp_cc_loss (tc);
1295 
/* Reset the rtt timestamp and accumulated cwnd bytes, count the timeout,
 * set the scoreboard reorder value to the dupack threshold and mark the
 * connection as being in recovery */
1296  tc->rtt_ts = 0;
1297  tc->cwnd_acc_bytes = 0;
1298  tc->tr_occurences += 1;
1299  tc->sack_sb.reorder = TCP_DUPACK_THRESHOLD;
1300  tcp_recovery_on (tc);
1301 }
1302 
1303 void
1305 {
1306  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1307  vlib_main_t *vm = wrk->vm;
1308  vlib_buffer_t *b = 0;
1309  u32 bi, n_bytes;
1310 
1311  tcp_worker_stats_inc (wrk, tr_events, 1);
1312 
1313  /* Should be handled by a different handler */
1314  if (PREDICT_FALSE (tc->state == TCP_STATE_SYN_SENT))
1315  return;
1316 
1317  /* Wait-close and retransmit could pop at the same time */
1318  if (tc->state == TCP_STATE_CLOSED)
1319  return;
1320 
1321  if (tc->state >= TCP_STATE_ESTABLISHED)
1322  {
1323  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1324 
1325  /* Lost FIN, retransmit and return */
1326  if (tc->flags & TCP_CONN_FINSNT)
1327  {
1328  tcp_send_fin (tc);
1329  tc->rto_boff += 1;
1330  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1331  return;
1332  }
1333 
1334  /* Shouldn't be here */
1335  if (tc->snd_una == tc->snd_nxt)
1336  {
1337  ASSERT (!tcp_in_recovery (tc));
1338  tc->rto_boff = 0;
1339  return;
1340  }
1341 
1342  /* We're not in recovery so make sure rto_boff is 0. Can be non 0 due
1343  * to persist timer timeout */
1344  if (!tcp_in_recovery (tc) && tc->rto_boff > 0)
1345  {
1346  tc->rto_boff = 0;
1347  tcp_update_rto (tc);
1348  }
1349 
1350  /* Peer is dead or network connectivity is lost. Close connection.
1351  * RFC 1122 section 4.2.3.5 recommends a value of at least 100s. For
1352  * a min rto of 0.2s we need to retry about 8 times. */
1353  if (tc->rto_boff >= TCP_RTO_BOFF_MAX)
1354  {
1355  tcp_send_reset (tc);
1356  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1357  session_transport_closing_notify (&tc->connection);
1358  session_transport_closed_notify (&tc->connection);
1360  tcp_program_cleanup (wrk, tc);
1361  tcp_worker_stats_inc (wrk, tr_abort, 1);
1362  return;
1363  }
1364 
1365  if (tcp_opts_sack_permitted (&tc->rcv_opts))
1367 
1368  /* Update send congestion to make sure that rxt has data to send */
1369  tc->snd_congestion = tc->snd_nxt;
1370 
1371  /* Send the first unacked segment. If we're short on buffers, return
1372  * as soon as possible */
1373  n_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
1375  if (!n_bytes)
1376  {
1377  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT, 1);
1378  return;
1379  }
1380 
1381  bi = vlib_get_buffer_index (vm, b);
1382  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1383 
1384  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1385  tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
1386 
1387  tc->rto_boff += 1;
1388  if (tc->rto_boff == 1)
1389  {
1391  /* Record timestamp. Eifel detection algorithm RFC3522 */
1392  tc->snd_rxt_ts = tcp_tstamp (tc);
1393  }
1394 
1395  if (tcp_opts_sack_permitted (&tc->rcv_opts))
1396  scoreboard_init_rxt (&tc->sack_sb, tc->snd_una + n_bytes);
1397 
1399  }
1400  /* Retransmit SYN-ACK */
1401  else if (tc->state == TCP_STATE_SYN_RCVD)
1402  {
1403  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1404 
1405  tc->rtt_ts = 0;
1406 
1407  /* Passive open establish timeout */
1408  if (tc->rto > TCP_ESTABLISH_TIME >> 1)
1409  {
1410  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1412  tcp_program_cleanup (wrk, tc);
1413  tcp_worker_stats_inc (wrk, tr_abort, 1);
1414  return;
1415  }
1416 
1417  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1418  {
1419  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT, 1);
1420  tcp_worker_stats_inc (wrk, no_buffer, 1);
1421  return;
1422  }
1423 
1424  tc->rto_boff += 1;
1425  if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
1426  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1427 
1428  ASSERT (tc->snd_una != tc->snd_nxt);
1429  tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
1430 
1431  b = vlib_get_buffer (vm, bi);
1432  tcp_init_buffer (vm, b);
1433  tcp_make_synack (tc, b);
1434  TCP_EVT (TCP_EVT_SYN_RXT, tc, 1);
1435 
1436  /* Retransmit timer already updated, just enqueue to output */
1437  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1438  }
1439  else
1440  {
1441  ASSERT (tc->state == TCP_STATE_CLOSED);
1442  return;
1443  }
1444 }
1445 
1446 /**
1447  * SYN retransmit timer handler. Active open only.
1448  */
1449 void
1451 {
1452  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1453  vlib_main_t *vm = wrk->vm;
1454  vlib_buffer_t *b = 0;
1455  u32 bi;
1456 
1457  /* Note: the connection may have transitioned to ESTABLISHED... */
1458  if (PREDICT_FALSE (tc->state != TCP_STATE_SYN_SENT))
1459  return;
1460 
1461  /* Half-open connection actually moved to established but we were
1462  * waiting for syn retransmit to pop to call cleanup from the right
1463  * thread. */
1464  if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
1465  {
1467  TCP_DBG ("could not remove half-open connection");
1468  return;
1469  }
1470 
1471  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1472  tc->rtt_ts = 0;
1473 
1474  /* Active open establish timeout */
1475  if (tc->rto >= TCP_ESTABLISH_TIME >> 1)
1476  {
1477  session_stream_connect_notify (&tc->connection, SESSION_E_TIMEDOUT);
1479  return;
1480  }
1481 
1482  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1483  {
1484  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN, 1);
1485  tcp_worker_stats_inc (wrk, no_buffer, 1);
1486  return;
1487  }
1488 
1489  /* Try without increasing RTO a number of times. If this fails,
1490  * start growing RTO exponentially */
1491  tc->rto_boff += 1;
1492  if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
1493  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1494 
1495  b = vlib_get_buffer (vm, bi);
1496  tcp_init_buffer (vm, b);
1497  tcp_make_syn (tc, b);
1498 
1499  TCP_EVT (TCP_EVT_SYN_RXT, tc, 0);
1500 
1501  /* This goes straight to ipx_lookup */
1502  tcp_push_ip_hdr (wrk, tc, b);
1503  tcp_enqueue_to_ip_lookup (wrk, b, bi, tc->c_is_ip4, tc->c_fib_index);
1504 
1505  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
1506  tc->rto * TCP_TO_TIMER_TICK);
1507 }
1508 
1509 /**
1510  * Got 0 snd_wnd from peer, try to do something about it.
1511  *
1512  */
1513 void
1515 {
1516  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1517  u32 bi, max_snd_bytes, available_bytes, offset;
1518  tcp_main_t *tm = vnet_get_tcp_main ();
1519  vlib_main_t *vm = wrk->vm;
1520  vlib_buffer_t *b;
1521  int n_bytes = 0;
1522  u8 *data;
1523 
1524  /* Problem already solved or worse */
1525  if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
1526  || (tc->flags & TCP_CONN_FINSNT))
1527  goto update_scheduler;
1528 
1529  available_bytes = transport_max_tx_dequeue (&tc->connection);
1530  offset = tc->snd_nxt - tc->snd_una;
1531 
1532  /* Reprogram persist if no new bytes available to send. We may have data
1533  * next time */
1534  if (!available_bytes)
1535  {
1536  tcp_persist_timer_set (&wrk->timer_wheel, tc);
1537  return;
1538  }
1539 
1540  if (available_bytes <= offset)
1541  goto update_scheduler;
1542 
1543  /* Increment RTO backoff */
1544  tc->rto_boff += 1;
1545  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1546 
1547  /*
1548  * Try to force the first unsent segment (or buffer)
1549  */
1550  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1551  {
1552  tcp_persist_timer_set (&wrk->timer_wheel, tc);
1553  tcp_worker_stats_inc (wrk, no_buffer, 1);
1554  return;
1555  }
1556 
1557  b = vlib_get_buffer (vm, bi);
1558  data = tcp_init_buffer (vm, b);
1559 
1561  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
1562  max_snd_bytes = clib_min (tc->snd_mss,
1563  tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN);
1564  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1565  max_snd_bytes);
1567  ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)
1568  || tc->snd_una == tc->snd_nxt
1569  || tc->rto_boff > 1));
1570 
1571  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1572  {
1574  tcp_bt_track_tx (tc, n_bytes);
1575  }
1576 
1577  tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0,
1578  /* burst */ 0, /* update_snd_nxt */ 1);
1579  tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
1580  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1581 
1582  /* Just sent new data, enable retransmit */
1583  tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
1584 
1585  return;
1586 
1587 update_scheduler:
1588 
1589  if (tcp_is_descheduled (tc))
1590  transport_connection_reschedule (&tc->connection);
1591 }
1592 
1593 /**
1594  * Retransmit first unacked segment
1595  */
/* NOTE(review): the declarator line is missing from this listing; the body
 * uses a worker context wrk and a connection tc.
 *
 * Returns -1 when no segment could be prepared and 0 once a segment has
 * been enqueued to output. */
1596 int
1598 {
1599  vlib_main_t *vm = wrk->vm;
1600  vlib_buffer_t *b;
1601  u32 bi, n_bytes;
1602 
1603  TCP_EVT (TCP_EVT_CC_EVT, tc, 1);
1604 
/* Offset 0 selects the data starting at snd_una; at most one MSS is
 * requested */
1605  n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, tc->snd_mss, &b);
1606  if (!n_bytes)
1607  return -1;
1608 
/* Hand the prepared buffer to the output path */
1609  bi = vlib_get_buffer_index (vm, b);
1610  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1611 
1612  return 0;
1613 }
1614 
1615 static int
1617  u32 burst_size)
1618 {
1619  u32 offset, n_segs = 0, n_written, bi, available_wnd;
1620  vlib_main_t *vm = wrk->vm;
1621  vlib_buffer_t *b = 0;
1622 
1623  offset = tc->snd_nxt - tc->snd_una;
1624  available_wnd = tc->snd_wnd - offset;
1625  burst_size = clib_min (burst_size, available_wnd / tc->snd_mss);
1626 
1627  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1629 
1630  while (n_segs < burst_size)
1631  {
1632  n_written = tcp_prepare_segment (wrk, tc, offset, tc->snd_mss, &b);
1633  if (!n_written)
1634  goto done;
1635 
1636  bi = vlib_get_buffer_index (vm, b);
1637  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1638  offset += n_written;
1639  n_segs += 1;
1640 
1641  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1642  tcp_bt_track_tx (tc, n_written);
1643 
1644  tc->snd_nxt += n_written;
1645  }
1646 
1647 done:
1648  return n_segs;
1649 }
1650 
1651 /**
1652  * Estimate send space using proportional rate reduction (RFC6937)
1653  */
/* NOTE(review): the declarator line is missing from this listing; the
 * function is called in this file as tcp_fastrecovery_prr_snd_space (tc).
 *
 * Returns the computed send space in bytes as an int; the result is never
 * negative because of the final clib_max. */
1654 int
1656 {
1657  u32 pipe, prr_out;
1658  int space;
1659 
/* pipe: value reported by tcp_flight_size ().
 * prr_out: bytes retransmitted plus bytes sent beyond snd_congestion */
1660  pipe = tcp_flight_size (tc);
1661  prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion);
1662 
1663  if (pipe > tc->ssthresh)
1664  {
/* Scale delivered bytes by ssthresh / prev_cwnd and subtract what was
 * already sent */
1665  space = ((int) tc->prr_delivered * ((f64) tc->ssthresh / tc->prev_cwnd))
1666  - prr_out;
1667  }
1668  else
1669  {
/* Allow up to (delivered - sent, floored at 0) plus one MSS, but no more
 * than the gap between ssthresh and pipe */
1670  int limit;
1671  limit = clib_max ((int) (tc->prr_delivered - prr_out), 0) + tc->snd_mss;
1672  space = clib_min (tc->ssthresh - pipe, limit);
1673  }
/* Floor the result: one MSS if nothing has been sent yet, otherwise 0 */
1674  space = clib_max (space, prr_out ? 0 : tc->snd_mss);
1675  return space;
1676 }
1677 
/* NOTE(review): the line with the function name and first parameter is
 * missing from this listing; the body uses a connection tc and the
 * scoreboard sb.
 *
 * Returns 1 when tcp_fastrecovery_first (tc) holds. Otherwise returns
 * whether the bytes SACKed beyond snd_congestion exceed
 * (snd_una - prr_start) scaled by ssthresh / prev_cwnd. */
1678 static inline u8
1680  sack_scoreboard_t * sb)
1681 {
/* Distance from snd_congestion to the highest SACKed sequence number */
1682  u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion;
/* Ratio of ssthresh to the cwnd saved before the reduction */
1683  f64 rr = (f64) tc->ssthresh / tc->prev_cwnd;
1684 
1685  if (tcp_fastrecovery_first (tc))
1686  return 1;
1687 
1688  return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr);
1689 }
1690 
1691 static inline u8
1693 {
1694  return (transport_max_tx_dequeue (&tc->connection)
1695  - (tc->snd_nxt - tc->snd_una));
1696 }
1697 
/**
 * Check that the scoreboard's rescue_rxt lies between the connection's
 * snd_una and snd_congestion (inclusive) in sequence space.
 *
 * Both arguments are parenthesized in the expansion so that pointer
 * expressions such as sb + 1 or &sb_obj expand correctly.
 */
#define scoreboard_rescue_rxt_valid(_sb, _tc)				\
  (seq_geq ((_sb)->rescue_rxt, (_tc)->snd_una)				\
   && seq_leq ((_sb)->rescue_rxt, (_tc)->snd_congestion))
1701 
1702 /**
1703  * Do retransmit with SACKs
1704  */
1705 static int
1707  u32 burst_size)
1708 {
1709  u32 n_written = 0, offset, max_bytes, n_segs = 0;
1710  u8 snd_limited = 0, can_rescue = 0;
1711  u32 bi, max_deq, burst_bytes;
1712  sack_scoreboard_hole_t *hole;
1713  vlib_main_t *vm = wrk->vm;
1714  vlib_buffer_t *b = 0;
1715  sack_scoreboard_t *sb;
1716  int snd_space;
1717 
1719 
1720  burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
1721  burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
1722  if (!burst_size)
1723  {
1725  return 0;
1726  }
1727 
1728  if (tcp_in_recovery (tc))
1729  snd_space = tcp_available_cc_snd_space (tc);
1730  else
1731  snd_space = tcp_fastrecovery_prr_snd_space (tc);
1732 
1733  if (snd_space < tc->snd_mss)
1734  goto done;
1735 
1736  sb = &tc->sack_sb;
1737 
1738  /* Check if snd_una is a lost retransmit */
1739  if (pool_elts (sb->holes)
1740  && seq_gt (sb->high_sacked, tc->snd_congestion)
1741  && tc->rxt_head != tc->snd_una
1743  {
1744  max_bytes = clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una);
1745  n_written = tcp_prepare_retransmit_segment (wrk, tc, 0, max_bytes, &b);
1746  if (!n_written)
1747  {
1749  goto done;
1750  }
1751  bi = vlib_get_buffer_index (vm, b);
1752  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1753  n_segs = 1;
1754 
1755  tc->rxt_head = tc->snd_una;
1756  tc->rxt_delivered += n_written;
1757  tc->prr_delivered += n_written;
1758  ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes);
1759  }
1760 
1762 
1763  TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
1764  hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
1765 
1766  max_deq = transport_max_tx_dequeue (&tc->connection);
1767  max_deq -= tc->snd_nxt - tc->snd_una;
1768 
1769  while (snd_space > 0 && n_segs < burst_size)
1770  {
1771  hole = scoreboard_next_rxt_hole (sb, hole, max_deq != 0, &can_rescue,
1772  &snd_limited);
1773  if (!hole)
1774  {
1775  /* We are out of lost holes to retransmit so send some new data. */
1776  if (max_deq > tc->snd_mss)
1777  {
1778  u32 n_segs_new;
1779  int av_wnd;
1780 
1781  /* Make sure we don't exceed available window and leave space
1782  * for one more packet, to avoid zero window acks */
1783  av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
1784  av_wnd = clib_max (av_wnd - tc->snd_mss, 0);
1785  snd_space = clib_min (snd_space, av_wnd);
1786  snd_space = clib_min (max_deq, snd_space);
1787  burst_size = clib_min (burst_size - n_segs,
1788  snd_space / tc->snd_mss);
1789  burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST);
1790  n_segs_new = tcp_transmit_unsent (wrk, tc, burst_size);
1791  if (max_deq > n_segs_new * tc->snd_mss)
1793 
1794  n_segs += n_segs_new;
1795  goto done;
1796  }
1797 
1798  if (tcp_in_recovery (tc) || !can_rescue
1799  || scoreboard_rescue_rxt_valid (sb, tc))
1800  break;
1801 
1802  /* If rescue rxt undefined or less than snd_una then one segment of
1803  * up to SMSS octets that MUST include the highest outstanding
1804  * unSACKed sequence number SHOULD be returned, and RescueRxt set to
1805  * RecoveryPoint. HighRxt MUST NOT be updated.
1806  */
1807  hole = scoreboard_last_hole (sb);
1808  max_bytes = clib_min (tc->snd_mss, hole->end - hole->start);
1809  max_bytes = clib_min (max_bytes, snd_space);
1810  offset = hole->end - tc->snd_una - max_bytes;
1811  n_written = tcp_prepare_retransmit_segment (wrk, tc, offset,
1812  max_bytes, &b);
1813  if (!n_written)
1814  goto done;
1815 
1816  sb->rescue_rxt = tc->snd_congestion;
1817  bi = vlib_get_buffer_index (vm, b);
1818  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1819  n_segs += 1;
1820  break;
1821  }
1822 
1823  max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
1824  max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;
1825  if (max_bytes == 0)
1826  break;
1827 
1828  offset = sb->high_rxt - tc->snd_una;
1829  n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
1830  &b);
1831  ASSERT (n_written <= snd_space);
1832 
1833  /* Nothing left to retransmit */
1834  if (n_written == 0)
1835  break;
1836 
1837  bi = vlib_get_buffer_index (vm, b);
1838  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1839 
1840  sb->high_rxt += n_written;
1841  ASSERT (seq_leq (sb->high_rxt, tc->snd_nxt));
1842 
1843  snd_space -= n_written;
1844  n_segs += 1;
1845  }
1846 
1847  if (hole)
1849 
1850 done:
1851 
1852  transport_connection_tx_pacer_reset_bucket (&tc->connection, 0);
1853  return n_segs;
1854 }
1855 
1856 /**
1857  * Fast retransmit without SACK info
1858  */
1859 static int
1861  u32 burst_size)
1862 {
1863  u32 n_written = 0, offset = 0, bi, max_deq, n_segs_now, max_bytes;
1864  u32 burst_bytes, sent_bytes;
1865  vlib_main_t *vm = wrk->vm;
1866  int snd_space, n_segs = 0;
1867  u8 cc_limited = 0;
1868  vlib_buffer_t *b;
1869 
1871  TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
1872 
1873  burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
1874  burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
1875  if (!burst_size)
1876  {
1878  return 0;
1879  }
1880 
1881  snd_space = tcp_available_cc_snd_space (tc);
1882  cc_limited = snd_space < burst_bytes;
1883 
1884  if (!tcp_fastrecovery_first (tc))
1885  goto send_unsent;
1886 
1887  /* RFC 6582: [If a partial ack], retransmit the first unacknowledged
1888  * segment. */
1889  while (snd_space > 0 && n_segs < burst_size)
1890  {
1891  max_bytes = clib_min (tc->snd_mss,
1892  tc->snd_congestion - tc->snd_una - offset);
1893  if (!max_bytes)
1894  break;
1895  n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
1896  &b);
1897 
1898  /* Nothing left to retransmit */
1899  if (n_written == 0)
1900  break;
1901 
1902  bi = vlib_get_buffer_index (vm, b);
1903  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1904  snd_space -= n_written;
1905  offset += n_written;
1906  n_segs += 1;
1907  }
1908 
1909  if (n_segs == burst_size)
1910  goto done;
1911 
1912 send_unsent:
1913 
1914  /* RFC 6582: Send a new segment if permitted by the new value of cwnd. */
1915  if (snd_space < tc->snd_mss || tc->snd_mss == 0)
1916  goto done;
1917 
1918  max_deq = transport_max_tx_dequeue (&tc->connection);
1919  max_deq -= tc->snd_nxt - tc->snd_una;
1920  if (max_deq)
1921  {
1922  snd_space = clib_min (max_deq, snd_space);
1923  burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss);
1924  n_segs_now = tcp_transmit_unsent (wrk, tc, burst_size);
1925  if (n_segs_now && max_deq > n_segs_now * tc->snd_mss)
1927  n_segs += n_segs_now;
1928  }
1929 
1930 done:
1932 
1933  sent_bytes = clib_min (n_segs * tc->snd_mss, burst_bytes);
1934  sent_bytes = cc_limited ? burst_bytes : sent_bytes;
1935  transport_connection_tx_pacer_update_bytes (&tc->connection, sent_bytes);
1936 
1937  return n_segs;
1938 }
1939 
1940 static int
1941 tcp_send_acks (tcp_connection_t * tc, u32 max_burst_size)
1942 {
1943  int j, n_acks;
1944 
1945  if (!tc->pending_dupacks)
1946  {
1947  if (tcp_in_cong_recovery (tc) || !tcp_max_tx_deq (tc)
1948  || tc->state != TCP_STATE_ESTABLISHED)
1949  {
1950  tcp_send_ack (tc);
1951  return 1;
1952  }
1953  return 0;
1954  }
1955 
1956  /* If we're supposed to send dupacks but have no ooo data
1957  * send only one ack */
1958  if (!vec_len (tc->snd_sacks))
1959  {
1960  tcp_send_ack (tc);
1961  tc->dupacks_out += 1;
1962  tc->pending_dupacks = 0;
1963  return 1;
1964  }
1965 
1966  /* Start with first sack block */
1967  tc->snd_sack_pos = 0;
1968 
1969  /* Generate enough dupacks to cover all sack blocks. Do not generate
1970  * more sacks than the number of packets received. But do generate at
1971  * least 3, i.e., the number needed to signal congestion, if needed. */
1972  n_acks = vec_len (tc->snd_sacks) / TCP_OPTS_MAX_SACK_BLOCKS;
1973  n_acks = clib_min (n_acks, tc->pending_dupacks);
1974  n_acks = clib_max (n_acks, clib_min (tc->pending_dupacks, 3));
1975  for (j = 0; j < clib_min (n_acks, max_burst_size); j++)
1976  tcp_send_ack (tc);
1977 
1978  if (n_acks < max_burst_size)
1979  {
1980  tc->pending_dupacks = 0;
1981  tc->snd_sack_pos = 0;
1982  tc->dupacks_out += n_acks;
1983  return n_acks;
1984  }
1985  else
1986  {
1987  TCP_DBG ("constrained by burst size");
1988  tc->pending_dupacks = n_acks - max_burst_size;
1989  tc->dupacks_out += max_burst_size;
1990  tcp_program_dupack (tc);
1991  return max_burst_size;
1992  }
1993 }
1994 
1995 static int
1997 {
1999  u32 n_segs;
2000 
2001  if (PREDICT_FALSE (tc->state == TCP_STATE_CLOSED))
2002  return 0;
2003 
2004  wrk = tcp_get_worker (tc->c_thread_index);
2005 
2006  if (tcp_opts_sack_permitted (&tc->rcv_opts))
2007  n_segs = tcp_retransmit_sack (wrk, tc, max_burst_size);
2008  else
2009  n_segs = tcp_retransmit_no_sack (wrk, tc, max_burst_size);
2010 
2011  return n_segs;
2012 }
2013 
/* NOTE(review): the declarator line is missing from this listing; the body
 * reads a connection handle named conn and send parameters named sp.
 *
 * Performs any pending retransmit and then any pending ack transmission,
 * and returns the number of segments generated. */
2014 int
2016 {
2017  tcp_connection_t *tc = (tcp_connection_t *) conn;
2018  u32 n_segs = 0;
2019 
/* Retransmit only if the connection is in congestion recovery and a
 * retransmit was flagged; the flag is cleared before retransmitting */
2020  if (tcp_in_cong_recovery (tc) && (tc->flags & TCP_CONN_RXT_PENDING))
2021  {
2022  tc->flags &= ~TCP_CONN_RXT_PENDING;
2023  n_segs = tcp_do_retransmit (tc, sp->max_burst_size);
2024  }
2025 
/* No ack was requested, so report only the retransmitted segments */
2026  if (!(tc->flags & TCP_CONN_SNDACK))
2027  return n_segs;
2028 
2029  tc->flags &= ~TCP_CONN_SNDACK;
2030 
2031  /* We have retransmitted packets and no dupack */
2032  if (n_segs && !tc->pending_dupacks)
2033  return n_segs;
2034 
/* Burst already used up by retransmits: re-program the ack instead of
 * sending it now */
2035  if (sp->max_burst_size <= n_segs)
2036  {
2037  tcp_program_ack (tc);
2038  return n_segs;
2039  }
2040 
/* Spend whatever remains of the burst on acks */
2041  n_segs += tcp_send_acks (tc, sp->max_burst_size - n_segs);
2042 
2043  return n_segs;
2044 }
2045 #endif /* CLIB_MARCH_VARIANT */
2046 
2047 static void
2049  u16 * next0, u32 * error0)
2050 {
2051  ip_adjacency_t *adj;
2052  adj_index_t ai;
2053 
2054  /* Not thread safe but as long as the connection exists the adj should
2055  * not be removed */
2056  ai = adj_nbr_find (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &tc0->c_rmt_ip,
2057  tc0->sw_if_index);
2058  if (ai == ADJ_INDEX_INVALID)
2059  {
2060  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
2061  *next0 = TCP_OUTPUT_NEXT_DROP;
2062  *error0 = TCP_ERROR_LINK_LOCAL_RW;
2063  return;
2064  }
2065 
2066  adj = adj_get (ai);
2068  *next0 = TCP_OUTPUT_NEXT_IP_REWRITE;
2069  else if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP)
2070  *next0 = TCP_OUTPUT_NEXT_IP_ARP;
2071  else
2072  {
2073  *next0 = TCP_OUTPUT_NEXT_DROP;
2074  *error0 = TCP_ERROR_LINK_LOCAL_RW;
2075  }
2076  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
2077 }
2078 
/* NOTE(review): the line with the function name and leading parameters is
 * missing from this listing; the body uses vm and node in addition to the
 * parameters shown.
 *
 * For each of the n_bufs buffer indices in to_next, adds a trace record
 * holding a copy of the TCP header and of the connection, skipping buffers
 * that are not flagged as traced. */
2079 static void
2081  u32 * to_next, u32 n_bufs)
2082 {
2083  tcp_connection_t *tc;
2084  tcp_tx_trace_t *t;
2085  vlib_buffer_t *b;
2086  tcp_header_t *th;
2087  int i;
2088 
2089  for (i = 0; i < n_bufs; i++)
2090  {
2091  b = vlib_get_buffer (vm, to_next[i]);
2092  if (!(b->flags & VLIB_BUFFER_IS_TRACED))
2093  continue;
/* The buffer's current data is read as the TCP header */
2094  th = vlib_buffer_get_current (b);
/* NOTE(review): tc is copied below without a null check - confirm the
 * lookup cannot fail for a buffer reaching this point */
2095  tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
2096  vm->thread_index);
2097  t = vlib_add_trace (vm, node, b, sizeof (*t));
2098  clib_memcpy_fast (&t->tcp_header, th, sizeof (t->tcp_header));
2099  clib_memcpy_fast (&t->tcp_connection, tc, sizeof (t->tcp_connection));
2100  }
2101 }
2102 
2103 always_inline void
2105  tcp_connection_t * tc0, u8 is_ip4)
2106 {
2107  TCP_EVT (TCP_EVT_OUTPUT, tc0,
2109  b0->current_length);
2110 
2111  if (is_ip4)
2112  vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
2113  IP_PROTOCOL_TCP, tcp_csum_offload (tc0));
2114  else
2115  vlib_buffer_push_ip6_custom (vm, b0, &tc0->c_lcl_ip6, &tc0->c_rmt_ip6,
2116  IP_PROTOCOL_TCP, tc0->ipv6_flow_label);
2117 }
2118 
2119 always_inline void
2121 {
2122  if (PREDICT_TRUE (!(tc->cfg_flags & TCP_CFG_F_TSO)))
2123  return;
2124 
2125  u16 data_len = b->current_length - sizeof (tcp_header_t) - tc->snd_opts_len;
2126 
2127  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID))
2129 
2130  if (PREDICT_TRUE (data_len <= tc->snd_mss))
2131  return;
2132  else
2133  {
2134  ASSERT ((b->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
2135  ASSERT ((b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
2136  b->flags |= VNET_BUFFER_F_GSO;
2137  vnet_buffer2 (b)->gso_l4_hdr_sz =
2138  sizeof (tcp_header_t) + tc->snd_opts_len;
2139  vnet_buffer2 (b)->gso_size = tc->snd_mss;
2140  }
2141 }
2142 
/* NOTE(review): the line with the function name and leading parameters is
 * missing from this listing; the body uses a connection tc0 and a buffer b0
 * in addition to the parameters shown.
 *
 * Chooses the next node for b0, fills in the buffer's TX/RX sw_if_index
 * metadata and counts the segment on the connection. For IPv6 the
 * link-local case is delegated to tcp_output_handle_link_local (). */
2143 always_inline void
2145  vlib_node_runtime_t * error_node, u16 * next0,
2146  u8 is_ip4)
2147 {
2148  /* If next_index is not drop use it */
2149  if (tc0->next_node_index)
2150  {
2151  *next0 = tc0->next_node_index;
2152  vnet_buffer (b0)->tcp.next_node_opaque = tc0->next_node_opaque;
2153  }
2154  else
2155  {
2156  *next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
2157  }
2158 
/* TX carries the connection's fib index; RX is set to 0 */
2159  vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
2160  vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
2161 
2162  if (!is_ip4)
2163  {
2164  u32 error0 = 0;
2165 
/* The link-local handler may override next0 and report an error */
2166  if (PREDICT_FALSE (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6)))
2167  tcp_output_handle_link_local (tc0, b0, next0, &error0);
2168 
/* On error, record it on the buffer and return without counting the
 * segment */
2169  if (PREDICT_FALSE (error0))
2170  {
2171  b0->error = error_node->errors[error0];
2172  return;
2173  }
2174  }
2175 
2176  tc0->segs_out += 1;
2177 }
2178 
2181  vlib_frame_t * frame, int is_ip4)
2182 {
2186 
2188  n_left_from = frame->n_vectors;
2190 
2191  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
2193 
2195  b = bufs;
2196  next = nexts;
2197 
2198  while (n_left_from >= 4)
2199  {
2200  tcp_connection_t *tc0, *tc1;
2201 
2202  {
2203  vlib_prefetch_buffer_header (b[2], STORE);
2204  CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2205 
2206  vlib_prefetch_buffer_header (b[3], STORE);
2207  CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2208  }
2209 
2210  tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
2211  thread_index);
2212  tc1 = tcp_connection_get (vnet_buffer (b[1])->tcp.connection_index,
2213  thread_index);
2214 
2215  if (PREDICT_TRUE (!tc0 + !tc1 == 0))
2216  {
2217  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
2218  tcp_output_push_ip (vm, b[1], tc1, is_ip4);
2219 
2220  tcp_check_if_gso (tc0, b[0]);
2221  tcp_check_if_gso (tc1, b[1]);
2222 
2223  tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
2224  tcp_output_handle_packet (tc1, b[1], node, &next[1], is_ip4);
2225  }
2226  else
2227  {
2228  if (tc0 != 0)
2229  {
2230  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
2231  tcp_check_if_gso (tc0, b[0]);
2232  tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
2233  }
2234  else
2235  {
2236  b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
2238  }
2239  if (tc1 != 0)
2240  {
2241  tcp_output_push_ip (vm, b[1], tc1, is_ip4);
2242  tcp_check_if_gso (tc1, b[1]);
2243  tcp_output_handle_packet (tc1, b[1], node, &next[1], is_ip4);
2244  }
2245  else
2246  {
2247  b[1]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
2249  }
2250  }
2251 
2252  b += 2;
2253  next += 2;
2254  n_left_from -= 2;
2255  }
2256  while (n_left_from > 0)
2257  {
2258  tcp_connection_t *tc0;
2259 
2260  if (n_left_from > 1)
2261  {
2262  vlib_prefetch_buffer_header (b[1], STORE);
2263  CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2264  }
2265 
2266  tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
2267  thread_index);
2268 
2269  if (PREDICT_TRUE (tc0 != 0))
2270  {
2271  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
2272  tcp_check_if_gso (tc0, b[0]);
2273  tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
2274  }
2275  else
2276  {
2277  b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
2279  }
2280 
2281  b += 1;
2282  next += 1;
2283  n_left_from -= 1;
2284  }
2285 
2287  vlib_node_increment_counter (vm, tcp_node_index (output, is_ip4),
2288  TCP_ERROR_PKTS_SENT, frame->n_vectors);
2289  return frame->n_vectors;
2290 }
2291 
2294 {
2295  return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
2296 }
2297 
2300 {
2301  return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
2302 }
2303 
2304 /* *INDENT-OFF* */
2306 {
2307  .name = "tcp4-output",
2308  /* Takes a vector of packets. */
2309  .vector_size = sizeof (u32),
2310  .n_errors = TCP_N_ERROR,
2311  .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
2312  .error_strings = tcp_error_strings,
2313  .n_next_nodes = TCP_OUTPUT_N_NEXT,
2314  .next_nodes = {
2315 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
2317 #undef _
2318  },
2319  .format_buffer = format_tcp_header,
2320  .format_trace = format_tcp_tx_trace,
2321 };
2322 /* *INDENT-ON* */
2323 
2324 /* *INDENT-OFF* */
2326 {
2327  .name = "tcp6-output",
2328  /* Takes a vector of packets. */
2329  .vector_size = sizeof (u32),
2330  .n_errors = TCP_N_ERROR,
2331  .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
2332  .error_strings = tcp_error_strings,
2333  .n_next_nodes = TCP_OUTPUT_N_NEXT,
2334  .next_nodes = {
2335 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
2337 #undef _
2338  },
2339  .format_buffer = format_tcp_header,
2340  .format_trace = format_tcp_tx_trace,
2341 };
2342 /* *INDENT-ON* */
2343 
2344 typedef enum _tcp_reset_next
2345 {
2350 
/* List of _(symbol, node name) entries for the IPv4 reset path. The
 * "tcp4-reset" registration in this file defines _(s,n) as
 * [TCP_RESET_NEXT_##s] = n, presumably to expand this list into its
 * next_nodes table. */
2351 #define foreach_tcp4_reset_next \
2352  _(DROP, "error-drop") \
2353  _(IP_LOOKUP, "ip4-lookup")
2354 
/* List of _(symbol, node name) entries for the IPv6 reset path. The
 * "tcp6-reset" registration in this file defines _(s,n) as
 * [TCP_RESET_NEXT_##s] = n, presumably to expand this list into its
 * next_nodes table. */
2355 #define foreach_tcp6_reset_next \
2356  _(DROP, "error-drop") \
2357  _(IP_LOOKUP, "ip6-lookup")
2358 
2359 static uword
2361  vlib_frame_t * from_frame, u8 is_ip4)
2362 {
2363  u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP;
2364  u32 n_left_from, next_index, *from, *to_next;
2365 
2367  n_left_from = from_frame->n_vectors;
2368 
2369  next_index = node->cached_next_index;
2370 
2371  while (n_left_from > 0)
2372  {
2373  u32 n_left_to_next;
2374 
2375  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2376 
2377  while (n_left_from > 0 && n_left_to_next > 0)
2378  {
2379  vlib_buffer_t *b0;
2380  tcp_tx_trace_t *t0;
2381  tcp_header_t *th0;
2382  u32 bi0;
2383 
2384  bi0 = from[0];
2385  to_next[0] = bi0;
2386  from += 1;
2387  to_next += 1;
2388  n_left_from -= 1;
2389  n_left_to_next -= 1;
2390 
2391  b0 = vlib_get_buffer (vm, bi0);
2392  tcp_make_reset_in_place (vm, b0, is_ip4);
2393 
2394  /* Prepare to send to IP lookup */
2395  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
2396 
2397  b0->error = node->errors[error0];
2398  b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
2399  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2400  {
2401  th0 = vlib_buffer_get_current (b0);
2402  if (is_ip4)
2403  th0 = ip4_next_header ((ip4_header_t *) th0);
2404  else
2405  th0 = ip6_next_header ((ip6_header_t *) th0);
2406  t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
2407  clib_memcpy_fast (&t0->tcp_header, th0,
2408  sizeof (t0->tcp_header));
2409  }
2410 
2412  n_left_to_next, bi0, next0);
2413  }
2414  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2415  }
2416  return from_frame->n_vectors;
2417 }
2418 
2421 {
2422  return tcp46_send_reset_inline (vm, node, from_frame, 1);
2423 }
2424 
2427 {
2428  return tcp46_send_reset_inline (vm, node, from_frame, 0);
2429 }
2430 
2431 /* *INDENT-OFF* */
2433  .name = "tcp4-reset",
2434  .vector_size = sizeof (u32),
2435  .n_errors = TCP_N_ERROR,
2436  .error_strings = tcp_error_strings,
2437  .n_next_nodes = TCP_RESET_N_NEXT,
2438  .next_nodes = {
2439 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
2441 #undef _
2442  },
2443  .format_trace = format_tcp_tx_trace,
2444 };
2445 /* *INDENT-ON* */
2446 
2447 /* *INDENT-OFF* */
2449  .name = "tcp6-reset",
2450  .vector_size = sizeof (u32),
2451  .n_errors = TCP_N_ERROR,
2452  .error_strings = tcp_error_strings,
2453  .n_next_nodes = TCP_RESET_N_NEXT,
2454  .next_nodes = {
2455 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
2457 #undef _
2458  },
2459  .format_trace = format_tcp_tx_trace,
2460 };
2461 /* *INDENT-ON* */
2462 
2463 /*
2464  * fd.io coding-style-patch-verification: ON
2465  *
2466  * Local Variables:
2467  * eval: (c-set-style "gnu")
2468  * End:
2469  */
tmp
u32 * tmp
Definition: interface_output.c:1078
vlib_buffer_t::next_buffer
u32 next_buffer
Next buffer for this linked-list of buffers.
Definition: buffer.h:149
seq_gt
#define seq_gt(_s1, _s2)
Definition: tcp_packet.h:180
tcp4_reset_node
vlib_node_registration_t tcp4_reset_node
(constructor) VLIB_REGISTER_NODE (tcp4_reset_node)
Definition: tcp_output.c:2432
ip6_address_is_link_local_unicast
static uword ip6_address_is_link_local_unicast(const ip6_address_t *a)
Definition: ip6_packet.h:253
vlib_buffer_free
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers. Frees the entire buffer chain for each buffer.
Definition: buffer_funcs.h:982
tcp_options_t::tsval
u32 tsval
Timestamp value.
Definition: tcp_packet.h:146
vlib_num_workers
static u32 vlib_num_workers()
Definition: threads.h:354
tcp_connection_cleanup
void tcp_connection_cleanup(tcp_connection_t *tc)
Cleans up connection state.
Definition: tcp.c:240
tcp6_output_node
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
Definition: tcp_output.c:2325
seq_geq
#define seq_geq(_s1, _s2)
Definition: tcp_packet.h:181
TCP_RESET_NEXT_DROP
@ TCP_RESET_NEXT_DROP
Definition: tcp_output.c:2346
IP_LOOKUP_NEXT_ARP
@ IP_LOOKUP_NEXT_ARP
This packet matches an "incomplete adjacency" and packets need to be passed to ARP to find rewrite st...
Definition: adj.h:63
TCP_FLAG_RST
#define TCP_FLAG_RST
Definition: fa_node.h:14
thread_index
u32 thread_index
Definition: nat44_ei_hairpinning.c:492
bufs
vlib_buffer_t * bufs[VLIB_FRAME_SIZE]
Definition: nat44_ei_out2in.c:717
tcp_check_sack_reneging
static void tcp_check_sack_reneging(tcp_connection_t *tc)
Definition: tcp_output.c:1265
tcp_opts_tstamp
#define tcp_opts_tstamp(_to)
Definition: tcp_packet.h:156
TCP_FLAG_FIN
#define TCP_FLAG_FIN
Definition: fa_node.h:12
dst_port
vl_api_ip_port_and_mask_t dst_port
Definition: flow_types.api:92
vlib_prefetch_buffer_header
#define vlib_prefetch_buffer_header(b, type)
Prefetch buffer metadata.
Definition: buffer.h:231
frame
vlib_main_t vlib_node_runtime_t vlib_frame_t * frame
Definition: nat44_ei.c:3048
tcp_options_t::wscale
u8 wscale
Window scale advertised.
Definition: tcp_packet.h:150
TCP_USE_SACKS
#define TCP_USE_SACKS
Disable only for testing.
Definition: tcp_types.h:40
tcp_enqueue_to_output
static void tcp_enqueue_to_output(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4)
Definition: tcp_output.c:565
tcp_send_reset
void tcp_send_reset(tcp_connection_t *tc)
Build and send reset packet for connection.
Definition: tcp_output.c:748
VLIB_BUFFER_TRACE_TRAJECTORY_INIT
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
Definition: buffer.h:192
tcp_output_next_t
enum _tcp_output_next tcp_output_next_t
ip6_tcp_udp_icmp_compute_checksum
u16 ip6_tcp_udp_icmp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip6_header_t *ip0, int *bogus_lengthp)
Definition: ip6_forward.c:1098
tcp_inlines.h
TCP_RESET_N_NEXT
@ TCP_RESET_N_NEXT
Definition: tcp_output.c:2348
format_tcp_state
format_function_t format_tcp_state
Definition: tcp.h:340
next_index
nat44_ei_hairpin_src_next_t next_index
Definition: nat44_ei_hairpinning.c:412
tcp_make_syn
void tcp_make_syn(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN.
Definition: tcp_output.c:497
TCP_OPTS_MAX_SACK_BLOCKS
#define TCP_OPTS_MAX_SACK_BLOCKS
Definition: tcp_packet.h:174
tcp_cc_init_rxt_timeout
static void tcp_cc_init_rxt_timeout(tcp_connection_t *tc)
Reset congestion control, switch cwnd to loss window and try again.
Definition: tcp_output.c:1281
clib_max
#define clib_max(x, y)
Definition: clib.h:335
tcp_ack
#define tcp_ack(_th)
Definition: tcp_packet.h:83
TCP_OPTION_LEN_WINDOW_SCALE
#define TCP_OPTION_LEN_WINDOW_SCALE
Definition: tcp_packet.h:165
format_tcp_tx_trace
static u8 * format_tcp_tx_trace(u8 *s, va_list *args)
Definition: tcp_output.c:56
session_transport_closing_notify
void session_transport_closing_notify(transport_connection_t *tc)
Notification from transport that connection is being closed.
Definition: session.c:1062
TCP_RTO_MAX
#define TCP_RTO_MAX
Definition: tcp_types.h:85
vlib_get_buffer
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:111
tcp_retransmit_timer_set
static void tcp_retransmit_timer_set(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
Definition: tcp_timer.h:55
ADJ_INDEX_INVALID
#define ADJ_INDEX_INVALID
Invalid ADJ index - used when no adj is known likewise blazoned capitals INVALID speak volumes where ...
Definition: adj_types.h:36
tcp_buffer_len
static u32 tcp_buffer_len(vlib_buffer_t *b)
Definition: tcp_output.c:985
tcp46_send_reset_inline
static uword tcp46_send_reset_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame, u8 is_ip4)
Definition: tcp_output.c:2360
ip4_inlines.h
format_tcp_header
format_function_t format_tcp_header
Definition: format.h:100
tcp_header_t
struct _tcp_header tcp_header_t
vlib_buffer_push_ip6
static void * vlib_buffer_push_ip6(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto)
Push IPv6 header to buffer.
Definition: ip6_inlines.h:255
vlib_get_buffers
vlib_get_buffers(vm, from, b, n_left_from)
next
u16 * next
Definition: nat44_ei_out2in.c:718
VLIB_FRAME_SIZE
#define VLIB_FRAME_SIZE
Definition: node.h:368
scoreboard_get_hole
static sack_scoreboard_hole_t * scoreboard_get_hole(sack_scoreboard_t *sb, u32 index)
Definition: tcp_sack.h:35
node
vlib_main_t vlib_node_runtime_t * node
Definition: nat44_ei.c:3047
vnet_buffer_offload_flags_set
static_always_inline void vnet_buffer_offload_flags_set(vlib_buffer_t *b, vnet_buffer_oflags_t oflags)
Definition: buffer.h:522
tcp_connection_t
struct _tcp_connection tcp_connection_t
tcp_timer_retransmit_syn_handler
void tcp_timer_retransmit_syn_handler(tcp_connection_t *tc)
SYN retransmit timer handler.
Definition: tcp_output.c:1450
TCP_DBG
#define TCP_DBG(_fmt, _args...)
Definition: tcp_debug.h:146
ip4_address_t::as_u32
u32 as_u32
Definition: ip4_packet.h:57
tcp_zero_rwnd_sent_on
#define tcp_zero_rwnd_sent_on(tc)
Definition: tcp_types.h:438
wrk
session_worker_t * wrk
Definition: application.c:490
vlib_buffer_push_ip4
static void * vlib_buffer_push_ip4(vlib_main_t *vm, vlib_buffer_t *b, ip4_address_t *src, ip4_address_t *dst, int proto, u8 csum_offload)
Push IPv4 header to buffer.
Definition: ip4_inlines.h:150
tcp_session_custom_tx
int tcp_session_custom_tx(void *conn, transport_send_params_t *sp)
Definition: tcp_output.c:2015
vnet_get_tcp_main
static tcp_main_t * vnet_get_tcp_main()
Definition: tcp.h:277
session_add_pending_tx_buffer
static void session_add_pending_tx_buffer(u32 thread_index, u32 bi, u32 next_node)
Add session node pending buffer with custom node.
Definition: session.h:746
tcp_csum_offload
#define tcp_csum_offload(tc)
Definition: tcp_types.h:435
tcp_make_synack
static void tcp_make_synack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN-ACK.
Definition: tcp_output.c:523
tcp_get_worker
static tcp_worker_ctx_t * tcp_get_worker(u32 thread_index)
Definition: tcp.h:283
u16
unsigned short u16
Definition: types.h:57
TCP_DUPACK_THRESHOLD
#define TCP_DUPACK_THRESHOLD
Definition: tcp_types.h:37
tcp_options_t::sacks
sack_block_t * sacks
SACK blocks.
Definition: tcp_packet.h:145
vm
vlib_main_t * vm
X-connect all packets from the HOST to the PHY.
Definition: nat44_ei.c:3047
tcp_timer_update
static void tcp_timer_update(tcp_timer_wheel_t *tw, tcp_connection_t *tc, u8 timer_id, u32 interval)
Definition: tcp_timer.h:43
transport_connection_reschedule
void transport_connection_reschedule(transport_connection_t *tc)
Definition: transport.c:790
tcp_time_tstamp
static u32 tcp_time_tstamp(u32 thread_index)
Time used to generate timestamps, not the timestamp.
Definition: tcp_inlines.h:192
VLIB_RX
@ VLIB_RX
Definition: defs.h:46
state
vl_api_dhcp_client_state_t state
Definition: dhcp.api:201
session_transport_closed_notify
void session_transport_closed_notify(transport_connection_t *tc)
Notification from transport that it is closed.
Definition: session.c:1150
session_queue_run_on_main_thread
void session_queue_run_on_main_thread(vlib_main_t *vm)
Definition: session.c:1787
tcp_cc_congestion
static void tcp_cc_congestion(tcp_connection_t *tc)
Definition: tcp_cc.h:36
from_frame
vlib_main_t vlib_node_runtime_t vlib_frame_t * from_frame
Definition: esp_encrypt.c:1328
vlib_buffer_enqueue_to_next
vlib_buffer_enqueue_to_next(vm, node, from,(u16 *) nexts, frame->n_vectors)
tcp_fastrecovery_prr_snd_space
int tcp_fastrecovery_prr_snd_space(tcp_connection_t *tc)
Estimate send space using proportional rate reduction (RFC6937)
Definition: tcp_output.c:1655
tcp_options_t::tsecr
u32 tsecr
Echoed/reflected time stamp.
Definition: tcp_packet.h:147
vnet_buffer2
#define vnet_buffer2(b)
Definition: buffer.h:499
tcp_options_t::mss
u16 mss
Maximum segment size advertised.
Definition: tcp_packet.h:148
tcp_options_t::n_sack_blocks
u8 n_sack_blocks
Number of SACK blocks.
Definition: tcp_packet.h:151
tcp_reuse_buffer
static void * tcp_reuse_buffer(vlib_main_t *vm, vlib_buffer_t *b)
Definition: tcp_output.c:338
transport_connection_t
struct _transport_connection transport_connection_t
transport_connection_tx_pacer_update_bytes
void transport_connection_tx_pacer_update_bytes(transport_connection_t *tc, u32 bytes)
Definition: transport.c:777
TCP_TO_TIMER_TICK
#define TCP_TO_TIMER_TICK
Factor for converting ticks to timer ticks.
Definition: tcp_types.h:82
ip6_next_header
static void * ip6_next_header(ip6_header_t *i)
Definition: ip6_packet.h:407
vlib_frame_t
Definition: node.h:372
ip_calculate_l4_checksum
static u16 ip_calculate_l4_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip_csum_t sum0, u32 payload_length, u8 *iph, u32 ip_header_size, u8 *l4h)
Definition: ip.h:184
vlib_buffer_length_in_chain
static uword vlib_buffer_length_in_chain(vlib_main_t *vm, vlib_buffer_t *b)
Get length in bytes of the buffer chain.
Definition: buffer_funcs.h:433
tcp_reset_next_t
enum _tcp_reset_next tcp_reset_next_t
clib_memcpy_fast
static_always_inline void * clib_memcpy_fast(void *restrict dst, const void *restrict src, size_t n)
Definition: string.h:92
tcp46_output_trace_frame
static void tcp46_output_trace_frame(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *to_next, u32 n_bufs)
Definition: tcp_output.c:2080
ip4_header_t
Definition: ip4_packet.h:87
tcp_error_strings
static char * tcp_error_strings[]
Definition: tcp_output.c:43
transport_connection_tx_pacer_burst
u32 transport_connection_tx_pacer_burst(transport_connection_t *tc)
Get tx pacer max burst.
Definition: transport.c:757
tcp_retransmit_first_unacked
int tcp_retransmit_first_unacked(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
Retransmit first unacked segment.
Definition: tcp_output.c:1597
foreach_tcp6_output_next
#define foreach_tcp6_output_next
Definition: tcp_output.c:37
tcp_main_t
struct _tcp_main tcp_main_t
TCP_FLAG_SYN
#define TCP_FLAG_SYN
Definition: fa_node.h:13
i32
signed int i32
Definition: types.h:77
tcp_make_ack
static void tcp_make_ack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to ACK.
Definition: tcp_output.c:477
tcp_options_t
Definition: tcp_packet.h:143
tcp_options_t::flags
u8 flags
Option flags, see above.
Definition: tcp_packet.h:149
tcp_make_synack_options
static int tcp_make_synack_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:195
tcp_max_tx_deq
static u8 tcp_max_tx_deq(tcp_connection_t *tc)
Definition: tcp_output.c:1692
tcp_make_fin
static void tcp_make_fin(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to FIN-ACK.
Definition: tcp_output.c:488
tcp_state_t
enum _tcp_state tcp_state_t
tcp_update_rto
static void tcp_update_rto(tcp_connection_t *tc)
Definition: tcp_inlines.h:381
CLIB_PREFETCH
#define CLIB_PREFETCH(addr, size, type)
Definition: cache.h:80
tcp_timer_retransmit_handler
void tcp_timer_retransmit_handler(tcp_connection_t *tc)
Definition: tcp_output.c:1304
vlib_buffer_t::current_data
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
Definition: buffer.h:119
tcp_session_push_header
u32 tcp_session_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
Definition: tcp_output.c:994
vlib_node_runtime_t::errors
vlib_error_t * errors
Vector of errors for this node.
Definition: node.h:460
tcp_update_burst_snd_vars
void tcp_update_burst_snd_vars(tcp_connection_t *tc)
Update burst send vars.
Definition: tcp_output.c:300
seq_leq
#define seq_leq(_s1, _s2)
Definition: tcp_packet.h:179
vec_len
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
Definition: vec_bootstrap.h:142
vlib_buffer_t::error
vlib_error_t error
Error code for buffers to be enqueued to error handler.
Definition: buffer.h:145
tcp_init_buffer
static void * tcp_init_buffer(vlib_main_t *vm, vlib_buffer_t *b)
Definition: tcp_output.c:355
math.h
tcp_send_syn
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
Definition: tcp_output.c:807
transport_connection_tx_pacer_reset_bucket
void transport_connection_tx_pacer_reset_bucket(transport_connection_t *tc, u32 bucket)
Reset tx pacer bucket.
Definition: transport.c:732
len
u8 len
Definition: ip_types.api:103
foreach_tcp6_reset_next
#define foreach_tcp6_reset_next
Definition: tcp_output.c:2355
tcp_program_cleanup
void tcp_program_cleanup(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
Definition: tcp.c:333
tcp_main
tcp_main_t tcp_main
Definition: tcp.c:28
VLIB_NODE_FN
#define VLIB_NODE_FN(node)
Definition: node.h:202
tcp_cc_event
static void tcp_cc_event(tcp_connection_t *tc, tcp_cc_event_t evt)
Definition: tcp_cc.h:61
tcp_opts_wscale
#define tcp_opts_wscale(_to)
Definition: tcp_packet.h:157
TCP_OUTPUT_NEXT_IP_ARP
@ TCP_OUTPUT_NEXT_IP_ARP
Definition: tcp_output.c:27
ip_adjacency_t_::lookup_next_index
ip_lookup_next_t lookup_next_index
Next hop after ip4-lookup.
Definition: adj.h:337
vlib_buffer_alloc
static __clib_warn_unused_result u32 vlib_buffer_alloc(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Allocate buffers into supplied array.
Definition: buffer_funcs.h:708
tcp_validate_txf_size
#define tcp_validate_txf_size(_tc, _a)
Definition: tcp.h:347
CLIB_UNUSED
#define CLIB_UNUSED(x)
Definition: clib.h:90
tcp_make_ack_i
static void tcp_make_ack_i(tcp_connection_t *tc, vlib_buffer_t *b, tcp_state_t state, u8 flags)
Prepare ACK.
Definition: tcp_output.c:441
vnet_buffer
#define vnet_buffer(b)
Definition: buffer.h:437
tcp_timer_persist_handler
void tcp_timer_persist_handler(tcp_connection_t *tc)
Got 0 snd_wnd from peer, try to do something about it.
Definition: tcp_output.c:1514
tcp_output_handle_packet
static void tcp_output_handle_packet(tcp_connection_t *tc0, vlib_buffer_t *b0, vlib_node_runtime_t *error_node, u16 *next0, u8 is_ip4)
Definition: tcp_output.c:2144
tcp_in_recovery
#define tcp_in_recovery(tc)
Definition: tcp_types.h:416
VLIB_NODE_FLAG_TRACE
#define VLIB_NODE_FLAG_TRACE
Definition: node.h:291
tcp_timer_is_active
static u8 tcp_timer_is_active(tcp_connection_t *tc, tcp_timers_e timer)
Definition: tcp_timer.h:110
offset
struct clib_bihash_value offset
template key/value backing page structure
tcp_send_ack
void tcp_send_ack(tcp_connection_t *tc)
Definition: tcp_output.c:1021
tcp_recovery_on
#define tcp_recovery_on(tc)
Definition: tcp_types.h:413
sack_scoreboard_t
struct _sack_scoreboard sack_scoreboard_t
PREDICT_FALSE
#define PREDICT_FALSE(x)
Definition: clib.h:124
tcp46_output_inline
static uword tcp46_output_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, int is_ip4)
Definition: tcp_output.c:2180
ARRAY_LEN
#define ARRAY_LEN(x)
Definition: clib.h:70
vlib_get_buffer_index
static u32 vlib_get_buffer_index(vlib_main_t *vm, void *p)
Translate buffer pointer into buffer index.
Definition: buffer_funcs.h:324
vlib_frame_vector_args
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
Definition: node_funcs.h:301
ip4_tcp_udp_compute_checksum
u16 ip4_tcp_udp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip4_header_t *ip0)
Definition: pnat_test_stubs.h:59
format_tcp_connection_id
format_function_t format_tcp_connection_id
Definition: tcp.h:345
vec_validate_aligned
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:534
TCP_OPTS_ALIGN
#define TCP_OPTS_ALIGN
Definition: tcp_packet.h:173
tcp4_output_node
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
Definition: tcp_output.c:2305
tcp_make_syn_options
static int tcp_make_syn_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:166
uword
u64 uword
Definition: types.h:112
tcp_node_index
#define tcp_node_index(node_id, is_ip4)
Definition: tcp.h:273
if
if(node->flags &VLIB_NODE_FLAG_TRACE) vnet_interface_output_trace(vm
scoreboard_rescue_rxt_valid
#define scoreboard_rescue_rxt_valid(_sb, _tc)
Definition: tcp_output.c:1698
IP_LOOKUP_NEXT_REWRITE
@ IP_LOOKUP_NEXT_REWRITE
This packet is to be rewritten and forwarded to the next processing node.
Definition: adj.h:73
clib_mem_unaligned
#define clib_mem_unaligned(pointer, type)
Definition: types.h:155
transport_max_tx_dequeue
static u32 transport_max_tx_dequeue(transport_connection_t *tc)
Definition: session.h:543
tcp_compute_checksum
static u16 tcp_compute_checksum(tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:415
vlib_main_t::thread_index
u32 thread_index
Definition: main.h:213
tcp_connection_set_state
static void tcp_connection_set_state(tcp_connection_t *tc, tcp_state_t state)
Definition: tcp_inlines.h:51
vlib_node_increment_counter
static void vlib_node_increment_counter(vlib_main_t *vm, u32 node_index, u32 counter_index, u64 increment)
Definition: node_funcs.h:1244
tcp_is_syn
#define tcp_is_syn(_th)
Definition: tcp_packet.h:89
scoreboard_last_hole
static sack_scoreboard_hole_t * scoreboard_last_hole(sack_scoreboard_t *sb)
Definition: tcp_sack.h:67
tcp_connection_timers_reset
void tcp_connection_timers_reset(tcp_connection_t *tc)
Stop all connection timers.
Definition: tcp.c:514
tcp6_reset_node
vlib_node_registration_t tcp6_reset_node
(constructor) VLIB_REGISTER_NODE (tcp6_reset_node)
Definition: tcp_output.c:2448
tcp_bt_track_tx
void tcp_bt_track_tx(tcp_connection_t *tc, u32 len)
Track a tcp tx burst.
Definition: tcp_bt.c:301
src_port
vl_api_ip_port_and_mask_t src_port
Definition: flow_types.api:91
i
sll srl srl sll sra u16x4 i
Definition: vector_sse42.h:261
tcp_retransmit_timer_update
static void tcp_retransmit_timer_update(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
Definition: tcp_timer.h:96
f64
double f64
Definition: types.h:142
tcp_tx_trace_t::tcp_header
tcp_header_t tcp_header
Definition: tcp_output.c:51
ip6_header_t::dst_address
ip6_address_t dst_address
Definition: ip6_packet.h:310
foreach_tcp4_reset_next
#define foreach_tcp4_reset_next
Definition: tcp_output.c:2351
tcp_make_options
static int tcp_make_options(tcp_connection_t *tc, tcp_options_t *opts, tcp_state_t state)
Definition: tcp_output.c:265
vlib_buffer_push_tcp_net_order
static void * vlib_buffer_push_tcp_net_order(vlib_buffer_t *b, u16 sp, u16 dp, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
Definition: tcp_inlines.h:409
tcp_opts_sack_permitted
#define tcp_opts_sack_permitted(_to)
Definition: tcp_packet.h:159
tcp_tx_trace_t
Definition: tcp_output.c:49
tcp_options_write
static u32 tcp_options_write(u8 *data, tcp_options_t *opts)
Write TCP options to segment.
Definition: tcp_packet.h:309
src
vl_api_address_t src
Definition: gre.api:54
fib_protocol_t
enum fib_protocol_t_ fib_protocol_t
Protocol Type.
TCP_EVT
#define TCP_EVT(_evt, _args...)
Definition: tcp_debug.h:145
tcp_send_reset_w_pkt
void tcp_send_reset_w_pkt(tcp_connection_t *tc, vlib_buffer_t *pkt, u32 thread_index, u8 is_ip4)
Send reset without reusing existing buffer.
Definition: tcp_output.c:660
ip4_address_t
Definition: ip4_packet.h:50
ip_adjacency_t_
IP unicast adjacency.
Definition: adj.h:235
FIB_PROTOCOL_IP4
@ FIB_PROTOCOL_IP4
Definition: fib_types.h:36
clib_min
#define clib_min(x, y)
Definition: clib.h:342
CLIB_CACHE_LINE_BYTES
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:59
tcp_tstamp
static u32 tcp_tstamp(tcp_connection_t *tc)
Generate timestamp for tcp connection.
Definition: tcp_inlines.h:201
vlib_node_registration_t
struct _vlib_node_registration vlib_node_registration_t
ip6_inlines.h
tcp_cc_loss
static void tcp_cc_loss(tcp_connection_t *tc)
Definition: tcp_cc.h:42
ip4_header_t::dst_address
ip4_address_t dst_address
Definition: ip4_packet.h:125
vlib_buffer_t::current_length
u16 current_length
Nbytes between current data and the end of this buffer.
Definition: buffer.h:122
tcp_is_fin
#define tcp_is_fin(_th)
Definition: tcp_packet.h:90
TRANSPORT_MAX_HDRS_LEN
#define TRANSPORT_MAX_HDRS_LEN
Definition: transport_types.h:25
TCP_MAX_WND_SCALE
#define TCP_MAX_WND_SCALE
Definition: tcp_packet.h:172
TCP_RTO_SYN_RETRIES
#define TCP_RTO_SYN_RETRIES
Definition: tcp_types.h:88
TCP_RXT_MAX_BURST
#define TCP_RXT_MAX_BURST
Definition: tcp_types.h:35
tcp_time_now_us
static f64 tcp_time_now_us(u32 thread_index)
Definition: tcp_inlines.h:208
TCP_RTO_BOFF_MAX
#define TCP_RTO_BOFF_MAX
Definition: tcp_types.h:90
transport_send_params_
Definition: transport.h:45
data
u8 data[128]
Definition: ipsec_types.api:92
tcp_buffer_hdr
static tcp_header_t * tcp_buffer_hdr(vlib_buffer_t *b)
Definition: tcp_inlines.h:22
transport_rx_fifo_req_deq_ntf
static void transport_rx_fifo_req_deq_ntf(transport_connection_t *tc)
Definition: session.h:578
tcp_send_window_update_ack
void tcp_send_window_update_ack(tcp_connection_t *tc)
Send window update ack.
Definition: tcp_output.c:1080
vlib_validate_buffer_enqueue_x1
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
Definition: buffer_node.h:224
always_inline
#define always_inline
Definition: rdma_mlx5dv.h:23
clib_bihash_value
template key/value backing page structure
Definition: bihash_doc.h:44
space
description No buffer space
Definition: ikev2.api:563
tcp_transmit_unsent
static int tcp_transmit_unsent(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Definition: tcp_output.c:1616
sack_scoreboard_hole_t
struct _sack_scoreboard_hole sack_scoreboard_hole_t
ip4_header_t::src_address
ip4_address_t src_address
Definition: ip4_packet.h:125
tcp_update_rcv_wnd
static void tcp_update_rcv_wnd(tcp_connection_t *tc)
Definition: tcp_output.c:116
scoreboard_next_rxt_hole
sack_scoreboard_hole_t * scoreboard_next_rxt_hole(sack_scoreboard_t *sb, sack_scoreboard_hole_t *start, u8 have_unsent, u8 *can_rescue, u8 *snd_limited)
Figure out the next hole to retransmit.
Definition: tcp_sack.c:194
tcp_bt_track_rxt
void tcp_bt_track_rxt(tcp_connection_t *tc, u32 start, u32 end)
Track a tcp retransmission.
Definition: tcp_bt.c:338
TCP_OPTION_LEN_TIMESTAMP
#define TCP_OPTION_LEN_TIMESTAMP
Definition: tcp_packet.h:167
tcp_program_ack
void tcp_program_ack(tcp_connection_t *tc)
Definition: tcp_output.c:1041
session_tx_fifo_peek_bytes
int session_tx_fifo_peek_bytes(transport_connection_t *tc, u8 *buffer, u32 offset, u32 max_bytes)
Definition: session.c:684
VLIB_NODE_PROTO_HINT_TCP
@ VLIB_NODE_PROTO_HINT_TCP
Definition: node.h:64
tcp_connection_tx_pacer_reset
void tcp_connection_tx_pacer_reset(tcp_connection_t *tc, u32 window, u32 start_bucket)
Definition: tcp.c:1374
format
description fragment has unexpected format
Definition: map.api:433
ASSERT
#define ASSERT(truth)
Definition: error_bootstrap.h:69
tcp_program_dupack
void tcp_program_dupack(tcp_connection_t *tc)
Definition: tcp_output.c:1051
TCP_OPTION_LEN_SACK_PERMITTED
#define TCP_OPTION_LEN_SACK_PERMITTED
Definition: tcp_packet.h:166
tcp_in_fastrecovery
#define tcp_in_fastrecovery(tc)
Definition: tcp_types.h:415
format_get_indent
static u32 format_get_indent(u8 *s)
Definition: format.h:72
data_len
u8 data_len
Definition: ikev2_types.api:24
seq_lt
#define seq_lt(_s1, _s2)
Definition: tcp_packet.h:178
vlib_put_next_frame
vlib_put_next_frame(vm, node, next_index, 0)
tcp_window_compute_scale
static u8 tcp_window_compute_scale(u32 window)
Definition: tcp_output.c:73
ip_csum_with_carry
static ip_csum_t ip_csum_with_carry(ip_csum_t sum, ip_csum_t x)
Definition: ip_packet.h:248
fib_table_get_index_for_sw_if_index
u32 fib_table_get_index_for_sw_if_index(fib_protocol_t proto, u32 sw_if_index)
Get the index of the FIB bound to the interface.
Definition: fib_table.c:998
u32
unsigned int u32
Definition: types.h:88
vlib_buffer_make_headroom
static void * vlib_buffer_make_headroom(vlib_buffer_t *b, u8 size)
Make head room, typically for packet headers.
Definition: buffer.h:378
tcp_make_established_options
static int tcp_make_established_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:230
tcp.h
tcp_send_acks
static int tcp_send_acks(tcp_connection_t *tc, u32 max_burst_size)
Definition: tcp_output.c:1941
foreach_tcp4_output_next
#define foreach_tcp4_output_next
Definition: tcp_output.c:31
tcp_zero_rwnd_sent
#define tcp_zero_rwnd_sent(tc)
Definition: tcp_types.h:437
scoreboard_init_rxt
void scoreboard_init_rxt(sack_scoreboard_t *sb, u32 snd_una)
Definition: tcp_sack.c:254
n_bytes
u32 n_bytes
Definition: interface_output.c:401
tcp_send_fin
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
Definition: tcp_output.c:872
tcp_connection_get
static tcp_connection_t * tcp_connection_get(u32 conn_index, u32 thread_index)
Definition: tcp_inlines.h:30
tcp_push_hdr_i
static void tcp_push_hdr_i(tcp_connection_t *tc, vlib_buffer_t *b, u32 snd_nxt, u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
Push TCP header and update connection variables.
Definition: tcp_output.c:921
FIB_PROTOCOL_IP6
@ FIB_PROTOCOL_IP6
Definition: fib_types.h:37
dst
vl_api_ip4_address_t dst
Definition: pnat.api:41
session_worker_::vm
vlib_main_t * vm
Convenience pointer to this worker's vlib_main.
Definition: session.h:104
scoreboard_clear_reneging
void scoreboard_clear_reneging(sack_scoreboard_t *sb, u32 start, u32 end)
Definition: tcp_sack.c:297
tcp_zero_rwnd_sent_off
#define tcp_zero_rwnd_sent_off(tc)
Definition: tcp_types.h:439
tcp_in_cong_recovery
#define tcp_in_cong_recovery(tc)
Definition: tcp_types.h:425
TCP_N_ERROR
@ TCP_N_ERROR
Definition: tcp.h:40
tcp_retransmit_no_sack
static int tcp_retransmit_no_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Fast retransmit without SACK info.
Definition: tcp_output.c:1860
tcp_fastrecovery_first
#define tcp_fastrecovery_first(tc)
Definition: tcp_types.h:421
pool_elts
static uword pool_elts(void *v)
Number of active elements in a pool.
Definition: pool.h:127
tcp_retransmit_should_retry_head
static u8 tcp_retransmit_should_retry_head(tcp_connection_t *tc, sack_scoreboard_t *sb)
Definition: tcp_output.c:1679
tcp_send_synack
void tcp_send_synack(tcp_connection_t *tc)
Definition: tcp_output.c:843
ip6_tcp_compute_checksum_custom
u16 ip6_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
Definition: tcp_output.c:369
tcp_window_to_advertise
static u32 tcp_window_to_advertise(tcp_connection_t *tc, tcp_state_t state)
Compute and return window to advertise, scaled as per RFC1323.
Definition: tcp_output.c:156
TRANSPORT_PACER_MIN_BURST
#define TRANSPORT_PACER_MIN_BURST
Definition: transport.h:23
ip6_header_t
Definition: ip6_packet.h:294
round_down_pow2
static uword round_down_pow2(uword x, uword pow2)
Definition: clib.h:273
tcp_program_retransmit
void tcp_program_retransmit(tcp_connection_t *tc)
Definition: tcp_output.c:1063
TCP_RESET_NEXT_IP_LOOKUP
@ TCP_RESET_NEXT_IP_LOOKUP
Definition: tcp_output.c:2347
tcp_update_time_now
static void tcp_update_time_now(tcp_worker_ctx_t *wrk)
Definition: tcp_inlines.h:224
tcp_retransmit_sack
static int tcp_retransmit_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Do retransmit with SACKs.
Definition: tcp_output.c:1706
adj_index_t
u32 adj_index_t
An index for adjacencies.
Definition: adj_types.h:30
tcp_push_ip_hdr
static void tcp_push_ip_hdr(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:784
ip6_header_t::src_address
ip6_address_t src_address
Definition: ip6_packet.h:310
adj_nbr_find
adj_index_t adj_nbr_find(fib_protocol_t nh_proto, vnet_link_t link_type, const ip46_address_t *nh_addr, u32 sw_if_index)
Lookup neighbor adjacency.
Definition: adj_nbr.c:109
clib_memset
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
vlib_main_t
Definition: main.h:102
tcp_prepare_retransmit_segment
static u32 tcp_prepare_retransmit_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Build a retransmit segment.
Definition: tcp_output.c:1222
ip4_header_t::ip_version_and_header_length
u8 ip_version_and_header_length
Definition: ip4_packet.h:93
vlib_node_t
Definition: node.h:247
vlib_add_trace
void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
Definition: trace.c:628
transport_send_params_::max_burst_size
u32 max_burst_size
Definition: transport.h:59
TCP_FLAG_ACK
#define TCP_FLAG_ACK
Definition: fa_node.h:16
TCP_WND_MAX
#define TCP_WND_MAX
Definition: tcp_packet.h:171
tcp_bt_check_app_limited
void tcp_bt_check_app_limited(tcp_connection_t *tc)
Check if sample to be generated is app limited.
Definition: tcp_bt.c:286
tcp_flight_size
static u32 tcp_flight_size(const tcp_connection_t *tc)
Our estimate of the number of bytes in flight (pipe size)
Definition: tcp_inlines.h:92
b
vlib_buffer_t ** b
Definition: nat44_ei_out2in.c:717
u8
unsigned char u8
Definition: types.h:56
transport_max_rx_enqueue
static u32 transport_max_rx_enqueue(transport_connection_t *tc)
Definition: session.h:536
VNET_LINK_IP6
@ VNET_LINK_IP6
Definition: interface.h:348
vlib_buffer_get_current
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
Definition: buffer.h:257
scoreboard_first_hole
static sack_scoreboard_hole_t * scoreboard_first_hole(sack_scoreboard_t *sb)
Definition: tcp_sack.h:59
ip_csum_t
uword ip_csum_t
Definition: ip_packet.h:245
tcp_cfg
#define tcp_cfg
Definition: tcp.h:272
clib_warning
#define clib_warning(format, args...)
Definition: error.h:59
tcp_fastrecovery_first_off
#define tcp_fastrecovery_first_off(tc)
Definition: tcp_types.h:423
TCP_OUTPUT_NEXT_IP_LOOKUP
@ TCP_OUTPUT_NEXT_IP_LOOKUP
Definition: tcp_output.c:25
nexts
u16 nexts[VLIB_FRAME_SIZE]
Definition: nat44_ei_out2in.c:718
tcp_prepare_segment
static int tcp_prepare_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Allocate a new buffer and build a new tcp segment.
Definition: tcp_output.c:1106
tcp_error.def
tcp_tx_trace_t::tcp_connection
tcp_connection_t tcp_connection
Definition: tcp_output.c:52
tcp_output_push_ip
static void tcp_output_push_ip(vlib_main_t *vm, vlib_buffer_t *b0, tcp_connection_t *tc0, u8 is_ip4)
Definition: tcp_output.c:2104
vlib_buffer_free_one
static void vlib_buffer_free_one(vlib_main_t *vm, u32 buffer_index)
Free one buffer. Shorthand to free a single buffer chain.
Definition: buffer_funcs.h:1015
ip4_tcp_compute_checksum_custom
u16 ip4_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
Definition: tcp_output.c:395
TCP_ESTABLISH_TIME
#define TCP_ESTABLISH_TIME
Definition: tcp_types.h:91
tcp_make_reset_in_place
static int tcp_make_reset_in_place(vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4)
Definition: tcp_output.c:578
TCP_OPTION_LEN_MSS
#define TCP_OPTION_LEN_MSS
Definition: tcp_packet.h:164
tcp_do_retransmit
static int tcp_do_retransmit(tcp_connection_t *tc, u32 max_burst_size)
Definition: tcp_output.c:1996
vlib_node_runtime_t
Definition: node.h:454
tcp_persist_timer_set
static void tcp_persist_timer_set(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
Definition: tcp_timer.h:69
tcp_worker_ctx_
Definition: tcp.h:75
vlib_buffer_push_tcp
static void * vlib_buffer_push_tcp(vlib_buffer_t *b, u16 sp_net, u16 dp_net, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
Definition: tcp_inlines.h:446
from
from
Definition: nat44_ei_hairpinning.c:415
PREDICT_TRUE
#define PREDICT_TRUE(x)
Definition: clib.h:125
tcp_check_if_gso
static void tcp_check_if_gso(tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:2120
vlib_buffer_t::total_length_not_including_first_buffer
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
Definition: buffer.h:176
sw_if_index
vl_api_interface_index_t sw_if_index
Definition: wireguard.api:34
vlib_get_next_frame
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
Definition: node_funcs.h:395
TCP_OUTPUT_NEXT_DROP
@ TCP_OUTPUT_NEXT_DROP
Definition: tcp_output.c:24
TCP_CC_EVT_START_TX
@ TCP_CC_EVT_START_TX
Definition: tcp_types.h:264
tcp_worker_stats_inc
#define tcp_worker_stats_inc(_wrk, _stat, _val)
Definition: tcp.h:128
VLIB_TX
@ VLIB_TX
Definition: defs.h:47
TCP_OPTION_LEN_SACK_BLOCK
#define TCP_OPTION_LEN_SACK_BLOCK
Definition: tcp_packet.h:168
adj_get
static ip_adjacency_t * adj_get(adj_index_t adj_index)
Get a pointer to an adjacency object from its index.
Definition: adj.h:470
n_left_from
n_left_from
Definition: nat44_ei_hairpinning.c:416
tcp_initial_window_to_advertise
u32 tcp_initial_window_to_advertise(tcp_connection_t *tc)
Compute initial window and scale factor.
Definition: tcp_output.c:104
ip6_header_t::ip_version_traffic_class_and_flow_label
u32 ip_version_traffic_class_and_flow_label
Definition: ip6_packet.h:297
tcp_output_handle_link_local
static void tcp_output_handle_link_local(tcp_connection_t *tc0, vlib_buffer_t *b0, u16 *next0, u32 *error0)
Definition: tcp_output.c:2048
tcp_enqueue_to_ip_lookup
static void tcp_enqueue_to_ip_lookup(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4, u32 fib_index)
Definition: tcp_output.c:545
TCP_OUTPUT_NEXT_IP_REWRITE
@ TCP_OUTPUT_NEXT_IP_REWRITE
Definition: tcp_output.c:26
vlib_buffer_push_ip6_custom
static void * vlib_buffer_push_ip6_custom(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto, u32 flow_label)
Push IPv6 header to buffer.
Definition: ip6_inlines.h:212
tcp_is_descheduled
static u8 tcp_is_descheduled(tcp_connection_t *tc)
Definition: tcp_inlines.h:388
format_white_space
u8 * format_white_space(u8 *s, va_list *va)
Definition: std-formats.c:129
ip4_next_header
static void * ip4_next_header(ip4_header_t *i)
Definition: ip4_packet.h:196
tcp_initial_wnd_unscaled
static u32 tcp_initial_wnd_unscaled(tcp_connection_t *tc)
TCP's initial window.
Definition: tcp_output.c:85
session_add_self_custom_tx_evt
void session_add_self_custom_tx_evt(transport_connection_t *tc, u8 has_prio)
Definition: session.c:128
vlib_buffer_t::flags
u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index,...
Definition: buffer.h:133
TCP_OUTPUT_N_NEXT
@ TCP_OUTPUT_N_NEXT
Definition: tcp_output.c:28
session_stream_connect_notify
int session_stream_connect_notify(transport_connection_t *tc, session_error_t err)
Definition: session.c:888
vlib_buffer_t
VLIB buffer representation.
Definition: buffer.h:111
VLIB_REGISTER_NODE
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:169
tcp_half_open_connection_cleanup
int tcp_half_open_connection_cleanup(tcp_connection_t *tc)
Try to cleanup half-open connection.
Definition: tcp.c:207
tcp_available_cc_snd_space
static u32 tcp_available_cc_snd_space(const tcp_connection_t *tc)
Estimate of how many bytes we can still push into the network.
Definition: tcp_inlines.h:169
flags
vl_api_wireguard_peer_flags_t flags
Definition: wireguard.api:105