FD.io VPP  v21.01.1
Vector Packet Processing
tcp_output.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <vnet/tcp/tcp.h>
17 #include <vnet/tcp/tcp_inlines.h>
18 #include <math.h>
19 #include <vnet/ip/ip4_inlines.h>
20 #include <vnet/ip/ip6_inlines.h>
21 
22 typedef enum _tcp_output_next
23 {
30 
/* X-macro list of next-node entries for the IPv4 output path. Each item has
 * the form _ (SYMBOL, "name-string"); the user of the list defines `_`
 * before expanding it. The strings look like VPP graph node names; the
 * expansion site is not visible in this listing. */
31 #define foreach_tcp4_output_next \
32  _ (DROP, "error-drop") \
33  _ (IP_LOOKUP, "ip4-lookup") \
34  _ (IP_REWRITE, "ip4-rewrite") \
35  _ (IP_ARP, "ip4-arp")
36 
/* IPv6 counterpart of the IPv4 list above: same symbols in the same order,
 * paired with ip6 name strings. Keep both lists in the same order if the
 * symbols are meant to map to the same values (TODO confirm at the
 * expansion site, which is not visible in this listing). */
37 #define foreach_tcp6_output_next \
38  _ (DROP, "error-drop") \
39  _ (IP_LOOKUP, "ip6-lookup") \
40  _ (IP_REWRITE, "ip6-rewrite") \
41  _ (IP_ARP, "ip6-discover-neighbor")
42 
/* Table of TCP error strings. tcp_error.def is included with
 * tcp_error (n, s) temporarily defined to emit only the string `s`, so every
 * entry of the .def file contributes one array element, in file order. */
43 static char *tcp_error_strings[] = {
44 #define tcp_error(n,s) s,
45 #include <vnet/tcp/tcp_error.def>
46 #undef tcp_error
47 };
48 
49 typedef struct
50 {
54 
/* Trace formatter. Consumes (vlib_main_t *, vlib_node_t *, tcp_tx_trace_t *)
 * from the va_list, in that order, and appends the connection id, the
 * connection state and the traced TCP header to `s`.
 * Returns the (possibly reallocated) format vector.
 * NOTE(review): the declaration of `tc` (listing line 61) is missing from
 * this listing; it is presumably derived from the trace record `t`. */
55 static u8 *
56 format_tcp_tx_trace (u8 * s, va_list * args)
57 {
 /* vm and node are pulled only to advance the va_list; they are unused */
58  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
60  tcp_tx_trace_t *t = va_arg (*args, tcp_tx_trace_t *);
 /* Remember the current indent so the header line aligns under the first */
62  u32 indent = format_get_indent (s);
63 
64  s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
65  format_tcp_state, tc->state, format_white_space, indent,
66  format_tcp_header, &t->tcp_header, 128);
67 
68  return s;
69 }
70 
71 #ifndef CLIB_MARCH_VARIANT
/* Returns the smallest shift count, capped at TCP_MAX_WND_SCALE, for which
 * `window >> shift` no longer exceeds TCP_WND_MAX.
 * NOTE(review): the name/parameter line is missing from this listing; the
 * call `tcp_window_compute_scale (tcp_cfg.max_rx_fifo)` further down
 * presumably refers to this function, with `window` as its only argument. */
72 static u8
74 {
75  u8 wnd_scale = 0;
76  while (wnd_scale < TCP_MAX_WND_SCALE && (window >> wnd_scale) > TCP_WND_MAX)
77  wnd_scale++;
78  return wnd_scale;
79 }
80 
81 /**
82  * TCP's initial window
83  */
86 {
87  /* RFC 6928 recommends the value lower. However at the time our connections
88  * are initialized, fifos may not be allocated. Therefore, advertise the
89  * smallest possible unscaled window size and update once fifos are
90  * assigned to the session.
91  */
92  /*
93  tcp_update_rcv_mss (tc);
94  TCP_IW_N_SEGMENTS * tc->mss;
95  */
96  return tcp_cfg.min_rx_fifo;
97 }
98 
99 /**
100  * Compute initial window and scale factor. As per RFC1323, window field in
101  * SYN and SYN-ACK segments is never scaled.
102  */
/* Sets tc->rcv_wscale (conditionally) and tc->rcv_wnd, and returns the
 * window value clamped to TCP_WND_MAX so it can be sent unscaled.
 * NOTE(review): the name/parameter line is missing from this listing; call
 * sites below use `tcp_initial_window_to_advertise (tc)`. */
103 u32
105 {
106  /* Compute rcv wscale unless we are in SYN_RCVD and the peer did not
 * advertise window scaling; in any other state it is always computed */
107  if (tc->state != TCP_STATE_SYN_RCVD || tcp_opts_wscale (&tc->rcv_opts))
108  tc->rcv_wscale = tcp_window_compute_scale (tcp_cfg.max_rx_fifo);
109 
110  tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
111 
 /* Clamp the returned value; tc->rcv_wnd itself is left unclamped */
112  return clib_min (tc->rcv_wnd, TCP_WND_MAX);
113 }
114 
/* Recomputes tc->rcv_wnd from the space currently available for rx enqueue.
 * The result is never smaller than what the peer can still use of the
 * previously advertised window, is a multiple of 1 << rcv_wscale, is forced
 * to 0 when below rcv_opts.mss, and is capped at TCP_WND_MAX << rcv_wscale.
 * NOTE(review): the name/parameter line is missing from this listing; call
 * sites below use `tcp_update_rcv_wnd (tc)`. */
115 static inline void
117 {
118  u32 available_space, wnd;
119  i32 observed_wnd;
120 
121  /*
122  * Figure out how much space we have available
123  */
124  available_space = transport_max_rx_enqueue (&tc->connection);
125 
126  /*
127  * Use the above and what we know about what we've previously advertised
128  * to compute the new window
129  */
 /* Previously advertised window minus bytes received since the value in
 * rcv_las was recorded; may go negative, hence the signed type */
130  observed_wnd = (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
131 
132  /* Check if we are about to retract the window. Do the comparison before
133  * rounding to avoid errors. Per RFC7323 sec. 2.4 we could remove this */
134  if (PREDICT_FALSE ((i32) available_space < observed_wnd))
135  {
 /* Keep advertising what the peer already saw instead of shrinking */
136  wnd = round_down_pow2 (clib_max (observed_wnd, 0), 1 << tc->rcv_wscale);
137  TCP_EVT (TCP_EVT_RCV_WND_SHRUNK, tc, observed_wnd, available_space);
138  }
139  else
140  {
141  /* Make sure we have a multiple of 1 << rcv_wscale. We round down to
142  * avoid advertising a window larger than what can be buffered */
143  wnd = round_down_pow2 (available_space, 1 << tc->rcv_wscale);
144  }
145 
 /* Windows smaller than rcv_opts.mss are advertised as zero */
146  if (PREDICT_FALSE (wnd < tc->rcv_opts.mss))
147  wnd = 0;
148 
149  tc->rcv_wnd = clib_min (wnd, TCP_WND_MAX << tc->rcv_wscale);
150 }
151 
152 /**
153  * Compute and return window to advertise, scaled as per RFC1323
154  */
155 static inline u32
157 {
158  if (state < TCP_STATE_ESTABLISHED)
160 
161  tcp_update_rcv_wnd (tc);
162  return tc->rcv_wnd >> tc->rcv_wscale;
163 }
164 
165 static int
167 {
168  u8 len = 0;
169 
170  opts->flags |= TCP_OPTS_FLAG_MSS;
171  opts->mss = tc->mss;
172  len += TCP_OPTION_LEN_MSS;
173 
174  opts->flags |= TCP_OPTS_FLAG_WSCALE;
175  opts->wscale = tc->rcv_wscale;
177 
178  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
179  opts->tsval = tcp_time_now ();
180  opts->tsecr = 0;
182 
183  if (TCP_USE_SACKS)
184  {
185  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
187  }
188 
189  /* Align to needed boundary */
190  len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
191  return len;
192 }
193 
194 static int
196 {
197  u8 len = 0;
198 
199  opts->flags |= TCP_OPTS_FLAG_MSS;
200  opts->mss = tc->mss;
201  len += TCP_OPTION_LEN_MSS;
202 
203  if (tcp_opts_wscale (&tc->rcv_opts))
204  {
205  opts->flags |= TCP_OPTS_FLAG_WSCALE;
206  opts->wscale = tc->rcv_wscale;
208  }
209 
210  if (tcp_opts_tstamp (&tc->rcv_opts))
211  {
212  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
213  opts->tsval = tcp_time_now ();
214  opts->tsecr = tc->tsval_recent;
216  }
217 
218  if (tcp_opts_sack_permitted (&tc->rcv_opts))
219  {
220  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
222  }
223 
224  /* Align to needed boundary */
225  len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
226  return len;
227 }
228 
229 static int
231 {
232  u8 len = 0;
233 
234  opts->flags = 0;
235 
236  if (tcp_opts_tstamp (&tc->rcv_opts))
237  {
238  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
239  opts->tsval = tcp_tstamp (tc);
240  opts->tsecr = tc->tsval_recent;
242  }
243  if (tcp_opts_sack_permitted (&tc->rcv_opts))
244  {
245  if (vec_len (tc->snd_sacks))
246  {
247  opts->flags |= TCP_OPTS_FLAG_SACK;
248  if (tc->snd_sack_pos >= vec_len (tc->snd_sacks))
249  tc->snd_sack_pos = 0;
250  opts->sacks = &tc->snd_sacks[tc->snd_sack_pos];
251  opts->n_sack_blocks = vec_len (tc->snd_sacks) - tc->snd_sack_pos;
252  opts->n_sack_blocks = clib_min (opts->n_sack_blocks,
254  tc->snd_sack_pos += opts->n_sack_blocks;
255  len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks;
256  }
257  }
258 
259  /* Align to needed boundary */
260  len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
261  return len;
262 }
263 
264 always_inline int
267 {
268  switch (state)
269  {
270  case TCP_STATE_ESTABLISHED:
271  case TCP_STATE_CLOSE_WAIT:
272  case TCP_STATE_FIN_WAIT_1:
273  case TCP_STATE_LAST_ACK:
274  case TCP_STATE_CLOSING:
275  case TCP_STATE_FIN_WAIT_2:
276  case TCP_STATE_TIME_WAIT:
277  case TCP_STATE_CLOSED:
278  return tcp_make_established_options (tc, opts);
279  case TCP_STATE_SYN_RCVD:
280  return tcp_make_synack_options (tc, opts);
281  case TCP_STATE_SYN_SENT:
282  return tcp_make_syn_options (tc, opts);
283  default:
284  clib_warning ("State not handled! %d", state);
285  return 0;
286  }
287 }
288 
289 /**
290  * Update burst send vars
291  *
292  * - Updates snd_mss to reflect the effective segment size that we can send
293  * by taking into account all TCP options, including SACKs.
294  * - Cache 'on the wire' options for reuse
295  * - Updates receive window which can be reused for a burst.
296  *
297  * This should *only* be called when doing bursts
298  */
299 void
301 {
302  tcp_main_t *tm = &tcp_main;
303 
304  /* Compute options to be used for connection. These may be reused when
305  * sending data or to compute the effective mss (snd_mss) */
306  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts,
307  TCP_STATE_ESTABLISHED);
308 
309  /* XXX check if MTU has been updated */
310  tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
311  ASSERT (tc->snd_mss > 0);
312 
313  tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts,
314  &tc->snd_opts);
315 
316  tcp_update_rcv_wnd (tc);
317 
318  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
320 
321  if (tc->snd_una == tc->snd_nxt)
322  {
325  }
326 
327  if (tc->flags & TCP_CONN_PSH_PENDING)
328  {
329  u32 max_deq = transport_max_tx_dequeue (&tc->connection);
330  /* Last byte marked for push */
331  tc->psh_seq = tc->snd_una + max_deq - 1;
332  }
333 }
334 
335 #endif /* CLIB_MARCH_VARIANT */
336 
337 static void *
339 {
340  if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
342  /* Zero all flags but free list index and trace flag */
343  b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
344  b->current_data = 0;
345  b->current_length = 0;
347  vnet_buffer (b)->tcp.flags = 0;
348 
349  /* Leave enough space for headers */
351 }
352 
353 #ifndef CLIB_MARCH_VARIANT
354 static void *
356 {
357  ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
358  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
360  b->current_data = 0;
361  vnet_buffer (b)->tcp.flags = 0;
363  /* Leave enough space for headers */
365 }
366 
367 
368 /* Compute TCP checksum in software when offloading is disabled for a connection */
/* IPv6 variant (addresses are read through ->ip6). Seeds the sum with the
 * pseudo-header (chain length and IP_PROTOCOL_TCP, both converted with
 * clib_host_to_net_u16, then source and destination address words) and
 * hands off to ip_calculate_l4_checksum for the buffer chain.
 * NOTE(review): the line carrying the function name and the leading
 * parameters (vm, p0) is missing from this listing. */
369 u16
371  ip46_address_t * src, ip46_address_t * dst)
372 {
373  ip_csum_t sum0;
 /* NOTE(review): u16 here but u32 in the IPv4 variant below; a chain longer
 * than 65535 bytes would be truncated - confirm whether that can occur */
374  u16 payload_length_host_byte_order;
375  u32 i;
376 
377  /* Initialize checksum with ip header. */
378  sum0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0)) +
379  clib_host_to_net_u16 (IP_PROTOCOL_TCP);
380  payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
381 
 /* Fold in both addresses one uword at a time; unaligned-safe loads */
382  for (i = 0; i < ARRAY_LEN (src->ip6.as_uword); i++)
383  {
384  sum0 = ip_csum_with_carry
385  (sum0, clib_mem_unaligned (&src->ip6.as_uword[i], uword));
386  sum0 = ip_csum_with_carry
387  (sum0, clib_mem_unaligned (&dst->ip6.as_uword[i], uword));
388  }
389 
390  return ip_calculate_l4_checksum (vm, p0, sum0,
391  payload_length_host_byte_order, NULL, 0,
392  NULL);
393 }
394 
/* IPv4 variant (addresses are read through ->ip4). Seeds the sum with the
 * pseudo-header (chain length plus protocol, then source and destination
 * addresses) and hands off to ip_calculate_l4_checksum for the buffer chain.
 * NOTE(review): the line carrying the function name and the leading
 * parameters (vm, p0) is missing from this listing. */
395 u16
397  ip46_address_t * src, ip46_address_t * dst)
398 {
399  ip_csum_t sum0;
400  u32 payload_length_host_byte_order;
401 
402  payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
 /* Length and protocol are packed into one 32-bit word and byte-swapped
 * together */
403  sum0 =
404  clib_host_to_net_u32 (payload_length_host_byte_order +
405  (IP_PROTOCOL_TCP << 16));
406 
407  sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&src->ip4, u32));
408  sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&dst->ip4, u32));
409 
410  return ip_calculate_l4_checksum (vm, p0, sum0,
411  payload_length_host_byte_order, NULL, 0,
412  NULL);
413 }
414 
415 static inline u16
417 {
418  u16 checksum = 0;
419  if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
420  {
421  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
422  vlib_main_t *vm = wrk->vm;
423 
424  if (tc->c_is_ip4)
426  (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
427  else
429  (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
430  }
431  else
432  {
433  b->flags |= VNET_BUFFER_F_OFFLOAD_TCP_CKSUM;
434  }
435  return checksum;
436 }
437 
438 /**
439  * Prepare ACK
440  */
441 static inline void
443  u8 flags)
444 {
445  tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
446  u8 tcp_opts_len, tcp_hdr_opts_len;
447  tcp_header_t *th;
448  u16 wnd;
449 
450  wnd = tcp_window_to_advertise (tc, state);
451 
452  /* Make and write options */
453  tcp_opts_len = tcp_make_established_options (tc, snd_opts);
454  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
455 
456  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
457  tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
458 
459  tcp_options_write ((u8 *) (th + 1), snd_opts);
460 
461  th->checksum = tcp_compute_checksum (tc, b);
462 
463  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
464 
465  if (wnd == 0)
467  else
469 }
470 
471 /**
472  * Convert buffer to ACK
473  */
/* Builds a plain ACK in `b` through tcp_make_ack_i, logs the event and
 * records rcv_nxt in rcv_las (the value the receive-window update subtracts
 * from rcv_nxt).
 * NOTE(review): the name/parameter line is missing from this listing; call
 * sites below use `tcp_make_ack (tc, b)`. */
474 static inline void
476 {
477  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK);
478  TCP_EVT (TCP_EVT_ACK_SENT, tc);
479  tc->rcv_las = tc->rcv_nxt;
480 }
481 
482 /**
483  * Convert buffer to FIN-ACK
484  */
/* Builds a FIN|ACK segment in `b` through tcp_make_ack_i. Unlike the plain
 * ACK helper above, this does not touch rcv_las.
 * NOTE(review): the name/parameter line is missing from this listing; call
 * sites below use `tcp_make_fin (tc, b)`. */
485 static void
487 {
488  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK);
489 }
490 
491 /**
492  * Convert buffer to SYN
493  */
/* Pushes a TCP header with the SYN flag onto `b`, using tc->iss as sequence
 * number, followed by the SYN options, then fills in the checksum.
 * NOTE(review): the name/parameter line is missing from this listing; call
 * sites below use `tcp_make_syn (tc, b)`. */
494 void
496 {
497  u8 tcp_hdr_opts_len, tcp_opts_len;
498  tcp_header_t *th;
499  u16 initial_wnd;
500  tcp_options_t snd_opts;
501 
 /* Also updates tc->rcv_wnd and possibly tc->rcv_wscale as a side effect */
502  initial_wnd = tcp_initial_window_to_advertise (tc);
503 
504  /* Make and write options */
505  clib_memset (&snd_opts, 0, sizeof (snd_opts));
506  tcp_opts_len = tcp_make_syn_options (tc, &snd_opts);
507  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
508 
509  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
510  tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN,
511  initial_wnd);
512  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
 /* Options are written immediately after the fixed header */
513  tcp_options_write ((u8 *) (th + 1), &snd_opts);
 /* Checksum last, once header and options are in place */
514  th->checksum = tcp_compute_checksum (tc, b);
515 }
516 
517 /**
518  * Convert buffer to SYN-ACK
519  */
/* Pushes a TCP header with SYN|ACK onto `b`, using tc->iss as sequence
 * number and tc->rcv_nxt as ack number, followed by the SYN-ACK options,
 * then fills in the checksum.
 * NOTE(review): the name/parameter line is missing from this listing; call
 * sites below use `tcp_make_synack (tc, b)`. */
520 static void
522 {
523  tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
524  u8 tcp_opts_len, tcp_hdr_opts_len;
525  tcp_header_t *th;
526  u16 initial_wnd;
527 
528  clib_memset (snd_opts, 0, sizeof (*snd_opts));
 /* Also updates tc->rcv_wnd and possibly tc->rcv_wscale as a side effect,
 * so it must run before the options (which carry rcv_wscale) are built */
529  initial_wnd = tcp_initial_window_to_advertise (tc);
530  tcp_opts_len = tcp_make_synack_options (tc, snd_opts);
531  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
532 
533  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
534  tc->rcv_nxt, tcp_hdr_opts_len,
535  TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd);
 /* Options are written immediately after the fixed header */
536  tcp_options_write ((u8 *) (th + 1), snd_opts);
537 
538  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
539  th->checksum = tcp_compute_checksum (tc, b);
540 }
541 
542 static void
544  u8 is_ip4, u32 fib_index)
545 {
546  tcp_main_t *tm = &tcp_main;
547  vlib_main_t *vm = wrk->vm;
548 
549  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
550  b->error = 0;
551 
552  vnet_buffer (b)->sw_if_index[VLIB_TX] = fib_index;
553  vnet_buffer (b)->sw_if_index[VLIB_RX] = 0;
554 
556 
558  tm->ipl_next_node[!is_ip4]);
559 
560  if (vm->thread_index == 0 && vlib_num_workers ())
562 }
563 
564 static void
566  u8 is_ip4)
567 {
568  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
569  b->error = 0;
570 
572  wrk->tco_next_node[!is_ip4]);
573 }
574 
575 #endif /* CLIB_MARCH_VARIANT */
576 
577 static int
579 {
580  ip4_header_t *ih4;
581  ip6_header_t *ih6;
582  tcp_header_t *th;
583  ip4_address_t src_ip4, dst_ip4;
584  ip6_address_t src_ip6, dst_ip6;
586  u32 tmp, len, seq, ack;
587  u8 flags;
588 
589  /* Find IP and TCP headers */
590  th = tcp_buffer_hdr (b);
591 
592  /* Save src and dst ip */
593  if (is_ip4)
594  {
595  ih4 = vlib_buffer_get_current (b);
596  ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40);
597  src_ip4.as_u32 = ih4->src_address.as_u32;
598  dst_ip4.as_u32 = ih4->dst_address.as_u32;
599  }
600  else
601  {
602  ih6 = vlib_buffer_get_current (b);
603  ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60);
604  clib_memcpy_fast (&src_ip6, &ih6->src_address, sizeof (ip6_address_t));
605  clib_memcpy_fast (&dst_ip6, &ih6->dst_address, sizeof (ip6_address_t));
606  }
607 
608  src_port = th->src_port;
609  dst_port = th->dst_port;
610  flags = TCP_FLAG_RST;
611 
612  /*
613  * RFC 793. If the ACK bit is off, sequence number zero is used,
614  * <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK>
615  * If the ACK bit is on,
616  * <SEQ=SEG.ACK><CTL=RST>
617  */
618  if (tcp_ack (th))
619  {
620  seq = th->ack_number;
621  ack = 0;
622  }
623  else
624  {
625  flags |= TCP_FLAG_ACK;
626  tmp = clib_net_to_host_u32 (th->seq_number);
627  len = vnet_buffer (b)->tcp.data_len + tcp_is_syn (th) + tcp_is_fin (th);
628  ack = clib_host_to_net_u32 (tmp + len);
629  seq = 0;
630  }
631 
632  tcp_reuse_buffer (vm, b);
634  th = vlib_buffer_push_tcp_net_order (b, dst_port, src_port, seq, ack,
635  sizeof (tcp_header_t), flags, 0);
636 
637  if (is_ip4)
638  {
639  ih4 = vlib_buffer_push_ip4 (vm, b, &dst_ip4, &src_ip4,
640  IP_PROTOCOL_TCP, 1);
641  th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
642  }
643  else
644  {
645  int bogus = ~0;
646  ih6 = vlib_buffer_push_ip6 (vm, b, &dst_ip6, &src_ip6, IP_PROTOCOL_TCP);
647  th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
648  ASSERT (!bogus);
649  }
650 
651  return 0;
652 }
653 
654 #ifndef CLIB_MARCH_VARIANT
655 /**
656  * Send reset without reusing existing buffer
657  *
658  * It extracts connection info out of original packet
659  */
/* Allocates a fresh buffer, builds a RST whose ports and addresses are the
 * swapped ones of `pkt`, and enqueues it towards ip lookup in the fib of
 * pkt's rx interface. Returns silently (after counting no_buffer) when no
 * buffer can be allocated.
 * NOTE(review): the line carrying the function name and the leading
 * parameters (tc, pkt) is missing from this listing. */
660 void
662  u32 thread_index, u8 is_ip4)
663 {
664  tcp_worker_ctx_t *wrk = tcp_get_worker (thread_index);
665  vlib_main_t *vm = wrk->vm;
666  vlib_buffer_t *b;
667  u32 bi, sw_if_index, fib_index;
668  u8 tcp_hdr_len, flags = 0;
669  tcp_header_t *th, *pkt_th;
670  u32 seq, ack;
671  ip4_header_t *ih4, *pkt_ih4;
672  ip6_header_t *ih6, *pkt_ih6;
673  fib_protocol_t fib_proto;
674 
675  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
676  {
677  tcp_worker_stats_inc (wrk, no_buffer, 1);
678  return;
679  }
680 
681  b = vlib_get_buffer (vm, bi);
 /* The reply is routed using the fib bound to the interface pkt came in on */
682  sw_if_index = vnet_buffer (pkt)->sw_if_index[VLIB_RX];
683  fib_proto = is_ip4 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
684  fib_index = fib_table_get_index_for_sw_if_index (fib_proto, sw_if_index);
685  tcp_init_buffer (vm, b);
686 
687  /* No options are written: the header length is the bare TCP header */
688  tcp_hdr_len = sizeof (tcp_header_t);
689 
 /* pkt's current data is expected to point at its IP header */
690  if (is_ip4)
691  {
692  pkt_ih4 = vlib_buffer_get_current (pkt);
693  pkt_th = ip4_next_header (pkt_ih4);
694  }
695  else
696  {
697  pkt_ih6 = vlib_buffer_get_current (pkt);
698  pkt_th = ip6_next_header (pkt_ih6);
699  }
700 
701  if (tcp_ack (pkt_th))
702  {
703  flags = TCP_FLAG_RST;
 /* Taken straight from the packet header, i.e. already in network order */
704  seq = pkt_th->ack_number;
 /* NOTE(review): tc->rcv_nxt is used as a host-order value elsewhere in
 * this file, yet it is passed unconverted to the *_net_order push below,
 * while the else-branch converts with clib_host_to_net_u32. The ACK flag
 * is not set here, so the field is presumably ignored by the peer -
 * confirm whether a conversion is missing */
705  ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
706  }
707  else
708  {
709  flags = TCP_FLAG_RST | TCP_FLAG_ACK;
710  seq = 0;
711  ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);
712  }
713 
 /* Ports are swapped relative to the offending packet */
714  th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
715  seq, ack, tcp_hdr_len, flags, 0);
716 
717  /* Swap src and dst ip */
718  if (is_ip4)
719  {
720  ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
721  ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
722  &pkt_ih4->src_address, IP_PROTOCOL_TCP,
723  tcp_csum_offload (tc));
724  th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
725  }
726  else
727  {
728  int bogus = ~0;
729  ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
730  0x60);
731  ih6 = vlib_buffer_push_ip6_custom (vm, b, &pkt_ih6->dst_address,
732  &pkt_ih6->src_address,
733  IP_PROTOCOL_TCP,
734  tc->ipv6_flow_label);
735  th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
736  ASSERT (!bogus);
737  }
738 
739  tcp_enqueue_to_ip_lookup (wrk, b, bi, is_ip4, fib_index);
740  TCP_EVT (TCP_EVT_RST_SENT, tc);
741  vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
742  TCP_ERROR_RST_SENT, 1);
743 }
744 
745 /**
746  * Build and send a reset packet for the connection
747  */
/* Allocates a buffer, builds a RST for `tc` (sequence snd_nxt, ack field
 * rcv_nxt, options chosen from the current connection state) and enqueues
 * it to the tcp output path. Returns silently (after counting no_buffer)
 * when no buffer can be allocated.
 * NOTE(review): the name/parameter line is missing from this listing; the
 * body only uses `tc`. */
748 void
750 {
751  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
752  vlib_main_t *vm = wrk->vm;
753  vlib_buffer_t *b;
754  u32 bi;
755  tcp_header_t *th;
756  u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len;
757  u8 flags;
758 
759  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
760  {
761  tcp_worker_stats_inc (wrk, no_buffer, 1);
762  return;
763  }
764  b = vlib_get_buffer (vm, bi);
765  tcp_init_buffer (vm, b);
766 
 /* Overwrites the connection's cached snd_opts / snd_opts_len */
767  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
768  tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
 /* Reuses the last computed window; rcv_wnd is not refreshed here */
769  advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
770  flags = TCP_FLAG_RST;
771  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
772  tc->rcv_nxt, tcp_hdr_opts_len, flags,
773  advertise_wnd);
774  opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
775  th->checksum = tcp_compute_checksum (tc, b);
 /* Bytes written must match the length used to size the header */
776  ASSERT (opts_write_len == tc->snd_opts_len);
777  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
778  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
779  TCP_EVT (TCP_EVT_RST_SENT, tc);
780  vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
781  TCP_ERROR_RST_SENT, 1);
782 }
783 
/* Prepends an IPv4 or IPv6 header (per tc->c_is_ip4) to `b`, using the
 * connection's local address as source and remote address as destination.
 * NOTE(review): the line carrying the function name and the leading
 * parameters (wrk, tc) is missing from this listing; call sites below use
 * `tcp_push_ip_hdr (wrk, tc, b)`. */
784 static void
786  vlib_buffer_t * b)
787 {
788  if (tc->c_is_ip4)
789  {
790  vlib_buffer_push_ip4 (wrk->vm, b, &tc->c_lcl_ip4, &tc->c_rmt_ip4,
791  IP_PROTOCOL_TCP, tcp_csum_offload (tc));
792  }
793  else
794  {
 /* The IPv6 path additionally carries the connection's flow label */
795  vlib_buffer_push_ip6_custom (wrk->vm, b, &tc->c_lcl_ip6, &tc->c_rmt_ip6,
796  IP_PROTOCOL_TCP, tc->ipv6_flow_label);
797  }
798 }
799 
800 /**
801  * Send SYN
802  *
803  * Builds a SYN packet for a half-open connection and sends it to ipx_lookup.
804  * The packet is not forwarded through tcpx_output to avoid doing lookups
805  * in the half_open pool.
806  */
/* NOTE(review): the name/parameter line is missing from this listing; the
 * body only uses `tc`. */
807 void
809 {
810  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
811  vlib_main_t *vm = wrk->vm;
812  vlib_buffer_t *b;
813  u32 bi;
814 
815  /*
816  * Set up the SYN retransmit timer before requesting a buffer
817  * so that we can simply return if we've run out.
818  */
819  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
820  tc->rto * TCP_TO_TIMER_TICK);
821 
822  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
823  {
 /* No buffer: re-arm the timer with the minimum value to retry soon */
824  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN, 1);
825  tcp_worker_stats_inc (wrk, no_buffer, 1);
826  return;
827  }
828 
829  b = vlib_get_buffer (vm, bi);
830  tcp_init_buffer (vm, b);
831  tcp_make_syn (tc, b);
832 
833  /* Measure RTT with this */
 /* NOTE(review): uses thread 1 when workers exist, else thread 0, rather
 * than tc->c_thread_index - presumably tied to where half-open
 * connections are processed; confirm */
834  tc->rtt_ts = tcp_time_now_us (vlib_num_workers ()? 1 : 0);
835  tc->rtt_seq = tc->snd_nxt;
836  tc->rto_boff = 0;
837 
 /* The IP header is pushed here because the packet skips tcp output */
838  tcp_push_ip_hdr (wrk, tc, b);
839  tcp_enqueue_to_ip_lookup (wrk, b, bi, tc->c_is_ip4, tc->c_fib_index);
840  TCP_EVT (TCP_EVT_SYN_SENT, tc);
841 }
842 
843 void
845 {
846  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
847  vlib_main_t *vm = wrk->vm;
848  vlib_buffer_t *b;
849  u32 bi;
850 
851  ASSERT (tc->snd_una != tc->snd_nxt);
853 
854  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
855  {
856  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT, 1);
857  tcp_worker_stats_inc (wrk, no_buffer, 1);
858  return;
859  }
860 
861  tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
862  b = vlib_get_buffer (vm, bi);
863  tcp_init_buffer (vm, b);
864  tcp_make_synack (tc, b);
865  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
866  TCP_EVT (TCP_EVT_SYNACK_SENT, tc);
867 }
868 
869 /**
870  * Send FIN
871  */
872 void
874 {
875  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
876  vlib_main_t *vm = wrk->vm;
877  vlib_buffer_t *b;
878  u32 bi;
879  u8 fin_snt = 0;
880 
881  fin_snt = tc->flags & TCP_CONN_FINSNT;
882  if (fin_snt)
883  tc->snd_nxt -= 1;
884 
885  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
886  {
887  /* Out of buffers so program fin retransmit ASAP */
888  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT, 1);
889  if (fin_snt)
890  tc->snd_nxt += 1;
891  else
892  /* Make sure retransmit retries a fin not data */
893  tc->flags |= TCP_CONN_FINSNT;
894  tcp_worker_stats_inc (wrk, no_buffer, 1);
895  return;
896  }
897 
898  /* If we have non-dupacks programmed, no need to send them */
899  if ((tc->flags & TCP_CONN_SNDACK) && !tc->pending_dupacks)
900  tc->flags &= ~TCP_CONN_SNDACK;
901 
902  b = vlib_get_buffer (vm, bi);
903  tcp_init_buffer (vm, b);
904  tcp_make_fin (tc, b);
905  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
906  TCP_EVT (TCP_EVT_FIN_SENT, tc);
907  /* Account for the FIN */
908  tc->snd_nxt += 1;
910  if (!fin_snt)
911  {
912  tc->flags |= TCP_CONN_FINSNT;
913  tc->flags &= ~TCP_CONN_FINPNDG;
914  }
915 }
916 
917 /**
918  * Push TCP header and update connection variables. Should only be called
919  * for segments with data, not for 'control' packets.
920  */
921 always_inline void
923  u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
924 {
925  u8 tcp_hdr_opts_len, flags = TCP_FLAG_ACK;
926  u32 advertise_wnd, data_len;
927  tcp_main_t *tm = &tcp_main;
928  tcp_header_t *th;
929 
930  data_len = b->current_length;
931  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
933 
934  vnet_buffer (b)->tcp.flags = 0;
935  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
936 
937  if (compute_opts)
938  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
939 
940  tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
941 
942  if (maybe_burst)
943  advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
944  else
945  advertise_wnd = tcp_window_to_advertise (tc, TCP_STATE_ESTABLISHED);
946 
947  if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
948  {
949  if (seq_geq (tc->psh_seq, snd_nxt)
950  && seq_lt (tc->psh_seq, snd_nxt + data_len))
951  flags |= TCP_FLAG_PSH;
952  }
953  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, snd_nxt,
954  tc->rcv_nxt, tcp_hdr_opts_len, flags,
955  advertise_wnd);
956 
957  if (maybe_burst)
958  {
959  clib_memcpy_fast ((u8 *) (th + 1),
960  tm->wrk_ctx[tc->c_thread_index].cached_opts,
961  tc->snd_opts_len);
962  }
963  else
964  {
965  u8 len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
966  ASSERT (len == tc->snd_opts_len);
967  }
968 
969  /*
970  * Update connection variables
971  */
972 
973  if (update_snd_nxt)
974  tc->snd_nxt += data_len;
975  tc->rcv_las = tc->rcv_nxt;
976 
977  tc->bytes_out += data_len;
978  tc->data_segs_out += 1;
979 
980  th->checksum = tcp_compute_checksum (tc, b);
981 
982  TCP_EVT (TCP_EVT_PKTIZE, tc);
983 }
984 
987 {
989  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
991  return data_len;
992 }
993 
994 u32
996 {
997  tcp_connection_t *tc = (tcp_connection_t *) tconn;
998 
999  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1000  tcp_bt_track_tx (tc, tcp_buffer_len (b));
1001 
1002  tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0, /* burst */ 1,
1003  /* update_snd_nxt */ 1);
1004 
1005  tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
1006  /* If not tracking an ACK, start tracking */
1007  if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
1008  {
1009  tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
1010  tc->rtt_seq = tc->snd_nxt;
1011  }
1012  if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)))
1013  {
1014  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1016  tc->rto_boff = 0;
1017  }
1018  tcp_trajectory_add_start (b, 3);
1019  return 0;
1020 }
1021 
/* Allocates a buffer, turns it into an ACK for `tc` and enqueues it to the
 * tcp output path. When no buffer is available, the receive window is still
 * refreshed, no_buffer is counted and nothing is sent.
 * NOTE(review): the name/parameter line is missing from this listing; the
 * body only uses `tc`. */
1022 void
1024 {
1025  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1026  vlib_main_t *vm = wrk->vm;
1027  vlib_buffer_t *b;
1028  u32 bi;
1029 
1030  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1031  {
1032  tcp_update_rcv_wnd (tc);
1033  tcp_worker_stats_inc (wrk, no_buffer, 1);
1034  return;
1035  }
1036  b = vlib_get_buffer (vm, bi);
1037  tcp_init_buffer (vm, b);
1038  tcp_make_ack (tc, b);
1039  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1040 }
1041 
/* Requests an ACK: unless TCP_CONN_SNDACK is already set, registers a custom
 * tx event for the connection and sets the flag, so repeated calls add at
 * most one event while the flag stays set.
 * NOTE(review): the name/parameter line is missing from this listing; call
 * sites below use `tcp_program_ack (tc)`. The meaning of the second
 * argument (1) to session_add_self_custom_tx_evt is not visible here. */
1042 void
1044 {
1045  if (!(tc->flags & TCP_CONN_SNDACK))
1046  {
1047  session_add_self_custom_tx_evt (&tc->connection, 1);
1048  tc->flags |= TCP_CONN_SNDACK;
1049  }
1050 }
1051 
/* Same scheduling as the plain ACK request above, and additionally bumps
 * tc->pending_dupacks.
 * NOTE(review): the name/parameter line is missing from this listing; the
 * body only uses `tc`. */
1052 void
1054 {
1055  if (!(tc->flags & TCP_CONN_SNDACK))
1056  {
1057  session_add_self_custom_tx_evt (&tc->connection, 1);
1058  tc->flags |= TCP_CONN_SNDACK;
1059  }
 /* Saturate at 255 instead of incrementing further */
1060  if (tc->pending_dupacks < 255)
1061  tc->pending_dupacks += 1;
1062 }
1063 
/* Requests a retransmit pass: unless TCP_CONN_RXT_PENDING is already set,
 * registers a custom tx event for the connection and sets the flag.
 * NOTE(review): the name/parameter line is missing from this listing; the
 * body only uses `tc`. Note the second argument to
 * session_add_self_custom_tx_evt is 0 here versus 1 in the ACK helpers;
 * its meaning is not visible in this listing. */
1064 void
1066 {
1067  if (!(tc->flags & TCP_CONN_RXT_PENDING))
1068  {
1069  session_add_self_custom_tx_evt (&tc->connection, 0);
1070  tc->flags |= TCP_CONN_RXT_PENDING;
1071  }
1072 }
1073 
1074 /**
1075  * Send window update ack
1076  *
1077  * Ensures that it will be sent only once, after a zero rwnd has been
1078  * advertised in a previous ack, and only if rwnd has grown beyond a
1079  * configurable value.
1080  */
1081 void
1083 {
1084  if (tcp_zero_rwnd_sent (tc))
1085  {
1086  tcp_update_rcv_wnd (tc);
1087  if (tc->rcv_wnd >= tcp_cfg.rwnd_min_update_ack * tc->snd_mss)
1088  {
1090  tcp_program_ack (tc);
1091  }
1092  }
1093 }
1094 
1095 /**
1096  * Allocate a new buffer and build a new tcp segment
1097  *
1098  * @param wrk tcp worker
1099  * @param tc connection for which the segment will be allocated
1100  * @param offset offset of the first byte in the tx fifo
1101  * @param max_deq_bytes segment size
1102  * @param[out] b pointer to buffer allocated
1103  *
1104  * @return the number of bytes in the segment or 0 if buffer cannot be
1105  * allocated or no data available
1106  */
1107 static int
1109  u32 offset, u32 max_deq_bytes, vlib_buffer_t ** b)
1110 {
1111  u32 bytes_per_buffer = vnet_get_tcp_main ()->bytes_per_buffer;
1112  vlib_main_t *vm = wrk->vm;
1113  u32 bi, seg_size;
1114  int n_bytes = 0;
1115  u8 *data;
1116 
1117  seg_size = max_deq_bytes + TRANSPORT_MAX_HDRS_LEN;
1118 
1119  /*
1120  * Prepare options
1121  */
1122  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
1123 
1124  /*
1125  * Allocate and fill in buffer(s)
1126  */
1127 
1128  /* Easy case, buffer size greater than mss */
1129  if (PREDICT_TRUE (seg_size <= bytes_per_buffer))
1130  {
1131  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1132  {
1133  tcp_worker_stats_inc (wrk, no_buffer, 1);
1134  return 0;
1135  }
1136  *b = vlib_get_buffer (vm, bi);
1137  data = tcp_init_buffer (vm, *b);
1138  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1139  max_deq_bytes);
1140  ASSERT (n_bytes == max_deq_bytes);
1141  b[0]->current_length = n_bytes;
1142  tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
1143  /* burst */ 0, /* update_snd_nxt */ 0);
1144  }
1145  /* Split mss into multiple buffers */
1146  else
1147  {
1148  u32 chain_bi = ~0, n_bufs_per_seg, n_bufs;
1149  u16 n_peeked, len_to_deq;
1150  vlib_buffer_t *chain_b, *prev_b;
1151  int i;
1152 
1153  /* Make sure we have enough buffers */
1154  n_bufs_per_seg = ceil ((double) seg_size / bytes_per_buffer);
1155  vec_validate_aligned (wrk->tx_buffers, n_bufs_per_seg - 1,
1157  n_bufs = vlib_buffer_alloc (vm, wrk->tx_buffers, n_bufs_per_seg);
1158  if (PREDICT_FALSE (n_bufs != n_bufs_per_seg))
1159  {
1160  if (n_bufs)
1161  vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
1162  tcp_worker_stats_inc (wrk, no_buffer, 1);
1163  return 0;
1164  }
1165 
1166  *b = vlib_get_buffer (vm, wrk->tx_buffers[--n_bufs]);
1167  data = tcp_init_buffer (vm, *b);
1168  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1169  bytes_per_buffer -
1170  TRANSPORT_MAX_HDRS_LEN);
1171  b[0]->current_length = n_bytes;
1172  b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
1174  max_deq_bytes -= n_bytes;
1175 
1176  chain_b = *b;
1177  for (i = 1; i < n_bufs_per_seg; i++)
1178  {
1179  prev_b = chain_b;
1180  len_to_deq = clib_min (max_deq_bytes, bytes_per_buffer);
1181  chain_bi = wrk->tx_buffers[--n_bufs];
1182  chain_b = vlib_get_buffer (vm, chain_bi);
1183  chain_b->current_data = 0;
1184  data = vlib_buffer_get_current (chain_b);
1185  n_peeked = session_tx_fifo_peek_bytes (&tc->connection, data,
1186  offset + n_bytes,
1187  len_to_deq);
1188  ASSERT (n_peeked == len_to_deq);
1189  n_bytes += n_peeked;
1190  chain_b->current_length = n_peeked;
1191  chain_b->next_buffer = 0;
1192 
1193  /* update previous buffer */
1194  prev_b->next_buffer = chain_bi;
1195  prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
1196 
1197  max_deq_bytes -= n_peeked;
1198  b[0]->total_length_not_including_first_buffer += n_peeked;
1199  }
1200 
1201  tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
1202  /* burst */ 0, /* update_snd_nxt */ 0);
1203 
1204  if (PREDICT_FALSE (n_bufs))
1205  {
1206  clib_warning ("not all buffers consumed");
1207  vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
1208  }
1209  }
1210 
1211  ASSERT (n_bytes > 0);
1212  ASSERT (((*b)->current_data + (*b)->current_length) <= bytes_per_buffer);
1213 
1214  return n_bytes;
1215 }
1216 
1217 /**
1218  * Build a retransmit segment
1219  *
1220  * @return the number of bytes in the segment or 0 if there's nothing to
1221  * retransmit
1222  */
/* Prepares one retransmit segment that starts `offset` bytes past snd_una.
 * The segment size is capped at snd_mss, at max_deq_bytes and at the bytes
 * available past `offset`. On success the retransmit accounting on the
 * connection and worker is updated and the segment's byte count is
 * returned; 0 means nothing was built.
 * NOTE(review): the line carrying the function name and its first
 * parameter is absent from this listing; callers in this file pass the
 * worker context first -- confirm against the original source. */
1223 static u32
1225  tcp_connection_t * tc, u32 offset,
1226  u32 max_deq_bytes, vlib_buffer_t ** b)
1227 {
1228  u32 start, available_bytes;
1229  int n_bytes = 0;
1230 
1231  ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
1232  ASSERT (max_deq_bytes != 0);
1233 
1234  /*
1235  * Make sure we can retransmit something
1236  */
1237  available_bytes = transport_max_tx_dequeue (&tc->connection);
1238  ASSERT (available_bytes >= offset);
/* Only the bytes located past the requested offset can be used */
1239  available_bytes -= offset;
1240  if (!available_bytes)
1241  return 0;
1242 
/* Never build more than one MSS, nor more than what is available */
1243  max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
1244  max_deq_bytes = clib_min (available_bytes, max_deq_bytes);
1245 
/* Sequence number of the first retransmitted byte */
1246  start = tc->snd_una + offset;
1247  ASSERT (seq_leq (start + max_deq_bytes, tc->snd_nxt));
1248 
1249  n_bytes = tcp_prepare_segment (wrk, tc, offset, max_deq_bytes, b);
/* No segment was produced, so no accounting is done */
1250  if (!n_bytes)
1251  return 0;
1252 
1253  tc->snd_rxt_bytes += n_bytes;
1254 
/* Byte tracker is told about the retransmitted range only when rate
 * sampling is configured on the connection */
1255  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1256  tcp_bt_track_rxt (tc, start, start + n_bytes);
1257 
/* Per-connection and per-worker retransmit counters */
1258  tc->bytes_retrans += n_bytes;
1259  tc->segs_retrans += 1;
1260  tcp_worker_stats_inc (wrk, rxt_segs, 1);
1261  TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes);
1262 
1263  return n_bytes;
1264 }
1265 
/* Clears the SACK scoreboard's reneging state over [snd_una, snd_nxt] when
 * the scoreboard is flagged as reneging or when its first hole does not
 * start at snd_una. Otherwise it leaves the scoreboard untouched.
 * NOTE(review): the line with the function name and parameter list is
 * absent from this listing; the body only uses `tc`. */
1266 static void
1268 {
1269  sack_scoreboard_t *sb = &tc->sack_sb;
1270  sack_scoreboard_hole_t *hole;
1271 
1272  hole = scoreboard_first_hole (sb);
/* Not reneging and either no holes at all or the first hole begins exactly
 * at snd_una: nothing to clear */
1273  if (!sb->is_reneging && (!hole || hole->start == tc->snd_una))
1274  return;
1275 
1276  scoreboard_clear_reneging (sb, tc->snd_una, tc->snd_nxt);
1277 }
1278 
1279 /**
1280  * Reset congestion control, switch cwnd to loss window and try again.
1281  */
/* Saves the current ssthresh/cwnd, notifies the congestion control
 * algorithm of the loss and resets per-connection recovery bookkeeping
 * before turning recovery on.
 * NOTE(review): the line with the function name and parameter list is
 * absent from this listing; the body only uses `tc`. */
1282 static void
1284 {
1285  TCP_EVT (TCP_EVT_CC_EVT, tc, 6);
1286 
/* Keep the pre-loss values around in prev_ssthresh/prev_cwnd */
1287  tc->prev_ssthresh = tc->ssthresh;
1288  tc->prev_cwnd = tc->cwnd;
1289 
1290  /* If we entered loss without fast recovery, notify cc algo of the
1291  * congestion event such that it can update ssthresh and its state */
1292  if (!tcp_in_fastrecovery (tc))
1293  tcp_cc_congestion (tc);
1294 
1295  /* Let cc algo decide loss cwnd and ssthresh post unrecovered loss */
1296  tcp_cc_loss (tc);
1297 
/* Reset rtt timestamp and accumulated cwnd bytes, count the timeout and
 * set the scoreboard reorder value back to the dupack threshold */
1298  tc->rtt_ts = 0;
1299  tc->cwnd_acc_bytes = 0;
1300  tc->tr_occurences += 1;
1301  tc->sack_sb.reorder = TCP_DUPACK_THRESHOLD;
1302  tcp_recovery_on (tc);
1303 }
1304 
1305 void
1307 {
1308  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1309  vlib_main_t *vm = wrk->vm;
1310  vlib_buffer_t *b = 0;
1311  u32 bi, n_bytes;
1312 
1313  tcp_worker_stats_inc (wrk, tr_events, 1);
1314 
1315  /* Should be handled by a different handler */
1316  if (PREDICT_FALSE (tc->state == TCP_STATE_SYN_SENT))
1317  return;
1318 
1319  /* Wait-close and retransmit could pop at the same time */
1320  if (tc->state == TCP_STATE_CLOSED)
1321  return;
1322 
1323  if (tc->state >= TCP_STATE_ESTABLISHED)
1324  {
1325  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1326 
1327  /* Lost FIN, retransmit and return */
1328  if (tc->flags & TCP_CONN_FINSNT)
1329  {
1330  tcp_send_fin (tc);
1331  tc->rto_boff += 1;
1332  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1333  return;
1334  }
1335 
1336  /* Shouldn't be here */
1337  if (tc->snd_una == tc->snd_nxt)
1338  {
1339  ASSERT (!tcp_in_recovery (tc));
1340  tc->rto_boff = 0;
1341  return;
1342  }
1343 
1344  /* We're not in recovery so make sure rto_boff is 0. Can be non 0 due
1345  * to persist timer timeout */
1346  if (!tcp_in_recovery (tc) && tc->rto_boff > 0)
1347  {
1348  tc->rto_boff = 0;
1349  tcp_update_rto (tc);
1350  }
1351 
1352  /* Peer is dead or network connectivity is lost. Close connection.
1353  * RFC 1122 section 4.2.3.5 recommends a value of at least 100s. For
1354  * a min rto of 0.2s we need to retry about 8 times. */
1355  if (tc->rto_boff >= TCP_RTO_BOFF_MAX)
1356  {
1357  tcp_send_reset (tc);
1358  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1359  session_transport_closing_notify (&tc->connection);
1360  session_transport_closed_notify (&tc->connection);
1362  tcp_program_cleanup (wrk, tc);
1363  tcp_worker_stats_inc (wrk, tr_abort, 1);
1364  return;
1365  }
1366 
1367  if (tcp_opts_sack_permitted (&tc->rcv_opts))
1369 
1370  /* Update send congestion to make sure that rxt has data to send */
1371  tc->snd_congestion = tc->snd_nxt;
1372 
1373  /* Send the first unacked segment. If we're short on buffers, return
1374  * as soon as possible */
1375  n_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
1376  n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, n_bytes, &b);
1377  if (!n_bytes)
1378  {
1379  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT, 1);
1380  return;
1381  }
1382 
1383  bi = vlib_get_buffer_index (vm, b);
1384  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1385 
1386  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1388 
1389  tc->rto_boff += 1;
1390  if (tc->rto_boff == 1)
1391  {
1393  /* Record timestamp. Eifel detection algorithm RFC3522 */
1394  tc->snd_rxt_ts = tcp_tstamp (tc);
1395  }
1396 
1397  if (tcp_opts_sack_permitted (&tc->rcv_opts))
1398  scoreboard_init_rxt (&tc->sack_sb, tc->snd_una + n_bytes);
1399 
1401  }
1402  /* Retransmit SYN-ACK */
1403  else if (tc->state == TCP_STATE_SYN_RCVD)
1404  {
1405  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1406 
1407  tc->rtt_ts = 0;
1408 
1409  /* Passive open establish timeout */
1410  if (tc->rto > TCP_ESTABLISH_TIME >> 1)
1411  {
1412  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1414  tcp_program_cleanup (wrk, tc);
1415  tcp_worker_stats_inc (wrk, tr_abort, 1);
1416  return;
1417  }
1418 
1419  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1420  {
1421  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT, 1);
1422  tcp_worker_stats_inc (wrk, no_buffer, 1);
1423  return;
1424  }
1425 
1426  tc->rto_boff += 1;
1427  if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
1428  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1429 
1430  ASSERT (tc->snd_una != tc->snd_nxt);
1432 
1433  b = vlib_get_buffer (vm, bi);
1434  tcp_init_buffer (vm, b);
1435  tcp_make_synack (tc, b);
1436  TCP_EVT (TCP_EVT_SYN_RXT, tc, 1);
1437 
1438  /* Retransmit timer already updated, just enqueue to output */
1439  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1440  }
1441  else
1442  {
1443  ASSERT (tc->state == TCP_STATE_CLOSED);
1444  return;
1445  }
1446 }
1447 
1448 /**
1449  * SYN retransmit timer handler. Active open only.
1450  */
1451 void
1453 {
1454  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1455  vlib_main_t *vm = wrk->vm;
1456  vlib_buffer_t *b = 0;
1457  u32 bi;
1458 
1459  /* Note: the connection may have transitioned to ESTABLISHED... */
1460  if (PREDICT_FALSE (tc->state != TCP_STATE_SYN_SENT))
1461  return;
1462 
1463  /* Half-open connection actually moved to established but we were
1464  * waiting for syn retransmit to pop to call cleanup from the right
1465  * thread. */
1466  if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
1467  {
1469  TCP_DBG ("could not remove half-open connection");
1470  return;
1471  }
1472 
1473  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1474  tc->rtt_ts = 0;
1475 
1476  /* Active open establish timeout */
1477  if (tc->rto >= TCP_ESTABLISH_TIME >> 1)
1478  {
1479  session_stream_connect_notify (&tc->connection, SESSION_E_TIMEDOUT);
1481  return;
1482  }
1483 
1484  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1485  {
1486  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN, 1);
1487  tcp_worker_stats_inc (wrk, no_buffer, 1);
1488  return;
1489  }
1490 
1491  /* Try without increasing RTO a number of times. If this fails,
1492  * start growing RTO exponentially */
1493  tc->rto_boff += 1;
1494  if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
1495  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1496 
1497  b = vlib_get_buffer (vm, bi);
1498  tcp_init_buffer (vm, b);
1499  tcp_make_syn (tc, b);
1500 
1501  TCP_EVT (TCP_EVT_SYN_RXT, tc, 0);
1502 
1503  /* This goes straight to ipx_lookup */
1504  tcp_push_ip_hdr (wrk, tc, b);
1505  tcp_enqueue_to_ip_lookup (wrk, b, bi, tc->c_is_ip4, tc->c_fib_index);
1506 
1507  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
1508  tc->rto * TCP_TO_TIMER_TICK);
1509 }
1510 
1511 /**
1512  * Got 0 snd_wnd from peer, try to do something about it.
1513  *
1514  */
1515 void
1517 {
1518  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1519  u32 bi, max_snd_bytes, available_bytes, offset;
1520  tcp_main_t *tm = vnet_get_tcp_main ();
1521  vlib_main_t *vm = wrk->vm;
1522  vlib_buffer_t *b;
1523  int n_bytes = 0;
1524  u8 *data;
1525 
1526  /* Problem already solved or worse */
1527  if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
1528  || (tc->flags & TCP_CONN_FINSNT))
1529  goto update_scheduler;
1530 
1531  available_bytes = transport_max_tx_dequeue (&tc->connection);
1532  offset = tc->snd_nxt - tc->snd_una;
1533 
1534  /* Reprogram persist if no new bytes available to send. We may have data
1535  * next time */
1536  if (!available_bytes)
1537  {
1538  tcp_persist_timer_set (&wrk->timer_wheel, tc);
1539  return;
1540  }
1541 
1542  if (available_bytes <= offset)
1543  goto update_scheduler;
1544 
1545  /* Increment RTO backoff */
1546  tc->rto_boff += 1;
1547  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1548 
1549  /*
1550  * Try to force the first unsent segment (or buffer)
1551  */
1552  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1553  {
1554  tcp_persist_timer_set (&wrk->timer_wheel, tc);
1555  tcp_worker_stats_inc (wrk, no_buffer, 1);
1556  return;
1557  }
1558 
1559  b = vlib_get_buffer (vm, bi);
1560  data = tcp_init_buffer (vm, b);
1561 
1562  tcp_validate_txf_size (tc, offset);
1563  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
1564  max_snd_bytes = clib_min (tc->snd_mss,
1565  tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN);
1566  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1567  max_snd_bytes);
1568  b->current_length = n_bytes;
1569  ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)
1570  || tc->snd_una == tc->snd_nxt
1571  || tc->rto_boff > 1));
1572 
1573  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1574  {
1576  tcp_bt_track_tx (tc, n_bytes);
1577  }
1578 
1579  tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0,
1580  /* burst */ 0, /* update_snd_nxt */ 1);
1581  tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
1582  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1583 
1584  /* Just sent new data, enable retransmit */
1586 
1587  return;
1588 
1589 update_scheduler:
1590 
1591  if (tcp_is_descheduled (tc))
1592  transport_connection_reschedule (&tc->connection);
1593 }
1594 
1595 /**
1596  * Retransmit first unacked segment
1597  */
/* Builds a retransmit segment of up to snd_mss bytes at offset 0 (i.e.
 * starting at snd_una) and enqueues it to the output node.
 * Returns 0 on success and -1 if no segment could be prepared.
 * NOTE(review): the line with the function name and parameter list is
 * absent from this listing; the body uses `wrk` and `tc`. */
1598 int
1600 {
1601  vlib_main_t *vm = wrk->vm;
1602  vlib_buffer_t *b;
1603  u32 bi, n_bytes;
1604 
1605  TCP_EVT (TCP_EVT_CC_EVT, tc, 1);
1606 
1607  n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, tc->snd_mss, &b);
/* Nothing was prepared, report failure to the caller */
1608  if (!n_bytes)
1609  return -1;
1610 
/* The output enqueue takes both the buffer pointer and its index */
1611  bi = vlib_get_buffer_index (vm, b);
1612  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1613 
1614  return 0;
1615 }
1616 
1617 static int
1619  u32 burst_size)
1620 {
1621  u32 offset, n_segs = 0, n_written, bi, available_wnd;
1622  vlib_main_t *vm = wrk->vm;
1623  vlib_buffer_t *b = 0;
1624 
1625  offset = tc->snd_nxt - tc->snd_una;
1626  available_wnd = tc->snd_wnd - offset;
1627  burst_size = clib_min (burst_size, available_wnd / tc->snd_mss);
1628 
1629  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1631 
1632  while (n_segs < burst_size)
1633  {
1634  n_written = tcp_prepare_segment (wrk, tc, offset, tc->snd_mss, &b);
1635  if (!n_written)
1636  goto done;
1637 
1638  bi = vlib_get_buffer_index (vm, b);
1639  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1640  offset += n_written;
1641  n_segs += 1;
1642 
1643  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1644  tcp_bt_track_tx (tc, n_written);
1645 
1646  tc->snd_nxt += n_written;
1647  }
1648 
1649 done:
1650  return n_segs;
1651 }
1652 
1653 /**
1654  * Estimate send space using proportional rate reduction (RFC6937)
1655  */
/* Computes how many bytes may be sent during fast recovery.
 * Returns the computed space, never less than 0 once something has been
 * sent in this recovery and never less than snd_mss before that.
 * NOTE(review): the line with the function name and parameter list is
 * absent from this listing; the body only uses `tc`. */
1656 int
1658 {
1659  u32 pipe, prr_out;
1660  int space;
1661 
1662  pipe = tcp_flight_size (tc);
/* Bytes sent since recovery started: retransmitted bytes plus whatever
 * snd_nxt advanced past snd_congestion */
1663  prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion);
1664 
1665  if (pipe > tc->ssthresh)
1666  {
/* Flight size above ssthresh: scale delivered bytes by ssthresh/prev_cwnd
 * and subtract what was already sent. The floating point result is
 * converted to int on assignment. */
1667  space = ((int) tc->prr_delivered * ((f64) tc->ssthresh / tc->prev_cwnd))
1668  - prr_out;
1669  }
1670  else
1671  {
/* Flight size at or below ssthresh: allow the delivered-minus-sent
 * surplus (floored at 0) plus one MSS, but not more than the room left up
 * to ssthresh */
1672  int limit;
1673  limit = clib_max ((int) (tc->prr_delivered - prr_out), 0) + tc->snd_mss;
1674  space = clib_min (tc->ssthresh - pipe, limit);
1675  }
/* Lower bound: one MSS if nothing was sent yet (prr_out == 0), else 0 */
1676  space = clib_max (space, prr_out ? 0 : tc->snd_mss);
1677  return space;
1678 }
1679 
/* Returns non-zero when tcp_fastrecovery_first (tc) holds, or when the
 * distance high_sacked - snd_congestion exceeds (snd_una - prr_start)
 * scaled by ssthresh / prev_cwnd.
 * NOTE(review): the line with the function name and first parameter is
 * absent from this listing; the body uses `tc` and `sb`. */
1680 static inline u8
1682  sack_scoreboard_t * sb)
1683 {
/* How far the highest sacked byte is beyond snd_congestion */
1684  u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion;
/* Reduction ratio applied to the bytes acked since prr_start */
1685  f64 rr = (f64) tc->ssthresh / tc->prev_cwnd;
1686 
1687  if (tcp_fastrecovery_first (tc))
1688  return 1;
1689 
1690  return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr);
1691 }
1692 
/* Returns the bytes that can be dequeued for tx minus the bytes already
 * in flight (snd_nxt - snd_una), converted to the u8 return type.
 * NOTE(review): because the return type is u8 only the low 8 bits of the
 * difference survive. Elsewhere in this file the same difference is kept
 * in a u32, so a backlog that is a non-zero multiple of 256 bytes would be
 * reported as 0 here. tcp_send_acks uses the result as a boolean, so this
 * looks unintended -- confirm and consider a wider return type.
 * NOTE(review): the line with the function name and parameter list is
 * absent from this listing; the body only uses `tc`. */
1693 static inline u8
1695 {
1696  return (transport_max_tx_dequeue (&tc->connection)
1697  - (tc->snd_nxt - tc->snd_una));
1698 }
1699 
/*
 * Evaluates to non-zero when the scoreboard's rescue_rxt sequence number
 * lies in the closed range [snd_una, snd_congestion] of the connection,
 * using the wrap-aware seq_geq/seq_leq comparisons.
 *
 * _sb: pointer to the sack scoreboard, _tc: pointer to the connection.
 * Both arguments are parenthesized so that expression arguments
 * (e.g. `sb + 1`, `&conn`) expand correctly. Each argument is evaluated
 * twice, so do not pass expressions with side effects.
 */
#define scoreboard_rescue_rxt_valid(_sb, _tc)			\
  (seq_geq ((_sb)->rescue_rxt, (_tc)->snd_una)			\
   && seq_leq ((_sb)->rescue_rxt, (_tc)->snd_congestion))
1703 
1704 /**
1705  * Do retransmit with SACKs
1706  */
1707 static int
1709  u32 burst_size)
1710 {
1711  u32 n_written = 0, offset, max_bytes, n_segs = 0;
1712  u8 snd_limited = 0, can_rescue = 0;
1713  u32 bi, max_deq, burst_bytes;
1714  sack_scoreboard_hole_t *hole;
1715  vlib_main_t *vm = wrk->vm;
1716  vlib_buffer_t *b = 0;
1717  sack_scoreboard_t *sb;
1718  int snd_space;
1719 
1721 
1722  burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
1723  burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
1724  if (!burst_size)
1725  {
1727  return 0;
1728  }
1729 
1730  if (tcp_in_recovery (tc))
1731  snd_space = tcp_available_cc_snd_space (tc);
1732  else
1733  snd_space = tcp_fastrecovery_prr_snd_space (tc);
1734 
1735  if (snd_space < tc->snd_mss)
1736  goto done;
1737 
1738  sb = &tc->sack_sb;
1739 
1740  /* Check if snd_una is a lost retransmit */
1741  if (pool_elts (sb->holes)
1742  && seq_gt (sb->high_sacked, tc->snd_congestion)
1743  && tc->rxt_head != tc->snd_una
1745  {
1746  max_bytes = clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una);
1747  n_written = tcp_prepare_retransmit_segment (wrk, tc, 0, max_bytes, &b);
1748  if (!n_written)
1749  {
1751  goto done;
1752  }
1753  bi = vlib_get_buffer_index (vm, b);
1754  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1755  n_segs = 1;
1756 
1757  tc->rxt_head = tc->snd_una;
1758  tc->rxt_delivered += n_written;
1759  tc->prr_delivered += n_written;
1760  ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes);
1761  }
1762 
1764 
1765  TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
1766  hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
1767 
1768  max_deq = transport_max_tx_dequeue (&tc->connection);
1769  max_deq -= tc->snd_nxt - tc->snd_una;
1770 
1771  while (snd_space > 0 && n_segs < burst_size)
1772  {
1773  hole = scoreboard_next_rxt_hole (sb, hole, max_deq != 0, &can_rescue,
1774  &snd_limited);
1775  if (!hole)
1776  {
1777  /* We are out of lost holes to retransmit so send some new data. */
1778  if (max_deq > tc->snd_mss)
1779  {
1780  u32 n_segs_new;
1781  int av_wnd;
1782 
1783  /* Make sure we don't exceed available window and leave space
1784  * for one more packet, to avoid zero window acks */
1785  av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
1786  av_wnd = clib_max (av_wnd - tc->snd_mss, 0);
1787  snd_space = clib_min (snd_space, av_wnd);
1788  snd_space = clib_min (max_deq, snd_space);
1789  burst_size = clib_min (burst_size - n_segs,
1790  snd_space / tc->snd_mss);
1791  burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST);
1792  n_segs_new = tcp_transmit_unsent (wrk, tc, burst_size);
1793  if (max_deq > n_segs_new * tc->snd_mss)
1795 
1796  n_segs += n_segs_new;
1797  goto done;
1798  }
1799 
1800  if (tcp_in_recovery (tc) || !can_rescue
1801  || scoreboard_rescue_rxt_valid (sb, tc))
1802  break;
1803 
1804  /* If rescue rxt undefined or less than snd_una then one segment of
1805  * up to SMSS octets that MUST include the highest outstanding
1806  * unSACKed sequence number SHOULD be returned, and RescueRxt set to
1807  * RecoveryPoint. HighRxt MUST NOT be updated.
1808  */
1809  hole = scoreboard_last_hole (sb);
1810  max_bytes = clib_min (tc->snd_mss, hole->end - hole->start);
1811  max_bytes = clib_min (max_bytes, snd_space);
1812  offset = hole->end - tc->snd_una - max_bytes;
1813  n_written = tcp_prepare_retransmit_segment (wrk, tc, offset,
1814  max_bytes, &b);
1815  if (!n_written)
1816  goto done;
1817 
1818  sb->rescue_rxt = tc->snd_congestion;
1819  bi = vlib_get_buffer_index (vm, b);
1820  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1821  n_segs += 1;
1822  break;
1823  }
1824 
1825  max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
1826  max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;
1827  if (max_bytes == 0)
1828  break;
1829 
1830  offset = sb->high_rxt - tc->snd_una;
1831  n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
1832  &b);
1833  ASSERT (n_written <= snd_space);
1834 
1835  /* Nothing left to retransmit */
1836  if (n_written == 0)
1837  break;
1838 
1839  bi = vlib_get_buffer_index (vm, b);
1840  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1841 
1842  sb->high_rxt += n_written;
1843  ASSERT (seq_leq (sb->high_rxt, tc->snd_nxt));
1844 
1845  snd_space -= n_written;
1846  n_segs += 1;
1847  }
1848 
1849  if (hole)
1851 
1852 done:
1853 
1854  transport_connection_tx_pacer_reset_bucket (&tc->connection, 0);
1855  return n_segs;
1856 }
1857 
1858 /**
1859  * Fast retransmit without SACK info
1860  */
1861 static int
1863  u32 burst_size)
1864 {
1865  u32 n_written = 0, offset = 0, bi, max_deq, n_segs_now, max_bytes;
1866  u32 burst_bytes, sent_bytes;
1867  vlib_main_t *vm = wrk->vm;
1868  int snd_space, n_segs = 0;
1869  u8 cc_limited = 0;
1870  vlib_buffer_t *b;
1871 
1873  TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
1874 
1875  burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
1876  burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
1877  if (!burst_size)
1878  {
1880  return 0;
1881  }
1882 
1883  snd_space = tcp_available_cc_snd_space (tc);
1884  cc_limited = snd_space < burst_bytes;
1885 
1886  if (!tcp_fastrecovery_first (tc))
1887  goto send_unsent;
1888 
1889  /* RFC 6582: [If a partial ack], retransmit the first unacknowledged
1890  * segment. */
1891  while (snd_space > 0 && n_segs < burst_size)
1892  {
1893  max_bytes = clib_min (tc->snd_mss,
1894  tc->snd_congestion - tc->snd_una - offset);
1895  if (!max_bytes)
1896  break;
1897  n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
1898  &b);
1899 
1900  /* Nothing left to retransmit */
1901  if (n_written == 0)
1902  break;
1903 
1904  bi = vlib_get_buffer_index (vm, b);
1905  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1906  snd_space -= n_written;
1907  offset += n_written;
1908  n_segs += 1;
1909  }
1910 
1911  if (n_segs == burst_size)
1912  goto done;
1913 
1914 send_unsent:
1915 
1916  /* RFC 6582: Send a new segment if permitted by the new value of cwnd. */
1917  if (snd_space < tc->snd_mss || tc->snd_mss == 0)
1918  goto done;
1919 
1920  max_deq = transport_max_tx_dequeue (&tc->connection);
1921  max_deq -= tc->snd_nxt - tc->snd_una;
1922  if (max_deq)
1923  {
1924  snd_space = clib_min (max_deq, snd_space);
1925  burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss);
1926  n_segs_now = tcp_transmit_unsent (wrk, tc, burst_size);
1927  if (n_segs_now && max_deq > n_segs_now * tc->snd_mss)
1929  n_segs += n_segs_now;
1930  }
1931 
1932 done:
1934 
1935  sent_bytes = clib_min (n_segs * tc->snd_mss, burst_bytes);
1936  sent_bytes = cc_limited ? burst_bytes : sent_bytes;
1937  transport_connection_tx_pacer_update_bytes (&tc->connection, sent_bytes);
1938 
1939  return n_segs;
1940 }
1941 
1942 static int
1943 tcp_send_acks (tcp_connection_t * tc, u32 max_burst_size)
1944 {
1945  int j, n_acks;
1946 
1947  if (!tc->pending_dupacks)
1948  {
1949  if (tcp_in_cong_recovery (tc) || !tcp_max_tx_deq (tc)
1950  || tc->state != TCP_STATE_ESTABLISHED)
1951  {
1952  tcp_send_ack (tc);
1953  return 1;
1954  }
1955  return 0;
1956  }
1957 
1958  /* If we're supposed to send dupacks but have no ooo data
1959  * send only one ack */
1960  if (!vec_len (tc->snd_sacks))
1961  {
1962  tcp_send_ack (tc);
1963  tc->dupacks_out += 1;
1964  tc->pending_dupacks = 0;
1965  return 1;
1966  }
1967 
1968  /* Start with first sack block */
1969  tc->snd_sack_pos = 0;
1970 
1971  /* Generate enough dupacks to cover all sack blocks. Do not generate
1972  * more sacks than the number of packets received. But do generate at
1973  * least 3, i.e., the number needed to signal congestion, if needed. */
1974  n_acks = vec_len (tc->snd_sacks) / TCP_OPTS_MAX_SACK_BLOCKS;
1975  n_acks = clib_min (n_acks, tc->pending_dupacks);
1976  n_acks = clib_max (n_acks, clib_min (tc->pending_dupacks, 3));
1977  for (j = 0; j < clib_min (n_acks, max_burst_size); j++)
1978  tcp_send_ack (tc);
1979 
1980  if (n_acks < max_burst_size)
1981  {
1982  tc->pending_dupacks = 0;
1983  tc->snd_sack_pos = 0;
1984  tc->dupacks_out += n_acks;
1985  return n_acks;
1986  }
1987  else
1988  {
1989  TCP_DBG ("constrained by burst size");
1990  tc->pending_dupacks = n_acks - max_burst_size;
1991  tc->dupacks_out += max_burst_size;
1992  tcp_program_dupack (tc);
1993  return max_burst_size;
1994  }
1995 }
1996 
/* Dispatches a retransmit for the connection: the SACK based path when the
 * peer's options permit SACK, the non-SACK path otherwise.
 * Returns the number of segments reported by the chosen path, or 0 if the
 * connection is already closed.
 * NOTE(review): the line with the function name and parameter list is
 * absent from this listing; the body uses `tc` and `max_burst_size`. */
1997 static int
1999 {
2000  tcp_worker_ctx_t *wrk;
2001  u32 n_segs;
2002 
/* Nothing is retransmitted on a closed connection */
2003  if (PREDICT_FALSE (tc->state == TCP_STATE_CLOSED))
2004  return 0;
2005 
2006  wrk = tcp_get_worker (tc->c_thread_index);
2007 
2008  if (tcp_opts_sack_permitted (&tc->rcv_opts))
2009  n_segs = tcp_retransmit_sack (wrk, tc, max_burst_size);
2010  else
2011  n_segs = tcp_retransmit_no_sack (wrk, tc, max_burst_size);
2012 
2013  return n_segs;
2014 }
2015 
/* Runs the pending retransmit (if any) and then the pending acks (if any)
 * for a connection, keeping the total within sp->max_burst_size.
 * Returns the number of segments sent (retransmits plus acks).
 * NOTE(review): the line with the function name and parameter list is
 * absent from this listing; the body uses `conn` (cast to a tcp
 * connection) and `sp`. */
2016 int
2018 {
2019  tcp_connection_t *tc = (tcp_connection_t *) conn;
2020  u32 n_segs = 0;
2021 
/* Retransmit only while in congestion recovery and only if one was
 * requested; the request flag is consumed here */
2022  if (tcp_in_cong_recovery (tc) && (tc->flags & TCP_CONN_RXT_PENDING))
2023  {
2024  tc->flags &= ~TCP_CONN_RXT_PENDING;
2025  n_segs = tcp_do_retransmit (tc, sp->max_burst_size);
2026  }
2027 
/* No ack was requested, we are done */
2028  if (!(tc->flags & TCP_CONN_SNDACK))
2029  return n_segs;
2030 
2031  tc->flags &= ~TCP_CONN_SNDACK;
2032 
2033  /* We have retransmitted packets and no dupack */
2034  if (n_segs && !tc->pending_dupacks)
2035  return n_segs;
2036 
/* Burst already used up by the retransmit: program the ack instead of
 * sending it now */
2037  if (sp->max_burst_size <= n_segs)
2038  {
2039  tcp_program_ack (tc);
2040  return n_segs;
2041  }
2042 
/* Spend what is left of the burst on acks */
2043  n_segs += tcp_send_acks (tc, sp->max_burst_size - n_segs);
2044 
2045  return n_segs;
2046 }
2047 #endif /* CLIB_MARCH_VARIANT */
2048 
2049 static void
2051  u16 * next0, u32 * error0)
2052 {
2053  ip_adjacency_t *adj;
2054  adj_index_t ai;
2055 
2056  /* Not thread safe but as long as the connection exists the adj should
2057  * not be removed */
2058  ai = adj_nbr_find (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &tc0->c_rmt_ip,
2059  tc0->sw_if_index);
2060  if (ai == ADJ_INDEX_INVALID)
2061  {
2062  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
2063  *next0 = TCP_OUTPUT_NEXT_DROP;
2064  *error0 = TCP_ERROR_LINK_LOCAL_RW;
2065  return;
2066  }
2067 
2068  adj = adj_get (ai);
2070  *next0 = TCP_OUTPUT_NEXT_IP_REWRITE;
2071  else if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP)
2072  *next0 = TCP_OUTPUT_NEXT_IP_ARP;
2073  else
2074  {
2075  *next0 = TCP_OUTPUT_NEXT_DROP;
2076  *error0 = TCP_ERROR_LINK_LOCAL_RW;
2077  }
2078  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
2079 }
2080 
/* Adds a trace record for every traced buffer in to_next[0..n_bufs).
 * Each record receives a copy of the TCP header found at the buffer's
 * current data pointer and a copy of the connection looked up from the
 * buffer's connection index on the current thread.
 * NOTE(review): the line with the function name and first parameters is
 * absent from this listing; the body uses `vm` and `node`.
 * NOTE(review): the result of tcp_connection_get() is copied from without
 * a NULL check, whereas the output loop in this file handles a 0 return
 * from the same lookup -- confirm a traced buffer can never reference a
 * connection that is gone. */
2081 static void
2083  u32 * to_next, u32 n_bufs)
2084 {
2085  tcp_connection_t *tc;
2086  tcp_tx_trace_t *t;
2087  vlib_buffer_t *b;
2088  tcp_header_t *th;
2089  int i;
2090 
2091  for (i = 0; i < n_bufs; i++)
2092  {
2093  b = vlib_get_buffer (vm, to_next[i]);
/* Buffers without the trace flag are skipped */
2094  if (!(b->flags & VLIB_BUFFER_IS_TRACED))
2095  continue;
2096  th = vlib_buffer_get_current (b);
2097  tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
2098  vm->thread_index);
2099  t = vlib_add_trace (vm, node, b, sizeof (*t));
2100  clib_memcpy_fast (&t->tcp_header, th, sizeof (t->tcp_header));
2101  clib_memcpy_fast (&t->tcp_connection, tc, sizeof (t->tcp_connection));
2102  }
2103 }
2104 
2105 always_inline void
2107  tcp_connection_t * tc0, u8 is_ip4)
2108 {
2109  TCP_EVT (TCP_EVT_OUTPUT, tc0,
2111  b0->current_length);
2112 
2113  if (is_ip4)
2114  vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
2115  IP_PROTOCOL_TCP, tcp_csum_offload (tc0));
2116  else
2117  vlib_buffer_push_ip6_custom (vm, b0, &tc0->c_lcl_ip6, &tc0->c_rmt_ip6,
2118  IP_PROTOCOL_TCP, tc0->ipv6_flow_label);
2119 }
2120 
2121 always_inline void
2123 {
2124  if (PREDICT_TRUE (!(tc->cfg_flags & TCP_CFG_F_TSO)))
2125  return;
2126 
2127  u16 data_len = b->current_length - sizeof (tcp_header_t) - tc->snd_opts_len;
2128 
2129  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID))
2131 
2132  if (PREDICT_TRUE (data_len <= tc->snd_mss))
2133  return;
2134  else
2135  {
2136  ASSERT ((b->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
2137  ASSERT ((b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
2138  b->flags |= VNET_BUFFER_F_GSO;
2139  vnet_buffer2 (b)->gso_l4_hdr_sz =
2140  sizeof (tcp_header_t) + tc->snd_opts_len;
2141  vnet_buffer2 (b)->gso_size = tc->snd_mss;
2142  }
2143 }
2144 
/* Chooses the next node for an outgoing buffer and fills in the buffer
 * metadata used downstream; on success the connection's segs_out counter
 * is incremented. For IPv6 link-local destinations the next node is
 * resolved by tcp_output_handle_link_local(); if that reports an error the
 * buffer's error is set and segs_out is not incremented.
 * NOTE(review): the line with the function name and first parameters is
 * absent from this listing; the body uses `tc0` and `b0`. */
2145 always_inline void
2147  vlib_node_runtime_t * error_node, u16 * next0,
2148  u8 is_ip4)
2149 {
2150  /* If next_index is not drop use it */
2151  if (tc0->next_node_index)
2152  {
2153  *next0 = tc0->next_node_index;
2154  vnet_buffer (b0)->tcp.next_node_opaque = tc0->next_node_opaque;
2155  }
2156  else
2157  {
2158  *next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
2159  }
2160 
/* TX sw_if_index slot carries the connection's fib index, RX is cleared */
2161  vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
2162  vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
2163 
2164  if (!is_ip4)
2165  {
2166  u32 error0 = 0;
2167 
/* Link-local destinations get their own next-node selection, which may
 * overwrite *next0 and report an error */
2168  if (PREDICT_FALSE (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6)))
2169  tcp_output_handle_link_local (tc0, b0, next0, &error0);
2170 
2171  if (PREDICT_FALSE (error0))
2172  {
2173  b0->error = error_node->errors[error0];
2174  return;
2175  }
2176  }
2177 
2178  tc0->segs_out += 1;
2179 }
2180 
2183  vlib_frame_t * frame, int is_ip4)
2184 {
2185  u32 n_left_from, *from, thread_index = vm->thread_index;
2186  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
2187  u16 nexts[VLIB_FRAME_SIZE], *next;
2188 
2189  from = vlib_frame_vector_args (frame);
2190  n_left_from = frame->n_vectors;
2191  tcp_set_time_now (tcp_get_worker (thread_index));
2192 
2194  tcp46_output_trace_frame (vm, node, from, n_left_from);
2195 
2196  vlib_get_buffers (vm, from, bufs, n_left_from);
2197  b = bufs;
2198  next = nexts;
2199 
2200  while (n_left_from >= 4)
2201  {
2202  tcp_connection_t *tc0, *tc1;
2203 
2204  {
2205  vlib_prefetch_buffer_header (b[2], STORE);
2206  CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2207 
2208  vlib_prefetch_buffer_header (b[3], STORE);
2209  CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2210  }
2211 
2212  tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
2213  thread_index);
2214  tc1 = tcp_connection_get (vnet_buffer (b[1])->tcp.connection_index,
2215  thread_index);
2216 
2217  if (PREDICT_TRUE (!tc0 + !tc1 == 0))
2218  {
2219  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
2220  tcp_output_push_ip (vm, b[1], tc1, is_ip4);
2221 
2222  tcp_check_if_gso (tc0, b[0]);
2223  tcp_check_if_gso (tc1, b[1]);
2224 
2225  tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
2226  tcp_output_handle_packet (tc1, b[1], node, &next[1], is_ip4);
2227  }
2228  else
2229  {
2230  if (tc0 != 0)
2231  {
2232  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
2233  tcp_check_if_gso (tc0, b[0]);
2234  tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
2235  }
2236  else
2237  {
2238  b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
2239  next[0] = TCP_OUTPUT_NEXT_DROP;
2240  }
2241  if (tc1 != 0)
2242  {
2243  tcp_output_push_ip (vm, b[1], tc1, is_ip4);
2244  tcp_check_if_gso (tc1, b[1]);
2245  tcp_output_handle_packet (tc1, b[1], node, &next[1], is_ip4);
2246  }
2247  else
2248  {
2249  b[1]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
2250  next[1] = TCP_OUTPUT_NEXT_DROP;
2251  }
2252  }
2253 
2254  b += 2;
2255  next += 2;
2256  n_left_from -= 2;
2257  }
2258  while (n_left_from > 0)
2259  {
2260  tcp_connection_t *tc0;
2261 
2262  if (n_left_from > 1)
2263  {
2264  vlib_prefetch_buffer_header (b[1], STORE);
2265  CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2266  }
2267 
2268  tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
2269  thread_index);
2270 
2271  if (PREDICT_TRUE (tc0 != 0))
2272  {
2273  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
2274  tcp_check_if_gso (tc0, b[0]);
2275  tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
2276  }
2277  else
2278  {
2279  b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
2280  next[0] = TCP_OUTPUT_NEXT_DROP;
2281  }
2282 
2283  b += 1;
2284  next += 1;
2285  n_left_from -= 1;
2286  }
2287 
2288  vlib_buffer_enqueue_to_next (vm, node, from, nexts, frame->n_vectors);
2289  vlib_node_increment_counter (vm, tcp_node_index (output, is_ip4),
2290  TCP_ERROR_PKTS_SENT, frame->n_vectors);
2291  return frame->n_vectors;
2292 }
2293 
2295  vlib_frame_t * from_frame)
2296 {
2297  return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
2298 }
2299 
2301  vlib_frame_t * from_frame)
2302 {
2303  return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
2304 }
2305 
2306 /* *INDENT-OFF* */
2308 {
2309  .name = "tcp4-output",
2310  /* Takes a vector of packets. */
2311  .vector_size = sizeof (u32),
2312  .n_errors = TCP_N_ERROR,
2313  .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
2314  .error_strings = tcp_error_strings,
2315  .n_next_nodes = TCP_OUTPUT_N_NEXT,
2316  .next_nodes = {
2317 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
2319 #undef _
2320  },
2321  .format_buffer = format_tcp_header,
2322  .format_trace = format_tcp_tx_trace,
2323 };
2324 /* *INDENT-ON* */
2325 
2326 /* *INDENT-OFF* */
2328 {
2329  .name = "tcp6-output",
2330  /* Takes a vector of packets. */
2331  .vector_size = sizeof (u32),
2332  .n_errors = TCP_N_ERROR,
2333  .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
2334  .error_strings = tcp_error_strings,
2335  .n_next_nodes = TCP_OUTPUT_N_NEXT,
2336  .next_nodes = {
2337 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
2339 #undef _
2340  },
2341  .format_buffer = format_tcp_header,
2342  .format_trace = format_tcp_tx_trace,
2343 };
2344 /* *INDENT-ON* */
2345 
2346 typedef enum _tcp_reset_next
2347 {
2352 
/* X-macro list of the next nodes of the IPv4 reset node. Each entry is
 * _(SUFFIX, "graph-node-name"); presumably expanded with the
 * [TCP_RESET_NEXT_##s] = n definition of `_` used in the node
 * registration further down -- the expanding line is not visible in this
 * listing. */
2353 #define foreach_tcp4_reset_next \
2354  _(DROP, "error-drop") \
2355  _(IP_LOOKUP, "ip4-lookup")
2356 
/* X-macro list of the next nodes of the IPv6 reset node. Each entry is
 * _(SUFFIX, "graph-node-name"); presumably expanded with the
 * [TCP_RESET_NEXT_##s] = n definition of `_` used in the node
 * registration further down -- the expanding line is not visible in this
 * listing. */
2357 #define foreach_tcp6_reset_next \
2358  _(DROP, "error-drop") \
2359  _(IP_LOOKUP, "ip6-lookup")
2360 
/* Node function body shared by the IPv4 and IPv6 reset nodes: turns every
 * buffer of the incoming frame into a reset in place and enqueues it to
 * the IP lookup next node. Returns the number of buffers in the frame.
 * NOTE(review): the line with the function name and first parameters is
 * absent from this listing; the body uses `vm` and `node`. */
2361 static uword
2363  vlib_frame_t * from_frame, u8 is_ip4)
2364 {
/* Every buffer gets the same error counter and the same next node */
2365  u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP;
2366  u32 n_left_from, next_index, *from, *to_next;
2367 
2368  from = vlib_frame_vector_args (from_frame);
2369  n_left_from = from_frame->n_vectors;
2370 
2371  next_index = node->cached_next_index;
2372 
2373  while (n_left_from > 0)
2374  {
2375  u32 n_left_to_next;
2376 
2377  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2378 
2379  while (n_left_from > 0 && n_left_to_next > 0)
2380  {
2381  vlib_buffer_t *b0;
2382  tcp_tx_trace_t *t0;
2383  tcp_header_t *th0;
2384  u32 bi0;
2385 
/* Speculatively place the buffer in the current next frame */
2386  bi0 = from[0];
2387  to_next[0] = bi0;
2388  from += 1;
2389  to_next += 1;
2390  n_left_from -= 1;
2391  n_left_to_next -= 1;
2392 
2393  b0 = vlib_get_buffer (vm, bi0);
2394  tcp_make_reset_in_place (vm, b0, is_ip4);
2395 
2396  /* Prepare to send to IP lookup */
2397  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
2398 
2399  b0->error = node->errors[error0];
2400  b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
2401  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2402  {
/* The buffer currently points at the IP header, so step over it to reach
 * the TCP header that goes into the trace.
 * NOTE(review): only the tcp_header part of the trace record is filled
 * here, while the trace formatter registered for the reset nodes also
 * prints connection fields -- confirm the connection part of the record
 * is meant to stay unset. */
2403  th0 = vlib_buffer_get_current (b0);
2404  if (is_ip4)
2405  th0 = ip4_next_header ((ip4_header_t *) th0);
2406  else
2407  th0 = ip6_next_header ((ip6_header_t *) th0);
2408  t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
2409  clib_memcpy_fast (&t0->tcp_header, th0,
2410  sizeof (t0->tcp_header));
2411  }
2412 
2413  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
2414  n_left_to_next, bi0, next0);
2415  }
2416  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2417  }
2418  return from_frame->n_vectors;
2419 }
2420 
2422  vlib_frame_t * from_frame)
2423 {
2424  return tcp46_send_reset_inline (vm, node, from_frame, 1);
2425 }
2426 
2428  vlib_frame_t * from_frame)
2429 {
2430  return tcp46_send_reset_inline (vm, node, from_frame, 0);
2431 }
2432 
2433 /* *INDENT-OFF* */
2435  .name = "tcp4-reset",
2436  .vector_size = sizeof (u32),
2437  .n_errors = TCP_N_ERROR,
2438  .error_strings = tcp_error_strings,
2439  .n_next_nodes = TCP_RESET_N_NEXT,
2440  .next_nodes = {
2441 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
2443 #undef _
2444  },
2445  .format_trace = format_tcp_tx_trace,
2446 };
2447 /* *INDENT-ON* */
2448 
2449 /* *INDENT-OFF* */
2451  .name = "tcp6-reset",
2452  .vector_size = sizeof (u32),
2453  .n_errors = TCP_N_ERROR,
2454  .error_strings = tcp_error_strings,
2455  .n_next_nodes = TCP_RESET_N_NEXT,
2456  .next_nodes = {
2457 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
2459 #undef _
2460  },
2461  .format_trace = format_tcp_tx_trace,
2462 };
2463 /* *INDENT-ON* */
2464 
2465 /*
2466  * fd.io coding-style-patch-verification: ON
2467  *
2468  * Local Variables:
2469  * eval: (c-set-style "gnu")
2470  * End:
2471  */
u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index, VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:124
static void tcp_check_if_gso(tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:2122
static void tcp_check_sack_reneging(tcp_connection_t *tc)
Definition: tcp_output.c:1267
#define TCP_RXT_MAX_BURST
Definition: tcp_types.h:35
#define clib_min(x, y)
Definition: clib.h:328
static int tcp_send_acks(tcp_connection_t *tc, u32 max_burst_size)
Definition: tcp_output.c:1943
u16 ip4_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
Definition: tcp_output.c:396
#define CLIB_UNUSED(x)
Definition: clib.h:87
static void tcp_persist_timer_set(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
Definition: tcp_timer.h:69
#define TCP_OPTION_LEN_SACK_PERMITTED
Definition: tcp_packet.h:166
static u32 tcp_time_now(void)
Definition: tcp_inlines.h:191
void tcp_timer_persist_handler(tcp_connection_t *tc)
Got 0 snd_wnd from peer, try to do something about it.
Definition: tcp_output.c:1516
vl_api_wireguard_peer_flags_t flags
Definition: wireguard.api:105
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers. Frees the entire buffer chain for each buffer.
Definition: buffer_funcs.h:937
static tcp_connection_t * tcp_connection_get(u32 conn_index, u32 thread_index)
Definition: tcp_inlines.h:30
ip4_address_t src_address
Definition: ip4_packet.h:125
#define tcp_node_index(node_id, is_ip4)
Definition: tcp.h:272
int session_tx_fifo_peek_bytes(transport_connection_t *tc, u8 *buffer, u32 offset, u32 max_bytes)
Definition: session.c:580
void session_queue_run_on_main_thread(vlib_main_t *vm)
Definition: session.c:1661
#define vnet_buffer2(b)
Definition: buffer.h:481
#define TCP_FLAG_SYN
Definition: fa_node.h:13
static void tcp_make_synack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN-ACK.
Definition: tcp_output.c:521
#define tcp_opts_tstamp(_to)
Definition: tcp_packet.h:156
#define PREDICT_TRUE(x)
Definition: clib.h:122
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
Definition: buffer.h:110
vl_api_ip_port_and_mask_t dst_port
Definition: flow_types.api:92
void tcp_timer_retransmit_syn_handler(tcp_connection_t *tc)
SYN retransmit timer handler.
Definition: tcp_output.c:1452
#define clib_memcpy_fast(a, b, c)
Definition: string.h:81
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
static int tcp_do_retransmit(tcp_connection_t *tc, u32 max_burst_size)
Definition: tcp_output.c:1998
IP unicast adjacency.
Definition: adj.h:235
u32 fib_table_get_index_for_sw_if_index(fib_protocol_t proto, u32 sw_if_index)
Get the index of the FIB bound to the interface.
Definition: fib_table.c:998
#define tcp_fastrecovery_first_off(tc)
Definition: tcp_types.h:424
static u32 tcp_options_write(u8 *data, tcp_options_t *opts)
Write TCP options to segment.
Definition: tcp_packet.h:309
void session_add_self_custom_tx_evt(transport_connection_t *tc, u8 has_prio)
Definition: session.c:122
void tcp_timer_retransmit_handler(tcp_connection_t *tc)
Definition: tcp_output.c:1306
struct _tcp_main tcp_main_t
u32 thread_index
Definition: main.h:250
static void * vlib_buffer_push_tcp_net_order(vlib_buffer_t *b, u16 sp, u16 dp, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
Definition: tcp_inlines.h:401
static sack_scoreboard_hole_t * scoreboard_get_hole(sack_scoreboard_t *sb, u32 index)
Definition: tcp_sack.h:35
void tcp_connection_timers_reset(tcp_connection_t *tc)
Stop all connection timers.
Definition: tcp.c:493
This packet is to be rewritten and forwarded to the next processing node.
Definition: adj.h:73
u16 current_length
Nbytes between current data and the end of this buffer.
Definition: buffer.h:113
static int tcp_transmit_unsent(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Definition: tcp_output.c:1618
struct _tcp_connection tcp_connection_t
#define TCP_OPTS_ALIGN
Definition: tcp_packet.h:173
static u32 tcp_set_time_now(tcp_worker_ctx_t *wrk)
Definition: tcp_inlines.h:219
static u32 tcp_initial_wnd_unscaled(tcp_connection_t *tc)
TCP's initial window.
Definition: tcp_output.c:85
enum _tcp_output_next tcp_output_next_t
vl_api_address_t src
Definition: gre.api:54
static u32 format_get_indent(u8 *s)
Definition: format.h:72
static void tcp_cc_congestion(tcp_connection_t *tc)
Definition: tcp_cc.h:36
uword ip_csum_t
Definition: ip_packet.h:246
vlib_main_t * vm
Definition: in2out_ed.c:1580
static ip_csum_t ip_csum_with_carry(ip_csum_t sum, ip_csum_t x)
Definition: ip_packet.h:249
#define TCP_RTO_SYN_RETRIES
Definition: tcp_types.h:89
#define VLIB_NODE_FN(node)
Definition: node.h:203
static void tcp_push_ip_hdr(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:785
static void * vlib_buffer_push_ip6_custom(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto, u32 flow_label)
Push IPv6 header to buffer.
Definition: ip6_inlines.h:207
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:520
static uword tcp46_send_reset_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame, u8 is_ip4)
Definition: tcp_output.c:2362
vlib_error_t * errors
Vector of errors for this node.
Definition: node.h:470
static uword vlib_buffer_length_in_chain(vlib_main_t *vm, vlib_buffer_t *b)
Get length in bytes of the buffer chain.
Definition: buffer_funcs.h:402
static u8 tcp_is_descheduled(tcp_connection_t *tc)
Definition: tcp_inlines.h:380
u8 n_sack_blocks
Number of SACK blocks.
Definition: tcp_packet.h:151
struct _sack_scoreboard sack_scoreboard_t
struct _tcp_header tcp_header_t
int tcp_half_open_connection_cleanup(tcp_connection_t *tc)
Try to cleanup half-open connection.
Definition: tcp.c:209
#define scoreboard_rescue_rxt_valid(_sb, _tc)
Definition: tcp_output.c:1700
ip6_address_t src_address
Definition: ip6_packet.h:310
#define tcp_in_cong_recovery(tc)
Definition: tcp_types.h:426
unsigned char u8
Definition: types.h:56
u8 wscale
Option flags, see above.
Definition: tcp_packet.h:146
u8 data[128]
Definition: ipsec_types.api:90
enum fib_protocol_t_ fib_protocol_t
Protocol Type.
#define TCP_OPTS_MAX_SACK_BLOCKS
Definition: tcp_packet.h:174
double f64
Definition: types.h:142
#define foreach_tcp4_reset_next
Definition: tcp_output.c:2353
static u32 tcp_prepare_retransmit_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Build a retransmit segment.
Definition: tcp_output.c:1224
static u16 ip_calculate_l4_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip_csum_t sum0, u32 payload_length, u8 *iph, u32 ip_header_size, u8 *l4h)
Definition: ip.h:184
int tcp_session_custom_tx(void *conn, transport_send_params_t *sp)
Definition: tcp_output.c:2017
void session_transport_closing_notify(transport_connection_t *tc)
Notification from transport that connection is being closed.
Definition: session.c:975
static uword tcp46_output_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, int is_ip4)
Definition: tcp_output.c:2182
static void * tcp_init_buffer(vlib_main_t *vm, vlib_buffer_t *b)
Definition: tcp_output.c:355
static ip_adjacency_t * adj_get(adj_index_t adj_index)
Get a pointer to an adjacency object from its index.
Definition: adj.h:467
void tcp_make_syn(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN.
Definition: tcp_output.c:495
static int tcp_prepare_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Allocate a new buffer and build a new tcp segment.
Definition: tcp_output.c:1108
#define tcp_is_fin(_th)
Definition: tcp_packet.h:90
static void tcp_make_fin(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to FIN-ACK.
Definition: tcp_output.c:486
#define tcp_cfg
Definition: tcp.h:271
#define TCP_OPTION_LEN_SACK_BLOCK
Definition: tcp_packet.h:168
ip4_address_t dst_address
Definition: ip4_packet.h:125
#define seq_leq(_s1, _s2)
Definition: tcp_packet.h:179
#define TCP_FLAG_ACK
Definition: fa_node.h:16
u8 * format_white_space(u8 *s, va_list *va)
Definition: std-formats.c:129
description fragment has unexpected format
Definition: map.api:433
tcp_main_t tcp_main
Definition: tcp.c:28
#define vlib_prefetch_buffer_header(b, type)
Prefetch buffer metadata.
Definition: buffer.h:207
#define TCP_RTO_MAX
Definition: tcp_types.h:86
const cJSON *const b
Definition: cJSON.h:255
static void * ip4_next_header(ip4_header_t *i)
Definition: ip4_packet.h:196
#define tcp_zero_rwnd_sent(tc)
Definition: tcp_types.h:438
sack_block_t * sacks
SACK blocks.
Definition: tcp_packet.h:150
unsigned int u32
Definition: types.h:88
static sack_scoreboard_hole_t * scoreboard_first_hole(sack_scoreboard_t *sb)
Definition: tcp_sack.h:59
static tcp_header_t * tcp_buffer_hdr(vlib_buffer_t *b)
Definition: tcp_inlines.h:22
static void tcp46_output_trace_frame(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *to_next, u32 n_bufs)
Definition: tcp_output.c:2082
#define VLIB_FRAME_SIZE
Definition: node.h:378
#define tcp_validate_txf_size(_tc, _a)
Definition: tcp.h:354
static void tcp_push_hdr_i(tcp_connection_t *tc, vlib_buffer_t *b, u32 snd_nxt, u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
Push TCP header and update connection variables.
Definition: tcp_output.c:922
#define tcp_in_fastrecovery(tc)
Definition: tcp_types.h:416
#define tcp_csum_offload(tc)
Definition: tcp_types.h:436
static u32 vlib_get_buffer_index(vlib_main_t *vm, void *p)
Translate buffer pointer into buffer index.
Definition: buffer_funcs.h:293
static void tcp_retransmit_timer_update(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
Definition: tcp_timer.h:96
u32 tcp_session_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
Definition: tcp_output.c:995
static void * vlib_buffer_push_ip6(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto)
Push IPv6 header to buffer.
Definition: ip6_inlines.h:250
#define TCP_OPTION_LEN_WINDOW_SCALE
Definition: tcp_packet.h:165
vlib_node_registration_t tcp6_reset_node
(constructor) VLIB_REGISTER_NODE (tcp6_reset_node)
Definition: tcp_output.c:2450
void scoreboard_clear_reneging(sack_scoreboard_t *sb, u32 start, u32 end)
Definition: tcp_sack.c:297
vlib_error_t error
Error code for buffers to be enqueued to error handler.
Definition: buffer.h:136
#define tcp_trajectory_add_start(b, start)
Definition: tcp.h:294
#define TRANSPORT_MAX_HDRS_LEN
static void tcp_retransmit_timer_set(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
Definition: tcp_timer.h:55
vlib_main_t * vm
convenience pointer to this thread's vlib main
Definition: tcp.h:92
void tcp_send_reset(tcp_connection_t *tc)
Build and set reset packet for connection.
Definition: tcp_output.c:749
void tcp_send_synack(tcp_connection_t *tc)
Definition: tcp_output.c:844
#define ADJ_INDEX_INVALID
Invalid ADJ index - used when no adj is known likewise blazoned capitals INVALID speak volumes where ...
Definition: adj_types.h:36
static int tcp_make_synack_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:195
static int tcp_make_syn_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:166
static void * vlib_buffer_make_headroom(vlib_buffer_t *b, u8 size)
Make head room, typically for packet headers.
Definition: buffer.h:354
static int tcp_retransmit_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Do retransmit with SACKs.
Definition: tcp_output.c:1708
void tcp_connection_tx_pacer_reset(tcp_connection_t *tc, u32 window, u32 start_bucket)
Definition: tcp.c:1209
static void tcp_cc_loss(tcp_connection_t *tc)
Definition: tcp_cc.h:42
format_function_t format_tcp_connection_id
Definition: tcp.h:352
static __clib_warn_unused_result u32 vlib_buffer_alloc(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Allocate buffers into supplied array.
Definition: buffer_funcs.h:677
unsigned short u16
Definition: types.h:57
#define TCP_DUPACK_THRESHOLD
Definition: tcp_types.h:37
u8 data_len
Definition: ikev2_types.api:24
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
Definition: buffer.h:233
#define TCP_ESTABLISH_TIME
Definition: tcp_types.h:92
static void tcp_output_handle_link_local(tcp_connection_t *tc0, vlib_buffer_t *b0, u16 *next0, u32 *error0)
Definition: tcp_output.c:2050
tcp_timer_wheel_t timer_wheel
worker timer wheel
Definition: tcp.h:118
#define foreach_tcp6_output_next
Definition: tcp_output.c:37
#define PREDICT_FALSE(x)
Definition: clib.h:121
#define always_inline
Definition: ipsec.h:28
void tcp_program_dupack(tcp_connection_t *tc)
Definition: tcp_output.c:1053
#define TCP_FLAG_FIN
Definition: fa_node.h:12
int tcp_fastrecovery_prr_snd_space(tcp_connection_t *tc)
Estimate send space using proportional rate reduction (RFC6937)
Definition: tcp_output.c:1657
static u8 tcp_window_compute_scale(u32 window)
Definition: tcp_output.c:73
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
Definition: buffer_node.h:224
vl_api_address_t dst
Definition: gre.api:55
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
Definition: node_funcs.h:391
#define TCP_OPTION_LEN_TIMESTAMP
Definition: tcp_packet.h:167
#define foreach_tcp4_output_next
Definition: tcp_output.c:31
#define TCP_WND_MAX
Definition: tcp_packet.h:171
static void tcp_enqueue_to_ip_lookup(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4, u32 fib_index)
Definition: tcp_output.c:543
static void vlib_node_increment_counter(vlib_main_t *vm, u32 node_index, u32 counter_index, u64 increment)
Definition: node_funcs.h:1231
#define TCP_FLAG_RST
Definition: fa_node.h:14
#define TCP_DBG(_fmt, _args...)
Definition: tcp_debug.h:146
u8 len
Definition: ip_types.api:103
#define TCP_MAX_WND_SCALE
Definition: tcp_packet.h:172
void tcp_program_cleanup(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
Definition: tcp.c:335
static void tcp_output_handle_packet(tcp_connection_t *tc0, vlib_buffer_t *b0, vlib_node_runtime_t *error_node, u16 *next0, u8 is_ip4)
Definition: tcp_output.c:2146
vl_api_ip_port_and_mask_t src_port
Definition: flow_types.api:91
static void tcp_cc_event(tcp_connection_t *tc, tcp_cc_event_t evt)
Definition: tcp_cc.h:61
void transport_connection_reschedule(transport_connection_t *tc)
Definition: transport.c:766
static u32 tcp_flight_size(const tcp_connection_t *tc)
Our estimate of the number of bytes in flight (pipe size)
Definition: tcp_inlines.h:94
This packet matches an "incomplete adjacency" and packets need to be passed to ARP to find rewrite st...
Definition: adj.h:63
void tcp_bt_track_tx(tcp_connection_t *tc, u32 len)
Track a tcp tx burst.
Definition: tcp_bt.c:301
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:170
tcp_header_t tcp_header
Definition: tcp_output.c:51
u16 n_vectors
Definition: node.h:397
static sack_scoreboard_hole_t * scoreboard_last_hole(sack_scoreboard_t *sb)
Definition: tcp_sack.h:67
#define CLIB_PREFETCH(addr, size, type)
Definition: cache.h:80
static_always_inline void vlib_buffer_enqueue_to_next(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *buffers, u16 *nexts, uword count)
Definition: buffer_node.h:339
void tcp_send_window_update_ack(tcp_connection_t *tc)
Send window update ack.
Definition: tcp_output.c:1082
sll srl srl sll sra u16x4 i
Definition: vector_sse42.h:317
void tcp_program_retransmit(tcp_connection_t *tc)
Definition: tcp_output.c:1065
void tcp_send_reset_w_pkt(tcp_connection_t *tc, vlib_buffer_t *pkt, u32 thread_index, u8 is_ip4)
Send reset without reusing existing buffer.
Definition: tcp_output.c:661
format_function_t format_tcp_state
Definition: tcp.h:347
static void tcp_update_rto(tcp_connection_t *tc)
Definition: tcp_inlines.h:373
#define clib_warning(format, args...)
Definition: error.h:59
#define tcp_in_recovery(tc)
Definition: tcp_types.h:417
format_function_t format_tcp_header
Definition: format.h:100
struct _transport_connection transport_connection_t
#define TCP_TO_TIMER_TICK
Factor for converting ticks to timer ticks.
Definition: tcp_types.h:82
static u32 tcp_window_to_advertise(tcp_connection_t *tc, tcp_state_t state)
Compute and return window to advertise, scaled as per RFC1323.
Definition: tcp_output.c:156
u32 adj_index_t
An index for adjacencies.
Definition: adj_types.h:30
#define ARRAY_LEN(x)
Definition: clib.h:67
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
Definition: main.c:483
#define tcp_zero_rwnd_sent_on(tc)
Definition: tcp_types.h:439
u16 mss
Maximum segment size advertised.
Definition: tcp_packet.h:147
static u32 tcp_available_cc_snd_space(const tcp_connection_t *tc)
Estimate of how many bytes we can still push into the network.
Definition: tcp_inlines.h:171
static void * ip6_next_header(ip6_header_t *i)
Definition: ip6_packet.h:376
static int tcp_retransmit_no_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Fast retransmit without SACK info.
Definition: tcp_output.c:1862
vlib_main_t vlib_node_runtime_t * node
Definition: in2out_ed.c:1580
static void * vlib_buffer_push_tcp(vlib_buffer_t *b, u16 sp_net, u16 dp_net, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
Definition: tcp_inlines.h:438
static void tcp_make_ack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to ACK.
Definition: tcp_output.c:475
static u32 transport_max_tx_dequeue(transport_connection_t *tc)
Definition: session.h:509
#define seq_geq(_s1, _s2)
Definition: tcp_packet.h:181
u16 ip6_tcp_udp_icmp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip6_header_t *ip0, int *bogus_lengthp)
Definition: ip6_forward.c:1099
signed int i32
Definition: types.h:77
static int tcp_make_established_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:230
void tcp_bt_check_app_limited(tcp_connection_t *tc)
Check if sample to be generated is app limited.
Definition: tcp_bt.c:286
u16 cached_next_index
Next frame index that vector arguments were last enqueued to last time this node ran.
Definition: node.h:511
#define ASSERT(truth)
static void tcp_cc_init_rxt_timeout(tcp_connection_t *tc)
Reset congestion control, switch cwnd to loss window and try again.
Definition: tcp_output.c:1283
static void tcp_output_push_ip(vlib_main_t *vm, vlib_buffer_t *b0, tcp_connection_t *tc0, u8 is_ip4)
Definition: tcp_output.c:2106
#define tcp_recovery_on(tc)
Definition: tcp_types.h:414
static u8 * format_tcp_tx_trace(u8 *s, va_list *args)
Definition: tcp_output.c:56
u16 ip4_tcp_udp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip4_header_t *ip0)
Definition: ip4_forward.c:1328
void transport_connection_tx_pacer_reset_bucket(transport_connection_t *tc, u32 bucket)
Reset tx pacer bucket.
Definition: transport.c:714
void tcp_bt_track_rxt(tcp_connection_t *tc, u32 start, u32 end)
Track a tcp retransmission.
Definition: tcp_bt.c:338
void tcp_update_burst_snd_vars(tcp_connection_t *tc)
Update burst send vars.
Definition: tcp_output.c:300
#define TRANSPORT_PACER_MIN_BURST
Definition: transport.h:23
#define seq_gt(_s1, _s2)
Definition: tcp_packet.h:180
static uword ip6_address_is_link_local_unicast(const ip6_address_t *a)
Definition: ip6_packet.h:253
#define clib_mem_unaligned(pointer, type)
Definition: types.h:155
static void tcp_update_rcv_wnd(tcp_connection_t *tc)
Definition: tcp_output.c:116
struct _sack_scoreboard_hole sack_scoreboard_hole_t
#define TCP_RTO_BOFF_MAX
Definition: tcp_types.h:91
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
Definition: tcp_output.c:873
#define clib_max(x, y)
Definition: clib.h:321
void tcp_send_ack(tcp_connection_t *tc)
Definition: tcp_output.c:1023
void transport_connection_tx_pacer_update_bytes(transport_connection_t *tc, u32 bytes)
Definition: transport.c:759
int tcp_retransmit_first_unacked(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
Retransmit first unacked segment.
Definition: tcp_output.c:1599
template key/value backing page structure
Definition: bihash_doc.h:44
#define tcp_is_syn(_th)
Definition: tcp_packet.h:89
u32 ip_version_traffic_class_and_flow_label
Definition: ip6_packet.h:297
#define tcp_opts_wscale(_to)
Definition: tcp_packet.h:157
Definition: defs.h:47
u32 tsval
Timestamp value.
Definition: tcp_packet.h:148
static void tcp_timer_update(tcp_timer_wheel_t *tw, tcp_connection_t *tc, u8 timer_id, u32 interval)
Definition: tcp_timer.h:43
u32 tsecr
Echoed/reflected time stamp.
Definition: tcp_packet.h:149
#define tcp_fastrecovery_first(tc)
Definition: tcp_types.h:422
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
static u8 tcp_max_tx_deq(tcp_connection_t *tc)
Definition: tcp_output.c:1694
ip_lookup_next_t lookup_next_index
Next hop after ip4-lookup.
Definition: adj.h:337
u32 next_buffer
Next buffer for this linked-list of buffers.
Definition: buffer.h:140
vlib_main_t vlib_node_runtime_t vlib_frame_t * frame
Definition: in2out_ed.c:1581
#define foreach_tcp6_reset_next
Definition: tcp_output.c:2357
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
Definition: buffer.h:497
static tcp_worker_ctx_t * tcp_get_worker(u32 thread_index)
Definition: tcp.h:282
void session_transport_closed_notify(transport_connection_t *tc)
Notification from transport that it is closed.
Definition: session.c:1063
VLIB buffer representation.
Definition: buffer.h:102
u64 uword
Definition: types.h:112
int session_stream_connect_notify(transport_connection_t *tc, session_error_t err)
Definition: session.c:784
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
Definition: node_funcs.h:297
static void tcp_make_ack_i(tcp_connection_t *tc, vlib_buffer_t *b, tcp_state_t state, u8 flags)
Prepare ACK.
Definition: tcp_output.c:442
static int tcp_make_reset_in_place(vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4)
Definition: tcp_output.c:578
#define TCP_OPTION_LEN_MSS
Definition: tcp_packet.h:164
sack_scoreboard_hole_t * scoreboard_next_rxt_hole(sack_scoreboard_t *sb, sack_scoreboard_hole_t *start, u8 have_unsent, u8 *can_rescue, u8 *snd_limited)
Figure out the next hole to retransmit.
Definition: tcp_sack.c:194
u16 ip6_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
Definition: tcp_output.c:370
static f64 tcp_time_now_us(u32 thread_index)
Definition: tcp_inlines.h:213
void scoreboard_init_rxt(sack_scoreboard_t *sb, u32 snd_una)
Definition: tcp_sack.c:254
static void tcp_connection_set_state(tcp_connection_t *tc, tcp_state_t state)
Definition: tcp_inlines.h:51
struct clib_bihash_value offset
template key/value backing page structure
u32 tcp_initial_window_to_advertise(tcp_connection_t *tc)
Compute initial window and scale factor.
Definition: tcp_output.c:104
#define vnet_buffer(b)
Definition: buffer.h:417
#define TCP_USE_SACKS
Disable only for testing.
Definition: tcp_types.h:40
vl_api_dhcp_client_state_t state
Definition: dhcp.api:201
static u32 vlib_num_workers()
Definition: threads.h:377
void tcp_connection_cleanup(tcp_connection_t *tc)
Cleans up connection state.
Definition: tcp.c:242
static u32 tcp_buffer_len(vlib_buffer_t *b)
Definition: tcp_output.c:986
static u8 tcp_retransmit_should_retry_head(tcp_connection_t *tc, sack_scoreboard_t *sb)
Definition: tcp_output.c:1681
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
Definition: tcp_output.c:808
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
Definition: tcp_output.c:2327
u16 flags
Copy of main node flags.
Definition: node.h:501
void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
Definition: trace.c:634
static u8 tcp_timer_is_active(tcp_connection_t *tc, tcp_timers_e timer)
Definition: tcp_timer.h:110
static u16 tcp_compute_checksum(tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:416
enum _tcp_reset_next tcp_reset_next_t
static u32 transport_max_rx_enqueue(transport_connection_t *tc)
Definition: session.h:502
#define tcp_opts_sack_permitted(_to)
Definition: tcp_packet.h:159
static void vlib_buffer_free_one(vlib_main_t *vm, u32 buffer_index)
Free one buffer. Shorthand to free a single buffer chain.
Definition: buffer_funcs.h:970
tcp_connection_t tcp_connection
Definition: tcp_output.c:52
static u32 tcp_tstamp(tcp_connection_t *tc)
Generate timestamp for tcp connection.
Definition: tcp_inlines.h:206
void tcp_program_ack(tcp_connection_t *tc)
Definition: tcp_output.c:1043
static void * tcp_reuse_buffer(vlib_main_t *vm, vlib_buffer_t *b)
Definition: tcp_output.c:338
u8 ip_version_and_header_length
Definition: ip4_packet.h:93
static_always_inline void vlib_get_buffers(vlib_main_t *vm, u32 *bi, vlib_buffer_t **b, int count)
Translate array of buffer indices into buffer pointers.
Definition: buffer_funcs.h:280
vlib_node_registration_t tcp4_reset_node
(constructor) VLIB_REGISTER_NODE (tcp4_reset_node)
Definition: tcp_output.c:2434
#define VLIB_NODE_FLAG_TRACE
Definition: node.h:302
static uword round_down_pow2(uword x, uword pow2)
Definition: clib.h:259
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
Definition: tcp_output.c:2307
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:59
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
Definition: buffer.h:167
static void * vlib_buffer_push_ip4(vlib_main_t *vm, vlib_buffer_t *b, ip4_address_t *src, ip4_address_t *dst, int proto, u8 csum_offload)
Push IPv4 header to buffer.
Definition: ip4_inlines.h:149
static void tcp_enqueue_to_output(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4)
Definition: tcp_output.c:565
static tcp_main_t * vnet_get_tcp_main()
Definition: tcp.h:276
u32 tco_next_node[2]
Session layer edge indices to tcp output.
Definition: tcp.h:101
static char * tcp_error_strings[]
Definition: tcp_output.c:43
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:85
#define tcp_ack(_th)
Definition: tcp_packet.h:83
#define seq_lt(_s1, _s2)
Definition: tcp_packet.h:178
enum _tcp_state tcp_state_t
u32 transport_connection_tx_pacer_burst(transport_connection_t *tc)
Get tx pacer max burst.
Definition: transport.c:739
vl_api_interface_index_t sw_if_index
Definition: wireguard.api:34
Definition: defs.h:46
#define tcp_worker_stats_inc(_wrk, _stat, _val)
Definition: tcp.h:125
#define tcp_zero_rwnd_sent_off(tc)
Definition: tcp_types.h:440
ip6_address_t dst_address
Definition: ip6_packet.h:310
u32 * tx_buffers
tx buffer free list
Definition: tcp.h:112
static void session_add_pending_tx_buffer(u32 thread_index, u32 bi, u32 next_node)
Add session node pending buffer with custom node.
Definition: session.h:675
adj_index_t adj_nbr_find(fib_protocol_t nh_proto, vnet_link_t link_type, const ip46_address_t *nh_addr, u32 sw_if_index)
Lookup neighbor adjacency.
Definition: adj_nbr.c:109
#define TCP_EVT(_evt, _args...)
Definition: tcp_debug.h:145
static int tcp_make_options(tcp_connection_t *tc, tcp_options_t *opts, tcp_state_t state)
Definition: tcp_output.c:265
static uword pool_elts(void *v)
Number of active elements in a pool.
Definition: pool.h:127