FD.io VPP  v21.10.1-2-g0a485f517
Vector Packet Processing
tcp_output.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <vnet/tcp/tcp.h>
17 #include <vnet/tcp/tcp_inlines.h>
18 #include <math.h>
19 #include <vnet/ip/ip4_inlines.h>
20 #include <vnet/ip/ip6_inlines.h>
21 
22 typedef enum _tcp_output_next
23 {
30 
/*
 * IPv4 list of "next" entries, written as an X-macro: each entry is a
 * (symbol, string) pair wrapped in `_ (...)`, so whoever expands the list
 * must define `_` first.
 */
31 #define foreach_tcp4_output_next \
32  _ (DROP, "error-drop") \
33  _ (IP_LOOKUP, "ip4-lookup") \
34  _ (IP_REWRITE, "ip4-rewrite") \
35  _ (IP_ARP, "ip4-arp")
36 
/*
 * IPv6 list of "next" entries; same symbols as the IPv4 list but paired
 * with the ip6 strings. Each entry is wrapped in `_ (...)`, so whoever
 * expands the list must define `_` first.
 */
37 #define foreach_tcp6_output_next \
38  _ (DROP, "error-drop") \
39  _ (IP_LOOKUP, "ip6-lookup") \
40  _ (IP_REWRITE, "ip6-rewrite") \
41  _ (IP_ARP, "ip6-discover-neighbor")
42 
/*
 * Table of error strings. tcp_error.def is included with tcp_error (n, s)
 * temporarily defined to expand to just `s,`, so each tcp_error (...) entry
 * in that file contributes one string to the array, in file order.
 */
43 static char *tcp_error_strings[] = {
44 #define tcp_error(n,s) s,
45 #include <vnet/tcp/tcp_error.def>
46 #undef tcp_error
47 };
48 
49 typedef struct
50 {
54 
55 static u8 *
56 format_tcp_tx_trace (u8 * s, va_list * args)
57 {
58  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
59  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
60  tcp_tx_trace_t *t = va_arg (*args, tcp_tx_trace_t *);
62  u32 indent = format_get_indent (s);
63 
64  s = format (s, "%U state %U\n%U%U", format_tcp_connection_id, tc,
65  format_tcp_state, tc->state, format_white_space, indent,
66  format_tcp_header, &t->tcp_header, 128);
67 
68  return s;
69 }
70 
71 #ifndef CLIB_MARCH_VARIANT
/*
 * Find the window scale needed to represent `window`.
 *
 * Starts at 0 and increments the shift until (window >> shift) no longer
 * exceeds TCP_WND_MAX, or until the shift reaches TCP_MAX_WND_SCALE.
 *
 * @return the selected shift, in the range [0, TCP_MAX_WND_SCALE]
 */
72 static u8
74 {
75  u8 wnd_scale = 0;
76  while (wnd_scale < TCP_MAX_WND_SCALE && (window >> wnd_scale) > TCP_WND_MAX)
77  wnd_scale++;
78  return wnd_scale;
79 }
80 
81 /**
82  * TCP's initial window
 *
 * The active code does not look at the connection at all: the mss based
 * computation further down is commented out.
 *
 * @return tcp_cfg.min_rx_fifo
83  */
86 {
87  /* RFC 6928 recommends the value lower. However at the time our connections
88  * are initialized, fifos may not be allocated. Therefore, advertise the
89  * smallest possible unscaled window size and update once fifos are
90  * assigned to the session.
91  */
92  /*
93  tcp_update_rcv_mss (tc);
94  TCP_IW_N_SEGMENTS * tc->mss;
95  */
96  return tcp_cfg.min_rx_fifo;
97 }
98 
99 /**
100  * Compute initial window and scale factor. As per RFC1323, window field in
101  * SYN and SYN-ACK segments is never scaled.
 *
 * Side effects on the connection:
 * - tc->rcv_wscale is set from tcp_cfg.max_rx_fifo, unless the connection is
 *   in SYN_RCVD and the peer's received options do not include window scale
 * - tc->rcv_wnd is set to the unscaled initial window
 *
 * @return tc->rcv_wnd clamped to TCP_WND_MAX
102  */
103 u32
105 {
106  /* Compute rcv wscale only if peer advertised support for it */
107  if (tc->state != TCP_STATE_SYN_RCVD || tcp_opts_wscale (&tc->rcv_opts))
108  tc->rcv_wscale = tcp_window_compute_scale (tcp_cfg.max_rx_fifo);
109 
110  tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
111 
112  return clib_min (tc->rcv_wnd, TCP_WND_MAX);
113 }
114 
/*
 * Recompute tc->rcv_wnd from the space currently available for rx enqueue.
 *
 * The stored value is always a multiple of 1 << tc->rcv_wscale, is forced
 * to 0 when it would be smaller than tc->rcv_opts.mss, and never exceeds
 * TCP_WND_MAX << tc->rcv_wscale. If less space is available than what is
 * still outstanding from the previously stored window, the outstanding
 * amount is kept instead of the smaller available space.
 */
115 static inline void
117 {
118  u32 available_space, wnd;
119  i32 observed_wnd;
120 
121  /*
122  * Figure out how much space we have available
123  */
124  available_space = transport_max_rx_enqueue (&tc->connection);
125 
126  /*
127  * Use the above and what we know about what we've previously advertised
128  * to compute the new window
129  */
 /* Previously stored window minus the bytes received since rcv_las; the
  * result is signed because it may go negative */
130  observed_wnd = (i32) tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
131 
132  /* Check if we are about to retract the window. Do the comparison before
133  * rounding to avoid errors. Per RFC7323 sec. 2.4 we could remove this */
134  if (PREDICT_FALSE ((i32) available_space < observed_wnd))
135  {
 /* Keep the outstanding window (floored at 0) rather than shrinking it */
136  wnd = round_down_pow2 (clib_max (observed_wnd, 0), 1 << tc->rcv_wscale);
137  TCP_EVT (TCP_EVT_RCV_WND_SHRUNK, tc, observed_wnd, available_space);
138  }
139  else
140  {
141  /* Make sure we have a multiple of 1 << rcv_wscale. We round down to
142  * avoid advertising a window larger than what can be buffered */
143  wnd = round_down_pow2 (available_space, 1 << tc->rcv_wscale);
144  }
145 
 /* Windows smaller than the mss in the received options are reported as
  * zero */
146  if (PREDICT_FALSE (wnd < tc->rcv_opts.mss))
147  wnd = 0;
148 
 /* Cap at the largest value that fits once shifted right by rcv_wscale */
149  tc->rcv_wnd = clib_min (wnd, TCP_WND_MAX << tc->rcv_wscale);
150 }
151 
152 /**
153  * Compute and return window to advertise, scaled as per RFC1323
154  */
155 static inline u32
157 {
158  if (state < TCP_STATE_ESTABLISHED)
160 
161  tcp_update_rcv_wnd (tc);
162  return tc->rcv_wnd >> tc->rcv_wscale;
163 }
164 
165 static int
167 {
168  u8 len = 0;
169 
170  opts->flags |= TCP_OPTS_FLAG_MSS;
171  opts->mss = tc->mss;
173 
174  opts->flags |= TCP_OPTS_FLAG_WSCALE;
175  opts->wscale = tc->rcv_wscale;
177 
178  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
179  opts->tsval = tcp_time_tstamp (tc->c_thread_index);
180  opts->tsecr = 0;
182 
183  if (TCP_USE_SACKS)
184  {
185  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
187  }
188 
189  /* Align to needed boundary */
191  return len;
192 }
193 
194 static int
196 {
197  u8 len = 0;
198 
199  opts->flags |= TCP_OPTS_FLAG_MSS;
200  opts->mss = tc->mss;
202 
203  if (tcp_opts_wscale (&tc->rcv_opts))
204  {
205  opts->flags |= TCP_OPTS_FLAG_WSCALE;
206  opts->wscale = tc->rcv_wscale;
208  }
209 
210  if (tcp_opts_tstamp (&tc->rcv_opts))
211  {
212  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
213  opts->tsval = tcp_time_tstamp (tc->c_thread_index);
214  opts->tsecr = tc->tsval_recent;
216  }
217 
218  if (tcp_opts_sack_permitted (&tc->rcv_opts))
219  {
220  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
222  }
223 
224  /* Align to needed boundary */
226  return len;
227 }
228 
229 static int
231 {
232  u8 len = 0;
233 
234  opts->flags = 0;
235 
236  if (tcp_opts_tstamp (&tc->rcv_opts))
237  {
238  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
239  opts->tsval = tcp_tstamp (tc);
240  opts->tsecr = tc->tsval_recent;
242  }
243  if (tcp_opts_sack_permitted (&tc->rcv_opts))
244  {
245  if (vec_len (tc->snd_sacks))
246  {
247  opts->flags |= TCP_OPTS_FLAG_SACK;
248  if (tc->snd_sack_pos >= vec_len (tc->snd_sacks))
249  tc->snd_sack_pos = 0;
250  opts->sacks = &tc->snd_sacks[tc->snd_sack_pos];
251  opts->n_sack_blocks = vec_len (tc->snd_sacks) - tc->snd_sack_pos;
252  opts->n_sack_blocks = clib_min (opts->n_sack_blocks,
254  tc->snd_sack_pos += opts->n_sack_blocks;
256  }
257  }
258 
259  /* Align to needed boundary */
261  return len;
262 }
263 
/*
 * Build the options for a segment, choosing the builder from `state`:
 * - SYN_SENT            -> tcp_make_syn_options ()
 * - SYN_RCVD            -> tcp_make_synack_options ()
 * - ESTABLISHED and the closing states listed below, including CLOSED
 *                       -> tcp_make_established_options ()
 *
 * @return the value returned by the selected builder; 0, after logging a
 *         warning, for any state not listed
 */
264 always_inline int
267 {
268  switch (state)
269  {
270  case TCP_STATE_ESTABLISHED:
271  case TCP_STATE_CLOSE_WAIT:
272  case TCP_STATE_FIN_WAIT_1:
273  case TCP_STATE_LAST_ACK:
274  case TCP_STATE_CLOSING:
275  case TCP_STATE_FIN_WAIT_2:
276  case TCP_STATE_TIME_WAIT:
277  case TCP_STATE_CLOSED:
278  return tcp_make_established_options (tc, opts);
279  case TCP_STATE_SYN_RCVD:
280  return tcp_make_synack_options (tc, opts);
281  case TCP_STATE_SYN_SENT:
282  return tcp_make_syn_options (tc, opts);
283  default:
284  clib_warning ("State not handled! %d", state);
285  return 0;
286  }
287 }
288 
289 /**
290  * Update burst send vars
291  *
292  * - Updates snd_mss to reflect the effective segment size that we can send
293  * by taking into account all TCP options, including SACKs.
294  * - Cache 'on the wire' options for reuse
295  * - Updates receive window which can be reused for a burst.
296  *
297  * This should *only* be called when doing bursts
298  */
299 void
301 {
302  tcp_main_t *tm = &tcp_main;
303 
304  /* Compute options to be used for connection. These may be reused when
305  * sending data or to compute the effective mss (snd_mss) */
306  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts,
307  TCP_STATE_ESTABLISHED);
308 
309  /* XXX check if MTU has been updated */
310  tc->snd_mss = clib_min (tc->mss, tc->rcv_opts.mss) - tc->snd_opts_len;
311  ASSERT (tc->snd_mss > 0);
312 
313  tcp_options_write (tm->wrk_ctx[tc->c_thread_index].cached_opts,
314  &tc->snd_opts);
315 
316  tcp_update_rcv_wnd (tc);
317 
318  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
320 
321  if (tc->snd_una == tc->snd_nxt)
322  {
325  }
326 
327  if (tc->flags & TCP_CONN_PSH_PENDING)
328  {
329  u32 max_deq = transport_max_tx_dequeue (&tc->connection);
330  /* Last byte marked for push */
331  tc->psh_seq = tc->snd_una + max_deq - 1;
332  }
333 }
334 
335 #endif /* CLIB_MARCH_VARIANT */
336 
337 static void *
339 {
340  if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
342  /* Zero all flags but free list index and trace flag */
343  b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
344  b->current_data = 0;
345  b->current_length = 0;
347  vnet_buffer (b)->tcp.flags = 0;
349  /* Leave enough space for headers */
351 }
352 
353 #ifndef CLIB_MARCH_VARIANT
354 static void *
356 {
357  ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
358  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
360  b->current_data = 0;
361  vnet_buffer (b)->tcp.flags = 0;
362  /* Leave enough space for headers */
364 }
365 
366 
367 /* Compute TCP checksum in software when offloading is disabled for a connection */
/*
 * Variant that reads the ip6 member of src and dst. The running sum is
 * seeded with the buffer chain length and IP_PROTOCOL_TCP (each passed
 * through clib_host_to_net_u16), both addresses are folded in one uword at
 * a time, and the rest is delegated to ip_calculate_l4_checksum ().
 *
 * @return the value produced by ip_calculate_l4_checksum ()
 */
368 u16
370  ip46_address_t * src, ip46_address_t * dst)
371 {
372  ip_csum_t sum0;
 /* NOTE(review): u16 here while the sibling variant that reads the ip4
  * member uses u32; a chain longer than 65535 bytes would be truncated in
  * this copy - confirm that cannot happen */
373  u16 payload_length_host_byte_order;
374  u32 i;
375 
376  /* Initialize checksum with ip header. */
377  sum0 = clib_host_to_net_u16 (vlib_buffer_length_in_chain (vm, p0)) +
378  clib_host_to_net_u16 (IP_PROTOCOL_TCP);
379  payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
380 
 /* Add source and destination addresses, uword by uword; unaligned reads
  * are used because the addresses may not be uword aligned */
381  for (i = 0; i < ARRAY_LEN (src->ip6.as_uword); i++)
382  {
383  sum0 = ip_csum_with_carry
384  (sum0, clib_mem_unaligned (&src->ip6.as_uword[i], uword));
385  sum0 = ip_csum_with_carry
386  (sum0, clib_mem_unaligned (&dst->ip6.as_uword[i], uword));
387  }
388 
389  return ip_calculate_l4_checksum (vm, p0, sum0,
390  payload_length_host_byte_order, NULL, 0,
391  NULL);
392 }
393 
/*
 * Checksum variant that reads the ip4 member of src and dst. The running
 * sum is seeded with the buffer chain length plus IP_PROTOCOL_TCP << 16
 * (converted as one u32 with clib_host_to_net_u32), both addresses are
 * added as unaligned u32 reads, and the rest is delegated to
 * ip_calculate_l4_checksum ().
 *
 * @return the value produced by ip_calculate_l4_checksum ()
 */
394 u16
396  ip46_address_t * src, ip46_address_t * dst)
397 {
398  ip_csum_t sum0;
399  u32 payload_length_host_byte_order;
400 
401  payload_length_host_byte_order = vlib_buffer_length_in_chain (vm, p0);
 /* Length and protocol combined in a single word before byte swapping */
402  sum0 =
403  clib_host_to_net_u32 (payload_length_host_byte_order +
404  (IP_PROTOCOL_TCP << 16));
405 
406  sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&src->ip4, u32));
407  sum0 = ip_csum_with_carry (sum0, clib_mem_unaligned (&dst->ip4, u32));
408 
409  return ip_calculate_l4_checksum (vm, p0, sum0,
410  payload_length_host_byte_order, NULL, 0,
411  NULL);
412 }
413 
414 static inline u16
416 {
417  u16 checksum = 0;
418  if (PREDICT_FALSE (tc->cfg_flags & TCP_CFG_F_NO_CSUM_OFFLOAD))
419  {
420  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
421  vlib_main_t *vm = wrk->vm;
422 
423  if (tc->c_is_ip4)
425  (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
426  else
428  (vm, b, &tc->c_lcl_ip, &tc->c_rmt_ip);
429  }
430  else
431  {
432  vnet_buffer_offload_flags_set (b, VNET_BUFFER_OFFLOAD_F_TCP_CKSUM);
433  }
434  return checksum;
435 }
436 
437 /**
438  * Prepare ACK
439  */
440 static inline void
442  u8 flags)
443 {
444  tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
445  u8 tcp_opts_len, tcp_hdr_opts_len;
446  tcp_header_t *th;
447  u16 wnd;
448 
449  wnd = tcp_window_to_advertise (tc, state);
450 
451  /* Make and write options */
452  tcp_opts_len = tcp_make_established_options (tc, snd_opts);
453  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
454 
455  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
456  tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
457 
458  tcp_options_write ((u8 *) (th + 1), snd_opts);
459 
460  th->checksum = tcp_compute_checksum (tc, b);
461 
462  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
463 
464  if (wnd == 0)
465  {
466  transport_rx_fifo_req_deq_ntf (&tc->connection);
468  }
469  else
471 }
472 
473 /**
474  * Convert buffer to ACK
 *
 * Builds the segment through tcp_make_ack_i () using the ESTABLISHED state
 * and only the ACK flag, emits the TCP_EVT_ACK_SENT event and then records
 * the current rcv_nxt in rcv_las.
475  */
476 static inline void
478 {
479  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK);
480  TCP_EVT (TCP_EVT_ACK_SENT, tc);
 /* rcv_nxt is what was just written as the ack number; remember it */
481  tc->rcv_las = tc->rcv_nxt;
482 }
483 
484 /**
485  * Convert buffer to FIN-ACK
 *
 * Builds the segment through tcp_make_ack_i () using the ESTABLISHED state
 * with both FIN and ACK flags set. No connection field is updated directly
 * here.
486  */
487 static void
489 {
490  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK);
491 }
492 
493 /**
494  * Convert buffer to SYN
 *
 * Pushes a TCP header with only the SYN flag, tc->iss as sequence number and
 * the initial window, writes the SYN options right after the header, tags
 * the buffer with the connection index and finally sets the checksum.
 * Computing the initial window also updates tc->rcv_wnd and possibly
 * tc->rcv_wscale.
495  */
496 void
498 {
499  u8 tcp_hdr_opts_len, tcp_opts_len;
500  tcp_header_t *th;
501  u16 initial_wnd;
502  tcp_options_t snd_opts;
503 
504  initial_wnd = tcp_initial_window_to_advertise (tc);
505 
506  /* Make and write options */
 /* Zero first: the SYN option builder only ORs bits into opts->flags */
507  clib_memset (&snd_opts, 0, sizeof (snd_opts));
508  tcp_opts_len = tcp_make_syn_options (tc, &snd_opts);
509  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
510 
511  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
512  tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN,
513  initial_wnd);
514  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
 /* Options go right after the fixed header; checksum is set last, once
  * header and options are in place */
515  tcp_options_write ((u8 *) (th + 1), &snd_opts);
516  th->checksum = tcp_compute_checksum (tc, b);
517 }
518 
519 /**
520  * Convert buffer to SYN-ACK
 *
 * Pushes a TCP header with SYN and ACK flags, tc->iss as sequence number,
 * tc->rcv_nxt as ack number and the initial window, writes the SYN-ACK
 * options right after the header, tags the buffer with the connection index
 * and finally sets the checksum. Computing the initial window also updates
 * tc->rcv_wnd and possibly tc->rcv_wscale.
521  */
522 static void
524 {
525  tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
526  u8 tcp_opts_len, tcp_hdr_opts_len;
527  tcp_header_t *th;
528  u16 initial_wnd;
529 
 /* Zero first: the SYN-ACK option builder only ORs bits into opts->flags */
530  clib_memset (snd_opts, 0, sizeof (*snd_opts));
531  initial_wnd = tcp_initial_window_to_advertise (tc);
532  tcp_opts_len = tcp_make_synack_options (tc, snd_opts);
533  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
534 
535  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
536  tc->rcv_nxt, tcp_hdr_opts_len,
537  TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd);
538  tcp_options_write ((u8 *) (th + 1), snd_opts);
539 
540  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
541  th->checksum = tcp_compute_checksum (tc, b);
542 }
543 
544 static void
546  vlib_buffer_t *b, u32 bi)
547 {
548  vlib_main_t *vm = wrk->vm;
549 
550  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
551  b->error = 0;
552 
554  wrk->tco_next_node[!tc->c_is_ip4]);
555 
556  if (vm->thread_index == 0 && vlib_num_workers ())
558 }
559 
560 static void
562  u8 is_ip4)
563 {
564  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
565  b->error = 0;
566 
568  wrk->tco_next_node[!is_ip4]);
569 }
570 
571 #endif /* CLIB_MARCH_VARIANT */
572 
573 static int
575 {
576  ip4_header_t *ih4;
577  ip6_header_t *ih6;
578  tcp_header_t *th;
579  ip4_address_t src_ip4, dst_ip4;
580  ip6_address_t src_ip6, dst_ip6;
582  u32 tmp, len, seq, ack;
583  u8 flags;
584 
585  /* Find IP and TCP headers */
586  th = tcp_buffer_hdr (b);
587 
588  /* Save src and dst ip */
589  if (is_ip4)
590  {
591  ih4 = vlib_buffer_get_current (b);
592  ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40);
593  src_ip4.as_u32 = ih4->src_address.as_u32;
594  dst_ip4.as_u32 = ih4->dst_address.as_u32;
595  }
596  else
597  {
598  ih6 = vlib_buffer_get_current (b);
599  ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60);
600  clib_memcpy_fast (&src_ip6, &ih6->src_address, sizeof (ip6_address_t));
601  clib_memcpy_fast (&dst_ip6, &ih6->dst_address, sizeof (ip6_address_t));
602  }
603 
604  src_port = th->src_port;
605  dst_port = th->dst_port;
607 
608  /*
609  * RFC 793. If the ACK bit is off, sequence number zero is used,
610  * <SEQ=0><ACK=SEG.SEQ+SEG.LEN><CTL=RST,ACK>
611  * If the ACK bit is on,
612  * <SEQ=SEG.ACK><CTL=RST>
613  */
614  if (tcp_ack (th))
615  {
616  seq = th->ack_number;
617  ack = 0;
618  }
619  else
620  {
621  flags |= TCP_FLAG_ACK;
622  tmp = clib_net_to_host_u32 (th->seq_number);
623  len = vnet_buffer (b)->tcp.data_len + tcp_is_syn (th) + tcp_is_fin (th);
624  ack = clib_host_to_net_u32 (tmp + len);
625  seq = 0;
626  }
627 
628  tcp_reuse_buffer (vm, b);
630  sizeof (tcp_header_t), flags, 0);
631 
632  if (is_ip4)
633  {
634  ih4 = vlib_buffer_push_ip4 (vm, b, &dst_ip4, &src_ip4,
635  IP_PROTOCOL_TCP, 1);
636  th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
637  }
638  else
639  {
640  int bogus = ~0;
641  ih6 = vlib_buffer_push_ip6 (vm, b, &dst_ip6, &src_ip6, IP_PROTOCOL_TCP);
642  th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
643  ASSERT (!bogus);
644  }
645 
646  return 0;
647 }
648 
649 #ifndef CLIB_MARCH_VARIANT
650 /**
651  * Send reset without reusing existing buffer
652  *
653  * It extracts connection info out of original packet
654  */
655 void
657  u32 thread_index, u8 is_ip4)
658 {
660  vlib_main_t *vm = wrk->vm;
661  vlib_buffer_t *b;
662  u8 tcp_hdr_len, flags = 0;
663  tcp_header_t *th, *pkt_th;
664  u32 seq, ack, bi;
665  ip4_header_t *ih4, *pkt_ih4;
666  ip6_header_t *ih6, *pkt_ih6;
667 
668  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
669  {
670  tcp_worker_stats_inc (wrk, no_buffer, 1);
671  return;
672  }
673 
674  b = vlib_get_buffer (vm, bi);
675  tcp_init_buffer (vm, b);
676 
677  /* Make and write options */
678  tcp_hdr_len = sizeof (tcp_header_t);
679 
680  if (is_ip4)
681  {
682  pkt_ih4 = vlib_buffer_get_current (pkt);
683  pkt_th = ip4_next_header (pkt_ih4);
684  }
685  else
686  {
687  pkt_ih6 = vlib_buffer_get_current (pkt);
688  pkt_th = ip6_next_header (pkt_ih6);
689  }
690 
691  if (tcp_ack (pkt_th))
692  {
694  seq = pkt_th->ack_number;
695  ack = (tc->state >= TCP_STATE_SYN_RCVD) ? tc->rcv_nxt : 0;
696  }
697  else
698  {
700  seq = 0;
701  ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);
702  }
703 
704  th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
705  seq, ack, tcp_hdr_len, flags, 0);
706 
707  /* Swap src and dst ip */
708  if (is_ip4)
709  {
710  ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
711  ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
712  &pkt_ih4->src_address, IP_PROTOCOL_TCP,
713  tcp_csum_offload (tc));
714  th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
715  }
716  else
717  {
718  int bogus = ~0;
719  ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
720  0x60);
721  ih6 = vlib_buffer_push_ip6_custom (vm, b, &pkt_ih6->dst_address,
722  &pkt_ih6->src_address,
723  IP_PROTOCOL_TCP,
724  tc->ipv6_flow_label);
725  th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
726  ASSERT (!bogus);
727  }
728 
729  tcp_enqueue_half_open (wrk, tc, b, bi);
730  TCP_EVT (TCP_EVT_RST_SENT, tc);
731  vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
732  TCP_ERROR_RST_SENT, 1);
733 }
734 
735 /**
736  * Build and set reset packet for connection
737  */
738 void
740 {
741  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
742  vlib_main_t *vm = wrk->vm;
743  vlib_buffer_t *b;
744  u32 bi;
745  tcp_header_t *th;
746  u16 tcp_hdr_opts_len, advertise_wnd, opts_write_len;
747  u8 flags;
748 
749  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
750  {
751  tcp_worker_stats_inc (wrk, no_buffer, 1);
752  return;
753  }
754  b = vlib_get_buffer (vm, bi);
755  tcp_init_buffer (vm, b);
756 
757  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
758  tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
759  advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
761  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
762  tc->rcv_nxt, tcp_hdr_opts_len, flags,
763  advertise_wnd);
764  opts_write_len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
765  th->checksum = tcp_compute_checksum (tc, b);
766  ASSERT (opts_write_len == tc->snd_opts_len);
767  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
768  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
769  TCP_EVT (TCP_EVT_RST_SENT, tc);
770  vlib_node_increment_counter (vm, tcp_node_index (output, tc->c_is_ip4),
771  TCP_ERROR_RST_SENT, 1);
772 }
773 
774 /**
775  * Send SYN
776  *
777  * Builds a SYN packet for a half-open connection and sends it to tcp-output.
778  * The packet is handled by main thread and because half-open and established
779  * connections use the same pool the connection can be retrieved without
780  * additional logic.
 *
 * If no buffer can be allocated nothing is sent: the SYN retransmit timer is
 * re-armed with tcp_cfg.alloc_err_timeout and the worker's no_buffer counter
 * is incremented.
781  */
782 void
784 {
785  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
786  vlib_main_t *vm = wrk->vm;
787  vlib_buffer_t *b;
788  u32 bi;
789 
790  /*
791  * Setup retransmit and establish timers before requesting buffer
792  * such that we can return if we've run out.
793  */
794  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
795  tc->rto * TCP_TO_TIMER_TICK);
796 
797  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
798  {
 /* Retry sooner than a full rto since nothing went out */
799  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
800  tcp_cfg.alloc_err_timeout);
801  tcp_worker_stats_inc (wrk, no_buffer, 1);
802  return;
803  }
804 
805  b = vlib_get_buffer (vm, bi);
806  tcp_init_buffer (vm, b);
807  tcp_make_syn (tc, b);
808 
809  /* Measure RTT with this */
 /* NOTE(review): the time is read for thread index 1 when workers exist and
  * 0 otherwise, not for tc->c_thread_index - confirm this is intended */
810  tc->rtt_ts = tcp_time_now_us (vlib_num_workers ()? 1 : 0);
811  tc->rtt_seq = tc->snd_nxt;
812  tc->rto_boff = 0;
813 
814  tcp_enqueue_half_open (wrk, tc, b, bi);
815  TCP_EVT (TCP_EVT_SYN_SENT, tc);
816 }
817 
/*
 * Build a SYN-ACK for the connection and enqueue it to output.
 *
 * Asserts that snd_una differs from snd_nxt. The retransmit timer is updated
 * before the buffer is requested; if allocation fails the timer is re-armed
 * with tcp_cfg.alloc_err_timeout, the worker's no_buffer counter is
 * incremented and nothing is sent. On success tc->rtt_ts is set to the
 * current time for the connection's thread.
 */
818 void
820 {
821  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
822  vlib_main_t *vm = wrk->vm;
823  vlib_buffer_t *b;
824  u32 bi;
825 
826  ASSERT (tc->snd_una != tc->snd_nxt);
827  tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
828 
829  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
830  {
831  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
832  tcp_cfg.alloc_err_timeout);
833  tcp_worker_stats_inc (wrk, no_buffer, 1);
834  return;
835  }
836 
837  tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
838  b = vlib_get_buffer (vm, bi);
839  tcp_init_buffer (vm, b);
840  tcp_make_synack (tc, b);
841  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
842  TCP_EVT (TCP_EVT_SYNACK_SENT, tc);
843 }
844 
845 /**
846  * Send FIN
 *
 * Handles both the first FIN and a repeated one. When TCP_CONN_FINSNT is
 * already set, snd_nxt is stepped back by one before the segment is built and
 * advanced again afterwards, so the repeated FIN carries the same sequence
 * number as the first.
 *
 * If no buffer can be allocated nothing is sent: the retransmit timer is
 * armed with tcp_cfg.alloc_err_timeout, snd_nxt is restored (or FINSNT is
 * set if this was the first attempt) and the worker's no_buffer counter is
 * incremented.
847  */
848 void
850 {
851  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
852  vlib_main_t *vm = wrk->vm;
853  vlib_buffer_t *b;
854  u32 bi;
855  u8 fin_snt = 0;
856 
 /* NOTE(review): the masked flag is stored in a u8, which is only correct
  * if TCP_CONN_FINSNT lies within the low 8 bits - confirm */
857  fin_snt = tc->flags & TCP_CONN_FINSNT;
858  if (fin_snt)
859  tc->snd_nxt -= 1;
860 
861  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
862  {
863  /* Out of buffers so program fin retransmit ASAP */
864  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
865  tcp_cfg.alloc_err_timeout);
 /* Undo the step back taken above for a repeated FIN */
866  if (fin_snt)
867  tc->snd_nxt += 1;
868  else
869  /* Make sure retransmit retries a fin not data */
870  tc->flags |= TCP_CONN_FINSNT;
871  tcp_worker_stats_inc (wrk, no_buffer, 1);
872  return;
873  }
874 
875  /* If we have non-dupacks programmed, no need to send them */
876  if ((tc->flags & TCP_CONN_SNDACK) && !tc->pending_dupacks)
877  tc->flags &= ~TCP_CONN_SNDACK;
878 
879  b = vlib_get_buffer (vm, bi);
880  tcp_init_buffer (vm, b);
881  tcp_make_fin (tc, b);
882  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
883  TCP_EVT (TCP_EVT_FIN_SENT, tc);
884  /* Account for the FIN */
885  tc->snd_nxt += 1;
886  tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
 /* First FIN only: mark it sent and clear the pending request */
887  if (!fin_snt)
888  {
889  tc->flags |= TCP_CONN_FINSNT;
890  tc->flags &= ~TCP_CONN_FINPNDG;
891  }
892 }
893 
894 /**
895  * Push TCP header and update connection variables. Should only be called
896  * for segments with data, not for 'control' packets.
897  */
898 always_inline void
900  u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
901 {
902  u8 tcp_hdr_opts_len, flags = TCP_FLAG_ACK;
903  u32 advertise_wnd, data_len;
904  tcp_main_t *tm = &tcp_main;
905  tcp_header_t *th;
906 
908  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
910 
911  vnet_buffer (b)->tcp.flags = 0;
912  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
913 
914  if (compute_opts)
915  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
916 
917  tcp_hdr_opts_len = tc->snd_opts_len + sizeof (tcp_header_t);
918 
919  if (maybe_burst)
920  advertise_wnd = tc->rcv_wnd >> tc->rcv_wscale;
921  else
922  advertise_wnd = tcp_window_to_advertise (tc, TCP_STATE_ESTABLISHED);
923 
924  if (PREDICT_FALSE (tc->flags & TCP_CONN_PSH_PENDING))
925  {
926  if (seq_geq (tc->psh_seq, snd_nxt)
927  && seq_lt (tc->psh_seq, snd_nxt + data_len))
928  flags |= TCP_FLAG_PSH;
929  }
930  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, snd_nxt,
931  tc->rcv_nxt, tcp_hdr_opts_len, flags,
932  advertise_wnd);
933 
934  if (maybe_burst)
935  {
936  clib_memcpy_fast ((u8 *) (th + 1),
937  tm->wrk_ctx[tc->c_thread_index].cached_opts,
938  tc->snd_opts_len);
939  }
940  else
941  {
942  u8 len = tcp_options_write ((u8 *) (th + 1), &tc->snd_opts);
943  ASSERT (len == tc->snd_opts_len);
944  }
945 
946  /*
947  * Update connection variables
948  */
949 
950  if (update_snd_nxt)
951  tc->snd_nxt += data_len;
952  tc->rcv_las = tc->rcv_nxt;
953 
954  tc->bytes_out += data_len;
955  tc->data_segs_out += 1;
956 
957  th->checksum = tcp_compute_checksum (tc, b);
958 
959  TCP_EVT (TCP_EVT_PKTIZE, tc);
960 }
961 
964 {
966  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
968  return data_len;
969 }
970 
971 u32
973 {
974  tcp_connection_t *tc = (tcp_connection_t *) tconn;
975 
976  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
978 
979  tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0, /* burst */ 1,
980  /* update_snd_nxt */ 1);
981 
982  tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
983  /* If not tracking an ACK, start tracking */
984  if (tc->rtt_ts == 0 && !tcp_in_cong_recovery (tc))
985  {
986  tc->rtt_ts = tcp_time_now_us (tc->c_thread_index);
987  tc->rtt_seq = tc->snd_nxt;
988  }
989  if (PREDICT_FALSE (!tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)))
990  {
991  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
992  tcp_retransmit_timer_set (&wrk->timer_wheel, tc);
993  tc->rto_boff = 0;
994  }
995  return 0;
996 }
997 
/*
 * Allocate a buffer, turn it into an ACK for the connection and enqueue it
 * to output.
 *
 * If no buffer can be allocated no ACK is sent; the receive window is still
 * recomputed and the worker's no_buffer counter is incremented.
 */
998 void
1000 {
1001  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1002  vlib_main_t *vm = wrk->vm;
1003  vlib_buffer_t *b;
1004  u32 bi;
1005 
1006  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1007  {
1008  tcp_update_rcv_wnd (tc);
1009  tcp_worker_stats_inc (wrk, no_buffer, 1);
1010  return;
1011  }
1012  b = vlib_get_buffer (vm, bi);
1013  tcp_init_buffer (vm, b);
1014  tcp_make_ack (tc, b);
1015  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1016 }
1017 
/*
 * Request that an ACK be sent later instead of building one now.
 *
 * Does nothing if TCP_CONN_SNDACK is already set. Otherwise adds a custom tx
 * event for the connection's session and sets the flag, so repeated calls
 * add at most one event while the flag stays set.
 */
1018 void
1020 {
1021  if (!(tc->flags & TCP_CONN_SNDACK))
1022  {
 /* NOTE(review): meaning of the second argument is not visible in this
  * file; the ACK paths pass 1 and the retransmit path passes 0 */
1023  session_add_self_custom_tx_evt (&tc->connection, 1);
1024  tc->flags |= TCP_CONN_SNDACK;
1025  }
1026 }
1027 
/*
 * Request that a duplicate ACK be sent later.
 *
 * Adds a custom tx event and sets TCP_CONN_SNDACK unless the flag is already
 * set, then counts the request in tc->pending_dupacks. The counter stops
 * increasing at 255.
 */
1028 void
1030 {
1031  if (!(tc->flags & TCP_CONN_SNDACK))
1032  {
1033  session_add_self_custom_tx_evt (&tc->connection, 1);
1034  tc->flags |= TCP_CONN_SNDACK;
1035  }
 /* Saturate rather than wrap around */
1036  if (tc->pending_dupacks < 255)
1037  tc->pending_dupacks += 1;
1038 }
1039 
/*
 * Request that a retransmit be handled later.
 *
 * Does nothing if TCP_CONN_RXT_PENDING is already set. Otherwise adds a
 * custom tx event for the connection's session (second argument 0) and sets
 * the flag.
 */
1040 void
1042 {
1043  if (!(tc->flags & TCP_CONN_RXT_PENDING))
1044  {
1045  session_add_self_custom_tx_evt (&tc->connection, 0);
1046  tc->flags |= TCP_CONN_RXT_PENDING;
1047  }
1048 }
1049 
1050 /**
1051  * Send window update ack
1052  *
1053  * Ensures that it will be sent only once, after a zero rwnd has been
1054  * advertised in a previous ack, and only if rwnd has grown beyond a
1055  * configurable value.
1056  */
1057 void
1059 {
1060  if (tcp_zero_rwnd_sent (tc))
1061  {
1062  tcp_update_rcv_wnd (tc);
1063  if (tc->rcv_wnd >= tcp_cfg.rwnd_min_update_ack * tc->snd_mss)
1064  {
1066  tcp_program_ack (tc);
1067  }
1068  }
1069 }
1070 
1071 /**
1072  * Allocate a new buffer and build a new tcp segment
1073  *
1074  * @param wrk tcp worker
1075  * @param tc connection for which the segment will be allocated
1076  * @param offset offset of the first byte in the tx fifo
1077  * @param max_deq_bytes segment size
1078  * @param[out] b pointer to buffer allocated
1079  *
1080  * @return the number of bytes in the segment or 0 if buffer cannot be
1081  * allocated or no data available
1082  */
1083 static int
1085  u32 offset, u32 max_deq_bytes, vlib_buffer_t ** b)
1086 {
1087  u32 bytes_per_buffer = vnet_get_tcp_main ()->bytes_per_buffer;
1088  vlib_main_t *vm = wrk->vm;
1089  u32 bi, seg_size;
1090  int n_bytes = 0;
1091  u8 *data;
1092 
1093  seg_size = max_deq_bytes + TRANSPORT_MAX_HDRS_LEN;
1094 
1095  /*
1096  * Prepare options
1097  */
1098  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
1099 
1100  /*
1101  * Allocate and fill in buffer(s)
1102  */
1103 
1104  /* Easy case, buffer size greater than mss */
1105  if (PREDICT_TRUE (seg_size <= bytes_per_buffer))
1106  {
1107  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1108  {
1109  tcp_worker_stats_inc (wrk, no_buffer, 1);
1110  return 0;
1111  }
1112  *b = vlib_get_buffer (vm, bi);
1113  data = tcp_init_buffer (vm, *b);
1114  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1115  max_deq_bytes);
1116  ASSERT (n_bytes == max_deq_bytes);
1117  b[0]->current_length = n_bytes;
1118  tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
1119  /* burst */ 0, /* update_snd_nxt */ 0);
1120  }
1121  /* Split mss into multiple buffers */
1122  else
1123  {
1124  u32 chain_bi = ~0, n_bufs_per_seg, n_bufs;
1125  u16 n_peeked, len_to_deq;
1126  vlib_buffer_t *chain_b, *prev_b;
1127  int i;
1128 
1129  /* Make sure we have enough buffers */
1130  n_bufs_per_seg = ceil ((double) seg_size / bytes_per_buffer);
1131  vec_validate_aligned (wrk->tx_buffers, n_bufs_per_seg - 1,
1133  n_bufs = vlib_buffer_alloc (vm, wrk->tx_buffers, n_bufs_per_seg);
1134  if (PREDICT_FALSE (n_bufs != n_bufs_per_seg))
1135  {
1136  if (n_bufs)
1137  vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
1138  tcp_worker_stats_inc (wrk, no_buffer, 1);
1139  return 0;
1140  }
1141 
1142  *b = vlib_get_buffer (vm, wrk->tx_buffers[--n_bufs]);
1143  data = tcp_init_buffer (vm, *b);
1144  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1145  bytes_per_buffer -
1147  b[0]->current_length = n_bytes;
1148  b[0]->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
1150  max_deq_bytes -= n_bytes;
1151 
1152  chain_b = *b;
1153  for (i = 1; i < n_bufs_per_seg; i++)
1154  {
1155  prev_b = chain_b;
1156  len_to_deq = clib_min (max_deq_bytes, bytes_per_buffer);
1157  chain_bi = wrk->tx_buffers[--n_bufs];
1158  chain_b = vlib_get_buffer (vm, chain_bi);
1159  chain_b->current_data = 0;
1160  data = vlib_buffer_get_current (chain_b);
1161  n_peeked = session_tx_fifo_peek_bytes (&tc->connection, data,
1162  offset + n_bytes,
1163  len_to_deq);
1164  ASSERT (n_peeked == len_to_deq);
1165  n_bytes += n_peeked;
1166  chain_b->current_length = n_peeked;
1167  chain_b->next_buffer = 0;
1168 
1169  /* update previous buffer */
1170  prev_b->next_buffer = chain_bi;
1171  prev_b->flags |= VLIB_BUFFER_NEXT_PRESENT;
1172 
1173  max_deq_bytes -= n_peeked;
1175  }
1176 
1177  tcp_push_hdr_i (tc, *b, tc->snd_una + offset, /* compute opts */ 0,
1178  /* burst */ 0, /* update_snd_nxt */ 0);
1179 
1180  if (PREDICT_FALSE (n_bufs))
1181  {
1182  clib_warning ("not all buffers consumed");
1183  vlib_buffer_free (vm, wrk->tx_buffers, n_bufs);
1184  }
1185  }
1186 
1187  ASSERT (n_bytes > 0);
1188  ASSERT (((*b)->current_data + (*b)->current_length) <= bytes_per_buffer);
1189 
1190  return n_bytes;
1191 }
1192 
1193 /**
1194  * Build a retransmit segment
1195  *
 * @param tc connection to retransmit for; must be in ESTABLISHED or a later
 *           state (asserted)
 * @param offset offset of the first byte to resend, relative to tc->snd_una
 * @param max_deq_bytes upper bound for the segment size, must not be 0
 *                      (asserted); further limited to tc->snd_mss and to the
 *                      bytes available past offset
 * @param[out] b buffer pointer handed to tcp_prepare_segment ()
 *
1196  * @return the number of bytes in the segment or 0 if there's nothing to
1197  * retransmit
 *         0 is also returned when tcp_prepare_segment () returns 0
1198  */
1199 static u32
1201  tcp_connection_t * tc, u32 offset,
1202  u32 max_deq_bytes, vlib_buffer_t ** b)
1203 {
1204  u32 start, available_bytes;
1205  int n_bytes = 0;
1206 
1207  ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
1208  ASSERT (max_deq_bytes != 0);
1209 
1210  /*
1211  * Make sure we can retransmit something
1212  */
1213  available_bytes = transport_max_tx_dequeue (&tc->connection);
1214  ASSERT (available_bytes >= offset);
1215  available_bytes -= offset;
1216  if (!available_bytes)
1217  return 0;
1218 
1219  max_deq_bytes = clib_min (tc->snd_mss, max_deq_bytes);
1220  max_deq_bytes = clib_min (available_bytes, max_deq_bytes);
1221 
 /* The segment must lie entirely within what was already sent */
1222  start = tc->snd_una + offset;
1223  ASSERT (seq_leq (start + max_deq_bytes, tc->snd_nxt));
1224 
1225  n_bytes = tcp_prepare_segment (wrk, tc, offset, max_deq_bytes, b);
1226  if (!n_bytes)
1227  return 0;
1228 
 /* Retransmit accounting: per connection counters, optional rate sample
  * tracking and the worker's rxt_segs counter */
1229  tc->snd_rxt_bytes += n_bytes;
1230 
1231  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1232  tcp_bt_track_rxt (tc, start, start + n_bytes);
1233 
1234  tc->bytes_retrans += n_bytes;
1235  tc->segs_retrans += 1;
1236  tcp_worker_stats_inc (wrk, rxt_segs, 1);
1237  TCP_EVT (TCP_EVT_CC_RTX, tc, offset, n_bytes);
1238 
1239  return n_bytes;
1240 }
1241 
1242 static void
1244 {
1245  sack_scoreboard_t *sb = &tc->sack_sb;
1246  sack_scoreboard_hole_t *hole;
1247 
1248  hole = scoreboard_first_hole (sb);
1249  if (!sb->is_reneging && (!hole || hole->start == tc->snd_una))
1250  return;
1251 
1252  scoreboard_clear_reneging (sb, tc->snd_una, tc->snd_nxt);
1253 }
1254 
1255 /**
1256  * Reset congestion control, switch cwnd to loss window and try again.
1257  */
1258 static void
1260 {
1261  TCP_EVT (TCP_EVT_CC_EVT, tc, 6);
1262 
1263  tc->prev_ssthresh = tc->ssthresh;
1264  tc->prev_cwnd = tc->cwnd;
1265 
1266  /* If we entered loss without fast recovery, notify cc algo of the
1267  * congestion event such that it can update ssthresh and its state */
1268  if (!tcp_in_fastrecovery (tc))
1269  tcp_cc_congestion (tc);
1270 
1271  /* Let cc algo decide loss cwnd and ssthresh post unrecovered loss */
1272  tcp_cc_loss (tc);
1273 
1274  tc->rtt_ts = 0;
1275  tc->cwnd_acc_bytes = 0;
1276  tc->tr_occurences += 1;
1277  tc->sack_sb.reorder = TCP_DUPACK_THRESHOLD;
1278  tcp_recovery_on (tc);
1279 }
1280 
1281 void
1283 {
1284  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1285  vlib_main_t *vm = wrk->vm;
1286  vlib_buffer_t *b = 0;
1287  u32 bi, n_bytes;
1288 
1289  tcp_worker_stats_inc (wrk, tr_events, 1);
1290 
1291  /* Should be handled by a different handler */
1292  if (PREDICT_FALSE (tc->state == TCP_STATE_SYN_SENT))
1293  return;
1294 
1295  /* Wait-close and retransmit could pop at the same time */
1296  if (tc->state == TCP_STATE_CLOSED)
1297  return;
1298 
1299  if (tc->state >= TCP_STATE_ESTABLISHED)
1300  {
1301  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1302 
1303  /* Lost FIN, retransmit and return */
1304  if (tc->flags & TCP_CONN_FINSNT)
1305  {
1306  tcp_send_fin (tc);
1307  tc->rto_boff += 1;
1308  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1309  return;
1310  }
1311 
1312  /* Shouldn't be here */
1313  if (tc->snd_una == tc->snd_nxt)
1314  {
1315  ASSERT (!tcp_in_recovery (tc));
1316  tc->rto_boff = 0;
1317  return;
1318  }
1319 
1320  /* We're not in recovery so make sure rto_boff is 0. Can be non 0 due
1321  * to persist timer timeout */
1322  if (!tcp_in_recovery (tc) && tc->rto_boff > 0)
1323  {
1324  tc->rto_boff = 0;
1325  tcp_update_rto (tc);
1326  }
1327 
1328  /* Peer is dead or network connectivity is lost. Close connection.
1329  * RFC 1122 section 4.2.3.5 recommends a value of at least 100s. For
1330  * a min rto of 0.2s we need to retry about 8 times. */
1331  if (tc->rto_boff >= TCP_RTO_BOFF_MAX)
1332  {
1333  tcp_send_reset (tc);
1334  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1335  session_transport_closing_notify (&tc->connection);
1336  session_transport_closed_notify (&tc->connection);
1338  tcp_program_cleanup (wrk, tc);
1339  tcp_worker_stats_inc (wrk, tr_abort, 1);
1340  return;
1341  }
1342 
1343  if (tcp_opts_sack_permitted (&tc->rcv_opts))
1345 
1346  /* Update send congestion to make sure that rxt has data to send */
1347  tc->snd_congestion = tc->snd_nxt;
1348 
1349  /* Send the first unacked segment. If we're short on buffers, return
1350  * as soon as possible */
1351  n_bytes = clib_min (tc->snd_mss, tc->snd_nxt - tc->snd_una);
1353  if (!n_bytes)
1354  {
1355  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
1356  tcp_cfg.alloc_err_timeout);
1357  return;
1358  }
1359 
1360  bi = vlib_get_buffer_index (vm, b);
1361  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1362 
1363  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1364  tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
1365 
1366  tc->rto_boff += 1;
1367  if (tc->rto_boff == 1)
1368  {
1370  /* Record timestamp. Eifel detection algorithm RFC3522 */
1371  tc->snd_rxt_ts = tcp_tstamp (tc);
1372  }
1373 
1374  if (tcp_opts_sack_permitted (&tc->rcv_opts))
1375  scoreboard_init_rxt (&tc->sack_sb, tc->snd_una + n_bytes);
1376 
1378  }
1379  /* Retransmit SYN-ACK */
1380  else if (tc->state == TCP_STATE_SYN_RCVD)
1381  {
1382  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1383 
1384  tc->rtt_ts = 0;
1385 
1386  /* Passive open establish timeout */
1387  if (tc->rto > TCP_ESTABLISH_TIME >> 1)
1388  {
1389  tcp_connection_set_state (tc, TCP_STATE_CLOSED);
1391  tcp_program_cleanup (wrk, tc);
1392  tcp_worker_stats_inc (wrk, tr_abort, 1);
1393  return;
1394  }
1395 
1396  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1397  {
1398  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT,
1399  tcp_cfg.alloc_err_timeout);
1400  tcp_worker_stats_inc (wrk, no_buffer, 1);
1401  return;
1402  }
1403 
1404  tc->rto_boff += 1;
1405  if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
1406  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1407 
1408  ASSERT (tc->snd_una != tc->snd_nxt);
1409  tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
1410 
1411  b = vlib_get_buffer (vm, bi);
1412  tcp_init_buffer (vm, b);
1413  tcp_make_synack (tc, b);
1414  TCP_EVT (TCP_EVT_SYN_RXT, tc, 1);
1415 
1416  /* Retransmit timer already updated, just enqueue to output */
1417  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1418  }
1419  else
1420  {
1421  ASSERT (tc->state == TCP_STATE_CLOSED);
1422  return;
1423  }
1424 }
1425 
1426 /**
1427  * SYN retransmit timer handler. Active open only.
1428  */
1429 void
1431 {
1432  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1433  vlib_main_t *vm = wrk->vm;
1434  vlib_buffer_t *b = 0;
1435  u32 bi;
1436 
1437  /* Note: the connection may have transitioned to ESTABLISHED... */
1438  if (PREDICT_FALSE (tc->state != TCP_STATE_SYN_SENT))
1439  return;
1440 
1441  /* Half-open connection actually moved to established but we were
1442  * waiting for syn retransmit to pop to call cleanup from the right
1443  * thread. */
1444  if (tc->flags & TCP_CONN_HALF_OPEN_DONE)
1445  {
1447  TCP_DBG ("could not remove half-open connection");
1448  return;
1449  }
1450 
1451  TCP_EVT (TCP_EVT_CC_EVT, tc, 2);
1452  tc->rtt_ts = 0;
1453 
1454  /* Active open establish timeout */
1455  if (tc->rto >= TCP_ESTABLISH_TIME >> 1)
1456  {
1457  session_stream_connect_notify (&tc->connection, SESSION_E_TIMEDOUT);
1459  return;
1460  }
1461 
1462  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1463  {
1464  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
1465  tcp_cfg.alloc_err_timeout);
1466  tcp_worker_stats_inc (wrk, no_buffer, 1);
1467  return;
1468  }
1469 
1470  /* Try without increasing RTO a number of times. If this fails,
1471  * start growing RTO exponentially */
1472  tc->rto_boff += 1;
1473  if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
1474  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1475 
1476  b = vlib_get_buffer (vm, bi);
1477  tcp_init_buffer (vm, b);
1478  tcp_make_syn (tc, b);
1479 
1480  TCP_EVT (TCP_EVT_SYN_RXT, tc, 0);
1481 
1482  tcp_enqueue_half_open (wrk, tc, b, bi);
1483 
1484  tcp_timer_update (&wrk->timer_wheel, tc, TCP_TIMER_RETRANSMIT_SYN,
1485  tc->rto * TCP_TO_TIMER_TICK);
1486 }
1487 
1488 /**
1489  * Got 0 snd_wnd from peer, try to do something about it.
1490  *
1491  */
1492 void
1494 {
1495  tcp_worker_ctx_t *wrk = tcp_get_worker (tc->c_thread_index);
1496  u32 bi, max_snd_bytes, available_bytes, offset;
1497  tcp_main_t *tm = vnet_get_tcp_main ();
1498  vlib_main_t *vm = wrk->vm;
1499  vlib_buffer_t *b;
1500  int n_bytes = 0;
1501  u8 *data;
1502 
1503  /* Problem already solved or worse */
1504  if (tc->state == TCP_STATE_CLOSED || tc->snd_wnd > tc->snd_mss
1505  || (tc->flags & TCP_CONN_FINSNT))
1506  goto update_scheduler;
1507 
1508  available_bytes = transport_max_tx_dequeue (&tc->connection);
1509  offset = tc->snd_nxt - tc->snd_una;
1510 
1511  /* Reprogram persist if no new bytes available to send. We may have data
1512  * next time */
1513  if (!available_bytes)
1514  {
1515  tcp_persist_timer_set (&wrk->timer_wheel, tc);
1516  return;
1517  }
1518 
1519  if (available_bytes <= offset)
1520  goto update_scheduler;
1521 
1522  /* Increment RTO backoff */
1523  tc->rto_boff += 1;
1524  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1525 
1526  /*
1527  * Try to force the first unsent segment (or buffer)
1528  */
1529  if (PREDICT_FALSE (!vlib_buffer_alloc (vm, &bi, 1)))
1530  {
1531  tcp_persist_timer_set (&wrk->timer_wheel, tc);
1532  tcp_worker_stats_inc (wrk, no_buffer, 1);
1533  return;
1534  }
1535 
1536  b = vlib_get_buffer (vm, bi);
1537  data = tcp_init_buffer (vm, b);
1538 
1540  tc->snd_opts_len = tcp_make_options (tc, &tc->snd_opts, tc->state);
1541  max_snd_bytes = clib_min (tc->snd_mss,
1542  tm->bytes_per_buffer - TRANSPORT_MAX_HDRS_LEN);
1543  n_bytes = session_tx_fifo_peek_bytes (&tc->connection, data, offset,
1544  max_snd_bytes);
1546  ASSERT (n_bytes != 0 && (tcp_timer_is_active (tc, TCP_TIMER_RETRANSMIT)
1547  || tc->snd_una == tc->snd_nxt
1548  || tc->rto_boff > 1));
1549 
1550  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1551  {
1553  tcp_bt_track_tx (tc, n_bytes);
1554  }
1555 
1556  tcp_push_hdr_i (tc, b, tc->snd_nxt, /* compute opts */ 0,
1557  /* burst */ 0, /* update_snd_nxt */ 1);
1558  tcp_validate_txf_size (tc, tc->snd_nxt - tc->snd_una);
1559  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1560 
1561  /* Just sent new data, enable retransmit */
1562  tcp_retransmit_timer_update (&wrk->timer_wheel, tc);
1563 
1564  return;
1565 
1566 update_scheduler:
1567 
1568  if (tcp_is_descheduled (tc))
1569  transport_connection_reschedule (&tc->connection);
1570 }
1571 
1572 /**
1573  * Retransmit first unacked segment
1574  */
1575 int
1577 {
1578  vlib_main_t *vm = wrk->vm;
1579  vlib_buffer_t *b;
1580  u32 bi, n_bytes;
1581 
1582  TCP_EVT (TCP_EVT_CC_EVT, tc, 1);
1583 
1584  n_bytes = tcp_prepare_retransmit_segment (wrk, tc, 0, tc->snd_mss, &b);
1585  if (!n_bytes)
1586  return -1;
1587 
1588  bi = vlib_get_buffer_index (vm, b);
1589  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1590 
1591  return 0;
1592 }
1593 
1594 static int
1596  u32 burst_size)
1597 {
1598  u32 offset, n_segs = 0, n_written, bi, available_wnd;
1599  vlib_main_t *vm = wrk->vm;
1600  vlib_buffer_t *b = 0;
1601 
1602  offset = tc->snd_nxt - tc->snd_una;
1603  available_wnd = tc->snd_wnd - offset;
1604  burst_size = clib_min (burst_size, available_wnd / tc->snd_mss);
1605 
1606  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1608 
1609  while (n_segs < burst_size)
1610  {
1611  n_written = tcp_prepare_segment (wrk, tc, offset, tc->snd_mss, &b);
1612  if (!n_written)
1613  goto done;
1614 
1615  bi = vlib_get_buffer_index (vm, b);
1616  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1617  offset += n_written;
1618  n_segs += 1;
1619 
1620  if (tc->cfg_flags & TCP_CFG_F_RATE_SAMPLE)
1621  tcp_bt_track_tx (tc, n_written);
1622 
1623  tc->snd_nxt += n_written;
1624  }
1625 
1626 done:
1627  return n_segs;
1628 }
1629 
1630 /**
1631  * Estimate send space using proportional rate reduction (RFC6937)
1632  */
1633 int
1635 {
1636  u32 pipe, prr_out;
1637  int space;
1638 
1639  pipe = tcp_flight_size (tc);
1640  prr_out = tc->snd_rxt_bytes + (tc->snd_nxt - tc->snd_congestion);
1641 
1642  if (pipe > tc->ssthresh)
1643  {
1644  space = ((int) tc->prr_delivered * ((f64) tc->ssthresh / tc->prev_cwnd))
1645  - prr_out;
1646  }
1647  else
1648  {
1649  int limit;
1650  limit = clib_max ((int) (tc->prr_delivered - prr_out), 0) + tc->snd_mss;
1651  space = clib_min (tc->ssthresh - pipe, limit);
1652  }
1653  space = clib_max (space, prr_out ? 0 : tc->snd_mss);
1654  return space;
1655 }
1656 
1657 static inline u8
1659  sack_scoreboard_t * sb)
1660 {
1661  u32 tx_adv_sack = sb->high_sacked - tc->snd_congestion;
1662  f64 rr = (f64) tc->ssthresh / tc->prev_cwnd;
1663 
1664  if (tcp_fastrecovery_first (tc))
1665  return 1;
1666 
1667  return (tx_adv_sack > (tc->snd_una - tc->prr_start) * rr);
1668 }
1669 
1670 static inline u8
1672 {
1673  return (transport_max_tx_dequeue (&tc->connection)
1674  - (tc->snd_nxt - tc->snd_una));
1675 }
1676 
/* True if the scoreboard's rescue_rxt is, in sequence space, within
 * [snd_una, snd_congestion]. Arguments are parenthesized so that pointer
 * expressions (e.g. &tc->sack_sb) expand correctly. Note that each
 * argument is still evaluated twice. */
#define scoreboard_rescue_rxt_valid(_sb, _tc)				\
    (seq_geq ((_sb)->rescue_rxt, (_tc)->snd_una)			\
	&& seq_leq ((_sb)->rescue_rxt, (_tc)->snd_congestion))
1680 
1681 /**
1682  * Do retransmit with SACKs
1683  */
1684 static int
1686  u32 burst_size)
1687 {
1688  u32 n_written = 0, offset, max_bytes, n_segs = 0;
1689  u8 snd_limited = 0, can_rescue = 0;
1690  u32 bi, max_deq, burst_bytes;
1691  sack_scoreboard_hole_t *hole;
1692  vlib_main_t *vm = wrk->vm;
1693  vlib_buffer_t *b = 0;
1694  sack_scoreboard_t *sb;
1695  int snd_space;
1696 
1698 
1699  burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
1700  burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
1701  if (!burst_size)
1702  {
1704  return 0;
1705  }
1706 
1707  if (tcp_in_recovery (tc))
1708  snd_space = tcp_available_cc_snd_space (tc);
1709  else
1710  snd_space = tcp_fastrecovery_prr_snd_space (tc);
1711 
1712  if (snd_space < tc->snd_mss)
1713  goto done;
1714 
1715  sb = &tc->sack_sb;
1716 
1717  /* Check if snd_una is a lost retransmit */
1718  if (pool_elts (sb->holes)
1719  && seq_gt (sb->high_sacked, tc->snd_congestion)
1720  && tc->rxt_head != tc->snd_una
1722  {
1723  max_bytes = clib_min (tc->snd_mss, tc->snd_congestion - tc->snd_una);
1724  n_written = tcp_prepare_retransmit_segment (wrk, tc, 0, max_bytes, &b);
1725  if (!n_written)
1726  {
1728  goto done;
1729  }
1730  bi = vlib_get_buffer_index (vm, b);
1731  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1732  n_segs = 1;
1733 
1734  tc->rxt_head = tc->snd_una;
1735  tc->rxt_delivered += n_written;
1736  tc->prr_delivered += n_written;
1737  ASSERT (tc->rxt_delivered <= tc->snd_rxt_bytes);
1738  }
1739 
1741 
1742  TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
1743  hole = scoreboard_get_hole (sb, sb->cur_rxt_hole);
1744 
1745  max_deq = transport_max_tx_dequeue (&tc->connection);
1746  max_deq -= tc->snd_nxt - tc->snd_una;
1747 
1748  while (snd_space > 0 && n_segs < burst_size)
1749  {
1750  hole = scoreboard_next_rxt_hole (sb, hole, max_deq != 0, &can_rescue,
1751  &snd_limited);
1752  if (!hole)
1753  {
1754  /* We are out of lost holes to retransmit so send some new data. */
1755  if (max_deq > tc->snd_mss)
1756  {
1757  u32 n_segs_new;
1758  int av_wnd;
1759 
1760  /* Make sure we don't exceed available window and leave space
1761  * for one more packet, to avoid zero window acks */
1762  av_wnd = (int) tc->snd_wnd - (tc->snd_nxt - tc->snd_una);
1763  av_wnd = clib_max (av_wnd - tc->snd_mss, 0);
1764  snd_space = clib_min (snd_space, av_wnd);
1765  snd_space = clib_min (max_deq, snd_space);
1766  burst_size = clib_min (burst_size - n_segs,
1767  snd_space / tc->snd_mss);
1768  burst_size = clib_min (burst_size, TCP_RXT_MAX_BURST);
1769  n_segs_new = tcp_transmit_unsent (wrk, tc, burst_size);
1770  if (max_deq > n_segs_new * tc->snd_mss)
1772 
1773  n_segs += n_segs_new;
1774  goto done;
1775  }
1776 
1777  if (tcp_in_recovery (tc) || !can_rescue
1778  || scoreboard_rescue_rxt_valid (sb, tc))
1779  break;
1780 
1781  /* If rescue rxt undefined or less than snd_una then one segment of
1782  * up to SMSS octets that MUST include the highest outstanding
1783  * unSACKed sequence number SHOULD be returned, and RescueRxt set to
1784  * RecoveryPoint. HighRxt MUST NOT be updated.
1785  */
1786  hole = scoreboard_last_hole (sb);
1787  max_bytes = clib_min (tc->snd_mss, hole->end - hole->start);
1788  max_bytes = clib_min (max_bytes, snd_space);
1789  offset = hole->end - tc->snd_una - max_bytes;
1790  n_written = tcp_prepare_retransmit_segment (wrk, tc, offset,
1791  max_bytes, &b);
1792  if (!n_written)
1793  goto done;
1794 
1795  sb->rescue_rxt = tc->snd_congestion;
1796  bi = vlib_get_buffer_index (vm, b);
1797  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1798  n_segs += 1;
1799  break;
1800  }
1801 
1802  max_bytes = clib_min (hole->end - sb->high_rxt, snd_space);
1803  max_bytes = snd_limited ? clib_min (max_bytes, tc->snd_mss) : max_bytes;
1804  if (max_bytes == 0)
1805  break;
1806 
1807  offset = sb->high_rxt - tc->snd_una;
1808  n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
1809  &b);
1810  ASSERT (n_written <= snd_space);
1811 
1812  /* Nothing left to retransmit */
1813  if (n_written == 0)
1814  break;
1815 
1816  bi = vlib_get_buffer_index (vm, b);
1817  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1818 
1819  sb->high_rxt += n_written;
1820  ASSERT (seq_leq (sb->high_rxt, tc->snd_nxt));
1821 
1822  snd_space -= n_written;
1823  n_segs += 1;
1824  }
1825 
1826  if (hole)
1828 
1829 done:
1830 
1831  transport_connection_tx_pacer_reset_bucket (&tc->connection, 0);
1832  return n_segs;
1833 }
1834 
1835 /**
1836  * Fast retransmit without SACK info
1837  */
1838 static int
1840  u32 burst_size)
1841 {
1842  u32 n_written = 0, offset = 0, bi, max_deq, n_segs_now, max_bytes;
1843  u32 burst_bytes, sent_bytes;
1844  vlib_main_t *vm = wrk->vm;
1845  int snd_space, n_segs = 0;
1846  u8 cc_limited = 0;
1847  vlib_buffer_t *b;
1848 
1850  TCP_EVT (TCP_EVT_CC_EVT, tc, 0);
1851 
1852  burst_bytes = transport_connection_tx_pacer_burst (&tc->connection);
1853  burst_size = clib_min (burst_size, burst_bytes / tc->snd_mss);
1854  if (!burst_size)
1855  {
1857  return 0;
1858  }
1859 
1860  snd_space = tcp_available_cc_snd_space (tc);
1861  cc_limited = snd_space < burst_bytes;
1862 
1863  if (!tcp_fastrecovery_first (tc))
1864  goto send_unsent;
1865 
1866  /* RFC 6582: [If a partial ack], retransmit the first unacknowledged
1867  * segment. */
1868  while (snd_space > 0 && n_segs < burst_size)
1869  {
1870  max_bytes = clib_min (tc->snd_mss,
1871  tc->snd_congestion - tc->snd_una - offset);
1872  if (!max_bytes)
1873  break;
1874  n_written = tcp_prepare_retransmit_segment (wrk, tc, offset, max_bytes,
1875  &b);
1876 
1877  /* Nothing left to retransmit */
1878  if (n_written == 0)
1879  break;
1880 
1881  bi = vlib_get_buffer_index (vm, b);
1882  tcp_enqueue_to_output (wrk, b, bi, tc->c_is_ip4);
1883  snd_space -= n_written;
1884  offset += n_written;
1885  n_segs += 1;
1886  }
1887 
1888  if (n_segs == burst_size)
1889  goto done;
1890 
1891 send_unsent:
1892 
1893  /* RFC 6582: Send a new segment if permitted by the new value of cwnd. */
1894  if (snd_space < tc->snd_mss || tc->snd_mss == 0)
1895  goto done;
1896 
1897  max_deq = transport_max_tx_dequeue (&tc->connection);
1898  max_deq -= tc->snd_nxt - tc->snd_una;
1899  if (max_deq)
1900  {
1901  snd_space = clib_min (max_deq, snd_space);
1902  burst_size = clib_min (burst_size - n_segs, snd_space / tc->snd_mss);
1903  n_segs_now = tcp_transmit_unsent (wrk, tc, burst_size);
1904  if (n_segs_now && max_deq > n_segs_now * tc->snd_mss)
1906  n_segs += n_segs_now;
1907  }
1908 
1909 done:
1911 
1912  sent_bytes = clib_min (n_segs * tc->snd_mss, burst_bytes);
1913  sent_bytes = cc_limited ? burst_bytes : sent_bytes;
1914  transport_connection_tx_pacer_update_bytes (&tc->connection, sent_bytes);
1915 
1916  return n_segs;
1917 }
1918 
1919 static int
1920 tcp_send_acks (tcp_connection_t * tc, u32 max_burst_size)
1921 {
1922  int j, n_acks;
1923 
1924  if (!tc->pending_dupacks)
1925  {
1926  if (tcp_in_cong_recovery (tc) || !tcp_max_tx_deq (tc)
1927  || tc->state != TCP_STATE_ESTABLISHED)
1928  {
1929  tcp_send_ack (tc);
1930  return 1;
1931  }
1932  return 0;
1933  }
1934 
1935  /* If we're supposed to send dupacks but have no ooo data
1936  * send only one ack */
1937  if (!vec_len (tc->snd_sacks))
1938  {
1939  tcp_send_ack (tc);
1940  tc->dupacks_out += 1;
1941  tc->pending_dupacks = 0;
1942  return 1;
1943  }
1944 
1945  /* Start with first sack block */
1946  tc->snd_sack_pos = 0;
1947 
1948  /* Generate enough dupacks to cover all sack blocks. Do not generate
1949  * more sacks than the number of packets received. But do generate at
1950  * least 3, i.e., the number needed to signal congestion, if needed. */
1951  n_acks = vec_len (tc->snd_sacks) / TCP_OPTS_MAX_SACK_BLOCKS;
1952  n_acks = clib_min (n_acks, tc->pending_dupacks);
1953  n_acks = clib_max (n_acks, clib_min (tc->pending_dupacks, 3));
1954  for (j = 0; j < clib_min (n_acks, max_burst_size); j++)
1955  tcp_send_ack (tc);
1956 
1957  if (n_acks < max_burst_size)
1958  {
1959  tc->pending_dupacks = 0;
1960  tc->snd_sack_pos = 0;
1961  tc->dupacks_out += n_acks;
1962  return n_acks;
1963  }
1964  else
1965  {
1966  TCP_DBG ("constrained by burst size");
1967  tc->pending_dupacks = n_acks - max_burst_size;
1968  tc->dupacks_out += max_burst_size;
1969  tcp_program_dupack (tc);
1970  return max_burst_size;
1971  }
1972 }
1973 
1974 static int
1976 {
1978  u32 n_segs;
1979 
1980  if (PREDICT_FALSE (tc->state == TCP_STATE_CLOSED))
1981  return 0;
1982 
1983  wrk = tcp_get_worker (tc->c_thread_index);
1984 
1985  if (tcp_opts_sack_permitted (&tc->rcv_opts))
1986  n_segs = tcp_retransmit_sack (wrk, tc, max_burst_size);
1987  else
1988  n_segs = tcp_retransmit_no_sack (wrk, tc, max_burst_size);
1989 
1990  return n_segs;
1991 }
1992 
1993 int
1995 {
1996  tcp_connection_t *tc = (tcp_connection_t *) conn;
1997  u32 n_segs = 0;
1998 
1999  if (tcp_in_cong_recovery (tc) && (tc->flags & TCP_CONN_RXT_PENDING))
2000  {
2001  tc->flags &= ~TCP_CONN_RXT_PENDING;
2002  n_segs = tcp_do_retransmit (tc, sp->max_burst_size);
2003  }
2004 
2005  if (!(tc->flags & TCP_CONN_SNDACK))
2006  return n_segs;
2007 
2008  tc->flags &= ~TCP_CONN_SNDACK;
2009 
2010  /* We have retransmitted packets and no dupack */
2011  if (n_segs && !tc->pending_dupacks)
2012  return n_segs;
2013 
2014  if (sp->max_burst_size <= n_segs)
2015  {
2016  tcp_program_ack (tc);
2017  return n_segs;
2018  }
2019 
2020  n_segs += tcp_send_acks (tc, sp->max_burst_size - n_segs);
2021 
2022  return n_segs;
2023 }
2024 #endif /* CLIB_MARCH_VARIANT */
2025 
2026 static void
2028  u16 * next0, u32 * error0)
2029 {
2030  ip_adjacency_t *adj;
2031  adj_index_t ai;
2032 
2033  /* Not thread safe but as long as the connection exists the adj should
2034  * not be removed */
2035  ai = adj_nbr_find (FIB_PROTOCOL_IP6, VNET_LINK_IP6, &tc0->c_rmt_ip,
2036  tc0->sw_if_index);
2037  if (ai == ADJ_INDEX_INVALID)
2038  {
2039  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
2040  *next0 = TCP_OUTPUT_NEXT_DROP;
2041  *error0 = TCP_ERROR_LINK_LOCAL_RW;
2042  return;
2043  }
2044 
2045  adj = adj_get (ai);
2047  *next0 = TCP_OUTPUT_NEXT_IP_REWRITE;
2048  else if (adj->lookup_next_index == IP_LOOKUP_NEXT_ARP)
2049  *next0 = TCP_OUTPUT_NEXT_IP_ARP;
2050  else
2051  {
2052  *next0 = TCP_OUTPUT_NEXT_DROP;
2053  *error0 = TCP_ERROR_LINK_LOCAL_RW;
2054  }
2055  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = ai;
2056 }
2057 
2058 static void
2060  u32 * to_next, u32 n_bufs)
2061 {
2062  tcp_connection_t *tc;
2063  tcp_tx_trace_t *t;
2064  vlib_buffer_t *b;
2065  tcp_header_t *th;
2066  int i;
2067 
2068  for (i = 0; i < n_bufs; i++)
2069  {
2070  b = vlib_get_buffer (vm, to_next[i]);
2071  if (!(b->flags & VLIB_BUFFER_IS_TRACED))
2072  continue;
2073  th = vlib_buffer_get_current (b);
2074  tc = tcp_connection_get (vnet_buffer (b)->tcp.connection_index,
2075  vm->thread_index);
2076  t = vlib_add_trace (vm, node, b, sizeof (*t));
2077  clib_memcpy_fast (&t->tcp_header, th, sizeof (t->tcp_header));
2078  clib_memcpy_fast (&t->tcp_connection, tc, sizeof (t->tcp_connection));
2079  }
2080 }
2081 
2082 always_inline void
2084  tcp_connection_t * tc0, u8 is_ip4)
2085 {
2086  TCP_EVT (TCP_EVT_OUTPUT, tc0,
2088  b0->current_length);
2089 
2090  if (is_ip4)
2091  vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4, &tc0->c_rmt_ip4,
2092  IP_PROTOCOL_TCP, tcp_csum_offload (tc0));
2093  else
2094  vlib_buffer_push_ip6_custom (vm, b0, &tc0->c_lcl_ip6, &tc0->c_rmt_ip6,
2095  IP_PROTOCOL_TCP, tc0->ipv6_flow_label);
2096 }
2097 
2098 always_inline void
2100 {
2101  if (PREDICT_TRUE (!(tc->cfg_flags & TCP_CFG_F_TSO)))
2102  return;
2103 
2104  u16 data_len = b->current_length - sizeof (tcp_header_t) - tc->snd_opts_len;
2105 
2106  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID))
2108 
2109  if (PREDICT_TRUE (data_len <= tc->snd_mss))
2110  return;
2111  else
2112  {
2113  ASSERT ((b->flags & VNET_BUFFER_F_L3_HDR_OFFSET_VALID) != 0);
2114  ASSERT ((b->flags & VNET_BUFFER_F_L4_HDR_OFFSET_VALID) != 0);
2115  b->flags |= VNET_BUFFER_F_GSO;
2116  vnet_buffer2 (b)->gso_l4_hdr_sz =
2117  sizeof (tcp_header_t) + tc->snd_opts_len;
2118  vnet_buffer2 (b)->gso_size = tc->snd_mss;
2119  }
2120 }
2121 
2122 always_inline void
2124  vlib_node_runtime_t * error_node, u16 * next0,
2125  u8 is_ip4)
2126 {
2127  /* If next_index is not drop use it */
2128  if (tc0->next_node_index)
2129  {
2130  *next0 = tc0->next_node_index;
2131  vnet_buffer (b0)->tcp.next_node_opaque = tc0->next_node_opaque;
2132  }
2133  else
2134  {
2135  *next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
2136  }
2137 
2138  vnet_buffer (b0)->sw_if_index[VLIB_TX] = tc0->c_fib_index;
2139  vnet_buffer (b0)->sw_if_index[VLIB_RX] = tc0->sw_if_index;
2140 
2141  if (!is_ip4)
2142  {
2143  u32 error0 = 0;
2144 
2145  if (PREDICT_FALSE (ip6_address_is_link_local_unicast (&tc0->c_rmt_ip6)))
2146  tcp_output_handle_link_local (tc0, b0, next0, &error0);
2147 
2148  if (PREDICT_FALSE (error0))
2149  {
2150  b0->error = error_node->errors[error0];
2151  return;
2152  }
2153  }
2154 
2155  tc0->segs_out += 1;
2156 }
2157 
2160  vlib_frame_t * frame, int is_ip4)
2161 {
2165 
2167  n_left_from = frame->n_vectors;
2169 
2170  if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
2172 
2174  b = bufs;
2175  next = nexts;
2176 
2177  while (n_left_from >= 4)
2178  {
2179  tcp_connection_t *tc0, *tc1;
2180 
2181  {
2182  vlib_prefetch_buffer_header (b[2], STORE);
2183  CLIB_PREFETCH (b[2]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2184 
2185  vlib_prefetch_buffer_header (b[3], STORE);
2186  CLIB_PREFETCH (b[3]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2187  }
2188 
2189  tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
2190  thread_index);
2191  tc1 = tcp_connection_get (vnet_buffer (b[1])->tcp.connection_index,
2192  thread_index);
2193 
2194  if (PREDICT_TRUE (!tc0 + !tc1 == 0))
2195  {
2196  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
2197  tcp_output_push_ip (vm, b[1], tc1, is_ip4);
2198 
2199  tcp_check_if_gso (tc0, b[0]);
2200  tcp_check_if_gso (tc1, b[1]);
2201 
2202  tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
2203  tcp_output_handle_packet (tc1, b[1], node, &next[1], is_ip4);
2204  }
2205  else
2206  {
2207  if (tc0 != 0)
2208  {
2209  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
2210  tcp_check_if_gso (tc0, b[0]);
2211  tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
2212  }
2213  else
2214  {
2215  b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
2217  }
2218  if (tc1 != 0)
2219  {
2220  tcp_output_push_ip (vm, b[1], tc1, is_ip4);
2221  tcp_check_if_gso (tc1, b[1]);
2222  tcp_output_handle_packet (tc1, b[1], node, &next[1], is_ip4);
2223  }
2224  else
2225  {
2226  b[1]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
2228  }
2229  }
2230 
2231  b += 2;
2232  next += 2;
2233  n_left_from -= 2;
2234  }
2235  while (n_left_from > 0)
2236  {
2237  tcp_connection_t *tc0;
2238 
2239  if (n_left_from > 1)
2240  {
2241  vlib_prefetch_buffer_header (b[1], STORE);
2242  CLIB_PREFETCH (b[1]->data, 2 * CLIB_CACHE_LINE_BYTES, STORE);
2243  }
2244 
2245  tc0 = tcp_connection_get (vnet_buffer (b[0])->tcp.connection_index,
2246  thread_index);
2247 
2248  if (PREDICT_TRUE (tc0 != 0))
2249  {
2250  tcp_output_push_ip (vm, b[0], tc0, is_ip4);
2251  tcp_check_if_gso (tc0, b[0]);
2252  tcp_output_handle_packet (tc0, b[0], node, &next[0], is_ip4);
2253  }
2254  else
2255  {
2256  b[0]->error = node->errors[TCP_ERROR_INVALID_CONNECTION];
2258  }
2259 
2260  b += 1;
2261  next += 1;
2262  n_left_from -= 1;
2263  }
2264 
2266  vlib_node_increment_counter (vm, tcp_node_index (output, is_ip4),
2267  TCP_ERROR_PKTS_SENT, frame->n_vectors);
2268  return frame->n_vectors;
2269 }
2270 
2273 {
2274  return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
2275 }
2276 
2279 {
2280  return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
2281 }
2282 
2283 /* *INDENT-OFF* */
2285 {
2286  .name = "tcp4-output",
2287  /* Takes a vector of packets. */
2288  .vector_size = sizeof (u32),
2289  .n_errors = TCP_N_ERROR,
2290  .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
2291  .error_strings = tcp_error_strings,
2292  .n_next_nodes = TCP_OUTPUT_N_NEXT,
2293  .next_nodes = {
2294 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
2296 #undef _
2297  },
2298  .format_buffer = format_tcp_header,
2299  .format_trace = format_tcp_tx_trace,
2300 };
2301 /* *INDENT-ON* */
2302 
2303 /* *INDENT-OFF* */
2305 {
2306  .name = "tcp6-output",
2307  /* Takes a vector of packets. */
2308  .vector_size = sizeof (u32),
2309  .n_errors = TCP_N_ERROR,
2310  .protocol_hint = VLIB_NODE_PROTO_HINT_TCP,
2311  .error_strings = tcp_error_strings,
2312  .n_next_nodes = TCP_OUTPUT_N_NEXT,
2313  .next_nodes = {
2314 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
2316 #undef _
2317  },
2318  .format_buffer = format_tcp_header,
2319  .format_trace = format_tcp_tx_trace,
2320 };
2321 /* *INDENT-ON* */
2322 
2323 typedef enum _tcp_reset_next
2324 {
2329 
/* X-macro lists of (next index suffix, next node name) pairs for the ip4
 * and ip6 reset nodes. Expand with a caller supplied _(sym, name). */
#define foreach_tcp4_reset_next					\
  _ (DROP, "error-drop")					\
  _ (IP_LOOKUP, "ip4-lookup")

#define foreach_tcp6_reset_next					\
  _ (DROP, "error-drop")					\
  _ (IP_LOOKUP, "ip6-lookup")
2337 
2338 static uword
2340  vlib_frame_t * from_frame, u8 is_ip4)
2341 {
2342  u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP;
2343  u32 n_left_from, next_index, *from, *to_next;
2344 
2346  n_left_from = from_frame->n_vectors;
2347 
2348  next_index = node->cached_next_index;
2349 
2350  while (n_left_from > 0)
2351  {
2352  u32 n_left_to_next;
2353 
2354  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
2355 
2356  while (n_left_from > 0 && n_left_to_next > 0)
2357  {
2358  vlib_buffer_t *b0;
2359  tcp_tx_trace_t *t0;
2360  tcp_header_t *th0;
2361  u32 bi0;
2362 
2363  bi0 = from[0];
2364  to_next[0] = bi0;
2365  from += 1;
2366  to_next += 1;
2367  n_left_from -= 1;
2368  n_left_to_next -= 1;
2369 
2370  b0 = vlib_get_buffer (vm, bi0);
2371  tcp_make_reset_in_place (vm, b0, is_ip4);
2372 
2373  /* Prepare to send to IP lookup */
2374  vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
2375 
2376  b0->error = node->errors[error0];
2377  b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
2378  if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
2379  {
2380  th0 = vlib_buffer_get_current (b0);
2381  if (is_ip4)
2382  th0 = ip4_next_header ((ip4_header_t *) th0);
2383  else
2384  th0 = ip6_next_header ((ip6_header_t *) th0);
2385  t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
2386  clib_memcpy_fast (&t0->tcp_header, th0,
2387  sizeof (t0->tcp_header));
2388  }
2389 
2391  n_left_to_next, bi0, next0);
2392  }
2393  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
2394  }
2395  return from_frame->n_vectors;
2396 }
2397 
2400 {
2401  return tcp46_send_reset_inline (vm, node, from_frame, 1);
2402 }
2403 
2406 {
2407  return tcp46_send_reset_inline (vm, node, from_frame, 0);
2408 }
2409 
2410 /* *INDENT-OFF* */
2412  .name = "tcp4-reset",
2413  .vector_size = sizeof (u32),
2414  .n_errors = TCP_N_ERROR,
2415  .error_strings = tcp_error_strings,
2416  .n_next_nodes = TCP_RESET_N_NEXT,
2417  .next_nodes = {
2418 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
2420 #undef _
2421  },
2422  .format_trace = format_tcp_tx_trace,
2423 };
2424 /* *INDENT-ON* */
2425 
2426 /* *INDENT-OFF* */
2428  .name = "tcp6-reset",
2429  .vector_size = sizeof (u32),
2430  .n_errors = TCP_N_ERROR,
2431  .error_strings = tcp_error_strings,
2432  .n_next_nodes = TCP_RESET_N_NEXT,
2433  .next_nodes = {
2434 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
2436 #undef _
2437  },
2438  .format_trace = format_tcp_tx_trace,
2439 };
2440 /* *INDENT-ON* */
2441 
2442 /*
2443  * fd.io coding-style-patch-verification: ON
2444  *
2445  * Local Variables:
2446  * eval: (c-set-style "gnu")
2447  * End:
2448  */
tmp
u32 * tmp
Definition: interface_output.c:1096
vlib_buffer_t::next_buffer
u32 next_buffer
Next buffer for this linked-list of buffers.
Definition: buffer.h:149
seq_gt
#define seq_gt(_s1, _s2)
Definition: tcp_packet.h:180
tcp4_reset_node
vlib_node_registration_t tcp4_reset_node
(constructor) VLIB_REGISTER_NODE (tcp4_reset_node)
Definition: tcp_output.c:2411
ip6_address_is_link_local_unicast
static uword ip6_address_is_link_local_unicast(const ip6_address_t *a)
Definition: ip6_packet.h:253
vlib_buffer_free
static void vlib_buffer_free(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Free buffers. Frees the entire buffer chain for each buffer.
Definition: buffer_funcs.h:979
tcp_options_t::tsval
u32 tsval
Timestamp value.
Definition: tcp_packet.h:146
vlib_num_workers
static u32 vlib_num_workers()
Definition: threads.h:333
tcp_connection_cleanup
void tcp_connection_cleanup(tcp_connection_t *tc)
Cleans up connection state.
Definition: tcp.c:233
tcp6_output_node
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
Definition: tcp_output.c:2304
seq_geq
#define seq_geq(_s1, _s2)
Definition: tcp_packet.h:181
TCP_RESET_NEXT_DROP
@ TCP_RESET_NEXT_DROP
Definition: tcp_output.c:2325
IP_LOOKUP_NEXT_ARP
@ IP_LOOKUP_NEXT_ARP
This packet matches an "incomplete adjacency" and packets need to be passed to ARP to find rewrite st...
Definition: adj.h:63
TCP_FLAG_RST
#define TCP_FLAG_RST
Definition: fa_node.h:14
thread_index
u32 thread_index
Definition: nat44_ei_hairpinning.c:495
bufs
vlib_buffer_t * bufs[VLIB_FRAME_SIZE]
Definition: nat44_ei_out2in.c:717
tcp_check_sack_reneging
static void tcp_check_sack_reneging(tcp_connection_t *tc)
Definition: tcp_output.c:1243
tcp_opts_tstamp
#define tcp_opts_tstamp(_to)
Definition: tcp_packet.h:156
TCP_FLAG_FIN
#define TCP_FLAG_FIN
Definition: fa_node.h:12
dst_port
vl_api_ip_port_and_mask_t dst_port
Definition: flow_types.api:92
vlib_prefetch_buffer_header
#define vlib_prefetch_buffer_header(b, type)
Prefetch buffer metadata.
Definition: buffer.h:231
frame
vlib_main_t vlib_node_runtime_t vlib_frame_t * frame
Definition: nat44_ei.c:3048
tcp_options_t::wscale
u8 wscale
Window scale advertised.
Definition: tcp_packet.h:150
TCP_USE_SACKS
#define TCP_USE_SACKS
Disable only for testing.
Definition: tcp_types.h:40
tcp_enqueue_to_output
static void tcp_enqueue_to_output(tcp_worker_ctx_t *wrk, vlib_buffer_t *b, u32 bi, u8 is_ip4)
Definition: tcp_output.c:561
tcp_send_reset
void tcp_send_reset(tcp_connection_t *tc)
Build and set reset packet for connection.
Definition: tcp_output.c:739
VLIB_BUFFER_TRACE_TRAJECTORY_INIT
#define VLIB_BUFFER_TRACE_TRAJECTORY_INIT(b)
Definition: buffer.h:192
tcp_output_next_t
enum _tcp_output_next tcp_output_next_t
ip6_tcp_udp_icmp_compute_checksum
u16 ip6_tcp_udp_icmp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip6_header_t *ip0, int *bogus_lengthp)
Definition: ip6_forward.c:1096
tcp_inlines.h
TCP_RESET_N_NEXT
@ TCP_RESET_N_NEXT
Definition: tcp_output.c:2327
format_tcp_state
format_function_t format_tcp_state
Definition: tcp.h:340
next_index
nat44_ei_hairpin_src_next_t next_index
Definition: nat44_ei_hairpinning.c:412
tcp_make_syn
void tcp_make_syn(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN.
Definition: tcp_output.c:497
TCP_OPTS_MAX_SACK_BLOCKS
#define TCP_OPTS_MAX_SACK_BLOCKS
Definition: tcp_packet.h:174
tcp_cc_init_rxt_timeout
static void tcp_cc_init_rxt_timeout(tcp_connection_t *tc)
Reset congestion control, switch cwnd to loss window and try again.
Definition: tcp_output.c:1259
clib_max
#define clib_max(x, y)
Definition: clib.h:335
tcp_ack
#define tcp_ack(_th)
Definition: tcp_packet.h:83
TCP_OPTION_LEN_WINDOW_SCALE
#define TCP_OPTION_LEN_WINDOW_SCALE
Definition: tcp_packet.h:165
format_tcp_tx_trace
static u8 * format_tcp_tx_trace(u8 *s, va_list *args)
Definition: tcp_output.c:56
session_transport_closing_notify
void session_transport_closing_notify(transport_connection_t *tc)
Notification from transport that connection is being closed.
Definition: session.c:1062
TCP_RTO_MAX
#define TCP_RTO_MAX
Definition: tcp_types.h:85
vlib_get_buffer
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:111
tcp_retransmit_timer_set
static void tcp_retransmit_timer_set(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
Definition: tcp_timer.h:55
ADJ_INDEX_INVALID
#define ADJ_INDEX_INVALID
Invalid ADJ index - used when no adj is known likewise blazoned capitals INVALID speak volumes where ...
Definition: adj_types.h:36
tcp_buffer_len
static u32 tcp_buffer_len(vlib_buffer_t *b)
Definition: tcp_output.c:963
tcp46_send_reset_inline
static uword tcp46_send_reset_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame, u8 is_ip4)
Definition: tcp_output.c:2339
ip4_inlines.h
format_tcp_header
format_function_t format_tcp_header
Definition: format.h:100
tcp_header_t
struct _tcp_header tcp_header_t
vlib_buffer_push_ip6
static void * vlib_buffer_push_ip6(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto)
Push IPv6 header to buffer.
Definition: ip6_inlines.h:255
vlib_get_buffers
vlib_get_buffers(vm, from, b, n_left_from)
next
u16 * next
Definition: nat44_ei_out2in.c:718
VLIB_FRAME_SIZE
#define VLIB_FRAME_SIZE
Definition: node.h:368
scoreboard_get_hole
static sack_scoreboard_hole_t * scoreboard_get_hole(sack_scoreboard_t *sb, u32 index)
Definition: tcp_sack.h:35
node
vlib_main_t vlib_node_runtime_t * node
Definition: nat44_ei.c:3047
vnet_buffer_offload_flags_set
static_always_inline void vnet_buffer_offload_flags_set(vlib_buffer_t *b, vnet_buffer_oflags_t oflags)
Definition: buffer.h:528
tcp_connection_t
struct _tcp_connection tcp_connection_t
tcp_timer_retransmit_syn_handler
void tcp_timer_retransmit_syn_handler(tcp_connection_t *tc)
SYN retransmit timer handler.
Definition: tcp_output.c:1430
TCP_DBG
#define TCP_DBG(_fmt, _args...)
Definition: tcp_debug.h:146
ip4_address_t::as_u32
u32 as_u32
Definition: ip4_packet.h:57
tcp_zero_rwnd_sent_on
#define tcp_zero_rwnd_sent_on(tc)
Definition: tcp_types.h:438
wrk
session_worker_t * wrk
Definition: application.c:490
vlib_buffer_push_ip4
static void * vlib_buffer_push_ip4(vlib_main_t *vm, vlib_buffer_t *b, ip4_address_t *src, ip4_address_t *dst, int proto, u8 csum_offload)
Push IPv4 header to buffer.
Definition: ip4_inlines.h:150
tcp_session_custom_tx
int tcp_session_custom_tx(void *conn, transport_send_params_t *sp)
Definition: tcp_output.c:1994
vnet_get_tcp_main
static tcp_main_t * vnet_get_tcp_main()
Definition: tcp.h:277
session_add_pending_tx_buffer
static void session_add_pending_tx_buffer(u32 thread_index, u32 bi, u32 next_node)
Add session node pending buffer with custom node.
Definition: session.h:747
tcp_csum_offload
#define tcp_csum_offload(tc)
Definition: tcp_types.h:435
tcp_make_synack
static void tcp_make_synack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN-ACK.
Definition: tcp_output.c:523
tcp_get_worker
static tcp_worker_ctx_t * tcp_get_worker(u32 thread_index)
Definition: tcp.h:283
u16
unsigned short u16
Definition: types.h:57
TCP_DUPACK_THRESHOLD
#define TCP_DUPACK_THRESHOLD
Definition: tcp_types.h:37
tcp_options_t::sacks
sack_block_t * sacks
SACK blocks.
Definition: tcp_packet.h:145
vm
vlib_main_t * vm
X-connect all packets from the HOST to the PHY.
Definition: nat44_ei.c:3047
tcp_timer_update
static void tcp_timer_update(tcp_timer_wheel_t *tw, tcp_connection_t *tc, u8 timer_id, u32 interval)
Definition: tcp_timer.h:43
transport_connection_reschedule
void transport_connection_reschedule(transport_connection_t *tc)
Definition: transport.c:790
tcp_time_tstamp
static u32 tcp_time_tstamp(u32 thread_index)
Time used to generate timestamps, not the timestamp.
Definition: tcp_inlines.h:189
VLIB_RX
@ VLIB_RX
Definition: defs.h:46
state
vl_api_dhcp_client_state_t state
Definition: dhcp.api:201
session_transport_closed_notify
void session_transport_closed_notify(transport_connection_t *tc)
Notification from transport that it is closed.
Definition: session.c:1150
session_queue_run_on_main_thread
void session_queue_run_on_main_thread(vlib_main_t *vm)
Definition: session.c:1785
tcp_cc_congestion
static void tcp_cc_congestion(tcp_connection_t *tc)
Definition: tcp_cc.h:36
from_frame
vlib_main_t vlib_node_runtime_t vlib_frame_t * from_frame
Definition: esp_encrypt.c:1328
vlib_buffer_enqueue_to_next
vlib_buffer_enqueue_to_next(vm, node, from,(u16 *) nexts, frame->n_vectors)
tcp_fastrecovery_prr_snd_space
int tcp_fastrecovery_prr_snd_space(tcp_connection_t *tc)
Estimate send space using proportional rate reduction (RFC6937)
Definition: tcp_output.c:1634
tcp_options_t::tsecr
u32 tsecr
Echoed/reflected time stamp.
Definition: tcp_packet.h:147
vnet_buffer2
#define vnet_buffer2(b)
Definition: buffer.h:505
tcp_options_t::mss
u16 mss
Maximum segment size advertised.
Definition: tcp_packet.h:148
tcp_options_t::n_sack_blocks
u8 n_sack_blocks
Number of SACK blocks.
Definition: tcp_packet.h:151
tcp_reuse_buffer
static void * tcp_reuse_buffer(vlib_main_t *vm, vlib_buffer_t *b)
Definition: tcp_output.c:338
transport_connection_t
struct _transport_connection transport_connection_t
transport_connection_tx_pacer_update_bytes
void transport_connection_tx_pacer_update_bytes(transport_connection_t *tc, u32 bytes)
Definition: transport.c:777
TCP_TO_TIMER_TICK
#define TCP_TO_TIMER_TICK
Factor for converting ticks to timer ticks.
Definition: tcp_types.h:82
ip6_next_header
static void * ip6_next_header(ip6_header_t *i)
Definition: ip6_packet.h:407
vlib_frame_t
Definition: node.h:372
ip_calculate_l4_checksum
static u16 ip_calculate_l4_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip_csum_t sum0, u32 payload_length, u8 *iph, u32 ip_header_size, u8 *l4h)
Definition: ip.h:184
vlib_buffer_length_in_chain
static uword vlib_buffer_length_in_chain(vlib_main_t *vm, vlib_buffer_t *b)
Get length in bytes of the buffer chain.
Definition: buffer_funcs.h:433
tcp_reset_next_t
enum _tcp_reset_next tcp_reset_next_t
clib_memcpy_fast
static_always_inline void * clib_memcpy_fast(void *restrict dst, const void *restrict src, size_t n)
Definition: string.h:92
tcp46_output_trace_frame
static void tcp46_output_trace_frame(vlib_main_t *vm, vlib_node_runtime_t *node, u32 *to_next, u32 n_bufs)
Definition: tcp_output.c:2059
ip4_header_t
Definition: ip4_packet.h:87
tcp_error_strings
static char * tcp_error_strings[]
Definition: tcp_output.c:43
transport_connection_tx_pacer_burst
u32 transport_connection_tx_pacer_burst(transport_connection_t *tc)
Get tx pacer max burst.
Definition: transport.c:757
tcp_retransmit_first_unacked
int tcp_retransmit_first_unacked(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
Retransmit first unacked segment.
Definition: tcp_output.c:1576
foreach_tcp6_output_next
#define foreach_tcp6_output_next
Definition: tcp_output.c:37
tcp_main_t
struct _tcp_main tcp_main_t
TCP_FLAG_SYN
#define TCP_FLAG_SYN
Definition: fa_node.h:13
i32
signed int i32
Definition: types.h:77
tcp_make_ack
static void tcp_make_ack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to ACK.
Definition: tcp_output.c:477
tcp_options_t
Definition: tcp_packet.h:143
tcp_options_t::flags
u8 flags
Option flags, see above.
Definition: tcp_packet.h:149
tcp_make_synack_options
static int tcp_make_synack_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:195
tcp_max_tx_deq
static u8 tcp_max_tx_deq(tcp_connection_t *tc)
Definition: tcp_output.c:1671
tcp_make_fin
static void tcp_make_fin(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to FIN-ACK.
Definition: tcp_output.c:488
tcp_state_t
enum _tcp_state tcp_state_t
tcp_update_rto
static void tcp_update_rto(tcp_connection_t *tc)
Definition: tcp_inlines.h:378
CLIB_PREFETCH
#define CLIB_PREFETCH(addr, size, type)
Definition: cache.h:76
tcp_timer_retransmit_handler
void tcp_timer_retransmit_handler(tcp_connection_t *tc)
Definition: tcp_output.c:1282
vlib_buffer_t::current_data
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
Definition: buffer.h:119
tcp_session_push_header
u32 tcp_session_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
Definition: tcp_output.c:972
vlib_node_runtime_t::errors
vlib_error_t * errors
Vector of errors for this node.
Definition: node.h:460
tcp_update_burst_snd_vars
void tcp_update_burst_snd_vars(tcp_connection_t *tc)
Update burst send vars.
Definition: tcp_output.c:300
seq_leq
#define seq_leq(_s1, _s2)
Definition: tcp_packet.h:179
vec_len
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
Definition: vec_bootstrap.h:142
vlib_buffer_t::error
vlib_error_t error
Error code for buffers to be enqueued to error handler.
Definition: buffer.h:145
tcp_init_buffer
static void * tcp_init_buffer(vlib_main_t *vm, vlib_buffer_t *b)
Definition: tcp_output.c:355
math.h
tcp_send_syn
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
Definition: tcp_output.c:783
transport_connection_tx_pacer_reset_bucket
void transport_connection_tx_pacer_reset_bucket(transport_connection_t *tc, u32 bucket)
Reset tx pacer bucket.
Definition: transport.c:732
len
u8 len
Definition: ip_types.api:103
foreach_tcp6_reset_next
#define foreach_tcp6_reset_next
Definition: tcp_output.c:2334
tcp_program_cleanup
void tcp_program_cleanup(tcp_worker_ctx_t *wrk, tcp_connection_t *tc)
Definition: tcp.c:336
tcp_main
tcp_main_t tcp_main
Definition: tcp.c:28
VLIB_NODE_FN
#define VLIB_NODE_FN(node)
Definition: node.h:202
tcp_cc_event
static void tcp_cc_event(tcp_connection_t *tc, tcp_cc_event_t evt)
Definition: tcp_cc.h:61
tcp_opts_wscale
#define tcp_opts_wscale(_to)
Definition: tcp_packet.h:157
TCP_OUTPUT_NEXT_IP_ARP
@ TCP_OUTPUT_NEXT_IP_ARP
Definition: tcp_output.c:27
ip_adjacency_t_::lookup_next_index
ip_lookup_next_t lookup_next_index
Next hop after ip4-lookup.
Definition: adj.h:337
vlib_buffer_alloc
static __clib_warn_unused_result u32 vlib_buffer_alloc(vlib_main_t *vm, u32 *buffers, u32 n_buffers)
Allocate buffers into supplied array.
Definition: buffer_funcs.h:702
tcp_validate_txf_size
#define tcp_validate_txf_size(_tc, _a)
Definition: tcp.h:347
CLIB_UNUSED
#define CLIB_UNUSED(x)
Definition: clib.h:90
tcp_make_ack_i
static void tcp_make_ack_i(tcp_connection_t *tc, vlib_buffer_t *b, tcp_state_t state, u8 flags)
Prepare ACK.
Definition: tcp_output.c:441
vnet_buffer
#define vnet_buffer(b)
Definition: buffer.h:441
tcp_timer_persist_handler
void tcp_timer_persist_handler(tcp_connection_t *tc)
Got 0 snd_wnd from peer, try to do something about it.
Definition: tcp_output.c:1493
tcp_output_handle_packet
static void tcp_output_handle_packet(tcp_connection_t *tc0, vlib_buffer_t *b0, vlib_node_runtime_t *error_node, u16 *next0, u8 is_ip4)
Definition: tcp_output.c:2123
tcp_in_recovery
#define tcp_in_recovery(tc)
Definition: tcp_types.h:416
VLIB_NODE_FLAG_TRACE
#define VLIB_NODE_FLAG_TRACE
Definition: node.h:291
tcp_timer_is_active
static u8 tcp_timer_is_active(tcp_connection_t *tc, tcp_timers_e timer)
Definition: tcp_timer.h:110
offset
struct clib_bihash_value offset
template key/value backing page structure
tcp_send_ack
void tcp_send_ack(tcp_connection_t *tc)
Definition: tcp_output.c:999
tcp_recovery_on
#define tcp_recovery_on(tc)
Definition: tcp_types.h:413
sack_scoreboard_t
struct _sack_scoreboard sack_scoreboard_t
PREDICT_FALSE
#define PREDICT_FALSE(x)
Definition: clib.h:124
tcp46_output_inline
static uword tcp46_output_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame, int is_ip4)
Definition: tcp_output.c:2159
ARRAY_LEN
#define ARRAY_LEN(x)
Definition: clib.h:70
vlib_get_buffer_index
static u32 vlib_get_buffer_index(vlib_main_t *vm, void *p)
Translate buffer pointer into buffer index.
Definition: buffer_funcs.h:324
vlib_frame_vector_args
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
Definition: node_funcs.h:301
ip4_tcp_udp_compute_checksum
u16 ip4_tcp_udp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip4_header_t *ip0)
Definition: pnat_test_stubs.h:59
format_tcp_connection_id
format_function_t format_tcp_connection_id
Definition: tcp.h:345
vec_validate_aligned
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
Definition: vec.h:534
TCP_OPTS_ALIGN
#define TCP_OPTS_ALIGN
Definition: tcp_packet.h:173
tcp4_output_node
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
Definition: tcp_output.c:2284
tcp_make_syn_options
static int tcp_make_syn_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:166
uword
u64 uword
Definition: types.h:112
tcp_node_index
#define tcp_node_index(node_id, is_ip4)
Definition: tcp.h:273
if
if(node->flags &VLIB_NODE_FLAG_TRACE) vnet_interface_output_trace(vm
scoreboard_rescue_rxt_valid
#define scoreboard_rescue_rxt_valid(_sb, _tc)
Definition: tcp_output.c:1677
IP_LOOKUP_NEXT_REWRITE
@ IP_LOOKUP_NEXT_REWRITE
This packet is to be rewritten and forwarded to the next processing node.
Definition: adj.h:73
clib_mem_unaligned
#define clib_mem_unaligned(pointer, type)
Definition: types.h:155
transport_max_tx_dequeue
static u32 transport_max_tx_dequeue(transport_connection_t *tc)
Definition: session.h:544
tcp_compute_checksum
static u16 tcp_compute_checksum(tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:415
vlib_main_t::thread_index
u32 thread_index
Definition: main.h:215
tcp_connection_set_state
static void tcp_connection_set_state(tcp_connection_t *tc, tcp_state_t state)
Definition: tcp_inlines.h:51
vlib_node_increment_counter
static void vlib_node_increment_counter(vlib_main_t *vm, u32 node_index, u32 counter_index, u64 increment)
Definition: node_funcs.h:1244
tcp_is_syn
#define tcp_is_syn(_th)
Definition: tcp_packet.h:89
scoreboard_last_hole
static sack_scoreboard_hole_t * scoreboard_last_hole(sack_scoreboard_t *sb)
Definition: tcp_sack.h:67
tcp_connection_timers_reset
void tcp_connection_timers_reset(tcp_connection_t *tc)
Stop all connection timers.
Definition: tcp.c:517
tcp6_reset_node
vlib_node_registration_t tcp6_reset_node
(constructor) VLIB_REGISTER_NODE (tcp6_reset_node)
Definition: tcp_output.c:2427
tcp_bt_track_tx
void tcp_bt_track_tx(tcp_connection_t *tc, u32 len)
Track a tcp tx burst.
Definition: tcp_bt.c:301
src_port
vl_api_ip_port_and_mask_t src_port
Definition: flow_types.api:91
tcp_retransmit_timer_update
static void tcp_retransmit_timer_update(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
Definition: tcp_timer.h:96
f64
double f64
Definition: types.h:142
tcp_tx_trace_t::tcp_header
tcp_header_t tcp_header
Definition: tcp_output.c:51
ip6_header_t::dst_address
ip6_address_t dst_address
Definition: ip6_packet.h:310
foreach_tcp4_reset_next
#define foreach_tcp4_reset_next
Definition: tcp_output.c:2330
tcp_make_options
static int tcp_make_options(tcp_connection_t *tc, tcp_options_t *opts, tcp_state_t state)
Definition: tcp_output.c:265
vlib_buffer_push_tcp_net_order
static void * vlib_buffer_push_tcp_net_order(vlib_buffer_t *b, u16 sp, u16 dp, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
Definition: tcp_inlines.h:406
tcp_opts_sack_permitted
#define tcp_opts_sack_permitted(_to)
Definition: tcp_packet.h:159
tcp_tx_trace_t
Definition: tcp_output.c:49
tcp_options_write
static u32 tcp_options_write(u8 *data, tcp_options_t *opts)
Write TCP options to segment.
Definition: tcp_packet.h:309
src
vl_api_address_t src
Definition: gre.api:54
TCP_EVT
#define TCP_EVT(_evt, _args...)
Definition: tcp_debug.h:145
tcp_send_reset_w_pkt
void tcp_send_reset_w_pkt(tcp_connection_t *tc, vlib_buffer_t *pkt, u32 thread_index, u8 is_ip4)
Send reset without reusing existing buffer.
Definition: tcp_output.c:656
ip4_address_t
Definition: ip4_packet.h:50
ip_adjacency_t_
IP unicast adjacency.
Definition: adj.h:235
clib_min
#define clib_min(x, y)
Definition: clib.h:342
CLIB_CACHE_LINE_BYTES
#define CLIB_CACHE_LINE_BYTES
Definition: cache.h:58
tcp_tstamp
static u32 tcp_tstamp(tcp_connection_t *tc)
Generate timestamp for tcp connection.
Definition: tcp_inlines.h:198
vlib_node_registration_t
struct _vlib_node_registration vlib_node_registration_t
ip6_inlines.h
tcp_cc_loss
static void tcp_cc_loss(tcp_connection_t *tc)
Definition: tcp_cc.h:42
ip4_header_t::dst_address
ip4_address_t dst_address
Definition: ip4_packet.h:125
vlib_buffer_t::current_length
u16 current_length
Nbytes between current data and the end of this buffer.
Definition: buffer.h:122
tcp_is_fin
#define tcp_is_fin(_th)
Definition: tcp_packet.h:90
TRANSPORT_MAX_HDRS_LEN
#define TRANSPORT_MAX_HDRS_LEN
Definition: transport_types.h:25
TCP_MAX_WND_SCALE
#define TCP_MAX_WND_SCALE
Definition: tcp_packet.h:172
TCP_RTO_SYN_RETRIES
#define TCP_RTO_SYN_RETRIES
Definition: tcp_types.h:88
TCP_RXT_MAX_BURST
#define TCP_RXT_MAX_BURST
Definition: tcp_types.h:35
tcp_time_now_us
static f64 tcp_time_now_us(u32 thread_index)
Definition: tcp_inlines.h:205
TCP_RTO_BOFF_MAX
#define TCP_RTO_BOFF_MAX
Definition: tcp_types.h:90
transport_send_params_
Definition: transport.h:45
data
u8 data[128]
Definition: ipsec_types.api:95
tcp_buffer_hdr
static tcp_header_t * tcp_buffer_hdr(vlib_buffer_t *b)
Definition: tcp_inlines.h:22
transport_rx_fifo_req_deq_ntf
static void transport_rx_fifo_req_deq_ntf(transport_connection_t *tc)
Definition: session.h:579
tcp_send_window_update_ack
void tcp_send_window_update_ack(tcp_connection_t *tc)
Send window update ack.
Definition: tcp_output.c:1058
vlib_validate_buffer_enqueue_x1
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
Definition: buffer_node.h:224
always_inline
#define always_inline
Definition: rdma_mlx5dv.h:23
clib_bihash_value
template key/value backing page structure
Definition: bihash_doc.h:44
space
description No buffer space
Definition: ikev2.api:563
tcp_transmit_unsent
static int tcp_transmit_unsent(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Definition: tcp_output.c:1595
sack_scoreboard_hole_t
struct _sack_scoreboard_hole sack_scoreboard_hole_t
ip4_header_t::src_address
ip4_address_t src_address
Definition: ip4_packet.h:125
tcp_update_rcv_wnd
static void tcp_update_rcv_wnd(tcp_connection_t *tc)
Definition: tcp_output.c:116
tcp_enqueue_half_open
static void tcp_enqueue_half_open(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, vlib_buffer_t *b, u32 bi)
Definition: tcp_output.c:545
scoreboard_next_rxt_hole
sack_scoreboard_hole_t * scoreboard_next_rxt_hole(sack_scoreboard_t *sb, sack_scoreboard_hole_t *start, u8 have_unsent, u8 *can_rescue, u8 *snd_limited)
Figure out the next hole to retransmit.
Definition: tcp_sack.c:194
tcp_bt_track_rxt
void tcp_bt_track_rxt(tcp_connection_t *tc, u32 start, u32 end)
Track a tcp retransmission.
Definition: tcp_bt.c:338
TCP_OPTION_LEN_TIMESTAMP
#define TCP_OPTION_LEN_TIMESTAMP
Definition: tcp_packet.h:167
tcp_program_ack
void tcp_program_ack(tcp_connection_t *tc)
Definition: tcp_output.c:1019
session_tx_fifo_peek_bytes
int session_tx_fifo_peek_bytes(transport_connection_t *tc, u8 *buffer, u32 offset, u32 max_bytes)
Definition: session.c:684
VLIB_NODE_PROTO_HINT_TCP
@ VLIB_NODE_PROTO_HINT_TCP
Definition: node.h:64
tcp_connection_tx_pacer_reset
void tcp_connection_tx_pacer_reset(tcp_connection_t *tc, u32 window, u32 start_bucket)
Definition: tcp.c:1380
format
description fragment has unexpected format
Definition: map.api:433
ASSERT
#define ASSERT(truth)
Definition: error_bootstrap.h:69
tcp_program_dupack
void tcp_program_dupack(tcp_connection_t *tc)
Definition: tcp_output.c:1029
TCP_OPTION_LEN_SACK_PERMITTED
#define TCP_OPTION_LEN_SACK_PERMITTED
Definition: tcp_packet.h:166
tcp_in_fastrecovery
#define tcp_in_fastrecovery(tc)
Definition: tcp_types.h:415
format_get_indent
static u32 format_get_indent(u8 *s)
Definition: format.h:72
data_len
u8 data_len
Definition: ikev2_types.api:24
seq_lt
#define seq_lt(_s1, _s2)
Definition: tcp_packet.h:178
vlib_put_next_frame
vlib_put_next_frame(vm, node, next_index, 0)
tcp_window_compute_scale
static u8 tcp_window_compute_scale(u32 window)
Definition: tcp_output.c:73
ip_csum_with_carry
static ip_csum_t ip_csum_with_carry(ip_csum_t sum, ip_csum_t x)
Definition: ip_packet.h:248
u32
unsigned int u32
Definition: types.h:88
vlib_buffer_make_headroom
static void * vlib_buffer_make_headroom(vlib_buffer_t *b, u8 size)
Make head room, typically for packet headers.
Definition: buffer.h:378
tcp_make_established_options
static int tcp_make_established_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:230
tcp.h
tcp_send_acks
static int tcp_send_acks(tcp_connection_t *tc, u32 max_burst_size)
Definition: tcp_output.c:1920
foreach_tcp4_output_next
#define foreach_tcp4_output_next
Definition: tcp_output.c:31
tcp_zero_rwnd_sent
#define tcp_zero_rwnd_sent(tc)
Definition: tcp_types.h:437
scoreboard_init_rxt
void scoreboard_init_rxt(sack_scoreboard_t *sb, u32 snd_una)
Definition: tcp_sack.c:254
n_bytes
u32 n_bytes
Definition: interface_output.c:421
tcp_send_fin
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
Definition: tcp_output.c:849
tcp_connection_get
static tcp_connection_t * tcp_connection_get(u32 conn_index, u32 thread_index)
Definition: tcp_inlines.h:30
FIB_PROTOCOL_IP6
@ FIB_PROTOCOL_IP6
Definition: fib_types.h:37
tcp_push_hdr_i
static void tcp_push_hdr_i(tcp_connection_t *tc, vlib_buffer_t *b, u32 snd_nxt, u8 compute_opts, u8 maybe_burst, u8 update_snd_nxt)
Push TCP header and update connection variables.
Definition: tcp_output.c:899
dst
vl_api_ip4_address_t dst
Definition: pnat.api:41
session_worker_::vm
vlib_main_t * vm
Convenience pointer to this worker's vlib_main.
Definition: session.h:104
scoreboard_clear_reneging
void scoreboard_clear_reneging(sack_scoreboard_t *sb, u32 start, u32 end)
Definition: tcp_sack.c:297
tcp_zero_rwnd_sent_off
#define tcp_zero_rwnd_sent_off(tc)
Definition: tcp_types.h:439
tcp_in_cong_recovery
#define tcp_in_cong_recovery(tc)
Definition: tcp_types.h:425
TCP_N_ERROR
@ TCP_N_ERROR
Definition: tcp.h:40
tcp_retransmit_no_sack
static int tcp_retransmit_no_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Fast retransmit without SACK info.
Definition: tcp_output.c:1839
tcp_fastrecovery_first
#define tcp_fastrecovery_first(tc)
Definition: tcp_types.h:421
pool_elts
static uword pool_elts(void *v)
Number of active elements in a pool.
Definition: pool.h:127
tcp_retransmit_should_retry_head
static u8 tcp_retransmit_should_retry_head(tcp_connection_t *tc, sack_scoreboard_t *sb)
Definition: tcp_output.c:1658
tcp_send_synack
void tcp_send_synack(tcp_connection_t *tc)
Definition: tcp_output.c:819
ip6_tcp_compute_checksum_custom
u16 ip6_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
Definition: tcp_output.c:369
tcp_window_to_advertise
static u32 tcp_window_to_advertise(tcp_connection_t *tc, tcp_state_t state)
Compute and return window to advertise, scaled as per RFC1323.
Definition: tcp_output.c:156
TRANSPORT_PACER_MIN_BURST
#define TRANSPORT_PACER_MIN_BURST
Definition: transport.h:23
ip6_header_t
Definition: ip6_packet.h:294
round_down_pow2
static uword round_down_pow2(uword x, uword pow2)
Definition: clib.h:273
tcp_program_retransmit
void tcp_program_retransmit(tcp_connection_t *tc)
Definition: tcp_output.c:1041
TCP_RESET_NEXT_IP_LOOKUP
@ TCP_RESET_NEXT_IP_LOOKUP
Definition: tcp_output.c:2326
tcp_update_time_now
static void tcp_update_time_now(tcp_worker_ctx_t *wrk)
Definition: tcp_inlines.h:221
tcp_retransmit_sack
static int tcp_retransmit_sack(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 burst_size)
Do retransmit with SACKs.
Definition: tcp_output.c:1685
adj_index_t
u32 adj_index_t
An index for adjacencies.
Definition: adj_types.h:30
ip6_header_t::src_address
ip6_address_t src_address
Definition: ip6_packet.h:310
adj_nbr_find
adj_index_t adj_nbr_find(fib_protocol_t nh_proto, vnet_link_t link_type, const ip46_address_t *nh_addr, u32 sw_if_index)
Lookup neighbor adjacency.
Definition: adj_nbr.c:109
clib_memset
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
vlib_main_t
Definition: main.h:102
tcp_prepare_retransmit_segment
static u32 tcp_prepare_retransmit_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Build a retransmit segment.
Definition: tcp_output.c:1200
ip4_header_t::ip_version_and_header_length
u8 ip_version_and_header_length
Definition: ip4_packet.h:93
vlib_node_t
Definition: node.h:247
vlib_add_trace
void * vlib_add_trace(vlib_main_t *vm, vlib_node_runtime_t *r, vlib_buffer_t *b, u32 n_data_bytes)
Definition: trace.c:628
transport_send_params_::max_burst_size
u32 max_burst_size
Definition: transport.h:59
TCP_FLAG_ACK
#define TCP_FLAG_ACK
Definition: fa_node.h:16
TCP_WND_MAX
#define TCP_WND_MAX
Definition: tcp_packet.h:171
tcp_bt_check_app_limited
void tcp_bt_check_app_limited(tcp_connection_t *tc)
Check if sample to be generated is app limited.
Definition: tcp_bt.c:286
tcp_flight_size
static u32 tcp_flight_size(const tcp_connection_t *tc)
Our estimate of the number of bytes in flight (pipe size)
Definition: tcp_inlines.h:89
b
vlib_buffer_t ** b
Definition: nat44_ei_out2in.c:717
u8
unsigned char u8
Definition: types.h:56
transport_max_rx_enqueue
static u32 transport_max_rx_enqueue(transport_connection_t *tc)
Definition: session.h:537
VNET_LINK_IP6
@ VNET_LINK_IP6
Definition: interface.h:348
vlib_buffer_get_current
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
Definition: buffer.h:257
scoreboard_first_hole
static sack_scoreboard_hole_t * scoreboard_first_hole(sack_scoreboard_t *sb)
Definition: tcp_sack.h:59
ip_csum_t
uword ip_csum_t
Definition: ip_packet.h:245
i
int i
Definition: flowhash_template.h:376
tcp_cfg
#define tcp_cfg
Definition: tcp.h:272
clib_warning
#define clib_warning(format, args...)
Definition: error.h:59
tcp_fastrecovery_first_off
#define tcp_fastrecovery_first_off(tc)
Definition: tcp_types.h:423
TCP_OUTPUT_NEXT_IP_LOOKUP
@ TCP_OUTPUT_NEXT_IP_LOOKUP
Definition: tcp_output.c:25
nexts
u16 nexts[VLIB_FRAME_SIZE]
Definition: nat44_ei_out2in.c:718
tcp_prepare_segment
static int tcp_prepare_segment(tcp_worker_ctx_t *wrk, tcp_connection_t *tc, u32 offset, u32 max_deq_bytes, vlib_buffer_t **b)
Allocate a new buffer and build a new tcp segment.
Definition: tcp_output.c:1084
tcp_error.def
tcp_tx_trace_t::tcp_connection
tcp_connection_t tcp_connection
Definition: tcp_output.c:52
tcp_output_push_ip
static void tcp_output_push_ip(vlib_main_t *vm, vlib_buffer_t *b0, tcp_connection_t *tc0, u8 is_ip4)
Definition: tcp_output.c:2083
vlib_buffer_free_one
static void vlib_buffer_free_one(vlib_main_t *vm, u32 buffer_index)
Free one buffer. Shorthand to free a single buffer chain.
Definition: buffer_funcs.h:1012
ip4_tcp_compute_checksum_custom
u16 ip4_tcp_compute_checksum_custom(vlib_main_t *vm, vlib_buffer_t *p0, ip46_address_t *src, ip46_address_t *dst)
Definition: tcp_output.c:395
TCP_ESTABLISH_TIME
#define TCP_ESTABLISH_TIME
Definition: tcp_types.h:91
tcp_make_reset_in_place
static int tcp_make_reset_in_place(vlib_main_t *vm, vlib_buffer_t *b, u8 is_ip4)
Definition: tcp_output.c:574
TCP_OPTION_LEN_MSS
#define TCP_OPTION_LEN_MSS
Definition: tcp_packet.h:164
tcp_do_retransmit
static int tcp_do_retransmit(tcp_connection_t *tc, u32 max_burst_size)
Definition: tcp_output.c:1975
vlib_node_runtime_t
Definition: node.h:454
tcp_persist_timer_set
static void tcp_persist_timer_set(tcp_timer_wheel_t *tw, tcp_connection_t *tc)
Definition: tcp_timer.h:69
tcp_worker_ctx_
Definition: tcp.h:75
vlib_buffer_push_tcp
static void * vlib_buffer_push_tcp(vlib_buffer_t *b, u16 sp_net, u16 dp_net, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
Definition: tcp_inlines.h:443
from
from
Definition: nat44_ei_hairpinning.c:415
PREDICT_TRUE
#define PREDICT_TRUE(x)
Definition: clib.h:125
tcp_check_if_gso
static void tcp_check_if_gso(tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:2099
vlib_buffer_t::total_length_not_including_first_buffer
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
Definition: buffer.h:176
vlib_get_next_frame
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
Definition: node_funcs.h:395
TCP_OUTPUT_NEXT_DROP
@ TCP_OUTPUT_NEXT_DROP
Definition: tcp_output.c:24
TCP_CC_EVT_START_TX
@ TCP_CC_EVT_START_TX
Definition: tcp_types.h:264
tcp_worker_stats_inc
#define tcp_worker_stats_inc(_wrk, _stat, _val)
Definition: tcp.h:128
VLIB_TX
@ VLIB_TX
Definition: defs.h:47
TCP_OPTION_LEN_SACK_BLOCK
#define TCP_OPTION_LEN_SACK_BLOCK
Definition: tcp_packet.h:168
adj_get
static ip_adjacency_t * adj_get(adj_index_t adj_index)
Get a pointer to an adjacency object from its index.
Definition: adj.h:470
n_left_from
n_left_from
Definition: nat44_ei_hairpinning.c:416
tcp_initial_window_to_advertise
u32 tcp_initial_window_to_advertise(tcp_connection_t *tc)
Compute initial window and scale factor.
Definition: tcp_output.c:104
ip6_header_t::ip_version_traffic_class_and_flow_label
u32 ip_version_traffic_class_and_flow_label
Definition: ip6_packet.h:297
tcp_output_handle_link_local
static void tcp_output_handle_link_local(tcp_connection_t *tc0, vlib_buffer_t *b0, u16 *next0, u32 *error0)
Definition: tcp_output.c:2027
TCP_OUTPUT_NEXT_IP_REWRITE
@ TCP_OUTPUT_NEXT_IP_REWRITE
Definition: tcp_output.c:26
vlib_buffer_push_ip6_custom
static void * vlib_buffer_push_ip6_custom(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto, u32 flow_label)
Push IPv6 header to buffer.
Definition: ip6_inlines.h:212
tcp_is_descheduled
static u8 tcp_is_descheduled(tcp_connection_t *tc)
Definition: tcp_inlines.h:385
format_white_space
u8 * format_white_space(u8 *s, va_list *va)
Definition: std-formats.c:129
ip4_next_header
static void * ip4_next_header(ip4_header_t *i)
Definition: ip4_packet.h:196
tcp_initial_wnd_unscaled
static u32 tcp_initial_wnd_unscaled(tcp_connection_t *tc)
TCP's initial window.
Definition: tcp_output.c:85
session_add_self_custom_tx_evt
void session_add_self_custom_tx_evt(transport_connection_t *tc, u8 has_prio)
Definition: session.c:128
vlib_buffer_t::flags
u32 flags
buffer flags: VLIB_BUFFER_FREE_LIST_INDEX_MASK: bits used to store free list index,...
Definition: buffer.h:133
TCP_OUTPUT_N_NEXT
@ TCP_OUTPUT_N_NEXT
Definition: tcp_output.c:28
session_stream_connect_notify
int session_stream_connect_notify(transport_connection_t *tc, session_error_t err)
Definition: session.c:888
vlib_buffer_t
VLIB buffer representation.
Definition: buffer.h:111
VLIB_REGISTER_NODE
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:169
tcp_half_open_connection_cleanup
int tcp_half_open_connection_cleanup(tcp_connection_t *tc)
Try to cleanup half-open connection.
Definition: tcp.c:212
tcp_available_cc_snd_space
static u32 tcp_available_cc_snd_space(const tcp_connection_t *tc)
Estimate of how many bytes we can still push into the network.
Definition: tcp_inlines.h:166
flags
vl_api_wireguard_peer_flags_t flags
Definition: wireguard.api:105