FD.io VPP  v17.04.2-2-ga8f93f8
Vector Packet Processing
tcp_output.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include <vnet/tcp/tcp.h>
17 #include <vnet/lisp-cp/packets.h>
18 
21 
/* Next-node enum for the TCP output nodes.
 * NOTE(review): the enumerators and the closing typedef name (listing lines
 * 24-27) are missing from this listing. The tag is spelled
 * `_tcp_output_nect`, presumably a typo for `_next` -- confirm before
 * renaming, since other files may reference it. */
22 typedef enum _tcp_output_nect
23 {
28 
/* X-macro list of (next-slot, graph-node-name) pairs for the IPv4 output
 * node. The user supplies `_(sym, name)` before expanding it. */
29 #define foreach_tcp4_output_next \
30  _ (DROP, "error-drop") \
31  _ (IP_LOOKUP, "ip4-lookup")
32 
/* Same list for the IPv6 output node; only the lookup node name differs. */
33 #define foreach_tcp6_output_next \
34  _ (DROP, "error-drop") \
35  _ (IP_LOOKUP, "ip6-lookup")
36 
/* Table of error strings, built by including tcp_error.def with
 * tcp_error(n,s) defined to emit only the string `s`. Referenced by the
 * node registrations below via .error_strings. */
37 static char *tcp_error_strings[] = {
38 #define tcp_error(n,s) s,
39 #include <vnet/tcp/tcp_error.def>
40 #undef tcp_error
41 };
42 
/* NOTE(review): the members and typedef name of this struct (listing lines
 * 45-48) are missing from this listing; presumably the tx trace record. */
43 typedef struct
44 {
49 
/* Placeholder used wherever the real interface MTU / MSS is not yet
 * discovered (see the TODO/XXX markers where it is read). */
50 u16 dummy_mtu = 400;
51 
/**
 * Trace formatter used by the tcp4/tcp6 output nodes (.format_trace).
 *
 * Pulls the vlib_main_t and vlib_node_t pointers off the va_list (both are
 * unused) and appends the placeholder text "TBD\n" to the format vector.
 *
 * @param s    format vector to append to
 * @param args variadic arguments supplied by the trace infrastructure
 * @return the (possibly reallocated) format vector
 */
52 u8 *
53 format_tcp_tx_trace (u8 * s, va_list * args)
54 {
55  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
56  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
57 
58  s = format (s, "TBD\n");
59 
60  return s;
61 }
62 
/* Update tc->snd_mss to the smaller of the MSS stored in tc->opt.mss and
 * the placeholder dummy_mtu. If the result is 0 (tc->opt.mss was 0), a
 * warning is logged and dummy_mtu is used instead.
 * NOTE(review): the line carrying the function name and parameter list
 * (listing line 64) is missing from this listing; the body uses a
 * connection pointer named `tc`. */
63 void
65 {
66  u16 snd_mss;
67 
68  /* TODO find our iface MTU */
69  snd_mss = dummy_mtu;
70 
71  /* TODO cache mss and consider PMTU discovery */
72  snd_mss = tc->opt.mss < snd_mss ? tc->opt.mss : snd_mss;
73 
74  tc->snd_mss = snd_mss;
75 
 /* An MSS of 0 would make every segment empty; fall back to the default */
76  if (tc->snd_mss == 0)
77  {
78  clib_warning ("snd mss is 0");
79  tc->snd_mss = dummy_mtu;
80  }
81 }
82 
/**
 * Compute the window scale factor for a given amount of receive space.
 *
 * Returns the smallest shift such that (available_space >> shift) is not
 * greater than TCP_WND_MAX, capped at TCP_MAX_WND_SCALE.
 *
 * @param available_space receive space, in bytes, the window should cover
 * @return shift count in the range [0, TCP_MAX_WND_SCALE]
 */
83 static u8
84 tcp_window_compute_scale (u32 available_space)
85 {
86  u8 wnd_scale = 0;
87  while (wnd_scale < TCP_MAX_WND_SCALE
88  && (available_space >> wnd_scale) > TCP_WND_MAX)
89  wnd_scale++;
90  return wnd_scale;
91 }
92 
93 /**
94  * TCP's IW as recommended by RFC6928
 *
 * NOTE(review): the return type, signature and the single body statement
 * (listing lines 96, 97 and 99) are missing from this listing, so the
 * actual value computed cannot be documented from here. The only visible
 * caller passes the connection and stores the result in tc->rcv_wnd.
95  */
98 {
100 }
101 
102 /**
103  * Compute initial window and scale factor. As per RFC1323, window field in
104  * SYN and SYN-ACK segments is never scaled.
 *
 * Side effects: sets tc->rcv_wscale (derived from TCP_MAX_RX_FIFO_SIZE) and
 * tc->rcv_wnd (from tcp_initial_wnd_unscaled).
 *
 * @return the unscaled window to place in the header, clamped to
 *         TCP_WND_MAX
 *
 * NOTE(review): the signature line (listing line 107) is missing from this
 * listing; the body uses a connection pointer named `tc`.
105  */
106 u32
108 {
109  u32 max_fifo;
110 
111  /* Initial wnd for SYN. Fifos are not allocated yet.
112  * Use some predefined value. For SYN-ACK we still want the
113  * scale to be computed in the same way */
114  max_fifo = TCP_MAX_RX_FIFO_SIZE;
115 
116  tc->rcv_wscale = tcp_window_compute_scale (max_fifo);
117  tc->rcv_wnd = tcp_initial_wnd_unscaled (tc);
118 
119  return clib_min (tc->rcv_wnd, TCP_WND_MAX);
120 }
121 
122 /**
123  * Compute and return window to advertise, scaled as per RFC1323
 *
 * Side effects: updates tc->rcv_wnd and, when the window becomes zero, sets
 * TCP_CONN_SENT_RCV_WND0 in tc->flags.
 *
 * @return tc->rcv_wnd shifted right by tc->rcv_wscale
 *
 * NOTE(review): the signature (listing line 126) and the statement guarded
 * by the `state < TCP_STATE_ESTABLISHED` test (listing line 131) are missing
 * from this listing. Presumably that branch returns the initial window --
 * confirm against the real source.
124  */
125 u32
127 {
128  u32 available_space, max_fifo, observed_wnd;
129 
130  if (state < TCP_STATE_ESTABLISHED)
132 
133  /*
134  * Figure out how much space we have available
135  */
136  available_space = stream_session_max_enqueue (&tc->connection);
137  max_fifo = stream_session_fifo_size (&tc->connection);
138 
139  ASSERT (tc->opt.mss < max_fifo);
140 
 /* Advertise nothing rather than a tiny window: less than one MSS and
  * less than 1/8 of the fifo is treated as no space */
141  if (available_space < tc->opt.mss && available_space < max_fifo / 8)
142  available_space = 0;
143 
144  /*
145  * Use the above and what we know about what we've previously advertised
146  * to compute the new window
147  */
 /* Previously advertised window minus what was received since rcv_las */
148  observed_wnd = tc->rcv_wnd - (tc->rcv_nxt - tc->rcv_las);
149 
150  /* Bad. Thou shalt not shrink */
 /* NOTE(review): this branch only logs; nothing prevents the assignment
  * below from shrinking the window, and the message says "Didn't shrink"
  * although rcv_wnd is then set to 0. Confirm the intended behaviour. */
151  if (available_space < observed_wnd)
152  {
153  if (available_space == 0)
154  clib_warning ("Didn't shrink rcv window despite not having space");
155  }
156 
157  tc->rcv_wnd = clib_min (available_space, TCP_WND_MAX << tc->rcv_wscale);
158 
159  if (tc->rcv_wnd == 0)
160  {
161  tc->flags |= TCP_CONN_SENT_RCV_WND0;
162  }
163 
164  return tc->rcv_wnd >> tc->rcv_wscale;
165 }
166 
167 /**
168  * Write TCP options to segment.
 *
 * Serializes the options flagged in `opts` starting at `data`, in the
 * order MSS, window scale, SACK-permitted, timestamp, SACK blocks. If the
 * total is not a multiple of 4 an EOL byte is written, followed by NOOP
 * bytes up to the next 4-byte boundary.
 *
 * @return number of bytes written, always a multiple of 4
 *
 * NOTE(review): the signature (listing line 171), the SACK-permitted length
 * byte (line 197) and the second argument of the clib_min call (line 218)
 * are missing from this listing.
169  */
170 u32
172 {
173  u32 opts_len = 0;
174  u32 buf, seq_len = 4;
175 
176  if (tcp_opts_mss (opts))
177  {
178  *data++ = TCP_OPTION_MSS;
179  *data++ = TCP_OPTION_LEN_MSS;
 /* NOTE(review): `buf` is a u32 but only sizeof (opts->mss) bytes are
  * copied from its start. Assuming mss is a u16, that is the wrong half
  * of the word on a big-endian host -- confirm / use a u16 temporary. */
180  buf = clib_host_to_net_u16 (opts->mss);
181  clib_memcpy (data, &buf, sizeof (opts->mss));
182  data += sizeof (opts->mss);
183  opts_len += TCP_OPTION_LEN_MSS;
184  }
185 
186  if (tcp_opts_wscale (opts))
187  {
188  *data++ = TCP_OPTION_WINDOW_SCALE;
189  *data++ = TCP_OPTION_LEN_WINDOW_SCALE;
190  *data++ = opts->wscale;
191  opts_len += TCP_OPTION_LEN_WINDOW_SCALE;
192  }
193 
194  if (tcp_opts_sack_permitted (opts))
195  {
196  *data++ = TCP_OPTION_SACK_PERMITTED;
198  opts_len += TCP_OPTION_LEN_SACK_PERMITTED;
199  }
200 
201  if (tcp_opts_tstamp (opts))
202  {
203  *data++ = TCP_OPTION_TIMESTAMP;
204  *data++ = TCP_OPTION_LEN_TIMESTAMP;
205  buf = clib_host_to_net_u32 (opts->tsval);
206  clib_memcpy (data, &buf, sizeof (opts->tsval));
207  data += sizeof (opts->tsval);
208  buf = clib_host_to_net_u32 (opts->tsecr);
209  clib_memcpy (data, &buf, sizeof (opts->tsecr));
210  data += sizeof (opts->tsecr);
211  opts_len += TCP_OPTION_LEN_TIMESTAMP;
212  }
213 
214  if (tcp_opts_sack (opts))
215  {
216  int i;
 /* Number of blocks is clamped here; the clamp value is on the missing
  * line 218 */
217  u32 n_sack_blocks = clib_min (vec_len (opts->sacks),
219 
220  if (n_sack_blocks != 0)
221  {
222  *data++ = TCP_OPTION_SACK_BLOCK;
 /* kind byte + length byte + the blocks themselves */
223  *data++ = 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
224  for (i = 0; i < n_sack_blocks; i++)
225  {
226  buf = clib_host_to_net_u32 (opts->sacks[i].start);
227  clib_memcpy (data, &buf, seq_len);
228  data += seq_len;
229  buf = clib_host_to_net_u32 (opts->sacks[i].end);
230  clib_memcpy (data, &buf, seq_len);
231  data += seq_len;
232  }
233  opts_len += 2 + n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
234  }
235  }
236 
237  /* Terminate TCP options */
238  if (opts_len % 4)
239  {
240  *data++ = TCP_OPTION_EOL;
241  opts_len += TCP_OPTION_LEN_EOL;
242  }
243 
244  /* Pad with NOOPs to a u32 boundary */
245  while (opts_len % 4)
246  {
247  *data++ = TCP_OPTION_NOOP;
248  opts_len += TCP_OPTION_LEN_NOOP;
249  }
250  return opts_len;
251 }
252 
/* Fill `opts` with the options sent on a SYN: MSS (placeholder dummy_mtu),
 * window scale (`wnd_scale`), timestamp (tsval = now, tsecr = 0) and
 * SACK-permitted. Returns the options length rounded up to TCP_OPTS_ALIGN.
 * NOTE(review): the signature (listing line 254) and the `len +=` lines for
 * the window-scale, timestamp and SACK-permitted options (lines 264, 269,
 * 272) are missing from this listing. */
253 always_inline int
255 {
256  u8 len = 0;
257 
258  opts->flags |= TCP_OPTS_FLAG_MSS;
259  opts->mss = dummy_mtu; /*XXX discover that */
260  len += TCP_OPTION_LEN_MSS;
261 
262  opts->flags |= TCP_OPTS_FLAG_WSCALE;
263  opts->wscale = wnd_scale;
265 
266  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
267  opts->tsval = tcp_time_now ();
268  opts->tsecr = 0;
270 
271  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
273 
274  /* Align to needed boundary */
275  len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
276  return len;
277 }
278 
/* Fill `opts` with the options sent on a SYN-ACK. MSS is always included;
 * window scale, timestamp and SACK-permitted are included only if the
 * matching flag is set in tc->opt. Returns the options length rounded up
 * to TCP_OPTS_ALIGN.
 * NOTE(review): opts->flags is only OR-ed here, so the caller must have
 * zeroed `opts` first. The signature (listing line 280) and the `len +=`
 * lines inside each conditional (lines 292, 300, 306) are missing from
 * this listing. */
279 always_inline int
281 {
282  u8 len = 0;
283 
284  opts->flags |= TCP_OPTS_FLAG_MSS;
285  opts->mss = dummy_mtu; /*XXX discover that */
286  len += TCP_OPTION_LEN_MSS;
287 
288  if (tcp_opts_wscale (&tc->opt))
289  {
290  opts->flags |= TCP_OPTS_FLAG_WSCALE;
291  opts->wscale = tc->rcv_wscale;
293  }
294 
295  if (tcp_opts_tstamp (&tc->opt))
296  {
297  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
298  opts->tsval = tcp_time_now ();
 /* Echo the most recent timestamp value stored for the connection */
299  opts->tsecr = tc->tsval_recent;
301  }
302 
303  if (tcp_opts_sack_permitted (&tc->opt))
304  {
305  opts->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
307  }
308 
309  /* Align to needed boundary */
310  len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
311  return len;
312 }
313 
/* Fill `opts` with the options for a segment on an established connection:
 * timestamp if enabled in tc->opt, and SACK blocks if SACK is permitted and
 * tc->snd_sacks is non-empty. opts->flags is reset to 0 first. Returns the
 * options length rounded up to TCP_OPTS_ALIGN.
 * NOTE(review): the signature (listing line 315) and the timestamp
 * `len +=` line (line 326) are missing from this listing. */
314 always_inline int
316 {
317  u8 len = 0;
318 
319  opts->flags = 0;
320 
321  if (tcp_opts_tstamp (&tc->opt))
322  {
323  opts->flags |= TCP_OPTS_FLAG_TSTAMP;
324  opts->tsval = tcp_time_now ();
325  opts->tsecr = tc->tsval_recent;
327  }
328  if (tcp_opts_sack_permitted (&tc->opt))
329  {
330  if (vec_len (tc->snd_sacks))
331  {
332  opts->flags |= TCP_OPTS_FLAG_SACK;
 /* The vector is borrowed, not copied */
333  opts->sacks = tc->snd_sacks;
334  opts->n_sack_blocks = vec_len (tc->snd_sacks);
 /* NOTE(review): the block count is not clamped here, whereas
  * tcp_options_write clamps it with clib_min. If snd_sacks can hold
  * more blocks than that clamp, the length returned here will differ
  * from the bytes actually written -- confirm. */
335  len += 2 + TCP_OPTION_LEN_SACK_BLOCK * opts->n_sack_blocks;
336  }
337  }
338 
339  /* Align to needed boundary */
340  len += (TCP_OPTS_ALIGN - len % TCP_OPTS_ALIGN) % TCP_OPTS_ALIGN;
341  return len;
342 }
343 
/* Dispatch to the option builder matching `state` and return the options
 * length it reports. States other than ESTABLISHED, FIN_WAIT_1, SYN_RCVD
 * and SYN_SENT log a warning and return 0 without touching `opts`.
 * NOTE(review): the signature (listing lines 345-346) is missing from this
 * listing; the body uses `tc`, `opts` and `state`. */
344 always_inline int
347 {
348  switch (state)
349  {
350  case TCP_STATE_ESTABLISHED:
351  case TCP_STATE_FIN_WAIT_1:
352  return tcp_make_established_options (tc, opts);
353  case TCP_STATE_SYN_RCVD:
354  return tcp_make_synack_options (tc, opts);
355  case TCP_STATE_SYN_SENT:
356  return tcp_make_syn_options (opts, tc->rcv_wscale);
357  default:
358  clib_warning ("Not handled!");
359  return 0;
360  }
361 }
362 
/* Pop one buffer index from the calling thread's tm->tx_buffers cache into
 * *bidx. When the cache is empty it is first refilled with up to 32
 * buffers from the default free list.
 *
 * CAUTION: on buffer shortage this macro executes a bare `return;` in the
 * CALLER, so it can only be used inside functions returning void, and any
 * connection state the caller changed before invoking it is left as is.
 * `tm` is expanded unparenthesized and more than once. */
363 #define tcp_get_free_buffer_index(tm, bidx) \
364 do { \
365  u32 *my_tx_buffers, n_free_buffers; \
366  u32 cpu_index = tm->vlib_main->cpu_index; \
367  my_tx_buffers = tm->tx_buffers[cpu_index]; \
368  if (PREDICT_FALSE(vec_len (my_tx_buffers) == 0)) \
369  { \
370  n_free_buffers = 32; /* TODO config or macro */ \
371  vec_validate (my_tx_buffers, n_free_buffers - 1); \
372  _vec_len(my_tx_buffers) = vlib_buffer_alloc_from_free_list ( \
373  tm->vlib_main, my_tx_buffers, n_free_buffers, \
374  VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); \
375  tm->tx_buffers[cpu_index] = my_tx_buffers; \
376  } \
377  /* buffer shortage */ \
378  if (PREDICT_FALSE (vec_len (my_tx_buffers) == 0)) \
379  return; \
380  *bidx = my_tx_buffers[_vec_len (my_tx_buffers)-1]; \
381  _vec_len (my_tx_buffers) -= 1; \
382 } while (0)
383 
/* Reset a (possibly chained) buffer so it can be refilled: current_data and
 * current_length are zeroed on every buffer in the chain, and the TCP
 * per-buffer flags of the head buffer are cleared.
 * NOTE(review): the signature (listing line 385), one statement inside the
 * loop (line 392) and the statement following the "Leave enough space for
 * headers" comment (line 398) are missing from this listing. */
384 always_inline void
386 {
387  vlib_buffer_t *it = b;
388  do
389  {
390  it->current_data = 0;
391  it->current_length = 0;
393  }
 /* Advance to the next buffer while the chain continues */
394  while ((it->flags & VLIB_BUFFER_NEXT_PRESENT)
395  && (it = vlib_get_buffer (vm, it->next_buffer)));
396 
397  /* Leave enough space for headers */
399  vnet_buffer (b)->tcp.flags = 0;
400 }
401 
402 /**
403  * Prepare ACK
 *
 * Pushes a TCP header carrying `flags`, the current snd_nxt/rcv_nxt and the
 * advertised window onto `b`, writes the established-state options after
 * it, and tags the buffer with the connection index.
 *
 * NOTE(review): the first signature line (listing line 406) is missing from
 * this listing; the body uses `tc`, `b`, `state` and `flags`.
404  */
405 void
407  u8 flags)
408 {
409  tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
410  u8 tcp_opts_len, tcp_hdr_opts_len;
411  tcp_header_t *th;
412  u16 wnd;
413 
414  wnd = tcp_window_to_advertise (tc, state);
415 
416  /* Make and write options */
417  tcp_opts_len = tcp_make_established_options (tc, snd_opts);
418  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
419 
420  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
421  tc->rcv_nxt, tcp_hdr_opts_len, flags, wnd);
422 
 /* Options go right after the fixed header */
423  tcp_options_write ((u8 *) (th + 1), snd_opts);
424 
425  /* Tag the buffer with the connection index */
426  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
427 }
428 
429 /**
430  * Convert buffer to ACK
 *
 * Resets `b`, builds an ACK-only header as for the ESTABLISHED state and
 * marks the buffer with TCP_BUF_FLAG_ACK.
 *
 * NOTE(review): the signature line (listing line 433) is missing from this
 * listing; the body uses `tc` and `b`.
431  */
432 void
434 {
435  tcp_main_t *tm = vnet_get_tcp_main ();
436  vlib_main_t *vm = tm->vlib_main;
437 
438  tcp_reuse_buffer (vm, b);
439  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK);
440  vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK;
441 }
442 
443 /**
444  * Convert buffer to FIN-ACK
 *
 * Resets `b`, builds a FIN|ACK header and advances tc->snd_nxt by one.
 *
 * NOTE(review): the signature line (listing line 447) is missing from this
 * listing; the body uses `tc` and `b`.
445  */
446 void
448 {
449  tcp_main_t *tm = vnet_get_tcp_main ();
450  vlib_main_t *vm = tm->vlib_main;
451  u8 flags = 0;
452 
453  tcp_reuse_buffer (vm, b);
454 
455  flags = TCP_FLAG_FIN | TCP_FLAG_ACK;
456  tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, flags);
457 
458  /* Reset flags, make sure ack is sent */
 /* Clearing DUPACK keeps the output node's DUPACK filter from dropping it */
459  vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK;
460 
 /* The FIN takes up one sequence number */
461  tc->snd_nxt += 1;
462 }
463 
464 /**
465  * Convert buffer to SYN-ACK
 *
 * Resets `b`, picks a new initial send sequence (updating iss, snd_una,
 * snd_nxt and snd_una_max), computes the initial window, then pushes a
 * SYN|ACK header plus SYN-ACK options and tags the buffer.
 *
 * NOTE(review): the signature line (listing line 468) and the statement
 * after the "Init retransmit timer" comment (line 506) are missing from
 * this listing.
466  */
467 void
469 {
470  tcp_main_t *tm = vnet_get_tcp_main ();
471  vlib_main_t *vm = tm->vlib_main;
472  tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
473  u8 tcp_opts_len, tcp_hdr_opts_len;
474  tcp_header_t *th;
475  u16 initial_wnd;
476  u32 time_now;
477 
 /* The SYN-ACK option builder only ORs flags in, so start from zero */
478  memset (snd_opts, 0, sizeof (*snd_opts));
479 
480  tcp_reuse_buffer (vm, b);
481 
482  /* Set random initial sequence */
 /* NOTE(review): the generator is seeded with the current TCP time only,
  * so the ISS is presumably predictable -- confirm whether that matters */
483  time_now = tcp_time_now ();
484 
485  tc->iss = random_u32 (&time_now);
486  tc->snd_una = tc->iss;
487  tc->snd_nxt = tc->iss + 1;
488  tc->snd_una_max = tc->snd_nxt;
489 
490  initial_wnd = tcp_initial_window_to_advertise (tc);
491 
492  /* Make and write options */
493  tcp_opts_len = tcp_make_synack_options (tc, snd_opts);
494  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
495 
496  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
497  tc->rcv_nxt, tcp_hdr_opts_len,
498  TCP_FLAG_SYN | TCP_FLAG_ACK, initial_wnd);
499 
500  tcp_options_write ((u8 *) (th + 1), snd_opts);
501 
502  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
503  vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK;
504 
505  /* Init retransmit timer */
507 }
508 
/* Hand a single buffer directly to ip4-lookup or ip6-lookup, bypassing the
 * TCP output node. A frame holding exactly one buffer index is obtained
 * and put per call.
 * NOTE(review): the first signature line (listing line 510) and one
 * statement before `b->error = 0` (line 516) are missing from this listing;
 * callers pass (vm, b, bi, is_ip4). */
509 always_inline void
511  u8 is_ip4)
512 {
513  u32 *to_next, next_index;
514  vlib_frame_t *f;
515 
517  b->error = 0;
518 
519  /* Default FIB for now */
520  vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
521 
522  /* Send to IP lookup */
523  next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
524  f = vlib_get_frame_to_node (vm, next_index);
525 
526  /* Enqueue the packet */
527  to_next = vlib_frame_vector_args (f);
528  to_next[0] = bi;
529  f->n_vectors = 1;
530  vlib_put_frame_to_node (vm, next_index, f);
531 }
532 
/* Turn the received packet in `b0` into a RST by rewriting it in place:
 * swap IP addresses and TCP ports, set the RST|ACK flags and seq/ack
 * numbers depending on `state`, zero the window and urgent pointer, set the
 * data offset to a bare header and recompute the checksum.
 *
 * Returns 0 on success, or -1 when `state` is TCP_STATE_CLOSED and the
 * packet is not a SYN (the packet is then left with swapped IP addresses
 * and a modified length but no reset header).
 *
 * NOTE(review): the first signature line (listing line 534) is missing from
 * this listing; the body uses `vm`, `b0`, `state` and `is_ip4`.
 * `my_thread_index` is not used in the visible body. */
533 int
535  tcp_state_t state, u32 my_thread_index, u8 is_ip4)
536 {
537  u8 tcp_hdr_len = sizeof (tcp_header_t);
538  ip4_header_t *ih4;
539  ip6_header_t *ih6;
540  tcp_header_t *th0;
541  ip4_address_t src_ip40;
542  ip6_address_t src_ip60;
543  u16 src_port0;
544  u32 tmp;
545 
546  /* Find IP and TCP headers */
547  if (is_ip4)
548  {
549  ih4 = vlib_buffer_get_current (b0);
550  th0 = ip4_next_header (ih4);
551  }
552  else
553  {
554  ih6 = vlib_buffer_get_current (b0);
555  th0 = ip6_next_header (ih6);
556  }
557 
558  /* Swap src and dst ip */
559  if (is_ip4)
560  {
561  ASSERT ((ih4->ip_version_and_header_length & 0xF0) == 0x40);
562  src_ip40.as_u32 = ih4->src_address.as_u32;
563  ih4->src_address.as_u32 = ih4->dst_address.as_u32;
564  ih4->dst_address.as_u32 = src_ip40.as_u32;
565 
566  /* Chop the end of the pkt */
 /* NOTE(review): `+=` grows the existing length by the header sizes
  * instead of truncating to them; the comment says "chop", so `=` was
  * presumably intended. As written, any payload (and more) stays in the
  * checksummed packet -- confirm and fix. */
567  b0->current_length += ip4_header_bytes (ih4) + tcp_hdr_len;
568  }
569  else
570  {
571  ASSERT ((ih6->ip_version_traffic_class_and_flow_label & 0xF0) == 0x60);
572  clib_memcpy (&src_ip60, &ih6->src_address, sizeof (ip6_address_t));
573  clib_memcpy (&ih6->src_address, &ih6->dst_address,
574  sizeof (ip6_address_t));
575  clib_memcpy (&ih6->dst_address, &src_ip60, sizeof (ip6_address_t));
576 
577  /* Chop the end of the pkt */
 /* NOTE(review): same `+=` vs `=` concern as in the IPv4 branch */
578  b0->current_length += sizeof (ip6_header_t) + tcp_hdr_len;
579  }
580 
581  /* Try to determine what/why we're actually resetting and swap
582  * src and dst ports */
583  if (state == TCP_STATE_CLOSED)
584  {
585  if (!tcp_syn (th0))
586  return -1;
587 
588  tmp = clib_net_to_host_u32 (th0->seq_number);
589 
590  /* Got a SYN for no listener. */
 /* Acknowledge the SYN (seq + 1) and send seq 0 */
591  th0->flags = TCP_FLAG_RST | TCP_FLAG_ACK;
592  th0->ack_number = clib_host_to_net_u32 (tmp + 1);
593  th0->seq_number = 0;
594 
595  }
596  else if (state >= TCP_STATE_SYN_SENT)
597  {
 /* Use the peer's ack number as our sequence number */
598  th0->flags = TCP_FLAG_RST | TCP_FLAG_ACK;
599  th0->seq_number = th0->ack_number;
600  th0->ack_number = 0;
601  }
602 
603  src_port0 = th0->src_port;
604  th0->src_port = th0->dst_port;
605  th0->dst_port = src_port0;
606  th0->window = 0;
 /* Header length in 32-bit words goes in the upper nibble; no options */
607  th0->data_offset_and_reserved = (tcp_hdr_len >> 2) << 4;
608  th0->urgent_pointer = 0;
609 
610  /* Compute checksum */
611  if (is_ip4)
612  {
613  th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih4);
614  }
615  else
616  {
617  int bogus = ~0;
618  th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih6, &bogus);
619  ASSERT (!bogus);
620  }
621 
622  return 0;
623 }
624 
625 /**
626  * Send reset without reusing existing buffer
 *
 * Takes a fresh buffer, builds a bare RST (or RST|ACK) from the ports and
 * addresses of the offending packet `pkt`, and enqueues it straight to IP
 * lookup. Returns silently if no buffer is available (see the buffer
 * macro).
 *
 * NOTE(review): the signature (listing line 629) and the statement after
 * the first "Leave enough space for headers" comment (line 645) are missing
 * from this listing; the body uses `pkt` and `is_ip4`.
627  */
628 void
630 {
631  vlib_buffer_t *b;
632  u32 bi;
633  tcp_main_t *tm = vnet_get_tcp_main ();
634  vlib_main_t *vm = tm->vlib_main;
635  u8 tcp_hdr_len, flags = 0;
636  tcp_header_t *th, *pkt_th;
637  u32 seq, ack;
638  ip4_header_t *ih4, *pkt_ih4;
639  ip6_header_t *ih6, *pkt_ih6;
640 
641  tcp_get_free_buffer_index (tm, &bi);
642  b = vlib_get_buffer (vm, bi);
643 
644  /* Leave enough space for headers */
646 
647  /* No options: header length is the bare TCP header */
648  tcp_hdr_len = sizeof (tcp_header_t);
649 
650  if (is_ip4)
651  {
652  pkt_ih4 = vlib_buffer_get_current (pkt);
653  pkt_th = ip4_next_header (pkt_ih4);
654  }
655  else
656  {
657  pkt_ih6 = vlib_buffer_get_current (pkt);
658  pkt_th = ip6_next_header (pkt_ih6);
659  }
660 
 /* seq and ack are kept in network byte order throughout */
661  if (tcp_ack (pkt_th))
662  {
663  flags = TCP_FLAG_RST;
664  seq = pkt_th->ack_number;
665  ack = 0;
666  }
667  else
668  {
669  flags = TCP_FLAG_RST | TCP_FLAG_ACK;
670  seq = 0;
671  ack = clib_host_to_net_u32 (vnet_buffer (pkt)->tcp.seq_end);
672  }
673 
674  th = vlib_buffer_push_tcp_net_order (b, pkt_th->dst_port, pkt_th->src_port,
675  seq, ack, tcp_hdr_len, flags, 0);
676 
677  /* Swap src and dst ip */
678  if (is_ip4)
679  {
680  ASSERT ((pkt_ih4->ip_version_and_header_length & 0xF0) == 0x40);
681  ih4 = vlib_buffer_push_ip4 (vm, b, &pkt_ih4->dst_address,
682  &pkt_ih4->src_address, IP_PROTOCOL_TCP);
683  th->checksum = ip4_tcp_udp_compute_checksum (vm, b, ih4);
684  }
685  else
686  {
687  int bogus = ~0;
 /* NOTE(review): pkt_th is a tcp_header_t *, so `pkt_th - 1` steps back
  * sizeof (tcp_header_t) bytes, not sizeof (ip6_header_t). This
  * overwrites the pkt_ih6 already obtained above with a pointer that is
  * presumably inside the IPv6 header rather than at its start, so the
  * addresses read below would be wrong -- confirm and drop this line. */
688  pkt_ih6 = (ip6_header_t *) (pkt_th - 1);
689  ASSERT ((pkt_ih6->ip_version_traffic_class_and_flow_label & 0xF0) ==
690  0x60);
691  ih6 =
692  vlib_buffer_push_ip6 (vm, b, &pkt_ih6->dst_address,
693  &pkt_ih6->src_address, IP_PROTOCOL_TCP);
694  th->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b, ih6, &bogus);
695  ASSERT (!bogus);
696  }
697 
698  tcp_enqueue_to_ip_lookup (vm, b, bi, is_ip4);
699 }
700 
/* Prepend an IPv4 or IPv6 header (chosen by tc->c_is_ip4) built from the
 * connection's local/remote addresses to `b`, then compute and store the
 * TCP checksum.
 * NOTE(review): the signature (listing line 702) and the statement that
 * declares/initialises `th` (line 704) are missing from this listing;
 * callers pass (tm, tc, b). */
701 void
703 {
705 
706  if (tc->c_is_ip4)
707  {
708  ip4_header_t *ih;
709  ih = vlib_buffer_push_ip4 (tm->vlib_main, b, &tc->c_lcl_ip4,
710  &tc->c_rmt_ip4, IP_PROTOCOL_TCP);
711  th->checksum = ip4_tcp_udp_compute_checksum (tm->vlib_main, b, ih);
712  }
713  else
714  {
715  ip6_header_t *ih;
716  int bogus = ~0;
717 
718  ih = vlib_buffer_push_ip6 (tm->vlib_main, b, &tc->c_lcl_ip6,
719  &tc->c_rmt_ip6, IP_PROTOCOL_TCP);
720  th->checksum = ip6_tcp_udp_icmp_compute_checksum (tm->vlib_main, b, ih,
721  &bogus);
722  ASSERT (!bogus);
723  }
724 }
725 
726 /**
727  * Send SYN
728  *
729  * Builds a SYN packet for a half-open connection and sends it to ipx_lookup.
730  * The packet is not forwarded through tcpx_output to avoid doing lookups
731  * in the half_open pool.
 *
 * Also picks the initial send sequence, starts RTT measurement, arms the
 * SYN retransmit and connection-establish timers and resets rto_boff.
 * Returns silently, before changing any connection state, if no buffer is
 * available.
 *
 * NOTE(review): the signature (listing line 734) and the statement after
 * the "Leave enough space for headers" comment (line 750) are missing from
 * this listing.
732  */
733 void
735 {
736  vlib_buffer_t *b;
737  u32 bi;
738  tcp_main_t *tm = vnet_get_tcp_main ();
739  vlib_main_t *vm = tm->vlib_main;
740  u8 tcp_hdr_opts_len, tcp_opts_len;
741  tcp_header_t *th;
742  u32 time_now;
743  u16 initial_wnd;
744  tcp_options_t snd_opts;
745 
746  tcp_get_free_buffer_index (tm, &bi);
747  b = vlib_get_buffer (vm, bi);
748 
749  /* Leave enough space for headers */
751 
752  /* Set random initial sequence */
753  time_now = tcp_time_now ();
754 
755  tc->iss = random_u32 (&time_now);
756  tc->snd_una = tc->iss;
757  tc->snd_una_max = tc->snd_nxt = tc->iss + 1;
758 
759  initial_wnd = tcp_initial_window_to_advertise (tc);
760 
761  /* Make and write options */
762  memset (&snd_opts, 0, sizeof (snd_opts));
763  tcp_opts_len = tcp_make_syn_options (&snd_opts, tc->rcv_wscale);
764  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
765 
766  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->iss,
767  tc->rcv_nxt, tcp_hdr_opts_len, TCP_FLAG_SYN,
768  initial_wnd);
769 
770  tcp_options_write ((u8 *) (th + 1), &snd_opts);
771 
772  /* Measure RTT with this */
773  tc->rtt_ts = tcp_time_now ();
774  tc->rtt_seq = tc->snd_nxt;
775 
776  /* Start retransmit timer */
777  tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN, tc->rto * TCP_TO_TIMER_TICK);
778  tc->rto_boff = 0;
779 
780  /* Set the connection establishment timer */
781  tcp_timer_set (tc, TCP_TIMER_ESTABLISH, TCP_ESTABLISH_TIME);
782 
783  tcp_push_ip_hdr (tm, tc, b);
784  tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
785 }
786 
/* Hand a single buffer to the tcp4-output or tcp6-output node. A frame
 * holding exactly one buffer index is obtained and put per call.
 * NOTE(review): the signature (listing line 788) and one statement before
 * `b->error = 0` (line 793) are missing from this listing; callers pass
 * (vm, b, bi, is_ip4). */
787 always_inline void
789 {
790  u32 *to_next, next_index;
791  vlib_frame_t *f;
792 
794  b->error = 0;
795 
796  /* Decide where to send the packet */
797  next_index = is_ip4 ? tcp4_output_node.index : tcp6_output_node.index;
798  f = vlib_get_frame_to_node (vm, next_index);
799 
800  /* Enqueue the packet */
801  to_next = vlib_frame_vector_args (f);
802  to_next[0] = bi;
803  f->n_vectors = 1;
804  vlib_put_frame_to_node (vm, next_index, f);
805 }
806 
807 /**
808  * Send FIN
 *
 * Takes a fresh buffer, turns it into a FIN-ACK, enqueues it to the TCP
 * output node and sets TCP_CONN_FINSNT on the connection. Returns
 * silently, without setting the flag, if no buffer is available.
 *
 * NOTE(review): the signature (listing line 811) and the statement after
 * the "Leave enough space for headers" comment (line 822) are missing from
 * this listing.
809  */
810 void
812 {
813  vlib_buffer_t *b;
814  u32 bi;
815  tcp_main_t *tm = vnet_get_tcp_main ();
816  vlib_main_t *vm = tm->vlib_main;
817 
818  tcp_get_free_buffer_index (tm, &bi);
819  b = vlib_get_buffer (vm, bi);
820 
821  /* Leave enough space for headers */
823 
824  tcp_make_fin (tc, b);
825  tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
826  tc->flags |= TCP_CONN_FINSNT;
827  TCP_EVT_DBG (TCP_EVT_FIN_SENT, tc);
828 }
829 
/* Map a connection state to the TCP header flags for a segment sent in
 * that state. Unlisted states log a warning and yield 0 (no flags).
 * NOTE(review): the return type and signature (listing lines 830-831) are
 * missing from this listing; the body switches on `next_state`.
 * NOTE(review): LAST_ACK / FIN_WAIT_1 return FIN without ACK, whereas the
 * FIN builder in this file sends FIN|ACK -- confirm which is intended. */
832 {
833  switch (next_state)
834  {
835  case TCP_STATE_ESTABLISHED:
836  return TCP_FLAG_ACK;
837  case TCP_STATE_SYN_RCVD:
838  return TCP_FLAG_SYN | TCP_FLAG_ACK;
839  case TCP_STATE_SYN_SENT:
840  return TCP_FLAG_SYN;
841  case TCP_STATE_LAST_ACK:
842  case TCP_STATE_FIN_WAIT_1:
843  return TCP_FLAG_FIN;
844  default:
845  clib_warning ("Shouldn't be here!");
846  }
847  return 0;
848 }
849 
850 /**
851  * Push TCP header and update connection variables
 *
 * Treats the current contents of `b` as payload, prepends a TCP header
 * with the options and flags matching `next_state`, tags the buffer with
 * the connection index and advances tc->snd_nxt by the payload length.
 *
 * NOTE(review): the first signature line (listing line 854) is missing from
 * this listing; the body uses `tc`, `b` and `next_state`.
852  */
853 static void
855  tcp_state_t next_state)
856 {
857  u32 advertise_wnd, data_len;
858  u8 tcp_opts_len, tcp_hdr_opts_len, opts_write_len, flags;
859  tcp_options_t _snd_opts, *snd_opts = &_snd_opts;
860  tcp_header_t *th;
861 
 /* Payload length must be captured before the header is pushed */
862  data_len = b->current_length;
863  vnet_buffer (b)->tcp.flags = 0;
864 
865  /* Make and write options */
866  memset (snd_opts, 0, sizeof (*snd_opts));
867  tcp_opts_len = tcp_make_options (tc, snd_opts, next_state);
868  tcp_hdr_opts_len = tcp_opts_len + sizeof (tcp_header_t);
869 
870  /* Get rcv window to advertise */
871  advertise_wnd = tcp_window_to_advertise (tc, next_state);
872  flags = tcp_make_state_flags (next_state);
873 
874  /* Push header and options */
875  th = vlib_buffer_push_tcp (b, tc->c_lcl_port, tc->c_rmt_port, tc->snd_nxt,
876  tc->rcv_nxt, tcp_hdr_opts_len, flags,
877  advertise_wnd);
878 
879  opts_write_len = tcp_options_write ((u8 *) (th + 1), snd_opts);
880 
 /* The length reserved in the header must match what was written */
881  ASSERT (opts_write_len == tcp_opts_len);
882 
883  /* Tag the buffer with the connection index */
884  vnet_buffer (b)->tcp.connection_index = tc->c_c_index;
885 
886  tc->snd_nxt += data_len;
887  TCP_EVT_DBG (TCP_EVT_PKTIZE, tc);
888 }
889 
890 /* Send delayed ACK when timer expires.
 * Looks up the connection by `index` on the current thread, builds an ACK
 * in a fresh buffer, invalidates the DELACK timer handle, clears
 * TCP_CONN_DELACK and enqueues the ACK to the TCP output node.
 * NOTE(review): the signature line (listing line 892) is missing from this
 * listing. If no buffer is available the macro returns early, leaving the
 * timer handle and the DELACK flag untouched. */
891 void
893 {
894  tcp_main_t *tm = vnet_get_tcp_main ();
895  vlib_main_t *vm = tm->vlib_main;
896  u32 thread_index = os_get_cpu_number ();
897  tcp_connection_t *tc;
898  vlib_buffer_t *b;
899  u32 bi;
900 
901  tc = tcp_connection_get (index, thread_index);
902 
903  /* Get buffer */
904  tcp_get_free_buffer_index (tm, &bi);
905  b = vlib_get_buffer (vm, bi);
906 
907  /* Fill in the ACK */
908  tcp_make_ack (tc, b);
909 
910  tc->timers[TCP_TIMER_DELACK] = TCP_TIMER_HANDLE_INVALID;
911  tc->flags &= ~TCP_CONN_DELACK;
912 
913  tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
914 }
915 
916 /** Build a retransmit segment
917  *
 * Resets `b`, copies up to `max_bytes` of already-sent data from the
 * session into it and pushes a TCP header for the connection's current
 * state. With SACK permitted the data comes from the first scoreboard hole
 * (limited to the hole size); otherwise from offset 0.
 *
918  * @return the number of bytes in the segment or 0 if there's nothing to
919  * retransmit
 *
 * NOTE(review): the first signature line (listing line 922) and the
 * declaration of `hole` (line 928) are missing from this listing.
920  * */
921 u32
923  u32 max_bytes)
924 {
925  tcp_main_t *tm = vnet_get_tcp_main ();
926  vlib_main_t *vm = tm->vlib_main;
927  u32 n_bytes, offset = 0;
929  u32 hole_size;
930 
931  tcp_reuse_buffer (vm, b);
932 
933  ASSERT (tc->state >= TCP_STATE_ESTABLISHED);
934  ASSERT (max_bytes != 0);
935 
936  if (tcp_opts_sack_permitted (&tc->opt))
937  {
938  /* XXX get first hole not retransmitted yet */
939  hole = scoreboard_first_hole (&tc->sack_sb);
940  if (!hole)
941  return 0;
942 
 /* Offset of the hole relative to the first unacked byte */
943  offset = hole->start - tc->snd_una;
944  hole_size = hole->end - hole->start;
945 
946  ASSERT (hole_size);
947 
948  if (hole_size < max_bytes)
949  max_bytes = hole_size;
950  }
951  else
952  {
 /* Nothing outstanding beyond snd_nxt */
953  if (seq_geq (tc->snd_nxt, tc->snd_una_max))
954  return 0;
955  }
956 
957  n_bytes = stream_session_peek_bytes (&tc->connection,
958  vlib_buffer_get_current (b), offset,
959  max_bytes);
960  ASSERT (n_bytes != 0);
961 
 /* NOTE(review): b->current_length was zeroed by tcp_reuse_buffer and is
  * not set to n_bytes here (the peek only receives a data pointer), yet
  * tcp_push_hdr_i derives the payload length from b->current_length. As
  * written the segment appears to carry a header only and snd_nxt does
  * not advance -- confirm and set the length before pushing. */
962  tcp_push_hdr_i (tc, b, tc->state);
963 
964  return n_bytes;
965 }
966 
/* Common retransmit-timeout handler. `is_syn` selects the half-open pool
 * (SYN retransmit) or the per-thread connection pool. Rewinds snd_nxt to
 * snd_una, bumps the backoff counter, rebuilds either a data segment
 * (established and later states) or a SYN / SYN-ACK, sends it and re-arms
 * the timer.
 *
 * NOTE(review): the signature (listing line 968) and two statements (line
 * 1033 before the SYN header push, line 1046 after "Re-enable retransmit
 * timer") are missing from this listing. */
967 static void
969 {
970  tcp_main_t *tm = vnet_get_tcp_main ();
971  vlib_main_t *vm = tm->vlib_main;
972  u32 thread_index = os_get_cpu_number ();
973  tcp_connection_t *tc;
974  vlib_buffer_t *b;
975  u32 bi, max_bytes, snd_space;
976 
977  if (is_syn)
978  {
979  tc = tcp_half_open_connection_get (index);
980  }
981  else
982  {
983  tc = tcp_connection_get (index, thread_index);
984  }
985 
986  /* Make sure timer handle is set to invalid */
 /* NOTE(review): the SYN path arms TCP_TIMER_RETRANSMIT_SYN below, but the
  * handle invalidated here is always TCP_TIMER_RETRANSMIT -- confirm */
987  tc->timers[TCP_TIMER_RETRANSMIT] = TCP_TIMER_HANDLE_INVALID;
988 
989  /* Increment RTO backoff (also equal to number of retries) */
990  tc->rto_boff += 1;
991 
992  /* Go back to first un-acked byte */
993  tc->snd_nxt = tc->snd_una;
994 
995  /* Get buffer */
 /* NOTE(review): if the macro returns here (no buffers), snd_nxt stays
  * rewound and no timer is re-armed */
996  tcp_get_free_buffer_index (tm, &bi);
997  b = vlib_get_buffer (vm, bi);
998 
999  if (tc->state >= TCP_STATE_ESTABLISHED)
1000  {
1001  tcp_fastrecovery_off (tc);
1002 
1003  /* Exponential backoff */
1004  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1005 
1006  /* Figure out what and how many bytes we can send */
1007  snd_space = tcp_available_snd_space (tc);
1008  max_bytes = clib_min (tc->snd_mss, snd_space);
1009 
1010  if (max_bytes == 0)
1011  {
1012  clib_warning ("no wnd to retransmit");
 /* NOTE(review): buffer `bi` was already popped from the cache and
  * is neither enqueued nor returned on this path (leak), and the
  * retransmit timer is not re-armed -- confirm */
1013  return;
1014  }
 /* NOTE(review): the return value (bytes actually queued, possibly 0)
  * is ignored; rtx_bytes is bumped by max_bytes regardless */
1015  tcp_prepare_retransmit_segment (tc, b, max_bytes);
1016 
1017  tc->rtx_bytes += max_bytes;
1018 
1019  /* No fancy recovery for now! */
1020  scoreboard_clear (&tc->sack_sb);
1021  }
1022  else
1023  {
1024  /* Retransmit for SYN/SYNACK */
1025  ASSERT (tc->state == TCP_STATE_SYN_RCVD
1026  || tc->state == TCP_STATE_SYN_SENT);
1027 
1028  /* Try without increasing RTO a number of times. If this fails,
1029  * start growing RTO exponentially */
1030  if (tc->rto_boff > TCP_RTO_SYN_RETRIES)
1031  tc->rto = clib_min (tc->rto << 1, TCP_RTO_MAX);
1032 
1034 
1035  tcp_push_hdr_i (tc, b, tc->state);
1036 
1037  /* Account for the SYN */
1038  tc->snd_nxt += 1;
1039  }
1040 
1041  if (!is_syn)
1042  {
1043  tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4);
1044 
1045  /* Re-enable retransmit timer */
1047  }
1048  else
1049  {
1050  ASSERT (tc->state == TCP_STATE_SYN_SENT);
1051 
1052  /* This goes straight to ipx_lookup */
1053  tcp_push_ip_hdr (tm, tc, b);
1054  tcp_enqueue_to_ip_lookup (vm, b, bi, tc->c_is_ip4);
1055 
1056  /* Re-enable retransmit timer */
1057  tcp_timer_set (tc, TCP_TIMER_RETRANSMIT_SYN,
1058  tc->rto * TCP_TO_TIMER_TICK);
1059  }
1060 }
1061 
/* Retransmit timer callback for regular connections: forwards to the
 * common handler with is_syn = 0.
 * NOTE(review): the signature line (listing line 1063) is missing from this
 * listing; the body uses `index`. */
1062 void
1064 {
1065  tcp_timer_retransmit_handler_i (index, 0);
1066 }
1067 
/* Retransmit timer callback for half-open connections: forwards to the
 * common handler with is_syn = 1.
 * NOTE(review): the signature line (listing line 1069) is missing from this
 * listing; the body uses `index`. */
1068 void
1070 {
1071  tcp_timer_retransmit_handler_i (index, 1);
1072 }
1073 
1074 /**
1075  * Retransmit first unacked segment.
 *
 * Temporarily rewinds tc->snd_nxt to snd_una, builds one segment of up to
 * snd_mss bytes, enqueues it to the TCP output node, then restores snd_nxt
 * and adds snd_mss to tc->rtx_bytes.
 *
 * NOTE(review): the signature line (listing line 1077) is missing from this
 * listing. If the buffer macro returns early, snd_nxt is left rewound. The
 * result of the prepare call is ignored, so the buffer is enqueued and
 * rtx_bytes is bumped even when nothing was queued. */
1076 void
1078 {
1079  tcp_main_t *tm = vnet_get_tcp_main ();
1080  u32 snd_nxt = tc->snd_nxt;
1081  vlib_buffer_t *b;
1082  u32 bi;
1083 
1084  tc->snd_nxt = tc->snd_una;
1085 
1086  /* Get buffer */
1087  tcp_get_free_buffer_index (tm, &bi);
1088  b = vlib_get_buffer (tm->vlib_main, bi);
1089 
1090  tcp_prepare_retransmit_segment (tc, b, tc->snd_mss);
1091  tcp_enqueue_to_output (tm->vlib_main, b, bi, tc->c_is_ip4);
1092 
 /* Put snd_nxt back where it was before the retransmit */
1093  tc->snd_nxt = snd_nxt;
1094  tc->rtx_bytes += tc->snd_mss;
1095 }
1096 
/* Fast retransmit: starting from snd_una, keep building and enqueuing
 * retransmit segments of up to snd_mss bytes until the available send
 * space is used up or nothing is left to retransmit. On normal loop exit
 * snd_nxt is moved to snd_una_max. Must be called while in fast recovery.
 * NOTE(review): the signature line (listing line 1098) is missing from this
 * listing; the body uses `tc`. */
1097 void
1099 {
1100  tcp_main_t *tm = vnet_get_tcp_main ();
1101  u32 snd_space, max_bytes, n_bytes, bi;
1102  vlib_buffer_t *b;
1103 
1104  ASSERT (tcp_in_fastrecovery (tc));
1105 
1106  clib_warning ("fast retransmit!");
1107 
1108  /* Start resending from first un-acked segment */
1109  tc->snd_nxt = tc->snd_una;
1110 
1111  snd_space = tcp_available_snd_space (tc);
1112 
1113  while (snd_space)
1114  {
1115  tcp_get_free_buffer_index (tm, &bi);
1116  b = vlib_get_buffer (tm->vlib_main, bi);
1117 
1118  max_bytes = clib_min (tc->snd_mss, snd_space);
1119  n_bytes = tcp_prepare_retransmit_segment (tc, b, max_bytes);
1120 
1121  /* Nothing left to retransmit */
 /* NOTE(review): on this return the buffer `bi` just popped is neither
  * enqueued nor returned (leak), and snd_nxt is not advanced to
  * snd_una_max as it is on normal loop exit -- confirm */
1122  if (n_bytes == 0)
1123  return;
1124 
1125  tcp_enqueue_to_output (tm->vlib_main, b, bi, tc->c_is_ip4);
1126 
1127  snd_space -= n_bytes;
1128  }
1129 
1130  /* If window allows, send new data */
1131  tc->snd_nxt = tc->snd_una_max;
1132 }
1133 
/* Report whether the connection's session rx fifo holds out-of-order data,
 * as given by svm_fifo_has_ooo_data on s->server_rx_fifo.
 * NOTE(review): the return type and signature (listing lines 1134-1135) are
 * missing from this listing; the body uses `tc`. */
1136 {
1137  stream_session_t *s =
1138  stream_session_get (tc->c_s_index, tc->c_thread_index);
1139  return svm_fifo_has_ooo_data (s->server_rx_fifo);
1140 }
1141 
/* Shared body of the tcp4-output / tcp6-output graph nodes. For each
 * buffer in the frame: look up the connection tagged on the buffer, push
 * the IP header and compute the TCP checksum, optionally filter duplicate
 * ACKs, update per-connection ACK/timer/RTT bookkeeping and send the
 * buffer to IP lookup (or to drop on error).
 * Returns the number of vectors in the input frame.
 * NOTE(review): the return type and first signature lines (listing lines
 * 1142-1143), the statement inside the retransmit-timer `if` (line 1251),
 * the line before `done:` (line 1260) and the condition heading the empty
 * block after it (line 1263) are missing from this listing. */
1144  vlib_node_runtime_t * node,
1145  vlib_frame_t * from_frame, int is_ip4)
1146 {
1147  u32 n_left_from, next_index, *from, *to_next;
1148  u32 my_thread_index = vm->cpu_index;
1149 
1150  from = vlib_frame_vector_args (from_frame);
1151  n_left_from = from_frame->n_vectors;
1152 
1153  next_index = node->cached_next_index;
1154 
1155  while (n_left_from > 0)
1156  {
1157  u32 n_left_to_next;
1158 
1159  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1160 
1161  while (n_left_from > 0 && n_left_to_next > 0)
1162  {
1163  u32 bi0;
1164  vlib_buffer_t *b0;
1165  tcp_connection_t *tc0;
1166  tcp_header_t *th0;
1167  u32 error0 = TCP_ERROR_PKTS_SENT, next0 = TCP_OUTPUT_NEXT_IP_LOOKUP;
1168 
 /* Speculatively enqueue to the cached next; fixed up at the end */
1169  bi0 = from[0];
1170  to_next[0] = bi0;
1171  from += 1;
1172  to_next += 1;
1173  n_left_from -= 1;
1174  n_left_to_next -= 1;
1175 
1176  b0 = vlib_get_buffer (vm, bi0);
1177  tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index,
1178  my_thread_index);
 /* Connection gone or closed since the segment was built: drop */
1179  if (PREDICT_FALSE (tc0 == 0 || tc0->state == TCP_STATE_CLOSED))
1180  {
1181  error0 = TCP_ERROR_INVALID_CONNECTION;
1182  next0 = TCP_OUTPUT_NEXT_DROP;
1183  goto done;
1184  }
1185 
 /* Buffer currently starts at the TCP header */
1186  th0 = vlib_buffer_get_current (b0);
1187  TCP_EVT_DBG (TCP_EVT_OUTPUT, tc0, th0->flags, b0->current_length);
1188 
1189  if (is_ip4)
1190  {
1191  ip4_header_t *ih0;
1192  ih0 = vlib_buffer_push_ip4 (vm, b0, &tc0->c_lcl_ip4,
1193  &tc0->c_rmt_ip4, IP_PROTOCOL_TCP);
1194  th0->checksum = ip4_tcp_udp_compute_checksum (vm, b0, ih0);
1195  }
1196  else
1197  {
1198  ip6_header_t *ih0;
1199  int bogus = ~0;
1200 
1201  ih0 = vlib_buffer_push_ip6 (vm, b0, &tc0->c_lcl_ip6,
1202  &tc0->c_rmt_ip6, IP_PROTOCOL_TCP);
1203  th0->checksum = ip6_tcp_udp_icmp_compute_checksum (vm, b0, ih0,
1204  &bogus);
1205  ASSERT (!bogus);
1206  }
1207 
1208  /* Filter out DUPACKs if there are no OOO segments left */
1209  if (PREDICT_FALSE
1210  (vnet_buffer (b0)->tcp.flags & TCP_BUF_FLAG_DUPACK))
1211  {
1212  ASSERT (tc0->snt_dupacks > 0);
1213  tc0->snt_dupacks--;
1214  if (!tcp_session_has_ooo_data (tc0))
1215  {
1216  error0 = TCP_ERROR_FILTERED_DUPACKS;
1217  next0 = TCP_OUTPUT_NEXT_DROP;
1218  goto done;
1219  }
1220  }
1221 
1222  /* Retransmitted SYNs do reach this but it should be harmless */
 /* Remember the rcv_nxt this segment acknowledges */
1223  tc0->rcv_las = tc0->rcv_nxt;
1224 
1225  /* Stop DELACK timer and fix flags */
1226  tc0->flags &=
1227  ~(TCP_CONN_SNDACK | TCP_CONN_DELACK | TCP_CONN_BURSTACK);
1228  if (tcp_timer_is_active (tc0, TCP_TIMER_DELACK))
1229  {
1230  tcp_timer_reset (tc0, TCP_TIMER_DELACK);
1231  }
1232 
1233  /* If not retransmitting
1234  * 1) update snd_una_max (SYN, SYNACK, new data, FIN)
1235  * 2) If we're not tracking an ACK, start tracking */
1236  if (seq_lt (tc0->snd_una_max, tc0->snd_nxt))
1237  {
1238  tc0->snd_una_max = tc0->snd_nxt;
1239  if (tc0->rtt_ts == 0)
1240  {
1241  tc0->rtt_ts = tcp_time_now ();
1242  tc0->rtt_seq = tc0->snd_nxt;
1243  }
1244  }
1245 
1246  /* Set the retransmit timer if not set already and not
1247  * doing a pure ACK */
1248  if (!tcp_timer_is_active (tc0, TCP_TIMER_RETRANSMIT)
1249  && tc0->snd_nxt != tc0->snd_una)
1250  {
1252  tc0->rto_boff = 0;
1253  }
1254 
1255  /* set fib index to default and lookup node */
1256  /* XXX network virtualization (vrf/vni) */
1257  vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
1258  vnet_buffer (b0)->sw_if_index[VLIB_TX] = (u32) ~ 0;
1259 
1261  done:
 /* Record the per-packet counter/error chosen above */
1262  b0->error = node->errors[error0];
1264  {
1265 
1266  }
1267 
1268  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1269  n_left_to_next, bi0, next0);
1270  }
1271 
1272  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1273  }
1274 
1275  return from_frame->n_vectors;
1276 }
1277 
/* Node function for "tcp4-output": runs the shared output path with
 * is_ip4 = 1.
 * NOTE(review): the first signature line (listing line 1279) is missing
 * from this listing; the body uses `vm`, `node` and `from_frame`. */
1278 static uword
1280  vlib_frame_t * from_frame)
1281 {
1282  return tcp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
1283 }
1284 
1285 static uword
1287  vlib_frame_t * from_frame)
1288 {
1289  return tcp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
1290 }
1291 
1292 /* *INDENT-OFF* */
1294 {
1295  .function = tcp4_output,.name = "tcp4-output",
1296  /* Takes a vector of packets. */
1297  .vector_size = sizeof (u32),
1298  .n_errors = TCP_N_ERROR,
1299  .error_strings = tcp_error_strings,
1300  .n_next_nodes = TCP_OUTPUT_N_NEXT,
1301  .next_nodes = {
1302 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
1304 #undef _
1305  },
1306  .format_buffer = format_tcp_header,
1307  .format_trace = format_tcp_tx_trace,
1308 };
1309 /* *INDENT-ON* */
1310 
1312 
1313 /* *INDENT-OFF* */
1315 {
1316  .function = tcp6_output,
1317  .name = "tcp6-output",
1318  /* Takes a vector of packets. */
1319  .vector_size = sizeof (u32),
1320  .n_errors = TCP_N_ERROR,
1321  .error_strings = tcp_error_strings,
1322  .n_next_nodes = TCP_OUTPUT_N_NEXT,
1323  .next_nodes = {
1324 #define _(s,n) [TCP_OUTPUT_NEXT_##s] = n,
1326 #undef _
1327  },
1328  .format_buffer = format_tcp_header,
1329  .format_trace = format_tcp_tx_trace,
1330 };
1331 /* *INDENT-ON* */
1332 
1334 
1335 u32
1337 {
1338  tcp_connection_t *tc;
1339 
1340  tc = (tcp_connection_t *) tconn;
1341  tcp_push_hdr_i (tc, b, TCP_STATE_ESTABLISHED);
1342  return 0;
1343 }
1344 
/*
 * Next-node indices for the tcp4-reset / tcp6-reset nodes.  The order must
 * match the foreach_tcp{4,6}_reset_next lists below, whose entries are
 * placed into the next_nodes array by index.
 */
typedef enum _tcp_reset_next
{
  TCP_RESET_NEXT_DROP,
  TCP_RESET_NEXT_IP_LOOKUP,
  TCP_RESET_N_NEXT
} tcp_reset_next_t;

/* (next index suffix, next node name) pairs for the IPv4 reset node */
#define foreach_tcp4_reset_next         \
  _(DROP, "error-drop")                 \
  _(IP_LOOKUP, "ip4-lookup")

/* (next index suffix, next node name) pairs for the IPv6 reset node */
#define foreach_tcp6_reset_next         \
  _(DROP, "error-drop")                 \
  _(IP_LOOKUP, "ip6-lookup")
1359 
1360 static uword
1362  vlib_frame_t * from_frame, u8 is_ip4)
1363 {
1364  u32 n_left_from, next_index, *from, *to_next;
1365  u32 my_thread_index = vm->cpu_index;
1366 
1367  from = vlib_frame_vector_args (from_frame);
1368  n_left_from = from_frame->n_vectors;
1369 
1370  next_index = node->cached_next_index;
1371 
1372  while (n_left_from > 0)
1373  {
1374  u32 n_left_to_next;
1375 
1376  vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
1377 
1378  while (n_left_from > 0 && n_left_to_next > 0)
1379  {
1380  u32 bi0;
1381  vlib_buffer_t *b0;
1382  u32 error0 = TCP_ERROR_RST_SENT, next0 = TCP_RESET_NEXT_IP_LOOKUP;
1383 
1384  bi0 = from[0];
1385  to_next[0] = bi0;
1386  from += 1;
1387  to_next += 1;
1388  n_left_from -= 1;
1389  n_left_to_next -= 1;
1390 
1391  b0 = vlib_get_buffer (vm, bi0);
1392 
1393  if (tcp_make_reset_in_place (vm, b0, vnet_buffer (b0)->tcp.flags,
1394  my_thread_index, is_ip4))
1395  {
1396  error0 = TCP_ERROR_LOOKUP_DROPS;
1397  next0 = TCP_RESET_NEXT_DROP;
1398  goto done;
1399  }
1400 
1401  /* Prepare to send to IP lookup */
1402  vnet_buffer (b0)->sw_if_index[VLIB_TX] = 0;
1403  next0 = TCP_RESET_NEXT_IP_LOOKUP;
1404 
1405  done:
1406  b0->error = node->errors[error0];
1409  {
1410 
1411  }
1412 
1413  vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
1414  n_left_to_next, bi0, next0);
1415  }
1416  vlib_put_next_frame (vm, node, next_index, n_left_to_next);
1417  }
1418  return from_frame->n_vectors;
1419 }
1420 
1421 static uword
1423  vlib_frame_t * from_frame)
1424 {
1425  return tcp46_send_reset_inline (vm, node, from_frame, 1);
1426 }
1427 
1428 static uword
1430  vlib_frame_t * from_frame)
1431 {
1432  return tcp46_send_reset_inline (vm, node, from_frame, 0);
1433 }
1434 
1435 /* *INDENT-OFF* */
1437  .function = tcp4_send_reset,
1438  .name = "tcp4-reset",
1439  .vector_size = sizeof (u32),
1440  .n_errors = TCP_N_ERROR,
1441  .error_strings = tcp_error_strings,
1442  .n_next_nodes = TCP_RESET_N_NEXT,
1443  .next_nodes = {
1444 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
1446 #undef _
1447  },
1448 };
1449 /* *INDENT-ON* */
1450 
1452 
1453 /* *INDENT-OFF* */
1455  .function = tcp6_send_reset,
1456  .name = "tcp6-reset",
1457  .vector_size = sizeof (u32),
1458  .n_errors = TCP_N_ERROR,
1459  .error_strings = tcp_error_strings,
1460  .n_next_nodes = TCP_RESET_N_NEXT,
1461  .next_nodes = {
1462 #define _(s,n) [TCP_RESET_NEXT_##s] = n,
1464 #undef _
1465  },
1466 };
1467 /* *INDENT-ON* */
1468 
1470 
1471 /*
1472  * fd.io coding-style-patch-verification: ON
1473  *
1474  * Local Variables:
1475  * eval: (c-set-style "gnu")
1476  * End:
1477  */
void tcp_make_fin(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to FIN-ACK.
Definition: tcp_output.c:447
#define TCP_IW_N_SEGMENTS
Definition: tcp.h:35
vlib_frame_t * vlib_get_frame_to_node(vlib_main_t *vm, u32 to_node_index)
Definition: main.c:187
End of options.
Definition: tcp_packet.h:104
sll srl srl sll sra u16x4 i
Definition: vector_sse2.h:343
u32 stream_session_peek_bytes(transport_connection_t *tc, u8 *buffer, u32 offset, u32 max_bytes)
Definition: session.c:771
#define clib_min(x, y)
Definition: clib.h:332
#define TCP_OPTION_LEN_EOL
Definition: tcp_packet.h:164
#define CLIB_UNUSED(x)
Definition: clib.h:79
static void tcp_enqueue_to_ip_lookup(vlib_main_t *vm, vlib_buffer_t *b, u32 bi, u8 is_ip4)
Definition: tcp_output.c:510
static void tcp_retransmit_timer_set(tcp_connection_t *tc)
Definition: tcp.h:481
#define TCP_OPTION_LEN_SACK_PERMITTED
Definition: tcp_packet.h:168
void tcp_timer_retransmit_handler(u32 index)
Definition: tcp_output.c:1063
ip4_address_t src_address
Definition: ip4_packet.h:163
static void tcp_push_hdr_i(tcp_connection_t *tc, vlib_buffer_t *b, tcp_state_t next_state)
Push TCP header and update connection variables.
Definition: tcp_output.c:854
struct _transport_connection transport_connection_t
#define TCP_TO_TIMER_TICK
Definition: tcp.h:88
Selective Ack permitted.
Definition: tcp_packet.h:108
#define TCP_FLAG_SYN
Definition: fa_node.h:8
static u8 svm_fifo_has_ooo_data(svm_fifo_t *f)
Definition: svm_fifo.h:110
#define tcp_opts_tstamp(_to)
Definition: tcp_packet.h:158
static uword tcp4_send_reset(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
Definition: tcp_output.c:1422
void tcp_make_synack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to SYN-ACK.
Definition: tcp_output.c:468
static void tcp_enqueue_to_output(vlib_main_t *vm, vlib_buffer_t *b, u32 bi, u8 is_ip4)
Definition: tcp_output.c:788
static int tcp_make_syn_options(tcp_options_t *opts, u8 wnd_scale)
Definition: tcp_output.c:254
static int ip4_header_bytes(ip4_header_t *i)
Definition: ip4_packet.h:226
static tcp_connection_t * tcp_half_open_connection_get(u32 conn_index)
Definition: tcp.h:369
void vlib_put_next_frame(vlib_main_t *vm, vlib_node_runtime_t *r, u32 next_index, u32 n_vectors_left)
Release pointer to next frame vector data.
Definition: main.c:459
struct _tcp_main tcp_main_t
#define TCP_OPTS_ALIGN
Definition: tcp_packet.h:174
static u32 tcp_initial_wnd_unscaled(tcp_connection_t *tc)
TCP's IW as recommended by RFC6928.
Definition: tcp_output.c:97
struct _vlib_node_registration vlib_node_registration_t
static u32 tcp_session_has_ooo_data(tcp_connection_t *tc)
Definition: tcp_output.c:1135
struct _tcp_connection tcp_connection_t
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:418
#define tcp_get_free_buffer_index(tm, bidx)
Definition: tcp_output.c:363
#define tcp_opts_sack(_to)
Definition: tcp_packet.h:160
void tcp_make_ack_i(tcp_connection_t *tc, vlib_buffer_t *b, tcp_state_t state, u8 flags)
Prepare ACK.
Definition: tcp_output.c:406
static void scoreboard_clear(sack_scoreboard_t *sb)
Definition: tcp.h:537
static uword tcp46_send_reset_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame, u8 is_ip4)
Definition: tcp_output.c:1361
vlib_error_t * errors
Vector of errors for this node.
Definition: node.h:418
No operation.
Definition: tcp_packet.h:105
u8 n_sack_blocks
Number of SACKs blocks.
Definition: tcp_packet.h:153
struct _tcp_header tcp_header_t
static u32 tcp_available_snd_space(const tcp_connection_t *tc)
Definition: tcp.h:432
ip6_address_t src_address
Definition: ip6_packet.h:341
struct _sack_scoreboard_hole sack_scoreboard_hole_t
u8 wscale
Window scale advertised by peer.
Definition: tcp_packet.h:149
static void tcp_reuse_buffer(vlib_main_t *vm, vlib_buffer_t *b)
Definition: tcp_output.c:385
#define TCP_OPTS_MAX_SACK_BLOCKS
Definition: tcp_packet.h:175
#define TCP_MAX_RX_FIFO_SIZE
Definition: tcp.h:34
u16 dummy_mtu
Definition: tcp_output.c:50
vlib_node_registration_t ip4_lookup_node
(constructor) VLIB_REGISTER_NODE (ip4_lookup_node)
Definition: ip4_forward.c:512
struct _stream_session_t stream_session_t
static void tcp_timer_retransmit_handler_i(u32 index, u8 is_syn)
Definition: tcp_output.c:968
#define foreach_tcp4_reset_next
Definition: tcp_output.c:1352
Limit MSS.
Definition: tcp_packet.h:106
static u32 stream_session_fifo_size(transport_connection_t *tc)
Definition: session.h:343
static stream_session_t * stream_session_get(u64 si, u32 thread_index)
Definition: session.h:303
#define VLIB_BUFFER_NEXT_PRESENT
Definition: buffer.h:87
i16 current_data
signed offset in data[], pre_data[] that we are currently processing.
Definition: buffer.h:67
void tcp_push_ip_hdr(tcp_main_t *tm, tcp_connection_t *tc, vlib_buffer_t *b)
Definition: tcp_output.c:702
#define always_inline
Definition: clib.h:84
#define TCP_OPTION_LEN_SACK_BLOCK
Definition: tcp_packet.h:170
ip4_address_t dst_address
Definition: ip4_packet.h:163
#define TCP_FLAG_ACK
Definition: fa_node.h:11
static u32 stream_session_max_enqueue(transport_connection_t *tc)
Definition: session.h:336
u8 * format_tcp_tx_trace(u8 *s, va_list *args)
Definition: tcp_output.c:53
enum _tcp_state tcp_state_t
u32 cpu_index
Definition: main.h:159
#define TCP_RTO_MAX
Definition: tcp.h:96
static void * ip4_next_header(ip4_header_t *i)
Definition: ip4_packet.h:232
static u32 tcp_time_now(void)
Definition: tcp.h:448
sack_block_t * sacks
SACK blocks received.
Definition: tcp_packet.h:152
#define TCP_ESTABLISH_TIME
Definition: tcp.h:91
#define TCP_EVT_DBG(_evt, _args...)
Definition: tcp_debug.h:302
static void tcp_timer_set(tcp_connection_t *tc, u8 timer_id, u32 interval)
Definition: tcp.h:473
#define TCP_OPTION_LEN_WINDOW_SCALE
Definition: tcp_packet.h:167
vlib_node_registration_t tcp6_reset_node
(constructor) VLIB_REGISTER_NODE (tcp6_reset_node)
Definition: tcp_output.c:1454
int tcp_make_reset_in_place(vlib_main_t *vm, vlib_buffer_t *b0, tcp_state_t state, u32 my_thread_index, u8 is_ip4)
Definition: tcp_output.c:534
#define TCP_RTO_SYN_RETRIES
Definition: tcp.h:98
static int tcp_make_synack_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:280
u16 current_length
Nbytes between current data and the end of this buffer.
Definition: buffer.h:71
static void * vlib_buffer_make_headroom(vlib_buffer_t *b, u8 size)
Make head room, typically for packet headers.
Definition: buffer.h:288
#define MAX_HDRS_LEN
Definition: session.h:28
#define tcp_in_fastrecovery(tc)
Definition: tcp.h:252
static void * vlib_buffer_push_tcp_net_order(vlib_buffer_t *b, u16 sp, u16 dp, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
Definition: tcp.h:587
#define tcp_opts_mss(_to)
Definition: tcp_packet.h:157
uword os_get_cpu_number(void)
Definition: unix-misc.c:224
static void * vlib_buffer_get_current(vlib_buffer_t *b)
Get pointer to current data to process.
Definition: buffer.h:188
#define TCP_TIMER_HANDLE_INVALID
Definition: tcp.h:85
#define foreach_tcp6_output_next
Definition: tcp_output.c:33
#define PREDICT_FALSE(x)
Definition: clib.h:97
static void * vlib_buffer_push_ip4(vlib_main_t *vm, vlib_buffer_t *b, ip4_address_t *src, ip4_address_t *dst, int proto)
Push IPv4 header to buffer.
Definition: ip4.h:353
void tcp_fast_retransmit(tcp_connection_t *tc)
Definition: tcp_output.c:1098
#define TCP_FLAG_FIN
Definition: fa_node.h:7
void tcp_timer_retransmit_syn_handler(u32 index)
Definition: tcp_output.c:1069
#define vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next, n_left_to_next, bi0, next0)
Finish enqueueing one buffer forward in the graph.
Definition: buffer_node.h:216
#define vlib_get_next_frame(vm, node, next_index, vectors, n_vectors_left)
Get pointer to next frame vector data by (vlib_node_runtime_t, next_index).
Definition: node_funcs.h:350
#define TCP_OPTION_LEN_TIMESTAMP
Definition: tcp_packet.h:169
#define foreach_tcp4_output_next
Definition: tcp_output.c:29
vlib_error_t error
Error code for buffers to be enqueued to error handler.
Definition: buffer.h:113
#define TCP_WND_MAX
Definition: tcp_packet.h:172
void tcp_set_snd_mss(tcp_connection_t *tc)
Definition: tcp_output.c:64
Selective Ack block.
Definition: tcp_packet.h:109
void tcp_make_ack(tcp_connection_t *tc, vlib_buffer_t *b)
Convert buffer to ACK.
Definition: tcp_output.c:433
#define TCP_FLAG_RST
Definition: fa_node.h:9
#define TCP_MAX_WND_SCALE
Definition: tcp_packet.h:173
static void tcp_timer_reset(tcp_connection_t *tc, u8 timer_id)
Definition: tcp.h:489
static sack_scoreboard_hole_t * scoreboard_first_hole(sack_scoreboard_t *sb)
Definition: tcp.h:529
static void * vlib_buffer_push_tcp(vlib_buffer_t *b, u16 sp_net, u16 dp_net, u32 seq, u32 ack, u8 tcp_hdr_opts_len, u8 flags, u16 wnd)
Push TCP header to buffer.
Definition: tcp.h:622
u16 n_vectors
Definition: node.h:344
#define VNET_BUFFER_LOCALLY_ORIGINATED
Definition: buffer.h:68
vlib_main_t * vm
Definition: buffer.c:276
enum _tcp_output_nect tcp_output_next_t
#define clib_warning(format, args...)
Definition: error.h:59
#define VLIB_BUFFER_IS_TRACED
Definition: buffer.h:85
#define clib_memcpy(a, b, c)
Definition: string.h:69
format_function_t format_tcp_header
Definition: format.h:102
u16 mss
Option flags, see above.
Definition: tcp_packet.h:148
static void * ip6_next_header(ip6_header_t *i)
Definition: ip6_packet.h:345
u16 ip6_tcp_udp_icmp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip6_header_t *ip0, int *bogus_lengthp)
Definition: ip6_forward.c:1139
vlib_node_registration_t ip6_lookup_node
(constructor) VLIB_REGISTER_NODE (ip6_lookup_node)
Definition: ip6_forward.c:691
static int tcp_make_established_options(tcp_connection_t *tc, tcp_options_t *opts)
Definition: tcp_output.c:315
u16 cached_next_index
Next frame index that vector arguments were last enqueued to last time this node ran.
Definition: node.h:455
#define ASSERT(truth)
#define tcp_syn(_th)
Definition: tcp_packet.h:80
unsigned int u32
Definition: types.h:88
static uword tcp6_output(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
Definition: tcp_output.c:1286
u16 ip4_tcp_udp_compute_checksum(vlib_main_t *vm, vlib_buffer_t *p0, ip4_header_t *ip0)
Definition: ip4_forward.c:1394
u32 tcp_push_header(transport_connection_t *tconn, vlib_buffer_t *b)
Definition: tcp_output.c:1336
#define seq_geq(_s1, _s2)
Definition: tcp.h:392
u32 tcp_window_to_advertise(tcp_connection_t *tc, tcp_state_t state)
Compute and return window to advertise, scaled as per RFC1323.
Definition: tcp_output.c:126
vhost_vring_state_t state
Definition: vhost-user.h:83
void tcp_retransmit_first_unacked(tcp_connection_t *tc)
Retransmit first unacked segment.
Definition: tcp_output.c:1077
u32 next_buffer
Next buffer for this linked-list of buffers.
Definition: buffer.h:109
u32 tcp_prepare_retransmit_segment(tcp_connection_t *tc, vlib_buffer_t *b, u32 max_bytes)
Build a retransmit segment.
Definition: tcp_output.c:922
static uword tcp6_send_reset(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
Definition: tcp_output.c:1429
static uword tcp46_output_inline(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame, int is_ip4)
Definition: tcp_output.c:1143
static u8 tcp_make_state_flags(tcp_state_t next_state)
Definition: tcp_output.c:831
void tcp_send_fin(tcp_connection_t *tc)
Send FIN.
Definition: tcp_output.c:811
static u8 tcp_window_compute_scale(u32 available_space)
Definition: tcp_output.c:84
VLIB_NODE_FUNCTION_MULTIARCH(tcp4_output_node, tcp4_output)
u64 uword
Definition: types.h:112
u32 total_length_not_including_first_buffer
Only valid for first buffer in chain.
Definition: buffer.h:103
#define seq_lt(_s1, _s2)
Definition: tcp.h:389
template key/value backing page structure
Definition: bihash_doc.h:44
u32 ip_version_traffic_class_and_flow_label
Definition: ip6_packet.h:328
#define tcp_opts_wscale(_to)
Definition: tcp_packet.h:159
Definition: defs.h:47
unsigned short u16
Definition: types.h:57
u32 tsval
Peer's timestamp value.
Definition: tcp_packet.h:150
u32 tsecr
Echoed/reflected time stamp.
Definition: tcp_packet.h:151
static void * vlib_buffer_push_ip6(vlib_main_t *vm, vlib_buffer_t *b, ip6_address_t *src, ip6_address_t *dst, int proto)
Push IPv6 header to buffer.
Definition: ip6.h:553
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
unsigned char u8
Definition: types.h:56
#define foreach_tcp6_reset_next
Definition: tcp_output.c:1356
static void * vlib_frame_vector_args(vlib_frame_t *f)
Get pointer to frame vector data.
Definition: node_funcs.h:253
void tcp_timer_delack_handler(u32 index)
Definition: tcp_output.c:892
#define TCP_OPTION_LEN_MSS
Definition: tcp_packet.h:166
u32 tcp_initial_window_to_advertise(tcp_connection_t *tc)
Compute initial window and scale factor.
Definition: tcp_output.c:107
#define vnet_buffer(b)
Definition: buffer.h:294
static tcp_connection_t * tcp_connection_get(u32 conn_index, u32 thread_index)
Definition: tcp.h:337
static u32 random_u32(u32 *seed)
32-bit random number generator
Definition: random.h:69
#define VLIB_REGISTER_NODE(x,...)
Definition: node.h:143
#define TCP_OPTION_LEN_NOOP
Definition: tcp_packet.h:165
void tcp_send_syn(tcp_connection_t *tc)
Send SYN.
Definition: tcp_output.c:734
vlib_node_registration_t tcp6_output_node
(constructor) VLIB_REGISTER_NODE (tcp6_output_node)
Definition: tcp_output.c:20
Window scale.
Definition: tcp_packet.h:107
enum _tcp_reset_next tcp_reset_next_t
#define tcp_opts_sack_permitted(_to)
Definition: tcp_packet.h:161
void vlib_put_frame_to_node(vlib_main_t *vm, u32 to_node_index, vlib_frame_t *f)
Definition: main.c:196
u8 ip_version_and_header_length
Definition: ip4_packet.h:131
Timestamps.
Definition: tcp_packet.h:110
vlib_node_registration_t tcp4_reset_node
(constructor) VLIB_REGISTER_NODE (tcp4_reset_node)
Definition: tcp_output.c:1436
vlib_node_registration_t tcp4_output_node
(constructor) VLIB_REGISTER_NODE (tcp4_output_node)
Definition: tcp_output.c:19
u32 flags
Definition: vhost-user.h:78
u32 flags
buffer flags: VLIB_BUFFER_IS_TRACED: trace this buffer.
Definition: buffer.h:74
static tcp_main_t * vnet_get_tcp_main()
Definition: tcp.h:329
#define tcp_fastrecovery_off(tc)
Definition: tcp.h:251
static char * tcp_error_strings[]
Definition: tcp_output.c:37
static uword tcp4_output(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *from_frame)
Definition: tcp_output.c:1279
static vlib_buffer_t * vlib_get_buffer(vlib_main_t *vm, u32 buffer_index)
Translate buffer index into buffer pointer.
Definition: buffer_funcs.h:57
#define tcp_ack(_th)
Definition: tcp_packet.h:83
static u8 tcp_timer_is_active(tcp_connection_t *tc, tcp_timers_e timer)
Definition: tcp.h:511
Definition: defs.h:46
ip6_address_t dst_address
Definition: ip6_packet.h:341
u32 tcp_options_write(u8 *data, tcp_options_t *opts)
Write TCP options to segment.
Definition: tcp_output.c:171
void tcp_send_reset(vlib_buffer_t *pkt, u8 is_ip4)
Send reset without reusing existing buffer.
Definition: tcp_output.c:629
static int tcp_make_options(tcp_connection_t *tc, tcp_options_t *opts, tcp_state_t state)
Definition: tcp_output.c:345