FD.io VPP  v21.10.1-2-g0a485f517
Vector Packet Processing
tcp_packet.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2019 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef included_tcp_packet_h
17 #define included_tcp_packet_h
18 
19 #include <vnet/vnet.h>
20 
21 /* TCP flags bit 0 first. */
/*
 * X-macro listing the TCP header flags, least-significant bit first.
 * Define _(f) before expanding it and #undef it afterwards.
 */
#define foreach_tcp_flag                                        \
  _ (FIN) /**< No more data from sender. */                     \
  _ (SYN) /**< Synchronize sequence numbers. */                 \
  _ (RST) /**< Reset the connection. */                         \
  _ (PSH) /**< Push function. */                                \
  _ (ACK) /**< Ack field significant. */                        \
  _ (URG) /**< Urgent pointer field significant. */             \
  _ (ECE) /**< ECN-echo. Receiver got CE packet */              \
  _ (CWR) /**< Sender reduced congestion window */

/* Bit position of each flag (TCP_FLAG_BIT_FIN == 0, ...), followed by the
 * number of flags. */
enum
{
#define _(f) TCP_FLAG_BIT_##f,
  foreach_tcp_flag
#undef _
    TCP_N_FLAG_BITS,
};

/* Bit mask of each flag, derived from its bit position. */
enum
{
#define _(f) TCP_FLAG_##f = 1 << TCP_FLAG_BIT_##f,
  foreach_tcp_flag
#undef _
};
46 
47 typedef struct _tcp_header
48 {
49  union
50  {
51  struct
52  {
53  u16 src_port; /**< Source port. */
54  u16 dst_port; /**< Destination port. */
55  };
56  struct
57  {
58  u16 src, dst;
59  };
60  };
61 
62  u32 seq_number; /**< Sequence number of the first data octet in this
63  * segment, except when SYN is present. If SYN
64  * is present the seq number is is the ISN and the
65  * first data octet is ISN+1 */
66  u32 ack_number; /**< Acknowledgement number if ACK is set. It contains
67  * the value of the next sequence number the sender
68  * of the segment is expecting to receive. */
69  u8 data_offset_and_reserved;
70  u8 flags; /**< Flags: see the macro above */
71  u16 window; /**< Number of bytes sender is willing to receive. */
72 
73  u16 checksum; /**< Checksum of TCP pseudo header and data. */
74  u16 urgent_pointer; /**< Seq number of the byte after the urgent data. */
75 } __attribute__ ((packed)) tcp_header_t;
76 
/* Header length in 32-bit words: upper nibble of data_offset_and_reserved */
#define tcp_doff(_th) ((_th)->data_offset_and_reserved >> 4)

/* Flag tests that return 0 or !0 (the flag's bit mask when set) */
#define tcp_fin(_th) ((_th)->flags & TCP_FLAG_FIN)
#define tcp_syn(_th) ((_th)->flags & TCP_FLAG_SYN)
#define tcp_rst(_th) ((_th)->flags & TCP_FLAG_RST)
#define tcp_psh(_th) ((_th)->flags & TCP_FLAG_PSH)
#define tcp_ack(_th) ((_th)->flags & TCP_FLAG_ACK)
#define tcp_urg(_th) ((_th)->flags & TCP_FLAG_URG)
#define tcp_ece(_th) ((_th)->flags & TCP_FLAG_ECE)
#define tcp_cwr(_th) ((_th)->flags & TCP_FLAG_CWR)

/* Flag tests that return 0 or 1. The expansion is fully parenthesized so it
 * composes safely with any surrounding operator. */
#define tcp_is_syn(_th) (!!((_th)->flags & TCP_FLAG_SYN))
#define tcp_is_fin(_th) (!!((_th)->flags & TCP_FLAG_FIN))
91 
92 always_inline int
94 {
95  return tcp_doff (t) * sizeof (u32);
96 }
97 
98 /*
99  * TCP options.
100  */
101 
/** Option kind byte, i.e. the first byte of each TCP option. */
typedef enum tcp_option_type
{
  TCP_OPTION_EOL = 0,		 /**< End of options. */
  TCP_OPTION_NOOP = 1,		 /**< No operation. */
  TCP_OPTION_MSS = 2,		 /**< Limit MSS. */
  TCP_OPTION_WINDOW_SCALE = 3,	 /**< Window scale. */
  TCP_OPTION_SACK_PERMITTED = 4, /**< Selective Ack permitted. */
  TCP_OPTION_SACK_BLOCK = 5,	 /**< Selective Ack block. */
  TCP_OPTION_TIMESTAMP = 8,	 /**< Timestamps. */
  TCP_OPTION_UTO = 28,		 /**< User timeout. */
  TCP_OPTION_AO = 29,		 /**< Authentication Option. */
} tcp_option_type_t;
114 
/*
 * X-macro listing the flags kept in tcp_options_t.flags, lowest bit first.
 * Define _(f) before expanding it and #undef it afterwards.
 */
#define foreach_tcp_options_flag                                        \
  _ (MSS)	     /**< MSS advertised in SYN */                      \
  _ (TSTAMP)	     /**< Timestamp capability advertised in SYN */     \
  _ (WSCALE)	     /**< Wnd scale capability advertised in SYN */     \
  _ (SACK_PERMITTED) /**< SACK capability advertised in SYN */          \
  _ (SACK)	     /**< SACK present */

/* Bit position of each option flag, followed by the number of flags. */
enum
{
#define _(f) TCP_OPTS_FLAG_BIT_##f,
  foreach_tcp_options_flag
#undef _
    TCP_OPTIONS_N_FLAG_BITS,
};

/* Bit mask of each option flag, derived from its bit position. */
enum
{
#define _(f) TCP_OPTS_FLAG_##f = 1 << TCP_OPTS_FLAG_BIT_##f,
  foreach_tcp_options_flag
#undef _
};
136 
137 typedef struct _sack_block
138 {
139  u32 start; /**< Start sequence number */
140  u32 end; /**< End sequence number (first outside) */
141 } sack_block_t;
142 
143 typedef struct
144 {
145  sack_block_t *sacks; /**< SACK blocks */
146  u32 tsval; /**< Timestamp value */
147  u32 tsecr; /**< Echoed/reflected time stamp */
148  u16 mss; /**< Maximum segment size advertised */
149  u8 flags; /**< Option flags, see above */
150  u8 wscale; /**< Window scale advertised */
151  u8 n_sack_blocks; /**< Number of SACKs blocks */
152 } tcp_options_t;
153 
/* Option flag tests that return 0 or !0 (the flag's bit mask when set) */
#define tcp_opts_mss(_to)    ((_to)->flags & TCP_OPTS_FLAG_MSS)
#define tcp_opts_tstamp(_to) ((_to)->flags & TCP_OPTS_FLAG_TSTAMP)
#define tcp_opts_wscale(_to) ((_to)->flags & TCP_OPTS_FLAG_WSCALE)
#define tcp_opts_sack(_to)   ((_to)->flags & TCP_OPTS_FLAG_SACK)
#define tcp_opts_sack_permitted(_to)                                          \
  ((_to)->flags & TCP_OPTS_FLAG_SACK_PERMITTED)
160 
/* TCP option lengths in bytes, kind and length bytes included.
 * TCP_OPTION_LEN_SACK_BLOCK is the size of a single SACK block, without the
 * two-byte kind/length prefix of the SACK option. */
#define TCP_OPTION_LEN_EOL	      1
#define TCP_OPTION_LEN_NOOP	      1
#define TCP_OPTION_LEN_MSS	      4
#define TCP_OPTION_LEN_WINDOW_SCALE   3
#define TCP_OPTION_LEN_SACK_PERMITTED 2
#define TCP_OPTION_LEN_TIMESTAMP      10
#define TCP_OPTION_LEN_SACK_BLOCK     8

#define TCP_HDR_LEN_MAX		 60	/* Data offset 15 * 4 bytes */
#define TCP_WND_MAX		 65535U /* Largest unscaled window */
#define TCP_MAX_WND_SCALE	 14	/* See RFC 7323 (obsoletes RFC 1323) */
#define TCP_OPTS_ALIGN		 4
#define TCP_OPTS_MAX_SACK_BLOCKS 3
#define TCP_MAX_GSO_SZ		 65536
176 
/* Modulo arithmetic for TCP sequence numbers: the 32-bit difference is
 * interpreted as signed, so the comparisons stay correct across wrap-around
 * as long as the operands are less than 2^31 apart. */
#define seq_lt(_s1, _s2)  ((i32) ((_s1) - (_s2)) < 0)
#define seq_leq(_s1, _s2) ((i32) ((_s1) - (_s2)) <= 0)
#define seq_gt(_s1, _s2)  ((i32) ((_s1) - (_s2)) > 0)
#define seq_geq(_s1, _s2) ((i32) ((_s1) - (_s2)) >= 0)
/* Note: evaluates the selected argument twice; pass side-effect-free
 * expressions only. */
#define seq_max(_s1, _s2) (seq_gt ((_s1), (_s2)) ? (_s1) : (_s2))

/* Modulo arithmetic for timestamps, same scheme as for sequence numbers */
#define timestamp_lt(_t1, _t2)	((i32) ((_t1) - (_t2)) < 0)
#define timestamp_leq(_t1, _t2) ((i32) ((_t1) - (_t2)) <= 0)
187 
188 /**
189  * Parse TCP header options.
190  *
191  * @param th TCP header
192  * @param to TCP options data structure to be populated
193  * @param is_syn set if packet is syn
194  * @return -1 if parsing failed
195  */
196 always_inline int
198 {
199  const u8 *data;
200  u8 opt_len, opts_len, kind;
201  int j;
202  sack_block_t b;
203 
204  opts_len = (tcp_doff (th) << 2) - sizeof (tcp_header_t);
205  data = (const u8 *) (th + 1);
206 
207  /* Zero out all flags but those set in SYN */
208  to->flags &= (TCP_OPTS_FLAG_SACK_PERMITTED | TCP_OPTS_FLAG_WSCALE
209  | TCP_OPTS_FLAG_TSTAMP | TCP_OPTS_FLAG_MSS);
210 
211  for (; opts_len > 0; opts_len -= opt_len, data += opt_len)
212  {
213  kind = data[0];
214 
215  /* Get options length */
216  if (kind == TCP_OPTION_EOL)
217  break;
218  else if (kind == TCP_OPTION_NOOP)
219  {
220  opt_len = 1;
221  continue;
222  }
223  else
224  {
225  /* broken options */
226  if (opts_len < 2)
227  return -1;
228  opt_len = data[1];
229 
230  /* weird option length */
231  if (opt_len < 2 || opt_len > opts_len)
232  return -1;
233  }
234 
235  /* Parse options */
236  switch (kind)
237  {
238  case TCP_OPTION_MSS:
239  if (!is_syn)
240  break;
241  if ((opt_len == TCP_OPTION_LEN_MSS) && tcp_syn (th))
242  {
243  to->flags |= TCP_OPTS_FLAG_MSS;
244  to->mss = clib_net_to_host_u16 (*(u16 *) (data + 2));
245  }
246  break;
248  if (!is_syn)
249  break;
250  if ((opt_len == TCP_OPTION_LEN_WINDOW_SCALE) && tcp_syn (th))
251  {
252  to->flags |= TCP_OPTS_FLAG_WSCALE;
253  to->wscale = data[2];
254  if (to->wscale > TCP_MAX_WND_SCALE)
255  to->wscale = TCP_MAX_WND_SCALE;
256  }
257  break;
259  if (is_syn)
260  to->flags |= TCP_OPTS_FLAG_TSTAMP;
261  if ((to->flags & TCP_OPTS_FLAG_TSTAMP)
262  && opt_len == TCP_OPTION_LEN_TIMESTAMP)
263  {
264  to->tsval = clib_net_to_host_u32 (*(u32 *) (data + 2));
265  to->tsecr = clib_net_to_host_u32 (*(u32 *) (data + 6));
266  }
267  break;
269  if (!is_syn)
270  break;
271  if (opt_len == TCP_OPTION_LEN_SACK_PERMITTED && tcp_syn (th))
272  to->flags |= TCP_OPTS_FLAG_SACK_PERMITTED;
273  break;
275  /* If SACK permitted was not advertised or a SYN, break */
276  if ((to->flags & TCP_OPTS_FLAG_SACK_PERMITTED) == 0 || tcp_syn (th))
277  break;
278 
279  /* If too short or not correctly formatted, break */
280  if (opt_len < 10 || ((opt_len - 2) % TCP_OPTION_LEN_SACK_BLOCK))
281  break;
282 
283  to->flags |= TCP_OPTS_FLAG_SACK;
284  to->n_sack_blocks = (opt_len - 2) / TCP_OPTION_LEN_SACK_BLOCK;
285  vec_reset_length (to->sacks);
286  for (j = 0; j < to->n_sack_blocks; j++)
287  {
288  b.start = clib_net_to_host_u32 (*(u32 *) (data + 2 + 8 * j));
289  b.end = clib_net_to_host_u32 (*(u32 *) (data + 6 + 8 * j));
290  vec_add1 (to->sacks, b);
291  }
292  break;
293  default:
294  /* Nothing to see here */
295  continue;
296  }
297  }
298  return 0;
299 }
300 
301 /**
302  * Write TCP options to segment.
303  *
304  * @param data buffer where to write the options
305  * @param opts options to write
306  * @return length of options written
307  */
310 {
311  u32 opts_len = 0;
312  u32 buf, seq_len = 4;
313 
314  if (tcp_opts_mss (opts))
315  {
316  *data++ = TCP_OPTION_MSS;
318  buf = clib_host_to_net_u16 (opts->mss);
319  clib_memcpy_fast (data, &buf, sizeof (opts->mss));
320  data += sizeof (opts->mss);
321  opts_len += TCP_OPTION_LEN_MSS;
322  }
323 
324  if (tcp_opts_wscale (opts))
325  {
328  *data++ = opts->wscale;
329  opts_len += TCP_OPTION_LEN_WINDOW_SCALE;
330  }
331 
332  if (tcp_opts_sack_permitted (opts))
333  {
336  opts_len += TCP_OPTION_LEN_SACK_PERMITTED;
337  }
338 
339  if (tcp_opts_tstamp (opts))
340  {
343  buf = clib_host_to_net_u32 (opts->tsval);
344  clib_memcpy_fast (data, &buf, sizeof (opts->tsval));
345  data += sizeof (opts->tsval);
346  buf = clib_host_to_net_u32 (opts->tsecr);
347  clib_memcpy_fast (data, &buf, sizeof (opts->tsecr));
348  data += sizeof (opts->tsecr);
349  opts_len += TCP_OPTION_LEN_TIMESTAMP;
350  }
351 
352  if (tcp_opts_sack (opts))
353  {
354  int i;
355 
356  if (opts->n_sack_blocks != 0)
357  {
360  for (i = 0; i < opts->n_sack_blocks; i++)
361  {
362  buf = clib_host_to_net_u32 (opts->sacks[i].start);
363  clib_memcpy_fast (data, &buf, seq_len);
364  data += seq_len;
365  buf = clib_host_to_net_u32 (opts->sacks[i].end);
366  clib_memcpy_fast (data, &buf, seq_len);
367  data += seq_len;
368  }
369  opts_len += 2 + opts->n_sack_blocks * TCP_OPTION_LEN_SACK_BLOCK;
370  }
371  }
372 
373  /* Terminate TCP options by padding with NOPs to a u32 boundary. Avoid using
374  * EOL because, it seems, it can break peers with broken option parsers that
375  * rely on options ending on a u32 boundary.
376  */
377  while (opts_len % 4)
378  {
379  *data++ = TCP_OPTION_NOOP;
380  opts_len += TCP_OPTION_LEN_NOOP;
381  }
382  return opts_len;
383 }
384 
385 #endif /* included_tcp_packet_h */
386 
387 /*
388  * fd.io coding-style-patch-verification: ON
389  *
390  * Local Variables:
391  * eval: (c-set-style "gnu")
392  * End:
393  */
vec_reset_length
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
Definition: vec_bootstrap.h:194
to
u32 * to
Definition: interface_output.c:1096
tcp_doff
#define tcp_doff(_th)
Definition: tcp_packet.h:78
tcp_options_t::tsval
u32 tsval
Timestamp value.
Definition: tcp_packet.h:146
tcp_header_bytes
static int tcp_header_bytes(tcp_header_t *t)
Definition: tcp_packet.h:93
TCP_OPTION_EOL
@ TCP_OPTION_EOL
End of options.
Definition: tcp_packet.h:104
tcp_opts_tstamp
#define tcp_opts_tstamp(_to)
Definition: tcp_packet.h:156
dst_port
vl_api_ip_port_and_mask_t dst_port
Definition: flow_types.api:92
tcp_options_t::wscale
u8 wscale
Window scale advertised.
Definition: tcp_packet.h:150
TCP_OPTIONS_N_FLAG_BITS
@ TCP_OPTIONS_N_FLAG_BITS
Definition: tcp_packet.h:127
TCP_OPTION_LEN_WINDOW_SCALE
#define TCP_OPTION_LEN_WINDOW_SCALE
Definition: tcp_packet.h:165
tcp_header_t
struct _tcp_header tcp_header_t
TCP_OPTION_WINDOW_SCALE
@ TCP_OPTION_WINDOW_SCALE
Window scale.
Definition: tcp_packet.h:107
u16
unsigned short u16
Definition: types.h:57
tcp_options_t::sacks
sack_block_t * sacks
SACK blocks.
Definition: tcp_packet.h:145
TCP_OPTION_SACK_PERMITTED
@ TCP_OPTION_SACK_PERMITTED
Selective Ack permitted.
Definition: tcp_packet.h:108
TCP_OPTION_NOOP
@ TCP_OPTION_NOOP
No operation.
Definition: tcp_packet.h:105
tcp_options_t::tsecr
u32 tsecr
Echoed/reflected time stamp.
Definition: tcp_packet.h:147
tcp_options_t::mss
u16 mss
Maximum segment size advertised.
Definition: tcp_packet.h:148
tcp_options_t::n_sack_blocks
u8 n_sack_blocks
Number of SACKs blocks.
Definition: tcp_packet.h:151
clib_memcpy_fast
static_always_inline void * clib_memcpy_fast(void *restrict dst, const void *restrict src, size_t n)
Definition: string.h:92
tcp_syn
#define tcp_syn(_th)
Definition: tcp_packet.h:80
TCP_OPTION_TIMESTAMP
@ TCP_OPTION_TIMESTAMP
Timestamps.
Definition: tcp_packet.h:110
tcp_options_t
Definition: tcp_packet.h:143
tcp_options_t::flags
u8 flags
Option flags, see above.
Definition: tcp_packet.h:149
vec_add1
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:606
tcp_opts_wscale
#define tcp_opts_wscale(_to)
Definition: tcp_packet.h:157
TCP_OPTION_AO
@ TCP_OPTION_AO
Authentication Option.
Definition: tcp_packet.h:112
TCP_OPTION_SACK_BLOCK
@ TCP_OPTION_SACK_BLOCK
Selective Ack block.
Definition: tcp_packet.h:109
foreach_tcp_flag
#define foreach_tcp_flag
Sender reduced congestion window.
Definition: tcp_packet.h:22
end
f64 end
end of the time range
Definition: mactime.api:44
tcp_opts_mss
#define tcp_opts_mss(_to)
Definition: tcp_packet.h:155
src_port
vl_api_ip_port_and_mask_t src_port
Definition: flow_types.api:91
tcp_opts_sack_permitted
#define tcp_opts_sack_permitted(_to)
Definition: tcp_packet.h:159
foreach_tcp_options_flag
#define foreach_tcp_options_flag
SACK present.
Definition: tcp_packet.h:115
tcp_options_write
static u32 tcp_options_write(u8 *data, tcp_options_t *opts)
Write TCP options to segment.
Definition: tcp_packet.h:309
src
vl_api_address_t src
Definition: gre.api:54
sack_block_t
struct _sack_block sack_block_t
tcp_opts_sack
#define tcp_opts_sack(_to)
Definition: tcp_packet.h:158
TCP_MAX_WND_SCALE
#define TCP_MAX_WND_SCALE
Definition: tcp_packet.h:172
tcp_option_type_t
enum tcp_option_type tcp_option_type_t
data
u8 data[128]
Definition: ipsec_types.api:95
TCP_OPTION_UTO
@ TCP_OPTION_UTO
User timeout.
Definition: tcp_packet.h:111
always_inline
#define always_inline
Definition: rdma_mlx5dv.h:23
TCP_OPTION_LEN_TIMESTAMP
#define TCP_OPTION_LEN_TIMESTAMP
Definition: tcp_packet.h:167
tcp_option_type
tcp_option_type
Definition: tcp_packet.h:102
TCP_OPTION_LEN_SACK_PERMITTED
#define TCP_OPTION_LEN_SACK_PERMITTED
Definition: tcp_packet.h:166
buf
u64 buf
Definition: application.c:493
u32
unsigned int u32
Definition: types.h:88
dst
vl_api_ip4_address_t dst
Definition: pnat.api:41
TCP_N_FLAG_BITS
@ TCP_N_FLAG_BITS
Definition: tcp_packet.h:37
TCP_OPTION_MSS
@ TCP_OPTION_MSS
Limit MSS.
Definition: tcp_packet.h:106
TCP_OPTION_LEN_NOOP
#define TCP_OPTION_LEN_NOOP
Definition: tcp_packet.h:163
b
vlib_buffer_t ** b
Definition: nat44_ei_out2in.c:717
u8
unsigned char u8
Definition: types.h:56
i
int i
Definition: flowhash_template.h:376
vnet.h
TCP_OPTION_LEN_MSS
#define TCP_OPTION_LEN_MSS
Definition: tcp_packet.h:164
tcp_options_parse
static int tcp_options_parse(tcp_header_t *th, tcp_options_t *to, u8 is_syn)
Parse TCP header options.
Definition: tcp_packet.h:197
TCP_OPTION_LEN_SACK_BLOCK
#define TCP_OPTION_LEN_SACK_BLOCK
Definition: tcp_packet.h:168
flags
vl_api_wireguard_peer_flags_t flags
Definition: wireguard.api:105