FD.io VPP  v20.05.1-6-gf53edbc3b
Vector Packet Processing
tap.c
Go to the documentation of this file.
1 /*
2  *------------------------------------------------------------------
3  * Copyright (c) 2017 Cisco and/or its affiliates.
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at:
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *------------------------------------------------------------------
16  */
17 
18 #define _GNU_SOURCE
19 #include <sys/types.h>
20 #include <sys/stat.h>
21 #include <fcntl.h>
22 #include <net/if.h>
23 #include <linux/if_tun.h>
24 #include <sys/ioctl.h>
25 #include <linux/virtio_net.h>
26 #include <linux/vhost.h>
27 #include <sys/eventfd.h>
28 #include <net/if_arp.h>
29 #include <sched.h>
30 #include <limits.h>
31 
32 #include <linux/netlink.h>
33 #include <linux/rtnetlink.h>
34 
35 #include <vlib/vlib.h>
36 #include <vlib/physmem.h>
37 #include <vlib/unix/unix.h>
38 #include <vnet/ethernet/ethernet.h>
39 #include <vnet/ip/ip4_packet.h>
40 #include <vnet/ip/ip6_packet.h>
41 #include <vnet/devices/netlink.h>
43 #include <vnet/devices/tap/tap.h>
44 
46 
/*
 * Logging helpers for tap/tun devices.
 *
 * Both macros forward to vlib_log () using the tap_main.log_default log
 * class and prefix the caller's format string `f` with "tap<N>: ", where
 * <N> is dev->dev_instance.  `f` must be a string literal because it is
 * concatenated with the prefix at compile time.
 *
 * `dev` is parenthesized so that any pointer expression (not only a plain
 * identifier) can be passed safely.
 */
#define tap_log_err(dev, f, ...) \
  vlib_log (VLIB_LOG_LEVEL_ERR, tap_main.log_default, "tap%u: " f, \
	    (dev)->dev_instance, ## __VA_ARGS__)
#define tap_log_dbg(dev, f, ...) \
  vlib_log (VLIB_LOG_LEVEL_DEBUG, tap_main.log_default, "tap%u: " f, \
	    (dev)->dev_instance, ## __VA_ARGS__)
51 
/*
 * Issue an ioctl and bail out of the enclosing function on failure.
 *
 * The macro is not self-contained; the expansion site must provide:
 *   - an assignable `err`, which receives the result of
 *     clib_error_return_unix () describing the failed request,
 *   - a `vif` usable by tap_log_err (), and
 *   - an `error:` label, which is jumped to when ioctl () returns < 0.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement: it needs its trailing semicolon and composes
 * correctly with un-braced if/else and loop bodies.
 */
#define _IOCTL(fd,a,...) \
  do \
    { \
      if (ioctl ((fd), a, __VA_ARGS__) < 0) \
	{ \
	  err = clib_error_return_unix (0, "ioctl(" #a ")"); \
	  tap_log_err (vif, "%U", format_clib_error, err); \
	  goto error; \
	} \
    } \
  while (0)
59 
60  /* *INDENT-OFF* */
61 VNET_HW_INTERFACE_CLASS (tun_device_hw_interface_class, static) =
62 {
63  .name = "tun-device",
65 };
66  /* *INDENT-ON* */
67 
68 static u32
70  u32 flags)
71 {
72  /* nothing for now */
73  //TODO On MTU change call vnet_netlink_set_if_mtu
74  return 0;
75 }
76 
77 static int
78 open_netns_fd (char *netns)
79 {
80  u8 *s = 0;
81  int fd;
82 
83  if (strncmp (netns, "pid:", 4) == 0)
84  s = format (0, "/proc/%u/ns/net%c", atoi (netns + 4), 0);
85  else if (netns[0] == '/')
86  s = format (0, "%s%c", netns, 0);
87  else
88  s = format (0, "/var/run/netns/%s%c", netns, 0);
89 
90  fd = open ((char *) s, O_RDONLY);
91  vec_free (s);
92  return fd;
93 }
94 
95 #define TAP_MAX_INSTANCE 1024
96 
97 static void
99 {
100  virtio_main_t *mm = &virtio_main;
101  tap_main_t *tm = &tap_main;
102  clib_error_t *err = 0;
103  int i;
104 
105  /* *INDENT-OFF* */
106  vec_foreach_index (i, vif->vhost_fds) if (vif->vhost_fds[i] != -1)
107  close (vif->vhost_fds[i]);
108  vec_foreach_index (i, vif->rxq_vrings)
109  virtio_vring_free_rx (vm, vif, RX_QUEUE (i));
110  vec_foreach_index (i, vif->txq_vrings)
111  virtio_vring_free_tx (vm, vif, TX_QUEUE (i));
112  /* *INDENT-ON* */
113 
114  _IOCTL (vif->tap_fds[0], TUNSETPERSIST, (void *) (uintptr_t) 0);
115  tap_log_dbg (vif, "TUNSETPERSIST: unset");
116 error:
117  vec_foreach_index (i, vif->tap_fds) close (vif->tap_fds[i]);
118 
119  vec_free (vif->vhost_fds);
120  vec_free (vif->rxq_vrings);
121  vec_free (vif->txq_vrings);
122  vec_free (vif->host_if_name);
123  vec_free (vif->net_ns);
124  vec_free (vif->host_bridge);
125  clib_error_free (vif->error);
126 
127  tm->tap_ids = clib_bitmap_set (tm->tap_ids, vif->id, 0);
128  clib_memset (vif, 0, sizeof (*vif));
129  pool_put (mm->interfaces, vif);
130 }
131 
132 void
134 {
136  vlib_physmem_main_t *vpm = &vm->physmem_main;
137  vnet_main_t *vnm = vnet_get_main ();
138  virtio_main_t *vim = &virtio_main;
139  tap_main_t *tm = &tap_main;
142  int i, num_vhost_queues;
143  int old_netns_fd = -1;
144  struct ifreq ifr = {.ifr_flags = IFF_NO_PI | IFF_VNET_HDR };
145  struct ifreq get_ifr = {.ifr_flags = 0 };
146  size_t hdrsz;
147  struct vhost_memory *vhost_mem = 0;
148  virtio_if_t *vif = 0;
149  clib_error_t *err = 0;
150  unsigned int tap_features;
151  int tfd = -1, qfd = -1, vfd = -1, nfd = -1;
152  char *host_if_name = 0;
153  unsigned int offload = 0;
154  int sndbuf = 0;
155 
156  if (args->id != ~0)
157  {
158  if (clib_bitmap_get (tm->tap_ids, args->id))
159  {
160  args->rv = VNET_API_ERROR_INVALID_INTERFACE;
161  args->error = clib_error_return (0, "interface already exists");
162  return;
163  }
164  }
165  else
166  {
167  args->id = clib_bitmap_first_clear (tm->tap_ids);
168  }
169 
170  if (args->id > TAP_MAX_INSTANCE)
171  {
172  args->rv = VNET_API_ERROR_UNSPECIFIED;
173  args->error = clib_error_return (0, "cannot find free interface id");
174  return;
175  }
176 
177  pool_get_zero (vim->interfaces, vif);
178 
179  if (args->tap_flags & TAP_FLAG_TUN)
180  {
181  vif->type = VIRTIO_IF_TYPE_TUN;
182  ifr.ifr_flags |= IFF_TUN;
183  args->tap_flags &= ~(TAP_FLAG_GSO | TAP_FLAG_CSUM_OFFLOAD);
184 
185  /*
186  * From kernel 4.20, xdp support has been added in tun_sendmsg.
187  * If sndbuf == INT_MAX, vhost batches the packet and processes
188  * them using xdp data path for tun driver. It assumes packets
189  * are ethernet frames (It needs to be fixed).
190  * To avoid xdp data path in tun driver, sndbuf value should
191  * be < INT_MAX.
192  */
193  sndbuf = INT_MAX - 1;
194  }
195  else
196  {
197  vif->type = VIRTIO_IF_TYPE_TAP;
198  ifr.ifr_flags |= IFF_TAP;
199  sndbuf = INT_MAX;
200  }
201 
202  vif->dev_instance = vif - vim->interfaces;
203  vif->id = args->id;
204  vif->num_txqs = thm->n_vlib_mains;
205  vif->num_rxqs = clib_max (args->num_rx_queues, 1);
206 
207  if (args->tap_flags & TAP_FLAG_ATTACH)
208  {
209  if (args->host_if_name != NULL)
210  {
211  host_if_name = (char *) args->host_if_name;
212  clib_memcpy (ifr.ifr_name, host_if_name,
213  clib_min (IFNAMSIZ, strlen (host_if_name)));
214  }
215  else
216  {
217  args->rv = VNET_API_ERROR_NO_MATCHING_INTERFACE;
218  err = clib_error_return (0, "host_if_name is not provided");
219  goto error;
220  }
221  if (args->host_namespace)
222  {
223  old_netns_fd = open ("/proc/self/ns/net", O_RDONLY);
224  if ((nfd = open_netns_fd ((char *) args->host_namespace)) == -1)
225  {
226  args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
227  args->error = clib_error_return_unix (0, "open_netns_fd '%s'",
228  args->host_namespace);
229  goto error;
230  }
231  if (setns (nfd, CLONE_NEWNET) == -1)
232  {
233  args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
234  args->error = clib_error_return_unix (0, "setns '%s'",
235  args->host_namespace);
236  goto error;
237  }
238  }
239  }
240 
241  if ((tfd = open ("/dev/net/tun", O_RDWR | O_NONBLOCK)) < 0)
242  {
243  args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
244  args->error = clib_error_return_unix (0, "open '/dev/net/tun'");
245  goto error;
246  }
247  vec_add1 (vif->tap_fds, tfd);
248  tap_log_dbg (vif, "open tap fd %d", tfd);
249 
250  _IOCTL (tfd, TUNGETFEATURES, &tap_features);
251  tap_log_dbg (vif, "TUNGETFEATURES: features 0x%lx", tap_features);
252  if ((tap_features & IFF_VNET_HDR) == 0)
253  {
254  args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
255  args->error = clib_error_return (0, "vhost-net backend not available");
256  goto error;
257  }
258 
259  if ((tap_features & IFF_MULTI_QUEUE) == 0)
260  {
261  if (vif->num_rxqs > 1)
262  {
263  args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
264  args->error = clib_error_return (0, "multiqueue not supported");
265  goto error;
266  }
267  vif->num_rxqs = vif->num_txqs = 1;
268  }
269  else
270  ifr.ifr_flags |= IFF_MULTI_QUEUE;
271 
272  hdrsz = sizeof (struct virtio_net_hdr_v1);
273  if (args->tap_flags & TAP_FLAG_GSO)
274  {
275  offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
276  vif->gso_enabled = 1;
277  }
278  else if (args->tap_flags & TAP_FLAG_CSUM_OFFLOAD)
279  {
280  offload = TUN_F_CSUM;
281  vif->csum_offload_enabled = 1;
282  }
283 
284  _IOCTL (tfd, TUNSETIFF, (void *) &ifr);
285  tap_log_dbg (vif, "TUNSETIFF fd %d name %s flags 0x%x", tfd,
286  ifr.ifr_ifrn.ifrn_name, ifr.ifr_flags);
287 
288  vif->ifindex = if_nametoindex (ifr.ifr_ifrn.ifrn_name);
289  tap_log_dbg (vif, "ifindex %d", vif->ifindex);
290 
291  if (!args->host_if_name)
292  host_if_name = ifr.ifr_ifrn.ifrn_name;
293  else
294  host_if_name = (char *) args->host_if_name;
295 
296  /*
297  * unset the persistence when attaching to existing
298  * interface
299  */
300  if (args->tap_flags & TAP_FLAG_ATTACH)
301  {
302  _IOCTL (tfd, TUNSETPERSIST, (void *) (uintptr_t) 0);
303  tap_log_dbg (vif, "TUNSETPERSIST: unset");
304  }
305 
306  /* set the persistence */
307  if (args->tap_flags & TAP_FLAG_PERSIST)
308  {
309  _IOCTL (tfd, TUNSETPERSIST, (void *) (uintptr_t) 1);
310  tap_log_dbg (vif, "TUNSETPERSIST: set");
311 
312  /* verify persistence is set, read the flags */
313  _IOCTL (tfd, TUNGETIFF, (void *) &get_ifr);
314  tap_log_dbg (vif, "TUNGETIFF: flags 0x%lx", get_ifr.ifr_flags);
315  if ((get_ifr.ifr_flags & IFF_PERSIST) == 0)
316  {
317  args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
318  args->error = clib_error_return (0, "persistence not supported");
319  goto error;
320  }
321  }
322 
323  /* create additional queues on the linux side.
324  * we create as many linux queue pairs as we have rx queues
325  */
326  for (i = 1; i < vif->num_rxqs; i++)
327  {
328  if ((qfd = open ("/dev/net/tun", O_RDWR | O_NONBLOCK)) < 0)
329  {
330  args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
331  args->error = clib_error_return_unix (0, "open '/dev/net/tun'");
332  goto error;
333  }
334  _IOCTL (qfd, TUNSETIFF, (void *) &ifr);
335  tap_log_dbg (vif, "TUNSETIFF fd %d name %s flags 0x%x", qfd,
336  ifr.ifr_ifrn.ifrn_name, ifr.ifr_flags);
337  vec_add1 (vif->tap_fds, qfd);
338  }
339 
340  for (i = 0; i < vif->num_rxqs; i++)
341  {
342  tap_log_dbg (vif, "TUNSETVNETHDRSZ: fd %d vnet_hdr_sz %u",
343  vif->tap_fds[i], hdrsz);
344  _IOCTL (vif->tap_fds[i], TUNSETVNETHDRSZ, &hdrsz);
345 
346  tap_log_dbg (vif, "TUNSETSNDBUF: fd %d sndbuf %d", vif->tap_fds[i],
347  sndbuf);
348  _IOCTL (vif->tap_fds[i], TUNSETSNDBUF, &sndbuf);
349 
350  tap_log_dbg (vif, "TUNSETOFFLOAD: fd %d offload 0x%lx", vif->tap_fds[i],
351  offload);
352  _IOCTL (vif->tap_fds[i], TUNSETOFFLOAD, offload);
353 
354  if (fcntl (vif->tap_fds[i], F_SETFL, O_NONBLOCK) < 0)
355  {
356  err = clib_error_return_unix (0, "fcntl(tfd, F_SETFL, O_NONBLOCK)");
357  tap_log_err (vif, "set nonblocking: %U", format_clib_error, err);
358  goto error;
359  }
360  }
361 
362  /* open as many vhost-net fds as required and set ownership */
363  num_vhost_queues = clib_max (vif->num_rxqs, vif->num_txqs);
364  for (i = 0; i < num_vhost_queues; i++)
365  {
366  if ((vfd = open ("/dev/vhost-net", O_RDWR | O_NONBLOCK)) < 0)
367  {
368  args->rv = VNET_API_ERROR_SYSCALL_ERROR_1;
369  args->error = clib_error_return_unix (0, "open '/dev/vhost-net'");
370  goto error;
371  }
372  vec_add1 (vif->vhost_fds, vfd);
373  virtio_log_debug (vif, "open vhost-net fd %d qpair %u", vfd, i);
374  _IOCTL (vfd, VHOST_SET_OWNER, 0);
375  virtio_log_debug (vif, "VHOST_SET_OWNER: fd %u", vfd);
376  }
377 
378  _IOCTL (vif->vhost_fds[0], VHOST_GET_FEATURES, &vif->remote_features);
379  virtio_log_debug (vif, "VHOST_GET_FEATURES: features 0x%lx",
380  vif->remote_features);
381 
382  if ((vif->remote_features & VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF)) == 0)
383  {
384  args->rv = VNET_API_ERROR_UNSUPPORTED;
385  args->error = clib_error_return (0, "vhost-net backend doesn't support "
386  "VIRTIO_NET_F_MRG_RXBUF feature");
387  goto error;
388  }
389 
390  if ((vif->remote_features & VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC)) ==
391  0)
392  {
393  args->rv = VNET_API_ERROR_UNSUPPORTED;
394  args->error = clib_error_return (0, "vhost-net backend doesn't support "
395  "VIRTIO_RING_F_INDIRECT_DESC feature");
396  goto error;
397  }
398 
399  if ((vif->remote_features & VIRTIO_FEATURE (VIRTIO_F_VERSION_1)) == 0)
400  {
401  args->rv = VNET_API_ERROR_UNSUPPORTED;
402  args->error = clib_error_return (0, "vhost-net backend doesn't support "
403  "VIRTIO_F_VERSION_1 features");
404  goto error;
405  }
406 
407  vif->features |= VIRTIO_FEATURE (VIRTIO_NET_F_MRG_RXBUF);
408  vif->features |= VIRTIO_FEATURE (VIRTIO_F_VERSION_1);
409  vif->features |= VIRTIO_FEATURE (VIRTIO_RING_F_INDIRECT_DESC);
410 
412 
413  if (!(args->tap_flags & TAP_FLAG_ATTACH))
414  {
415  /* if namespace is specified, all further netlink messages should be executed
416  after we change our net namespace */
417  if (args->host_namespace)
418  {
419  old_netns_fd = open ("/proc/self/ns/net", O_RDONLY);
420  if ((nfd = open_netns_fd ((char *) args->host_namespace)) == -1)
421  {
422  args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
423  args->error = clib_error_return_unix (0, "open_netns_fd '%s'",
424  args->host_namespace);
425  goto error;
426  }
427  args->error = vnet_netlink_set_link_netns (vif->ifindex, nfd,
428  host_if_name);
429  if (args->error)
430  {
431  args->rv = VNET_API_ERROR_NETLINK_ERROR;
432  goto error;
433  }
434  if (setns (nfd, CLONE_NEWNET) == -1)
435  {
436  args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
437  args->error = clib_error_return_unix (0, "setns '%s'",
438  args->host_namespace);
439  goto error;
440  }
441  if ((vif->ifindex = if_nametoindex (host_if_name)) == 0)
442  {
443  args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
444  args->error = clib_error_return_unix (0, "if_nametoindex '%s'",
445  host_if_name);
446  goto error;
447  }
448  }
449  else if (host_if_name)
450  {
451  args->error =
452  vnet_netlink_set_link_name (vif->ifindex, host_if_name);
453  if (args->error)
454  {
455  args->rv = VNET_API_ERROR_NETLINK_ERROR;
456  goto error;
457  }
458  }
459  }
460 
461  if (vif->type == VIRTIO_IF_TYPE_TAP)
462  {
466  args->host_mac_addr.bytes);
467  if (args->error)
468  {
469  args->rv = VNET_API_ERROR_NETLINK_ERROR;
470  goto error;
471  }
472 
473  if (args->host_bridge)
474  {
476  (char *)
477  args->host_bridge);
478  if (args->error)
479  {
480  args->rv = VNET_API_ERROR_NETLINK_ERROR;
481  goto error;
482  }
483  }
484  }
485 
486  if (args->host_ip4_prefix_len)
487  {
489  &args->host_ip4_addr,
490  args->host_ip4_prefix_len);
491  if (args->error)
492  {
493  args->rv = VNET_API_ERROR_NETLINK_ERROR;
494  goto error;
495  }
496  }
497 
498  if (args->host_ip6_prefix_len)
499  {
501  &args->host_ip6_addr,
502  args->host_ip6_prefix_len);
503  if (args->error)
504  {
505  args->rv = VNET_API_ERROR_NETLINK_ERROR;
506  goto error;
507  }
508  }
509 
510  args->error = vnet_netlink_set_link_state (vif->ifindex, 1 /* UP */ );
511  if (args->error)
512  {
513  args->rv = VNET_API_ERROR_NETLINK_ERROR;
514  goto error;
515  }
516 
517  if (args->host_ip4_gw_set)
518  {
519  args->error = vnet_netlink_add_ip4_route (0, 0, &args->host_ip4_gw);
520  if (args->error)
521  {
522  args->rv = VNET_API_ERROR_NETLINK_ERROR;
523  goto error;
524  }
525  }
526 
527  if (args->host_ip6_gw_set)
528  {
529  args->error = vnet_netlink_add_ip6_route (0, 0, &args->host_ip6_gw);
530  if (args->error)
531  {
532  args->rv = VNET_API_ERROR_NETLINK_ERROR;
533  goto error;
534  }
535  }
536 
537  if (args->host_mtu_set)
538  {
539  args->error =
541  if (args->error)
542  {
543  args->rv = VNET_API_ERROR_NETLINK_ERROR;
544  goto error;
545  }
546  }
547  else if (tm->host_mtu_size != 0)
548  {
549  args->error =
551  if (args->error)
552  {
553  args->rv = VNET_API_ERROR_NETLINK_ERROR;
554  goto error;
555  }
556  args->host_mtu_set = 1;
557  args->host_mtu_size = tm->host_mtu_size;
558  }
559 
560  /* switch back to old net namespace */
561  if (args->host_namespace)
562  {
563  if (setns (old_netns_fd, CLONE_NEWNET) == -1)
564  {
565  args->rv = VNET_API_ERROR_SYSCALL_ERROR_2;
566  args->error = clib_error_return_unix (0, "setns '%s'",
567  args->host_namespace);
568  goto error;
569  }
570  }
571 
572  for (i = 0; i < num_vhost_queues; i++)
573  {
574  if (i < vif->num_rxqs && (args->error =
575  virtio_vring_init (vm, vif, RX_QUEUE (i),
576  args->rx_ring_sz)))
577  {
578  args->rv = VNET_API_ERROR_INIT_FAILED;
579  goto error;
580  }
581 
582  if (i < vif->num_txqs && (args->error =
583  virtio_vring_init (vm, vif, TX_QUEUE (i),
584  args->tx_ring_sz)))
585  {
586  args->rv = VNET_API_ERROR_INIT_FAILED;
587  goto error;
588  }
589  }
590 
591  /* setup features and memtable */
592  i = sizeof (struct vhost_memory) + sizeof (struct vhost_memory_region);
593  vhost_mem = clib_mem_alloc (i);
594  clib_memset (vhost_mem, 0, i);
595  vhost_mem->nregions = 1;
596  vhost_mem->regions[0].memory_size = vpm->max_size;
597  vhost_mem->regions[0].guest_phys_addr = vpm->base_addr;
598  vhost_mem->regions[0].userspace_addr =
599  vhost_mem->regions[0].guest_phys_addr;
600 
601  for (i = 0; i < vhost_mem->nregions; i++)
602  virtio_log_debug (vif, "memtable region %u memory_size 0x%lx "
603  "guest_phys_addr 0x%lx userspace_addr 0x%lx", i,
604  vhost_mem->regions[0].memory_size,
605  vhost_mem->regions[0].guest_phys_addr,
606  vhost_mem->regions[0].userspace_addr);
607 
608 
609  for (i = 0; i < num_vhost_queues; i++)
610  {
611  int fd = vif->vhost_fds[i];
612  _IOCTL (fd, VHOST_SET_FEATURES, &vif->features);
613  virtio_log_debug (vif, "VHOST_SET_FEATURES: fd %u features 0x%lx",
614  fd, vif->features);
615  _IOCTL (fd, VHOST_SET_MEM_TABLE, vhost_mem);
616  virtio_log_debug (vif, "VHOST_SET_MEM_TABLE: fd %u", fd);
617  }
618 
619  /* finish initializing queue pair */
620  for (i = 0; i < num_vhost_queues * 2; i++)
621  {
622  struct vhost_vring_addr addr = { 0 };
623  struct vhost_vring_state state = { 0 };
624  struct vhost_vring_file file = { 0 };
625  virtio_vring_t *vring;
626  u16 qp = i >> 1;
627  int fd = vif->vhost_fds[qp];
628 
629  if (i & 1)
630  {
631  if (qp >= vif->num_txqs)
632  continue;
633  vring = vec_elt_at_index (vif->txq_vrings, qp);
634  }
635  else
636  {
637  if (qp >= vif->num_rxqs)
638  continue;
639  vring = vec_elt_at_index (vif->rxq_vrings, qp);
640  }
641 
642  addr.index = state.index = file.index = vring->queue_id & 1;
643  state.num = vring->size;
644  virtio_log_debug (vif, "VHOST_SET_VRING_NUM fd %d index %u num %u", fd,
645  state.index, state.num);
646  _IOCTL (fd, VHOST_SET_VRING_NUM, &state);
647 
648  addr.flags = 0;
649  addr.desc_user_addr = pointer_to_uword (vring->desc);
650  addr.avail_user_addr = pointer_to_uword (vring->avail);
651  addr.used_user_addr = pointer_to_uword (vring->used);
652 
653  virtio_log_debug (vif, "VHOST_SET_VRING_ADDR fd %d index %u flags 0x%x "
654  "desc_user_addr 0x%lx avail_user_addr 0x%lx "
655  "used_user_addr 0x%lx", fd, addr.index,
656  addr.flags, addr.desc_user_addr, addr.avail_user_addr,
657  addr.used_user_addr);
658  _IOCTL (fd, VHOST_SET_VRING_ADDR, &addr);
659 
660  file.fd = vring->call_fd;
661  virtio_log_debug (vif, "VHOST_SET_VRING_CALL fd %d index %u call_fd %d",
662  fd, file.index, file.fd);
663  _IOCTL (fd, VHOST_SET_VRING_CALL, &file);
664 
665  file.fd = vring->kick_fd;
666  virtio_log_debug (vif, "VHOST_SET_VRING_KICK fd %d index %u kick_fd %d",
667  fd, file.index, file.fd);
668  _IOCTL (fd, VHOST_SET_VRING_KICK, &file);
669 
670  file.fd = vif->tap_fds[qp % vif->num_rxqs];
671  virtio_log_debug (vif, "VHOST_NET_SET_BACKEND fd %d index %u tap_fd %d",
672  fd, file.index, file.fd);
673  _IOCTL (fd, VHOST_NET_SET_BACKEND, &file);
674  }
675 
676  if (vif->type == VIRTIO_IF_TYPE_TAP)
677  {
678  if (!args->mac_addr_set)
680 
681  clib_memcpy (vif->mac_addr, args->mac_addr.bytes, 6);
682  vif->host_bridge = format (0, "%s%c", args->host_bridge, 0);
683  }
684  vif->host_if_name = format (0, "%s%c", host_if_name, 0);
685  vif->net_ns = format (0, "%s%c", args->host_namespace, 0);
686  vif->host_mtu_size = args->host_mtu_size;
687  vif->tap_flags = args->tap_flags;
688  clib_memcpy (vif->host_mac_addr, args->host_mac_addr.bytes, 6);
691  if (args->host_ip4_prefix_len)
692  clib_memcpy (&vif->host_ip4_addr, &args->host_ip4_addr, 4);
693  if (args->host_ip6_prefix_len)
694  clib_memcpy (&vif->host_ip6_addr, &args->host_ip6_addr, 16);
695 
696  if (vif->type != VIRTIO_IF_TYPE_TUN)
697  {
698  args->error =
700  vif->dev_instance, vif->mac_addr,
701  &vif->hw_if_index,
703  if (args->error)
704  {
705  args->rv = VNET_API_ERROR_INVALID_REGISTRATION;
706  goto error;
707  }
708 
709  }
710  else
711  {
713  (vnm, virtio_device_class.index,
714  vif->dev_instance /* device instance */ ,
715  tun_device_hw_interface_class.index, vif->dev_instance);
716 
717  }
718  tm->tap_ids = clib_bitmap_set (tm->tap_ids, vif->id, 1);
719  sw = vnet_get_hw_sw_interface (vnm, vif->hw_if_index);
720  vif->sw_if_index = sw->sw_if_index;
721  args->sw_if_index = vif->sw_if_index;
722  args->rv = 0;
723  hw = vnet_get_hw_interface (vnm, vif->hw_if_index);
725  if (args->tap_flags & TAP_FLAG_GSO)
726  {
729  }
730  else if (args->tap_flags & TAP_FLAG_CSUM_OFFLOAD)
731  {
733  }
735  virtio_input_node.index);
736 
737  for (i = 0; i < vif->num_rxqs; i++)
738  {
742  virtio_vring_set_numa_node (vm, vif, RX_QUEUE (i));
743  }
744 
745  vif->per_interface_next_index = ~0;
746  vif->flags |= VIRTIO_IF_FLAG_ADMIN_UP;
749  vif->cxq_vring = NULL;
750 
751  goto done;
752 
753 error:
754  if (err)
755  {
756  ASSERT (args->error == 0);
757  args->error = err;
758  args->rv = VNET_API_ERROR_SYSCALL_ERROR_3;
759  }
760 
761  tap_log_err (vif, "%U", format_clib_error, args->error);
762  tap_free (vm, vif);
763 done:
764  if (vhost_mem)
765  clib_mem_free (vhost_mem);
766  if (old_netns_fd != -1)
767  close (old_netns_fd);
768  if (nfd != -1)
769  close (nfd);
770 }
771 
772 int
774 {
775  vnet_main_t *vnm = vnet_get_main ();
776  virtio_main_t *mm = &virtio_main;
777  int i;
778  virtio_if_t *vif;
780 
781  hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index);
782  if (hw == NULL || virtio_device_class.index != hw->dev_class_index)
783  return VNET_API_ERROR_INVALID_SW_IF_INDEX;
784 
785  vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
786 
787  if ((vif->type != VIRTIO_IF_TYPE_TAP) && (vif->type != VIRTIO_IF_TYPE_TUN))
788  return VNET_API_ERROR_INVALID_INTERFACE;
789 
790  /* bring down the interface */
793  for (i = 0; i < vif->num_rxqs; i++)
795 
796  if (vif->type == VIRTIO_IF_TYPE_TAP)
798  else /* VIRTIO_IF_TYPE_TUN */
800  vif->hw_if_index = ~0;
801 
802  tap_free (vm, vif);
803 
804  return 0;
805 }
806 
807 int
809  int enable_disable)
810 {
811  vnet_main_t *vnm = vnet_get_main ();
812  virtio_main_t *mm = &virtio_main;
813  virtio_if_t *vif;
815  clib_error_t *err = 0;
816  int i = 0;
817 
818  hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index);
819 
820  if (hw == NULL || virtio_device_class.index != hw->dev_class_index)
821  return VNET_API_ERROR_INVALID_SW_IF_INDEX;
822 
823  vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
824 
825  if (vif->type == VIRTIO_IF_TYPE_TUN)
826  return VNET_API_ERROR_UNIMPLEMENTED;
827 
828  const unsigned int csum_offload_on = TUN_F_CSUM;
829  const unsigned int csum_offload_off = 0;
830  unsigned int offload = enable_disable ? csum_offload_on : csum_offload_off;
831  vec_foreach_index (i, vif->tap_fds)
832  _IOCTL (vif->tap_fds[i], TUNSETOFFLOAD, offload);
833  vif->gso_enabled = 0;
834  vif->csum_offload_enabled = enable_disable ? 1 : 0;
835 
837  {
839  }
840 
841  if (enable_disable)
842  {
844  0)
845  {
847  }
848  }
849  else
850  {
852  0)
853  {
855  }
856  }
857 
858 error:
859  if (err)
860  {
861  clib_warning ("Error %s checksum offload on sw_if_index %d",
862  enable_disable ? "enabling" : "disabling", sw_if_index);
863  return VNET_API_ERROR_SYSCALL_ERROR_3;
864  }
865  return 0;
866 }
867 
868 int
870 {
871  vnet_main_t *vnm = vnet_get_main ();
872  virtio_main_t *mm = &virtio_main;
873  virtio_if_t *vif;
875  clib_error_t *err = 0;
876  int i = 0;
877 
878  hw = vnet_get_sup_hw_interface_api_visible_or_null (vnm, sw_if_index);
879 
880  if (hw == NULL || virtio_device_class.index != hw->dev_class_index)
881  return VNET_API_ERROR_INVALID_SW_IF_INDEX;
882 
883  vif = pool_elt_at_index (mm->interfaces, hw->dev_instance);
884 
885  if (vif->type == VIRTIO_IF_TYPE_TUN)
886  return VNET_API_ERROR_UNIMPLEMENTED;
887 
888  const unsigned int gso_on = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6;
889  const unsigned int gso_off = 0;
890  unsigned int offload = enable_disable ? gso_on : gso_off;
891  vec_foreach_index (i, vif->tap_fds)
892  _IOCTL (vif->tap_fds[i], TUNSETOFFLOAD, offload);
893  vif->gso_enabled = enable_disable ? 1 : 0;
894  vif->csum_offload_enabled = 0;
895  if (enable_disable)
896  {
898  {
901  }
902  }
903  else
904  {
906  {
909  }
910  }
911 
912 error:
913  if (err)
914  {
915  clib_warning ("Error %s gso on sw_if_index %d",
916  enable_disable ? "enabling" : "disabling", sw_if_index);
917  return VNET_API_ERROR_SYSCALL_ERROR_3;
918  }
919  return 0;
920 }
921 
922 int
924 {
925  vnet_main_t *vnm = vnet_get_main ();
926  virtio_main_t *mm = &virtio_main;
927  virtio_if_t *vif;
928  virtio_vring_t *vring;
930  tap_interface_details_t *r_tapids = NULL;
931  tap_interface_details_t *tapid = NULL;
932 
933  /* *INDENT-OFF* */
934  pool_foreach (vif, mm->interfaces,
935  if ((vif->type != VIRTIO_IF_TYPE_TAP)
936  && (vif->type != VIRTIO_IF_TYPE_TUN))
937  continue;
938  vec_add2(r_tapids, tapid, 1);
939  clib_memset (tapid, 0, sizeof (*tapid));
940  tapid->id = vif->id;
941  tapid->sw_if_index = vif->sw_if_index;
942  hi = vnet_get_hw_interface (vnm, vif->hw_if_index);
943  clib_memcpy(tapid->dev_name, hi->name,
944  MIN (ARRAY_LEN (tapid->dev_name) - 1,
945  strlen ((const char *) hi->name)));
946  vring = vec_elt_at_index (vif->rxq_vrings, RX_QUEUE_ACCESS(0));
947  tapid->rx_ring_sz = vring->size;
948  vring = vec_elt_at_index (vif->txq_vrings, TX_QUEUE_ACCESS(0));
949  tapid->tx_ring_sz = vring->size;
950  tapid->tap_flags = vif->tap_flags;
951  clib_memcpy(&tapid->host_mac_addr, vif->host_mac_addr, 6);
952  if (vif->host_if_name)
953  {
955  MIN (ARRAY_LEN (tapid->host_if_name) - 1,
956  strlen ((const char *) vif->host_if_name)));
957  }
958  if (vif->net_ns)
959  {
960  clib_memcpy(tapid->host_namespace, vif->net_ns,
961  MIN (ARRAY_LEN (tapid->host_namespace) - 1,
962  strlen ((const char *) vif->net_ns)));
963  }
964  if (vif->host_bridge)
965  {
966  clib_memcpy(tapid->host_bridge, vif->host_bridge,
967  MIN (ARRAY_LEN (tapid->host_bridge) - 1,
968  strlen ((const char *) vif->host_bridge)));
969  }
970  if (vif->host_ip4_prefix_len)
971  clib_memcpy(tapid->host_ip4_addr.as_u8, &vif->host_ip4_addr, 4);
973  if (vif->host_ip6_prefix_len)
974  clib_memcpy(tapid->host_ip6_addr.as_u8, &vif->host_ip6_addr, 16);
976  tapid->host_mtu_size = vif->host_mtu_size;
977  );
978  /* *INDENT-ON* */
979 
980  *out_tapids = r_tapids;
981 
982  return 0;
983 }
984 
985 static clib_error_t *
987 {
988  tap_main_t *tm = &tap_main;
989 
991  {
992  if (unformat (input, "host-mtu %d", &tm->host_mtu_size))
993  ;
994  else
995  return clib_error_return (0, "unknown input `%U'",
996  format_unformat_error, input);
997  }
998 
999  return 0;
1000 }
1001 
1002 /* tap { host-mtu <size> } configuration. */
1004 
1005 static clib_error_t *
1007 {
1008  tap_main_t *tm = &tap_main;
1009  clib_error_t *error = 0;
1010 
1011  tm->log_default = vlib_log_register_class ("tap", 0);
1012  vlib_log_debug (tm->log_default, "initialized");
1013 
1014  tm->host_mtu_size = 0;
1015 
1016  return error;
1017 }
1018 
1020 
1021 /*
1022  * fd.io coding-style-patch-verification: ON
1023  *
1024  * Local Variables:
1025  * eval: (c-set-style "gnu")
1026  * End:
1027  */
u32 per_interface_next_index
Definition: virtio.h:156
vlib_log_class_t vlib_log_register_class(char *class, char *subclass)
Definition: log.c:209
struct vring_used * used
Definition: virtio.h:110
vlib_node_registration_t virtio_input_node
(constructor) VLIB_REGISTER_NODE (virtio_input_node)
Definition: node.c:442
#define vec_foreach_index(var, v)
Iterate over vector indices.
uword * tap_ids
Definition: tap.h:95
u8 * format_clib_error(u8 *s, va_list *va)
Definition: error.c:191
int host_mtu_size
Definition: tap.h:98
void virtio_set_net_hdr_size(virtio_if_t *vif)
Definition: virtio.c:240
virtio_if_t * interfaces
Definition: virtio.h:195
#define clib_min(x, y)
Definition: clib.h:319
static u32 virtio_eth_flag_change(vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags)
Definition: tap.c:69
static void tap_free(vlib_main_t *vm, virtio_if_t *vif)
Definition: tap.c:98
VNET_HW_INTERFACE_CLASS(tun_device_hw_interface_class, static)
vlib_log_class_t log_default
Definition: tap.h:92
ip4_address_t host_ip4_addr
Definition: virtio.h:178
u8 host_if_name[64]
Definition: tap.h:79
u8 host_namespace[64]
Definition: tap.h:80
ip4_address_t host_ip4_addr
Definition: tap.h:53
void ethernet_delete_interface(vnet_main_t *vnm, u32 hw_if_index)
Definition: interface.c:378
vnet_main_t * vnet_get_main(void)
Definition: misc.c:46
#define pool_get_zero(P, E)
Allocate an object E from a pool P and zero it.
Definition: pool.h:255
int tap_gso_enable_disable(vlib_main_t *vm, u32 sw_if_index, int enable_disable)
Definition: tap.c:869
vlib_physmem_main_t physmem_main
Definition: main.h:155
int gso_enabled
Definition: virtio.h:183
u32 host_mtu_size
Definition: virtio.h:182
u32 dev_instance
Definition: virtio.h:145
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
static vnet_hw_interface_t * vnet_get_hw_interface(vnet_main_t *vnm, u32 hw_if_index)
u8 host_ip6_prefix_len
Definition: tap.h:58
vnet_device_class_t virtio_device_class
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:590
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
Definition: vec.h:628
void virtio_vring_set_numa_node(vlib_main_t *vm, virtio_if_t *vif, u32 idx)
Definition: virtio.c:224
static uword * clib_bitmap_set(uword *ai, uword i, uword value)
Sets the ith bit of a bitmap to new_value. Removes trailing zeros from the bitmap. ...
Definition: bitmap.h:167
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:424
#define tap_log_dbg(dev, f,...)
Definition: tap.c:49
int tap_csum_offload_enable_disable(vlib_main_t *vm, u32 sw_if_index, int enable_disable)
Definition: tap.c:808
int tap_dump_ifs(tap_interface_details_t **out_tapids)
Definition: tap.c:923
u8 host_ip4_gw_set
Definition: tap.h:56
unsigned char u8
Definition: types.h:56
clib_error_t * virtio_vring_free_tx(vlib_main_t *vm, virtio_if_t *vif, u32 idx)
Definition: virtio.c:201
static vnet_sw_interface_t * vnet_get_hw_sw_interface(vnet_main_t *vnm, u32 hw_if_index)
#define clib_memcpy(d, s, n)
Definition: string.h:180
u8 host_bridge[64]
Definition: tap.h:81
struct vring_avail * avail
Definition: virtio.h:111
u64 features
Definition: virtio.h:163
u32 hw_if_index
Definition: virtio.h:146
u8 host_ip6_prefix_len
Definition: virtio.h:181
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
Definition: pool.h:513
vl_api_interface_index_t sw_if_index
Definition: gre.api:53
#define TX_QUEUE_ACCESS(X)
Definition: virtio.h:79
#define VLIB_INIT_FUNCTION(x)
Definition: init.h:173
u8 * host_bridge
Definition: virtio.h:176
int ifindex
Definition: virtio.h:186
vnet_hw_interface_flags_t flags
Definition: interface.h:526
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
int tap_delete_if(vlib_main_t *vm, u32 sw_if_index)
Definition: tap.c:773
#define clib_error_return(e, args...)
Definition: error.h:99
unsigned int u32
Definition: types.h:88
u32 vnet_register_interface(vnet_main_t *vnm, u32 dev_class_index, u32 dev_instance, u32 hw_class_index, u32 hw_instance)
Definition: interface.c:768
#define vlib_log_debug(...)
Definition: log.h:109
#define MIN(x, y)
Definition: node.h:31
u32 id
Definition: virtio.h:153
u16 queue_id
Definition: virtio.h:119
u16 num_txqs
Definition: virtio.h:170
#define TX_QUEUE(X)
Definition: virtio.h:77
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
Definition: pool.h:534
int * vhost_fds
Definition: virtio.h:157
mac_address_t host_mac_addr
Definition: tap.h:78
struct _unformat_input_t unformat_input_t
unsigned short u16
Definition: types.h:57
#define clib_error_return_unix(e, args...)
Definition: error.h:102
#define pool_put(P, E)
Free an object E in pool P.
Definition: pool.h:302
#define VLIB_CONFIG_FUNCTION(x, n,...)
Definition: init.h:182
ip4_address_t host_ip4_gw
Definition: tap.h:55
ip6_address_t host_ip6_addr
Definition: tap.h:57
u8 host_ip4_prefix_len
Definition: tap.h:54
vlib_main_t * vm
Definition: in2out_ed.c:1599
#define virtio_log_debug(vif, f,...)
Definition: virtio.h:236
ip4_address_t host_ip4_addr
Definition: tap.h:82
u16 num_rxqs
Definition: virtio.h:169
virtio_vring_t * rxq_vrings
Definition: virtio.h:161
#define UNFORMAT_END_OF_INPUT
Definition: format.h:145
u32 flags
Definition: vhost_user.h:248
#define TAP_MAX_INSTANCE
Definition: tap.c:95
sll srl srl sll sra u16x4 i
Definition: vector_sse42.h:317
static int open_netns_fd(char *netns)
Definition: tap.c:78
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:380
u32 host_mtu_size
Definition: tap.h:62
ip6_address_t host_ip6_addr
Definition: virtio.h:180
static vnet_hw_interface_t * vnet_get_sup_hw_interface_api_visible_or_null(vnet_main_t *vnm, u32 sw_if_index)
#define clib_warning(format, args...)
Definition: error.h:59
u8 * net_ns
Definition: virtio.h:175
u8 mac_addr[6]
Definition: virtio.h:173
u32 flags
Definition: virtio.h:143
#define ARRAY_LEN(x)
Definition: clib.h:66
static uword clib_bitmap_get(uword *ai, uword i)
Gets the ith bit value from a bitmap.
Definition: bitmap.h:197
clib_error_t * error
Definition: virtio.h:166
u32 tap_flags
Definition: virtio.h:185
u8 * host_bridge
Definition: tap.h:52
virtio_if_type_t type
Definition: virtio.h:150
#define ASSERT(truth)
void vnet_hw_interface_assign_rx_thread(vnet_main_t *vnm, u32 hw_if_index, u16 queue_id, uword thread_index)
Definition: devices.c:139
u64 remote_features
Definition: virtio.h:163
ip6_address_t host_ip6_gw
Definition: tap.h:59
#define VIRTIO_FEATURE(X)
Definition: virtio.h:75
clib_error_t * virtio_vring_init(vlib_main_t *vm, virtio_if_t *vif, u16 idx, u16 sz)
Definition: virtio.c:65
u8 * host_if_name
Definition: tap.h:50
u8 host_mac_addr[6]
Definition: virtio.h:177
static void clib_mem_free(void *p)
Definition: mem.h:215
unsigned int if_nametoindex(const char *ifname)
static void * clib_mem_alloc(uword size)
Definition: mem.h:157
static uword pointer_to_uword(const void *p)
Definition: types.h:131
#define clib_max(x, y)
Definition: clib.h:312
virtio_main_t virtio_main
Definition: virtio.c:37
vl_api_ip4_address_t hi
Definition: arp.api:37
void vnet_delete_hw_interface(vnet_main_t *vnm, u32 hw_if_index)
Definition: interface.c:980
u8 host_ip6_gw_set
Definition: tap.h:60
void tap_create_if(vlib_main_t *vm, tap_create_if_args_t *args)
Definition: tap.c:133
#define RX_QUEUE_ACCESS(X)
Definition: virtio.h:80
tap_main_t tap_main
Definition: tap.c:45
int csum_offload_enabled
Definition: virtio.h:184
#define tap_log_err(dev, f,...)
Definition: tap.c:47
static void ethernet_mac_address_generate(u8 *mac)
Definition: mac_address.h:74
clib_error_t * ethernet_register_interface(vnet_main_t *vnm, u32 dev_class_index, u32 dev_instance, const u8 *address, u32 *hw_if_index_return, ethernet_flag_change_function_t flag_change)
Definition: interface.c:331
clib_error_t * vnet_hw_interface_set_flags(vnet_main_t *vnm, u32 hw_if_index, vnet_hw_interface_flags_t flags)
Definition: interface.c:498
Definition: tap.h:89
mac_address_t host_mac_addr
Definition: tap.h:51
a point 2 point interface
Definition: interface.h:375
mac_address_t mac_addr
Definition: tap.h:44
#define clib_error_free(e)
Definition: error.h:86
clib_error_t * error
Definition: tap.h:66
ip6_address_t host_ip6_addr
Definition: tap.h:84
u8 host_ip4_prefix_len
Definition: virtio.h:179
int vnet_hw_interface_unassign_rx_thread(vnet_main_t *vnm, u32 hw_if_index, u16 queue_id)
Definition: devices.c:188
u32 sw_if_index
Definition: virtio.h:147
virtio_vring_t * cxq_vring
Definition: virtio.h:187
u8 * format_unformat_error(u8 *s, va_list *va)
Definition: unformat.c:91
static int ethernet_mac_address_is_zero(const u8 *mac)
Definition: mac_address.h:68
clib_error_t * virtio_vring_free_rx(vlib_main_t *vm, virtio_if_t *vif, u32 idx)
Definition: virtio.c:153
TAP interface details struct.
Definition: tap.h:70
clib_error_t * vnet_sw_interface_set_flags(vnet_main_t *vnm, u32 sw_if_index, vnet_sw_interface_flags_t flags)
Definition: interface.c:507
static vlib_thread_main_t * vlib_get_thread_main()
Definition: global_funcs.h:32
static clib_error_t * tap_mtu_config(vlib_main_t *vm, unformat_input_t *input)
Definition: tap.c:986
struct vring_desc * desc
Definition: virtio.h:109
int vnet_hw_interface_set_rx_mode(vnet_main_t *vnm, u32 hw_if_index, u16 queue_id, vnet_hw_interface_rx_mode mode)
Definition: devices.c:253
u8 * host_namespace
Definition: tap.h:49
static uword clib_bitmap_first_clear(uword *ai)
Return the lowest numbered clear bit in a bitmap.
Definition: bitmap.h:445
#define RX_QUEUE(X)
Definition: virtio.h:78
int * tap_fds
Definition: virtio.h:158
static void vnet_hw_interface_set_input_node(vnet_main_t *vnm, u32 hw_if_index, u32 node_index)
Definition: devices.h:79
uword unformat(unformat_input_t *i, const char *fmt,...)
Definition: unformat.c:978
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:171
virtio_vring_t * txq_vrings
Definition: virtio.h:162
static clib_error_t * tap_init(vlib_main_t *vm)
Definition: tap.c:1006
u8 * host_if_name
Definition: virtio.h:174