31 #include <sys/mount.h> 42 &rte_pktmbuf_pool_init,
45 #define LINK_STATE_ELOGS 0 47 #define DEFAULT_HUGE_DIR "/run/vpp/hugepages" 48 #define VPP_RUN_DIR "/run/vpp" 61 .mq_mode = ETH_MQ_TX_NONE,
156 if (xd->
pmd == VNET_DPDK_PMD_VICE ||
157 xd->
pmd == VNET_DPDK_PMD_ENIC)
159 struct rte_eth_dev_info dev_info;
168 "Cisco VIC mtu can only be changed " 169 "using CIMC then rebooting the server!");
180 rv = rte_eth_dev_configure
188 "rte_eth_dev_configure[%d]: err %d",
201 extern int rte_netmap_probe(
void);
246 u8 af_packet_port_id = 0;
255 if (!tr || tr->
count == 0)
266 if (tr && tr->
count > 0)
279 if(rte_netmap_probe() < 0)
283 nports = rte_eth_dev_count();
290 clib_warning (
"DPDK drivers found %d ports...", nports);
300 for (i = 0; i < nports; i++)
304 struct rte_eth_dev_info dev_info;
306 struct rte_eth_link l;
313 rte_eth_dev_info_get(i, &dev_info);
316 sizeof(
struct rte_eth_txconf));
319 xd->
tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
324 xd->
tx_conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS;
338 xd->
port_conf.rxmode.mq_mode = ETH_MQ_RX_RSS;
339 xd->
port_conf.rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP;
347 if (!dev_info.driver_name)
348 dev_info.driver_name = dev_info.pci_dev->driver->name;
349 ASSERT(dev_info.driver_name);
354 #define _(s,f) else if (!strcmp(dev_info.driver_name, s)) \ 355 xd->pmd = VNET_DPDK_PMD_##f; 366 case VNET_DPDK_PMD_E1000EM:
367 case VNET_DPDK_PMD_IGB:
368 case VNET_DPDK_PMD_IGBVF:
373 case VNET_DPDK_PMD_IXGBE:
374 case VNET_DPDK_PMD_IXGBEVF:
375 case VNET_DPDK_PMD_THUNDERX:
382 case VNET_DPDK_PMD_VICE:
383 case VNET_DPDK_PMD_ENIC:
384 rte_eth_link_get_nowait(i, &l);
386 if (l.link_speed == 40000)
399 case VNET_DPDK_PMD_I40E:
400 case VNET_DPDK_PMD_I40EVF:
405 switch (dev_info.pci_dev->id.device_id) {
406 case I40E_DEV_ID_10G_BASE_T:
407 case I40E_DEV_ID_SFP_XL710:
410 case I40E_DEV_ID_QSFP_A:
411 case I40E_DEV_ID_QSFP_B:
412 case I40E_DEV_ID_QSFP_C:
416 rte_eth_link_get_nowait(i, &l);
425 case VNET_DPDK_PMD_CXGBE:
426 switch (dev_info.pci_dev->id.device_id) {
440 case VNET_DPDK_PMD_FM10K:
447 case VNET_DPDK_PMD_VIRTIO:
454 case VNET_DPDK_PMD_VMXNET3:
456 xd->
tx_conf.txq_flags |= ETH_TXQ_FLAGS_NOMULTSEGS;
459 case VNET_DPDK_PMD_AF_PACKET:
464 case VNET_DPDK_PMD_BOND:
473 if(strncmp(dev_info.driver_name,
"vale", 4) == 0
474 || strncmp(dev_info.driver_name,
"netmap", 6) == 0)
476 xd->
pmd = VNET_DPDK_PMD_NETMAP;
477 xd->
port_type = VNET_DPDK_PORT_TYPE_NETMAP;
494 xd->
port_conf.rxmode.max_rx_pkt_len = dev_info.max_rx_pktlen;
512 xd->
port_conf.rxmode.max_rx_pkt_len += 4;
516 #if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0) 520 if (xd->
pmd == VNET_DPDK_PMD_VMXNET3)
522 xd->
port_conf.rxmode.max_rx_pkt_len = 1518;
527 if (xd->
pmd == VNET_DPDK_PMD_AF_PACKET)
531 rnd = (
u32) (now * 1e6);
538 rte_eth_macaddr_get(i,(
struct ether_addr *)addr);
623 if (xd->
pmd == VNET_DPDK_PMD_VICE ||
624 xd->
pmd == VNET_DPDK_PMD_ENIC)
635 vlan_off = rte_eth_dev_get_vlan_offload(xd->
device_index);
636 vlan_off |= ETH_VLAN_STRIP_OFFLOAD;
637 rte_eth_dev_set_vlan_offload(xd->
device_index, vlan_off);
640 #if RTE_VERSION < RTE_VERSION_NUM(16, 4, 0, 0) 644 else if (xd->
pmd == VNET_DPDK_PMD_VMXNET3)
654 #ifdef RTE_LIBRTE_KNI 658 for (i = 0; i < dm->
num_kni; i++)
707 rnd = (
u32) (now * 1e6);
734 clib_warning (
"%d mbufs allocated but total rx/tx ring size is %d\n",
753 c = &d->config0.header;
754 vec_reset_length (pci_addr);
755 pci_addr = format (pci_addr,
"%U%c", format_vlib_pci_addr, &d->bus_address, 0);
757 if (c->device_class != PCI_CLASS_NETWORK_ETHERNET)
761 if (dm->eth_if_whitelist &&
762 !strstr ((char *) dm->eth_if_whitelist, (char *) pci_addr))
766 if (c->vendor_id == 0x1af4 && c->device_id == 0x1000)
769 else if (c->vendor_id == 0x15ad && c->device_id == 0x07b0)
772 else if (c->vendor_id == 0x8086)
775 else if (c->vendor_id == 0x1137 && c->device_id == 0x0043)
778 else if (c->vendor_id == 0x1425 && (c->device_id & 0xe000) == 0x4000)
782 clib_warning (
"Unsupported Ethernet PCI device 0x%04x:0x%04x found " 783 "at PCI address %s\n", (u16) c->vendor_id, (u16) c->device_id,
809 u8 * rte_cmd = 0, * ethname = 0;
814 int rxrings, txrings, rxslots, txslots, txburst;
837 else if (
unformat (input,
"enable-tcp-udp-checksum"))
843 else if (
unformat (input,
"decimal-interface-names"))
846 else if (
unformat (input,
"no-multi-seg"))
849 else if (
unformat (input,
"dev %s", &pci_dev_id))
868 else if (
unformat(input,
"netmap %s/%d:%d/%d:%d/%d",
869 &nmname, &rxrings, &rxslots, &txrings, &txslots, &txburst)) {
872 eth_nm_args(nmname, rxrings, rxslots, txrings, txslots, txburst);
877 }
else if (
unformat(input,
"netmap %s", &nmname)) {
880 eth_nm_args(nmname, 0, 0, 0, 0, 0);
896 else if (
unformat (input,
"socket-mem %s", &socket_mem))
902 else if (
unformat (input,
"enable-vhost-user"))
908 else if (unformat(input, #a)) \ 910 if (!strncmp(#a, "no-pci", 6)) \ 912 tmp = format (0, "--%s%c", #a, 0); \ 913 vec_add1 (dm->eal_init_args, tmp); \ 919 else if (unformat(input, #a " %s", &s)) \ 921 if (!strncmp(#a, "huge-dir", 8)) \ 923 else if (!strncmp(#a, "file-prefix", 11)) \ 925 tmp = format (0, "--%s%c", #a, 0); \ 926 vec_add1 (dm->eal_init_args, tmp); \ 928 vec_add1 (dm->eal_init_args, s); \ 934 else if (unformat(input, #a " %s", &s)) \ 936 tmp = format (0, "-%s%c", #b, 0); \ 937 vec_add1 (dm->eal_init_args, tmp); \ 939 vec_add1 (dm->eal_init_args, s); \ 945 else if (unformat(input, #a " %s", &s)) \ 947 tmp = format (0, "-%s%c", #b, 0); \ 948 vec_add1 (dm->eal_init_args, tmp); \ 950 vec_add1 (dm->eal_init_args, s); \ 951 dm->a##_set_manually = 1; \ 956 else if (
unformat(input,
"default"))
973 if (!no_huge && !huge_dir)
975 u32 x, * mem_by_socket = 0;
1013 vec_validate(mem_by_socket, c);
1014 mem_by_socket[c] = 512;
1022 u32 pages_avail, page_size, mem;
1024 char * path =
"/sys/devices/system/node/node%u/hugepages/" 1025 "hugepages-%ukB/free_hugepages%c";
1027 vec_validate(mem_by_socket, c);
1028 mem = mem_by_socket[c];
1032 s = format (s, path, c, page_size * 1024, 0);
1033 read_sys_fs ((char *) s,
"%u", &pages_avail);
1034 vec_reset_length (s);
1036 if (page_size * pages_avail < mem)
1041 s = format (s, path, c, page_size * 1024, 0);
1042 read_sys_fs ((char *) s,
"%u", &pages_avail);
1043 vec_reset_length (s);
1045 if (page_size * pages_avail < mem)
1050 _vec_len (mem_by_socket) = c + 1;
1054 socket_mem =
format (socket_mem,
"%s%u",
1055 socket_mem ?
"," :
"",
1057 socket_mem =
format (socket_mem,
"%c", 0);
1062 if (rv && errno != EEXIST)
1070 if (rv && errno != EEXIST)
1077 if (use_1g && !(less_than_1g && use_2m))
1096 tmp =
format (0,
"--huge-dir%c", 0);
1102 tmp =
format (0,
"--file-prefix%c", 0);
1104 tmp =
format (0,
"vpp%c", 0);
1119 uword * coremask = 0;
1128 coremask = clib_bitmap_or(coremask, tr->
coremask);
1146 if (no_pci == 0 && geteuid() == 0)
1172 tmp =
format (0, fmt, 0);
1179 tmp =
format (0,
"--master-lcore%c", 0);
1185 tmp =
format (0,
"--socket-mem%c", 0);
1187 tmp =
format (0,
"%s%c", socket_mem, 0);
1196 log_level = (CLIB_DEBUG > 0) ? RTE_LOG_DEBUG : RTE_LOG_NOTICE;
1198 rte_set_log_level (log_level);
1216 fprintf(stdout,
"DPDK physical memory layout:\n");
1217 rte_dump_physmem_layout(stdout);
1224 for (i = 0; i < RTE_MAX_LCORE; i++)
1227 rte_lcore_to_socket_id(i));
1246 struct rte_eth_link prev_link = xd->
link;
1248 u8 hw_flags_chg = 0;
1255 memset(&xd->
link, 0,
sizeof(xd->
link));
1263 "update-link-state: sw_if_index %d, admin_up %d," 1264 "old link_state %d new link_state %d",
1265 .format_args =
"i4i1i1i1",
1268 struct {
u32 sw_if_index;
u8 admin_up;
1269 u8 old_link_state;
u8 new_link_state;} *ed;
1273 ed->old_link_state = (
u8)
1275 ed->new_link_state = (
u8) xd->
link.link_status;
1279 ((xd->
link.link_status != 0) ^
1283 hw_flags |= (xd->
link.link_status ?
1287 if (hw_flags_chg || (xd->
link.link_duplex != prev_link.link_duplex))
1290 switch (xd->
link.link_duplex)
1292 case ETH_LINK_HALF_DUPLEX:
1295 case ETH_LINK_FULL_DUPLEX:
1302 #if RTE_VERSION >= RTE_VERSION_NUM(16, 4, 0, 0) 1303 if (hw_flags_chg || (xd->
link.link_speed != prev_link.link_speed))
1306 switch (xd->
link.link_speed)
1308 case ETH_SPEED_NUM_10M:
1311 case ETH_SPEED_NUM_100M:
1314 case ETH_SPEED_NUM_1G:
1317 case ETH_SPEED_NUM_10G:
1320 case ETH_SPEED_NUM_40G:
1331 if (hw_flags_chg || (xd->
link.link_speed != prev_link.link_speed))
1334 switch (xd->
link.link_speed)
1336 case ETH_LINK_SPEED_10:
1339 case ETH_LINK_SPEED_100:
1342 case ETH_LINK_SPEED_1000:
1345 case ETH_LINK_SPEED_10000:
1348 case ETH_LINK_SPEED_40G:
1366 .format =
"update-link-state: sw_if_index %d, new flags %d",
1367 .format_args =
"i4i4",
1373 ed->flags = hw_flags;
1404 VLIB_NODE_STATE_POLLING);
1407 VLIB_NODE_STATE_POLLING);
1412 VLIB_NODE_STATE_POLLING);
1434 int nports = rte_eth_dev_count();
1436 for (i = 0; i < nports; i++) {
1437 struct rte_eth_dev_info dev_info;
1438 rte_eth_dev_info_get(i, &dev_info);
1439 if (!dev_info.driver_name)
1440 dev_info.driver_name = dev_info.pci_dev->driver->name;
1441 ASSERT(dev_info.driver_name);
1442 if (strncmp(dev_info.driver_name,
"rte_bond_pmd", 12) == 0) {
1445 int nlink = rte_eth_bond_slaves_get(i, slink, 16);
1450 rte_eth_macaddr_get(slink[0], (
struct ether_addr *)addr);
1452 rte_eth_bond_mac_address_set(i, (
struct ether_addr *)addr);
1463 while (nlink >= 1) {
1464 int slave = slink[--nlink];
1469 if (nlink) rte_eth_dev_mac_addr_add(
1470 slave, (
struct ether_addr *)addr, 0);
1528 .name =
"dpdk-process",
1529 .process_log2_n_stack_bytes = 17,
1535 return (VNET_API_ERROR_INVALID_VALUE);
1545 return (VNET_API_ERROR_INVALID_VALUE);
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
#define DPDK_NB_TX_DESC_10GE
f64 time_last_link_update
static u8 * format_bitmap_hex(u8 *s, va_list *args)
#define vec_foreach_index(var, v)
Iterate over vector indices.
#define hash_set(h, key, value)
always_inline vlib_thread_main_t * vlib_get_thread_main()
sll srl srl sll sra u16x4 i
vlib_node_registration_t dpdk_io_input_node
(constructor) VLIB_REGISTER_NODE (dpdk_io_input_node)
static u8 * format_vlib_pci_addr(u8 *s, va_list *va)
#define VNET_HW_INTERFACE_FLAG_SPEED_1G
clib_error_t * vnet_hw_interface_set_flags(vnet_main_t *vnm, u32 hw_if_index, u32 flags)
static clib_error_t * dpdk_lib_init(dpdk_main_t *dm)
vnet_device_class_t dpdk_device_class
static void dpdk_bind_devices_to_uio(dpdk_main_t *dm)
static u32 dpdk_flag_change(vnet_main_t *vnm, vnet_hw_interface_t *hi, u32 flags)
always_inline void clib_mem_free(void *p)
u32 vhost_coalesce_frames
#define vec_add2_aligned(V, P, N, A)
Add N elements to end of vector V, return pointer to new elements in P.
uword dpdk_input_rss(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *f)
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
#define DPDK_EFD_DISABLED
#define DPDK_NB_RX_DESC_VIRTIO
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
vlib_buffer_main_t * buffer_main
u32 per_interface_next_index
u8 nchannels_set_manually
static clib_error_t * dpdk_config(vlib_main_t *vm, unformat_input_t *input)
#define DPDK_NB_TX_DESC_DEFAULT
#define clib_error_report(e)
#define foreach_eal_double_hyphen_predicate_arg
#define VNET_HW_INTERFACE_FLAG_LINK_UP
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
always_inline vlib_main_t * vlib_get_main(void)
#define vec_reset_length(v)
Reset vector length to zero NULL-pointer tolerant.
clib_error_t * vlib_buffer_pool_create(vlib_main_t *vm, unsigned num_mbufs, unsigned socket_id)
vnet_main_t * vnet_get_main(void)
struct rte_mbuf *** tx_vectors
#define pool_foreach(VAR, POOL, BODY)
vlib_node_function_t * function
#define VLIB_INIT_FUNCTION(x)
vlib_node_registration_t dpdk_input_node
(constructor) VLIB_REGISTER_NODE (dpdk_input_node)
#define IP_BUFFER_L4_CHECKSUM_CORRECT
vlib_pci_addr_t bus_address
int input_cpu_first_index
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
clib_error_t * vlib_pci_bind_to_uio(vlib_pci_device_t *d, char *uio_driver_name)
always_inline void vlib_node_set_state(vlib_main_t *vm, u32 node_index, vlib_node_state_t new_state)
void dpdk_vhost_user_process_cleanup(void *ctx)
#define clib_warning(format, args...)
#define vlib_call_init_function(vm, x)
always_inline u32 random_u32(u32 *seed)
32-bit random number generator
#define DPDK_NB_TX_DESC_VIRTIO
struct rte_eth_conf port_conf
#define DPDK_NB_TX_DESC_40GE
f64 time_last_stats_update
u16 consec_full_frames_hi_thresh
always_inline void * clib_mem_alloc_aligned(uword size, uword align)
ethernet_main_t ethernet_main
static struct rte_eth_conf port_conf_template
clib_error_t * dpdk_init(vlib_main_t *vm)
struct rte_eth_txconf tx_conf
#define clib_bitmap_foreach(i, ai, body)
#define pool_elt_at_index(p, i)
#define vec_insert(V, N, M)
Insert N vector elements starting at element M, initialize new elements to zero (no header...
#define foreach_eal_double_hyphen_arg
#define VLIB_BUFFER_DEFAULT_FREE_LIST_BYTES
dpdk_device_and_queue_t ** devices_by_cpu
#define ETHERNET_INTERFACE_FLAG_CONFIG_MTU(flags)
#define VNET_HW_INTERFACE_FLAG_SPEED_10M
vlib_pci_device_t * pci_devs
#define VNET_SW_INTERFACE_FLAG_BOND_SLAVE
#define foreach_eal_single_hyphen_mandatory_arg
int dpdk_set_link_state_poll_interval(f64 interval)
always_inline uword vnet_hw_interface_is_link_up(vnet_main_t *vnm, u32 hw_if_index)
#define VNET_HW_INTERFACE_FLAG_HALF_DUPLEX
uword os_get_cpu_number(void)
vlib_node_registration_t dpdk_process_node
(constructor) VLIB_REGISTER_NODE (dpdk_process_node)
always_inline f64 vlib_process_wait_for_event_or_clock(vlib_main_t *vm, f64 dt)
dpdk_port_type_t port_type
static uword dpdk_process(vlib_main_t *vm, vlib_node_runtime_t *rt, vlib_frame_t *f)
#define VLIB_CONFIG_FUNCTION(x, n,...)
always_inline uword * clib_bitmap_set(uword *ai, uword i, uword value)
#define DPDK_MIN_STATS_POLL_INTERVAL
void * vlib_weakly_linked_functions[]
u32 main_thread_is_io_node
void vlib_cli_output(vlib_main_t *vm, char *fmt,...)
#define VNET_HW_INTERFACE_BOND_INFO_SLAVE
#define foreach_eal_single_hyphen_arg
int dpdk_set_stat_poll_interval(f64 interval)
#define DPDK_NB_RX_DESC_DEFAULT
linux_pci_main_t linux_pci_main
static void dpdk_update_counters(dpdk_device_t *xd, f64 now)
u16 * cpu_socket_id_by_queue
#define DPDK_EFD_DEFAULT_DEVICE_QUEUE_HI_THRESH_PCT
#define vec_free(V)
Free vector's memory (no header).
struct rte_mbuf *** rx_vectors
void dpdk_device_lock_free(dpdk_device_t *xd)
#define clib_memcpy(a, b, c)
#define DPDK_NB_RX_DESC_ENIC
#define VLIB_BUFFER_TOTAL_LENGTH_VALID
#define ETHERNET_INTERFACE_FLAG_ACCEPT_ALL
#define ELOG_TYPE_DECLARE(f)
always_inline vnet_hw_interface_t * vnet_get_hw_interface(vnet_main_t *vnm, u32 hw_if_index)
clib_error_t * dpdk_port_setup(dpdk_main_t *dm, dpdk_device_t *xd)
#define VNET_HW_INTERFACE_FLAG_SPEED_10G
#define vec_validate_ha(V, I, H, A)
Make sure vector is long enough for given index (general version).
void dpdk_update_link_state(dpdk_device_t *xd, f64 now)
vlib_worker_thread_t * vlib_worker_threads
#define hash_create(elts, value_bytes)
#define VNET_HW_INTERFACE_FLAG_FULL_DUPLEX
u32 max_l3_packet_bytes[VLIB_N_RX_TX]
#define DPDK_TX_RING_SIZE
void dpdk_vhost_user_process_init(void **ctx)
u32 vlib_buffer_get_or_create_free_list(vlib_main_t *vm, u32 n_data_bytes, char *fmt,...)
#define DPDK_EFD_DEFAULT_CONSEC_FULL_FRAMES_HI_THRESH
clib_error_t * ethernet_register_interface(vnet_main_t *vnm, u32 dev_class_index, u32 dev_instance, u8 *address, u32 *hw_if_index_return, ethernet_flag_change_function_t flag_change)
#define clib_bitmap_free(v)
#define DPDK_LINK_POLL_INTERVAL
uword * thread_registrations_by_name
#define DPDK_NB_RX_DESC_10GE
#define IP_BUFFER_L4_CHECKSUM_COMPUTED
vlib_node_t * vlib_get_node_by_name(vlib_main_t *vm, u8 *name)
#define DPDK_STATS_POLL_INTERVAL
#define VNET_HW_INTERFACE_FLAG_SPEED_100M
u8 interface_name_format_decimal
u32 ethernet_input_node_index
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
dpdk_device_type_t dev_type
always_inline vnet_sw_interface_t * vnet_get_sw_interface(vnet_main_t *vnm, u32 sw_if_index)
void dpdk_device_lock_init(dpdk_device_t *xd)
#define DPDK_MIN_LINK_POLL_INTERVAL
#define hash_get_mem(h, key)
u32 buffer_flags_template
#define VNET_HW_INTERFACE_FLAG_SPEED_40G
u32 vlib_buffer_free_list_index
#define VLIB_REGISTER_NODE(x,...)
u32 dpdk_get_admin_up_down_in_progress(void)
always_inline vlib_node_runtime_t * vlib_node_get_runtime(vlib_main_t *vm, u32 node_index)
volatile u32 io_thread_release
#define ETHERNET_MAX_PACKET_BYTES
#define vec_foreach(var, vec)
Vector iterator.
always_inline f64 vlib_time_now(vlib_main_t *vm)
#define ETHERNET_INTERFACE_FLAG_CONFIG_PROMISC(flags)
uword * cpu_socket_bitmap
#define clib_error_return(e, args...)
vlib_thread_registration_t ** registrations
#define CLIB_CACHE_LINE_BYTES
ethernet_interface_t * interfaces
uword dpdk_vhost_user_process_if(vlib_main_t *vm, dpdk_device_t *xd, void *ctx)
#define DPDK_NB_RX_DESC_40GE
vlib_main_t ** vlib_mains
uword * dpdk_device_by_kni_port_id
uword * vu_sw_if_index_by_listener_fd
f64 link_state_poll_interval
CLIB vectors are ubiquitous dynamically resized arrays with by user defined "headers".
clib_error_t * dpdk_cli_init(vlib_main_t *vm)
always_inline vnet_sw_interface_t * vnet_get_hw_sw_interface(vnet_main_t *vnm, u32 hw_if_index)
uword * vu_sw_if_index_by_sock_fd