18 #include <vpp/app/version.h> 23 #define KP_GARBAGE_RUN 60 26 #define KP_CONCURRENCY_TIMEOUT 10 30 #define kp_get_writer_lock() do {} while(__sync_lock_test_and_set (kp_main.writer_lock, 1)) 31 #define kp_put_writer_lock() kp_main.writer_lock[0] = 0 38 prefix->as_u64[0] = 0;
39 prefix->as_u64[1] = 0;
40 }
else if (plen <= 64) {
41 prefix->as_u64[0] &= clib_host_to_net_u64(0xffffffffffffffffL << (64 - plen));
42 prefix->as_u64[1] = 0;
44 prefix->as_u64[1] &= clib_host_to_net_u64(0xffffffffffffffffL << (128 - plen));
51 ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
52 u8 *len = va_arg (*args,
u8 *);
60 ip46->pad[0] = ip46->pad[1] = ip46->pad[2] = 0;
73 ip46_address_t *ip46 = va_arg (*args, ip46_address_t *);
74 u32 len = va_arg (*args,
u32);
120 for(thread_index = 0; thread_index < tm->
n_vlib_mains; thread_index++ ) {
123 s =
format(s,
"core %d\n", thread_index);
145 return format(s, kp_vip_type_strings[i]);
146 return format(s,
"_WRONG_TYPE_");
154 if (
unformat(input, kp_vip_type_strings[i])) {
164 return format(s,
"%U %U port:%u target_port:%u node_port:%u " 165 "new_size:%u #pod:%u%s",
189 s =
format(s,
"%U %U [%u] %U port:%u target_port:%u node_port:%u%s\n" 201 s =
format(s,
"%U counters:\n",
205 s =
format(s,
"%U %s: %d\n",
211 s =
format(s,
"%U #pod:%u\n",
225 pod = &kpm->pods[*pod_index];
226 s = format(s,
"%U %U %d buckets %d flows dpo:%u %s\n",
227 format_white_space, indent,
228 format_ip46_address, &pod->address, IP46_TYPE_ANY,
229 count[pod - kpm->pods],
230 vlib_refcount_get(&kpm->pod_refcount, pod - kpm->pods),
232 (pod->flags & KP_POD_FLAGS_USED)?
"used":
" removed");
275 pod = &kpm->pods[*pod_index];
276 if (!(pod->flags & KP_POD_FLAGS_USED) &&
277 clib_u32_loop_gt(now, pod->last_used + KP_CONCURRENCY_TIMEOUT) &&
278 (vlib_refcount_get(&kpm->pod_refcount, pod - kpm->pods) == 0))
280 fib_entry_child_remove(pod->next_hop_fib_entry_index,
281 pod->next_hop_child_index);
282 fib_table_entry_delete_index(pod->next_hop_fib_entry_index,
284 pod->next_hop_fib_entry_index = FIB_NODE_INDEX_INVALID;
286 pool_put(vip->pod_indexes, pod_index);
287 pool_put(kpm->pods, pod);
297 u32 *to_be_removed_vips = 0, *
i;
299 kp_vip_garbage_collection(vip);
301 if (!(vip->flags & KP_VIP_FLAGS_USED) &&
302 (pool_elts(vip->pod_indexes) == 0)) {
303 vec_add1(to_be_removed_vips, vip - kpm->vips);
308 vip = &kpm->vips[*
i];
332 pod = &kpm->pods[*pod_index];
333 if (pod->flags & KP_POD_FLAGS_USED) {
344 new_flow_table[
i].pod_index = 0;
355 pod = &kpm->pods[*pod_index];
356 if (!(pod->flags & KP_POD_FLAGS_USED))
359 sort_arr[i].pod_index = pod - kpm->pods;
362 _vec_len(sort_arr) =
i;
368 kp_pod_t *pod = &kpm->pods[pr->pod_index];
379 pr->skip = ((seed & 0xffffffff) | 1) & vip->new_flow_table_mask;
380 pr->last = (seed >> 32) & vip->new_flow_table_mask;
386 new_flow_table[
i].pod_index = ~0;
393 pr->last = (pr->last + pr->skip) & vip->new_flow_table_mask;
394 if (new_flow_table[last].pod_index == ~0) {
395 new_flow_table[
last].pod_index = pr->pod_index;
400 if (done ==
vec_len(new_flow_table))
412 if (vip->new_flow_table == 0 ||
413 new_flow_table[
i].pod_index != vip->new_flow_table[
i].pod_index)
416 old_table = vip->new_flow_table;
417 vip->new_flow_table = new_flow_table;
425 if (!
is_pow2(per_cpu_sticky_buckets))
426 return VNET_API_ERROR_INVALID_MEMORY_SIZE;
443 if ((vip->flags & KP_POD_FLAGS_USED) &&
445 vip->prefix.as_u64[0] == prefix->as_u64[0] &&
446 vip->prefix.as_u64[1] == prefix->as_u64[1]) {
447 *vip_index = vip - kpm->vips;
451 return VNET_API_ERROR_NO_SUCH_ENTRY;
470 pod = &kpm->pods[*podi];
471 if (pod->vip_index == (vip - kpm->vips) &&
472 pod->address.as_u64[0] == address->as_u64[0] &&
473 pod->address.as_u64[1] == address->as_u64[1]) {
474 *pod_index = pod - kpm->pods;
488 return VNET_API_ERROR_NO_SUCH_ENTRY;
492 u32 *to_be_added = 0;
493 u32 *to_be_updated = 0;
508 return VNET_API_ERROR_VALUE_EXIST;
518 return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
524 if (addresses[n2].
as_u64[0] == addresses[n].
as_u64[0] &&
550 *pod_index = pod - kpm->
pods;
581 memset (m, 0,
sizeof (*m));
628 return VNET_API_ERROR_NO_SUCH_ENTRY;
636 return VNET_API_ERROR_NO_SUCH_ENTRY;
642 if (addresses[n2].
as_u64[0] == addresses[n].
as_u64[0] &&
656 if (indexes !=
NULL) {
698 proto, vip - kpm->
vips);
726 u32 new_length,
u32 *vip_index,
727 u16 port,
u16 target_port,
u16 node_port)
740 return VNET_API_ERROR_VALUE_EXIST;
745 return VNET_API_ERROR_INVALID_MEMORY_SIZE;
752 return VNET_API_ERROR_INVALID_ADDRESS_FAMILY;
762 vip->
port = clib_host_to_net_u16(port);
763 vip->
target_port = clib_host_to_net_u16(target_port);
764 vip->
node_port = clib_host_to_net_u16(node_port);
788 key = clib_host_to_net_u16(node_port);
792 return VNET_API_ERROR_VALUE_EXIST;
804 *vip_index = vip - kpm->
vips;
817 return VNET_API_ERROR_NO_SUCH_ENTRY;
825 ip46_address_t *pods = 0;
829 pod = &kpm->pods[*pod_index];
830 vec_add1(pods, pod->address);
849 .version = VPP_BUILD_VER,
850 .description =
"kube-proxy data plane",
911 sw_if_index, 0, 0, 0);
916 sw_if_index, 1, 0, 0);
959 default_pod->
flags = 0;
962 default_pod->
address.ip6.as_u64[0] = 0xffffffffffffffffL;
963 default_pod->
address.ip6.as_u64[1] = 0xffffffffffffffffL;
972 #define _(a,b,c) kpm->vip_counters[c].name = b; #define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
fib_protocol_t fp_proto
protocol type
static void kp_pod_stack(kp_pod_t *pod)
dpo_lock_fn_t dv_lock
A reference counting lock function.
Recursive resolution source.
u8 * format_ip46_prefix(u8 *s, va_list *args)
A virtual function table registered for a DPO type.
static const char *const *const kp_dpo_nat6_nodes[DPO_PROTO_NUM]
vlib_refcount_t pod_refcount
Each POD has an associated reference counter.
vnet_main_t * vnet_get_main(void)
static void kp_vip_update_new_flow_table(kp_vip_t *vip)
#define kp_vip_is_nat4(vip)
u32 fib_entry_child_add(fib_node_index_t fib_entry_index, fib_node_type_t child_type, fib_node_index_t child_index)
int kp_vip_del_pods_withlock(u32 vip_index, ip46_address_t *addresses, u32 n)
static f64 vlib_time_now(vlib_main_t *vm)
vlib_simple_counter_main_t vip_counters[KP_N_VIP_COUNTERS]
Per VIP counter.
static const char *const kp_dpo_nat6_ip6[]
enum fib_node_back_walk_rc_t_ fib_node_back_walk_rc_t
Return code from a back walk function.
const dpo_id_t * fib_entry_contribute_ip_forwarding(fib_node_index_t fib_entry_index)
u32 index_t
A Data-Path Object is an object that represents actions that are applied to packets as they are swit...
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
#define KP_POD_FLAGS_USED
static u64 clib_xxhash(u64 key)
static heap_elt_t * last(heap_header_t *h)
static_always_inline void vlib_refcount_init(vlib_refcount_t *r)
#define hash_set_mem(h, key, value)
#define STRUCT_OFFSET_OF(t, f)
kp_per_cpu_t * per_cpu
Some global data is per-cpu.
#define KP_MAPPING_MEMORY_SIZE
static void kp_vip_garbage_collection(kp_vip_t *vip)
#define ip46_address_type(ip46)
void ip46_prefix_normalize(ip46_address_t *prefix, u8 plen)
u32 vip_index
PODs are indexed by address and VIP Index.
#define pool_get(P, E)
Allocate an object E from a pool P (unspecified alignment).
#define vec_alloc(V, N)
Allocate space for N more elements (no header, unspecified alignment)
fib_node_t fib_node
Registration to FIB event.
uword unformat_kp_vip_type(unformat_input_t *input, va_list *args)
kp_vip_type_t
kube-proxy supports IPv4 and IPv6 traffic and NAT4 and NAT6.
#define pool_len(p)
Number of elements in pool vector.
fib_node_type_t fib_node_register_new_type(const fib_node_vft_t *vft)
Create a new FIB node type and Register the function table for it.
int kp_vip_add(ip46_address_t *prefix, u8 plen, kp_vip_type_t type, u32 new_length, u32 *vip_index, u16 port, u16 target_port, u16 node_port)
#define KP_MAPPING_BUCKETS
#define kp_vip_is_ip4(vip)
static fib_node_back_walk_rc_t kp_fib_node_back_walk_notify(fib_node_t *node, fib_node_back_walk_ctx_t *ctx)
static counter_t vlib_get_simple_counter(vlib_simple_counter_main_t *cm, u32 index)
Get the value of a simple counter. Scrapes the entire set of per-thread counters.
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
#define VLIB_INIT_FUNCTION(x)
void fib_table_entry_special_remove(u32 fib_index, const fib_prefix_t *prefix, fib_source_t source)
Remove a 'special' entry from the FIB.
int kp_vip_del(u32 vip_index)
clib_error_t * kp_init(vlib_main_t *vm)
u32 kp_hash_time_now(vlib_main_t *vm)
u32 last_used
Rotating timestamp of when KP_POD_FLAGS_USED flag was last set.
u8 plen
The VIP prefix length.
vlib_node_registration_t kp6_nodeport_node
(constructor) VLIB_REGISTER_NODE (kp6_nodeport_node)
A high priority source a plugin can use.
static void kp_vip_add_adjacency(kp_main_t *kpm, kp_vip_t *vip)
Add the VIP adjacency to the ip4 or ip6 fib.
Aggregate type for a prefix.
kp_snat_mapping_t * snat_mappings
u8 * format_kp_dpo(u8 *s, va_list *va)
u32 * pod_indexes
Pool of POD indexes used for this VIP.
static void kp_fib_node_last_lock_gone(fib_node_t *node)
enum dpo_proto_t_ dpo_proto_t
Data path protocol.
u16 fp_len
The mask length.
#define kp_vip_get_by_index(index)
dpo_type_t dpo_register_new_type(const dpo_vft_t *vft, const char *const *const *nodes)
Create and register a new DPO type.
kp_new_flow_entry_t * new_flow_table
Vector mapping (flow-hash & new_flow_table_mask) to POD index.
u16 target_port
Pod's port corresponding to specific service.
char * name
The counter collection's name.
The identity of a DPO is a combination of its type and its instance number/index of objects of that t...
#define hash_create_mem(elts, key_bytes, value_bytes)
dpo_type_t dpo_nat4_type
DPO used to send packet from IP4/6 lookup to KP node.
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
static const char *const kp_dpo_nat4_ip4[]
int kp_vip_del_pods(u32 vip_index, ip46_address_t *addresses, u32 n)
ip46_address_t fp_addr
The address type is not derivable from the fp_addr member.
ip46_address_t prefix
A Virtual IP represents a given service delivered by a set of PODs.
volatile u32 * writer_lock
#define pool_put(P, E)
Free an object E in pool P.
static void kp_dpo_unlock(dpo_id_t *dpo)
ip46_address_t address
Destination address used to transfer traffic towards to that POD.
fib_node_index_t next_hop_fib_entry_index
The FIB entry index for the next-hop.
u32 last_garbage_collection
last time garbage collection was run to free the PODs.
void kp_garbage_collection()
An node in the FIB graph.
#define clib_u32_loop_gt(a, b)
32 bits integer comparison for running values.
static char * kp_vip_type_strings[]
static fib_node_t * kp_fib_node_get_node(fib_node_index_t index)
#define ip46_address_is_ip4(ip46)
u8 * format_kp_vip(u8 *s, va_list *args)
#define pool_free(p)
Free a pool.
fib_node_index_t fib_table_entry_special_add(u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, fib_entry_flag_t flags)
Add a 'special' entry to the FIB.
u32 per_cpu_sticky_buckets
Number of buckets in the per-cpu sticky hash table.
#define vec_free(V)
Free vector's memory (no header).
static_always_inline u32 kp_hash_elts(kp_hash_t *h, u32 time_now)
Each VIP is configured with a set of PODs.
#define clib_memcpy(a, b, c)
u32 fib_node_index_t
A typedef of a node index.
#define kp_put_writer_lock()
8 octet key, 8 octet key value pair
void dpo_set(dpo_id_t *dpo, dpo_type_t type, dpo_proto_t proto, index_t index)
Set/create a DPO ID. The DPO will be locked.
Context passed between object during a back walk.
uword unformat_ip46_prefix(unformat_input_t *input, va_list *args)
fib_node_index_t fib_table_entry_special_dpo_add(u32 fib_index, const fib_prefix_t *prefix, fib_source_t source, fib_entry_flag_t flags, const dpo_id_t *dpo)
Add a 'special' entry to the FIB that links to the DPO passed A special entry is an entry that the FI...
#define KP_DEFAULT_FLOW_TIMEOUT
void vlib_validate_simple_counter(vlib_simple_counter_main_t *cm, u32 index)
validate a simple counter
u32 flow_timeout
Flow timeout in seconds.
u8 * format_kp_vip_detailed(u8 *s, va_list *args)
kp_vip_type_t type
The type of traffic for this.
vlib_node_registration_t kp4_nodeport_node
(constructor) VLIB_REGISTER_NODE (kp4_nodeport_node)
static kp_pod_t * kp_pod_from_fib_node(fib_node_t *node)
static void vlib_zero_simple_counter(vlib_simple_counter_main_t *cm, u32 index)
Clear a simple counter. Clears the set of per-thread u16 counters, and the u64 counter.
static void * clib_mem_alloc(uword size)
static vlib_main_t * vlib_get_main(void)
u32 next_hop_child_index
The child index on the FIB entry.
static uword is_pow2(uword x)
u32 new_flow_table_mask
New flows table length - 1 (length MUST be a power of 2)
u8 * format_kp_pod(u8 *s, va_list *args)
int kp_nat4_interface_add_del(u32 sw_if_index, int is_del)
u8 * format_kp_main(u8 *s, va_list *args)
static int kp_vip_find_index_with_lock(ip46_address_t *prefix, u8 plen, u32 *vip_index)
#define KP_VIP_FLAGS_USED
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
u8 * format_kp_vip_type(u8 *s, va_list *args)
u8 flags
Flags related to this VIP.
dpo_id_t dpo
The next DPO in the graph to follow.
static int kp_pod_find_index_vip(kp_vip_t *vip, ip46_address_t *address, u32 *pod_index)
#define kp_foreach_vip_counter
kp_pod_t * pods
Pool of PODs.
#define kp_get_writer_lock()
#define vec_sort_with_function(vec, f)
Sort a vector using the supplied element comparison function.
#define DPO_INVALID
An initialiser for DPOs declared on the stack.
int kp_vip_add_pods(u32 vip_index, ip46_address_t *addresses, u32 n)
#define hash_get_mem(h, key)
A FIB graph nodes virtual function table.
static int kp_pseudorand_compare(void *a, void *b)
static void * clib_mem_alloc_aligned(uword size, uword align)
static const char *const kp_dpo_nat6_ip4[]
kp_vip_t * vips
Pool of all Virtual IPs.
static void kp_dpo_lock(dpo_id_t *dpo)
static vlib_thread_main_t * vlib_get_thread_main()
void dpo_reset(dpo_id_t *dpo)
Reset a DPO ID. The DPO will be unlocked.
#define vec_foreach(var, vec)
Vector iterator.
int kp_vip_find_index(ip46_address_t *prefix, u8 plen, u32 *vip_index)
static const char *const kp_dpo_nat4_ip6[]
fib_node_type_t fib_node_type
Node type for registering to fib changes.
static void kp_vip_del_adjacency(kp_main_t *kpm, kp_vip_t *vip)
Deletes the adjacency associated with the VIP.
u16 dpoi_next_node
The next VLIB node to follow.
void udp_register_dst_port(vlib_main_t *vm, udp_dst_port_t dst_port, u32 node_index, u8 is_ip4)
#define ip46_prefix_is_ip4(ip46, len)
u8 flags
Some per-POD flags.
clib_bihash_8_8_t mapping_by_pod
#define CLIB_CACHE_LINE_BYTES
Load balancing service is provided per VIP.
#define KP_DEFAULT_PER_CPU_STICKY_BUCKETS
kp-plugin implements a MagLev-like load balancer.
static const char *const *const kp_dpo_nat4_nodes[DPO_PROTO_NUM]
int kp_conf(u32 per_cpu_sticky_buckets, u32 flow_timeout)
Fix global kube-proxy parameters.
u16 node_port
Node's port, can access service via NodeIP:node_port.
int vnet_feature_enable_disable(const char *arc_name, const char *node_name, u32 sw_if_index, int enable_disable, void *feature_config, u32 n_feature_config_bytes)
void dpo_stack(dpo_type_t child_type, dpo_proto_t child_proto, dpo_id_t *dpo, const dpo_id_t *parent)
Stack one DPO object on another, and thus establish a child-parent relationship.
kp_hash_t * sticky_ht
Each CPU has its own sticky flow hash table.
static uword pool_elts(void *v)
Number of active elements in a pool.