18 #include <sys/types.h> 22 #include <linux/mempolicy.h> 23 #include <linux/memfd.h> 33 #if __SIZEOF_POINTER__ >= 8 34 #define DEFAULT_RESERVED_MB 16384 36 #define DEFAULT_RESERVED_MB 256 48 return round_pow2 (size, 1ULL << log2_page_sz) >> log2_page_sz;
57 if (
getcpu (&cpu, numa_node) != 0)
78 if (pt == 0 || pt[0] == 0)
101 u32 n_blocks,
u32 block_align,
u32 numa_node)
107 u32 alloc_chunk_index;
113 u32 i, start = 0, prev = ~0;
139 off = (block_align - (c->
start & (block_align - 1))) & (block_align - 1);
141 if (c->
used || n_blocks + off > c->
size)
145 alloc_chunk_index = c->
next;
152 u32 offset_chunk_index;
156 offset_chunk_index = alloc_chunk_index;
157 alloc_chunk_index = c - pp->
chunks;
163 c->
prev = offset_chunk_index;
165 co->
next = alloc_chunk_index;
169 if (c->
size > n_blocks)
171 u32 tail_chunk_index;
175 tail_chunk_index = ct - pp->
chunks;
179 ct->
prev = alloc_chunk_index;
183 c->
next = tail_chunk_index;
187 else if (c->
next != ~0)
202 uword seek, va, pa, p;
209 p = (
uword) first *elts_per_page;
221 fd = open ((
char *)
"/proc/self/pagemap", O_RDONLY);
222 while (p < (
uword) elts_per_page * count)
227 if (fd != -1 && lseek (fd, seek, SEEK_SET) == seek &&
228 read (fd, &pa,
sizeof (pa)) == (
sizeof (pa)) &&
243 u32 numa_node,
u32 n_pages)
246 int status, rv,
i, mmap_flags;
247 void *va = MAP_FAILED;
249 long unsigned int mask[16] = { 0 };
250 long unsigned int old_mask[16] = { 0 };
270 rv =
get_mempolicy (&old_mpol, old_mask,
sizeof (old_mask) * 8 + 1, 0, 0);
272 if (rv == -1 && numa_node != 0)
278 mask[0] = 1 << numa_node;
280 if (rv == -1 && numa_node != 0)
283 "numa node %u", numa_node);
287 mmap_flags = MAP_FIXED;
291 mmap_flags |= MAP_SHARED;
298 if ((ftruncate (a->
fd, size)) == -1)
304 mmap_flags |= MAP_HUGETLB;
306 mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
311 if (mmap (va, size, PROT_READ | PROT_WRITE, mmap_flags, a->
fd, 0) ==
315 "fd %d numa %d flags 0x%x", n_pages,
316 va, a->
fd, numa_node, mmap_flags);
329 rv =
set_mempolicy (old_mpol, old_mask,
sizeof (old_mask) * 8 + 1);
330 if (rv == -1 && numa_node != 0)
339 if ((rv == 0 && status != numa_node) || (rv != 0 && numa_node != 0))
341 pm->
error = rv == -1 ?
343 "%u status %d", numa_node, status) :
345 "%u status %d", numa_node, status);
350 for (i = 0; i < n_pages; i++)
373 return pp - (n_pages - 1);
376 if (va != MAP_FAILED)
380 mmap (va, size, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
398 if (log2_page_sz == 0)
404 1 << (log2_page_sz - 10));
427 memset (a, 0,
sizeof (*a));
440 u32 n_blocks, block_align, *page_index;
458 a->
name =
format (0,
"default-numa-%u%c", numa_node, 0);
518 if (ci1 == ~0 || ci2 == ~0)
541 u32 chunk_index, page_index;
565 if (next->
next != ~0)
567 memset (next, 0,
sizeof (*next));
580 memset (c, 0,
sizeof (*c));
589 u32 log2_page_sz = va_arg (*va,
u32);
591 if (log2_page_sz >= 30)
592 return format (s,
"%uGB", 1 << (log2_page_sz - 30));
594 if (log2_page_sz >= 20)
595 return format (s,
"%uMB", 1 << (log2_page_sz - 20));
597 if (log2_page_sz >= 10)
598 return format (s,
"%uKB", 1 << (log2_page_sz - 10));
600 return format (s,
"%uB", 1 << log2_page_sz);
608 int verbose = va_arg (*va,
int);
614 s =
format (s,
"free %u chunks %u free-chunks %d ",
622 s =
format (s,
"\n%U%12s%12s%8s%8s%8s%8s",
624 "chunk offset",
"size",
"used",
"index",
"prev",
"next");
627 s =
format (s,
"\n%U%12u%12u%8s%8d%8d%8d",
631 c->
used ?
"yes" :
"no",
645 int verbose = va_arg (*va,
int);
651 s =
format (s,
"used-pages %u reserved-pages %u default-page-size %U " 670 s = format (s,
"\n%Uarena '%s' pages %u subpage-size %U numa-node %u",
671 format_white_space, indent + 2, a->name,
672 vec_len (a->page_indices), format_log2_page_size,
673 a->log2_subpage_sz, a->numa_node);
675 s = format (s,
" shared fd %d", a->fd);
677 vec_foreach (page_index, a->page_indices)
679 pp = vec_elt_at_index (pm->pages, *page_index);
680 s = format (s,
"\n%U%U", format_white_space, indent + 4,
681 format_pmalloc_page, pp, verbose);
695 s =
format (s,
"%16s %13s %8s",
"virtual-addr",
"physical-addr",
"size");
698 uword *lookup_val, pa, va;
703 pa = va - *lookup_val;
#define vec_foreach_index(var, v)
Iterate over vector indices.
u8 * format_clib_error(u8 *s, va_list *va)
#define hash_set(h, key, value)
clib_error_t * clib_sysfs_prealloc_hugepages(int numa_node, int log2_page_size, int nr)
#define hash_unset(h, key)
static u8 * format_pmalloc_page(u8 *s, va_list *va)
#define CLIB_PMALLOC_NUMA_LOCAL
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
void * clib_pmalloc_alloc_aligned_on_numa(clib_pmalloc_main_t *pm, uword size, uword align, u32 numa_node)
static void * clib_pmalloc_alloc_inline(clib_pmalloc_main_t *pm, clib_pmalloc_arena_t *a, uword size, uword align, u32 numa_node)
static void pmalloc_update_lookup_table(clib_pmalloc_main_t *pm, u32 first, u32 count)
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
static u32 clib_pmalloc_get_page_index(clib_pmalloc_main_t *pm, void *va)
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
void * clib_pmalloc_create_shared_arena(clib_pmalloc_main_t *pm, char *name, uword size, u32 log2_page_sz, u32 numa_node)
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
#define pool_get(P, E)
Allocate an object E from a pool P (unspecified alignment).
void * clib_pmalloc_alloc_aligned(clib_pmalloc_main_t *pm, uword size, uword align)
static int pmalloc_chunks_mergeable(clib_pmalloc_arena_t *a, clib_pmalloc_page_t *pp, u32 ci1, u32 ci2)
static uword min_log2(uword x)
clib_pmalloc_chunk_t * chunks
clib_pmalloc_arena_t * arenas
static u8 * format_log2_page_size(u8 *s, va_list *va)
static int pmalloc_validate_numa_node(u32 *numa_node)
static clib_pmalloc_page_t * pmalloc_map_pages(clib_pmalloc_main_t *pm, clib_pmalloc_arena_t *a, u32 numa_node, u32 n_pages)
int clib_pmalloc_init(clib_pmalloc_main_t *pm, uword base_addr, uword size)
static clib_pmalloc_chunk_t * get_chunk(clib_pmalloc_page_t *pp, u32 index)
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
static long set_mempolicy(int mode, const unsigned long *nodemask, unsigned long maxnode)
u8 * format_pmalloc_map(u8 *s, va_list *va)
static void * alloc_chunk_from_page(clib_pmalloc_main_t *pm, clib_pmalloc_page_t *pp, u32 n_blocks, u32 block_align, u32 numa_node)
static uword pow2_mask(uword x)
#define DEFAULT_RESERVED_MB
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
#define clib_error_return(e, args...)
static uword pmalloc_size2pages(uword size, u32 log2_page_sz)
#define clib_error_create(args...)
static heap_elt_t * first(heap_header_t *h)
#define PMALLOC_LOG2_BLOCK_SZ
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
void * clib_pmalloc_alloc_from_arena(clib_pmalloc_main_t *pm, void *arena_va, uword size, uword align)
#define CLIB_PMALLOC_F_NO_PAGEMAP
static int getcpu(unsigned *cpu, unsigned *node)
#define clib_error_return_unix(e, args...)
#define pool_put(P, E)
Free an object E in pool P.
u32 * default_arena_for_numa_node
uword clib_mem_vm_reserve(uword start, uword size, u32 log2_page_sz)
static int get_mempolicy(int *mode, unsigned long *nodemask, unsigned long maxnode, void *addr, unsigned long flags)
clib_pmalloc_page_t * pages
sll srl srl sll sra u16x4 i
#define vec_free(V)
Free vector's memory (no header).
static uword round_pow2(uword x, uword pow2)
#define uword_to_pointer(u, type)
void clib_pmalloc_free(clib_pmalloc_main_t *pm, void *va)
uword * chunk_index_by_va
uword clib_mem_get_default_hugepage_size(void)
static uword pointer_to_uword(const void *p)
clib_error_t * clib_mem_create_hugetlb_fd(char *name, int *fdp)
static uword is_pow2(uword x)
clib_error_t * clib_mem_create_fd(char *name, int *fdp)
static clib_pmalloc_arena_t * clib_pmalloc_get_arena(clib_pmalloc_main_t *pm, void *va)
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
u64 * clib_mem_vm_get_paddr(void *mem, int log2_page_size, int n_pages)
#define clib_error_free(e)
#define vec_foreach(var, vec)
Vector iterator.
static long move_pages(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags)
#define vec_validate_init_empty(V, I, INIT)
Make sure vector is long enough for given index and initialize empty space (no header, unspecified alignment)
#define CLIB_CACHE_LINE_BYTES
u8 * format_pmalloc(u8 *s, va_list *va)
#define CLIB_PMALLOC_ARENA_F_SHARED_MEM
static uword pool_elts(void *v)
Number of active elements in a pool.