18 #include <sys/types.h> 22 #include <linux/mempolicy.h> 23 #include <linux/memfd.h> 33 #if __SIZEOF_POINTER__ >= 8 34 #define DEFAULT_RESERVED_MB 16384 36 #define DEFAULT_RESERVED_MB 256 48 return round_pow2 (size, 1ULL << log2_page_sz) >> log2_page_sz;
57 if (
getcpu (&cpu, numa_node) != 0)
79 if (pt == 0 || pt[0] == 0)
88 mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;
91 mmap_flags |= MAP_FIXED;
94 PROT_NONE, mmap_flags, -1, 0);
96 if (pm->
base == MAP_FAILED)
108 munmap (pm->
base, off);
118 u32 n_blocks,
u32 block_align,
u32 numa_node)
124 u32 alloc_chunk_index;
130 u32 i, start = 0, prev = ~0;
156 off = (block_align - (c->
start & (block_align - 1))) & (block_align - 1);
158 if (c->
used || n_blocks + off > c->
size)
162 alloc_chunk_index = c->
next;
169 u32 offset_chunk_index;
173 offset_chunk_index = alloc_chunk_index;
174 alloc_chunk_index = c - pp->
chunks;
180 c->
prev = offset_chunk_index;
182 co->
next = alloc_chunk_index;
186 if (c->
size > n_blocks)
188 u32 tail_chunk_index;
192 tail_chunk_index = ct - pp->
chunks;
196 ct->
prev = alloc_chunk_index;
200 c->
next = tail_chunk_index;
204 else if (c->
next != ~0)
219 uword seek, va, pa, p;
226 p = (
uword) first *elts_per_page;
238 fd = open ((
char *)
"/proc/self/pagemap", O_RDONLY);
239 while (p < (
uword) elts_per_page * count)
244 if (fd != -1 && lseek (fd, seek, SEEK_SET) == seek &&
245 read (fd, &pa,
sizeof (pa)) == (
sizeof (pa)) &&
260 u32 numa_node,
u32 n_pages)
263 int status, rv,
i, mmap_flags;
264 void *va = MAP_FAILED;
266 long unsigned int mask[16] = { 0 };
267 long unsigned int old_mask[16] = { 0 };
287 rv =
get_mempolicy (&old_mpol, old_mask,
sizeof (old_mask) * 8 + 1, 0, 0);
289 if (rv == -1 && numa_node != 0)
295 mask[0] = 1 << numa_node;
297 if (rv == -1 && numa_node != 0)
300 "numa node %u", numa_node);
304 mmap_flags = MAP_FIXED;
308 mmap_flags |= MAP_SHARED;
315 if ((ftruncate (a->
fd, size)) == -1)
321 mmap_flags |= MAP_HUGETLB;
323 mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
328 if (mmap (va, size, PROT_READ | PROT_WRITE, mmap_flags, a->
fd, 0) ==
332 "fd %d numa %d flags 0x%x", n_pages,
333 va, a->
fd, numa_node, mmap_flags);
346 rv =
set_mempolicy (old_mpol, old_mask,
sizeof (old_mask) * 8 + 1);
347 if (rv == -1 && numa_node != 0)
356 if ((rv == 0 && status != numa_node) || (rv != 0 && numa_node != 0))
358 pm->
error = rv == -1 ?
360 "%u status %d", numa_node, status) :
362 "%u status %d", numa_node, status);
367 for (i = 0; i < n_pages; i++)
390 return pp - (n_pages - 1);
393 if (va != MAP_FAILED)
397 mmap (va, size, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
415 if (log2_page_sz == 0)
421 1 << (log2_page_sz - 10));
444 memset (a, 0,
sizeof (*a));
457 u32 n_blocks, block_align, *page_index;
475 a->
name =
format (0,
"default-numa-%u%c", numa_node, 0);
535 if (ci1 == ~0 || ci2 == ~0)
558 u32 chunk_index, page_index;
582 if (next->
next != ~0)
584 memset (next, 0,
sizeof (*next));
597 memset (c, 0,
sizeof (*c));
606 u32 log2_page_sz = va_arg (*va,
u32);
608 if (log2_page_sz >= 30)
609 return format (s,
"%uGB", 1 << (log2_page_sz - 30));
611 if (log2_page_sz >= 20)
612 return format (s,
"%uMB", 1 << (log2_page_sz - 20));
614 if (log2_page_sz >= 10)
615 return format (s,
"%uKB", 1 << (log2_page_sz - 10));
617 return format (s,
"%uB", 1 << log2_page_sz);
625 int verbose = va_arg (*va,
int);
631 s =
format (s,
"free %u chunks %u free-chunks %d ",
639 s =
format (s,
"\n%U%12s%12s%8s%8s%8s%8s",
641 "chunk offset",
"size",
"used",
"index",
"prev",
"next");
644 s =
format (s,
"\n%U%12u%12u%8s%8d%8d%8d",
648 c->
used ?
"yes" :
"no",
662 int verbose = va_arg (*va,
int);
668 s =
format (s,
"used-pages %u reserved-pages %u default-page-size %U " 687 s = format (s,
"\n%Uarena '%s' pages %u subpage-size %U numa-node %u",
688 format_white_space, indent + 2, a->name,
689 vec_len (a->page_indices), format_log2_page_size,
690 a->log2_subpage_sz, a->numa_node);
692 s = format (s,
" shared fd %d", a->fd);
694 vec_foreach (page_index, a->page_indices)
696 pp = vec_elt_at_index (pm->pages, *page_index);
697 s = format (s,
"\n%U%U", format_white_space, indent + 4,
698 format_pmalloc_page, pp, verbose);
712 s =
format (s,
"%16s %13s %8s",
"virtual-addr",
"physical-addr",
"size");
715 uword *lookup_val, pa, va;
720 pa = va - *lookup_val;
#define vec_foreach_index(var, v)
Iterate over vector indices.
u8 * format_clib_error(u8 *s, va_list *va)
#define hash_set(h, key, value)
clib_error_t * clib_sysfs_prealloc_hugepages(int numa_node, int log2_page_size, int nr)
#define hash_unset(h, key)
static u8 * format_pmalloc_page(u8 *s, va_list *va)
#define CLIB_PMALLOC_NUMA_LOCAL
clib_memset(h->entries, 0, sizeof(h->entries[0]) *entries)
void * clib_pmalloc_alloc_aligned_on_numa(clib_pmalloc_main_t *pm, uword size, uword align, u32 numa_node)
static void * clib_pmalloc_alloc_inline(clib_pmalloc_main_t *pm, clib_pmalloc_arena_t *a, uword size, uword align, u32 numa_node)
static void pmalloc_update_lookup_table(clib_pmalloc_main_t *pm, u32 first, u32 count)
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
static u32 clib_pmalloc_get_page_index(clib_pmalloc_main_t *pm, void *va)
#define vec_add2(V, P, N)
Add N elements to end of vector V, return pointer to new elements in P.
void * clib_pmalloc_create_shared_arena(clib_pmalloc_main_t *pm, char *name, uword size, u32 log2_page_sz, u32 numa_node)
#define vec_validate_aligned(V, I, A)
Make sure vector is long enough for given index (no header, specified alignment)
#define pool_get(P, E)
Allocate an object E from a pool P (unspecified alignment).
void * clib_pmalloc_alloc_aligned(clib_pmalloc_main_t *pm, uword size, uword align)
static int pmalloc_chunks_mergeable(clib_pmalloc_arena_t *a, clib_pmalloc_page_t *pp, u32 ci1, u32 ci2)
static uword min_log2(uword x)
clib_pmalloc_chunk_t * chunks
clib_pmalloc_arena_t * arenas
static u8 * format_log2_page_size(u8 *s, va_list *va)
static int pmalloc_validate_numa_node(u32 *numa_node)
static clib_pmalloc_page_t * pmalloc_map_pages(clib_pmalloc_main_t *pm, clib_pmalloc_arena_t *a, u32 numa_node, u32 n_pages)
int clib_pmalloc_init(clib_pmalloc_main_t *pm, uword base_addr, uword size)
static clib_pmalloc_chunk_t * get_chunk(clib_pmalloc_page_t *pp, u32 index)
#define pool_foreach(VAR, POOL, BODY)
Iterate through pool.
static long set_mempolicy(int mode, const unsigned long *nodemask, unsigned long maxnode)
u8 * format_pmalloc_map(u8 *s, va_list *va)
static void * alloc_chunk_from_page(clib_pmalloc_main_t *pm, clib_pmalloc_page_t *pp, u32 n_blocks, u32 block_align, u32 numa_node)
static uword pow2_mask(uword x)
#define DEFAULT_RESERVED_MB
#define vec_elt_at_index(v, i)
Get vector value at index i checking that i is in bounds.
#define clib_error_return(e, args...)
static uword pmalloc_size2pages(uword size, u32 log2_page_sz)
#define clib_error_create(args...)
static heap_elt_t * first(heap_header_t *h)
#define PMALLOC_LOG2_BLOCK_SZ
#define pool_elt_at_index(p, i)
Returns pointer to element at given index.
void * clib_pmalloc_alloc_from_arena(clib_pmalloc_main_t *pm, void *arena_va, uword size, uword align)
#define CLIB_PMALLOC_F_NO_PAGEMAP
static int getcpu(unsigned *cpu, unsigned *node)
#define clib_error_return_unix(e, args...)
#define pool_put(P, E)
Free an object E in pool P.
u32 * default_arena_for_numa_node
static int get_mempolicy(int *mode, unsigned long *nodemask, unsigned long maxnode, void *addr, unsigned long flags)
clib_pmalloc_page_t * pages
#define vec_free(V)
Free vector's memory (no header).
static uword round_pow2(uword x, uword pow2)
#define uword_to_pointer(u, type)
void clib_pmalloc_free(clib_pmalloc_main_t *pm, void *va)
uword * chunk_index_by_va
uword clib_mem_get_default_hugepage_size(void)
static uword pointer_to_uword(const void *p)
clib_error_t * clib_mem_create_hugetlb_fd(char *name, int *fdp)
static uword is_pow2(uword x)
clib_error_t * clib_mem_create_fd(char *name, int *fdp)
static clib_pmalloc_arena_t * clib_pmalloc_get_arena(clib_pmalloc_main_t *pm, void *va)
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
u64 * clib_mem_vm_get_paddr(void *mem, int log2_page_size, int n_pages)
#define clib_error_free(e)
#define vec_foreach(var, vec)
Vector iterator.
static long move_pages(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags)
#define vec_validate_init_empty(V, I, INIT)
Make sure vector is long enough for given index and initialize empty space (no header, unspecified alignment)
#define CLIB_CACHE_LINE_BYTES
u8 * format_pmalloc(u8 *s, va_list *va)
#define CLIB_PMALLOC_ARENA_F_SHARED_MEM
static uword pool_elts(void *v)
Number of active elements in a pool.