#include <sys/types.h>
/* ... */
#include <linux/mempolicy.h>
#include <linux/memfd.h>

#if __SIZEOF_POINTER__ >= 8
#define DEFAULT_RESERVED_MB 16384
#else
#define DEFAULT_RESERVED_MB 256
#endif

static inline uword
pmalloc_size2pages (uword size, u32 log2_page_sz)
{
  return round_pow2 (size, 1ULL << log2_page_sz) >> log2_page_sz;
}
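/* Worked example (added for illustration; not part of the original source):
   with 2MB pages (log2_page_sz = 21) a 3MB request rounds up to 4MB, i.e.
   two pages:

     pmalloc_size2pages (3 << 20, 21)
       = round_pow2 (3145728, 2097152) >> 21
       = 4194304 >> 21
       = 2
*/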
static int
pmalloc_validate_numa_node (u32 * numa_node)
{
  /* resolve "allocate on the local node" to a concrete numa node */
  if (*numa_node == CLIB_PMALLOC_NUMA_LOCAL)
    {
      u32 cpu;
      if (getcpu (&cpu, numa_node, 0) != 0)
        return 1;
    }
  return 0;
}
int
clib_pmalloc_init (clib_pmalloc_main_t * pm, uword base_addr, uword size)
{
  uword off, pagesize;
  u64 *pt = 0;
  int mmap_flags;

  ASSERT (pm->error == 0);

  pagesize = clib_mem_get_default_hugepage_size ();
  pm->def_log2_page_sz = min_log2 (pagesize);
  pm->sys_log2_page_sz = min_log2 (sysconf (_SC_PAGESIZE));
  pm->lookup_log2_page_sz = pm->def_log2_page_sz;

  /* check if pagemap is accessible */
  pt = clib_mem_vm_get_paddr (&pt, pm->sys_log2_page_sz, 1);
  if (pt == 0 || pt[0] == 0)
    pm->flags |= CLIB_PMALLOC_F_NO_PAGEMAP;

  size = size ? size : ((u64) DEFAULT_RESERVED_MB) << 20;
  size = round_pow2 (size, pagesize);

  pm->max_pages = size >> pm->def_log2_page_sz;

  /* reserve VA space for future growth */
  mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;

  if (base_addr)
    mmap_flags |= MAP_FIXED;

  pm->base = mmap (uword_to_pointer (base_addr, void *), size + pagesize,
                   PROT_NONE, mmap_flags, -1, 0);

  if (pm->base == MAP_FAILED)
    {
      /* ... record pm->error ... */
      return -1;
    }

  /* trim start and end of the reservation to be page aligned */
  off = round_pow2 (pointer_to_uword (pm->base), pagesize) -
    pointer_to_uword (pm->base);
  if (off)
    {
      munmap (pm->base, off);
      pm->base += off;
    }

  munmap (pm->base + ((uword) pm->max_pages * pagesize), pagesize - off);
  return 0;
}
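/* Usage sketch (an assumption for illustration; 'my_pmalloc_main' and
   'my_pmalloc_setup' are hypothetical names not taken from the original
   file): initialize the allocator, letting the kernel pick the base
   address and falling back to the DEFAULT_RESERVED_MB reservation. */
static clib_pmalloc_main_t my_pmalloc_main;

static int
my_pmalloc_setup (void)
{
  /* base_addr 0: no fixed mapping address; size 0: default reservation */
  if (clib_pmalloc_init (&my_pmalloc_main, 0, 0) != 0)
    return -1;                  /* details are left in my_pmalloc_main.error */
  return 0;
}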
static inline void *
alloc_chunk_from_page (clib_pmalloc_main_t * pm, clib_pmalloc_page_t * pp,
                       u32 n_blocks, u32 block_align, u32 numa_node)
{
  clib_pmalloc_chunk_t *c = 0;
  clib_pmalloc_arena_t *a;
  void *va;
  u32 off;
  u32 alloc_chunk_index;

  a = pool_elt_at_index (pm->arenas, pp->arena_index);

  /* on first use, carve the page into one free chunk per subpage */
  if (pp->chunks == 0)
    {
      u32 i, start = 0, prev = ~0;
      /* ... build the doubly-linked list of free chunks ... */
    }

  if (pp->n_free_blocks < n_blocks)
    return 0;

  alloc_chunk_index = pp->first_chunk_index;

next_chunk:
  c = pool_elt_at_index (pp->chunks, alloc_chunk_index);

  /* blocks to skip at the chunk start to satisfy the alignment request */
  off = (block_align - (c->start & (block_align - 1))) & (block_align - 1);

  if (c->used || n_blocks + off > c->size)
    {
      if (c->next == ~0)
        return 0;
      alloc_chunk_index = c->next;
      goto next_chunk;
    }

  /* if alignment is needed, split off the misaligned head as its own
     free chunk and allocate from the aligned remainder */
  if (off)
    {
      u32 offset_chunk_index;
      clib_pmalloc_chunk_t *co;
      pool_get (pp->chunks, c);
      pp->n_free_chunks++;
      offset_chunk_index = alloc_chunk_index;
      alloc_chunk_index = c - pp->chunks;

      co = pool_elt_at_index (pp->chunks, offset_chunk_index);
      c->size = co->size - off;
      c->next = co->next;
      c->start = co->start + off;
      c->prev = offset_chunk_index;
      co->size = off;
      co->next = alloc_chunk_index;
    }

  c->used = 1;

  /* if the chunk is bigger than requested, split off a free tail chunk */
  if (c->size > n_blocks)
    {
      u32 tail_chunk_index;
      clib_pmalloc_chunk_t *ct;
      pool_get (pp->chunks, ct);
      pp->n_free_chunks++;
      tail_chunk_index = ct - pp->chunks;
      c = pool_elt_at_index (pp->chunks, alloc_chunk_index);
      ct->size = c->size - n_blocks;
      ct->next = c->next;
      ct->prev = alloc_chunk_index;
      ct->start = c->start + n_blocks;

      c->size = n_blocks;
      c->next = tail_chunk_index;
      if (ct->next != ~0)
        pool_elt_at_index (pp->chunks, ct->next)->prev = tail_chunk_index;
    }
  else if (c->next != ~0)
    pool_elt_at_index (pp->chunks, c->next)->prev = alloc_chunk_index;

  pp->n_free_blocks -= n_blocks;
  pp->n_free_chunks--;

  /* remember which chunk owns this va so clib_pmalloc_free can find it */
  va = pm->base + ((pp - pm->pages) << pm->def_log2_page_sz) +
    (c->start << PMALLOC_LOG2_BLOCK_SZ);
  hash_set (pm->chunk_index_by_va, pointer_to_uword (va), alloc_chunk_index);
  return va;
}
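/* Worked example of the alignment fix-up above (illustrative): with
   block_align = 8 and a free chunk starting at block 13,

     off = (8 - (13 & 7)) & 7 = 3

   so blocks 13..15 are split off into a small free chunk and the
   allocation itself starts at block 16, which is 8-block aligned. */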
static void
pmalloc_update_lookup_table (clib_pmalloc_main_t * pm, u32 first, u32 count)
{
  uword seek, va, pa, p;
  int fd;
  u32 elts_per_page = 1 << (pm->def_log2_page_sz - pm->lookup_log2_page_sz);

  vec_validate_aligned (pm->lookup_table, vec_len (pm->pages) *
                        elts_per_page - 1, CLIB_CACHE_LINE_BYTES);

  p = (uword) first * elts_per_page;

  if (pm->flags & CLIB_PMALLOC_F_NO_PAGEMAP)
    {
      /* ... pagemap is not readable: store identity entries ... */
      return;
    }

  fd = open ((char *) "/proc/self/pagemap", O_RDONLY);

  while (p < (uword) elts_per_page * count)
    {
      va = pointer_to_uword (pm->base) + (p << pm->lookup_log2_page_sz);
      pa = 0;
      seek = (va >> pm->sys_log2_page_sz) * sizeof (pa);
      if (fd != -1 && lseek (fd, seek, SEEK_SET) == seek &&
          read (fd, &pa, sizeof (pa)) == (sizeof (pa)) &&
          pa & (1ULL << 63) /* page present bit */ )
        {
          pa = (pa & pow2_mask (55)) << pm->sys_log2_page_sz;
        }
      pm->lookup_table[p] = va - pa;
      p++;
    }

  if (fd != -1)
    close (fd);
}
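/* Note on the pagemap decode above: each 8-byte entry in
   /proc/self/pagemap describes one system page; bit 63 is the "page
   present" flag and bits 0-54 hold the page frame number (see
   Documentation/admin-guide/mm/pagemap.rst in the kernel tree). A present
   entry is therefore masked with pow2_mask (55) and shifted left by the
   system page shift to recover the physical address. Storing va - pa in
   the lookup table lets a physical address be recovered later with a
   single subtraction, as format_pmalloc_map below does. */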
static inline clib_pmalloc_page_t *
pmalloc_map_pages (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
                   u32 numa_node, u32 n_pages)
{
  clib_pmalloc_page_t *pp = 0;
  int status, rv, i, mmap_flags;
  void *va;
  int old_mpol = -1;
  long unsigned int mask[16] = { 0 };
  long unsigned int old_mask[16] = { 0 };
  uword size = (uword) n_pages << pm->def_log2_page_sz;
  uword page_size = 1ULL << pm->def_log2_page_sz;

  clib_error_free (pm->error);
  /* ... check page limits, preallocate huge pages via sysfs if needed ... */

  /* save the thread mempolicy so it can be restored afterwards */
  rv = get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1, 0, 0);
  if (rv == -1 && numa_node != 0)
    {
      /* ... record pm->error ... */
      return 0;
    }

  /* bind subsequent page faults to the requested numa node */
  mask[0] = 1 << numa_node;
  rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1);
  if (rv == -1 && numa_node != 0)
    {
      pm->error = clib_error_return_unix (0, "failed to set mempolicy for "
                                          "numa node %u", numa_node);
      return 0;
    }

  mmap_flags = MAP_FIXED;

  if ((pm->flags & CLIB_PMALLOC_F_NO_PAGEMAP) == 0)
    mmap_flags |= MAP_LOCKED;

  if (a->flags & CLIB_PMALLOC_ARENA_F_SHARED_MEM)
    {
      mmap_flags |= MAP_SHARED;
      if (a->log2_subpage_sz != pm->sys_log2_page_sz)
        pm->error = clib_mem_create_hugetlb_fd ((char *) a->name, &a->fd);
      else
        pm->error = clib_mem_create_fd ((char *) a->name, &a->fd);
      if (a->fd == -1)
        goto error;
      if ((ftruncate (a->fd, size)) == -1)
        goto error;
    }
  else
    {
      if (a->log2_subpage_sz != pm->sys_log2_page_sz)
        mmap_flags |= MAP_HUGETLB;

      mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
      a->fd = -1;
    }

  va = pm->base + (((uword) vec_len (pm->pages)) << pm->def_log2_page_sz);
  if (mmap (va, size, PROT_READ | PROT_WRITE, mmap_flags, a->fd, 0) ==
      MAP_FAILED)
    {
      pm->error = clib_error_return_unix (0, "failed to mmap %u pages at %p "
                                          "fd %d numa %d flags 0x%x", n_pages,
                                          va, a->fd, numa_node, mmap_flags);
      goto error;
    }

  /* verify that the huge pages were actually allocated; touching a
     missing huge page later would raise SIGBUS */
  if (a->log2_subpage_sz != pm->sys_log2_page_sz)
    for (int i = 0; i < n_pages; i++)
      {
        unsigned char flag;
        mincore (va + i * page_size, 1, &flag);
        /* flag is 1 if the page is allocated and in memory */
        if (!flag)
          {
            pm->error = clib_error_return_unix (0,
              "Unable to fulfill huge page allocation request");
            goto error;
          }
      }

  /* restore the previous thread mempolicy */
  rv = set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1);
  if (rv == -1 && numa_node != 0)
    {
      /* ... record pm->error ... */
      goto error;
    }

  /* confirm placement; move_pages failure is tolerated only for numa
     node 0 so that non-numa kernels still work */
  rv = move_pages (0, 1, &va, 0, &status, 0);
  if ((rv == 0 && status != numa_node) || (rv != 0 && numa_node != 0))
    {
      pm->error = rv == -1 ?
        clib_error_return_unix (0, "page allocated on wrong node, numa node "
                                "%u status %d", numa_node, status) :
        clib_error_return (0, "page allocated on wrong node, numa node "
                           "%u status %d", numa_node, status);

      /* put the PROT_NONE reservation back in place */
      mmap (va, size, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
            -1, 0);
      goto error;
    }

  for (i = 0; i < n_pages; i++)
    {
      vec_add2 (pm->pages, pp, 1);
      pp->n_free_blocks = 1 << (pm->def_log2_page_sz - PMALLOC_LOG2_BLOCK_SZ);
      pp->index = pp - pm->pages;
      pp->arena_index = a->index;
      vec_add1 (a->page_indices, pp->index);
      a->n_pages++;
    }

  /* ... refresh the va->pa lookup table for the new pages ... */

  /* return pointer to the first of the newly mapped pages */
  return pp - (n_pages - 1);

error:
  /* ... */
  return 0;
}
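/* The function above follows a bind-map-restore pattern: save the thread's
   mempolicy, bind to the target node with MPOL_BIND so freshly faulted
   pages land there, map and touch the memory, then restore the old policy.
   A minimal standalone sketch of the same idea (illustrative only, error
   handling omitted):

     unsigned long nodemask = 1UL << numa_node;
     set_mempolicy (MPOL_BIND, &nodemask, sizeof (nodemask) * 8 + 1);
     void *p = mmap (0, size, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
     memset (p, 0, size);                 // fault the pages in now
     set_mempolicy (MPOL_DEFAULT, 0, 0);  // restore the default policy
*/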
void *
clib_pmalloc_create_shared_arena (clib_pmalloc_main_t * pm, char *name,
                                  uword size, u32 log2_page_sz,
                                  u32 numa_node)
{
  clib_pmalloc_arena_t *a;
  clib_pmalloc_page_t *pp;
  u32 n_pages;

  clib_error_free (pm->error);

  if (log2_page_sz == 0)
    log2_page_sz = pm->def_log2_page_sz;
  else if (log2_page_sz != pm->def_log2_page_sz &&
           log2_page_sz != pm->sys_log2_page_sz)
    {
      pm->error = clib_error_create ("unsupported page size (%uKB)",
                                     1 << (log2_page_sz - 10));
      return 0;
    }

  n_pages = pmalloc_size2pages (size, pm->def_log2_page_sz);
  /* ... validate limits and numa node, set up the arena ... */

  if ((pp = pmalloc_map_pages (pm, a, numa_node, n_pages)) == 0)
    {
      /* mapping failed: undo the arena creation */
      vec_free (a->name);
      memset (a, 0, sizeof (*a));
      pool_put (pm->arenas, a);
      return 0;
    }

  return pm->base + ((uword) pp->index << pm->def_log2_page_sz);
}

static inline void *
clib_pmalloc_alloc_inline (clib_pmalloc_main_t * pm, clib_pmalloc_arena_t * a,
                           uword size, uword align, u32 numa_node)
{
  clib_pmalloc_page_t *pp;
  u32 n_blocks, block_align, *page_index;

  ASSERT (is_pow2 (align));

  if (pmalloc_validate_numa_node (&numa_node))
    return 0;

  /* without an explicit arena, fall back to (and lazily create) the
     default arena for this numa node */
  if (a == 0)
    {
      vec_validate_init_empty (pm->default_arena_for_numa_node,
                               numa_node, ~0);
      if (pm->default_arena_for_numa_node[numa_node] == ~0)
        {
          pool_get (pm->arenas, a);
          pm->default_arena_for_numa_node[numa_node] = a - pm->arenas;
          a->name = format (0, "default-numa-%u%c", numa_node, 0);
          a->numa_node = numa_node;
          /* ... */
        }
      else
        a = pool_elt_at_index (pm->arenas,
                               pm->default_arena_for_numa_node[numa_node]);
    }
  /* ... try the arena's existing pages, then map new ones if needed ... */
}
static inline int
pmalloc_chunks_mergeable (clib_pmalloc_arena_t * a, clib_pmalloc_page_t * pp,
                          u32 ci1, u32 ci2)
{
  clib_pmalloc_chunk_t *c1, *c2;

  if (ci1 == ~0 || ci2 == ~0)
    return 0;

  c1 = get_chunk (pp, ci1);
  c2 = get_chunk (pp, ci2);

  if (c1->used || c2->used)
    return 0;

  /* both chunks must also belong to the same subpage */
  if (c1->start >> (a->log2_subpage_sz - PMALLOC_LOG2_BLOCK_SZ) !=
      c2->start >> (a->log2_subpage_sz - PMALLOC_LOG2_BLOCK_SZ))
    return 0;

  return 1;
}
void
clib_pmalloc_free (clib_pmalloc_main_t * pm, void *va)
{
  clib_pmalloc_page_t *pp;
  clib_pmalloc_chunk_t *c;
  clib_pmalloc_arena_t *a;
  u32 chunk_index, page_index;
  /* ... look up the chunk for va and mark it unused ... */

  /* merge with next chunk if it is also free */
  if (pmalloc_chunks_mergeable (a, pp, chunk_index, c->next))
    {
      clib_pmalloc_chunk_t *next = get_chunk (pp, c->next);
      c->size += next->size;
      c->next = next->next;
      if (next->next != ~0)
        get_chunk (pp, next->next)->prev = chunk_index;
      memset (next, 0, sizeof (*next));
      pool_put (pp->chunks, next);
      pp->n_free_chunks--;
    }

  /* merge with previous chunk if it is also free */
  if (pmalloc_chunks_mergeable (a, pp, c->prev, chunk_index))
    {
      clib_pmalloc_chunk_t *prev = get_chunk (pp, c->prev);
      prev->size += c->size;
      prev->next = c->next;
      if (c->next != ~0)
        get_chunk (pp, c->next)->prev = c->prev;
      memset (c, 0, sizeof (*c));
      pool_put (pp->chunks, c);
      pp->n_free_chunks--;
    }
}
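/* Round-trip sketch (hypothetical helper name, added for illustration):
   allocate a cache-line aligned buffer on numa node 0 and free it again,
   exercising the chunk split and merge paths above. */
static void
my_pmalloc_roundtrip (clib_pmalloc_main_t * pm)
{
  void *p = clib_pmalloc_alloc_aligned_on_numa (pm, 1024,
                                                CLIB_CACHE_LINE_BYTES, 0);
  if (p)
    clib_pmalloc_free (pm, p);  /* adjacent free chunks merge back together */
}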
static u8 *
format_log2_page_size (u8 * s, va_list * va)
{
  u32 log2_page_sz = va_arg (*va, u32);

  if (log2_page_sz >= 30)
    return format (s, "%uGB", 1 << (log2_page_sz - 30));

  if (log2_page_sz >= 20)
    return format (s, "%uMB", 1 << (log2_page_sz - 20));

  if (log2_page_sz >= 10)
    return format (s, "%uKB", 1 << (log2_page_sz - 10));

  return format (s, "%uB", 1 << log2_page_sz);
}
static u8 *
format_pmalloc_page (u8 * s, va_list * va)
{
  clib_pmalloc_page_t *pp = va_arg (*va, clib_pmalloc_page_t *);
  int verbose = va_arg (*va, int);
  u32 indent = format_get_indent (s);

  if (pp->chunks == 0)
    return s;

  s = format (s, "free %u chunks %u free-chunks %d ",
              (pp->n_free_blocks) << PMALLOC_LOG2_BLOCK_SZ,
              pool_elts (pp->chunks), pp->n_free_chunks);

  if (verbose >= 2)
    {
      clib_pmalloc_chunk_t *c;
      c = pool_elt_at_index (pp->chunks, pp->first_chunk_index);
      s = format (s, "\n%U%12s%12s%8s%8s%8s%8s",
                  format_white_space, indent + 2,
                  "chunk offset", "size", "used", "index", "prev", "next");
      while (1)
        {
          s = format (s, "\n%U%12u%12u%8s%8d%8d%8d",
                      format_white_space, indent + 2,
                      c->start << PMALLOC_LOG2_BLOCK_SZ,
                      c->size << PMALLOC_LOG2_BLOCK_SZ,
                      c->used ? "yes" : "no",
                      c - pp->chunks, c->prev, c->next);
          if (c->next == ~0)
            break;
          c = pool_elt_at_index (pp->chunks, c->next);
        }
    }

  return s;
}
u8 *
format_pmalloc (u8 * s, va_list * va)
{
  clib_pmalloc_main_t *pm = va_arg (*va, clib_pmalloc_main_t *);
  int verbose = va_arg (*va, int);
  u32 indent = format_get_indent (s);
  clib_pmalloc_arena_t *a;
  clib_pmalloc_page_t *pp;
  u32 *page_index;

  s = format (s, "used-pages %u reserved-pages %u default-page-size %U "
              "lookup-page-size %U%s", vec_len (pm->pages), pm->max_pages,
              format_log2_page_size, pm->def_log2_page_sz,
              format_log2_page_size, pm->lookup_log2_page_sz,
              pm->flags & CLIB_PMALLOC_F_NO_PAGEMAP ? " no-pagemap" : "");
  /* ... */

  /* *INDENT-OFF* */
  pool_foreach (a, pm->arenas,
    {
      s = format (s, "\n%Uarena '%s' pages %u subpage-size %U numa-node %u",
                  format_white_space, indent + 2, a->name,
                  vec_len (a->page_indices), format_log2_page_size,
                  a->log2_subpage_sz, a->numa_node);
      if (a->fd != -1)
        s = format (s, " shared fd %d", a->fd);
      if (verbose >= 1)
        vec_foreach (page_index, a->page_indices)
          {
            pp = vec_elt_at_index (pm->pages, *page_index);
            s = format (s, "\n%U%U", format_white_space, indent + 4,
                        format_pmalloc_page, pp, verbose);
          }
    });
  /* *INDENT-ON* */

  return s;
}
u8 *
format_pmalloc_map (u8 * s, va_list * va)
{
  clib_pmalloc_main_t *pm = va_arg (*va, clib_pmalloc_main_t *);
  u32 index;

  s = format (s, "%16s %13s %8s", "virtual-addr", "physical-addr", "size");
  vec_foreach_index (index, pm->lookup_table)
    {
      uword *lookup_val, pa, va;
      lookup_val = vec_elt_at_index (pm->lookup_table, index);
      va = pointer_to_uword (pm->base) +
        ((uword) index << pm->lookup_log2_page_sz);
      pa = va - *lookup_val;
      s = format (s, "\n %16p %13p %8U", uword_to_pointer (va, u64),
                  uword_to_pointer (pa, u64),
                  format_log2_page_size, pm->lookup_log2_page_sz);
    }
  return s;
}