#include <sys/types.h>
#include <sys/mount.h>
#include <linux/mempolicy.h>
#include <linux/memfd.h>

#ifndef F_LINUX_SPECIFIC_BASE
#define F_LINUX_SPECIFIC_BASE 1024
#endif

#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)

#define F_SEAL_SEAL   0x0001
#define F_SEAL_SHRINK 0x0002
#define F_SEAL_GROW   0x0004
#define F_SEAL_WRITE  0x0008

#define MFD_HUGETLB 0x0004U

#ifndef MAP_HUGE_SHIFT
#define MAP_HUGE_SHIFT 26
#endif

#ifndef MFD_HUGE_SHIFT
#define MFD_HUGE_SHIFT 26
#endif

#ifndef MAP_FIXED_NOREPLACE
#define MAP_FIXED_NOREPLACE 0x100000
#endif

  /* from clib_mem_get_default_hugepage_size (): parse the "Hugepagesize:"
   * entry of /proc/meminfo and return it in bytes */
  if ((fd = open ("/proc/meminfo", 0)) == -1)
    ...
  if (unformat (&input, "Hugepagesize:%_%u kB", &size))
    ...
  return 1024ULL * size;
  /* from legacy_get_log2_default_hugepage_size (): the same information,
   * obtained by scanning /proc/meminfo with fscanf () and returned as a
   * log2 page size */
  if ((fp = fopen ("/proc/meminfo", "r")) == NULL)
    ...
  while (fscanf (fp, "%32s", tmp) > 0)
    if (strncmp ("Hugepagesize:", tmp, 13) == 0)
      {
        if (fscanf (fp, "%u", &size) > 0)
          log2_page_size = 10 + min_log2 (size);
        ...
      }
  ...
  return log2_page_size;
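For illustration, here is a self-contained sketch (my own example, not part of mem.c) of the kB-to-log2 conversion performed above; my_min_log2 is a hypothetical stand-in for vppinfra's min_log2 and is assumed to return the floor of log2:

#include <stdio.h>

/* hypothetical stand-in for min_log2 (): floor of log2 */
static unsigned
my_min_log2 (unsigned long x)
{
  unsigned n = 0;
  while (x >>= 1)
    n++;
  return n;
}

int
main (void)
{
  unsigned size_kb = 2048;	/* e.g. "Hugepagesize:    2048 kB" */
  unsigned log2_page_size = 10 + my_min_log2 (size_kb); /* 10 + 11 = 21 */
  printf ("log2 page size %u -> %lu bytes\n",
          log2_page_size, 1UL << log2_page_size);	/* 2097152 = 2 MB */
  return 0;
}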
  /* from clib_mem_main_init (): record the system page size, then map and
   * mlock () a scratch page */
  page_size = sysconf (_SC_PAGESIZE);
  ...
  va = mmap (0, page_size, PROT_READ | PROT_WRITE,
             MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (va == MAP_FAILED)
    ...
  if (mlock (va, page_size))
    ...
  munmap (va, page_size);
  /* from clib_mem_get_fd_page_size (): the page size backing an fd is
   * reported in st_blksize */
  struct stat st = { 0 };
  if (fstat (fd, &st) == -1)
    ...
  return st.st_blksize;
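Outside of vppinfra, the same st_blksize trick can be demonstrated with a plain memfd (my own sketch; assumes glibc 2.27+ for memfd_create, and prints 4096 for a non-hugetlb fd):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>

int
main (void)
{
  int fd = memfd_create ("probe", 0);	/* regular (non-hugetlb) memfd */
  struct stat st = { 0 };

  if (fd == -1 || fstat (fd, &st) == -1)
    return 1;
  /* for a hugetlb memfd this would report the huge page size instead */
  printf ("backing page size: %ld bytes\n", (long) st.st_blksize);
  return 0;
}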
  /* from clib_mem_vm_randomize_va (): the amount of randomization applied
   * depends on the page size */
  if (log2_page_size <= 12)
    ...
  else if (log2_page_size > 12 && log2_page_size <= 16)
    ...
  /* from legacy_memfd_create (): fallback for kernels without hugetlb
   * memfd_create () support - mount a private hugetlbfs instance, create the
   * backing file there, then detach the mount */
  temp = format (0, "/tmp/hugepage_mount.XXXXXX%c", 0);

  if ((mount_dir = mkdtemp ((char *) temp)) == 0)
    ...

  if (mount ("none", mount_dir, "hugetlbfs", 0, NULL))
    {
      ...
      rmdir ((char *) mount_dir);
      ...
    }

  filename = format (0, "%s/%s%c", mount_dir, name, 0);

  if ((fd = open ((char *) filename, O_CREAT | O_RDWR, 0755)) == -1)
    ...

  umount2 ((char *) mount_dir, MNT_DETACH);
  rmdir ((char *) mount_dir);
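A stripped-down, standalone version of that fallback (my own sketch, not VPP code; it needs root privileges and hugetlbfs support, and all vppinfra error handling is omitted):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mount.h>
#include <unistd.h>

static int
hugetlbfs_fd (const char *name)
{
  char tmpl[] = "/tmp/hugepage_mount.XXXXXX";
  char path[256];
  int fd = -1;

  if (mkdtemp (tmpl) == NULL)
    return -1;

  if (mount ("none", tmpl, "hugetlbfs", 0, NULL) == 0)
    {
      snprintf (path, sizeof (path), "%s/%s", tmpl, name);
      fd = open (path, O_CREAT | O_RDWR, 0755);
      /* once the fd is open, the mount can be lazily detached; the open
       * descriptor keeps the hugetlbfs file alive */
      umount2 (tmpl, MNT_DETACH);
    }

  rmdir (tmpl);	/* the mount point is empty again after the detach */
  return fd;
}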
  /* from clib_mem_vm_create_fd (): choose memfd_create () flags based on the
   * requested page size */
  unsigned int memfd_flags;
  ...
  switch (log2_page_size)
    {
      ...
      memfd_flags = MFD_HUGETLB;
      ...
      memfd_flags = MFD_HUGETLB | log2_page_size << MFD_HUGE_SHIFT;
    }
  ...
  /* kernels older than 4.14 reject MFD_HUGETLB with EINVAL */
  if (fd == -1 && errno == EINVAL && ...)
    ...
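The flag encoding is easier to see in isolation (my own sketch; assumes glibc 2.27+ for memfd_create, repeats the file's fallback #defines in case <sys/mman.h> lacks them, and adds an F_ADD_SEALS step that mirrors the seal constants defined at the top of this file rather than anything shown in the fragment above):

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>

#ifndef MFD_HUGETLB
#define MFD_HUGETLB 0x0004U
#endif
#ifndef MFD_HUGE_SHIFT
#define MFD_HUGE_SHIFT 26
#endif
#ifndef MFD_ALLOW_SEALING
#define MFD_ALLOW_SEALING 0x0002U
#endif

int
main (void)
{
  /* explicit 2 MB huge pages: log2 (2 MB) = 21, encoded at MFD_HUGE_SHIFT */
  int huge_fd = memfd_create ("huge", MFD_HUGETLB | (21U << MFD_HUGE_SHIFT));
  if (huge_fd == -1)
    printf ("hugetlb memfd failed, errno %d (EINVAL on kernels < 4.14)\n",
            errno);

  /* regular memfd that allows sealing, then sealed against shrinking */
  int fd = memfd_create ("plain", MFD_ALLOW_SEALING);
  if (fd != -1 && fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK) == 0)
    printf ("plain memfd sealed against shrinking\n");
  return 0;
}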
  /* from clib_mem_vm_reserve (): reserve a PROT_NONE region aligned to
   * log2_page_sz, with one extra system page in front of it for the map
   * header */
  uword pagesize = 1ULL << log2_page_sz;
  ...
  /* fixed-address case */
  base = (void *) start - sys_page_sz;
  base = mmap (base, size + sys_page_sz, PROT_NONE,
               ...);
  return (base == MAP_FAILED) ? ~0 : start;
  ...
  /* otherwise over-allocate by one page so the result can be trimmed back to
   * the requested alignment */
  base = mmap (0, size + pagesize, PROT_NONE,
               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (base == MAP_FAILED)
    ...
  p = base + size + pagesize;
  ...
  n_bytes = pagesize - sys_page_sz - n_bytes;
  ...
  munmap (base, n_bytes);
  ...
  return (uword) base + sys_page_sz;
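The trimming arithmetic is the classic over-allocate-and-trim trick. A simplified standalone version (my own sketch; unlike clib_mem_vm_reserve it does not keep the extra leading system page for a header):

#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

/* reserve `size` bytes of PROT_NONE address space aligned to `align`
 * (align must be a power of two and at least the system page size) */
static void *
reserve_aligned (size_t size, size_t align)
{
  uint8_t *base = mmap (0, size + align, PROT_NONE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (base == MAP_FAILED)
    return NULL;

  uintptr_t head = (uintptr_t) base & (align - 1);
  size_t lead = head ? align - head : 0;	/* bytes before the boundary */

  if (lead)
    munmap (base, lead);			/* unaligned head */
  munmap (base + lead + size, align - lead);	/* leftover tail */

  return base + lead;
}

int
main (void)
{
  void *p = reserve_aligned (1 << 21, 1 << 21); /* 2 MB region, 2 MB aligned */
  printf ("reserved at %p\n", p);
  return 0;
}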
  /* from clib_mem_vm_get_next_map_hdr (): header pages are kept PROT_NONE
   * and are made readable only while they are inspected */
  mprotect (hdr, sys_page_sz, PROT_READ);
  ...
  mprotect (hdr, sys_page_sz, PROT_NONE);
  ...
  mprotect (next, sys_page_sz, PROT_READ);
  /* from clib_mem_vm_map_internal (): build the mmap () flags, map the
   * region, then place a header page immediately in front of it and link it
   * into the per-process list of mappings */
  int mmap_flags = MAP_FIXED, is_huge = 0;
  ...
  mmap_flags |= MAP_SHARED;
  ...
  mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
  ...
  switch (log2_page_sz)
    {
      ...
      mmap_flags |= MAP_HUGETLB;
      ...
      mmap_flags |= MAP_HUGETLB;
      ...
    }
  ...
  size = round_pow2 (size, 1ULL << log2_page_sz);
  ...
  if (base == (void *) ~0)
    ...
  base = mmap (base, size, PROT_READ | PROT_WRITE, mmap_flags, fd, offset);
  if (base == MAP_FAILED)
    ...
  if (is_huge && (mlock (base, size) != 0))
    ...
  hdr = mmap (base - sys_page_sz, sys_page_sz, PROT_READ | PROT_WRITE,
              MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
  if (hdr != base - sys_page_sz)
    ...
  mprotect (mm->last_map, sys_page_sz, PROT_READ | PROT_WRITE);
  ...
  mprotect (mm->last_map, sys_page_sz, PROT_NONE);
  ...
  hdr->base_addr = (uword) base;
  hdr->log2_page_sz = log2_page_sz;
  hdr->num_pages = size >> log2_page_sz;
  ...
  mprotect (hdr, sys_page_sz, PROT_NONE);
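A hypothetical caller, based only on the declarations listed at the end of this page (my own sketch: the <vppinfra/mem.h> include, the CLIB_MEM_PAGE_SZ_DEFAULT enumerator and the use of CLIB_MEM_VM_MAP_FAILED as the failure value are assumptions, not taken from the fragment above):

#include <unistd.h>
#include <vppinfra/mem.h>	/* assumed header for the declarations below */

static int
demo_map_region (void)
{
  uword size = 64 << 20;	/* 64 MB */

  /* assumed enumerator for "use the default page size" */
  int fd = clib_mem_vm_create_fd (CLIB_MEM_PAGE_SZ_DEFAULT, "demo-%u", 0);
  if (fd < 0)
    return -1;
  if (ftruncate (fd, size) == -1)
    return -1;

  /* base == 0 lets the function choose the address; offset 0; name "demo" */
  void *base = clib_mem_vm_map_internal (0, CLIB_MEM_PAGE_SZ_DEFAULT, size,
                                         fd, 0, "demo");
  if (base == CLIB_MEM_VM_MAP_FAILED)	/* assumed failure sentinel */
    return -1;

  /* ... use the region ... */
  return clib_mem_vm_unmap (base);
}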
  /* from clib_mem_vm_unmap (): make the header writable, unmap the region it
   * describes, unlink it from the list, then unmap the header page itself */
  if (mprotect (hdr, sys_page_sz, PROT_READ | PROT_WRITE) != 0)
    ...
  size = hdr->num_pages << hdr->log2_page_sz;
  if (munmap ((void *) hdr->base_addr, size) != 0)
    ...
  mprotect (hdr->next, sys_page_sz, PROT_READ | PROT_WRITE);
  hdr->next->prev = hdr->prev;
  mprotect (hdr->next, sys_page_sz, PROT_NONE);
  ...
  mprotect (hdr->prev, sys_page_sz, PROT_READ | PROT_WRITE);
  hdr->prev->next = hdr->next;
  mprotect (hdr->prev, sys_page_sz, PROT_NONE);
  ...
  if (munmap (hdr, sys_page_sz) != 0)
    ...
  /* from clib_mem_get_page_stats (): collect one pointer per page and let
   * move_pages () report which NUMA node (if any) backs each of them */
  for (i = 0; i < n_pages; i++)
    ptr[i] = start + (i << log2_page_size);

  stats->total = n_pages;
  ...
  if (move_pages (0, n_pages, ptr, 0, status, 0) != 0)
    ...
  for (i = 0; i < n_pages; i++)
    {
      ...
      else if (status[i] == -EFAULT)
        ...
    }
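The query-only mode of move_pages () used above can be exercised directly (my own sketch; when the nodes argument is NULL the syscall only fills status[] with the backing NUMA node of each page, or a negative errno such as -EFAULT for pages that are not mapped):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int
main (void)
{
  long page_sz = sysconf (_SC_PAGESIZE);
  char *buf = mmap (0, 4 * page_sz, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  void *pages[4];
  int status[4];

  if (buf == MAP_FAILED)
    return 1;
  memset (buf, 1, 4 * page_sz);	/* touch the pages so they are backed */
  for (int i = 0; i < 4; i++)
    pages[i] = buf + i * page_sz;

  if (syscall (SYS_move_pages, 0, 4UL, pages, NULL, status, 0) == 0)
    for (int i = 0; i < 4; i++)
      printf ("page %d -> node %d\n", i, status[i]);
  return 0;
}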
  /* from clib_mem_vm_get_paddr (): look up each page's pagemap entry;
   * bit 63 of an entry is the "page present" flag */
  int pagesize = sysconf (_SC_PAGESIZE);
  ...
  if ((fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1)
    ...
  for (i = 0; i < n_pages; i++)
    {
      u64 seek, pagemap = 0;
      ...
      seek = ((u64) vaddr / pagesize) * sizeof (u64);
      if (lseek (fd, seek, SEEK_SET) != seek)
        ...
      if (read (fd, &pagemap, sizeof (pagemap)) != sizeof (pagemap))
        ...
      if ((pagemap & (1ULL << 63)) == 0)
        ...
    }
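The part of the translation not visible in the fragment is the PFN extraction. A standalone helper (my own sketch; bits 0-54 of a pagemap entry hold the page frame number, which recent kernels report as zero to unprivileged callers):

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static uint64_t
virt_to_phys (void *vaddr)
{
  long pagesize = sysconf (_SC_PAGESIZE);
  uint64_t entry = 0, off = ((uintptr_t) vaddr / pagesize) * sizeof (entry);
  int fd = open ("/proc/self/pagemap", O_RDONLY);

  if (fd == -1)
    return 0;
  if (pread (fd, &entry, sizeof (entry), off) != sizeof (entry) ||
      (entry & (1ULL << 63)) == 0)	/* bit 63: page present */
    entry = 0;
  close (fd);

  if (entry == 0)
    return 0;
  /* bits 0-54: PFN; add the offset within the page */
  return (entry & ((1ULL << 55) - 1)) * pagesize +
         ((uintptr_t) vaddr % pagesize);
}

int
main (void)
{
  int x = 42;
  printf ("physical address: 0x%llx\n",
          (unsigned long long) virt_to_phys (&x));
  return 0;
}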
  /* from clib_mem_set_numa_affinity (): bind (or merely prefer) allocations
   * to the given NUMA node via set_mempolicy () */
  long unsigned int mask[16] = { 0 };
  int mask_len = sizeof (mask) * 8 + 1;
  ...
  mask[0] = 1 << numa_node;
  ...
  if (set_mempolicy (force ? MPOL_BIND : MPOL_PREFERRED, mask, mask_len))
    ...
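The same policy call can be made without the vppinfra wrapper (my own sketch; MPOL_PREFERRED and MPOL_BIND come from <linux/mempolicy.h>, which this file already includes, and the "+ 1" on maxnode reflects the kernel decrementing it internally):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/mempolicy.h>

int
main (void)
{
  unsigned long mask[16] = { 0 };
  unsigned long maxnode = sizeof (mask) * 8 + 1;	/* bits + 1 */

  mask[0] = 1UL << 0;	/* NUMA node 0 */
  if (syscall (SYS_set_mempolicy, MPOL_PREFERRED, mask, maxnode) != 0)
    perror ("set_mempolicy");
  else
    printf ("new allocations now prefer node 0\n");
  return 0;
}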
/* Declarations and macros referenced by this file (brief descriptions taken
 * from their own documentation): */

#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
__clib_export int clib_mem_vm_create_fd(clib_mem_page_sz_t log2_page_size, char *fmt,...)
#define CLIB_MEM_UNPOISON(a, s)
#define CLIB_MEM_VM_MAP_FAILED
__clib_export int clib_mem_vm_unmap(void *base)
__clib_export void clib_mem_vm_randomize_va(uword *requested_va, clib_mem_page_sz_t log2_page_size)
#define MAP_FIXED_NOREPLACE
typedef struct _clib_mem_vm_map_hdr clib_mem_vm_map_hdr_t
static int memfd_create(const char *name, unsigned int flags)
clib_mem_vm_map_hdr_t * first_map
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
static u64 clib_cpu_time_now(void)
__clib_export clib_mem_vm_map_hdr_t * clib_mem_vm_get_next_map_hdr(clib_mem_vm_map_hdr_t *hdr)
static_always_inline uword clib_mem_get_page_size(void)
static clib_mem_page_sz_t legacy_get_log2_default_hugepage_size(void)
static uword min_log2(uword x)
#define vec_reset_length(v)
Reset vector length to zero (NULL-pointer tolerant).
#define MFD_ALLOW_SEALING
void * clib_mem_vm_map_internal(void *base, clib_mem_page_sz_t log2_page_sz, uword size, int fd, uword offset, char *name)
static_always_inline clib_mem_page_sz_t clib_mem_log2_page_size_validate(clib_mem_page_sz_t log2_page_size)
static long set_mempolicy(int mode, const unsigned long *nodemask, unsigned long maxnode)
static uword pow2_mask(uword x)
#define clib_error_return(e, args...)
__clib_export int clib_mem_set_numa_affinity(u8 numa_node, int force)
uword per_numa[CLIB_MAX_NUMAS]
#define clib_atomic_test_and_set(a)
#define clib_atomic_release(a)
#define CLIB_VM_MAP_HDR_NAME_MAX_LEN
#define clib_error_return_unix(e, args...)
static int legacy_memfd_create(u8 *name)
clib_mem_vm_map_hdr_t * last_map
clib_mem_page_sz_t log2_default_hugepage_sz
#define vec_free(V)
Free vector's memory (no header).
__clib_export u64 * clib_mem_vm_get_paddr(void *mem, clib_mem_page_sz_t log2_page_size, int n_pages)
__clib_export uword clib_mem_get_default_hugepage_size(void)
static uword round_pow2(uword x, uword pow2)
uword clib_mem_vm_reserve(uword start, uword size, clib_mem_page_sz_t log2_page_sz)
__clib_export void clib_mem_get_page_stats(void *start, clib_mem_page_sz_t log2_page_size, uword n_pages, clib_mem_page_stats_t *stats)
__clib_export clib_mem_page_sz_t clib_mem_get_fd_log2_page_size(int fd)
static uword pointer_to_uword(const void *p)
__clib_export u64 clib_mem_get_fd_page_size(int fd)
clib_mem_page_sz_t log2_page_sz
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
__clib_export clib_mem_main_t clib_mem_main
static long move_pages(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags)
void clib_mem_main_init()
__clib_export int clib_mem_set_default_numa_affinity()