18 #include <sys/types.h>
21 #include <sys/mount.h>
24 #include <linux/mempolicy.h>
25 #include <linux/memfd.h>
35 #ifndef F_LINUX_SPECIFIC_BASE
36 #define F_LINUX_SPECIFIC_BASE 1024
40 #define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
41 #define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
43 #define F_SEAL_SEAL 0x0001
44 #define F_SEAL_SHRINK 0x0002
45 #define F_SEAL_GROW 0x0004
46 #define F_SEAL_WRITE 0x0008
50 #define MFD_HUGETLB 0x0004U
53 #ifndef MAP_HUGE_SHIFT
54 #define MAP_HUGE_SHIFT 26
57 #ifndef MFD_HUGE_SHIFT
58 #define MFD_HUGE_SHIFT 26
61 #ifndef MAP_FIXED_NOREPLACE
62 #define MAP_FIXED_NOREPLACE 0x100000
94 if ((fd = open (
"/proc/meminfo", 0)) == -1)
109 return 1024ULL *
size;
119 if ((fp = fopen (
"/proc/meminfo",
"r")) == NULL)
122 while (fscanf (fp,
"%32s",
tmp) > 0)
123 if (strncmp (
"Hugepagesize:",
tmp, 13) == 0)
126 if (fscanf (fp,
"%u", &
size) > 0)
132 return log2_page_size;
147 page_size = sysconf (_SC_PAGESIZE);
151 if ((fd = syscall (__NR_memfd_create,
"test", MFD_HUGETLB)) != -1)
160 va = mmap (0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE |
161 MAP_ANONYMOUS, -1, 0);
162 if (va == MAP_FAILED)
165 if (mlock (va, page_size))
171 if (syscall (__NR_move_pages, 0, 1, &va, &
i, &status, 0) == 0)
176 munmap (va, page_size);
182 struct stat st = { 0 };
183 if (fstat (fd, &st) == -1)
185 return st.st_blksize;
201 if (log2_page_size <= 12)
203 else if (log2_page_size > 12 && log2_page_size <= 16)
226 temp =
format (0,
"/tmp/hugepage_mount.XXXXXX%c", 0);
229 if ((mount_dir = mkdtemp ((
char *) temp)) == 0)
237 if (mount (
"none", mount_dir,
"hugetlbfs", 0, NULL))
240 rmdir ((
char *) mount_dir);
246 filename =
format (0,
"%s/%s%c", mount_dir,
name, 0);
248 if ((fd = open ((
char *) filename, O_CREAT | O_RDWR, 0755)) == -1)
254 umount2 ((
char *) mount_dir, MNT_DETACH);
255 rmdir ((
char *) mount_dir);
267 unsigned int memfd_flags;
276 switch (log2_page_size)
284 memfd_flags = MFD_HUGETLB;
287 memfd_flags = MFD_HUGETLB | log2_page_size << MFD_HUGE_SHIFT;
300 fd = syscall (__NR_memfd_create, (
char *) s, memfd_flags);
303 if (fd == -1 && errno == EINVAL &&
334 uword pagesize = 1ULL << log2_page_sz;
351 base = (
void *) start - sys_page_sz;
352 base = mmap (base,
size + sys_page_sz, PROT_NONE,
353 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
354 return (base == MAP_FAILED) ? ~0 : start;
360 base = mmap (0,
size + pagesize, PROT_NONE,
361 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
363 if (base == MAP_FAILED)
367 p = base +
size + pagesize;
383 return (
uword) base + sys_page_sz;
396 mprotect (hdr, sys_page_sz, PROT_READ);
400 mprotect (hdr, sys_page_sz, PROT_NONE);
402 mprotect (
next, sys_page_sz, PROT_READ);
413 int mmap_flags = MAP_FIXED, is_huge = 0;
417 mmap_flags |= MAP_SHARED;
424 mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
429 switch (log2_page_sz)
438 mmap_flags |= MAP_HUGETLB;
443 mmap_flags |= MAP_HUGETLB;
456 if (base == (
void *) ~0)
459 base = mmap (base,
size, PROT_READ | PROT_WRITE, mmap_flags, fd,
offset);
461 if (base == MAP_FAILED)
464 if (is_huge && (mlock (base,
size) != 0))
470 hdr = mmap (base - sys_page_sz, sys_page_sz, PROT_READ | PROT_WRITE,
471 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
473 if (hdr != base - sys_page_sz)
483 mprotect (mm->
last_map, sys_page_sz, PROT_READ | PROT_WRITE);
485 mprotect (mm->
last_map, sys_page_sz, PROT_NONE);
498 hdr->base_addr = (
uword) base;
499 hdr->log2_page_sz = log2_page_sz;
500 hdr->num_pages =
size >> log2_page_sz;
503 mprotect (hdr, sys_page_sz, PROT_NONE);
516 if (mprotect (hdr, sys_page_sz, PROT_READ | PROT_WRITE) != 0)
519 size = hdr->num_pages << hdr->log2_page_sz;
520 if (munmap ((
void *) hdr->base_addr,
size) != 0)
527 mprotect (hdr->next, sys_page_sz, PROT_READ | PROT_WRITE);
528 hdr->next->prev = hdr->prev;
529 mprotect (hdr->next, sys_page_sz, PROT_NONE);
536 mprotect (hdr->prev, sys_page_sz, PROT_READ | PROT_WRITE);
537 hdr->prev->next = hdr->next;
538 mprotect (hdr->prev, sys_page_sz, PROT_NONE);
545 if (munmap (hdr, sys_page_sz) != 0)
563 for (
i = 0;
i < n_pages;
i++)
564 ptr[
i] = start + (
i << log2_page_size);
567 stats->total = n_pages;
568 stats->log2_page_sz = log2_page_size;
570 if (syscall (__NR_move_pages, 0, n_pages, ptr, 0, status, 0) != 0)
572 stats->unknown = n_pages;
576 for (
i = 0;
i < n_pages;
i++)
581 stats->per_numa[status[
i]]++;
583 else if (status[
i] == -EFAULT)
599 int pagesize = sysconf (_SC_PAGESIZE);
606 if ((fd = open ((
char *)
"/proc/self/pagemap", O_RDONLY)) == -1)
609 for (
i = 0;
i < n_pages;
i++)
611 u64 seek, pagemap = 0;
613 seek = ((
u64) vaddr / pagesize) *
sizeof (
u64);
614 if (lseek (fd, seek, SEEK_SET) != seek)
617 if (read (fd, &pagemap,
sizeof (pagemap)) != (
sizeof (pagemap)))
620 if ((pagemap & (1ULL << 63)) == 0)
641 long unsigned int mask[16] = { 0 };
642 int mask_len =
sizeof (
mask) * 8 + 1;
658 mask[0] = 1 << numa_node;
660 if (syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED,
mask,
678 if (syscall (__NR_set_mempolicy, MPOL_DEFAULT, 0, 0))