18 #include <sys/types.h>
21 #include <sys/mount.h>
24 #include <linux/mempolicy.h>
25 #include <linux/memfd.h>
35 #ifndef F_LINUX_SPECIFIC_BASE
36 #define F_LINUX_SPECIFIC_BASE 1024
40 #define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
41 #define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
43 #define F_SEAL_SEAL 0x0001
44 #define F_SEAL_SHRINK 0x0002
45 #define F_SEAL_GROW 0x0004
46 #define F_SEAL_WRITE 0x0008
50 #define MFD_HUGETLB 0x0004U
53 #ifndef MAP_HUGE_SHIFT
54 #define MAP_HUGE_SHIFT 26
57 #ifndef MFD_HUGE_SHIFT
58 #define MFD_HUGE_SHIFT 26
61 #ifndef MAP_FIXED_NOREPLACE
62 #define MAP_FIXED_NOREPLACE 0x100000
85 if ((fp = fopen (
"/proc/meminfo",
"r")) == NULL)
88 while (fscanf (fp,
"%32s",
tmp) > 0)
89 if (strncmp (
"Hugepagesize:",
tmp, 13) == 0)
92 if (fscanf (fp,
"%u", &
size) > 0)
98 return log2_page_size;
113 page_size = sysconf (_SC_PAGESIZE);
117 if ((fd = syscall (__NR_memfd_create,
"test", MFD_HUGETLB)) != -1)
128 va = mmap (0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE |
129 MAP_ANONYMOUS, -1, 0);
130 if (va == MAP_FAILED)
133 if (mlock (va, page_size))
139 if (syscall (__NR_move_pages, 0, 1, &va, &
i, &status, 0) == 0)
144 munmap (va, page_size);
150 struct stat st = { 0 };
151 if (fstat (fd, &st) == -1)
153 return st.st_blksize;
169 if (log2_page_size <= 12)
171 else if (log2_page_size > 12 && log2_page_size <= 16)
194 temp =
format (0,
"/tmp/hugepage_mount.XXXXXX%c", 0);
197 if ((mount_dir = mkdtemp ((
char *) temp)) == 0)
205 if (mount (
"none", mount_dir,
"hugetlbfs", 0, NULL))
208 rmdir ((
char *) mount_dir);
214 filename =
format (0,
"%s/%s%c", mount_dir,
name, 0);
216 if ((fd = open ((
char *) filename, O_CREAT | O_RDWR, 0755)) == -1)
222 umount2 ((
char *) mount_dir, MNT_DETACH);
223 rmdir ((
char *) mount_dir);
235 unsigned int memfd_flags;
244 switch (log2_page_size)
252 memfd_flags = MFD_HUGETLB;
255 memfd_flags = MFD_HUGETLB | log2_page_size << MFD_HUGE_SHIFT;
268 fd = syscall (__NR_memfd_create, (
char *) s, memfd_flags);
271 if (fd == -1 && errno == EINVAL &&
302 uword pagesize = 1ULL << log2_page_sz;
319 base = (
void *) start - sys_page_sz;
320 base = mmap (base,
size + sys_page_sz, PROT_NONE,
321 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
322 return (base == MAP_FAILED) ? ~0 : start;
328 base = mmap (0,
size + pagesize, PROT_NONE,
329 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
331 if (base == MAP_FAILED)
335 p = base +
size + pagesize;
351 return (
uword) base + sys_page_sz;
364 mprotect (hdr, sys_page_sz, PROT_READ);
368 mprotect (hdr, sys_page_sz, PROT_NONE);
370 mprotect (
next, sys_page_sz, PROT_READ);
381 int mmap_flags = MAP_FIXED, is_huge = 0;
385 mmap_flags |= MAP_SHARED;
392 mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
397 switch (log2_page_sz)
406 mmap_flags |= MAP_HUGETLB;
411 mmap_flags |= MAP_HUGETLB;
424 if (base == (
void *) ~0)
427 base = mmap (base,
size, PROT_READ | PROT_WRITE, mmap_flags, fd,
offset);
429 if (base == MAP_FAILED)
432 if (is_huge && (mlock (base,
size) != 0))
438 hdr = mmap (base - sys_page_sz, sys_page_sz, PROT_READ | PROT_WRITE,
439 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
441 if (hdr != base - sys_page_sz)
451 mprotect (mm->
last_map, sys_page_sz, PROT_READ | PROT_WRITE);
453 mprotect (mm->
last_map, sys_page_sz, PROT_NONE);
466 hdr->base_addr = (
uword) base;
467 hdr->log2_page_sz = log2_page_sz;
468 hdr->num_pages =
size >> log2_page_sz;
471 mprotect (hdr, sys_page_sz, PROT_NONE);
485 if (mprotect (hdr, sys_page_sz, PROT_READ | PROT_WRITE) != 0)
488 size = hdr->num_pages << hdr->log2_page_sz;
489 if (munmap ((
void *) hdr->base_addr,
size) != 0)
494 mprotect (hdr->next, sys_page_sz, PROT_READ | PROT_WRITE);
495 hdr->next->prev = hdr->prev;
496 mprotect (hdr->next, sys_page_sz, PROT_NONE);
503 mprotect (hdr->prev, sys_page_sz, PROT_READ | PROT_WRITE);
504 hdr->prev->next = hdr->next;
505 mprotect (hdr->prev, sys_page_sz, PROT_NONE);
512 if (munmap (hdr, sys_page_sz) != 0)
533 for (
i = 0;
i < n_pages;
i++)
534 ptr[
i] = start + (
i << log2_page_size);
537 stats->total = n_pages;
538 stats->log2_page_sz = log2_page_size;
540 if (syscall (__NR_move_pages, 0, n_pages, ptr, 0, status, 0) != 0)
542 stats->unknown = n_pages;
546 for (
i = 0;
i < n_pages;
i++)
551 stats->per_numa[status[
i]]++;
553 else if (status[
i] == -EFAULT)
569 int pagesize = sysconf (_SC_PAGESIZE);
576 if ((fd = open ((
char *)
"/proc/self/pagemap", O_RDONLY)) == -1)
579 for (
i = 0;
i < n_pages;
i++)
581 u64 seek, pagemap = 0;
583 seek = ((
u64) vaddr / pagesize) *
sizeof (
u64);
584 if (lseek (fd, seek, SEEK_SET) != seek)
587 if (read (fd, &pagemap,
sizeof (pagemap)) != (
sizeof (pagemap)))
590 if ((pagemap & (1ULL << 63)) == 0)
611 long unsigned int mask[16] = { 0 };
612 int mask_len =
sizeof (
mask) * 8 + 1;
628 mask[0] = 1 << numa_node;
630 if (syscall (__NR_set_mempolicy, force ? MPOL_BIND : MPOL_PREFERRED,
mask,
648 if (syscall (__NR_set_mempolicy, MPOL_DEFAULT, 0, 0))