FD.io VPP  v20.09-64-g4f7b92f0a
Vector Packet Processing
mem.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017 Cisco and/or its affiliates.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at:
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #define _GNU_SOURCE
17 #include <stdlib.h>
18 #include <sys/types.h>
19 #include <sys/stat.h>
20 #include <unistd.h>
21 #include <sys/mount.h>
22 #include <sys/mman.h>
23 #include <fcntl.h>
24 #include <linux/mempolicy.h>
25 #include <linux/memfd.h>
26 
27 #include <vppinfra/clib.h>
28 #include <vppinfra/mem.h>
29 #include <vppinfra/time.h>
30 #include <vppinfra/format.h>
31 #include <vppinfra/clib_error.h>
32 #include <vppinfra/linux/syscall.h>
33 #include <vppinfra/linux/sysfs.h>
34 
/* Fallback definitions of the fcntl () file-sealing commands and seal flags.
 * Each group is only defined when the included system headers have not
 * already provided it (see the #ifndef guards). */
35 #ifndef F_LINUX_SPECIFIC_BASE
36 #define F_LINUX_SPECIFIC_BASE 1024
37 #endif
38 
39 #ifndef F_ADD_SEALS
40 #define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
41 #define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
42 
43 #define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
44 #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
45 #define F_SEAL_GROW 0x0004 /* prevent file from growing */
46 #define F_SEAL_WRITE 0x0008 /* prevent writes */
47 #endif
48 
49 
50 uword
52 {
53  return getpagesize ();
54 }
55 
56 uword
58 {
59  unformat_input_t input;
60  static u32 size = 0;
61  int fd;
62 
63  if (size)
64  goto done;
65 
66  /*
67  * If the kernel doesn't support hugepages, /proc/meminfo won't
68  * say anything about it. Use the regular page size as a default.
69  */
70  size = clib_mem_get_page_size () / 1024;
71 
72  if ((fd = open ("/proc/meminfo", 0)) == -1)
73  return 0;
74 
75  unformat_init_clib_file (&input, fd);
76 
78  {
79  if (unformat (&input, "Hugepagesize:%_%u kB", &size))
80  ;
81  else
82  unformat_skip_line (&input);
83  }
84  unformat_free (&input);
85  close (fd);
86 done:
87  return 1024ULL * size;
88 }
89 
90 u64
92 {
93  struct stat st = { 0 };
94  if (fstat (fd, &st) == -1)
95  return 0;
96  return st.st_blksize;
97 }
98 
99 int
101 {
102  return min_log2 (clib_mem_get_fd_page_size (fd));
103 }
104 
105 void
106 clib_mem_vm_randomize_va (uword * requested_va, u32 log2_page_size)
107 {
108  u8 bit_mask = 15;
109 
110  if (log2_page_size <= 12)
111  bit_mask = 15;
112  else if (log2_page_size > 12 && log2_page_size <= 16)
113  bit_mask = 3;
114  else
115  bit_mask = 0;
116 
117  *requested_va +=
118  (clib_cpu_time_now () & bit_mask) * (1ull << log2_page_size);
119 }
120 
/* Fallback value for the memfd_create () hugetlb flag, defined only when
 * the included headers do not provide MFD_HUGETLB. */
121 #ifndef MFD_HUGETLB
122 #define MFD_HUGETLB 0x0004U
123 #endif
124 
125 clib_error_t *
126 clib_mem_create_fd (char *name, int *fdp)
127 {
128  int fd;
129 
130  ASSERT (name);
131 
132  if ((fd = memfd_create (name, MFD_ALLOW_SEALING)) == -1)
133  return clib_error_return_unix (0, "memfd_create");
134 
135  if ((fcntl (fd, F_ADD_SEALS, F_SEAL_SHRINK)) == -1)
136  {
137  close (fd);
138  return clib_error_return_unix (0, "fcntl (F_ADD_SEALS)");
139  }
140 
141  *fdp = fd;
142  return 0;
143 }
144 
145 clib_error_t *
147 {
148  clib_error_t *err = 0;
149  int fd = -1;
150  static int memfd_hugetlb_supported = 1;
151  char *mount_dir;
152  char template[] = "/tmp/hugepage_mount.XXXXXX";
153  u8 *filename;
154 
155  ASSERT (name);
156 
157  if (memfd_hugetlb_supported)
158  {
159  if ((fd = memfd_create (name, MFD_HUGETLB)) != -1)
160  goto done;
161 
162  /* avoid further tries if memfd MFD_HUGETLB is not supported */
163  if (errno == EINVAL && strnlen (name, 256) <= 249)
164  memfd_hugetlb_supported = 0;
165  }
166 
167  mount_dir = mkdtemp (template);
168  if (mount_dir == 0)
169  return clib_error_return_unix (0, "mkdtemp \'%s\'", template);
170 
171  if (mount ("none", (char *) mount_dir, "hugetlbfs", 0, NULL))
172  {
173  rmdir ((char *) mount_dir);
174  err = clib_error_return_unix (0, "mount hugetlb directory '%s'",
175  mount_dir);
176  }
177 
178  filename = format (0, "%s/%s%c", mount_dir, name, 0);
179  fd = open ((char *) filename, O_CREAT | O_RDWR, 0755);
180  umount2 ((char *) mount_dir, MNT_DETACH);
181  rmdir ((char *) mount_dir);
182 
183  if (fd == -1)
184  err = clib_error_return_unix (0, "open");
185 
186 done:
187  if (fd != -1)
188  fdp[0] = fd;
189  return err;
190 }
191 
192 clib_error_t *
194 {
195  int fd = -1;
196  clib_error_t *err = 0;
197  void *addr = 0;
198  u8 *filename = 0;
199  int mmap_flags = 0;
200  int log2_page_size;
201  int n_pages;
202  int old_mpol = -1;
203  long unsigned int old_mask[16] = { 0 };
204 
205  /* save old numa mem policy if needed */
207  {
208  int rv;
209  rv = get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1,
210  0, 0);
211 
212  if (rv == -1)
213  {
214  if (a->numa_node != 0 && (a->flags & CLIB_MEM_VM_F_NUMA_FORCE) != 0)
215  {
216  err = clib_error_return_unix (0, "get_mempolicy");
217  goto error;
218  }
219  else
220  old_mpol = -1;
221  }
222  }
223 
224  if (a->flags & CLIB_MEM_VM_F_LOCKED)
225  mmap_flags |= MAP_LOCKED;
226 
227  /* if we are creating shared segment, we need file descriptor */
228  if (a->flags & CLIB_MEM_VM_F_SHARED)
229  {
230  mmap_flags |= MAP_SHARED;
231  /* if hugepages are needed we need to create mount point */
232  if (a->flags & CLIB_MEM_VM_F_HUGETLB)
233  {
234  if ((err = clib_mem_create_hugetlb_fd (a->name, &fd)))
235  goto error;
236 
237  mmap_flags |= MAP_LOCKED;
238  }
239  else
240  {
241  if ((err = clib_mem_create_fd (a->name, &fd)))
242  goto error;
243  }
244 
245  log2_page_size = clib_mem_get_fd_log2_page_size (fd);
246  if (log2_page_size == 0)
247  {
248  err = clib_error_return_unix (0, "cannot determine page size");
249  goto error;
250  }
251 
252  if (a->requested_va)
253  {
254  clib_mem_vm_randomize_va (&a->requested_va, log2_page_size);
255  mmap_flags |= MAP_FIXED;
256  }
257  }
258  else /* not CLIB_MEM_VM_F_SHARED */
259  {
260  mmap_flags |= MAP_PRIVATE | MAP_ANONYMOUS;
261  if (a->flags & CLIB_MEM_VM_F_HUGETLB)
262  {
263  mmap_flags |= MAP_HUGETLB;
264  log2_page_size = 21;
265  }
266  else
267  {
268  log2_page_size = min_log2 (sysconf (_SC_PAGESIZE));
269  }
270  }
271 
272  n_pages = ((a->size - 1) >> log2_page_size) + 1;
273 
275  {
276  err = clib_sysfs_prealloc_hugepages (a->numa_node, log2_page_size,
277  n_pages);
278  if (err)
279  goto error;
280 
281  }
282 
283  if (fd != -1)
284  if ((ftruncate (fd, (u64) n_pages * (1 << log2_page_size))) == -1)
285  {
286  err = clib_error_return_unix (0, "ftruncate");
287  goto error;
288  }
289 
290  if (old_mpol != -1)
291  {
292  int rv;
293  long unsigned int mask[16] = { 0 };
294  mask[0] = 1 << a->numa_node;
295  rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1);
296  if (rv == -1 && a->numa_node != 0 &&
297  (a->flags & CLIB_MEM_VM_F_NUMA_FORCE) != 0)
298  {
299  err = clib_error_return_unix (0, "set_mempolicy");
300  goto error;
301  }
302  }
303 
304  addr = mmap (uword_to_pointer (a->requested_va, void *), a->size,
305  (PROT_READ | PROT_WRITE), mmap_flags, fd, 0);
306  if (addr == MAP_FAILED)
307  {
308  err = clib_error_return_unix (0, "mmap");
309  goto error;
310  }
311 
312  /* re-apply old numa memory policy */
313  if (old_mpol != -1 &&
314  set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1) == -1)
315  {
316  err = clib_error_return_unix (0, "set_mempolicy");
317  goto error;
318  }
319 
320  a->log2_page_size = log2_page_size;
321  a->n_pages = n_pages;
322  a->addr = addr;
323  a->fd = fd;
324  CLIB_MEM_UNPOISON (addr, a->size);
325  goto done;
326 
327 error:
328  if (fd != -1)
329  close (fd);
330 
331 done:
332  vec_free (filename);
333  return err;
334 }
335 
336 void
338 {
339  if (a != 0)
340  {
341  clib_mem_vm_free (a->addr, 1ull << a->log2_page_size);
342  if (a->fd != -1)
343  close (a->fd);
344  }
345 }
346 
347 uword
348 clib_mem_vm_reserve (uword start, uword size, u32 log2_page_sz)
349 {
350  uword off, pagesize = 1ULL << log2_page_sz;
351  int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;
352  u8 *p;
353 
354  if (start)
355  mmap_flags |= MAP_FIXED;
356 
357  size = round_pow2 (size, pagesize);
358 
359  p = uword_to_pointer (start, void *);
360  p = mmap (p, size + pagesize, PROT_NONE, mmap_flags, -1, 0);
361 
362  if (p == MAP_FAILED)
363  return ~0;
364 
365  off = round_pow2 ((uword) p, pagesize) - (uword) p;
366 
367  /* trim start and end of reservation to be page aligned */
368  if (off)
369  {
370  munmap (p, off);
371  p += off;
372  }
373 
374  munmap (p + size, pagesize - off);
375 
376  return (uword) p;
377 }
378 
379 u64 *
380 clib_mem_vm_get_paddr (void *mem, int log2_page_size, int n_pages)
381 {
382  int pagesize = sysconf (_SC_PAGESIZE);
383  int fd;
384  int i;
385  u64 *r = 0;
386 
387  if ((fd = open ((char *) "/proc/self/pagemap", O_RDONLY)) == -1)
388  return 0;
389 
390  for (i = 0; i < n_pages; i++)
391  {
392  u64 seek, pagemap = 0;
393  uword vaddr = pointer_to_uword (mem) + (((u64) i) << log2_page_size);
394  seek = ((u64) vaddr / pagesize) * sizeof (u64);
395  if (lseek (fd, seek, SEEK_SET) != seek)
396  goto done;
397 
398  if (read (fd, &pagemap, sizeof (pagemap)) != (sizeof (pagemap)))
399  goto done;
400 
401  if ((pagemap & (1ULL << 63)) == 0)
402  goto done;
403 
404  pagemap &= pow2_mask (55);
405  vec_add1 (r, pagemap * pagesize);
406  }
407 
408 done:
409  close (fd);
410  if (vec_len (r) != n_pages)
411  {
412  vec_free (r);
413  return 0;
414  }
415  return r;
416 }
417 
418 clib_error_t *
420 {
421  long unsigned int old_mask[16] = { 0 };
422  int mmap_flags = MAP_SHARED;
423  clib_error_t *err = 0;
424  int old_mpol = -1;
425  void *addr;
426  int rv;
427 
428  if (a->numa_node)
429  {
430  rv = get_mempolicy (&old_mpol, old_mask, sizeof (old_mask) * 8 + 1, 0,
431  0);
432 
433  if (rv == -1)
434  {
435  err = clib_error_return_unix (0, "get_mempolicy");
436  goto done;
437  }
438  }
439 
440  if (a->requested_va)
441  mmap_flags |= MAP_FIXED;
442 
443  if (old_mpol != -1)
444  {
445  long unsigned int mask[16] = { 0 };
446  mask[0] = 1 << a->numa_node;
447  rv = set_mempolicy (MPOL_BIND, mask, sizeof (mask) * 8 + 1);
448  if (rv == -1)
449  {
450  err = clib_error_return_unix (0, "set_mempolicy");
451  goto done;
452  }
453  }
454 
455  addr = (void *) mmap (uword_to_pointer (a->requested_va, void *), a->size,
456  PROT_READ | PROT_WRITE, mmap_flags, a->fd, 0);
457 
458  if (addr == MAP_FAILED)
459  return clib_error_return_unix (0, "mmap");
460 
461  /* re-apply old numa memory policy */
462  if (old_mpol != -1 &&
463  set_mempolicy (old_mpol, old_mask, sizeof (old_mask) * 8 + 1) == -1)
464  {
465  err = clib_error_return_unix (0, "set_mempolicy");
466  goto done;
467  }
468 
469  a->addr = addr;
470  CLIB_MEM_UNPOISON (addr, a->size);
471 
472 done:
473  return err;
474 }
475 
476 /*
477  * fd.io coding-style-patch-verification: ON
478  *
479  * Local Variables:
480  * eval: (c-set-style "gnu")
481  * End:
482  */
void clib_mem_vm_ext_free(clib_mem_vm_alloc_t *a)
Definition: mem.c:337
#define CLIB_MEM_VM_F_HUGETLB
Definition: mem.h:380
#define CLIB_MEM_UNPOISON(a, s)
Definition: sanitizer.h:47
clib_error_t * clib_sysfs_prealloc_hugepages(int numa_node, int log2_page_size, int nr)
Definition: sysfs.c:239
a
Definition: bitmap.h:538
#define CLIB_MEM_VM_F_NUMA_PREFER
Definition: mem.h:381
unsigned long u64
Definition: types.h:89
#define F_ADD_SEALS
Definition: mem.c:40
void * addr
Pointer to allocated memory, set on successful allocation.
Definition: mem.h:399
uword requested_va
Request fixed position mapping.
Definition: mem.h:403
#define EINVAL
Definition: string.h:93
static int memfd_create(const char *name, unsigned int flags)
Definition: syscall.h:52
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:592
static u64 clib_cpu_time_now(void)
Definition: time.h:81
int numa_node
numa node preference.
Definition: mem.h:398
u8 * format(u8 *s, const char *fmt,...)
Definition: format.c:424
u16 mask
Definition: flow_types.api:52
vhost_vring_addr_t addr
Definition: vhost_user.h:111
unsigned char u8
Definition: types.h:56
void unformat_init_clib_file(unformat_input_t *input, int file_descriptor)
Definition: unformat.c:1064
static uword min_log2(uword x)
Definition: clib.h:161
#define MFD_ALLOW_SEALING
Definition: main.c:104
clib_error_t * clib_mem_vm_ext_map(clib_mem_vm_map_t *a)
Definition: mem.c:419
static void unformat_skip_line(unformat_input_t *i)
Definition: format.h:222
clib_error_t * clib_mem_vm_ext_alloc(clib_mem_vm_alloc_t *a)
Definition: mem.c:193
uword requested_va
Request fixed position mapping.
Definition: mem.h:422
static long set_mempolicy(int mode, const unsigned long *nodemask, unsigned long maxnode)
Definition: syscall.h:31
static uword pow2_mask(uword x)
Definition: clib.h:237
char * name
Name for memory allocation, set by caller.
Definition: mem.h:396
uword size
Allocation size, set by caller.
Definition: mem.h:397
unsigned int u32
Definition: types.h:88
#define F_SEAL_SHRINK
Definition: mem.c:44
#define CLIB_MEM_VM_F_SHARED
Definition: mem.h:379
int fd
File descriptor, set on successful allocation if CLIB_MEM_VM_F_SHARED is set.
Definition: mem.h:400
struct _unformat_input_t unformat_input_t
uword clib_mem_get_page_size(void)
Definition: mem.c:51
#define clib_error_return_unix(e, args...)
Definition: error.h:102
u32 size
Definition: vhost_user.h:106
#define CLIB_MEM_VM_F_NUMA_FORCE
Definition: mem.h:382
uword clib_mem_vm_reserve(uword start, uword size, u32 log2_page_sz)
Definition: mem.c:348
static int get_mempolicy(int *mode, unsigned long *nodemask, unsigned long maxnode, void *addr, unsigned long flags)
Definition: syscall.h:37
int clib_mem_get_fd_log2_page_size(int fd)
Definition: mem.c:100
u64 clib_mem_get_fd_page_size(int fd)
Definition: mem.c:91
#define UNFORMAT_END_OF_INPUT
Definition: format.h:145
sll srl srl sll sra u16x4 i
Definition: vector_sse42.h:317
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:380
static uword round_pow2(uword x, uword pow2)
Definition: clib.h:264
string name[64]
Definition: ip.api:44
u32 flags
vm allocation flags: CLIB_MEM_VM_F_SHARED: request shared memory, file descriptor will be provided ...
Definition: mem.h:385
#define CLIB_MEM_VM_F_HUGETLB_PREALLOC
Definition: mem.h:383
#define uword_to_pointer(u, type)
Definition: types.h:136
#define ASSERT(truth)
#define CLIB_MEM_VM_F_LOCKED
Definition: mem.h:384
uword size
Map size.
Definition: mem.h:420
uword clib_mem_get_default_hugepage_size(void)
Definition: mem.c:57
static uword pointer_to_uword(const void *p)
Definition: types.h:131
clib_error_t * clib_mem_create_hugetlb_fd(char *name, int *fdp)
Definition: mem.c:146
clib_error_t * clib_mem_create_fd(char *name, int *fdp)
Definition: mem.c:126
static void clib_mem_vm_free(void *addr, uword size)
Definition: mem.h:338
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
u64 uword
Definition: types.h:112
static void unformat_free(unformat_input_t *i)
Definition: format.h:163
u64 * clib_mem_vm_get_paddr(void *mem, int log2_page_size, int n_pages)
Definition: mem.c:380
void clib_mem_vm_randomize_va(uword *requested_va, u32 log2_page_size)
Definition: mem.c:106
void * mem
int fd
File descriptor to be mapped.
Definition: mem.h:421
int log2_page_size
Definition: mem.h:401
void * addr
Pointer to mapped memory, if successful.
Definition: mem.h:423
uword unformat(unformat_input_t *i, const char *fmt,...)
Definition: unformat.c:978
static uword unformat_check_input(unformat_input_t *i)
Definition: format.h:171