FD.io VPP  v16.06
Vector Packet Processing
svm.c
Go to the documentation of this file.
1 /*
2  *------------------------------------------------------------------
3  * svm.c - shared VM allocation, mmap(...MAP_FIXED...)
4  * library
5  *
6  * Copyright (c) 2009 Cisco and/or its affiliates.
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at:
10  *
11  * http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *------------------------------------------------------------------
19  */
20 
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #include <sys/stat.h>
26 #include <netinet/in.h>
27 #include <signal.h>
28 #include <pthread.h>
29 #include <unistd.h>
30 #include <time.h>
31 #include <fcntl.h>
32 #include <string.h>
33 #include <vppinfra/clib.h>
34 #include <vppinfra/vec.h>
35 #include <vppinfra/hash.h>
36 #include <vppinfra/bitmap.h>
37 #include <vppinfra/fifo.h>
38 #include <vppinfra/time.h>
39 #include <vppinfra/mheap.h>
40 #include <vppinfra/heap.h>
41 #include <vppinfra/pool.h>
42 #include <vppinfra/format.h>
43 
44 #include "svm.h"
45 
/* NOTE(review): the listing skips original line 46; the cross-reference
   index shows "static svm_region_t * root_rp" defined there. */
/* Reference count on the root region mapping: svm_region_exit() decrements
   it and returns early while it is still positive. */
47 static int root_rp_refcount;
48 
/* Capacity of the held-mutex table below. */
49 #define MAXLOCK 2
/* Region mutexes this process currently holds, in acquisition order;
   maintained by region_lock() and region_unlock(). */
50 static pthread_mutex_t *mutexes_held [MAXLOCK];
/* Number of live entries in mutexes_held. */
51 static int nheld;
52 
54 {
55  return root_rp;
56 }
57 
58 #define MUTEX_DEBUG
59 
60 static void region_lock(svm_region_t *rp, int tag)
61 {
62  pthread_mutex_lock(&rp->mutex);
63 #ifdef MUTEX_DEBUG
64  rp->mutex_owner_pid = getpid();
65  rp->mutex_owner_tag = tag;
66 #endif
67  ASSERT(nheld < MAXLOCK);
68  /*
69  * Keep score of held mutexes so we can try to exit
70  * cleanly if the world comes to an end at the worst possible
71  * moment
72  */
73  mutexes_held [nheld++] = &rp->mutex;
74 }
75 
/*
 * region_unlock
 * Release rp's mutex and drop it from the mutexes_held table.
 * With MUTEX_DEBUG defined the owner pid/tag recorded by region_lock()
 * are cleared first.  Hits ASSERT(0) if rp's mutex is not in the table.
 */
76 static void region_unlock(svm_region_t *rp)
77 {
78  int i,j;
79 #ifdef MUTEX_DEBUG
80  rp->mutex_owner_pid = 0;
81  rp->mutex_owner_tag = 0;
82 #endif
83 
/* Search newest-first for this mutex, then close the gap it leaves by
   shifting the later entries down one slot. */
84  for (i = nheld-1; i >= 0; i--) {
85  if (mutexes_held[i] == &rp->mutex) {
86  for (j = i; j < MAXLOCK-1; j++)
87  mutexes_held[j] = mutexes_held[j+1];
88  nheld--;
89  goto found;
90  }
91  }
92  ASSERT(0);
93 
94 found:
/* NOTE(review): original line 95 is missing from this listing.  The
   cross-reference index lists CLIB_MEMORY_BARRIER(), which is not used on
   any visible line, so it is presumably the statement that belongs here --
   confirm against the upstream file. */
96  pthread_mutex_unlock(&rp->mutex);
97 }
98 
99 
100 static u8 * format_svm_flags (u8 * s, va_list * args)
101 {
102  uword f = va_arg (*args, uword);
103 
104  if (f & SVM_FLAGS_MHEAP)
105  s = format (s, "MHEAP ");
106  if (f & SVM_FLAGS_FILE)
107  s = format (s, "FILE ");
108  if (f & SVM_FLAGS_NODATA)
109  s = format (s, "NODATA ");
110  if (f & SVM_FLAGS_NEED_DATA_INIT)
111  s = format (s, "INIT ");
112 
113  return (s);
114 }
115 
116 static u8 * format_svm_size (u8 * s, va_list * args)
117 {
118  uword size = va_arg (*args, uword);
119 
120  if (size >= (1<<20)) {
121  s = format (s, "(%d mb)", size >> 20);
122  } else if (size >= (1<<10)) {
123  s = format (s, "(%d kb)", size >> 10);
124  } else {
125  s = format (s, "(%d bytes)", size);
126  }
127  return (s);
128 }
129 
/*
 * format_svm_region
 * format() helper describing one region.  Consumes from args:
 *   svm_region_t *rp  - region to describe
 *   int verbose       - non-zero adds flags, heap pointers, the list of
 *                       in-use VM ranges and mheap statistics
 * Returns the (possibly reallocated) vector s.
 */
130 u8 * format_svm_region (u8 * s, va_list * args)
131 {
132  svm_region_t *rp = va_arg (*args, svm_region_t *);
133  int verbose = va_arg (*args, int);
134  int i;
135  uword lo, hi;
136 
137  s = format (s, "%s: base va 0x%x size 0x%x %U\n",
138  rp->region_name, rp->virtual_base,
/* NOTE(review): original line 139 is missing from this listing; it must
   supply the remaining arguments for "size 0x%x %U" and close the call.
   format_svm_size is otherwise unused in the visible code, so it is
   presumably the %U formatter here -- confirm against the upstream file. */
140  s = format (s, " user_ctx 0x%x, bitmap_size %d\n",
141  rp->user_ctx, rp->bitmap_size);
142 
143  if (verbose) {
144  s = format (s, " flags: 0x%x %U\n", rp->flags,
145  format_svm_flags, rp->flags);
146  s = format (s,
147  " region_heap 0x%x data_base 0x%x data_heap 0x%x\n",
148  rp->region_heap, rp->data_base, rp->data_heap);
149  }
150 
151  s = format (s, " %d clients, pids: ",
152  vec_len(rp->client_pids));
153 
154  for (i = 0; i < vec_len(rp->client_pids); i++)
155  s = format (s, "%d ", rp->client_pids[i]);
156 
157  s = format (s, "\n");
158 
159  if (verbose) {
/* lo == ~0 means "no run of in-use pages is currently open" */
160  lo = hi = ~0;
161 
162  s = format (s, " VM in use: ");
163 
/* Walk the page bitmap and print each run of set bits as an address
   range.  A run is only emitted when a clear bit follows it.
   NOTE(review): a run that extends to the last bit of the bitmap is
   therefore never printed -- confirm whether that is intended. */
164  for (i = 0; i < rp->bitmap_size; i++) {
165  if (clib_bitmap_get_no_check (rp->bitmap, i) != 0) {
166  if (lo == ~0) {
167  hi = lo = rp->virtual_base + i*MMAP_PAGESIZE;
168  } else {
169  hi = rp->virtual_base + i*MMAP_PAGESIZE;
170  }
171  } else {
172  if (lo != ~0) {
173  hi = rp->virtual_base + i*MMAP_PAGESIZE -1;
174  s = format (s, " 0x%x - 0x%x (%dk)\n", lo, hi,
175  (hi - lo)>>10);
176  lo = hi = ~0;
177  }
178  }
179  }
180  s = format (s, " rgn heap stats: %U", format_mheap,
181  rp->region_heap, 0);
182  if ((rp->flags & SVM_FLAGS_MHEAP) && rp->data_heap) {
183  s = format (s, "\n data heap stats: %U", format_mheap,
184  rp->data_heap, 1);
185  }
186  s = format (s, "\n");
187  }
188 
189  return(s);
190 }
191 
192 /*
193  * rnd_pagesize
194  * Round to a pagesize multiple, presumably 4k works
195  */
196 static unsigned int rnd_pagesize(unsigned int size)
197 {
198  unsigned int rv;
199 
200  rv = (size + (MMAP_PAGESIZE-1)) & ~(MMAP_PAGESIZE-1);
201  return(rv);
202 }
203 
204 /*
205  * svm_data_region_setup
206  */
/*
 * NOTE(review): the first line of this definition (original line 207) is
 * missing from the listing; the cross-reference index gives the signature
 * as "static int svm_data_region_create(svm_map_region_args_t *a,
 * svm_region_t *rp)".
 *
 * Set up the data portion of a freshly created region:
 *  - with SVM_FLAGS_FILE in a->flags, open (creating if needed) the
 *    backing file and map it MAP_SHARED|MAP_FIXED at rp->data_base;
 *  - with SVM_FLAGS_MHEAP in a->flags, build an mheap at rp->data_base.
 * Returns 0 on success, -1 if open fails, -2 if fstat fails, -3 if mmap
 * fails.
 */
208  svm_region_t *rp)
209 {
210  int fd;
211  u8 junk = 0;
212  uword map_size;
213 
/* Default data size: the whole region minus the header page and the
   private mheap */
214  map_size = rp->virtual_size - (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE);
215 
216  if (a->flags & SVM_FLAGS_FILE) {
217  struct stat statb;
218 
219  fd = open (a->backing_file, O_RDWR | O_CREAT, 0777);
220 
221  if (fd < 0) {
222  clib_unix_warning ("open");
223  return -1;
224  }
225 
/* NOTE(review): fd is not closed on this error return */
226  if (fstat(fd, &statb) < 0) {
227  clib_unix_warning("fstat");
228  return -2;
229  }
230 
/* NOTE(review): this tests a bit of st_mode against S_IFREG rather than
   using S_ISREG(); other file types share bits with S_IFREG -- confirm */
231  if (statb.st_mode & S_IFREG) {
232  if (statb.st_size == 0) {
/* Empty file: extend it by writing one byte at offset map_size */
233  lseek(fd, map_size, SEEK_SET);
234  if (write(fd, &junk, 1) != 1)
235  clib_unix_warning ("set region size");
236  } else {
/* Existing file: map its current size, rounded up to whole pages */
237  map_size = rnd_pagesize (statb.st_size);
238  }
239  } else {
240  map_size = a->backing_mmap_size;
241  }
242 
243  ASSERT(map_size <= rp->virtual_size -
/* NOTE(review): original line 244 (the rest of this ASSERT) is missing
   from the listing. */
245 
/* NOTE(review): fd is not closed when mmap fails */
246  if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
247  MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) {
248  clib_unix_warning("mmap");
249  return -3;
250  }
251  close(fd);
/* NOTE(review): the C literal "%s\0" ends at the embedded NUL, so format()
   sees only "%s" and the stored vector gets no terminating NUL; other call
   sites in this file use "%s%c" with a trailing 0 argument -- confirm */
252  rp->backing_file = (char *) format(0, "%s\0", a->backing_file);
253  rp->flags |= SVM_FLAGS_FILE;
254  }
255 
256  if (a->flags & SVM_FLAGS_MHEAP) {
257  rp->data_heap =
258  mheap_alloc_with_flags ((void *)(rp->data_base), map_size,
/* NOTE(review): original line 259 (the final argument and closing of this
   call) is missing from the listing; the index lists MHEAP_FLAG_DISABLE_VM,
   presumably the flags argument -- confirm. */
260  rp->flags |= SVM_FLAGS_MHEAP;
261  }
262  return 0;
263 }
264 
/*
 * NOTE(review): the first line of this definition (original line 265) is
 * missing from the listing; the cross-reference index gives the signature
 * as "static int svm_data_region_map(svm_map_region_args_t *a,
 * svm_region_t *rp)".
 *
 * Attach to the data portion of an existing region.  Only acts when
 * SVM_FLAGS_FILE is set in a->flags: the backing file is opened (it is not
 * created here) and mapped MAP_SHARED|MAP_FIXED at rp->data_base.
 * Returns 0 on success, -1 if open fails, -2 if fstat fails, -3 if mmap
 * fails.
 */
266  svm_region_t *rp)
267 {
268  int fd;
269  u8 junk = 0;
270  uword map_size;
271  struct stat statb;
272 
/* Default data size: the whole region minus the header page and the
   private mheap */
273  map_size = rp->virtual_size - (MMAP_PAGESIZE + SVM_PVT_MHEAP_SIZE);
274 
275  if (a->flags & SVM_FLAGS_FILE) {
276 
277  fd = open (a->backing_file, O_RDWR, 0777);
278 
279  if (fd < 0) {
280  clib_unix_warning ("open");
281  return -1;
282  }
283 
/* NOTE(review): fd is not closed on this error return */
284  if (fstat(fd, &statb) < 0) {
285  clib_unix_warning("fstat");
286  return -2;
287  }
288 
289  if (statb.st_mode & S_IFREG) {
290  if (statb.st_size == 0) {
/* Empty file: extend it by writing one byte at offset map_size */
291  lseek(fd, map_size, SEEK_SET);
292  if (write(fd, &junk, 1) != 1)
293  clib_unix_warning ("set region size");
294  } else {
295  map_size = rnd_pagesize (statb.st_size);
296  }
297  } else {
298  map_size = a->backing_mmap_size;
299  }
300 
301  ASSERT(map_size <= rp->virtual_size
/* NOTE(review): original line 302 (the rest of this ASSERT) is missing
   from the listing. */
303 
/* NOTE(review): fd is not closed when mmap fails */
304  if (mmap (rp->data_base, map_size, PROT_READ | PROT_WRITE,
305  MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) {
306  clib_unix_warning("mmap");
307  return -3;
308  }
309  close(fd);
310  }
311  return 0;
312 }
313 
315 {
316  u8 *path;
317  u8 *shm_name;
318  u8 *split_point;
319  u8 *mkdir_arg = 0;
320  int root_path_offset = 0;
321  int name_offset = 0;
322 
323  if (a->root_path) {
324  /* Tolerate present or absent slashes */
325  if (a->root_path[0] == '/')
326  root_path_offset++;
327 
328  /* create the root_path under /dev/shm
329  iterate through path creating directories */
330 
331  path = format (0, "/dev/shm/%s%c", &a->root_path[root_path_offset], 0);
332  split_point = path+1;
333  vec_add1(mkdir_arg, '-');
334 
335  while (*split_point) {
336  while (*split_point && *split_point != '/') {
337  vec_add1 (mkdir_arg, *split_point);
338  split_point++;
339  }
340  vec_add1 (mkdir_arg, 0);
341 
342  /* ready to descend another level */
343  mkdir_arg[vec_len(mkdir_arg)-1] = '-';
344  split_point++;
345  }
346  vec_free(mkdir_arg);
347  vec_free(path);
348 
349  if (a->name[0] == '/')
350  name_offset = 1;
351 
352  shm_name = format (0, "/%s-%s%c", a->root_path,
353  &a->name[name_offset], 0);
354  }
355  else
356  shm_name = format (0, "%s%c", a->name, 0);
357  return (shm_name);
358 }
359 
360 /*
361  * svm_map_region
362  */
/*
 * NOTE(review): the first line of this definition (original line 363) is
 * missing from the listing; the cross-reference index gives the signature
 * as "void * svm_map_region(svm_map_region_args_t *a)".
 *
 * Create the shared-memory region described by a, or attach to it if it
 * already exists.  The two cases are told apart by whether
 * shm_open(O_CREAT | O_EXCL) succeeds.  Returns the mapped region, or 0
 * on failure.  In the attach case a->baseva and a->size are overwritten
 * with the values stored in the region header.
 */
364 {
365  int svm_fd;
366  svm_region_t *rp;
367  pthread_mutexattr_t attr;
368  pthread_condattr_t cattr;
369  int deadman=0;
370  u8 junk = 0;
371  void *oldheap;
372  int overhead_space;
373  int rv;
374  uword data_base;
375  int nbits, words, bit;
376  int pid_holding_region_lock;
377  u8 *shm_name;
378  int dead_region_recovery = 0;
379  int time_left;
380  struct stat stat;
381  struct timespec ts, tsrem;
382 
383  if (CLIB_DEBUG > 1)
384  clib_warning ("[%d] map region %s", getpid(), a->name);
385 
/* NOTE(review): a->name is passed to "%s" above before it is asserted
   non-null below */
386  ASSERT((a->size & ~(MMAP_PAGESIZE-1)) == a->size);
387  ASSERT(a->name);
388 
389  shm_name = shm_name_from_svm_map_region_args (a);
390 
391  svm_fd = shm_open((char *) shm_name, O_RDWR | O_CREAT | O_EXCL, 0777);
392 
/* ---- Creator path: the exclusive create succeeded ---- */
393  if (svm_fd >= 0) {
394  if (fchmod (svm_fd, 0770) < 0)
395  clib_unix_warning ("segment chmod");
396  /* This turns out to fail harmlessly if the client starts first */
397  if (fchown (svm_fd, a->uid, a->gid) < 0)
398  clib_unix_warning ("segment chown [ok if client starts first]");
399 
400  vec_free(shm_name);
401 
/* Size the segment by writing a single byte at offset a->size */
402  lseek(svm_fd, a->size, SEEK_SET);
403  if (write(svm_fd, &junk, 1) != 1)
404  clib_warning ("set region size");
405 
406  rp = mmap((void *)a->baseva, a->size,
407  PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, svm_fd, 0);
408 
/* NOTE(review): svm_fd is not closed on this error return */
409  if (rp == (svm_region_t *) MAP_FAILED) {
410  clib_unix_warning ("mmap create");
411  return (0);
412  }
413  close(svm_fd);
/* Zeroing the header also leaves rp->version at 0, which is what the
   attach path below polls before it proceeds */
414  memset(rp, 0, sizeof(*rp));
415 
/* Mutex and condvar live in the shared header, so both are initialized
   PTHREAD_PROCESS_SHARED */
416  if (pthread_mutexattr_init(&attr))
417  clib_unix_warning("mutexattr_init");
418 
419  if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED))
420  clib_unix_warning("mutexattr_setpshared");
421 
422  if (pthread_mutex_init(&rp->mutex, &attr))
423  clib_unix_warning("mutex_init");
424 
425  if (pthread_mutexattr_destroy(&attr))
426  clib_unix_warning("mutexattr_destroy");
427 
428  if (pthread_condattr_init(&cattr))
429  clib_unix_warning("condattr_init");
430 
431  if (pthread_condattr_setpshared(&cattr, PTHREAD_PROCESS_SHARED))
432  clib_unix_warning("condattr_setpshared");
433 
434  if (pthread_cond_init(&rp->condvar, &cattr))
435  clib_unix_warning("cond_init");
436 
437  if(pthread_condattr_destroy(&cattr))
438  clib_unix_warning("condattr_destroy");
439 
440  region_lock (rp, 1);
441 
442  rp->virtual_base = a->baseva;
443  rp->virtual_size = a->size;
444 
445  rp->region_heap =
/* NOTE(review): original lines 446-448 (the right-hand side of this
   assignment) are missing from the listing; the index lists
   mheap_alloc_with_flags and MHEAP_FLAG_DISABLE_VM, so presumably the
   private heap is created here -- confirm against the upstream file. */
449  oldheap = svm_push_pvt_heap(rp);
450 
451  rp->region_name = (char *)format (0, "%s%c", a->name, 0);
452  vec_add1(rp->client_pids, getpid());
453 
/* One bitmap bit per MMAP_PAGESIZE page of the region */
454  nbits = rp->virtual_size / MMAP_PAGESIZE;
455 
456  ASSERT (nbits > 0);
457  rp->bitmap_size = nbits;
458  words = (nbits + BITS(uword)-1) / BITS(uword);
459  vec_validate (rp->bitmap, words-1);
460 
461  overhead_space = MMAP_PAGESIZE /* header */ +
/* NOTE(review): original line 462 (the second operand of this sum) is
   missing from the listing. */
463 
464  bit = 0;
465  data_base = (uword)rp->virtual_base;
466 
467  if (a->flags & SVM_FLAGS_NODATA)
/* NOTE(review): original line 468 (the statement controlled by the "if"
   above) is missing from the listing; as shown, the "if" would govern the
   do/while loop that follows. */
469 
/* Mark the pages taken by the region's own overhead as in use and step
   data_base past them */
470  do {
471  clib_bitmap_set_no_check (rp->bitmap, bit, 1);
472  bit++;
473  overhead_space -= MMAP_PAGESIZE;
474  data_base += MMAP_PAGESIZE;
475  } while (overhead_space > 0);
476 
477  rp->data_base = (void *)data_base;
478 
479  /*
480  * Note: although the POSIX spec guarantees that only one
481  * process enters this block, we have to play games
482  * to hold off clients until e.g. the mutex is ready
483  */
484  rp->version = SVM_VERSION;
485 
486  /* setup the data portion of the region */
487 
/* A failure here is only logged; the region is still returned */
488  rv = svm_data_region_create (a, rp);
489  if (rv) {
490  clib_warning ("data_region_create: %d", rv);
491  }
492 
493  region_unlock(rp);
494 
495  svm_pop_heap(oldheap);
496 
497  return ((void *) rp);
/* ---- Attach path: the segment already exists (or the create failed) ---- */
498  } else {
499  svm_fd = shm_open((char *)shm_name, O_RDWR, 0777);
500 
501  vec_free(shm_name);
502 
503  if (svm_fd < 0) {
504  perror("svm_region_map(mmap open)");
505  return (0);
506  }
507 
/* Wait for the creator to give the segment a size: up to 20 polls,
   100ms apart.
   NOTE(review): svm_fd is not closed on the two error returns in this
   loop */
508  time_left = 20;
509  while (1) {
510  if (0 != fstat(svm_fd, &stat)) {
511  clib_warning("fstat failed: %d", errno);
512  return (0);
513  }
514  if (stat.st_size > 0) {
515  break;
516  }
517  if (0 == time_left) {
518  clib_warning("waiting for resize of shm file timed out");
519  return (0);
520  }
521  ts.tv_sec = 0;
522  ts.tv_nsec = 100000000;
523  while (nanosleep(&ts, &tsrem) < 0)
524  ts = tsrem;
525  time_left--;
526  }
527 
/* Map only the header page, anywhere, to learn where the region lives */
528  rp = mmap(0, MMAP_PAGESIZE,
529  PROT_READ | PROT_WRITE, MAP_SHARED, svm_fd, 0);
530 
531  if (rp == (svm_region_t *) MAP_FAILED) {
532  close(svm_fd);
533  clib_warning("mmap");
534  return (0);
535  }
536  /*
537  * We lost the footrace to create this region; make sure
538  * the winner has crossed the finish line.
539  */
540  while (rp->version == 0 && deadman++ < 5) {
541  sleep(1);
542  }
543 
544  /*
545  * <bleep>-ed?
546  */
/* NOTE(review): only MMAP_PAGESIZE bytes were mapped above, but a->size is
   unmapped here; rp->version is also read after the munmap -- confirm */
547  if (rp->version == 0) {
548  close(svm_fd);
549  munmap(rp, a->size);
550  clib_warning("rp->version %d not %d", rp->version,
551  SVM_VERSION);
552  return (0);
553  }
554  /* Remap now that the region has been placed */
555  a->baseva = rp->virtual_base;
556  a->size = rp->virtual_size;
557  munmap(rp, MMAP_PAGESIZE);
558 
559  rp = (void *) mmap ((void *)a->baseva, a->size,
560  PROT_READ | PROT_WRITE,
561  MAP_SHARED | MAP_FIXED, svm_fd, 0);
/* NOTE(review): svm_fd is not closed on this error return, and no
   close(svm_fd) appears on the success path of this branch either */
562  if ((uword)rp == (uword)MAP_FAILED) {
563  clib_unix_warning ("mmap");
564  return (0);
565  }
566 
/* A base mismatch is only logged; execution continues */
567  if ((uword) rp != rp->virtual_base) {
568  clib_warning("mmap botch");
569  }
570 
571  /*
572  * Try to fix the region mutex if it is held by
573  * a dead process
574  */
/* kill(pid, 0) sends no signal; a failure is taken to mean the recorded
   owner no longer exists */
575  pid_holding_region_lock = rp->mutex_owner_pid;
576  if (pid_holding_region_lock &&
577  kill (pid_holding_region_lock, 0) < 0) {
578  clib_warning (
579  "region %s mutex held by dead pid %d, tag %d, force unlock",
580  rp->region_name, pid_holding_region_lock, rp->mutex_owner_tag);
581  /* owner pid is nonexistent */
/* NOTE(review): these are implementation-internal fields of
   pthread_mutex_t, not a portable interface */
582  rp->mutex.__data.__owner = 0;
583  rp->mutex.__data.__lock = 0;
584  dead_region_recovery = 1;
585  }
586 
587  if (dead_region_recovery)
588  clib_warning ("recovery: attempt to re-lock region");
589 
590  region_lock(rp, 2);
591  oldheap = svm_push_pvt_heap (rp);
592  vec_add1(rp->client_pids, getpid());
593 
594  if (dead_region_recovery)
595  clib_warning ("recovery: attempt svm_data_region_map");
596 
/* A failure here is only logged; the region is still returned */
597  rv = svm_data_region_map (a, rp);
598  if (rv) {
599  clib_warning ("data_region_map: %d", rv);
600  }
601 
602  if (dead_region_recovery)
603  clib_warning ("unlock and continue");
604 
605  region_unlock(rp);
606 
607  svm_pop_heap(oldheap);
608 
609  return ((void *) rp);
610 
611  }
612  return 0; /* NOTREACHED */
613 }
614 
615 static void svm_mutex_cleanup (void)
616 {
617  int i;
618  for (i = 0; i < nheld; i++) {
619  pthread_mutex_unlock (mutexes_held[i]);
620  }
621 }
622 
/*
 * svm_region_init_internal
 * Map (creating it if necessary) the root region and remember it in
 * root_rp.  Does nothing if root_rp is already set.
 *
 * root_path - optional chroot-style prefix, handed to svm_map_region via
 *             a->root_path and copied into the main region on first init
 * uid, gid  - passed through in the map arguments (svm_map_region applies
 *             them with fchown when it creates the segment)
 */
623 static void svm_region_init_internal (char *root_path, int uid, int gid)
624 {
625  svm_region_t *rp;
626  svm_map_region_args_t _a, *a=&_a;
627  u64 ticks = clib_cpu_time_now();
628  uword randomize_baseva;
629 
630  /* guard against klutz calls */
631  if (root_rp)
632  return;
633 
/* NOTE(review): original line 634 is missing from this listing.
   root_rp_refcount is never incremented in the visible code, so the
   increment is presumably here -- confirm against the upstream file. */
635 
636  atexit(svm_mutex_cleanup);
637 
638  /* Randomize the shared-VM base at init time */
/* The low bits of the cpu timestamp pick a page offset: 0-15 pages when
   pages are 4k or smaller, 0-3 pages otherwise */
639  if (MMAP_PAGESIZE <= (4<<10))
640  randomize_baseva = (ticks & 15) * MMAP_PAGESIZE;
641  else
642  randomize_baseva = (ticks & 3) * MMAP_PAGESIZE;
643 
644  memset (a, 0, sizeof (*a));
645  a->root_path = root_path;
/* NOTE(review): original line 646 is missing; the index lists
   SVM_GLOBAL_REGION_NAME, presumably assigned to a->name here -- confirm. */
647  a->baseva = SVM_GLOBAL_REGION_BASEVA + randomize_baseva;
/* NOTE(review): original line 648 is missing; the index lists
   SVM_GLOBAL_REGION_SIZE, presumably assigned to a->size here -- confirm. */
649  a->flags = SVM_FLAGS_NODATA;
650  a->uid = uid;
651  a->gid = gid;
652 
653  rp = svm_map_region (a);
654  ASSERT(rp);
655 
656  region_lock(rp, 3);
657 
658  /* Set up the main region data structures */
659  if (rp->flags & SVM_FLAGS_NEED_DATA_INIT) {
660  svm_main_region_t *mp = 0;
661  void *oldheap;
662 
/* NOTE(review): original line 663 is missing; presumably it clears
   SVM_FLAGS_NEED_DATA_INIT so this block runs only once -- confirm. */
664 
/* Allocate the svm_main_region_t while the region's private heap is
   pushed, and publish it through rp->data_base */
665  oldheap = svm_push_pvt_heap (rp);
666  vec_validate (mp, 0);
667  mp->name_hash = hash_create_string (0, sizeof(uword));
668  mp->root_path = root_path
669  ? format (0, "%s%c", root_path, 0) : 0 ;
670  rp->data_base = mp;
671  svm_pop_heap (oldheap);
672  }
673  region_unlock(rp);
674  root_rp = rp;
675 }
676 
/*
 * svm_region_init
 * Initialize the root region with no root path and uid/gid 0.
 */
void svm_region_init (void)
{
    char *no_root_path = 0;
    int uid = 0;
    int gid = 0;

    svm_region_init_internal (no_root_path, uid, gid);
}
681 
/*
 * svm_region_init_chroot
 * Initialize the root region under the given root_path, with uid/gid 0.
 */
void svm_region_init_chroot (char *root_path)
{
    int uid = 0;
    int gid = 0;

    svm_region_init_internal (root_path, uid, gid);
}
686 
/*
 * svm_region_init_chroot_uid_gid
 * Initialize the root region under root_path, passing the caller's
 * uid and gid straight through to svm_region_init_internal().
 */
void svm_region_init_chroot_uid_gid (char *root_path, int uid, int gid)
{
    svm_region_init_internal (root_path,
                              uid,
                              gid);
}
691 
/*
 * NOTE(review): the first line of this definition (original line 692) is
 * missing from the listing; the cross-reference index gives the signature
 * as "void * svm_region_find_or_create(svm_map_region_args_t *a)".
 *
 * Look a->name up in the root region's name hash.  If it is known, just
 * map it.  Otherwise find a free run of pages in the root region's VM
 * bitmap, mark it allocated, create the region there and register the
 * name.  Returns the region, or 0 when no VM is left.  The root region
 * mutex is held for the duration.  a->size, a->root_path and (on create)
 * a->baseva are overwritten.
 */
693 {
694  svm_main_region_t *mp;
695  svm_region_t *rp;
696  uword need_nbits;
697  int index, i;
698  void *oldheap;
699  uword *p;
700  u8 *name;
701  svm_subregion_t *subp;
702 
703  ASSERT(root_rp);
704 
/* NOTE(review): original line 705 is missing from this listing; it
   presumably adjusts a->size before the rounding below -- confirm against
   the upstream file. */
706  a->size = rnd_pagesize(a->size);
707 
708  region_lock (root_rp, 4);
709  oldheap = svm_push_pvt_heap(root_rp);
710  mp = root_rp->data_base;
711 
712  ASSERT(mp);
713 
714  /* Map the named region from the correct chroot environment */
715  a->root_path = (char *) mp->root_path;
716 
717  /*
718  * See if this region is already known. If it is, we're
719  * almost done...
720  */
721  p = hash_get_mem (mp->name_hash, a->name);
722 
723  if (p) {
724  rp = svm_map_region (a);
725  region_unlock(root_rp);
726  svm_pop_heap (oldheap);
727  return rp;
728  }
729 
730  /* Create the region. */
731  ASSERT((a->size & ~(MMAP_PAGESIZE-1)) == a->size);
732 
733  need_nbits = a->size / MMAP_PAGESIZE;
734 
735  index = 1; /* $$$ fixme, figure out how many bit to really skip */
736 
737  /*
738  * Scan the virtual space allocation bitmap, looking for a large
739  * enough chunk
740  */
/* NOTE(review): the inner loop examines need_nbits-1 bits starting at
   index and never compares index+i with root_rp->bitmap_size, whereas the
   marking loop further down sets need_nbits bits -- confirm that a
   candidate near the end of the bitmap cannot run past it. */
741  do {
742  if (clib_bitmap_get_no_check(root_rp->bitmap, index) == 0) {
743  for (i = 0; i < (need_nbits-1); i++) {
744  if (clib_bitmap_get_no_check(root_rp->bitmap,
745  index+i) == 1) {
/* Hit an allocated page: resume the outer scan from that bit */
746  index = index + i;
747  goto next;
748  }
749  }
750  break;
751  }
752  index++;
753  next:;
754  } while (index < root_rp->bitmap_size);
755 
756  /* Completely out of VM? */
757  if (index >= root_rp->bitmap_size) {
758  clib_warning("region %s: not enough VM to allocate 0x%x",
759  root_rp->region_name, a->size);
760  svm_pop_heap (oldheap);
761  region_unlock (root_rp);
762  return 0;
763  }
764 
765  /*
766  * Mark virtual space allocated
767  */
768 #if CLIB_DEBUG > 1
769  clib_warning ("set %d bits at index %d", need_nbits, index);
770 #endif
771 
772  for (i = 0; i < need_nbits; i++) {
773  clib_bitmap_set_no_check (root_rp->bitmap, index+i, 1);
774  }
775 
776  /* Place this region where it goes... */
777  a->baseva = root_rp->virtual_base + index*MMAP_PAGESIZE;
778 
/* NOTE(review): rp is not checked for 0 here; if svm_map_region fails the
   name is still registered and the bitmap bits stay set -- confirm */
779  rp = svm_map_region (a);
780 
/* Register the new subregion under a private copy of its name; the hash
   value is the subregion's pool index */
781  pool_get (mp->subregions, subp);
782  name = format (0, "%s%c", a->name, 0);
783  subp->subregion_name = name;
784 
785  hash_set_mem (mp->name_hash, name, subp - mp->subregions);
786 
787  svm_pop_heap (oldheap);
788 
789  region_unlock (root_rp);
790 
791  return (rp);
792 }
793 
794 /*
795  * svm_region_unmap
796  *
797  * Let go of the indicated region. If the calling process
798  * is the last customer, throw it away completely.
799  * The root region mutex guarantees atomicity with respect to
800  * a new region client showing up at the wrong moment.
801  */
802 void svm_region_unmap (void *rp_arg)
803 {
804  int i, mypid = getpid();
805  int nclients_left;
806  void *oldheap;
807  uword virtual_base, virtual_size;
808  svm_region_t *rp = rp_arg;
809  char *name;
810 
811  /*
812  * If we take a signal while holding one or more shared-memory
813  * mutexes, we may end up back here from an otherwise
814  * benign exit handler. Bail out to avoid a recursive
815  * mutex screw-up.
816  */
817  if (nheld)
818  return;
819 
820  ASSERT(rp);
821  ASSERT(root_rp);
822 
823  if (CLIB_DEBUG > 1)
824  clib_warning ("[%d] unmap region %s", getpid(), rp->region_name);
825 
826  region_lock (root_rp, 5);
827  region_lock (rp, 6);
828 
829  oldheap = svm_push_pvt_heap (rp); /* nb vec_delete() in the loop */
830 
831  /* Remove the caller from the list of mappers */
832  for (i = 0; i < vec_len(rp->client_pids); i++) {
833  if (rp->client_pids[i] == mypid) {
834  vec_delete (rp->client_pids, 1, i);
835  goto found;
836  }
837  }
838  clib_warning("pid %d AWOL", mypid);
839 
840  found:
841 
842  svm_pop_heap (oldheap);
843 
844  nclients_left = vec_len(rp->client_pids);
845  virtual_base = rp->virtual_base;
846  virtual_size = rp->virtual_size;
847 
848  if (nclients_left == 0) {
849  int index, nbits, i;
850  svm_main_region_t *mp;
851  uword *p;
852  svm_subregion_t *subp;
853 
854  /* Kill the region, last guy on his way out */
855 
856  oldheap = svm_push_pvt_heap (root_rp);
857  name = vec_dup (rp->region_name);
858 
859  virtual_base = rp->virtual_base;
860  virtual_size = rp->virtual_size;
861 
862  /* Figure out which bits to clear in the root region bitmap */
863  index = (virtual_base - root_rp->virtual_base)
864  / MMAP_PAGESIZE;
865 
866  nbits = (virtual_size + MMAP_PAGESIZE - 1)
867  / MMAP_PAGESIZE;
868 
869 #if CLIB_DEBUG > 1
870  clib_warning ("clear %d bits at index %d", nbits, index);
871 #endif
872  /* Give back the allocated VM */
873  for (i = 0; i < nbits; i++) {
874  clib_bitmap_set_no_check (root_rp->bitmap, index+i, 0);
875  }
876 
877  mp = root_rp->data_base;
878 
879  p = hash_get_mem (mp->name_hash, name);
880 
881  /* Better never happen ... */
882  if (p == NULL) {
883  region_unlock (rp);
884  region_unlock (root_rp);
885  svm_pop_heap (oldheap);
886  clib_warning ("Region name '%s' not found?", name);
887  return;
888  }
889 
890  /* Remove from the root region subregion pool */
891  subp = mp->subregions + p[0];
892  pool_put (mp->subregions, subp);
893 
894  hash_unset_mem (mp->name_hash, name);
895 
896  vec_free(name);
897 
898  region_unlock (rp);
899  shm_unlink(rp->region_name);
900  munmap ((void *)virtual_base, virtual_size);
901  region_unlock (root_rp);
902  svm_pop_heap (oldheap);
903  return;
904  }
905 
906  region_unlock(rp);
907  region_unlock(root_rp);
908 
909  munmap ((void *)virtual_base, virtual_size);
910 }
911 
912 /*
913  * svm_region_exit
914  * There is no clean way to unlink the
915  * root region when all clients go away,
916  * so remove the pid entry and call it a day.
917  */
919 {
920  void *oldheap;
921  int i, mypid = getpid();
922  uword virtual_base, virtual_size;
923 
924  /* It felt so nice we did it twice... */
925  if (root_rp == 0)
926  return;
927 
928  if (--root_rp_refcount > 0)
929  return;
930 
931  /*
932  * If we take a signal while holding one or more shared-memory
933  * mutexes, we may end up back here from an otherwise
934  * benign exit handler. Bail out to avoid a recursive
935  * mutex screw-up.
936  */
937  if (nheld)
938  return;
939 
940  region_lock(root_rp, 7);
941  oldheap = svm_push_pvt_heap (root_rp);
942 
943  virtual_base = root_rp->virtual_base;
944  virtual_size = root_rp->virtual_size;
945 
946  for (i = 0; i < vec_len(root_rp->client_pids); i++) {
947  if (root_rp->client_pids[i] == mypid) {
948  vec_delete (root_rp->client_pids, 1, i);
949  goto found;
950  }
951  }
952  clib_warning("pid %d AWOL", mypid);
953 
954  found:
955 
956  region_unlock(root_rp);
957  svm_pop_heap (oldheap);
958 
959  root_rp = 0;
960  munmap ((void *)virtual_base, virtual_size);
961 }
962 
964 {
965  int j;
966  int mypid = getpid();
967  void *oldheap;
968 
969  for (j = 0; j < vec_len(rp->client_pids); j++) {
970  if (mypid == rp->client_pids[j])
971  continue;
972  if (rp->client_pids[j] && (kill (rp->client_pids[j], 0) < 0)) {
973  clib_warning ("%s: cleanup ghost pid %d",
974  rp->region_name, rp->client_pids[j]);
975  /* nb: client vec in rp->region_heap */
976  oldheap = svm_push_pvt_heap (rp);
977  vec_delete (rp->client_pids, 1, j);
978  j--;
979  svm_pop_heap (oldheap);
980  }
981  }
982 }
983 
984 
985 /*
986  * Scan svm regions for dead clients
987  */
/*
 * svm_client_scan
 * Initialize the root region under root_path, drop dead pids from the
 * root region's client list, then visit every registered subregion by
 * name via svm_region_find_or_create() and unmap it again, and finally
 * call svm_region_exit().
 */
988 void svm_client_scan(char *root_path)
989 {
990  int i, j;
991  svm_main_region_t *mp;
/* NOTE(review): original lines 992-993 are missing from this listing.
   "a" and "root_rp" are used below; the declaration of "a" is not visible
   and is presumably among the dropped lines -- confirm against the
   upstream file. */
994  svm_region_t *rp;
995  svm_subregion_t *subp;
996  u8 *name=0;
997  u8 ** svm_names=0;
998  void *oldheap;
999  int mypid = getpid();
1000 
1001  vec_validate (a, 0);
1002 
1003  svm_region_init_chroot(root_path);
1004 
1005  root_rp = svm_get_root_rp();
1006 
/* The mutex is taken directly here rather than through region_lock(), so
   it is not recorded in mutexes_held / nheld */
1007  pthread_mutex_lock (&root_rp->mutex);
1008 
1009  mp = root_rp->data_base;
1010 
/* Same ghost-pid sweep as svm_client_scan_this_region_nolock(), applied
   to the root region */
1011  for (j = 0; j < vec_len (root_rp->client_pids); j++) {
1012  if (mypid == root_rp->client_pids[j])
1013  continue;
1014  if (root_rp->client_pids[j]
1015  && (kill (root_rp->client_pids[j], 0) < 0)) {
1016  clib_warning ("%s: cleanup ghost pid %d",
1017  root_rp->region_name, root_rp->client_pids[j]);
1018  /* nb: client vec in root_rp->region_heap */
1019  oldheap = svm_push_pvt_heap (root_rp);
1020  vec_delete (root_rp->client_pids, 1, j);
1021  j--;
1022  svm_pop_heap (oldheap);
1023  }
1024  }
1025 
1026  /*
1027  * Snapshoot names, can't hold root rp mutex across
1028  * find_or_create.
1029  */
1030  pool_foreach (subp, mp->subregions, ({
1031  name = vec_dup (subp->subregion_name);
1032  vec_add1(svm_names, name);
1033  }));
1034 
1035  pthread_mutex_unlock (&root_rp->mutex);
1036 
1037  for (i = 0; i < vec_len(svm_names); i++) {
/* "a" is freed at the bottom of each iteration, so it is re-created here */
1038  vec_validate(a, 0);
1039  a->root_path = root_path;
1040  a->name = (char *) svm_names[i];
1041  rp = svm_region_find_or_create (a);
1042  if (rp) {
1043  pthread_mutex_lock (&rp->mutex);
1044 
/* NOTE(review): original line 1045 is missing from this listing;
   presumably the call to svm_client_scan_this_region_nolock (rp), which is
   not invoked anywhere in the visible code -- confirm. */
1046 
1047  pthread_mutex_unlock (&rp->mutex);
1048  svm_region_unmap (rp);
/* NOTE(review): the name copy is only freed when rp is non-null */
1049  vec_free(svm_names[i]);
1050  }
1051  vec_free (a);
1052  }
1053  vec_free(svm_names);
1054 
1055  svm_region_exit ();
1056 
1057  vec_free (a);
1058 }
char * root_path
Definition: svm.h:70
#define vec_validate(V, I)
Make sure vector is long enough for given index (no header, unspecified alignment) ...
Definition: vec.h:394
u8 * root_path
Definition: svm.h:107
vmrglw vmrglh hi
svm_region_t * svm_get_root_rp(void)
Definition: svm.c:53
sll srl srl sll sra u16x4 i
Definition: vector_sse2.h:267
static int nheld
Definition: svm.c:51
static void svm_pop_heap(void *oldheap)
Definition: svm.h:179
a
Definition: bitmap.h:393
void * svm_map_region(svm_map_region_args_t *a)
Definition: svm.c:363
#define NULL
Definition: clib.h:55
#define vec_add1(V, E)
Add 1 element to end of vector (unspecified alignment).
Definition: vec.h:480
uword virtual_base
Definition: svm.h:46
#define hash_set_mem(h, key, value)
Definition: hash.h:257
#define pool_get(P, E)
Definition: pool.h:186
static u8 * format_svm_size(u8 *s, va_list *args)
Definition: svm.c:116
u8 * format_mheap(u8 *s, va_list *va)
Definition: mheap.c:1113
#define SVM_FLAGS_MHEAP
Definition: svm.h:32
void svm_region_init(void)
Definition: svm.c:677
#define pool_foreach(VAR, POOL, BODY)
Definition: pool.h:328
uword * client_pids
Definition: svm.h:58
#define MHEAP_FLAG_DISABLE_VM
static void svm_region_init_internal(char *root_path, int uid, int gid)
Definition: svm.c:623
void * svm_region_find_or_create(svm_map_region_args_t *a)
Definition: svm.c:692
volatile void * user_ctx
Definition: svm.h:51
char * name
Definition: svm.h:71
#define clib_warning(format, args...)
Definition: error.h:59
unsigned long u64
Definition: types.h:89
#define SVM_FLAGS_NEED_DATA_INIT
Definition: svm.h:35
#define SVM_GLOBAL_REGION_BASEVA
Definition: svm.h:88
pthread_cond_t condvar
Definition: svm.h:42
u8 * format_svm_region(u8 *s, va_list *args)
Definition: svm.c:130
#define hash_create_string(elts, value_bytes)
Definition: hash.h:609
#define SVM_FLAGS_NODATA
Definition: svm.h:34
void * data_base
Definition: svm.h:49
#define hash_unset_mem(h, key)
Definition: hash.h:263
always_inline uword clib_bitmap_set_no_check(uword *a, uword i, uword new_value)
Definition: bitmap.h:112
u8 * subregion_name
Definition: svm.h:101
uword * name_hash
Definition: svm.h:106
#define MAXLOCK
Definition: svm.c:49
#define pool_put(P, E)
Definition: pool.h:200
#define vec_dup(V)
Return copy of vector (no header, no alignment)
Definition: vec.h:332
svm_subregion_t * subregions
Definition: svm.h:105
char * backing_file
Definition: svm.h:56
uword virtual_size
Definition: svm.h:47
void svm_region_exit()
Definition: svm.c:918
char * backing_file
Definition: svm.h:75
char * region_name
Definition: svm.h:55
static void * svm_push_pvt_heap(svm_region_t *rp)
Definition: svm.h:165
static int root_rp_refcount
Definition: svm.c:47
#define vec_free(V)
Free vector's memory (no header).
Definition: vec.h:298
#define clib_unix_warning(format, args...)
Definition: error.h:68
static pthread_mutex_t * mutexes_held[MAXLOCK]
Definition: svm.c:50
u8 * shm_name_from_svm_map_region_args(svm_map_region_args_t *a)
Definition: svm.c:314
uword bitmap_size
Definition: svm.h:53
void svm_region_init_chroot_uid_gid(char *root_path, int uid, int gid)
Definition: svm.c:687
static int svm_data_region_map(svm_map_region_args_t *a, svm_region_t *rp)
Definition: svm.c:265
void * mheap_alloc_with_flags(void *memory, uword memory_size, uword flags)
Definition: mheap.c:842
#define SVM_FLAGS_FILE
Definition: svm.h:33
#define ASSERT(truth)
void svm_region_init_chroot(char *root_path)
Definition: svm.c:682
#define vec_delete(V, N, M)
Delete N elements starting at element M.
Definition: vec.h:743
static void region_lock(svm_region_t *rp, int tag)
Definition: svm.c:60
u8 * format(u8 *s, char *fmt,...)
Definition: format.c:405
u32 size
Definition: vhost-user.h:74
volatile uword version
Definition: svm.h:40
static void region_unlock(svm_region_t *rp)
Definition: svm.c:76
int mutex_owner_tag
Definition: svm.h:44
#define SVM_GLOBAL_REGION_SIZE
Definition: svm.h:89
#define SVM_VERSION
Definition: svm.h:30
#define MMAP_PAGESIZE
Definition: ssvm.h:41
u64 uword
Definition: types.h:112
#define SVM_GLOBAL_REGION_NAME
Definition: svm.h:90
always_inline uword clib_bitmap_get_no_check(uword *ai, uword i)
Definition: bitmap.h:170
uword backing_mmap_size
Definition: svm.h:76
static unsigned int rnd_pagesize(unsigned int size)
Definition: svm.c:196
#define SVM_PVT_MHEAP_SIZE
Definition: svm.h:37
#define vec_len(v)
Number of elements in vector (rvalue-only, NULL tolerant)
unsigned char u8
Definition: types.h:56
void svm_region_unmap(void *rp_arg)
Definition: svm.c:802
void svm_client_scan_this_region_nolock(svm_region_t *rp)
Definition: svm.c:963
#define hash_get_mem(h, key)
Definition: hash.h:251
void * region_heap
Definition: svm.h:48
uword * bitmap
Definition: svm.h:54
#define CLIB_MEMORY_BARRIER()
Definition: clib.h:101
void * data_heap
Definition: svm.h:50
static int svm_data_region_create(svm_map_region_args_t *a, svm_region_t *rp)
Definition: svm.c:207
static u8 * format_svm_flags(u8 *s, va_list *args)
Definition: svm.c:100
void svm_client_scan(char *root_path)
Definition: svm.c:988
always_inline u64 clib_cpu_time_now(void)
Definition: time.h:71
int mutex_owner_pid
Definition: svm.h:43
#define BITS(x)
Definition: clib.h:58
uword flags
Definition: svm.h:45
static void svm_mutex_cleanup(void)
Definition: svm.c:615
uword baseva
Definition: svm.h:72
pthread_mutex_t mutex
Definition: svm.h:41
CLIB vectors are ubiquitous, dynamically resized arrays with user-defined "headers".
static svm_region_t * root_rp
Definition: svm.c:46