/**
 * Copyright (C) Mellanox Technologies Ltd. 2001-2015.  ALL RIGHTS RESERVED.
 *
 * See file LICENSE for terms.
 */

#ifdef HAVE_CONFIG_H
#  include "config.h"
#endif

#include "event.h"

#include <ucm/mmap/mmap.h>
#include <ucm/malloc/malloc_hook.h>
#include <ucm/util/sys.h>
#include <ucs/arch/cpu.h>
#include <ucs/datastruct/khash.h>
#include <ucs/sys/compiler.h>
#include <ucs/sys/module.h>
#include <ucs/type/init_once.h>
#include <ucs/type/spinlock.h>

#include <sys/mman.h>
#include <pthread.h>
#include <sys/shm.h>
#include <sys/ipc.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>


UCS_LIST_HEAD(ucm_event_installer_list);

static ucs_recursive_spinlock_t ucm_kh_lock;
#define ucm_ptr_hash(_ptr)  kh_int64_hash_func((uintptr_t)(_ptr))
KHASH_INIT(ucm_ptr_size, const void*, size_t, 1, ucm_ptr_hash, kh_int64_hash_equal)

static pthread_rwlock_t ucm_event_lock = PTHREAD_RWLOCK_INITIALIZER;
static ucs_list_link_t ucm_event_handlers;
static int ucm_external_events = 0;
static khash_t(ucm_ptr_size) ucm_shmat_ptrs;

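/* Return the size of the SysV shared memory segment identified by 'shmid',
 * or 0 if shmctl(IPC_STAT) fails. */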
static size_t ucm_shm_size(int shmid)
{
    struct shmid_ds ds;
    int ret;

    ret = shmctl(shmid, IPC_STAT, &ds);
    if (ret < 0) {
        return 0;
    }

    return ds.shm_segsz;
}

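/* Fallback handler: if no other handler has produced a result for this event,
 * call the original implementation of the intercepted function. */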
static void ucm_event_call_orig(ucm_event_type_t event_type, ucm_event_t *event,
                                void *arg)
{
    switch (event_type) {
    case UCM_EVENT_MMAP:
        if (event->mmap.result == MAP_FAILED) {
            event->mmap.result = ucm_orig_mmap(event->mmap.address,
                                               event->mmap.size,
                                               event->mmap.prot,
                                               event->mmap.flags,
                                               event->mmap.fd,
                                               event->mmap.offset);
        }
        break;
    case UCM_EVENT_MUNMAP:
        if (event->munmap.result == -1) {
            event->munmap.result = ucm_orig_munmap(event->munmap.address,
                                                   event->munmap.size);
        }
        break;
    case UCM_EVENT_MREMAP:
        if (event->mremap.result == MAP_FAILED) {
            event->mremap.result = ucm_orig_mremap(event->mremap.address,
                                                   event->mremap.old_size,
                                                   event->mremap.new_size,
                                                   event->mremap.flags);
        }
        break;
    case UCM_EVENT_SHMAT:
        if (event->shmat.result == MAP_FAILED) {
            event->shmat.result = ucm_orig_shmat(event->shmat.shmid,
                                                 event->shmat.shmaddr,
                                                 event->shmat.shmflg);
        }
        break;
    case UCM_EVENT_SHMDT:
        if (event->shmdt.result == -1) {
            event->shmdt.result = ucm_orig_shmdt(event->shmdt.shmaddr);
        }
        break;
    case UCM_EVENT_SBRK:
        if (event->sbrk.result == MAP_FAILED) {
            event->sbrk.result = ucm_orig_sbrk(event->sbrk.increment);
        }
        break;
    case UCM_EVENT_MADVISE:
        if (event->madvise.result == -1) {
            event->madvise.result = ucm_orig_madvise(event->madvise.addr,
                                                     event->madvise.length,
                                                     event->madvise.advice);
        }
        break;
    default:
        ucm_warn("Got unknown event %d", event_type);
        break;
    }
}

/*
 * Add a handler which calls the original implementation, and initialize the
 * callback list so that this handler is initially its only element.
 */
static ucm_event_handler_t ucm_event_orig_handler = {
    .list     = UCS_LIST_INITIALIZER(&ucm_event_handlers, &ucm_event_handlers),
    .events   = UCM_EVENT_MMAP | UCM_EVENT_MUNMAP | UCM_EVENT_MREMAP |
                UCM_EVENT_SHMAT | UCM_EVENT_SHMDT | UCM_EVENT_SBRK |
                UCM_EVENT_MADVISE,      /* All events */
    .priority = 0,                      /* Between negative and positive handlers */
    .cb       = ucm_event_call_orig
};
static ucs_list_link_t ucm_event_handlers =
                UCS_LIST_INITIALIZER(&ucm_event_orig_handler.list,
                                     &ucm_event_orig_handler.list);


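/* Dispatch an event to every handler registered for its type, in priority
 * order. Callers hold the event lock (ucm_event_enter() or
 * ucm_event_enter_exclusive()), so the handler list cannot change during
 * iteration. */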
void ucm_event_dispatch(ucm_event_type_t event_type, ucm_event_t *event)
{
    ucm_event_handler_t *handler;

    ucs_list_for_each(handler, &ucm_event_handlers, list) {
        if (handler->events & event_type) {
            handler->cb(event_type, event, handler->arg);
        }
    }
}

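/* Acquire the global event lock with the given pthread_rwlock function,
 * retrying while it returns EAGAIN (e.g. the maximum number of read locks
 * was reached) and aborting on any other error. */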
#define ucm_event_lock(_lock_func) \
    { \
        int ret; \
        do { \
            ret = _lock_func(&ucm_event_lock); \
        } while (ret == EAGAIN); \
        if (ret != 0) { \
            ucm_fatal("%s() failed: %s", #_lock_func, strerror(ret)); \
        } \
    }

void ucm_event_enter()
{
    ucm_event_lock(pthread_rwlock_rdlock);
}

void ucm_event_enter_exclusive()
{
    ucm_event_lock(pthread_rwlock_wrlock);
}

void ucm_event_leave()
{
    pthread_rwlock_unlock(&ucm_event_lock);
}

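/* The ucm_mmap()/ucm_munmap()/... entry points below are the hooks installed
 * over the corresponding system calls. Each one takes the event lock, reports
 * regions about to be removed as VM_UNMAPPED, dispatches the main event (the
 * priority-0 handler performs the original call if nothing else did), and on
 * success reports newly created regions as VM_MAPPED. */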
void *ucm_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
    ucm_event_t event;

    ucm_trace("ucm_mmap(addr=%p length=%lu prot=0x%x flags=0x%x fd=%d offset=%ld)",
              addr, length, prot, flags, fd, offset);

    ucm_event_enter();

    if ((flags & MAP_FIXED) && (addr != NULL)) {
        ucm_dispatch_vm_munmap(addr, length);
    }

    event.mmap.result  = MAP_FAILED;
    event.mmap.address = addr;
    event.mmap.size    = length;
    event.mmap.prot    = prot;
    event.mmap.flags   = flags;
    event.mmap.fd      = fd;
    event.mmap.offset  = offset;
    ucm_event_dispatch(UCM_EVENT_MMAP, &event);

    if (event.mmap.result != MAP_FAILED) {
        /* Use original length */
        ucm_dispatch_vm_mmap(event.mmap.result, length);
    }

    ucm_event_leave();

    return event.mmap.result;
}

int ucm_munmap(void *addr, size_t length)
{
    ucm_event_t event;

    ucm_event_enter();

    ucm_trace("ucm_munmap(addr=%p length=%lu)", addr, length);

    ucm_dispatch_vm_munmap(addr, length);

    event.munmap.result  = -1;
    event.munmap.address = addr;
    event.munmap.size    = length;
    ucm_event_dispatch(UCM_EVENT_MUNMAP, &event);

    ucm_event_leave();

    return event.munmap.result;
}

void ucm_vm_mmap(void *addr, size_t length)
{
    ucm_event_enter();

    ucm_trace("ucm_vm_mmap(addr=%p length=%lu)", addr, length);
    ucm_dispatch_vm_mmap(addr, length);

    ucm_event_leave();
}

void ucm_vm_munmap(void *addr, size_t length)
{
    ucm_event_enter();

    ucm_trace("ucm_vm_munmap(addr=%p length=%lu)", addr, length);
    ucm_dispatch_vm_munmap(addr, length);

    ucm_event_leave();
}

void *ucm_mremap(void *old_address, size_t old_size, size_t new_size, int flags)
{
    ucm_event_t event;

    ucm_event_enter();

    ucm_trace("ucm_mremap(old_address=%p old_size=%lu new_size=%ld flags=0x%x)",
              old_address, old_size, new_size, flags);

    ucm_dispatch_vm_munmap(old_address, old_size);

    event.mremap.result   = MAP_FAILED;
    event.mremap.address  = old_address;
    event.mremap.old_size = old_size;
    event.mremap.new_size = new_size;
    event.mremap.flags    = flags;
    ucm_event_dispatch(UCM_EVENT_MREMAP, &event);

    if (event.mremap.result != MAP_FAILED) {
        /* Use original new_size */
        ucm_dispatch_vm_mmap(event.mremap.result, new_size);
    }

    ucm_event_leave();

    return event.mremap.result;
}

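/* Remove 'addr' from the shmat pointer hash, taking ucm_kh_lock internally.
 * Returns 1 if the entry was found (storing its size in '*size' when
 * requested), or 0 otherwise. */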
static int ucm_shm_del_entry_from_khash(const void *addr, size_t *size)
{
    khiter_t iter;

    ucs_recursive_spin_lock(&ucm_kh_lock);
    iter = kh_get(ucm_ptr_size, &ucm_shmat_ptrs, addr);
    if (iter != kh_end(&ucm_shmat_ptrs)) {
        if (size != NULL) {
            *size = kh_value(&ucm_shmat_ptrs, iter);
        }
        kh_del(ucm_ptr_size, &ucm_shmat_ptrs, iter);
        ucs_recursive_spin_unlock(&ucm_kh_lock);
        return 1;
    }

    ucs_recursive_spin_unlock(&ucm_kh_lock);
    return 0;
}

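/* Replacement for shmat(): report SHM_REMAP re-attachments as VM_UNMAPPED,
 * dispatch the SHMAT event, and on success remember the attached size in
 * ucm_shmat_ptrs so that the matching shmdt() can report the exact range. */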
void *ucm_shmat(int shmid, const void *shmaddr, int shmflg)
{
    uintptr_t attach_addr;
    ucm_event_t event;
    khiter_t iter;
    size_t size;
    int result;

    ucm_event_enter();

    ucm_trace("ucm_shmat(shmid=%d shmaddr=%p shmflg=0x%x)",
              shmid, shmaddr, shmflg);

    size = ucm_shm_size(shmid);

    if ((shmflg & SHM_REMAP) && (shmaddr != NULL)) {
        attach_addr = (uintptr_t)shmaddr;
        if (shmflg & SHM_RND) {
            attach_addr -= attach_addr % SHMLBA;
        }
        ucm_dispatch_vm_munmap((void*)attach_addr, size);
        ucm_shm_del_entry_from_khash((void*)attach_addr, NULL);
    }

    event.shmat.result  = MAP_FAILED;
    event.shmat.shmid   = shmid;
    event.shmat.shmaddr = shmaddr;
    event.shmat.shmflg  = shmflg;
    ucm_event_dispatch(UCM_EVENT_SHMAT, &event);

    if (event.shmat.result != MAP_FAILED) {
        ucs_recursive_spin_lock(&ucm_kh_lock);
        iter = kh_put(ucm_ptr_size, &ucm_shmat_ptrs, event.shmat.result, &result);
        if (result != -1) {
            kh_value(&ucm_shmat_ptrs, iter) = size;
        }
        ucs_recursive_spin_unlock(&ucm_kh_lock);
        ucm_dispatch_vm_mmap(event.shmat.result, size);
    }

    ucm_event_leave();

    return event.shmat.result;
}

int ucm_shmdt(const void *shmaddr)
{
    ucm_event_t event;
    size_t size;

    ucm_event_enter();

    ucm_debug("ucm_shmdt(shmaddr=%p)", shmaddr);

    if (!ucm_shm_del_entry_from_khash(shmaddr, &size)) {
        size = ucm_get_shm_seg_size(shmaddr);
    }

    ucm_dispatch_vm_munmap((void*)shmaddr, size);

    event.shmdt.result  = -1;
    event.shmdt.shmaddr = shmaddr;
    ucm_event_dispatch(UCM_EVENT_SHMDT, &event);

    ucm_event_leave();

    return event.shmdt.result;
}

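/* Replacement for sbrk(): a negative increment shrinks the heap, so the range
 * about to be released is reported as VM_UNMAPPED before the SBRK event; a
 * positive increment is reported as VM_MAPPED after the event, using the new
 * break returned by ucm_orig_sbrk(0), offset back by the increment, as the
 * start of the newly available range. */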
void *ucm_sbrk(intptr_t increment)
{
    ucm_event_t event;

    ucm_event_enter();

    ucm_trace("ucm_sbrk(increment=%+ld)", increment);

    if (increment < 0) {
        ucm_dispatch_vm_munmap(UCS_PTR_BYTE_OFFSET(ucm_orig_sbrk(0), increment),
                               -increment);
    }

    event.sbrk.result    = MAP_FAILED;
    event.sbrk.increment = increment;
    ucm_event_dispatch(UCM_EVENT_SBRK, &event);

    if ((increment > 0) && (event.sbrk.result != MAP_FAILED)) {
        ucm_dispatch_vm_mmap(UCS_PTR_BYTE_OFFSET(ucm_orig_sbrk(0), -increment),
                             increment);
    }

    ucm_event_leave();

    return event.sbrk.result;
}

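/* Replacement for brk(): translate the requested break address into an
 * increment relative to the current break and reuse the SBRK event path.
 * When UCM_BISTRO_HOOKS is disabled this entry point is a stub that fails. */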
int ucm_brk(void *addr)
{
#if UCM_BISTRO_HOOKS
    void *old_addr;
    intptr_t increment;
    ucm_event_t event;

    old_addr  = ucm_brk_syscall(0);
    /* if addr == NULL, the syscall just returns the current break */
    increment = addr ? ((intptr_t)addr - (intptr_t)old_addr) : 0;

    ucm_event_enter();

    ucm_trace("ucm_brk(addr=%p)", addr);

    if (increment < 0) {
        ucm_dispatch_vm_munmap(UCS_PTR_BYTE_OFFSET(old_addr, increment),
                               -increment);
    }

    event.sbrk.result    = (void*)-1;
    event.sbrk.increment = increment;
    ucm_event_dispatch(UCM_EVENT_SBRK, &event);

    if ((increment > 0) && (event.sbrk.result != MAP_FAILED)) {
        ucm_dispatch_vm_mmap(old_addr, increment);
    }

    ucm_event_leave();

    return event.sbrk.result == MAP_FAILED ? -1 : 0;
#else
    return -1;
#endif
}

int ucm_madvise(void *addr, size_t length, int advice)
{
    ucm_event_t event;

    ucm_event_enter();

    ucm_trace("ucm_madvise(addr=%p length=%zu advice=%d)", addr, length, advice);

    /* madvise(MADV_DONTNEED) and madvise(MADV_FREE) release pages */
    if ((advice == MADV_DONTNEED)
#if HAVE_DECL_MADV_REMOVE
        || (advice == MADV_REMOVE)
#endif
#if HAVE_DECL_POSIX_MADV_DONTNEED
        || (advice == POSIX_MADV_DONTNEED)
#endif
#if HAVE_DECL_MADV_FREE
        || (advice == MADV_FREE)
#endif
       ) {
        ucm_dispatch_vm_munmap(addr, length);
    }

    event.madvise.result = -1;
    event.madvise.addr   = addr;
    event.madvise.length = length;
    event.madvise.advice = advice;
    ucm_event_dispatch(UCM_EVENT_MADVISE, &event);

    ucm_event_leave();

    return event.madvise.result;
}

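/* Insert 'handler' into the global list, keeping it sorted by ascending
 * priority. The built-in priority-0 handler invokes the original call, so
 * handlers with negative priority run before it and handlers with positive
 * priority run after it. */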
void ucm_event_handler_add(ucm_event_handler_t *handler)
{
    ucm_event_handler_t *elem;

    ucm_event_enter_exclusive();
    ucs_list_for_each(elem, &ucm_event_handlers, list) {
        if (handler->priority < elem->priority) {
            ucs_list_insert_before(&elem->list, &handler->list);
            ucm_event_leave();
            return;
        }
    }

    ucs_list_add_tail(&ucm_event_handlers, &handler->list);
    ucm_event_leave();
}

void ucm_event_handler_remove(ucm_event_handler_t *handler)
{
    ucm_event_enter_exclusive();
    ucs_list_del(&handler->list);
    ucm_event_leave();
}

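/* Install the hooks required for 'events': mmap-based hooks first, then
 * malloc hooks (memory-type events are excluded and left to the extra
 * installers loaded below), and finally any installers registered in
 * ucm_event_installer_list by loadable ucm modules. */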
static ucs_status_t ucm_event_install(int events)
{
    static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
    UCS_MODULE_FRAMEWORK_DECLARE(ucm);
    ucm_event_installer_t *event_installer;
    int malloc_events;
    ucs_status_t status;

    UCS_INIT_ONCE(&init_once) {
        ucm_prevent_dl_unload();
    }

    /* TODO lock */
    status = ucm_mmap_install(events);
    if (status != UCS_OK) {
        ucm_debug("failed to install mmap events");
        goto out_unlock;
    }

    ucm_debug("mmap hooks are ready");

    malloc_events = events & ~(UCM_EVENT_MEM_TYPE_ALLOC |
                               UCM_EVENT_MEM_TYPE_FREE);
    status = ucm_malloc_install(malloc_events);
    if (status != UCS_OK) {
        ucm_debug("failed to install malloc events");
        goto out_unlock;
    }

    ucm_debug("malloc hooks are ready");

    /* Call extra event installers */
    UCS_MODULE_FRAMEWORK_LOAD(ucm, UCS_MODULE_LOAD_FLAG_NODELETE);
    ucs_list_for_each(event_installer, &ucm_event_installer_list, list) {
        status = event_installer->install(events);
        if (status != UCS_OK) {
            goto out_unlock;
        }
    }

    status = UCS_OK;

out_unlock:
    return status;
}

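/* Register a user callback for the given event mask: validate the mask,
 * install the required hooks (unless UCM_EVENT_FLAG_NO_INSTALL is given or
 * the events are reported externally), and add the handler at the requested
 * priority. For illustration only, a caller might look like the sketch below
 * (the callback and cache names are hypothetical, not part of this file; the
 * ucm_event_t field names follow ucm/api/ucm.h):
 *
 *   static void my_unmap_cb(ucm_event_type_t type, ucm_event_t *event, void *arg)
 *   {
 *       // invalidate cached registrations covering [address, address + size)
 *       my_cache_invalidate(arg, event->vm_unmapped.address,
 *                           event->vm_unmapped.size);
 *   }
 *
 *   status = ucm_set_event_handler(UCM_EVENT_VM_UNMAPPED, 1000,
 *                                  my_unmap_cb, my_cache);
 */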
ucs_status_t ucm_set_event_handler(int events, int priority,
                                   ucm_event_callback_t cb, void *arg)
{
    ucm_event_installer_t *event_installer;
    ucm_event_handler_t *handler;
    ucs_status_t status;
    int flags;

    if (events & ~(UCM_EVENT_MMAP|UCM_EVENT_MUNMAP|UCM_EVENT_MREMAP|
                   UCM_EVENT_SHMAT|UCM_EVENT_SHMDT|
                   UCM_EVENT_SBRK|
                   UCM_EVENT_MADVISE|
                   UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED|
                   UCM_EVENT_MEM_TYPE_ALLOC|UCM_EVENT_MEM_TYPE_FREE|
                   UCM_EVENT_FLAG_NO_INSTALL|
                   UCM_EVENT_FLAG_EXISTING_ALLOC)) {
        return UCS_ERR_INVALID_PARAM;
    }

    if (events && !ucm_global_opts.enable_events) {
        return UCS_ERR_UNSUPPORTED;
    }

    /* separate event flags from real events */
    flags   = events & (UCM_EVENT_FLAG_NO_INSTALL |
                        UCM_EVENT_FLAG_EXISTING_ALLOC);
    events &= ~flags;

    if (!(flags & UCM_EVENT_FLAG_NO_INSTALL) && (events & ~ucm_external_events)) {
        status = ucm_event_install(events & ~ucm_external_events);
        if (status != UCS_OK) {
            return status;
        }
    }

    handler = malloc(sizeof(*handler));
    if (handler == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    handler->events   = events;
    handler->priority = priority;
    handler->cb       = cb;
    handler->arg      = arg;

    ucm_event_handler_add(handler);

    if (flags & UCM_EVENT_FLAG_EXISTING_ALLOC) {
        ucs_list_for_each(event_installer, &ucm_event_installer_list, list) {
            event_installer->get_existing_alloc(handler);
        }
    }

    ucm_debug("added user handler (func=%p arg=%p) for events=0x%x prio=%d", cb,
              arg, events, priority);
    return UCS_OK;
}

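/* Mark or unmark 'events' as reported externally (for example by the
 * application calling ucm_vm_mmap()/ucm_vm_munmap() directly), so that
 * ucm_set_event_handler() does not try to install hooks for them. */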
void ucm_set_external_event(int events)
{
    ucm_event_enter_exclusive();
    ucm_external_events |= events;
    ucm_event_leave();
}

void ucm_unset_external_event(int events)
{
    ucm_event_enter_exclusive();
    ucm_external_events &= ~events;
    ucm_event_leave();
}

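/* Remove 'events' from every handler registered with the pair (cb, arg);
 * handlers left with an empty event mask are unlinked and freed outside the
 * event lock. */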
void ucm_unset_event_handler(int events, ucm_event_callback_t cb, void *arg)
{
    ucm_event_handler_t *elem, *tmp;
    UCS_LIST_HEAD(gc_list);

    ucm_event_enter_exclusive();
    ucs_list_for_each_safe(elem, tmp, &ucm_event_handlers, list) {
        if ((cb == elem->cb) && (arg == elem->arg)) {
            elem->events &= ~events;
            if (elem->events == 0) {
                ucs_list_del(&elem->list);
                ucs_list_add_tail(&gc_list, &elem->list);
            }
        }
    }
    ucm_event_leave();

    /* Do not release memory while we hold event lock - may deadlock */
    ucs_list_for_each_safe(elem, tmp, &gc_list, list) {
        free(elem);
    }
}

ucs_status_t ucm_test_events(int events)
{
    return ucm_mmap_test_installed_events(events);
}

ucs_status_t ucm_test_external_events(int events)
{
    return ucm_mmap_test_events(events & ucm_external_events, "external");
}

UCS_STATIC_INIT {
    ucs_recursive_spinlock_init(&ucm_kh_lock, 0);
    kh_init_inplace(ucm_ptr_size, &ucm_shmat_ptrs);
}

UCS_STATIC_CLEANUP {
    ucs_status_t status;

    kh_destroy_inplace(ucm_ptr_size, &ucm_shmat_ptrs);

    status = ucs_recursive_spinlock_destroy(&ucm_kh_lock);
    if (status != UCS_OK) {
        ucm_warn("ucs_recursive_spinlock_destroy() failed (%d)", status);
    }
}