1 /**
2 * Copyright (C) Mellanox Technologies Ltd. 2001-2015. ALL RIGHTS RESERVED.
3 *
4 * See file LICENSE for terms.
5 */
6
7 #ifdef HAVE_CONFIG_H
8 # include "config.h"
9 #endif
10
11 #include "event.h"
12
13 #include <ucm/mmap/mmap.h>
14 #include <ucm/malloc/malloc_hook.h>
15 #include <ucm/util/sys.h>
16 #include <ucs/arch/cpu.h>
17 #include <ucs/datastruct/khash.h>
18 #include <ucs/sys/compiler.h>
19 #include <ucs/sys/module.h>
20 #include <ucs/type/init_once.h>
21 #include <ucs/type/spinlock.h>
22
23 #include <sys/mman.h>
24 #include <pthread.h>
25 #include <sys/shm.h>
26 #include <sys/ipc.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <errno.h>
30
31
32 UCS_LIST_HEAD(ucm_event_installer_list);
33
34 static ucs_recursive_spinlock_t ucm_kh_lock;
35 #define ucm_ptr_hash(_ptr) kh_int64_hash_func((uintptr_t)(_ptr))
36 KHASH_INIT(ucm_ptr_size, const void*, size_t, 1, ucm_ptr_hash, kh_int64_hash_equal)
37
38 static pthread_rwlock_t ucm_event_lock = PTHREAD_RWLOCK_INITIALIZER;
39 static ucs_list_link_t ucm_event_handlers;
40 static int ucm_external_events = 0;
41 static khash_t(ucm_ptr_size) ucm_shmat_ptrs;
42
/* Query the size of the SysV shared memory segment identified by shmid.
 * Returns 0 if the segment cannot be stat'ed (e.g. invalid id). */
static size_t ucm_shm_size(int shmid)
{
    struct shmid_ds seg_info;

    if (shmctl(shmid, IPC_STAT, &seg_info) != 0) {
        return 0;
    }

    return seg_info.shm_segsz;
}
55
/*
 * Fallback handler (registered at priority 0) that forwards the event to the
 * original implementation of the intercepted call - but only if no
 * earlier-running handler already produced a result. Each case tests the
 * "unhandled" sentinel (MAP_FAILED for pointer-returning calls, -1 for
 * int-returning calls) before invoking the corresponding ucm_orig_* function.
 * The handler argument 'arg' is unused here.
 */
static void ucm_event_call_orig(ucm_event_type_t event_type, ucm_event_t *event,
                                void *arg)
{
    switch (event_type) {
    case UCM_EVENT_MMAP:
        if (event->mmap.result == MAP_FAILED) {
            event->mmap.result = ucm_orig_mmap(event->mmap.address,
                                               event->mmap.size,
                                               event->mmap.prot,
                                               event->mmap.flags,
                                               event->mmap.fd,
                                               event->mmap.offset);
        }
        break;
    case UCM_EVENT_MUNMAP:
        if (event->munmap.result == -1) {
            event->munmap.result = ucm_orig_munmap(event->munmap.address,
                                                   event->munmap.size);
        }
        break;
    case UCM_EVENT_MREMAP:
        if (event->mremap.result == MAP_FAILED) {
            event->mremap.result = ucm_orig_mremap(event->mremap.address,
                                                   event->mremap.old_size,
                                                   event->mremap.new_size,
                                                   event->mremap.flags);
        }
        break;
    case UCM_EVENT_SHMAT:
        if (event->shmat.result == MAP_FAILED) {
            event->shmat.result = ucm_orig_shmat(event->shmat.shmid,
                                                 event->shmat.shmaddr,
                                                 event->shmat.shmflg);
        }
        break;
    case UCM_EVENT_SHMDT:
        if (event->shmdt.result == -1) {
            event->shmdt.result = ucm_orig_shmdt(event->shmdt.shmaddr);
        }
        break;
    case UCM_EVENT_SBRK:
        if (event->sbrk.result == MAP_FAILED) {
            event->sbrk.result = ucm_orig_sbrk(event->sbrk.increment);
        }
        break;
    case UCM_EVENT_MADVISE:
        if (event->madvise.result == -1) {
            event->madvise.result = ucm_orig_madvise(event->madvise.addr,
                                                     event->madvise.length,
                                                     event->madvise.advice);
        }
        break;
    default:
        ucm_warn("Got unknown event %d", event_type);
        break;
    }
}
113
/*
 * Add a handler which calls the original implementation, and declare the callback
 * list so that initially it will be the single element on that list.
 */
static ucm_event_handler_t ucm_event_orig_handler = {
    /* Statically linked into ucm_event_handlers (declared below), so the list
     * is consistent before any dynamic registration happens. */
    .list     = UCS_LIST_INITIALIZER(&ucm_event_handlers, &ucm_event_handlers),
    .events   = UCM_EVENT_MMAP | UCM_EVENT_MUNMAP | UCM_EVENT_MREMAP |
                UCM_EVENT_SHMAT | UCM_EVENT_SHMDT | UCM_EVENT_SBRK |
                UCM_EVENT_MADVISE, /* All events */
    .priority = 0,                 /* Between negative and positive handlers */
    .cb       = ucm_event_call_orig
};
/* List head pointing back at the single static handler above. */
static ucs_list_link_t ucm_event_handlers =
    UCS_LIST_INITIALIZER(&ucm_event_orig_handler.list,
                         &ucm_event_orig_handler.list);
129
130
/* Run every registered handler that subscribed to event_type, in list order
 * (ascending priority). All callers in this file hold the event lock via
 * ucm_event_enter() / ucm_event_enter_exclusive(). */
void ucm_event_dispatch(ucm_event_type_t event_type, ucm_event_t *event)
{
    ucm_event_handler_t *h;

    ucs_list_for_each(h, &ucm_event_handlers, list) {
        if (!(h->events & event_type)) {
            continue;
        }
        h->cb(event_type, event, h->arg);
    }
}
141
/* Acquire ucm_event_lock with the given pthread_rwlock_* function, retrying
 * while it reports EAGAIN and aborting the process on any other failure.
 * Wrapped in do { } while (0) so the macro expands to a single statement and
 * stays safe inside unbraced if/else bodies (the original bare { } block plus
 * the call-site semicolon would break an if/else chain). */
#define ucm_event_lock(_lock_func) \
    do { \
        int ret; \
        do { \
            ret = _lock_func(&ucm_event_lock); \
        } while (ret == EAGAIN); \
        if (ret != 0) { \
            ucm_fatal("%s() failed: %s", #_lock_func, strerror(ret)); \
        } \
    } while (0)
152
/* Take the event lock for reading - multiple dispatchers may run concurrently. */
void ucm_event_enter()
{
    ucm_event_lock(pthread_rwlock_rdlock);
}
157
/* Take the event lock for writing - used when modifying the handler list. */
void ucm_event_enter_exclusive()
{
    ucm_event_lock(pthread_rwlock_wrlock);
}
162
/* Release the lock taken by ucm_event_enter() or ucm_event_enter_exclusive(). */
void ucm_event_leave()
{
    pthread_rwlock_unlock(&ucm_event_lock);
}
167
ucm_mmap(void * addr,size_t length,int prot,int flags,int fd,off_t offset)168 void *ucm_mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
169 {
170 ucm_event_t event;
171
172 ucm_trace("ucm_mmap(addr=%p length=%lu prot=0x%x flags=0x%x fd=%d offset=%ld)",
173 addr, length, prot, flags, fd, offset);
174
175 ucm_event_enter();
176
177 if ((flags & MAP_FIXED) && (addr != NULL)) {
178 ucm_dispatch_vm_munmap(addr, length);
179 }
180
181 event.mmap.result = MAP_FAILED;
182 event.mmap.address = addr;
183 event.mmap.size = length;
184 event.mmap.prot = prot;
185 event.mmap.flags = flags;
186 event.mmap.fd = fd;
187 event.mmap.offset = offset;
188 ucm_event_dispatch(UCM_EVENT_MMAP, &event);
189
190 if (event.mmap.result != MAP_FAILED) {
191 /* Use original length */
192 ucm_dispatch_vm_mmap(event.mmap.result, length);
193 }
194
195 ucm_event_leave();
196
197 return event.mmap.result;
198 }
199
ucm_munmap(void * addr,size_t length)200 int ucm_munmap(void *addr, size_t length)
201 {
202 ucm_event_t event;
203
204 ucm_event_enter();
205
206 ucm_trace("ucm_munmap(addr=%p length=%lu)", addr, length);
207
208 ucm_dispatch_vm_munmap(addr, length);
209
210 event.munmap.result = -1;
211 event.munmap.address = addr;
212 event.munmap.size = length;
213 ucm_event_dispatch(UCM_EVENT_MUNMAP, &event);
214
215 ucm_event_leave();
216
217 return event.munmap.result;
218 }
219
/* Explicitly notify handlers that [addr, addr+length) became mapped,
 * without performing any mapping ourselves. */
void ucm_vm_mmap(void *addr, size_t length)
{
    ucm_event_enter();

    ucm_trace("ucm_vm_mmap(addr=%p length=%lu)", addr, length);
    ucm_dispatch_vm_mmap(addr, length);

    ucm_event_leave();
}
229
/* Explicitly notify handlers that [addr, addr+length) became unmapped,
 * without performing any unmapping ourselves. */
void ucm_vm_munmap(void *addr, size_t length)
{
    ucm_event_enter();

    ucm_trace("ucm_vm_munmap(addr=%p length=%lu)", addr, length);
    ucm_dispatch_vm_munmap(addr, length);

    ucm_event_leave();
}
239
ucm_mremap(void * old_address,size_t old_size,size_t new_size,int flags)240 void *ucm_mremap(void *old_address, size_t old_size, size_t new_size, int flags)
241 {
242 ucm_event_t event;
243
244 ucm_event_enter();
245
246 ucm_trace("ucm_mremap(old_address=%p old_size=%lu new_size=%ld flags=0x%x)",
247 old_address, old_size, new_size, flags);
248
249 ucm_dispatch_vm_munmap(old_address, old_size);
250
251 event.mremap.result = MAP_FAILED;
252 event.mremap.address = old_address;
253 event.mremap.old_size = old_size;
254 event.mremap.new_size = new_size;
255 event.mremap.flags = flags;
256 ucm_event_dispatch(UCM_EVENT_MREMAP, &event);
257
258 if (event.mremap.result != MAP_FAILED) {
259 /* Use original new_size */
260 ucm_dispatch_vm_mmap(event.mremap.result, new_size);
261 }
262
263 ucm_event_leave();
264
265 return event.mremap.result;
266 }
267
ucm_shm_del_entry_from_khash(const void * addr,size_t * size)268 static int ucm_shm_del_entry_from_khash(const void *addr, size_t *size)
269 { /* must be called in locked ucm_kh_lock */
270 khiter_t iter;
271
272 ucs_recursive_spin_lock(&ucm_kh_lock);
273 iter = kh_get(ucm_ptr_size, &ucm_shmat_ptrs, addr);
274 if (iter != kh_end(&ucm_shmat_ptrs)) {
275 if (size != NULL) {
276 *size = kh_value(&ucm_shmat_ptrs, iter);
277 }
278 kh_del(ucm_ptr_size, &ucm_shmat_ptrs, iter);
279 ucs_recursive_spin_unlock(&ucm_kh_lock);
280 return 1;
281 }
282
283 ucs_recursive_spin_unlock(&ucm_kh_lock);
284 return 0;
285 }
286
ucm_shmat(int shmid,const void * shmaddr,int shmflg)287 void *ucm_shmat(int shmid, const void *shmaddr, int shmflg)
288 {
289 uintptr_t attach_addr;
290 ucm_event_t event;
291 khiter_t iter;
292 size_t size;
293 int result;
294
295 ucm_event_enter();
296
297 ucm_trace("ucm_shmat(shmid=%d shmaddr=%p shmflg=0x%x)",
298 shmid, shmaddr, shmflg);
299
300 size = ucm_shm_size(shmid);
301
302 if ((shmflg & SHM_REMAP) && (shmaddr != NULL)) {
303 attach_addr = (uintptr_t)shmaddr;
304 if (shmflg & SHM_RND) {
305 attach_addr -= attach_addr % SHMLBA;
306 }
307 ucm_dispatch_vm_munmap((void*)attach_addr, size);
308 ucm_shm_del_entry_from_khash((void*)attach_addr, NULL);
309 }
310
311 event.shmat.result = MAP_FAILED;
312 event.shmat.shmid = shmid;
313 event.shmat.shmaddr = shmaddr;
314 event.shmat.shmflg = shmflg;
315 ucm_event_dispatch(UCM_EVENT_SHMAT, &event);
316
317 if (event.shmat.result != MAP_FAILED) {
318 ucs_recursive_spin_lock(&ucm_kh_lock);
319 iter = kh_put(ucm_ptr_size, &ucm_shmat_ptrs, event.mmap.result, &result);
320 if (result != -1) {
321 kh_value(&ucm_shmat_ptrs, iter) = size;
322 }
323 ucs_recursive_spin_unlock(&ucm_kh_lock);
324 ucm_dispatch_vm_mmap(event.shmat.result, size);
325 }
326
327 ucm_event_leave();
328
329 return event.shmat.result;
330 }
331
/* shmdt() entry point: determine the detached segment's size - preferring the
 * value recorded at ucm_shmat() time, falling back to ucm_get_shm_seg_size()
 * for untracked attaches - report it as unmapped, then run the handler chain
 * which performs the real shmdt. Returns 0 on success, -1 on failure. */
int ucm_shmdt(const void *shmaddr)
{
    ucm_event_t event;
    size_t size;

    ucm_event_enter();

    ucm_debug("ucm_shmdt(shmaddr=%p)", shmaddr);

    if (!ucm_shm_del_entry_from_khash(shmaddr, &size)) {
        size = ucm_get_shm_seg_size(shmaddr);
    }

    /* The mapping vanishes once the handlers run shmdt - notify first */
    ucm_dispatch_vm_munmap((void*)shmaddr, size);

    event.shmdt.result  = -1;
    event.shmdt.shmaddr = shmaddr;
    ucm_event_dispatch(UCM_EVENT_SHMDT, &event);

    ucm_event_leave();

    return event.shmdt.result;
}
355
/* sbrk() entry point: mapped/unmapped notifications are derived from the
 * increment sign - shrinking releases [break+increment, break) before the
 * dispatch, and a successful grow adds [new_break-increment, new_break)
 * afterwards. Returns the previous break, or MAP_FAILED ((void*)-1). */
void *ucm_sbrk(intptr_t increment)
{
    ucm_event_t event;

    ucm_event_enter();

    ucm_trace("ucm_sbrk(increment=%+ld)", increment);

    /* ucm_orig_sbrk(0) reads the current break; the released tail ends there */
    if (increment < 0) {
        ucm_dispatch_vm_munmap(UCS_PTR_BYTE_OFFSET(ucm_orig_sbrk(0), increment),
                               -increment);
    }

    event.sbrk.result    = MAP_FAILED;
    event.sbrk.increment = increment;
    ucm_event_dispatch(UCM_EVENT_SBRK, &event);

    /* By now the break has moved forward, so back off by increment to get
     * the start of the newly added region */
    if ((increment > 0) && (event.sbrk.result != MAP_FAILED)) {
        ucm_dispatch_vm_mmap(UCS_PTR_BYTE_OFFSET(ucm_orig_sbrk(0), -increment),
                             increment);
    }

    ucm_event_leave();

    return event.sbrk.result;
}
382
/* brk() entry point (available only with bistro hooks): translated into an
 * equivalent SBRK event whose increment is the delta between the requested
 * break and the current one. Returns 0 on success, -1 on failure. */
int ucm_brk(void *addr)
{
#if UCM_BISTRO_HOOKS
    void *old_addr;
    intptr_t increment;
    ucm_event_t event;

    old_addr  = ucm_brk_syscall(0);
    /* in case if addr == NULL - it just returns current pointer */
    increment = addr ? ((intptr_t)addr - (intptr_t)old_addr) : 0;

    ucm_event_enter();

    ucm_trace("ucm_brk(addr=%p)", addr);

    /* Shrinking: report the released tail before it disappears */
    if (increment < 0) {
        ucm_dispatch_vm_munmap(UCS_PTR_BYTE_OFFSET(old_addr, increment),
                               -increment);
    }

    event.sbrk.result    = (void*)-1; /* same value as MAP_FAILED */
    event.sbrk.increment = increment;
    ucm_event_dispatch(UCM_EVENT_SBRK, &event);

    /* Growing: the new region starts at the old break */
    if ((increment > 0) && (event.sbrk.result != MAP_FAILED)) {
        ucm_dispatch_vm_mmap(old_addr, increment);
    }

    ucm_event_leave();

    return event.sbrk.result == MAP_FAILED ? -1 : 0;
#else
    return -1;
#endif
}
418
/* madvise() entry point: advice values that discard page contents are
 * reported as VM_UNMAPPED before the handler chain performs the real
 * madvise. Returns 0 on success, -1 on failure. */
int ucm_madvise(void *addr, size_t length, int advice)
{
    ucm_event_t event;

    ucm_event_enter();

    ucm_trace("ucm_madvise(addr=%p length=%zu advice=%d)", addr, length, advice);

    /* madvise(MADV_DONTNEED) and madvise(MADV_FREE) are releasing pages */
    if ((advice == MADV_DONTNEED)
#if HAVE_DECL_MADV_REMOVE
        || (advice == MADV_REMOVE)
#endif
#if HAVE_DECL_POSIX_MADV_DONTNEED
        || (advice == POSIX_MADV_DONTNEED)
#endif
#if HAVE_DECL_MADV_FREE
        || (advice == MADV_FREE)
#endif
       ) {
        ucm_dispatch_vm_munmap(addr, length);
    }

    event.madvise.result = -1;
    event.madvise.addr   = addr;
    event.madvise.length = length;
    event.madvise.advice = advice;
    ucm_event_dispatch(UCM_EVENT_MADVISE, &event);

    ucm_event_leave();

    return event.madvise.result;
}
452
/* Insert a handler into the global list, keeping it sorted by ascending
 * priority; handlers with equal priority retain registration order. */
void ucm_event_handler_add(ucm_event_handler_t *handler)
{
    ucm_event_handler_t *pos;

    ucm_event_enter_exclusive();

    ucs_list_for_each(pos, &ucm_event_handlers, list) {
        if (handler->priority < pos->priority) {
            /* First existing handler with a higher priority - go before it */
            ucs_list_insert_before(&pos->list, &handler->list);
            ucm_event_leave();
            return;
        }
    }

    /* Highest priority so far (or empty list) - append at the tail */
    ucs_list_add_tail(&ucm_event_handlers, &handler->list);
    ucm_event_leave();
}
469
/* Unlink a previously added handler from the global list. The handler's
 * memory remains owned by the caller. */
void ucm_event_handler_remove(ucm_event_handler_t *handler)
{
    ucm_event_enter_exclusive();
    ucs_list_del(&handler->list);
    ucm_event_leave();
}
476
/* Install the hooks (mmap, malloc, and module-provided installers) required
 * to deliver the requested event bitmask. Called lazily from
 * ucm_set_event_handler() for events not declared external. */
static ucs_status_t ucm_event_install(int events)
{
    static ucs_init_once_t init_once = UCS_INIT_ONCE_INITIALIZER;
    UCS_MODULE_FRAMEWORK_DECLARE(ucm);
    ucm_event_installer_t *event_installer;
    int malloc_events;
    ucs_status_t status;

    /* One-time setup; ucm_prevent_dl_unload() presumably pins loaded shared
     * objects so installed hooks cannot be unmapped - see its definition */
    UCS_INIT_ONCE(&init_once) {
        ucm_prevent_dl_unload();
    }

    /* TODO lock */
    status = ucm_mmap_install(events);
    if (status != UCS_OK) {
        ucm_debug("failed to install mmap events");
        goto out_unlock;
    }

    ucm_debug("mmap hooks are ready");

    /* Memory-type alloc/free events are not served by the malloc hooks -
     * strip them before installing */
    malloc_events = events & ~(UCM_EVENT_MEM_TYPE_ALLOC |
                               UCM_EVENT_MEM_TYPE_FREE);
    status = ucm_malloc_install(malloc_events);
    if (status != UCS_OK) {
        ucm_debug("failed to install malloc events");
        goto out_unlock;
    }

    ucm_debug("malloc hooks are ready");

    /* Call extra event installers */
    UCS_MODULE_FRAMEWORK_LOAD(ucm, UCS_MODULE_LOAD_FLAG_NODELETE);
    ucs_list_for_each(event_installer, &ucm_event_installer_list, list) {
        status = event_installer->install(events);
        if (status != UCS_OK) {
            goto out_unlock;
        }
    }

    status = UCS_OK;

out_unlock:
    return status;

}
523
/*
 * Register a user callback for a set of events.
 * events   - bitmask of UCM_EVENT_* values, optionally combined with
 *            UCM_EVENT_FLAG_* modifiers.
 * priority - handlers run in ascending priority order; the built-in handler
 *            that performs the original call runs at priority 0.
 * cb/arg   - callback and its opaque user argument.
 * Returns UCS_OK, or an error if the events are invalid, events are globally
 * disabled, hook installation failed, or allocation failed.
 */
ucs_status_t ucm_set_event_handler(int events, int priority,
                                   ucm_event_callback_t cb, void *arg)
{
    ucm_event_installer_t *event_installer;
    ucm_event_handler_t *handler;
    ucs_status_t status;
    int flags;

    /* Reject any bits outside the known event and flag sets */
    if (events & ~(UCM_EVENT_MMAP|UCM_EVENT_MUNMAP|UCM_EVENT_MREMAP|
                   UCM_EVENT_SHMAT|UCM_EVENT_SHMDT|
                   UCM_EVENT_SBRK|
                   UCM_EVENT_MADVISE|
                   UCM_EVENT_VM_MAPPED|UCM_EVENT_VM_UNMAPPED|
                   UCM_EVENT_MEM_TYPE_ALLOC|UCM_EVENT_MEM_TYPE_FREE|
                   UCM_EVENT_FLAG_NO_INSTALL|
                   UCM_EVENT_FLAG_EXISTING_ALLOC)) {
        return UCS_ERR_INVALID_PARAM;
    }

    if (events && !ucm_global_opts.enable_events) {
        return UCS_ERR_UNSUPPORTED;
    }

    /* separate event flags from real events */
    flags   = events & (UCM_EVENT_FLAG_NO_INSTALL |
                        UCM_EVENT_FLAG_EXISTING_ALLOC);
    events &= ~flags;

    /* Install hooks only for events that are neither declared external nor
     * explicitly opted out of installation */
    if (!(flags & UCM_EVENT_FLAG_NO_INSTALL) && (events & ~ucm_external_events)) {
        status = ucm_event_install(events & ~ucm_external_events);
        if (status != UCS_OK) {
            return status;
        }
    }

    handler = malloc(sizeof(*handler));
    if (handler == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    handler->events   = events;
    handler->priority = priority;
    handler->cb       = cb;
    handler->arg      = arg;

    ucm_event_handler_add(handler);

    /* Optionally replay allocations that already exist to the new handler */
    if (flags & UCM_EVENT_FLAG_EXISTING_ALLOC) {
        ucs_list_for_each(event_installer, &ucm_event_installer_list, list) {
            event_installer->get_existing_alloc(handler);
        }
    }

    ucm_debug("added user handler (func=%p arg=%p) for events=0x%x prio=%d", cb,
              arg, events, priority);
    return UCS_OK;
}
581
/* Mark events as generated externally by the application, so
 * ucm_set_event_handler() will not try to install hooks for them. */
void ucm_set_external_event(int events)
{
    ucm_event_enter_exclusive();
    ucm_external_events |= events;
    ucm_event_leave();
}
588
/* Clear the "externally generated" mark set by ucm_set_external_event(). */
void ucm_unset_external_event(int events)
{
    ucm_event_enter_exclusive();
    ucm_external_events &= ~events;
    ucm_event_leave();
}
595
/* Remove the given event bits from every handler registered with the same
 * (cb, arg) pair; handlers left with no events are unlinked and freed. */
void ucm_unset_event_handler(int events, ucm_event_callback_t cb, void *arg)
{
    ucm_event_handler_t *elem, *tmp;
    UCS_LIST_HEAD(gc_list);

    ucm_event_enter_exclusive();
    ucs_list_for_each_safe(elem, tmp, &ucm_event_handlers, list) {
        if ((cb == elem->cb) && (arg == elem->arg)) {
            elem->events &= ~events;
            if (elem->events == 0) {
                /* Defer the free: move onto a local garbage list first */
                ucs_list_del(&elem->list);
                ucs_list_add_tail(&gc_list, &elem->list);
            }
        }
    }
    ucm_event_leave();

    /* Do not release memory while we hold event lock - may deadlock */
    ucs_list_for_each_safe(elem, tmp, &gc_list, list) {
        free(elem);
    }
}
618
/* Verify that hooks for the requested events were actually installed. */
ucs_status_t ucm_test_events(int events)
{
    return ucm_mmap_test_installed_events(events);
}
623
/* Verify that the events the application declared external (via
 * ucm_set_external_event) are in fact being reported. */
ucs_status_t ucm_test_external_events(int events)
{
    return ucm_mmap_test_events(events & ucm_external_events, "external");
}
628
/* Library constructor: set up the lock and hash used to track shmat()
 * attach addresses and their segment sizes. */
UCS_STATIC_INIT {
    ucs_recursive_spinlock_init(&ucm_kh_lock, 0);
    kh_init_inplace(ucm_ptr_size, &ucm_shmat_ptrs);
}
633
/* Library destructor: release the shmat tracking hash and its lock. */
UCS_STATIC_CLEANUP {
    ucs_status_t status;

    kh_destroy_inplace(ucm_ptr_size, &ucm_shmat_ptrs);

    status = ucs_recursive_spinlock_destroy(&ucm_kh_lock);
    if (status != UCS_OK) {
        ucm_warn("ucs_recursive_spinlock_destroy() failed (%d)", status);
    }
}
644