1 /*
2 * kmp_threadprivate.cpp -- OpenMP threadprivate support library
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "kmp.h"
14 #include "kmp_i18n.h"
15 #include "kmp_itt.h"
16
17 #define USE_CHECKS_COMMON
18
19 #define KMP_INLINE_SUBR 1
20
21 void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
22 void *data_addr, size_t pc_size);
23 struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
24 void *data_addr,
25 size_t pc_size);
26
27 struct shared_table __kmp_threadprivate_d_table;
28
29 static
30 #ifdef KMP_INLINE_SUBR
31 __forceinline
32 #endif
33 struct private_common *
__kmp_threadprivate_find_task_common(struct common_table * tbl,int gtid,void * pc_addr)34 __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
35 void *pc_addr)
36
37 {
38 struct private_common *tn;
39
40 #ifdef KMP_TASK_COMMON_DEBUG
41 KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
42 "address %p\n",
43 gtid, pc_addr));
44 dump_list();
45 #endif
46
47 for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
48 if (tn->gbl_addr == pc_addr) {
49 #ifdef KMP_TASK_COMMON_DEBUG
50 KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
51 "node %p on list\n",
52 gtid, pc_addr));
53 #endif
54 return tn;
55 }
56 }
57 return 0;
58 }
59
60 static
61 #ifdef KMP_INLINE_SUBR
62 __forceinline
63 #endif
64 struct shared_common *
__kmp_find_shared_task_common(struct shared_table * tbl,int gtid,void * pc_addr)65 __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
66 void *pc_addr) {
67 struct shared_common *tn;
68
69 for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
70 if (tn->gbl_addr == pc_addr) {
71 #ifdef KMP_TASK_COMMON_DEBUG
72 KC_TRACE(
73 10,
74 ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
75 gtid, pc_addr));
76 #endif
77 return tn;
78 }
79 }
80 return 0;
81 }
82
83 // Create a template for the data initialized storage. Either the template is
84 // NULL indicating zero fill, or the template is a copy of the original data.
__kmp_init_common_data(void * pc_addr,size_t pc_size)85 static struct private_data *__kmp_init_common_data(void *pc_addr,
86 size_t pc_size) {
87 struct private_data *d;
88 size_t i;
89 char *p;
90
91 d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
92 /*
93 d->data = 0; // AC: commented out because __kmp_allocate zeroes the
94 memory
95 d->next = 0;
96 */
97 d->size = pc_size;
98 d->more = 1;
99
100 p = (char *)pc_addr;
101
102 for (i = pc_size; i > 0; --i) {
103 if (*p++ != '\0') {
104 d->data = __kmp_allocate(pc_size);
105 KMP_MEMCPY(d->data, pc_addr, pc_size);
106 break;
107 }
108 }
109
110 return d;
111 }
112
113 // Initialize the data area from the template.
__kmp_copy_common_data(void * pc_addr,struct private_data * d)114 static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
115 char *addr = (char *)pc_addr;
116 int i, offset;
117
118 for (offset = 0; d != 0; d = d->next) {
119 for (i = d->more; i > 0; --i) {
120 if (d->data == 0)
121 memset(&addr[offset], '\0', d->size);
122 else
123 KMP_MEMCPY(&addr[offset], d->data, d->size);
124 offset += d->size;
125 }
126 }
127 }
128
/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
// One-time setup of the threadprivate machinery: resets the cache list and
// empties every bucket of the global shared-descriptor hash table, then
// publishes completion via the __kmp_init_common flag (written with TCW_4).
void __kmp_common_initialize(void) {
  if (!TCR_4(__kmp_init_common)) {
    int q;
#ifdef KMP_DEBUG
    int gtid;
#endif

    // No threadprivate caches exist yet.
    __kmp_threadpriv_cache_list = NULL;

#ifdef KMP_DEBUG
    /* verify the uber masters were initialized */
    // Debug-only sanity pass: every active root must have an uber thread
    // whose per-thread private table is still completely empty.
    for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
      if (__kmp_root[gtid]) {
        KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
          KMP_DEBUG_ASSERT(
              !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
        /*  __kmp_root[ gitd ]-> r.r_uber_thread ->
         * th.th_pri_common -> data[ q ] = 0;*/
      }
#endif /* KMP_DEBUG */

    // Clear the global table of shared threadprivate descriptors.
    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
      __kmp_threadprivate_d_table.data[q] = 0;

    TCW_4(__kmp_init_common, TRUE);
  }
}
158
/* Call all destructors for threadprivate data belonging to all threads.
   Currently unused! */
// Walks every bucket of the global descriptor table and, for each registered
// threadprivate variable with a destructor, invokes it on each non-uber
// thread's private copy, then on the copy-constructed prototype (obj_init)
// if one exists. Finally empties each bucket (descriptor nodes are not
// freed here).
void __kmp_common_destroy(void) {
  if (TCR_4(__kmp_init_common)) {
    int q;

    // Mark the subsystem uninitialized before tearing anything down.
    TCW_4(__kmp_init_common, FALSE);

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      int gtid;
      struct private_common *tn;
      struct shared_common *d_tn;

      /* C++ destructors need to be called once per thread before exiting.
         Don't call destructors for master thread though unless we used copy
         constructor */

      for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
           d_tn = d_tn->next) {
        if (d_tn->is_vec) {
          // Vector flavor: destructor signature is (addr, vec_len).
          if (d_tn->dt.dtorv != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                // Skip uber/initial threads: their "private" copy aliases
                // the original global (see kmp_threadprivate_insert), so
                // destroying it would destroy the master's data.
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
                  }
                }
              }
            }
            // Destroy the copy-constructed prototype object, if any.
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
            }
          }
        } else {
          // Scalar flavor: destructor signature is (addr).
          if (d_tn->dt.dtor != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtor)(tn->par_addr);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtor)(d_tn->obj_init);
            }
          }
        }
      }
      __kmp_threadprivate_d_table.data[q] = 0;
    }
  }
}
222
/* Call all destructors for threadprivate data belonging to this thread */
// Runs the registered destructor on each private copy owned by thread gtid,
// walking the thread's simple linked list of private_common nodes
// (th_pri_head). Uber/initial threads are skipped because their "private"
// copy aliases the original global (see kmp_threadprivate_insert).
void __kmp_common_destroy_gtid(int gtid) {
  struct private_common *tn;
  struct shared_common *d_tn;

  if (!TCR_4(__kmp_init_gtid)) {
    // This is possible when one of multiple roots initiates early library
    // termination in a sequential region while other teams are active, and its
    // child threads are about to end.
    return;
  }

  KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
  if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {

    if (TCR_4(__kmp_init_common)) {

      /* Cannot do this here since not all threads have destroyed their data */
      /* TCW_4(__kmp_init_common, FALSE); */

      for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {

        // Look up the shared descriptor to learn the destructor flavor
        // (vector vs. scalar) registered for this variable.
        d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                             tn->gbl_addr);

        KMP_DEBUG_ASSERT(d_tn);

        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
          }
          // NOTE(review): obj_init (the copy-constructed prototype) is
          // destroyed here once per non-uber thread reaching this point --
          // confirm that repeated destruction of the prototype is intended.
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            (void)(*d_tn->dt.dtor)(tn->par_addr);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtor)(d_tn->obj_init);
          }
        }
      }
      KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
                    "complete\n",
                    gtid));
    }
  }
}
272
#ifdef KMP_TASK_COMMON_DEBUG
// Debug aid: trace every serial->parallel address pair registered in each
// live thread's private hash table.
static void dump_list(void) {
  for (int gtid = 0; gtid < __kmp_all_nth; ++gtid) {
    if (!__kmp_threads[gtid])
      continue;
    for (int bucket = 0; bucket < KMP_HASH_TABLE_SIZE; ++bucket) {
      struct private_common *tn =
          __kmp_threads[gtid]->th.th_pri_common->data[bucket];
      if (!tn)
        continue; // only emit the header for non-empty buckets

      KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", gtid));

      for (; tn; tn = tn->next) {
        KC_TRACE(10,
                 ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                  tn->gbl_addr, tn->par_addr));
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */
297
298 // NOTE: this routine is to be called only from the serial part of the program.
kmp_threadprivate_insert_private_data(int gtid,void * pc_addr,void * data_addr,size_t pc_size)299 void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
300 void *data_addr, size_t pc_size) {
301 struct shared_common **lnk_tn, *d_tn;
302 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
303 __kmp_threads[gtid]->th.th_root->r.r_active == 0);
304
305 d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
306 pc_addr);
307
308 if (d_tn == 0) {
309 d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
310
311 d_tn->gbl_addr = pc_addr;
312 d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
313 /*
314 d_tn->obj_init = 0; // AC: commented out because __kmp_allocate
315 zeroes the memory
316 d_tn->ct.ctor = 0;
317 d_tn->cct.cctor = 0;;
318 d_tn->dt.dtor = 0;
319 d_tn->is_vec = FALSE;
320 d_tn->vec_len = 0L;
321 */
322 d_tn->cmn_size = pc_size;
323
324 __kmp_acquire_lock(&__kmp_global_lock, gtid);
325
326 lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);
327
328 d_tn->next = *lnk_tn;
329 *lnk_tn = d_tn;
330
331 __kmp_release_lock(&__kmp_global_lock, gtid);
332 }
333 }
334
// Creates (and initializes) thread gtid's private copy of the threadprivate
// variable at pc_addr. Registers a shared descriptor for the address if none
// exists yet, links the new private_common node into both the thread's hash
// table and its simple th_pri_head list, and initializes the private storage
// via constructor, copy constructor, or POD template as registered.
// Returns the new private_common node.
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size) {
  struct private_common *tn, **tt;
  struct shared_common *d_tn;

  /* +++++++++ START OF CRITICAL SECTION +++++++++ */
  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  // Per-thread node; __kmp_allocate zeroes it.
  tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));

  tn->gbl_addr = pc_addr;

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, gtid,
      pc_addr); /* Only the MASTER data table exists. */

  if (d_tn != 0) {
    /* This threadprivate variable has already been seen. */

    // First thread to instantiate after registration: decide how private
    // copies will be initialized (ctor / cctor prototype / POD template).
    if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
      d_tn->cmn_size = pc_size;

      if (d_tn->is_vec) {
        if (d_tn->ct.ctorv != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctorv != 0) {
          /* Now data initialize the prototype since it was previously
           * registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      } else {
        if (d_tn->ct.ctor != 0) {
          /* Construct from scratch so no prototype exists */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctor != 0) {
          /* Now data initialize the prototype since it was previously
             registered */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      }
    }
  } else {
    // Address never registered: create a descriptor with a POD template.
    struct shared_common **lnk_tn;

    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = pc_addr;
    d_tn->cmn_size = pc_size;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate
       zeroes the memory
            d_tn->ct.ctor = 0;
            d_tn->cct.cctor = 0;
            d_tn->dt.dtor = 0;
            d_tn->is_vec = FALSE;
            d_tn->vec_len = 0L;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }

  tn->cmn_size = d_tn->cmn_size;

  // Uber/initial threads use the original global directly as their "private"
  // copy; all other threads get freshly allocated storage.
  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
    tn->par_addr = (void *)pc_addr;
  } else {
    tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
/* +++++++++ END OF CRITICAL SECTION +++++++++ */

#ifdef USE_CHECKS_COMMON
  // Requested size may not exceed the registered common-block size.
  if (pc_size > d_tn->cmn_size) {
    KC_TRACE(
        10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
             " ,%" KMP_UINTPTR_SPEC ")\n",
             pc_addr, pc_size, d_tn->cmn_size));
    KMP_FATAL(TPCommonBlocksInconsist);
  }
#endif /* USE_CHECKS_COMMON */

  // Insert into this thread's private hash table (head of bucket).
  tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);

#ifdef KMP_TASK_COMMON_DEBUG
  if (*tt != 0) {
    KC_TRACE(
        10,
        ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
         gtid, pc_addr));
  }
#endif
  tn->next = *tt;
  *tt = tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10,
           ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
            gtid, pc_addr));
  dump_list();
#endif

  /* Link the node into a simple list */

  tn->link = __kmp_threads[gtid]->th.th_pri_head;
  __kmp_threads[gtid]->th.th_pri_head = tn;

  // Uber/initial thread's copy aliases the global: no initialization needed.
  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
    return tn;

  /* if C++ object with copy constructor, use it;
   * else if C++ object with constructor, use it for the non-master copies only;
   * else use pod_init and memcpy
   *
   * C++ constructors need to be called once for each non-master thread on
   * allocate
   * C++ copy constructors need to be called once for each thread on allocate */

  /* C++ object with constructors/destructors; don't call constructors for
     master thread though */
  if (d_tn->is_vec) {
    if (d_tn->ct.ctorv != 0) {
      (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
    } else if (d_tn->cct.cctorv != 0) {
      (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  } else {
    if (d_tn->ct.ctor != 0) {
      (void)(*d_tn->ct.ctor)(tn->par_addr);
    } else if (d_tn->cct.cctor != 0) {
      (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  }
  /* !BUILD_OPENMP_C
      if (tn->par_addr != tn->gbl_addr)
        __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */

  return tn;
}
488
489 /* ------------------------------------------------------------------------ */
490 /* We are currently parallel, and we know the thread id. */
491 /* ------------------------------------------------------------------------ */
492
493 /*!
494 @ingroup THREADPRIVATE
495
496 @param loc source location information
497 @param data pointer to data being privatized
498 @param ctor pointer to constructor function for data
499 @param cctor pointer to copy constructor function for data
500 @param dtor pointer to destructor function for data
501
502 Register constructors and destructors for thread private data.
503 This function is called when executing in parallel, when we know the thread id.
504 */
__kmpc_threadprivate_register(ident_t * loc,void * data,kmpc_ctor ctor,kmpc_cctor cctor,kmpc_dtor dtor)505 void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
506 kmpc_cctor cctor, kmpc_dtor dtor) {
507 struct shared_common *d_tn, **lnk_tn;
508
509 KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));
510
511 #ifdef USE_CHECKS_COMMON
512 /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
513 KMP_ASSERT(cctor == 0);
514 #endif /* USE_CHECKS_COMMON */
515
516 /* Only the global data table exists. */
517 d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);
518
519 if (d_tn == 0) {
520 d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
521 d_tn->gbl_addr = data;
522
523 d_tn->ct.ctor = ctor;
524 d_tn->cct.cctor = cctor;
525 d_tn->dt.dtor = dtor;
526 /*
527 d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate
528 zeroes the memory
529 d_tn->vec_len = 0L;
530 d_tn->obj_init = 0;
531 d_tn->pod_init = 0;
532 */
533 lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);
534
535 d_tn->next = *lnk_tn;
536 *lnk_tn = d_tn;
537 }
538 }
539
__kmpc_threadprivate(ident_t * loc,kmp_int32 global_tid,void * data,size_t size)540 void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
541 size_t size) {
542 void *ret;
543 struct private_common *tn;
544
545 KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));
546
547 #ifdef USE_CHECKS_COMMON
548 if (!__kmp_init_serial)
549 KMP_FATAL(RTLNotInitialized);
550 #endif /* USE_CHECKS_COMMON */
551
552 if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
553 /* The parallel address will NEVER overlap with the data_address */
554 /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
555 * data_address; use data_address = data */
556
557 KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
558 global_tid));
559 kmp_threadprivate_insert_private_data(global_tid, data, data, size);
560
561 ret = data;
562 } else {
563 KC_TRACE(
564 50,
565 ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
566 global_tid, data));
567 tn = __kmp_threadprivate_find_task_common(
568 __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);
569
570 if (tn) {
571 KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
572 #ifdef USE_CHECKS_COMMON
573 if ((size_t)size > tn->cmn_size) {
574 KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
575 " ,%" KMP_UINTPTR_SPEC ")\n",
576 data, size, tn->cmn_size));
577 KMP_FATAL(TPCommonBlocksInconsist);
578 }
579 #endif /* USE_CHECKS_COMMON */
580 } else {
581 /* The parallel address will NEVER overlap with the data_address */
582 /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
583 * data_address = data */
584 KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
585 tn = kmp_threadprivate_insert(global_tid, data, data, size);
586 }
587
588 ret = tn->par_addr;
589 }
590 KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
591 global_tid, ret));
592
593 return ret;
594 }
595
__kmp_find_cache(void * data)596 static kmp_cached_addr_t *__kmp_find_cache(void *data) {
597 kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
598 while (ptr && ptr->data != data)
599 ptr = ptr->next;
600 return ptr;
601 }
602
/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param global_tid  global thread number
 @param data  pointer to data to privatize
 @param size  size of data to privatize
 @param cache  pointer to cache
 @return pointer to private storage

 Allocate private storage for threadprivate data.
*/
void *
__kmpc_threadprivate_cached(ident_t *loc,
                            kmp_int32 global_tid, // gtid.
                            void *data, // Pointer to original global variable.
                            size_t size, // Size of original global variable.
                            void ***cache) {
  KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
                "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
                global_tid, *cache, data, size));

  // Double-checked pattern: cheap unsynchronized read first, then re-check
  // under __kmp_global_lock before creating/attaching the cache.
  if (TCR_PTR(*cache) == 0) {
    __kmp_acquire_lock(&__kmp_global_lock, global_tid);

    if (TCR_PTR(*cache) == 0) {
      __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
      // Compiler often passes in NULL cache, even if it's already been created
      void **my_cache;
      kmp_cached_addr_t *tp_cache_addr;
      // Look for an existing cache
      tp_cache_addr = __kmp_find_cache(data);
      if (!tp_cache_addr) { // Cache was never created; do it now
        __kmp_tp_cached = 1;
        // One allocation holds both the pointer array (__kmp_tp_capacity
        // slots) and, immediately after it, the kmp_cached_addr_t list node.
        KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
                           sizeof(void *) * __kmp_tp_capacity +
                           sizeof(kmp_cached_addr_t)););
        // No need to zero the allocated memory; __kmp_allocate does that.
        KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
                      "address %p\n",
                      global_tid, my_cache));
        /* TODO: free all this memory in __kmp_common_destroy using
         * __kmp_threadpriv_cache_list */
        /* Add address of mycache to linked list for cleanup later */
        tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
        tp_cache_addr->addr = my_cache;
        tp_cache_addr->data = data;
        tp_cache_addr->compiler_cache = cache;
        tp_cache_addr->next = __kmp_threadpriv_cache_list;
        __kmp_threadpriv_cache_list = tp_cache_addr;
      } else { // A cache was already created; use it
        my_cache = tp_cache_addr->addr;
        tp_cache_addr->compiler_cache = cache;
      }
      KMP_MB();

      // Publish the cache to the compiler's location only after it is fully
      // initialized (the KMP_MB() above orders the writes).
      TCW_PTR(*cache, my_cache);
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);

      KMP_MB();
    }
    __kmp_release_lock(&__kmp_global_lock, global_tid);
  }

  // Fast path: reuse the cached per-thread pointer; otherwise resolve it
  // through __kmpc_threadprivate and remember it for next time.
  void *ret;
  if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
    ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);

    TCW_PTR((*cache)[global_tid], ret);
  }
  KC_TRACE(10,
           ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
            global_tid, ret));
  return ret;
}
677
// This function should only be called when both __kmp_tp_cached_lock and
// kmp_forkjoin_lock are held.
// Grows every active threadprivate cache to newCapacity slots: allocates a
// new cache+list-node block, copies the old slots, links the new node into
// the cleanup list, and tries to repoint the compiler's cache location at
// the new array. Finally publishes the new global capacity.
void __kmp_threadprivate_resize_cache(int newCapacity) {
  KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
                newCapacity));

  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    if (ptr->data) { // this location has an active cache; resize it
      void **my_cache;
      // Same layout as __kmpc_threadprivate_cached: pointer array followed
      // by the kmp_cached_addr_t node in one allocation.
      KMP_ITT_IGNORE(my_cache =
                         (void **)__kmp_allocate(sizeof(void *) * newCapacity +
                                                 sizeof(kmp_cached_addr_t)););
      // No need to zero the allocated memory; __kmp_allocate does that.
      KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
                    my_cache));
      // Now copy old cache into new cache
      void **old_cache = ptr->addr;
      for (int i = 0; i < __kmp_tp_capacity; ++i) {
        my_cache[i] = old_cache[i];
      }

      // Add address of new my_cache to linked list for cleanup later
      kmp_cached_addr_t *tp_cache_addr;
      tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
      tp_cache_addr->addr = my_cache;
      tp_cache_addr->data = ptr->data;
      tp_cache_addr->compiler_cache = ptr->compiler_cache;
      tp_cache_addr->next = __kmp_threadpriv_cache_list;
      __kmp_threadpriv_cache_list = tp_cache_addr;

      // Copy new cache to compiler's location: We can copy directly
      // to (*compiler_cache) if compiler guarantees it will keep
      // using the same location for the cache. This is not yet true
      // for some compilers, in which case we have to check if
      // compiler_cache is still pointing at old cache, and if so, we
      // can point it at the new cache with an atomic compare&swap
      // operation. (Old method will always work, but we should shift
      // to new method (commented line below) when Intel and Clang
      // compilers use new method.)
      (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
                                      my_cache);
      // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);

      // If the store doesn't happen here, the compiler's old behavior will
      // inevitably call __kmpc_threadprivate_cache with a new location for the
      // cache, and that function will store the resized cache there at that
      // point.

      // Nullify old cache's data pointer so we skip it next time
      ptr->data = NULL;
    }
    ptr = ptr->next;
  }
  // After all caches are resized, update __kmp_tp_capacity to the new size
  *(volatile int *)&__kmp_tp_capacity = newCapacity;
}
736
737 /*!
738 @ingroup THREADPRIVATE
739 @param loc source location information
740 @param data pointer to data being privatized
741 @param ctor pointer to constructor function for data
742 @param cctor pointer to copy constructor function for data
743 @param dtor pointer to destructor function for data
744 @param vector_length length of the vector (bytes or elements?)
745 Register vector constructors and destructors for thread private data.
746 */
__kmpc_threadprivate_register_vec(ident_t * loc,void * data,kmpc_ctor_vec ctor,kmpc_cctor_vec cctor,kmpc_dtor_vec dtor,size_t vector_length)747 void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
748 kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
749 kmpc_dtor_vec dtor,
750 size_t vector_length) {
751 struct shared_common *d_tn, **lnk_tn;
752
753 KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));
754
755 #ifdef USE_CHECKS_COMMON
756 /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
757 KMP_ASSERT(cctor == 0);
758 #endif /* USE_CHECKS_COMMON */
759
760 d_tn = __kmp_find_shared_task_common(
761 &__kmp_threadprivate_d_table, -1,
762 data); /* Only the global data table exists. */
763
764 if (d_tn == 0) {
765 d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
766 d_tn->gbl_addr = data;
767
768 d_tn->ct.ctorv = ctor;
769 d_tn->cct.cctorv = cctor;
770 d_tn->dt.dtorv = dtor;
771 d_tn->is_vec = TRUE;
772 d_tn->vec_len = (size_t)vector_length;
773 // d_tn->obj_init = 0; // AC: __kmp_allocate zeroes the memory
774 // d_tn->pod_init = 0;
775 lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);
776
777 d_tn->next = *lnk_tn;
778 *lnk_tn = d_tn;
779 }
780 }
781
// Free every threadprivate cache and empty the cleanup list. Each list node
// (kmp_cached_addr_t) lives inside the same allocation as its cache array
// (see __kmpc_threadprivate_cached), so freeing the cache also frees the
// node -- hence the list head is advanced and the node's fields are cleared
// BEFORE __kmp_free is called; statement order here is load-bearing.
void __kmp_cleanup_threadprivate_caches() {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    void **cache = ptr->addr;
    __kmp_threadpriv_cache_list = ptr->next;
    // Detach the compiler's view of this cache so no stale pointer survives.
    if (*ptr->compiler_cache)
      *ptr->compiler_cache = NULL;
    ptr->compiler_cache = NULL;
    ptr->data = NULL;
    ptr->addr = NULL;
    ptr->next = NULL;
    // Threadprivate data pointed at by cache entries are destroyed at end of
    // __kmp_launch_thread with __kmp_common_destroy_gtid.
    __kmp_free(cache); // implicitly frees ptr too
    ptr = __kmp_threadpriv_cache_list;
  }
}
800