/*
 * kmp_threadprivate.cpp -- OpenMP threadprivate support library
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"

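// USE_CHECKS_COMMON enables the extra consistency checks in this file (the
// common-block size checks and the cctor == 0 assertions). KMP_INLINE_SUBR
// requests __forceinline for the hash-table lookup helpers defined below.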
#define USE_CHECKS_COMMON

#define KMP_INLINE_SUBR 1

void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size);
struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size);

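// __kmp_threadprivate_d_table is the process-wide table that maps the address
// of a threadprivate variable to its shared_common descriptor (constructors,
// destructors, and the initial data image). Each thread additionally keeps a
// per-thread table of its own copies in th.th_pri_common.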
struct shared_table __kmp_threadprivate_d_table;

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct private_common *
    __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid,
                                         void *pc_addr) {
  struct private_common *tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with "
                "address %p\n",
                gtid, pc_addr));
  dump_list();
#endif

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found "
                    "node %p on list\n",
                    gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

static
#ifdef KMP_INLINE_SUBR
    __forceinline
#endif
    struct shared_common *
    __kmp_find_shared_task_common(struct shared_table *tbl, int gtid,
                                  void *pc_addr) {
  struct shared_common *tn;

  for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) {
    if (tn->gbl_addr == pc_addr) {
#ifdef KMP_TASK_COMMON_DEBUG
      KC_TRACE(
          10,
          ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n",
           gtid, pc_addr));
#endif
      return tn;
    }
  }
  return 0;
}

// Create a template for the initialized data storage. The template is either
// NULL, indicating zero fill, or a copy of the original data.
static struct private_data *__kmp_init_common_data(void *pc_addr,
                                                   size_t pc_size) {
  struct private_data *d;
  size_t i;
  char *p;

  d = (struct private_data *)__kmp_allocate(sizeof(struct private_data));
  /*
      d->data = 0;  // AC: commented out because __kmp_allocate zeroes the memory
      d->next = 0;
  */
  d->size = pc_size;
  d->more = 1;

  p = (char *)pc_addr;

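  // If every byte of the original is zero, leave d->data NULL so later copies
  // can simply memset; otherwise take a snapshot of the initial image.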
  for (i = pc_size; i > 0; --i) {
    if (*p++ != '\0') {
      d->data = __kmp_allocate(pc_size);
      KMP_MEMCPY(d->data, pc_addr, pc_size);
      break;
    }
  }

  return d;
}

// Initialize the data area from the template.
static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) {
  char *addr = (char *)pc_addr;

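  // Each private_data node describes 'more' consecutive chunks of 'size'
  // bytes; a NULL data pointer means zero fill, otherwise the template bytes
  // are replicated into the destination.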
  for (size_t offset = 0; d != 0; d = d->next) {
    for (int i = d->more; i > 0; --i) {
      if (d->data == 0)
        memset(&addr[offset], '\0', d->size);
      else
        KMP_MEMCPY(&addr[offset], d->data, d->size);
      offset += d->size;
    }
  }
}

/* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */
void __kmp_common_initialize(void) {
  if (!TCR_4(__kmp_init_common)) {
    int q;
#ifdef KMP_DEBUG
    int gtid;
#endif

    __kmp_threadpriv_cache_list = NULL;

#ifdef KMP_DEBUG
    /* verify the uber masters were initialized */
    for (gtid = 0; gtid < __kmp_threads_capacity; gtid++)
      if (__kmp_root[gtid]) {
        KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread);
        for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
          KMP_DEBUG_ASSERT(
              !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]);
        /* __kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q] = 0; */
      }
#endif /* KMP_DEBUG */

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q)
      __kmp_threadprivate_d_table.data[q] = 0;

    TCW_4(__kmp_init_common, TRUE);
  }
}

/* Call all destructors for threadprivate data belonging to all threads.
   Currently unused! */
void __kmp_common_destroy(void) {
  if (TCR_4(__kmp_init_common)) {
    int q;

    TCW_4(__kmp_init_common, FALSE);

    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      int gtid;
      struct private_common *tn;
      struct shared_common *d_tn;

      /* C++ destructors need to be called once per thread before exiting.
         Do not call destructors for the master thread, though, unless a copy
         constructor was used. */

      for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn;
           d_tn = d_tn->next) {
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
            }
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            for (gtid = 0; gtid < __kmp_all_nth; ++gtid) {
              if (__kmp_threads[gtid]) {
                if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid))
                                       : (!KMP_UBER_GTID(gtid))) {
                  tn = __kmp_threadprivate_find_task_common(
                      __kmp_threads[gtid]->th.th_pri_common, gtid,
                      d_tn->gbl_addr);
                  if (tn) {
                    (*d_tn->dt.dtor)(tn->par_addr);
                  }
                }
              }
            }
            if (d_tn->obj_init != 0) {
              (*d_tn->dt.dtor)(d_tn->obj_init);
            }
          }
        }
      }
      __kmp_threadprivate_d_table.data[q] = 0;
    }
  }
}

/* Call all destructors for threadprivate data belonging to this thread */
void __kmp_common_destroy_gtid(int gtid) {
  struct private_common *tn;
  struct shared_common *d_tn;

  if (!TCR_4(__kmp_init_gtid)) {
    // This is possible when one of multiple roots initiates early library
    // termination in a sequential region while other teams are active, and its
    // child threads are about to end.
    return;
  }

  KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid));
  if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) {

    if (TCR_4(__kmp_init_common)) {

      /* Cannot do this here since not all threads have destroyed their data */
      /* TCW_4(__kmp_init_common, FALSE); */

      for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) {

        d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                             tn->gbl_addr);
        if (d_tn == NULL)
          continue;
        if (d_tn->is_vec) {
          if (d_tn->dt.dtorv != 0) {
            (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len);
          }
        } else {
          if (d_tn->dt.dtor != 0) {
            (void)(*d_tn->dt.dtor)(tn->par_addr);
          }
          if (d_tn->obj_init != 0) {
            (void)(*d_tn->dt.dtor)(d_tn->obj_init);
          }
        }
      }
      KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors "
                    "complete\n",
                    gtid));
    }
  }
}

#ifdef KMP_TASK_COMMON_DEBUG
static void dump_list(void) {
  int p, q;

  for (p = 0; p < __kmp_all_nth; ++p) {
    if (!__kmp_threads[p])
      continue;
    for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) {
      if (__kmp_threads[p]->th.th_pri_common->data[q]) {
        struct private_common *tn;

        KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p));

        for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn;
             tn = tn->next) {
          KC_TRACE(10,
                   ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n",
                    tn->gbl_addr, tn->par_addr));
        }
      }
    }
  }
}
#endif /* KMP_TASK_COMMON_DEBUG */

// NOTE: this routine is to be called only from the serial part of the program.
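// It records only the initial image of the variable in the global table; no
// per-thread copy is allocated here (the caller hands back the original
// address in this case).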
void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,
                                           void *data_addr, size_t pc_size) {
  struct shared_common **lnk_tn, *d_tn;
  KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
                   __kmp_threads[gtid]->th.th_root->r.r_active == 0);

  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid,
                                       pc_addr);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));

    d_tn->gbl_addr = pc_addr;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate zeroes the memory
            d_tn->ct.ctor = 0;
            d_tn->cct.cctor = 0;
            d_tn->dt.dtor = 0;
            d_tn->is_vec = FALSE;
            d_tn->vec_len = 0L;
    */
    d_tn->cmn_size = pc_size;

    __kmp_acquire_lock(&__kmp_global_lock, gtid);

    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;

    __kmp_release_lock(&__kmp_global_lock, gtid);
  }
}

struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,
                                                void *data_addr,
                                                size_t pc_size) {
  struct private_common *tn, **tt;
  struct shared_common *d_tn;

  /* +++++++++ START OF CRITICAL SECTION +++++++++ */
  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common));

  tn->gbl_addr = pc_addr;

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, gtid,
      pc_addr); /* Only the MASTER data table exists. */

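  // If a descriptor already exists (e.g. it was registered via
  // __kmpc_threadprivate_register) but has no initialization template yet,
  // build one now; otherwise create and link a fresh descriptor.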
  if (d_tn != 0) {
    /* This threadprivate variable has already been seen. */

    if (d_tn->pod_init == 0 && d_tn->obj_init == 0) {
      d_tn->cmn_size = pc_size;

      if (d_tn->is_vec) {
        if (d_tn->ct.ctorv != 0) {
          /* A constructor was registered; each copy is constructed from
             scratch, so no prototype is needed. */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctorv != 0) {
          /* Only a copy constructor was registered, so data-initialize the
             prototype from the original now. */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      } else {
        if (d_tn->ct.ctor != 0) {
          /* A constructor was registered; each copy is constructed from
             scratch, so no prototype is needed. */
          d_tn->obj_init = 0;
        } else if (d_tn->cct.cctor != 0) {
          /* Only a copy constructor was registered, so data-initialize the
             prototype from the original now. */
          d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size);
          (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr);
        } else {
          d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size);
        }
      }
    }
  } else {
    struct shared_common **lnk_tn;

    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = pc_addr;
    d_tn->cmn_size = pc_size;
    d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size);
    /*
            d_tn->obj_init = 0;  // AC: commented out because __kmp_allocate zeroes the memory
            d_tn->ct.ctor = 0;
            d_tn->cct.cctor = 0;
            d_tn->dt.dtor = 0;
            d_tn->is_vec = FALSE;
            d_tn->vec_len = 0L;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }

  tn->cmn_size = d_tn->cmn_size;

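  // The master (uber) thread reuses the original storage; every other thread
  // gets a freshly allocated private copy.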
  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) {
    tn->par_addr = (void *)pc_addr;
  } else {
    tn->par_addr = (void *)__kmp_allocate(tn->cmn_size);
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
/* +++++++++ END OF CRITICAL SECTION +++++++++ */

#ifdef USE_CHECKS_COMMON
  if (pc_size > d_tn->cmn_size) {
    KC_TRACE(
        10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
             " ,%" KMP_UINTPTR_SPEC ")\n",
             pc_addr, pc_size, d_tn->cmn_size));
    KMP_FATAL(TPCommonBlocksInconsist);
  }
#endif /* USE_CHECKS_COMMON */

  tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]);

#ifdef KMP_TASK_COMMON_DEBUG
  if (*tt != 0) {
    KC_TRACE(
        10,
        ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n",
         gtid, pc_addr));
  }
#endif
  tn->next = *tt;
  *tt = tn;

#ifdef KMP_TASK_COMMON_DEBUG
  KC_TRACE(10,
           ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n",
            gtid, pc_addr));
  dump_list();
#endif

  /* Link the node into a simple list */

  tn->link = __kmp_threads[gtid]->th.th_pri_head;
  __kmp_threads[gtid]->th.th_pri_head = tn;

  if ((__kmp_foreign_tp) ? (KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid)))
    return tn;

  /* If the variable is a C++ object with a copy constructor, use it; else, if
     it is a C++ object with a constructor, use it for the non-master copies
     only; else use pod_init and memcpy.

     C++ constructors need to be called once for each non-master thread on
     allocate. C++ copy constructors need to be called once for each thread on
     allocate. */

  /* C++ object with constructors/destructors; don't call constructors for
     master thread though */
  if (d_tn->is_vec) {
    if (d_tn->ct.ctorv != 0) {
      (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len);
    } else if (d_tn->cct.cctorv != 0) {
      (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  } else {
    if (d_tn->ct.ctor != 0) {
      (void)(*d_tn->ct.ctor)(tn->par_addr);
    } else if (d_tn->cct.cctor != 0) {
      (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init);
    } else if (tn->par_addr != tn->gbl_addr) {
      __kmp_copy_common_data(tn->par_addr, d_tn->pod_init);
    }
  }
  /* !BUILD_OPENMP_C
      if (tn->par_addr != tn->gbl_addr)
          __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */

  return tn;
}

/* ------------------------------------------------------------------------ */
/* We are currently parallel, and we know the thread id.                    */
/* ------------------------------------------------------------------------ */

/*!
 @ingroup THREADPRIVATE

 @param loc source location information
 @param data  pointer to data being privatized
 @param ctor  pointer to constructor function for data
 @param cctor  pointer to copy constructor function for data
 @param dtor  pointer to destructor function for data

 Register constructors and destructors for thread private data.
 This function is called when executing in parallel, when we know the thread id.
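
 For illustration only (not verbatim compiler output; MyClass, my_ctor and
 my_dtor are hypothetical names), a registration call might look like:
 @code
 static MyClass tp_obj;  // hypothetical C++ object declared threadprivate
 __kmpc_threadprivate_register(&loc, &tp_obj, my_ctor, NULL, my_dtor);
 @endcode
 where my_ctor and my_dtor have the kmpc_ctor / kmpc_dtor signatures and the
 copy constructor argument is NULL (see the cctor == 0 check below).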
*/
void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor,
                                   kmpc_cctor cctor, kmpc_dtor dtor) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  /* Only the global data table exists. */
  d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data);

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctor = ctor;
    d_tn->cct.cctor = cctor;
    d_tn->dt.dtor = dtor;
    /*
            d_tn->is_vec = FALSE;  // AC: commented out because __kmp_allocate zeroes the memory
            d_tn->vec_len = 0L;
            d_tn->obj_init = 0;
            d_tn->pod_init = 0;
    */
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}

void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data,
                           size_t size) {
  void *ret;
  struct private_common *tn;

  KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid));

#ifdef USE_CHECKS_COMMON
  if (!__kmp_init_serial)
    KMP_FATAL(RTLNotInitialized);
#endif /* USE_CHECKS_COMMON */

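  // Outside an active parallel region (and without foreign-thread
  // threadprivate support) the master thread keeps using the original
  // variable; only its initial image is recorded. Otherwise look up, or
  // create, this thread's private copy.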
  if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) {
    /* The parallel address will NEVER overlap with the data_address */
    /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the
     * data_address; use data_address = data */

    KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n",
                  global_tid));
    kmp_threadprivate_insert_private_data(global_tid, data, data, size);

    ret = data;
  } else {
    KC_TRACE(
        50,
        ("__kmpc_threadprivate: T#%d try to find private data at address %p\n",
         global_tid, data));
    tn = __kmp_threadprivate_find_task_common(
        __kmp_threads[global_tid]->th.th_pri_common, global_tid, data);

    if (tn) {
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid));
#ifdef USE_CHECKS_COMMON
      if ((size_t)size > tn->cmn_size) {
        KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC
                      " ,%" KMP_UINTPTR_SPEC ")\n",
                      data, size, tn->cmn_size));
        KMP_FATAL(TPCommonBlocksInconsist);
      }
#endif /* USE_CHECKS_COMMON */
    } else {
      /* The parallel address will NEVER overlap with the data_address */
      /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use
       * data_address = data */
      KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid));
      tn = kmp_threadprivate_insert(global_tid, data, data, size);
    }

    ret = tn->par_addr;
  }
  KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n",
                global_tid, ret));

  return ret;
}

static kmp_cached_addr_t *__kmp_find_cache(void *data) {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;
  while (ptr && ptr->data != data)
    ptr = ptr->next;
  return ptr;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param global_tid  global thread number
 @param data  pointer to data to privatize
 @param size  size of data to privatize
 @param cache  pointer to cache
 @return pointer to private storage

 Allocate private storage for threadprivate data.
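
 As an illustrative sketch only (not verbatim compiler output; x and x_cache
 are hypothetical), the compiler typically keeps one persistent cache pointer
 per threadprivate variable and passes its address on every access:
 @code
 static void **x_cache;  // hypothetical per-variable cache, initially NULL
 int *p = (int *)__kmpc_threadprivate_cached(&loc, gtid, &x, sizeof(x),
                                             &x_cache);
 @endcode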
*/
void *
__kmpc_threadprivate_cached(ident_t *loc,
                            kmp_int32 global_tid, // gtid.
                            void *data, // Pointer to original global variable.
                            size_t size, // Size of original global variable.
                            void ***cache) {
  KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, "
                "address: %p, size: %" KMP_SIZE_T_SPEC "\n",
                global_tid, *cache, data, size));

  if (TCR_PTR(*cache) == 0) {
    __kmp_acquire_lock(&__kmp_global_lock, global_tid);

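    // Re-check under __kmp_global_lock: another thread may have installed the
    // cache while we were waiting (double-checked locking).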
    if (TCR_PTR(*cache) == 0) {
      __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
      // Compiler often passes in NULL cache, even if it's already been created
      void **my_cache;
      kmp_cached_addr_t *tp_cache_addr;
      // Look for an existing cache
      tp_cache_addr = __kmp_find_cache(data);
      if (!tp_cache_addr) { // Cache was never created; do it now
        __kmp_tp_cached = 1;
        KMP_ITT_IGNORE(my_cache = (void **)__kmp_allocate(
                           sizeof(void *) * __kmp_tp_capacity +
                           sizeof(kmp_cached_addr_t)););
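        // The kmp_cached_addr_t bookkeeping record is co-allocated at the tail
        // of the same block, just past the __kmp_tp_capacity cache slots (see
        // &my_cache[__kmp_tp_capacity] below).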
        // No need to zero the allocated memory; __kmp_allocate does that.
        KC_TRACE(50, ("__kmpc_threadprivate_cached: T#%d allocated cache at "
                      "address %p\n",
                      global_tid, my_cache));
        /* TODO: free all this memory in __kmp_common_destroy using
         * __kmp_threadpriv_cache_list */
        /* Add the address of my_cache to the linked list for cleanup later */
        tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity];
        tp_cache_addr->addr = my_cache;
        tp_cache_addr->data = data;
        tp_cache_addr->compiler_cache = cache;
        tp_cache_addr->next = __kmp_threadpriv_cache_list;
        __kmp_threadpriv_cache_list = tp_cache_addr;
      } else { // A cache was already created; use it
        my_cache = tp_cache_addr->addr;
        tp_cache_addr->compiler_cache = cache;
      }
      KMP_MB();

      TCW_PTR(*cache, my_cache);
      __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);

      KMP_MB();
    }
    __kmp_release_lock(&__kmp_global_lock, global_tid);
  }

  void *ret;
  if ((ret = TCR_PTR((*cache)[global_tid])) == 0) {
    ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size);

    TCW_PTR((*cache)[global_tid], ret);
  }
  KC_TRACE(10,
           ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n",
            global_tid, ret));
  return ret;
}

// This function should only be called when both __kmp_tp_cached_lock and
// __kmp_forkjoin_lock are held.
void __kmp_threadprivate_resize_cache(int newCapacity) {
  KC_TRACE(10, ("__kmp_threadprivate_resize_cache: called with size: %d\n",
                newCapacity));

  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    if (ptr->data) { // this location has an active cache; resize it
      void **my_cache;
      KMP_ITT_IGNORE(my_cache =
                         (void **)__kmp_allocate(sizeof(void *) * newCapacity +
                                                 sizeof(kmp_cached_addr_t)););
      // No need to zero the allocated memory; __kmp_allocate does that.
      KC_TRACE(50, ("__kmp_threadprivate_resize_cache: allocated cache at %p\n",
                    my_cache));
      // Now copy old cache into new cache
      void **old_cache = ptr->addr;
      for (int i = 0; i < __kmp_tp_capacity; ++i) {
        my_cache[i] = old_cache[i];
      }

      // Add address of new my_cache to linked list for cleanup later
      kmp_cached_addr_t *tp_cache_addr;
      tp_cache_addr = (kmp_cached_addr_t *)&my_cache[newCapacity];
      tp_cache_addr->addr = my_cache;
      tp_cache_addr->data = ptr->data;
      tp_cache_addr->compiler_cache = ptr->compiler_cache;
      tp_cache_addr->next = __kmp_threadpriv_cache_list;
      __kmp_threadpriv_cache_list = tp_cache_addr;
      // Publish the new cache to the compiler's location. We could store
      // directly to (*compiler_cache) if the compiler guaranteed that it keeps
      // using the same location for the cache, but that is not yet true for
      // some compilers. Instead, check whether compiler_cache still points at
      // the old cache and, if so, point it at the new cache with an atomic
      // compare-and-swap. (The unconditional store, commented out below, can
      // replace this once the Intel and Clang compilers guarantee a stable
      // cache location.)
      (void)KMP_COMPARE_AND_STORE_PTR(tp_cache_addr->compiler_cache, old_cache,
                                      my_cache);
      // TCW_PTR(*(tp_cache_addr->compiler_cache), my_cache);

      // If the store doesn't happen here, the compiler's old behavior will
      // inevitably call __kmpc_threadprivate_cached with a new location for
      // the cache, and that function will store the resized cache there at
      // that point.

      // Nullify the old cache's data pointer so we skip it next time
      ptr->data = NULL;
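      // The old cache itself is not freed here: ptr->addr still references it,
      // and __kmp_cleanup_threadprivate_caches releases it later.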
    }
    ptr = ptr->next;
  }
  // After all caches are resized, update __kmp_tp_capacity to the new size
  *(volatile int *)&__kmp_tp_capacity = newCapacity;
}

/*!
 @ingroup THREADPRIVATE
 @param loc source location information
 @param data  pointer to data being privatized
 @param ctor  pointer to constructor function for data
 @param cctor  pointer to copy constructor function for data
 @param dtor  pointer to destructor function for data
 @param vector_length length of the vector (bytes or elements?)
 Register vector constructors and destructors for thread private data.
*/
void __kmpc_threadprivate_register_vec(ident_t *loc, void *data,
                                       kmpc_ctor_vec ctor, kmpc_cctor_vec cctor,
                                       kmpc_dtor_vec dtor,
                                       size_t vector_length) {
  struct shared_common *d_tn, **lnk_tn;

  KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n"));

#ifdef USE_CHECKS_COMMON
  /* copy constructor must be zero for current code gen (Nov 2002 - jph) */
  KMP_ASSERT(cctor == 0);
#endif /* USE_CHECKS_COMMON */

  d_tn = __kmp_find_shared_task_common(
      &__kmp_threadprivate_d_table, -1,
      data); /* Only the global data table exists. */

  if (d_tn == 0) {
    d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common));
    d_tn->gbl_addr = data;

    d_tn->ct.ctorv = ctor;
    d_tn->cct.cctorv = cctor;
    d_tn->dt.dtorv = dtor;
    d_tn->is_vec = TRUE;
    d_tn->vec_len = (size_t)vector_length;
    // d_tn->obj_init = 0;  // AC: __kmp_allocate zeroes the memory
    // d_tn->pod_init = 0;
    lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]);

    d_tn->next = *lnk_tn;
    *lnk_tn = d_tn;
  }
}

void __kmp_cleanup_threadprivate_caches() {
  kmp_cached_addr_t *ptr = __kmp_threadpriv_cache_list;

  while (ptr) {
    void **cache = ptr->addr;
    __kmp_threadpriv_cache_list = ptr->next;
    if (*ptr->compiler_cache)
      *ptr->compiler_cache = NULL;
    ptr->compiler_cache = NULL;
    ptr->data = NULL;
    ptr->addr = NULL;
    ptr->next = NULL;
    // Threadprivate data pointed at by cache entries are destroyed at end of
    // __kmp_launch_thread with __kmp_common_destroy_gtid.
    __kmp_free(cache); // implicitly frees ptr too
    ptr = __kmp_threadpriv_cache_list;
  }
}