/*
 * Copyright 2010-2012 PathScale, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS
 * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
26 
/**
 * guard.cc: Functions for thread-safe static initialisation.
 *
 * Static values in C++ can be initialised lazily on their first use.  This
 * file contains functions that are used to ensure that two threads attempting
 * to initialise the same static do not call the constructor twice.  This is
 * important because constructors can have side effects, so calling the
 * constructor twice may be very bad.
 *
 * Statics that require initialisation are protected by a 64-bit value (a
 * 32-bit value on ARM).  Any platform that can do 32-bit atomic test and set
 * operations can use this value as a low-overhead lock.  Because statics (in
 * most sane code) are accessed far more times than they are initialised, this
 * lock implementation is heavily optimised towards the case where the static
 * has already been initialised.
 */
43 #include <stdint.h>
44 #include <stdlib.h>
45 #include <stdio.h>
46 
/*
 * Threading support.
 *
 * Everywhere except Minix, <pthread.h> is included and sched_yield() is used
 * as declared by the system headers.
 *
 * On Minix, <minix/mthread.h> is included with _MTHREADIFY_PTHREADS set and
 * sched_yield is marked as a weak symbol.  The wrapper macro below therefore
 * only calls it when the symbol's address is non-null, and is a no-op
 * otherwise.
 */
#if !defined(__minix)
#include <pthread.h>
#else
#define _MTHREADIFY_PTHREADS 1
#include <minix/mthread.h>
#pragma weak sched_yield
#define sched_yield() do {\
	if (sched_yield) sched_yield();\
	} while(0)
#endif /* !defined(__minix) */
57 
58 #include <assert.h>
59 #include "atomic.h"
60 
61 // Older GCC doesn't define __LITTLE_ENDIAN__
62 #ifndef __LITTLE_ENDIAN__
63 	// If __BYTE_ORDER__ is defined, use that instead
64 #	ifdef __BYTE_ORDER__
65 #		if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
66 #			define __LITTLE_ENDIAN__
67 #		endif
68 	// x86 and ARM are the most common little-endian CPUs, so let's have a
69 	// special case for them (ARM is already special cased).  Assume everything
70 	// else is big endian.
71 #	elif defined(__x86_64) || defined(__i386)
72 #		define __LITTLE_ENDIAN__
73 #	endif
74 #endif
75 
76 
/*
 * Guard layout.
 *
 * Each guard carries two flags: INITIALISED (the object has been constructed)
 * and LOCKED (a thread currently holds the spinlock).  Where the two flags
 * live depends on the target:
 *
 *  - ARM: one 32-bit word; INITIALISED is its least significant bit and
 *    LOCKED its most significant bit.
 *  - LP64: one 64-bit word.  INITIALISED is the low bit of the first
 *    (lowest-addressed) byte, so its mask depends on endianness; LOCKED is a
 *    bit in the last byte.
 *  - Other 32-bit targets: two 32-bit words.  INITIALISED is the low bit of
 *    the first byte of init_half, LOCKED is a bit in lock_half.
 *
 * LOCK_PART() and INIT_PART() take a pointer to a guard and yield a pointer to
 * the word holding the lock flag and the initialised flag respectively.  When
 * both flags share one word the two macros yield the same pointer, which the
 * functions below use to tell the layouts apart.
 *
 * INITIAL is the value of a word that is neither locked nor initialised.
 */
#ifdef __arm__
// ARM ABI - 32-bit guards.
typedef uint32_t guard_t;
typedef uint32_t guard_lock_t;
static const uint32_t LOCKED = static_cast<guard_t>(1) << 31;
static const uint32_t INITIALISED = 1;
#define LOCK_PART(guard) (guard)
#define INIT_PART(guard) (guard)
#elif defined(_LP64)
// 64-bit targets: lock and initialised flags share a single 64-bit word.
typedef uint64_t guard_t;
typedef uint64_t guard_lock_t;
#	if defined(__LITTLE_ENDIAN__)
static const guard_t LOCKED = static_cast<guard_t>(1) << 63;
static const guard_t INITIALISED = 1;
#	else
static const guard_t LOCKED = 1;
static const guard_t INITIALISED = static_cast<guard_t>(1) << 56;
#	endif
#define LOCK_PART(guard) (guard)
#define INIT_PART(guard) (guard)
#else
// Other 32-bit targets: the guard is split into two 32-bit halves so that
// each flag can be updated with a 32-bit atomic operation.
typedef uint32_t guard_lock_t;
typedef struct {
	uint32_t init_half;
	uint32_t lock_half;
} guard_t;
#	if defined(__LITTLE_ENDIAN__)
static const uint32_t LOCKED = static_cast<guard_lock_t>(1) << 31;
static const uint32_t INITIALISED = 1;
#	else
// The two halves must pack into exactly 64 bits.
static_assert(sizeof(guard_t) == sizeof(uint64_t), "");
static const uint32_t LOCKED = 1;
static const uint32_t INITIALISED = static_cast<guard_lock_t>(1) << 24;
#	endif
#define LOCK_PART(guard) (&(guard)->lock_half)
#define INIT_PART(guard) (&(guard)->init_half)
#endif
static const guard_lock_t INITIAL = 0;
123 
124 /**
125  * Acquires a lock on a guard, returning 0 if the object has already been
126  * initialised, and 1 if it has not.  If the object is already constructed then
127  * this function just needs to read a byte from memory and return.
128  */
__cxa_guard_acquire(volatile guard_t * guard_object)129 extern "C" int __cxa_guard_acquire(volatile guard_t *guard_object)
130 {
131 	guard_lock_t old;
132 	// Not an atomic read, doesn't establish a happens-before relationship, but
133 	// if one is already established and we end up seeing an initialised state
134 	// then it's a fast path, otherwise we'll do something more expensive than
135 	// this test anyway...
136 	if (INITIALISED == *INIT_PART(guard_object))
137 		return 0;
138 	// Spin trying to do the initialisation
139 	for (;;)
140 	{
141 		// Loop trying to move the value of the guard from 0 (not
142 		// locked, not initialised) to the locked-uninitialised
143 		// position.
144 		old = __sync_val_compare_and_swap(LOCK_PART(guard_object),
145 		    INITIAL, LOCKED);
146 		if (old == INITIAL) {
147 			// Lock obtained.  If lock and init bit are
148 			// in separate words, check for init race.
149 			if (INIT_PART(guard_object) == LOCK_PART(guard_object))
150 				return 1;
151 			if (INITIALISED != *INIT_PART(guard_object))
152 				return 1;
153 
154 			// No need for a memory barrier here,
155 			// see first comment.
156 			*LOCK_PART(guard_object) = INITIAL;
157 			return 0;
158 		}
159 		// If lock and init bit are in the same word, check again
160 		// if we are done.
161 		if (INIT_PART(guard_object) == LOCK_PART(guard_object) &&
162 		    old == INITIALISED)
163 			return 0;
164 
165 		assert(old == LOCKED);
166 		// Another thread holds the lock.
167 		// If lock and init bit are in different words, check
168 		// if we are done before yielding and looping.
169 		if (INIT_PART(guard_object) != LOCK_PART(guard_object) &&
170 		    INITIALISED == *INIT_PART(guard_object))
171 			return 0;
172 		sched_yield();
173 	}
174 }
175 
176 /**
177  * Releases the lock without marking the object as initialised.  This function
178  * is called if initialising a static causes an exception to be thrown.
179  */
__cxa_guard_abort(volatile guard_t * guard_object)180 extern "C" void __cxa_guard_abort(volatile guard_t *guard_object)
181 {
182 	__attribute__((unused))
183 	bool reset = __sync_bool_compare_and_swap(LOCK_PART(guard_object),
184 	    LOCKED, INITIAL);
185 	assert(reset);
186 }
187 /**
188  * Releases the guard and marks the object as initialised.  This function is
189  * called after successful initialisation of a static.
190  */
__cxa_guard_release(volatile guard_t * guard_object)191 extern "C" void __cxa_guard_release(volatile guard_t *guard_object)
192 {
193 	guard_lock_t old;
194 	if (INIT_PART(guard_object) == LOCK_PART(guard_object))
195 		old = LOCKED;
196 	else
197 		old = INITIAL;
198 	__attribute__((unused))
199 	bool reset = __sync_bool_compare_and_swap(INIT_PART(guard_object),
200 	    old, INITIALISED);
201 	assert(reset);
202 	if (INIT_PART(guard_object) != LOCK_PART(guard_object))
203 		*LOCK_PART(guard_object) = INITIAL;
204 }
205