1 /*-------------------------------------------------------------------------
2 *
3 * posix_sema.c
4 * Implement PGSemaphores using POSIX semaphore facilities
5 *
6 * We prefer the unnamed style of POSIX semaphore (the kind made with
7 * sem_init). We can cope with the kind made with sem_open, however.
8 *
9 * In either implementation, typedef PGSemaphore is equivalent to "sem_t *".
10 * With unnamed semaphores, the sem_t structs live in an array in shared
11 * memory. With named semaphores, that's not true because we cannot persuade
12 * sem_open to do its allocation there. Therefore, the named-semaphore code
13 * *does not cope with EXEC_BACKEND*. The sem_t structs will just be in the
14 * postmaster's private memory, where they are successfully inherited by
15 * forked backends, but they could not be accessed by exec'd backends.
16 *
17 *
18 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
19 * Portions Copyright (c) 1994, Regents of the University of California
20 *
21 * IDENTIFICATION
22 * src/backend/port/posix_sema.c
23 *
24 *-------------------------------------------------------------------------
25 */
26 #include "postgres.h"
27
28 #include <fcntl.h>
29 #include <semaphore.h>
30 #include <signal.h>
31 #include <unistd.h>
32
33 #include "miscadmin.h"
34 #include "storage/ipc.h"
35 #include "storage/pg_sema.h"
36 #include "storage/shmem.h"
37
38
/*
 * Refuse to build the named-semaphore variant together with EXEC_BACKEND;
 * see the file header comment for why that combination cannot work.
 */
#if defined(USE_NAMED_POSIX_SEMAPHORES) && defined(EXEC_BACKEND)
#error cannot use named POSIX semaphores with EXEC_BACKEND
#endif
43
/*
 * A sem_t overlaid with a PG_CACHE_LINE_SIZE-byte pad, so that the union
 * occupies at least PG_CACHE_LINE_SIZE bytes.
 *
 * NOTE(review): presumably this keeps adjacent entries of the semaphore
 * array on separate cache lines --- confirm.
 */
typedef union SemTPadded
{
	sem_t		pgsem;			/* the actual POSIX semaphore */
	char		pad[PG_CACHE_LINE_SIZE];	/* forces the minimum size */
} SemTPadded;
49
/*
 * typedef PGSemaphore is equivalent to pointer to sem_t: the struct's only
 * member is the padded union, whose sem_t is what PG_SEM_REF() hands to the
 * POSIX calls.
 */
typedef struct PGSemaphoreData
{
	SemTPadded	sem_padded;		/* the semaphore plus its padding */
} PGSemaphoreData;
55
/* Given a PGSemaphore x, yield the address of the sem_t stored inside it */
#define PG_SEM_REF(x)	(&(x)->sem_padded.pgsem)

/* Permission bits handed to sem_open() when creating a named semaphore */
#define IPCProtection	(0600)	/* access/modify by user only */
59
/*
 * Bookkeeping for the semaphores handed out by PGSemaphoreCreate.  All of
 * this is set up by PGReserveSemaphores.
 */
#ifdef USE_NAMED_POSIX_SEMAPHORES
static sem_t **mySemPointers;	/* keep track of created semaphores (malloc'd
								 * array of handles returned by sem_open) */
#else
static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */
#endif
static int	numSems;			/* number of semas acquired so far */
static int	maxSems;			/* allocated size of above arrays */
static int	nextSemKey;			/* next name to try; used to build the
								 * "/pgsql-N" names of named semaphores */
68
69
/* Forward declaration; PGReserveSemaphores registers this as a callback */
static void ReleaseSemaphores(int status, Datum arg);
71
72
73 #ifdef USE_NAMED_POSIX_SEMAPHORES
74
75 /*
76 * PosixSemaphoreCreate
77 *
78 * Attempt to create a new named semaphore.
79 *
80 * If we fail with a failure code other than collision-with-existing-sema,
81 * print out an error and abort. Other types of errors suggest nonrecoverable
82 * problems.
83 */
84 static sem_t *
PosixSemaphoreCreate(void)85 PosixSemaphoreCreate(void)
86 {
87 int semKey;
88 char semname[64];
89 sem_t *mySem;
90
91 for (;;)
92 {
93 semKey = nextSemKey++;
94
95 snprintf(semname, sizeof(semname), "/pgsql-%d", semKey);
96
97 mySem = sem_open(semname, O_CREAT | O_EXCL,
98 (mode_t) IPCProtection, (unsigned) 1);
99
100 #ifdef SEM_FAILED
101 if (mySem != (sem_t *) SEM_FAILED)
102 break;
103 #else
104 if (mySem != (sem_t *) (-1))
105 break;
106 #endif
107
108 /* Loop if error indicates a collision */
109 if (errno == EEXIST || errno == EACCES || errno == EINTR)
110 continue;
111
112 /*
113 * Else complain and abort
114 */
115 elog(FATAL, "sem_open(\"%s\") failed: %m", semname);
116 }
117
118 /*
119 * Unlink the semaphore immediately, so it can't be accessed externally.
120 * This also ensures that it will go away if we crash.
121 */
122 sem_unlink(semname);
123
124 return mySem;
125 }
126 #else /* !USE_NAMED_POSIX_SEMAPHORES */
127
128 /*
129 * PosixSemaphoreCreate
130 *
131 * Attempt to create a new unnamed semaphore.
132 */
133 static void
PosixSemaphoreCreate(sem_t * sem)134 PosixSemaphoreCreate(sem_t *sem)
135 {
136 if (sem_init(sem, 1, 1) < 0)
137 elog(FATAL, "sem_init failed: %m");
138 }
139 #endif /* USE_NAMED_POSIX_SEMAPHORES */
140
141
142 /*
143 * PosixSemaphoreKill - removes a semaphore
144 */
145 static void
PosixSemaphoreKill(sem_t * sem)146 PosixSemaphoreKill(sem_t *sem)
147 {
148 #ifdef USE_NAMED_POSIX_SEMAPHORES
149 /* Got to use sem_close for named semaphores */
150 if (sem_close(sem) < 0)
151 elog(LOG, "sem_close failed: %m");
152 #else
153 /* Got to use sem_destroy for unnamed semaphores */
154 if (sem_destroy(sem) < 0)
155 elog(LOG, "sem_destroy failed: %m");
156 #endif
157 }
158
159
160 /*
161 * Report amount of shared memory needed for semaphores
162 */
163 Size
PGSemaphoreShmemSize(int maxSemas)164 PGSemaphoreShmemSize(int maxSemas)
165 {
166 #ifdef USE_NAMED_POSIX_SEMAPHORES
167 /* No shared memory needed in this case */
168 return 0;
169 #else
170 /* Need a PGSemaphoreData per semaphore */
171 return mul_size(maxSemas, sizeof(PGSemaphoreData));
172 #endif
173 }
174
175 /*
176 * PGReserveSemaphores --- initialize semaphore support
177 *
178 * This is called during postmaster start or shared memory reinitialization.
179 * It should do whatever is needed to be able to support up to maxSemas
180 * subsequent PGSemaphoreCreate calls. Also, if any system resources
181 * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
182 * callback to release them.
183 *
184 * The port number is passed for possible use as a key (for Posix, we use
185 * it to generate the starting semaphore name). In a standalone backend,
186 * zero will be passed.
187 *
188 * In the Posix implementation, we acquire semaphores on-demand; the
189 * maxSemas parameter is just used to size the arrays. For unnamed
190 * semaphores, there is an array of PGSemaphoreData structs in shared memory.
191 * For named semaphores, we keep a postmaster-local array of sem_t pointers,
192 * which we use for releasing the semphores when done.
193 * (This design minimizes the dependency of postmaster shutdown on the
194 * contents of shared memory, which a failed backend might have clobbered.
195 * We can't do much about the possibility of sem_destroy() crashing, but
196 * we don't have to expose the counters to other processes.)
197 */
198 void
PGReserveSemaphores(int maxSemas,int port)199 PGReserveSemaphores(int maxSemas, int port)
200 {
201 #ifdef USE_NAMED_POSIX_SEMAPHORES
202 mySemPointers = (sem_t **) malloc(maxSemas * sizeof(sem_t *));
203 if (mySemPointers == NULL)
204 elog(PANIC, "out of memory");
205 #else
206
207 /*
208 * We must use ShmemAllocUnlocked(), since the spinlock protecting
209 * ShmemAlloc() won't be ready yet. (This ordering is necessary when we
210 * are emulating spinlocks with semaphores.)
211 */
212 sharedSemas = (PGSemaphore)
213 ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas));
214 #endif
215
216 numSems = 0;
217 maxSems = maxSemas;
218 nextSemKey = port * 1000;
219
220 on_shmem_exit(ReleaseSemaphores, 0);
221 }
222
223 /*
224 * Release semaphores at shutdown or shmem reinitialization
225 *
226 * (called as an on_shmem_exit callback, hence funny argument list)
227 */
228 static void
ReleaseSemaphores(int status,Datum arg)229 ReleaseSemaphores(int status, Datum arg)
230 {
231 int i;
232
233 #ifdef USE_NAMED_POSIX_SEMAPHORES
234 for (i = 0; i < numSems; i++)
235 PosixSemaphoreKill(mySemPointers[i]);
236 free(mySemPointers);
237 #endif
238
239 #ifdef USE_UNNAMED_POSIX_SEMAPHORES
240 for (i = 0; i < numSems; i++)
241 PosixSemaphoreKill(PG_SEM_REF(sharedSemas + i));
242 #endif
243 }
244
245 /*
246 * PGSemaphoreCreate
247 *
248 * Allocate a PGSemaphore structure with initial count 1
249 */
250 PGSemaphore
PGSemaphoreCreate(void)251 PGSemaphoreCreate(void)
252 {
253 PGSemaphore sema;
254 sem_t *newsem;
255
256 /* Can't do this in a backend, because static state is postmaster's */
257 Assert(!IsUnderPostmaster);
258
259 if (numSems >= maxSems)
260 elog(PANIC, "too many semaphores created");
261
262 #ifdef USE_NAMED_POSIX_SEMAPHORES
263 newsem = PosixSemaphoreCreate();
264 /* Remember new sema for ReleaseSemaphores */
265 mySemPointers[numSems] = newsem;
266 sema = (PGSemaphore) newsem;
267 #else
268 sema = &sharedSemas[numSems];
269 newsem = PG_SEM_REF(sema);
270 PosixSemaphoreCreate(newsem);
271 #endif
272
273 numSems++;
274
275 return sema;
276 }
277
278 /*
279 * PGSemaphoreReset
280 *
281 * Reset a previously-initialized PGSemaphore to have count 0
282 */
283 void
PGSemaphoreReset(PGSemaphore sema)284 PGSemaphoreReset(PGSemaphore sema)
285 {
286 /*
287 * There's no direct API for this in POSIX, so we have to ratchet the
288 * semaphore down to 0 with repeated trywait's.
289 */
290 for (;;)
291 {
292 if (sem_trywait(PG_SEM_REF(sema)) < 0)
293 {
294 if (errno == EAGAIN || errno == EDEADLK)
295 break; /* got it down to 0 */
296 if (errno == EINTR)
297 continue; /* can this happen? */
298 elog(FATAL, "sem_trywait failed: %m");
299 }
300 }
301 }
302
303 /*
304 * PGSemaphoreLock
305 *
306 * Lock a semaphore (decrement count), blocking if count would be < 0
307 */
308 void
PGSemaphoreLock(PGSemaphore sema)309 PGSemaphoreLock(PGSemaphore sema)
310 {
311 int errStatus;
312
313 /* See notes in sysv_sema.c's implementation of PGSemaphoreLock. */
314 do
315 {
316 errStatus = sem_wait(PG_SEM_REF(sema));
317 } while (errStatus < 0 && errno == EINTR);
318
319 if (errStatus < 0)
320 elog(FATAL, "sem_wait failed: %m");
321 }
322
323 /*
324 * PGSemaphoreUnlock
325 *
326 * Unlock a semaphore (increment count)
327 */
328 void
PGSemaphoreUnlock(PGSemaphore sema)329 PGSemaphoreUnlock(PGSemaphore sema)
330 {
331 int errStatus;
332
333 /*
334 * Note: if errStatus is -1 and errno == EINTR then it means we returned
335 * from the operation prematurely because we were sent a signal. So we
336 * try and unlock the semaphore again. Not clear this can really happen,
337 * but might as well cope.
338 */
339 do
340 {
341 errStatus = sem_post(PG_SEM_REF(sema));
342 } while (errStatus < 0 && errno == EINTR);
343
344 if (errStatus < 0)
345 elog(FATAL, "sem_post failed: %m");
346 }
347
348 /*
349 * PGSemaphoreTryLock
350 *
351 * Lock a semaphore only if able to do so without blocking
352 */
353 bool
PGSemaphoreTryLock(PGSemaphore sema)354 PGSemaphoreTryLock(PGSemaphore sema)
355 {
356 int errStatus;
357
358 /*
359 * Note: if errStatus is -1 and errno == EINTR then it means we returned
360 * from the operation prematurely because we were sent a signal. So we
361 * try and lock the semaphore again.
362 */
363 do
364 {
365 errStatus = sem_trywait(PG_SEM_REF(sema));
366 } while (errStatus < 0 && errno == EINTR);
367
368 if (errStatus < 0)
369 {
370 if (errno == EAGAIN || errno == EDEADLK)
371 return false; /* failed to lock it */
372 /* Otherwise we got trouble */
373 elog(FATAL, "sem_trywait failed: %m");
374 }
375
376 return true;
377 }
378