1 /*-------------------------------------------------------------------------
2  *
3  * posix_sema.c
4  *	  Implement PGSemaphores using POSIX semaphore facilities
5  *
6  * We prefer the unnamed style of POSIX semaphore (the kind made with
7  * sem_init).  We can cope with the kind made with sem_open, however.
8  *
9  * In either implementation, typedef PGSemaphore is equivalent to "sem_t *".
10  * With unnamed semaphores, the sem_t structs live in an array in shared
11  * memory.  With named semaphores, that's not true because we cannot persuade
12  * sem_open to do its allocation there.  Therefore, the named-semaphore code
13  * *does not cope with EXEC_BACKEND*.  The sem_t structs will just be in the
14  * postmaster's private memory, where they are successfully inherited by
15  * forked backends, but they could not be accessed by exec'd backends.
16  *
17  *
18  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
19  * Portions Copyright (c) 1994, Regents of the University of California
20  *
21  * IDENTIFICATION
22  *	  src/backend/port/posix_sema.c
23  *
24  *-------------------------------------------------------------------------
25  */
26 #include "postgres.h"
27 
28 #include <fcntl.h>
29 #include <semaphore.h>
30 #include <signal.h>
31 #include <unistd.h>
32 
33 #include "miscadmin.h"
34 #include "storage/ipc.h"
35 #include "storage/pg_sema.h"
36 #include "storage/shmem.h"
37 
38 
/*
 * Compile-time guard (see file header comment): named POSIX semaphores
 * cannot be combined with EXEC_BACKEND, because the sem_t structs obtained
 * from sem_open live in the postmaster's private memory and could not be
 * accessed by exec'd backends.
 */
#if defined(USE_NAMED_POSIX_SEMAPHORES) && defined(EXEC_BACKEND)
#error cannot use named POSIX semaphores with EXEC_BACKEND
#endif
43 
/*
 * A sem_t overlaid with a char array of PG_CACHE_LINE_SIZE bytes, so that
 * the union occupies at least PG_CACHE_LINE_SIZE bytes.
 *
 * NOTE(review): presumably the padding is meant to keep adjacent semaphores
 * in an array from sharing a cache line --- confirm against the definition
 * of PG_CACHE_LINE_SIZE.
 */
typedef union SemTPadded
{
	sem_t		pgsem;			/* the POSIX semaphore itself */
	char		pad[PG_CACHE_LINE_SIZE];	/* padding; not referenced
											 * elsewhere in this file */
} SemTPadded;
49 
/*
 * typedef PGSemaphore is equivalent to pointer to sem_t
 *
 * PGSemaphoreData holds exactly one padded sem_t and nothing else.
 */
typedef struct PGSemaphoreData
{
	SemTPadded	sem_padded;		/* sole member: the padded sem_t */
} PGSemaphoreData;

/* Given a pointer to a PGSemaphoreData, yield a pointer to its sem_t */
#define PG_SEM_REF(x)	(&(x)->sem_padded.pgsem)
57 
/* Permission bits passed as the mode argument of sem_open() in this file */
#define IPCProtection	(0600)	/* access/modify by user only */
59 
/*
 * File-local bookkeeping.  PGReserveSemaphores initializes all of these;
 * PGSemaphoreCreate advances numSems (and, for named semaphores, nextSemKey
 * via PosixSemaphoreCreate); ReleaseSemaphores walks the first numSems
 * entries to release them.
 */
#ifdef USE_NAMED_POSIX_SEMAPHORES
static sem_t **mySemPointers;	/* keep track of created semaphores */
#else
static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */
#endif
static int	numSems;			/* number of semas acquired so far */
static int	maxSems;			/* allocated size of above arrays */
static int	nextSemKey;			/* next name to try */


/* on_shmem_exit callback; registered by PGReserveSemaphores */
static void ReleaseSemaphores(int status, Datum arg);
71 
72 
73 #ifdef USE_NAMED_POSIX_SEMAPHORES
74 
75 /*
76  * PosixSemaphoreCreate
77  *
78  * Attempt to create a new named semaphore.
79  *
80  * If we fail with a failure code other than collision-with-existing-sema,
81  * print out an error and abort.  Other types of errors suggest nonrecoverable
82  * problems.
83  */
84 static sem_t *
PosixSemaphoreCreate(void)85 PosixSemaphoreCreate(void)
86 {
87 	int			semKey;
88 	char		semname[64];
89 	sem_t	   *mySem;
90 
91 	for (;;)
92 	{
93 		semKey = nextSemKey++;
94 
95 		snprintf(semname, sizeof(semname), "/pgsql-%d", semKey);
96 
97 		mySem = sem_open(semname, O_CREAT | O_EXCL,
98 						 (mode_t) IPCProtection, (unsigned) 1);
99 
100 #ifdef SEM_FAILED
101 		if (mySem != (sem_t *) SEM_FAILED)
102 			break;
103 #else
104 		if (mySem != (sem_t *) (-1))
105 			break;
106 #endif
107 
108 		/* Loop if error indicates a collision */
109 		if (errno == EEXIST || errno == EACCES || errno == EINTR)
110 			continue;
111 
112 		/*
113 		 * Else complain and abort
114 		 */
115 		elog(FATAL, "sem_open(\"%s\") failed: %m", semname);
116 	}
117 
118 	/*
119 	 * Unlink the semaphore immediately, so it can't be accessed externally.
120 	 * This also ensures that it will go away if we crash.
121 	 */
122 	sem_unlink(semname);
123 
124 	return mySem;
125 }
126 #else							/* !USE_NAMED_POSIX_SEMAPHORES */
127 
128 /*
129  * PosixSemaphoreCreate
130  *
131  * Attempt to create a new unnamed semaphore.
132  */
133 static void
PosixSemaphoreCreate(sem_t * sem)134 PosixSemaphoreCreate(sem_t *sem)
135 {
136 	if (sem_init(sem, 1, 1) < 0)
137 		elog(FATAL, "sem_init failed: %m");
138 }
139 #endif							/* USE_NAMED_POSIX_SEMAPHORES */
140 
141 
142 /*
143  * PosixSemaphoreKill	- removes a semaphore
144  */
145 static void
PosixSemaphoreKill(sem_t * sem)146 PosixSemaphoreKill(sem_t *sem)
147 {
148 #ifdef USE_NAMED_POSIX_SEMAPHORES
149 	/* Got to use sem_close for named semaphores */
150 	if (sem_close(sem) < 0)
151 		elog(LOG, "sem_close failed: %m");
152 #else
153 	/* Got to use sem_destroy for unnamed semaphores */
154 	if (sem_destroy(sem) < 0)
155 		elog(LOG, "sem_destroy failed: %m");
156 #endif
157 }
158 
159 
160 /*
161  * Report amount of shared memory needed for semaphores
162  */
163 Size
PGSemaphoreShmemSize(int maxSemas)164 PGSemaphoreShmemSize(int maxSemas)
165 {
166 #ifdef USE_NAMED_POSIX_SEMAPHORES
167 	/* No shared memory needed in this case */
168 	return 0;
169 #else
170 	/* Need a PGSemaphoreData per semaphore */
171 	return mul_size(maxSemas, sizeof(PGSemaphoreData));
172 #endif
173 }
174 
175 /*
176  * PGReserveSemaphores --- initialize semaphore support
177  *
178  * This is called during postmaster start or shared memory reinitialization.
179  * It should do whatever is needed to be able to support up to maxSemas
180  * subsequent PGSemaphoreCreate calls.  Also, if any system resources
181  * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
182  * callback to release them.
183  *
184  * The port number is passed for possible use as a key (for Posix, we use
185  * it to generate the starting semaphore name).  In a standalone backend,
186  * zero will be passed.
187  *
188  * In the Posix implementation, we acquire semaphores on-demand; the
189  * maxSemas parameter is just used to size the arrays.  For unnamed
190  * semaphores, there is an array of PGSemaphoreData structs in shared memory.
191  * For named semaphores, we keep a postmaster-local array of sem_t pointers,
192  * which we use for releasing the semphores when done.
193  * (This design minimizes the dependency of postmaster shutdown on the
194  * contents of shared memory, which a failed backend might have clobbered.
195  * We can't do much about the possibility of sem_destroy() crashing, but
196  * we don't have to expose the counters to other processes.)
197  */
198 void
PGReserveSemaphores(int maxSemas,int port)199 PGReserveSemaphores(int maxSemas, int port)
200 {
201 #ifdef USE_NAMED_POSIX_SEMAPHORES
202 	mySemPointers = (sem_t **) malloc(maxSemas * sizeof(sem_t *));
203 	if (mySemPointers == NULL)
204 		elog(PANIC, "out of memory");
205 #else
206 
207 	/*
208 	 * We must use ShmemAllocUnlocked(), since the spinlock protecting
209 	 * ShmemAlloc() won't be ready yet.  (This ordering is necessary when we
210 	 * are emulating spinlocks with semaphores.)
211 	 */
212 	sharedSemas = (PGSemaphore)
213 		ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas));
214 #endif
215 
216 	numSems = 0;
217 	maxSems = maxSemas;
218 	nextSemKey = port * 1000;
219 
220 	on_shmem_exit(ReleaseSemaphores, 0);
221 }
222 
223 /*
224  * Release semaphores at shutdown or shmem reinitialization
225  *
226  * (called as an on_shmem_exit callback, hence funny argument list)
227  */
228 static void
ReleaseSemaphores(int status,Datum arg)229 ReleaseSemaphores(int status, Datum arg)
230 {
231 	int			i;
232 
233 #ifdef USE_NAMED_POSIX_SEMAPHORES
234 	for (i = 0; i < numSems; i++)
235 		PosixSemaphoreKill(mySemPointers[i]);
236 	free(mySemPointers);
237 #endif
238 
239 #ifdef USE_UNNAMED_POSIX_SEMAPHORES
240 	for (i = 0; i < numSems; i++)
241 		PosixSemaphoreKill(PG_SEM_REF(sharedSemas + i));
242 #endif
243 }
244 
245 /*
246  * PGSemaphoreCreate
247  *
248  * Allocate a PGSemaphore structure with initial count 1
249  */
250 PGSemaphore
PGSemaphoreCreate(void)251 PGSemaphoreCreate(void)
252 {
253 	PGSemaphore sema;
254 	sem_t	   *newsem;
255 
256 	/* Can't do this in a backend, because static state is postmaster's */
257 	Assert(!IsUnderPostmaster);
258 
259 	if (numSems >= maxSems)
260 		elog(PANIC, "too many semaphores created");
261 
262 #ifdef USE_NAMED_POSIX_SEMAPHORES
263 	newsem = PosixSemaphoreCreate();
264 	/* Remember new sema for ReleaseSemaphores */
265 	mySemPointers[numSems] = newsem;
266 	sema = (PGSemaphore) newsem;
267 #else
268 	sema = &sharedSemas[numSems];
269 	newsem = PG_SEM_REF(sema);
270 	PosixSemaphoreCreate(newsem);
271 #endif
272 
273 	numSems++;
274 
275 	return sema;
276 }
277 
278 /*
279  * PGSemaphoreReset
280  *
281  * Reset a previously-initialized PGSemaphore to have count 0
282  */
283 void
PGSemaphoreReset(PGSemaphore sema)284 PGSemaphoreReset(PGSemaphore sema)
285 {
286 	/*
287 	 * There's no direct API for this in POSIX, so we have to ratchet the
288 	 * semaphore down to 0 with repeated trywait's.
289 	 */
290 	for (;;)
291 	{
292 		if (sem_trywait(PG_SEM_REF(sema)) < 0)
293 		{
294 			if (errno == EAGAIN || errno == EDEADLK)
295 				break;			/* got it down to 0 */
296 			if (errno == EINTR)
297 				continue;		/* can this happen? */
298 			elog(FATAL, "sem_trywait failed: %m");
299 		}
300 	}
301 }
302 
303 /*
304  * PGSemaphoreLock
305  *
306  * Lock a semaphore (decrement count), blocking if count would be < 0
307  */
308 void
PGSemaphoreLock(PGSemaphore sema)309 PGSemaphoreLock(PGSemaphore sema)
310 {
311 	int			errStatus;
312 
313 	/* See notes in sysv_sema.c's implementation of PGSemaphoreLock. */
314 	do
315 	{
316 		errStatus = sem_wait(PG_SEM_REF(sema));
317 	} while (errStatus < 0 && errno == EINTR);
318 
319 	if (errStatus < 0)
320 		elog(FATAL, "sem_wait failed: %m");
321 }
322 
323 /*
324  * PGSemaphoreUnlock
325  *
326  * Unlock a semaphore (increment count)
327  */
328 void
PGSemaphoreUnlock(PGSemaphore sema)329 PGSemaphoreUnlock(PGSemaphore sema)
330 {
331 	int			errStatus;
332 
333 	/*
334 	 * Note: if errStatus is -1 and errno == EINTR then it means we returned
335 	 * from the operation prematurely because we were sent a signal.  So we
336 	 * try and unlock the semaphore again. Not clear this can really happen,
337 	 * but might as well cope.
338 	 */
339 	do
340 	{
341 		errStatus = sem_post(PG_SEM_REF(sema));
342 	} while (errStatus < 0 && errno == EINTR);
343 
344 	if (errStatus < 0)
345 		elog(FATAL, "sem_post failed: %m");
346 }
347 
348 /*
349  * PGSemaphoreTryLock
350  *
351  * Lock a semaphore only if able to do so without blocking
352  */
353 bool
PGSemaphoreTryLock(PGSemaphore sema)354 PGSemaphoreTryLock(PGSemaphore sema)
355 {
356 	int			errStatus;
357 
358 	/*
359 	 * Note: if errStatus is -1 and errno == EINTR then it means we returned
360 	 * from the operation prematurely because we were sent a signal.  So we
361 	 * try and lock the semaphore again.
362 	 */
363 	do
364 	{
365 		errStatus = sem_trywait(PG_SEM_REF(sema));
366 	} while (errStatus < 0 && errno == EINTR);
367 
368 	if (errStatus < 0)
369 	{
370 		if (errno == EAGAIN || errno == EDEADLK)
371 			return false;		/* failed to lock it */
372 		/* Otherwise we got trouble */
373 		elog(FATAL, "sem_trywait failed: %m");
374 	}
375 
376 	return true;
377 }
378