1 /*-------------------------------------------------------------------------
2  *
3  * sysv_sema.c
4  *	  Implement PGSemaphores using SysV semaphore facilities
5  *
6  *
7  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *	  src/backend/port/sysv_sema.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 #include "postgres.h"
16 
17 #include <signal.h>
18 #include <unistd.h>
19 #include <sys/file.h>
20 #include <sys/stat.h>
21 #ifdef HAVE_SYS_IPC_H
22 #include <sys/ipc.h>
23 #endif
24 #ifdef HAVE_SYS_SEM_H
25 #include <sys/sem.h>
26 #endif
27 
28 #include "miscadmin.h"
29 #include "storage/ipc.h"
30 #include "storage/pg_sema.h"
31 #include "storage/shmem.h"
32 
33 
/*
 * One PGSemaphore.  It is addressed by the SysV semaphore set it belongs to
 * (semId, passed to semop()/semctl()) and its index inside that set (semNum,
 * used as sem_num).
 */
typedef struct PGSemaphoreData
{
	int			semId;			/* semaphore set identifier */
	int			semNum;			/* semaphore number within set */
} PGSemaphoreData;
39 
/*
 * Fallback declaration of union semun, compiled only when HAVE_UNION_SEMUN
 * is not defined.  This file passes it as the fourth argument of semctl().
 */
#ifndef HAVE_UNION_SEMUN
union semun
{
	int			val;			/* value for SETVAL */
	struct semid_ds *buf;		/* not used in this file */
	unsigned short *array;		/* not used in this file */
};
#endif
48 
typedef key_t IpcSemaphoreKey;	/* semaphore key passed to semget(2) */
typedef int IpcSemaphoreId;		/* semaphore ID returned by semget(2) */

/*
 * SEMAS_PER_SET is the number of useful semaphores in each semaphore set
 * we allocate.  It must be *less than* your kernel's SEMMSL (max semaphores
 * per set) parameter, which is often around 25.  (Less than, because we
 * allocate one extra sema in each set for identification purposes.)
 */
#define SEMAS_PER_SET	16

/* Permission bits OR'ed into the semget() flags when creating a set */
#define IPCProtection	(0600)	/* access/modify by user only */

/*
 * Value left in the extra (identification) semaphore of every set created
 * by IpcSemaphoreCreate; an existing set whose extra semaphore does not hold
 * this value is assumed to belong to some other application.
 */
#define PGSemaMagic		537		/* must be less than SEMVMX */
63 
64 
/*
 * Bookkeeping state.  All of it is (re)initialized by PGReserveSemaphores
 * and advanced by PGSemaphoreCreate / IpcSemaphoreCreate.
 */
static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */
static int	numSharedSemas;		/* number of PGSemaphoreDatas used so far */
static int	maxSharedSemas;		/* allocated size of PGSemaphoreData array */
static IpcSemaphoreId *mySemaSets;	/* IDs of sema sets acquired so far */
static int	numSemaSets;		/* number of sema sets acquired so far */
static int	maxSemaSets;		/* allocated size of mySemaSets array */
static IpcSemaphoreKey nextSemaKey; /* next key to try using */
static int	nextSemaNumber;		/* next free sem num in last sema set */


/* Forward declarations of the file-local helpers defined below */
static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
												 int numSems);
static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
								   int value);
static void IpcSemaphoreKill(IpcSemaphoreId semId);
static int	IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum);
static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum);
static IpcSemaphoreId IpcSemaphoreCreate(int numSems);
static void ReleaseSemaphores(int status, Datum arg);
84 
85 
86 /*
87  * InternalIpcSemaphoreCreate
88  *
89  * Attempt to create a new semaphore set with the specified key.
90  * Will fail (return -1) if such a set already exists.
91  *
92  * If we fail with a failure code other than collision-with-existing-set,
93  * print out an error and abort.  Other types of errors suggest nonrecoverable
94  * problems.
95  */
96 static IpcSemaphoreId
InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,int numSems)97 InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems)
98 {
99 	int			semId;
100 
101 	semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);
102 
103 	if (semId < 0)
104 	{
105 		int			saved_errno = errno;
106 
107 		/*
108 		 * Fail quietly if error indicates a collision with existing set. One
109 		 * would expect EEXIST, given that we said IPC_EXCL, but perhaps we
110 		 * could get a permission violation instead?  Also, EIDRM might occur
111 		 * if an old set is slated for destruction but not gone yet.
112 		 */
113 		if (saved_errno == EEXIST || saved_errno == EACCES
114 #ifdef EIDRM
115 			|| saved_errno == EIDRM
116 #endif
117 			)
118 			return -1;
119 
120 		/*
121 		 * Else complain and abort
122 		 */
123 		ereport(FATAL,
124 				(errmsg("could not create semaphores: %m"),
125 				 errdetail("Failed system call was semget(%lu, %d, 0%o).",
126 						   (unsigned long) semKey, numSems,
127 						   IPC_CREAT | IPC_EXCL | IPCProtection),
128 				 (saved_errno == ENOSPC) ?
129 				 errhint("This error does *not* mean that you have run out of disk space.  "
130 						 "It occurs when either the system limit for the maximum number of "
131 						 "semaphore sets (SEMMNI), or the system wide maximum number of "
132 						 "semaphores (SEMMNS), would be exceeded.  You need to raise the "
133 						 "respective kernel parameter.  Alternatively, reduce PostgreSQL's "
134 						 "consumption of semaphores by reducing its max_connections parameter.\n"
135 						 "The PostgreSQL documentation contains more information about "
136 						 "configuring your system for PostgreSQL.") : 0));
137 	}
138 
139 	return semId;
140 }
141 
142 /*
143  * Initialize a semaphore to the specified value.
144  */
145 static void
IpcSemaphoreInitialize(IpcSemaphoreId semId,int semNum,int value)146 IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
147 {
148 	union semun semun;
149 
150 	semun.val = value;
151 	if (semctl(semId, semNum, SETVAL, semun) < 0)
152 	{
153 		int			saved_errno = errno;
154 
155 		ereport(FATAL,
156 				(errmsg_internal("semctl(%d, %d, SETVAL, %d) failed: %m",
157 								 semId, semNum, value),
158 				 (saved_errno == ERANGE) ?
159 				 errhint("You possibly need to raise your kernel's SEMVMX value to be at least "
160 						 "%d.  Look into the PostgreSQL documentation for details.",
161 						 value) : 0));
162 	}
163 }
164 
165 /*
166  * IpcSemaphoreKill(semId)	- removes a semaphore set
167  */
168 static void
IpcSemaphoreKill(IpcSemaphoreId semId)169 IpcSemaphoreKill(IpcSemaphoreId semId)
170 {
171 	union semun semun;
172 
173 	semun.val = 0;				/* unused, but keep compiler quiet */
174 
175 	if (semctl(semId, 0, IPC_RMID, semun) < 0)
176 		elog(LOG, "semctl(%d, 0, IPC_RMID, ...) failed: %m", semId);
177 }
178 
179 /* Get the current value (semval) of the semaphore */
180 static int
IpcSemaphoreGetValue(IpcSemaphoreId semId,int semNum)181 IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
182 {
183 	union semun dummy;			/* for Solaris */
184 
185 	dummy.val = 0;				/* unused */
186 
187 	return semctl(semId, semNum, GETVAL, dummy);
188 }
189 
190 /* Get the PID of the last process to do semop() on the semaphore */
191 static pid_t
IpcSemaphoreGetLastPID(IpcSemaphoreId semId,int semNum)192 IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
193 {
194 	union semun dummy;			/* for Solaris */
195 
196 	dummy.val = 0;				/* unused */
197 
198 	return semctl(semId, semNum, GETPID, dummy);
199 }
200 
201 
202 /*
203  * Create a semaphore set with the given number of useful semaphores
204  * (an additional sema is actually allocated to serve as identifier).
205  * Dead Postgres sema sets are recycled if found, but we do not fail
206  * upon collision with non-Postgres sema sets.
207  *
208  * The idea here is to detect and re-use keys that may have been assigned
209  * by a crashed postmaster or backend.
210  */
211 static IpcSemaphoreId
IpcSemaphoreCreate(int numSems)212 IpcSemaphoreCreate(int numSems)
213 {
214 	IpcSemaphoreId semId;
215 	union semun semun;
216 	PGSemaphoreData mysema;
217 
218 	/* Loop till we find a free IPC key */
219 	for (nextSemaKey++;; nextSemaKey++)
220 	{
221 		pid_t		creatorPID;
222 
223 		/* Try to create new semaphore set */
224 		semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
225 		if (semId >= 0)
226 			break;				/* successful create */
227 
228 		/* See if it looks to be leftover from a dead Postgres process */
229 		semId = semget(nextSemaKey, numSems + 1, 0);
230 		if (semId < 0)
231 			continue;			/* failed: must be some other app's */
232 		if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
233 			continue;			/* sema belongs to a non-Postgres app */
234 
235 		/*
236 		 * If the creator PID is my own PID or does not belong to any extant
237 		 * process, it's safe to zap it.
238 		 */
239 		creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
240 		if (creatorPID <= 0)
241 			continue;			/* oops, GETPID failed */
242 		if (creatorPID != getpid())
243 		{
244 			if (kill(creatorPID, 0) == 0 || errno != ESRCH)
245 				continue;		/* sema belongs to a live process */
246 		}
247 
248 		/*
249 		 * The sema set appears to be from a dead Postgres process, or from a
250 		 * previous cycle of life in this same process.  Zap it, if possible.
251 		 * This probably shouldn't fail, but if it does, assume the sema set
252 		 * belongs to someone else after all, and continue quietly.
253 		 */
254 		semun.val = 0;			/* unused, but keep compiler quiet */
255 		if (semctl(semId, 0, IPC_RMID, semun) < 0)
256 			continue;
257 
258 		/*
259 		 * Now try again to create the sema set.
260 		 */
261 		semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
262 		if (semId >= 0)
263 			break;				/* successful create */
264 
265 		/*
266 		 * Can only get here if some other process managed to create the same
267 		 * sema key before we did.  Let him have that one, loop around to try
268 		 * next key.
269 		 */
270 	}
271 
272 	/*
273 	 * OK, we created a new sema set.  Mark it as created by this process. We
274 	 * do this by setting the spare semaphore to PGSemaMagic-1 and then
275 	 * incrementing it with semop().  That leaves it with value PGSemaMagic
276 	 * and sempid referencing this process.
277 	 */
278 	IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1);
279 	mysema.semId = semId;
280 	mysema.semNum = numSems;
281 	PGSemaphoreUnlock(&mysema);
282 
283 	return semId;
284 }
285 
286 
287 /*
288  * Report amount of shared memory needed for semaphores
289  */
290 Size
PGSemaphoreShmemSize(int maxSemas)291 PGSemaphoreShmemSize(int maxSemas)
292 {
293 	return mul_size(maxSemas, sizeof(PGSemaphoreData));
294 }
295 
296 /*
297  * PGReserveSemaphores --- initialize semaphore support
298  *
299  * This is called during postmaster start or shared memory reinitialization.
300  * It should do whatever is needed to be able to support up to maxSemas
301  * subsequent PGSemaphoreCreate calls.  Also, if any system resources
302  * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
303  * callback to release them.
304  *
305  * In the SysV implementation, we acquire semaphore sets on-demand; the
306  * maxSemas parameter is just used to size the arrays.  There is an array
307  * of PGSemaphoreData structs in shared memory, and a postmaster-local array
308  * with one entry per SysV semaphore set, which we use for releasing the
309  * semaphore sets when done.  (This design ensures that postmaster shutdown
310  * doesn't rely on the contents of shared memory, which a failed backend might
311  * have clobbered.)
312  */
313 void
PGReserveSemaphores(int maxSemas)314 PGReserveSemaphores(int maxSemas)
315 {
316 	struct stat statbuf;
317 
318 	/*
319 	 * We use the data directory's inode number to seed the search for free
320 	 * semaphore keys.  This minimizes the odds of collision with other
321 	 * postmasters, while maximizing the odds that we will detect and clean up
322 	 * semaphores left over from a crashed postmaster in our own directory.
323 	 */
324 	if (stat(DataDir, &statbuf) < 0)
325 		ereport(FATAL,
326 				(errcode_for_file_access(),
327 				 errmsg("could not stat data directory \"%s\": %m",
328 						DataDir)));
329 
330 	/*
331 	 * We must use ShmemAllocUnlocked(), since the spinlock protecting
332 	 * ShmemAlloc() won't be ready yet.  (This ordering is necessary when we
333 	 * are emulating spinlocks with semaphores.)
334 	 */
335 	sharedSemas = (PGSemaphore)
336 		ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas));
337 	numSharedSemas = 0;
338 	maxSharedSemas = maxSemas;
339 
340 	maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
341 	mySemaSets = (IpcSemaphoreId *)
342 		malloc(maxSemaSets * sizeof(IpcSemaphoreId));
343 	if (mySemaSets == NULL)
344 		elog(PANIC, "out of memory");
345 	numSemaSets = 0;
346 	nextSemaKey = statbuf.st_ino;
347 	nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */
348 
349 	on_shmem_exit(ReleaseSemaphores, 0);
350 }
351 
352 /*
353  * Release semaphores at shutdown or shmem reinitialization
354  *
355  * (called as an on_shmem_exit callback, hence funny argument list)
356  */
357 static void
ReleaseSemaphores(int status,Datum arg)358 ReleaseSemaphores(int status, Datum arg)
359 {
360 	int			i;
361 
362 	for (i = 0; i < numSemaSets; i++)
363 		IpcSemaphoreKill(mySemaSets[i]);
364 	free(mySemaSets);
365 }
366 
367 /*
368  * PGSemaphoreCreate
369  *
370  * Allocate a PGSemaphore structure with initial count 1
371  */
372 PGSemaphore
PGSemaphoreCreate(void)373 PGSemaphoreCreate(void)
374 {
375 	PGSemaphore sema;
376 
377 	/* Can't do this in a backend, because static state is postmaster's */
378 	Assert(!IsUnderPostmaster);
379 
380 	if (nextSemaNumber >= SEMAS_PER_SET)
381 	{
382 		/* Time to allocate another semaphore set */
383 		if (numSemaSets >= maxSemaSets)
384 			elog(PANIC, "too many semaphores created");
385 		mySemaSets[numSemaSets] = IpcSemaphoreCreate(SEMAS_PER_SET);
386 		numSemaSets++;
387 		nextSemaNumber = 0;
388 	}
389 	/* Use the next shared PGSemaphoreData */
390 	if (numSharedSemas >= maxSharedSemas)
391 		elog(PANIC, "too many semaphores created");
392 	sema = &sharedSemas[numSharedSemas++];
393 	/* Assign the next free semaphore in the current set */
394 	sema->semId = mySemaSets[numSemaSets - 1];
395 	sema->semNum = nextSemaNumber++;
396 	/* Initialize it to count 1 */
397 	IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
398 
399 	return sema;
400 }
401 
402 /*
403  * PGSemaphoreReset
404  *
405  * Reset a previously-initialized PGSemaphore to have count 0
406  */
407 void
PGSemaphoreReset(PGSemaphore sema)408 PGSemaphoreReset(PGSemaphore sema)
409 {
410 	IpcSemaphoreInitialize(sema->semId, sema->semNum, 0);
411 }
412 
413 /*
414  * PGSemaphoreLock
415  *
416  * Lock a semaphore (decrement count), blocking if count would be < 0
417  */
418 void
PGSemaphoreLock(PGSemaphore sema)419 PGSemaphoreLock(PGSemaphore sema)
420 {
421 	int			errStatus;
422 	struct sembuf sops;
423 
424 	sops.sem_op = -1;			/* decrement */
425 	sops.sem_flg = 0;
426 	sops.sem_num = sema->semNum;
427 
428 	/*
429 	 * Note: if errStatus is -1 and errno == EINTR then it means we returned
430 	 * from the operation prematurely because we were sent a signal.  So we
431 	 * try and lock the semaphore again.
432 	 *
433 	 * We used to check interrupts here, but that required servicing
434 	 * interrupts directly from signal handlers. Which is hard to do safely
435 	 * and portably.
436 	 */
437 	do
438 	{
439 		errStatus = semop(sema->semId, &sops, 1);
440 	} while (errStatus < 0 && errno == EINTR);
441 
442 	if (errStatus < 0)
443 		elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
444 }
445 
446 /*
447  * PGSemaphoreUnlock
448  *
449  * Unlock a semaphore (increment count)
450  */
451 void
PGSemaphoreUnlock(PGSemaphore sema)452 PGSemaphoreUnlock(PGSemaphore sema)
453 {
454 	int			errStatus;
455 	struct sembuf sops;
456 
457 	sops.sem_op = 1;			/* increment */
458 	sops.sem_flg = 0;
459 	sops.sem_num = sema->semNum;
460 
461 	/*
462 	 * Note: if errStatus is -1 and errno == EINTR then it means we returned
463 	 * from the operation prematurely because we were sent a signal.  So we
464 	 * try and unlock the semaphore again. Not clear this can really happen,
465 	 * but might as well cope.
466 	 */
467 	do
468 	{
469 		errStatus = semop(sema->semId, &sops, 1);
470 	} while (errStatus < 0 && errno == EINTR);
471 
472 	if (errStatus < 0)
473 		elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
474 }
475 
476 /*
477  * PGSemaphoreTryLock
478  *
479  * Lock a semaphore only if able to do so without blocking
480  */
481 bool
PGSemaphoreTryLock(PGSemaphore sema)482 PGSemaphoreTryLock(PGSemaphore sema)
483 {
484 	int			errStatus;
485 	struct sembuf sops;
486 
487 	sops.sem_op = -1;			/* decrement */
488 	sops.sem_flg = IPC_NOWAIT;	/* but don't block */
489 	sops.sem_num = sema->semNum;
490 
491 	/*
492 	 * Note: if errStatus is -1 and errno == EINTR then it means we returned
493 	 * from the operation prematurely because we were sent a signal.  So we
494 	 * try and lock the semaphore again.
495 	 */
496 	do
497 	{
498 		errStatus = semop(sema->semId, &sops, 1);
499 	} while (errStatus < 0 && errno == EINTR);
500 
501 	if (errStatus < 0)
502 	{
503 		/* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
504 #ifdef EAGAIN
505 		if (errno == EAGAIN)
506 			return false;		/* failed to lock it */
507 #endif
508 #if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
509 		if (errno == EWOULDBLOCK)
510 			return false;		/* failed to lock it */
511 #endif
512 		/* Otherwise we got trouble */
513 		elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
514 	}
515 
516 	return true;
517 }
518