1 /*-------------------------------------------------------------------------
2 *
3 * sysv_sema.c
4 * Implement PGSemaphores using SysV semaphore facilities
5 *
6 *
7 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/port/sysv_sema.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 #include "postgres.h"
16
17 #include <signal.h>
18 #include <unistd.h>
19 #include <sys/file.h>
20 #include <sys/stat.h>
21 #ifdef HAVE_SYS_IPC_H
22 #include <sys/ipc.h>
23 #endif
24 #ifdef HAVE_SYS_SEM_H
25 #include <sys/sem.h>
26 #endif
27
28 #include "miscadmin.h"
29 #include "storage/ipc.h"
30 #include "storage/pg_sema.h"
31 #include "storage/shmem.h"
32
33
/*
 * A PGSemaphore is addressed by two integers: the identifier of the SysV
 * semaphore set that contains it, and its index within that set.
 */
typedef struct PGSemaphoreData
{
	int			semId;			/* ID of the containing semaphore set */
	int			semNum;			/* index of this semaphore in that set */
} PGSemaphoreData;
39
/*
 * Argument union handed to semctl() throughout this file.  It is declared
 * here only when HAVE_UNION_SEMUN is not defined.
 */
#ifndef HAVE_UNION_SEMUN
union semun
{
	int			val;			/* the member this file sets (e.g. SETVAL) */
	struct semid_ds *buf;
	unsigned short *array;
};
#endif
48
/* Key type handed to semget(2) when creating or looking up a set */
typedef key_t IpcSemaphoreKey;

/* Set identifier type, as returned by semget(2) */
typedef int IpcSemaphoreId;
51
/*
 * SEMAS_PER_SET is how many usable semaphores each allocated set holds.
 * Every set is actually created with one extra semaphore that serves as an
 * identification marker, so this value must be strictly *less than* the
 * kernel's SEMMSL (max semaphores per set) limit, which is often around 25.
 */
#define SEMAS_PER_SET	16

/* Permission bits for new sets: access/modify by the owning user only */
#define IPCProtection	(0600)

/* Value stored in the marker semaphore; must be less than SEMVMX */
#define PGSemaMagic		537
63
64
65 static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */
66 static int numSharedSemas; /* number of PGSemaphoreDatas used so far */
67 static int maxSharedSemas; /* allocated size of PGSemaphoreData array */
68 static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */
69 static int numSemaSets; /* number of sema sets acquired so far */
70 static int maxSemaSets; /* allocated size of mySemaSets array */
71 static IpcSemaphoreKey nextSemaKey; /* next key to try using */
72 static int nextSemaNumber; /* next free sem num in last sema set */
73
74
75 static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,
76 int numSems);
77 static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum,
78 int value);
79 static void IpcSemaphoreKill(IpcSemaphoreId semId);
80 static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum);
81 static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum);
82 static IpcSemaphoreId IpcSemaphoreCreate(int numSems);
83 static void ReleaseSemaphores(int status, Datum arg);
84
85
86 /*
87 * InternalIpcSemaphoreCreate
88 *
89 * Attempt to create a new semaphore set with the specified key.
90 * Will fail (return -1) if such a set already exists.
91 *
92 * If we fail with a failure code other than collision-with-existing-set,
93 * print out an error and abort. Other types of errors suggest nonrecoverable
94 * problems.
95 */
96 static IpcSemaphoreId
InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey,int numSems)97 InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems)
98 {
99 int semId;
100
101 semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection);
102
103 if (semId < 0)
104 {
105 int saved_errno = errno;
106
107 /*
108 * Fail quietly if error indicates a collision with existing set. One
109 * would expect EEXIST, given that we said IPC_EXCL, but perhaps we
110 * could get a permission violation instead? Also, EIDRM might occur
111 * if an old set is slated for destruction but not gone yet.
112 */
113 if (saved_errno == EEXIST || saved_errno == EACCES
114 #ifdef EIDRM
115 || saved_errno == EIDRM
116 #endif
117 )
118 return -1;
119
120 /*
121 * Else complain and abort
122 */
123 ereport(FATAL,
124 (errmsg("could not create semaphores: %m"),
125 errdetail("Failed system call was semget(%lu, %d, 0%o).",
126 (unsigned long) semKey, numSems,
127 IPC_CREAT | IPC_EXCL | IPCProtection),
128 (saved_errno == ENOSPC) ?
129 errhint("This error does *not* mean that you have run out of disk space. "
130 "It occurs when either the system limit for the maximum number of "
131 "semaphore sets (SEMMNI), or the system wide maximum number of "
132 "semaphores (SEMMNS), would be exceeded. You need to raise the "
133 "respective kernel parameter. Alternatively, reduce PostgreSQL's "
134 "consumption of semaphores by reducing its max_connections parameter.\n"
135 "The PostgreSQL documentation contains more information about "
136 "configuring your system for PostgreSQL.") : 0));
137 }
138
139 return semId;
140 }
141
142 /*
143 * Initialize a semaphore to the specified value.
144 */
145 static void
IpcSemaphoreInitialize(IpcSemaphoreId semId,int semNum,int value)146 IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value)
147 {
148 union semun semun;
149
150 semun.val = value;
151 if (semctl(semId, semNum, SETVAL, semun) < 0)
152 {
153 int saved_errno = errno;
154
155 ereport(FATAL,
156 (errmsg_internal("semctl(%d, %d, SETVAL, %d) failed: %m",
157 semId, semNum, value),
158 (saved_errno == ERANGE) ?
159 errhint("You possibly need to raise your kernel's SEMVMX value to be at least "
160 "%d. Look into the PostgreSQL documentation for details.",
161 value) : 0));
162 }
163 }
164
165 /*
166 * IpcSemaphoreKill(semId) - removes a semaphore set
167 */
168 static void
IpcSemaphoreKill(IpcSemaphoreId semId)169 IpcSemaphoreKill(IpcSemaphoreId semId)
170 {
171 union semun semun;
172
173 semun.val = 0; /* unused, but keep compiler quiet */
174
175 if (semctl(semId, 0, IPC_RMID, semun) < 0)
176 elog(LOG, "semctl(%d, 0, IPC_RMID, ...) failed: %m", semId);
177 }
178
179 /* Get the current value (semval) of the semaphore */
180 static int
IpcSemaphoreGetValue(IpcSemaphoreId semId,int semNum)181 IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum)
182 {
183 union semun dummy; /* for Solaris */
184
185 dummy.val = 0; /* unused */
186
187 return semctl(semId, semNum, GETVAL, dummy);
188 }
189
190 /* Get the PID of the last process to do semop() on the semaphore */
191 static pid_t
IpcSemaphoreGetLastPID(IpcSemaphoreId semId,int semNum)192 IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum)
193 {
194 union semun dummy; /* for Solaris */
195
196 dummy.val = 0; /* unused */
197
198 return semctl(semId, semNum, GETPID, dummy);
199 }
200
201
202 /*
203 * Create a semaphore set with the given number of useful semaphores
204 * (an additional sema is actually allocated to serve as identifier).
205 * Dead Postgres sema sets are recycled if found, but we do not fail
206 * upon collision with non-Postgres sema sets.
207 *
208 * The idea here is to detect and re-use keys that may have been assigned
209 * by a crashed postmaster or backend.
210 */
211 static IpcSemaphoreId
IpcSemaphoreCreate(int numSems)212 IpcSemaphoreCreate(int numSems)
213 {
214 IpcSemaphoreId semId;
215 union semun semun;
216 PGSemaphoreData mysema;
217
218 /* Loop till we find a free IPC key */
219 for (nextSemaKey++;; nextSemaKey++)
220 {
221 pid_t creatorPID;
222
223 /* Try to create new semaphore set */
224 semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
225 if (semId >= 0)
226 break; /* successful create */
227
228 /* See if it looks to be leftover from a dead Postgres process */
229 semId = semget(nextSemaKey, numSems + 1, 0);
230 if (semId < 0)
231 continue; /* failed: must be some other app's */
232 if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic)
233 continue; /* sema belongs to a non-Postgres app */
234
235 /*
236 * If the creator PID is my own PID or does not belong to any extant
237 * process, it's safe to zap it.
238 */
239 creatorPID = IpcSemaphoreGetLastPID(semId, numSems);
240 if (creatorPID <= 0)
241 continue; /* oops, GETPID failed */
242 if (creatorPID != getpid())
243 {
244 if (kill(creatorPID, 0) == 0 || errno != ESRCH)
245 continue; /* sema belongs to a live process */
246 }
247
248 /*
249 * The sema set appears to be from a dead Postgres process, or from a
250 * previous cycle of life in this same process. Zap it, if possible.
251 * This probably shouldn't fail, but if it does, assume the sema set
252 * belongs to someone else after all, and continue quietly.
253 */
254 semun.val = 0; /* unused, but keep compiler quiet */
255 if (semctl(semId, 0, IPC_RMID, semun) < 0)
256 continue;
257
258 /*
259 * Now try again to create the sema set.
260 */
261 semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1);
262 if (semId >= 0)
263 break; /* successful create */
264
265 /*
266 * Can only get here if some other process managed to create the same
267 * sema key before we did. Let him have that one, loop around to try
268 * next key.
269 */
270 }
271
272 /*
273 * OK, we created a new sema set. Mark it as created by this process. We
274 * do this by setting the spare semaphore to PGSemaMagic-1 and then
275 * incrementing it with semop(). That leaves it with value PGSemaMagic
276 * and sempid referencing this process.
277 */
278 IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1);
279 mysema.semId = semId;
280 mysema.semNum = numSems;
281 PGSemaphoreUnlock(&mysema);
282
283 return semId;
284 }
285
286
287 /*
288 * Report amount of shared memory needed for semaphores
289 */
290 Size
PGSemaphoreShmemSize(int maxSemas)291 PGSemaphoreShmemSize(int maxSemas)
292 {
293 return mul_size(maxSemas, sizeof(PGSemaphoreData));
294 }
295
296 /*
297 * PGReserveSemaphores --- initialize semaphore support
298 *
299 * This is called during postmaster start or shared memory reinitialization.
300 * It should do whatever is needed to be able to support up to maxSemas
301 * subsequent PGSemaphoreCreate calls. Also, if any system resources
302 * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit
303 * callback to release them.
304 *
305 * In the SysV implementation, we acquire semaphore sets on-demand; the
306 * maxSemas parameter is just used to size the arrays. There is an array
307 * of PGSemaphoreData structs in shared memory, and a postmaster-local array
308 * with one entry per SysV semaphore set, which we use for releasing the
309 * semaphore sets when done. (This design ensures that postmaster shutdown
310 * doesn't rely on the contents of shared memory, which a failed backend might
311 * have clobbered.)
312 */
313 void
PGReserveSemaphores(int maxSemas)314 PGReserveSemaphores(int maxSemas)
315 {
316 struct stat statbuf;
317
318 /*
319 * We use the data directory's inode number to seed the search for free
320 * semaphore keys. This minimizes the odds of collision with other
321 * postmasters, while maximizing the odds that we will detect and clean up
322 * semaphores left over from a crashed postmaster in our own directory.
323 */
324 if (stat(DataDir, &statbuf) < 0)
325 ereport(FATAL,
326 (errcode_for_file_access(),
327 errmsg("could not stat data directory \"%s\": %m",
328 DataDir)));
329
330 /*
331 * We must use ShmemAllocUnlocked(), since the spinlock protecting
332 * ShmemAlloc() won't be ready yet. (This ordering is necessary when we
333 * are emulating spinlocks with semaphores.)
334 */
335 sharedSemas = (PGSemaphore)
336 ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas));
337 numSharedSemas = 0;
338 maxSharedSemas = maxSemas;
339
340 maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
341 mySemaSets = (IpcSemaphoreId *)
342 malloc(maxSemaSets * sizeof(IpcSemaphoreId));
343 if (mySemaSets == NULL)
344 elog(PANIC, "out of memory");
345 numSemaSets = 0;
346 nextSemaKey = statbuf.st_ino;
347 nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */
348
349 on_shmem_exit(ReleaseSemaphores, 0);
350 }
351
352 /*
353 * Release semaphores at shutdown or shmem reinitialization
354 *
355 * (called as an on_shmem_exit callback, hence funny argument list)
356 */
357 static void
ReleaseSemaphores(int status,Datum arg)358 ReleaseSemaphores(int status, Datum arg)
359 {
360 int i;
361
362 for (i = 0; i < numSemaSets; i++)
363 IpcSemaphoreKill(mySemaSets[i]);
364 free(mySemaSets);
365 }
366
367 /*
368 * PGSemaphoreCreate
369 *
370 * Allocate a PGSemaphore structure with initial count 1
371 */
372 PGSemaphore
PGSemaphoreCreate(void)373 PGSemaphoreCreate(void)
374 {
375 PGSemaphore sema;
376
377 /* Can't do this in a backend, because static state is postmaster's */
378 Assert(!IsUnderPostmaster);
379
380 if (nextSemaNumber >= SEMAS_PER_SET)
381 {
382 /* Time to allocate another semaphore set */
383 if (numSemaSets >= maxSemaSets)
384 elog(PANIC, "too many semaphores created");
385 mySemaSets[numSemaSets] = IpcSemaphoreCreate(SEMAS_PER_SET);
386 numSemaSets++;
387 nextSemaNumber = 0;
388 }
389 /* Use the next shared PGSemaphoreData */
390 if (numSharedSemas >= maxSharedSemas)
391 elog(PANIC, "too many semaphores created");
392 sema = &sharedSemas[numSharedSemas++];
393 /* Assign the next free semaphore in the current set */
394 sema->semId = mySemaSets[numSemaSets - 1];
395 sema->semNum = nextSemaNumber++;
396 /* Initialize it to count 1 */
397 IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
398
399 return sema;
400 }
401
402 /*
403 * PGSemaphoreReset
404 *
405 * Reset a previously-initialized PGSemaphore to have count 0
406 */
407 void
PGSemaphoreReset(PGSemaphore sema)408 PGSemaphoreReset(PGSemaphore sema)
409 {
410 IpcSemaphoreInitialize(sema->semId, sema->semNum, 0);
411 }
412
413 /*
414 * PGSemaphoreLock
415 *
416 * Lock a semaphore (decrement count), blocking if count would be < 0
417 */
418 void
PGSemaphoreLock(PGSemaphore sema)419 PGSemaphoreLock(PGSemaphore sema)
420 {
421 int errStatus;
422 struct sembuf sops;
423
424 sops.sem_op = -1; /* decrement */
425 sops.sem_flg = 0;
426 sops.sem_num = sema->semNum;
427
428 /*
429 * Note: if errStatus is -1 and errno == EINTR then it means we returned
430 * from the operation prematurely because we were sent a signal. So we
431 * try and lock the semaphore again.
432 *
433 * We used to check interrupts here, but that required servicing
434 * interrupts directly from signal handlers. Which is hard to do safely
435 * and portably.
436 */
437 do
438 {
439 errStatus = semop(sema->semId, &sops, 1);
440 } while (errStatus < 0 && errno == EINTR);
441
442 if (errStatus < 0)
443 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
444 }
445
446 /*
447 * PGSemaphoreUnlock
448 *
449 * Unlock a semaphore (increment count)
450 */
451 void
PGSemaphoreUnlock(PGSemaphore sema)452 PGSemaphoreUnlock(PGSemaphore sema)
453 {
454 int errStatus;
455 struct sembuf sops;
456
457 sops.sem_op = 1; /* increment */
458 sops.sem_flg = 0;
459 sops.sem_num = sema->semNum;
460
461 /*
462 * Note: if errStatus is -1 and errno == EINTR then it means we returned
463 * from the operation prematurely because we were sent a signal. So we
464 * try and unlock the semaphore again. Not clear this can really happen,
465 * but might as well cope.
466 */
467 do
468 {
469 errStatus = semop(sema->semId, &sops, 1);
470 } while (errStatus < 0 && errno == EINTR);
471
472 if (errStatus < 0)
473 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
474 }
475
476 /*
477 * PGSemaphoreTryLock
478 *
479 * Lock a semaphore only if able to do so without blocking
480 */
481 bool
PGSemaphoreTryLock(PGSemaphore sema)482 PGSemaphoreTryLock(PGSemaphore sema)
483 {
484 int errStatus;
485 struct sembuf sops;
486
487 sops.sem_op = -1; /* decrement */
488 sops.sem_flg = IPC_NOWAIT; /* but don't block */
489 sops.sem_num = sema->semNum;
490
491 /*
492 * Note: if errStatus is -1 and errno == EINTR then it means we returned
493 * from the operation prematurely because we were sent a signal. So we
494 * try and lock the semaphore again.
495 */
496 do
497 {
498 errStatus = semop(sema->semId, &sops, 1);
499 } while (errStatus < 0 && errno == EINTR);
500
501 if (errStatus < 0)
502 {
503 /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */
504 #ifdef EAGAIN
505 if (errno == EAGAIN)
506 return false; /* failed to lock it */
507 #endif
508 #if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN))
509 if (errno == EWOULDBLOCK)
510 return false; /* failed to lock it */
511 #endif
512 /* Otherwise we got trouble */
513 elog(FATAL, "semop(id=%d) failed: %m", sema->semId);
514 }
515
516 return true;
517 }
518