1 /* -------------------------------------------------------------------------
2 *
3 * contrib/sepgsql/uavc.c
4 *
5 * Implementation of the userspace access vector cache, which caches
6 * recently used access control decisions and thereby reduces the number
7 * of kernel invocations, avoiding an unnecessary performance hit.
8 *
9 * Copyright (c) 2011-2018, PostgreSQL Global Development Group
10 *
11 * -------------------------------------------------------------------------
12 */
13 #include "postgres.h"
14
15 #include "access/hash.h"
16 #include "catalog/pg_proc.h"
17 #include "commands/seclabel.h"
18 #include "storage/ipc.h"
19 #include "utils/guc.h"
20 #include "utils/memutils.h"
21
22 #include "sepgsql.h"
23
24 /*
25 * avc_cache
26 *
27 * It enables to cache access control decision (and behavior on execution of
28 * trusted procedure, db_procedure class only) for a particular pair of
29 * security labels and object class in userspace.
30 */
31 typedef struct
32 {
33 uint32 hash; /* hash value of this cache entry */
34 char *scontext; /* security context of the subject */
35 char *tcontext; /* security context of the target */
36 uint16 tclass; /* object class of the target */
37
38 uint32 allowed; /* permissions to be allowed */
39 uint32 auditallow; /* permissions to be audited on allowed */
40 uint32 auditdeny; /* permissions to be audited on denied */
41
42 bool permissive; /* true, if permissive rule */
43 bool hot_cache; /* true, if recently referenced */
44 bool tcontext_is_valid;
45 /* true, if tcontext is valid */
46 char *ncontext; /* temporary scontext on execution of trusted
47 * procedure, or NULL elsewhere */
48 } avc_cache;
49
50 /*
51 * Declaration of static variables
52 */
53 #define AVC_NUM_SLOTS 512
54 #define AVC_NUM_RECLAIM 16
55 #define AVC_DEF_THRESHOLD 384
56
57 static MemoryContext avc_mem_cxt;
58 static List *avc_slots[AVC_NUM_SLOTS]; /* avc's hash buckets */
59 static int avc_num_caches; /* number of caches currently used */
60 static int avc_lru_hint; /* index of the buckets to be reclaimed next */
61 static int avc_threshold; /* threshold to launch cache-reclaiming */
62 static char *avc_unlabeled; /* system 'unlabeled' label */
63
64 /*
65 * Hash function
66 */
67 static uint32
sepgsql_avc_hash(const char * scontext,const char * tcontext,uint16 tclass)68 sepgsql_avc_hash(const char *scontext, const char *tcontext, uint16 tclass)
69 {
70 return hash_any((const unsigned char *) scontext, strlen(scontext))
71 ^ hash_any((const unsigned char *) tcontext, strlen(tcontext))
72 ^ tclass;
73 }
74
75 /*
76 * Reset all the avc caches
77 */
78 static void
sepgsql_avc_reset(void)79 sepgsql_avc_reset(void)
80 {
81 MemoryContextReset(avc_mem_cxt);
82
83 memset(avc_slots, 0, sizeof(List *) * AVC_NUM_SLOTS);
84 avc_num_caches = 0;
85 avc_lru_hint = 0;
86 avc_unlabeled = NULL;
87 }
88
89 /*
90 * Reclaim caches recently unreferenced
91 */
92 static void
sepgsql_avc_reclaim(void)93 sepgsql_avc_reclaim(void)
94 {
95 ListCell *cell;
96 ListCell *next;
97 ListCell *prev;
98 int index;
99
100 while (avc_num_caches >= avc_threshold - AVC_NUM_RECLAIM)
101 {
102 index = avc_lru_hint;
103
104 prev = NULL;
105 for (cell = list_head(avc_slots[index]); cell; cell = next)
106 {
107 avc_cache *cache = lfirst(cell);
108
109 next = lnext(cell);
110 if (!cache->hot_cache)
111 {
112 avc_slots[index]
113 = list_delete_cell(avc_slots[index], cell, prev);
114
115 pfree(cache->scontext);
116 pfree(cache->tcontext);
117 if (cache->ncontext)
118 pfree(cache->ncontext);
119 pfree(cache);
120
121 avc_num_caches--;
122 }
123 else
124 {
125 cache->hot_cache = false;
126 prev = cell;
127 }
128 }
129 avc_lru_hint = (avc_lru_hint + 1) % AVC_NUM_SLOTS;
130 }
131 }
132
133 /* -------------------------------------------------------------------------
134 *
135 * sepgsql_avc_check_valid
136 *
137 * This function checks whether the cached entries are still valid. If
138 * the security policy has been reloaded (or any other events that requires
139 * resetting userspace caches has occurred) since the last reference to
140 * the access vector cache, we must flush the cache.
141 *
142 * Access control decisions must be atomic, but multiple system calls may
143 * be required to make a decision; thus, when referencing the access vector
144 * cache, we must loop until we complete without an intervening cache flush
145 * event. In practice, looping even once should be very rare. Callers should
146 * do something like this:
147 *
148 * sepgsql_avc_check_valid();
149 * do {
150 * :
151 * <reference to uavc>
152 * :
153 * } while (!sepgsql_avc_check_valid())
154 *
155 * -------------------------------------------------------------------------
156 */
157 static bool
sepgsql_avc_check_valid(void)158 sepgsql_avc_check_valid(void)
159 {
160 if (selinux_status_updated() > 0)
161 {
162 sepgsql_avc_reset();
163
164 return false;
165 }
166 return true;
167 }
168
169 /*
170 * sepgsql_avc_unlabeled
171 *
172 * Returns an alternative label to be applied when no label or an invalid
173 * label would otherwise be assigned.
174 */
175 static char *
sepgsql_avc_unlabeled(void)176 sepgsql_avc_unlabeled(void)
177 {
178 if (!avc_unlabeled)
179 {
180 security_context_t unlabeled;
181
182 if (security_get_initial_context_raw("unlabeled", &unlabeled) < 0)
183 ereport(ERROR,
184 (errcode(ERRCODE_INTERNAL_ERROR),
185 errmsg("SELinux: failed to get initial security label: %m")));
186 PG_TRY();
187 {
188 avc_unlabeled = MemoryContextStrdup(avc_mem_cxt, unlabeled);
189 }
190 PG_CATCH();
191 {
192 freecon(unlabeled);
193 PG_RE_THROW();
194 }
195 PG_END_TRY();
196
197 freecon(unlabeled);
198 }
199 return avc_unlabeled;
200 }
201
202 /*
203 * sepgsql_avc_compute
204 *
205 * A fallback path, when cache mishit. It asks SELinux its access control
206 * decision for the supplied pair of security context and object class.
207 */
208 static avc_cache *
sepgsql_avc_compute(const char * scontext,const char * tcontext,uint16 tclass)209 sepgsql_avc_compute(const char *scontext, const char *tcontext, uint16 tclass)
210 {
211 char *ucontext = NULL;
212 char *ncontext = NULL;
213 MemoryContext oldctx;
214 avc_cache *cache;
215 uint32 hash;
216 int index;
217 struct av_decision avd;
218
219 hash = sepgsql_avc_hash(scontext, tcontext, tclass);
220 index = hash % AVC_NUM_SLOTS;
221
222 /*
223 * Validation check of the supplied security context. Because it always
224 * invoke system-call, frequent check should be avoided. Unless security
225 * policy is reloaded, validation status shall be kept, so we also cache
226 * whether the supplied security context was valid, or not.
227 */
228 if (security_check_context_raw((security_context_t) tcontext) != 0)
229 ucontext = sepgsql_avc_unlabeled();
230
231 /*
232 * Ask SELinux its access control decision
233 */
234 if (!ucontext)
235 sepgsql_compute_avd(scontext, tcontext, tclass, &avd);
236 else
237 sepgsql_compute_avd(scontext, ucontext, tclass, &avd);
238
239 /*
240 * It also caches a security label to be switched when a client labeled as
241 * 'scontext' executes a procedure labeled as 'tcontext', not only access
242 * control decision on the procedure. The security label to be switched
243 * shall be computed uniquely on a pair of 'scontext' and 'tcontext',
244 * thus, it is reasonable to cache the new label on avc, and enables to
245 * reduce unnecessary system calls. It shall be referenced at
246 * sepgsql_needs_fmgr_hook to check whether the supplied function is a
247 * trusted procedure, or not.
248 */
249 if (tclass == SEPG_CLASS_DB_PROCEDURE)
250 {
251 if (!ucontext)
252 ncontext = sepgsql_compute_create(scontext, tcontext,
253 SEPG_CLASS_PROCESS, NULL);
254 else
255 ncontext = sepgsql_compute_create(scontext, ucontext,
256 SEPG_CLASS_PROCESS, NULL);
257 if (strcmp(scontext, ncontext) == 0)
258 {
259 pfree(ncontext);
260 ncontext = NULL;
261 }
262 }
263
264 /*
265 * Set up an avc_cache object
266 */
267 oldctx = MemoryContextSwitchTo(avc_mem_cxt);
268
269 cache = palloc0(sizeof(avc_cache));
270
271 cache->hash = hash;
272 cache->scontext = pstrdup(scontext);
273 cache->tcontext = pstrdup(tcontext);
274 cache->tclass = tclass;
275
276 cache->allowed = avd.allowed;
277 cache->auditallow = avd.auditallow;
278 cache->auditdeny = avd.auditdeny;
279 cache->hot_cache = true;
280 if (avd.flags & SELINUX_AVD_FLAGS_PERMISSIVE)
281 cache->permissive = true;
282 if (!ucontext)
283 cache->tcontext_is_valid = true;
284 if (ncontext)
285 cache->ncontext = pstrdup(ncontext);
286
287 avc_num_caches++;
288
289 if (avc_num_caches > avc_threshold)
290 sepgsql_avc_reclaim();
291
292 avc_slots[index] = lcons(cache, avc_slots[index]);
293
294 MemoryContextSwitchTo(oldctx);
295
296 return cache;
297 }
298
299 /*
300 * sepgsql_avc_lookup
301 *
302 * Look up a cache entry that matches the supplied security contexts and
303 * object class. If not found, create a new cache entry.
304 */
305 static avc_cache *
sepgsql_avc_lookup(const char * scontext,const char * tcontext,uint16 tclass)306 sepgsql_avc_lookup(const char *scontext, const char *tcontext, uint16 tclass)
307 {
308 avc_cache *cache;
309 ListCell *cell;
310 uint32 hash;
311 int index;
312
313 hash = sepgsql_avc_hash(scontext, tcontext, tclass);
314 index = hash % AVC_NUM_SLOTS;
315
316 foreach(cell, avc_slots[index])
317 {
318 cache = lfirst(cell);
319
320 if (cache->hash == hash &&
321 cache->tclass == tclass &&
322 strcmp(cache->tcontext, tcontext) == 0 &&
323 strcmp(cache->scontext, scontext) == 0)
324 {
325 cache->hot_cache = true;
326 return cache;
327 }
328 }
329 /* not found, so insert a new cache */
330 return sepgsql_avc_compute(scontext, tcontext, tclass);
331 }
332
333 /*
334 * sepgsql_avc_check_perms(_label)
335 *
336 * It returns 'true', if the security policy suggested to allow the required
337 * permissions. Otherwise, it returns 'false' or raises an error according
338 * to the 'abort_on_violation' argument.
339 * The 'tobject' and 'tclass' identify the target object being referenced,
340 * and 'required' is a bitmask of permissions (SEPG_*__*) defined for each
341 * object classes.
342 * The 'audit_name' is the object name (optional). If SEPGSQL_AVC_NOAUDIT
343 * was supplied, it means to skip all the audit messages.
344 */
345 bool
sepgsql_avc_check_perms_label(const char * tcontext,uint16 tclass,uint32 required,const char * audit_name,bool abort_on_violation)346 sepgsql_avc_check_perms_label(const char *tcontext,
347 uint16 tclass, uint32 required,
348 const char *audit_name,
349 bool abort_on_violation)
350 {
351 char *scontext = sepgsql_get_client_label();
352 avc_cache *cache;
353 uint32 denied;
354 uint32 audited;
355 bool result;
356
357 sepgsql_avc_check_valid();
358 do
359 {
360 result = true;
361
362 /*
363 * If the target object is unlabeled, we perform the check using the
364 * label supplied by sepgsql_avc_unlabeled().
365 */
366 if (tcontext)
367 cache = sepgsql_avc_lookup(scontext, tcontext, tclass);
368 else
369 cache = sepgsql_avc_lookup(scontext,
370 sepgsql_avc_unlabeled(), tclass);
371
372 denied = required & ~cache->allowed;
373
374 /*
375 * Compute permissions to be audited
376 */
377 if (sepgsql_get_debug_audit())
378 audited = (denied ? (denied & ~0) : (required & ~0));
379 else
380 audited = denied ? (denied & cache->auditdeny)
381 : (required & cache->auditallow);
382
383 if (denied)
384 {
385 /*
386 * In permissive mode or permissive domain, violated permissions
387 * shall be audited to the log files at once, and then implicitly
388 * allowed to avoid a flood of access denied logs, because the
389 * purpose of permissive mode/domain is to collect a violation log
390 * that will make it possible to fix up the security policy.
391 */
392 if (!sepgsql_getenforce() || cache->permissive)
393 cache->allowed |= required;
394 else
395 result = false;
396 }
397 } while (!sepgsql_avc_check_valid());
398
399 /*
400 * In the case when we have something auditable actions here,
401 * sepgsql_audit_log shall be called with text representation of security
402 * labels for both of subject and object. It records this access
403 * violation, so DBA will be able to find out unexpected security problems
404 * later.
405 */
406 if (audited != 0 &&
407 audit_name != SEPGSQL_AVC_NOAUDIT &&
408 sepgsql_get_mode() != SEPGSQL_MODE_INTERNAL)
409 {
410 sepgsql_audit_log(denied != 0,
411 cache->scontext,
412 cache->tcontext_is_valid ?
413 cache->tcontext : sepgsql_avc_unlabeled(),
414 cache->tclass,
415 audited,
416 audit_name);
417 }
418
419 if (abort_on_violation && !result)
420 ereport(ERROR,
421 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
422 errmsg("SELinux: security policy violation")));
423
424 return result;
425 }
426
427 bool
sepgsql_avc_check_perms(const ObjectAddress * tobject,uint16 tclass,uint32 required,const char * audit_name,bool abort_on_violation)428 sepgsql_avc_check_perms(const ObjectAddress *tobject,
429 uint16 tclass, uint32 required,
430 const char *audit_name,
431 bool abort_on_violation)
432 {
433 char *tcontext = GetSecurityLabel(tobject, SEPGSQL_LABEL_TAG);
434 bool rc;
435
436 rc = sepgsql_avc_check_perms_label(tcontext,
437 tclass, required,
438 audit_name, abort_on_violation);
439 if (tcontext)
440 pfree(tcontext);
441
442 return rc;
443 }
444
445 /*
446 * sepgsql_avc_trusted_proc
447 *
448 * If the supplied function OID is configured as a trusted procedure, this
449 * function will return a security label to be used during the execution of
450 * that function. Otherwise, it returns NULL.
451 */
452 char *
sepgsql_avc_trusted_proc(Oid functionId)453 sepgsql_avc_trusted_proc(Oid functionId)
454 {
455 char *scontext = sepgsql_get_client_label();
456 char *tcontext;
457 ObjectAddress tobject;
458 avc_cache *cache;
459
460 tobject.classId = ProcedureRelationId;
461 tobject.objectId = functionId;
462 tobject.objectSubId = 0;
463 tcontext = GetSecurityLabel(&tobject, SEPGSQL_LABEL_TAG);
464
465 sepgsql_avc_check_valid();
466 do
467 {
468 if (tcontext)
469 cache = sepgsql_avc_lookup(scontext, tcontext,
470 SEPG_CLASS_DB_PROCEDURE);
471 else
472 cache = sepgsql_avc_lookup(scontext, sepgsql_avc_unlabeled(),
473 SEPG_CLASS_DB_PROCEDURE);
474 } while (!sepgsql_avc_check_valid());
475
476 return cache->ncontext;
477 }
478
479 /*
480 * sepgsql_avc_exit
481 *
482 * Clean up userspace AVC on process exit.
483 */
484 static void
sepgsql_avc_exit(int code,Datum arg)485 sepgsql_avc_exit(int code, Datum arg)
486 {
487 selinux_status_close();
488 }
489
490 /*
491 * sepgsql_avc_init
492 *
493 * Initialize the userspace AVC. This should be called from _PG_init.
494 */
495 void
sepgsql_avc_init(void)496 sepgsql_avc_init(void)
497 {
498 int rc;
499
500 /*
501 * All the avc stuff shall be allocated in avc_mem_cxt
502 */
503 avc_mem_cxt = AllocSetContextCreate(TopMemoryContext,
504 "userspace access vector cache",
505 ALLOCSET_DEFAULT_SIZES);
506 memset(avc_slots, 0, sizeof(avc_slots));
507 avc_num_caches = 0;
508 avc_lru_hint = 0;
509 avc_threshold = AVC_DEF_THRESHOLD;
510
511 /*
512 * SELinux allows to mmap(2) its kernel status page in read-only mode to
513 * inform userspace applications its status updating (such as policy
514 * reloading) without system-call invocations. This feature is only
515 * supported in Linux-2.6.38 or later, however, libselinux provides a
516 * fallback mode to know its status using netlink sockets.
517 */
518 rc = selinux_status_open(1);
519 if (rc < 0)
520 ereport(ERROR,
521 (errcode(ERRCODE_INTERNAL_ERROR),
522 errmsg("SELinux: could not open selinux status : %m")));
523 else if (rc > 0)
524 ereport(LOG,
525 (errmsg("SELinux: kernel status page uses fallback mode")));
526
527 /* Arrange to close selinux status page on process exit. */
528 on_proc_exit(sepgsql_avc_exit, 0);
529 }
530