1 /* -------------------------------------------------------------------------
2  *
3  * contrib/sepgsql/uavc.c
4  *
5  * Implementation of userspace access vector cache; that enables to cache
6  * access control decisions recently used, and reduce number of kernel
7  * invocations to avoid unnecessary performance hit.
8  *
9  * Copyright (c) 2011-2018, PostgreSQL Global Development Group
10  *
11  * -------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/hash.h"
16 #include "catalog/pg_proc.h"
17 #include "commands/seclabel.h"
18 #include "storage/ipc.h"
19 #include "utils/guc.h"
20 #include "utils/memutils.h"
21 
22 #include "sepgsql.h"
23 
/*
 * avc_cache
 *
 * One cached access control decision, keyed by the triple
 * (scontext, tcontext, tclass).  For the db_procedure class the entry also
 * remembers the label to switch to when the procedure is executed
 * (see sepgsql_avc_compute).
 *
 * Entries and the strings they point to are allocated in avc_mem_cxt.
 */
typedef struct
{
	uint32		hash;			/* sepgsql_avc_hash() of the key triple */
	char	   *scontext;		/* security context of the subject */
	char	   *tcontext;		/* security context of the target, stored as
								 * supplied by the caller even if invalid */
	uint16		tclass;			/* object class of the target */

	uint32		allowed;		/* permissions to be allowed */
	uint32		auditallow;		/* permissions to be audited on allowed */
	uint32		auditdeny;		/* permissions to be audited on denied */

	bool		permissive;		/* true, if permissive rule */
	bool		hot_cache;		/* true, if recently referenced; cleared by
								 * sepgsql_avc_reclaim on each pass */
	/*
	 * true, if tcontext is valid.  When false, the decision was computed
	 * against the system 'unlabeled' label instead of tcontext.
	 */
	bool		tcontext_is_valid;
	char	   *ncontext;		/* temporary scontext on execution of trusted
								 * procedure, or NULL elsewhere */
}			avc_cache;
49 
/*
 * Declaration of static variables
 *
 * AVC_NUM_SLOTS     - number of hash buckets in avc_slots[]
 * AVC_NUM_RECLAIM   - once reclaiming starts, it continues until the number
 *                     of entries is below (avc_threshold - AVC_NUM_RECLAIM)
 * AVC_DEF_THRESHOLD - initial value of avc_threshold
 */
#define AVC_NUM_SLOTS		512
#define AVC_NUM_RECLAIM		16
#define AVC_DEF_THRESHOLD	384

static MemoryContext avc_mem_cxt;	/* owns every cache entry and string */
static List *avc_slots[AVC_NUM_SLOTS];	/* avc's hash buckets */
static int	avc_num_caches;		/* number of caches currently used */
static int	avc_lru_hint;		/* index of the buckets to be reclaimed next */
static int	avc_threshold;		/* threshold to launch cache-reclaiming */
static char *avc_unlabeled;		/* system 'unlabeled' label; lazily fetched,
								 * NULL until first use and after a reset */
63 
64 /*
65  * Hash function
66  */
67 static uint32
sepgsql_avc_hash(const char * scontext,const char * tcontext,uint16 tclass)68 sepgsql_avc_hash(const char *scontext, const char *tcontext, uint16 tclass)
69 {
70 	return hash_any((const unsigned char *) scontext, strlen(scontext))
71 		^ hash_any((const unsigned char *) tcontext, strlen(tcontext))
72 		^ tclass;
73 }
74 
75 /*
76  * Reset all the avc caches
77  */
78 static void
sepgsql_avc_reset(void)79 sepgsql_avc_reset(void)
80 {
81 	MemoryContextReset(avc_mem_cxt);
82 
83 	memset(avc_slots, 0, sizeof(List *) * AVC_NUM_SLOTS);
84 	avc_num_caches = 0;
85 	avc_lru_hint = 0;
86 	avc_unlabeled = NULL;
87 }
88 
89 /*
90  * Reclaim caches recently unreferenced
91  */
92 static void
sepgsql_avc_reclaim(void)93 sepgsql_avc_reclaim(void)
94 {
95 	ListCell   *cell;
96 	ListCell   *next;
97 	ListCell   *prev;
98 	int			index;
99 
100 	while (avc_num_caches >= avc_threshold - AVC_NUM_RECLAIM)
101 	{
102 		index = avc_lru_hint;
103 
104 		prev = NULL;
105 		for (cell = list_head(avc_slots[index]); cell; cell = next)
106 		{
107 			avc_cache  *cache = lfirst(cell);
108 
109 			next = lnext(cell);
110 			if (!cache->hot_cache)
111 			{
112 				avc_slots[index]
113 					= list_delete_cell(avc_slots[index], cell, prev);
114 
115 				pfree(cache->scontext);
116 				pfree(cache->tcontext);
117 				if (cache->ncontext)
118 					pfree(cache->ncontext);
119 				pfree(cache);
120 
121 				avc_num_caches--;
122 			}
123 			else
124 			{
125 				cache->hot_cache = false;
126 				prev = cell;
127 			}
128 		}
129 		avc_lru_hint = (avc_lru_hint + 1) % AVC_NUM_SLOTS;
130 	}
131 }
132 
/* -------------------------------------------------------------------------
 *
 * sepgsql_avc_check_valid
 *
 * Returns true if the cached entries may still be used.  When
 * selinux_status_updated() reports a positive value (e.g. the security
 * policy has been reloaded since the previous check), the whole cache is
 * flushed and false is returned.
 *
 * A single access control decision may need several system calls, yet it
 * must be atomic with respect to such flush events.  Callers therefore
 * retry until one full pass completes without an intervening flush, which
 * in practice almost never takes more than one iteration:
 *
 *	 sepgsql_avc_check_valid();
 *	 do {
 *			 :
 *		 <reference to uavc>
 *			 :
 *	 } while (!sepgsql_avc_check_valid())
 *
 * -------------------------------------------------------------------------
 */
static bool
sepgsql_avc_check_valid(void)
{
	/* zero or negative result: nothing to flush */
	if (selinux_status_updated() <= 0)
		return true;

	sepgsql_avc_reset();
	return false;
}
168 
169 /*
170  * sepgsql_avc_unlabeled
171  *
172  * Returns an alternative label to be applied when no label or an invalid
173  * label would otherwise be assigned.
174  */
175 static char *
sepgsql_avc_unlabeled(void)176 sepgsql_avc_unlabeled(void)
177 {
178 	if (!avc_unlabeled)
179 	{
180 		security_context_t unlabeled;
181 
182 		if (security_get_initial_context_raw("unlabeled", &unlabeled) < 0)
183 			ereport(ERROR,
184 					(errcode(ERRCODE_INTERNAL_ERROR),
185 					 errmsg("SELinux: failed to get initial security label: %m")));
186 		PG_TRY();
187 		{
188 			avc_unlabeled = MemoryContextStrdup(avc_mem_cxt, unlabeled);
189 		}
190 		PG_CATCH();
191 		{
192 			freecon(unlabeled);
193 			PG_RE_THROW();
194 		}
195 		PG_END_TRY();
196 
197 		freecon(unlabeled);
198 	}
199 	return avc_unlabeled;
200 }
201 
202 /*
203  * sepgsql_avc_compute
204  *
205  * A fallback path, when cache mishit. It asks SELinux its access control
206  * decision for the supplied pair of security context and object class.
207  */
208 static avc_cache *
sepgsql_avc_compute(const char * scontext,const char * tcontext,uint16 tclass)209 sepgsql_avc_compute(const char *scontext, const char *tcontext, uint16 tclass)
210 {
211 	char	   *ucontext = NULL;
212 	char	   *ncontext = NULL;
213 	MemoryContext oldctx;
214 	avc_cache  *cache;
215 	uint32		hash;
216 	int			index;
217 	struct av_decision avd;
218 
219 	hash = sepgsql_avc_hash(scontext, tcontext, tclass);
220 	index = hash % AVC_NUM_SLOTS;
221 
222 	/*
223 	 * Validation check of the supplied security context. Because it always
224 	 * invoke system-call, frequent check should be avoided. Unless security
225 	 * policy is reloaded, validation status shall be kept, so we also cache
226 	 * whether the supplied security context was valid, or not.
227 	 */
228 	if (security_check_context_raw((security_context_t) tcontext) != 0)
229 		ucontext = sepgsql_avc_unlabeled();
230 
231 	/*
232 	 * Ask SELinux its access control decision
233 	 */
234 	if (!ucontext)
235 		sepgsql_compute_avd(scontext, tcontext, tclass, &avd);
236 	else
237 		sepgsql_compute_avd(scontext, ucontext, tclass, &avd);
238 
239 	/*
240 	 * It also caches a security label to be switched when a client labeled as
241 	 * 'scontext' executes a procedure labeled as 'tcontext', not only access
242 	 * control decision on the procedure. The security label to be switched
243 	 * shall be computed uniquely on a pair of 'scontext' and 'tcontext',
244 	 * thus, it is reasonable to cache the new label on avc, and enables to
245 	 * reduce unnecessary system calls. It shall be referenced at
246 	 * sepgsql_needs_fmgr_hook to check whether the supplied function is a
247 	 * trusted procedure, or not.
248 	 */
249 	if (tclass == SEPG_CLASS_DB_PROCEDURE)
250 	{
251 		if (!ucontext)
252 			ncontext = sepgsql_compute_create(scontext, tcontext,
253 											  SEPG_CLASS_PROCESS, NULL);
254 		else
255 			ncontext = sepgsql_compute_create(scontext, ucontext,
256 											  SEPG_CLASS_PROCESS, NULL);
257 		if (strcmp(scontext, ncontext) == 0)
258 		{
259 			pfree(ncontext);
260 			ncontext = NULL;
261 		}
262 	}
263 
264 	/*
265 	 * Set up an avc_cache object
266 	 */
267 	oldctx = MemoryContextSwitchTo(avc_mem_cxt);
268 
269 	cache = palloc0(sizeof(avc_cache));
270 
271 	cache->hash = hash;
272 	cache->scontext = pstrdup(scontext);
273 	cache->tcontext = pstrdup(tcontext);
274 	cache->tclass = tclass;
275 
276 	cache->allowed = avd.allowed;
277 	cache->auditallow = avd.auditallow;
278 	cache->auditdeny = avd.auditdeny;
279 	cache->hot_cache = true;
280 	if (avd.flags & SELINUX_AVD_FLAGS_PERMISSIVE)
281 		cache->permissive = true;
282 	if (!ucontext)
283 		cache->tcontext_is_valid = true;
284 	if (ncontext)
285 		cache->ncontext = pstrdup(ncontext);
286 
287 	avc_num_caches++;
288 
289 	if (avc_num_caches > avc_threshold)
290 		sepgsql_avc_reclaim();
291 
292 	avc_slots[index] = lcons(cache, avc_slots[index]);
293 
294 	MemoryContextSwitchTo(oldctx);
295 
296 	return cache;
297 }
298 
299 /*
300  * sepgsql_avc_lookup
301  *
302  * Look up a cache entry that matches the supplied security contexts and
303  * object class.  If not found, create a new cache entry.
304  */
305 static avc_cache *
sepgsql_avc_lookup(const char * scontext,const char * tcontext,uint16 tclass)306 sepgsql_avc_lookup(const char *scontext, const char *tcontext, uint16 tclass)
307 {
308 	avc_cache  *cache;
309 	ListCell   *cell;
310 	uint32		hash;
311 	int			index;
312 
313 	hash = sepgsql_avc_hash(scontext, tcontext, tclass);
314 	index = hash % AVC_NUM_SLOTS;
315 
316 	foreach(cell, avc_slots[index])
317 	{
318 		cache = lfirst(cell);
319 
320 		if (cache->hash == hash &&
321 			cache->tclass == tclass &&
322 			strcmp(cache->tcontext, tcontext) == 0 &&
323 			strcmp(cache->scontext, scontext) == 0)
324 		{
325 			cache->hot_cache = true;
326 			return cache;
327 		}
328 	}
329 	/* not found, so insert a new cache */
330 	return sepgsql_avc_compute(scontext, tcontext, tclass);
331 }
332 
333 /*
334  * sepgsql_avc_check_perms(_label)
335  *
336  * It returns 'true', if the security policy suggested to allow the required
337  * permissions. Otherwise, it returns 'false' or raises an error according
338  * to the 'abort_on_violation' argument.
339  * The 'tobject' and 'tclass' identify the target object being referenced,
340  * and 'required' is a bitmask of permissions (SEPG_*__*) defined for each
341  * object classes.
342  * The 'audit_name' is the object name (optional). If SEPGSQL_AVC_NOAUDIT
343  * was supplied, it means to skip all the audit messages.
344  */
345 bool
sepgsql_avc_check_perms_label(const char * tcontext,uint16 tclass,uint32 required,const char * audit_name,bool abort_on_violation)346 sepgsql_avc_check_perms_label(const char *tcontext,
347 							  uint16 tclass, uint32 required,
348 							  const char *audit_name,
349 							  bool abort_on_violation)
350 {
351 	char	   *scontext = sepgsql_get_client_label();
352 	avc_cache  *cache;
353 	uint32		denied;
354 	uint32		audited;
355 	bool		result;
356 
357 	sepgsql_avc_check_valid();
358 	do
359 	{
360 		result = true;
361 
362 		/*
363 		 * If the target object is unlabeled, we perform the check using the
364 		 * label supplied by sepgsql_avc_unlabeled().
365 		 */
366 		if (tcontext)
367 			cache = sepgsql_avc_lookup(scontext, tcontext, tclass);
368 		else
369 			cache = sepgsql_avc_lookup(scontext,
370 									   sepgsql_avc_unlabeled(), tclass);
371 
372 		denied = required & ~cache->allowed;
373 
374 		/*
375 		 * Compute permissions to be audited
376 		 */
377 		if (sepgsql_get_debug_audit())
378 			audited = (denied ? (denied & ~0) : (required & ~0));
379 		else
380 			audited = denied ? (denied & cache->auditdeny)
381 				: (required & cache->auditallow);
382 
383 		if (denied)
384 		{
385 			/*
386 			 * In permissive mode or permissive domain, violated permissions
387 			 * shall be audited to the log files at once, and then implicitly
388 			 * allowed to avoid a flood of access denied logs, because the
389 			 * purpose of permissive mode/domain is to collect a violation log
390 			 * that will make it possible to fix up the security policy.
391 			 */
392 			if (!sepgsql_getenforce() || cache->permissive)
393 				cache->allowed |= required;
394 			else
395 				result = false;
396 		}
397 	} while (!sepgsql_avc_check_valid());
398 
399 	/*
400 	 * In the case when we have something auditable actions here,
401 	 * sepgsql_audit_log shall be called with text representation of security
402 	 * labels for both of subject and object. It records this access
403 	 * violation, so DBA will be able to find out unexpected security problems
404 	 * later.
405 	 */
406 	if (audited != 0 &&
407 		audit_name != SEPGSQL_AVC_NOAUDIT &&
408 		sepgsql_get_mode() != SEPGSQL_MODE_INTERNAL)
409 	{
410 		sepgsql_audit_log(denied != 0,
411 						  cache->scontext,
412 						  cache->tcontext_is_valid ?
413 						  cache->tcontext : sepgsql_avc_unlabeled(),
414 						  cache->tclass,
415 						  audited,
416 						  audit_name);
417 	}
418 
419 	if (abort_on_violation && !result)
420 		ereport(ERROR,
421 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
422 				 errmsg("SELinux: security policy violation")));
423 
424 	return result;
425 }
426 
427 bool
sepgsql_avc_check_perms(const ObjectAddress * tobject,uint16 tclass,uint32 required,const char * audit_name,bool abort_on_violation)428 sepgsql_avc_check_perms(const ObjectAddress *tobject,
429 						uint16 tclass, uint32 required,
430 						const char *audit_name,
431 						bool abort_on_violation)
432 {
433 	char	   *tcontext = GetSecurityLabel(tobject, SEPGSQL_LABEL_TAG);
434 	bool		rc;
435 
436 	rc = sepgsql_avc_check_perms_label(tcontext,
437 									   tclass, required,
438 									   audit_name, abort_on_violation);
439 	if (tcontext)
440 		pfree(tcontext);
441 
442 	return rc;
443 }
444 
445 /*
446  * sepgsql_avc_trusted_proc
447  *
448  * If the supplied function OID is configured as a trusted procedure, this
449  * function will return a security label to be used during the execution of
450  * that function.  Otherwise, it returns NULL.
451  */
452 char *
sepgsql_avc_trusted_proc(Oid functionId)453 sepgsql_avc_trusted_proc(Oid functionId)
454 {
455 	char	   *scontext = sepgsql_get_client_label();
456 	char	   *tcontext;
457 	ObjectAddress tobject;
458 	avc_cache  *cache;
459 
460 	tobject.classId = ProcedureRelationId;
461 	tobject.objectId = functionId;
462 	tobject.objectSubId = 0;
463 	tcontext = GetSecurityLabel(&tobject, SEPGSQL_LABEL_TAG);
464 
465 	sepgsql_avc_check_valid();
466 	do
467 	{
468 		if (tcontext)
469 			cache = sepgsql_avc_lookup(scontext, tcontext,
470 									   SEPG_CLASS_DB_PROCEDURE);
471 		else
472 			cache = sepgsql_avc_lookup(scontext, sepgsql_avc_unlabeled(),
473 									   SEPG_CLASS_DB_PROCEDURE);
474 	} while (!sepgsql_avc_check_valid());
475 
476 	return cache->ncontext;
477 }
478 
479 /*
480  * sepgsql_avc_exit
481  *
482  * Clean up userspace AVC on process exit.
483  */
484 static void
sepgsql_avc_exit(int code,Datum arg)485 sepgsql_avc_exit(int code, Datum arg)
486 {
487 	selinux_status_close();
488 }
489 
490 /*
491  * sepgsql_avc_init
492  *
493  * Initialize the userspace AVC.  This should be called from _PG_init.
494  */
495 void
sepgsql_avc_init(void)496 sepgsql_avc_init(void)
497 {
498 	int			rc;
499 
500 	/*
501 	 * All the avc stuff shall be allocated in avc_mem_cxt
502 	 */
503 	avc_mem_cxt = AllocSetContextCreate(TopMemoryContext,
504 										"userspace access vector cache",
505 										ALLOCSET_DEFAULT_SIZES);
506 	memset(avc_slots, 0, sizeof(avc_slots));
507 	avc_num_caches = 0;
508 	avc_lru_hint = 0;
509 	avc_threshold = AVC_DEF_THRESHOLD;
510 
511 	/*
512 	 * SELinux allows to mmap(2) its kernel status page in read-only mode to
513 	 * inform userspace applications its status updating (such as policy
514 	 * reloading) without system-call invocations. This feature is only
515 	 * supported in Linux-2.6.38 or later, however, libselinux provides a
516 	 * fallback mode to know its status using netlink sockets.
517 	 */
518 	rc = selinux_status_open(1);
519 	if (rc < 0)
520 		ereport(ERROR,
521 				(errcode(ERRCODE_INTERNAL_ERROR),
522 				 errmsg("SELinux: could not open selinux status : %m")));
523 	else if (rc > 0)
524 		ereport(LOG,
525 				(errmsg("SELinux: kernel status page uses fallback mode")));
526 
527 	/* Arrange to close selinux status page on process exit. */
528 	on_proc_exit(sepgsql_avc_exit, 0);
529 }
530