1 /* -*-pgsql-c-*- */
2 /*
3  * $Header$
4  *
5  * pgpool: a language independent connection pool server for PostgreSQL
6  * written by Tatsuo Ishii
7  *
8  * Copyright (c) 2003-2020	PgPool Global Development Group
9  *
10  * Permission to use, copy, modify, and distribute this software and
11  * its documentation for any purpose and without fee is hereby
12  * granted, provided that the above copyright notice appear in all
13  * copies and that both that copyright notice and this permission
14  * notice appear in supporting documentation, and that the name of the
15  * author not be used in advertising or publicity pertaining to
16  * distribution of the software without specific, written prior
17  * permission. The author makes no representations about the
18  * suitability of this software for any purpose.  It is provided "as
19  * is" without express or implied warranty.
20  *
21  * pool_relcache.c: Per process relation cache modules
22  */
23 #include "config.h"
24 #include <stdlib.h>
25 #include <unistd.h>
26 #include <string.h>
27 #include <limits.h>
28 #include <time.h>
29 
30 #include "pool.h"
31 #include "utils/pool_relcache.h"
32 #include "context/pool_session_context.h"
33 #include "query_cache/pool_memqcache.h"
34 #include "protocol/pool_process_query.h"
35 #include "pool_config.h"
36 #include "utils/palloc.h"
37 #include "utils/memutils.h"
38 #include "utils/elog.h"
39 #include "parser/scansup.h"
40 
41 static void SearchRelCacheErrorCb(void *arg);
42 static POOL_SELECT_RESULT *query_cache_to_relation_cache(char *data, size_t size);
43 static char *relation_cache_to_query_cache(POOL_SELECT_RESULT *res,size_t *size);
44 
45 
46 /*
47  * Create relation cache
48  */
49 POOL_RELCACHE *
pool_create_relcache(int cachesize,char * sql,func_ptr register_func,func_ptr unregister_func,bool issessionlocal)50 pool_create_relcache(int cachesize, char *sql,
51 					 func_ptr register_func, func_ptr unregister_func,
52 					 bool issessionlocal)
53 {
54 	POOL_RELCACHE *p;
55 	PoolRelCache *ip;
56 	MemoryContext old_context;
57 
58 	if (cachesize < 0)
59 	{
60 		ereport(WARNING,
61 				(errmsg("failed to create relcache: wrong cache size: %d", cachesize)));
62 		return NULL;
63 	}
64 
65 	/*
66 	 * Create the relcache in session context if the cache is session local,
67 	 * otherwise make home in TopMemoryContext
68 	 */
69 	old_context = MemoryContextSwitchTo(TopMemoryContext);
70 
71 	ip = (PoolRelCache *) palloc0(sizeof(PoolRelCache) * cachesize);
72 	p = (POOL_RELCACHE *) palloc(sizeof(POOL_RELCACHE));
73 
74 	MemoryContextSwitchTo(old_context);
75 
76 	p->num = cachesize;
77 	strlcpy(p->sql, sql, sizeof(p->sql));
78 	p->register_func = register_func;
79 	p->unregister_func = unregister_func;
80 	p->cache_is_session_local = issessionlocal;
81 	p->no_cache_if_zero = false;
82 	p->cache = ip;
83 
84 	return p;
85 }
86 
87 /*
88  * Discard relation cache.
89  */
90 void
pool_discard_relcache(POOL_RELCACHE * relcache)91 pool_discard_relcache(POOL_RELCACHE * relcache)
92 {
93 	int			i;
94 
95 	for (i = 0; i < relcache->num; i++)
96 	{
97 		(*relcache->unregister_func) (relcache->cache[i].data);
98 	}
99 	pfree(relcache->cache);
100 	pfree(relcache);
101 }
102 
103 /*
104  * Search relcache. If found, return user data. Otherwise return 0.
105  * If not found in cache, do the query and store the result into cache and return it.
106  */
107 void *
pool_search_relcache(POOL_RELCACHE * relcache,POOL_CONNECTION_POOL * backend,char * table)108 pool_search_relcache(POOL_RELCACHE * relcache, POOL_CONNECTION_POOL * backend, char *table)
109 {
110 	char	   *dbname;
111 	int			i;
112 	int			maxrefcnt = INT_MAX;
113 	char		query[1024];
114 	POOL_SELECT_RESULT *res = NULL;
115 	int			index = 0;
116 	int			local_session_id;
117 	time_t		now;
118 	void		*result;
119 	ErrorContextCallback callback;
120     pool_sigset_t oldmask;
121 	bool locked;
122 	int			query_cache_not_found = 1;
123 	char		*query_cache_data = NULL;
124 	size_t		query_cache_len;
125 	POOL_SESSION_CONTEXT *session_context;
126 	int			node_id;
127 
128 	session_context = pool_get_session_context(false);
129 
130 	local_session_id = pool_get_local_session_id();
131 	if (local_session_id < 0)
132 		return NULL;
133 
134 	/*
135 	 * Obtain database name and node id to be sent query.  If
136 	 * relcache_query_target is RELQTARGET_LOADL_BALANCE_NODE, we consider
137 	 * load balance node id to be used to send queries.
138 	 *
139 	 * Note that we need to use VALID_BACKEND_RAW, rather than VALID_BACKEND
140 	 * since pool_is_node_to_be_sent_in_current_query(being called by
141 	 * VALID_BACKEND) assumes that if query context exists, where_to_send map
142 	 * is already setup but it's not always the case because
143 	 * pool_search_relcache is mostly called *before* the where_to_send map is
144 	 * established.
145 	 */
146 	if (pool_config->relcache_query_target == RELQTARGET_LOAD_BALANCE_NODE &&
147 		session_context && VALID_BACKEND_RAW(session_context->load_balance_node_id) &&
148 		backend->slots[session_context->load_balance_node_id])
149 	{
150 		dbname = backend->slots[session_context->load_balance_node_id]->sp->database;
151 		node_id = session_context->load_balance_node_id;
152 	}
153 	else
154 	{
155 		dbname = MAIN_CONNECTION(backend)->sp->database;
156 
157 		/*
158 		 * If in streaming replication mode, prefer to send query to the
159 		 * primary node if it exists.
160 		 */
161 		if (STREAM && PRIMARY_NODE_ID >= 0)
162 			node_id = PRIMARY_NODE_ID;
163 		else
164 			node_id = MAIN_NODE_ID;
165 	}
166 
167 	now = time(NULL);
168 
169 	/* Look for cache first */
170 	for (i = 0; i < relcache->num; i++)
171 	{
172 		/*
173 		 * If cache is session local, we need to check session id
174 		 */
175 		if (relcache->cache_is_session_local)
176 		{
177 			if (relcache->cache[i].session_id != local_session_id)
178 				continue;
179 		}
180 
181 		if (strcasecmp(relcache->cache[i].dbname, dbname) == 0 &&
182 			strcasecmp(relcache->cache[i].relname, table) == 0)
183 		{
184 			if (relcache->cache[i].expire > 0)
185 			{
186 				if (now > relcache->cache[i].expire)
187 				{
188 					ereport(DEBUG1,
189 							(errmsg("searching relcache"),
190 							 errdetail("relcache for database:%s table:%s expired. now:%ld expiration time:%ld", dbname, table, now, relcache->cache[i].expire)));
191 
192 					relcache->cache[i].refcnt = 0;
193 					break;
194 				}
195 			}
196 
197 			/* Found */
198 			if (relcache->cache[i].refcnt < INT_MAX)
199 				relcache->cache[i].refcnt++;
200 
201 			ereport(DEBUG1,
202 					(errmsg("hit local relation cache"),
203 					errdetail("query:%s", relcache->sql)));
204 
205 			return relcache->cache[i].data;
206 		}
207 	}
208 
209 	/* Not in cache. Check the system catalog */
210 	snprintf(query, sizeof(query), relcache->sql, table);
211 
212 	per_node_statement_log(backend, node_id, query);
213 
214 	/*
215 	 * Register a error context callback to throw proper context message
216 	 */
217 	callback.callback = SearchRelCacheErrorCb;
218 	callback.arg = NULL;
219 	callback.previous = error_context_stack;
220 	error_context_stack = &callback;
221 
222 	locked = pool_is_shmem_lock();
223 	/*
224 	 * if enable_shared_relcache is true, search query cache.
225 	 */
226     if (pool_config->enable_shared_relcache)
227 	{
228 		/* if shmem is not locked by this process, get the lock */
229 		if (!locked)
230 		{
231 			POOL_SETMASK2(&BlockSig, &oldmask);
232 			pool_shmem_lock();
233 		}
234 	    PG_TRY();
235 		{
236 			/* search catalog cache in query cache */
237 			query_cache_not_found = pool_fetch_cache(backend, query, &query_cache_data, &query_cache_len);
238 		}
239 	    PG_CATCH();
240 		{
241 			pool_shmem_unlock();
242 			POOL_SETMASK(&oldmask);
243 	        PG_RE_THROW();
244 		}
245 		PG_END_TRY();
246 	}
247 	/* If not in query cache or not used, send query for backend. */
248 	if (query_cache_not_found)
249 	{
250 		ereport(DEBUG1,
251 				(errmsg("not hit local relation cache and query cache"),
252 				errdetail("query:%s", query)));
253 
254 		do_query(CONNECTION(backend, node_id), query, &res, MAJOR(backend));
255 		/* Register cache */
256 		result = (*relcache->register_func) (res);
257 		/* save local catalog cache in query cache */
258 	    if (pool_config->enable_shared_relcache)
259 		{
260 			query_cache_data = relation_cache_to_query_cache(res, &query_cache_len);
261 			pool_catalog_commit_cache(backend, query, query_cache_data, query_cache_len);
262 		}
263 	}
264 	else
265 	{
266 		ereport(DEBUG1,
267 				(errmsg("hit query cache"),
268 				errdetail("query:%s", query)));
269 
270 		/* catalog cache found in query_cache, copy local relation cache */
271 		res = query_cache_to_relation_cache(query_cache_data,query_cache_len);
272 		result = (*relcache->register_func) (res);
273 	}
274 	/* if shmem is locked by this function, unlock it */
275 	if (pool_config->enable_shared_relcache && !locked)
276 	{
277 		pool_shmem_unlock();
278 		POOL_SETMASK(&oldmask);
279 	}
280 
281 	error_context_stack = callback.previous;
282 
283 	/*
284 	 * Look for replacement in cache
285 	 */
286 	for (i = 0; i < relcache->num; i++)
287 	{
288 		/*
289 		 * If cache is session local, we can discard old cache immediately
290 		 */
291 		if (relcache->cache_is_session_local)
292 		{
293 			if (relcache->cache[i].session_id != local_session_id)
294 			{
295 				index = i;
296 				relcache->cache[i].refcnt = 0;
297 				break;
298 			}
299 		}
300 
301 		if (relcache->cache[i].refcnt == 0)
302 		{
303 			/* Found empty slot */
304 			index = i;
305 			break;
306 		}
307 		else if (relcache->cache[i].refcnt < maxrefcnt)
308 		{
309 			maxrefcnt = relcache->cache[i].refcnt;
310 			index = i;
311 		}
312 	}
313 
314 	if (relcache->cache[index].refcnt != 0)
315 	{
316 		ereport(LOG,
317 				(errmsg("searching relcache. cache replacement occurred")));
318 
319 	}
320 
321 	if (!pool_is_ignore_till_sync() && (!relcache->no_cache_if_zero || result))
322 	{
323 		strlcpy(relcache->cache[index].dbname, dbname, MAX_ITEM_LENGTH);
324 		strlcpy(relcache->cache[index].relname, table, MAX_ITEM_LENGTH);
325 		relcache->cache[index].refcnt = 1;
326 		relcache->cache[index].session_id = local_session_id;
327 		if (pool_config->relcache_expire > 0)
328 		{
329 			relcache->cache[index].expire = now + pool_config->relcache_expire;
330 		}
331 		else
332 		{
333 			relcache->cache[index].expire = 0;
334 		}
335 
336 		/*
337 		 * Call user defined unregister/register function.
338 		 */
339 		(*relcache->unregister_func) (relcache->cache[index].data);
340 		relcache->cache[index].data = result;
341 	}
342 	free_select_result(res);
343 	if (query_cache_data)
344 		pfree(query_cache_data);
345 	return result;
346 }
347 
/*
 * Error context callback used by pool_search_relcache: adds a context line
 * to any error reported while the catalog query is being processed.
 * The callback argument is not used.
 */
static void
SearchRelCacheErrorCb(void *arg)
{
	(void) arg;

	errcontext("while searching system catalog, When relcache is missed");
}
353 
354 
355 /*
356  * SplitIdentifierString --- parse a string containing identifiers
357  *
358  * This is the guts of textToQualifiedNameList, and is exported for use in
359  * other situations such as parsing GUC variables.  In the GUC case, it's
360  * important to avoid memory leaks, so the API is designed to minimize the
361  * amount of stuff that needs to be allocated and freed.
362  *
363  * Inputs:
364  *	rawstring: the input string; must be overwritable!	On return, it's
365  *			   been modified to contain the separated identifiers.
366  *	separator: the separator punctuation expected between identifiers
367  *			   (typically '.' or ',').  Whitespace may also appear around
368  *			   identifiers.
369  * Outputs:
370  *	namelist: filled with a palloc'd list of pointers to identifiers within
371  *			  rawstring.  Caller should list_free() this even on error return.
372  *
373  * Returns true if okay, false if there is a syntax error in the string.
374  *
375  * Note that an empty string is considered okay here, though not in
376  * textToQualifiedNameList.
377  */
378 bool
SplitIdentifierString(char * rawstring,char separator,Node ** nlist)379 SplitIdentifierString(char *rawstring, char separator,
380 					  Node **nlist)
381 {
382 	char	   *nextp = rawstring;
383 	bool		done = false;
384 	List	  **namelist = (List **) nlist;
385 
386 	*namelist = NIL;
387 
388 	while (scanner_isspace(*nextp))
389 		nextp++;				/* skip leading whitespace */
390 
391 	if (*nextp == '\0')
392 		return true;			/* allow empty string */
393 
394 	/* At the top of the loop, we are at start of a new identifier. */
395 	do
396 	{
397 		char	   *curname;
398 		char	   *endp;
399 
400 		if (*nextp == '"')
401 		{
402 			/* Quoted name --- collapse quote-quote pairs, no downcasing */
403 			curname = nextp + 1;
404 			for (;;)
405 			{
406 				endp = strchr(nextp + 1, '"');
407 				if (endp == NULL)
408 					return false;	/* mismatched quotes */
409 				if (endp[1] != '"')
410 					break;		/* found end of quoted name */
411 				/* Collapse adjacent quotes into one quote, and look again */
412 				memmove(endp, endp + 1, strlen(endp));
413 				nextp = endp;
414 			}
415 			/* endp now points at the terminating quote */
416 			nextp = endp + 1;
417 		}
418 		else
419 		{
420 			/* Unquoted name --- extends to separator or whitespace */
421 			char	   *downname;
422 			int			len;
423 
424 			curname = nextp;
425 			while (*nextp && *nextp != separator &&
426 				   !scanner_isspace(*nextp))
427 				nextp++;
428 			endp = nextp;
429 			if (curname == nextp)
430 				return false;	/* empty unquoted name not allowed */
431 
432 			/*
433 			 * Downcase the identifier, using same code as main lexer does.
434 			 *
435 			 * XXX because we want to overwrite the input in-place, we cannot
436 			 * support a downcasing transformation that increases the string
437 			 * length.  This is not a problem given the current implementation
438 			 * of downcase_truncate_identifier, but we'll probably have to do
439 			 * something about this someday.
440 			 */
441 			len = endp - curname;
442 			downname = downcase_truncate_identifier(curname, len, false);
443 			Assert(strlen(downname) <= len);
444 			strncpy(curname, downname, len);	/* strncpy is required here */
445 			pfree(downname);
446 		}
447 
448 		while (scanner_isspace(*nextp))
449 			nextp++;			/* skip trailing whitespace */
450 
451 		if (*nextp == separator)
452 		{
453 			nextp++;
454 			while (scanner_isspace(*nextp))
455 				nextp++;		/* skip leading whitespace for next */
456 			/* we expect another name, so done remains false */
457 		}
458 		else if (*nextp == '\0')
459 			done = true;
460 		else
461 			return false;		/* invalid syntax */
462 
463 		/* Now safe to overwrite separator with a null */
464 		*endp = '\0';
465 
466 		/* Truncate name if it's overlength */
467 		truncate_identifier(curname, strlen(curname), false);
468 
469 		/*
470 		 * Finished isolating current name --- add it to list
471 		 */
472 		*namelist = lappend(*namelist, curname);
473 
474 		/* Loop back if we didn't reach end of string */
475 	} while (!done);
476 
477 	return true;
478 }
479 
480 char *
remove_quotes_and_schema_from_relname(char * table)481 remove_quotes_and_schema_from_relname(char *table)
482 {
483 	static char rel[MAX_ITEM_LENGTH];
484 	char	   *rawstring;
485 	List	   *names;
486 
487 	rawstring = pstrdup(table);
488 	if(SplitIdentifierString(rawstring, '.', (Node **) &names) && names != NIL)
489 	{
490 		/*
491 		 * Since table name is always the last one in the list,
492 		 * we use llast() to get table name.
493 		 */
494 		strlcpy(rel, llast(names), sizeof(rel));
495 	}
496 	else
497 	{
498 		rel[0] = '\0';
499 	}
500 
501 	pfree(rawstring);
502 	list_free(names);
503 
504 	return rel;
505 }
506 
507 /*
508  * Standard register/unregister function for "SELECT count(*)" type
509  * query. Returns row count.
510  */
511 void *
int_register_func(POOL_SELECT_RESULT * res)512 int_register_func(POOL_SELECT_RESULT * res)
513 {
514 	if (res->numrows >= 1)
515 		return (void *) atol(res->data[0]);
516 	return (void *) 0;
517 }
518 
/*
 * Unregister counterpart of int_register_func.  The registered value is a
 * plain integer stored in the pointer, so there is nothing to release.
 * Always returns NULL.
 */
void *
int_unregister_func(void *data)
{
	(void) data;

	return NULL;
}
525 
526 void *
string_register_func(POOL_SELECT_RESULT * res)527 string_register_func(POOL_SELECT_RESULT * res)
528 {
529 	return (res->numrows > 0) ? strdup(res->data[0]) : NULL;
530 }
531 
/*
 * Unregister counterpart of string_register_func: releases the string with
 * free().  A NULL pointer is accepted and ignored.  Always returns NULL.
 */
void *
string_unregister_func(void *data)
{
	/* free(NULL) is a no-op, so no explicit check is needed */
	free(data);

	return NULL;
}
539 
540 static POOL_SELECT_RESULT *
query_cache_to_relation_cache(char * data,size_t size)541 query_cache_to_relation_cache(char *data, size_t size)
542 {
543 	POOL_SELECT_RESULT *res;
544 	char *p;
545 	int i;
546 	int len;
547 
548 	p = data;
549 
550 	res = palloc0(sizeof(*res));
551 	res->rowdesc = palloc0(sizeof(RowDesc));
552 
553 	/* rowdesc */
554 	res->rowdesc->num_attrs = *((int *)p);
555 	p += sizeof(int);
556 
557 	/* numrows */
558 	res->numrows = *((int *)p);
559 	p += sizeof(int);
560 
561 	len = res->rowdesc->num_attrs * res->numrows;
562 
563 	res->nullflags = palloc(len * sizeof(int));
564 	res->data = palloc0(len * sizeof(char *));
565 
566 	/* nullflags */
567 	for (i = 0; i < len; i++)
568 	{
569 		res->nullflags[i] = *((int *)p);
570 		p += sizeof(int);
571 	}
572 	/* data */
573 	for (i = 0; i < len; i++)
574 	{
575 		if ( res->nullflags[i] > 0)
576 		{
577 			res->data[i] = palloc(res->nullflags[i] + 1);
578 			memcpy(res->data[i], p, res->nullflags[i]);
579 			*(res->data[i] + res->nullflags[i]) = '\0';
580 			p += res->nullflags[i];
581 		}
582 	}
583 
584 	return res;
585 }
586 
587 static char *
relation_cache_to_query_cache(POOL_SELECT_RESULT * res,size_t * size)588 relation_cache_to_query_cache(POOL_SELECT_RESULT *res,size_t *size)
589 {
590 	char * data;
591 	char * p;
592 
593 	int i;
594 	int array_size;
595 	int mysize;
596 
597 	mysize = 0;
598 
599 	/* RoeDesc *rowdesc */
600 	/* int res->rowdesc->num_attrs;*/
601 	mysize += sizeof(int); /* only rodesc->num_attrs */
602 	/* int numrows */
603 	mysize += sizeof(int);
604 	/* int *nullflags  */
605 	mysize += sizeof(int) * res->rowdesc->num_attrs * res->numrows;
606 	/*  char **data */
607 	/* res->rowdesc->num_attrs * res->numrows */
608 	for (i = 0; i < res->rowdesc->num_attrs * res->numrows; i++)
609 	{
610 		if(res->nullflags[i] > 0)
611 		{
612 			mysize += res->nullflags[i];
613 		}
614 	}
615 
616 	/* init array */
617 	*size = mysize;
618 	data = palloc(mysize + 1);
619 
620 	/* data copy */
621 	p = data;
622 	memcpy(p, &res->rowdesc->num_attrs, sizeof(int));
623 	p += sizeof(int);
624 	memcpy(p, &res->numrows, sizeof(int));
625 	p += sizeof(int);
626 
627 	array_size = res->rowdesc->num_attrs * res->numrows;
628 
629 	memcpy(p, res->nullflags, sizeof(int) * array_size);
630 	p += sizeof(int) * array_size;
631 
632 	for (i = 0; i < array_size; i++)
633 	{
634 		if( res->nullflags[i] > 0) /* NOT NULL? */
635 		{
636 			memcpy(p, res->data[i], res->nullflags[i]);
637 			p += res->nullflags[i];
638 		}
639 	}
640 
641 	return data;
642 }
643