1 /*
2  * Python procedure manipulation for plpython
3  *
4  * src/pl/plpython/plpy_procedure.c
5  */
6 
7 #include "postgres.h"
8 
9 #include "access/htup_details.h"
10 #include "access/transam.h"
11 #include "funcapi.h"
12 #include "catalog/pg_proc.h"
13 #include "catalog/pg_proc_fn.h"
14 #include "catalog/pg_type.h"
15 #include "utils/builtins.h"
16 #include "utils/hsearch.h"
17 #include "utils/inval.h"
18 #include "utils/memutils.h"
19 #include "utils/syscache.h"
20 
21 #include "plpython.h"
22 
23 #include "plpy_procedure.h"
24 
25 #include "plpy_elog.h"
26 #include "plpy_main.h"
27 
28 
/*
 * Hash table of compiled procedures.  It is created by
 * init_procedure_caches() with PLyProcedureKey as the key and
 * PLyProcedureEntry as the entry type, and is consulted by
 * PLy_procedure_get().
 */
static HTAB *PLy_procedure_cache = NULL;

/* Forward declarations for helpers private to this file */
static PLyProcedure *PLy_procedure_create(HeapTuple procTup, Oid fn_oid, bool is_trigger);
static bool PLy_procedure_argument_valid(PLyTypeInfo *arg);
static bool PLy_procedure_valid(PLyProcedure *proc, HeapTuple procTup);
static char *PLy_procedure_munge_source(const char *name, const char *src);
35 
36 
37 void
init_procedure_caches(void)38 init_procedure_caches(void)
39 {
40 	HASHCTL		hash_ctl;
41 
42 	memset(&hash_ctl, 0, sizeof(hash_ctl));
43 	hash_ctl.keysize = sizeof(PLyProcedureKey);
44 	hash_ctl.entrysize = sizeof(PLyProcedureEntry);
45 	PLy_procedure_cache = hash_create("PL/Python procedures", 32, &hash_ctl,
46 									  HASH_ELEM | HASH_BLOBS);
47 }
48 
49 /*
50  * PLy_procedure_name: get the name of the specified procedure.
51  *
52  * NB: this returns the SQL name, not the internal Python procedure name
53  */
54 char *
PLy_procedure_name(PLyProcedure * proc)55 PLy_procedure_name(PLyProcedure *proc)
56 {
57 	if (proc == NULL)
58 		return "<unknown procedure>";
59 	return proc->proname;
60 }
61 
62 /*
63  * PLy_procedure_get: returns a cached PLyProcedure, or creates, stores and
64  * returns a new PLyProcedure.
65  *
66  * fn_oid is the OID of the function requested
67  * fn_rel is InvalidOid or the relation this function triggers on
68  * is_trigger denotes whether the function is a trigger function
69  *
70  * The reason that both fn_rel and is_trigger need to be passed is that when
71  * trigger functions get validated we don't know which relation(s) they'll
72  * be used with, so no sensible fn_rel can be passed.
73  */
PLyProcedure *
PLy_procedure_get(Oid fn_oid, Oid fn_rel, bool is_trigger)
{
	/* the cache is bypassed only for a trigger function with no relation */
	bool		use_cache = !(is_trigger && fn_rel == InvalidOid);
	HeapTuple	procTup;
	PLyProcedureKey key;		/* only filled in when use_cache is true */

	/*
	 * entry and proc are assigned before PG_TRY and then read/modified
	 * inside it, hence the volatile qualifiers.
	 */
	PLyProcedureEntry *volatile entry = NULL;
	PLyProcedure *volatile proc = NULL;
	bool		found = false;

	procTup = SearchSysCache1(PROCOID, ObjectIdGetDatum(fn_oid));
	if (!HeapTupleIsValid(procTup))
		elog(ERROR, "cache lookup failed for function %u", fn_oid);

	/*
	 * Look for the function in the cache, unless we don't have the necessary
	 * information (e.g. during validation). In that case we just don't cache
	 * anything.
	 */
	if (use_cache)
	{
		key.fn_oid = fn_oid;
		key.fn_rel = fn_rel;
		entry = hash_search(PLy_procedure_cache, &key, HASH_ENTER, &found);

		/*
		 * Only meaningful when found is true; for a freshly entered entry
		 * the value is replaced in the !found branch below.
		 */
		proc = entry->proc;
	}

	/*
	 * When the cache is bypassed, found stays false, so a brand-new
	 * procedure is always built and entry is never dereferenced.
	 */
	PG_TRY();
	{
		if (!found)
		{
			/* Haven't found it, create a new procedure */
			proc = PLy_procedure_create(procTup, fn_oid, is_trigger);
			if (use_cache)
				entry->proc = proc;
		}
		else if (!PLy_procedure_valid(proc, procTup))
		{
			/* Found it, but it's invalid, free and reuse the cache entry */

			/*
			 * Clear the pointer first so the entry never refers to a deleted
			 * procedure if the re-creation below throws.
			 */
			entry->proc = NULL;
			if (proc)
				PLy_procedure_delete(proc);
			proc = PLy_procedure_create(procTup, fn_oid, is_trigger);
			entry->proc = proc;
		}
		/* Found it and it's valid, it's fine to use it */
	}
	PG_CATCH();
	{
		/* Do not leave an uninitialized entry in the cache */
		if (use_cache)
			hash_search(PLy_procedure_cache, &key, HASH_REMOVE, NULL);
		PG_RE_THROW();
	}
	PG_END_TRY();

	/* success path: drop our reference to the pg_proc tuple */
	ReleaseSysCache(procTup);

	return proc;
}
134 
/*
 * Create a new PLyProcedure structure
 *
 * procTup is the pg_proc tuple of the function, fn_oid its OID, and
 * is_trigger tells whether it is being set up as a trigger function (in
 * which case no result-type conversion info is gathered).
 *
 * The struct and everything hanging off it are allocated in a dedicated
 * memory context that is created as a child of TopMemoryContext and named
 * after the internal Python procedure name; the context is remembered in
 * proc->mcxt.
 *
 * If anything inside the PG_TRY section fails, the half-built procedure is
 * released with PLy_procedure_delete() and the error is re-thrown.
 */
static PLyProcedure *
PLy_procedure_create(HeapTuple procTup, Oid fn_oid, bool is_trigger)
{
	char		procName[NAMEDATALEN + 256];
	Form_pg_proc procStruct;
	PLyProcedure *volatile proc;	/* volatile: used inside PG_CATCH */
	MemoryContext cxt;
	MemoryContext oldcxt;
	int			rv;
	char	   *ptr;

	/* Build the internal Python-side name from the SQL name and the OID */
	procStruct = (Form_pg_proc) GETSTRUCT(procTup);
	rv = snprintf(procName, sizeof(procName),
				  "__plpython_procedure_%s_%u",
				  NameStr(procStruct->proname),
				  fn_oid);
	if (rv >= sizeof(procName) || rv < 0)
		elog(ERROR, "procedure name would overrun buffer");

	/* Replace any not-legal-in-Python-names characters with '_' */
	for (ptr = procName; *ptr; ptr++)
	{
		if (!((*ptr >= 'A' && *ptr <= 'Z') ||
			  (*ptr >= 'a' && *ptr <= 'z') ||
			  (*ptr >= '0' && *ptr <= '9')))
			*ptr = '_';
	}

	/* Private memory context for this procedure, named like the procedure */
	cxt = AllocSetContextCreate(TopMemoryContext,
								procName,
								ALLOCSET_DEFAULT_SIZES);

	/* From here on allocations go into the procedure's own context */
	oldcxt = MemoryContextSwitchTo(cxt);

	proc = (PLyProcedure *) palloc0(sizeof(PLyProcedure));
	proc->mcxt = cxt;

	PG_TRY();
	{
		Datum		protrftypes_datum;
		Datum		prosrcdatum;
		bool		isnull;
		char	   *procSource;
		int			i;

		proc->proname = pstrdup(NameStr(procStruct->proname));
		proc->pyname = pstrdup(procName);
		/* xmin and TID are what PLy_procedure_valid() compares later */
		proc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
		proc->fn_tid = procTup->t_self;
		/* anything not declared VOLATILE is treated as read-only */
		proc->fn_readonly = (procStruct->provolatile != PROVOLATILE_VOLATILE);
		proc->is_setof = procStruct->proretset;
		PLy_typeinfo_init(&proc->result, proc->mcxt);
		proc->src = NULL;
		proc->argnames = NULL;
		for (i = 0; i < FUNC_MAX_ARGS; i++)
			PLy_typeinfo_init(&proc->args[i], proc->mcxt);
		proc->nargs = 0;
		proc->langid = procStruct->prolang;
		protrftypes_datum = SysCacheGetAttr(PROCOID, procTup,
											Anum_pg_proc_protrftypes,
											&isnull);
		proc->trftypes = isnull ? NIL : oid_array_to_list(protrftypes_datum);
		/* Python-side objects stay NULL until PLy_procedure_compile() runs */
		proc->code = NULL;
		proc->statics = NULL;
		proc->globals = NULL;
		proc->calldepth = 0;
		proc->argstack = NULL;

		/*
		 * get information required for output conversion of the return value,
		 * but only if this isn't a trigger.
		 */
		if (!is_trigger)
		{
			HeapTuple	rvTypeTup;
			Form_pg_type rvTypeStruct;

			rvTypeTup = SearchSysCache1(TYPEOID,
										ObjectIdGetDatum(procStruct->prorettype));
			if (!HeapTupleIsValid(rvTypeTup))
				elog(ERROR, "cache lookup failed for type %u",
					 procStruct->prorettype);
			rvTypeStruct = (Form_pg_type) GETSTRUCT(rvTypeTup);

			/* Disallow pseudotype result, except for void or record */
			if (rvTypeStruct->typtype == TYPTYPE_PSEUDO)
			{
				if (procStruct->prorettype == TRIGGEROID)
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("trigger functions can only be called as triggers")));
				else if (procStruct->prorettype != VOIDOID &&
						 procStruct->prorettype != RECORDOID)
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("PL/Python functions cannot return type %s",
									format_type_be(procStruct->prorettype))));
			}

			if (rvTypeStruct->typtype == TYPTYPE_COMPOSITE ||
				procStruct->prorettype == RECORDOID)
			{
				/*
				 * Tuple: set up later, during first call to
				 * PLy_function_handler
				 */
				proc->result.out.d.typoid = procStruct->prorettype;
				proc->result.out.d.typmod = -1;
				proc->result.is_rowtype = 2;
			}
			else
			{
				/* do the real work */
				PLy_output_datum_func(&proc->result, rvTypeTup, proc->langid, proc->trftypes);
			}

			ReleaseSysCache(rvTypeTup);
		}

		/*
		 * Now get information required for input conversion of the
		 * procedure's arguments.  Note that we ignore output arguments here.
		 * If the function returns record, those I/O functions will be set up
		 * when the function is first called.
		 */
		if (procStruct->pronargs)
		{
			Oid		   *types;
			char	  **names,
					   *modes;
			int			pos,
						total;

			/* extract argument type info from the pg_proc tuple */
			total = get_func_arg_info(procTup, &types, &names, &modes);

			/* count number of in+inout args into proc->nargs */
			if (modes == NULL)
				proc->nargs = total;
			else
			{
				/* proc->nargs was initialized to 0 above */
				for (i = 0; i < total; i++)
				{
					if (modes[i] != PROARGMODE_OUT &&
						modes[i] != PROARGMODE_TABLE)
						(proc->nargs)++;
				}
			}

			/*
			 * i walks over all declared arguments, pos only over the ones
			 * that are actually passed in (i.e. not OUT/TABLE).
			 */
			proc->argnames = (char **) palloc0(sizeof(char *) * proc->nargs);
			for (i = pos = 0; i < total; i++)
			{
				HeapTuple	argTypeTup;
				Form_pg_type argTypeStruct;

				if (modes &&
					(modes[i] == PROARGMODE_OUT ||
					 modes[i] == PROARGMODE_TABLE))
					continue;	/* skip OUT arguments */

				Assert(types[i] == procStruct->proargtypes.values[pos]);

				argTypeTup = SearchSysCache1(TYPEOID,
											 ObjectIdGetDatum(types[i]));
				if (!HeapTupleIsValid(argTypeTup))
					elog(ERROR, "cache lookup failed for type %u", types[i]);
				argTypeStruct = (Form_pg_type) GETSTRUCT(argTypeTup);

				/* check argument type is OK, set up I/O function info */
				switch (argTypeStruct->typtype)
				{
					case TYPTYPE_PSEUDO:
						/* Disallow pseudotype argument */
						ereport(ERROR,
								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
								 errmsg("PL/Python functions cannot accept type %s",
										format_type_be(types[i]))));
						break;
					case TYPTYPE_COMPOSITE:
						/* we'll set IO funcs at first call */
						proc->args[pos].is_rowtype = 2;
						break;
					default:
						PLy_input_datum_func(&(proc->args[pos]),
											 types[i],
											 argTypeTup,
											 proc->langid,
											 proc->trftypes);
						break;
				}

				/* get argument name */
				proc->argnames[pos] = names ? pstrdup(names[i]) : NULL;

				ReleaseSysCache(argTypeTup);

				pos++;
			}
		}

		/*
		 * get the text of the function.
		 */
		prosrcdatum = SysCacheGetAttr(PROCOID, procTup,
									  Anum_pg_proc_prosrc, &isnull);
		if (isnull)
			elog(ERROR, "null prosrc");
		procSource = TextDatumGetCString(prosrcdatum);

		/* hand the source to Python; fills in globals, statics, src, code */
		PLy_procedure_compile(proc, procSource);

		pfree(procSource);
	}
	PG_CATCH();
	{
		/* restore the caller's context, then throw away the partial result */
		MemoryContextSwitchTo(oldcxt);
		PLy_procedure_delete(proc);
		PG_RE_THROW();
	}
	PG_END_TRY();

	MemoryContextSwitchTo(oldcxt);
	return proc;
}
363 
364 /*
365  * Insert the procedure into the Python interpreter
366  */
367 void
PLy_procedure_compile(PLyProcedure * proc,const char * src)368 PLy_procedure_compile(PLyProcedure *proc, const char *src)
369 {
370 	PyObject   *crv = NULL;
371 	char	   *msrc;
372 
373 	proc->globals = PyDict_Copy(PLy_interp_globals);
374 
375 	/*
376 	 * SD is private preserved data between calls. GD is global data shared by
377 	 * all functions
378 	 */
379 	proc->statics = PyDict_New();
380 	PyDict_SetItemString(proc->globals, "SD", proc->statics);
381 
382 	/*
383 	 * insert the function code into the interpreter
384 	 */
385 	msrc = PLy_procedure_munge_source(proc->pyname, src);
386 	/* Save the mangled source for later inclusion in tracebacks */
387 	proc->src = MemoryContextStrdup(proc->mcxt, msrc);
388 	crv = PyRun_String(msrc, Py_file_input, proc->globals, NULL);
389 	pfree(msrc);
390 
391 	if (crv != NULL)
392 	{
393 		int			clen;
394 		char		call[NAMEDATALEN + 256];
395 
396 		Py_DECREF(crv);
397 
398 		/*
399 		 * compile a call to the function
400 		 */
401 		clen = snprintf(call, sizeof(call), "%s()", proc->pyname);
402 		if (clen < 0 || clen >= sizeof(call))
403 			elog(ERROR, "string would overflow buffer");
404 		proc->code = Py_CompileString(call, "<string>", Py_eval_input);
405 		if (proc->code != NULL)
406 			return;
407 	}
408 
409 	if (proc->proname)
410 		PLy_elog(ERROR, "could not compile PL/Python function \"%s\"",
411 				 proc->proname);
412 	else
413 		PLy_elog(ERROR, "could not compile anonymous PL/Python code block");
414 }
415 
416 void
PLy_procedure_delete(PLyProcedure * proc)417 PLy_procedure_delete(PLyProcedure *proc)
418 {
419 	Py_XDECREF(proc->code);
420 	Py_XDECREF(proc->statics);
421 	Py_XDECREF(proc->globals);
422 	MemoryContextDelete(proc->mcxt);
423 }
424 
425 /*
426  * Check if our cached information about a datatype is still valid
427  */
428 static bool
PLy_procedure_argument_valid(PLyTypeInfo * arg)429 PLy_procedure_argument_valid(PLyTypeInfo *arg)
430 {
431 	HeapTuple	relTup;
432 	bool		valid;
433 
434 	/* Nothing to cache unless type is composite */
435 	if (arg->is_rowtype != 1)
436 		return true;
437 
438 	/*
439 	 * Zero typ_relid means that we got called on an output argument of a
440 	 * function returning an unnamed record type; the info for it can't
441 	 * change.
442 	 */
443 	if (!OidIsValid(arg->typ_relid))
444 		return true;
445 
446 	/* Else we should have some cached data */
447 	Assert(TransactionIdIsValid(arg->typrel_xmin));
448 	Assert(ItemPointerIsValid(&arg->typrel_tid));
449 
450 	/* Get the pg_class tuple for the data type */
451 	relTup = SearchSysCache1(RELOID, ObjectIdGetDatum(arg->typ_relid));
452 	if (!HeapTupleIsValid(relTup))
453 		elog(ERROR, "cache lookup failed for relation %u", arg->typ_relid);
454 
455 	/* If it has changed, the cached data is not valid */
456 	valid = (arg->typrel_xmin == HeapTupleHeaderGetRawXmin(relTup->t_data) &&
457 			 ItemPointerEquals(&arg->typrel_tid, &relTup->t_self));
458 
459 	ReleaseSysCache(relTup);
460 
461 	return valid;
462 }
463 
464 /*
465  * Decide whether a cached PLyProcedure struct is still valid
466  */
467 static bool
PLy_procedure_valid(PLyProcedure * proc,HeapTuple procTup)468 PLy_procedure_valid(PLyProcedure *proc, HeapTuple procTup)
469 {
470 	int			i;
471 	bool		valid;
472 
473 	if (proc == NULL)
474 		return false;
475 
476 	/* If the pg_proc tuple has changed, it's not valid */
477 	if (!(proc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) &&
478 		  ItemPointerEquals(&proc->fn_tid, &procTup->t_self)))
479 		return false;
480 
481 	/* Else check the input argument datatypes */
482 	valid = true;
483 	for (i = 0; i < proc->nargs; i++)
484 	{
485 		valid = PLy_procedure_argument_valid(&proc->args[i]);
486 
487 		/* Short-circuit on first changed argument */
488 		if (!valid)
489 			break;
490 	}
491 
492 	/* if the output type is composite, it might have changed */
493 	if (valid)
494 		valid = PLy_procedure_argument_valid(&proc->result);
495 
496 	return valid;
497 }
498 
499 static char *
PLy_procedure_munge_source(const char * name,const char * src)500 PLy_procedure_munge_source(const char *name, const char *src)
501 {
502 	char	   *mrc,
503 			   *mp;
504 	const char *sp;
505 	size_t		mlen;
506 	int			plen;
507 
508 	/*
509 	 * room for function source and the def statement
510 	 */
511 	mlen = (strlen(src) * 2) + strlen(name) + 16;
512 
513 	mrc = palloc(mlen);
514 	plen = snprintf(mrc, mlen, "def %s():\n\t", name);
515 	Assert(plen >= 0 && plen < mlen);
516 
517 	sp = src;
518 	mp = mrc + plen;
519 
520 	while (*sp != '\0')
521 	{
522 		if (*sp == '\r' && *(sp + 1) == '\n')
523 			sp++;
524 
525 		if (*sp == '\n' || *sp == '\r')
526 		{
527 			*mp++ = '\n';
528 			*mp++ = '\t';
529 			sp++;
530 		}
531 		else
532 			*mp++ = *sp++;
533 	}
534 	*mp++ = '\n';
535 	*mp++ = '\n';
536 	*mp = '\0';
537 
538 	if (mp > (mrc + mlen))
539 		elog(FATAL, "buffer overrun in PLy_munge_source");
540 
541 	return mrc;
542 }
543