1 /*
2  * Python procedure manipulation for plpython
3  *
4  * src/pl/plpython/plpy_procedure.c
5  */
6 
7 #include "postgres.h"
8 
9 #include "access/htup_details.h"
10 #include "access/transam.h"
11 #include "funcapi.h"
12 #include "catalog/pg_proc.h"
13 #include "catalog/pg_type.h"
14 #include "utils/builtins.h"
15 #include "utils/hsearch.h"
16 #include "utils/inval.h"
17 #include "utils/lsyscache.h"
18 #include "utils/memutils.h"
19 #include "utils/syscache.h"
20 
21 #include "plpython.h"
22 
23 #include "plpy_procedure.h"
24 
25 #include "plpy_elog.h"
26 #include "plpy_main.h"
27 
28 
/*
 * Hash table of compiled procedures.  It is created by
 * init_procedure_caches() with PLyProcedureKey keys and PLyProcedureEntry
 * entries, and is consulted and maintained by PLy_procedure_get().
 */
static HTAB *PLy_procedure_cache = NULL;

/* Build a new PLyProcedure from a pg_proc tuple (compiles the source) */
static PLyProcedure *PLy_procedure_create(HeapTuple procTup, Oid fn_oid, bool is_trigger);
/* Check whether a cached PLyProcedure still matches its pg_proc tuple */
static bool PLy_procedure_valid(PLyProcedure *proc, HeapTuple procTup);
/* Wrap function source text into a Python "def name():" block */
static char *PLy_procedure_munge_source(const char *name, const char *src);
34 
35 
36 void
init_procedure_caches(void)37 init_procedure_caches(void)
38 {
39 	HASHCTL		hash_ctl;
40 
41 	memset(&hash_ctl, 0, sizeof(hash_ctl));
42 	hash_ctl.keysize = sizeof(PLyProcedureKey);
43 	hash_ctl.entrysize = sizeof(PLyProcedureEntry);
44 	PLy_procedure_cache = hash_create("PL/Python procedures", 32, &hash_ctl,
45 									  HASH_ELEM | HASH_BLOBS);
46 }
47 
48 /*
49  * PLy_procedure_name: get the name of the specified procedure.
50  *
51  * NB: this returns the SQL name, not the internal Python procedure name
52  */
53 char *
PLy_procedure_name(PLyProcedure * proc)54 PLy_procedure_name(PLyProcedure *proc)
55 {
56 	if (proc == NULL)
57 		return "<unknown procedure>";
58 	return proc->proname;
59 }
60 
61 /*
62  * PLy_procedure_get: returns a cached PLyProcedure, or creates, stores and
63  * returns a new PLyProcedure.
64  *
65  * fn_oid is the OID of the function requested
66  * fn_rel is InvalidOid or the relation this function triggers on
67  * is_trigger denotes whether the function is a trigger function
68  *
69  * The reason that both fn_rel and is_trigger need to be passed is that when
70  * trigger functions get validated we don't know which relation(s) they'll
71  * be used with, so no sensible fn_rel can be passed.
72  */
73 PLyProcedure *
PLy_procedure_get(Oid fn_oid,Oid fn_rel,bool is_trigger)74 PLy_procedure_get(Oid fn_oid, Oid fn_rel, bool is_trigger)
75 {
76 	bool		use_cache = !(is_trigger && fn_rel == InvalidOid);
77 	HeapTuple	procTup;
78 	PLyProcedureKey key;
79 	PLyProcedureEntry *volatile entry = NULL;
80 	PLyProcedure *volatile proc = NULL;
81 	bool		found = false;
82 
83 	procTup = SearchSysCache1(PROCOID, ObjectIdGetDatum(fn_oid));
84 	if (!HeapTupleIsValid(procTup))
85 		elog(ERROR, "cache lookup failed for function %u", fn_oid);
86 
87 	/*
88 	 * Look for the function in the cache, unless we don't have the necessary
89 	 * information (e.g. during validation). In that case we just don't cache
90 	 * anything.
91 	 */
92 	if (use_cache)
93 	{
94 		key.fn_oid = fn_oid;
95 		key.fn_rel = fn_rel;
96 		entry = hash_search(PLy_procedure_cache, &key, HASH_ENTER, &found);
97 		proc = entry->proc;
98 	}
99 
100 	PG_TRY();
101 	{
102 		if (!found)
103 		{
104 			/* Haven't found it, create a new procedure */
105 			proc = PLy_procedure_create(procTup, fn_oid, is_trigger);
106 			if (use_cache)
107 				entry->proc = proc;
108 		}
109 		else if (!PLy_procedure_valid(proc, procTup))
110 		{
111 			/* Found it, but it's invalid, free and reuse the cache entry */
112 			entry->proc = NULL;
113 			if (proc)
114 				PLy_procedure_delete(proc);
115 			proc = PLy_procedure_create(procTup, fn_oid, is_trigger);
116 			entry->proc = proc;
117 		}
118 		/* Found it and it's valid, it's fine to use it */
119 	}
120 	PG_CATCH();
121 	{
122 		/* Do not leave an uninitialized entry in the cache */
123 		if (use_cache)
124 			hash_search(PLy_procedure_cache, &key, HASH_REMOVE, NULL);
125 		PG_RE_THROW();
126 	}
127 	PG_END_TRY();
128 
129 	ReleaseSysCache(procTup);
130 
131 	return proc;
132 }
133 
/*
 * Create a new PLyProcedure structure
 *
 * procTup is the pg_proc tuple of the function, fn_oid its OID, and
 * is_trigger tells whether it is being set up as a trigger function (in
 * which case no result conversion info is prepared here).
 *
 * The struct and everything hanging off it are allocated in a dedicated
 * memory context created under TopMemoryContext and remembered in
 * proc->mcxt.  The function source is compiled via PLy_procedure_compile().
 *
 * If anything fails after the struct has been allocated, the partially
 * built procedure is destroyed with PLy_procedure_delete() and the error is
 * re-thrown.  On success the caller's memory context is current again when
 * the new procedure is returned.
 */
static PLyProcedure *
PLy_procedure_create(HeapTuple procTup, Oid fn_oid, bool is_trigger)
{
	char		procName[NAMEDATALEN + 256];
	Form_pg_proc procStruct;
	PLyProcedure *volatile proc;
	MemoryContext cxt;
	MemoryContext oldcxt;
	int			rv;
	char	   *ptr;

	/* Build the internal Python name from the SQL name and the OID */
	procStruct = (Form_pg_proc) GETSTRUCT(procTup);
	rv = snprintf(procName, sizeof(procName),
				  "__plpython_procedure_%s_%u",
				  NameStr(procStruct->proname),
				  fn_oid);
	if (rv >= sizeof(procName) || rv < 0)
		elog(ERROR, "procedure name would overrun buffer");

	/* Replace any not-legal-in-Python-names characters with '_' */
	for (ptr = procName; *ptr; ptr++)
	{
		if (!((*ptr >= 'A' && *ptr <= 'Z') ||
			  (*ptr >= 'a' && *ptr <= 'z') ||
			  (*ptr >= '0' && *ptr <= '9')))
			*ptr = '_';
	}

	/* Create long-lived context that all procedure info will live in */
	cxt = AllocSetContextCreate(TopMemoryContext,
								"PL/Python function",
								ALLOCSET_DEFAULT_SIZES);

	oldcxt = MemoryContextSwitchTo(cxt);

	/* The struct itself lives in the new context, too */
	proc = (PLyProcedure *) palloc0(sizeof(PLyProcedure));
	proc->mcxt = cxt;

	PG_TRY();
	{
		Datum		protrftypes_datum;
		Datum		prosrcdatum;
		bool		isnull;
		char	   *procSource;
		int			i;

		proc->proname = pstrdup(NameStr(procStruct->proname));
		MemoryContextSetIdentifier(cxt, proc->proname);
		proc->pyname = pstrdup(procName);
		/* xmin and TID are what PLy_procedure_valid() compares later */
		proc->fn_xmin = HeapTupleHeaderGetRawXmin(procTup->t_data);
		proc->fn_tid = procTup->t_self;
		/* anything not declared VOLATILE is treated as read-only */
		proc->fn_readonly = (procStruct->provolatile != PROVOLATILE_VOLATILE);
		proc->is_setof = procStruct->proretset;
		proc->is_procedure = (procStruct->prokind == PROKIND_PROCEDURE);
		proc->src = NULL;
		proc->argnames = NULL;
		proc->args = NULL;
		proc->nargs = 0;
		proc->langid = procStruct->prolang;
		/* protrftypes may be NULL, in which case there is no list */
		protrftypes_datum = SysCacheGetAttr(PROCOID, procTup,
											Anum_pg_proc_protrftypes,
											&isnull);
		proc->trftypes = isnull ? NIL : oid_array_to_list(protrftypes_datum);
		proc->code = NULL;
		proc->statics = NULL;
		proc->globals = NULL;
		proc->calldepth = 0;
		proc->argstack = NULL;

		/*
		 * get information required for output conversion of the return value,
		 * but only if this isn't a trigger.
		 */
		if (!is_trigger)
		{
			Oid			rettype = procStruct->prorettype;
			HeapTuple	rvTypeTup;
			Form_pg_type rvTypeStruct;

			rvTypeTup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(rettype));
			if (!HeapTupleIsValid(rvTypeTup))
				elog(ERROR, "cache lookup failed for type %u", rettype);
			rvTypeStruct = (Form_pg_type) GETSTRUCT(rvTypeTup);

			/* Disallow pseudotype result, except for void or record */
			if (rvTypeStruct->typtype == TYPTYPE_PSEUDO)
			{
				if (rettype == VOIDOID ||
					rettype == RECORDOID)
					 /* okay */ ;
				else if (rettype == TRIGGEROID || rettype == EVTTRIGGEROID)
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("trigger functions can only be called as triggers")));
				else
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("PL/Python functions cannot return type %s",
									format_type_be(rettype))));
			}

			/* set up output function for procedure result */
			PLy_output_setup_func(&proc->result, proc->mcxt,
								  rettype, -1, proc);

			ReleaseSysCache(rvTypeTup);
		}
		else
		{
			/*
			 * In a trigger function, we use proc->result and proc->result_in
			 * for converting tuples, but we don't yet have enough info to set
			 * them up.  PLy_exec_trigger will deal with it.
			 */
			proc->result.typoid = InvalidOid;
			proc->result_in.typoid = InvalidOid;
		}

		/*
		 * Now get information required for input conversion of the
		 * procedure's arguments.  Note that we ignore output arguments here.
		 * If the function returns record, those I/O functions will be set up
		 * when the function is first called.
		 */
		if (procStruct->pronargs)
		{
			Oid		   *types;
			char	  **names,
					   *modes;
			int			pos,
						total;

			/* extract argument type info from the pg_proc tuple */
			total = get_func_arg_info(procTup, &types, &names, &modes);

			/* count number of in+inout args into proc->nargs */
			if (modes == NULL)
				proc->nargs = total;
			else
			{
				/* proc->nargs was initialized to 0 above */
				for (i = 0; i < total; i++)
				{
					if (modes[i] != PROARGMODE_OUT &&
						modes[i] != PROARGMODE_TABLE)
						(proc->nargs)++;
				}
			}

			/* Allocate arrays for per-input-argument data */
			proc->argnames = (char **) palloc0(sizeof(char *) * proc->nargs);
			proc->args = (PLyDatumToOb *) palloc0(sizeof(PLyDatumToOb) * proc->nargs);

			/*
			 * i walks over all arguments, pos only over the ones that are
			 * kept (i.e. it indexes proc->args and proc->argnames).
			 */
			for (i = pos = 0; i < total; i++)
			{
				HeapTuple	argTypeTup;
				Form_pg_type argTypeStruct;

				if (modes &&
					(modes[i] == PROARGMODE_OUT ||
					 modes[i] == PROARGMODE_TABLE))
					continue;	/* skip OUT arguments */

				Assert(types[i] == procStruct->proargtypes.values[pos]);

				argTypeTup = SearchSysCache1(TYPEOID,
											 ObjectIdGetDatum(types[i]));
				if (!HeapTupleIsValid(argTypeTup))
					elog(ERROR, "cache lookup failed for type %u", types[i]);
				argTypeStruct = (Form_pg_type) GETSTRUCT(argTypeTup);

				/* disallow pseudotype arguments */
				if (argTypeStruct->typtype == TYPTYPE_PSEUDO)
					ereport(ERROR,
							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
							 errmsg("PL/Python functions cannot accept type %s",
									format_type_be(types[i]))));

				/* set up I/O function info */
				PLy_input_setup_func(&proc->args[pos], proc->mcxt,
									 types[i], -1,	/* typmod not known */
									 proc);

				/* get argument name */
				proc->argnames[pos] = names ? pstrdup(names[i]) : NULL;

				ReleaseSysCache(argTypeTup);

				pos++;
			}
		}

		/*
		 * get the text of the function.
		 */
		prosrcdatum = SysCacheGetAttr(PROCOID, procTup,
									  Anum_pg_proc_prosrc, &isnull);
		if (isnull)
			elog(ERROR, "null prosrc");
		procSource = TextDatumGetCString(prosrcdatum);

		/* Compile it; the C string copy is not needed afterwards */
		PLy_procedure_compile(proc, procSource);

		pfree(procSource);
	}
	PG_CATCH();
	{
		/* Restore the caller's context, then throw away the half-built proc */
		MemoryContextSwitchTo(oldcxt);
		PLy_procedure_delete(proc);
		PG_RE_THROW();
	}
	PG_END_TRY();

	MemoryContextSwitchTo(oldcxt);
	return proc;
}
353 
354 /*
355  * Insert the procedure into the Python interpreter
356  */
357 void
PLy_procedure_compile(PLyProcedure * proc,const char * src)358 PLy_procedure_compile(PLyProcedure *proc, const char *src)
359 {
360 	PyObject   *crv = NULL;
361 	char	   *msrc;
362 
363 	proc->globals = PyDict_Copy(PLy_interp_globals);
364 
365 	/*
366 	 * SD is private preserved data between calls. GD is global data shared by
367 	 * all functions
368 	 */
369 	proc->statics = PyDict_New();
370 	if (!proc->statics)
371 		PLy_elog(ERROR, NULL);
372 	PyDict_SetItemString(proc->globals, "SD", proc->statics);
373 
374 	/*
375 	 * insert the function code into the interpreter
376 	 */
377 	msrc = PLy_procedure_munge_source(proc->pyname, src);
378 	/* Save the mangled source for later inclusion in tracebacks */
379 	proc->src = MemoryContextStrdup(proc->mcxt, msrc);
380 	crv = PyRun_String(msrc, Py_file_input, proc->globals, NULL);
381 	pfree(msrc);
382 
383 	if (crv != NULL)
384 	{
385 		int			clen;
386 		char		call[NAMEDATALEN + 256];
387 
388 		Py_DECREF(crv);
389 
390 		/*
391 		 * compile a call to the function
392 		 */
393 		clen = snprintf(call, sizeof(call), "%s()", proc->pyname);
394 		if (clen < 0 || clen >= sizeof(call))
395 			elog(ERROR, "string would overflow buffer");
396 		proc->code = Py_CompileString(call, "<string>", Py_eval_input);
397 		if (proc->code != NULL)
398 			return;
399 	}
400 
401 	if (proc->proname)
402 		PLy_elog(ERROR, "could not compile PL/Python function \"%s\"",
403 				 proc->proname);
404 	else
405 		PLy_elog(ERROR, "could not compile anonymous PL/Python code block");
406 }
407 
408 void
PLy_procedure_delete(PLyProcedure * proc)409 PLy_procedure_delete(PLyProcedure *proc)
410 {
411 	Py_XDECREF(proc->code);
412 	Py_XDECREF(proc->statics);
413 	Py_XDECREF(proc->globals);
414 	MemoryContextDelete(proc->mcxt);
415 }
416 
417 /*
418  * Decide whether a cached PLyProcedure struct is still valid
419  */
420 static bool
PLy_procedure_valid(PLyProcedure * proc,HeapTuple procTup)421 PLy_procedure_valid(PLyProcedure *proc, HeapTuple procTup)
422 {
423 	if (proc == NULL)
424 		return false;
425 
426 	/* If the pg_proc tuple has changed, it's not valid */
427 	if (!(proc->fn_xmin == HeapTupleHeaderGetRawXmin(procTup->t_data) &&
428 		  ItemPointerEquals(&proc->fn_tid, &procTup->t_self)))
429 		return false;
430 
431 	return true;
432 }
433 
434 static char *
PLy_procedure_munge_source(const char * name,const char * src)435 PLy_procedure_munge_source(const char *name, const char *src)
436 {
437 	char	   *mrc,
438 			   *mp;
439 	const char *sp;
440 	size_t		mlen;
441 	int			plen;
442 
443 	/*
444 	 * room for function source and the def statement
445 	 */
446 	mlen = (strlen(src) * 2) + strlen(name) + 16;
447 
448 	mrc = palloc(mlen);
449 	plen = snprintf(mrc, mlen, "def %s():\n\t", name);
450 	Assert(plen >= 0 && plen < mlen);
451 
452 	sp = src;
453 	mp = mrc + plen;
454 
455 	while (*sp != '\0')
456 	{
457 		if (*sp == '\r' && *(sp + 1) == '\n')
458 			sp++;
459 
460 		if (*sp == '\n' || *sp == '\r')
461 		{
462 			*mp++ = '\n';
463 			*mp++ = '\t';
464 			sp++;
465 		}
466 		else
467 			*mp++ = *sp++;
468 	}
469 	*mp++ = '\n';
470 	*mp++ = '\n';
471 	*mp = '\0';
472 
473 	if (mp > (mrc + mlen))
474 		elog(FATAL, "buffer overrun in PLy_munge_source");
475 
476 	return mrc;
477 }
478