1 /*-------------------------------------------------------------------------
2  *
3  * hashvalidate.c
4  *	  Opclass validator for hash.
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  * IDENTIFICATION
10  *	  src/backend/access/hash/hashvalidate.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15 
16 #include "access/amvalidate.h"
17 #include "access/hash.h"
18 #include "access/htup_details.h"
19 #include "access/xact.h"
20 #include "catalog/pg_am.h"
21 #include "catalog/pg_amop.h"
22 #include "catalog/pg_amproc.h"
23 #include "catalog/pg_opclass.h"
24 #include "catalog/pg_opfamily.h"
25 #include "catalog/pg_proc.h"
26 #include "catalog/pg_type.h"
27 #include "parser/parse_coerce.h"
28 #include "utils/builtins.h"
29 #include "utils/fmgroids.h"
30 #include "utils/lsyscache.h"
31 #include "utils/regproc.h"
32 #include "utils/syscache.h"
33 
34 
35 static bool check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype);
36 
37 
38 /*
39  * Validator for a hash opclass.
40  *
41  * Some of the checks done here cover the whole opfamily, and therefore are
42  * redundant when checking each opclass in a family.  But they don't run long
43  * enough to be much of a problem, so we accept the duplication rather than
44  * complicate the amvalidate API.
45  */
46 bool
hashvalidate(Oid opclassoid)47 hashvalidate(Oid opclassoid)
48 {
49 	bool		result = true;
50 	HeapTuple	classtup;
51 	Form_pg_opclass classform;
52 	Oid			opfamilyoid;
53 	Oid			opcintype;
54 	char	   *opclassname;
55 	HeapTuple	familytup;
56 	Form_pg_opfamily familyform;
57 	char	   *opfamilyname;
58 	CatCList   *proclist,
59 			   *oprlist;
60 	List	   *grouplist;
61 	OpFamilyOpFuncGroup *opclassgroup;
62 	List	   *hashabletypes = NIL;
63 	int			i;
64 	ListCell   *lc;
65 
66 	/* Fetch opclass information */
67 	classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid));
68 	if (!HeapTupleIsValid(classtup))
69 		elog(ERROR, "cache lookup failed for operator class %u", opclassoid);
70 	classform = (Form_pg_opclass) GETSTRUCT(classtup);
71 
72 	opfamilyoid = classform->opcfamily;
73 	opcintype = classform->opcintype;
74 	opclassname = NameStr(classform->opcname);
75 
76 	/* Fetch opfamily information */
77 	familytup = SearchSysCache1(OPFAMILYOID, ObjectIdGetDatum(opfamilyoid));
78 	if (!HeapTupleIsValid(familytup))
79 		elog(ERROR, "cache lookup failed for operator family %u", opfamilyoid);
80 	familyform = (Form_pg_opfamily) GETSTRUCT(familytup);
81 
82 	opfamilyname = NameStr(familyform->opfname);
83 
84 	/* Fetch all operators and support functions of the opfamily */
85 	oprlist = SearchSysCacheList1(AMOPSTRATEGY, ObjectIdGetDatum(opfamilyoid));
86 	proclist = SearchSysCacheList1(AMPROCNUM, ObjectIdGetDatum(opfamilyoid));
87 
88 	/* Check individual support functions */
89 	for (i = 0; i < proclist->n_members; i++)
90 	{
91 		HeapTuple	proctup = &proclist->members[i]->tuple;
92 		Form_pg_amproc procform = (Form_pg_amproc) GETSTRUCT(proctup);
93 
94 		/*
95 		 * All hash functions should be registered with matching left/right
96 		 * types
97 		 */
98 		if (procform->amproclefttype != procform->amprocrighttype)
99 		{
100 			ereport(INFO,
101 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
102 					 errmsg("operator family \"%s\" of access method %s contains support function %s with different left and right input types",
103 							opfamilyname, "hash",
104 							format_procedure(procform->amproc))));
105 			result = false;
106 		}
107 
108 		/* Check procedure numbers and function signatures */
109 		switch (procform->amprocnum)
110 		{
111 			case HASHSTANDARD_PROC:
112 			case HASHEXTENDED_PROC:
113 				if (!check_hash_func_signature(procform->amproc, procform->amprocnum,
114 											   procform->amproclefttype))
115 				{
116 					ereport(INFO,
117 							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
118 							 errmsg("operator family \"%s\" of access method %s contains function %s with wrong signature for support number %d",
119 									opfamilyname, "hash",
120 									format_procedure(procform->amproc),
121 									procform->amprocnum)));
122 					result = false;
123 				}
124 				else
125 				{
126 					/* Remember which types we can hash */
127 					hashabletypes =
128 						list_append_unique_oid(hashabletypes,
129 											   procform->amproclefttype);
130 				}
131 				break;
132 			case HASHOPTIONS_PROC:
133 				if (!check_amoptsproc_signature(procform->amproc))
134 					result = false;
135 				break;
136 			default:
137 				ereport(INFO,
138 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
139 						 errmsg("operator family \"%s\" of access method %s contains function %s with invalid support number %d",
140 								opfamilyname, "hash",
141 								format_procedure(procform->amproc),
142 								procform->amprocnum)));
143 				result = false;
144 				break;
145 		}
146 	}
147 
148 	/* Check individual operators */
149 	for (i = 0; i < oprlist->n_members; i++)
150 	{
151 		HeapTuple	oprtup = &oprlist->members[i]->tuple;
152 		Form_pg_amop oprform = (Form_pg_amop) GETSTRUCT(oprtup);
153 
154 		/* Check that only allowed strategy numbers exist */
155 		if (oprform->amopstrategy < 1 ||
156 			oprform->amopstrategy > HTMaxStrategyNumber)
157 		{
158 			ereport(INFO,
159 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
160 					 errmsg("operator family \"%s\" of access method %s contains operator %s with invalid strategy number %d",
161 							opfamilyname, "hash",
162 							format_operator(oprform->amopopr),
163 							oprform->amopstrategy)));
164 			result = false;
165 		}
166 
167 		/* hash doesn't support ORDER BY operators */
168 		if (oprform->amoppurpose != AMOP_SEARCH ||
169 			OidIsValid(oprform->amopsortfamily))
170 		{
171 			ereport(INFO,
172 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
173 					 errmsg("operator family \"%s\" of access method %s contains invalid ORDER BY specification for operator %s",
174 							opfamilyname, "hash",
175 							format_operator(oprform->amopopr))));
176 			result = false;
177 		}
178 
179 		/* Check operator signature --- same for all hash strategies */
180 		if (!check_amop_signature(oprform->amopopr, BOOLOID,
181 								  oprform->amoplefttype,
182 								  oprform->amoprighttype))
183 		{
184 			ereport(INFO,
185 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
186 					 errmsg("operator family \"%s\" of access method %s contains operator %s with wrong signature",
187 							opfamilyname, "hash",
188 							format_operator(oprform->amopopr))));
189 			result = false;
190 		}
191 
192 		/* There should be relevant hash functions for each datatype */
193 		if (!list_member_oid(hashabletypes, oprform->amoplefttype) ||
194 			!list_member_oid(hashabletypes, oprform->amoprighttype))
195 		{
196 			ereport(INFO,
197 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
198 					 errmsg("operator family \"%s\" of access method %s lacks support function for operator %s",
199 							opfamilyname, "hash",
200 							format_operator(oprform->amopopr))));
201 			result = false;
202 		}
203 	}
204 
205 	/* Now check for inconsistent groups of operators/functions */
206 	grouplist = identify_opfamily_groups(oprlist, proclist);
207 	opclassgroup = NULL;
208 	foreach(lc, grouplist)
209 	{
210 		OpFamilyOpFuncGroup *thisgroup = (OpFamilyOpFuncGroup *) lfirst(lc);
211 
212 		/* Remember the group exactly matching the test opclass */
213 		if (thisgroup->lefttype == opcintype &&
214 			thisgroup->righttype == opcintype)
215 			opclassgroup = thisgroup;
216 
217 		/*
218 		 * Complain if there seems to be an incomplete set of operators for
219 		 * this datatype pair (implying that we have a hash function but no
220 		 * operator).
221 		 */
222 		if (thisgroup->operatorset != (1 << HTEqualStrategyNumber))
223 		{
224 			ereport(INFO,
225 					(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
226 					 errmsg("operator family \"%s\" of access method %s is missing operator(s) for types %s and %s",
227 							opfamilyname, "hash",
228 							format_type_be(thisgroup->lefttype),
229 							format_type_be(thisgroup->righttype))));
230 			result = false;
231 		}
232 	}
233 
234 	/* Check that the originally-named opclass is supported */
235 	/* (if group is there, we already checked it adequately above) */
236 	if (!opclassgroup)
237 	{
238 		ereport(INFO,
239 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
240 				 errmsg("operator class \"%s\" of access method %s is missing operator(s)",
241 						opclassname, "hash")));
242 		result = false;
243 	}
244 
245 	/*
246 	 * Complain if the opfamily doesn't have entries for all possible
247 	 * combinations of its supported datatypes.  While missing cross-type
248 	 * operators are not fatal, it seems reasonable to insist that all
249 	 * built-in hash opfamilies be complete.
250 	 */
251 	if (list_length(grouplist) !=
252 		list_length(hashabletypes) * list_length(hashabletypes))
253 	{
254 		ereport(INFO,
255 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
256 				 errmsg("operator family \"%s\" of access method %s is missing cross-type operator(s)",
257 						opfamilyname, "hash")));
258 		result = false;
259 	}
260 
261 	ReleaseCatCacheList(proclist);
262 	ReleaseCatCacheList(oprlist);
263 	ReleaseSysCache(familytup);
264 	ReleaseSysCache(classtup);
265 
266 	return result;
267 }
268 
269 
270 /*
271  * We need a custom version of check_amproc_signature because of assorted
272  * hacks in the core hash opclass definitions.
273  */
274 static bool
check_hash_func_signature(Oid funcid,int16 amprocnum,Oid argtype)275 check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype)
276 {
277 	bool		result = true;
278 	Oid			restype;
279 	int16		nargs;
280 	HeapTuple	tp;
281 	Form_pg_proc procform;
282 
283 	switch (amprocnum)
284 	{
285 		case HASHSTANDARD_PROC:
286 			restype = INT4OID;
287 			nargs = 1;
288 			break;
289 
290 		case HASHEXTENDED_PROC:
291 			restype = INT8OID;
292 			nargs = 2;
293 			break;
294 
295 		default:
296 			elog(ERROR, "invalid amprocnum");
297 	}
298 
299 	tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
300 	if (!HeapTupleIsValid(tp))
301 		elog(ERROR, "cache lookup failed for function %u", funcid);
302 	procform = (Form_pg_proc) GETSTRUCT(tp);
303 
304 	if (procform->prorettype != restype || procform->proretset ||
305 		procform->pronargs != nargs)
306 		result = false;
307 
308 	if (!IsBinaryCoercible(argtype, procform->proargtypes.values[0]))
309 	{
310 		/*
311 		 * Some of the built-in hash opclasses cheat by using hash functions
312 		 * that are different from but physically compatible with the opclass
313 		 * datatype.  In some of these cases, even a "binary coercible" check
314 		 * fails because there's no relevant cast.  For the moment, fix it by
315 		 * having a list of allowed cases.  Test the specific function
316 		 * identity, not just its input type, because hashvarlena() takes
317 		 * INTERNAL and allowing any such function seems too scary.
318 		 */
319 		if ((funcid == F_HASHINT4 || funcid == F_HASHINT4EXTENDED) &&
320 			(argtype == DATEOID ||
321 			 argtype == XIDOID || argtype == CIDOID))
322 			 /* okay, allowed use of hashint4() */ ;
323 		else if ((funcid == F_HASHINT8 || funcid == F_HASHINT8EXTENDED) &&
324 				 (argtype == XID8OID))
325 			 /* okay, allowed use of hashint8() */ ;
326 		else if ((funcid == F_TIMESTAMP_HASH ||
327 				  funcid == F_TIMESTAMP_HASH_EXTENDED) &&
328 				 argtype == TIMESTAMPTZOID)
329 			 /* okay, allowed use of timestamp_hash() */ ;
330 		else if ((funcid == F_HASHCHAR || funcid == F_HASHCHAREXTENDED) &&
331 				 argtype == BOOLOID)
332 			 /* okay, allowed use of hashchar() */ ;
333 		else if ((funcid == F_HASHVARLENA || funcid == F_HASHVARLENAEXTENDED) &&
334 				 argtype == BYTEAOID)
335 			 /* okay, allowed use of hashvarlena() */ ;
336 		else
337 			result = false;
338 	}
339 
340 	/* If function takes a second argument, it must be for a 64-bit salt. */
341 	if (nargs == 2 && procform->proargtypes.values[1] != INT8OID)
342 		result = false;
343 
344 	ReleaseSysCache(tp);
345 	return result;
346 }
347 
348 /*
349  * Prechecking function for adding operators/functions to a hash opfamily.
350  */
351 void
hashadjustmembers(Oid opfamilyoid,Oid opclassoid,List * operators,List * functions)352 hashadjustmembers(Oid opfamilyoid,
353 				  Oid opclassoid,
354 				  List *operators,
355 				  List *functions)
356 {
357 	Oid			opcintype;
358 	ListCell   *lc;
359 
360 	/*
361 	 * Hash operators and required support functions are always "loose"
362 	 * members of the opfamily if they are cross-type.  If they are not
363 	 * cross-type, we prefer to tie them to the appropriate opclass ... but if
364 	 * the user hasn't created one, we can't do that, and must fall back to
365 	 * using the opfamily dependency.  (We mustn't force creation of an
366 	 * opclass in such a case, as leaving an incomplete opclass laying about
367 	 * would be bad.  Throwing an error is another undesirable alternative.)
368 	 *
369 	 * This behavior results in a bit of a dump/reload hazard, in that the
370 	 * order of restoring objects could affect what dependencies we end up
371 	 * with.  pg_dump's existing behavior will preserve the dependency choices
372 	 * in most cases, but not if a cross-type operator has been bound tightly
373 	 * into an opclass.  That's a mistake anyway, so silently "fixing" it
374 	 * isn't awful.
375 	 *
376 	 * Optional support functions are always "loose" family members.
377 	 *
378 	 * To avoid repeated lookups, we remember the most recently used opclass's
379 	 * input type.
380 	 */
381 	if (OidIsValid(opclassoid))
382 	{
383 		/* During CREATE OPERATOR CLASS, need CCI to see the pg_opclass row */
384 		CommandCounterIncrement();
385 		opcintype = get_opclass_input_type(opclassoid);
386 	}
387 	else
388 		opcintype = InvalidOid;
389 
390 	/*
391 	 * We handle operators and support functions almost identically, so rather
392 	 * than duplicate this code block, just join the lists.
393 	 */
394 	foreach(lc, list_concat_copy(operators, functions))
395 	{
396 		OpFamilyMember *op = (OpFamilyMember *) lfirst(lc);
397 
398 		if (op->is_func && op->number != HASHSTANDARD_PROC)
399 		{
400 			/* Optional support proc, so always a soft family dependency */
401 			op->ref_is_hard = false;
402 			op->ref_is_family = true;
403 			op->refobjid = opfamilyoid;
404 		}
405 		else if (op->lefttype != op->righttype)
406 		{
407 			/* Cross-type, so always a soft family dependency */
408 			op->ref_is_hard = false;
409 			op->ref_is_family = true;
410 			op->refobjid = opfamilyoid;
411 		}
412 		else
413 		{
414 			/* Not cross-type; is there a suitable opclass? */
415 			if (op->lefttype != opcintype)
416 			{
417 				/* Avoid repeating this expensive lookup, even if it fails */
418 				opcintype = op->lefttype;
419 				opclassoid = opclass_for_family_datatype(HASH_AM_OID,
420 														 opfamilyoid,
421 														 opcintype);
422 			}
423 			if (OidIsValid(opclassoid))
424 			{
425 				/* Hard dependency on opclass */
426 				op->ref_is_hard = true;
427 				op->ref_is_family = false;
428 				op->refobjid = opclassoid;
429 			}
430 			else
431 			{
432 				/* We're stuck, so make a soft dependency on the opfamily */
433 				op->ref_is_hard = false;
434 				op->ref_is_family = true;
435 				op->refobjid = opfamilyoid;
436 			}
437 		}
438 	}
439 }
440