1 /*-------------------------------------------------------------------------
2  *
3  * like.c
4  *	  like expression handling code.
5  *
6  *	 NOTES
7  *		A big hack of the regexp.c code!! Contributed by
8  *		Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
9  *
10  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  * IDENTIFICATION
14  *	src/backend/utils/adt/like.c
15  *
16  *-------------------------------------------------------------------------
17  */
18 #include "postgres.h"
19 
20 #include <ctype.h>
21 
22 #include "catalog/pg_collation.h"
23 #include "mb/pg_wchar.h"
24 #include "miscadmin.h"
25 #include "utils/builtins.h"
26 #include "utils/pg_locale.h"
27 
28 
/*
 * Result codes used with the MatchText routines.  The interface functions
 * in this file only ever compare a result against LIKE_TRUE.
 * NOTE(review): LIKE_ABORT is not referenced anywhere in this file;
 * presumably it is produced and consumed inside like_match.c -- confirm.
 */
#define LIKE_TRUE						1
#define LIKE_FALSE						0
#define LIKE_ABORT						(-1)


/*
 * Forward declarations.
 *
 * The SB_, MB_ and UTF8_ routines have no bodies written out in this file;
 * each one is produced by an #include of like_match.c further down, with
 * MatchText / do_like_escape #defined to the name wanted for that flavor.
 */

/* Single-byte flavor */
static int	SB_MatchText(const char *t, int tlen, const char *p, int plen,
						 pg_locale_t locale, bool locale_is_c);
static text *SB_do_like_escape(text *, text *);

/* Multibyte flavor */
static int	MB_MatchText(const char *t, int tlen, const char *p, int plen,
						 pg_locale_t locale, bool locale_is_c);
static text *MB_do_like_escape(text *, text *);

/* UTF8 flavor (matcher only) */
static int	UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
						   pg_locale_t locale, bool locale_is_c);

/* Single-byte, case-insensitive flavor (matcher only) */
static int	SB_IMatchText(const char *t, int tlen, const char *p, int plen,
						  pg_locale_t locale, bool locale_is_c);

/* Dispatchers defined in this file that pick one of the flavors above */
static int	GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
static int	Generic_Text_IC_like(text *str, text *pat, Oid collation);
50 
/*--------------------
 * wchareq
 *
 * Helper for the multibyte build of MatchText (it backs the CHAREQ macro
 * there).  Decides whether the multibyte characters starting at p1 and p2
 * are the same character.
 *
 * Returns 1 if the two characters have the same byte length and identical
 * bytes, 0 otherwise.
 *--------------------
 */
static inline int
wchareq(const char *p1, const char *p2)
{
	int			nbytes;

	/* Cheap early exit: different lead bytes can never match. */
	if (*p1 != *p2)
		return 0;

	/* Characters occupying a different number of bytes cannot match. */
	nbytes = pg_mblen(p1);
	if (nbytes != pg_mblen(p2))
		return 0;

	/* Same length, so walk both characters byte by byte. */
	for (; nbytes; nbytes--, p1++, p2++)
	{
		if (*p1 != *p2)
			return 0;
	}

	return 1;
}
77 
78 /*
79  * Formerly we had a routine iwchareq() here that tried to do case-insensitive
80  * comparison of multibyte characters.  It did not work at all, however,
81  * because it relied on tolower() which has a single-byte API ... and
82  * towlower() wouldn't be much better since we have no suitably cheap way
83  * of getting a single character transformed to the system's wchar_t format.
84  * So now, we just downcase the strings using lower() and apply regular LIKE
85  * comparison.  This should be revisited when we install better locale support.
86  */
87 
88 /*
89  * We do handle case-insensitive matching for single-byte encodings using
90  * fold-on-the-fly processing, however.
91  */
92 static char
SB_lower_char(unsigned char c,pg_locale_t locale,bool locale_is_c)93 SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
94 {
95 	if (locale_is_c)
96 		return pg_ascii_tolower(c);
97 #ifdef HAVE_LOCALE_T
98 	else if (locale)
99 		return tolower_l(c, locale->info.lt);
100 #endif
101 	else
102 		return pg_tolower(c);
103 }
104 
105 
/* Step pointer p forward by one byte and decrement the remaining length. */
#define NextByte(p, plen)	((p)++, (plen)--)

/*
 * like_match.c is included four times below.  Before each inclusion the
 * helper macros it is compiled against (CHAREQ, NextChar, CopyAdvChar,
 * MATCH_LOWER) and the names of the functions it should define (MatchText,
 * do_like_escape) are set for one particular flavor.
 *
 * NOTE(review): NextChar, MatchText etc. are redefined here without any
 * #undef in this file, which implies like_match.c #undefs them at its end
 * -- confirm against like_match.c before reordering anything.
 */

/*
 * Set up to compile like_match.c for multibyte characters: characters are
 * compared with wchareq() and stepped over / copied pg_mblen() bytes at a
 * time.
 */
#define CHAREQ(p1, p2) wchareq((p1), (p2))
#define NextChar(p, plen) \
	do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
#define CopyAdvChar(dst, src, srclen) \
	do { int __l = pg_mblen(src); \
		 (srclen) -= __l; \
		 while (__l-- > 0) \
			 *(dst)++ = *(src)++; \
	   } while (0)

#define MatchText	MB_MatchText
#define do_like_escape	MB_do_like_escape

#include "like_match.c"

/*
 * Set up to compile like_match.c for single-byte characters: one byte is
 * one character, so comparison, advancing and copying all work on single
 * bytes.
 */
#define CHAREQ(p1, p2) (*(p1) == *(p2))
#define NextChar(p, plen) NextByte((p), (plen))
#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)

#define MatchText	SB_MatchText
#define do_like_escape	SB_do_like_escape

#include "like_match.c"

/*
 * Set up to compile like_match.c for single-byte case-insensitive matches.
 * MATCH_LOWER folds a byte with SB_lower_char(); it expands to a use of
 * "locale" and "locale_is_c", so those names must be in scope wherever
 * like_match.c applies it.
 * NOTE(review): CHAREQ, CopyAdvChar and do_like_escape are deliberately not
 * defined for this flavor; presumably like_match.c handles their absence
 * (no escape routine is generated) -- confirm there.
 */
#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
#define NextChar(p, plen) NextByte((p), (plen))
#define MatchText SB_IMatchText

#include "like_match.c"

/*
 * Set up to compile like_match.c for UTF8 encoding, using a fast NextChar:
 * step over one byte, then keep stepping while input remains and the next
 * byte has the bit pattern 10xxxxxx (i.e. (byte & 0xC0) == 0x80), instead
 * of calling pg_mblen().
 */

#define NextChar(p, plen) \
	do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
#define MatchText	UTF8_MatchText

#include "like_match.c"
148 
149 /* Generic for all cases not requiring inline case-folding */
150 static inline int
GenericMatchText(const char * s,int slen,const char * p,int plen,Oid collation)151 GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
152 {
153 	if (collation && !lc_ctype_is_c(collation) && collation != DEFAULT_COLLATION_OID)
154 	{
155 		pg_locale_t locale = pg_newlocale_from_collation(collation);
156 
157 		if (locale && !locale->deterministic)
158 			ereport(ERROR,
159 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
160 					 errmsg("nondeterministic collations are not supported for LIKE")));
161 	}
162 
163 	if (pg_database_encoding_max_length() == 1)
164 		return SB_MatchText(s, slen, p, plen, 0, true);
165 	else if (GetDatabaseEncoding() == PG_UTF8)
166 		return UTF8_MatchText(s, slen, p, plen, 0, true);
167 	else
168 		return MB_MatchText(s, slen, p, plen, 0, true);
169 }
170 
171 static inline int
Generic_Text_IC_like(text * str,text * pat,Oid collation)172 Generic_Text_IC_like(text *str, text *pat, Oid collation)
173 {
174 	char	   *s,
175 			   *p;
176 	int			slen,
177 				plen;
178 	pg_locale_t locale = 0;
179 	bool		locale_is_c = false;
180 
181 	if (lc_ctype_is_c(collation))
182 		locale_is_c = true;
183 	else if (collation != DEFAULT_COLLATION_OID)
184 	{
185 		if (!OidIsValid(collation))
186 		{
187 			/*
188 			 * This typically means that the parser could not resolve a
189 			 * conflict of implicit collations, so report it that way.
190 			 */
191 			ereport(ERROR,
192 					(errcode(ERRCODE_INDETERMINATE_COLLATION),
193 					 errmsg("could not determine which collation to use for ILIKE"),
194 					 errhint("Use the COLLATE clause to set the collation explicitly.")));
195 		}
196 		locale = pg_newlocale_from_collation(collation);
197 
198 		if (locale && !locale->deterministic)
199 			ereport(ERROR,
200 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
201 					 errmsg("nondeterministic collations are not supported for ILIKE")));
202 	}
203 
204 	/*
205 	 * For efficiency reasons, in the single byte case we don't call lower()
206 	 * on the pattern and text, but instead call SB_lower_char on each
207 	 * character.  In the multi-byte case we don't have much choice :-(. Also,
208 	 * ICU does not support single-character case folding, so we go the long
209 	 * way.
210 	 */
211 
212 	if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
213 	{
214 		pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
215 													 PointerGetDatum(pat)));
216 		p = VARDATA_ANY(pat);
217 		plen = VARSIZE_ANY_EXHDR(pat);
218 		str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
219 													 PointerGetDatum(str)));
220 		s = VARDATA_ANY(str);
221 		slen = VARSIZE_ANY_EXHDR(str);
222 		if (GetDatabaseEncoding() == PG_UTF8)
223 			return UTF8_MatchText(s, slen, p, plen, 0, true);
224 		else
225 			return MB_MatchText(s, slen, p, plen, 0, true);
226 	}
227 	else
228 	{
229 		p = VARDATA_ANY(pat);
230 		plen = VARSIZE_ANY_EXHDR(pat);
231 		s = VARDATA_ANY(str);
232 		slen = VARSIZE_ANY_EXHDR(str);
233 		return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
234 	}
235 }
236 
237 /*
238  *	interface routines called by the function manager
239  */
240 
241 Datum
namelike(PG_FUNCTION_ARGS)242 namelike(PG_FUNCTION_ARGS)
243 {
244 	Name		str = PG_GETARG_NAME(0);
245 	text	   *pat = PG_GETARG_TEXT_PP(1);
246 	bool		result;
247 	char	   *s,
248 			   *p;
249 	int			slen,
250 				plen;
251 
252 	s = NameStr(*str);
253 	slen = strlen(s);
254 	p = VARDATA_ANY(pat);
255 	plen = VARSIZE_ANY_EXHDR(pat);
256 
257 	result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
258 
259 	PG_RETURN_BOOL(result);
260 }
261 
262 Datum
namenlike(PG_FUNCTION_ARGS)263 namenlike(PG_FUNCTION_ARGS)
264 {
265 	Name		str = PG_GETARG_NAME(0);
266 	text	   *pat = PG_GETARG_TEXT_PP(1);
267 	bool		result;
268 	char	   *s,
269 			   *p;
270 	int			slen,
271 				plen;
272 
273 	s = NameStr(*str);
274 	slen = strlen(s);
275 	p = VARDATA_ANY(pat);
276 	plen = VARSIZE_ANY_EXHDR(pat);
277 
278 	result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
279 
280 	PG_RETURN_BOOL(result);
281 }
282 
283 Datum
textlike(PG_FUNCTION_ARGS)284 textlike(PG_FUNCTION_ARGS)
285 {
286 	text	   *str = PG_GETARG_TEXT_PP(0);
287 	text	   *pat = PG_GETARG_TEXT_PP(1);
288 	bool		result;
289 	char	   *s,
290 			   *p;
291 	int			slen,
292 				plen;
293 
294 	s = VARDATA_ANY(str);
295 	slen = VARSIZE_ANY_EXHDR(str);
296 	p = VARDATA_ANY(pat);
297 	plen = VARSIZE_ANY_EXHDR(pat);
298 
299 	result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
300 
301 	PG_RETURN_BOOL(result);
302 }
303 
304 Datum
textnlike(PG_FUNCTION_ARGS)305 textnlike(PG_FUNCTION_ARGS)
306 {
307 	text	   *str = PG_GETARG_TEXT_PP(0);
308 	text	   *pat = PG_GETARG_TEXT_PP(1);
309 	bool		result;
310 	char	   *s,
311 			   *p;
312 	int			slen,
313 				plen;
314 
315 	s = VARDATA_ANY(str);
316 	slen = VARSIZE_ANY_EXHDR(str);
317 	p = VARDATA_ANY(pat);
318 	plen = VARSIZE_ANY_EXHDR(pat);
319 
320 	result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
321 
322 	PG_RETURN_BOOL(result);
323 }
324 
325 Datum
bytealike(PG_FUNCTION_ARGS)326 bytealike(PG_FUNCTION_ARGS)
327 {
328 	bytea	   *str = PG_GETARG_BYTEA_PP(0);
329 	bytea	   *pat = PG_GETARG_BYTEA_PP(1);
330 	bool		result;
331 	char	   *s,
332 			   *p;
333 	int			slen,
334 				plen;
335 
336 	s = VARDATA_ANY(str);
337 	slen = VARSIZE_ANY_EXHDR(str);
338 	p = VARDATA_ANY(pat);
339 	plen = VARSIZE_ANY_EXHDR(pat);
340 
341 	result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
342 
343 	PG_RETURN_BOOL(result);
344 }
345 
346 Datum
byteanlike(PG_FUNCTION_ARGS)347 byteanlike(PG_FUNCTION_ARGS)
348 {
349 	bytea	   *str = PG_GETARG_BYTEA_PP(0);
350 	bytea	   *pat = PG_GETARG_BYTEA_PP(1);
351 	bool		result;
352 	char	   *s,
353 			   *p;
354 	int			slen,
355 				plen;
356 
357 	s = VARDATA_ANY(str);
358 	slen = VARSIZE_ANY_EXHDR(str);
359 	p = VARDATA_ANY(pat);
360 	plen = VARSIZE_ANY_EXHDR(pat);
361 
362 	result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
363 
364 	PG_RETURN_BOOL(result);
365 }
366 
367 /*
368  * Case-insensitive versions
369  */
370 
371 Datum
nameiclike(PG_FUNCTION_ARGS)372 nameiclike(PG_FUNCTION_ARGS)
373 {
374 	Name		str = PG_GETARG_NAME(0);
375 	text	   *pat = PG_GETARG_TEXT_PP(1);
376 	bool		result;
377 	text	   *strtext;
378 
379 	strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
380 												 NameGetDatum(str)));
381 	result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
382 
383 	PG_RETURN_BOOL(result);
384 }
385 
386 Datum
nameicnlike(PG_FUNCTION_ARGS)387 nameicnlike(PG_FUNCTION_ARGS)
388 {
389 	Name		str = PG_GETARG_NAME(0);
390 	text	   *pat = PG_GETARG_TEXT_PP(1);
391 	bool		result;
392 	text	   *strtext;
393 
394 	strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
395 												 NameGetDatum(str)));
396 	result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
397 
398 	PG_RETURN_BOOL(result);
399 }
400 
401 Datum
texticlike(PG_FUNCTION_ARGS)402 texticlike(PG_FUNCTION_ARGS)
403 {
404 	text	   *str = PG_GETARG_TEXT_PP(0);
405 	text	   *pat = PG_GETARG_TEXT_PP(1);
406 	bool		result;
407 
408 	result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
409 
410 	PG_RETURN_BOOL(result);
411 }
412 
413 Datum
texticnlike(PG_FUNCTION_ARGS)414 texticnlike(PG_FUNCTION_ARGS)
415 {
416 	text	   *str = PG_GETARG_TEXT_PP(0);
417 	text	   *pat = PG_GETARG_TEXT_PP(1);
418 	bool		result;
419 
420 	result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
421 
422 	PG_RETURN_BOOL(result);
423 }
424 
425 /*
426  * like_escape() --- given a pattern and an ESCAPE string,
427  * convert the pattern to use Postgres' standard backslash escape convention.
428  */
429 Datum
like_escape(PG_FUNCTION_ARGS)430 like_escape(PG_FUNCTION_ARGS)
431 {
432 	text	   *pat = PG_GETARG_TEXT_PP(0);
433 	text	   *esc = PG_GETARG_TEXT_PP(1);
434 	text	   *result;
435 
436 	if (pg_database_encoding_max_length() == 1)
437 		result = SB_do_like_escape(pat, esc);
438 	else
439 		result = MB_do_like_escape(pat, esc);
440 
441 	PG_RETURN_TEXT_P(result);
442 }
443 
444 /*
445  * like_escape_bytea() --- given a pattern and an ESCAPE string,
446  * convert the pattern to use Postgres' standard backslash escape convention.
447  */
448 Datum
like_escape_bytea(PG_FUNCTION_ARGS)449 like_escape_bytea(PG_FUNCTION_ARGS)
450 {
451 	bytea	   *pat = PG_GETARG_BYTEA_PP(0);
452 	bytea	   *esc = PG_GETARG_BYTEA_PP(1);
453 	bytea	   *result = SB_do_like_escape((text *) pat, (text *) esc);
454 
455 	PG_RETURN_BYTEA_P((bytea *) result);
456 }
457