1 /*-------------------------------------------------------------------------
2  *
3  * kwlookup.c
4  *	  Key word lookup for PostgreSQL
5  *
6  *
7  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  *
11  * IDENTIFICATION
12  *	  src/common/kwlookup.c
13  *
14  *-------------------------------------------------------------------------
15  */
16 #include "c.h"
17 
18 #include "common/kwlookup.h"
19 
20 
21 /*
22  * ScanKeywordLookup - see if a given word is a keyword
23  *
24  * The list of keywords to be matched against is passed as a ScanKeywordList.
25  *
26  * Returns the keyword number (0..N-1) of the keyword, or -1 if no match.
27  * Callers typically use the keyword number to index into information
28  * arrays, but that is no concern of this code.
29  *
30  * The match is done case-insensitively.  Note that we deliberately use a
31  * dumbed-down case conversion that will only translate 'A'-'Z' into 'a'-'z',
32  * even if we are in a locale where tolower() would produce more or different
33  * translations.  This is to conform to the SQL99 spec, which says that
34  * keywords are to be matched in this way even though non-keyword identifiers
35  * receive a different case-normalization mapping.
36  */
37 int
ScanKeywordLookup(const char * str,const ScanKeywordList * keywords)38 ScanKeywordLookup(const char *str,
39 				  const ScanKeywordList *keywords)
40 {
41 	size_t		len;
42 	int			h;
43 	const char *kw;
44 
45 	/*
46 	 * Reject immediately if too long to be any keyword.  This saves useless
47 	 * hashing and downcasing work on long strings.
48 	 */
49 	len = strlen(str);
50 	if (len > keywords->max_kw_len)
51 		return -1;
52 
53 	/*
54 	 * Compute the hash function.  We assume it was generated to produce
55 	 * case-insensitive results.  Since it's a perfect hash, we need only
56 	 * match to the specific keyword it identifies.
57 	 */
58 	h = keywords->hash(str, len);
59 
60 	/* An out-of-range result implies no match */
61 	if (h < 0 || h >= keywords->num_keywords)
62 		return -1;
63 
64 	/*
65 	 * Compare character-by-character to see if we have a match, applying an
66 	 * ASCII-only downcasing to the input characters.  We must not use
67 	 * tolower() since it may produce the wrong translation in some locales
68 	 * (eg, Turkish).
69 	 */
70 	kw = GetScanKeyword(h, keywords);
71 	while (*str != '\0')
72 	{
73 		char		ch = *str++;
74 
75 		if (ch >= 'A' && ch <= 'Z')
76 			ch += 'a' - 'A';
77 		if (ch != *kw++)
78 			return -1;
79 	}
80 	if (*kw != '\0')
81 		return -1;
82 
83 	/* Success! */
84 	return h;
85 }
86