1 /*-------------------------------------------------------------------------
2 *
3 * scansup.c
4 * scanner support routines used by the core lexer
5 *
6 * Portions Copyright (c) 1996-2021, PgPool Global Development Group
7 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 *
11 * IDENTIFICATION
12 * src/backend/parser/scansup.c
13 *
14 *-------------------------------------------------------------------------
15 */
16 #include "pool_parser.h"
17 #include "utils/palloc.h"
18
19 #include <ctype.h>
20 #include <string.h>
21
22 #include "pg_wchar.h"
23 #include "scansup.h"
24
25
26
/*
 * downcase_truncate_identifier() --- fold an unquoted identifier to lower
 * case with truncation requested, optionally warning about truncation.
 *
 * ident: the identifier bytes.  In some usages the passed string is not
 *		  null-terminated, so only the first len bytes are meaningful.
 * len:	  number of bytes of ident to process.
 * warn:  forwarded to downcase_identifier() as the truncation-warning flag.
 *
 * Returns the palloc'd string produced by downcase_identifier() when it is
 * called with its truncate argument set to true.
 *
 * Note: the API of this function is designed to allow for downcasing
 * transformations that increase the string length, but that is not yet
 * supported.  Anyone implementing it will also need to fix
 * SplitIdentifierString() in utils/adt/varlena.c.
 */
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
	char	   *lowered = downcase_identifier(ident, len, warn, true);

	return lowered;
}
45
46 /*
47 * a workhorse for downcase_truncate_identifier
48 */
49 char *
downcase_identifier(const char * ident,int len,bool warn,bool truncate)50 downcase_identifier(const char *ident, int len, bool warn, bool truncate)
51 {
52 char *result;
53 int i;
54 bool enc_is_single_byte;
55
56 result = palloc(len + 1);
57 enc_is_single_byte = pg_database_encoding_max_length() == 1;
58
59 /*
60 * SQL99 specifies Unicode-aware case normalization, which we don't yet
61 * have the infrastructure for. Instead we use tolower() to provide a
62 * locale-aware translation. However, there are some locales where this
63 * is not right either (eg, Turkish may do strange things with 'i' and
64 * 'I'). Our current compromise is to use tolower() for characters with
65 * the high bit set, as long as they aren't part of a multi-byte
66 * character, and use an ASCII-only downcasing for 7-bit characters.
67 */
68 for (i = 0; i < len; i++)
69 {
70 unsigned char ch = (unsigned char) ident[i];
71
72 if (ch >= 'A' && ch <= 'Z')
73 ch += 'a' - 'A';
74 else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
75 ch = tolower(ch);
76 result[i] = (char) ch;
77 }
78 result[i] = '\0';
79
80 if (i >= NAMEDATALEN && truncate)
81 truncate_identifier(result, i, warn);
82
83 return result;
84 }
85
86
/*
 * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
 *
 * The entire body is guarded by "#if PGPOOL_NOT_USED": when that macro is
 * undefined or evaluates to zero, this function compiles to a no-op and the
 * string is left untouched.
 *
 * When the body is compiled in, a string of NAMEDATALEN bytes or more is
 * cut in place at the length returned by pg_mbcliplen(), and a NOTICE is
 * reported first if warn is true.
 *
 * The caller passes in the string length because that saves a strlen()
 * call in some common usages.
 */
void
truncate_identifier(char *ident, int len, bool warn)
{
#if PGPOOL_NOT_USED
	int			clipped;

	/* Nothing to do for identifiers that already fit. */
	if (len < NAMEDATALEN)
		return;

	clipped = pg_mbcliplen(ident, len, NAMEDATALEN - 1);

	/* Report before cutting, so the message can show the full original. */
	if (warn)
		ereport(NOTICE,
				(errcode(ERRCODE_NAME_TOO_LONG),
				 errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
						ident, clipped, ident)));

	ident[clipped] = '\0';
#endif
}
112
/*
 * scanner_isspace() --- return true if flex scanner considers char whitespace
 *
 * Use this in place of the potentially locale-dependent isspace() whenever
 * the result has to agree with the lexer's behavior.
 *
 * Returns true for space, tab, newline, carriage return and form feed;
 * false for every other character.
 *
 * In principle similar functions might be needed for isalnum etc, but for
 * the moment only isspace seems needed.
 */
bool
scanner_isspace(char ch)
{
	/* This must match scan.l's list of {space} characters */
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\f':
			return true;
		default:
			return false;
	}
}
134