1 /*------------------------------------------------------------------------- 2 * 3 * dict_synonym.c 4 * Synonym dictionary: replace word by its synonym 5 * 6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group 7 * 8 * 9 * IDENTIFICATION 10 * src/backend/tsearch/dict_synonym.c 11 * 12 *------------------------------------------------------------------------- 13 */ 14 #include "postgres.h" 15 16 #include "commands/defrem.h" 17 #include "tsearch/ts_locale.h" 18 #include "tsearch/ts_utils.h" 19 #include "utils/builtins.h" 20 21 typedef struct 22 { 23 char *in; 24 char *out; 25 int outlen; 26 uint16 flags; 27 } Syn; 28 29 typedef struct 30 { 31 int len; /* length of syn array */ 32 Syn *syn; 33 bool case_sensitive; 34 } DictSyn; 35 36 /* 37 * Finds the next whitespace-delimited word within the 'in' string. 38 * Returns a pointer to the first character of the word, and a pointer 39 * to the next byte after the last character in the word (in *end). 40 * Character '*' at the end of word will not be threated as word 41 * character if flags is not null. 42 */ 43 static char * 44 findwrd(char *in, char **end, uint16 *flags) 45 { 46 char *start; 47 char *lastchar; 48 49 /* Skip leading spaces */ 50 while (*in && t_isspace(in)) 51 in += pg_mblen(in); 52 53 /* Return NULL on empty lines */ 54 if (*in == '\0') 55 { 56 *end = NULL; 57 return NULL; 58 } 59 60 lastchar = start = in; 61 62 /* Find end of word */ 63 while (*in && !t_isspace(in)) 64 { 65 lastchar = in; 66 in += pg_mblen(in); 67 } 68 69 if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags) 70 { 71 *flags = TSL_PREFIX; 72 *end = lastchar; 73 } 74 else 75 { 76 if (flags) 77 *flags = 0; 78 *end = in; 79 } 80 81 return start; 82 } 83 84 static int 85 compareSyn(const void *a, const void *b) 86 { 87 return strcmp(((const Syn *) a)->in, ((const Syn *) b)->in); 88 } 89 90 91 Datum 92 dsynonym_init(PG_FUNCTION_ARGS) 93 { 94 List *dictoptions = (List *) PG_GETARG_POINTER(0); 95 DictSyn *d; 96 ListCell *l; 97 char *filename = NULL; 98 bool case_sensitive = false; 99 tsearch_readline_state trst; 100 char *starti, 101 *starto, 102 *end = NULL; 103 int cur = 0; 104 char *line = NULL; 105 uint16 flags = 0; 106 107 foreach(l, dictoptions) 108 { 109 DefElem *defel = (DefElem *) lfirst(l); 110 111 if (strcmp(defel->defname, "synonyms") == 0) 112 filename = defGetString(defel); 113 else if (strcmp(defel->defname, "casesensitive") == 0) 114 case_sensitive = defGetBoolean(defel); 115 else 116 ereport(ERROR, 117 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 118 errmsg("unrecognized synonym parameter: \"%s\"", 119 defel->defname))); 120 } 121 122 if (!filename) 123 ereport(ERROR, 124 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 125 errmsg("missing Synonyms parameter"))); 126 127 filename = get_tsearch_config_filename(filename, "syn"); 128 129 if (!tsearch_readline_begin(&trst, filename)) 130 ereport(ERROR, 131 (errcode(ERRCODE_CONFIG_FILE_ERROR), 132 errmsg("could not open synonym file \"%s\": %m", 133 filename))); 134 135 d = (DictSyn *) palloc0(sizeof(DictSyn)); 136 137 while ((line = tsearch_readline(&trst)) != NULL) 138 { 139 starti = findwrd(line, &end, NULL); 140 if (!starti) 141 { 142 /* Empty line */ 143 goto skipline; 144 } 145 if (*end == '\0') 146 { 147 /* A line with only one word. Ignore silently. */ 148 goto skipline; 149 } 150 *end = '\0'; 151 152 starto = findwrd(end + 1, &end, &flags); 153 if (!starto) 154 { 155 /* A line with only one word (+whitespace). Ignore silently. */ 156 goto skipline; 157 } 158 *end = '\0'; 159 160 /* 161 * starti now points to the first word, and starto to the second word 162 * on the line, with a \0 terminator at the end of both words. 163 */ 164 165 if (cur >= d->len) 166 { 167 if (d->len == 0) 168 { 169 d->len = 64; 170 d->syn = (Syn *) palloc(sizeof(Syn) * d->len); 171 } 172 else 173 { 174 d->len *= 2; 175 d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len); 176 } 177 } 178 179 if (case_sensitive) 180 { 181 d->syn[cur].in = pstrdup(starti); 182 d->syn[cur].out = pstrdup(starto); 183 } 184 else 185 { 186 d->syn[cur].in = lowerstr(starti); 187 d->syn[cur].out = lowerstr(starto); 188 } 189 190 d->syn[cur].outlen = strlen(starto); 191 d->syn[cur].flags = flags; 192 193 cur++; 194 195 skipline: 196 pfree(line); 197 } 198 199 tsearch_readline_end(&trst); 200 201 d->len = cur; 202 qsort(d->syn, d->len, sizeof(Syn), compareSyn); 203 204 d->case_sensitive = case_sensitive; 205 206 PG_RETURN_POINTER(d); 207 } 208 209 Datum 210 dsynonym_lexize(PG_FUNCTION_ARGS) 211 { 212 DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0); 213 char *in = (char *) PG_GETARG_POINTER(1); 214 int32 len = PG_GETARG_INT32(2); 215 Syn key, 216 *found; 217 TSLexeme *res; 218 219 /* note: d->len test protects against Solaris bsearch-of-no-items bug */ 220 if (len <= 0 || d->len <= 0) 221 PG_RETURN_POINTER(NULL); 222 223 if (d->case_sensitive) 224 key.in = pnstrdup(in, len); 225 else 226 key.in = lowerstr_with_len(in, len); 227 228 key.out = NULL; 229 230 found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn); 231 pfree(key.in); 232 233 if (!found) 234 PG_RETURN_POINTER(NULL); 235 236 res = palloc0(sizeof(TSLexeme) * 2); 237 res[0].lexeme = pnstrdup(found->out, found->outlen); 238 res[0].flags = found->flags; 239 240 PG_RETURN_POINTER(res); 241 } 242