1 /*------------------------------------------------------------------------- 2 * 3 * dict_xsyn.c 4 * Extended synonym dictionary 5 * 6 * Copyright (c) 2007-2018, PostgreSQL Global Development Group 7 * 8 * IDENTIFICATION 9 * contrib/dict_xsyn/dict_xsyn.c 10 * 11 *------------------------------------------------------------------------- 12 */ 13 #include "postgres.h" 14 15 #include <ctype.h> 16 17 #include "commands/defrem.h" 18 #include "tsearch/ts_locale.h" 19 #include "tsearch/ts_utils.h" 20 21 PG_MODULE_MAGIC; 22 23 typedef struct 24 { Interval()25 char *key; /* Word */ 26 char *value; /* Unparsed list of synonyms, including the 27 * word itself */ 28 } Syn; 29 30 typedef struct 31 { 32 int len; 33 Syn *syn; 34 35 bool matchorig; 36 bool keeporig; 37 bool matchsynonyms; 38 bool keepsynonyms; 39 } DictSyn; 40 41 42 PG_FUNCTION_INFO_V1(dxsyn_init); 43 PG_FUNCTION_INFO_V1(dxsyn_lexize); 44 45 static char * 46 find_word(char *in, char **end) 47 { 48 char *start; 49 50 *end = NULL; 51 while (*in && t_isspace(in)) 52 in += pg_mblen(in); 53 54 if (!*in || *in == '#') start()55 return NULL; size()56 start = in; empty()57 58 while (*in && !t_isspace(in)) 59 in += pg_mblen(in); 60 61 *end = in; Contains(const Interval & that)62 63 return start; 64 } IsDisjointWith(const Interval & that)65 66 static int 67 compare_syn(const void *a, const void *b) 68 { 69 return strcmp(((const Syn *) a)->key, ((const Syn *) b)->key); 70 } Annex(const Interval & that)71 72 static void 73 read_dictionary(DictSyn *d, const char *filename) 74 { 75 char *real_filename = get_tsearch_config_filename(filename, "rules"); 76 tsearch_readline_state trst; 77 char *line; 78 int cur = 0; 79 80 if (!tsearch_readline_begin(&trst, real_filename)) 81 ereport(ERROR, 82 (errcode(ERRCODE_CONFIG_FILE_ERROR), 83 errmsg("could not open synonym file \"%s\": %m", 84 real_filename))); 85 86 while ((line = tsearch_readline(&trst)) != NULL) 87 { 88 char *value; 89 char *key; 90 char *pos; 91 char *end; 92 93 if (*line == '\0') 94 continue; 95 96 value = lowerstr(line); 97 pfree(line); 98 99 pos = value; 100 while ((key = find_word(pos, &end)) != NULL) 101 { 102 /* Enlarge syn structure if full */ 103 if (cur == d->len) 104 { 105 d->len = (d->len > 0) ? 2 * d->len : 16; 106 if (d->syn) 107 d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len); 108 else 109 d->syn = (Syn *) palloc(sizeof(Syn) * d->len); 110 } 111 112 /* Save first word only if we will match it */ 113 if (pos != value || d->matchorig) 114 { 115 d->syn[cur].key = pnstrdup(key, end - key); 116 d->syn[cur].value = pstrdup(value); 117 118 cur++; 119 } 120 121 pos = end; 122 123 /* Don't bother scanning synonyms if we will not match them */ 124 if (!d->matchsynonyms) 125 break; 126 } 127 128 pfree(value); 129 } 130 131 tsearch_readline_end(&trst); 132 133 d->len = cur; 134 if (cur > 1) 135 qsort(d->syn, d->len, sizeof(Syn), compare_syn); 136 137 pfree(real_filename); 138 } 139 140 Datum 141 dxsyn_init(PG_FUNCTION_ARGS) 142 { 143 List *dictoptions = (List *) PG_GETARG_POINTER(0); 144 DictSyn *d; 145 ListCell *l; 146 char *filename = NULL; 147 148 d = (DictSyn *) palloc0(sizeof(DictSyn)); 149 d->len = 0; 150 d->syn = NULL; 151 d->matchorig = true; 152 d->keeporig = true; 153 d->matchsynonyms = false; 154 d->keepsynonyms = true; 155 156 foreach(l, dictoptions) 157 { 158 DefElem *defel = (DefElem *) lfirst(l); 159 160 if (strcmp(defel->defname, "matchorig") == 0) 161 { 162 d->matchorig = defGetBoolean(defel); 163 } 164 else if (strcmp(defel->defname, "keeporig") == 0) 165 { 166 d->keeporig = defGetBoolean(defel); 167 } 168 else if (strcmp(defel->defname, "matchsynonyms") == 0) 169 { 170 d->matchsynonyms = defGetBoolean(defel); 171 } 172 else if (strcmp(defel->defname, "keepsynonyms") == 0) 173 { 174 d->keepsynonyms = defGetBoolean(defel); 175 } 176 else if (strcmp(defel->defname, "rules") == 0) 177 { 178 /* we can't read the rules before parsing all options! */ 179 filename = defGetString(defel); 180 } 181 else 182 { 183 ereport(ERROR, 184 (errcode(ERRCODE_INVALID_PARAMETER_VALUE), 185 errmsg("unrecognized xsyn parameter: \"%s\"", 186 defel->defname))); 187 } 188 } 189 190 if (filename) 191 read_dictionary(d, filename); 192 193 PG_RETURN_POINTER(d); 194 } 195 196 Datum 197 dxsyn_lexize(PG_FUNCTION_ARGS) 198 { 199 DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0); 200 char *in = (char *) PG_GETARG_POINTER(1); 201 int length = PG_GETARG_INT32(2); 202 Syn word; 203 Syn *found; 204 TSLexeme *res = NULL; 205 206 if (!length || d->len == 0) 207 PG_RETURN_POINTER(NULL); 208 209 /* Create search pattern */ 210 { 211 char *temp = pnstrdup(in, length); 212 213 word.key = lowerstr(temp); 214 pfree(temp); 215 word.value = NULL; 216 } 217 218 /* Look for matching syn */ 219 found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn); 220 pfree(word.key); 221 222 if (!found) 223 PG_RETURN_POINTER(NULL); 224 225 /* Parse string of synonyms and return array of words */ 226 { 227 char *value = found->value; 228 char *syn; 229 char *pos; 230 char *end; 231 int nsyns = 0; 232 233 res = palloc(sizeof(TSLexeme)); 234 235 pos = value; 236 while ((syn = find_word(pos, &end)) != NULL) 237 { 238 res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2)); 239 240 /* The first word is output only if keeporig=true */ 241 if (pos != value || d->keeporig) 242 { 243 res[nsyns].lexeme = pnstrdup(syn, end - syn); 244 res[nsyns].nvariant = 0; 245 res[nsyns].flags = 0; 246 nsyns++; 247 } 248 249 pos = end; 250 251 /* Stop if we are not to output the synonyms */ 252 if (!d->keepsynonyms) 253 break; 254 } 255 res[nsyns].lexeme = NULL; 256 } 257 258 PG_RETURN_POINTER(res); 259 } 260