1 /*-------------------------------------------------------------------------
2 *
3 * dict_xsyn.c
4 * Extended synonym dictionary
5 *
6 * Copyright (c) 2007-2019, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * contrib/dict_xsyn/dict_xsyn.c
10 *
11 *-------------------------------------------------------------------------
12 */
13 #include "postgres.h"
14
15 #include <ctype.h>
16
17 #include "commands/defrem.h"
18 #include "tsearch/ts_locale.h"
19 #include "tsearch/ts_utils.h"
20
/* Magic block that PostgreSQL requires in every dynamically loadable module. */
PG_MODULE_MAGIC;
22
/*
 * One entry of the synonym table.
 *
 * "key" is a single lower-cased word taken from a rules line; "value" is the
 * complete lower-cased rules line that word came from.  The value is kept
 * unparsed and is split into words again at lexize time.
 */
typedef struct
{
	char	   *key;			/* word to match */
	char	   *value;			/* unparsed list of synonyms, including the
								 * word itself */
} Syn;
29
30 typedef struct
31 {
32 int len;
33 Syn *syn;
34
35 bool matchorig;
36 bool keeporig;
37 bool matchsynonyms;
38 bool keepsynonyms;
39 } DictSyn;
40
41
/* Declare the module's entry points as version-1 calling convention functions. */
PG_FUNCTION_INFO_V1(dxsyn_init);
PG_FUNCTION_INFO_V1(dxsyn_lexize);
44
45 static char *
find_word(char * in,char ** end)46 find_word(char *in, char **end)
47 {
48 char *start;
49
50 *end = NULL;
51 while (*in && t_isspace(in))
52 in += pg_mblen(in);
53
54 if (!*in || *in == '#')
55 return NULL;
56 start = in;
57
58 while (*in && !t_isspace(in))
59 in += pg_mblen(in);
60
61 *end = in;
62
63 return start;
64 }
65
66 static int
compare_syn(const void * a,const void * b)67 compare_syn(const void *a, const void *b)
68 {
69 return strcmp(((const Syn *) a)->key, ((const Syn *) b)->key);
70 }
71
72 static void
read_dictionary(DictSyn * d,const char * filename)73 read_dictionary(DictSyn *d, const char *filename)
74 {
75 char *real_filename = get_tsearch_config_filename(filename, "rules");
76 tsearch_readline_state trst;
77 char *line;
78 int cur = 0;
79
80 if (!tsearch_readline_begin(&trst, real_filename))
81 ereport(ERROR,
82 (errcode(ERRCODE_CONFIG_FILE_ERROR),
83 errmsg("could not open synonym file \"%s\": %m",
84 real_filename)));
85
86 while ((line = tsearch_readline(&trst)) != NULL)
87 {
88 char *value;
89 char *key;
90 char *pos;
91 char *end;
92
93 if (*line == '\0')
94 continue;
95
96 value = lowerstr(line);
97 pfree(line);
98
99 pos = value;
100 while ((key = find_word(pos, &end)) != NULL)
101 {
102 /* Enlarge syn structure if full */
103 if (cur == d->len)
104 {
105 d->len = (d->len > 0) ? 2 * d->len : 16;
106 if (d->syn)
107 d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
108 else
109 d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
110 }
111
112 /* Save first word only if we will match it */
113 if (pos != value || d->matchorig)
114 {
115 d->syn[cur].key = pnstrdup(key, end - key);
116 d->syn[cur].value = pstrdup(value);
117
118 cur++;
119 }
120
121 pos = end;
122
123 /* Don't bother scanning synonyms if we will not match them */
124 if (!d->matchsynonyms)
125 break;
126 }
127
128 pfree(value);
129 }
130
131 tsearch_readline_end(&trst);
132
133 d->len = cur;
134 if (cur > 1)
135 qsort(d->syn, d->len, sizeof(Syn), compare_syn);
136
137 pfree(real_filename);
138 }
139
140 Datum
dxsyn_init(PG_FUNCTION_ARGS)141 dxsyn_init(PG_FUNCTION_ARGS)
142 {
143 List *dictoptions = (List *) PG_GETARG_POINTER(0);
144 DictSyn *d;
145 ListCell *l;
146 char *filename = NULL;
147
148 d = (DictSyn *) palloc0(sizeof(DictSyn));
149 d->len = 0;
150 d->syn = NULL;
151 d->matchorig = true;
152 d->keeporig = true;
153 d->matchsynonyms = false;
154 d->keepsynonyms = true;
155
156 foreach(l, dictoptions)
157 {
158 DefElem *defel = (DefElem *) lfirst(l);
159
160 if (strcmp(defel->defname, "matchorig") == 0)
161 {
162 d->matchorig = defGetBoolean(defel);
163 }
164 else if (strcmp(defel->defname, "keeporig") == 0)
165 {
166 d->keeporig = defGetBoolean(defel);
167 }
168 else if (strcmp(defel->defname, "matchsynonyms") == 0)
169 {
170 d->matchsynonyms = defGetBoolean(defel);
171 }
172 else if (strcmp(defel->defname, "keepsynonyms") == 0)
173 {
174 d->keepsynonyms = defGetBoolean(defel);
175 }
176 else if (strcmp(defel->defname, "rules") == 0)
177 {
178 /* we can't read the rules before parsing all options! */
179 filename = defGetString(defel);
180 }
181 else
182 {
183 ereport(ERROR,
184 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
185 errmsg("unrecognized xsyn parameter: \"%s\"",
186 defel->defname)));
187 }
188 }
189
190 if (filename)
191 read_dictionary(d, filename);
192
193 PG_RETURN_POINTER(d);
194 }
195
196 Datum
dxsyn_lexize(PG_FUNCTION_ARGS)197 dxsyn_lexize(PG_FUNCTION_ARGS)
198 {
199 DictSyn *d = (DictSyn *) PG_GETARG_POINTER(0);
200 char *in = (char *) PG_GETARG_POINTER(1);
201 int length = PG_GETARG_INT32(2);
202 Syn word;
203 Syn *found;
204 TSLexeme *res = NULL;
205
206 if (!length || d->len == 0)
207 PG_RETURN_POINTER(NULL);
208
209 /* Create search pattern */
210 {
211 char *temp = pnstrdup(in, length);
212
213 word.key = lowerstr(temp);
214 pfree(temp);
215 word.value = NULL;
216 }
217
218 /* Look for matching syn */
219 found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
220 pfree(word.key);
221
222 if (!found)
223 PG_RETURN_POINTER(NULL);
224
225 /* Parse string of synonyms and return array of words */
226 {
227 char *value = found->value;
228 char *syn;
229 char *pos;
230 char *end;
231 int nsyns = 0;
232
233 res = palloc(sizeof(TSLexeme));
234
235 pos = value;
236 while ((syn = find_word(pos, &end)) != NULL)
237 {
238 res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
239
240 /* The first word is output only if keeporig=true */
241 if (pos != value || d->keeporig)
242 {
243 res[nsyns].lexeme = pnstrdup(syn, end - syn);
244 res[nsyns].nvariant = 0;
245 res[nsyns].flags = 0;
246 nsyns++;
247 }
248
249 pos = end;
250
251 /* Stop if we are not to output the synonyms */
252 if (!d->keepsynonyms)
253 break;
254 }
255 res[nsyns].lexeme = NULL;
256 }
257
258 PG_RETURN_POINTER(res);
259 }
260