1 /* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17
18 #include "udm_config.h"
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <sys/types.h>
22 #include <string.h>
23 #include <errno.h>
24
25 #include "udm_common.h"
26 #include "udm_ctype.h"
27 #include "udm_spell.h"
28 #include "udm_proto.h"
29 #include "udm_url.h"
30 #include "udm_parser.h"
31 #include "udm_conf.h"
32 #include "udm_log.h"
33 #include "udm_hrefs.h"
34 #include "udm_robots.h"
35 #include "udm_utils.h"
36 #include "udm_host.h"
37 #include "udm_server.h"
38 #include "udm_alias.h"
39 #include "udm_search_tl.h"
40 #include "udm_env.h"
41 #include "udm_match.h"
42 #include "udm_stopwords.h"
43 #include "udm_guesser.h"
44 #include "udm_unicode.h"
45 #include "udm_synonym.h"
46 #include "udm_vars.h"
47 #include "udm_db.h"
48 #include "udm_agent.h"
49 #include "udm_chinese.h"
50 #include "udm_contentencoding.h"
51 #include "udm_indexcache.h"
52
53 static udm_rc_t EnvLoad(UDM_CFG *Cfg,const char *cname);
54
55 /**************************** Load Configuration **********************/
56
57 udm_search_mode_t
UdmSearchMode(const char * mode)58 UdmSearchMode(const char *mode)
59 {
60 if(!mode)return(UDM_MODE_ALL);
61 if(!strcmp(mode,"all-minus"))return(UDM_MODE_ALL_MINUS);
62 if(!strcmp(mode,"all-minus-half"))return(UDM_MODE_ALL_MINUS_HALF);
63 if(!strcmp(mode,"all"))return(UDM_MODE_ALL);
64 if(!strcmp(mode,"any"))return(UDM_MODE_ANY);
65 if(!strcmp(mode,"bool"))return(UDM_MODE_BOOL);
66 if(!strcmp(mode,"phrase"))return(UDM_MODE_PHRASE);
67 return(UDM_MODE_ALL);
68 }
69
70
71 udm_match_mode_t
UdmMatchMode(const char * mode)72 UdmMatchMode(const char * mode)
73 {
74 if(!mode)return(UDM_MATCH_FULL);
75 if(!strcmp(mode,"wrd"))return(UDM_MATCH_FULL);
76 if(!strcmp(mode,"full"))return(UDM_MATCH_FULL);
77 if(!strcmp(mode,"beg"))return(UDM_MATCH_BEGIN);
78 if(!strcmp(mode,"end"))return(UDM_MATCH_END);
79 if(!strcmp(mode,"sub"))return(UDM_MATCH_SUBSTR);
80 return(UDM_MATCH_FULL);
81 }
82
83
84 UDM_API(const char *)
UdmFollowStr(udm_webspace_t method)85 UdmFollowStr(udm_webspace_t method)
86 {
87 switch(method)
88 {
89 case UDM_WEBSPACE_PAGE: return "Page";
90 case UDM_WEBSPACE_PATH: return "Path";
91 case UDM_WEBSPACE_SITE: return "Site";
92 case UDM_WEBSPACE_WORLD: return "World";
93 case UDM_WEBSPACE_URLLIST: return "URLList";
94 case UDM_WEBSPACE_UNKNOWN: break;
95 }
96 return "<Unknown follow type>";
97 }
98
99
UdmFollowType(const char * follow)100 udm_webspace_t UdmFollowType(const char * follow)
101 {
102 UDM_ASSERT(follow != NULL);
103 if (!strcasecmp(follow,"page")) return UDM_WEBSPACE_PAGE;
104 if (!strcasecmp(follow,"path")) return UDM_WEBSPACE_PATH;
105 if (!strcasecmp(follow,"site")) return UDM_WEBSPACE_SITE;
106 if (!strcasecmp(follow,"world")) return UDM_WEBSPACE_WORLD;
107 if (!strcasecmp(follow,"urllist")) return UDM_WEBSPACE_URLLIST;
108 return UDM_WEBSPACE_UNKNOWN;
109 }
110
/*
  Return the configuration command name that corresponds to a method.
  UDM_METHOD_UNKNOWN and any value not listed below give
  the "<Unknown method>" placeholder.
*/
const char *UdmMethodStr(udm_method_t method)
{
  const char *label= "<Unknown method>";

  switch (method)
  {
    case UDM_METHOD_DISALLOW:     label= "Disallow";     break;
    case UDM_METHOD_GET:          label= "Allow";        break;
    case UDM_METHOD_CHECKMP3ONLY: label= "CheckMP3Only"; break;
    case UDM_METHOD_CHECKMP3:     label= "CheckMP3";     break;
    case UDM_METHOD_HEAD:         label= "CheckOnly";    break;
    case UDM_METHOD_HREFONLY:     label= "HrefOnly";     break;
    case UDM_METHOD_VISITLATER:   label= "Skip";         break;
    case UDM_METHOD_INDEX:        label= "IndexIf";      break;
    case UDM_METHOD_NOINDEX:      label= "NoIndexIf";    break;
    case UDM_METHOD_IMPORTONLY:   label= "ImportOnly";   break;
    case UDM_METHOD_UNKNOWN:      break;
  }
  return label;
}
129
130
UdmMethod(const char * s)131 udm_method_t UdmMethod(const char *s)
132 {
133 if (s == NULL)
134 return UDM_METHOD_UNKNOWN;
135 if (!strcasecmp(s,"Disallow")) return UDM_METHOD_DISALLOW;
136 if (!strcasecmp(s,"Allow")) return UDM_METHOD_GET;
137 if (!strcasecmp(s,"CheckMP3Only")) return UDM_METHOD_CHECKMP3ONLY;
138 if (!strcasecmp(s,"CheckMP3")) return UDM_METHOD_CHECKMP3;
139 if (!strcasecmp(s,"CheckOnly")) return UDM_METHOD_HEAD;
140 if (!strcasecmp(s,"HrefOnly")) return UDM_METHOD_HREFONLY;
141 if (!strcasecmp(s,"Skip")) return UDM_METHOD_VISITLATER;
142 if (!strcasecmp(s,"SkipIf")) return UDM_METHOD_VISITLATER;
143 if (!strcasecmp(s,"IndexIf")) return UDM_METHOD_INDEX;
144 if (!strcasecmp(s,"NoIndexIf")) return UDM_METHOD_NOINDEX;
145 if (!strcasecmp(s,"ImportOnly")) return UDM_METHOD_IMPORTONLY;
146 return UDM_METHOD_UNKNOWN;
147 }
148
149
/*
  Convert a base-36 digit character into its integer value:
  '0'..'9' give 0..9, 'A'..'Z' and 'a'..'z' give 10..35.
  Any other character gives 0.
  TODO34: using WF with big values (e.g. 'Q') overflows
*/
static int
UdmBase_09AZ_to_int(int h)
{
  if ((h >= '0') && (h <= '9'))
    return h - '0';
  if ((h >= 'A') && (h <= 'Z'))
    return h - 'A' + 10;
  /*
    The upper bound has to be 'z': a range 'a'..'Z' is empty,
    which made every lowercase digit fall through to 0.
  */
  if ((h >= 'a') && (h <= 'z'))
    return h - 'a' + 10;
  return 0;
}
162
163
/*
  Fill the weight table "res" (entries 0..255 are written)
  from the weight factor string "wf".

  All entries start as 1. If "wf" is 1..255 characters long,
  it is read from its last character towards its first one:
  every character other than '-' and '.' is converted with
  UdmBase_09AZ_to_int() and stored into entries 1, 2, 3, ...
  Finally all entries above "num" are set to 0.
*/
static void
UdmWeightFactorsInit(char *res, const char *wf, size_t num)
{
  size_t wflen= strlen(wf);
  int i;

  /* Default weight for every entry */
  memset(res, 1, 256);

  if (wflen > 0 && wflen < 256)
  {
    const char *p= wf + wflen;
    int slot= 1;

    while (p > wf)
    {
      int ch= *--p;
      if (ch == '-' || ch == '.')
        continue;                       /* separators carry no weight */
      res[slot++]= UdmBase_09AZ_to_int(ch);
    }
  }

  /* Entries past the last configured section get zero weight */
  for (i= (int) (num + 1); i < 256; i++)
    res[i]= 0;
}
192
193
/*
  Initialize the weight table "res" from the variable "name".
  The value found in V2 is used when it is not empty,
  otherwise the value found in V1 is used.
  The number of sections is taken from "NumSections" in V1 (default 256).

  Returns the number of sections if a non-empty weight factor string
  was found, or 0 if both values are empty.
*/
size_t UdmWeightFactorsInit2(char *res,
                             UDM_VARLIST *V1,
                             UDM_VARLIST *V2,
                             const char *name)
{
  size_t nsections= UdmVarListFindInt(V1, "NumSections", 256);
  const char *first= UdmVarListFindStr(V1, name, "");
  const char *second= UdmVarListFindStr(V2, name, "");
  const char *effective;

  if (second[0] != '\0')
    effective= second;
  else
    effective= first;

  UdmWeightFactorsInit(res, effective, nsections);

  if (effective[0] == '\0')
    return 0;
  return nsections;
}
206
/* hold the path of the current config file */
static char current[1024]= ""; /* TODO34: get rid of this */

/*
  Remember the directory part of "name" in "current":
  everything before the last '/' is kept.
  If "name" has no '/', "current" becomes an empty string.
  A name that does not fit into "current" is truncated
  instead of overflowing the buffer.
*/
static void update_current(const char *name)
{
  char *slash;
  size_t len= strlen(name);

  /* Bounded copy, always leaving room for the terminator */
  if (len >= sizeof(current))
    len= sizeof(current) - 1;
  memcpy(current, name, len);
  current[len]= '\0';

  if ((slash= strrchr(current, '/')))
    *slash= '\0';
  else
    current[0]= '\0';
}
223
rel_cur_name(char * res,size_t maxlen,const char * name)224 static size_t rel_cur_name(char *res, size_t maxlen, const char *name)
225 {
226 size_t n = 0;
227 if (*current)
228 {
229 #ifdef WIN32
230 n= udm_snprintf(res, maxlen, "%s", name);
231 #else
232 n= udm_snprintf(res, maxlen, "%s%s%s", current, UDMSLASHSTR, name);
233 #endif
234 }
235 else
236 {
237 strncpy(res, name, maxlen);
238 }
239 res[maxlen]= 0;
240 return n;
241 }
242
243
244 static size_t
rel_name(UDM_ENV * Env,char * res,size_t maxlen,const char * varname,const char * dirname,const char * name)245 rel_name(UDM_ENV *Env, char *res, size_t maxlen,
246 const char *varname, const char *dirname, const char *name)
247 {
248 size_t n;
249 const char *dir= UdmVarListFindStr(&Env->Vars, varname, dirname);
250 if(name[0]=='/' || (name[0] && name[1] == ':'))
251 n= udm_snprintf(res, maxlen, "%s", name);
252 else
253 n= udm_snprintf(res,maxlen,"%s%s%s",dir,UDMSLASHSTR,name);
254 res[maxlen]='\0';
255 return n;
256 }
257
258
259 /* Relative name for included .conf files */
260 static size_t
rel_etc_name(UDM_ENV * Env,char * res,size_t maxlen,const char * fname)261 rel_etc_name(UDM_ENV *Env, char *res, size_t maxlen, const char *fname)
262 {
263 return rel_name(Env, res, maxlen, "ConfDir", UDM_CONF_DIR, fname);
264 }
265
266
267 /*
268 Relative name for langmap, stopwords, synonym files.
269 Their position depends on --enable-fhs-layot.
270 */
271 static size_t
rel_etc2_name(UDM_ENV * Env,char * res,size_t maxlen,const char * fname)272 rel_etc2_name(UDM_ENV *Env, char *res, size_t maxlen, const char *fname)
273 {
274 #ifdef HAVE_FHS_LAYOUT
275 /* Better FHS layout */
276 return rel_name(Env, res, maxlen, "ShareDir", UDM_SHARE_DIR, fname);
277 #else
278 /* Traditional mnoGoSearch layout */
279 return rel_name(Env, res, maxlen, "ConfDir", UDM_CONF_DIR, fname);
280 #endif
281 }
282
283
284 static size_t
rel_langmap_name(UDM_ENV * Env,char * res,size_t maxlen,const char * fname)285 rel_langmap_name(UDM_ENV *Env, char *res, size_t maxlen, const char *fname)
286 {
287 return rel_etc2_name(Env, res, maxlen, fname);
288 }
289
290
291 static size_t
rel_stopwords_name(UDM_ENV * Env,char * res,size_t maxlen,const char * fname)292 rel_stopwords_name(UDM_ENV *Env, char *res, size_t maxlen, const char *fname)
293 {
294 return rel_etc2_name(Env, res, maxlen, fname);
295 }
296
297
298 static size_t
rel_synonym_name(UDM_ENV * Env,char * res,size_t maxlen,const char * fname)299 rel_synonym_name(UDM_ENV *Env, char *res, size_t maxlen, const char *fname)
300 {
301 return rel_etc2_name(Env, res, maxlen, fname);
302 }
303
304
305 /*
306 *.freq files is a special case.
307 It's installed to /share/freq/xxx.freq with --enable-fhs-layout,
308 and to /etc/xxx.freq otherwise.
309 */
310 static size_t
rel_freq_name(UDM_ENV * Env,char * res,size_t maxlen,const char * fname)311 rel_freq_name(UDM_ENV *Env, char *res, size_t maxlen, const char *fname)
312 {
313 #ifdef HAVE_FHS_LAYOUT
314 char freqname[128];
315 if (fname[0] == '/')
316 udm_snprintf(freqname, sizeof(freqname), "%s", fname);
317 else
318 udm_snprintf(freqname, sizeof(freqname), "freq/%s", fname);
319 return rel_name(Env, res, maxlen, "ShareDir", UDM_SHARE_DIR, freqname);
320 #else
321 return rel_name(Env, res, maxlen, "ConfDir", UDM_CONF_DIR, fname);
322 #endif
323 }
324
325
326 /* Relative name for /var files */
rel_var_name(UDM_ENV * Env,char * res,size_t maxlen,const char * name)327 static size_t rel_var_name(UDM_ENV *Env,char *res,size_t maxlen,
328 const char *name)
329 {
330 size_t n;
331 const char *dir=UdmVarListFindStr(&Env->Vars,"VarDir",UDM_VAR_DIR);
332 if(name[0]=='/')n = udm_snprintf(res, maxlen, "%s", name);
333 else n = udm_snprintf(res,maxlen,"%s%s%s",dir,UDMSLASHSTR,name);
334 res[maxlen]='\0';
335 return n;
336 }
337
/*
  Split "str" into tokens using UdmGetStrToken() and
  store up to "max" token pointers into "av".

  All "max" slots of "av" are zeroed first, so the slots
  after the last token are NULL.
  Returns the number of tokens stored.
  NOTE(review): the tokens presumably point into "str", which
  would then be modified by the tokenizer - confirm in UdmGetStrToken().
*/
size_t
UdmGetArgs(char *str, const char **av, size_t max)
{
  size_t ac= 0;
  char *lt;      /* tokenizer state, passed by address on every call */
  char *tok;

  bzero((void*) av, max * sizeof(*av));

  for (tok= UdmGetStrToken(str, &lt);
       tok && (ac < max);
       tok= UdmGetStrToken(NULL, &lt))
  {
    av[ac++]= tok;
  }
  return ac;
}
356
357
358 static udm_bool_t
UdmMatchParamModeRegexOrString(UDM_MATCH_PARAM * Param,const char * prm)359 UdmMatchParamModeRegexOrString(UDM_MATCH_PARAM *Param, const char *prm)
360 {
361 if (!strcasecmp(prm, "string"))
362 {
363 Param->match_mode= UDM_MATCH_WILD;
364 return UDM_FALSE;
365 }
366 if (!strcasecmp(prm, "regex"))
367 {
368 Param->match_mode= UDM_MATCH_REGEX;
369 return UDM_FALSE;
370 }
371 if (!strcasecmp(prm, "regexp"))
372 {
373 Param->match_mode= UDM_MATCH_REGEX;
374 return UDM_FALSE;
375 }
376 return UDM_TRUE;
377 }
378
379
380 static udm_bool_t
UdmMatchParamStrToMode(UDM_MATCH_PARAM * Param,const char * prm)381 UdmMatchParamStrToMode(UDM_MATCH_PARAM *Param, const char *prm)
382 {
383 /* Match modes */
384 if (!UdmMatchParamModeRegexOrString(Param, prm))
385 return UDM_FALSE;
386 if (!strcasecmp(prm, "prefix"))
387 {
388 Param->match_mode= UDM_MATCH_BEGIN;
389 return UDM_FALSE;
390 }
391 return UDM_TRUE;
392 }
393
394
395 static udm_bool_t
UdmMatchParamStrToCaseSensitivity(UDM_MATCH_PARAM * Param,const char * prm)396 UdmMatchParamStrToCaseSensitivity(UDM_MATCH_PARAM *Param, const char *prm)
397 {
398 /* Case sensitivity */
399 if (!strcasecmp(prm, "nocase"))
400 {
401 UdmMatchParamSetCaseInsensitive(Param, UDM_FALSE);
402 return UDM_FALSE;
403 }
404 if (!strcasecmp(prm, "case"))
405 {
406 UdmMatchParamSetCaseInsensitive(Param, UDM_TRUE);
407 return UDM_FALSE;
408 }
409 return UDM_TRUE;
410 }
411
412
413 static udm_bool_t
UdmMatchParamStrToNegative(UDM_MATCH_PARAM * Param,const char * prm)414 UdmMatchParamStrToNegative(UDM_MATCH_PARAM *Param, const char *prm)
415 {
416 /* Negative condition */
417 if (!strcasecmp(prm, "match"))
418 {
419 UdmMatchParamSetNegative(Param, UDM_FALSE);
420 return UDM_FALSE;
421 }
422 if (!strcasecmp(prm, "nomatch"))
423 {
424 UdmMatchParamSetNegative(Param, UDM_TRUE);
425 return UDM_FALSE;
426 }
427 return UDM_TRUE;
428 }
429
430
431 static udm_bool_t
UdmMatchParamSetParam(UDM_MATCH_PARAM * Param,const char * prm)432 UdmMatchParamSetParam(UDM_MATCH_PARAM *Param, const char *prm)
433 {
434 if (!UdmMatchParamStrToMode(Param, prm))
435 return UDM_FALSE;
436 if (!UdmMatchParamStrToCaseSensitivity(Param, prm))
437 return UDM_FALSE;
438 if (!UdmMatchParamStrToNegative(Param, prm))
439 return UDM_FALSE;
440 return UDM_TRUE;
441 }
442
443
444 static udm_rc_t
add_srv(UDM_CFG * C,size_t ac,const char ** av)445 add_srv(UDM_CFG *C, size_t ac, const char **av)
446 {
447 UDM_ENV *Conf=C->Indexer->Conf;
448 UDM_AGENT *Indexer = C->Indexer;
449 size_t i;
450 int has_alias=0;
451
452 if(!(C->flags & UDM_FLAG_ADD_SERV))
453 return UDM_OK;
454
455 C->Srv->command = 'S';
456 C->Srv->webspace= UDM_WEBSPACE_DEFAULT;
457 C->Srv->ordre = ++C->ordre;
458 C->Srv->Filter.method= UDM_METHOD_DEFAULT;
459 UdmMatchParamSetNegative(&C->Srv->Filter.Match.Param, UDM_FALSE);
460 UdmMatchParamSetCaseInsensitive(&C->Srv->Filter.Match.Param, UDM_TRUE);
461 UdmMatchParamSetOptimization(&C->Srv->Filter.Match.Param,
462 !UDM_TEST((C->flags & UDM_FLAG_DONT_ADD_TO_DB)));
463
464 if(!strcasecmp(av[0],"Server"))
465 {
466 C->Srv->Filter.Match.Param.match_mode= UDM_MATCH_BEGIN;
467 }
468 else if(!strcasecmp(av[0],"Subnet"))
469 {
470 C->Srv->Filter.Match.Param.match_mode= UDM_MATCH_SUBNET;
471 Conf->Servers.have_subnets=1;
472 }
473 else
474 {
475 C->Srv->Filter.Match.Param.match_mode= UDM_MATCH_WILD;
476 }
477
478
479 for(i=1; i<ac; i++)
480 {
481 int o;
482
483 if (UDM_WEBSPACE_UNKNOWN!= (o= UdmFollowType(av[i])))
484 C->Srv->webspace= (udm_webspace_t) o;
485 else if (UDM_METHOD_UNKNOWN!= (o= UdmMethod(av[i])))
486 C->Srv->Filter.method= (udm_method_t) o;
487 else if (UdmMatchParamSetParam(&C->Srv->Filter.Match.Param, av[i]))
488 {
489 if (!UdmMatchPatternConstStr(&C->Srv->Filter.Match))
490 UdmMatchSetPattern(&C->Srv->Filter.Match, av[i]);
491 else if(!has_alias)
492 {
493 has_alias=1;
494 UdmVarListReplaceStr(&C->Srv->Vars,"Alias",av[i]);
495 }
496 else
497 {
498 sprintf(Conf->errstr,"too many argiments: '%s'",av[i]);
499 return UDM_ERROR;
500 }
501 }
502 }
503 if (!UdmMatchPatternConstStr(&C->Srv->Filter.Match))
504 {
505 sprintf(Conf->errstr,"too few argiments in '%s' command", av[0]);
506 return UDM_ERROR;
507 }
508 if(UDM_OK != UdmServerAdd(Indexer, C->Srv, C->flags))
509 {
510 char * s_err;
511 s_err = (char*)UdmStrdup(Conf->errstr);
512 sprintf(Conf->errstr,"%s",s_err);
513 UDM_FREE(s_err);
514 UdmMatchFreeAndInit(&C->Srv->Filter.Match);
515 return UDM_ERROR;
516 }
517 if ((C->Srv->Filter.Match.Param.match_mode == UDM_MATCH_BEGIN) &&
518 (UdmMatchPatternConstStr(&C->Srv->Filter.Match)[0])&&
519 (C->flags&UDM_FLAG_ADD_SERVURL))
520 {
521 UDM_HREFPARAM HrefParam;
522 UdmHrefParamInit(&HrefParam);
523 HrefParam.server_id= C->Srv->site_id;
524 HrefParam.hops= (uint4) UdmVarListFindInt(&C->Srv->Vars, "StartHops", 0);
525 HrefParam.link_source= UDM_LINK_SOURCE_CONF;
526 UdmHrefListAddConst(&Conf->Hrefs, &HrefParam,
527 UdmMatchPatternConstStr(&C->Srv->Filter.Match));
528 }
529 UdmMatchFreeAndInit(&C->Srv->Filter.Match);
530 UdmVarListDel(&C->Srv->Vars,"AuthBasic");
531 UdmVarListDel(&C->Srv->Vars,"Alias");
532 return UDM_OK;
533 }
534
535
536 static udm_rc_t
UdmFilterListAddWithServer(UDM_AGENT * A,UDM_FILTERLIST * L,UDM_MATCH_PARAM * Param,udm_method_t method,const UDM_CONST_STR * Pattern,char * err,size_t errsize,int ordre)537 UdmFilterListAddWithServer(UDM_AGENT *A,
538 UDM_FILTERLIST *L, UDM_MATCH_PARAM *Param,
539 udm_method_t method,
540 const UDM_CONST_STR *Pattern,
541 char *err, size_t errsize, int ordre)
542 {
543 if (UDM_OK != UdmFilterListAdd(L, Param, method, Pattern, err, errsize))
544 return UDM_ERROR;
545
546 if (A != NULL)
547 {
548 UDM_SERVERLIST S;
549 UDM_SERVER n;
550 udm_rc_t rc;
551
552 bzero((void*)&n, sizeof(n));
553 S.Server= &n;
554 n.command= 'F';
555 UdmMatchSetPattern(&n.Filter.Match, Pattern->str);
556 n.Filter.Match.Param= *Param;
557 n.Filter.method= method;
558 n.ordre= ordre;
559
560 rc= UdmSrvAction(A, &S, UDM_SRV_ACTION_ADD);
561 UdmVarListFree(&n.Vars);
562
563 if (rc != UDM_OK) return rc;
564 }
565 return UDM_OK;
566 }
567
568
569 static udm_rc_t
add_alias(UDM_CFG * C,size_t ac,const char ** av)570 add_alias(UDM_CFG *C, size_t ac, const char **av)
571 {
572 UDM_ENV *Conf = C->Indexer->Conf;
573 UDM_MATCH_PARAM Param;
574 UDM_CONST_STR Pattern;
575 size_t i;
576
577 UdmConstStrInit(&Pattern);
578 UdmMatchParamInit(&Param);
579 Param.match_mode= UDM_MATCH_BEGIN;
580 UdmMatchParamSetCaseInsensitive(&Param, UDM_TRUE);
581 UdmMatchParamSetOptimization(&Param,
582 !UDM_TEST(C->flags & UDM_FLAG_DONT_ADD_TO_DB));
583 for (i= 1; i < ac; i++)
584 {
585 if (!UdmMatchParamStrToMode(&Param, av[i]) ||
586 !UdmMatchParamStrToCaseSensitivity(&Param, av[i]))
587 continue;
588
589 if (!Pattern.str)
590 {
591 UdmConstStrSetStr(&Pattern, av[i]);
592 }
593 else
594 {
595 char err[120]= "";
596 UDM_REPLACELIST *L= NULL;
597 UDM_CONST_STR Alias;
598
599 UdmConstStrSetStr(&Alias, av[i]);
600 if (!strcasecmp(av[0], "Alias")) L= &Conf->Aliases;
601 if (!strcasecmp(av[0], "ReverseAlias")) L= &Conf->ReverseAliases;
602
603 if (UDM_OK != UdmReplaceListAdd(L, &Param, &Pattern, &Alias, err, sizeof(err)))
604 {
605 udm_snprintf(Conf->errstr, sizeof(Conf->errstr), "%s", err);
606 return UDM_ERROR;
607 }
608 }
609 }
610 if (!Pattern.str)
611 {
612 udm_snprintf(Conf->errstr, sizeof(Conf->errstr), "too few arguments");
613 return UDM_ERROR;
614 }
615 return UDM_OK;
616 }
617
618
619 static udm_rc_t
UdmExcerptQualityParse(UDM_ENV * Env,UDM_EXCERPT_FRAGMENT * Fragment,const char * str)620 UdmExcerptQualityParse(UDM_ENV *Env,
621 UDM_EXCERPT_FRAGMENT *Fragment, const char *str)
622 {
623 if (!strcasecmp(str, "ignore"))
624 {
625 Fragment->quality= 0;
626 return UDM_OK;
627 }
628 if (udm_isdigit(str[0]))
629 {
630 Fragment->quality= atoi(str);
631 return UDM_OK;
632 }
633 udm_snprintf(Env->errstr, sizeof(Env->errstr),
634 "ExcerptFragment: syntax error near '%s'; "
635 "Expected 'ignore' or an unsigned number.", str);
636 return UDM_ERROR;
637 }
638
639
640 /*
641 ExcerptFragment [Case | NoCase] [String | Regex] quality pattern
642 */
643 static udm_rc_t
add_excerpt_fragment(UDM_CFG * C,size_t ac,const char ** av)644 add_excerpt_fragment(UDM_CFG *C, size_t ac, const char **av)
645 {
646 UDM_ENV *Conf= C->Indexer->Conf;
647 UDM_EXCERPT_FRAGMENT Specific, *VarSpecific;
648 UDM_VAR *Var;
649 char err[128];
650 char name[64];
651 size_t i;
652 udm_rc_t rc;
653 UDM_VALUE_HANDLER *ha= &UdmValueHandlerExcerptFragment;
654
655 UdmExcerptFragmentInit(&Specific);
656 Specific.Match.Param.match_mode= UDM_MATCH_WILD;
657 UdmMatchParamSetCaseInsensitive(&Specific.Match.Param, UDM_TRUE);
658
659 for (i= 1; i < ac; i++)
660 {
661 if (UdmMatchParamModeRegexOrString(&Specific.Match.Param, av[i]) &&
662 UdmMatchParamStrToCaseSensitivity(&Specific.Match.Param, av[i]))
663 break;
664 }
665
666 if (ac - i != 2)
667 {
668 udm_snprintf(Conf->errstr, sizeof(Conf->errstr) - 1,
669 "too %s (%d) required paramenters",
670 ac - i > 2 ? "many" : "few", (int) (ac - i));
671 return UDM_ERROR;
672 }
673
674 if (UDM_OK != UdmExcerptQualityParse(Conf, &Specific, av[i]))
675 {
676 UdmExcerptFragmentFree(&Specific);
677 return UDM_ERROR;
678 }
679
680 if (UDM_OK != UdmMatchSetPattern(&Specific.Match, av[i + 1]))
681 return UDM_ERROR;
682
683 if (UDM_OK != UdmMatchComp(&Specific.Match, err, sizeof(err)))
684 {
685 udm_snprintf(Conf->errstr, sizeof(Conf->errstr) - 1, "%s", err);
686 UdmExcerptFragmentFree(&Specific);
687 return UDM_ERROR;
688 }
689
690 udm_snprintf(name, sizeof(name), "ExcerptFragment%04d",
691 (int) C->excerpt_fragments_count++);
692 if (UDM_OK != (rc= UdmVarCreate(ha, &Var, name, NULL, 0)))
693 {
694 UdmExcerptFragmentFree(&Specific);
695 return rc;
696 }
697
698 VarSpecific= (UDM_EXCERPT_FRAGMENT *) UdmVarDataPtr(Var);
699 VarSpecific[0]= Specific;
700 bzero((void *) &Specific, sizeof(Specific));
701
702 if (UDM_OK != (rc= UdmVarListReplaceVar(&Conf->Vars, Var)))
703 {
704 UdmVarFree(Var);
705 return rc;
706 }
707 return UDM_OK;
708 }
709
710
711 static udm_rc_t
add_filter(UDM_CFG * C,size_t ac,const char ** av)712 add_filter(UDM_CFG *C, size_t ac, const char **av)
713 {
714 UDM_ENV *Conf= C->Indexer->Conf;
715 UDM_MATCH_PARAM Param;
716 size_t i;
717 udm_method_t method= UdmMethod(av[0]);
718
719 if (method == UDM_METHOD_UNKNOWN)
720 {
721 udm_snprintf(Conf->errstr, sizeof(Conf->errstr), "Unknown method %s", av[0]);
722 return UDM_ERROR;
723 }
724 if (!(C->flags & UDM_FLAG_ADD_SERV))
725 return UDM_OK;
726
727 UdmMatchParamInit(&Param);
728 Param.match_mode= UDM_MATCH_WILD;
729 UdmMatchParamSetCaseInsensitive(&Param, UDM_TRUE);
730 UdmMatchParamSetOptimization(&Param, !UDM_TEST(C->flags & UDM_FLAG_DONT_ADD_TO_DB));
731
732 C->ordre++;
733 for (i= 1; i < ac ; i++)
734 {
735 if (UdmMatchParamSetParam(&Param, av[i]))
736 {
737 char err[120]= "";
738 UDM_CONST_STR Pattern;
739 UdmConstStrSetStr(&Pattern, av[i]);
740
741 if (UDM_OK != UdmFilterListAddWithServer(NULL, &Conf->Filters,
742 &Param, method, &Pattern,
743 err, sizeof(err), ++C->ordre))
744 {
745 udm_snprintf(Conf->errstr,sizeof(Conf->errstr)-1,"%s",err);
746 return UDM_ERROR;
747 }
748 }
749 }
750 return UDM_OK;
751 }
752
753
754 static udm_rc_t
add_section_filter(UDM_CFG * C,size_t ac,const char ** av)755 add_section_filter(UDM_CFG *C, size_t ac, const char **av)
756 {
757 UDM_ENV *Conf= C->Indexer->Conf;
758 UDM_MATCH_PARAM MatchParam;
759 size_t i;
760 const char *section= NULL;
761 udm_method_t method= UdmMethod(av[0]);
762
763 UdmMatchParamInit(&MatchParam);
764 MatchParam.match_mode= UDM_MATCH_WILD;
765 UdmMatchParamSetCaseInsensitive(&MatchParam, UDM_TRUE);
766 UdmMatchParamSetOptimization(&MatchParam, !UDM_TEST(C->flags & UDM_FLAG_DONT_ADD_TO_DB));
767
768 C->ordre++;
769 for(i=1; i<ac ; i++)
770 {
771 if (!UdmMatchParamSetParam(&MatchParam, av[i]))
772 continue;
773 if (!section)
774 {
775 section= av[i];
776 }
777 else
778 {
779 char err[120]="";
780 UDM_CONST_STR Pattern;
781
782 UdmConstStrSetStr(&Pattern, av[i]);
783 if (UDM_OK != UdmSectionFilterListAdd(&Conf->SectionFilters,
784 &MatchParam, method, &Pattern, section,
785 err, sizeof(err)))
786 {
787 udm_snprintf(Conf->errstr,sizeof(Conf->errstr)-1,"%s",err);
788 return UDM_ERROR;
789 }
790 }
791 }
792
793 if (!section)
794 {
795 udm_snprintf(Conf->errstr, sizeof(Conf->errstr) - 1,
796 "No section given for %s", av[0]);
797 return UDM_ERROR;
798 }
799 return UDM_OK;
800 }
801
802
803 static udm_rc_t
add_type_internal(UDM_CFG * C,size_t ac,const char ** av,UDM_REPLACELIST * Lst)804 add_type_internal(UDM_CFG *C, size_t ac, const char **av, UDM_REPLACELIST *Lst)
805 {
806 UDM_ENV *Conf=C->Indexer->Conf;
807 UDM_MATCH_PARAM MatchParam;
808 UDM_CONST_STR Type;
809 size_t i;
810 udm_rc_t rc= UDM_OK;
811 char err[128];
812
813 UdmConstStrInit(&Type);
814 UdmMatchParamInit(&MatchParam);
815 MatchParam.match_mode= UDM_MATCH_WILD;
816 UdmMatchParamSetCaseInsensitive(&MatchParam, UDM_TRUE);
817 UdmMatchParamSetOptimization(&MatchParam, !UDM_TEST(C->flags & UDM_FLAG_DONT_ADD_TO_DB));
818
819 for (i=1; i<ac; i++)
820 {
821 if (!UdmMatchParamSetParam(&MatchParam, av[i]))
822 continue;
823 if (!Type.str)
824 UdmConstStrSetStr(&Type, av[i]);
825 else
826 {
827 UDM_CONST_STR Pattern;
828 UdmConstStrSetStr(&Pattern, av[i]);
829 if(UDM_OK != (rc = UdmReplaceListAdd(Lst ,&MatchParam, &Pattern, &Type,
830 err,sizeof(err))))
831 {
832 udm_snprintf(Conf->errstr, sizeof(Conf->errstr), "%s", err);
833 return rc;
834 }
835 }
836 }
837 return rc;
838 }
839
840
841 static udm_rc_t
add_type(UDM_CFG * C,size_t ac,const char ** av)842 add_type(UDM_CFG *C, size_t ac, const char **av)
843 {
844 UDM_ENV *Conf=C->Indexer->Conf;
845 return add_type_internal(C, ac, av, &Conf->MimeTypes);
846 }
847
848
849 static udm_rc_t
add_encoding(UDM_CFG * C,size_t ac,const char ** av)850 add_encoding(UDM_CFG *C, size_t ac, const char **av)
851 {
852 UDM_ENV *Conf= C->Indexer->Conf;
853 return add_type_internal(C, ac, av, &Conf->Encodings);
854 }
855
856
857 static udm_rc_t
add_parser(UDM_CFG * C,size_t ac,const char ** av)858 add_parser(UDM_CFG *C, size_t ac, const char **av)
859 {
860 UDM_ENV *Conf=C->Indexer->Conf;
861 UDM_PARSER P;
862 P.from_mime= UdmStrdup(av[1]);
863 P.to_mime= UdmStrdup(av[2]);
864 P.cmd= UdmStrdup(av[3] ? av[3] : "");
865 P.src= av[4] ? UdmStrdup(av[4]) : NULL;
866 UdmParserAdd(&Conf->Parsers,&P);
867 return UDM_OK;
868 }
869
870
871 static udm_rc_t
add_separator(UDM_VARLIST * Vars,const char * name,const char * val)872 add_separator(UDM_VARLIST *Vars, const char *name, const char *val)
873 {
874 UDM_DSTR buf;
875 UdmDSTRInit(&buf, 128);
876 UdmDSTRReset(&buf);
877 UdmDSTRAppendf(&buf, "separator.%s", name);
878 UdmVarListAddStr(Vars, UdmDSTRPtr(&buf), val);
879 UdmDSTRFree(&buf);
880 return UDM_OK;
881 }
882
883
884 static udm_rc_t
add_maxlen(UDM_VARLIST * Vars,const char * name,size_t maxlen)885 add_maxlen(UDM_VARLIST *Vars, const char *name, size_t maxlen)
886 {
887 UDM_DSTR buf;
888 UdmDSTRInit(&buf, 128);
889 UdmDSTRReset(&buf);
890 UdmDSTRAppendf(&buf, "maxlen.%s", name);
891 UdmVarListReplaceInt(Vars, UdmDSTRPtr(&buf), (int) maxlen);
892 UdmDSTRFree(&buf);
893 return UDM_OK;
894 }
895
896
897 static udm_rc_t
add_section(UDM_CFG * C,size_t ac,const char ** av)898 add_section(UDM_CFG *C, size_t ac, const char **av)
899 {
900 UDM_ENV *Conf=C->Indexer->Conf;
901 UDM_SECTION_PARAM Param;
902 int cdon, noindex= 0;
903 UDM_USERSECTIONLIST *SectionMatch= &Conf->SectionMatch;
904 const char *name= av[1];
905 int secno= 0, maxlen= 0;
906
907 UdmSectionParamInit(&Param, 0);
908 if (!strncasecmp(name, UDM_CSTR_WITH_LEN("Raw.")))
909 Param.flags= (udm_var_flag_t) (Param.flags | UDM_VARFLAG_RAW);
910 /*
911 Do not use "url.*" sections in
912 clone detection by default
913 */
914 cdon= strncasecmp(av[1], "url", 3) ? 1 : 0;
915 if ((!(secno= atoi(av[2])) && av[2][0] != '0') || secno < 0 || secno > 255)
916 {
917 sprintf(Conf->errstr,"Section ID is not a valid number: %s",av[2]);
918 return UDM_ERROR;
919 }
920 if ((ac > 3 && !(maxlen= atoi(av[3])) && av[3][0] != '0') || maxlen < 0)
921 {
922 sprintf(Conf->errstr,"Section length is not a valid number: %s",av[3]);
923 return UDM_ERROR;
924 }
925
926 if (ac > 3)
927 {
928 av++;
929 ac--;
930 }
931 av+= 3;
932 ac-= 3;
933
934 for ( ; ac ; ac--, av++)
935 {
936 if (!strcasecmp(av[0], "cdon") || !strcasecmp(av[0], "DetectClones"))
937 cdon= 1;
938 else if (!strcasecmp(av[0], "cdoff") || !strcasecmp(av[0], "NoDetectClones"))
939 cdon= 0;
940 else if (!strcasecmp(av[0], "html"))
941 Param.flags= (udm_var_flag_t) (Param.flags| UDM_VARFLAG_HTMLSOURCE);
942 else if (!strcasecmp(av[0], "decimal"))
943 Param.flags= (udm_var_flag_t) (Param.flags | UDM_VARFLAG_DECIMAL);
944 else if (!strcasecmp(av[0], "wiki"))
945 Param.flags= (udm_var_flag_t) (Param.flags | UDM_VARFLAG_HTMLSOURCE | UDM_VARFLAG_WIKI);
946 else if (!strcasecmp(av[0], "noindex"))
947 noindex= 1;
948 else if (!strcasecmp(av[0], "index"))
949 noindex= 0;
950 else if (!strcasecmp(av[0], "text"))
951 /* do nothing */;
952 else if (!strcasecmp(av[0], "afterheaders"))
953 SectionMatch= &Conf->SectionHdrMatch;
954 else if (!strcasecmp(av[0], "afterguesser"))
955 SectionMatch= &Conf->SectionGsrMatch;
956 else if (!strcasecmp(av[0], "afterparser"))
957 SectionMatch= &Conf->SectionMatch;
958 else
959 break;
960 }
961 if (secno > 0) /* Don't set maxlen for secno=0 */
962 Conf->SectionParam.maxlen[secno]= (size_t) maxlen;
963 Param.secno= (udm_secno_t) secno;
964 Param.flags= (udm_var_flag_t) (Param.flags | (cdon ? 0 : UDM_VARFLAG_NOCLONE));
965 Param.flags= (udm_var_flag_t) (Param.flags | (noindex ? UDM_VARFLAG_NOINDEX : 0));
966
967 if (!secno)
968 add_maxlen(&Conf->Vars, name, maxlen);
969 if (ac == 0)
970 {
971 /* no optional arguments */
972 }
973 else if (ac == 1)
974 {
975 /* <sep> */
976 add_separator(&Conf->Vars, name, av[0]);
977 }
978 else if (ac >= 2 && ac <= 4)
979 {
980 /*
981 <expr> <repl>
982 <sep> <expr> <repl>
983 <sep> <src> <expr> <repl>
984 */
985
986 UDM_MATCH_PARAM MatchParam;
987 char err[120]= "";
988 UDM_CONST_STR Name, Source, Pattern, Replacement;
989
990 UdmMatchParamInit(&MatchParam);
991 UdmConstStrSetStr(&Name, name);
992 UdmConstStrInit(&Source);
993 UdmConstStrInit(&Pattern);
994 UdmConstStrInit(&Replacement);
995 MatchParam.match_mode= UDM_MATCH_REGEX;
996 UdmMatchParamSetCaseInsensitive(&MatchParam, UDM_TRUE);
997 UdmMatchParamSetOptimization(&MatchParam, !UDM_TEST(C->flags & UDM_FLAG_DONT_ADD_TO_DB));
998
999 switch (ac)
1000 {
1001 case 2:
1002 UdmConstStrSetStr(&Pattern, av[0]);
1003 UdmConstStrSetStr(&Replacement, av[1]);
1004 break;
1005
1006 case 3:
1007 add_separator(&Conf->Vars, name, av[0]);
1008 UdmConstStrSetStr(&Pattern, av[1]);
1009 UdmConstStrSetStr(&Replacement, av[2]);
1010 break;
1011
1012 case 4:
1013 add_separator(&Conf->Vars, name, av[0]);
1014 UdmConstStrSetStr(&Source, av[1]);
1015 UdmConstStrSetStr(&Pattern, av[2]);
1016 UdmConstStrSetStr(&Replacement, av[3]);
1017 break;
1018 }
1019
1020 if(UDM_OK != UdmUserSectionListAdd(SectionMatch, &MatchParam,
1021 &Name, &Source, &Pattern, &Replacement,
1022 err, sizeof(err)))
1023 {
1024 udm_snprintf(Conf->errstr,sizeof(Conf->errstr)-1,"%s",err);
1025 return UDM_ERROR;
1026 }
1027 Param.flags= (udm_var_flag_t) (Param.flags | UDM_VARFLAG_USERDEF);
1028 }
1029 else
1030 {
1031 sprintf(Conf->errstr,"too many argiments: '%s'", av[0]);
1032 return UDM_ERROR;
1033 }
1034
1035 UdmVarListReplaceStrnWithParam(&Conf->Sections, &Param, name, NULL, 0);
1036 return UDM_OK;
1037 }
1038
1039
1040 static udm_rc_t
do_include(UDM_CFG * C,size_t ac,const char ** av)1041 do_include(UDM_CFG *C, size_t ac, const char **av)
1042 {
1043 FILE *test;
1044 char save[1024];
1045 if(C->level<5)
1046 {
1047 udm_rc_t rc;
1048 char fname[1024];
1049 rel_cur_name(fname, sizeof(fname)-1, av[1]);
1050 if ((test= fopen(fname, "r")))
1051 fclose(test);
1052 else
1053 rel_etc_name(C->Indexer->Conf, fname, sizeof(fname)-1, av[1]);
1054 strcpy(save, current);
1055 C->level++;
1056 rc= EnvLoad(C,fname);
1057 strcpy(current, save);
1058 C->level--;
1059 return rc;
1060 }
1061 else
1062 {
1063 sprintf(C->Indexer->Conf->errstr,"too big (%d) level in included files",C->level);
1064 return UDM_ERROR;
1065 }
1066 return UDM_OK;
1067 }
1068
1069
1070 static udm_rc_t
add_affix(UDM_CFG * C,size_t ac,const char ** av)1071 add_affix(UDM_CFG *C, size_t ac, const char **av)
1072 {
1073 UDM_ENV *Conf=C->Indexer->Conf;
1074
1075 if(C->flags&UDM_FLAG_SPELL)
1076 {
1077 char fname[1024];
1078 rel_etc_name(Conf, fname, sizeof(fname) - 1, av[3]);
1079 if(UdmAffixListListAdd(&Conf->Affixes,av[1],av[2],fname))
1080 {
1081 sprintf(Conf->errstr,"Can't add affix :%s",fname);
1082 return UDM_ERROR;
1083 }
1084 }
1085 return UDM_OK;
1086 }
1087
1088
1089 static udm_rc_t
add_spell(UDM_CFG * C,size_t ac,const char ** av)1090 add_spell(UDM_CFG *C, size_t ac, const char **av)
1091 {
1092 UDM_ENV *Conf=C->Indexer->Conf;
1093
1094 if(C->flags&UDM_FLAG_SPELL)
1095 {
1096 char fname[1024];
1097 rel_etc_name(Conf, fname, sizeof(fname) - 1, av[3]);
1098 if(UdmSpellListListAdd(&Conf->Spells,av[1],av[2],fname))
1099 {
1100 sprintf(Conf->errstr,"Can't load dictionary :%s",fname);
1101 return UDM_ERROR;
1102 }
1103 }
1104 return UDM_OK;
1105 }
1106
1107
1108 static udm_rc_t
add_stoplist(UDM_CFG * C,size_t ac,const char ** av)1109 add_stoplist(UDM_CFG *C, size_t ac, const char **av)
1110 {
1111 UDM_ENV *Conf=C->Indexer->Conf;
1112 char fname[1024];
1113 rel_stopwords_name(Conf, fname, sizeof(fname) - 1, av[1]);
1114 return UdmStopListLoad(Conf,fname);
1115 }
1116
1117
1118 static udm_rc_t
add_langmap(UDM_CFG * C,size_t ac,const char ** av)1119 add_langmap(UDM_CFG *C, size_t ac, const char **av)
1120 {
1121 UDM_ENV *Conf=C->Indexer->Conf;
1122 udm_rc_t rc= UDM_OK;
1123 if (C->flags&UDM_FLAG_LOAD_LANGMAP)
1124 {
1125 char fname[1024];
1126 rel_langmap_name(Conf, fname, sizeof(fname) - 1, av[1]);
1127 rc= UdmLoadLangMapFile(&Conf->LangMaps, fname);
1128 }
1129 return rc;
1130 }
1131
1132
1133 static udm_rc_t
add_synonym(UDM_CFG * C,size_t ac,const char ** av)1134 add_synonym(UDM_CFG *C, size_t ac, const char **av)
1135 {
1136 UDM_ENV *Conf=C->Indexer->Conf;
1137 udm_rc_t rc= UDM_OK;
1138 if(C->flags&UDM_FLAG_SPELL)
1139 {
1140 char fname[1024];
1141 rel_synonym_name(Conf, fname, sizeof(fname) - 1, av[1]);
1142 rc= UdmSynonymListLoad(Conf,fname);
1143 }
1144 return rc;
1145 }
1146
1147
1148 static udm_rc_t
add_chinese(UDM_CFG * C,size_t ac,const char ** av)1149 add_chinese(UDM_CFG *C, size_t ac, const char **av)
1150 {
1151 UDM_ENV *Conf=C->Indexer->Conf;
1152
1153 /*
1154 This line was wrong: ChinesList was not really loaded
1155 from search.cgi
1156 */
1157 /* if(C->flags & UDM_FLAG_ADD_SERV)*/
1158
1159 {
1160 char fname[1024];
1161 rel_freq_name(Conf, fname, sizeof(fname)-1, av[2] ? av[2] : "mandarin.freq");
1162 return UdmChineseListLoad(C->Indexer, &Conf->Chi,
1163 av[1] ? av[1] : "GB2312", fname);
1164 }
1165 return UDM_OK;
1166 }
1167
1168
1169 static udm_rc_t
add_thai(UDM_CFG * C,size_t ac,const char ** av)1170 add_thai(UDM_CFG *C, size_t ac, const char **av)
1171 {
1172 UDM_ENV *Conf= C->Indexer->Conf;
1173 if (C->flags & UDM_FLAG_ADD_SERV)
1174 {
1175 char fname[1024];
1176 rel_freq_name(Conf, fname, sizeof(fname)-1, av[2] ? av[2] : "thai.freq");
1177 return UdmChineseListLoad(C->Indexer, &Conf->Thai,
1178 av[1] ? av[1] : "tis-620", fname);
1179 }
1180 return UDM_OK;
1181 }
1182
1183
1184 static udm_rc_t
add_url(UDM_CFG * C,size_t ac,const char ** av)1185 add_url(UDM_CFG *C, size_t ac, const char **av)
1186 {
1187 UDM_AGENT *A= C->Indexer;
1188
1189 if (C->flags&UDM_FLAG_ADD_SERV)
1190 {
1191 char *al = NULL;
1192 UDM_SERVER *Srv;
1193 if ((Srv= UdmServerFind(A, &A->Conf->Servers, av[1], &al)))
1194 {
1195 UDM_HREFPARAM HrefParam;
1196 UdmHrefParamInit(&HrefParam);
1197 HrefParam.link_source= UDM_LINK_SOURCE_CONF;
1198 UdmHrefListAddConst(&A->Conf->Hrefs, &HrefParam, av[1]);
1199 }
1200 UDM_FREE(al);
1201 }
1202 return UDM_OK;
1203 }
1204
1205
1206 static udm_rc_t
add_srv_table(UDM_CFG * C,size_t ac,const char ** av)1207 add_srv_table(UDM_CFG *C, size_t ac, const char **av)
1208 {
1209 UDM_ENV *Conf=C->Indexer->Conf;
1210 udm_rc_t rc= UDM_OK;
1211 UDM_DBLIST dbl;
1212
1213 /*
1214 Skip ServerTable when loading for search, not for indexing.
1215 Useful when the ServerTable options are written in a shared
1216 include.conf file together with DBAddr options, and this file
1217 is included from both indexer.conf and search.htm
1218 */
1219 if (!(C->flags & UDM_FLAG_ADD_SERV))
1220 return UDM_OK;
1221
1222 UdmDBListInit(&dbl);
1223 if (UDM_OK != (rc= UdmDBListAdd(&dbl, av[1], Conf->errstr, sizeof(Conf->errstr))))
1224 goto ex;
1225 UDM_ASSERT(dbl.nitems == 1);
1226
1227 if (UDM_OK != (rc= dbl.Item[0].dbhandler->ServerAction(C->Indexer, &dbl.Item[0], &Conf->Servers, UDM_SRV_ACTION_TABLE)))
1228 UdmEnvCopyErrMsgFromDB(Conf, &dbl.Item[0]);
1229 ex:
1230 UdmDBListFree(&dbl);
1231 return rc;
1232 }
1233
1234
1235 static udm_rc_t
add_limit(UDM_CFG * C,size_t ac,const char ** av)1236 add_limit(UDM_CFG *C, size_t ac, const char **av)
1237 {
1238 UDM_ENV *Conf=C->Indexer->Conf;
1239 char * sc;
1240 char * nm;
1241
1242 if (ac == 2)
1243 {
1244 if((sc = strchr(av[1],':')))
1245 {
1246 *sc++='\0';
1247 nm=(char*)UdmMalloc(strlen(av[1])+8);
1248 sprintf(nm,"Limit-%s",av[1]);
1249 UdmVarListReplaceStr(&Conf->Vars, nm, sc);
1250 UDM_FREE(nm);
1251 }
1252 }
1253 else if (ac == 3)
1254 {
1255 char name[128];
1256 udm_snprintf(name, sizeof(name), "Limit.%s", av[1]);
1257 UdmVarListReplaceStr(&Conf->Vars, name, av[2]);
1258 }
1259 return UDM_OK;
1260 }
1261
1262
1263 static udm_rc_t
add_user_score(UDM_CFG * C,size_t ac,const char ** av)1264 add_user_score(UDM_CFG *C, size_t ac, const char **av)
1265 {
1266 UDM_ENV *Conf=C->Indexer->Conf;
1267 char name[128];
1268 UDM_ASSERT(ac == 3);
1269 udm_snprintf(name, sizeof(name), "Score.%s", av[1]);
1270 UdmVarListReplaceStr(&Conf->Vars, name, av[2]);
1271 return UDM_OK;
1272 }
1273
1274
1275 static udm_rc_t
add_user_site_score(UDM_CFG * C,size_t ac,const char ** av)1276 add_user_site_score(UDM_CFG *C, size_t ac, const char **av)
1277 {
1278 UDM_ENV *Conf=C->Indexer->Conf;
1279 char name[128];
1280 udm_snprintf(name, sizeof(name), "SiteScore.%s", av[1]);
1281 UdmVarListReplaceStr(&Conf->Vars, name, av[2]);
1282 return UDM_OK;
1283 }
1284
1285
1286 static udm_rc_t
add_user_order(UDM_CFG * C,size_t ac,const char ** av)1287 add_user_order(UDM_CFG *C, size_t ac, const char **av)
1288 {
1289 UDM_ENV *Conf= C->Indexer->Conf;
1290 char name[128];
1291 udm_snprintf(name, sizeof(name), "Order.%s", av[1]);
1292 UdmVarListReplaceStr(&Conf->Vars, name, av[2]);
1293 return UDM_OK;
1294 }
1295
1296
1297 static udm_rc_t
flush_srv_table(UDM_CFG * C,size_t ac,const char ** av)1298 flush_srv_table(UDM_CFG *C, size_t ac, const char **av)
1299 {
1300 UDM_ENV *Conf=C->Indexer->Conf;
1301 udm_rc_t rc= UDM_OK;
1302 if(C->flags&UDM_FLAG_ADD_SERV)
1303 {
1304 UDM_AGENT A;
1305 A.Conf= Conf;
1306 rc= UdmSrvAction(&A, &Conf->Servers, UDM_SRV_ACTION_FLUSH);
1307 }
1308 return rc;
1309 }
1310
1311
1312 static udm_rc_t
dblist_free(UDM_CFG * C,size_t ac,const char ** av)1313 dblist_free(UDM_CFG *C, size_t ac, const char **av)
1314 {
1315 UDM_ENV *Conf=C->Indexer->Conf;
1316 UdmDBListFree(&Conf->DBList);
1317 return UDM_OK;
1318 }
1319
1320
1321 static udm_rc_t
env_rpl_casefolding(UDM_CFG * C,size_t ac,const char ** av)1322 env_rpl_casefolding(UDM_CFG *C, size_t ac, const char **av)
1323 {
1324 UDM_ENV *Conf = C->Indexer->Conf;
1325 UDM_UNIDATA *unidata;
1326 if (!(unidata= UdmUnidataGetByName(av[1])))
1327 {
1328 sprintf(Conf->errstr,"CaseFolding '%s' is not supported", av[1]);
1329 return UDM_ERROR;
1330 }
1331 Conf->unidata= unidata;
1332 return UDM_OK;
1333 }
1334
1335
1336 static udm_rc_t
env_rpl_charset(UDM_CFG * C,size_t ac,const char ** av)1337 env_rpl_charset(UDM_CFG *C, size_t ac, const char **av)
1338 {
1339 UDM_ENV *Conf= C->Indexer->Conf;
1340 UDM_CHARSET *cs;
1341 if (!(cs= UdmGetCharSet(av[1])))
1342 {
1343 sprintf(Conf->errstr,"charset '%s' is not supported",av[1]);
1344 return UDM_ERROR;
1345 }
1346 if (!strcasecmp(av[0],"LocalCharset"))
1347 {
1348 if (!cs->cset->septoken)
1349 {
1350 sprintf(Conf->errstr,
1351 "charset '%s' is not supported as LocalCharset", av[1]);
1352 return UDM_ERROR;
1353 }
1354 Conf->lcs= cs;
1355 UdmVarListReplaceStr(&Conf->Vars,av[0],av[1]);
1356 }
1357 else if(!strcasecmp(av[0],"BrowserCharset")){
1358 Conf->bcs=cs;
1359 UdmVarListReplaceStr(&Conf->Vars,av[0],av[1]);
1360 }
1361 return UDM_OK;
1362 }
1363
1364
1365 static udm_rc_t
srv_rpl_charset(UDM_CFG * C,size_t ac,const char ** av)1366 srv_rpl_charset(UDM_CFG *C, size_t ac, const char **av)
1367 {
1368 UDM_ENV *Conf = C->Indexer->Conf;
1369 UDM_CHARSET *cs;
1370 if (!(cs= UdmGetCharSet(av[1])))
1371 {
1372 sprintf(Conf->errstr,"charset '%s' is not supported",av[1]);
1373 return UDM_ERROR;
1374 }
1375 UdmVarListReplaceStr(&C->Srv->Vars,av[0],av[1]);
1376 return UDM_OK;
1377 }
1378
1379
1380 static udm_rc_t
srv_rpl_mirror(UDM_CFG * C,size_t ac,const char ** av)1381 srv_rpl_mirror(UDM_CFG *C, size_t ac, const char **av)
1382 {
1383 if (!strcasecmp(av[0],"MirrorRoot") || !strcasecmp(av[0],"MirrorHeadersRoot"))
1384 {
1385 char fname[1024];
1386 rel_var_name(C->Indexer->Conf, fname, sizeof(fname)-1, av[1]);
1387 UdmVarListReplaceStr(&C->Srv->Vars,av[0],fname);
1388 }
1389 else if(!strcasecmp(av[0],"MirrorPeriod"))
1390 {
1391 int tm= Udm_dp2time_t(av[1]);
1392 UdmVarListReplaceInt(&C->Srv->Vars,"MirrorPeriod",tm);
1393 }
1394 return UDM_OK;
1395 }
1396
1397
1398 static udm_rc_t
srv_rpl_auth(UDM_CFG * C,size_t ac,const char ** av)1399 srv_rpl_auth(UDM_CFG *C, size_t ac, const char **av)
1400 {
1401 char name[128];
1402 udm_snprintf(name, sizeof(name) - 1, "%s", av[0]);
1403 name[sizeof(name)-1]= '\0';
1404 if (av[1])
1405 {
1406 size_t len= strlen(av[1]);
1407 char *auth=(char*)UdmMalloc(BASE64_LEN(strlen(av[1])));
1408 udm_base64_encode(av[1],auth,len);
1409 UdmVarListReplaceStr(&C->Srv->Vars,name,auth);
1410 UDM_FREE(auth);
1411 }
1412 else
1413 {
1414 UdmVarListReplaceStr(&C->Srv->Vars,name,"");
1415 }
1416 return UDM_OK;
1417 }
1418
1419
1420 char *
UdmParseEnvVar(UDM_ENV * Conf,const char * str)1421 UdmParseEnvVar(UDM_ENV *Conf, const char *str)
1422 {
1423 const char *p1= str, *p2= str;
1424 UDM_DSTR rc;
1425 UDM_STR tmp;
1426 UdmDSTRInit(&rc, 256);
1427 while ((p1= strstr(p1, "$(")))
1428 {
1429 const char *p3;
1430 UdmDSTRAppend(&rc, p2, p1 - p2);
1431 if ((p3= strchr(p1 + 2, ')')))
1432 {
1433 const char *s;
1434 char varname[128];
1435 udm_snprintf(varname, sizeof(varname), "%.*s", (int) (p3 - p1 - 2), p1 + 2);
1436 if ((s= UdmVarListFindStr(&Conf->Vars, varname, NULL)))
1437 UdmDSTRAppendSTR(&rc, s);
1438 p1= p2= p3 + 1;
1439 }
1440 else
1441 {
1442 UdmDSTRFree(&rc);
1443 return(NULL);
1444 }
1445 }
1446 UdmDSTRAppendSTR(&rc, p2);
1447 UdmDSTRGiveValue(&rc, &tmp);
1448 return tmp.str;
1449 }
1450
1451
1452 static udm_rc_t
env_rpl_env_var(UDM_CFG * C,size_t ac,const char ** av)1453 env_rpl_env_var(UDM_CFG *C, size_t ac, const char **av)
1454 {
1455 UDM_ENV *Conf= C->Indexer->Conf;
1456 char *p= getenv(av[1]);
1457 if (!p)
1458 {
1459 sprintf(Conf->errstr, "ImportEnv '%s': no such variable.", av[1]);
1460 return UDM_ERROR;
1461 }
1462 UdmVarListReplaceStr(&Conf->Vars, av[1], p);
1463 return UDM_OK;
1464 }
1465
1466
1467 static udm_rc_t
env_rpl_encoding(UDM_CFG * C,size_t ac,const char ** av)1468 env_rpl_encoding(UDM_CFG *C, size_t ac, const char **av)
1469 {
1470 UDM_ENV *Conf=C->Indexer->Conf;
1471 udm_content_encoding_t ce;
1472 UDM_ASSERT(ac == 2);
1473
1474 UdmVarListReplaceStr(&Conf->Vars, av[0], av[1]);
1475 ce= UdmContentEncodingID(av[1]);
1476 switch (ce)
1477 {
1478 case UDM_CONTENT_ENCODING_IDENTITY:
1479 case UDM_CONTENT_ENCODING_DEFLATE:
1480 return UDM_OK;
1481 default:
1482 udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1483 "Bad value for '%s': '%s'", av[0], av[1]);
1484 return UDM_ERROR;
1485 }
1486 return UDM_OK;
1487 }
1488
1489
1490 static udm_rc_t
env_dbaddr(UDM_CFG * C,size_t ac,const char ** av)1491 env_dbaddr(UDM_CFG *C, size_t ac, const char **av)
1492 {
1493 UDM_ENV *Conf=C->Indexer->Conf;
1494 udm_rc_t rc;
1495 size_t i;
1496 for (i= 1; i < ac; i++)
1497 {
1498 if (UDM_OK != (rc= UdmEnvDBListAdd(Conf, av[i] ? av[i] : "")))
1499 return rc;
1500 }
1501 return UDM_OK;
1502 }
1503
1504
1505 static udm_rc_t
env_rpl_var(UDM_CFG * C,size_t ac,const char ** av)1506 env_rpl_var(UDM_CFG *C, size_t ac, const char **av)
1507 {
1508 UDM_ENV *Conf=C->Indexer->Conf;
1509 if (!strcasecmp(av[0], "Segmenter"))
1510 {
1511 int seg= 0;
1512 #ifdef CHASEN
1513 if (!strcasecmp(av[1], "Chasen"))
1514 seg= 1;
1515 #endif
1516 #ifdef MECAB
1517 if (!strcasecmp(av[1], "Mecab"))
1518 seg= 1;
1519 #endif
1520 if (!strcasecmp(av[1], "Freq"))
1521 seg= 1;
1522 if (!strcasecmp(av[1], "CJK"))
1523 seg= 1;
1524 if (!seg)
1525 {
1526 sprintf(Conf->errstr, "Unsupported segmenter method: '%s'", av[1]);
1527 return UDM_ERROR;
1528 }
1529 }
1530 if (!strcasecmp(av[0], "Log2Stderr"))
1531 Conf->Log.logFD= udm_strntobool(av[1], strlen(av[1])) ? stderr : NULL;
1532 UdmVarListReplaceStr(&Conf->Vars,av[0],av[1]);
1533 return UDM_OK;
1534 }
1535
1536
1537 static udm_rc_t
env_rpl_named_var(UDM_CFG * C,size_t ac,const char ** av)1538 env_rpl_named_var(UDM_CFG *C, size_t ac, const char **av)
1539 {
1540 UDM_ENV *Conf= C->Indexer->Conf;
1541 UDM_SECTION_PARAM Param;
1542 UdmSectionParamInit(&Param, 0);
1543 if (!strncasecmp(av[1], UDM_CSTR_WITH_LEN("Raw.")))
1544 Param.flags= (udm_var_flag_t) (Param.flags | UDM_VARFLAG_RAW);
1545 UdmVarListReplaceStrnWithParam(&Conf->Vars, &Param, av[1], av[2], strlen(av[2]));
1546 return UDM_OK;
1547 }
1548
1549
1550 static udm_rc_t
rpl_xml_hook(UDM_CFG * C,size_t ac,const char ** av)1551 rpl_xml_hook(UDM_CFG *C, size_t ac, const char **av)
1552 {
1553 UDM_ENV *Conf= C->Indexer->Conf;
1554 UDM_VARLIST *Vars= !strcasecmp(av[0], "XMLEnterHook") ?
1555 &Conf->XMLEnterHooks :
1556 !strcasecmp(av[0], "XMLDataHook") ?
1557 &Conf->XMLDataHooks : &Conf->XMLLeaveHooks;
1558 UdmVarListReplaceStr(Vars,av[1],av[2]);
1559 return UDM_OK;
1560 }
1561
1562
1563 static udm_rc_t
srv_rpl_var(UDM_CFG * C,size_t ac,const char ** av)1564 srv_rpl_var(UDM_CFG *C, size_t ac, const char **av)
1565 {
1566 UdmVarListReplaceStr(&C->Srv->Vars,av[0],av[1]);
1567 return UDM_OK;
1568 }
1569
1570
1571 static udm_rc_t
srv_add_proxy(UDM_CFG * C,const char * cmd,const char * arg)1572 srv_add_proxy(UDM_CFG *C, const char *cmd, const char *arg)
1573 {
1574 UDM_SERVER *Server= C->Srv;
1575 udm_rc_t rc= UDM_OK;
1576 UDM_URL url;
1577 size_t auth_length;
1578
1579 if (!strcasecmp(arg, "none") ||
1580 !strcasecmp(arg, ""))
1581 {
1582 UdmURLListFree(&Server->ProxyList);
1583 return UDM_OK;
1584 }
1585
1586 UdmURLInit(&url);
1587 if (UDM_OK != UdmURLParse(&url, arg) ||
1588 !url.schema || strcasecmp(url.schema, "http") ||
1589 url.filename || (url.path && strcmp(url.path, "/")) ||
1590 (url.auth && (auth_length= strlen(url.auth)) > 64))
1591 {
1592 UDM_ENV *Conf= C->Indexer->Conf;
1593 udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1594 "Bad '%s' value: '%s'", cmd, arg);
1595 rc= UDM_ERROR;
1596 }
1597 if (rc == UDM_OK && url.auth)
1598 {
1599 /* Wrap the authorization part into Base64 */
1600 char auth[128];
1601 udm_base64_encode(url.auth, auth, auth_length);
1602 UdmFree(url.auth);
1603 url.auth= UdmStrdup(auth);
1604 }
1605 UdmURLListAdd(&Server->ProxyList, &url);
1606 UdmURLFree(&url);
1607 return rc;
1608 }
1609
1610
1611 static udm_rc_t
srv_proxy_var(UDM_CFG * C,size_t ac,const char ** av)1612 srv_proxy_var(UDM_CFG *C, size_t ac, const char **av)
1613 {
1614 size_t i;
1615 UdmURLListFree(&C->Srv->ProxyList);
1616 for (i= 1; i < ac; i++)
1617 {
1618 if (UDM_OK != srv_add_proxy(C, av[0], av[i]))
1619 return UDM_ERROR;
1620 }
1621 return UDM_OK;
1622 }
1623
1624
1625 static int
collect_links_destination(const char * str)1626 collect_links_destination(const char *str)
1627 {
1628 if (!strcasecmp(str, "yes"))
1629 return UDM_COLLECT_LINKS_YES;
1630 if (!strcasecmp(str, "all"))
1631 return UDM_COLLECT_LINKS_ALL_DST;
1632 if (!strcasecmp(str, "inner"))
1633 return UDM_COLLECT_LINKS_INNER;
1634 if (!strcasecmp(str, "outer"))
1635 return UDM_COLLECT_LINKS_OUTER;
1636 if (!strcasecmp(str, "site"))
1637 return UDM_COLLECT_LINKS_SITE;
1638 if (!strcasecmp(str, "page"))
1639 return UDM_COLLECT_LINKS_PAGE;
1640 if (!strcasecmp(str, "badscheme"))
1641 return UDM_COLLECT_LINKS_BADSCHEME;
1642 if (!strcasecmp(str, "bad"))
1643 return UDM_COLLECT_LINKS_BAD;
1644 if (!strcasecmp(str, "hops"))
1645 return UDM_COLLECT_LINKS_HOPS;
1646 if (!strcasecmp(str, "filter"))
1647 return UDM_COLLECT_LINKS_FILTER;
1648 if (!strcasecmp(str, "persite"))
1649 return UDM_COLLECT_LINKS_PERSITE;
1650 if (!strcasecmp(str, "no"))
1651 return UDM_COLLECT_LINKS_NONE;
1652 return -1;
1653 }
1654
1655
1656 static int
collect_links_format(const char * str)1657 collect_links_format(const char *str)
1658 {
1659 if (!strcasecmp(str, "asis"))
1660 return UDM_COLLECT_LINKS_ASIS;
1661 if (!strcasecmp(str, "absolute"))
1662 return UDM_COLLECT_LINKS_ABSOLUTE;
1663 return -1;
1664 }
1665
1666
1667 static udm_rc_t
srv_rpl_collect_links(UDM_CFG * C,size_t ac,const char ** av)1668 srv_rpl_collect_links(UDM_CFG *C, size_t ac, const char **av)
1669 {
1670 size_t i;
1671 int destination= UDM_COLLECT_LINKS_NONE;
1672 int format= UDM_COLLECT_LINKS_ASIS;
1673 for (i= 1; i < ac; i++)
1674 {
1675 int flag;
1676 if ((flag= collect_links_destination(av[i])) >= 0)
1677 destination|= flag;
1678 else if ((flag= collect_links_format(av[i])) >= 0)
1679 format= flag;
1680 else
1681 {
1682 UDM_ENV *Conf= C->Indexer->Conf;
1683 udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1684 "%s: unknown flag: '%s'", av[0], av[i]);
1685 return UDM_ERROR;
1686 }
1687 }
1688 UdmVarListReplaceInt(&C->Srv->Vars,av[0], destination | format);
1689 return UDM_OK;
1690 }
1691
1692
1693 static udm_rc_t
srv_rpl_follow_links(UDM_CFG * C,size_t ac,const char ** av)1694 srv_rpl_follow_links(UDM_CFG *C, size_t ac, const char **av)
1695 {
1696 size_t i;
1697 int follow= 0;
1698 for (i= 1; i < ac; i++)
1699 {
1700 udm_link_source_t source;
1701 if (!strcasecmp(av[i], "yes"))
1702 follow= UDM_LINK_SOURCES_YES;
1703 else if (!strcasecmp(av[i], "no"))
1704 follow= 0;
1705 else if (av[i][0] == '-' &&
1706 (source= UdmLinkSourceByName(av[i] + 1)) != UDM_LINK_SOURCE_UNKNOWN)
1707 {
1708 follow&= ~(1 << source);
1709 }
1710 else if ((source= UdmLinkSourceByName(av[i])) != UDM_LINK_SOURCE_UNKNOWN)
1711 {
1712 follow|= (1 << source);
1713 }
1714 else
1715 {
1716 UDM_ENV *Conf= C->Indexer->Conf;
1717 udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1718 "%s: unknown link source: '%s'", av[0], av[i]);
1719 return UDM_ERROR;
1720 }
1721 }
1722 UdmVarListReplaceInt(&C->Srv->Vars, av[0], follow);
1723 return UDM_OK;
1724 }
1725
1726
1727 static int
robots_flag(const char * str)1728 robots_flag(const char *str)
1729 {
1730 if (!strcasecmp(str, "yes"))
1731 return UDM_ROBOTS_ALL;
1732 if (!strcasecmp(str, "xrobotstag"))
1733 return UDM_ROBOTS_HEADER;
1734 if (!strcasecmp(str, "robotstxt"))
1735 return UDM_ROBOTS_TXT;
1736 if (!strcasecmp(str, "meta"))
1737 return UDM_ROBOTS_META;
1738 if (!strcasecmp(str, "rel"))
1739 return UDM_ROBOTS_REL;
1740 if (!strcasecmp(str, "no"))
1741 return UDM_ROBOTS_NONE;
1742 return -1;
1743 }
1744
1745
1746 static udm_rc_t
srv_rpl_robots(UDM_CFG * C,size_t ac,const char ** av)1747 srv_rpl_robots(UDM_CFG *C, size_t ac, const char **av)
1748 {
1749 size_t i;
1750 int flags= UDM_ROBOTS_NONE;
1751 for (i= 1; i < ac; i++)
1752 {
1753 int flag;
1754 if ((flag= robots_flag(av[i])) >= 0)
1755 flags|= flag;
1756 else
1757 {
1758 UDM_ENV *Conf= C->Indexer->Conf;
1759 udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1760 "%s: unknown flag: '%s'", av[0], av[i]);
1761 return UDM_ERROR;
1762 }
1763 }
1764 UdmVarListReplaceInt(&C->Srv->Vars, av[0], flags);
1765 return UDM_OK;
1766 }
1767
1768
1769 static udm_rc_t
srv_rpl_hdr(UDM_CFG * C,size_t ac,const char ** av)1770 srv_rpl_hdr(UDM_CFG *C, size_t ac, const char **av)
1771 {
1772 const char *semicolon= NULL;
1773 char name[128];
1774
1775 switch(ac)
1776 {
1777 case 3:
1778 udm_snprintf(name, sizeof(name), "Request.%s", av[1]);
1779 return UdmVarListReplaceStr(&C->Srv->Vars, name, av[2]);
1780
1781 case 2:
1782 if ((semicolon= strchr(av[1],':')))
1783 {
1784 UDM_CONST_STR val;
1785 size_t namelength= semicolon - av[1];
1786 udm_snprintf(name, sizeof(name), "Request.%.*s", (int) namelength, av[1]);
1787 UdmConstStrSetStr(&val, semicolon + 1);
1788 UdmConstStrTrim(&val," \t");
1789 return UdmVarListReplaceStrn(&C->Srv->Vars, name, val.str, val.length);
1790 }
1791 udm_snprintf(C->Indexer->Conf->errstr, sizeof(C->Indexer->Conf->errstr),
1792 "No semicolon found in %s\n", av[0]);
1793 return UDM_ERROR;
1794 break;
1795 }
1796 UDM_ASSERT(0);
1797 return UDM_ERROR;
1798 }
1799
1800
1801 static udm_rc_t
env_rpl_bool_var(UDM_CFG * C,size_t ac,const char ** av)1802 env_rpl_bool_var(UDM_CFG *C, size_t ac, const char **av)
1803 {
1804 UDM_ENV *Conf= C->Indexer->Conf;
1805 int res= !strcasecmp(av[1],"yes") || atoi(av[1]) == 1;
1806 if(!strcasecmp(av[0], "CVSIgnore")) Conf->CVS_ignore= res;
1807 UdmVarListReplaceInt(&Conf->Vars,av[0],res);
1808 return UDM_OK;
1809 }
1810
1811
1812 static udm_rc_t
srv_rpl_bool_var(UDM_CFG * C,size_t ac,const char ** av)1813 srv_rpl_bool_var(UDM_CFG *C, size_t ac, const char **av)
1814 {
1815 int res= !strcasecmp(av[1],"yes") || atoi(av[1]) == 1;
1816 UdmVarListReplaceInt(&C->Srv->Vars,av[0],res);
1817 return UDM_OK;
1818 }
1819
1820
1821 static udm_rc_t
env_rpl_size(UDM_CFG * C,size_t ac,const char ** av)1822 env_rpl_size(UDM_CFG *C, size_t ac, const char **av)
1823 {
1824 UDM_ENV *Conf= C->Indexer->Conf;
1825 int error;
1826 char *endptr;
1827 unsigned long long res= UdmStrToSize(av[1], &endptr, &error);
1828 if (error || endptr[0])
1829 {
1830 udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1831 "Bad size value: %s", av[1]);
1832 return UDM_ERROR;
1833 }
1834 UdmVarListReplaceULongLong(&Conf->Vars, av[0], res);
1835 return UDM_OK;
1836 }
1837
1838
1839 static udm_rc_t
env_rpl_num_var(UDM_CFG * C,size_t ac,const char ** av)1840 env_rpl_num_var(UDM_CFG *C, size_t ac, const char **av)
1841 {
1842 UDM_ENV *Conf=C->Indexer->Conf;
1843 int res=atoi(av[1]);
1844 if(!strcasecmp(av[0], "DocSizeWeight"))
1845 {
1846 UdmVarListReplaceInt(&Conf->Vars, "MaxCoordFactor" ,res);
1847 return UDM_OK;
1848 }
1849 if(!strcasecmp(av[0],"MinWordLength"))Conf->WordParam.min_word_len=res;
1850 if(!strcasecmp(av[0],"MaxWordLength"))Conf->WordParam.max_word_len=res;
1851 if (!strcasecmp(av[0],"IndexerThreads") &&
1852 (res < 1 || res > UDM_INDEXER_THREADS_MAX))
1853 {
1854 udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1855 "%s: bad value: %d. Valid values: 1..%d.",
1856 av[0], res, UDM_INDEXER_THREADS_MAX);
1857 return UDM_ERROR;
1858 }
1859 if (!strcasecmp(av[0], "LogLevel"))
1860 Conf->Log.level= atoi(av[1]);
1861 UdmVarListReplaceInt(&Conf->Vars,av[0],res);
1862 return UDM_OK;
1863 }
1864
1865
1866 static udm_rc_t
srv_rpl_num_var(UDM_CFG * C,size_t ac,const char ** av)1867 srv_rpl_num_var(UDM_CFG *C, size_t ac, const char **av)
1868 {
1869 int res=atoi(av[1]);
1870 UdmVarListReplaceInt(&C->Srv->Vars,av[0],res);
1871 if (strcasecmp(av[0], "MaxHops") == 0) C->Srv->MaxHops = (uint4) res;
1872 if (strcasecmp(av[0], "ServerWeight") == 0) C->Srv->weight = atof(av[1]);
1873 return UDM_OK;
1874 }
1875
1876
1877 static udm_rc_t
srv_rpl_time_var(UDM_CFG * C,size_t ac,const char ** av)1878 srv_rpl_time_var(UDM_CFG *C, size_t ac, const char **av)
1879 {
1880 UDM_ENV *Conf=C->Indexer->Conf;
1881 int res= Udm_dp2time_t(av[1]);
1882 udm_rc_t rc= UDM_OK;
1883 if (res == -1)
1884 {
1885 sprintf(Conf->errstr,"bad time interval: %s",av[1]);
1886 rc= UDM_ERROR;
1887 }
1888 UdmVarListReplaceInt(&C->Srv->Vars,av[0],res);
1889 return rc;
1890 }
1891
1892
1893 typedef struct conf_cmd_st
1894 {
1895 const char *name;
1896 size_t argmin;
1897 size_t argmax;
1898 udm_rc_t (*action)(UDM_CFG *a, size_t n, const char **av);
1899 } UDM_CONFCMD;
1900
1901
1902 static UDM_CONFCMD commands[] =
1903 {
1904 {"Include", 1,1, do_include}, /* Documented */
1905
1906 {"ImportEnv", 1,1, env_rpl_env_var}, /* Documented */
1907 {"DBAddr", 1,100, env_dbaddr}, /* Documented */
1908 {"DefaultContentType", 1,1, env_rpl_var}, /* Documented */
1909 {"ResultContentType", 1,1, env_rpl_var}, /* Documented */
1910 {"Listen", 1,1, env_rpl_var}, /* TODO */
1911 {"UseRemoteContentType", 1,1, env_rpl_bool_var},/* Documented */
1912 {"UsePopularity", 1,1, env_rpl_bool_var},/* Documented */
1913 {"UseCRC32URLId", 1,1, env_rpl_var}, /* Documented */
1914 {"NewsExtensions", 1,1, env_rpl_var}, /* Documented */
1915 {"SyslogFacility", 1,1, env_rpl_var}, /* Documented */
1916 {"AliasProg", 1,1, env_rpl_var}, /* Documented */
1917 {"ForceIISCharset1251", 1,1, env_rpl_var}, /* Documented */
1918 {"GroupBySite", 1,1, env_rpl_bool_var},/* Documented */
1919 {"wf", 1,1, env_rpl_var}, /* Documented */
1920 {"StrictModeThreshold", 1,1, env_rpl_var}, /* Documented */
1921 /*{"PopRankSkipSameSite", 1,1, env_rpl_var},*/ /* TODO34: restore */
1922 {"VarDir", 1,1, env_rpl_var}, /* Documented */
1923 {"DocMemCacheSize", 1,1, env_rpl_var}, /* Documented */
1924 {"IspellUsePrefixes", 1,1, env_rpl_var}, /* Documented */
1925 {"URLSelectCacheSize", 1,1, env_rpl_var}, /* Documented */
1926 {"URLSelectSkipLock", 1,1, env_rpl_bool_var},/* Documented */
1927 {"MaxDocSize", 1,1, env_rpl_var}, /* Documented */
1928 {"ParserTimeOut", 1,1, env_rpl_var}, /* Documented */
1929 {"NumSections", 1,1, env_rpl_var}, /* Documented */
1930 {"DateFormat", 1,1, env_rpl_var}, /* Documented */
1931 {"GuesserUseMeta", 1,1, env_rpl_var}, /* Documented */
1932 {"ResultsLimit", 1,1, env_rpl_var}, /* Documented */
1933 {"Segmenter", 1,1, env_rpl_var}, /* TODO */
1934 {"Log2stderr", 1,1, env_rpl_var}, /* Documented */
1935 {"LogFlags", 1,1, env_rpl_num_var}, /* TODO */
1936 {"SQLClearDBHook", 1,1, env_rpl_var}, /* TODO */
1937 {"UserCacheQuery", 1,1, env_rpl_var}, /* Documented */
1938 #ifdef HAVE_SETVBUF
1939 {"StdoutBufferSize", 1,1, env_rpl_var}, /* TODO */
1940 #endif
1941 {"AlwaysFoundWord", 1,1, env_rpl_var}, /* Documented */
1942 {"CustomLog", 1,1, env_rpl_var}, /* Documented */
1943 {"CurrentTime", 1,1, env_rpl_var}, /* TODO */
1944 {"Locale", 1,1, env_rpl_var}, /* Documented */
1945 {"WordDistanceWeight", 1,1, env_rpl_num_var}, /* Documented */
1946 {"MaxCoordFactor", 1,1, env_rpl_num_var}, /* TODO */
1947 {"PopularityFactor", 1,1, env_rpl_num_var}, /* Documented */
1948 {"IDFFactor", 1,1, env_rpl_num_var}, /* Documented */
1949 {"MinCoordFactor", 1,1, env_rpl_num_var}, /* Documented */
1950 {"NumWordFactor", 1,1, env_rpl_var}, /* Documented */
1951 {"NumDistinctWordFactor",1,1, env_rpl_num_var}, /* Documented */
1952 {"UserScoreFactor", 1,1, env_rpl_num_var}, /* Documented */
1953 {"WordDensityFactor" ,1,1, env_rpl_num_var}, /* Documented */
1954 {"WordFormFactor" ,1,1, env_rpl_num_var}, /* Documented */
1955 {"URLDataThreshold", 1,1, env_rpl_num_var}, /* Documented */
1956 {"DocSizeWeight", 1,1, env_rpl_num_var}, /* Documented */
1957 {"RelevancyFactor", 1,1, env_rpl_num_var}, /* TODO */
1958 {"Phrase2CountFactor", 1,1, env_rpl_num_var}, /* Documented */
1959 {"Phrase3CountFactor", 1,1, env_rpl_num_var}, /* Documented */
1960 {"DateFactor", 1,1, env_rpl_num_var}, /* Documented */
1961 {"MinWordLength", 1,1, env_rpl_num_var}, /* Documented */
1962 {"MaxWordLength", 1,1, env_rpl_num_var}, /* Documented */
1963 {"SubstringMatchMinWordLength", 1,1, env_rpl_num_var}, /* Documented */
1964
1965 {"ExcerptSize", 1,1, env_rpl_num_var}, /* Documented */
1966 {"ExcerptPadding", 1,1, env_rpl_num_var}, /* Documented */
1967 {"ExcerptFragment", 2,100, add_excerpt_fragment}, /* TODO */
1968
1969 {"LogLevel", 1,1, env_rpl_num_var}, /* Documented */
1970 {"CrawlerThreads", 1,1, env_rpl_num_var}, /* Documented */
1971 {"IndexerThreads", 1,1, env_rpl_num_var}, /* Documented */
1972 {"WordCacheSize", 1,1, env_rpl_num_var}, /* Documented */
1973 {"IPRequestPerMinLimit", 1,1, env_rpl_num_var}, /* Documented */
1974 {"CVSIgnore", 1,1, env_rpl_bool_var},/* Documented */
1975 {"UseHTDBURLId", 1,1, env_rpl_bool_var},/* TODO */
1976 {"Suggest", 1,1, env_rpl_bool_var},/* Documented */
1977 {"IndexTime", 1,1, env_rpl_bool_var},/* Documented */
1978 {"ExcerptStopword", 1,1, env_rpl_bool_var},/* Documented */
1979 {"UseCookie", 1,1, env_rpl_bool_var},/* Documented */
1980 {"UseSitemap", 1,1, env_rpl_bool_var},/* Documented */
1981 {"UseNumericOperators", 1,1, env_rpl_bool_var},/* Documented */
1982 {"UseRangeOperators", 1,1, env_rpl_bool_var},/* Documented */
1983 {"SaveSectionSize", 1,1, env_rpl_bool_var},/* Documented */
1984 {"Dehyphenate", 1,1, env_rpl_bool_var},/* Documented */
1985 {"HyphenateNumbers", 1,1, env_rpl_bool_var},/* TODO */
1986 {"StripAccents", 1,1, env_rpl_bool_var},/* Documented */
1987 {"LoadURLInfo", 1,1, env_rpl_bool_var},/* Documented */
1988 {"LoadURLBasicInfo", 1,1, env_rpl_bool_var},/* Documented */
1989 {"LoadTagInfo", 1,1, env_rpl_bool_var},/* Documented */
1990 {"ComplexSynonyms", 1,1, env_rpl_bool_var},/* Documented */
1991
1992 {"ReplaceVar", 2,2, env_rpl_named_var},/* Documented */
1993
1994 {"LocalCharset", 1,1, env_rpl_charset}, /* Documented */
1995 {"BrowserCharset", 1,1, env_rpl_charset}, /* Documented */
1996 {"CaseFolding", 1,1, env_rpl_casefolding},/* Documented */
1997
1998 {"XMLEnterHook", 2,2, rpl_xml_hook}, /* TODO */
1999 {"XMLLeaveHook", 2,2, rpl_xml_hook}, /* TODO */
2000 {"XMLDataHook", 2,2, rpl_xml_hook}, /* TODO */
2001
2002 {"HTDBAddr", 1,1, srv_rpl_var}, /* Documented */
2003 {"HTDBList", 1,1, srv_rpl_var}, /* Documented */
2004 {"HTDBDoc", 1,1, srv_rpl_var}, /* Documented */
2005 {"HTDBLimit", 1,1, srv_rpl_var}, /* Documented */
2006 {"SQLImportSection", 1,1, srv_rpl_var}, /* TODO */
2007 {"SQLExportHref", 1,1, srv_rpl_var}, /* TODO */
2008 {"SQLExportPopularityTable",1,1,env_rpl_var}, /* TODO */
2009 {"SQLWordForms", 1,1, env_rpl_var}, /* Documented */
2010 {"DefaultLang", 1,1, srv_rpl_var}, /* Documented */
2011 {"Tag", 1,1, srv_rpl_var}, /* Documented */
2012 {"Proxy", 0,100, srv_proxy_var}, /* Documented */
2013 {"VaryLang", 1,1, srv_rpl_var}, /* Documented */
2014 {"UseRobotsTxtURL", 1,1, srv_rpl_var}, /* TODO */
2015 {"MaxNetErrors", 1,1, srv_rpl_num_var}, /* Documented */
2016 {"CrawlDelay", 1,1, srv_rpl_num_var}, /* Documented */
2017 {"MaxHops", 1,1, srv_rpl_num_var}, /* Documented */
2018 {"StartHops", 1,1, srv_rpl_num_var}, /* Documented */
2019 {"MaxDocPerSite", 1,1, srv_rpl_num_var}, /* Documented */
2020 {"ServerWeight", 1,1, srv_rpl_num_var}, /* Documented */
2021 {"Robots", 1,4, srv_rpl_robots}, /* Documented */
2022 {"DetectClones", 1,1, srv_rpl_bool_var}, /* Documented */
2023 {"CollectLinks", 1,100, srv_rpl_collect_links},/*Documented*/
2024 {"Index", 1,1, srv_rpl_bool_var}, /* Documented */
2025 {"FollowLinks", 1,100, srv_rpl_follow_links},/*Documented*/
2026 {"FollowSymLinks", 1,1, srv_rpl_bool_var}, /* Documented */
2027 {"AjaxLinks", 1,1, srv_rpl_bool_var}, /* TODO */
2028 {"NetErrorDelayTime", 1,1, srv_rpl_time_var}, /* Documented */
2029 {"ReadTimeOut", 1,1, srv_rpl_time_var}, /* Documented */
2030 {"DocTimeOut", 1,1, srv_rpl_time_var}, /* Documented */
2031 {"Period", 1,1, srv_rpl_time_var}, /* Documented */
2032 {"HoldBadHrefs", 1,1, srv_rpl_time_var}, /* Documented */
2033 {"DNSCacheTimeout", 1,1, srv_rpl_time_var}, /* Documented */
2034 {"HTTPHeader", 1,2, srv_rpl_hdr}, /* Documented */
2035 {"AuthBasic", 1,1, srv_rpl_auth}, /* Documented */
2036 {"MirrorRoot", 1,1, srv_rpl_mirror}, /* Documented */
2037 {"MirrorHeadersRoot", 1,1, srv_rpl_mirror}, /* Documented */
2038 {"MirrorPeriod", 1,1, srv_rpl_mirror}, /* Documented */
2039 {"RemoteCharset", 1,1, srv_rpl_charset}, /* Documented */
2040 {"RemoteFileNameCharset",1,1, srv_rpl_charset}, /* Documented */
2041 {"RobotsDisallowAction", 1,1, srv_rpl_num_var}, /* TODO */
2042
2043 {"Disallow", 1,100, add_filter}, /* Documented */
2044 {"Allow", 1,100, add_filter}, /* Documented */
2045 {"CheckMP3Only", 1,100, add_filter}, /* Documented */
2046 {"CheckMP3", 1,100, add_filter}, /* Documented */
2047 {"CheckOnly", 1,100, add_filter}, /* Documented */
2048 {"HrefOnly", 1,100, add_filter}, /* Documented */
2049 {"ImportOnly", 1,100, add_filter}, /* TODO */
2050 {"Skip", 1,100, add_filter}, /* Documented */
2051
2052 {"IndexIf", 1,100, add_section_filter},/* Documented */
2053 {"NoIndexIf", 1,100, add_section_filter},/* Documented */
2054 {"SkipIf", 1,100, add_section_filter},/* Documented */
2055
2056 {"Server", 1,100, add_srv}, /* Documented */
2057 {"Realm", 1,100, add_srv}, /* Documented */
2058 {"Subnet", 1,100, add_srv}, /* Documented */
2059 {"URL", 1,1, add_url}, /* Documented */
2060
2061 {"Alias", 1,100, add_alias}, /* Documented */
2062 {"ReverseAlias", 1,100, add_alias}, /* Documented */
2063
2064 {"AddType", 1,100, add_type}, /* Documented */
2065 {"AddEncoding", 1,100, add_encoding}, /* Documented */
2066 {"CachedCopyEncoding", 1,1, env_rpl_encoding}, /* Documented */
2067 {"Mime", 2,4, add_parser}, /* Documented */
2068 {"Section", 2,10, add_section}, /* Documented */ /* TODO: index/noindex */
2069 {"Affix", 3,3, add_affix}, /* Documented */
2070 {"Spell", 3,3, add_spell}, /* Documented */
2071 {"StopwordFile", 1,1, add_stoplist}, /* Documented */
2072 {"LangMapFile", 1,1, add_langmap}, /* Documented */
2073 {"LangMapUpdate", 1,1, env_rpl_var}, /* Documented */
2074 {"Synonym", 1,1, add_synonym}, /* Documented */
2075 {"LoadChineseList", 0,2, add_chinese}, /* Documented */
2076 {"LoadThaiList", 0,2, add_thai}, /* Documented */
2077 {"Limit", 1,2, add_limit}, /* Documented */
2078 {"UserScore", 2,2, add_user_score}, /* Documented */
2079 {"UserSiteScore", 2,2, add_user_site_score},/* Documented */
2080 {"UserOrder", 2,2, add_user_order}, /* Documented */
2081 {"ServerTable", 1,1, add_srv_table}, /* Documented */
2082 {"FlushServerTable", 0,0, flush_srv_table}, /* Documented */
2083 {"DBListFree", 0,0, dblist_free}, /* TODO */
2084
2085 {"IndexCacheSize", 1,1, env_rpl_size}, /* Documented */
2086 {NULL,0,0,0}
2087 };
2088
2089
2090 UDM_API(udm_rc_t)
UdmEnvAddLine(UDM_CFG * C,char * str)2091 UdmEnvAddLine(UDM_CFG *C, char *str)
2092 {
2093 UDM_ENV *Conf=C->Indexer->Conf;
2094 UDM_CONFCMD *Cmd;
2095 const char *av[255];
2096 size_t ac= UdmGetArgs(str, av, 255);
2097
2098 for (Cmd= commands ; Cmd->name ; Cmd++)
2099 {
2100 if(!strcasecmp(Cmd->name,av[0]))
2101 {
2102 udm_rc_t rc= UDM_OK;
2103 int argc=ac;
2104 size_t i;
2105 char *p;
2106 char *tmpav[255];
2107
2108 argc--;
2109 if(ac<Cmd->argmin+1)
2110 {
2111 sprintf(Conf->errstr,"too few (%d) arguments for command '%s'",
2112 argc,Cmd->name);
2113 return UDM_ERROR;
2114 }
2115
2116 if(ac>Cmd->argmax+1)
2117 {
2118 sprintf(Conf->errstr,"too many (%d) arguments for command '%s'",
2119 argc,Cmd->name);
2120 return UDM_ERROR;
2121 }
2122
2123 for (i= 1; i < ac; i++)
2124 {
2125 if (!av[i])
2126 {
2127 tmpav[i]= NULL;
2128 continue;
2129 }
2130 if (!(p = UdmParseEnvVar(Conf, av[i])))
2131 {
2132 sprintf(Conf->errstr, "An error occured while parsing '%s'", av[i]);
2133 return UDM_ERROR;
2134 }
2135 av[i]= tmpav[i]= p;
2136 }
2137
2138 if (Cmd->action)
2139 rc= Cmd->action(C, ac, av);
2140
2141 for (i= 1; i < ac; i++)
2142 UDM_FREE(tmpav[i]);
2143
2144 if (Cmd->action)
2145 return rc;
2146 }
2147 }
2148 udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
2149 "Unknown command: %s", av[0]);
2150 return UDM_ERROR;
2151 }
2152
2153
2154 UDM_API(udm_rc_t)
UdmAgentAddLine(UDM_AGENT * Agent,const char * line)2155 UdmAgentAddLine(UDM_AGENT *Agent, const char *line)
2156 {
2157 UDM_CFG Cfg;
2158 char str[1024];
2159 bzero((void*) &Cfg, sizeof(Cfg));
2160 Cfg.Indexer= Agent;
2161 udm_snprintf(str, sizeof(str) - 1, "%s", line);
2162 return UdmEnvAddLine(&Cfg, str);
2163 }
2164
2165
2166 static udm_rc_t
EnvLoad(UDM_CFG * Cfg,const char * cname)2167 EnvLoad(UDM_CFG *Cfg, const char *cname)
2168 {
2169 char *str0 = NULL; /* Unsafe copy - will be used in strtok */
2170 char str1[1024]=""; /* To concatenate lines */
2171 FILE *config; /* File struct */
2172 udm_rc_t rc= UDM_OK;
2173 size_t line = 0, str0len = 0, str1len, str0size = 4096;
2174
2175 if ((str0 = (char*)UdmMalloc(str0size)) == NULL)
2176 {
2177 sprintf(Cfg->Indexer->Conf->errstr,
2178 "Can't alloc %d bytes at '%s': %d",
2179 (int) str0size, __FILE__, __LINE__);
2180 return UDM_ERROR;
2181 }
2182 str0[0]=0;
2183
2184 /* Open config file */
2185 if(!(config=fopen(cname,"r")))
2186 {
2187 sprintf(Cfg->Indexer->Conf->errstr,
2188 "Can't open config file '%s': %s", cname, strerror(errno));
2189 UDM_FREE(str0);
2190 return UDM_ERROR;
2191 }
2192
2193 update_current(cname);
2194
2195 /* Read lines and parse */
2196 while(fgets(str1,sizeof(str1),config))
2197 {
2198 char *end;
2199
2200 line++;
2201
2202 if(str1[0]=='#')continue;
2203 for (end = str1 + (str1len = strlen(str1)) - 1 ;
2204 (end>=str1) && (*end=='\r'||*end=='\n'||*end==' '||*end=='\t') ;
2205 *end--='\0');
2206 if(!str1[0])continue;
2207
2208 if(*end=='\\')
2209 {
2210 *end=0;
2211 if (str0len + str1len >= str0size)
2212 {
2213 str0size += 4096 + str1len;
2214 if ((str0 = (char*)UdmRealloc(str0, str0size)) == NULL)
2215 {
2216 sprintf(Cfg->Indexer->Conf->errstr,
2217 "Can't realloc %d bytes at '%s': %d",
2218 (int) str0size, __FILE__, __LINE__);
2219 return UDM_ERROR;
2220 }
2221 }
2222 strcat(str0,str1);
2223 str0len += str1len;
2224 continue;
2225 }
2226 strcat(str0,str1);
2227 str0len += str1len;
2228
2229 if (UDM_OK != (rc= UdmEnvAddLine(Cfg,str0)))
2230 {
2231 char err[2048];
2232 strcpy(err,Cfg->Indexer->Conf->errstr);
2233 sprintf(Cfg->Indexer->Conf->errstr, "%s:%d: %s", cname, (int) line, err);
2234 break;
2235 }
2236
2237 str0[0]=0;
2238 str0len = 0;
2239 }
2240 UDM_FREE(str0);
2241 fclose(config);
2242 return rc;
2243 }
2244
2245
2246 UDM_API(udm_rc_t)
UdmEnvLoad(UDM_AGENT * Indexer,const char * cname,int lflags)2247 UdmEnvLoad(UDM_AGENT *Indexer, const char *cname, int lflags)
2248 {
2249 UDM_CFG Cfg;
2250 UDM_SERVER Srv;
2251 udm_rc_t rc= UDM_OK;
2252 const char *dbaddr=NULL;
2253
2254 UdmServerInit(&Srv);
2255 bzero((void*)&Cfg, sizeof(Cfg));
2256 Cfg.Indexer = Indexer;
2257 Indexer->Conf->Cfg_Srv = Cfg.Srv = &Srv;
2258 Cfg.flags=lflags;
2259 Cfg.level=0;
2260
2261 /* Set DBAddr if for example passed from environment */
2262 if((dbaddr=UdmVarListFindStr(&Indexer->Conf->Vars,"DBAddr",NULL)))
2263 {
2264 if(UDM_OK != (rc= UdmEnvDBListAdd(Indexer->Conf, dbaddr)))
2265 goto freeex;
2266 }
2267
2268 if(UDM_OK == (rc=EnvLoad(&Cfg,cname)))
2269 {
2270 UDM_ENV *Env= Indexer->Conf;
2271
2272 if (UDM_OK != (rc= UdmEnvPrepare(Env)))
2273 goto freeex;
2274
2275 UdmVarListInsStr(&Env->Vars, "Request.User-Agent", UDM_USER_AGENT);
2276 }
2277
2278 freeex:
2279 UdmServerFree(&Srv);
2280 return rc;
2281 }
2282
2283
2284
2285 static size_t
UdmMatchToStr(char * str,size_t size,const UDM_MATCH * M,const char * arg,const char * cmd)2286 UdmMatchToStr(char *str, size_t size,
2287 const UDM_MATCH *M, const char *arg, const char *cmd)
2288 {
2289 if (cmd)
2290 return udm_snprintf(str, size, "%s %s%s%s \"%s\" \"%s\"",
2291 cmd,
2292 M->Param.match_mode == UDM_MATCH_REGEX ? " regex" : "",
2293 UdmMatchIsNegative(M) ? " nomatch" : "",
2294 UdmMatchIsCaseInsensitive(M) ? "" : " NoCase",
2295 arg, UdmMatchPatternConstStr(M));
2296 else
2297 return udm_snprintf(str, size, "%s %s%s%s \"%s\"",
2298 arg,
2299 M->Param.match_mode == UDM_MATCH_REGEX ? " regex" : "",
2300 UdmMatchIsNegative(M) ? " nomatch" : "",
2301 UdmMatchIsCaseInsensitive(M) ? "" : " NoCase",
2302 UdmMatchPatternConstStr(M));
2303
2304 }
2305
2306
2307 /*
2308 static udm_rc_t
2309 UdmMatchListPrint(FILE *f, UDM_MATCHLIST *L, const char *cmd)
2310 {
2311 size_t i;
2312 char str[128];
2313 for (i= 0; i < L->nmatches; i++)
2314 {
2315 UDM_MATCH *M= &L->Match[i];
2316 UdmMatchToStr(str, sizeof(str), M, M->arg, cmd);
2317 fprintf(f, "%s\n", str);
2318 }
2319 return UDM_OK;
2320 }
2321 */
2322
2323 static udm_rc_t
UdmDBListPrint(FILE * f,UDM_DBLIST * L)2324 UdmDBListPrint(FILE *f, UDM_DBLIST *L)
2325 {
2326 size_t i;
2327 for (i= 0; i < L->nitems; i++)
2328 {
2329 UDM_DB *db= &L->Item[i];
2330 char dbaddr[128]= "<noaddr>";
2331 size_t nbytes;
2332 db->dbhandler->Info(db, dbaddr, sizeof(dbaddr), &nbytes, UDM_DBINFO_ADDR);
2333 fprintf(f, "DBAddr %s\n", dbaddr);
2334 }
2335 return UDM_OK;
2336 }
2337
2338
2339 static const char *
UdmMatchTypeToServerCommand(int match_type)2340 UdmMatchTypeToServerCommand(int match_type)
2341 {
2342 switch (match_type)
2343 {
2344 case UDM_MATCH_WILD: return "Realm";
2345 case UDM_MATCH_REGEX: return "Realm regex";
2346 case UDM_MATCH_SUBNET: return "Subnet";
2347 case UDM_MATCH_BEGIN: return "Server";
2348 default: return "<UnknownMatchType>";
2349 }
2350 }
2351
2352
2353 static udm_rc_t
UdmServerOptionsPrint(FILE * f,UDM_SERVER * Prev,UDM_SERVER * Curr)2354 UdmServerOptionsPrint(FILE *f, UDM_SERVER *Prev, UDM_SERVER *Curr)
2355 {
2356 UDM_CONFCMD *cmd;
2357 for (cmd= commands; cmd->name; cmd++)
2358 {
2359 if (cmd->action == srv_rpl_var ||
2360 cmd->action == srv_rpl_num_var ||
2361 cmd->action == srv_rpl_bool_var ||
2362 cmd->action == srv_rpl_time_var ||
2363 /* cmd->action == srv_rpl_hdr || */
2364 cmd->action == srv_rpl_auth ||
2365 /* cmd->action == srv_rpl_mirror || */
2366 cmd->action == srv_rpl_charset ||
2367 0)
2368 {
2369 const char *cval= UdmVarListFindStr(&Curr->Vars, cmd->name, "");
2370 if (cmd->action == srv_rpl_auth)
2371 {
2372 if (cval[0])
2373 {
2374 char encoded[128], decoded[128];
2375 udm_snprintf(encoded, sizeof(encoded), "%s", cval);
2376 udm_base64_decode(decoded, encoded, sizeof(decoded));
2377 fprintf(f, "%s '%s'\n", cmd->name, decoded);
2378 }
2379 }
2380 else
2381 {
2382 const char *pval= Prev ? UdmVarListFindStr(&Prev->Vars, cmd->name, "") : "";
2383 if (strcmp(pval, cval))
2384 fprintf(f, "%s '%s'\n", cmd->name, cval);
2385 }
2386 }
2387 }
2388 return UDM_OK;
2389 }
2390
2391
2392 static size_t
UdmServerToStr(char * str,size_t size,UDM_SERVER * S)2393 UdmServerToStr(char *str, size_t size, UDM_SERVER *S)
2394 {
2395 const char *method= UdmMethodStr(S->Filter.method);
2396 udm_bool_t case_sense= UdmVarListFindBool(&S->Vars, "case_sense", UDM_TRUE);
2397 udm_bool_t nomatch= UdmVarListFindBool(&S->Vars, "nomatch", UDM_FALSE);
2398 const char *case_str= case_sense ? "" : "NoCase";
2399 const char *match_str= nomatch ? "NoMatch" : "";
2400 const char *follow_str= UdmFollowStr(S->webspace);
2401 const char *command= UdmMatchTypeToServerCommand(S->Filter.Match.Param.match_mode);
2402 const char *alias= UdmVarListFindStr(&S->Vars, "Alias", "");
2403
2404 /* TODO: Server site: cuts directory name */
2405 switch (S->Filter.Match.Param.match_mode)
2406 {
2407 case UDM_MATCH_WILD:
2408 case UDM_MATCH_REGEX:
2409 case UDM_MATCH_SUBNET:
2410 follow_str= "";
2411 break;
2412 case UDM_MATCH_BEGIN:
2413 case UDM_MATCH_FULL:
2414 case UDM_MATCH_SUBSTR:
2415 case UDM_MATCH_END:
2416 case UDM_MATCH_NUMERIC_LT:
2417 case UDM_MATCH_NUMERIC_GT:
2418 case UDM_MATCH_RANGE:
2419 break;
2420 }
2421
2422 return udm_snprintf(str, size,
2423 "%s %s %s %s %s '%s'%s%s",
2424 command, follow_str, method,
2425 case_str, match_str,
2426 UdmMatchPatternConstStr(&S->Filter.Match),
2427 alias[0] ? " " : "", alias);
2428 }
2429
2430
2431 static udm_rc_t
UdmServerListPrint(FILE * f,UDM_SERVERLIST * L)2432 UdmServerListPrint(FILE *f, UDM_SERVERLIST *L)
2433 {
2434 size_t i;
2435 char str[128];
2436 for (i= 0; i < L->nservers; i++)
2437 {
2438 UDM_SERVER *S= &L->Server[i];
2439 UDM_SERVER *P= i ? &L->Server[i-1] : NULL;
2440 /* UdmVarListPrint(f, &S->Vars);*/
2441 UdmServerOptionsPrint(f, P, S);
2442 UdmServerToStr(str, sizeof(str), S);
2443 fprintf(f, "%s\n", str);
2444 }
2445 return UDM_OK;
2446 }
2447
2448
2449 static udm_rc_t
UdmAliasListPrint(FILE * f,UDM_REPLACELIST * L,const char * cmd)2450 UdmAliasListPrint(FILE *f, UDM_REPLACELIST *L, const char *cmd)
2451 {
2452 size_t i;
2453 char str[128];
2454 for (i= 0; i < L->nitems; i++)
2455 {
2456 UdmMatchToStr(str, sizeof(str), &L->Item[i].Match, L->Item[i].Replace.str, cmd);
2457 fprintf(f, "%s\n", str);
2458 }
2459 return UDM_OK;
2460 }
2461
2462
2463 static udm_rc_t
UdmParserListPrint(FILE * f,UDM_PARSERLIST * L)2464 UdmParserListPrint(FILE *f, UDM_PARSERLIST *L)
2465 {
2466 size_t i;
2467 char str[1024];
2468 for (i= 0; i < L->nparsers; i++)
2469 {
2470 UDM_PARSER *P= &L->Parser[i];
2471 udm_snprintf(str, sizeof(str),
2472 "Mime \"%s\" \"%s\" '%s'%s%s%s",
2473 P->from_mime, P->to_mime, P->cmd,
2474 P->src ? " \"" : "",
2475 P->src ? P->src : "",
2476 P->src ? "\"" : "");
2477 fprintf(f, "%s\n", str);
2478 }
2479 return UDM_OK;
2480 }
2481
2482
2483 static udm_rc_t
UdmAffixListListPrint(FILE * f,UDM_AFFIXLISTLIST * L)2484 UdmAffixListListPrint(FILE *f, UDM_AFFIXLISTLIST *L)
2485 {
2486 size_t i;
2487 char str[256];
2488 for (i= 0; i < L->nitems; i++)
2489 {
2490 UDM_AFFIXLIST *A= &L->Item[i];
2491 udm_snprintf(str, sizeof(str),
2492 "Affix %s %s '%s'", A->lang, A->cset, A->fname);
2493 fprintf(f, "%s\n", str);
2494 }
2495 return UDM_OK;
2496 }
2497
2498
2499 static udm_rc_t
UdmSpellListListPrint(FILE * f,UDM_SPELLLISTLIST * L)2500 UdmSpellListListPrint(FILE *f, UDM_SPELLLISTLIST *L)
2501 {
2502 size_t i;
2503 char str[256];
2504 for (i= 0; i < L->nitems; i++)
2505 {
2506 UDM_SPELLLIST *S= &L->Item[i];
2507 udm_snprintf(str, sizeof(str),
2508 "Spell %s %s '%s'", S->lang, S->cset, S->fname);
2509 fprintf(f, "%s\n", str);
2510 }
2511 return UDM_OK;
2512 }
2513
2514
2515 static udm_rc_t
UdmLangmapListPrint(FILE * f,UDM_LANGMAPLIST * L)2516 UdmLangmapListPrint(FILE *f, UDM_LANGMAPLIST *L)
2517 {
2518 size_t i;
2519 char str[256];
2520 for (i= 0; i < L->nmaps; i++)
2521 {
2522 UDM_LANGMAP *M= &L->Map[i];
2523 udm_snprintf(str, sizeof(str),
2524 "LangmapFile '%s'", M->filename);
2525 fprintf(f, "%s\n", str);
2526 }
2527 return UDM_OK;
2528 }
2529
2530
2531 static udm_rc_t
UdmStopListListPrint(FILE * f,UDM_STOPLISTLIST * L)2532 UdmStopListListPrint(FILE *f, UDM_STOPLISTLIST *L)
2533 {
2534 size_t i;
2535 char str[256];
2536 for (i= 0; i < L->nitems; i++)
2537 {
2538 UDM_STOPLIST *S= &L->Item[i];
2539 udm_snprintf(str, sizeof(str),
2540 "StopwordFile '%s'", S->fname);
2541 fprintf(f, "%s\n", str);
2542 }
2543 return UDM_OK;
2544 }
2545
2546
2547 static udm_rc_t
UdmSynonymListListPrint(FILE * f,UDM_SYNONYMLISTLIST * L)2548 UdmSynonymListListPrint(FILE *f, UDM_SYNONYMLISTLIST *L)
2549 {
2550 size_t i;
2551 char str[256];
2552 for (i= 0; i < L->nitems; i++)
2553 {
2554 UDM_SYNONYMLIST *S= &L->Item[i];
2555 udm_snprintf(str, sizeof(str),
2556 "Synonym '%s'", S->fname);
2557 fprintf(f, "%s\n", str);
2558 }
2559 return UDM_OK;
2560 }
2561
2562
2563 UDM_API(udm_rc_t)
UdmEnvSave(UDM_AGENT * Indexer,const char * cname,int lflags)2564 UdmEnvSave(UDM_AGENT *Indexer, const char *cname, int lflags)
2565 {
2566 FILE *f;
2567 UDM_ENV *E= Indexer->Conf;
2568
2569 if (!strcmp(cname, "-"))
2570 f= stdout;
2571 else if (!(f= fopen(cname,"w")))
2572 {
2573 sprintf(Indexer->Conf->errstr,
2574 "Can't open output file '%s': %s", cname, strerror(errno));
2575 return UDM_ERROR;
2576 }
2577
2578 /* TODO: put interpreter line */
2579
2580 UdmDBListPrint(f, &E->DBList);
2581
2582 fprintf(f, "LocalCharset %s\n", E->lcs->name);
2583 fprintf(f, "BrowserCharset %s\n", E->bcs->name);
2584
2585 UdmSectionListPrint(&E->Sections, f);
2586 UdmVarListPrint(&E->Vars, f);
2587
2588 /*UdmMatchListPrint(f, &E->MimeTypes, "AddType");*/
2589 UdmParserListPrint(f, &E->Parsers);
2590 /*UdmMatchListPrint(f, &E->Filters, NULL);*/
2591
2592 /*
2593 UDM_MATCHLIST SectionFilters;
2594 UDM_MATCHLIST SectionHdrMatch;
2595 UDM_MATCHLIST SectionGsrMatch;
2596 UDM_MATCHLIST SectionMatch;
2597 */
2598
2599 UdmStopListListPrint(f, &E->StopWord);
2600 UdmSynonymListListPrint(f, &E->Synonym);
2601 UdmAffixListListPrint(f, &E->Affixes);
2602 UdmSpellListListPrint(f, &E->Spells);
2603 UdmLangmapListPrint(f, &E->LangMaps);
2604
2605 /*
2606 int CVS_ignore;
2607 UDM_WORDPARAM WordParam;
2608 UDM_CHINALIST Chi;
2609 UDM_CHINALIST Thai;
2610 #ifdef MECAB
2611 mecab_t *mecab;
2612 #endif
2613 UDM_UNIDATA *unidata;
2614 */
2615
2616
2617 /*
2618 UDM_VARLIST XMLEnterHooks;
2619 UDM_VARLIST XMLLeaveHooks;
2620 UDM_VARLIST XMLDataHooks;
2621 */
2622
2623 UdmAliasListPrint(f, &E->Aliases, "Alias");
2624 UdmAliasListPrint(f, &E->ReverseAliases, "ReverseAlias");
2625 UdmServerListPrint(f, &E->Servers);
2626
2627 if (f != stdout)
2628 fclose(f);
2629 return UDM_OK;
2630 }
2631