1 /* Copyright (C) 2000-2015 Lavtech.com corp. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License as published by
5    the Free Software Foundation; either version 2 of the License, or
6    (at your option) any later version.
7 
8    This program is distributed in the hope that it will be useful,
9    but WITHOUT ANY WARRANTY; without even the implied warranty of
10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11    GNU General Public License for more details.
12 
13    You should have received a copy of the GNU General Public License
14    along with this program; if not, write to the Free Software
15    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16 */
17 
18 #include "udm_config.h"
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <sys/types.h>
22 #include <string.h>
23 #include <errno.h>
24 
25 #include "udm_common.h"
26 #include "udm_ctype.h"
27 #include "udm_spell.h"
28 #include "udm_proto.h"
29 #include "udm_url.h"
30 #include "udm_parser.h"
31 #include "udm_conf.h"
32 #include "udm_log.h"
33 #include "udm_hrefs.h"
34 #include "udm_robots.h"
35 #include "udm_utils.h"
36 #include "udm_host.h"
37 #include "udm_server.h"
38 #include "udm_alias.h"
39 #include "udm_search_tl.h"
40 #include "udm_env.h"
41 #include "udm_match.h"
42 #include "udm_stopwords.h"
43 #include "udm_guesser.h"
44 #include "udm_unicode.h"
45 #include "udm_synonym.h"
46 #include "udm_vars.h"
47 #include "udm_db.h"
48 #include "udm_agent.h"
49 #include "udm_chinese.h"
50 #include "udm_contentencoding.h"
51 #include "udm_indexcache.h"
52 
53 static udm_rc_t EnvLoad(UDM_CFG *Cfg,const char *cname);
54 
55 /****************************  Load Configuration **********************/
56 
57 udm_search_mode_t
UdmSearchMode(const char * mode)58 UdmSearchMode(const char *mode)
59 {
60   if(!mode)return(UDM_MODE_ALL);
61   if(!strcmp(mode,"all-minus"))return(UDM_MODE_ALL_MINUS);
62   if(!strcmp(mode,"all-minus-half"))return(UDM_MODE_ALL_MINUS_HALF);
63   if(!strcmp(mode,"all"))return(UDM_MODE_ALL);
64   if(!strcmp(mode,"any"))return(UDM_MODE_ANY);
65   if(!strcmp(mode,"bool"))return(UDM_MODE_BOOL);
66   if(!strcmp(mode,"phrase"))return(UDM_MODE_PHRASE);
67   return(UDM_MODE_ALL);
68 }
69 
70 
71 udm_match_mode_t
UdmMatchMode(const char * mode)72 UdmMatchMode(const char * mode)
73 {
74   if(!mode)return(UDM_MATCH_FULL);
75   if(!strcmp(mode,"wrd"))return(UDM_MATCH_FULL);
76   if(!strcmp(mode,"full"))return(UDM_MATCH_FULL);
77   if(!strcmp(mode,"beg"))return(UDM_MATCH_BEGIN);
78   if(!strcmp(mode,"end"))return(UDM_MATCH_END);
79   if(!strcmp(mode,"sub"))return(UDM_MATCH_SUBSTR);
80   return(UDM_MATCH_FULL);
81 }
82 
83 
84 UDM_API(const char *)
UdmFollowStr(udm_webspace_t method)85 UdmFollowStr(udm_webspace_t method)
86 {
87   switch(method)
88   {
89     case UDM_WEBSPACE_PAGE:  return "Page";
90     case UDM_WEBSPACE_PATH:  return "Path";
91     case UDM_WEBSPACE_SITE:  return "Site";
92     case UDM_WEBSPACE_WORLD: return "World";
93     case UDM_WEBSPACE_URLLIST: return "URLList";
94     case UDM_WEBSPACE_UNKNOWN: break;
95   }
96   return "<Unknown follow type>";
97 }
98 
99 
UdmFollowType(const char * follow)100 udm_webspace_t UdmFollowType(const char * follow)
101 {
102   UDM_ASSERT(follow != NULL);
103   if (!strcasecmp(follow,"page")) return UDM_WEBSPACE_PAGE;
104   if (!strcasecmp(follow,"path")) return UDM_WEBSPACE_PATH;
105   if (!strcasecmp(follow,"site")) return UDM_WEBSPACE_SITE;
106   if (!strcasecmp(follow,"world")) return UDM_WEBSPACE_WORLD;
107   if (!strcasecmp(follow,"urllist")) return UDM_WEBSPACE_URLLIST;
108   return UDM_WEBSPACE_UNKNOWN;
109 }
110 
/*
  Return the configuration command name that corresponds to a method.
  UDM_METHOD_UNKNOWN and any value not listed below
  give "<Unknown method>".
*/
const char *UdmMethodStr(udm_method_t method)
{
  const char *str= "<Unknown method>";

  switch (method)
  {
    case UDM_METHOD_DISALLOW:     str= "Disallow";     break;
    case UDM_METHOD_GET:          str= "Allow";        break;
    case UDM_METHOD_CHECKMP3ONLY: str= "CheckMP3Only"; break;
    case UDM_METHOD_CHECKMP3:     str= "CheckMP3";     break;
    case UDM_METHOD_HEAD:         str= "CheckOnly";    break;
    case UDM_METHOD_HREFONLY:     str= "HrefOnly";     break;
    case UDM_METHOD_VISITLATER:   str= "Skip";         break;
    case UDM_METHOD_INDEX:        str= "IndexIf";      break;
    case UDM_METHOD_NOINDEX:      str= "NoIndexIf";    break;
    case UDM_METHOD_IMPORTONLY:   str= "ImportOnly";   break;
    case UDM_METHOD_UNKNOWN:      break;
  }
  return str;
}
129 
130 
UdmMethod(const char * s)131 udm_method_t UdmMethod(const char *s)
132 {
133   if (s == NULL)
134     return UDM_METHOD_UNKNOWN;
135   if (!strcasecmp(s,"Disallow"))     return UDM_METHOD_DISALLOW;
136   if (!strcasecmp(s,"Allow"))        return UDM_METHOD_GET;
137   if (!strcasecmp(s,"CheckMP3Only")) return UDM_METHOD_CHECKMP3ONLY;
138   if (!strcasecmp(s,"CheckMP3"))     return UDM_METHOD_CHECKMP3;
139   if (!strcasecmp(s,"CheckOnly"))    return UDM_METHOD_HEAD;
140   if (!strcasecmp(s,"HrefOnly"))     return UDM_METHOD_HREFONLY;
141   if (!strcasecmp(s,"Skip"))         return UDM_METHOD_VISITLATER;
142   if (!strcasecmp(s,"SkipIf"))       return UDM_METHOD_VISITLATER;
143   if (!strcasecmp(s,"IndexIf"))      return UDM_METHOD_INDEX;
144   if (!strcasecmp(s,"NoIndexIf"))    return UDM_METHOD_NOINDEX;
145   if (!strcasecmp(s,"ImportOnly"))   return UDM_METHOD_IMPORTONLY;
146   return UDM_METHOD_UNKNOWN;
147 }
148 
149 
/*
  Convert one character 0..9, A..Z or a..z into an integer 0..35.
  Letters are case insensitive: 'A' and 'a' give 10, 'Z' and 'z' give 35.
  Any other character gives 0.
  TODO34: using WF with big values (e.g. 'Q') overflows
*/
static int
UdmBase_09AZ_to_int(int h)
{
  if (h >= '0' && h <= '9')
    return h - '0';
  if (h >= 'A' && h <= 'Z')
    return h - 'A' + 10;
  /*
    The upper bound here used to be 'Z', which made the range empty
    and turned every lower-case letter into 0.
  */
  if (h >= 'a' && h <= 'z')
    return h - 'a' + 10;
  return 0;
}
162 
163 
/*
  Fill a 256-entry weight factor table from a weight factor string.

  res  - output table; entries 0..255 are written
  wf   - weight factor string, one character per section, the
         rightmost character belongs to section 1. The characters
         '-' and '.' are skipped and do not consume a section number.
  num  - number of sections; entries above num are set to 0

  All entries start as 1. The string is applied only when its
  length is between 1 and 255.
*/
static void
UdmWeightFactorsInit(char *res, const char *wf, size_t num)
{
  size_t wflen= strlen(wf);
  int sn;

  memset(res, 1, 256);

  if (wflen > 0 && wflen < 256)
  {
    size_t pos;
    int secno= 1;

    /* Walk the string right to left */
    for (pos= wflen; pos > 0; pos--)
    {
      char ch= wf[pos - 1];
      if (ch == '-' || ch == '.')
        continue;
      res[secno++]= UdmBase_09AZ_to_int(ch);
    }
  }

  /* Sections beyond the configured number get no weight */
  for (sn= num + 1; sn < 256; sn++)
    res[sn]= 0;
}
192 
193 
/*
  Initialize a weight factor table from a variable looked up in two lists.

  The variable "name" is read from both V1 and V2; the value from V2
  is used when it is not empty, otherwise the value from V1.
  The number of sections is read from "NumSections" in V1 (default 256).

  Returns the number of sections when a non-empty weight factor
  string was found, or 0 when both values were empty.
*/
size_t UdmWeightFactorsInit2(char *res,
                             UDM_VARLIST *V1,
                             UDM_VARLIST *V2,
                             const char *name)
{
  size_t numsections= UdmVarListFindInt(V1, "NumSections", 256);
  const char *from_v1= UdmVarListFindStr(V1, name, "");
  const char *from_v2= UdmVarListFindStr(V2, name, "");
  const char *wf= from_v1;

  if (from_v2[0] != '\0')
    wf= from_v2;

  UdmWeightFactorsInit(res, wf, numsections);

  if (wf[0] == '\0')
    return 0;
  return numsections;
}
206 
/* hold the path of the current config file */
static char current[1024]= ""; /* TODO34: get rid of this */

/*
  Remember the directory part of a config file name in "current".

  Everything up to (not including) the last '/' in name is stored.
  When name has no '/', "current" becomes an empty string.
  Names that do not fit into "current" are truncated instead of
  overflowing the buffer.
*/
static void update_current(const char *name)
{
  char *slash;
  size_t len= strlen(name);

  /* Bounded copy: name comes from the configuration and can be long */
  if (len >= sizeof(current))
    len= sizeof(current) - 1;
  memcpy(current, name, len);
  current[len]= '\0';

  if ((slash= strrchr(current, '/')))
  {
    *slash= 0;
  }
  else
  {
    *current= 0;
  }
}
223 
rel_cur_name(char * res,size_t maxlen,const char * name)224 static size_t rel_cur_name(char *res, size_t maxlen, const char *name)
225 {
226   size_t n = 0;
227   if (*current)
228   {
229 #ifdef WIN32
230     n= udm_snprintf(res, maxlen, "%s", name);
231 #else
232     n= udm_snprintf(res, maxlen, "%s%s%s", current, UDMSLASHSTR, name);
233 #endif
234   }
235   else
236   {
237     strncpy(res, name, maxlen);
238   }
239   res[maxlen]= 0;
240   return n;
241 }
242 
243 
244 static size_t
rel_name(UDM_ENV * Env,char * res,size_t maxlen,const char * varname,const char * dirname,const char * name)245 rel_name(UDM_ENV *Env, char *res, size_t maxlen,
246          const char *varname, const char *dirname, const char *name)
247 {
248   size_t    n;
249   const char  *dir= UdmVarListFindStr(&Env->Vars, varname, dirname);
250   if(name[0]=='/' || (name[0] && name[1] == ':'))
251     n= udm_snprintf(res, maxlen, "%s", name);
252   else
253     n= udm_snprintf(res,maxlen,"%s%s%s",dir,UDMSLASHSTR,name);
254   res[maxlen]='\0';
255   return n;
256 }
257 
258 
259 /* Relative name for included .conf files */
260 static size_t
rel_etc_name(UDM_ENV * Env,char * res,size_t maxlen,const char * fname)261 rel_etc_name(UDM_ENV *Env, char *res, size_t maxlen, const char *fname)
262 {
263   return rel_name(Env, res, maxlen, "ConfDir", UDM_CONF_DIR, fname);
264 }
265 
266 
267 /*
268   Relative name for langmap, stopwords, synonym files.
269   Their position depends on --enable-fhs-layot.
270 */
271 static size_t
rel_etc2_name(UDM_ENV * Env,char * res,size_t maxlen,const char * fname)272 rel_etc2_name(UDM_ENV *Env, char *res, size_t maxlen, const char *fname)
273 {
274 #ifdef HAVE_FHS_LAYOUT
275   /* Better FHS layout */
276   return rel_name(Env, res, maxlen, "ShareDir", UDM_SHARE_DIR, fname);
277 #else
278   /* Traditional mnoGoSearch layout */
279   return rel_name(Env, res, maxlen, "ConfDir", UDM_CONF_DIR, fname);
280 #endif
281 }
282 
283 
284 static size_t
rel_langmap_name(UDM_ENV * Env,char * res,size_t maxlen,const char * fname)285 rel_langmap_name(UDM_ENV *Env, char *res, size_t maxlen, const char *fname)
286 {
287   return rel_etc2_name(Env, res, maxlen, fname);
288 }
289 
290 
291 static size_t
rel_stopwords_name(UDM_ENV * Env,char * res,size_t maxlen,const char * fname)292 rel_stopwords_name(UDM_ENV *Env, char *res, size_t maxlen, const char *fname)
293 {
294   return rel_etc2_name(Env, res, maxlen, fname);
295 }
296 
297 
298 static size_t
rel_synonym_name(UDM_ENV * Env,char * res,size_t maxlen,const char * fname)299 rel_synonym_name(UDM_ENV *Env, char *res, size_t maxlen, const char *fname)
300 {
301   return rel_etc2_name(Env, res, maxlen, fname);
302 }
303 
304 
305 /*
306   *.freq files is a special case.
307   It's installed to /share/freq/xxx.freq with --enable-fhs-layout,
308   and to /etc/xxx.freq otherwise.
309 */
310 static size_t
rel_freq_name(UDM_ENV * Env,char * res,size_t maxlen,const char * fname)311 rel_freq_name(UDM_ENV *Env, char *res, size_t maxlen, const char *fname)
312 {
313 #ifdef HAVE_FHS_LAYOUT
314   char freqname[128];
315   if (fname[0] == '/')
316     udm_snprintf(freqname, sizeof(freqname), "%s", fname);
317   else
318     udm_snprintf(freqname, sizeof(freqname), "freq/%s", fname);
319   return rel_name(Env, res, maxlen, "ShareDir", UDM_SHARE_DIR, freqname);
320 #else
321   return rel_name(Env, res, maxlen, "ConfDir", UDM_CONF_DIR, fname);
322 #endif
323 }
324 
325 
326 /* Relative name for /var files */
rel_var_name(UDM_ENV * Env,char * res,size_t maxlen,const char * name)327 static size_t rel_var_name(UDM_ENV *Env,char *res,size_t maxlen,
328                            const char *name)
329 {
330   size_t    n;
331   const char  *dir=UdmVarListFindStr(&Env->Vars,"VarDir",UDM_VAR_DIR);
332   if(name[0]=='/')n = udm_snprintf(res, maxlen, "%s", name);
333   else    n = udm_snprintf(res,maxlen,"%s%s%s",dir,UDMSLASHSTR,name);
334   res[maxlen]='\0';
335   return n;
336 }
337 
/*
  Split a string into tokens using UdmGetStrToken().

  str - the string to split
  av  - output array of token pointers; all max entries are zeroed
        first, so unused entries stay NULL
  max - capacity of av

  Returns the number of tokens stored, at most max.
*/
size_t
UdmGetArgs(char *str, const char **av, size_t max)
{
  size_t count;
  char *state;
  char *token;

  bzero((void*) av, max * sizeof(*av));

  token= UdmGetStrToken(str, &state);
  for (count= 0; token && count < max; count++)
  {
    av[count]= token;
    token= UdmGetStrToken(NULL, &state);
  }
  return count;
}
356 
357 
358 static udm_bool_t
UdmMatchParamModeRegexOrString(UDM_MATCH_PARAM * Param,const char * prm)359 UdmMatchParamModeRegexOrString(UDM_MATCH_PARAM *Param, const char *prm)
360 {
361   if (!strcasecmp(prm, "string"))
362   {
363     Param->match_mode= UDM_MATCH_WILD;
364     return UDM_FALSE;
365   }
366   if (!strcasecmp(prm, "regex"))
367   {
368     Param->match_mode= UDM_MATCH_REGEX;
369     return UDM_FALSE;
370   }
371   if (!strcasecmp(prm, "regexp"))
372   {
373     Param->match_mode= UDM_MATCH_REGEX;
374     return UDM_FALSE;
375   }
376   return UDM_TRUE;
377 }
378 
379 
380 static udm_bool_t
UdmMatchParamStrToMode(UDM_MATCH_PARAM * Param,const char * prm)381 UdmMatchParamStrToMode(UDM_MATCH_PARAM *Param, const char *prm)
382 {
383   /* Match modes */
384   if (!UdmMatchParamModeRegexOrString(Param, prm))
385     return UDM_FALSE;
386   if (!strcasecmp(prm, "prefix"))
387   {
388     Param->match_mode= UDM_MATCH_BEGIN;
389     return UDM_FALSE;
390   }
391   return UDM_TRUE;
392 }
393 
394 
395 static udm_bool_t
UdmMatchParamStrToCaseSensitivity(UDM_MATCH_PARAM * Param,const char * prm)396 UdmMatchParamStrToCaseSensitivity(UDM_MATCH_PARAM *Param, const char *prm)
397 {
398   /* Case sensitivity */
399   if (!strcasecmp(prm, "nocase"))
400   {
401     UdmMatchParamSetCaseInsensitive(Param, UDM_FALSE);
402     return UDM_FALSE;
403   }
404   if (!strcasecmp(prm, "case"))
405   {
406     UdmMatchParamSetCaseInsensitive(Param, UDM_TRUE);
407     return UDM_FALSE;
408   }
409   return UDM_TRUE;
410 }
411 
412 
413 static udm_bool_t
UdmMatchParamStrToNegative(UDM_MATCH_PARAM * Param,const char * prm)414 UdmMatchParamStrToNegative(UDM_MATCH_PARAM *Param, const char *prm)
415 {
416   /* Negative condition */
417   if (!strcasecmp(prm, "match"))
418   {
419     UdmMatchParamSetNegative(Param, UDM_FALSE);
420     return UDM_FALSE;
421   }
422   if (!strcasecmp(prm, "nomatch"))
423   {
424     UdmMatchParamSetNegative(Param, UDM_TRUE);
425     return UDM_FALSE;
426   }
427   return UDM_TRUE;
428 }
429 
430 
431 static udm_bool_t
UdmMatchParamSetParam(UDM_MATCH_PARAM * Param,const char * prm)432 UdmMatchParamSetParam(UDM_MATCH_PARAM *Param, const char *prm)
433 {
434   if (!UdmMatchParamStrToMode(Param, prm))
435     return UDM_FALSE;
436   if (!UdmMatchParamStrToCaseSensitivity(Param, prm))
437     return UDM_FALSE;
438   if (!UdmMatchParamStrToNegative(Param, prm))
439     return UDM_FALSE;
440   return UDM_TRUE;
441 }
442 
443 
444 static udm_rc_t
add_srv(UDM_CFG * C,size_t ac,const char ** av)445 add_srv(UDM_CFG *C, size_t ac, const char **av)
446 {
447   UDM_ENV  *Conf=C->Indexer->Conf;
448   UDM_AGENT *Indexer = C->Indexer;
449   size_t  i;
450   int  has_alias=0;
451 
452   if(!(C->flags & UDM_FLAG_ADD_SERV))
453     return UDM_OK;
454 
455   C->Srv->command = 'S';
456   C->Srv->webspace= UDM_WEBSPACE_DEFAULT;
457   C->Srv->ordre = ++C->ordre;
458   C->Srv->Filter.method= UDM_METHOD_DEFAULT;
459   UdmMatchParamSetNegative(&C->Srv->Filter.Match.Param, UDM_FALSE);
460   UdmMatchParamSetCaseInsensitive(&C->Srv->Filter.Match.Param, UDM_TRUE);
461   UdmMatchParamSetOptimization(&C->Srv->Filter.Match.Param,
462                                !UDM_TEST((C->flags & UDM_FLAG_DONT_ADD_TO_DB)));
463 
464   if(!strcasecmp(av[0],"Server"))
465   {
466     C->Srv->Filter.Match.Param.match_mode= UDM_MATCH_BEGIN;
467   }
468   else if(!strcasecmp(av[0],"Subnet"))
469   {
470     C->Srv->Filter.Match.Param.match_mode= UDM_MATCH_SUBNET;
471     Conf->Servers.have_subnets=1;
472   }
473   else
474   {
475     C->Srv->Filter.Match.Param.match_mode= UDM_MATCH_WILD;
476   }
477 
478 
479   for(i=1; i<ac; i++)
480   {
481     int  o;
482 
483     if (UDM_WEBSPACE_UNKNOWN!= (o= UdmFollowType(av[i])))
484       C->Srv->webspace= (udm_webspace_t) o;
485     else if (UDM_METHOD_UNKNOWN!= (o= UdmMethod(av[i])))
486       C->Srv->Filter.method= (udm_method_t) o;
487     else if (UdmMatchParamSetParam(&C->Srv->Filter.Match.Param, av[i]))
488     {
489       if (!UdmMatchPatternConstStr(&C->Srv->Filter.Match))
490         UdmMatchSetPattern(&C->Srv->Filter.Match, av[i]);
491       else if(!has_alias)
492       {
493         has_alias=1;
494         UdmVarListReplaceStr(&C->Srv->Vars,"Alias",av[i]);
495       }
496       else
497       {
498         sprintf(Conf->errstr,"too many argiments: '%s'",av[i]);
499         return UDM_ERROR;
500       }
501     }
502   }
503   if (!UdmMatchPatternConstStr(&C->Srv->Filter.Match))
504   {
505     sprintf(Conf->errstr,"too few argiments in '%s' command", av[0]);
506     return UDM_ERROR;
507   }
508   if(UDM_OK != UdmServerAdd(Indexer, C->Srv, C->flags))
509   {
510     char * s_err;
511     s_err = (char*)UdmStrdup(Conf->errstr);
512     sprintf(Conf->errstr,"%s",s_err);
513     UDM_FREE(s_err);
514     UdmMatchFreeAndInit(&C->Srv->Filter.Match);
515     return UDM_ERROR;
516   }
517   if ((C->Srv->Filter.Match.Param.match_mode == UDM_MATCH_BEGIN) &&
518       (UdmMatchPatternConstStr(&C->Srv->Filter.Match)[0])&&
519       (C->flags&UDM_FLAG_ADD_SERVURL))
520   {
521     UDM_HREFPARAM HrefParam;
522     UdmHrefParamInit(&HrefParam);
523     HrefParam.server_id= C->Srv->site_id;
524     HrefParam.hops= (uint4) UdmVarListFindInt(&C->Srv->Vars, "StartHops", 0);
525     HrefParam.link_source= UDM_LINK_SOURCE_CONF;
526     UdmHrefListAddConst(&Conf->Hrefs, &HrefParam,
527                         UdmMatchPatternConstStr(&C->Srv->Filter.Match));
528   }
529   UdmMatchFreeAndInit(&C->Srv->Filter.Match);
530   UdmVarListDel(&C->Srv->Vars,"AuthBasic");
531   UdmVarListDel(&C->Srv->Vars,"Alias");
532   return UDM_OK;
533 }
534 
535 
536 static udm_rc_t
UdmFilterListAddWithServer(UDM_AGENT * A,UDM_FILTERLIST * L,UDM_MATCH_PARAM * Param,udm_method_t method,const UDM_CONST_STR * Pattern,char * err,size_t errsize,int ordre)537 UdmFilterListAddWithServer(UDM_AGENT *A,
538                            UDM_FILTERLIST *L, UDM_MATCH_PARAM *Param,
539                            udm_method_t method,
540                            const UDM_CONST_STR *Pattern,
541                            char *err, size_t errsize, int ordre)
542 {
543   if (UDM_OK != UdmFilterListAdd(L, Param, method, Pattern, err, errsize))
544     return UDM_ERROR;
545 
546   if (A != NULL)
547   {
548     UDM_SERVERLIST S;
549     UDM_SERVER n;
550     udm_rc_t rc;
551 
552     bzero((void*)&n, sizeof(n));
553     S.Server= &n;
554     n.command= 'F';
555     UdmMatchSetPattern(&n.Filter.Match, Pattern->str);
556     n.Filter.Match.Param= *Param;
557     n.Filter.method= method;
558     n.ordre= ordre;
559 
560     rc= UdmSrvAction(A, &S, UDM_SRV_ACTION_ADD);
561     UdmVarListFree(&n.Vars);
562 
563     if (rc != UDM_OK) return rc;
564   }
565   return UDM_OK;
566 }
567 
568 
569 static udm_rc_t
add_alias(UDM_CFG * C,size_t ac,const char ** av)570 add_alias(UDM_CFG *C, size_t ac, const char **av)
571 {
572   UDM_ENV *Conf = C->Indexer->Conf;
573   UDM_MATCH_PARAM Param;
574   UDM_CONST_STR Pattern;
575   size_t    i;
576 
577   UdmConstStrInit(&Pattern);
578   UdmMatchParamInit(&Param);
579   Param.match_mode= UDM_MATCH_BEGIN;
580   UdmMatchParamSetCaseInsensitive(&Param, UDM_TRUE);
581   UdmMatchParamSetOptimization(&Param,
582                                !UDM_TEST(C->flags & UDM_FLAG_DONT_ADD_TO_DB));
583   for (i= 1; i < ac; i++)
584   {
585     if (!UdmMatchParamStrToMode(&Param, av[i]) ||
586         !UdmMatchParamStrToCaseSensitivity(&Param, av[i]))
587       continue;
588 
589     if (!Pattern.str)
590     {
591       UdmConstStrSetStr(&Pattern, av[i]);
592     }
593     else
594     {
595       char err[120]= "";
596       UDM_REPLACELIST  *L= NULL;
597       UDM_CONST_STR Alias;
598 
599       UdmConstStrSetStr(&Alias, av[i]);
600       if (!strcasecmp(av[0], "Alias")) L= &Conf->Aliases;
601       if (!strcasecmp(av[0], "ReverseAlias")) L= &Conf->ReverseAliases;
602 
603       if (UDM_OK != UdmReplaceListAdd(L, &Param, &Pattern, &Alias, err, sizeof(err)))
604       {
605         udm_snprintf(Conf->errstr, sizeof(Conf->errstr), "%s", err);
606         return UDM_ERROR;
607       }
608     }
609   }
610   if (!Pattern.str)
611   {
612     udm_snprintf(Conf->errstr, sizeof(Conf->errstr), "too few arguments");
613     return UDM_ERROR;
614   }
615   return UDM_OK;
616 }
617 
618 
619 static udm_rc_t
UdmExcerptQualityParse(UDM_ENV * Env,UDM_EXCERPT_FRAGMENT * Fragment,const char * str)620 UdmExcerptQualityParse(UDM_ENV *Env,
621                        UDM_EXCERPT_FRAGMENT *Fragment, const char *str)
622 {
623   if (!strcasecmp(str, "ignore"))
624   {
625     Fragment->quality= 0;
626     return UDM_OK;
627   }
628   if (udm_isdigit(str[0]))
629   {
630     Fragment->quality= atoi(str);
631     return UDM_OK;
632   }
633   udm_snprintf(Env->errstr, sizeof(Env->errstr),
634                "ExcerptFragment: syntax error near '%s'; "
635                "Expected 'ignore' or an unsigned number.", str);
636   return UDM_ERROR;
637 }
638 
639 
640 /*
641   ExcerptFragment  [Case | NoCase] [String | Regex] quality pattern
642 */
643 static udm_rc_t
add_excerpt_fragment(UDM_CFG * C,size_t ac,const char ** av)644 add_excerpt_fragment(UDM_CFG *C, size_t ac, const char **av)
645 {
646   UDM_ENV *Conf= C->Indexer->Conf;
647   UDM_EXCERPT_FRAGMENT Specific, *VarSpecific;
648   UDM_VAR *Var;
649   char err[128];
650   char name[64];
651   size_t i;
652   udm_rc_t rc;
653   UDM_VALUE_HANDLER *ha= &UdmValueHandlerExcerptFragment;
654 
655   UdmExcerptFragmentInit(&Specific);
656   Specific.Match.Param.match_mode= UDM_MATCH_WILD;
657   UdmMatchParamSetCaseInsensitive(&Specific.Match.Param, UDM_TRUE);
658 
659   for (i= 1; i < ac; i++)
660   {
661     if (UdmMatchParamModeRegexOrString(&Specific.Match.Param, av[i]) &&
662         UdmMatchParamStrToCaseSensitivity(&Specific.Match.Param, av[i]))
663       break;
664   }
665 
666   if (ac - i != 2)
667   {
668     udm_snprintf(Conf->errstr, sizeof(Conf->errstr) - 1,
669                  "too %s (%d) required paramenters",
670                  ac - i > 2 ? "many" : "few", (int) (ac - i));
671     return UDM_ERROR;
672   }
673 
674   if (UDM_OK != UdmExcerptQualityParse(Conf, &Specific, av[i]))
675   {
676     UdmExcerptFragmentFree(&Specific);
677     return UDM_ERROR;
678   }
679 
680   if (UDM_OK != UdmMatchSetPattern(&Specific.Match, av[i + 1]))
681     return UDM_ERROR;
682 
683   if (UDM_OK != UdmMatchComp(&Specific.Match, err, sizeof(err)))
684   {
685     udm_snprintf(Conf->errstr, sizeof(Conf->errstr) - 1, "%s", err);
686     UdmExcerptFragmentFree(&Specific);
687     return UDM_ERROR;
688   }
689 
690   udm_snprintf(name, sizeof(name), "ExcerptFragment%04d",
691                (int) C->excerpt_fragments_count++);
692   if (UDM_OK != (rc= UdmVarCreate(ha, &Var, name, NULL, 0)))
693   {
694     UdmExcerptFragmentFree(&Specific);
695     return rc;
696   }
697 
698   VarSpecific= (UDM_EXCERPT_FRAGMENT *) UdmVarDataPtr(Var);
699   VarSpecific[0]= Specific;
700   bzero((void *) &Specific, sizeof(Specific));
701 
702   if (UDM_OK != (rc= UdmVarListReplaceVar(&Conf->Vars, Var)))
703   {
704     UdmVarFree(Var);
705     return rc;
706   }
707   return UDM_OK;
708 }
709 
710 
711 static udm_rc_t
add_filter(UDM_CFG * C,size_t ac,const char ** av)712 add_filter(UDM_CFG *C, size_t ac, const char **av)
713 {
714   UDM_ENV    *Conf= C->Indexer->Conf;
715   UDM_MATCH_PARAM Param;
716   size_t     i;
717   udm_method_t method= UdmMethod(av[0]);
718 
719   if (method == UDM_METHOD_UNKNOWN)
720   {
721     udm_snprintf(Conf->errstr, sizeof(Conf->errstr), "Unknown method %s", av[0]);
722     return UDM_ERROR;
723   }
724   if (!(C->flags & UDM_FLAG_ADD_SERV))
725     return UDM_OK;
726 
727   UdmMatchParamInit(&Param);
728   Param.match_mode= UDM_MATCH_WILD;
729   UdmMatchParamSetCaseInsensitive(&Param, UDM_TRUE);
730   UdmMatchParamSetOptimization(&Param, !UDM_TEST(C->flags & UDM_FLAG_DONT_ADD_TO_DB));
731 
732   C->ordre++;
733   for (i= 1; i < ac ; i++)
734   {
735     if (UdmMatchParamSetParam(&Param, av[i]))
736     {
737       char err[120]= "";
738       UDM_CONST_STR Pattern;
739       UdmConstStrSetStr(&Pattern, av[i]);
740 
741       if (UDM_OK != UdmFilterListAddWithServer(NULL, &Conf->Filters,
742                                                &Param, method, &Pattern,
743                                                err, sizeof(err), ++C->ordre))
744       {
745         udm_snprintf(Conf->errstr,sizeof(Conf->errstr)-1,"%s",err);
746         return UDM_ERROR;
747       }
748     }
749   }
750   return UDM_OK;
751 }
752 
753 
754 static udm_rc_t
add_section_filter(UDM_CFG * C,size_t ac,const char ** av)755 add_section_filter(UDM_CFG *C, size_t ac, const char **av)
756 {
757   UDM_ENV *Conf= C->Indexer->Conf;
758   UDM_MATCH_PARAM MatchParam;
759   size_t    i;
760   const char *section= NULL;
761   udm_method_t method= UdmMethod(av[0]);
762 
763   UdmMatchParamInit(&MatchParam);
764   MatchParam.match_mode= UDM_MATCH_WILD;
765   UdmMatchParamSetCaseInsensitive(&MatchParam, UDM_TRUE);
766   UdmMatchParamSetOptimization(&MatchParam, !UDM_TEST(C->flags & UDM_FLAG_DONT_ADD_TO_DB));
767 
768   C->ordre++;
769   for(i=1; i<ac ; i++)
770   {
771     if (!UdmMatchParamSetParam(&MatchParam, av[i]))
772       continue;
773     if (!section)
774     {
775       section= av[i];
776     }
777     else
778     {
779       char err[120]="";
780       UDM_CONST_STR Pattern;
781 
782       UdmConstStrSetStr(&Pattern, av[i]);
783       if (UDM_OK != UdmSectionFilterListAdd(&Conf->SectionFilters,
784                                        &MatchParam, method, &Pattern, section,
785                                        err, sizeof(err)))
786       {
787         udm_snprintf(Conf->errstr,sizeof(Conf->errstr)-1,"%s",err);
788         return UDM_ERROR;
789       }
790     }
791   }
792 
793   if (!section)
794   {
795     udm_snprintf(Conf->errstr, sizeof(Conf->errstr) - 1,
796                  "No section given for %s", av[0]);
797     return UDM_ERROR;
798   }
799   return UDM_OK;
800 }
801 
802 
803 static udm_rc_t
add_type_internal(UDM_CFG * C,size_t ac,const char ** av,UDM_REPLACELIST * Lst)804 add_type_internal(UDM_CFG *C, size_t ac, const char **av, UDM_REPLACELIST *Lst)
805 {
806   UDM_ENV    *Conf=C->Indexer->Conf;
807   UDM_MATCH_PARAM MatchParam;
808   UDM_CONST_STR Type;
809   size_t    i;
810   udm_rc_t rc= UDM_OK;
811   char err[128];
812 
813   UdmConstStrInit(&Type);
814   UdmMatchParamInit(&MatchParam);
815   MatchParam.match_mode= UDM_MATCH_WILD;
816   UdmMatchParamSetCaseInsensitive(&MatchParam, UDM_TRUE);
817   UdmMatchParamSetOptimization(&MatchParam, !UDM_TEST(C->flags & UDM_FLAG_DONT_ADD_TO_DB));
818 
819   for (i=1; i<ac; i++)
820   {
821     if (!UdmMatchParamSetParam(&MatchParam, av[i]))
822       continue;
823     if (!Type.str)
824       UdmConstStrSetStr(&Type, av[i]);
825     else
826     {
827       UDM_CONST_STR Pattern;
828       UdmConstStrSetStr(&Pattern, av[i]);
829       if(UDM_OK != (rc = UdmReplaceListAdd(Lst ,&MatchParam, &Pattern, &Type,
830                                            err,sizeof(err))))
831       {
832         udm_snprintf(Conf->errstr, sizeof(Conf->errstr), "%s", err);
833         return rc;
834       }
835     }
836   }
837   return rc;
838 }
839 
840 
841 static udm_rc_t
add_type(UDM_CFG * C,size_t ac,const char ** av)842 add_type(UDM_CFG *C, size_t ac, const char **av)
843 {
844   UDM_ENV *Conf=C->Indexer->Conf;
845   return add_type_internal(C, ac, av, &Conf->MimeTypes);
846 }
847 
848 
849 static udm_rc_t
add_encoding(UDM_CFG * C,size_t ac,const char ** av)850 add_encoding(UDM_CFG *C, size_t ac, const char **av)
851 {
852   UDM_ENV *Conf= C->Indexer->Conf;
853   return add_type_internal(C, ac, av, &Conf->Encodings);
854 }
855 
856 
857 static udm_rc_t
add_parser(UDM_CFG * C,size_t ac,const char ** av)858 add_parser(UDM_CFG *C, size_t ac, const char **av)
859 {
860   UDM_ENV  *Conf=C->Indexer->Conf;
861   UDM_PARSER P;
862   P.from_mime= UdmStrdup(av[1]);
863   P.to_mime= UdmStrdup(av[2]);
864   P.cmd= UdmStrdup(av[3] ? av[3] : "");
865   P.src= av[4] ? UdmStrdup(av[4]) : NULL;
866   UdmParserAdd(&Conf->Parsers,&P);
867   return UDM_OK;
868 }
869 
870 
871 static udm_rc_t
add_separator(UDM_VARLIST * Vars,const char * name,const char * val)872 add_separator(UDM_VARLIST *Vars, const char *name, const char *val)
873 {
874   UDM_DSTR buf;
875   UdmDSTRInit(&buf, 128);
876   UdmDSTRReset(&buf);
877   UdmDSTRAppendf(&buf, "separator.%s", name);
878   UdmVarListAddStr(Vars, UdmDSTRPtr(&buf), val);
879   UdmDSTRFree(&buf);
880   return UDM_OK;
881 }
882 
883 
884 static udm_rc_t
add_maxlen(UDM_VARLIST * Vars,const char * name,size_t maxlen)885 add_maxlen(UDM_VARLIST *Vars, const char *name, size_t maxlen)
886 {
887   UDM_DSTR buf;
888   UdmDSTRInit(&buf, 128);
889   UdmDSTRReset(&buf);
890   UdmDSTRAppendf(&buf, "maxlen.%s", name);
891   UdmVarListReplaceInt(Vars, UdmDSTRPtr(&buf), (int) maxlen);
892   UdmDSTRFree(&buf);
893   return UDM_OK;
894 }
895 
896 
897 static udm_rc_t
add_section(UDM_CFG * C,size_t ac,const char ** av)898 add_section(UDM_CFG *C, size_t ac, const char **av)
899 {
900   UDM_ENV  *Conf=C->Indexer->Conf;
901   UDM_SECTION_PARAM Param;
902   int      cdon, noindex= 0;
903   UDM_USERSECTIONLIST *SectionMatch= &Conf->SectionMatch;
904   const char *name= av[1];
905   int secno= 0, maxlen= 0;
906 
907   UdmSectionParamInit(&Param, 0);
908   if (!strncasecmp(name, UDM_CSTR_WITH_LEN("Raw.")))
909     Param.flags= (udm_var_flag_t) (Param.flags | UDM_VARFLAG_RAW);
910   /*
911     Do not use "url.*" sections in
912     clone detection by default
913   */
914   cdon= strncasecmp(av[1], "url", 3) ? 1 : 0;
915   if ((!(secno= atoi(av[2])) && av[2][0] != '0') || secno < 0 || secno > 255)
916   {
917     sprintf(Conf->errstr,"Section ID is not a valid number: %s",av[2]);
918     return UDM_ERROR;
919   }
920   if ((ac > 3 && !(maxlen= atoi(av[3])) && av[3][0] != '0') || maxlen < 0)
921   {
922     sprintf(Conf->errstr,"Section length is not a valid number: %s",av[3]);
923     return UDM_ERROR;
924   }
925 
926   if (ac > 3)
927   {
928     av++;
929     ac--;
930   }
931   av+= 3;
932   ac-= 3;
933 
934   for ( ; ac ; ac--, av++)
935   {
936     if (!strcasecmp(av[0], "cdon") || !strcasecmp(av[0], "DetectClones"))
937       cdon= 1;
938     else if (!strcasecmp(av[0], "cdoff") || !strcasecmp(av[0], "NoDetectClones"))
939       cdon= 0;
940     else if (!strcasecmp(av[0], "html"))
941       Param.flags= (udm_var_flag_t) (Param.flags| UDM_VARFLAG_HTMLSOURCE);
942     else if (!strcasecmp(av[0], "decimal"))
943       Param.flags= (udm_var_flag_t) (Param.flags | UDM_VARFLAG_DECIMAL);
944     else if (!strcasecmp(av[0], "wiki"))
945       Param.flags= (udm_var_flag_t) (Param.flags | UDM_VARFLAG_HTMLSOURCE | UDM_VARFLAG_WIKI);
946     else if (!strcasecmp(av[0], "noindex"))
947       noindex= 1;
948     else if (!strcasecmp(av[0], "index"))
949       noindex= 0;
950     else if (!strcasecmp(av[0], "text"))
951       /* do nothing */;
952     else if (!strcasecmp(av[0], "afterheaders"))
953       SectionMatch= &Conf->SectionHdrMatch;
954     else if (!strcasecmp(av[0], "afterguesser"))
955       SectionMatch= &Conf->SectionGsrMatch;
956     else if (!strcasecmp(av[0], "afterparser"))
957       SectionMatch= &Conf->SectionMatch;
958     else
959       break;
960   }
961   if (secno > 0) /* Don't set maxlen for secno=0 */
962     Conf->SectionParam.maxlen[secno]= (size_t) maxlen;
963   Param.secno= (udm_secno_t) secno;
964   Param.flags= (udm_var_flag_t) (Param.flags | (cdon ? 0 : UDM_VARFLAG_NOCLONE));
965   Param.flags= (udm_var_flag_t) (Param.flags | (noindex ? UDM_VARFLAG_NOINDEX : 0));
966 
967   if (!secno)
968     add_maxlen(&Conf->Vars, name, maxlen);
969   if (ac == 0)
970   {
971     /* no optional arguments */
972   }
973   else if (ac == 1)
974   {
975     /* <sep> */
976     add_separator(&Conf->Vars, name, av[0]);
977   }
978   else if (ac >= 2 && ac <= 4)
979   {
980     /*
981        <expr> <repl>
982        <sep> <expr> <repl>
983        <sep> <src> <expr> <repl>
984     */
985 
986     UDM_MATCH_PARAM MatchParam;
987     char err[120]= "";
988     UDM_CONST_STR Name, Source, Pattern, Replacement;
989 
990     UdmMatchParamInit(&MatchParam);
991     UdmConstStrSetStr(&Name, name);
992     UdmConstStrInit(&Source);
993     UdmConstStrInit(&Pattern);
994     UdmConstStrInit(&Replacement);
995     MatchParam.match_mode= UDM_MATCH_REGEX;
996     UdmMatchParamSetCaseInsensitive(&MatchParam, UDM_TRUE);
997     UdmMatchParamSetOptimization(&MatchParam, !UDM_TEST(C->flags & UDM_FLAG_DONT_ADD_TO_DB));
998 
999     switch (ac)
1000     {
1001       case 2:
1002         UdmConstStrSetStr(&Pattern, av[0]);
1003         UdmConstStrSetStr(&Replacement, av[1]);
1004         break;
1005 
1006       case 3:
1007         add_separator(&Conf->Vars, name, av[0]);
1008         UdmConstStrSetStr(&Pattern, av[1]);
1009         UdmConstStrSetStr(&Replacement, av[2]);
1010         break;
1011 
1012       case 4:
1013         add_separator(&Conf->Vars, name, av[0]);
1014         UdmConstStrSetStr(&Source, av[1]);
1015         UdmConstStrSetStr(&Pattern, av[2]);
1016         UdmConstStrSetStr(&Replacement, av[3]);
1017         break;
1018     }
1019 
1020     if(UDM_OK != UdmUserSectionListAdd(SectionMatch, &MatchParam,
1021                                        &Name, &Source, &Pattern, &Replacement,
1022                                        err, sizeof(err)))
1023     {
1024       udm_snprintf(Conf->errstr,sizeof(Conf->errstr)-1,"%s",err);
1025       return UDM_ERROR;
1026     }
1027     Param.flags= (udm_var_flag_t) (Param.flags | UDM_VARFLAG_USERDEF);
1028   }
1029   else
1030   {
1031     sprintf(Conf->errstr,"too many argiments: '%s'", av[0]);
1032     return UDM_ERROR;
1033   }
1034 
1035   UdmVarListReplaceStrnWithParam(&Conf->Sections, &Param, name, NULL, 0);
1036   return UDM_OK;
1037 }
1038 
1039 
1040 static udm_rc_t
do_include(UDM_CFG * C,size_t ac,const char ** av)1041 do_include(UDM_CFG *C, size_t ac, const char **av)
1042 {
1043   FILE *test;
1044   char save[1024];
1045   if(C->level<5)
1046   {
1047     udm_rc_t rc;
1048     char  fname[1024];
1049     rel_cur_name(fname, sizeof(fname)-1, av[1]);
1050     if ((test= fopen(fname, "r")))
1051       fclose(test);
1052     else
1053       rel_etc_name(C->Indexer->Conf, fname, sizeof(fname)-1, av[1]);
1054     strcpy(save, current);
1055     C->level++;
1056     rc= EnvLoad(C,fname);
1057     strcpy(current, save);
1058     C->level--;
1059     return rc;
1060   }
1061   else
1062   {
1063     sprintf(C->Indexer->Conf->errstr,"too big (%d) level in included files",C->level);
1064     return UDM_ERROR;
1065   }
1066   return UDM_OK;
1067 }
1068 
1069 
1070 static udm_rc_t
add_affix(UDM_CFG * C,size_t ac,const char ** av)1071 add_affix(UDM_CFG *C, size_t ac, const char **av)
1072 {
1073   UDM_ENV  *Conf=C->Indexer->Conf;
1074 
1075   if(C->flags&UDM_FLAG_SPELL)
1076   {
1077     char  fname[1024];
1078     rel_etc_name(Conf, fname, sizeof(fname) - 1, av[3]);
1079     if(UdmAffixListListAdd(&Conf->Affixes,av[1],av[2],fname))
1080     {
1081       sprintf(Conf->errstr,"Can't add affix :%s",fname);
1082       return UDM_ERROR;
1083     }
1084   }
1085   return UDM_OK;
1086 }
1087 
1088 
1089 static udm_rc_t
add_spell(UDM_CFG * C,size_t ac,const char ** av)1090 add_spell(UDM_CFG *C, size_t ac, const char **av)
1091 {
1092   UDM_ENV  *Conf=C->Indexer->Conf;
1093 
1094   if(C->flags&UDM_FLAG_SPELL)
1095   {
1096     char  fname[1024];
1097     rel_etc_name(Conf, fname, sizeof(fname) - 1, av[3]);
1098     if(UdmSpellListListAdd(&Conf->Spells,av[1],av[2],fname))
1099     {
1100      sprintf(Conf->errstr,"Can't load dictionary :%s",fname);
1101       return UDM_ERROR;
1102     }
1103   }
1104   return UDM_OK;
1105 }
1106 
1107 
1108 static udm_rc_t
add_stoplist(UDM_CFG * C,size_t ac,const char ** av)1109 add_stoplist(UDM_CFG *C, size_t ac, const char **av)
1110 {
1111   UDM_ENV  *Conf=C->Indexer->Conf;
1112   char  fname[1024];
1113   rel_stopwords_name(Conf, fname, sizeof(fname) - 1, av[1]);
1114   return UdmStopListLoad(Conf,fname);
1115 }
1116 
1117 
1118 static udm_rc_t
add_langmap(UDM_CFG * C,size_t ac,const char ** av)1119 add_langmap(UDM_CFG *C, size_t ac, const char **av)
1120 {
1121   UDM_ENV  *Conf=C->Indexer->Conf;
1122   udm_rc_t rc= UDM_OK;
1123   if (C->flags&UDM_FLAG_LOAD_LANGMAP)
1124   {
1125     char  fname[1024];
1126     rel_langmap_name(Conf, fname, sizeof(fname) - 1, av[1]);
1127     rc= UdmLoadLangMapFile(&Conf->LangMaps, fname);
1128   }
1129   return rc;
1130 }
1131 
1132 
1133 static udm_rc_t
add_synonym(UDM_CFG * C,size_t ac,const char ** av)1134 add_synonym(UDM_CFG *C, size_t ac, const char **av)
1135 {
1136   UDM_ENV  *Conf=C->Indexer->Conf;
1137   udm_rc_t rc= UDM_OK;
1138   if(C->flags&UDM_FLAG_SPELL)
1139   {
1140     char  fname[1024];
1141     rel_synonym_name(Conf, fname, sizeof(fname) - 1, av[1]);
1142     rc= UdmSynonymListLoad(Conf,fname);
1143   }
1144   return rc;
1145 }
1146 
1147 
1148 static udm_rc_t
add_chinese(UDM_CFG * C,size_t ac,const char ** av)1149 add_chinese(UDM_CFG *C, size_t ac, const char **av)
1150 {
1151   UDM_ENV *Conf=C->Indexer->Conf;
1152 
1153   /*
1154     This line was wrong: ChinesList was not really loaded
1155     from search.cgi
1156   */
1157   /* if(C->flags & UDM_FLAG_ADD_SERV)*/
1158 
1159   {
1160     char fname[1024];
1161     rel_freq_name(Conf, fname, sizeof(fname)-1, av[2] ? av[2] : "mandarin.freq");
1162     return UdmChineseListLoad(C->Indexer, &Conf->Chi,
1163                               av[1] ? av[1] : "GB2312", fname);
1164   }
1165   return UDM_OK;
1166 }
1167 
1168 
1169 static udm_rc_t
add_thai(UDM_CFG * C,size_t ac,const char ** av)1170 add_thai(UDM_CFG *C, size_t ac, const char **av)
1171 {
1172   UDM_ENV *Conf= C->Indexer->Conf;
1173   if (C->flags & UDM_FLAG_ADD_SERV)
1174   {
1175     char fname[1024];
1176     rel_freq_name(Conf, fname, sizeof(fname)-1, av[2] ? av[2] : "thai.freq");
1177     return UdmChineseListLoad(C->Indexer, &Conf->Thai,
1178                               av[1] ? av[1] : "tis-620", fname);
1179   }
1180   return UDM_OK;
1181 }
1182 
1183 
1184 static udm_rc_t
add_url(UDM_CFG * C,size_t ac,const char ** av)1185 add_url(UDM_CFG *C, size_t ac, const char **av)
1186 {
1187   UDM_AGENT *A= C->Indexer;
1188 
1189   if (C->flags&UDM_FLAG_ADD_SERV)
1190   {
1191     char    *al = NULL;
1192     UDM_SERVER  *Srv;
1193     if ((Srv= UdmServerFind(A, &A->Conf->Servers, av[1], &al)))
1194     {
1195       UDM_HREFPARAM HrefParam;
1196       UdmHrefParamInit(&HrefParam);
1197       HrefParam.link_source= UDM_LINK_SOURCE_CONF;
1198       UdmHrefListAddConst(&A->Conf->Hrefs, &HrefParam, av[1]);
1199     }
1200     UDM_FREE(al);
1201   }
1202   return UDM_OK;
1203 }
1204 
1205 
1206 static udm_rc_t
add_srv_table(UDM_CFG * C,size_t ac,const char ** av)1207 add_srv_table(UDM_CFG *C, size_t ac, const char **av)
1208 {
1209   UDM_ENV *Conf=C->Indexer->Conf;
1210   udm_rc_t rc= UDM_OK;
1211   UDM_DBLIST dbl;
1212 
1213   /*
1214     Skip ServerTable when loading for search, not for indexing.
1215     Useful when the ServerTable options are written in a shared
1216     include.conf file together with DBAddr options, and this file
1217     is included from both indexer.conf and search.htm
1218   */
1219   if (!(C->flags & UDM_FLAG_ADD_SERV))
1220     return UDM_OK;
1221 
1222   UdmDBListInit(&dbl);
1223   if (UDM_OK != (rc= UdmDBListAdd(&dbl, av[1], Conf->errstr, sizeof(Conf->errstr))))
1224     goto ex;
1225   UDM_ASSERT(dbl.nitems == 1);
1226 
1227   if (UDM_OK != (rc= dbl.Item[0].dbhandler->ServerAction(C->Indexer, &dbl.Item[0], &Conf->Servers, UDM_SRV_ACTION_TABLE)))
1228     UdmEnvCopyErrMsgFromDB(Conf, &dbl.Item[0]);
1229 ex:
1230   UdmDBListFree(&dbl);
1231   return rc;
1232 }
1233 
1234 
1235 static udm_rc_t
add_limit(UDM_CFG * C,size_t ac,const char ** av)1236 add_limit(UDM_CFG *C, size_t ac, const char **av)
1237 {
1238   UDM_ENV  *Conf=C->Indexer->Conf;
1239   char * sc;
1240   char * nm;
1241 
1242   if (ac == 2)
1243   {
1244     if((sc = strchr(av[1],':')))
1245     {
1246       *sc++='\0';
1247       nm=(char*)UdmMalloc(strlen(av[1])+8);
1248       sprintf(nm,"Limit-%s",av[1]);
1249       UdmVarListReplaceStr(&Conf->Vars, nm, sc);
1250       UDM_FREE(nm);
1251     }
1252   }
1253   else if (ac == 3)
1254   {
1255     char name[128];
1256     udm_snprintf(name, sizeof(name), "Limit.%s", av[1]);
1257     UdmVarListReplaceStr(&Conf->Vars, name, av[2]);
1258   }
1259   return UDM_OK;
1260 }
1261 
1262 
1263 static udm_rc_t
add_user_score(UDM_CFG * C,size_t ac,const char ** av)1264 add_user_score(UDM_CFG *C, size_t ac, const char **av)
1265 {
1266   UDM_ENV  *Conf=C->Indexer->Conf;
1267   char name[128];
1268   UDM_ASSERT(ac == 3);
1269   udm_snprintf(name, sizeof(name), "Score.%s", av[1]);
1270   UdmVarListReplaceStr(&Conf->Vars, name, av[2]);
1271   return UDM_OK;
1272 }
1273 
1274 
1275 static udm_rc_t
add_user_site_score(UDM_CFG * C,size_t ac,const char ** av)1276 add_user_site_score(UDM_CFG *C, size_t ac, const char **av)
1277 {
1278   UDM_ENV  *Conf=C->Indexer->Conf;
1279   char name[128];
1280   udm_snprintf(name, sizeof(name), "SiteScore.%s", av[1]);
1281   UdmVarListReplaceStr(&Conf->Vars, name, av[2]);
1282   return UDM_OK;
1283 }
1284 
1285 
1286 static udm_rc_t
add_user_order(UDM_CFG * C,size_t ac,const char ** av)1287 add_user_order(UDM_CFG *C, size_t ac, const char **av)
1288 {
1289   UDM_ENV *Conf= C->Indexer->Conf;
1290   char name[128];
1291   udm_snprintf(name, sizeof(name), "Order.%s", av[1]);
1292   UdmVarListReplaceStr(&Conf->Vars, name, av[2]);
1293   return UDM_OK;
1294 }
1295 
1296 
1297 static udm_rc_t
flush_srv_table(UDM_CFG * C,size_t ac,const char ** av)1298 flush_srv_table(UDM_CFG *C, size_t ac, const char **av)
1299 {
1300   UDM_ENV *Conf=C->Indexer->Conf;
1301   udm_rc_t rc= UDM_OK;
1302   if(C->flags&UDM_FLAG_ADD_SERV)
1303   {
1304     UDM_AGENT A;
1305     A.Conf= Conf;
1306     rc= UdmSrvAction(&A, &Conf->Servers, UDM_SRV_ACTION_FLUSH);
1307   }
1308   return rc;
1309 }
1310 
1311 
1312 static udm_rc_t
dblist_free(UDM_CFG * C,size_t ac,const char ** av)1313 dblist_free(UDM_CFG *C, size_t ac, const char **av)
1314 {
1315   UDM_ENV  *Conf=C->Indexer->Conf;
1316   UdmDBListFree(&Conf->DBList);
1317   return UDM_OK;
1318 }
1319 
1320 
1321 static udm_rc_t
env_rpl_casefolding(UDM_CFG * C,size_t ac,const char ** av)1322 env_rpl_casefolding(UDM_CFG *C, size_t ac, const char **av)
1323 {
1324   UDM_ENV *Conf = C->Indexer->Conf;
1325   UDM_UNIDATA *unidata;
1326   if (!(unidata= UdmUnidataGetByName(av[1])))
1327   {
1328     sprintf(Conf->errstr,"CaseFolding '%s' is not supported", av[1]);
1329     return UDM_ERROR;
1330   }
1331   Conf->unidata= unidata;
1332   return UDM_OK;
1333 }
1334 
1335 
1336 static udm_rc_t
env_rpl_charset(UDM_CFG * C,size_t ac,const char ** av)1337 env_rpl_charset(UDM_CFG *C, size_t ac, const char **av)
1338 {
1339   UDM_ENV *Conf= C->Indexer->Conf;
1340   UDM_CHARSET *cs;
1341   if (!(cs= UdmGetCharSet(av[1])))
1342   {
1343     sprintf(Conf->errstr,"charset '%s' is not supported",av[1]);
1344     return UDM_ERROR;
1345   }
1346   if (!strcasecmp(av[0],"LocalCharset"))
1347   {
1348     if (!cs->cset->septoken)
1349     {
1350       sprintf(Conf->errstr,
1351               "charset '%s' is not supported as LocalCharset", av[1]);
1352       return UDM_ERROR;
1353     }
1354     Conf->lcs= cs;
1355     UdmVarListReplaceStr(&Conf->Vars,av[0],av[1]);
1356   }
1357   else if(!strcasecmp(av[0],"BrowserCharset")){
1358     Conf->bcs=cs;
1359     UdmVarListReplaceStr(&Conf->Vars,av[0],av[1]);
1360   }
1361   return UDM_OK;
1362 }
1363 
1364 
1365 static udm_rc_t
srv_rpl_charset(UDM_CFG * C,size_t ac,const char ** av)1366 srv_rpl_charset(UDM_CFG *C, size_t ac, const char **av)
1367 {
1368   UDM_ENV *Conf = C->Indexer->Conf;
1369   UDM_CHARSET *cs;
1370   if (!(cs= UdmGetCharSet(av[1])))
1371   {
1372     sprintf(Conf->errstr,"charset '%s' is not supported",av[1]);
1373     return UDM_ERROR;
1374   }
1375   UdmVarListReplaceStr(&C->Srv->Vars,av[0],av[1]);
1376   return UDM_OK;
1377 }
1378 
1379 
1380 static udm_rc_t
srv_rpl_mirror(UDM_CFG * C,size_t ac,const char ** av)1381 srv_rpl_mirror(UDM_CFG *C, size_t ac, const char **av)
1382 {
1383   if (!strcasecmp(av[0],"MirrorRoot") || !strcasecmp(av[0],"MirrorHeadersRoot"))
1384   {
1385     char fname[1024];
1386     rel_var_name(C->Indexer->Conf, fname, sizeof(fname)-1, av[1]);
1387     UdmVarListReplaceStr(&C->Srv->Vars,av[0],fname);
1388   }
1389   else if(!strcasecmp(av[0],"MirrorPeriod"))
1390   {
1391     int tm= Udm_dp2time_t(av[1]);
1392     UdmVarListReplaceInt(&C->Srv->Vars,"MirrorPeriod",tm);
1393   }
1394   return UDM_OK;
1395 }
1396 
1397 
1398 static udm_rc_t
srv_rpl_auth(UDM_CFG * C,size_t ac,const char ** av)1399 srv_rpl_auth(UDM_CFG *C, size_t ac, const char **av)
1400 {
1401   char name[128];
1402   udm_snprintf(name, sizeof(name) - 1, "%s", av[0]);
1403   name[sizeof(name)-1]= '\0';
1404   if (av[1])
1405   {
1406     size_t  len= strlen(av[1]);
1407     char *auth=(char*)UdmMalloc(BASE64_LEN(strlen(av[1])));
1408     udm_base64_encode(av[1],auth,len);
1409     UdmVarListReplaceStr(&C->Srv->Vars,name,auth);
1410     UDM_FREE(auth);
1411   }
1412   else
1413   {
1414     UdmVarListReplaceStr(&C->Srv->Vars,name,"");
1415   }
1416   return UDM_OK;
1417 }
1418 
1419 
1420 char *
UdmParseEnvVar(UDM_ENV * Conf,const char * str)1421 UdmParseEnvVar(UDM_ENV *Conf, const char *str)
1422 {
1423   const char *p1= str, *p2= str;
1424   UDM_DSTR rc;
1425   UDM_STR tmp;
1426   UdmDSTRInit(&rc, 256);
1427   while ((p1= strstr(p1, "$(")))
1428   {
1429     const char *p3;
1430     UdmDSTRAppend(&rc, p2, p1 - p2);
1431     if ((p3= strchr(p1 + 2, ')')))
1432     {
1433       const char *s;
1434       char varname[128];
1435       udm_snprintf(varname, sizeof(varname), "%.*s", (int) (p3 - p1 - 2), p1 + 2);
1436       if ((s= UdmVarListFindStr(&Conf->Vars, varname, NULL)))
1437         UdmDSTRAppendSTR(&rc, s);
1438       p1= p2= p3 + 1;
1439     }
1440     else
1441     {
1442       UdmDSTRFree(&rc);
1443       return(NULL);
1444     }
1445   }
1446   UdmDSTRAppendSTR(&rc, p2);
1447   UdmDSTRGiveValue(&rc, &tmp);
1448   return tmp.str;
1449 }
1450 
1451 
1452 static udm_rc_t
env_rpl_env_var(UDM_CFG * C,size_t ac,const char ** av)1453 env_rpl_env_var(UDM_CFG *C, size_t ac, const char **av)
1454 {
1455   UDM_ENV *Conf= C->Indexer->Conf;
1456   char *p= getenv(av[1]);
1457   if (!p)
1458   {
1459     sprintf(Conf->errstr, "ImportEnv '%s': no such variable.", av[1]);
1460     return UDM_ERROR;
1461   }
1462   UdmVarListReplaceStr(&Conf->Vars, av[1], p);
1463   return UDM_OK;
1464 }
1465 
1466 
1467 static udm_rc_t
env_rpl_encoding(UDM_CFG * C,size_t ac,const char ** av)1468 env_rpl_encoding(UDM_CFG *C, size_t ac, const char **av)
1469 {
1470   UDM_ENV  *Conf=C->Indexer->Conf;
1471   udm_content_encoding_t ce;
1472   UDM_ASSERT(ac == 2);
1473 
1474   UdmVarListReplaceStr(&Conf->Vars, av[0], av[1]);
1475   ce= UdmContentEncodingID(av[1]);
1476   switch (ce)
1477   {
1478     case UDM_CONTENT_ENCODING_IDENTITY:
1479     case UDM_CONTENT_ENCODING_DEFLATE:
1480       return UDM_OK;
1481     default:
1482       udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1483                    "Bad value for '%s': '%s'", av[0], av[1]);
1484       return UDM_ERROR;
1485   }
1486   return UDM_OK;
1487 }
1488 
1489 
1490 static udm_rc_t
env_dbaddr(UDM_CFG * C,size_t ac,const char ** av)1491 env_dbaddr(UDM_CFG *C, size_t ac, const char **av)
1492 {
1493   UDM_ENV  *Conf=C->Indexer->Conf;
1494   udm_rc_t rc;
1495   size_t i;
1496   for (i= 1; i < ac; i++)
1497   {
1498     if (UDM_OK != (rc= UdmEnvDBListAdd(Conf, av[i] ? av[i] : "")))
1499       return rc;
1500   }
1501   return UDM_OK;
1502 }
1503 
1504 
1505 static udm_rc_t
env_rpl_var(UDM_CFG * C,size_t ac,const char ** av)1506 env_rpl_var(UDM_CFG *C, size_t ac, const char **av)
1507 {
1508   UDM_ENV  *Conf=C->Indexer->Conf;
1509   if (!strcasecmp(av[0], "Segmenter"))
1510   {
1511     int seg= 0;
1512 #ifdef CHASEN
1513     if (!strcasecmp(av[1], "Chasen"))
1514       seg= 1;
1515 #endif
1516 #ifdef MECAB
1517     if (!strcasecmp(av[1], "Mecab"))
1518       seg= 1;
1519 #endif
1520     if (!strcasecmp(av[1], "Freq"))
1521       seg= 1;
1522     if (!strcasecmp(av[1], "CJK"))
1523       seg= 1;
1524     if (!seg)
1525     {
1526       sprintf(Conf->errstr, "Unsupported segmenter method: '%s'", av[1]);
1527       return UDM_ERROR;
1528     }
1529   }
1530   if (!strcasecmp(av[0], "Log2Stderr"))
1531     Conf->Log.logFD= udm_strntobool(av[1], strlen(av[1])) ? stderr : NULL;
1532   UdmVarListReplaceStr(&Conf->Vars,av[0],av[1]);
1533   return UDM_OK;
1534 }
1535 
1536 
1537 static udm_rc_t
env_rpl_named_var(UDM_CFG * C,size_t ac,const char ** av)1538 env_rpl_named_var(UDM_CFG *C, size_t ac, const char **av)
1539 {
1540   UDM_ENV *Conf= C->Indexer->Conf;
1541   UDM_SECTION_PARAM Param;
1542   UdmSectionParamInit(&Param, 0);
1543   if (!strncasecmp(av[1], UDM_CSTR_WITH_LEN("Raw.")))
1544     Param.flags= (udm_var_flag_t) (Param.flags | UDM_VARFLAG_RAW);
1545   UdmVarListReplaceStrnWithParam(&Conf->Vars, &Param, av[1], av[2], strlen(av[2]));
1546   return UDM_OK;
1547 }
1548 
1549 
1550 static udm_rc_t
rpl_xml_hook(UDM_CFG * C,size_t ac,const char ** av)1551 rpl_xml_hook(UDM_CFG *C, size_t ac, const char **av)
1552 {
1553   UDM_ENV *Conf= C->Indexer->Conf;
1554   UDM_VARLIST *Vars= !strcasecmp(av[0], "XMLEnterHook") ?
1555                      &Conf->XMLEnterHooks :
1556                      !strcasecmp(av[0], "XMLDataHook") ?
1557                      &Conf->XMLDataHooks : &Conf->XMLLeaveHooks;
1558   UdmVarListReplaceStr(Vars,av[1],av[2]);
1559   return UDM_OK;
1560 }
1561 
1562 
1563 static udm_rc_t
srv_rpl_var(UDM_CFG * C,size_t ac,const char ** av)1564 srv_rpl_var(UDM_CFG *C, size_t ac, const char **av)
1565 {
1566   UdmVarListReplaceStr(&C->Srv->Vars,av[0],av[1]);
1567   return UDM_OK;
1568 }
1569 
1570 
1571 static udm_rc_t
srv_add_proxy(UDM_CFG * C,const char * cmd,const char * arg)1572 srv_add_proxy(UDM_CFG *C, const char *cmd, const char *arg)
1573 {
1574   UDM_SERVER *Server= C->Srv;
1575   udm_rc_t rc= UDM_OK;
1576   UDM_URL url;
1577   size_t auth_length;
1578 
1579   if (!strcasecmp(arg, "none") ||
1580       !strcasecmp(arg, ""))
1581   {
1582     UdmURLListFree(&Server->ProxyList);
1583     return UDM_OK;
1584   }
1585 
1586   UdmURLInit(&url);
1587   if (UDM_OK != UdmURLParse(&url, arg) ||
1588       !url.schema || strcasecmp(url.schema, "http") ||
1589       url.filename || (url.path && strcmp(url.path, "/")) ||
1590       (url.auth && (auth_length= strlen(url.auth)) > 64))
1591   {
1592     UDM_ENV *Conf= C->Indexer->Conf;
1593     udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1594                  "Bad '%s' value: '%s'", cmd, arg);
1595     rc= UDM_ERROR;
1596   }
1597   if (rc == UDM_OK && url.auth)
1598   {
1599     /* Wrap the authorization part into Base64 */
1600     char auth[128];
1601     udm_base64_encode(url.auth, auth, auth_length);
1602     UdmFree(url.auth);
1603     url.auth= UdmStrdup(auth);
1604   }
1605   UdmURLListAdd(&Server->ProxyList, &url);
1606   UdmURLFree(&url);
1607   return rc;
1608 }
1609 
1610 
1611 static udm_rc_t
srv_proxy_var(UDM_CFG * C,size_t ac,const char ** av)1612 srv_proxy_var(UDM_CFG *C, size_t ac, const char **av)
1613 {
1614   size_t i;
1615   UdmURLListFree(&C->Srv->ProxyList);
1616   for (i= 1; i < ac; i++)
1617   {
1618     if (UDM_OK != srv_add_proxy(C, av[0], av[i]))
1619       return UDM_ERROR;
1620   }
1621   return UDM_OK;
1622 }
1623 
1624 
1625 static int
collect_links_destination(const char * str)1626 collect_links_destination(const char *str)
1627 {
1628   if (!strcasecmp(str, "yes"))
1629     return UDM_COLLECT_LINKS_YES;
1630   if (!strcasecmp(str, "all"))
1631     return UDM_COLLECT_LINKS_ALL_DST;
1632   if (!strcasecmp(str, "inner"))
1633     return UDM_COLLECT_LINKS_INNER;
1634   if (!strcasecmp(str, "outer"))
1635     return UDM_COLLECT_LINKS_OUTER;
1636   if (!strcasecmp(str, "site"))
1637     return UDM_COLLECT_LINKS_SITE;
1638   if (!strcasecmp(str, "page"))
1639     return UDM_COLLECT_LINKS_PAGE;
1640   if (!strcasecmp(str, "badscheme"))
1641     return UDM_COLLECT_LINKS_BADSCHEME;
1642   if (!strcasecmp(str, "bad"))
1643     return UDM_COLLECT_LINKS_BAD;
1644   if (!strcasecmp(str, "hops"))
1645     return UDM_COLLECT_LINKS_HOPS;
1646   if (!strcasecmp(str, "filter"))
1647     return UDM_COLLECT_LINKS_FILTER;
1648   if (!strcasecmp(str, "persite"))
1649     return UDM_COLLECT_LINKS_PERSITE;
1650   if (!strcasecmp(str, "no"))
1651     return UDM_COLLECT_LINKS_NONE;
1652   return -1;
1653 }
1654 
1655 
1656 static int
collect_links_format(const char * str)1657 collect_links_format(const char *str)
1658 {
1659   if (!strcasecmp(str, "asis"))
1660     return UDM_COLLECT_LINKS_ASIS;
1661   if (!strcasecmp(str, "absolute"))
1662     return UDM_COLLECT_LINKS_ABSOLUTE;
1663   return -1;
1664 }
1665 
1666 
1667 static udm_rc_t
srv_rpl_collect_links(UDM_CFG * C,size_t ac,const char ** av)1668 srv_rpl_collect_links(UDM_CFG *C, size_t ac, const char **av)
1669 {
1670   size_t i;
1671   int destination= UDM_COLLECT_LINKS_NONE;
1672   int format= UDM_COLLECT_LINKS_ASIS;
1673   for (i= 1; i < ac; i++)
1674   {
1675     int flag;
1676     if ((flag= collect_links_destination(av[i])) >= 0)
1677       destination|= flag;
1678     else if ((flag= collect_links_format(av[i])) >= 0)
1679       format= flag;
1680     else
1681     {
1682       UDM_ENV  *Conf= C->Indexer->Conf;
1683       udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1684                    "%s: unknown flag: '%s'", av[0], av[i]);
1685       return UDM_ERROR;
1686     }
1687   }
1688   UdmVarListReplaceInt(&C->Srv->Vars,av[0], destination | format);
1689   return UDM_OK;
1690 }
1691 
1692 
1693 static udm_rc_t
srv_rpl_follow_links(UDM_CFG * C,size_t ac,const char ** av)1694 srv_rpl_follow_links(UDM_CFG *C, size_t ac, const char **av)
1695 {
1696   size_t i;
1697   int follow= 0;
1698   for (i= 1; i < ac; i++)
1699   {
1700     udm_link_source_t source;
1701     if (!strcasecmp(av[i], "yes"))
1702       follow= UDM_LINK_SOURCES_YES;
1703     else if (!strcasecmp(av[i], "no"))
1704       follow= 0;
1705     else if (av[i][0] == '-' &&
1706              (source= UdmLinkSourceByName(av[i] + 1)) != UDM_LINK_SOURCE_UNKNOWN)
1707     {
1708       follow&= ~(1 << source);
1709     }
1710     else if ((source= UdmLinkSourceByName(av[i])) != UDM_LINK_SOURCE_UNKNOWN)
1711     {
1712       follow|= (1 << source);
1713     }
1714     else
1715     {
1716       UDM_ENV  *Conf= C->Indexer->Conf;
1717       udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1718                    "%s: unknown link source: '%s'", av[0], av[i]);
1719       return UDM_ERROR;
1720     }
1721   }
1722   UdmVarListReplaceInt(&C->Srv->Vars, av[0], follow);
1723   return UDM_OK;
1724 }
1725 
1726 
1727 static int
robots_flag(const char * str)1728 robots_flag(const char *str)
1729 {
1730   if (!strcasecmp(str, "yes"))
1731     return UDM_ROBOTS_ALL;
1732   if (!strcasecmp(str, "xrobotstag"))
1733     return UDM_ROBOTS_HEADER;
1734   if (!strcasecmp(str, "robotstxt"))
1735     return UDM_ROBOTS_TXT;
1736   if (!strcasecmp(str, "meta"))
1737     return UDM_ROBOTS_META;
1738   if (!strcasecmp(str, "rel"))
1739     return UDM_ROBOTS_REL;
1740   if (!strcasecmp(str, "no"))
1741     return UDM_ROBOTS_NONE;
1742   return -1;
1743 }
1744 
1745 
1746 static udm_rc_t
srv_rpl_robots(UDM_CFG * C,size_t ac,const char ** av)1747 srv_rpl_robots(UDM_CFG *C, size_t ac, const char **av)
1748 {
1749   size_t i;
1750   int flags= UDM_ROBOTS_NONE;
1751   for (i= 1; i < ac; i++)
1752   {
1753     int flag;
1754     if ((flag= robots_flag(av[i])) >= 0)
1755       flags|= flag;
1756     else
1757     {
1758       UDM_ENV  *Conf= C->Indexer->Conf;
1759       udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1760                    "%s: unknown flag: '%s'", av[0], av[i]);
1761       return UDM_ERROR;
1762     }
1763   }
1764   UdmVarListReplaceInt(&C->Srv->Vars, av[0], flags);
1765   return UDM_OK;
1766 }
1767 
1768 
1769 static udm_rc_t
srv_rpl_hdr(UDM_CFG * C,size_t ac,const char ** av)1770 srv_rpl_hdr(UDM_CFG *C, size_t ac, const char **av)
1771 {
1772   const char *semicolon= NULL;
1773   char  name[128];
1774 
1775   switch(ac)
1776   {
1777     case 3:
1778       udm_snprintf(name, sizeof(name), "Request.%s", av[1]);
1779       return UdmVarListReplaceStr(&C->Srv->Vars, name, av[2]);
1780 
1781     case 2:
1782       if ((semicolon= strchr(av[1],':')))
1783       {
1784         UDM_CONST_STR val;
1785         size_t namelength= semicolon - av[1];
1786         udm_snprintf(name, sizeof(name), "Request.%.*s", (int) namelength, av[1]);
1787         UdmConstStrSetStr(&val, semicolon + 1);
1788         UdmConstStrTrim(&val," \t");
1789         return UdmVarListReplaceStrn(&C->Srv->Vars, name, val.str, val.length);
1790       }
1791       udm_snprintf(C->Indexer->Conf->errstr, sizeof(C->Indexer->Conf->errstr),
1792                    "No semicolon found in %s\n", av[0]);
1793       return UDM_ERROR;
1794       break;
1795   }
1796   UDM_ASSERT(0);
1797   return UDM_ERROR;
1798 }
1799 
1800 
1801 static udm_rc_t
env_rpl_bool_var(UDM_CFG * C,size_t ac,const char ** av)1802 env_rpl_bool_var(UDM_CFG *C, size_t ac, const char **av)
1803 {
1804   UDM_ENV *Conf= C->Indexer->Conf;
1805   int res= !strcasecmp(av[1],"yes") || atoi(av[1]) == 1;
1806   if(!strcasecmp(av[0], "CVSIgnore")) Conf->CVS_ignore= res;
1807   UdmVarListReplaceInt(&Conf->Vars,av[0],res);
1808   return UDM_OK;
1809 }
1810 
1811 
1812 static udm_rc_t
srv_rpl_bool_var(UDM_CFG * C,size_t ac,const char ** av)1813 srv_rpl_bool_var(UDM_CFG *C, size_t ac, const char **av)
1814 {
1815   int res= !strcasecmp(av[1],"yes") || atoi(av[1]) == 1;
1816   UdmVarListReplaceInt(&C->Srv->Vars,av[0],res);
1817   return UDM_OK;
1818 }
1819 
1820 
1821 static udm_rc_t
env_rpl_size(UDM_CFG * C,size_t ac,const char ** av)1822 env_rpl_size(UDM_CFG *C, size_t ac, const char **av)
1823 {
1824   UDM_ENV *Conf= C->Indexer->Conf;
1825   int error;
1826   char *endptr;
1827   unsigned long long res= UdmStrToSize(av[1], &endptr, &error);
1828   if (error || endptr[0])
1829   {
1830     udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1831                  "Bad size value: %s", av[1]);
1832     return UDM_ERROR;
1833   }
1834   UdmVarListReplaceULongLong(&Conf->Vars, av[0], res);
1835   return UDM_OK;
1836 }
1837 
1838 
1839 static udm_rc_t
env_rpl_num_var(UDM_CFG * C,size_t ac,const char ** av)1840 env_rpl_num_var(UDM_CFG *C, size_t ac, const char **av)
1841 {
1842   UDM_ENV  *Conf=C->Indexer->Conf;
1843   int  res=atoi(av[1]);
1844   if(!strcasecmp(av[0], "DocSizeWeight"))
1845   {
1846     UdmVarListReplaceInt(&Conf->Vars, "MaxCoordFactor" ,res);
1847     return UDM_OK;
1848   }
1849   if(!strcasecmp(av[0],"MinWordLength"))Conf->WordParam.min_word_len=res;
1850   if(!strcasecmp(av[0],"MaxWordLength"))Conf->WordParam.max_word_len=res;
1851   if (!strcasecmp(av[0],"IndexerThreads") &&
1852       (res < 1 || res > UDM_INDEXER_THREADS_MAX))
1853   {
1854     udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
1855                  "%s: bad value: %d. Valid values: 1..%d.",
1856                  av[0], res, UDM_INDEXER_THREADS_MAX);
1857     return UDM_ERROR;
1858   }
1859   if (!strcasecmp(av[0], "LogLevel"))
1860     Conf->Log.level= atoi(av[1]);
1861   UdmVarListReplaceInt(&Conf->Vars,av[0],res);
1862   return UDM_OK;
1863 }
1864 
1865 
1866 static udm_rc_t
srv_rpl_num_var(UDM_CFG * C,size_t ac,const char ** av)1867 srv_rpl_num_var(UDM_CFG *C, size_t ac, const char **av)
1868 {
1869   int res=atoi(av[1]);
1870   UdmVarListReplaceInt(&C->Srv->Vars,av[0],res);
1871   if (strcasecmp(av[0], "MaxHops") == 0) C->Srv->MaxHops = (uint4) res;
1872   if (strcasecmp(av[0], "ServerWeight") == 0) C->Srv->weight = atof(av[1]);
1873   return UDM_OK;
1874 }
1875 
1876 
1877 static udm_rc_t
srv_rpl_time_var(UDM_CFG * C,size_t ac,const char ** av)1878 srv_rpl_time_var(UDM_CFG *C, size_t ac, const char **av)
1879 {
1880   UDM_ENV *Conf=C->Indexer->Conf;
1881   int res= Udm_dp2time_t(av[1]);
1882   udm_rc_t rc= UDM_OK;
1883   if (res == -1)
1884   {
1885     sprintf(Conf->errstr,"bad time interval: %s",av[1]);
1886     rc= UDM_ERROR;
1887   }
1888   UdmVarListReplaceInt(&C->Srv->Vars,av[0],res);
1889   return rc;
1890 }
1891 
1892 
1893 typedef struct conf_cmd_st
1894 {
1895   const char  *name;
1896   size_t    argmin;
1897   size_t    argmax;
1898   udm_rc_t  (*action)(UDM_CFG *a, size_t n, const char **av);
1899 } UDM_CONFCMD;
1900 
1901 
1902 static UDM_CONFCMD commands[] =
1903 {
1904   {"Include",              1,1,   do_include},      /* Documented */
1905 
1906   {"ImportEnv",            1,1,   env_rpl_env_var}, /* Documented */
1907   {"DBAddr",               1,100, env_dbaddr},      /* Documented */
1908   {"DefaultContentType",   1,1,   env_rpl_var},     /* Documented */
1909   {"ResultContentType",    1,1,   env_rpl_var},     /* Documented */
1910   {"Listen",               1,1,   env_rpl_var},     /* TODO       */
1911   {"UseRemoteContentType", 1,1,   env_rpl_bool_var},/* Documented */
1912   {"UsePopularity",        1,1,   env_rpl_bool_var},/* Documented */
1913   {"UseCRC32URLId",        1,1,   env_rpl_var},     /* Documented */
1914   {"NewsExtensions",       1,1,   env_rpl_var},     /* Documented */
1915   {"SyslogFacility",       1,1,   env_rpl_var},     /* Documented */
1916   {"AliasProg",            1,1,   env_rpl_var},     /* Documented */
1917   {"ForceIISCharset1251",  1,1,   env_rpl_var},     /* Documented */
1918   {"GroupBySite",          1,1,   env_rpl_bool_var},/* Documented */
1919   {"wf",                   1,1,   env_rpl_var},     /* Documented */
1920   {"StrictModeThreshold",  1,1,   env_rpl_var},     /* Documented */
1921 /*{"PopRankSkipSameSite",  1,1,   env_rpl_var},*/     /* TODO34: restore */
1922   {"VarDir",               1,1,   env_rpl_var},     /* Documented */
1923   {"DocMemCacheSize",      1,1,   env_rpl_var},     /* Documented */
1924   {"IspellUsePrefixes",    1,1,   env_rpl_var},     /* Documented */
1925   {"URLSelectCacheSize",   1,1,   env_rpl_var},     /* Documented */
1926   {"URLSelectSkipLock",    1,1,   env_rpl_bool_var},/* Documented */
1927   {"MaxDocSize",           1,1,   env_rpl_var},     /* Documented */
1928   {"ParserTimeOut",        1,1,   env_rpl_var},     /* Documented */
1929   {"NumSections",          1,1,   env_rpl_var},     /* Documented */
1930   {"DateFormat",           1,1,   env_rpl_var},     /* Documented */
1931   {"GuesserUseMeta",       1,1,   env_rpl_var},     /* Documented */
1932   {"ResultsLimit",         1,1,   env_rpl_var},     /* Documented */
1933   {"Segmenter",            1,1,   env_rpl_var},     /* TODO       */
1934   {"Log2stderr",           1,1,   env_rpl_var},     /* Documented */
1935   {"LogFlags",             1,1,   env_rpl_num_var}, /* TODO     */
1936   {"SQLClearDBHook",       1,1,   env_rpl_var},     /* TODO       */
1937   {"UserCacheQuery",       1,1,   env_rpl_var},     /* Documented */
1938 #ifdef HAVE_SETVBUF
1939   {"StdoutBufferSize",     1,1,   env_rpl_var},     /* TODO       */
1940 #endif
1941   {"AlwaysFoundWord",      1,1,   env_rpl_var},     /* Documented */
1942   {"CustomLog",            1,1,   env_rpl_var},     /* Documented */
1943   {"CurrentTime",          1,1,   env_rpl_var},     /* TODO       */
1944   {"Locale",               1,1,   env_rpl_var},     /* Documented */
1945   {"WordDistanceWeight",   1,1,   env_rpl_num_var}, /* Documented */
1946   {"MaxCoordFactor",       1,1,   env_rpl_num_var}, /* TODO       */
1947   {"PopularityFactor",     1,1,   env_rpl_num_var}, /* Documented */
1948   {"IDFFactor",            1,1,   env_rpl_num_var}, /* Documented */
1949   {"MinCoordFactor",       1,1,   env_rpl_num_var}, /* Documented */
1950   {"NumWordFactor",        1,1,   env_rpl_var},     /* Documented */
1951   {"NumDistinctWordFactor",1,1,   env_rpl_num_var}, /* Documented */
1952   {"UserScoreFactor",      1,1,   env_rpl_num_var}, /* Documented */
1953   {"WordDensityFactor"    ,1,1,   env_rpl_num_var}, /* Documented */
1954   {"WordFormFactor"       ,1,1,   env_rpl_num_var}, /* Documented */
1955   {"URLDataThreshold",     1,1,   env_rpl_num_var}, /* Documented */
1956   {"DocSizeWeight",        1,1,   env_rpl_num_var}, /* Documented */
1957   {"RelevancyFactor",      1,1,   env_rpl_num_var}, /* TODO       */
1958   {"Phrase2CountFactor",   1,1,   env_rpl_num_var}, /* Documented */
1959   {"Phrase3CountFactor",   1,1,   env_rpl_num_var}, /* Documented */
1960   {"DateFactor",           1,1,   env_rpl_num_var}, /* Documented */
1961   {"MinWordLength",        1,1,   env_rpl_num_var}, /* Documented */
1962   {"MaxWordLength",        1,1,   env_rpl_num_var}, /* Documented */
1963   {"SubstringMatchMinWordLength", 1,1, env_rpl_num_var}, /* Documented */
1964 
1965   {"ExcerptSize",          1,1,   env_rpl_num_var}, /* Documented */
1966   {"ExcerptPadding",       1,1,   env_rpl_num_var}, /* Documented */
1967   {"ExcerptFragment",      2,100, add_excerpt_fragment}, /* TODO */
1968 
1969   {"LogLevel",             1,1,   env_rpl_num_var}, /* Documented */
1970   {"CrawlerThreads",       1,1,   env_rpl_num_var}, /* Documented */
1971   {"IndexerThreads",       1,1,   env_rpl_num_var}, /* Documented */
1972   {"WordCacheSize",        1,1,   env_rpl_num_var}, /* Documented */
1973   {"IPRequestPerMinLimit", 1,1,   env_rpl_num_var}, /* Documented */
1974   {"CVSIgnore",            1,1,   env_rpl_bool_var},/* Documented */
1975   {"UseHTDBURLId",         1,1,   env_rpl_bool_var},/* TODO       */
1976   {"Suggest",              1,1,   env_rpl_bool_var},/* Documented */
1977   {"IndexTime",            1,1,   env_rpl_bool_var},/* Documented */
1978   {"ExcerptStopword",      1,1,   env_rpl_bool_var},/* Documented */
1979   {"UseCookie",            1,1,   env_rpl_bool_var},/* Documented */
1980   {"UseSitemap",           1,1,   env_rpl_bool_var},/* Documented */
1981   {"UseNumericOperators",  1,1,   env_rpl_bool_var},/* Documented */
1982   {"UseRangeOperators",    1,1,   env_rpl_bool_var},/* Documented */
1983   {"SaveSectionSize",      1,1,   env_rpl_bool_var},/* Documented */
1984   {"Dehyphenate",          1,1,   env_rpl_bool_var},/* Documented */
1985   {"HyphenateNumbers",     1,1,   env_rpl_bool_var},/* TODO       */
1986   {"StripAccents",         1,1,   env_rpl_bool_var},/* Documented */
1987   {"LoadURLInfo",          1,1,   env_rpl_bool_var},/* Documented */
1988   {"LoadURLBasicInfo",     1,1,   env_rpl_bool_var},/* Documented */
1989   {"LoadTagInfo",          1,1,   env_rpl_bool_var},/* Documented */
1990   {"ComplexSynonyms",      1,1,   env_rpl_bool_var},/* Documented */
1991 
1992   {"ReplaceVar",           2,2,   env_rpl_named_var},/* Documented */
1993 
1994   {"LocalCharset",         1,1,   env_rpl_charset},  /* Documented */
1995   {"BrowserCharset",       1,1,   env_rpl_charset},  /* Documented */
1996   {"CaseFolding",          1,1,   env_rpl_casefolding},/* Documented */
1997 
1998   {"XMLEnterHook",         2,2,   rpl_xml_hook},     /* TODO       */
1999   {"XMLLeaveHook",         2,2,   rpl_xml_hook},     /* TODO       */
2000   {"XMLDataHook",          2,2,   rpl_xml_hook},     /* TODO       */
2001 
2002   {"HTDBAddr",             1,1,   srv_rpl_var},      /* Documented */
2003   {"HTDBList",             1,1,   srv_rpl_var},      /* Documented */
2004   {"HTDBDoc",              1,1,   srv_rpl_var},      /* Documented */
2005   {"HTDBLimit",            1,1,   srv_rpl_var},      /* Documented */
2006   {"SQLImportSection",     1,1,   srv_rpl_var},      /* TODO       */
2007   {"SQLExportHref",        1,1,   srv_rpl_var},      /* TODO       */
2008   {"SQLExportPopularityTable",1,1,env_rpl_var},      /* TODO       */
2009   {"SQLWordForms",         1,1,   env_rpl_var},      /* Documented */
2010   {"DefaultLang",          1,1,   srv_rpl_var},      /* Documented */
2011   {"Tag",                  1,1,   srv_rpl_var},      /* Documented */
2012   {"Proxy",                0,100, srv_proxy_var},    /* Documented */
2013   {"VaryLang",             1,1,   srv_rpl_var},      /* Documented */
2014   {"UseRobotsTxtURL",      1,1,   srv_rpl_var},      /* TODO       */
2015   {"MaxNetErrors",         1,1,   srv_rpl_num_var},  /* Documented */
2016   {"CrawlDelay",           1,1,   srv_rpl_num_var},  /* Documented */
2017   {"MaxHops",              1,1,   srv_rpl_num_var},  /* Documented */
2018   {"StartHops",            1,1,   srv_rpl_num_var},  /* Documented */
2019   {"MaxDocPerSite",        1,1,   srv_rpl_num_var},  /* Documented */
2020   {"ServerWeight",         1,1,   srv_rpl_num_var},  /* Documented */
2021   {"Robots",               1,4,   srv_rpl_robots},   /* Documented */
2022   {"DetectClones",         1,1,   srv_rpl_bool_var}, /* Documented */
2023   {"CollectLinks",         1,100, srv_rpl_collect_links},/*Documented*/
2024   {"Index",                1,1,   srv_rpl_bool_var}, /* Documented */
2025   {"FollowLinks",          1,100, srv_rpl_follow_links},/*Documented*/
2026   {"FollowSymLinks",       1,1,   srv_rpl_bool_var}, /* Documented */
2027   {"AjaxLinks",            1,1,   srv_rpl_bool_var}, /* TODO */
2028   {"NetErrorDelayTime",    1,1,   srv_rpl_time_var}, /* Documented */
2029   {"ReadTimeOut",          1,1,   srv_rpl_time_var}, /* Documented */
2030   {"DocTimeOut",           1,1,   srv_rpl_time_var}, /* Documented */
2031   {"Period",               1,1,   srv_rpl_time_var}, /* Documented */
2032   {"HoldBadHrefs",         1,1,   srv_rpl_time_var}, /* Documented */
2033   {"DNSCacheTimeout",      1,1,   srv_rpl_time_var}, /* Documented */
2034   {"HTTPHeader",           1,2,   srv_rpl_hdr},      /* Documented */
2035   {"AuthBasic",            1,1,   srv_rpl_auth},     /* Documented */
2036   {"MirrorRoot",           1,1,   srv_rpl_mirror},   /* Documented */
2037   {"MirrorHeadersRoot",    1,1,   srv_rpl_mirror},   /* Documented */
2038   {"MirrorPeriod",         1,1,   srv_rpl_mirror},   /* Documented */
2039   {"RemoteCharset",        1,1,   srv_rpl_charset},  /* Documented */
2040   {"RemoteFileNameCharset",1,1,   srv_rpl_charset},  /* Documented */
2041   {"RobotsDisallowAction", 1,1,   srv_rpl_num_var},  /* TODO */
2042 
2043   {"Disallow",             1,100, add_filter},       /* Documented */
2044   {"Allow",                1,100, add_filter},       /* Documented */
2045   {"CheckMP3Only",         1,100, add_filter},       /* Documented */
2046   {"CheckMP3",             1,100, add_filter},       /* Documented */
2047   {"CheckOnly",            1,100, add_filter},       /* Documented */
2048   {"HrefOnly",             1,100, add_filter},       /* Documented */
2049   {"ImportOnly",           1,100, add_filter},       /* TODO       */
2050   {"Skip",                 1,100, add_filter},       /* Documented */
2051 
2052   {"IndexIf",              1,100, add_section_filter},/* Documented */
2053   {"NoIndexIf",            1,100, add_section_filter},/* Documented */
2054   {"SkipIf",               1,100, add_section_filter},/* Documented */
2055 
2056   {"Server",               1,100, add_srv},          /* Documented */
2057   {"Realm",                1,100, add_srv},          /* Documented */
2058   {"Subnet",               1,100, add_srv},          /* Documented */
2059   {"URL",                  1,1,   add_url},          /* Documented */
2060 
2061   {"Alias",                1,100, add_alias},        /* Documented */
2062   {"ReverseAlias",         1,100, add_alias},        /* Documented */
2063 
2064   {"AddType",              1,100, add_type},         /* Documented */
2065   {"AddEncoding",          1,100, add_encoding},     /* Documented */
2066   {"CachedCopyEncoding",   1,1,   env_rpl_encoding}, /* Documented */
2067   {"Mime",                 2,4,   add_parser},       /* Documented */
2068   {"Section",              2,10,  add_section},      /* Documented */ /* TODO: index/noindex */
2069   {"Affix",                3,3,   add_affix},        /* Documented */
2070   {"Spell",                3,3,   add_spell},        /* Documented */
2071   {"StopwordFile",         1,1,   add_stoplist},     /* Documented */
2072   {"LangMapFile",          1,1,   add_langmap},      /* Documented */
2073   {"LangMapUpdate",        1,1,   env_rpl_var},      /* Documented */
2074   {"Synonym",              1,1,   add_synonym},      /* Documented */
2075   {"LoadChineseList",      0,2,   add_chinese},      /* Documented */
2076   {"LoadThaiList",         0,2,   add_thai},         /* Documented */
2077   {"Limit",                1,2,   add_limit},        /* Documented */
2078   {"UserScore",            2,2,   add_user_score},   /* Documented */
2079   {"UserSiteScore",        2,2,   add_user_site_score},/* Documented */
2080   {"UserOrder",            2,2,   add_user_order},   /* Documented */
2081   {"ServerTable",          1,1,   add_srv_table},    /* Documented */
2082   {"FlushServerTable",     0,0,   flush_srv_table},  /* Documented */
2083   {"DBListFree",           0,0,   dblist_free},      /* TODO       */
2084 
2085   {"IndexCacheSize",       1,1,   env_rpl_size},     /* Documented */
2086   {NULL,0,0,0}
2087 };
2088 
2089 
2090 UDM_API(udm_rc_t)
UdmEnvAddLine(UDM_CFG * C,char * str)2091 UdmEnvAddLine(UDM_CFG *C, char *str)
2092 {
2093   UDM_ENV    *Conf=C->Indexer->Conf;
2094   UDM_CONFCMD  *Cmd;
2095   const char *av[255];
2096   size_t  ac= UdmGetArgs(str, av, 255);
2097 
2098   for (Cmd= commands ; Cmd->name ; Cmd++)
2099   {
2100     if(!strcasecmp(Cmd->name,av[0]))
2101     {
2102       udm_rc_t rc= UDM_OK;
2103       int argc=ac;
2104       size_t i;
2105       char *p;
2106       char *tmpav[255];
2107 
2108       argc--;
2109       if(ac<Cmd->argmin+1)
2110       {
2111         sprintf(Conf->errstr,"too few (%d) arguments for command '%s'",
2112                 argc,Cmd->name);
2113         return UDM_ERROR;
2114       }
2115 
2116       if(ac>Cmd->argmax+1)
2117       {
2118         sprintf(Conf->errstr,"too many (%d) arguments for command '%s'",
2119                 argc,Cmd->name);
2120         return UDM_ERROR;
2121       }
2122 
2123       for (i= 1; i < ac; i++)
2124       {
2125         if (!av[i])
2126         {
2127           tmpav[i]= NULL;
2128           continue;
2129         }
2130         if (!(p = UdmParseEnvVar(Conf, av[i])))
2131         {
2132           sprintf(Conf->errstr, "An error occured while parsing '%s'", av[i]);
2133           return UDM_ERROR;
2134         }
2135         av[i]= tmpav[i]= p;
2136       }
2137 
2138       if (Cmd->action)
2139         rc= Cmd->action(C, ac, av);
2140 
2141       for (i= 1; i < ac; i++)
2142         UDM_FREE(tmpav[i]);
2143 
2144       if (Cmd->action)
2145         return rc;
2146     }
2147   }
2148   udm_snprintf(Conf->errstr, sizeof(Conf->errstr),
2149                "Unknown command: %s", av[0]);
2150   return UDM_ERROR;
2151 }
2152 
2153 
2154 UDM_API(udm_rc_t)
UdmAgentAddLine(UDM_AGENT * Agent,const char * line)2155 UdmAgentAddLine(UDM_AGENT *Agent, const char *line)
2156 {
2157   UDM_CFG Cfg;
2158   char str[1024];
2159   bzero((void*) &Cfg, sizeof(Cfg));
2160   Cfg.Indexer= Agent;
2161   udm_snprintf(str, sizeof(str) - 1, "%s", line);
2162   return UdmEnvAddLine(&Cfg, str);
2163 }
2164 
2165 
2166 static udm_rc_t
EnvLoad(UDM_CFG * Cfg,const char * cname)2167 EnvLoad(UDM_CFG *Cfg, const char *cname)
2168 {
2169   char  *str0 = NULL;  /* Unsafe copy - will be used in strtok  */
2170   char  str1[1024]="";  /* To concatenate lines      */
2171   FILE  *config;  /* File struct */
2172   udm_rc_t rc= UDM_OK;
2173   size_t  line = 0, str0len = 0, str1len, str0size = 4096;
2174 
2175   if ((str0 = (char*)UdmMalloc(str0size)) == NULL)
2176   {
2177     sprintf(Cfg->Indexer->Conf->errstr,
2178             "Can't alloc %d bytes at '%s': %d",
2179             (int) str0size, __FILE__, __LINE__);
2180     return UDM_ERROR;
2181   }
2182   str0[0]=0;
2183 
2184   /* Open config file */
2185   if(!(config=fopen(cname,"r")))
2186   {
2187     sprintf(Cfg->Indexer->Conf->errstr,
2188             "Can't open config file '%s': %s", cname, strerror(errno));
2189     UDM_FREE(str0);
2190     return UDM_ERROR;
2191   }
2192 
2193   update_current(cname);
2194 
2195   /*  Read lines and parse */
2196   while(fgets(str1,sizeof(str1),config))
2197   {
2198     char  *end;
2199 
2200     line++;
2201 
2202     if(str1[0]=='#')continue;
2203     for (end = str1 + (str1len = strlen(str1)) - 1 ;
2204          (end>=str1) && (*end=='\r'||*end=='\n'||*end==' '||*end=='\t') ;
2205          *end--='\0');
2206     if(!str1[0])continue;
2207 
2208     if(*end=='\\')
2209     {
2210       *end=0;
2211       if (str0len + str1len >= str0size)
2212       {
2213         str0size += 4096 + str1len;
2214         if ((str0 = (char*)UdmRealloc(str0, str0size)) == NULL)
2215         {
2216           sprintf(Cfg->Indexer->Conf->errstr,
2217                   "Can't realloc %d bytes at '%s': %d",
2218                   (int) str0size, __FILE__, __LINE__);
2219           return UDM_ERROR;
2220         }
2221       }
2222       strcat(str0,str1);
2223       str0len += str1len;
2224       continue;
2225     }
2226     strcat(str0,str1);
2227     str0len += str1len;
2228 
2229     if (UDM_OK != (rc= UdmEnvAddLine(Cfg,str0)))
2230     {
2231       char  err[2048];
2232       strcpy(err,Cfg->Indexer->Conf->errstr);
2233       sprintf(Cfg->Indexer->Conf->errstr, "%s:%d: %s", cname, (int) line, err);
2234       break;
2235     }
2236 
2237     str0[0]=0;
2238     str0len = 0;
2239   }
2240   UDM_FREE(str0);
2241   fclose(config);
2242   return rc;
2243 }
2244 
2245 
2246 UDM_API(udm_rc_t)
UdmEnvLoad(UDM_AGENT * Indexer,const char * cname,int lflags)2247 UdmEnvLoad(UDM_AGENT *Indexer, const char *cname, int lflags)
2248 {
2249   UDM_CFG    Cfg;
2250   UDM_SERVER  Srv;
2251   udm_rc_t rc= UDM_OK;
2252   const char  *dbaddr=NULL;
2253 
2254   UdmServerInit(&Srv);
2255   bzero((void*)&Cfg, sizeof(Cfg));
2256   Cfg.Indexer = Indexer;
2257   Indexer->Conf->Cfg_Srv = Cfg.Srv = &Srv;
2258   Cfg.flags=lflags;
2259   Cfg.level=0;
2260 
2261   /* Set DBAddr if for example passed from environment */
2262   if((dbaddr=UdmVarListFindStr(&Indexer->Conf->Vars,"DBAddr",NULL)))
2263   {
2264     if(UDM_OK != (rc= UdmEnvDBListAdd(Indexer->Conf, dbaddr)))
2265       goto freeex;
2266   }
2267 
2268   if(UDM_OK == (rc=EnvLoad(&Cfg,cname)))
2269   {
2270     UDM_ENV *Env= Indexer->Conf;
2271 
2272     if (UDM_OK != (rc= UdmEnvPrepare(Env)))
2273       goto freeex;
2274 
2275     UdmVarListInsStr(&Env->Vars, "Request.User-Agent", UDM_USER_AGENT);
2276   }
2277 
2278 freeex:
2279   UdmServerFree(&Srv);
2280   return rc;
2281 }
2282 
2283 
2284 
2285 static size_t
UdmMatchToStr(char * str,size_t size,const UDM_MATCH * M,const char * arg,const char * cmd)2286 UdmMatchToStr(char *str, size_t size,
2287               const UDM_MATCH *M, const char *arg, const char *cmd)
2288 {
2289   if (cmd)
2290     return udm_snprintf(str, size, "%s %s%s%s \"%s\" \"%s\"",
2291                         cmd,
2292                         M->Param.match_mode == UDM_MATCH_REGEX ? " regex" : "",
2293                         UdmMatchIsNegative(M) ? " nomatch" : "",
2294                         UdmMatchIsCaseInsensitive(M) ? "" : " NoCase",
2295                         arg, UdmMatchPatternConstStr(M));
2296   else
2297     return udm_snprintf(str, size, "%s %s%s%s \"%s\"",
2298                         arg,
2299                         M->Param.match_mode == UDM_MATCH_REGEX ? " regex" : "",
2300                         UdmMatchIsNegative(M) ? " nomatch" : "",
2301                         UdmMatchIsCaseInsensitive(M) ? "" : " NoCase",
2302                         UdmMatchPatternConstStr(M));
2303 
2304 }
2305 
2306 
2307 /*
2308 static udm_rc_t
2309 UdmMatchListPrint(FILE *f, UDM_MATCHLIST *L, const char *cmd)
2310 {
2311   size_t i;
2312   char str[128];
2313   for (i= 0; i < L->nmatches; i++)
2314   {
2315     UDM_MATCH *M= &L->Match[i];
2316     UdmMatchToStr(str, sizeof(str), M, M->arg, cmd);
2317     fprintf(f, "%s\n", str);
2318   }
2319   return UDM_OK;
2320 }
2321 */
2322 
2323 static udm_rc_t
UdmDBListPrint(FILE * f,UDM_DBLIST * L)2324 UdmDBListPrint(FILE *f, UDM_DBLIST *L)
2325 {
2326   size_t i;
2327   for (i= 0; i < L->nitems; i++)
2328   {
2329     UDM_DB *db= &L->Item[i];
2330     char dbaddr[128]= "<noaddr>";
2331     size_t nbytes;
2332     db->dbhandler->Info(db, dbaddr, sizeof(dbaddr), &nbytes, UDM_DBINFO_ADDR);
2333     fprintf(f, "DBAddr %s\n", dbaddr);
2334   }
2335   return UDM_OK;
2336 }
2337 
2338 
2339 static const char *
UdmMatchTypeToServerCommand(int match_type)2340 UdmMatchTypeToServerCommand(int match_type)
2341 {
2342   switch (match_type)
2343   {
2344     case UDM_MATCH_WILD:   return "Realm";
2345     case UDM_MATCH_REGEX:  return "Realm regex";
2346     case UDM_MATCH_SUBNET: return "Subnet";
2347     case UDM_MATCH_BEGIN:  return "Server";
2348     default: return "<UnknownMatchType>";
2349   }
2350 }
2351 
2352 
2353 static udm_rc_t
UdmServerOptionsPrint(FILE * f,UDM_SERVER * Prev,UDM_SERVER * Curr)2354 UdmServerOptionsPrint(FILE *f, UDM_SERVER *Prev, UDM_SERVER *Curr)
2355 {
2356   UDM_CONFCMD *cmd;
2357   for (cmd= commands; cmd->name; cmd++)
2358   {
2359     if (cmd->action == srv_rpl_var      ||
2360         cmd->action == srv_rpl_num_var  ||
2361         cmd->action == srv_rpl_bool_var ||
2362         cmd->action == srv_rpl_time_var ||
2363      /* cmd->action ==  srv_rpl_hdr     || */
2364         cmd->action ==  srv_rpl_auth    ||
2365      /* cmd->action ==  srv_rpl_mirror  || */
2366         cmd->action ==  srv_rpl_charset ||
2367         0)
2368     {
2369       const char *cval= UdmVarListFindStr(&Curr->Vars, cmd->name, "");
2370       if (cmd->action == srv_rpl_auth)
2371       {
2372         if (cval[0])
2373         {
2374           char encoded[128], decoded[128];
2375           udm_snprintf(encoded, sizeof(encoded), "%s", cval);
2376           udm_base64_decode(decoded, encoded, sizeof(decoded));
2377           fprintf(f, "%s '%s'\n", cmd->name, decoded);
2378         }
2379       }
2380       else
2381       {
2382         const char *pval= Prev ? UdmVarListFindStr(&Prev->Vars, cmd->name, "") : "";
2383         if (strcmp(pval, cval))
2384           fprintf(f, "%s '%s'\n", cmd->name, cval);
2385       }
2386     }
2387   }
2388   return UDM_OK;
2389 }
2390 
2391 
2392 static size_t
UdmServerToStr(char * str,size_t size,UDM_SERVER * S)2393 UdmServerToStr(char *str, size_t size, UDM_SERVER *S)
2394 {
2395   const char *method= UdmMethodStr(S->Filter.method);
2396   udm_bool_t case_sense= UdmVarListFindBool(&S->Vars, "case_sense", UDM_TRUE);
2397   udm_bool_t nomatch= UdmVarListFindBool(&S->Vars, "nomatch", UDM_FALSE);
2398   const char *case_str= case_sense ? "" : "NoCase";
2399   const char *match_str= nomatch ? "NoMatch" : "";
2400   const char *follow_str= UdmFollowStr(S->webspace);
2401   const char *command= UdmMatchTypeToServerCommand(S->Filter.Match.Param.match_mode);
2402   const char *alias= UdmVarListFindStr(&S->Vars, "Alias", "");
2403 
2404   /* TODO: Server site: cuts directory name */
2405   switch (S->Filter.Match.Param.match_mode)
2406   {
2407     case UDM_MATCH_WILD:
2408     case UDM_MATCH_REGEX:
2409     case UDM_MATCH_SUBNET:
2410       follow_str= "";
2411       break;
2412     case UDM_MATCH_BEGIN:
2413     case UDM_MATCH_FULL:
2414     case UDM_MATCH_SUBSTR:
2415     case UDM_MATCH_END:
2416     case UDM_MATCH_NUMERIC_LT:
2417     case UDM_MATCH_NUMERIC_GT:
2418     case UDM_MATCH_RANGE:
2419       break;
2420   }
2421 
2422   return udm_snprintf(str, size,
2423                       "%s %s %s %s %s '%s'%s%s",
2424                       command, follow_str, method,
2425                       case_str, match_str,
2426                       UdmMatchPatternConstStr(&S->Filter.Match),
2427                       alias[0] ? " " : "", alias);
2428 }
2429 
2430 
2431 static udm_rc_t
UdmServerListPrint(FILE * f,UDM_SERVERLIST * L)2432 UdmServerListPrint(FILE *f, UDM_SERVERLIST *L)
2433 {
2434   size_t i;
2435   char str[128];
2436   for (i= 0; i < L->nservers; i++)
2437   {
2438     UDM_SERVER *S= &L->Server[i];
2439     UDM_SERVER *P= i ? &L->Server[i-1] : NULL;
2440 /*    UdmVarListPrint(f, &S->Vars);*/
2441     UdmServerOptionsPrint(f, P, S);
2442     UdmServerToStr(str, sizeof(str), S);
2443     fprintf(f, "%s\n", str);
2444   }
2445   return UDM_OK;
2446 }
2447 
2448 
2449 static udm_rc_t
UdmAliasListPrint(FILE * f,UDM_REPLACELIST * L,const char * cmd)2450 UdmAliasListPrint(FILE *f, UDM_REPLACELIST *L, const char *cmd)
2451 {
2452   size_t i;
2453   char str[128];
2454   for (i= 0; i < L->nitems; i++)
2455   {
2456     UdmMatchToStr(str, sizeof(str), &L->Item[i].Match, L->Item[i].Replace.str, cmd);
2457     fprintf(f, "%s\n", str);
2458   }
2459   return UDM_OK;
2460 }
2461 
2462 
2463 static udm_rc_t
UdmParserListPrint(FILE * f,UDM_PARSERLIST * L)2464 UdmParserListPrint(FILE *f, UDM_PARSERLIST *L)
2465 {
2466   size_t i;
2467   char str[1024];
2468   for (i= 0; i < L->nparsers; i++)
2469   {
2470     UDM_PARSER *P= &L->Parser[i];
2471     udm_snprintf(str, sizeof(str),
2472                  "Mime \"%s\" \"%s\" '%s'%s%s%s",
2473                  P->from_mime, P->to_mime, P->cmd,
2474                  P->src ? " \"" : "",
2475                  P->src ? P->src : "",
2476                  P->src ? "\"" : "");
2477     fprintf(f, "%s\n", str);
2478   }
2479   return UDM_OK;
2480 }
2481 
2482 
2483 static udm_rc_t
UdmAffixListListPrint(FILE * f,UDM_AFFIXLISTLIST * L)2484 UdmAffixListListPrint(FILE *f, UDM_AFFIXLISTLIST *L)
2485 {
2486   size_t i;
2487   char str[256];
2488   for (i= 0; i < L->nitems; i++)
2489   {
2490     UDM_AFFIXLIST *A= &L->Item[i];
2491     udm_snprintf(str, sizeof(str),
2492                  "Affix %s %s '%s'", A->lang, A->cset, A->fname);
2493     fprintf(f, "%s\n", str);
2494   }
2495   return UDM_OK;
2496 }
2497 
2498 
2499 static udm_rc_t
UdmSpellListListPrint(FILE * f,UDM_SPELLLISTLIST * L)2500 UdmSpellListListPrint(FILE *f, UDM_SPELLLISTLIST *L)
2501 {
2502   size_t i;
2503   char str[256];
2504   for (i= 0; i < L->nitems; i++)
2505   {
2506     UDM_SPELLLIST *S= &L->Item[i];
2507     udm_snprintf(str, sizeof(str),
2508                  "Spell %s %s '%s'", S->lang, S->cset, S->fname);
2509     fprintf(f, "%s\n", str);
2510   }
2511   return UDM_OK;
2512 }
2513 
2514 
2515 static udm_rc_t
UdmLangmapListPrint(FILE * f,UDM_LANGMAPLIST * L)2516 UdmLangmapListPrint(FILE *f, UDM_LANGMAPLIST *L)
2517 {
2518   size_t i;
2519   char str[256];
2520   for (i= 0; i < L->nmaps; i++)
2521   {
2522     UDM_LANGMAP *M= &L->Map[i];
2523     udm_snprintf(str, sizeof(str),
2524                  "LangmapFile '%s'", M->filename);
2525     fprintf(f, "%s\n", str);
2526   }
2527   return UDM_OK;
2528 }
2529 
2530 
2531 static udm_rc_t
UdmStopListListPrint(FILE * f,UDM_STOPLISTLIST * L)2532 UdmStopListListPrint(FILE *f, UDM_STOPLISTLIST *L)
2533 {
2534   size_t i;
2535   char str[256];
2536   for (i= 0; i < L->nitems; i++)
2537   {
2538     UDM_STOPLIST *S= &L->Item[i];
2539     udm_snprintf(str, sizeof(str),
2540                  "StopwordFile '%s'", S->fname);
2541     fprintf(f, "%s\n", str);
2542   }
2543   return UDM_OK;
2544 }
2545 
2546 
2547 static udm_rc_t
UdmSynonymListListPrint(FILE * f,UDM_SYNONYMLISTLIST * L)2548 UdmSynonymListListPrint(FILE *f, UDM_SYNONYMLISTLIST *L)
2549 {
2550   size_t i;
2551   char str[256];
2552   for (i= 0; i < L->nitems; i++)
2553   {
2554     UDM_SYNONYMLIST *S= &L->Item[i];
2555     udm_snprintf(str, sizeof(str),
2556                  "Synonym '%s'", S->fname);
2557     fprintf(f, "%s\n", str);
2558   }
2559   return UDM_OK;
2560 }
2561 
2562 
2563 UDM_API(udm_rc_t)
UdmEnvSave(UDM_AGENT * Indexer,const char * cname,int lflags)2564 UdmEnvSave(UDM_AGENT *Indexer, const char *cname, int lflags)
2565 {
2566   FILE *f;
2567   UDM_ENV *E= Indexer->Conf;
2568 
2569   if (!strcmp(cname, "-"))
2570     f= stdout;
2571   else if (!(f= fopen(cname,"w")))
2572   {
2573     sprintf(Indexer->Conf->errstr,
2574             "Can't open output file '%s': %s", cname, strerror(errno));
2575     return UDM_ERROR;
2576   }
2577 
2578   /* TODO: put interpreter line */
2579 
2580   UdmDBListPrint(f, &E->DBList);
2581 
2582   fprintf(f, "LocalCharset %s\n", E->lcs->name);
2583   fprintf(f, "BrowserCharset %s\n", E->bcs->name);
2584 
2585   UdmSectionListPrint(&E->Sections, f);
2586   UdmVarListPrint(&E->Vars, f);
2587 
2588   /*UdmMatchListPrint(f, &E->MimeTypes, "AddType");*/
2589   UdmParserListPrint(f, &E->Parsers);
2590   /*UdmMatchListPrint(f, &E->Filters, NULL);*/
2591 
2592   /*
2593   UDM_MATCHLIST	SectionFilters;
2594   UDM_MATCHLIST SectionHdrMatch;
2595   UDM_MATCHLIST SectionGsrMatch;
2596   UDM_MATCHLIST SectionMatch;
2597   */
2598 
2599   UdmStopListListPrint(f, &E->StopWord);
2600   UdmSynonymListListPrint(f, &E->Synonym);
2601   UdmAffixListListPrint(f, &E->Affixes);
2602   UdmSpellListListPrint(f, &E->Spells);
2603   UdmLangmapListPrint(f, &E->LangMaps);
2604 
2605   /*
2606   int		CVS_ignore;
2607   UDM_WORDPARAM	WordParam;
2608   UDM_CHINALIST   Chi;
2609   UDM_CHINALIST   Thai;
2610 #ifdef MECAB
2611   mecab_t         *mecab;
2612 #endif
2613   UDM_UNIDATA *unidata;
2614   */
2615 
2616 
2617   /*
2618   UDM_VARLIST	XMLEnterHooks;
2619   UDM_VARLIST	XMLLeaveHooks;
2620   UDM_VARLIST	XMLDataHooks;
2621   */
2622 
2623   UdmAliasListPrint(f, &E->Aliases, "Alias");
2624   UdmAliasListPrint(f, &E->ReverseAliases, "ReverseAlias");
2625   UdmServerListPrint(f, &E->Servers);
2626 
2627   if (f != stdout)
2628     fclose(f);
2629   return UDM_OK;
2630 }
2631