1 /*************************************************************************************************
2 * The pseudo master of node servers
3 * Copyright (C) 2004-2007 Mikio Hirabayashi
4 * This file is part of Hyper Estraier.
5 * Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of
6 * the GNU Lesser General Public License as published by the Free Software Foundation; either
7 * version 2.1 of the License or any later version. Hyper Estraier is distributed in the hope
8 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
10 * License for more details.
11 * You should have received a copy of the GNU Lesser General Public License along with Hyper
12 * Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
13 * Boston, MA 02111-1307 USA.
14 *************************************************************************************************/
15
16
17 #if defined(MYFCGI)
18 #include <fcgi_stdio.h>
19 #endif
20 #include "mastermod.h"
21
22 #define CONFSUFFIX ".conf" /* suffix of the configuration file */
23 #define OUTBUFSIZ 262144 /* size of the output buffer */
24 #define PPOUTDOC "_OUTDOC_" /* pseudo parameter to delete a document */
25 #define PPPOSTCMD "_POSTCMD_" /* pseudo parameter of the post command */
26
27
28 /* global variables for configurations */
29 const char *g_conffile = NULL; /* path of the configuration file */
30 const char *g_indexdir = NULL; /* path of the directory containing indexes */
31 int g_runmode = 0; /* running mode */
32 const char *g_pidxsuffix = NULL; /* suffix of pseudo indexes */
33 int g_pidxdocmax = -1; /* maximum number of documents in each pseudo index */
34 int g_pidxdocmin = -1; /* minimum number of documents in each pseudo index */
35 int g_lockindex = FALSE; /* whether to perform file locking to the database */
36 int g_searchmax = 0; /* maximum number of documents to send */
37 int g_rateuri = FALSE; /* whether to rate URI for scoring */
38 int g_mergemethod = -1; /* merge method of meta search */
39 int g_scoreexpr = -1; /* score expression */
40 int g_wildmax = -1; /* maximum number of expansion of wild cards */
41 int g_snipwwidth = -1; /* whole width of the snippet */
42 int g_sniphwidth = -1; /* width of beginning of the text */
43 int g_snipawidth = -1; /* width around each highlighted word */
44 int g_scancheck = FALSE; /* whether to check documents by scanning */
45 int g_smlrvnum = -1; /* number of keywords for similarity search */
46 int g_extdelay = 0; /* number of documents for delay of extraction */
47
48
49 /* other global variables */
50 char g_outbuf[OUTBUFSIZ]; /* output buffer */
51 const char *g_scriptname = NULL; /* name of the script */
52
53
54 /* function prototypes */
55 int main(int argc, char **argv);
56 static int realmain(int argc, char **argv);
57 static void die(const char *msg);
58 static const char *skiplabel(const char *str);
59 static CBMAP *getparameters(void);
60 static void senderror(int code, const char *msg);
61 static void sendnodecmdinform(ESTMTDB *db, const char *myurl, const char *mylabel,
62 CBMAP *params);
63 static void sendnodecmdcacheusage(ESTMTDB *db, const char *myurl, const char *mylabel,
64 CBMAP *params);
65 static void sendnodecmdsearch(ESTMTDB *db, const char *myurl, const char *mylabel,
66 CBMAP *params);
67 static void sendnodecmdgetdoc(ESTMTDB *db, const char *myurl, const char *mylabel,
68 CBMAP *params);
69 static void sendnodecmdgetdocattr(ESTMTDB *db, const char *myurl, const char *mylabel,
70 CBMAP *params);
71 static void sendnodecmdetchdoc(ESTMTDB *db, const char *myurl, const char *mylabel,
72 CBMAP *params);
73 static void sendnodecmduritoid(ESTMTDB *db, const char *myurl, const char *mylabel,
74 CBMAP *params);
75 static void sendnodecmdputdoc(ESTMTDB *db, const char *myurl, const char *mylabel,
76 CBMAP *params);
77 static void sendnodecmdoutdoc(ESTMTDB *db, const char *myurl, const char *mylabel,
78 CBMAP *params);
79 static void sendnodecmdeditdoc(ESTMTDB *db, const char *myurl, const char *mylabel,
80 CBMAP *params);
81 static void sendnodecmdsync(ESTMTDB *db, const char *myurl, const char *mylabel,
82 CBMAP *params);
83 static void sendnodecmdoptimize(ESTMTDB *db, const char *myurl, const char *mylabel,
84 CBMAP *params);
85 static void procpostcmdoutdoc(const char *idxname, const char *uri);
86 static void procpostcmdsync(const char *idxname);
87 static void procpostcmdoptimize(const char *idxname);
88
89
/* main routine
   Resets the process environment, then either serves a single CGI request or, when built
   with MYFCGI, serves FastCGI requests in a loop and exits after the 64th one. */
int main(int argc, char **argv){
#if defined(MYFCGI)
  static int served = 0;
  est_proc_env_reset();
  for(;;){
    if(FCGI_Accept() < 0) break;
    realmain(argc, argv);
    fflush(stdout);
    served++;
    /* recycle the process after a fixed number of requests */
    if(served >= 64) exit(0);
  }
  return 0;
#else
  est_proc_env_reset();
  return realmain(argc, argv);
#endif
}
106
107
108 /* real main routine */
realmain(int argc,char ** argv)109 static int realmain(int argc, char **argv){
110 ESTMTDB *db;
111 CBLIST *lines;
112 CBMAP *params;
113 CBDATUM *myurl;
114 const char *rp, *idxname, *cmd;
115 char *tmp, *wp;
116 int i, omode, ecode;
117 /* set configurations */
118 setvbuf(stdout, g_outbuf, _IOFBF, OUTBUFSIZ);
119 g_scriptname = argv[0];
120 if((rp = getenv("SCRIPT_NAME")) != NULL) g_scriptname = rp;
121 if((rp = strrchr(g_scriptname, '/')) != NULL) g_scriptname = rp + 1;
122 tmp = cbmalloc(strlen(g_scriptname) + strlen(CONFSUFFIX) + 1);
123 sprintf(tmp, "%s", g_scriptname);
124 cbglobalgc(tmp, free);
125 if(!(wp = strrchr(tmp, '.'))) wp = tmp + strlen(tmp);
126 sprintf(wp, "%s", CONFSUFFIX);
127 g_conffile = tmp;
128 if(!(lines = cbreadlines(g_conffile))) die("the configuration file is missing.");
129 cbglobalgc(lines, (void (*)(void *))cblistclose);
130 for(i = 0; i < cblistnum(lines); i++){
131 rp = cblistval(lines, i, NULL);
132 if(cbstrfwimatch(rp, "indexdir:")){
133 g_indexdir = skiplabel(rp);
134 } else if(cbstrfwimatch(rp, "runmode:")){
135 g_runmode = atoi(skiplabel(rp));
136 } else if(cbstrfwimatch(rp, "pidxsuffix:")){
137 g_pidxsuffix = skiplabel(rp);
138 } else if(cbstrfwimatch(rp, "pidxdocmax:")){
139 g_pidxdocmax = atoi(skiplabel(rp));
140 } else if(cbstrfwimatch(rp, "pidxdocmin:")){
141 g_pidxdocmin = atoi(skiplabel(rp));
142 } else if(cbstrfwimatch(rp, "lockindex:")){
143 g_lockindex = atoi(skiplabel(rp)) > 0;
144 } else if(cbstrfwimatch(rp, "searchmax:")){
145 g_searchmax = atoi(skiplabel(rp));
146 } else if(cbstrfwimatch(rp, "rateuri:")){
147 g_rateuri = atoi(skiplabel(rp)) > 0;
148 } else if(cbstrfwimatch(rp, "mergemethod:")){
149 g_mergemethod = atoi(skiplabel(rp));
150 } else if(cbstrfwimatch(rp, "scoreexpr:")){
151 g_scoreexpr = atoi(skiplabel(rp));
152 } else if(cbstrfwimatch(rp, "wildmax:")){
153 g_wildmax = atoi(skiplabel(rp));
154 } else if(cbstrfwimatch(rp, "snipwwidth:")){
155 g_snipwwidth = atoi(skiplabel(rp));
156 } else if(cbstrfwimatch(rp, "sniphwidth:")){
157 g_sniphwidth = atoi(skiplabel(rp));
158 } else if(cbstrfwimatch(rp, "snipawidth:")){
159 g_snipawidth = atoi(skiplabel(rp));
160 } else if(cbstrfwimatch(rp, "scancheck:")){
161 g_scancheck = atoi(skiplabel(rp)) > 0;
162 } else if(cbstrfwimatch(rp, "smlrvnum:")){
163 g_smlrvnum = atoi(skiplabel(rp));
164 } else if(cbstrfwimatch(rp, "extdelay:")){
165 g_extdelay = atoi(skiplabel(rp));
166 }
167 }
168 if(!g_indexdir) die("indexdir is undefined.");
169 if(g_runmode < RM_NORMAL || g_runmode > RM_RDONLY) die("runmode is undefined.");
170 if(!g_pidxsuffix) die("pidxsuffix is undefined.");
171 if(g_pidxdocmax < 0) die("pidxdocmax is undefined.");
172 if(g_pidxdocmin < 0) die("pidxdocmin is undefined.");
173 if(g_searchmax < 0) die("searchmax is undefined.");
174 if(g_rateuri < 0) die("rateuri is undefined.");
175 if(g_mergemethod < 0) die("mergemethod is undefined.");
176 if(g_scoreexpr < 0) die("scoreexpr is undefined.");
177 if(g_wildmax < 0) die("wildmax is undefined.");
178 if(g_snipwwidth < 0) die("snipwwidth is undefined.");
179 if(g_sniphwidth < 0) die("sniphwidth is undefined.");
180 if(g_snipawidth < 0) die("snipawidth is undefined.");
181 if(g_smlrvnum < 0) die("smlrvnum is undefined.");
182 /* read parameters */
183 if(!(idxname = getenv("PATH_INFO"))) idxname = "";
184 if(*idxname == '/') idxname++;
185 if((rp = strrchr(idxname, '/')) != NULL){
186 tmp = cbmemdup(idxname, rp - idxname);
187 idxname = tmp;
188 cbglobalgc(tmp, free);
189 if(strstr(idxname, ESTPATHSTR ESTCDIRSTR ESTPATHSTR) ||
190 strstr(idxname, ESTPATHSTR ESTPDIRSTR ESTPATHSTR)) idxname = "";
191 cmd = rp + 1;
192 } else {
193 cmd = "";
194 }
195 params = getparameters();
196 cbglobalgc(params, (void (*)(void *))cbmapclose);
197 cbmapout(params, PPOUTDOC, -1);
198 cbmapout(params, PPPOSTCMD, -1);
199 myurl = cbdatumopen(NULL, -1);
200 if((rp = getenv("HTTP_HOST")) != NULL) cbdatumprintf(myurl, "http://%s", rp);
201 if((rp = getenv("SCRIPT_NAME")) != NULL) cbdatumprintf(myurl, "%s", rp);
202 cbdatumprintf(myurl, "/%s", idxname);
203 /* open the database */
204 tmp = cbsprintf("%s%c%s", g_indexdir, ESTPATHCHR, idxname);
205 omode = ESTDBREADER;
206 if(!g_lockindex && strcmp(cmd, "put_doc") && strcmp(cmd, "out_doc")) omode |= ESTDBNOLCK;
207 if((db = est_mtdb_open(tmp, omode, &ecode)) != NULL && *g_pidxsuffix != '\0'){
208 free(tmp);
209 tmp = cbsprintf("%s%c%s%s", g_indexdir, ESTPATHCHR, idxname, g_pidxsuffix);
210 est_mtdb_add_pseudo_index(db, tmp);
211 }
212 free(tmp);
213 /* send the result */
214 if(!db){
215 senderror(404, "Not Found (the node does not exist)");
216 } else if(!strcmp(cmd, "inform")){
217 sendnodecmdinform(db, cbdatumptr(myurl), idxname, params);
218 } else if(!strcmp(cmd, "cacheusage")){
219 sendnodecmdcacheusage(db, cbdatumptr(myurl), idxname, params);
220 } else if(!strcmp(cmd, "search")){
221 sendnodecmdsearch(db, cbdatumptr(myurl), idxname, params);
222 } else if(!strcmp(cmd, "get_doc")){
223 sendnodecmdgetdoc(db, cbdatumptr(myurl), idxname, params);
224 } else if(!strcmp(cmd, "get_doc_attr")){
225 sendnodecmdgetdocattr(db, cbdatumptr(myurl), idxname, params);
226 } else if(!strcmp(cmd, "etch_doc")){
227 sendnodecmdetchdoc(db, cbdatumptr(myurl), idxname, params);
228 } else if(!strcmp(cmd, "uri_to_id")){
229 sendnodecmduritoid(db, cbdatumptr(myurl), idxname, params);
230 } else if(!strcmp(cmd, "put_doc")){
231 if(g_runmode == RM_RDONLY){
232 senderror(503, "Service Unavailable (read only)");
233 } else {
234 sendnodecmdputdoc(db, cbdatumptr(myurl), idxname, params);
235 }
236 } else if(!strcmp(cmd, "out_doc")){
237 if(g_runmode == RM_RDONLY){
238 senderror(503, "Service Unavailable (read only)");
239 } else {
240 sendnodecmdoutdoc(db, cbdatumptr(myurl), idxname, params);
241 }
242 } else if(!strcmp(cmd, "edit_doc")){
243 if(g_runmode == RM_RDONLY){
244 senderror(503, "Service Unavailable (read only)");
245 } else {
246 sendnodecmdeditdoc(db, cbdatumptr(myurl), idxname, params);
247 }
248 } else if(!strcmp(cmd, "sync")){
249 if(g_runmode == RM_RDONLY){
250 senderror(503, "Service Unavailable (read only)");
251 } else {
252 sendnodecmdsync(db, cbdatumptr(myurl), idxname, params);
253 }
254 } else if(!strcmp(cmd, "optimize")){
255 if(g_runmode == RM_RDONLY){
256 senderror(503, "Service Unavailable (read only)");
257 } else {
258 sendnodecmdoptimize(db, cbdatumptr(myurl), idxname, params);
259 }
260 } else if(!strcmp(cmd, "_set_user") || !strcmp(cmd, "_set_link")){
261 senderror(501, "Not Implemented (editing meta data is not supported)");
262 } else {
263 senderror(400, "Bad Request (the command is invalid)");
264 }
265 /* release resources */
266 if(db) est_mtdb_close(db, &ecode);
267 cbdatumclose(myurl);
268 /* perform the post commands */
269 if((cmd = cbmapget(params, PPOUTDOC, -1, NULL)) != NULL) procpostcmdoutdoc(idxname, cmd);
270 if((cmd = cbmapget(params, PPPOSTCMD, -1, NULL)) != NULL){
271 if(!strcmp(cmd, "sync")){
272 procpostcmdsync(idxname);
273 } else if(!strcmp(cmd, "optimize")){
274 procpostcmdoptimize(idxname);
275 }
276 }
277 return 0;
278 }
279
280
/* show the error page and exit
   `msg' is the message printed after "Error: " in the body of a 500 response.
   This function does not return: the process exits with status 1. */
static void die(const char *msg){
  printf("Status: 500 Internal Server Error\r\n"
         "Content-Type: text/plain; charset=UTF-8\r\n"
         "\r\n"
         "Error: %s\n",
         msg);
  exit(1);
}
289
290
/* skip the label of a line
   `str' is a line of the form "label: value".
   The return value points into `str' at the first character after the first ':' and any
   spaces or tabs following it, or is an empty string if the line contains no ':'. */
static const char *skiplabel(const char *str){
  const char *value = strchr(str, ':');
  if(value == NULL) return "";
  value++;
  return value + strspn(value, " \t");
}
300
301
302 /* get CGI parameters */
getparameters(void)303 static CBMAP *getparameters(void){
304 int maxlen = 1024 * 1024 * 32;
305 CBMAP *map, *attrs;
306 CBLIST *pairs, *parts;
307 const char *type, *rp, *body;
308 char *buf, *key, *val, *dkey, *dval, *wp, *bound, *fbuf, *aname;
309 int i, len, c, blen, flen;
310 map = cbmapopenex(37);
311 buf = NULL;
312 len = 0;
313 if((rp = getenv("REQUEST_METHOD")) != NULL && !strcmp(rp, "POST") &&
314 (rp = getenv("CONTENT_LENGTH")) != NULL && (len = atoi(rp)) > 0){
315 if(len > maxlen) len = maxlen;
316 buf = cbmalloc(len + 1);
317 for(i = 0; i < len && (c = getchar()) != EOF; i++){
318 buf[i] = c;
319 }
320 buf[i] = '\0';
321 if(i != len){
322 free(buf);
323 buf = NULL;
324 }
325 } else if((rp = getenv("QUERY_STRING")) != NULL){
326 buf = cbmemdup(rp, -1);
327 len = strlen(buf);
328 }
329 if(buf && len > 0){
330 if(!(type = getenv("CONTENT_TYPE"))) type = "text/plain";
331 if(cbstrfwmatch(type, "multipart/form-data") && (rp = strstr(rp, "boundary=")) != NULL){
332 rp += 9;
333 bound = cbmemdup(rp, -1);
334 if((wp = strchr(bound, ';')) != NULL) *wp = '\0';
335 parts = cbmimeparts(buf, len, bound);
336 for(i = 0; i < cblistnum(parts); i++){
337 body = cblistval(parts, i, &blen);
338 attrs = cbmapopen();
339 fbuf = cbmimebreak(body, blen, attrs, &flen);
340 if((rp = cbmapget(attrs, "NAME", -1, NULL)) != NULL){
341 cbmapput(map, rp, -1, fbuf, flen, FALSE);
342 aname = cbsprintf("%s-filename", rp);
343 if((rp = cbmapget(attrs, "FILENAME", -1, NULL)) != NULL)
344 cbmapput(map, aname, -1, rp, -1, FALSE);
345 free(aname);
346 }
347 free(fbuf);
348 cbmapclose(attrs);
349 }
350 cblistclose(parts);
351 free(bound);
352 } else if(cbstrfwmatch(type, ESTDRAFTTYPE)){
353 cbmapput(map, "draft", -1, buf, -1, FALSE);
354 } else {
355 pairs = cbsplit(buf, -1, "&");
356 for(i = 0; i < cblistnum(pairs); i++){
357 key = cbmemdup(cblistval(pairs, i, NULL), -1);
358 if((val = strchr(key, '=')) != NULL){
359 *(val++) = '\0';
360 dkey = cburldecode(key, NULL);
361 dval = cburldecode(val, NULL);
362 cbmapput(map, dkey, -1, dval, -1, FALSE);
363 free(dval);
364 free(dkey);
365 }
366 free(key);
367 }
368 cblistclose(pairs);
369 }
370 }
371 free(buf);
372 return map;
373 }
374
375
/* send the error page
   `code' is the status code of the response.
   `msg' is used both as the reason phrase of the status line and as the response body. */
static void senderror(int code, const char *msg){
  printf("Status: %d %s\r\n"
         "Content-Type: text/plain; charset=UTF-8\r\n"
         "\r\n"
         "%s\n",
         code, msg, msg);
}
383
384
385 /* send the result of the inform command */
sendnodecmdinform(ESTMTDB * db,const char * myurl,const char * mylabel,CBMAP * params)386 static void sendnodecmdinform(ESTMTDB *db, const char *myurl, const char *mylabel, CBMAP *params){
387 printf("Content-Type: %s; charset=UTF-8\r\n", ESTINFORMTYPE);
388 printf("\r\n");
389 printf("%s\t%s\t%d\t%d\t%.0f\n",
390 myurl, mylabel, est_mtdb_doc_num(db) + est_mtdb_pseudo_doc_num(db),
391 est_mtdb_word_num(db), est_mtdb_size(db));
392 printf("\n");
393 printf("dummy-admin\n");
394 printf("\n");
395 printf("dummy-user\n");
396 printf("\n");
397 }
398
399
400 /* send the result of the cacheusage command */
sendnodecmdcacheusage(ESTMTDB * db,const char * myurl,const char * mylabel,CBMAP * params)401 static void sendnodecmdcacheusage(ESTMTDB *db, const char *myurl, const char *mylabel,
402 CBMAP *params){
403 printf("Content-Type: text/plain; charset=UTF-8\r\n");
404 printf("\r\n");
405 printf("%0.6f\n", 0.0);
406 }
407
408
409 /* send the result of the search command */
sendnodecmdsearch(ESTMTDB * db,const char * myurl,const char * mylabel,CBMAP * params)410 static void sendnodecmdsearch(ESTMTDB *db, const char *myurl, const char *mylabel,
411 CBMAP *params){
412 RESMAP *resmap;
413 RESDOC **resdocs, *resdoc;
414 ESTCOND *cond;
415 CBMAP *hints, *kwords;
416 const CBLIST *texts;
417 CBLIST *attrs, *words, *list;
418 ESTDOC *doc;
419 const char *rp, *phrase, *order, *distinct, *bordstr, *kbuf, *vbuf;
420 char name[NUMBUFSIZ], *snippet;
421 int i, j, len, max, options, auxiliary, wwidth, hwidth, awidth, skip, lmax, rateuri;
422 int *res, rnum, hnum, miss, cnt, score, down, dnum, wnum, end, num, id;
423 double curtime, itime, weight, fsiz;
424 if(!(phrase = cbmapget(params, "phrase", -1, NULL))) phrase = "";
425 while(*phrase == ' ' || *phrase == '\t'){
426 phrase++;
427 }
428 attrs = cblistopen();
429 cbglobalgc(attrs, (void (*)(void *))cblistclose);
430 if((rp = cbmapget(params, "attr", -1, NULL)) != NULL){
431 while(*rp == ' ' || *rp == '\t'){
432 rp++;
433 }
434 if(*rp != '\0') cblistpush(attrs, rp, -1);
435 }
436 for(i = 0; i <= CONDATTRMAX; i++){
437 len = sprintf(name, "attr%d", i);
438 if((rp = cbmapget(params, name, len, NULL)) != NULL){
439 while(*rp == ' ' || *rp == '\t'){
440 rp++;
441 }
442 if(*rp != '\0') cblistpush(attrs, rp, -1);
443 }
444 }
445 if(!(order = cbmapget(params, "order", -1, NULL))) order = "";
446 while(*order == ' ' || *order == '\t'){
447 order++;
448 }
449 max = -1;
450 if((rp = cbmapget(params, "max", -1, NULL)) != NULL) max = atoi(rp);
451 if(max < 1) max = DEFMAXSRCH;
452 if(max > g_searchmax) max = g_searchmax;
453 options = -1;
454 if((rp = cbmapget(params, "options", -1, NULL)) != NULL) options = atoi(rp);
455 auxiliary = INT_MIN;
456 if((rp = cbmapget(params, "auxiliary", -1, NULL)) != NULL) auxiliary = atoi(rp);
457 if(!(distinct = cbmapget(params, "distinct", -1, NULL))) distinct = "";
458 while(*distinct == ' ' || *distinct == '\t'){
459 distinct++;
460 }
461 wwidth = -1;
462 if((rp = cbmapget(params, "wwidth", -1, NULL)) != NULL) wwidth = atoi(rp);
463 if(wwidth < 0) wwidth = g_snipwwidth;
464 hwidth = -1;
465 if((rp = cbmapget(params, "hwidth", -1, NULL)) != NULL) hwidth = atoi(rp);
466 if(hwidth < 0) hwidth = g_snipwwidth;
467 awidth = -1;
468 if((rp = cbmapget(params, "awidth", -1, NULL)) != NULL) awidth = atoi(rp);
469 if(awidth < 0) awidth = g_snipawidth;
470 skip = -1;
471 if((rp = cbmapget(params, "skip", -1, NULL)) != NULL) skip = atoi(rp);
472 if(skip < 0) skip = 0;
473 bordstr = est_border_str();
474 cond = est_cond_new();
475 if(*phrase != '\0') est_cond_set_phrase(cond, phrase);
476 for(i = 0; i < cblistnum(attrs); i++){
477 est_cond_add_attr(cond, cblistval(attrs, i, NULL));
478 }
479 if(*order != '\0') est_cond_set_order(cond, order);
480 if(options > 0) est_cond_set_options(cond, options);
481 est_cond_set_options(cond, ESTCONDSCFB);
482 if(auxiliary != INT_MIN) est_cond_set_auxiliary(cond, auxiliary);
483 if(*distinct != '\0') est_cond_set_distinct(cond, distinct);
484 resmap = resmap_new();
485 hints = cbmapopenex(MINIBNUM);
486 curtime = est_gettimeofday();
487 lmax = max + skip + 1;
488 res = est_mtdb_search(db, cond, &rnum, hints);
489 hnum = (vbuf = cbmapget(hints, "", 0, NULL)) ? atoi(vbuf) : rnum;
490 if(max >= 0 && hnum < max + 1 && est_cond_auxiliary_word(cond, "")){
491 free(res);
492 est_cond_set_auxiliary(cond, -1);
493 res = est_mtdb_search(db, cond, &rnum, hints);
494 hnum = (vbuf = cbmapget(hints, "", 0, NULL)) ? atoi(vbuf) : rnum;
495 }
496 miss = 0;
497 itime = est_gettimeofday() - curtime;
498 cnt = 0;
499 rateuri = g_rateuri && !cbstrfwmatch(phrase, ESTOPSIMILAR);
500 for(i = 0; i < rnum && cnt < lmax; i++){
501 if(!(doc = est_mtdb_get_doc(db, res[i], 0))) continue;
502 if(g_scancheck && *phrase != '\0' && *phrase != '[' && *phrase != '*' &&
503 res[i] < ESTPDOCIDMIN && !est_mtdb_scan_doc(db, doc, cond)){
504 est_doc_delete(doc);
505 miss++;
506 continue;
507 }
508 score = est_cond_score(cond, i);
509 if(rateuri && g_scoreexpr != SE_ASIS){
510 if((vbuf = est_doc_attr(doc, ESTDATTRURI)) != NULL){
511 if(score < 100) score = 100;
512 down = 4;
513 if(cbstrfwimatch(vbuf, "file://")){
514 vbuf += 7;
515 } else if(cbstrfwimatch(vbuf, "ftp://")){
516 vbuf += 6;
517 } else if(cbstrfwimatch(vbuf, "http://")){
518 vbuf += 7;
519 } else if(cbstrfwimatch(vbuf, "https://")){
520 vbuf += 8;
521 } else {
522 down += 3;
523 }
524 while(vbuf[0] != '\0'){
525 if(vbuf[0] == '?' || vbuf[0] == '#'){
526 down++;
527 break;
528 }
529 if(vbuf[0] == '/' && vbuf[1] != '\0') down++;
530 vbuf++;
531 }
532 score *= 8.0 / (double)down;
533 } else {
534 score = 0;
535 }
536 }
537 est_doc_add_attr(doc, DATTRNDURL, myurl);
538 est_doc_add_attr(doc, DATTRNDLABEL, mylabel);
539 if(score >= 0){
540 sprintf(name, "%d", score);
541 est_doc_add_attr(doc, DATTRNDSCORE, name);
542 }
543 if(g_scoreexpr != SE_ASIS){
544 weight = 1.0;
545 if((vbuf = est_doc_attr(doc, ESTDATTRWEIGHT)) != NULL){
546 weight = strtod(vbuf, NULL);
547 weight = weight >= 0.01 ? weight : 0.01;
548 }
549 weight /= 10.0;
550 switch(g_mergemethod){
551 case MM_SCORE:
552 score = score * weight;
553 break;
554 case MM_SCRK:
555 score = score * (max * 2 - cnt) * weight;
556 break;
557 case MM_RANK:
558 score = SELFCREDIT * (max - cnt);
559 break;
560 }
561 }
562 resmap_put(resmap, score, doc, NULL, NULL);
563 cnt++;
564 }
565 free(res);
566 words = est_hints_to_words(hints);
567 dnum = est_mtdb_doc_num(db) + est_mtdb_pseudo_doc_num(db);
568 wnum = est_mtdb_word_num(db);
569 fsiz = est_mtdb_size(db);
570 end = max + skip;
571 curtime = est_gettimeofday() - curtime;
572 printf("Content-Type: %s; charset=UTF-8\r\n", ESTSEARCHTYPE);
573 printf("\r\n");
574 printf("%s\n", bordstr);
575 printf("VERSION\t%s\n", _EST_PROTVER);
576 printf("NODE\t%s\n", myurl);
577 printf("HIT\t%d\n", hnum - miss);
578 cbmapiterinit(hints);
579 num = 1;
580 while((kbuf = cbmapiternext(hints, NULL)) != NULL){
581 if(*kbuf == '\0') continue;
582 printf("HINT#%d\t%s\t%s\n", num, kbuf, cbmapiterval(kbuf, NULL));
583 num++;
584 }
585 printf("DOCNUM\t%d\n", dnum);
586 printf("WORDNUM\t%d\n", wnum);
587 printf("TIME\t%.6f\n", curtime / 1000.0);
588 printf("TIME#i\t%.6f\n", itime / 1000.0);
589 printf("TIME#0\t%.6f\n", curtime / 1000.0);
590 printf("LINK#0\t%s\t", myurl);
591 printf("%s\t%d\t%d\t%d\t%.0f\t%d\n", mylabel, SELFCREDIT, dnum, wnum, fsiz, hnum - miss);
592 printf("VIEW\tSNIPPET\n");
593 printf("\n");
594 if(*order == '\0' && cbstrfwmatch(phrase, ESTOPSIMILAR)) order = DATTRNDSCORE " " ESTORDNUMD;
595 resdocs = resmap_list(resmap, &rnum, order, NULL);
596 for(i = skip; i < rnum && i < end; i++){
597 resdoc = resdocs[i];
598 if(!resdoc->doc) continue;
599 printf("%s\n", bordstr);
600 list = est_doc_attr_names(resdoc->doc);
601 for(j = 0; j < cblistnum(list); j++){
602 vbuf = cblistval(list, j, NULL);
603 printf("%s=%s\n", vbuf, est_doc_attr(resdoc->doc, vbuf));
604 }
605 cblistclose(list);
606 if(g_smlrvnum > 0){
607 printf("%s", ESTDCNTLVECTOR);
608 id = est_doc_id(resdoc->doc);
609 kwords = id > 0 ? est_mtdb_get_keywords(db, id) : NULL;
610 if(!kwords){
611 if(g_extdelay < 0){
612 kwords = est_morph_etch_doc(resdoc->doc, g_smlrvnum);
613 } else {
614 kwords = est_mtdb_etch_doc(db, resdoc->doc, g_smlrvnum);
615 }
616 }
617 cbmapiterinit(kwords);
618 while((kbuf = cbmapiternext(kwords, NULL)) != NULL){
619 printf("\t%s\t%s", kbuf, cbmapiterval(kbuf, NULL));
620 }
621 cbmapclose(kwords);
622 printf("\n");
623 }
624 printf("\n");
625 if(wwidth < 0){
626 texts = est_doc_texts(resdoc->doc);
627 for(j = 0; j < cblistnum(texts); j++){
628 printf("%s\n", cblistval(texts, j, NULL));
629 }
630 vbuf = est_doc_hidden_texts(resdoc->doc);
631 if(vbuf[0] != '\0') printf("\t%s\n", vbuf);
632 } else if(wwidth > 0){
633 snippet = est_doc_make_snippet(resdoc->doc, words, wwidth, hwidth, awidth);
634 printf("%s", snippet);
635 free(snippet);
636 }
637 }
638 free(resdocs);
639 printf("%s:END\n", bordstr);
640 cblistclose(words);
641 cbmapclose(hints);
642 resmap_delete(resmap);
643 est_cond_delete(cond);
644 }
645
646
647 /* send the result of the get_doc command */
sendnodecmdgetdoc(ESTMTDB * db,const char * myurl,const char * mylabel,CBMAP * params)648 static void sendnodecmdgetdoc(ESTMTDB *db, const char *myurl, const char *mylabel,
649 CBMAP *params){
650 ESTDOC *doc;
651 const char *rp, *uri;
652 char *draft;
653 int id;
654 id = (rp = cbmapget(params, "id", -1, NULL)) ? atoi(rp) : 0;
655 if(!(uri = cbmapget(params, "uri", -1, NULL))) uri = "";
656 if(id < 1 && uri[0] == '\0'){
657 senderror(400, "Bad Request (the parameters lack)");
658 return;
659 }
660 if(id < 1) id = est_mtdb_uri_to_id(db, uri);
661 if(id > 0 && (doc = est_mtdb_get_doc(db, id, 0)) != NULL){
662 est_doc_add_attr(doc, DATTRNDURL, myurl);
663 est_doc_add_attr(doc, DATTRNDLABEL, mylabel);
664 draft = est_doc_dump_draft(doc);
665 printf("Content-Type: %s; charset=UTF-8\r\n", ESTDRAFTTYPE);
666 printf("\r\n");
667 printf("%s", draft);
668 free(draft);
669 est_doc_delete(doc);
670 } else {
671 senderror(400, "Bad Request (maybe, the document does not exist)");
672 }
673 }
674
675
676 /* send the result of the get_doc_attr command */
sendnodecmdgetdocattr(ESTMTDB * db,const char * myurl,const char * mylabel,CBMAP * params)677 static void sendnodecmdgetdocattr(ESTMTDB *db, const char *myurl, const char *mylabel,
678 CBMAP *params){
679 const char *rp, *uri, *attr;
680 char *value;
681 int id;
682 id = (rp = cbmapget(params, "id", -1, NULL)) ? atoi(rp) : 0;
683 if(!(uri = cbmapget(params, "uri", -1, NULL))) uri = "";
684 if(!(attr = cbmapget(params, "attr", -1, NULL))) attr = "";
685 if((id < 1 && uri[0] == '\0') || attr[0] == '\0'){
686 senderror(400, "Bad Request (the parameters lack)");
687 return;
688 }
689 if(id < 1) id = est_mtdb_uri_to_id(db, uri);
690 if(id > 0 && (value = est_mtdb_get_doc_attr(db, id, attr)) != NULL){
691 printf("Content-Type: text/plain; charset=UTF-8\r\n");
692 printf("\r\n");
693 printf("%s\n", value);
694 free(value);
695 } else {
696 senderror(400, "Bad Request (maybe, the document or the attribute does not exist)");
697 }
698 }
699
700
701 /* send the result of the etch_doc command */
sendnodecmdetchdoc(ESTMTDB * db,const char * myurl,const char * mylabel,CBMAP * params)702 static void sendnodecmdetchdoc(ESTMTDB *db, const char *myurl, const char *mylabel,
703 CBMAP *params){
704 ESTDOC *doc;
705 CBMAP *kwords;
706 const char *rp, *uri, *kbuf;
707 int id, ksiz;
708 id = (rp = cbmapget(params, "id", -1, NULL)) ? atoi(rp) : 0;
709 if(!(uri = cbmapget(params, "uri", -1, NULL))) uri = "";
710 if(id < 1 && uri[0] == '\0'){
711 senderror(400, "Bad Request (the parameters lack)");
712 return;
713 }
714 if(id < 1) id = est_mtdb_uri_to_id(db, uri);
715 kwords = NULL;
716 if(id > 0){
717 kwords = est_mtdb_get_keywords(db, id);
718 if(!kwords && (doc = est_mtdb_get_doc(db, id, 0)) != NULL){
719 kwords = est_mtdb_etch_doc(db, doc, g_smlrvnum > 0 ? g_smlrvnum : KWORDNUM);
720 est_doc_delete(doc);
721 }
722 }
723 if(kwords){
724 printf("Content-Type: text/plain; charset=UTF-8\r\n");
725 printf("\r\n");
726 cbmapiterinit(kwords);
727 while((kbuf = cbmapiternext(kwords, &ksiz)) != NULL){
728 printf("%s\t%s\n", kbuf, cbmapiterval(kbuf, NULL));
729 }
730 cbmapclose(kwords);
731 } else {
732 senderror(400, "Bad Request (maybe, the document does not exist)");
733 }
734 }
735
736
737 /* send the result of the uri_to_id command */
sendnodecmduritoid(ESTMTDB * db,const char * myurl,const char * mylabel,CBMAP * params)738 static void sendnodecmduritoid(ESTMTDB *db, const char *myurl, const char *mylabel,
739 CBMAP *params){
740 const char *uri;
741 int id;
742 uri = cbmapget(params, "uri", -1, NULL);
743 if(!uri){
744 senderror(400, "Bad Request (the parameters lack)");
745 return;
746 }
747 if((id = est_mtdb_uri_to_id(db, uri)) > 0){
748 printf("Content-Type: text/plain; charset=UTF-8\r\n");
749 printf("\r\n");
750 printf("%d\n", id);
751 } else {
752 senderror(400, "Bad Request (maybe, the URI is not registered)");
753 }
754 }
755
756
757 /* send the result of the put_doc command */
sendnodecmdputdoc(ESTMTDB * db,const char * myurl,const char * mylabel,CBMAP * params)758 static void sendnodecmdputdoc(ESTMTDB *db, const char *myurl, const char *mylabel,
759 CBMAP *params){
760 ESTDOC *doc;
761 CBMAP *kwords;
762 const char *draft, *uri;
763 char *dbuf, *tmp;
764 draft = cbmapget(params, "draft", -1, NULL);
765 if(!draft){
766 senderror(400, "Bad Request (the parameters lack)");
767 return;
768 }
769 doc = est_doc_new_from_draft(draft);
770 if(!(uri = est_doc_attr(doc, ESTDATTRURI))){
771 senderror(400, "Bad Request (the parameters lack)");
772 est_doc_delete(doc);
773 return;
774 }
775 dbuf = NULL;
776 if(g_smlrvnum > 0 && g_extdelay < 0 && !est_doc_keywords(doc)){
777 kwords = est_morph_etch_doc(doc, g_smlrvnum);
778 est_doc_set_keywords(doc, kwords);
779 cbmapclose(kwords);
780 dbuf = est_doc_dump_draft(doc);
781 draft = dbuf;
782 }
783 tmp = cbsprintf("%s%s%c%012d%06d.est",
784 est_mtdb_name(db), g_pidxsuffix, ESTPATHCHR, (int)time(NULL), (int)getpid());
785 if(cbwritefile(tmp, draft, -1)){
786 if(est_mtdb_uri_to_id(db, uri) > 0) cbmapput(params, PPOUTDOC, -1, uri, -1, TRUE);
787 if(est_mtdb_pseudo_doc_num(db) > g_pidxdocmax)
788 cbmapput(params, PPPOSTCMD, -1, "sync", -1, TRUE);
789 printf("Content-Type: text/plain; charset=UTF-8\r\n");
790 printf("\r\n");
791 printf("OK\n");
792 } else {
793 senderror(403, "Forbidden (writing a pseudo document failed)");
794 }
795 free(tmp);
796 free(dbuf);
797 est_doc_delete(doc);
798 }
799
800
801 /* send the result of the out_doc command */
sendnodecmdoutdoc(ESTMTDB * db,const char * myurl,const char * mylabel,CBMAP * params)802 static void sendnodecmdoutdoc(ESTMTDB *db, const char *myurl, const char *mylabel,
803 CBMAP *params){
804 const char *rp, *uri;
805 char *tmp;
806 int id;
807 id = (rp = cbmapget(params, "id", -1, NULL)) ? atoi(rp) : 0;
808 if(!(uri = cbmapget(params, "uri", -1, NULL))) uri = "";
809 if(id < 1 && uri[0] == '\0'){
810 senderror(400, "Bad Request (the parameters lack)");
811 return;
812 }
813 if(id < 1) id = est_mtdb_uri_to_id(db, uri);
814 if(id > 0){
815 if((tmp = est_mtdb_get_doc_attr(db, id, ESTDATTRURI)) != NULL){
816 cbmapput(params, PPOUTDOC, -1, tmp, -1, TRUE);
817 free(tmp);
818 }
819 printf("Content-Type: text/plain; charset=UTF-8\r\n");
820 printf("\r\n");
821 printf("OK\n");
822 } else {
823 senderror(400, "Bad Request (maybe, the document does not exist)");
824 }
825 }
826
827
828 /* send the result of the edit_doc command */
/* Details of the edit_doc handler:
   - the "draft" parameter is parsed into a document, which must carry a URI attribute
     that resolves to an existing document ID in `db`;
   - an ID below ESTPDOCIDMIN is edited in the database via est_mtdb_edit_doc;
   - an ID of ESTPDOCIDMIN or more is looked up among the draft files of the directory
     named by the database name followed by g_pidxsuffix; the first file (in sorted order)
     whose URI matches is rewritten with the attributes of the request draft and the texts
     and hidden texts of the stored draft.
   The response is either "OK" as plain text or an error sent by senderror.
   `myurl` and `mylabel` are not used by this command. */
static void sendnodecmdeditdoc(ESTMTDB *db, const char *myurl, const char *mylabel,
                               CBMAP *params){
  ESTDOC *reqdoc, *filedoc, *merged;
  const CBLIST *bodytexts;
  CBLIST *entries, *attrnames;
  const char *draft, *uri, *entry, *val;
  char *path, idstr[NUMBUFSIZ], *filedraft, *mergeddraft;
  int ei, k, total, id, written;
  if(!(draft = cbmapget(params, "draft", -1, NULL))){
    senderror(400, "Bad Request (the parameters lack)");
    return;
  }
  reqdoc = est_doc_new_from_draft(draft);
  uri = est_doc_attr(reqdoc, ESTDATTRURI);
  if(!uri){
    senderror(400, "Bad Request (the parameters lack)");
    est_doc_delete(reqdoc);
    return;
  }
  id = est_mtdb_uri_to_id(db, uri);
  if(id < 1){
    senderror(400, "Bad Request (maybe, the document does not exist)");
    est_doc_delete(reqdoc);
    return;
  }
  if(id < ESTPDOCIDMIN){
    /* document stored in the database: set its ID and let the database edit it */
    sprintf(idstr, "%d", id);
    est_doc_add_attr(reqdoc, ESTDATTRID, idstr);
    if(est_mtdb_edit_doc(db, reqdoc)){
      printf("Content-Type: text/plain; charset=UTF-8\r\n"
             "\r\n"
             "OK\n");
    } else {
      senderror(400, "Bad Request (maybe, the document is invalid)");
    }
    est_doc_delete(reqdoc);
    return;
  }
  /* pseudo document: the ID attribute is dropped before the draft is merged */
  est_doc_add_attr(reqdoc, ESTDATTRID, NULL);
  path = cbsprintf("%s%s", est_mtdb_name(db), g_pidxsuffix);
  entries = cbdirlist(path);
  free(path);
  written = FALSE;
  if(entries){
    cblistsort(entries);
    total = cblistnum(entries);
    for(ei = 0; ei < total; ei++){
      entry = cblistval(entries, ei, NULL);
      if(strcmp(entry, ESTCDIRSTR) == 0 || strcmp(entry, ESTPDIRSTR) == 0) continue;
      path = cbsprintf("%s%s%c%s", est_mtdb_name(db), g_pidxsuffix, ESTPATHCHR, entry);
      filedraft = cbreadfile(path, NULL);
      if(filedraft){
        filedoc = est_doc_new_from_draft(filedraft);
        val = est_doc_attr(filedoc, ESTDATTRURI);
        if(val && strcmp(val, uri) == 0){
          merged = est_doc_new();
          /* attributes come from the request draft, except the ID */
          attrnames = est_doc_attr_names(reqdoc);
          for(k = 0; k < cblistnum(attrnames); k++){
            val = cblistval(attrnames, k, NULL);
            if(strcmp(val, ESTDATTRID) == 0) continue;
            est_doc_add_attr(merged, val, est_doc_attr(reqdoc, val));
          }
          /* texts and hidden texts come from the stored draft */
          bodytexts = est_doc_texts(filedoc);
          for(k = 0; k < cblistnum(bodytexts); k++){
            est_doc_add_text(merged, cblistval(bodytexts, k, NULL));
          }
          val = est_doc_hidden_texts(filedoc);
          if(val && val[0] != '\0') est_doc_add_hidden_text(merged, val);
          mergeddraft = est_doc_dump_draft(merged);
          if(cbwritefile(path, mergeddraft, -1)) written = TRUE;
          free(mergeddraft);
          cblistclose(attrnames);
          est_doc_delete(merged);
        }
        est_doc_delete(filedoc);
        free(filedraft);
      }
      free(path);
      /* stop at the first draft that was rewritten successfully */
      if(written) break;
    }
    cblistclose(entries);
  }
  if(written){
    printf("Content-Type: text/plain; charset=UTF-8\r\n"
           "\r\n"
           "OK\n");
  } else {
    senderror(403, "Forbidden (writing a pseudo document failed)");
  }
  est_doc_delete(reqdoc);
}
914
915
916 /* send the result of the sync command */
/* The handler performs no synchronization itself: it only stores "sync" in `params` under
   the pseudo parameter PPPOSTCMD and replies "OK" as plain text.
   `db`, `myurl` and `mylabel` are not used by this command. */
static void sendnodecmdsync(ESTMTDB *db, const char *myurl, const char *mylabel,
                            CBMAP *params){
  cbmapput(params, PPPOSTCMD, -1, "sync", -1, TRUE);
  printf("Content-Type: text/plain; charset=UTF-8\r\n"
         "\r\n"
         "OK\n");
}
924
925
926 /* send the result of the optimize command */
/* The handler performs no optimization itself: it only stores "optimize" in `params` under
   the pseudo parameter PPPOSTCMD and replies "OK" as plain text.
   `db`, `myurl` and `mylabel` are not used by this command. */
static void sendnodecmdoptimize(ESTMTDB *db, const char *myurl, const char *mylabel,
                                CBMAP *params){
  cbmapput(params, PPPOSTCMD, -1, "optimize", -1, TRUE);
  printf("Content-Type: text/plain; charset=UTF-8\r\n"
         "\r\n"
         "OK\n");
}
934
935
/* perform the out_doc post command */
procpostcmdoutdoc(const char * idxname,const char * uri)937 static void procpostcmdoutdoc(const char *idxname, const char *uri){
938 ESTMTDB *db;
939 ESTDOC *doc;
940 CBLIST *list;
941 const char *name, *vbuf;
942 char *tmp, *draft;
943 int i, ecode, done, id;
944 tmp = cbsprintf("%s%c%s", g_indexdir, ESTPATHCHR, idxname);
945 if(!(db = est_mtdb_open(tmp, ESTDBWRITER, &ecode))){
946 free(tmp);
947 return;
948 }
949 free(tmp);
950 done = FALSE;
951 if((id = est_mtdb_uri_to_id(db, uri)) > 0 && est_mtdb_out_doc(db, id, ESTODCLEAN)) done = TRUE;
952 if(!done){
953 tmp = cbsprintf("%s%c%s%s", g_indexdir, ESTPATHCHR, idxname, g_pidxsuffix);
954 list = cbdirlist(tmp);
955 free(tmp);
956 if(list){
957 cblistsort(list);
958 for(i = 0; !done && i < cblistnum(list); i++){
959 name = cblistval(list, i, NULL);
960 if(!strcmp(name, ESTCDIRSTR) || !strcmp(name, ESTPDIRSTR)) continue;
961 tmp = cbsprintf("%s%c%s%s%c%s",
962 g_indexdir, ESTPATHCHR, idxname, g_pidxsuffix, ESTPATHCHR, name);
963 if((draft = cbreadfile(tmp, NULL)) != NULL){
964 doc = est_doc_new_from_draft(draft);
965 if((vbuf = est_doc_attr(doc, ESTDATTRURI)) != NULL && !strcmp(vbuf, uri)){
966 unlink(tmp);
967 done = TRUE;
968 }
969 est_doc_delete(doc);
970 free(draft);
971 }
972 free(tmp);
973 }
974 cblistclose(list);
975 }
976 }
977 est_mtdb_close(db, &ecode);
978 }
979
980
981 /* perform the sync post command */
procpostcmdsync(const char * idxname)982 static void procpostcmdsync(const char *idxname){
983 ESTMTDB *db;
984 ESTDOC *doc;
985 CBLIST *list;
986 const char *name;
987 char *tmp, *draft;
988 int i, ecode, num;
989 tmp = cbsprintf("%s%c%s", g_indexdir, ESTPATHCHR, idxname);
990 if(!(db = est_mtdb_open(tmp, ESTDBWRITER, &ecode))){
991 free(tmp);
992 return;
993 }
994 free(tmp);
995 tmp = cbsprintf("%s%c%s%s", g_indexdir, ESTPATHCHR, idxname, g_pidxsuffix);
996 list = cbdirlist(tmp);
997 free(tmp);
998 if(list){
999 cblistsort(list);
1000 num = cblistnum(list) - g_pidxdocmin;
1001 for(i = 0; i < num; i++){
1002 name = cblistval(list, i, NULL);
1003 if(!strcmp(name, ESTCDIRSTR) || !strcmp(name, ESTPDIRSTR)) continue;
1004 tmp = cbsprintf("%s%c%s%s%c%s",
1005 g_indexdir, ESTPATHCHR, idxname, g_pidxsuffix, ESTPATHCHR, name);
1006 if((draft = cbreadfile(tmp, NULL)) != NULL){
1007 doc = est_doc_new_from_draft(draft);
1008 if(est_mtdb_put_doc(db, doc, ESTPDCLEAN)) unlink(tmp);
1009 est_doc_delete(doc);
1010 free(draft);
1011 }
1012 free(tmp);
1013 }
1014 cblistclose(list);
1015 }
1016 est_mtdb_close(db, &ecode);
1017 }
1018
1019
1020 /* perform the optimize post command */
procpostcmdoptimize(const char * idxname)1021 static void procpostcmdoptimize(const char *idxname){
1022 ESTMTDB *db;
1023 char *tmp;
1024 int ecode;
1025 tmp = cbsprintf("%s%c%s", g_indexdir, ESTPATHCHR, idxname);
1026 if(!(db = est_mtdb_open(tmp, ESTDBWRITER, &ecode))){
1027 free(tmp);
1028 return;
1029 }
1030 free(tmp);
1031 est_mtdb_optimize(db, ESTOPTNOPURGE);
1032 est_mtdb_close(db, &ecode);
1033 }
1034
1035
1036
1037 /* END OF FILE */
1038