1 /*************************************************************************************************
2 * Utility for debugging Odeum and its applications
3 * Copyright (C) 2000-2007 Mikio Hirabayashi
4 * This file is part of QDBM, Quick Database Manager.
5 * QDBM is free software; you can redistribute it and/or modify it under the terms of the GNU
6 * Lesser General Public License as published by the Free Software Foundation; either version
7 * 2.1 of the License or any later version. QDBM is distributed in the hope that it will be
8 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
9 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
10 * details.
11 * You should have received a copy of the GNU Lesser General Public License along with QDBM; if
12 * not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
13 * 02111-1307 USA.
14 *************************************************************************************************/
15
16
17 #include <depot.h>
18 #include <cabin.h>
19 #include <odeum.h>
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <string.h>
23
/* boolean constants; any definition made by an earlier header is discarded first */
#undef TRUE
#define TRUE 1 /* boolean true */
#undef FALSE
#define FALSE 0 /* boolean false */

#define MAXSRCHWORDS 256 /* max number of search words */
#define WOCCRPOINT 10000 /* points per occurrence (not referenced in the visible source) */
#define MAXKEYWORDS 8 /* max number of keywords */
#define SUMMARYWIDTH 16 /* width of each phrase in a summary */
#define MAXSUMMARY 128 /* max number of words in a summary */
34
35
/* for RISC OS: compiled only when the compiler identifies the target as RISC OS */
#if defined(__riscos__) || defined(__riscos)
#include <unixlib/local.h>
/* NOTE(review): presumably turns off UnixLib's file name translation -- confirm */
int __riscosify_control = __RISCOSIFY_NO_PROCESS;
#endif
41
42
/* global variables */
/* set by main from argv[0] and used as the prefix of usage and error messages */
const char *progname; /* program name */
45
46
/* function prototypes */
/* entry point and common helpers */
int main(int argc, char **argv);
void usage(void);
char *readstdin(int *sp);
void otcb(const char *fname, ODEUM *odeum, const char *msg);
/* argument parsers, one per subcommand */
int runcreate(int argc, char **argv);
int runput(int argc, char **argv);
int runout(int argc, char **argv);
int runget(int argc, char **argv);
int runsearch(int argc, char **argv);
int runlist(int argc, char **argv);
int runoptimize(int argc, char **argv);
int runinform(int argc, char **argv);
int runmerge(int argc, char **argv);
int runremove(int argc, char **argv);
int runbreak(int argc, char **argv);
/* reporting and formatting helpers */
void pdperror(const char *name);
void printdoc(const ODDOC *doc, int tb, int hb, int score, ODEUM *odeum, const CBLIST *skeys);
char *docsummary(const ODDOC *doc, const CBLIST *kwords, int num, int hilight);
CBMAP *listtomap(const CBLIST *list);
/* workers, one per subcommand; each returns 0 on success and 1 on failure */
int docreate(const char *name);
int doput(const char *name, const char *text, const char *uri, const char *title,
          const char *author, const char *date, int wmax, int keep);
int doout(const char *name, const char *uri, int id);
int doget(const char *name, const char *uri, int id, int tb, int hb);
int dosearch(const char *name, const char *text, int max, int or, int idf, int ql,
             int tb, int hb, int nb);
int dolist(const char *name, int tb, int hb);
int dooptimize(const char *name);
int doinform(const char *name);
int domerge(const char *name, const CBLIST *elems);
int doremove(const char *name);
int dobreak(const char *text, int hb, int kb, int sb);
80
81
82 /* main routine */
main(int argc,char ** argv)83 int main(int argc, char **argv){
84 char *env;
85 int rv;
86 cbstdiobin();
87 progname = argv[0];
88 if((env = getenv("QDBMDBGFD")) != NULL) dpdbgfd = atoi(env);
89 if(argc < 2) usage();
90 rv = 0;
91 if(!strcmp(argv[1], "create")){
92 odsetotcb(otcb);
93 rv = runcreate(argc, argv);
94 } else if(!strcmp(argv[1], "put")){
95 odsetotcb(otcb);
96 rv = runput(argc, argv);
97 } else if(!strcmp(argv[1], "out")){
98 odsetotcb(otcb);
99 rv = runout(argc, argv);
100 } else if(!strcmp(argv[1], "get")){
101 rv = runget(argc, argv);
102 } else if(!strcmp(argv[1], "search")){
103 rv = runsearch(argc, argv);
104 } else if(!strcmp(argv[1], "list")){
105 rv = runlist(argc, argv);
106 } else if(!strcmp(argv[1], "optimize")){
107 odsetotcb(otcb);
108 rv = runoptimize(argc, argv);
109 } else if(!strcmp(argv[1], "inform")){
110 rv = runinform(argc, argv);
111 } else if(!strcmp(argv[1], "merge")){
112 odsetotcb(otcb);
113 rv = runmerge(argc, argv);
114 } else if(!strcmp(argv[1], "remove")){
115 rv = runremove(argc, argv);
116 } else if(!strcmp(argv[1], "break")){
117 rv = runbreak(argc, argv);
118 } else if(!strcmp(argv[1], "version") || !strcmp(argv[1], "--version")){
119 printf("Powered by QDBM version %s\n", dpversion);
120 printf("Copyright (c) 2000-2007 Mikio Hirabayashi\n");
121 rv = 0;
122 } else {
123 usage();
124 }
125 return rv;
126 }
127
128
129 /* print the usage and exit */
usage(void)130 void usage(void){
131 fprintf(stderr, "%s: administration utility for Odeum\n", progname);
132 fprintf(stderr, "\n");
133 fprintf(stderr, "usage:\n");
134 fprintf(stderr, " %s create name\n", progname);
135 fprintf(stderr, " %s put [-uri str] [-title str] [-author str] [-date str]"
136 " [-wmax num] [-keep] name [file]\n", progname);
137 fprintf(stderr, " %s out [-id] name expr\n", progname);
138 fprintf(stderr, " %s get [-id] [-t|-h] name expr\n", progname);
139 fprintf(stderr, " %s search [-max num] [-or] [-idf] [-t|-h|-n] name words...\n", progname);
140 fprintf(stderr, " %s list [-t|-h] name\n", progname);
141 fprintf(stderr, " %s optimize name\n", progname);
142 fprintf(stderr, " %s inform name\n", progname);
143 fprintf(stderr, " %s merge name elems...\n", progname);
144 fprintf(stderr, " %s remove name\n", progname);
145 fprintf(stderr, " %s break [-h|-k|-s] [file]\n", progname);
146 fprintf(stderr, " %s version\n", progname);
147 fprintf(stderr, "\n");
148 exit(1);
149 }
150
151
/* read the whole standard input
   `sp' receives the number of bytes read.  The return value is an allocated buffer holding
   the data followed by a terminating zero byte; the caller releases it with `free'. */
char *readstdin(int *sp){
  char *data;
  int cap, len, ch;
  cap = 256;
  len = 0;
  data = cbmalloc(cap);
  while((ch = getchar()) != EOF){
    /* keep one byte in reserve for the terminator */
    if(len >= cap - 1){
      cap *= 2;
      data = cbrealloc(data, cap);
    }
    data[len++] = ch;
  }
  data[len] = '\0';
  *sp = len;
  return data;
}
166
167
168 /* report the outturn */
otcb(const char * fname,ODEUM * odeum,const char * msg)169 void otcb(const char *fname, ODEUM *odeum, const char *msg){
170 char *name;
171 name = odname(odeum);
172 printf("%s: %s: %s: %s\n", progname, fname, name, msg);
173 free(name);
174 }
175
176
/* parse arguments of create command: exactly one database name and no options */
int runcreate(int argc, char **argv){
  char *name = NULL;
  int i;
  for(i = 2; i < argc; i++){
    /* an option, or a second positional argument, is a usage error */
    if(name != NULL || argv[i][0] == '-'){
      usage();
    } else {
      name = argv[i];
    }
  }
  if(name == NULL) usage();
  return docreate(name);
}
195
196
197 /* parse arguments of put command */
runput(int argc,char ** argv)198 int runput(int argc, char **argv){
199 char *name, *file, *uri, *title, *author, *date, *text;
200 int i, wmax, keep, size, rv;
201 name = NULL;
202 file = NULL;
203 uri = NULL;
204 title = NULL;
205 author = NULL;
206 date = NULL;
207 wmax = -1;
208 keep = FALSE;
209 for(i = 2; i < argc; i++){
210 if(!name && argv[i][0] == '-'){
211 if(!strcmp(argv[i], "-uri")){
212 if(++i >= argc) usage();
213 uri = argv[i];
214 } else if(!strcmp(argv[i], "-uri")){
215 if(++i >= argc) usage();
216 uri = argv[i];
217 } else if(!strcmp(argv[i], "-title")){
218 if(++i >= argc) usage();
219 title = argv[i];
220 } else if(!strcmp(argv[i], "-author")){
221 if(++i >= argc) usage();
222 author = argv[i];
223 } else if(!strcmp(argv[i], "-date")){
224 if(++i >= argc) usage();
225 date = argv[i];
226 } else if(!strcmp(argv[i], "-wmax")){
227 if(++i >= argc) usage();
228 wmax = atoi(argv[i]);
229 } else if(!strcmp(argv[i], "-keep")){
230 keep = TRUE;
231 } else {
232 usage();
233 }
234 } else if(!name){
235 name = argv[i];
236 } else if(!file){
237 file = argv[i];
238 } else {
239 usage();
240 }
241 }
242 if(!name) usage();
243 if(!uri) uri = file;
244 if(!uri) usage();
245 if(file){
246 if(!(text = cbreadfile(file, &size))){
247 fprintf(stderr, "%s: %s: cannot open\n", progname, file);
248 return 1;
249 }
250 } else {
251 text = readstdin(&size);
252 }
253 rv = doput(name, text, uri, title, author, date, wmax, keep);
254 free(text);
255 return rv;
256 }
257
258
259 /* parse arguments of out command */
runout(int argc,char ** argv)260 int runout(int argc, char **argv){
261 char *name, *expr;
262 int i, ib, id, rv;
263 name = NULL;
264 expr = NULL;
265 ib = FALSE;
266 for(i = 2; i < argc; i++){
267 if(!name && argv[i][0] == '-'){
268 if(!strcmp(argv[i], "-id")){
269 ib = TRUE;
270 } else {
271 usage();
272 }
273 } else if(!name){
274 name = argv[i];
275 } else if(!expr){
276 expr = argv[i];
277 } else {
278 usage();
279 }
280 }
281 if(!name || !expr) usage();
282 id = -1;
283 if(ib){
284 id = atoi(expr);
285 if(id < 1) usage();
286 }
287 rv = doout(name, expr, id);
288 return rv;
289 }
290
291
292 /* parse arguments of get command */
runget(int argc,char ** argv)293 int runget(int argc, char **argv){
294 char *name, *expr;
295 int i, ib, tb, hb, id, rv;
296 name = NULL;
297 expr = NULL;
298 ib = FALSE;
299 tb = FALSE;
300 hb = FALSE;
301 for(i = 2; i < argc; i++){
302 if(!name && argv[i][0] == '-'){
303 if(!strcmp(argv[i], "-id")){
304 ib = TRUE;
305 } else if(!strcmp(argv[i], "-t")){
306 tb = TRUE;
307 } else if(!strcmp(argv[i], "-h")){
308 hb = TRUE;
309 } else {
310 usage();
311 }
312 } else if(!name){
313 name = argv[i];
314 } else if(!expr){
315 expr = argv[i];
316 } else {
317 usage();
318 }
319 }
320 if(!name || !expr) usage();
321 id = -1;
322 if(ib){
323 id = atoi(expr);
324 if(id < 1) usage();
325 }
326 rv = doget(name, expr, id, tb, hb);
327 return rv;
328 }
329
330
331 /* parse arguments of search command */
runsearch(int argc,char ** argv)332 int runsearch(int argc, char **argv){
333 char *name, *srchwords[MAXSRCHWORDS];
334 int i, wnum, max, or, idf, ql, tb, hb, nb, rv;
335 CBDATUM *text;
336 name = NULL;
337 wnum = 0;
338 max = -1;
339 or = FALSE;
340 idf = FALSE;
341 ql = FALSE;
342 tb = FALSE;
343 hb = FALSE;
344 nb = FALSE;
345 for(i = 2; i < argc; i++){
346 if(!name && argv[i][0] == '-'){
347 if(!strcmp(argv[i], "-max")){
348 if(++i >= argc) usage();
349 max = atoi(argv[i]);
350 } else if(!strcmp(argv[i], "-or")){
351 or = TRUE;
352 } else if(!strcmp(argv[i], "-idf")){
353 idf = TRUE;
354 } else if(!strcmp(argv[i], "-ql")){
355 ql = TRUE;
356 } else if(!strcmp(argv[i], "-t")){
357 tb = TRUE;
358 } else if(!strcmp(argv[i], "-h")){
359 hb = TRUE;
360 } else if(!strcmp(argv[i], "-n")){
361 nb = TRUE;
362 } else {
363 usage();
364 }
365 } else if(!name){
366 name = argv[i];
367 } else if(wnum < MAXSRCHWORDS){
368 srchwords[wnum++] = argv[i];
369 }
370 }
371 if(!name) usage();
372 text = cbdatumopen(NULL, -1);
373 for(i = 0; i < wnum; i++){
374 if(i > 0) cbdatumcat(text, " ", 1);
375 cbdatumcat(text, srchwords[i], -1);
376 }
377 rv = dosearch(name, cbdatumptr(text), max, or, idf, ql, tb, hb, nb);
378 cbdatumclose(text);
379 return rv;
380 }
381
382
383 /* parse arguments of list command */
runlist(int argc,char ** argv)384 int runlist(int argc, char **argv){
385 char *name;
386 int i, tb, hb, rv;
387 name = NULL;
388 tb = FALSE;
389 hb = FALSE;
390 for(i = 2; i < argc; i++){
391 if(!name && argv[i][0] == '-'){
392 if(!strcmp(argv[i], "-t")){
393 tb = TRUE;
394 } else if(!strcmp(argv[i], "-h")){
395 hb = TRUE;
396 } else {
397 usage();
398 }
399 } else if(!name){
400 name = argv[i];
401 } else {
402 usage();
403 }
404 }
405 if(!name) usage();
406 rv = dolist(name, tb, hb);
407 return rv;
408 }
409
410
/* parse arguments of optimize command: exactly one database name and no options */
int runoptimize(int argc, char **argv){
  char *name = NULL;
  int i;
  for(i = 2; i < argc; i++){
    /* an option, or a second positional argument, is a usage error */
    if(name != NULL || argv[i][0] == '-'){
      usage();
    } else {
      name = argv[i];
    }
  }
  if(name == NULL) usage();
  return dooptimize(name);
}
429
430
/* parse arguments of inform command: exactly one database name and no options */
int runinform(int argc, char **argv){
  char *name = NULL;
  int i;
  for(i = 2; i < argc; i++){
    /* an option, or a second positional argument, is a usage error */
    if(name != NULL || argv[i][0] == '-'){
      usage();
    } else {
      name = argv[i];
    }
  }
  if(name == NULL) usage();
  return doinform(name);
}
449
450
451 /* parse arguments of merge command */
runmerge(int argc,char ** argv)452 int runmerge(int argc, char **argv){
453 char *name;
454 CBLIST *elems;
455 int i, rv;
456 name = NULL;
457 elems = cblistopen();
458 for(i = 2; i < argc; i++){
459 if(!name && argv[i][0] == '-'){
460 usage();
461 } else if(!name){
462 name = argv[i];
463 } else {
464 cblistpush(elems, argv[i], -1);
465 }
466 }
467 if(!name) usage();
468 if(cblistnum(elems) < 1){
469 cblistclose(elems);
470 usage();
471 }
472 rv = domerge(name, elems);
473 cblistclose(elems);
474 return rv;
475 }
476
477
/* parse arguments of remove command: exactly one database name and no options */
int runremove(int argc, char **argv){
  char *name = NULL;
  int i;
  for(i = 2; i < argc; i++){
    /* an option, or a second positional argument, is a usage error */
    if(name != NULL || argv[i][0] == '-'){
      usage();
    } else {
      name = argv[i];
    }
  }
  if(name == NULL) usage();
  return doremove(name);
}
496
497
498 /* parse arguments of break command */
runbreak(int argc,char ** argv)499 int runbreak(int argc, char **argv){
500 char *file, *text;
501 int i, hb, kb, sb, size, rv;
502 file = NULL;
503 hb = FALSE;
504 kb = FALSE;
505 sb = FALSE;
506 for(i = 2; i < argc; i++){
507 if(!file && argv[i][0] == '-'){
508 if(!strcmp(argv[i], "-h")){
509 hb = TRUE;
510 } else if(!strcmp(argv[i], "-k")){
511 kb = TRUE;
512 } else if(!strcmp(argv[i], "-s")){
513 sb = TRUE;
514 } else {
515 usage();
516 }
517 } else if(!file){
518 file = argv[i];
519 } else {
520 usage();
521 }
522 }
523 if(file){
524 if(!(text = cbreadfile(file, &size))){
525 fprintf(stderr, "%s: %s: cannot open\n", progname, file);
526 return 1;
527 }
528 } else {
529 text = readstdin(&size);
530 }
531 rv = dobreak(text, hb, kb, sb);
532 free(text);
533 return rv;
534 }
535
536
537 /* print an error message */
pdperror(const char * name)538 void pdperror(const char *name){
539 fprintf(stderr, "%s: %s: %s\n", progname, name, dperrmsg(dpecode));
540 }
541
542
543 /* print the contents of a document */
printdoc(const ODDOC * doc,int tb,int hb,int score,ODEUM * odeum,const CBLIST * skeys)544 void printdoc(const ODDOC *doc, int tb, int hb, int score, ODEUM *odeum, const CBLIST *skeys){
545 const CBLIST *words;
546 CBMAP *scores;
547 CBLIST *kwords;
548 const char *title, *author, *word, *date;
549 char *summary;
550 int i, wsiz;
551 title = oddocgetattr(doc, "title");
552 author = oddocgetattr(doc, "author");
553 date = oddocgetattr(doc, "date");
554 if(hb){
555 printf("ID: %d\n", oddocid(doc));
556 printf("URI: %s\n", oddocuri(doc));
557 if(title) printf("TITLE: %s\n", title);
558 if(author) printf("AUTHOR: %s\n", author);
559 if(date) printf("DATE: %s\n", date);
560 if(score >= 0) printf("SCORE: %d\n", score);
561 scores = oddocscores(doc, MAXKEYWORDS, odeum);
562 kwords = cblistopen();
563 printf("KEYWORDS: ");
564 cbmapiterinit(scores);
565 while((word = cbmapiternext(scores, &wsiz)) != NULL){
566 if(cblistnum(kwords) > 0) printf(", ");
567 printf("%s (%s)", word, cbmapget(scores, word, wsiz, NULL));
568 cblistpush(kwords, word, wsiz);
569 }
570 putchar('\n');
571 summary = docsummary(doc, skeys ? skeys : kwords, MAXSUMMARY, skeys != NULL);
572 printf("SUMMARY: %s\n", summary);
573 free(summary);
574 cblistclose(kwords);
575 cbmapclose(scores);
576 printf("\n\n");
577 } else if(tb){
578 printf("%d\t%s\t%s\t%s\t%s\t%d\n", oddocid(doc), oddocuri(doc),
579 title ? title : "", author ? author : "", date ? date : "", score);
580 words = oddocnwords(doc);
581 for(i = 0; i < cblistnum(words); i++){
582 word = cblistval(words, i, &wsiz);
583 if(i > 0) putchar('\t');
584 printf("%s", word);
585 }
586 putchar('\n');
587 words = oddocawords(doc);
588 for(i = 0; i < cblistnum(words); i++){
589 word = cblistval(words, i, &wsiz);
590 if(i > 0) putchar('\t');
591 printf("%s", word);
592 }
593 putchar('\n');
594 } else {
595 printf("%d\t%s\t%d\n", oddocid(doc), oddocuri(doc), score);
596 }
597 }
598
599
600 /* get a list handle contains summary of a document */
docsummary(const ODDOC * doc,const CBLIST * kwords,int num,int hilight)601 char *docsummary(const ODDOC *doc, const CBLIST *kwords, int num, int hilight){
602 const CBLIST *nwords, *awords;
603 CBMAP *kmap, *map;
604 const char *normal, *asis;
605 char *sbuf;
606 int i, j, bsiz, ssiz, lnum, nwsiz, awsiz, pv, bi, first;
607 bsiz = 256;
608 sbuf = cbmalloc(bsiz);
609 ssiz = 0;
610 nwords = oddocnwords(doc);
611 awords = oddocawords(doc);
612 kmap = listtomap(kwords);
613 map = listtomap(kwords);
614 lnum = cblistnum(nwords);
615 first = TRUE;
616 for(i = 0; i < lnum && i < SUMMARYWIDTH; i++){
617 normal = cblistval(nwords, i, &nwsiz);
618 asis = cblistval(awords, i, &awsiz);
619 if(awsiz < 1) continue;
620 cbmapout(map, normal, nwsiz);
621 if(ssiz + awsiz + 16 >= bsiz){
622 bsiz = bsiz * 2 + awsiz;
623 sbuf = cbrealloc(sbuf, bsiz);
624 }
625 if(!first) ssiz += sprintf(sbuf + ssiz, " ");
626 if(hilight && normal[0] != '\0' && cbmapget(kmap, normal, nwsiz, NULL)){
627 ssiz += sprintf(sbuf + ssiz, "<<%s>>", asis);
628 } else {
629 ssiz += sprintf(sbuf + ssiz, "%s", asis);
630 }
631 first = FALSE;
632 num--;
633 }
634 ssiz += sprintf(sbuf + ssiz, " ...");
635 pv = i;
636 while(i < lnum){
637 if(cbmaprnum(map) < 1){
638 cbmapclose(map);
639 map = listtomap(kwords);
640 }
641 normal = cblistval(nwords, i, &nwsiz);
642 if(cbmapget(map, normal, nwsiz, NULL)){
643 bi = i - SUMMARYWIDTH / 2;
644 bi = bi > pv ? bi : pv;
645 for(j = bi; j < lnum && j <= bi + SUMMARYWIDTH; j++){
646 normal = cblistval(nwords, j, &nwsiz);
647 asis = cblistval(awords, j, &awsiz);
648 if(awsiz < 1) continue;
649 cbmapout(map, normal, nwsiz);
650 if(ssiz + awsiz + 16 >= bsiz){
651 bsiz = bsiz * 2 + awsiz;
652 sbuf = cbrealloc(sbuf, bsiz);
653 }
654 ssiz += sprintf(sbuf + ssiz, " ");
655 if(hilight && normal[0] != '\0' && cbmapget(kmap, normal, nwsiz, NULL)){
656 ssiz += sprintf(sbuf + ssiz, "<<%s>>", asis);
657 } else {
658 ssiz += sprintf(sbuf + ssiz, "%s", asis);
659 }
660 num--;
661 }
662 ssiz += sprintf(sbuf + ssiz, " ...");
663 i = j;
664 pv = i;
665 } else {
666 i++;
667 }
668 if(num <= 0) break;
669 }
670 cbmapclose(map);
671 cbmapclose(kmap);
672 return sbuf;
673 }
674
675
676 /* get a map made from a list */
listtomap(const CBLIST * list)677 CBMAP *listtomap(const CBLIST *list){
678 CBMAP *map;
679 const char *tmp;
680 int i, tsiz;
681 map = cbmapopen();
682 for(i = 0; i < cblistnum(list); i++){
683 tmp = cblistval(list, i, &tsiz);
684 cbmapput(map, tmp, tsiz, "", 0, FALSE);
685 }
686 return map;
687 }
688
689
690 /* perform create command */
docreate(const char * name)691 int docreate(const char *name){
692 ODEUM *odeum;
693 if(!(odeum = odopen(name, OD_OWRITER | OD_OCREAT | OD_OTRUNC))){
694 pdperror(name);
695 return 1;
696 }
697 if(!odclose(odeum)){
698 pdperror(name);
699 return 1;
700 }
701 return 0;
702 }
703
704
705 /* perform put command */
doput(const char * name,const char * text,const char * uri,const char * title,const char * author,const char * date,int wmax,int keep)706 int doput(const char *name, const char *text, const char *uri, const char *title,
707 const char *author, const char *date, int wmax, int keep){
708 ODEUM *odeum;
709 ODDOC *doc;
710 CBLIST *awords;
711 const char *asis;
712 char *normal;
713 int i;
714 if(!(odeum = odopen(name, OD_OWRITER))){
715 pdperror(name);
716 return 1;
717 }
718 doc = oddocopen(uri);
719 if(title) oddocaddattr(doc, "title", title);
720 if(author) oddocaddattr(doc, "author", author);
721 if(date) oddocaddattr(doc, "date", date);
722 awords = odbreaktext(text);
723 for(i = 0; i < cblistnum(awords); i++){
724 asis = cblistval(awords, i, NULL);
725 normal = odnormalizeword(asis);
726 oddocaddword(doc, normal, asis);
727 free(normal);
728 }
729 cblistclose(awords);
730 if(!odput(odeum, doc, wmax, keep ? FALSE : TRUE)){
731 pdperror(name);
732 oddocclose(doc);
733 odclose(odeum);
734 return 1;
735 }
736 oddocclose(doc);
737 if(!odclose(odeum)){
738 pdperror(name);
739 return 1;
740 }
741 return 0;
742 }
743
744
745 /* perform out command */
doout(const char * name,const char * uri,int id)746 int doout(const char *name, const char *uri, int id){
747 ODEUM *odeum;
748 if(!(odeum = odopen(name, OD_OWRITER))){
749 pdperror(name);
750 return 1;
751 }
752 if(id > 0){
753 if(!odoutbyid(odeum, id)){
754 pdperror(name);
755 odclose(odeum);
756 return 1;
757 }
758 } else {
759 if(!odout(odeum, uri)){
760 pdperror(name);
761 odclose(odeum);
762 return 1;
763 }
764 }
765 if(!odclose(odeum)){
766 pdperror(name);
767 return 1;
768 }
769 return 0;
770 }
771
772
773 /* perform get command */
doget(const char * name,const char * uri,int id,int tb,int hb)774 int doget(const char *name, const char *uri, int id, int tb, int hb){
775 ODEUM *odeum;
776 ODDOC *doc;
777 if(!(odeum = odopen(name, OD_OREADER))){
778 pdperror(name);
779 return 1;
780 }
781 if(id > 0){
782 if(!(doc = odgetbyid(odeum, id))){
783 pdperror(name);
784 odclose(odeum);
785 return 1;
786 }
787 } else {
788 if(!(doc = odget(odeum, uri))){
789 pdperror(name);
790 odclose(odeum);
791 return 1;
792 }
793 }
794 printdoc(doc, tb, hb, -1, odeum, NULL);
795 oddocclose(doc);
796 if(!odclose(odeum)){
797 pdperror(name);
798 return 1;
799 }
800 return 0;
801 }
802
803
804 /* perform search command */
dosearch(const char * name,const char * text,int max,int or,int idf,int ql,int tb,int hb,int nb)805 int dosearch(const char *name, const char *text, int max, int or, int idf, int ql,
806 int tb, int hb, int nb){
807 ODEUM *odeum;
808 CBLIST *awords, *nwords, *uris, *hits;
809 ODPAIR *pairs, *last, *tmp;
810 ODDOC *doc;
811 const char *asis;
812 char *normal, numbuf[32];
813 int i, j, pnum, lnum, hnum, tnum, shows;
814 double ival;
815 if(!(odeum = odopen(name, OD_OREADER))){
816 pdperror(name);
817 return 1;
818 }
819 awords = odbreaktext(text);
820 nwords = cblistopen();
821 uris = cblistopen();
822 hits = cblistopen();
823 last = NULL;
824 lnum = 0;
825 if(ql){
826 last= odquery(odeum, text, &lnum, NULL);
827 } else {
828 for(i = 0; i < cblistnum(awords); i++){
829 asis = cblistval(awords, i, NULL);
830 normal = odnormalizeword(asis);
831 cblistpush(nwords, normal, -1);
832 if(strlen(normal) < 1){
833 free(normal);
834 continue;
835 }
836 if(!(pairs = odsearch(odeum, normal, or ? max : -1, &pnum))){
837 pdperror(name);
838 free(normal);
839 continue;
840 }
841 if((hnum = odsearchdnum(odeum, normal)) < 0) hnum = 0;
842 if(idf){
843 ival = odlogarithm(hnum);
844 ival = (ival * ival) / 4.0;
845 if(ival < 4.0) ival = 4.0;
846 for(j = 0; j < pnum; j++){
847 pairs[j].score = (int)(pairs[j].score / ival);
848 }
849 }
850 cblistpush(uris, normal, -1);
851 sprintf(numbuf, "%d", hnum);
852 cblistpush(hits, numbuf, -1);
853 if(last){
854 if(or){
855 tmp = odpairsor(last, lnum, pairs, pnum, &tnum);
856 } else {
857 tmp = odpairsand(last, lnum, pairs, pnum, &tnum);
858 }
859 free(last);
860 free(pairs);
861 last = tmp;
862 lnum = tnum;
863 } else {
864 last = pairs;
865 lnum = pnum;
866 }
867 free(normal);
868 }
869 }
870 if(hb){
871 printf("TOTAL: %d\n", lnum);
872 printf("EACHWORD: ");
873 } else {
874 printf("%d", lnum);
875 }
876 for(i = 0; i < cblistnum(uris); i++){
877 if(hb){
878 if(i > 0) printf(", ");
879 printf("%s(%s)", cblistval(uris, i, NULL), cblistval(hits, i, NULL));
880 } else {
881 printf("\t%s\t%s", cblistval(uris, i, NULL), cblistval(hits, i, NULL));
882 }
883 }
884 putchar('\n');
885 if(hb) putchar('\n');
886 if(last){
887 if(max < 0) max = lnum;
888 shows = 0;
889 for(i = 0; i < lnum && shows < max; i++){
890 if(nb){
891 printf("%d\t%d\n", last[i].id, last[i].score);
892 shows++;
893 } else {
894 if(!(doc = odgetbyid(odeum, last[i].id))) continue;
895 printdoc(doc, tb, hb, last[i].score, odeum, nwords);
896 oddocclose(doc);
897 shows++;
898 }
899 }
900 free(last);
901 }
902 cblistclose(uris);
903 cblistclose(hits);
904 cblistclose(nwords);
905 cblistclose(awords);
906 if(!odclose(odeum)){
907 pdperror(name);
908 return 1;
909 }
910 return 0;
911 }
912
913
914 /* perform list command */
dolist(const char * name,int tb,int hb)915 int dolist(const char *name, int tb, int hb){
916 ODEUM *odeum;
917 ODDOC *doc;
918 if(!(odeum = odopen(name, OD_OREADER))){
919 pdperror(name);
920 return 1;
921 }
922 if(!oditerinit(odeum)){
923 odclose(odeum);
924 pdperror(name);
925 return 1;
926 }
927 while(TRUE){
928 if(!(doc = oditernext(odeum))){
929 if(dpecode == DP_ENOITEM) break;
930 odclose(odeum);
931 pdperror(name);
932 return 1;
933 }
934 printdoc(doc, tb, hb, -1, odeum, NULL);
935 oddocclose(doc);
936 }
937 if(!odclose(odeum)){
938 pdperror(name);
939 return 1;
940 }
941 return 0;
942 }
943
944
945 /* perform optimize command */
dooptimize(const char * name)946 int dooptimize(const char *name){
947 ODEUM *odeum;
948 if(!(odeum = odopen(name, OD_OWRITER))){
949 pdperror(name);
950 return 1;
951 }
952 if(!odoptimize(odeum)){
953 pdperror(name);
954 odclose(odeum);
955 return 1;
956 }
957 if(!odclose(odeum)){
958 pdperror(name);
959 return 1;
960 }
961 return 0;
962 }
963
964
965 /* perform inform command */
doinform(const char * name)966 int doinform(const char *name){
967 ODEUM *odeum;
968 char *tmp;
969 if(!(odeum = odopen(name, OD_OREADER))){
970 pdperror(name);
971 return 1;
972 }
973 tmp = odname(odeum);
974 printf("name: %s\n", tmp ? tmp : "(null)");
975 free(tmp);
976 printf("file size: %.0f\n", odfsiz(odeum));
977 printf("index buckets: %d\n", odbnum(odeum));
978 printf("used buckets: %d\n", odbusenum(odeum));
979 printf("all documents: %d\n", oddnum(odeum));
980 printf("all words: %d\n", odwnum(odeum));
981 printf("inode number: %d\n", odinode(odeum));
982 printf("modified time: %.0f\n", (double)odmtime(odeum));
983 if(!odclose(odeum)){
984 pdperror(name);
985 return 1;
986 }
987 return 0;
988 }
989
990
991 /* perform merge command */
domerge(const char * name,const CBLIST * elems)992 int domerge(const char *name, const CBLIST *elems){
993 if(!odmerge(name, elems)){
994 pdperror(name);
995 return 1;
996 }
997 return 0;
998 }
999
1000
/* perform remove command: remove the database `name'
   The return value is 0 on success and 1 on failure. */
int doremove(const char *name){
  if(odremove(name)) return 0;
  pdperror(name);
  return 1;
}
1009
1010
1011 /* perform break command */
dobreak(const char * text,int hb,int kb,int sb)1012 int dobreak(const char *text, int hb, int kb, int sb){
1013 CBLIST *awords, *kwords;
1014 CBMAP *scores;
1015 ODDOC *doc;
1016 const char *asis;
1017 char *normal, *summary;
1018 int i, first;
1019 awords = odbreaktext(text);
1020 if(kb || sb){
1021 doc = oddocopen("");
1022 for(i = 0; i < cblistnum(awords); i++){
1023 asis = cblistval(awords, i, NULL);
1024 normal = odnormalizeword(asis);
1025 oddocaddword(doc, normal, asis);
1026 free(normal);
1027 }
1028 scores = oddocscores(doc, MAXKEYWORDS, NULL);
1029 cbmapiterinit(scores);
1030 kwords = cbmapkeys(scores);
1031 if(kb){
1032 for(i = 0; i < cblistnum(kwords); i++){
1033 if(i > 0) putchar('\t');
1034 printf("%s", cblistval(kwords, i, NULL));
1035 }
1036 putchar('\n');
1037 } else {
1038 summary = docsummary(doc, kwords, MAXSUMMARY, FALSE);
1039 printf("%s\n", summary);
1040 free(summary);
1041 }
1042 cblistclose(kwords);
1043 cbmapclose(scores);
1044 oddocclose(doc);
1045 } else if(hb){
1046 printf("NWORDS: ");
1047 first = TRUE;
1048 for(i = 0; i < cblistnum(awords); i++){
1049 asis = cblistval(awords, i, NULL);
1050 normal = odnormalizeword(asis);
1051 if(normal[0] == '\0'){
1052 free(normal);
1053 continue;
1054 }
1055 if(!first) putchar(' ');
1056 first = FALSE;
1057 printf("%s", normal);
1058 free(normal);
1059 }
1060 putchar('\n');
1061 printf("AWORDS: ");
1062 first = TRUE;
1063 for(i = 0; i < cblistnum(awords); i++){
1064 asis = cblistval(awords, i, NULL);
1065 if(asis[0] == '\0') continue;
1066 if(!first) putchar(' ');
1067 first = FALSE;
1068 printf("%s", asis);
1069 }
1070 putchar('\n');
1071 } else {
1072 for(i = 0; i < cblistnum(awords); i++){
1073 asis = cblistval(awords, i, NULL);
1074 normal = odnormalizeword(asis);
1075 printf("%s\t%s\n", normal, asis);
1076 free(normal);
1077 }
1078 }
1079 cblistclose(awords);
1080 return 0;
1081 }
1082
1083
1084
1085 /* END OF FILE */
1086