1#! /usr/bin/perl -w
2#--
3# Perl binding of Hyper Estraier
4#                                                        Copyright (C) 2004-2007 Mikio Hirabayashi
5#  This file is part of Hyper Estraier.
6#  Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of
7#  the GNU Lesser General Public License as published by the Free Software Foundation; either
8#  version 2.1 of the License or any later version.  Hyper Estraier is distributed in the hope
9#  that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
10#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
11#  License for more details.
12#  You should have received a copy of the GNU Lesser General Public License along with Hyper
13#  Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
14#  Boston, MA 02111-1307 USA.
15
16
17use lib qw(./src/blib/lib ./src/blib/arch);
18use strict;
19use warnings;
20use ExtUtils::testlib;
21use Time::HiRes qw(gettimeofday);
22use Estraier;
23$Estraier::DEBUG = 1;
24
25
# global constants

# boolean values
use constant { TRUE => 1, FALSE => 0 };

# protocol version printed on the VERSION line of the search output
use constant PROTVER => "1.0";

# defaults of the search command for the maximum number of results and the auxiliary number
use constant { SEARCHMAX => 10, SEARCHAUX => 32 };

# widths handed to make_snippet by the snippet view of the search command
use constant { SNIPWWIDTH => 480, SNIPHWIDTH => 96, SNIPAWIDTH => 96 };

# view modes of the search command
use constant { VM_ID => 0, VM_URI => 1, VM_ATTR => 2, VM_FULL => 3, VM_SNIP => 4 };

# maximum number of bytes read from a draft
use constant READMAX => 256 * 1024 * 1024;
43
44
# main routine
# Selects the handler of the sub command named by the first command line argument and
# returns the value of that handler.  The usage is printed (and the process exits) when
# no argument is given or when the sub command is unknown.
sub main {
    usage() if(scalar(@ARGV) < 1);
    my %handlers = (
        "put" => \&runput,
        "out" => \&runout,
        "edit" => \&runedit,
        "get" => \&runget,
        "uriid" => \&runuriid,
        "inform" => \&runinform,
        "optimize" => \&runoptimize,
        "merge" => \&runmerge,
        "search" => \&runsearch,
    );
    my $handler = $handlers{$ARGV[0]};
    usage() unless(defined($handler));
    my $rv = $handler->();
    return $rv;
}
72
73
# print the usage and exit
# Everything is written to the standard error output and the exit status is 1.
sub usage {
    # one synopsis per sub command, without the leading program name
    my @synopses = (
        "put [-cl] [-ws] db [file]",
        "out [-cl] db expr",
        "edit db expr name [value]",
        "get db expr [attr]",
        "uriid db uri",
        "inform db",
        "optimize [-onp] [-ond] db",
        "merge [-cl] db target",
        "search [-vu|-va|-vf|-vs] [-gs|-gf|-ga] [-cd] [-ni] [-sf|-sfr|-sfu|-sfi]" .
            " [-attr expr] [-ord expr] [-max num] [-sk num] [-aux num] [-dis name]" .
            " db [phrase]",
    );
    printf(STDERR "%s: command line utility for the core API of Hyper Estraier\n", $0);
    printf(STDERR "\n");
    printf(STDERR "usage:\n");
    foreach my $synopsis (@synopses){
        printf(STDERR "  %s %s\n", $0, $synopsis);
    }
    printf(STDERR "\n");
    exit(1);
}
93
94
# print an error message to the standard error output
# The message is prefixed with the program name and the "ERROR" label.
sub printerror {
    my ($msg) = @_;
    my $line = sprintf("%s: ERROR: %s\n", $0, $msg);
    print(STDERR $line);
}
100
101
# parse arguments of the put command
# Options (-cl, -ws) are recognized only before the database name.  The operands are the
# database name and, optionally, the file of the document draft.
sub runput {
    my $opts = 0;
    my @operands = ();
    foreach my $arg (@ARGV[1..$#ARGV]){
        if(scalar(@operands) == 0 && $arg =~ /^-/){
            if($arg eq "-cl"){
                $opts |= Database::PDCLEAN;
            } elsif($arg eq "-ws"){
                $opts |= Database::PDWEIGHT;
            } else {
                usage();
            }
            next;
        }
        # at most two operands are accepted
        usage() if(scalar(@operands) >= 2);
        push(@operands, $arg);
    }
    usage() if(scalar(@operands) < 1);
    my ($dbname, $file) = @operands;
    return procput($dbname, $file, $opts);
}
128
129
# parse arguments of the out command
# The option -cl is recognized only before the database name.  The operands are the
# database name and the expression (ID or URI) of the document.
sub runout {
    my $opts = 0;
    my @operands = ();
    foreach my $arg (@ARGV[1..$#ARGV]){
        if(scalar(@operands) == 0 && $arg =~ /^-/){
            usage() if($arg ne "-cl");
            $opts |= Database::ODCLEAN;
            next;
        }
        # at most two operands are accepted
        usage() if(scalar(@operands) >= 2);
        push(@operands, $arg);
    }
    usage() if(scalar(@operands) < 2);
    my ($dbname, $expr) = @operands;
    return procout($dbname, $expr, $opts);
}
154
155
# parse arguments of the edit command
# No option is accepted.  The operands are the database name, the expression (ID or URI)
# of the document, the attribute name and, optionally, the attribute value.
sub runedit {
    my @operands = ();
    foreach my $arg (@ARGV[1..$#ARGV]){
        # anything looking like an option before the database name is an error
        usage() if(scalar(@operands) == 0 && $arg =~ /^-/);
        # at most four operands are accepted
        usage() if(scalar(@operands) >= 4);
        push(@operands, $arg);
    }
    usage() if(scalar(@operands) < 3);
    my ($dbname, $expr, $name, $value) = @operands;
    return procedit($dbname, $expr, $name, $value);
}
181
182
# parse arguments of the get command
# No option is accepted.  The operands are the database name, the expression (ID or URI)
# of the document and, optionally, the name of the attribute to print.
sub runget {
    my @operands = ();
    foreach my $arg (@ARGV[1..$#ARGV]){
        # anything looking like an option before the database name is an error
        usage() if(scalar(@operands) == 0 && $arg =~ /^-/);
        # at most three operands are accepted
        usage() if(scalar(@operands) >= 3);
        push(@operands, $arg);
    }
    usage() if(scalar(@operands) < 2);
    my ($dbname, $expr, $attr) = @operands;
    return procget($dbname, $expr, $attr);
}
205
206
# parse arguments of the uriid command
# No option is accepted.  The operands are the database name and the URI of the document.
sub runuriid {
    my @operands = ();
    foreach my $arg (@ARGV[1..$#ARGV]){
        # anything looking like an option before the database name is an error
        usage() if(scalar(@operands) == 0 && $arg =~ /^-/);
        # at most two operands are accepted
        usage() if(scalar(@operands) >= 2);
        push(@operands, $arg);
    }
    usage() if(scalar(@operands) < 2);
    my ($dbname, $uri) = @operands;
    return procuriid($dbname, $uri);
}
226
227
# parse arguments of the inform command
# No option is accepted.  The only operand is the database name.
sub runinform {
    my @operands = ();
    foreach my $arg (@ARGV[1..$#ARGV]){
        # anything looking like an option before the database name is an error
        usage() if(scalar(@operands) == 0 && $arg =~ /^-/);
        # only one operand is accepted
        usage() if(scalar(@operands) >= 1);
        push(@operands, $arg);
    }
    usage() if(scalar(@operands) < 1);
    my ($dbname) = @operands;
    return procinform($dbname);
}
244
245
# parse arguments of the optimize command
# Options (-onp, -ond) are recognized only before the database name, which is the only
# operand.
sub runoptimize {
    my $opts = 0;
    my @operands = ();
    foreach my $arg (@ARGV[1..$#ARGV]){
        if(scalar(@operands) == 0 && $arg =~ /^-/){
            if($arg eq "-onp"){
                $opts |= Database::OPTNOPURGE;
            } elsif($arg eq "-ond"){
                $opts |= Database::OPTNODBOPT;
            } else {
                usage();
            }
            next;
        }
        # only one operand is accepted
        usage() if(scalar(@operands) >= 1);
        push(@operands, $arg);
    }
    usage() if(scalar(@operands) < 1);
    my ($dbname) = @operands;
    return procoptimize($dbname, $opts);
}
270
271
# parse arguments of the merge command
# The option -cl is recognized only before the database name.  The operands are the
# database name and the name of the target database.
sub runmerge {
    my $opts = 0;
    my @operands = ();
    foreach my $arg (@ARGV[1..$#ARGV]){
        if(scalar(@operands) == 0 && $arg =~ /^-/){
            usage() if($arg ne "-cl");
            $opts |= Database::MGCLEAN;
            next;
        }
        # at most two operands are accepted
        usage() if(scalar(@operands) >= 2);
        push(@operands, $arg);
    }
    usage() if(scalar(@operands) < 2);
    my ($dbname, $tgname) = @operands;
    return procmerge($dbname, $tgname, $opts);
}
296
297
# parse arguments of the search command
# Options are recognized only before the database name.  Every argument after the
# database name is a part of the search phrase; the parts are joined with a space.
sub runsearch {
    my ($dbname, $phrase, $ord, $dis);
    my @attrs = ();
    my $max = SEARCHMAX;
    my $skip = 0;
    my $opts = 0;
    my $aux = SEARCHAUX;
    my $cd = FALSE;
    my $view = VM_ID;
    # options selecting the view mode
    my %viewof = (
        "-vu" => VM_URI,
        "-va" => VM_ATTR,
        "-vf" => VM_FULL,
        "-vs" => VM_SNIP,
    );
    # options adding a flag to the options of the search condition
    my %flagof = (
        "-gs" => Condition::SURE(),
        "-gf" => Condition::FAST(),
        "-ga" => Condition::AGITO(),
        "-ni" => Condition::NOIDF(),
        "-sf" => Condition::SIMPLE(),
        "-sfr" => Condition::ROUGH(),
        "-sfu" => Condition::UNION(),
        "-sfi" => Condition::ISECT(),
    );
    # options consuming the following argument as their value
    my %setterof = (
        "-attr" => sub { push(@attrs, $_[0]); },
        "-ord" => sub { $ord = $_[0]; },
        "-max" => sub { $max = Estraier::atoi($_[0]); },
        "-sk" => sub { $skip = Estraier::atoi($_[0]); },
        "-aux" => sub { $aux = Estraier::atoi($_[0]); },
        "-dis" => sub { $dis = $_[0]; },
    );
    my @rest = @ARGV[1..$#ARGV];
    while(scalar(@rest) > 0){
        my $arg = shift(@rest);
        if(defined($dbname)){
            $phrase = defined($phrase) ? $phrase . " " . $arg : $arg;
            next;
        }
        if($arg !~ /^-/){
            $dbname = $arg;
            next;
        }
        if(exists($viewof{$arg})){
            $view = $viewof{$arg};
        } elsif(exists($flagof{$arg})){
            $opts |= $flagof{$arg};
        } elsif($arg eq "-cd"){
            $cd = TRUE;
        } elsif(exists($setterof{$arg})){
            # the value of the option must be present
            usage() if(scalar(@rest) < 1);
            $setterof{$arg}->(shift(@rest));
        } else {
            usage();
        }
    }
    usage() if(!defined($dbname));
    return procsearch($dbname, $phrase, \@attrs, $ord, $max, $skip, $opts, $aux, $dis,
                      $cd, $view);
}
372
373
# perform the put command
#   $dbname : name of the database, opened as a writer and created if it does not exist
#   $file   : name of the file of the document draft, or undef to read the standard input
#   $opts   : option flags passed to put_doc
# The return value is 0 on success or 1 on failure.  Failures are reported on the
# standard error output.  At most READMAX bytes of the draft are read.
sub procput {
    my ($dbname, $file, $opts) = @_;
    my $in;
    if(defined($file)){
        # three-argument open: the file name is never interpreted as a mode or a command
        unless(open($in, "<", $file)){
            printerror($file . ": could not open");
            return 1;
        }
    } else {
        $in = \*STDIN;
    }
    binmode($in);
    # one sysread call may return less than the whole input (pipes, terminals), so the
    # input is read chunk by chunk until its end or until READMAX bytes are stored
    my $draft = "";
    my $failed = FALSE;
    while(length($draft) < READMAX){
        my $want = READMAX - length($draft);
        $want = 65536 if($want > 65536);
        my $got = sysread($in, $draft, $want, length($draft));
        unless(defined($got)){
            $failed = TRUE;
            last;
        }
        last if($got == 0);
    }
    close($in) if(defined($file));
    if($failed){
        printerror((defined($file) ? $file : "(stdin)") . ": could not read");
        return 1;
    }
    my $doc = Document->new($draft);
    my $db = Database->new();
    unless($db->open($dbname, Database::DBWRITER | Database::DBCREAT)){
        printerror($dbname . ": " . $db->err_msg($db->error()));
        return 1;
    }
    $db->set_informer("main::informer");
    unless($db->put_doc($doc, $opts)){
        # the error is reported before closing so that it is not lost by the close
        printerror($dbname . ": " . $db->err_msg($db->error()));
        $db->close();
        return 1;
    }
    unless($db->close()){
        printerror($dbname . ": " . $db->err_msg($db->error()));
        return 1;
    }
    return 0;
}
410
411
# perform the out command
# The expression is taken as a document ID when it converts to a positive number and as a
# URI otherwise.  The return value is 0 on success or 1 on failure.
sub procout {
    my ($dbname, $expr, $opts) = @_;
    my $db = Database->new();
    # report the current error of the database
    my $complain = sub {
        printerror($dbname . ": " . $db->err_msg($db->error()));
    };
    if(!$db->open($dbname, Database::DBWRITER)){
        $complain->();
        return 1;
    }
    $db->set_informer("main::informer");
    my $id = Estraier::atoi($expr);
    $id = $db->uri_to_id($expr) if($id < 1);
    # the document is removed only if its ID could be determined
    my $removed = FALSE;
    $removed = $db->out_doc($id, $opts) unless($id < 1);
    if(!$removed){
        $complain->();
        $db->close();
        return 1;
    }
    if(!$db->close()){
        $complain->();
        return 1;
    }
    return 0;
}
440
441
# perform the edit command
# The expression is taken as a document ID when it converts to a positive number and as a
# URI otherwise.  The document is fetched without its text, the attribute is set by
# add_attr and the document is stored back by edit_doc.  The return value is 0 on success
# or 1 on failure.
sub procedit {
    my ($dbname, $expr, $name, $value) = @_;
    my $db = Database->new();
    # report the current error of the database
    my $complain = sub {
        printerror($dbname . ": " . $db->err_msg($db->error()));
    };
    if(!$db->open($dbname, Database::DBWRITER)){
        $complain->();
        return 1;
    }
    $db->set_informer("main::informer");
    my $id = Estraier::atoi($expr);
    $id = $db->uri_to_id($expr) if($id < 1);
    # each step is attempted only if the previous one succeeded
    my $doc = undef;
    $doc = $db->get_doc($id, Database::GDNOTEXT) unless($id < 1);
    my $edited = FALSE;
    if(defined($doc)){
        $doc->add_attr($name, $value);
        $edited = $db->edit_doc($doc);
    }
    if(!$edited){
        $complain->();
        $db->close();
        return 1;
    }
    if(!$db->close()){
        $complain->();
        return 1;
    }
    return 0;
}
478
479
# perform the get command
# The expression is taken as a document ID when it converts to a positive number and as a
# URI otherwise.  When an attribute name is given, the value of the attribute is printed
# with a trailing line feed; otherwise the draft of the whole document is printed.  The
# return value is 0 on success or 1 on failure.
sub procget {
    my ($dbname, $expr, $attr) = @_;
    my $db = Database->new();
    # report the current error of the database
    my $complain = sub {
        printerror($dbname . ": " . $db->err_msg($db->error()));
    };
    if(!$db->open($dbname, Database::DBREADER)){
        $complain->();
        return 1;
    }
    my $id = Estraier::atoi($expr);
    $id = $db->uri_to_id($expr) if($id < 1);
    # the text to print stays undefined if the ID, the attribute or the document is missing
    my $output = undef;
    unless($id < 1){
        if(defined($attr)){
            my $value = $db->get_doc_attr($id, $attr);
            $output = sprintf("%s\n", $value) if(defined($value));
        } else {
            my $doc = $db->get_doc($id, 0);
            $output = sprintf("%s", $doc->dump_draft()) if(defined($doc));
        }
    }
    if(!defined($output)){
        $complain->();
        $db->close();
        return 1;
    }
    print($output);
    if(!$db->close()){
        $complain->();
        return 1;
    }
    return 0;
}
520
521
# perform the uriid command
# Prints the ID which uri_to_id gives for the URI.  The return value is 0 on success or 1
# on failure, including the case where the ID is not positive.
sub procuriid {
    my ($dbname, $uri) = @_;
    my $db = Database->new();
    # report the current error of the database
    my $complain = sub {
        printerror($dbname . ": " . $db->err_msg($db->error()));
    };
    if(!$db->open($dbname, Database::DBREADER)){
        $complain->();
        return 1;
    }
    my $id = $db->uri_to_id($uri);
    if(!($id > 0)){
        $complain->();
        $db->close();
        return 1;
    }
    printf("%d\n", $id);
    if(!$db->close()){
        $complain->();
        return 1;
    }
    return 0;
}
544
545
# perform the inform command
# Prints the number of documents, the number of words and the size reported by the
# database.  The return value is 0 on success or 1 on failure.
sub procinform {
    my ($dbname) = @_;
    my $db = Database->new();
    if(!$db->open($dbname, Database::DBREADER)){
        printerror($dbname . ": " . $db->err_msg($db->error()));
        return 1;
    }
    # pairs of an output format and the value to print with it
    my @report = (
        ["number of documents: %d\n", $db->doc_num()],
        ["number of words: %d\n", $db->word_num()],
        ["file size: %d\n", $db->size()],
    );
    foreach my $entry (@report){
        printf($entry->[0], $entry->[1]);
    }
    if(!$db->close()){
        printerror($dbname . ": " . $db->err_msg($db->error()));
        return 1;
    }
    return 0;
}
563
564
# perform the optimize command
# Opens the database as a writer and calls optimize with the given options.  The return
# value is 0 on success or 1 on failure.
sub procoptimize {
    my ($dbname, $opts) = @_;
    my $db = Database->new();
    # report the current error of the database
    my $complain = sub {
        printerror($dbname . ": " . $db->err_msg($db->error()));
    };
    if(!$db->open($dbname, Database::DBWRITER)){
        $complain->();
        return 1;
    }
    $db->set_informer("main::informer");
    if(!$db->optimize($opts)){
        $complain->();
        $db->close();
        return 1;
    }
    if(!$db->close()){
        $complain->();
        return 1;
    }
    return 0;
}
586
587
# perform the merge command
# Opens the database as a writer and calls merge with the name of the target database and
# the given options.  The return value is 0 on success or 1 on failure.
sub procmerge {
    my ($dbname, $tgname, $opts) = @_;
    my $db = Database->new();
    # report the current error of the database
    my $complain = sub {
        printerror($dbname . ": " . $db->err_msg($db->error()));
    };
    if(!$db->open($dbname, Database::DBWRITER)){
        $complain->();
        return 1;
    }
    $db->set_informer("main::informer");
    if(!$db->merge($tgname, $opts)){
        $complain->();
        $db->close();
        return 1;
    }
    if(!$db->close()){
        $complain->();
        return 1;
    }
    return 0;
}
610
611
# perform the search command
#   $dbname : name of the database, opened as a reader
#   $phrase : search phrase, or undef for none
#   $attrs  : reference to an array of attribute expressions
#   $ord    : order expression, or undef for none
#   $max    : maximum number of results; applied only if not negative
#   $skip   : number of results to skip; applied only if not negative
#   $opts   : option flags of the search condition
#   $aux    : auxiliary number of the search condition
#   $dis    : name of the attribute for distinct filtering, or undef for none
#   $cd     : whether to check each document with scan_doc before printing it
#   $view   : view mode, one of the VM_* constants; unknown values are treated as VM_ID
# The return value is 0 on success or 1 on failure.
sub procsearch {
    my ($dbname, $phrase, $attrs, $ord, $max, $skip, $opts, $aux, $dis, $cd, $view) = @_;
    my $db = Database->new();
    unless($db->open($dbname, Database::DBREADER)){
        printerror($dbname . ": " . $db->err_msg($db->error()));
        return 1;
    }
    my $cond = Condition->new();
    $cond->set_phrase($phrase) if(defined($phrase));
    foreach my $expr (@$attrs){
        $cond->add_attr($expr);
    }
    $cond->set_order($ord) if(defined($ord));
    $cond->set_max($max) if($max >= 0);
    $cond->set_skip($skip) if($skip >= 0);
    $cond->set_options($opts);
    $cond->set_auxiliary($aux);
    $cond->set_distinct($dis) if(defined($dis));
    # measure the elapsed time of the search itself
    my ($sec, $usec) = gettimeofday();
    my $stime = $sec + $usec / 1000000.0;
    my $res = $db->search($cond);
    ($sec, $usec) = gettimeofday();
    my $etime = $sec + $usec / 1000000.0;
    # the border line is derived from the start time
    my $border = sprintf("--------[%.0f]--------", $stime * 100);
    printf("%s\n", $border);
    printf("VERSION\t%s\n", PROTVER);
    printf("NODE\tlocal\n");
    printf("HIT\t%d\n", $res->hint(""));
    # words with at least one hit are kept for making snippets
    my @snwords = ();
    my $words = $res->hint_words();
    my $hintnum = 0;
    foreach my $word (@$words){
        $hintnum++;
        my $hits = $res->hint($word);
        push(@snwords, $word) if($hits > 0);
        printf("HINT#%d\t%s\t%d\n", $hintnum, $word, $hits);
    }
    printf("TIME\t%0.3f\n", $etime - $stime);
    printf("DOCNUM\t%d\n", $db->doc_num());
    printf("WORDNUM\t%d\n", $db->word_num());
    if($view == VM_URI){
        printf("VIEW\tURI\n");
    } elsif($view == VM_ATTR){
        printf("VIEW\tATTRIBUTE\n");
    } elsif($view == VM_FULL){
        printf("VIEW\tFULL\n");
    } elsif($view == VM_SNIP){
        printf("VIEW\tSNIPPET\n");
    } else {
        printf("VIEW\tID\n");
    }
    printf("\n");
    # the views printing one block per document emit the border before each block instead
    printf("%s\n", $border) if($view != VM_ATTR && $view != VM_FULL && $view != VM_SNIP);
    my $dnum = $res->doc_num();
    foreach my $idx (0..$dnum-1){
        my $id = $res->get_doc_id($idx);
        if($view == VM_URI){
            # the text is needed only when the document is to be checked
            my $doc = $db->get_doc($id, $cd ? 0 : Database::GDNOTEXT);
            next unless(defined($doc));
            next if($cd && !$db->scan_doc($doc, $cond));
            printf("%d\t%s\n", $id, $doc->attr('@uri'));
        } elsif($view == VM_ATTR){
            my $doc = $db->get_doc($id, $cd ? 0 : Database::GDNOTEXT);
            next unless(defined($doc));
            # honor the document check in this view as well as in the URI view
            next if($cd && !$db->scan_doc($doc, $cond));
            printf("%s\n", $border);
            my $names = $doc->attr_names();
            foreach my $name (@$names){
                printf("%s=%s\n", $name, $doc->attr($name));
            }
            printf("\n");
        } elsif($view == VM_FULL){
            my $doc = $db->get_doc($id, 0);
            next unless(defined($doc));
            next if($cd && !$db->scan_doc($doc, $cond));
            printf("%s\n", $border);
            printf("%s", $doc->dump_draft());
        } elsif($view == VM_SNIP){
            my $doc = $db->get_doc($id, 0);
            next unless(defined($doc));
            next if($cd && !$db->scan_doc($doc, $cond));
            printf("%s\n", $border);
            my $names = $doc->attr_names();
            foreach my $name (@$names){
                printf("%s=%s\n", $name, $doc->attr($name));
            }
            printf("\n");
            printf("%s", $doc->make_snippet(\@snwords, SNIPWWIDTH, SNIPHWIDTH, SNIPAWIDTH));
        } else {
            printf("%d\n", $id);
        }
    }
    printf("%s:END\n", $border);
    unless($db->close()){
        printerror($dbname . ": " . $db->err_msg($db->error()));
        return 1;
    }
    return 0;
}
719
720
# callback function for database events
# Prints the message, prefixed with the program name and the "INFO" label.
sub informer {
    my ($msg) = @_;
    my $line = sprintf("%s: INFO: %s\n", $0, $msg);
    print($line);
}
725
726
# perform the main routine
# The directory part is stripped from the program name, which is used in messages.
$0 =~ s{.*/}{};
# the return value of the main routine is the exit status of the process
exit(main());
730
731
732
733# END OF FILE
734