1 /*************************************************************************************************
2  * The core API of Hyper Estraier
3  *                                                      Copyright (C) 2004-2007 Mikio Hirabayashi
4  * This file is part of Hyper Estraier.
5  * Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of
6  * the GNU Lesser General Public License as published by the Free Software Foundation; either
7  * version 2.1 of the License or any later version.  Hyper Estraier is distributed in the hope
8  * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
10  * License for more details.
11  * You should have received a copy of the GNU Lesser General Public License along with Hyper
12  * Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
13  * Boston, MA 02111-1307 USA.
14  *************************************************************************************************/
15 
16 
17 #ifndef _ESTRAIER_H                      /* duplication check */
18 #define _ESTRAIER_H
19 
20 #if defined(__cplusplus)                 /* export for C++ */
21 extern "C" {
22 #endif
23 
24 
25 
26 /*************************************************************************************************
27  * common settings
28  *************************************************************************************************/
29 
30 
31 /* version of Hyper Estraier */
32 extern const char *est_version;
33 
34 
35 
36 /*************************************************************************************************
37  * underlying headers
38  *************************************************************************************************/
39 
40 
41 #include <depot.h>
42 #include <curia.h>
43 #include <cabin.h>
44 #include <villa.h>
45 #include <stdlib.h>
46 
47 
48 
49 /*************************************************************************************************
50  * API for document
51  *************************************************************************************************/
52 
53 
54 #define ESTDATTRID     "@id"             /* name of the attribute of the ID number */
55 #define ESTDATTRURI    "@uri"            /* name of the attribute of the URI */
56 #define ESTDATTRDIGEST "@digest"         /* name of the attribute of message digest */
57 #define ESTDATTRCDATE  "@cdate"          /* name of the attribute of creation date */
58 #define ESTDATTRMDATE  "@mdate"          /* name of the attribute of modification date */
59 #define ESTDATTRADATE  "@adate"          /* name of the attribute of access date */
60 #define ESTDATTRTITLE  "@title"          /* name of the attribute of title */
61 #define ESTDATTRAUTHOR "@author"         /* name of the attribute of author */
62 #define ESTDATTRTYPE   "@type"           /* name of the attribute of content type */
63 #define ESTDATTRLANG   "@lang"           /* name of the attribute of language */
64 #define ESTDATTRGENRE  "@genre"          /* name of the attribute of genre */
65 #define ESTDATTRSIZE   "@size"           /* name of the attribute of entity size */
66 #define ESTDATTRWEIGHT "@weight"         /* name of the attribute of scoring weight */
67 #define ESTDATTRMISC   "@misc"           /* name of the attribute of miscellaneous information */
68 #define ESTDCNTLVECTOR "%VECTOR"         /* name of the control code for keyword vector */
69 #define ESTDCNTLSCORE  "%SCORE"          /* name of the control code for substitute score */
70 #define ESTDCNTLSHADOW "%SHADOW"         /* name of the control code for shadow document */
71 
72 typedef struct {                         /* type of structure for a document */
73   int id;                                /* identification number */
74   CBMAP *attrs;                          /* map of attributes */
75   CBLIST *dtexts;                        /* list of sentences of shown text */
76   CBMAP *kwords;                         /* map of keywords attached to the document */
77 } ESTDOC;
78 
79 
80 /* Create a document object.
81    The return value is an object of a document. */
82 ESTDOC *est_doc_new(void);
83 
84 
85 /* Create a document object made from draft data.
86    `draft' specifies a string of draft data.
87    The return value is an object of a document. */
88 ESTDOC *est_doc_new_from_draft(const char *draft);
89 
90 
91 /* Destroy a document object.
92    `doc' specifies a document object. */
93 void est_doc_delete(ESTDOC *doc);
94 
95 
96 /* Add an attribute to a document object.
97    `doc' specifies a document object.
98    `name' specifies the name of an attribute.
99    `value' specifies the value of the attribute.  If it is `NULL', the attribute is removed. */
100 void est_doc_add_attr(ESTDOC *doc, const char *name, const char *value);
101 
102 
103 /* Add a sentence of text to a document object.
104    `doc' specifies a document object.
105    `text' specifies a sentence of text. */
106 void est_doc_add_text(ESTDOC *doc, const char *text);
107 
108 
109 /* Add a hidden sentence to a document object.
110    `doc' specifies a document object.
111    `text' specifies a hidden sentence. */
112 void est_doc_add_hidden_text(ESTDOC *doc, const char *text);
113 
114 
115 /* Attach keywords to a document object.
116    `doc' specifies a document object.
117    `kwords' specifies a map object of keywords.  Keys of the map should be keywords of the
118    document and values should be their scores in decimal string.  The map object is copied
119    internally. */
120 void est_doc_set_keywords(ESTDOC *doc, CBMAP *kwords);
121 
122 
123 /* Set the substitute score of a document object.
124    `doc' specifies a document object.
125    `score' specifies the substitute score.  If it is negative, the substitute score setting is
126    nullified. */
127 void est_doc_set_score(ESTDOC *doc, int score);
128 
129 
130 /* Get the ID number of a document object.
131    `doc' specifies a document object.
132    The return value is the ID number of the document object.  If the object has not been
133    registered, -1 is returned. */
134 int est_doc_id(ESTDOC *doc);
135 
136 
137 /* Get a list of attribute names of a document object.
138    `doc' specifies a document object.
139    The return value is a new list object of attribute names of the document object.  Because
140    the object of the return value is opened with the function `cblistopen', it should be closed
141    with the function `cblistclose' if it is no longer in use. */
142 CBLIST *est_doc_attr_names(ESTDOC *doc);
143 
144 
145 /* Get the value of an attribute of a document object.
146    `doc' specifies a document object.
147    `name' specifies the name of an attribute.
148    The return value is the value of the attribute or `NULL' if it does not exist.  The life
149    duration of the returned string is synchronous with the one of the document object. */
150 const char *est_doc_attr(ESTDOC *doc, const char *name);
151 
152 
153 /* Get a list of sentences of the text of a document object.
154    `doc' specifies a document object.
155    The return value is a list object of sentences of the text of the document object.  The life
156    duration of the returned object is synchronous with the one of the document object. */
157 const CBLIST *est_doc_texts(ESTDOC *doc);
158 
159 
160 /* Concatenate sentences of the text of a document object.
161    `doc' specifies a document object.
162    The return value is concatenated sentences of the document object.  Because the region of the
163    return value is allocated with the `malloc' call, it should be released with the `free' call
164    if it is no longer in use. */
165 char *est_doc_cat_texts(ESTDOC *doc);
166 
167 
168 /* Get attached keywords of a document object.
169    `doc' specifies a document object.
170    The return value is a map object of keywords and their scores in decimal string.  If no
171    keyword is attached, `NULL' is returned.  The life duration of the returned object is
172    synchronous with the one of the document object. */
173 CBMAP *est_doc_keywords(ESTDOC *doc);
174 
175 
176 /* Get the substitute score of a document object.
177    `doc' specifies a document object.
178    The return value is the substitute score or -1 if it is not set. */
179 int est_doc_score(ESTDOC *doc);
180 
181 
182 /* Dump draft data of a document object.
183    `doc' specifies a document object.
184    The return value is draft data of the document object.  Because the region of the return value
185    is allocated with the `malloc' call, it should be released with the `free' call if it is no
186    longer in use. */
187 char *est_doc_dump_draft(ESTDOC *doc);
188 
189 
190 /* Make a snippet of the body text of a document object.
191    `doc' specifies a document object.
192    `words' specifies a list object of words to be highlighted.
193    `wwidth' specifies whole width of the result.
194    `hwidth' specifies width of strings picked up from the beginning of the text.
195    `awidth' specifies width of strings picked up around each highlighted word.
196    The return value is a snippet string of the body text of the document object.  It consists
197    of tab separated values.  Each line is a string to be shown.  Though most lines have only one
198    field, some lines have two fields.  If the second field exists, the first field is to be shown
199    highlighted, and the second field means its normalized form.  Because the region of the
200    return value is allocated with the `malloc' call, it should be released with the `free' call
201    if it is no longer in use. */
202 char *est_doc_make_snippet(ESTDOC *doc, const CBLIST *words, int wwidth, int hwidth, int awidth);
203 
204 
205 
206 /*************************************************************************************************
207  * API for search conditions
208  *************************************************************************************************/
209 
210 
211 #define ESTOPUVSET     "[UVSET]"         /* universal set */
212 #define ESTOPID        "[ID]"            /* ID matching search */
213 #define ESTOPURI       "[URI]"           /* URI matching search */
214 #define ESTOPSIMILAR   "[SIMILAR]"       /* similarity search */
215 #define ESTOPRANK      "[RANK]"          /* ranking search */
216 
217 #define ESTOPUNION     "OR"              /* union (disjunction) */
218 #define ESTOPISECT     "AND"             /* intersection (conjunction) */
219 #define ESTOPDIFF      "ANDNOT"          /* difference (intersection with negation) */
220 #define ESTOPWCBW      "[BW]"            /* wild card for words beginning with a string */
221 #define ESTOPWCEW      "[EW]"            /* wild card for words ending with a string */
222 #define ESTOPWCRX      "[RX]"            /* wild card for words matching regular expressions */
223 #define ESTOPWITH      "WITH"            /* delimiter for elements */
224 
225 #define ESTOPSTREQ     "STREQ"           /* string is equal */
226 #define ESTOPSTRNE     "STRNE"           /* string is not equal */
227 #define ESTOPSTRINC    "STRINC"          /* string is included in */
228 #define ESTOPSTRBW     "STRBW"           /* string begins with */
229 #define ESTOPSTREW     "STREW"           /* string ends with */
230 #define ESTOPSTRAND    "STRAND"          /* string includes all tokens in */
231 #define ESTOPSTROR     "STROR"           /* string includes at least one token in */
232 #define ESTOPSTROREQ   "STROREQ"         /* string is equal at least one token in */
233 #define ESTOPSTRRX     "STRRX"           /* string matches regular expressions of */
234 #define ESTOPNUMEQ     "NUMEQ"           /* number or date is equal */
235 #define ESTOPNUMNE     "NUMNE"           /* number or date is not equal */
236 #define ESTOPNUMGT     "NUMGT"           /* number or date is greater than */
237 #define ESTOPNUMGE     "NUMGE"           /* number or date is greater than or equal to */
238 #define ESTOPNUMLT     "NUMLT"           /* number or date is less than */
239 #define ESTOPNUMLE     "NUMLE"           /* number or date is less than or equal to */
240 #define ESTOPNUMBT     "NUMBT"           /* number or date is between two tokens of */
241 
242 #define ESTORDIDA      "[IDA]"           /* ID numbers in ascending order */
243 #define ESTORDIDD      "[IDD]"           /* ID numbers in descending order */
244 #define ESTORDSCA      "[SCA]"           /* scores in ascending order */
245 #define ESTORDSCD      "[SCD]"           /* scores in descending order */
246 #define ESTORDSTRA     "STRA"            /* strings in ascending order */
247 #define ESTORDSTRD     "STRD"            /* strings in descending order */
248 #define ESTORDNUMA     "NUMA"            /* numbers in ascending order */
249 #define ESTORDNUMD     "NUMD"            /* numbers in descending order */
250 
251 #define ESTECLSIMURL   10.0              /* eclipse considering similarity and URL */
252 #define ESTECLSERV     100.0             /* eclipse on server basis */
253 #define ESTECLDIR      101.0             /* eclipse on directory basis */
254 #define ESTECLFILE     102.0             /* eclipse on file basis */
255 
256 typedef struct {                         /* type of structure for search conditions */
257   char *phrase;                          /* search phrase */
258   int gstep;                             /* step of N-gram */
259   int tfidf;                             /* whether with TF-IDF tuning */
260   int pmode;                             /* mode of phrase form */
261   void (*cbxpn)(const char *, CBLIST *); /* callback function for query expansion */
262   CBLIST *attrs;                         /* conditions with attributes */
263   char *order;                           /* sorting order */
264   int max;                               /* maximum number of retrieval */
265   int skip;                              /* number of documents to be skipped */
266   int auxmin;                            /* minimum hits to adopt the auxiliary index */
267   CBMAP *auxwords;                       /* words for which the auxiliary index has been used */
268   int scfb;                              /* whether to feed back scores */
269   int *scores;                           /* array of scores */
270   int snum;                              /* number of elements of the score array */
271   const int *nscores;                    /* array of narrowing scores */
272   int nsnum;                             /* number of elements of the narrowing score array */
273   int opts;                              /* options for preservation */
274   double ecllim;                         /* lower limit of similarity eclipse */
275   CBMAP *shadows;                        /* map of eclipsed documents */
276   char *distinct;                        /* distinct attribute */
277   int mask;                              /* mask for meta search */
278 } ESTCOND;
279 
280 enum {                                   /* enumeration for options of search conditions */
281   ESTCONDSURE = 1 << 0,                  /* check every N-gram key */
282   ESTCONDUSUAL = 1 << 1,                 /* check N-gram keys skipping by one (the default) */
283   ESTCONDFAST = 1 << 2,                  /* check N-gram keys skipping by two */
284   ESTCONDAGITO = 1 << 3,                 /* check N-gram keys skipping by three */
285   ESTCONDNOIDF = 1 << 4,                 /* without TF-IDF tuning */
286   ESTCONDSIMPLE = 1 << 10,               /* with the simplified phrase */
287   ESTCONDROUGH = 1 << 11,                /* with the rough phrase */
288   ESTCONDUNION = 1 << 15,                /* with the union phrase */
289   ESTCONDISECT = 1 << 16,                /* with the intersection phrase */
290   ESTCONDSCFB = 1 << 30                  /* feed back scores (only for debugging) */
291 };
292 
293 
294 /* Create a condition object.
295    The return value is an object of search conditions. */
296 ESTCOND *est_cond_new(void);
297 
298 
299 /* Destroy a condition object.
300    `cond' specifies a condition object. */
301 void est_cond_delete(ESTCOND *cond);
302 
303 
304 /* Set the search phrase to a condition object.
305    `cond' specifies a condition object.
306    `phrase' specifies a search phrase. */
307 void est_cond_set_phrase(ESTCOND *cond, const char *phrase);
308 
309 
310 /* Add an expression for an attribute to a condition object.
311    `cond' specifies a condition object.
312    `expr' specifies an expression for an attribute. */
313 void est_cond_add_attr(ESTCOND *cond, const char *expr);
314 
315 
316 /* Set the order of a condition object.
317    `cond' specifies a condition object.
318    `expr' specifies an expression for the order.  By default, the order is by score descending. */
319 void est_cond_set_order(ESTCOND *cond, const char *expr);
320 
321 
322 /* Set the maximum number of retrieval of a condition object.
323    `cond' specifies a condition object.
324    `max' specifies the maximum number of retrieval.  By default, the number of retrieval is not
325    limited. */
326 void est_cond_set_max(ESTCOND *cond, int max);
327 
328 
329 /* Set the number of skipped documents of a condition object.
330    `cond' specifies a condition object.
331    `skip' specifies the number of documents to be skipped in the search result. */
332 void est_cond_set_skip(ESTCOND *cond, int skip);
333 
334 
335 /* Set options of retrieval of a condition object.
336    `cond' specifies a condition object.
337    `options' specifies options: `ESTCONDSURE' specifies that it checks every N-gram key,
338    `ESTCONDUSUAL', which is the default, specifies that it checks N-gram keys with skipping one
339    key, `ESTCONDFAST' skips two keys, `ESTCONDAGITO' skips three keys, `ESTCONDNOIDF' specifies
340    not to perform TF-IDF tuning, `ESTCONDSIMPLE' specifies to use simplified phrase,
341    `ESTCONDROUGH' specifies to use rough phrase, `ESTCONDUNION' specifies to use union phrase,
342    `ESTCONDISECT' specifies to use intersection phrase, `ESTCONDSCFB' specifies to feed back
343    scores (only for debugging).  Each option can be specified at the same time by bitwise or.  If
344    keys are skipped, though search speed is improved, the relevance ratio grows less. */
345 void est_cond_set_options(ESTCOND *cond, int options);
346 
347 
348 /* Set permission to adopt result of the auxiliary index.
349    `cond' specifies a condition object.
350    `min' specifies the minimum hits to adopt result of the auxiliary index.  If it is not more
351    than 0, the auxiliary index is not used.  By default, it is 32. */
352 void est_cond_set_auxiliary(ESTCOND *cond, int min);
353 
354 
355 /* Set the lower limit of similarity eclipse.
356    `cond' specifies a condition object.
357    `limit' specifies the lower limit of similarity for documents to be eclipsed.  Similarity is
358    between 0.0 and 1.0.  If the limit is added by `ESTECLSIMURL', similarity is weighted by URL.
359    If the limit is `ESTECLSERV', similarity is ignored and documents in the same server are
360    eclipsed.  If the limit is `ESTECLDIR', similarity is ignored and documents in the same
361    directory are eclipsed.  If the limit is `ESTECLFILE', similarity is ignored and documents of
362    the same file are eclipsed. */
363 void est_cond_set_eclipse(ESTCOND *cond, double limit);
364 
365 
366 /* Set the attribute distinction filter.
367    `cond' specifies a condition object.
368    `name' specifies the name of an attribute to be distinct.
369    If this filter is set, candidates which have the same value of the attribute are omitted. */
370 void est_cond_set_distinct(ESTCOND *cond, const char *name);
371 
372 
373 /* Set the mask of targets of meta search.
374    `cond' specifies a condition object.
375    `mask' specifies a masking number.  1 means the first target, 2 means the second target, 4
376    means the third target, and power values of 2 and their summation compose the mask. */
377 void est_cond_set_mask(ESTCOND *cond, int mask);
378 
379 
380 
381 /*************************************************************************************************
382  * API for database
383  *************************************************************************************************/
384 
385 
386 #define ESTIDXDMAX     256               /* max number of the inverted index */
387 #define ESTIDXDSTD     16                /* standard number of the inverted index */
388 #define ESTPDOCIDMIN   2000000001        /* minimum ID number of pseudo documents */
389 
390 typedef struct {                         /* type of structure for the inverted index */
391   char *name;                            /* name of the database */
392   int omode;                             /* open mode */
393   VILLA *dbs[ESTIDXDMAX];                /* database handles */
394   int dnum;                              /* number of divisions */
395   VILLA *cdb;                            /* current database handle */
396 } ESTIDX;
397 
398 typedef struct {                         /* type of structure for a database object */
399   char *name;                            /* name of the database */
400   int inode;                             /* inode of the database */
401   DEPOT *metadb;                         /* handle of the meta database */
402   ESTIDX *idxdb;                         /* handles of the inverted indexes */
403   VILLA *fwmdb;                          /* handle of the database for forward matching */
404   VILLA *auxdb;                          /* handle of the auxiliary index */
405   VILLA *xfmdb;                          /* handle of the database for aux forward matching */
406   CURIA *attrdb;                         /* handle of the database for attributes */
407   CURIA *textdb;                         /* handle of the database for texts */
408   CURIA *kwddb;                          /* handle of the database for keywords */
409   VILLA *listdb;                         /* handle of the database for document list */
410   CBMAP *aidxs;                          /* map of attribute indexes */
411   CBLIST *pdocs;                         /* list of pseudo documents */
412   CBMAP *puris;                          /* map of URIs of pseudo documents */
413   int ecode;                             /* last happened error code */
414   int fatal;                             /* whether to have a fatal error */
415   int dseq;                              /* sequence for document IDs */
416   int dnum;                              /* number of the documents */
417   int amode;                             /* mode of text analyzer */
418   int zmode;                             /* mode of data compression */
419   int smode;                             /* mode of score type */
420   CBMAP *idxcc;                          /* cache for the inverted index */
421   CBMAP *auxcc;                          /* cache for the auxiliary index */
422   size_t icsiz;                          /* power of the cache */
423   size_t icmax;                          /* max size of the cache */
424   CBMAP *outcc;                          /* cache for deleted documents */
425   CBMAP *keycc;                          /* cache for keys for TF-IDF */
426   int  kcmnum;                           /* max number of the key cache */
427   CBMAP *attrcc;                         /* cache for attributes */
428   int acmnum;                            /* max number of the attribute cache */
429   CBMAP *textcc;                         /* cache for texts */
430   int tcmnum;                            /* max number of the text cache */
431   CBMAP *veccc;                          /* cache for keyword vectors */
432   int vcmnum;                            /* max number of the vector cache */
433   CBMAP *rescc;                          /* cache for results */
434   int rcmnum;                            /* max number of the result cache */
435   CBMAP *spacc;                          /* special cache for attributes */
436   int scmnum;                            /* max number of the special cache */
437   char *scname;                          /* name of the attribute for the special cache */
438   void (*infocb)(const char *, void *);  /* callback function to inform of events */
439   void *infoop;                          /* opaque for the informing callback */
440   DEPOT *dfdb;                           /* handle of the database for document frequency */
441   int wildmax;                           /* maximum number of expansion of wild cards */
442   CBMAP *metacc;                         /* cache for meta data */
443   int flsflag;                           /* flag of flushing */
444   int intflag;                           /* flag of thread interruption */
445 } ESTDB;
446 
447 enum {                                   /* enumeration for error codes */
448   ESTENOERR,                             /* no error */
449   ESTEINVAL,                             /* invalid argument */
450   ESTEACCES,                             /* access forbidden */
451   ESTELOCK,                              /* lock failure */
452   ESTEDB,                                /* database problem */
453   ESTEIO,                                /* I/O problem */
454   ESTENOITEM,                            /* no item */
455   ESTEMISC = 9999                        /* miscellaneous */
456 };
457 
458 enum {                                   /* enumeration for open modes */
459   ESTDBREADER = 1 << 0,                  /* open as a reader */
460   ESTDBWRITER = 1 << 1,                  /* open as a writer */
461   ESTDBCREAT = 1 << 2,                   /* a writer creating a database if it does not exist */
462   ESTDBTRUNC = 1 << 3,                   /* a writer creating a new database even if one exists */
463   ESTDBNOLCK = 1 << 4,                   /* open without file locking */
464   ESTDBLCKNB = 1 << 5,                   /* lock without blocking */
465   ESTDBPERFNG = 1 << 10,                 /* use perfect N-gram analyzer (European text also) */
466   ESTDBCHRCAT = 1 << 11,                 /* use character category analyzer instead of N-gram */
467   ESTDBSMALL = 1 << 20,                  /* small tuning (less than 50000 documents) */
468   ESTDBLARGE = 1 << 21,                  /* large tuning (more than 300000 documents) */
469   ESTDBHUGE = 1 << 22,                   /* huge tuning (more than 1000000 documents) */
470   ESTDBHUGE2 = 1 << 23,                  /* huge tuning second (more than 5000000 documents) */
471   ESTDBHUGE3 = 1 << 24,                  /* huge tuning third (more than 10000000 documents) */
472   ESTDBSCVOID = 1 << 25,                 /* store scores as void */
473   ESTDBSCINT = 1 << 26,                  /* store scores as 32-bit integer */
474   ESTDBSCASIS = 1 << 27                  /* store scores as-is, not to be tuned when searching */
475 };
476 
477 enum {                                   /* enumeration for data types of attribute index */
478   ESTIDXATTRSEQ,                         /* for multipurpose sequential access method */
479   ESTIDXATTRSTR,                         /* for narrowing with attributes as strings */
480   ESTIDXATTRNUM                          /* for narrowing with attributes as numbers */
481 };
482 
483 enum {                                   /* enumeration for options of optimization */
484   ESTOPTNOPURGE = 1 << 0,                /* omit purging dispensable regions of deleted documents */
485   ESTOPTNODBOPT = 1 << 1                 /* omit optimization of the database files */
486 };
487 
488 enum {                                   /* enumeration for options of document merger */
489   ESTMGCLEAN = 1 << 0                    /* clean up dispensable regions of the deleted document */
490 };
491 
492 enum {                                   /* enumeration for options of document registration */
493   ESTPDCLEAN = 1 << 0,                   /* clean up dispensable regions of the overwritten document */
494   ESTPDWEIGHT = 1 << 1                   /* weight scores statically with score weighting attribute */
495 };
496 
497 enum {                                   /* enumeration for options of document deletion */
498   ESTODCLEAN = 1 << 0                    /* clean up dispensable regions */
499 };
500 
501 enum {                                   /* enumeration for options of document retrieval */
502   ESTGDNOATTR = 1 << 0,                  /* no attributes */
503   ESTGDNOTEXT = 1 << 1,                  /* no text */
504   ESTGDNOKWD = 1 << 2                    /* no keywords */
505 };
506 
507 
508 /* Get the string of an error code.
509    `ecode' specifies an error code.
510    The return value is the string of the error code. */
511 const char *est_err_msg(int ecode);
512 
513 
514 /* Open a database.
515    `name' specifies the name of a database directory.
516    `omode' specifies open modes: `ESTDBWRITER' as a writer, `ESTDBREADER' as a reader.  If the
517    mode is `ESTDBWRITER', the following may be added by bitwise or: `ESTDBCREAT', which means it
518    creates a new database if not exist, `ESTDBTRUNC', which means it creates a new database
519    regardless if one exists.  Both of `ESTDBREADER' and  `ESTDBWRITER' can be added to by
520    bitwise or: `ESTDBNOLCK', which means it opens a database file without file locking, or
521    `ESTDBLCKNB', which means locking is performed without blocking.  If `ESTDBNOLCK' is used,
522    the application is responsible for exclusion control.  `ESTDBCREAT' can be added to by bitwise
523    or: `ESTDBPERFNG', which means N-gram analysis is performed against European text also,
524    `ESTDBCHRCAT', which means character category analysis is performed instead of N-gram analysis,
525    `ESTDBSMALL', which means the index is tuned to register less than 50000 documents,
526    `ESTDBLARGE', which means the index is tuned to register more than 300000 documents,
527    `ESTDBHUGE', which means the index is tuned to register more than 1000000 documents,
528    `ESTDBHUGE2', which means the index is tuned to register more than 5000000 documents,
529    `ESTDBHUGE3', which means the index is tuned to register more than 10000000 documents,
530    `ESTDBSCVOID', which means scores are stored as void, `ESTDBSCINT', which means scores are
531    stored as 32-bit integer, `ESTDBSCASIS', which means scores are stored as-is and marked not
532    to be tuned when searching.
533    `ecp' specifies the pointer to a variable to which the error code is assigned.
534    The return value is a database object of the database or `NULL' if failure. */
535 ESTDB *est_db_open(const char *name, int omode, int *ecp);
536 
537 
538 /* Close a database.
539    `db' specifies a database object.
540    `ecp' specifies the pointer to a variable to which the error code is assigned.
541    The return value is true if success, else it is false. */
542 int est_db_close(ESTDB *db, int *ecp);
543 
544 
545 /* Get the last happened error code of a database.
546    `db' specifies a database object.
547    The return value is the last happened error code of the database. */
548 int est_db_error(ESTDB *db);
549 
550 
551 /* Check whether a database has a fatal error.
552    `db' specifies a database object.
553    The return value is true if the database has a fatal error, else it is false. */
554 int est_db_fatal(ESTDB *db);
555 
556 
557 /* Add an index for narrowing or sorting with document attributes.
558    `db' specifies a database object connected as a writer.
559    `name' specifies the name of an attribute.
560    `type' specifies the data type of attribute index; `ESTIDXATTRSEQ' for multipurpose sequential
561    access method, `ESTIDXATTRSTR' for narrowing with attributes as strings, `ESTIDXATTRNUM' for
562    narrowing with attributes as numbers.
563    The return value is true if success, else it is false.
564    Note that this function should be called before the first document is registered. */
565 int est_db_add_attr_index(ESTDB *db, const char *name, int type);
566 
567 
568 /* Flush index words in the cache of a database.
569    `db' specifies a database object connected as a writer.
570    `max' specifies the maximum number of words to be flushed.  If it is not more than zero, all
571    words are flushed.
572    The return value is true if success, else it is false. */
573 int est_db_flush(ESTDB *db, int max);
574 
575 
576 /* Synchronize updating contents of a database.
577    `db' specifies a database object connected as a writer.
578    The return value is true if success, else it is false. */
579 int est_db_sync(ESTDB *db);
580 
581 
582 /* Optimize a database.
583    `db' specifies a database object connected as a writer.
584    `options' specifies options: `ESTOPTNOPURGE' to omit purging dispensable region of deleted
585    documents, `ESTOPTNODBOPT' to omit optimization of the database files.  The two can be
586    specified at the same time by bitwise or.
587    The return value is true if success, else it is false. */
588 int est_db_optimize(ESTDB *db, int options);
589 
590 
591 /* Merge another database.
592    `db' specifies a database object connected as a writer.
593    `name' specifies the name of another database directory.
594    `options' specifies options: `ESTMGCLEAN' to clean up dispensable regions of the deleted
595    document.
596    The return value is true if success, else it is false.
597    Creation options of the two databases should be entirely the same.  ID numbers of imported
598    documents are changed within the sequence of the destination database.  If URIs of imported
599    documents conflict with ones of existing documents, existing documents are removed. */
600 int est_db_merge(ESTDB *db, const char *name, int options);
601 
602 
603 /* Add a document to a database.
604    `db' specifies a database object connected as a writer.
605    `doc' specifies a document object.  The document object should have the URI attribute.
606    `options' specifies options: `ESTPDCLEAN' to clean up dispensable regions of the overwritten
607    document, `ESTPDWEIGHT' to weight scores statically with score weighting attribute.
608    The return value is true if success, else it is false.
609    If the URI attribute is the same as that of an existing document in the database, the
610    existing one is deleted. */
611 int est_db_put_doc(ESTDB *db, ESTDOC *doc, int options);
612 
613 
614 /* Remove a document from a database.
615    `db' specifies a database object connected as a writer.
616    `id' specifies the ID number of a registered document.
617    `options' specifies options: `ESTODCLEAN' to clean up dispensable regions of the deleted
618    document.
619    The return value is true if success, else it is false. */
620 int est_db_out_doc(ESTDB *db, int id, int options);
621 
622 
623 /* Edit attributes of a document in a database.
624    `db' specifies a database object connected as a writer.
625    `doc' specifies a document object.
626    The return value is true if success, else it is false.
627    The ID can not be changed.  If the URI is changed and it overlaps the URI of another
628    registered document, this function fails. */
629 int est_db_edit_doc(ESTDB *db, ESTDOC *doc);
630 
631 
632 /* Retrieve a document in a database.
633    `db' specifies a database object.
634    `id' specifies the ID number of a registered document.
635    `options' specifies options: `ESTGDNOATTR' to ignore attributes, `ESTGDNOTEXT' to ignore
636    the body text, `ESTGDNOKWD' to ignore keywords.  The three can be specified at the same time
637    by bitwise or.
638    The return value is a document object.  It should be deleted with `est_doc_delete' if it is
639    no longer in use.  On error, `NULL' is returned. */
640 ESTDOC *est_db_get_doc(ESTDB *db, int id, int options);
641 
642 
643 /* Retrieve the value of an attribute of a document in a database.
644    `db' specifies a database object.
645    `id' specifies the ID number of a registered document.
646    `name' specifies the name of an attribute.
647    The return value is the value of the attribute or `NULL' if it does not exist.  Because the
648    region of the return value is allocated with the `malloc' call, it should be released with
649    the `free' call if it is no longer in use. */
650 char *est_db_get_doc_attr(ESTDB *db, int id, const char *name);
651 
652 
653 /* Get the ID of a document specified by URI.
654    `db' specifies a database object.
655    `uri' specifies the URI of a registered document.
656    The return value is the ID of the document.  On error, -1 is returned. */
657 int est_db_uri_to_id(ESTDB *db, const char *uri);
658 
659 
660 /* Get the name of a database.
661    `db' specifies a database object.
662    The return value is the name of the database.  The life duration of the returned string is
663    synchronous with the one of the database object. */
664 const char *est_db_name(ESTDB *db);
665 
666 
667 /* Get the number of documents in a database.
668    `db' specifies a database object.
669    The return value is the number of documents in the database. */
670 int est_db_doc_num(ESTDB *db);
671 
672 
673 /* Get the number of unique words in a database.
674    `db' specifies a database object.
675    The return value is the number of unique words in the database. */
676 int est_db_word_num(ESTDB *db);
677 
678 
679 /* Get the size of a database.
680    `db' specifies a database object.
681    The return value is the size of the database. */
682 double est_db_size(ESTDB *db);
683 
684 
685 /* Search a database for documents corresponding to a condition.
686    `db' specifies a database object.
687    `cond' specifies a condition object.
688    `nump' specifies the pointer to a variable to which the number of elements in the result is
689    assigned.
690    `hints' specifies a map object into which the number of documents corresponding to each word
691    is stored.  If a word is in a negative condition, the number is negative.  The element whose
692    key is an empty string specifies the number of whole result.  If it is `NULL', it is not used.
693    The return value is an array whose elements are ID numbers of corresponding documents.
694    This function never fails.  Even if no document corresponds or an error occurs, an empty
695    array is returned.  Because the region of the return value is allocated with the `malloc'
696    call, it should be released with the `free' call if it is no longer in use. */
697 int *est_db_search(ESTDB *db, ESTCOND *cond, int *nump, CBMAP *hints);
698 
699 
700 /* Search plural databases for documents corresponding to a condition.
701    `dbs' specifies an array whose elements are database objects.
702    `dbnum' specifies the number of elements of the array.
703    `cond' specifies a condition object.
704    `nump' specifies the pointer to a variable to which the number of elements in the result is
705    assigned.
706    `hints' specifies a map object into which the number of documents corresponding to each word
707    is stored.  If a word is in a negative condition, the number is negative.  The element whose
708    key is an empty string specifies the number of whole result.  If it is `NULL', it is not used.
709    The return value is an array whose elements are indexes of container databases and ID numbers
710    in each database alternately.
711    This function never fails.  Even if no document corresponds or an error occurs, an empty
712    array is returned.  Because the region of the return value is allocated with the `malloc'
713    call, it should be released with the `free' call if it is no longer in use. */
714 int *est_db_search_meta(ESTDB **dbs, int dbnum, ESTCOND *cond, int *nump, CBMAP *hints);
715 
716 
717 /* Check whether a document object matches the phrase of a search condition object definitely.
718    `db' specifies a database object.
719    `doc' specifies a document object.
720    `cond' specifies a search condition object.
721    The return value is true if the document matches the phrase of the condition object
722    definitely, else it is false. */
723 int est_db_scan_doc(ESTDB *db, ESTDOC *doc, ESTCOND *cond);
724 
725 
726 /* Set the maximum size of the cache memory of a database.
727    `db' specifies a database object.
728    `size' specifies the maximum size of the index cache.  By default, it is 64MB.  If it is
729    negative, the current size is not changed.  (NOTE(review): `size_t' is unsigned; confirm.)
730    `anum' specifies the maximum number of cached records for document attributes.  By default, it
731    is 8192.  If it is negative, the current size is not changed.
732    `tnum' specifies the maximum number of cached records for document texts.  By default, it is
733    1024.  If it is negative, the current size is not changed.
734    `rnum' specifies the maximum number of cached records for occurrence results.  By default, it
735    is 256.  If it is negative, the current size is not changed. */
736 void est_db_set_cache_size(ESTDB *db, size_t size, int anum, int tnum, int rnum);
737 
738 
739 /* Add a pseudo index directory to a database.
740    `db' specifies a database object.
741    `path' specifies the path of a pseudo index directory.
742    The return value is true if success, else it is false. */
743 int est_db_add_pseudo_index(ESTDB *db, const char *path);
744 
745 
746 
747 /*************************************************************************************************
748  * features for experts
749  *************************************************************************************************/
750 
751 
752 #define _EST_VERSION   "1.4.13"
753 #define _EST_LIBVER    838
754 #define _EST_PROTVER   "1.0"
755 
756 #define _EST_PROJURL   "http://hyperestraier.sourceforge.net/"
757 #define _EST_XNSEARCH  "http://hyperestraier.sourceforge.net/xmlns/search"
758 #define _EST_XNNODE    "http://hyperestraier.sourceforge.net/xmlns/node"
759 
760 enum {                                   /* enumeration for languages */
761   ESTLANGEN,                             /* English */
762   ESTLANGJA,                             /* Japanese */
763   ESTLANGZH,                             /* Chinese */
764   ESTLANGKO,                             /* Korean */
765   ESTLANGMISC                            /* miscellaneous */
766 };
767 
768 enum {                                   /* enumeration for document parts */
769   ESTMDATTR = 1 << 0,                    /* attributes */
770   ESTMDTEXT = 1 << 1,                    /* texts */
771   ESTMDKWD = 1 << 2                      /* keywords */
772 };
773 
774 enum {                                   /* enumeration for database repair */
775   ESTRPSTRICT = 1 << 0,                  /* perform strict consistency check */
776   ESTRPSHODDY = 1 << 1                   /* omit consistency check */
777 };
778 
779 typedef struct {                         /* type of structure for an element of result map */
780   const char *key;                       /* pointer to the key string */
781   int score;                             /* total score */
782 } ESTRESMAPELEM;
783 
784 enum {                                   /* enumeration for scoring for result map */
785   ESTRMLOSUM,                            /* summation */
786   ESTRMLOMAX,                            /* maximum */
787   ESTRMLOMIN,                            /* minimum */
788   ESTRMLOAVG                             /* average */
789 };
790 
791 
792 /* Break a sentence of text and extract words.
793    `text' specifies a sentence of text.
794    `list' specifies a list object to which extracted words are added.
795    `norm' specifies whether to normalize the text.
796    `tail' specifies whether to pick up oddness N-gram at the end. */
797 void est_break_text(const char *text, CBLIST *list, int norm, int tail);
798 
799 
800 /* Break a sentence of text and extract words using the perfect N-gram analyzer.
801    `text' specifies a sentence of text.
802    `list' specifies a list object to which extracted words are added.
803    `norm' specifies whether to normalize the text.
804    `tail' specifies whether to pick up oddness N-gram at the end. */
805 void est_break_text_perfng(const char *text, CBLIST *list, int norm, int tail);
806 
807 
808 /* Break a sentence of text and extract words using the character category analyzer.
809    `text' specifies a sentence of text.
810    `list' specifies a list object to which extracted words are added.
811    `norm' specifies whether to normalize the text. */
812 void est_break_text_chrcat(const char *text, CBLIST *list, int norm);
813 
814 
815 /* Make a snippet of an arbitrary string given as `str'.
816    `words' specifies a list object of words to be highlighted.
817    `wwidth' specifies whole width of the result.
818    `hwidth' specifies width of strings picked up from the beginning of the text.
819    `awidth' specifies width of strings picked up around each highlighted word.
820    The return value is a snippet string of the string.  Because the region of the return value is
821    allocated with the `malloc' call, it should be released with the `free' call if it is no
822    longer in use. */
823 char *est_str_make_snippet(const char *str, const CBLIST *words,
824                            int wwidth, int hwidth, int awidth);
825 
826 
827 /* Convert the character encoding of a string.
828    `ptr' specifies the pointer to a region.
829    `size' specifies the size of the region.  If it is negative, the size is assigned with
830    `strlen(ptr)'.
831    `icode' specifies the name of encoding of the input string.
832    `ocode' specifies the name of encoding of the output string.
833    `sp' specifies the pointer to a variable to which the size of the region of the return
834    value is assigned.  If it is `NULL', it is not used.
835    `mp' specifies the pointer to a variable to which the number of missing characters by failure
836    of conversion is assigned.  If it is `NULL', it is not used.
837    If successful, the return value is the pointer to the result object, else, it is `NULL'.
838    Because an additional zero code is appended at the end of the region of the return value,
839    the return value can be treated as a character string.  Because the region of the return
840    value is allocated with the `malloc' call, it should be released with the `free' call if it
841    is no longer in use. */
842 char *est_iconv(const char *ptr, int size, const char *icode, const char *ocode,
843                 int *sp, int *mp);
844 
845 
846 /* Detect the encoding of a string automatically.
847    `ptr' specifies the pointer to a region.
848    `size' specifies the size of the region.  If it is negative, the size is assigned with
849    `strlen(ptr)'.
850    `plang' specifies a preferred language.  As for now, `ESTLANGEN', `ESTLANGJA', `ESTLANGZH',
851    and `ESTLANGKO' are supported.
852    The return value is the string of the encoding name of the string. */
853 const char *est_enc_name(const char *ptr, int size, int plang);
854 
855 
856 /* Convert a UTF-8 string into UTF-16BE.
857    `ptr' specifies the pointer to a region.
858    `size' specifies the size of the region.
859    `sp' specifies the pointer to a variable to which the size of the region of the return
860    value is assigned.
861    The return value is the pointer to the result object.  Because an additional zero code is
862    appended at the end of the region of the return value, the return value can be treated as a
863    character string.  Because the region of the return value is allocated with the `malloc' call,
864    it should be released with the `free' call if it is no longer in use. */
865 char *est_uconv_in(const char *ptr, int size, int *sp);
866 
867 
868 /* Convert a UTF-16BE string into UTF-8.
869    `ptr' specifies the pointer to a region.
870    `size' specifies the size of the region.
871    `sp' specifies the pointer to a variable to which the size of the region of the return
872    value is assigned.  If it is `NULL', it is not used.
873    The return value is the pointer to the result object.  Because an additional zero code is
874    appended at the end of the region of the return value, the return value can be treated as a
875    character string.  Because the region of the return value is allocated with the `malloc' call,
876    it should be released with the `free' call if it is no longer in use. */
877 char *est_uconv_out(const char *ptr, int size, int *sp);
878 
879 
880 /* Compress a serial object with ZLIB.
881    `ptr' specifies the pointer to a region.
882    `size' specifies the size of the region.  If it is negative, the size is assigned with
883    `strlen(ptr)'.
884    `sp' specifies the pointer to a variable to which the size of the region of the return
885    value is assigned.
886    `mode' specifies detail behavior.  0 specifies using the standard deflate encoding, -1
887    specifies the raw deflate encoding, and 1 specifies the GZIP encoding.
888    If successful, the return value is the pointer to the result object, else, it is `NULL'.
889    Because the region of the return value is allocated with the `malloc' call, it should be
890    released with the `free' call if it is no longer in use. */
891 char *est_deflate(const char *ptr, int size, int *sp, int mode);
892 
893 
894 /* Decompress a serial object compressed with ZLIB.
895    `ptr' specifies the pointer to a region.
896    `size' specifies the size of the region.
897    `sp' specifies the pointer to a variable to which the size of the region of the return
898    value is assigned.  If it is `NULL', it is not used.
899    `mode' specifies detail behavior.  0 specifies using the standard deflate encoding, -1
900    specifies the raw deflate encoding, and 1 specifies the GZIP encoding.
901    If successful, the return value is the pointer to the result object, else, it is `NULL'.
902    Because an additional zero code is appended at the end of the region of the return value,
903    the return value can be treated as a character string.  Because the region of the return
904    value is allocated with the `malloc' call, it should be released with the `free' call if it
905    is no longer in use. */
906 char *est_inflate(const char *ptr, int size, int *sp, int mode);
907 
908 
909 /* Compress a serial object with LZO.
910    `ptr' specifies the pointer to a region.
911    `size' specifies the size of the region.  If it is negative, the size is assigned with
912    `strlen(ptr)'.
913    `sp' specifies the pointer to a variable to which the size of the region of the return
914    value is assigned.
915    If successful, the return value is the pointer to the result object, else, it is `NULL'.
916    Because the region of the return value is allocated with the `malloc' call, it should be
917    released with the `free' call if it is no longer in use. */
918 char *est_lzoencode(const char *ptr, int size, int *sp);
919 
920 
921 /* Decompress a serial object compressed with LZO.
922    `ptr' specifies the pointer to a region.
923    `size' specifies the size of the region.
924    `sp' specifies the pointer to a variable to which the size of the region of the return
925    value is assigned.  If it is `NULL', it is not used.
926    If successful, the return value is the pointer to the result object, else, it is `NULL'.
927    Because an additional zero code is appended at the end of the region of the return value,
928    the return value can be treated as a character string.  Because the region of the return
929    value is allocated with the `malloc' call, it should be released with the `free' call if it
930    is no longer in use. */
931 char *est_lzodecode(const char *ptr, int size, int *sp);
932 
933 
934 /* Compress a serial object with BZIP2.
935    `ptr' specifies the pointer to a region.
936    `size' specifies the size of the region.  If it is negative, the size is assigned with
937    `strlen(ptr)'.
938    `sp' specifies the pointer to a variable to which the size of the region of the return
939    value is assigned.
940    If successful, the return value is the pointer to the result object, else, it is `NULL'.
941    Because the region of the return value is allocated with the `malloc' call, it should be
942    released with the `free' call if it is no longer in use. */
943 char *est_bzencode(const char *ptr, int size, int *sp);
944 
945 
946 /* Decompress a serial object compressed with BZIP2.
947    `ptr' specifies the pointer to a region.
948    `size' specifies the size of the region.
949    `sp' specifies the pointer to a variable to which the size of the region of the return
950    value is assigned.  If it is `NULL', it is not used.
951    If successful, the return value is the pointer to the result object, else, it is `NULL'.
952    Because an additional zero code is appended at the end of the region of the return value,
953    the return value can be treated as a character string.  Because the region of the return
954    value is allocated with the `malloc' call, it should be released with the `free' call if it
955    is no longer in use. */
956 char *est_bzdecode(const char *ptr, int size, int *sp);
957 
958 
959 /* Get the border string for draft data of documents.
960    The return value is the border string for draft data of documents. */
961 const char *est_border_str(void);
962 
963 
964 /* Get the real random number.
965    The return value is the real random number between 0.0 and 1.0. */
966 double est_random(void);
967 
968 
969 /* Get the random number in normal distribution.
970    The return value is the random number in normal distribution between 0.0 and 1.0. */
971 double est_random_nd(void);
972 
973 
974 /* Get an MD5 hash string of a key string.
975    `key' specifies a string to be encrypted.
976    The return value is an MD5 hash string of the key string.  Because the region of the return
977    value is allocated with the `malloc' call, it should be released with the `free' call if it
978    is no longer in use. */
979 char *est_make_crypt(const char *key);
980 
981 
982 /* Check whether a key matches an MD5 hash string.
983    `key' specifies a string to be checked.
984    `hash' specifies an MD5 hash string.
985    The return value is true if the key matches the hash string, else it is false. */
986 int est_match_crypt(const char *key, const char *hash);
987 
988 
989 /* Create a regular expression object.
990    `str' specifies a string of regular expressions.
991    The return value is a regular expression object or `NULL' if failure.
992    If the expression is prefixed with "*I:", the pattern is case insensitive. */
993 void *est_regex_new(const char *str);
994 
995 
996 /* Delete a regular expression object.
997    `regex' specifies a regular expression object. */
998 void est_regex_delete(void *regex);
999 
1000 
1001 /* Check whether a regular expression matches a string.
1002    `regex' specifies a regular expression object.
1003    `str' specifies a string.
1004    The return value is true if the regular expression object matches the string. */
1005 int est_regex_match(const void *regex, const char *str);
1006 
1007 
1008 /* Check whether a regular expression string matches a target string.
1009    `rstr' specifies a regular expression string.
1010    `tstr' specifies a target string.
1011    The return value is true if the regular expression string matches the target string. */
1012 int est_regex_match_str(const char *rstr, const char *tstr);
1013 
1014 
1015 /* Replace each substring matching a regular expression string.
1016    `str' specifies a target string.
1017    `bef' specifies a string of regular expressions for substrings.
1018    `aft' specifies a string with which each substring is replaced.  Each "&" in the string is
1019    replaced with the matched substring.  Each "\" in the string escapes the following character.
1020    Special escapes "\1" through "\9" referring to the corresponding matching sub-expressions in
1021    the regular expression string are supported.
1022    The return value is a new converted string.  Even if the regular expression is invalid, a copy
1023    of the original string is returned.  Because the region of the return value is allocated with
1024    the `malloc' call, it should be released with the `free' call if it is no longer in use. */
1025 char *est_regex_replace(const char *str, const char *bef, const char *aft);
1026 
1027 
1028 /* Duplicate a document object.
1029    `doc' specifies a document object.
1030    The return value is a duplicated document object. */
1031 ESTDOC *est_doc_dup(ESTDOC *doc);
1032 
1033 
1034 /* Set the ID number of a document object.
1035    `doc' specifies a document object.
1036    `id' specifies the ID number to set. */
1037 void est_doc_set_id(ESTDOC *doc, int id);
1038 
1039 
1040 /* Get the hidden texts of a document object.
1041    `doc' specifies a document object.
1042    The return value is concatenated sentences of the hidden text of the document object.  The
1043    life duration of the returned string is synchronous with the one of the document object. */
1044 const char *est_doc_hidden_texts(ESTDOC *doc);
1045 
1046 
1047 /* Reduce the texts to fit to the specified size.
1048    `doc' specifies a document object.
1049    `size' specifies the total size of the texts. */
1050 void est_doc_slim(ESTDOC *doc, int size);
1051 
1052 
1053 /* Check whether a document object is empty.
1054    `doc' specifies a document object.
1055    The return value is true if the document is empty, else it is false. */
1056 int est_doc_is_empty(ESTDOC *doc);
1057 
1058 
1059 /* Duplicate a condition object.
1060    `cond' specifies a condition object.
1061    The return value is a duplicated condition object. */
1062 ESTCOND *est_cond_dup(ESTCOND *cond);
1063 
1064 
1065 /* Get the phrase of a condition object.
1066    `cond' specifies a condition object.
1067    The return value is the phrase of the condition object or `NULL' if it is not specified.  The
1068    life duration of the returned string is synchronous with the one of the condition object. */
1069 const char *est_cond_phrase(ESTCOND *cond);
1070 
1071 
1072 /* Get a list object of attribute expressions of a condition object.
1073    `cond' specifies a condition object.
1074    The return value is a list object of attribute expressions of the condition object or `NULL' if
1075    it is not specified.  The life duration of the returned object is synchronous with the one of
1076    the condition object. */
1077 const CBLIST *est_cond_attrs(ESTCOND *cond);
1078 
1079 
1080 /* Get the order expression of a condition object.
1081    `cond' specifies a condition object.
1082    The return value is the order expression of the condition object or `NULL' if it is not
1083    specified.  The life duration of the returned string is synchronous with the one of the
1084    condition object. */
1085 const char *est_cond_order(ESTCOND *cond);
1086 
1087 
1088 /* Get the maximum number of retrieval of a condition object.
1089    `cond' specifies a condition object.
1090    The return value is the maximum number of retrieval of the condition object or -1 if it is not
1091    specified. */
1092 int est_cond_max(ESTCOND *cond);
1093 
1094 
1095 /* Get the number of skipped documents of a condition object.
1096    `cond' specifies a condition object.
1097    The return value is the number of documents to be skipped in the search result. */
1098 int est_cond_skip(ESTCOND *cond);
1099 
1100 
1101 /* Get the options of a condition object.
1102    `cond' specifies a condition object.
1103    The return value is the options of the condition object. */
1104 int est_cond_options(ESTCOND *cond);
1105 
1106 
1107 /* Get permission to adopt result of the auxiliary index.
1108    `cond' specifies a condition object.
1109    The return value is permission to adopt result of the auxiliary index. */
1110 int est_cond_auxiliary(ESTCOND *cond);
1111 
1112 
1113 /* Get the attribute distinction filter.
1114    `cond' specifies a condition object.
1115    The return value is the name of the distinct attribute or `NULL' if it is not specified.  The
1116    life duration of the returned string is synchronous with the one of the condition object. */
1117 const char *est_cond_distinct(ESTCOND *cond);
1118 
1119 
1120 /* Get the mask of targets of meta search.
1121    `cond' specifies a condition object.
1122    The return value is the mask of targets of meta search. */
1123 int est_cond_mask(ESTCOND *cond);
1124 
1125 
1126 /* Get the score of a document corresponding to a condition object.
1127    `cond' specifies a condition object.
1128    `index' specifies the index of an element of the result array of `est_db_search'.
1129    The return value is the score of the element or -1 if the index is out of bounds. */
1130 int est_cond_score(ESTCOND *cond, int index);
1131 
1132 
1133 /* Get the score array of corresponding documents of a condition object.
1134    `cond' specifies a condition object.
1135    `nump' specifies the pointer to a variable to which the number of elements in the score array
1136    is assigned.
1137    The return value is the score array of corresponding documents. */
1138 const int *est_cond_scores(ESTCOND *cond, int *nump);
1139 
1140 
1141 /* Set the narrowing scores of a condition object.
1142    `cond' specifies a condition object.
1143    `scores' specifies the pointer to an array of narrowing scores.  The life duration of the
1144    array should be equal to or longer than the condition object itself.
1145    `num' specifies the number of elements of the array. */
1146 void est_cond_set_narrowing_scores(ESTCOND *cond, const int *scores, int num);
1147 
1148 
1149 /* Check whether a condition object has used the auxiliary index.
1150    `cond' specifies a condition object.
1151    `word' specifies a keyword to be checked.  If it is an empty string, whether at least one
1152    keyword is used is checked.
1153    The return value is true if the condition object has used the auxiliary index, else it is
1154    false. */
1155 int est_cond_auxiliary_word(ESTCOND *cond, const char *word);
1156 
1157 
1158 /* Get an array of ID numbers of eclipsed documents of a document in a condition object.
1159    `cond' specifies a condition object.
1160    `id' specifies the ID number of a parent document.
1161    `np' specifies the pointer to a variable to which the number of elements of the return value
1162    is assigned.
1163    The return value is an array whose elements express the ID numbers and their scores
1164    alternately. */
1165 const int *est_cond_shadows(ESTCOND *cond, int id, int *np);
1166 
1167 
1168 /* Set the callback function for query expansion.
1169    `cond' specifies a condition object.
1170    `func' specifies the pointer to a function.  The first argument of the callback specifies a
1171    word to be expanded.  The second argument specifies a list object into which renewed words
1172    are to be stored. */
1173 void est_cond_set_expander(ESTCOND *cond, void (*func)(const char *, CBLIST *));
1174 
1175 
1176 /* Set the error code of a database.
1177    `db' specifies a database object.
1178    `ecode' specifies an error code to set. */
1179 void est_db_set_ecode(ESTDB *db, int ecode);
1180 
1181 
1182 /* Check whether an option is set.
1183    `db' specifies a database object.
1184    `option' specifies an option used when opening the database.
1185    The return value is 1 if the option is set, 0 if the option is not set, or -1 if it is
1186    unknown. */
1187 int est_db_check_option(ESTDB *db, int option);
1188 
1189 
1190 /* Get the inode number of a database.
1191    `db' specifies a database object.
1192    The return value is the inode number of the database. */
1193 int est_db_inode(ESTDB *db);
1194 
1195 
1196 /* Set the entity data of a document in a database.
1197    `db' specifies a database object connected as a writer.
1198    `id' specifies the ID number of a registered document.
1199    `ptr' specifies the pointer to a region of entity data.  If it is `NULL', the entity data is
1200    removed.
1201    `size' specifies the size of the region.
1202    The return value is true if success, else it is false. */
1203 int est_db_set_doc_entity(ESTDB *db, int id, const char *ptr, int size);
1204 
1205 
1206 /* Get the entity data of a document in a database.
1207    `db' specifies a database object.
1208    `id' specifies the ID number of a registered document.
1209    `sp' specifies the pointer to a variable to which the size of the region of the return value
1210    is assigned.
1211    The return value is the value of the entity data or `NULL' if it does not exist.  Because the
1212    region of the return value is allocated with the `malloc' call, it should be released with
1213    the `free' call if it is no longer in use. */
1214 char *est_db_get_doc_entity(ESTDB *db, int id, int *sp);
1215 
1216 
1217 /* Set the maximum number of expansions of wild cards.
1218    `db' specifies a database object.
1219    `num' specifies the maximum number of expansions of wild cards. */
1220 void est_db_set_wildmax(ESTDB *db, int num);
1221 
1222 
1223 /* Add a piece of meta data to a database.
1224    `db' specifies a database object connected as a writer.
1225    `name' specifies the name of a piece of meta data.
1226    `value' specifies the value of the meta data.  If it is `NULL', the meta data is removed. */
1227 void est_db_add_meta(ESTDB *db, const char *name, const char *value);
1228 
1229 
1230 /* Get a list of names of meta data of a database.
1231    `db' specifies a database object.
1232    The return value is a new list object of meta data names of the database.  Because the
1233    object of the return value is opened with the function `cblistopen', it should be closed with
1234    the function `cblistclose' if it is no longer in use. */
1235 CBLIST *est_db_meta_names(ESTDB *db);
1236 
1237 
1238 /* Get the value of a piece of meta data of a database.
1239    `db' specifies a database object.
1240    `name' specifies the name of a piece of meta data.
1241    The return value is the value of the meta data or `NULL' if it does not exist.  Because the
1242    region of the return value is allocated with the `malloc' call, it should be released with
1243    the `free' call if it is no longer in use. */
1244 char *est_db_meta(ESTDB *db, const char *name);
1245 
1246 
1247 /* Extract keywords of a document object.
1248    `db' specifies a database object for TF-IDF tuning.  If it is `NULL', it is not used.
1249    `doc' specifies a document object.
1250    `max' specifies the maximum number of keywords to be extracted.
1251    The return value is a new map object of keywords and their scores in decimal string.  Because
1252    the object of the return value is opened with the function `cbmapopen', it should be closed
1253    with the function `cbmapclose' if it is no longer in use. */
1254 CBMAP *est_db_etch_doc(ESTDB *db, ESTDOC *doc, int max);
1255 
1256 
1257 /* Store a map object of keywords.
1258    `db' specifies a database object connected as a writer.
1259    `id' specifies the ID number of a document.
1260    `kwords' specifies a map object of keywords of the document.
1261    `weight' specifies weighting bias of scores.
1262    The return value is true if success, else it is false. */
1263 int est_db_put_keywords(ESTDB *db, int id, CBMAP *kwords, double weight);
1264 
1265 
1266 /* Remove keywords of a document.
1267    `db' specifies a database object connected as a writer.
1268    `id' specifies the ID number of a document.
1269    The return value is true if success, else it is false. */
1270 int est_db_out_keywords(ESTDB *db, int id);
1271 
1272 
1273 /* Retrieve a map object of keywords.
1274    `db' specifies a database object.
1275    `id' specifies the ID number of a document.
1276    The return value is a new map object of keywords and their scores in decimal string.  If
1277    keywords of the document are not stored, `NULL' is returned.  Because the object of the return
1278    value is opened with the function `cbmapopen', it should be closed with the function
1279    `cbmapclose' if it is no longer in use. */
1280 CBMAP *est_db_get_keywords(ESTDB *db, int id);
1281 
1282 
1283 /* Measure the total size of each inner record of a stored document.
1284    `db' specifies a database object.
1285    `id' specifies the ID number of a document.
1286    `parts' specifies document parts: `ESTMDATTR' for attributes, `ESTMDTEXT' for texts, and
1287    `ESTMDKWD' for keywords.  They can be specified at the same time by bitwise or.
1288    The return value is the total size of each inner record of a stored document. */
1289 int est_db_measure_doc(ESTDB *db, int id, int parts);
1290 
1291 
1292 /* Initialize the document iterator of a database.
1293    `db' specifies a database object.
1294    `prev' specifies the URI of the previous element of iteration.  If it is `NULL', it is not used.
1295    The return value is true if success, else it is false. */
1296 int est_db_iter_init(ESTDB *db, const char *prev);
1297 
1298 
1299 /* Get the next ID of the document iterator of a database.
1300    `db' specifies a database object.
1301    The return value is the next ID.  If there are no more documents, 0 is returned.  On error,
1302    -1 is returned. */
1303 int est_db_iter_next(ESTDB *db);
1304 
1305 
1306 /* Initialize the word iterator of a database.
1307    `db' specifies a database object.
1308    The return value is true if success, else it is false. */
1309 int est_db_word_iter_init(ESTDB *db);
1310 
1311 
1312 /* Get the next word of the word iterator of a database.
1313    `db' specifies a database object.
1314    The return value is the next word.  If there are no more words, `NULL' is returned.  Because
1315    the region of the return value is allocated with the `malloc' call, it should be released
1316    with the `free' call if it is no longer in use. */
1317 char *est_db_word_iter_next(ESTDB *db);
1318 
1319 
1320 /* Get the size of the record of a word.
1321    `db' specifies a database object.
1322    `word' specifies a word.
1323    The return value is the size of the record of the word.  If there is no corresponding record,
1324    0 is returned. */
1325 int est_db_word_rec_size(ESTDB *db, const char *word);
1326 
1327 
1328 /* Get the number of unique keywords in a database.
1329    `db' specifies a database object.
1330    The return value is the number of unique keywords in the database. */
1331 int est_db_keyword_num(ESTDB *db);
1332 
1333 
1334 /* Initialize the keyword iterator of a database.
1335    `db' specifies a database object.
1336    The return value is true if success, else it is false. */
1337 int est_db_keyword_iter_init(ESTDB *db);
1338 
1339 
1340 /* Get the next keyword of the keyword iterator of a database.
1341    `db' specifies a database object.
1342    The return value is the next keyword.  If there are no more keywords, `NULL' is returned.
1343    Because the region of the return value is allocated with the `malloc' call, it should be
1344    released with the `free' call if it is no longer in use. */
1345 char *est_db_keyword_iter_next(ESTDB *db);
1346 
1347 
1348 /* Get the size of the record of a keyword.
1349    `db' specifies a database object.
1350    `word' specifies a keyword.
1351    The return value is the size of the record of the keyword.  If there is no corresponding
1352    record, 0 is returned. */
1353 int est_db_keyword_rec_size(ESTDB *db, const char *word);
1354 
1355 
1356 /* Search documents corresponding to a keyword for a database.
1357    `db' specifies a database object.
1358    `word' specifies a keyword.
1359    `nump' specifies the pointer to a variable to which the number of elements in the result is
1360    assigned.
1361    The return value is an array whose elements are ID numbers of corresponding documents.
1362    This function never fails.  Even if no document corresponds or an error occurs, an empty
1363    array is returned.  Because the region of the return value is allocated with the `malloc'
1364    call, it should be released with the `free' call if it is no longer in use. */
1365 int *est_db_keyword_search(ESTDB *db, const char *word, int *nump);
1366 
1367 
1368 /* Get the number of records in the cache memory of a database.
1369    `db' specifies a database object.
1370    The return value is the number of records in the cache memory of the database. */
1371 int est_db_cache_num(ESTDB *db);
1372 
1373 
1374 /* Get the size of used cache region.
1375    `db' specifies a database object.
1376    The return value is the size of used cache region. */
1377 int est_db_used_cache_size(ESTDB *db);
1378 
1379 
1380 /* Set the special cache for narrowing and sorting with document attributes.
1381    `db' specifies a database object.
1382    `name' specifies the name of a document attribute (NOTE: inferred from the summary; confirm).
1383    `num' specifies the maximum number of cached records. */
1384 void est_db_set_special_cache(ESTDB *db, const char *name, int num);
1385 
1386 
1387 /* Set the callback function to inform of database events.
1388    `db' specifies a database object.
1389    `func' specifies the pointer to a function.  The first argument of the callback specifies a
1390    message of each event.  The second argument specifies an arbitrary pointer to opaque data.
1391    `opaque' specifies the pointer passed as the second argument of the callback. */
1392 void est_db_set_informer(ESTDB *db, void (*func)(const char *, void *), void *opaque);
1393 
1394 
1395 /* Fill the cache for keys for TF-IDF.
1396    `db' specifies a database object. */
1397 void est_db_fill_key_cache(ESTDB *db);
1398 
1399 
1400 /* Set the database of document frequency.
1401    `db' specifies a database object.
1402    `dfdb' specifies a database object of `DEPOT'.  If it is `NULL', the setting is cleared. */
1403 void est_db_set_dfdb(ESTDB *db, DEPOT *dfdb);
1404 
1405 
1406 /* Clear the result cache.
1407    `db' specifies a database object. */
1408 void est_db_refresh_rescc(ESTDB *db);
1409 
1410 
1411 /* Charge the result cache.
1412    `db' specifies a database object.
1413    `max' specifies the maximum number of words to be charged.  If it is not more than zero, all
1414    words are charged. */
1415 void est_db_charge_rescc(ESTDB *db, int max);
1416 
1417 
1418 /* Get a list of words in the result cache.
1419    `db' specifies a database object.
1420    The return value is a new list object of words in the result cache.  Because the object of the
1421    return value is opened with the function `cblistopen', it should be closed with the function
1422    `cblistclose' if it is no longer in use. */
1423 CBLIST *est_db_list_rescc(ESTDB *db);
1424 
1425 
1426 /* Get the number of pseudo documents in a database.
1427    `db' specifies a database object.
1428    The return value is the number of pseudo documents in the database. */
1429 int est_db_pseudo_doc_num(ESTDB *db);
1430 
1431 
1432 /* Get a list of expressions of attribute indexes of a database.
1433    `db' specifies a database object.
1434    The return value is a new list object of expressions of attribute indexes.  Because the object
1435    of the return value is opened with the function `cblistopen', it should be closed with the
1436    function `cblistclose' if it is no longer in use. */
1437 CBLIST *est_db_attr_index_exprs(ESTDB *db);
1438 
1439 
1440 /* Interrupt long time processing.
1441    `db' specifies a database object. */
1442 void est_db_interrupt(ESTDB *db);
1443 
1444 
1445 /* Repair a broken database directory.
1446    `name' specifies the name of a database directory.
1447    `options' specifies options: `ESTRPSTRICT' to perform strict consistency check, `ESTRPSHODDY'
1448    to omit consistency check.
1449    `ecp' specifies the pointer to a variable to which the error code is assigned.
1450    The return value is true if success, else it is false. */
1451 int est_db_repair(const char *name, int options, int *ecp);
1452 
1453 
1454 /* Extract words for snippet from hints of search.
1455    `hints' specifies a map object whose records were set by `est_db_search'.
1456    The return value is a new list object of words to be highlighted.  Because the object of the
1457    return value is opened with the function `cblistopen', it should be closed with the function
1458    `cblistclose' if it is no longer in use. */
1459 CBLIST *est_hints_to_words(CBMAP *hints);
1460 
1461 
1462 /* Add a record into a result map for logical operation.
1463    `map' specifies a map object.
1464    `key' specifies the key of a record.
1465    `score' specifies the score of the record.
1466    `method' specifies a scoring method for logical operation.  As for now, `ESTRMLOSUM',
1467    `ESTRMLOMAX', `ESTRMLOMIN', and `ESTRMLOAVG' are supported. */
1468 void est_resmap_add(CBMAP *map, const char *key, int score, int method);
1469 
1470 
1471 /* Dump a result list of a result map for logical operation.
1472    `map' specifies a map object.
1473    `min' specifies the minimum number of times for which each element of the result occurs.
1474    `nump' specifies the pointer to a variable to which the number of elements in the result is
1475    assigned.
1476    The return value is an array whose elements are structures of keys and scores.  Because the
1477    region of the return value is allocated with the `malloc' call, it should be released with the
1478    `free' call if it is no longer in use. */
1479 ESTRESMAPELEM *est_resmap_dump(CBMAP *map, int min, int *nump);
1480 
1481 
1482 /* Reset the environment of the process.
1483    This function sets the standard streams as binary mode and resets environment variables for
1484    locale. */
1485 void est_proc_env_reset(void);
1486 
1487 
1488 /* Make a directory.
1489    `path' specifies the path of a new directory.
1490    The return value is true if success, else it is false. */
1491 int est_mkdir(const char *path);
1492 
1493 
1494 /* Remove a directory and its contents recursively.
1495    `path' specifies the path of a directory.
1496    The return value is true if success, else it is false. */
1497 int est_rmdir_rec(const char *path);
1498 
1499 
1500 /* Get the canonicalized absolute pathname of a file.
1501    `path' specifies the path of a file.
1502    The return value is the canonicalized absolute pathname of a file.  Because the region of the
1503    return value is allocated with the `malloc' call, it should be released with the `free' call
1504    if it is no longer in use. */
1505 char *est_realpath(const char *path);
1506 
1507 
1508 /* Get the inode number of a file.
1509    `path' specifies the path of a file.
1510    The return value is the inode number of a file or -1 on error. */
1511 int est_inode(const char *path);
1512 
1513 
1514 /* Change modification time of a file.
1515    `path' specifies the path of a file.
1516    `mtime' specifies modification time.  If it is negative, the current time is set.
1517    The return value is true if success, else it is false. */
1518 int est_utime(const char *path, time_t mtime);
1519 
1520 
1521 /* Get the time of day in milliseconds.
1522    The return value is the time of day in milliseconds. */
1523 double est_gettimeofday(void);
1524 
1525 
1526 /* Suspend execution for microsecond intervals.
1527    `usec' specifies microseconds to sleep for. */
1528 void est_usleep(unsigned long usec);
1529 
1530 
1531 /* Set a signal handler.
1532    `signum' specifies the number of a target signal.
1533    `sighandler' specifies the pointer to a function.  The argument of the handler specifies the
1534    number of the caught signal.  If it is `SIG_IGN', the signal is ignored. */
1535 void est_signal(int signum, void (*sighandler)(int));
1536 
1537 
1538 /* Send a signal to a process.
1539    `pid' specifies the PID of a target process.
1540    `sig' specifies a signal code.
1541    The return value is true if success, else it is false. */
1542 int est_kill(int pid, int sig);
1543 
1544 
1545 /* Get the load ratio of the physical memory.
1546    The return value is the load ratio of the physical memory.
1547    As for now, this function returns 0.0 on platforms except for Windows. */
1548 double est_memory_usage(void);
1549 
1550 
1551 /* Get the media type of an extension.
1552    `ext' specifies the extension of a file path.
1553    The return value is the media type of the extension. */
1554 const char *est_ext_type(const char *ext);
1555 
1556 
1557 /* Set a seed vector from a map object.
1558    `svmap' specifies a map object of a seed vector.
1559    `svec' specifies a vector object.
1560    `vnum' specifies the number of dimensions of the vector. */
1561 void est_vector_set_seed(CBMAP *svmap, int *svec, int vnum);
1562 
1563 
1564 /* Set a target vector from a map object.
1565    `svmap' specifies a map object of a seed vector.
1566    `tvmap' specifies a map object of a target vector.
1567    `tvec' specifies a vector object.
1568    `vnum' specifies the number of dimensions of the vector. */
1569 void est_vector_set_target(CBMAP *svmap, CBMAP *tvmap, int *tvec, int vnum);
1570 
1571 
1572 /* Get the cosine of the angle of two vectors.
1573    `avec' specifies a vector object.
1574    `bvec' specifies the other vector object.
1575    `vnum' specifies the number of dimensions of the vector.
1576    The return value is the cosine of the angle of two vectors. */
1577 double est_vector_cosine(const int *avec, const int *bvec, int vnum);
1578 
1579 
1580 
1581 #if defined(__cplusplus)                 /* export for C++ */
1582 }
1583 #endif
1584 
1585 #endif                                   /* duplication check */
1586 
1587 
1588 /* END OF FILE */
1589