1 /*************************************************************************************************
2  * The core API of Tokyo Dystopia
3  *                                                               Copyright (C) 2007-2010 FAL Labs
4  * This file is part of Tokyo Dystopia.
5  * Tokyo Dystopia is free software; you can redistribute it and/or modify it under the terms of
6  * the GNU Lesser General Public License as published by the Free Software Foundation; either
7  * version 2.1 of the License or any later version.  Tokyo Dystopia is distributed in the hope
8  * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
10  * License for more details.
11  * You should have received a copy of the GNU Lesser General Public License along with Tokyo
12  * Dystopia; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
13  * Boston, MA 02111-1307 USA.
14  *************************************************************************************************/
15 
16 
17 #ifndef _DYSTOPIA_H                      /* duplication check */
18 #define _DYSTOPIA_H
19 
20 
21 #if defined(__cplusplus)
22 #define __DYSTOPIA_CLINKAGEBEGIN extern "C" {
23 #define __DYSTOPIA_CLINKAGEEND }
24 #else
25 #define __DYSTOPIA_CLINKAGEBEGIN
26 #define __DYSTOPIA_CLINKAGEEND
27 #endif
28 __DYSTOPIA_CLINKAGEBEGIN
29 
30 
31 #include <tcutil.h>
32 #include <tchdb.h>
33 #include <tcbdb.h>
34 #include <tcqdb.h>
35 
36 
37 
38 /*************************************************************************************************
39  * API
40  *************************************************************************************************/
41 
42 
43 #define IDBQDBMAX      32                /* maximum number of the internal q-gram databases (capacity of `idxs') */
44 
45 typedef struct {                         /* type of structure for an indexed database object */
46   void *mmtx;                            /* mutex for method */
47   char *path;                            /* path of the database directory */
48   bool wmode;                            /* whether the database is writable */
49   uint8_t qopts;                         /* tuning options of q-gram databases */
50   int qomode;                            /* open mode of q-gram databases */
51   TCHDB *txdb;                           /* text database object */
52   TCQDB *idxs[IDBQDBMAX];                /* q-gram database objects (at most IDBQDBMAX) */
53   uint8_t inum;                          /* number of the q-gram database objects */
54   uint8_t cnum;                          /* current number of the q-gram database */
55   uint32_t ernum;                        /* expected number of records (32-bit here; `tcidbtune' takes int64_t) */
56   uint32_t etnum;                        /* expected number of tokens (32-bit here; `tcidbtune' takes int64_t) */
57   uint64_t iusiz;                        /* unit size of each index file */
58   uint8_t opts;                          /* options */
59   bool (*synccb)(int, int, const char *, void *);  /* callback function for sync progression (same signature as `cb' of `tcidbsetsynccb') */
60   void *syncopq;                         /* opaque pointer for the sync callback function */
61   uint8_t exopts;                        /* expert options; NOTE(review): `tcidbsetexopts' takes uint32_t but this is 8-bit -- confirm narrowing */
62 } TCIDB;
63 
64 enum {                                   /* enumeration for tuning options (`opts' of `tcidbtune', combined by bitwise-or) */
65   IDBTLARGE = 1 << 0,                    /* use 64-bit bucket array */
66   IDBTDEFLATE = 1 << 1,                  /* compress each page with Deflate */
67   IDBTBZIP = 1 << 2,                     /* compress each page with BZIP2 */
68   IDBTTCBS = 1 << 3                      /* compress each page with TCBS */
69 };
70 
71 enum {                                   /* enumeration for open modes (`omode' of `tcidbopen', combined by bitwise-or) */
72   IDBOREADER = 1 << 0,                   /* open as a reader */
73   IDBOWRITER = 1 << 1,                   /* open as a writer */
74   IDBOCREAT = 1 << 2,                    /* writer: create a new database if it does not exist */
75   IDBOTRUNC = 1 << 3,                    /* writer: create a new database regardless if one exists */
76   IDBONOLCK = 1 << 4,                    /* open without file locking */
77   IDBOLCKNB = 1 << 5                     /* perform locking without blocking */
78 };
79 
80 enum {                                   /* enumeration for matching modes (`smode' of `tcidbsearch') */
81   IDBSSUBSTR = QDBSSUBSTR,               /* substring matching */
82   IDBSPREFIX = QDBSPREFIX,               /* prefix matching */
83   IDBSSUFFIX = QDBSSUFFIX,               /* suffix matching */
84   IDBSFULL = QDBSFULL,                   /* full matching */
85   IDBSTOKEN,                             /* token matching (value continues from IDBSFULL) */
86   IDBSTOKPRE,                            /* token prefix matching */
87   IDBSTOKSUF                             /* token suffix matching */
88 };
89 
90 
91 /* Get the message string corresponding to an error code.
92    `ecode' specifies the error code.
93    The return value is the message string of the error code. */
94 const char *tcidberrmsg(int ecode);
95 
96 
97 /* Create an indexed database object.
98    The return value is the new indexed database object. */
99 TCIDB *tcidbnew(void);
100 
101 
102 /* Delete an indexed database object.
103    `idb' specifies the indexed database object.
104    If the database is not closed, it is closed implicitly.  Note that the deleted object and its
105    derivatives can not be used anymore. */
106 void tcidbdel(TCIDB *idb);
107 
108 
109 /* Get the last happened error code of an indexed database object.
110    `idb' specifies the indexed database object.
111    The return value is the last happened error code.
112    The following error codes are defined: `TCESUCCESS' for success, `TCETHREAD' for threading
113    error, `TCEINVALID' for invalid operation, `TCENOFILE' for file not found, `TCENOPERM' for no
114    permission, `TCEMETA' for invalid meta data, `TCERHEAD' for invalid record header, `TCEOPEN'
115    for open error, `TCECLOSE' for close error, `TCETRUNC' for trunc error, `TCESYNC' for sync
116    error, `TCESTAT' for stat error, `TCESEEK' for seek error, `TCEREAD' for read error,
117    `TCEWRITE' for write error, `TCEMMAP' for mmap error, `TCELOCK' for lock error, `TCEUNLINK'
118    for unlink error, `TCERENAME' for rename error, `TCEMKDIR' for mkdir error, `TCERMDIR' for
119    rmdir error, `TCEKEEP' for existing record, `TCENOREC' for no record found, and `TCEMISC' for
120    miscellaneous error. */
121 int tcidbecode(TCIDB *idb);
122 
123 
124 /* Set the tuning parameters of an indexed database object.
125    `idb' specifies the indexed database object which is not opened.
126    `ernum' specifies the expected number of records to be stored.  If it is not more than 0, the
127    default value is specified.  The default value is 1000000.
128    `etnum' specifies the expected number of tokens to be stored.  If it is not more than 0, the
129    default value is specified.  The default value is 1000000.
130    `iusiz' specifies the unit size of each index file.  If it is not more than 0, the default
131    value is specified.  The default value is 536870912.
132    `opts' specifies options by bitwise-or: `IDBTLARGE' specifies that the size of the database
133    can be larger than 2GB by using 64-bit bucket array, `IDBTDEFLATE' specifies that each page
134    is compressed with Deflate encoding, `IDBTBZIP' specifies that each page is compressed with
135    BZIP2 encoding, `IDBTTCBS' specifies that each page is compressed with TCBS encoding.
136    If successful, the return value is true, else, it is false.
137    Note that the tuning parameters should be set before the database is opened. */
138 bool tcidbtune(TCIDB *idb, int64_t ernum, int64_t etnum, int64_t iusiz, uint8_t opts);
139 
140 
141 /* Set the caching parameters of an indexed database object.
142    `idb' specifies the indexed database object which is not opened.
143    `icsiz' specifies the capacity size of the token cache.  If it is not more than 0, the default
144    value is specified.  The default value is 134217728.
145    `lcnum' specifies the maximum number of cached leaf nodes of B+ tree.  If it is not more than
146    0, the default value is specified.  The default value is 64 for writer or 1024 for reader.
147    If successful, the return value is true, else, it is false.
148    Note that the caching parameters should be set before the database is opened. */
149 bool tcidbsetcache(TCIDB *idb, int64_t icsiz, int32_t lcnum);
150 
151 
152 /* Set the maximum number of forward matching expansion of an indexed database object.
153    `idb' specifies the indexed database object.
154    `fwmmax' specifies the maximum number of forward matching expansion.
155    If successful, the return value is true, else, it is false.
156    Note that the matching parameters should be set before the database is opened. */
157 bool tcidbsetfwmmax(TCIDB *idb, uint32_t fwmmax);
158 
159 
160 /* Open an indexed database object.
161    `idb' specifies the indexed database object.
162    `path' specifies the path of the database directory.
163    `omode' specifies the connection mode: `IDBOWRITER' as a writer, `IDBOREADER' as a reader.
164    If the mode is `IDBOWRITER', the following may be added by bitwise-or: `IDBOCREAT', which
165    means it creates a new database if not exist, `IDBOTRUNC', which means it creates a new
166    database regardless if one exists.  Both of `IDBOREADER' and `IDBOWRITER' can be added to by
167    bitwise-or: `IDBONOLCK', which means it opens the database directory without file locking, or
168    `IDBOLCKNB', which means locking is performed without blocking.
169    If successful, the return value is true, else, it is false. */
170 bool tcidbopen(TCIDB *idb, const char *path, int omode);
171 
172 
173 /* Close an indexed database object.
174    `idb' specifies the indexed database object.
175    If successful, the return value is true, else, it is false.
176    Update of a database is assured to be written when the database is closed.  If a writer opens
177    a database but does not close it appropriately, the database will be broken. */
178 bool tcidbclose(TCIDB *idb);
179 
180 
181 /* Store a record into an indexed database object.
182    `idb' specifies the indexed database object connected as a writer.
183    `id' specifies the ID number of the record.  It should be positive.
184    `text' specifies the string of the record, whose encoding should be UTF-8.
185    If successful, the return value is true, else, it is false. */
186 bool tcidbput(TCIDB *idb, int64_t id, const char *text);
187 
188 
189 /* Remove a record of an indexed database object.
190    `idb' specifies the indexed database object connected as a writer.
191    `id' specifies the ID number of the record.  It should be positive.
192    If successful, the return value is true, else, it is false. */
193 bool tcidbout(TCIDB *idb, int64_t id);
194 
195 
196 /* Retrieve a record of an indexed database object.
197    `idb' specifies the indexed database object.
198    `id' specifies the ID number of the record.  It should be positive.
199    If successful, the return value is the string of the corresponding record, else, it is `NULL'.
200    Because the region of the return value is allocated with the `malloc' call, it should be
201    released with the `free' call when it is no longer in use. */
202 char *tcidbget(TCIDB *idb, int64_t id);
203 
204 
205 /* Search an indexed database.
206    `idb' specifies the indexed database object.
207    `word' specifies the string of the word to be matched to.
208    `smode' specifies the matching mode: `IDBSSUBSTR' as substring matching, `IDBSPREFIX' as prefix
209    matching, `IDBSSUFFIX' as suffix matching, `IDBSFULL' as full matching, `IDBSTOKEN' as token
210    matching, `IDBSTOKPRE' as token prefix matching, or `IDBSTOKSUF' as token suffix matching.
211    `np' specifies the pointer to the variable into which the number of elements of the return
212    value is assigned.
213    If successful, the return value is the pointer to an array of ID numbers of the corresponding
214    records.  `NULL' is returned on failure.
215    Because the region of the return value is allocated with the `malloc' call, it should be
216    released with the `free' call when it is no longer in use. */
217 uint64_t *tcidbsearch(TCIDB *idb, const char *word, int smode, int *np);
218 
219 
220 /* Search an indexed database with a compound expression.
221    `idb' specifies the indexed database object.
222    `expr' specifies the string of the compound expression.
223    `np' specifies the pointer to the variable into which the number of elements of the return
224    value is assigned.
225    If successful, the return value is the pointer to an array of ID numbers of the corresponding
226    records.  `NULL' is returned on failure.
227    Because the region of the return value is allocated with the `malloc' call, it should be
228    released with the `free' call when it is no longer in use. */
229 uint64_t *tcidbsearch2(TCIDB *idb, const char *expr, int *np);
230 
231 
232 /* Initialize the iterator of an indexed database object.
233    `idb' specifies the indexed database object.
234    If successful, the return value is true, else, it is false.
235    The iterator is used in order to access the ID number of every record stored in a database. */
236 bool tcidbiterinit(TCIDB *idb);
237 
238 
239 /* Get the next ID number of the iterator of an indexed database object.
240    `idb' specifies the indexed database object.
241    If successful, the return value is the ID number of the next record, else, it is 0.  0 is
242    returned when no record is left to be fetched from the iterator.
243    It is possible to access every record by calling this function repeatedly.  It is allowed to
244    update or remove records whose IDs have been fetched during the iteration.  However, the
245    result is not assured if the database is updated during the iteration.  Besides, the order of
246    this traversal is arbitrary, so it is not assured that the order of storing matches the order
247    of the traversal. */
248 uint64_t tcidbiternext(TCIDB *idb);
249 
250 
251 /* Synchronize updated contents of an indexed database object with the files and the device.
252    `idb' specifies the indexed database object connected as a writer.
253    If successful, the return value is true, else, it is false.
254    This function is useful when another process connects the same database directory. */
255 bool tcidbsync(TCIDB *idb);
256 
257 
258 /* Optimize the files of an indexed database object.
259    `idb' specifies the indexed database object connected as a writer.
260    If successful, the return value is true, else, it is false.
261    This function is useful to reduce the size of the database files with data fragmentation by
262    successive updating. */
263 bool tcidboptimize(TCIDB *idb);
264 
265 
266 /* Remove all records of an indexed database object.
267    `idb' specifies the indexed database object connected as a writer.
268    If successful, the return value is true, else, it is false. */
269 bool tcidbvanish(TCIDB *idb);
270 
271 
272 /* Copy the database directory of an indexed database object.
273    `idb' specifies the indexed database object.
274    `path' specifies the path of the destination directory.  If it begins with `@', the trailing
275    substring is executed as a command line.
276    If successful, the return value is true, else, it is false.  False is returned if the executed
277    command returns non-zero code.
278    The database directory is assured to be kept synchronized and not modified while the copying or
279    executing operation is in progress.  So, this function is useful to create a backup directory
280    of the database directory. */
281 bool tcidbcopy(TCIDB *idb, const char *path);
282 
283 
284 /* Get the directory path of an indexed database object.
285    `idb' specifies the indexed database object.
286    The return value is the path of the database directory or `NULL' if the object does not
287    connect to any database directory. */
288 const char *tcidbpath(TCIDB *idb);
289 
290 
291 /* Get the number of records of an indexed database object.
292    `idb' specifies the indexed database object.
293    The return value is the number of records or 0 if the object does not connect to any database
294    directory. */
295 uint64_t tcidbrnum(TCIDB *idb);
296 
297 
298 /* Get the total size of the database files of an indexed database object.
299    `idb' specifies the indexed database object.
300    The return value is the size of the database files or 0 if the object does not connect to any
301    database directory. */
302 uint64_t tcidbfsiz(TCIDB *idb);
303 
304 
305 
306 /*************************************************************************************************
307  * features for experts
308  *************************************************************************************************/
309 
310 
311 enum {                                   /* enumeration for expert options (`exopts' of `tcidbsetexopts') */
312   IDBXNOTXT = 1 << 0                     /* no text mode: the text database does not record any record */
313 };
314 
315 
316 /* Set the file descriptor for debugging output.
317    `idb' specifies the indexed database object.
318    `fd' specifies the file descriptor for debugging output. */
319 void tcidbsetdbgfd(TCIDB *idb, int fd);
320 
321 
322 /* Get the file descriptor for debugging output.
323    `idb' specifies the indexed database object.
324    The return value is the file descriptor for debugging output. */
325 int tcidbdbgfd(TCIDB *idb);
326 
327 
328 /* Synchronize updating contents on memory of an indexed database object.
329    `idb' specifies the indexed database object.
330    `level' specifies the synchronization level; 0 means cache synchronization, 1 means database
331    synchronization, and 2 means file synchronization.
332    If successful, the return value is true, else, it is false. */
333 bool tcidbmemsync(TCIDB *idb, int level);
334 
335 
336 /* Get the inode number of the database directory of an indexed database object.
337    `idb' specifies the indexed database object.
338    The return value is the inode number of the database directory or 0 if the object does not
339    connect to any database directory. */
340 uint64_t tcidbinode(TCIDB *idb);
341 
342 
343 /* Get the modification time of the database directory of an indexed database object.
344    `idb' specifies the indexed database object.
345    The return value is the modification time of the database directory or 0 if the object does
346    not connect to any database directory. */
347 time_t tcidbmtime(TCIDB *idb);
348 
349 
350 /* Get the options of an indexed database object.
351    `idb' specifies the indexed database object.
352    The return value is the options. */
353 uint8_t tcidbopts(TCIDB *idb);
354 
355 
356 /* Set the callback function for sync progression of an indexed database object.
357    `idb' specifies the indexed database object.
358    `cb' specifies the pointer to the callback function for sync progression.  Its first argument
359    specifies the number of tokens to be synchronized.  Its second argument specifies the number
360    of processed tokens.  Its third argument specifies the message string.  The fourth argument
361    specifies an arbitrary pointer.  Its return value should be true usually, or false if the sync
362    operation should be terminated.
363    `opq' specifies the arbitrary pointer to be given to the callback function. */
364 void tcidbsetsynccb(TCIDB *idb, bool (*cb)(int, int, const char *, void *), void *opq);
365 
366 
367 /* Set the expert options of an indexed database object.
368    `idb' specifies the indexed database object.
369    `exopts' specifies options by bitwise-or: `IDBXNOTXT' specifies that the text database does
370    not record any record. */
371 void tcidbsetexopts(TCIDB *idb, uint32_t exopts);
372 
373 
374 
375 __DYSTOPIA_CLINKAGEEND
376 #endif                                   /* duplication check */
377 
378 
379 /* END OF FILE */
380