1 /************************************************************************************************* 2 * The core API of Tokyo Dystopia 3 * Copyright (C) 2007-2010 FAL Labs 4 * This file is part of Tokyo Dystopia. 5 * Tokyo Dystopia is free software; you can redistribute it and/or modify it under the terms of 6 * the GNU Lesser General Public License as published by the Free Software Foundation; either 7 * version 2.1 of the License or any later version. Tokyo Dystopia is distributed in the hope 8 * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 10 * License for more details. 11 * You should have received a copy of the GNU Lesser General Public License along with Tokyo 12 * Dystopia; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, 13 * Boston, MA 02111-1307 USA. 14 *************************************************************************************************/ 15 16 17 #ifndef _DYSTOPIA_H /* duplication check */ 18 #define _DYSTOPIA_H 19 20 21 #if defined(__cplusplus) 22 #define __DYSTOPIA_CLINKAGEBEGIN extern "C" { 23 #define __DYSTOPIA_CLINKAGEEND } 24 #else 25 #define __DYSTOPIA_CLINKAGEBEGIN 26 #define __DYSTOPIA_CLINKAGEEND 27 #endif 28 __DYSTOPIA_CLINKAGEBEGIN 29 30 31 #include <tcutil.h> 32 #include <tchdb.h> 33 #include <tcbdb.h> 34 #include <tcqdb.h> 35 36 37 38 /************************************************************************************************* 39 * API 40 *************************************************************************************************/ 41 42 43 #define IDBQDBMAX 32 /* maximum number of the internal databases */ 44 45 typedef struct { /* type of structure for an indexed database object */ 46 void *mmtx; /* mutex for method */ 47 char *path; /* path of the database directory */ 48 bool wmode; /* whether to be writable */ 49 uint8_t qopts; /* tuning options of q-gram databases */ 50 int qomode; /* open mode of q-gram databases */ 51 TCHDB *txdb; /* text database object */ 52 TCQDB *idxs[IDBQDBMAX]; /* q-gram database objects */ 53 uint8_t inum; /* number of the q-gram database objects */ 54 uint8_t cnum; /* current number of the q-gram database */ 55 uint32_t ernum; /* expected number of records */ 56 uint32_t etnum; /* expected number of tokens */ 57 uint64_t iusiz; /* unit size of each index file */ 58 uint8_t opts; /* options */ 59 bool (*synccb)(int, int, const char *, void *); /* callback function for sync progression */ 60 void *syncopq; /* opaque for the sync callback function */ 61 uint8_t exopts; /* expert options */ 62 } TCIDB; 63 64 enum { /* enumeration for tuning options */ 65 IDBTLARGE = 1 << 0, /* use 64-bit bucket array */ 66 IDBTDEFLATE = 1 << 1, /* compress each page with Deflate */ 67 IDBTBZIP = 1 << 2, /* compress each record with BZIP2 */ 68 IDBTTCBS = 1 << 3 /* compress each page with TCBS */ 69 }; 70 71 enum { /* enumeration for open modes */ 72 IDBOREADER = 1 << 0, /* open as a reader */ 73 IDBOWRITER = 1 << 1, /* open as a writer */ 74 IDBOCREAT = 1 << 2, /* writer creating */ 75 IDBOTRUNC = 1 << 3, /* writer truncating */ 76 IDBONOLCK = 1 << 4, /* open without locking */ 77 IDBOLCKNB = 1 << 5 /* lock without blocking */ 78 }; 79 80 enum { /* enumeration for get modes */ 81 IDBSSUBSTR = QDBSSUBSTR, /* substring matching */ 82 IDBSPREFIX = QDBSPREFIX, /* prefix matching */ 83 IDBSSUFFIX = QDBSSUFFIX, /* suffix matching */ 84 IDBSFULL = QDBSFULL, /* full matching */ 85 IDBSTOKEN, /* token matching */ 86 IDBSTOKPRE, /* token prefix matching */ 87 IDBSTOKSUF /* token suffix matching */ 88 }; 89 90 91 /* Get the message string corresponding to an error code. 92 `ecode' specifies the error code. 93 The return value is the message string of the error code. */ 94 const char *tcidberrmsg(int ecode); 95 96 97 /* Create an indexed database object. 98 The return value is the new indexed database object. */ 99 TCIDB *tcidbnew(void); 100 101 102 /* Delete an indexed database object. 103 `idb' specifies the indexed database object. 104 If the database is not closed, it is closed implicitly. Note that the deleted object and its 105 derivatives can not be used anymore. */ 106 void tcidbdel(TCIDB *idb); 107 108 109 /* Get the last happened error code of an indexed database object. 110 `idb' specifies the indexed database object. 111 The return value is the last happened error code. 112 The following error code is defined: `TCESUCCESS' for success, `TCETHREAD' for threading 113 error, `TCEINVALID' for invalid operation, `TCENOFILE' for file not found, `TCENOPERM' for no 114 permission, `TCEMETA' for invalid meta data, `TCERHEAD' for invalid record header, `TCEOPEN' 115 for open error, `TCECLOSE' for close error, `TCETRUNC' for trunc error, `TCESYNC' for sync 116 error, `TCESTAT' for stat error, `TCESEEK' for seek error, `TCEREAD' for read error, 117 `TCEWRITE' for write error, `TCEMMAP' for mmap error, `TCELOCK' for lock error, `TCEUNLINK' 118 for unlink error, `TCERENAME' for rename error, `TCEMKDIR' for mkdir error, `TCERMDIR' for 119 rmdir error, `TCEKEEP' for existing record, `TCENOREC' for no record found, and `TCEMISC' for 120 miscellaneous error. */ 121 int tcidbecode(TCIDB *idb); 122 123 124 /* Set the tuning parameters of an indexed database object. 125 `idb' specifies the indexed database object which is not opened. 126 `ernum' specifies the expected number of records to be stored. If it is not more than 0, the 127 default value is specified. The default value is 1000000. 128 `etnum' specifies the expected number of tokens to be stored. If it is not more than 0, the 129 default value is specified. The default value is 1000000. 130 `iusiz' specifies the unit size of each index file. If it is not more than 0, the default 131 value is specified. The default value is 536870912. 132 `opts' specifies options by bitwise-or: `IDBTLARGE' specifies that the size of the database 133 can be larger than 2GB by using 64-bit bucket array, `IDBTDEFLATE' specifies that each page 134 is compressed with Deflate encoding, `IDBTBZIP' specifies that each page is compressed with 135 BZIP2 encoding, `IDBTTCBS' specifies that each page is compressed with TCBS encoding. 136 If successful, the return value is true, else, it is false. 137 Note that the tuning parameters should be set before the database is opened. */ 138 bool tcidbtune(TCIDB *idb, int64_t ernum, int64_t etnum, int64_t iusiz, uint8_t opts); 139 140 141 /* Set the caching parameters of an indexed database object. 142 `idb' specifies the indexed database object which is not opened. 143 `icsiz' specifies the capacity size of the token cache. If it is not more than 0, the default 144 value is specified. The default value is 134217728. 145 `lcnum' specifies the maximum number of cached leaf nodes of B+ tree. If it is not more than 146 0, the default value is specified. The default value is 64 for writer or 1024 for reader. 147 If successful, the return value is true, else, it is false. 148 Note that the caching parameters should be set before the database is opened. */ 149 bool tcidbsetcache(TCIDB *idb, int64_t icsiz, int32_t lcnum); 150 151 152 /* Set the maximum number of forward matching expansion of an indexed database object. 153 `idb' specifies the indexed database object. 154 `fwmmax' specifies the maximum number of forward matching expansion. 155 If successful, the return value is true, else, it is false. 156 Note that the matching parameters should be set before the database is opened. */ 157 bool tcidbsetfwmmax(TCIDB *idb, uint32_t fwmmax); 158 159 160 /* Open an indexed database object. 161 `idb' specifies the indexed database object. 162 `path' specifies the path of the database directory. 163 `omode' specifies the connection mode: `IDBOWRITER' as a writer, `IDBOREADER' as a reader. 164 If the mode is `IDBOWRITER', the following may be added by bitwise-or: `IDBOCREAT', which 165 means it creates a new database if not exist, `IDBOTRUNC', which means it creates a new 166 database regardless if one exists. Both of `IDBOREADER' and `IDBOWRITER' can be added to by 167 bitwise-or: `IDBONOLCK', which means it opens the database directory without file locking, or 168 `IDBOLCKNB', which means locking is performed without blocking. 169 If successful, the return value is true, else, it is false. */ 170 bool tcidbopen(TCIDB *idb, const char *path, int omode); 171 172 173 /* Close an indexed database object. 174 `idb' specifies the indexed database object. 175 If successful, the return value is true, else, it is false. 176 Update of a database is assured to be written when the database is closed. If a writer opens 177 a database but does not close it appropriately, the database will be broken. */ 178 bool tcidbclose(TCIDB *idb); 179 180 181 /* Store a record into an indexed database object. 182 `idb' specifies the indexed database object connected as a writer. 183 `id' specifies the ID number of the record. It should be positive. 184 `text' specifies the string of the record, whose encoding should be UTF-8. 185 If successful, the return value is true, else, it is false. */ 186 bool tcidbput(TCIDB *idb, int64_t id, const char *text); 187 188 189 /* Remove a record of an indexed database object. 190 `idb' specifies the indexed database object connected as a writer. 191 `id' specifies the ID number of the record. It should be positive. 192 If successful, the return value is true, else, it is false. */ 193 bool tcidbout(TCIDB *idb, int64_t id); 194 195 196 /* Retrieve a record of an indexed database object. 197 `idb' specifies the indexed database object connected as a writer. 198 `id' specifies the ID number of the record. It should be positive. 199 If successful, the return value is the string of the corresponding record, else, it is `NULL'. 200 Because the region of the return value is allocated with the `malloc' call, it should be 201 released with the `free' call when it is no longer in use. */ 202 char *tcidbget(TCIDB *idb, int64_t id); 203 204 205 /* Search an indexed database. 206 `idb' specifies the indexed database object. 207 `word' specifies the string of the word to be matched to. 208 `smode' specifies the matching mode: `IDBSSUBSTR' as substring matching, `IDBSPREFIX' as prefix 209 matching, `IDBSSUFFIX' as suffix matching, `IDBSFULL' as full matching, `IDBSTOKEN' as token 210 matching, `IDBSTOKPRE' as token prefix matching, or `IDBSTOKSUF' as token suffix matching. 211 `np' specifies the pointer to the variable into which the number of elements of the return 212 value is assigned. 213 If successful, the return value is the pointer to an array of ID numbers of the corresponding 214 records. `NULL' is returned on failure. 215 Because the region of the return value is allocated with the `malloc' call, it should be 216 released with the `free' call when it is no longer in use. */ 217 uint64_t *tcidbsearch(TCIDB *idb, const char *word, int smode, int *np); 218 219 220 /* Search an indexed database with a compound expression. 221 `idb' specifies the indexed database object. 222 `expr' specifies the string of the compound expression. 223 `np' specifies the pointer to the variable into which the number of elements of the return 224 value is assigned. 225 If successful, the return value is the pointer to an array of ID numbers of the corresponding 226 records. `NULL' is returned on failure. 227 Because the region of the return value is allocated with the `malloc' call, it should be 228 released with the `free' call when it is no longer in use. */ 229 uint64_t *tcidbsearch2(TCIDB *idb, const char *expr, int *np); 230 231 232 /* Initialize the iterator of an indexed database object. 233 `idb' specifies the indexed database object. 234 If successful, the return value is true, else, it is false. 235 The iterator is used in order to access the ID number of every record stored in a database. */ 236 bool tcidbiterinit(TCIDB *idb); 237 238 239 /* Get the next ID number of the iterator of an indexed database object. 240 `idb' specifies the indexed database object. 241 If successful, the return value is the ID number of the next record, else, it is 0. 0 is 242 returned when no record is to be get out of the iterator. 243 It is possible to access every record by iteration of calling this function. It is allowed to 244 update or remove records whose keys are fetched while the iteration. However, it is not 245 assured if updating the database is occurred while the iteration. Besides, the order of this 246 traversal access method is arbitrary, so it is not assured that the order of storing matches 247 the one of the traversal access. */ 248 uint64_t tcidbiternext(TCIDB *idb); 249 250 251 /* Synchronize updated contents of an indexed database object with the files and the device. 252 `idb' specifies the indexed database object connected as a writer. 253 If successful, the return value is true, else, it is false. 254 This function is useful when another process connects the same database directory. */ 255 bool tcidbsync(TCIDB *idb); 256 257 258 /* Optimize the files of an indexed database object. 259 `idb' specifies the indexed database object connected as a writer. 260 If successful, the return value is true, else, it is false. 261 This function is useful to reduce the size of the database files with data fragmentation by 262 successive updating. */ 263 bool tcidboptimize(TCIDB *idb); 264 265 266 /* Remove all records of an indexed database object. 267 `idb' specifies the indexed database object connected as a writer. 268 If successful, the return value is true, else, it is false. */ 269 bool tcidbvanish(TCIDB *idb); 270 271 272 /* Copy the database directory of an indexed database object. 273 `idb' specifies the indexed database object. 274 `path' specifies the path of the destination directory. If it begins with `@', the trailing 275 substring is executed as a command line. 276 If successful, the return value is true, else, it is false. False is returned if the executed 277 command returns non-zero code. 278 The database directory is assured to be kept synchronized and not modified while the copying or 279 executing operation is in progress. So, this function is useful to create a backup directory 280 of the database directory. */ 281 bool tcidbcopy(TCIDB *idb, const char *path); 282 283 284 /* Get the directory path of an indexed database object. 285 `idb' specifies the indexed database object. 286 The return value is the path of the database directory or `NULL' if the object does not 287 connect to any database directory. */ 288 const char *tcidbpath(TCIDB *idb); 289 290 291 /* Get the number of records of an indexed database object. 292 `idb' specifies the indexed database object. 293 The return value is the number of records or 0 if the object does not connect to any database 294 directory. */ 295 uint64_t tcidbrnum(TCIDB *idb); 296 297 298 /* Get the total size of the database files of an indexed database object. 299 `idb' specifies the indexed database object. 300 The return value is the size of the database files or 0 if the object does not connect to any 301 database directory. */ 302 uint64_t tcidbfsiz(TCIDB *idb); 303 304 305 306 /************************************************************************************************* 307 * features for experts 308 *************************************************************************************************/ 309 310 311 enum { /* enumeration for expert options */ 312 IDBXNOTXT = 1 << 0 /* no text mode */ 313 }; 314 315 316 /* Set the file descriptor for debugging output. 317 `idb' specifies the indexed database object. 318 `fd' specifies the file descriptor for debugging output. */ 319 void tcidbsetdbgfd(TCIDB *idb, int fd); 320 321 322 /* Get the file descriptor for debugging output. 323 `idb' specifies the indexed database object. 324 The return value is the file descriptor for debugging output. */ 325 int tcidbdbgfd(TCIDB *idb); 326 327 328 /* Synchronize updating contents on memory of an indexed database object. 329 `idb' specifies the indexed database object. 330 `level' specifies the synchronization lavel; 0 means cache synchronization, 1 means database 331 synchronization, and 2 means file synchronization. 332 If successful, the return value is true, else, it is false. */ 333 bool tcidbmemsync(TCIDB *idb, int level); 334 335 336 /* Get the inode number of the database directory of an indexed database object. 337 `idb' specifies the indexed database object. 338 The return value is the inode number of the database directory or 0 the object does not 339 connect to any database directory. */ 340 uint64_t tcidbinode(TCIDB *idb); 341 342 343 /* Get the modification time of the database directory of an indexed database object. 344 `idb' specifies the indexed database object. 345 The return value is the inode number of the database directory or 0 the object does not 346 connect to any database directory. */ 347 time_t tcidbmtime(TCIDB *idb); 348 349 350 /* Get the options of an indexed database object. 351 `idb' specifies the indexed database object. 352 The return value is the options. */ 353 uint8_t tcidbopts(TCIDB *idb); 354 355 356 /* Set the callback function for sync progression of an indexed database object. 357 `idb' specifies the indexed database object. 358 `cb' specifies the pointer to the callback function for sync progression. Its first argument 359 specifies the number of tokens to be synchronized. Its second argument specifies the number 360 of processed tokens. Its third argument specifies the message string. The fourth argument 361 specifies an arbitrary pointer. Its return value should be true usually, or false if the sync 362 operation should be terminated. 363 `opq' specifies the arbitrary pointer to be given to the callback function. */ 364 void tcidbsetsynccb(TCIDB *idb, bool (*cb)(int, int, const char *, void *), void *opq); 365 366 367 /* Set the expert options of an indexed database object. 368 `idb' specifies the indexed database object. 369 `exopts' specifies options by bitwise-or: `IDBXNOTXT' specifies that the text database does 370 not record any record. */ 371 void tcidbsetexopts(TCIDB *idb, uint32_t exopts); 372 373 374 375 __DYSTOPIA_CLINKAGEEND 376 #endif /* duplication check */ 377 378 379 /* END OF FILE */ 380