1 /*************************************************************************************************
2  * Common modules related to estmaster
3  *                                                      Copyright (C) 2004-2007 Mikio Hirabayashi
4  * This file is part of Hyper Estraier.
5  * Hyper Estraier is free software; you can redistribute it and/or modify it under the terms of
6  * the GNU Lesser General Public License as published by the Free Software Foundation; either
7  * version 2.1 of the License or any later version.  Hyper Estraier is distributed in the hope
8  * that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
10  * License for more details.
11  * You should have received a copy of the GNU Lesser General Public License along with Hyper
12  * Estraier; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
13  * Boston, MA 02111-1307 USA.
14  *************************************************************************************************/
15 
16 
17 #ifndef _MASTERMOD_H                     /* duplication check */
18 #define _MASTERMOD_H
19 
20 #include "estraier.h"
21 #include "estmtdb.h"
22 #include "estnode.h"
23 #include "myconf.h"
24 #include "mymorph.h"
25 
26 
27 
28 /*************************************************************************************************
29  * pseudo API
30  *************************************************************************************************/
31 
32 
33 #define NUMBUFSIZ      32                /* size of a buffer for a number */
34 #define URIBUFSIZ      8192              /* size of a buffer for a URI */
35 #define IOBUFSIZ       8192              /* size of a buffer for I/O */
36 #define HOSTBUFSIZ     256               /* size of a buffer for a host name */
37 #define ADDRBUFSIZ     48                /* size of a buffer for an address */
38 #define NODENAMEMAX    128               /* maximum length of a node name */
39 #define MINIBNUM       31                /* bucket number of a small map */
40 #define KWORDNUM       32                /* number of shown keywords */
41 #define CONDATTRMAX    9                 /* maximum number of attribute conditions */
42 #define DEFMAXSRCH     10                /* default maximum number of retrieved documents */
43 #define SELFCREDIT     10000             /* credit of the node itself */
44 
45 #define CONFFILE       "_conf"           /* name of the configuration file */
46 #define USERFILE       "_user"           /* name of the user list file */
47 #define LOGFILE        "_log"            /* name of the log file */
48 #define METAFILE       "_meta"           /* name of the meta database */
49 #define PIDFILE        "_pid"            /* name of the process ID file */
50 #define STOPFILE       "_stop"           /* name of the stop file (NOTE(review): presumably a marker requesting shutdown -- confirm) */
51 #define DFDBFILE       "_dfdb"           /* name of the document frequency database */
52 #define NODEDIR        "_node"           /* name of the node directory */
53 #define SESSDIR        "_sess"           /* name of the session directory */
54 #define NULLDEV        "/dev/null"       /* path of the null device */
55 
56 #define INFORMHELPER   "estbutler inform"    /* command line of the inform helper */
57 #define SEARCHHELPER   "estbutler search"    /* command line of the search helper */
58 #define GETDOCHELPER   "estbutler getdoc"    /* command line of the get_doc helper */
59 #define GETDOCATTRHELPER  "estbutler getdocattr"  /* command line of the get_doc_attr helper */
60 #define ETCHDOCHELPER  "estbutler etchdoc"   /* command line of the etch_doc helper */
61 #define URITOIDHELPER  "estbutler uritoid"   /* command line of the uri_to_id helper */
62 
63 #define MMKMAGIC       "magic"           /* meta key of the magic number of the meta DB */
64 #define MMKMAGVAL      "[ESTMASTER]"     /* value of the magic number of the meta DB */
65 #define NMKNAME        "name"            /* meta key of the name of a node DB */
66 #define NMKLABEL       "label"           /* meta key of the label of a node DB */
67 #define NMKADMINS      "admins"          /* meta key of the administrators of a node DB */
68 #define NMKUSERS       "users"           /* meta key of the users of a node DB */
69 #define NMKLINKS       "links"           /* meta key of the links of a node DB */
70 
71 #define DATTRNDURL     "#nodeurl"        /* name of the pseudo-attribute for the node URL */
72 #define DATTRNDLABEL   "#nodelabel"      /* name of the pseudo-attribute for the node label */
73 #define DATTRNDSCORE   "#nodescore"      /* name of the pseudo-attribute for the node score */
74 #define DATTRLFILE     "_lfile"          /* name of the attribute for the file name */
75 
76 enum {                                   /* enumeration for running modes (RM_*) */
77   RM_NORMAL = 1,                         /* normal */
78   RM_RDONLY = 2                          /* read only */
79 };
80 
81 enum {                                   /* enumeration for AM_* modes; NOTE(review): presumably authorization modes, not running modes -- confirm */
82   AM_NONE = 1,                           /* none */
83   AM_ADMIN = 2,                          /* admin */
84   AM_ALL = 3                             /* all */
85 };
86 
87 enum {                                   /* enumeration for logging levels (LL_*) */
88   LL_DEBUG = 1,                          /* debug */
89   LL_INFO = 2,                           /* information */
90   LL_WARN = 3,                           /* warning */
91   LL_ERROR = 4,                          /* error */
92   LL_NONE = 5,                           /* none */
93   LL_CHECK = 6                           /* check to open */
94 };
95 
96 enum {                                   /* enumeration for scale prediction (SP_*) */
97   SP_SMALL = 1,                          /* small */
98   SP_MEDIUM = 2,                         /* medium */
99   SP_LARGE = 3,                          /* large */
100   SP_HUGE = 4                            /* huge */
101 };
102 
103 enum {                                   /* enumeration for scoring expression modes (SE_*) */
104   SE_VOID = 1,                           /* void */
105   SE_CHAR = 2,                           /* char */
106   SE_INT = 3,                            /* int */
107   SE_ASIS = 4                            /* as-is */
108 };
109 
110 enum {                                   /* enumeration for MM_* modes; NOTE(review): labelled "UI operations" but the members are score/rank choices, presumably merge methods -- confirm */
111   MM_SCORE = 1,                          /* score */
112   MM_SCRK = 2,                           /* score and rank */
113   MM_RANK = 3                            /* rank */
114 };
115 
116 enum {                                   /* enumeration for phrase modes (PM_*) */
117   PM_USUAL = 1,                          /* usual phrase */
118   PM_SIMPLE = 2,                         /* simplified phrase */
119   PM_ROUGH = 3,                          /* rough phrase */
120   PM_UNION = 4,                          /* union phrase */
121   PM_ISECT = 5                           /* intersection phrase */
122 };
123 
124 typedef struct {                         /* type of structure for a user object */
125   char *name;                            /* unique name of the user */
126   char *passwd;                          /* encrypted password */
127   char *flags;                           /* flags of the user */
128   char *fname;                           /* full name */
129   char *misc;                            /* miscellaneous information */
130   time_t atime;                          /* last access time */
131   CBMAP *sess;                           /* map of session variables */
132   pthread_mutex_t mutex;                 /* mutex per user */
133 } USER;
134 
135 typedef struct {                         /* type of structure for a user manager object */
136   char *rootdir;                         /* path of the root directory */
137   CBMAP *users;                          /* map of user names and user entities */
138 } UMGR;
139 
140 typedef struct {                         /* type of structure for a node object */
141   ESTMTDB *db;                           /* index database */
142   char *name;                            /* unique name of the index */
143   char *label;                           /* label for display */
144   CBMAP *admins;                         /* set of ID numbers of administrators */
145   CBMAP *users;                          /* set of ID numbers of users */
146   CBMAP *links;                          /* expressions of links, held in a map */
147   time_t mtime;                          /* modification date */
148   int dirty;                             /* whether the node has a dirty cache */
149   pthread_mutex_t mutex;                 /* mutex per node */
150 } NODE;
151 
152 typedef struct {                         /* type of structure for a node manager object */
153   char *rootdir;                         /* path of the root directory */
154   CBMAP *nodes;                          /* map of node names and node entities */
155   CBMAP *aidxs;                          /* map of attribute indexes (see `nmgr_add_aidx') */
156 } NMGR;
157 
158 typedef struct {                         /* type of structure for a read/write lock object */
159   int readers;                           /* number of readers */
160   int writers;                           /* number of writers */
161   pthread_mutex_t mutex;                 /* mutex per lock */
162   pthread_cond_t cond;                   /* condition variable */
163 } RWLOCK;
164 
165 typedef struct {                         /* type of structure for a document in a result */
166   int score;                             /* score */
167   ESTDOC *doc;                           /* document object */
168   CBMAP *attrs;                          /* map object for attributes */
169   char *body;                            /* body data */
170   const char *value;                     /* value of an attribute for sorting (NOTE(review): presumably borrowed, not owned -- confirm) */
171 } RESDOC;
172 
173 typedef struct {                         /* type of structure for documents in a result */
174   CBMAP *uris;                           /* map of URIs and result entities */
175   pthread_mutex_t mutex;                 /* mutex per result */
176 } RESMAP;
177 
178 
179 /* The handle of the log file. */
180 extern FILE *log_fp;
181 
182 
183 /* The level of logging (NOTE(review): presumably one of the LL_* constants -- confirm). */
184 extern int log_level;
185 
186 
187 /* Open the log file.
188    `rootdir' specifies the path of the root directory.
189    `path' specifies the path of the log file.
190    `level' specifies the level of logging.
191    `trunc' specifies whether to truncate the log file.
192    The return value is true if success, else it is false. */
193 int log_open(const char *rootdir, const char *path, int level, int trunc);
194 
195 
196 /* Print a formatted string into the log file.
   `level' specifies the logging level of the message.
   `format' specifies the format string, followed by its arguments (NOTE(review): presumably
   printf-style conversions -- confirm against the implementation). */
197 void log_print(int level, const char *format, ...);
198 
199 
200 /* Rotate the log file.
201    `rootdir' specifies the path of the root directory.
202    `path' specifies the path of the log file.
203    The return value is true if success, else it is false. */
204 int log_rotate(const char *rootdir, const char *path);
205 
206 
207 /* Initialize the root directory.
208    `rootdir' specifies the path of the root directory.
209    The return value is true if success, else it is false. */
210 int master_init(const char *rootdir);
211 
212 
213 /* Get the PID of the process locking the root directory.
214    `rootdir' specifies the path of the root directory.
215    The return value is the PID of the process.  NOTE(review): the value returned when no
   process holds the lock is not documented here -- confirm against the implementation. */
216 int lockerpid(const char *rootdir);
217 
218 
219 /* Check whether a name includes alphanumeric characters only.
220    `name' specifies a name.
221    The return value is true if so or false if not. */
222 int check_alnum_name(const char *name);
223 
224 
225 /* Create a user manager object.
226    `rootdir' specifies the path of the root directory.
227    The return value is a user manager object. */
228 UMGR *umgr_new(const char *rootdir);
229 
230 
231 /* Destroy a user manager object.
232    `umgr' specifies a user manager object.
233    The return value is true if success, else it is false. */
234 int umgr_delete(UMGR *umgr);
235 
236 
237 /* Load all users from the user file.
238    `umgr' specifies a user manager object.
239    The return value is true if success, else it is false. */
240 int umgr_load(UMGR *umgr);
241 
242 
243 /* Synchronize all users into the user file.
244    `umgr' specifies a user manager object.
245    The return value is true if success, else it is false. */
246 int umgr_sync(UMGR *umgr);
247 
248 
249 /* Add a user to a user manager object.
250    `umgr' specifies a user manager object.
251    `name' specifies the unique name of a user.
252    `passwd' specifies the encrypted password of the user.
253    `flags' specifies the flags of the user.
254    `fname' specifies the full name of the user.
255    `misc' specifies the miscellaneous information of the user.
256    The return value is true if success, else it is false. */
257 int umgr_put(UMGR *umgr, const char *name, const char *passwd, const char *flags,
258              const char *fname, const char *misc);
259 
260 
261 /* Remove a user from a user manager object.
262    `umgr' specifies a user manager object.
263    `name' specifies the unique name of a user.
264    The return value is true if success, else it is false. */
265 int umgr_out(UMGR *umgr, const char *name);
266 
267 
268 /* Get a list of names of users in a user manager object.
269    `umgr' specifies a user manager object.
270    The return value is a list object of the names of the users in the user manager object.
271    The returned object should be destroyed with `cblistclose'. */
272 CBLIST *umgr_names(UMGR *umgr);
273 
274 
275 /* Get a user object in a user manager object.
276    `umgr' specifies a user manager object.
277    `name' specifies the unique name of a user.
278    The return value is a user object or `NULL' on failure.  NOTE(review): no release function
   is documented for the returned object; presumably it stays owned by the manager -- confirm. */
279 USER *umgr_get(UMGR *umgr, const char *name);
280 
281 
282 /* Make the session of a user object.
283    `user' specifies a user object. */
284 void user_make_sess(USER *user);
285 
286 
287 /* Clear the session of a user object.
288    `user' specifies a user object. */
289 void user_clear_sess(USER *user);
290 
291 
292 /* Set a session variable of a user object.
293    `user' specifies a user object.
294    `name' specifies the name of a variable.
295    `value' specifies the value of the variable.  If it is `NULL', the variable is deleted. */
296 void user_set_sess_val(USER *user, const char *name, const char *value);
297 
298 
299 /* Get the value of a session variable of a user object.
300    `user' specifies a user object.
301    `name' specifies the name of a variable.
302    The return value is the value of the session variable or `NULL' if it does not exist.
303    Because the region of the return value is allocated with the `malloc' call, it should be
304    released with the `free' call if it is no longer in use. */
305 char *user_sess_val(USER *user, const char *name);
306 
307 
308 /* Create a node manager object.
309    `rootdir' specifies the path of the root directory.
310    The return value is a node manager object. */
311 NMGR *nmgr_new(const char *rootdir);
312 
313 
314 /* Destroy a node manager object.
315    `nmgr' specifies a node manager object.
316    The return value is true if success, else it is false. */
317 int nmgr_delete(NMGR *nmgr);
318 
319 
320 /* Load all nodes from the node directory.
321    `nmgr' specifies a node manager object.
322    `wmode' specifies whether the nodes are opened as a writer or as a reader.
323    The return value is true if success, else it is false. */
324 int nmgr_load(NMGR *nmgr, int wmode);
325 
326 
327 /* Synchronize all nodes into the node directory.
328    `nmgr' specifies a node manager object.
329    `phis' specifies whether to synchronize physically.
330    The return value is true if success, else it is false. */
331 int nmgr_sync(NMGR *nmgr, int phis);
332 
333 
334 /* Add an attribute index to a node manager object.
335    `nmgr' specifies a node manager object.
336    `name' specifies the name of a target attribute.
337    `type' specifies the expression of a data type. */
338 void nmgr_add_aidx(NMGR *nmgr, const char *name, const char *type);
339 
340 
341 /* Add a node to a node manager object.
342    `nmgr' specifies a node manager object.
343    `name' specifies the unique name of a node.
344    `wmode' specifies whether the node is a writer or a reader.
345    `options' specifies options for the database of the node.
346    The return value is true if success, else it is false. */
347 int nmgr_put(NMGR *nmgr, const char *name, int wmode, int options);
348 
349 
350 /* Remove a node from a node manager object.
351    `nmgr' specifies a node manager object.
352    `name' specifies the unique name of a node.
353    The return value is true if success, else it is false. */
354 int nmgr_out(NMGR *nmgr, const char *name);
355 
356 
357 /* Clear registered documents in a node in a node manager object.
358    `nmgr' specifies a node manager object.
359    `name' specifies the unique name of a node.
360    `options' specifies options for the database of the node.
361    The return value is true if success, else it is false. */
362 int nmgr_clear(NMGR *nmgr, const char *name, int options);
363 
364 
365 /* Get a list of names of nodes in a node manager object.
366    `nmgr' specifies a node manager object.
367    The return value is a list object of the names of the nodes in the node manager object.
368    The returned object should be destroyed with `cblistclose'. */
369 CBLIST *nmgr_names(NMGR *nmgr);
370 
371 
372 /* Get a node object in a node manager object.
373    `nmgr' specifies a node manager object.
374    `name' specifies the unique name of a node.
375    The return value is a node object or `NULL' on failure.  NOTE(review): no release function
   is documented for the returned object; presumably it stays owned by the manager -- confirm. */
376 NODE *nmgr_get(NMGR *nmgr, const char *name);
377 
378 
379 /* Set a link object of a node.
380    `node' specifies a node object.
381    `url' specifies the URL of a link object.
382    `label' specifies the label of the link object.  If it is `NULL', the link is removed.
383    `credit' specifies the credit of the link object. */
384 void node_set_link(NODE *node, const char *url, const char *label, int credit);
385 
386 
387 /* Get a vector of keywords of a document in a node.
388    `node' specifies a node object.
389    `id' specifies the ID number of a document.
390    The return value is a map object of keywords.
391    The returned object should be destroyed with `cbmapclose'. */
392 CBMAP *node_etch_doc(NODE *node, int id);
393 
394 
395 /* Create a read-write lock object.
396    The return value is a read-write lock object. */
397 RWLOCK *rwlock_new(void);
398 
399 
400 /* Destroy a read-write lock object.
401    `rwlock' specifies a read-write lock object. */
402 void rwlock_delete(RWLOCK *rwlock);
403 
404 
405 /* Lock a read-write lock object.
406    `rwlock' specifies a read-write lock object.
407    `wmode' specifies whether the lock is taken as a writer (true) or as a reader (false).
408    The return value is true if success, else it is false. */
409 int rwlock_lock(RWLOCK *rwlock, int wmode);
410 
411 
412 /* Unlock a read-write lock object.
413    `rwlock' specifies a read-write lock object.
414    The return value is true if success, else it is false. */
415 int rwlock_unlock(RWLOCK *rwlock);
416 
417 
418 /* Get the number of readers locking a read-write lock object.
419    `rwlock' specifies a read-write lock object.
420    The return value is the number of readers locking the read-write lock object. */
421 int rwlock_rnum(RWLOCK *rwlock);
422 
423 
424 /* Create a result map object.
425    The return value is a result map object. */
426 RESMAP *resmap_new(void);
427 
428 
429 /* Destroy a result map object.
430    `resmap' specifies a result map object. */
431 void resmap_delete(RESMAP *resmap);
432 
433 
434 /* Add the data of a result document to a result map object.
435    `resmap' specifies a result map object.
436    `score' specifies the score of the document.
437    `doc' specifies a document object of the document.  It is closed internally.
438    `attrs' specifies the attributes of the document.  It can be `NULL'.  It is deleted internally.
439    `body' specifies the body data of the document.  It can be `NULL'.  It is freed internally. */
440 void resmap_put(RESMAP *resmap, int score, ESTDOC *doc, CBMAP *attrs, char *body);
441 
442 
443 /* Get an array of the result document objects in a result map object.
444    `resmap' specifies a result map object.
445    `nump' specifies the pointer to a variable to which the number of elements in the result is
446    assigned.
447    `order' specifies an expression for the order.  If it is `NULL', the order is by score
448    descending.
449    `distinct' specifies the name of the distinct attribute.  If it is `NULL', no filter is used.
450    The return value is an array whose elements are result document objects.  Because the region
451    of the return value is allocated with the `malloc' call, it should be released with the `free'
452    call if it is no longer in use. */
453 RESDOC **resmap_list(RESMAP *resmap, int *nump, const char *order, const char *distinct);
454 
455 
456 /* Become a daemon process.
457    `curdir' specifies the path of the current directory of the daemon.
458    The return value is true if success, else it is false. */
459 int be_daemon(const char *curdir);
460 
461 
462 
463 #endif                                   /* duplication check */
464 
465 
466 /* END OF FILE */
467