1 /* $Id: dbtext.c,v 1.12 2002/10/19 09:59:35 tommy Exp $ */
2
3 /*
4 * Copyright (c) 2002 Tom Marshall <tommy@tig-grr.com>
5 *
6 * This program is free software. It may be distributed under the terms
7 * in the file LICENSE, found in the top level of the distribution.
8 *
9 * dbtext.c: flatfile database handler
10 */
11
12 #include "config.h"
13 #include "dbg.h"
14 #include "str.h"
15 #include "lex.h"
16 #include "vec.h"
17
18 #include "dbh.h"
19 #include "dbtext.h"
20
/*
 * Make sure the record array of a table has room for at least nsize
 * entries.
 *
 * Nothing happens when the current allocation is already large enough.
 * Otherwise the allocation is doubled (or raised straight to nsize if
 * doubling is not enough), and the slots from the current item count up
 * to nsize are initialised with an empty string and a zero count.
 *
 * The process exits with status 2 if the array cannot be grown.
 */
static void dbtext_table_setsize( dbttext_t* pthis, uint nsize )
{
    uint    newcap;
    uint    idx;
    rec_t*  pgrown;

    if( nsize <= pthis->nalloc )
    {
        return;
    }

    newcap = pthis->nalloc * 2;
    if( newcap < nsize )
    {
        newcap = nsize;
    }

    pgrown = (rec_t*)realloc( pthis->pitems, newcap*sizeof(rec_t) );
    if( pgrown == NULL )
    {
        exit( 2 );
    }

    /* only the slots the caller asked for are initialised */
    idx = pthis->nitems;
    while( idx < nsize )
    {
        str_create( &pgrown[idx].w );
        pgrown[idx].n = 0;
        idx++;
    }

    pthis->nalloc = newcap;
    pthis->pitems = pgrown;
}
45
dbtext_db_open(cpchar dbhost,cpchar dbname,cpchar dbuser,cpchar dbpass)46 dbh_t* dbtext_db_open( cpchar dbhost, cpchar dbname, cpchar dbuser, cpchar dbpass )
47 {
48 dbhtext_t* pthis;
49
50 uint dirlen;
51 cpchar phome;
52 struct stat st;
53
54 pthis = (dbhtext_t*)malloc( sizeof(dbhtext_t) );
55 if( pthis == NULL )
56 {
57 goto bail;
58 }
59 pthis->close = dbtext_db_close;
60 pthis->opentable = dbtext_db_opentable;
61 if( dbname != NULL && *dbname != '\0' )
62 {
63 dirlen = strlen( dbname );
64 pthis->dir = strdup( dbname );
65 if( pthis->dir[dirlen-1] == '/' )
66 {
67 pthis->dir[dirlen-1] = '\0';
68 }
69 }
70 else
71 {
72 phome = getenv( "HOME" );
73 if( phome == NULL || *phome == '\0' )
74 {
75 phome = ".";
76 }
77 pthis->dir = (char*)malloc( strlen(phome)+5+1 );
78 if( pthis->dir == NULL )
79 {
80 goto bail;
81 }
82 sprintf( pthis->dir, "%s/.bmf", phome );
83 }
84
85 /* ensure config directory exists */
86 if( stat( pthis->dir, &st ) != 0 )
87 {
88 if( errno == ENOENT )
89 {
90 if( mkdir( pthis->dir, S_IRUSR|S_IWUSR|S_IXUSR ) != 0 )
91 {
92 goto bail;
93 }
94 }
95 else
96 {
97 goto bail;
98 }
99 }
100 else
101 {
102 if( !S_ISDIR( st.st_mode ) )
103 {
104 goto bail;
105 }
106 }
107
108 return (dbh_t*)pthis;
109
110 bail:
111 return NULL;
112 }
113
dbtext_db_close(dbhtext_t * pthis)114 bool_t dbtext_db_close( dbhtext_t* pthis )
115 {
116 free( pthis->dir );
117 pthis->dir = NULL;
118 return true;
119 }
120
dbtext_db_opentable(dbhtext_t * pthis,cpchar table,bool_t rdonly)121 dbt_t* dbtext_db_opentable( dbhtext_t* pthis, cpchar table, bool_t rdonly )
122 {
123 dbttext_t* ptable = NULL;
124
125 #ifndef NOLOCK
126 struct flock lock;
127 #endif /* ndef NOLOCK */
128 char szpath[PATH_MAX];
129 int flags;
130 struct stat st;
131
132 char* pbegin;
133 char* pend;
134 rec_t r;
135 uint pos;
136
137 if( pthis->dir == NULL )
138 {
139 goto bail;
140 }
141
142 ptable = (dbttext_t*)malloc( sizeof(dbttext_t) );
143 if( ptable == NULL )
144 {
145 perror( "malloc()" );
146 goto bail;
147 }
148 ptable->close = dbtext_table_close;
149 ptable->mergeclose = dbtext_table_mergeclose;
150 ptable->unmergeclose = dbtext_table_unmergeclose;
151 ptable->import = dbtext_table_import;
152 ptable->export = dbtext_table_export;
153 ptable->getmsgcount = dbtext_table_getmsgcount;
154 ptable->getcount = dbtext_table_getcount;
155 ptable->fd = -1;
156 ptable->pbuf = NULL;
157 ptable->nmsgs = 0;
158 ptable->nalloc = 0;
159 ptable->nitems = 0;
160 ptable->pitems = NULL;
161
162 sprintf( szpath, "%s/%s.txt", pthis->dir, table );
163 flags = (rdonly ? O_RDONLY|O_CREAT : O_RDWR|O_CREAT);
164 ptable->fd = open( szpath, flags, 0644 );
165 if( ptable->fd == -1 )
166 {
167 perror( "open()" );
168 goto bail;
169 }
170
171 #ifndef NOLOCK
172 memset( &lock, 0, sizeof(lock) );
173 lock.l_type = rdonly ? F_RDLCK : F_WRLCK;
174 lock.l_start = 0;
175 lock.l_whence = SEEK_SET;
176 lock.l_len = 0;
177 fcntl( ptable->fd, F_SETLKW, &lock );
178 #endif /* ndef NOLOCK */
179
180 if( fstat( ptable->fd, &st ) != 0 )
181 {
182 perror( "fstat()" );
183 goto bail_uc;
184 }
185
186 if( st.st_size == 0 )
187 {
188 return (dbt_t*)ptable;
189 }
190
191 ptable->pbuf = (char*)malloc( st.st_size );
192 if( ptable->pbuf == NULL )
193 {
194 perror( "malloc()" );
195 goto bail_uc;
196 }
197
198 if( read( ptable->fd, ptable->pbuf, st.st_size ) != st.st_size )
199 {
200 perror( "read()" );
201 goto bail_fuc;
202 }
203
204 /* XXX: bogofilter compatibility */
205 if( sscanf( ptable->pbuf, BOGOFILTER_HEADER, &ptable->nmsgs ) != 1 )
206 {
207 goto bail_fuc;
208 }
209 pbegin = ptable->pbuf;
210 while( *pbegin != '\n' ) pbegin++;
211 pbegin++;
212
213 pos = 0;
214 while( pbegin < ptable->pbuf + st.st_size )
215 {
216 pend = pbegin;
217 r.w.p = pbegin;
218 r.w.len = 0;
219 r.n = 0;
220
221 while( *pend != '\n' )
222 {
223 if( pend >= ptable->pbuf + st.st_size )
224 {
225 goto bail_fuc;
226 }
227 *pend = tolower(*pend);
228 if( *pend == ' ' )
229 {
230 r.w.len = (pend-pbegin);
231 r.n = strtol( pend+1, NULL, 10 );
232 }
233 pend++;
234 }
235 if( pend > pbegin && *pbegin != '#' && *pbegin != ';' )
236 {
237 if( r.w.len == 0 || r.w.len > MAXWORDLEN )
238 {
239 fprintf( stderr, "dbh_loadfile: bad file format\n" );
240 goto bail_fuc;
241 }
242 dbtext_table_setsize( ptable, pos+1 );
243 ptable->pitems[pos++] = r;
244 ptable->nitems = pos;
245 }
246 pbegin = pend+1;
247 }
248
249 if( rdonly )
250 {
251 #ifndef NOLOCK
252 lock.l_type = F_UNLCK;
253 fcntl( ptable->fd, F_SETLKW, &lock );
254 #endif /* ndef NOLOCK */
255 close( ptable->fd );
256 ptable->fd = -1;
257 }
258
259 return (dbt_t*)ptable;
260
261 bail_fuc:
262 free( ptable->pbuf );
263
264 bail_uc:
265 #ifndef NOLOCK
266 lock.l_type = F_UNLCK;
267 fcntl( ptable->fd, F_SETLKW, &lock );
268 #endif /* ndef NOLOCK */
269
270 close( ptable->fd );
271 ptable->fd = -1;
272
273 bail:
274 free( ptable );
275 return NULL;
276 }
277
dbtext_table_close(dbttext_t * pthis)278 bool_t dbtext_table_close( dbttext_t* pthis )
279 {
280 struct flock lockall;
281
282 free( pthis->pbuf );
283 pthis->pbuf = NULL;
284 free( pthis->pitems );
285 pthis->pitems = NULL;
286
287 if( pthis->fd != -1 )
288 {
289 #ifndef NOLOCK
290 memset( &lockall, 0, sizeof(lockall) );
291 lockall.l_type = F_UNLCK;
292 lockall.l_start = 0;
293 lockall.l_whence = SEEK_SET;
294 lockall.l_len = 0;
295 fcntl( pthis->fd, F_SETLKW, &lockall );
296 #endif /* ndef NOLOCK */
297 close( pthis->fd );
298 pthis->fd = -1;
299 }
300
301 return true;
302 }
303
dbtext_table_mergeclose(dbttext_t * pthis,vec_t * pmsg)304 bool_t dbtext_table_mergeclose( dbttext_t* pthis, vec_t* pmsg )
305 {
306 /* note that we require both vectors to be sorted */
307
308 uint pos;
309 rec_t* prec;
310 veciter_t msgiter;
311 str_t* pmsgstr;
312 uint count;
313 char iobuf[IOBUFSIZE];
314 char* p;
315
316 if( pthis->fd == -1 )
317 {
318 return false;
319 }
320 ftruncate( pthis->fd, 0 );
321 lseek( pthis->fd, 0, SEEK_SET );
322
323 pthis->nmsgs++;
324
325 p = iobuf;
326 p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs );
327
328 vec_first( pmsg, &msgiter );
329 pmsgstr = veciter_get( &msgiter );
330
331 pos = 0;
332 while( pos < pthis->nitems || pmsgstr != NULL )
333 {
334 int cmp = 0;
335 prec = &pthis->pitems[pos];
336 if( pmsgstr != NULL && pos < pthis->nitems )
337 {
338 cmp = str_casecmp( &prec->w, pmsgstr );
339 }
340 else
341 {
342 /* we exhausted one list or the other (but not both) */
343 cmp = (pos < pthis->nitems) ? -1 : 1;
344 }
345 if( cmp < 0 )
346 {
347 /* write existing str */
348 assert( prec->w.p != NULL && prec->w.len > 0 );
349 assert( prec->w.len <= MAXWORDLEN );
350 count = prec->n;
351 strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len;
352 *p++ = ' ';
353 p += sprintf( p, "%u\n", count );
354
355 pos++;
356 }
357 else if( cmp == 0 )
358 {
359 /* same str, merge and write sum */
360 assert( prec->w.p != NULL && prec->w.len > 0 );
361 assert( pmsgstr->p != NULL && pmsgstr->len > 0 );
362 assert( prec->w.len <= MAXWORDLEN );
363 assert( pmsgstr->len <= MAXWORDLEN );
364 count = db_getnewcount( &msgiter );
365 count += prec->n;
366 strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len;
367 *p++ = ' ';
368 p += sprintf( p, "%u\n", count );
369
370 pos++;
371 veciter_next( &msgiter );
372 pmsgstr = veciter_get( &msgiter );
373 }
374 else /* cmp > 0 */
375 {
376 /* write new str */
377 assert( pmsgstr->p != NULL && pmsgstr->len > 0 );
378 assert( pmsgstr->len <= MAXWORDLEN );
379 count = db_getnewcount( &msgiter );
380 strncpylwr( p, pmsgstr->p, pmsgstr->len ); p += pmsgstr->len;
381 *p++ = ' ';
382 p += sprintf( p, "%u\n", count );
383
384 veciter_next( &msgiter );
385 pmsgstr = veciter_get( &msgiter );
386 }
387
388 if( p+TEXTDB_MAXLINELEN > (iobuf+1) )
389 {
390 write( pthis->fd, iobuf, p-iobuf );
391 p = iobuf;
392 }
393 }
394 if( p != iobuf )
395 {
396 write( pthis->fd, iobuf, p-iobuf );
397 }
398
399 veciter_destroy( &msgiter );
400 return dbtext_table_close( pthis );
401 }
402
dbtext_table_unmergeclose(dbttext_t * pthis,vec_t * pmsg)403 bool_t dbtext_table_unmergeclose( dbttext_t* pthis, vec_t* pmsg )
404 {
405 /* note that we require both vectors to be sorted */
406
407 uint pos;
408 rec_t* prec;
409 veciter_t msgiter;
410 str_t* pmsgstr;
411 uint count;
412 char iobuf[IOBUFSIZE];
413 char* p;
414
415 if( pthis->fd == -1 )
416 {
417 return false;
418 }
419 ftruncate( pthis->fd, 0 );
420 lseek( pthis->fd, 0, SEEK_SET );
421
422 pthis->nmsgs--;
423
424 p = iobuf;
425 p += sprintf( p, BOGOFILTER_HEADER, pthis->nmsgs );
426
427 vec_first( pmsg, &msgiter );
428 pmsgstr = veciter_get( &msgiter );
429
430 pos = 0;
431 while( pos < pthis->nitems || pmsgstr != NULL )
432 {
433 int cmp = 0;
434 prec = &pthis->pitems[pos];
435 if( pmsgstr != NULL && pos < pthis->nitems )
436 {
437 cmp = str_casecmp( &prec->w, pmsgstr );
438 }
439 else
440 {
441 /* we exhausted one list or the other (but not both) */
442 cmp = (pos < pthis->nitems) ? -1 : 1;
443 }
444 if( cmp < 0 )
445 {
446 /* write existing str */
447 assert( prec->w.p != NULL && prec->w.len > 0 );
448 assert( prec->w.len <= MAXWORDLEN );
449 count = prec->n;
450 strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len;
451 *p++ = ' ';
452 p += sprintf( p, "%u\n", count );
453
454 pos++;
455 }
456 else if( cmp == 0 )
457 {
458 /* same str, merge and write difference */
459 assert( prec->w.p != NULL && prec->w.len > 0 );
460 assert( pmsgstr->p != NULL && pmsgstr->len > 0 );
461 assert( prec->w.len <= MAXWORDLEN );
462 assert( pmsgstr->len <= MAXWORDLEN );
463 count = db_getnewcount( &msgiter );
464 count = (prec->n > count) ? (prec->n - count) : 0;
465 strncpylwr( p, prec->w.p, prec->w.len ); p += prec->w.len;
466 *p++ = ' ';
467 p += sprintf( p, "%u\n", count );
468
469 pos++;
470 veciter_next( &msgiter );
471 pmsgstr = veciter_get( &msgiter );
472 }
473 else /* cmp > 0 */
474 {
475 /* this should not happen, so write with count=0 */
476 assert( pmsgstr->p != NULL && pmsgstr->len > 0 );
477 assert( pmsgstr->len <= MAXWORDLEN );
478 db_getnewcount( &msgiter );
479 count = 0;
480 strncpylwr( p, pmsgstr->p, pmsgstr->len ); p += pmsgstr->len;
481 *p++ = ' ';
482 p += sprintf( p, "%u\n", count );
483
484 veciter_next( &msgiter );
485 pmsgstr = veciter_get( &msgiter );
486 }
487
488 if( p+TEXTDB_MAXLINELEN > (iobuf+1) )
489 {
490 write( pthis->fd, iobuf, p-iobuf );
491 p = iobuf;
492 }
493 }
494 if( p != iobuf )
495 {
496 write( pthis->fd, iobuf, p-iobuf );
497 }
498
499 veciter_destroy( &msgiter );
500 return dbtext_table_close( pthis );
501 }
502
dbtext_table_import(dbttext_t * pthis,cpchar filename)503 bool_t dbtext_table_import( dbttext_t* pthis, cpchar filename )
504 {
505 return false;
506 }
507
dbtext_table_export(dbttext_t * pthis,cpchar filename)508 bool_t dbtext_table_export( dbttext_t* pthis, cpchar filename )
509 {
510 return false;
511 }
512
dbtext_table_getmsgcount(dbttext_t * pthis)513 uint dbtext_table_getmsgcount( dbttext_t* pthis )
514 {
515 return pthis->nmsgs;
516 }
517
dbtext_table_getcount(dbttext_t * pthis,str_t * pword)518 uint dbtext_table_getcount( dbttext_t* pthis, str_t* pword )
519 {
520 int lo, hi, mid;
521
522 if( pthis->nitems == 0 )
523 {
524 return 0;
525 }
526
527 hi = pthis->nitems - 1;
528 lo = -1;
529 while( hi-lo > 1 )
530 {
531 mid = (hi+lo)/2;
532 if( str_casecmp( pword, &pthis->pitems[mid].w ) <= 0 )
533 hi = mid;
534 else
535 lo = mid;
536 }
537 assert( hi >= 0 && hi < pthis->nitems );
538
539 if( str_casecmp( pword, &pthis->pitems[hi].w ) != 0 )
540 {
541 return 0;
542 }
543
544 return pthis->pitems[hi].n;
545 }
546
547 #ifdef UNIT_TEST
main(int argc,char ** argv)548 int main( int argc, char** argv )
549 {
550 dbh_t* pdb;
551 veciter_t iter;
552 str_t* pstr;
553 uint n;
554
555 if( argc != 2 )
556 {
557 fprintf( stderr, "usage: %s <file>\n", argv[0] );
558 return 1;
559 }
560
561 for( n = 0; n < 100; n++ )
562 {
563 pdb = dbh_open( "testlist", true );
564
565 vec_first( &db, &iter );
566 while( (pstr = veciter_get( &iter )) != NULL )
567 {
568 char buf[MAXWORDLEN+32];
569 char* p;
570 if( pstr->len > 200 )
571 {
572 fprintf( stderr, "str too long: %u chars\n", pstr->len );
573 break;
574 }
575 p = buf;
576 strcpy( buf, "str: " );
577 p += 6;
578 memcpy( p, pstr->p, pstr->len );
579 p += pstr->len;
580 sprintf( p, " %u", pstr->count );
581 puts( buf );
582
583 veciter_next( &iter );
584 }
585
586 dbh_close( &db );
587 }
588
589 return 0;
590 }
591 #endif /* def UNIT_TEST */
592