1 /*
2 database.c
3
4 $Id: database.c,v 1.22 2003/02/13 10:04:01 bears Exp $
5
6 Uses GNU gdbm library. Using Berkeley db (included in libc6) was
7 cumbersome. It is based on Berkeley db 1.85, which has severe bugs
8 (e.g. it is not recommended to delete or overwrite entries with
9 overflow pages).
10 */
11
12 #if HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <gdbm.h>
#include "configfile.h"
#include "content.h"
#include "database.h"
#include "group.h"
#include "log.h"
#include "protocol.h"
#include "util.h"
#include "portable.h"
31
/* printf-style path templates; the single %s receives the spool directory */

/* The article database itself */
static const char ARTICLE_FILENAME_FMT[] =
    "%s/data/articles.gdbm";

/* Scratch file used while a new article database is being built */
static const char ARTICLE_NEW_FILENAME_FMT[] =
    "%s/data/articles.gdbm.new";
34
35
36 static struct Db
37 {
38 GDBM_FILE dbf;
39
40 /* Start string for Xref header line: "Xref: <host>" */
41 Str xrefHost;
42
43 /* Msg Id of presently loaded article, empty if none loaded */
44 Str msgId;
45
46 /* Status of loaded article */
47 unsigned status; /* Flags */
48 time_t lastAccess;
49
50 /* Overview of loaded article */
51 Str subj;
52 Str from;
53 Str date;
54 Str ref;
55 Str xref;
56 unsigned long bytes;
57 unsigned long lines;
58
59 /* Article text (except for overview header lines) */
60 DynStr *txt;
61
62 } db = { NULL, "(unknown)", "", 0, 0, "", "", "", "", "", 0, 0, NULL };
63
64 static const char *
errMsg(void)65 errMsg( void )
66 {
67 if ( gdbm_errno == GDBM_NO_ERROR )
68 return strerror( errno );
69 return gdbm_strerror( gdbm_errno );
70 }
71
72 Bool
Db_open(void)73 Db_open( void )
74 {
75 Str name, host;
76 int flags;
77
78 ASSERT( db.dbf == NULL );
79 snprintf( name, MAXCHAR, ARTICLE_FILENAME_FMT, Cfg_spoolDir() );
80 flags = GDBM_WRCREAT | GDBM_FAST;
81
82 if ( ! ( db.dbf = gdbm_open( name, 512, flags, 0644, Log_gdbm_fatal ) ) )
83 {
84 Log_err( "Error opening %s for r/w (%s)", name, errMsg() );
85 return FALSE;
86 }
87 Log_dbg( LOG_DBG_NEWSBASE, "%s opened for r/w", name );
88
89 if ( db.txt == NULL )
90 db.txt = new_DynStr( 5000 );
91
92 if ( ! Utl_getFQDN( host ) )
93 Utl_cpyStr( host, "localhost.localdomain" );
94 snprintf( db.xrefHost, MAXCHAR, "Xref: %s", host );
95
96 return TRUE;
97 }
98
99 void
Db_close(void)100 Db_close( void )
101 {
102 ASSERT( db.dbf );
103 Log_dbg( LOG_DBG_NEWSBASE, "Closing database" );
104 gdbm_close( db.dbf );
105 db.dbf = NULL;
106 del_DynStr( db.txt );
107 db.txt = NULL;
108 Utl_cpyStr( db.msgId, "" );
109 }
110
111 static Bool
loadArt(const char * msgId)112 loadArt( const char *msgId )
113 {
114 static void *dptr = NULL;
115
116 datum key, val;
117 Str t = "";
118 const char *p;
119
120 ASSERT( db.dbf );
121
122 if ( strcmp( msgId, db.msgId ) == 0 )
123 return TRUE;
124
125 key.dptr = (void *)msgId;
126 key.dsize = strlen( msgId ) + 1;
127 if ( dptr != NULL )
128 {
129 free( dptr );
130 dptr = NULL;
131 }
132 val = gdbm_fetch( db.dbf, key );
133 dptr = val.dptr;
134 if ( dptr == NULL )
135 {
136 Log_dbg( LOG_DBG_NEWSBASE,
137 "database.c loadArt: gdbm_fetch found no entry" );
138 return FALSE;
139 }
140
141 Utl_cpyStr( db.msgId, msgId );
142 p = Utl_getLn( t, (char *)dptr );
143 if ( ! p || sscanf( t, "%x", &db.status ) != 1 )
144 {
145 Log_err( "Entry in database '%s' is corrupt (status)", msgId );
146 return FALSE;
147 }
148 p = Utl_getLn( t, p );
149 if ( ! p || sscanf( t, "%lu", (unsigned long *)&db.lastAccess ) != 1 )
150 {
151 Log_err( "Entry in database '%s' is corrupt (lastAccess)", msgId );
152 return FALSE;
153 }
154 p = Utl_getHeaderLn( db.subj, p );
155 p = Utl_getHeaderLn( db.from, p );
156 p = Utl_getHeaderLn( db.date, p );
157 p = Utl_getHeaderLn( db.ref, p );
158 p = Utl_getHeaderLn( db.xref, p );
159 if ( ! p )
160 {
161 Log_err( "Entry in database '%s' is corrupt (overview)", msgId );
162 return FALSE;
163 }
164 p = Utl_getHeaderLn( t, p );
165 if ( ! p || sscanf( t, "%lu", &db.bytes ) != 1 )
166 {
167 Log_err( "Entry in database '%s' is corrupt (bytes)", msgId );
168 return FALSE;
169 }
170 p = Utl_getHeaderLn( t, p );
171 if ( ! p || sscanf( t, "%lu", &db.lines ) != 1 )
172 {
173 Log_err( "Entry in database '%s' is corrupt (lines)", msgId );
174 return FALSE;
175 }
176 DynStr_clear( db.txt );
177 DynStr_app( db.txt, p );
178 return TRUE;
179 }
180
181 static Bool
saveArt(void)182 saveArt( void )
183 {
184 DynStr *s;
185 Str t = "";
186 datum key, val;
187
188 if ( strcmp( db.msgId, "" ) == 0 )
189 return FALSE;
190 s = new_DynStr( 5000 );
191 snprintf( t, MAXCHAR, "%x", db.status );
192 DynStr_appLn( s, t );
193 snprintf( t, MAXCHAR, "%lu", db.lastAccess );
194 DynStr_appLn( s, t );
195 DynStr_appLn( s, db.subj );
196 DynStr_appLn( s, db.from );
197 DynStr_appLn( s, db.date );
198 DynStr_appLn( s, db.ref );
199 DynStr_appLn( s, db.xref );
200 snprintf( t, MAXCHAR, "%lu", db.bytes );
201 DynStr_appLn( s, t );
202 snprintf( t, MAXCHAR, "%lu", db.lines );
203 DynStr_appLn( s, t );
204 DynStr_appDynStr( s, db.txt );
205
206 key.dptr = (void *)db.msgId;
207 key.dsize = strlen( db.msgId ) + 1;
208 val.dptr = (void *)DynStr_str( s );
209 val.dsize = DynStr_len( s ) + 1;
210 if ( gdbm_store( db.dbf, key, val, GDBM_REPLACE ) != 0 )
211 {
212 Log_err( "Could not store %s in database (%s)", errMsg() );
213 return FALSE;
214 }
215
216 del_DynStr( s );
217 return TRUE;
218 }
219
220 Bool
Db_prepareEntry(const Over * ov,const char * grp,int numb)221 Db_prepareEntry( const Over *ov, const char *grp, int numb )
222 {
223 const char *msgId;
224
225 ASSERT( db.dbf );
226 ASSERT( ov );
227 ASSERT( grp );
228
229 msgId = Ov_msgId( ov );
230 Log_dbg( LOG_DBG_NEWSBASE, "Preparing entry %s", msgId );
231 if ( Db_contains( msgId ) )
232 Log_err( "Preparing article twice: %s", msgId );
233
234 db.status = DB_NOT_DOWNLOADED;
235 db.lastAccess = time( NULL );
236
237 Utl_cpyStr( db.msgId, msgId );
238 Utl_cpyStr( db.subj, Ov_subj( ov ) );
239 Utl_cpyStr( db.from, Ov_from( ov ) );
240 Utl_cpyStr( db.date, Ov_date( ov ) );
241 Utl_cpyStr( db.ref, Ov_ref( ov ) );
242 snprintf( db.xref, MAXCHAR, "%s:%i", grp, numb );
243 db.bytes = Ov_bytes( ov );
244 db.lines = Ov_lines( ov );
245
246 DynStr_clear( db.txt );
247
248 return saveArt();
249 }
250
251 Bool
Db_storeArt(const char * msgId,const char * artTxt)252 Db_storeArt( const char *msgId, const char *artTxt )
253 {
254 Str line, lineEx, field, value;
255
256 ASSERT( db.dbf );
257
258 Log_dbg( LOG_DBG_NEWSBASE, "Store article %s", msgId );
259 if ( ! loadArt( msgId ) )
260 {
261 Log_err( "Cannot find info about '%s' in database", msgId );
262 return FALSE;
263 }
264 if ( ! ( db.status & DB_NOT_DOWNLOADED ) )
265 {
266 Log_err( "Trying to store already retrieved article '%s'", msgId );
267 return FALSE;
268 }
269 db.status &= ~DB_NOT_DOWNLOADED;
270 db.status &= ~DB_RETRIEVING_FAILED;
271 db.lastAccess = time( NULL );
272
273 DynStr_clear( db.txt );
274
275 /* Read header */
276 while ( ( artTxt = Utl_getHeaderLn( lineEx, artTxt ) ) != NULL )
277 {
278 Bool continuation;
279
280 if ( lineEx[ 0 ] == '\0' )
281 {
282 DynStr_appLn( db.txt, lineEx );
283 break;
284 }
285 /* Remove fields already in overview and handle x-noffle
286 headers correctly in case of cascading NOFFLEs */
287 if ( Prt_getField( field, value, &continuation, lineEx ) )
288 {
289 if ( strcmp( field, "x-noffle-status" ) == 0 )
290 {
291 if ( strstr( value, "NOT_DOWNLOADED" ) != 0 )
292 db.status |= DB_NOT_DOWNLOADED;
293 }
294 else if ( strcmp( field, "message-id" ) != 0
295 && strcmp( field, "xref" ) != 0
296 && strcmp( field, "references" ) != 0
297 && strcmp( field, "subject" ) != 0
298 && strcmp( field, "from" ) != 0
299 && strcmp( field, "date" ) != 0
300 && strcmp( field, "bytes" ) != 0
301 && strcmp( field, "lines" ) != 0
302 && strcmp( field, "x-noffle-lastaccess" ) != 0 )
303 DynStr_appLn( db.txt, lineEx );
304 }
305 }
306
307 if ( artTxt == NULL )
308 {
309 /*
310 * This article has no body. Bereft of text it lies...
311 *
312 * I'm not completely sure how surprising the rest of
313 * Noffle would find a body-less article, so substitute
314 * an empty line.
315 */
316 Log_inf( "Article %s malformed: missing body", msgId );
317 artTxt = "\n";
318 }
319
320 /* Read body */
321 while ( ( artTxt = Utl_getLn( line, artTxt ) ) != NULL )
322 if ( ! ( db.status & DB_NOT_DOWNLOADED ) )
323 DynStr_appLn( db.txt, line );
324
325 return saveArt();
326 }
327
328 void
Db_setStatus(const char * msgId,unsigned status)329 Db_setStatus( const char *msgId, unsigned status )
330 {
331 if ( loadArt( msgId ) )
332 {
333 db.status = status;
334 saveArt();
335 }
336 }
337
338 void
Db_updateLastAccess(const char * msgId)339 Db_updateLastAccess( const char *msgId )
340 {
341 if ( loadArt( msgId ) )
342 {
343 db.lastAccess = time( NULL );
344 saveArt();
345 }
346 }
347
348 void
Db_setXref(const char * msgId,const char * xref)349 Db_setXref( const char *msgId, const char *xref )
350 {
351 if ( loadArt( msgId ) )
352 {
353 Utl_cpyStr( db.xref, xref );
354 saveArt();
355 }
356 }
357
/*
 * Search the best position for breaking a line.
 *
 * Scans line and remembers the start of the most recent run of
 * whitespace. As soon as whitespace (or the end of the string) is reached
 * after more than wantedLength characters, the remembered position is
 * returned, i.e. the line is broken at the last whitespace run before
 * the one that exceeded the limit.
 *
 * Returns a pointer into line: either to the whitespace at which to
 * break, or to the terminating NUL if the text fits or contains no
 * usable whitespace.
 */
static const char *
searchBreakPos( const char *line, int wantedLength )
{
    const char *lastSpace = NULL;   /* start of the latest whitespace run */
    int inSpaceRun = 0;             /* previous character was whitespace */
    int len = 0;

    for ( ; *line != '\0'; ++line, ++len )
    {
        /* isspace() is only defined for unsigned char values and EOF;
           header text may contain bytes above 0x7f */
        if ( ! isspace( (unsigned char)*line ) )
        {
            inSpaceRun = 0;
            continue;
        }
        if ( len > wantedLength && lastSpace != NULL )
            return lastSpace;
        if ( ! inSpaceRun )
            lastSpace = line;
        inSpaceRun = 1;
    }
    if ( len > wantedLength && lastSpace != NULL )
        return lastSpace;
    return line;
}
385
386 /* Append header line by breaking long line into multiple lines */
387 static void
appendLongHeader(DynStr * target,const char * field,const char * value)388 appendLongHeader( DynStr *target, const char *field, const char *value )
389 {
390 const int wantedLength = 78;
391 const char *breakPos, *old;
392 int len;
393
394 len = strlen( field );
395 DynStr_appN( target, field, len );
396 DynStr_appN( target, " ", 1 );
397 old = value;
398 while ( isspace( *old ) )
399 ++old;
400 breakPos = searchBreakPos( old, wantedLength - len - 1 );
401 DynStr_appN( target, old, breakPos - old );
402 if ( *breakPos == '\0' )
403 {
404 DynStr_appN( target, "\n", 1 );
405 return;
406 }
407 DynStr_appN( target, "\n ", 2 );
408 while ( TRUE )
409 {
410 old = breakPos;
411 while ( isspace( *old ) )
412 ++old;
413 breakPos = searchBreakPos( old, wantedLength - 1 );
414 DynStr_appN( target, old, breakPos - old );
415 if ( *breakPos == '\0' )
416 {
417 DynStr_appN( target, "\n", 1 );
418 return;
419 }
420 DynStr_appN( target, "\n ", 2 );
421 }
422 }
423
424 const char *
Db_header(const char * msgId)425 Db_header( const char *msgId )
426 {
427 static DynStr *s = NULL;
428
429 Str date, t;
430 unsigned status;
431 const char *p;
432
433 if ( s == NULL )
434 s = new_DynStr( 5000 );
435 else
436 DynStr_clear( s );
437 ASSERT( db.dbf );
438 if ( ! loadArt( msgId ) )
439 return NULL;
440 strftime( date, MAXCHAR, "%Y-%m-%d %H:%M:%S",
441 localtime( &db.lastAccess ) );
442 status = db.status;
443 snprintf( t, MAXCHAR,
444 "Message-ID: %s\n"
445 "X-NOFFLE-Status:%s%s%s\n"
446 "X-NOFFLE-LastAccess: %s\n",
447 msgId,
448 status & DB_INTERESTING ? " INTERESTING" : "",
449 status & DB_NOT_DOWNLOADED ? " NOT_DOWNLOADED" : "",
450 status & DB_RETRIEVING_FAILED ? " RETRIEVING_FAILED" : "",
451 date );
452 DynStr_app( s, t );
453 appendLongHeader( s, "Subject:", db.subj );
454 appendLongHeader( s, "From:", db.from );
455 appendLongHeader( s, "Date:", db.date );
456 appendLongHeader( s, "References:", db.ref );
457 DynStr_app( s, "Bytes: " );
458 snprintf( t, MAXCHAR, "%lu", db.bytes );
459 DynStr_appLn( s, t );
460 DynStr_app( s, "Lines: " );
461 snprintf( t, MAXCHAR, "%lu", db.lines );
462 DynStr_appLn( s, t );
463 appendLongHeader( s, db.xrefHost, db.xref );
464 p = strstr( DynStr_str( db.txt ), "\n\n" );
465 if ( ! p )
466 DynStr_appDynStr( s, db.txt );
467 else
468 DynStr_appN( s, DynStr_str( db.txt ), p - DynStr_str( db.txt ) + 1 );
469 return DynStr_str( s );
470 }
471
472 const char *
Db_body(const char * msgId)473 Db_body( const char *msgId )
474 {
475 const char *p;
476
477 if ( ! loadArt( msgId ) )
478 return "";
479 p = strstr( DynStr_str( db.txt ), "\n\n" );
480 if ( ! p )
481 return "";
482 return ( p + 2 );
483 }
484
485 unsigned
Db_status(const char * msgId)486 Db_status( const char *msgId )
487 {
488 if ( ! loadArt( msgId ) )
489 return 0;
490 return db.status;
491 }
492
493 time_t
Db_lastAccess(const char * msgId)494 Db_lastAccess( const char *msgId )
495 {
496 if ( ! loadArt( msgId ) )
497 return -1;
498 return db.lastAccess;
499 }
500
501 const char *
Db_ref(const char * msgId)502 Db_ref( const char *msgId )
503 {
504 if ( ! loadArt( msgId ) )
505 return "";
506 return db.ref;
507 }
508
509 const char *
Db_xref(const char * msgId)510 Db_xref( const char *msgId )
511 {
512 if ( ! loadArt( msgId ) )
513 return "";
514 return db.xref;
515 }
516
517 const char *
Db_from(const char * msgId)518 Db_from( const char *msgId )
519 {
520 if ( ! loadArt( msgId ) )
521 return "";
522 return db.from;
523 }
524
525 const char *
Db_date(const char * msgId)526 Db_date( const char *msgId )
527 {
528 if ( ! loadArt( msgId ) )
529 return "";
530 return db.date;
531 }
532
533 Over *
Db_over(const char * msgId)534 Db_over( const char *msgId )
535 {
536 if ( ! loadArt( msgId ) )
537 return NULL;
538 return new_Over( db.subj, db.from, db.date, msgId,
539 db.ref, db.bytes, db.lines );
540 }
541
542 Bool
Db_contains(const char * msgId)543 Db_contains( const char *msgId )
544 {
545 datum key;
546
547 ASSERT( db.dbf );
548 if ( strcmp( msgId, db.msgId ) == 0 )
549 return TRUE;
550 key.dptr = (void*)msgId;
551 key.dsize = strlen( msgId ) + 1;
552 return gdbm_exists( db.dbf, key );
553 }
554
555 void
Db_delete(const char * msgId)556 Db_delete( const char *msgId )
557 {
558 datum key;
559
560 ASSERT( db.dbf );
561 if ( strcmp( msgId, db.msgId ) == 0 )
562 db.msgId[ 0 ] = '\0';
563 key.dptr = (void*)msgId;
564 key.dsize = strlen( msgId ) + 1;
565 gdbm_delete( db.dbf, key );
566 }
567
568 static datum cursor = { NULL, 0 };
569
570 Bool
Db_first(const char ** msgId)571 Db_first( const char** msgId )
572 {
573 ASSERT( db.dbf );
574 if ( cursor.dptr != NULL )
575 {
576 free( cursor.dptr );
577 cursor.dptr = NULL;
578 }
579 cursor = gdbm_firstkey( db.dbf );
580 *msgId = cursor.dptr;
581 return ( cursor.dptr != NULL );
582 }
583
584 Bool
Db_next(const char ** msgId)585 Db_next( const char** msgId )
586 {
587 void *oldDptr = cursor.dptr;
588
589 ASSERT( db.dbf );
590 if ( cursor.dptr == NULL )
591 return FALSE;
592 cursor = gdbm_nextkey( db.dbf, cursor );
593 free( oldDptr );
594 *msgId = cursor.dptr;
595 return ( cursor.dptr != NULL );
596 }
597
598 void
Db_compact(void)599 Db_compact( void )
600 {
601 ASSERT( db.dbf );
602
603 /*
604 * You'd think it would be sensible to do something like
605 *
606 * if ( gdbm_reorganize( db.dbf ) != 0 )
607 * Log_err( "Error compacting article base: %s", errMsg() );
608 *
609 * here. But this just copies the database to a new one and renames,
610 * which is what Db_rebuild() does.
611 *
612 * So do nothing. We don't want expire to chew disc space.
613 */
614 }
615
616 /*
617 Helper functions for database rebuild.
618 */
619
620 static struct DbNew
621 {
622 GDBM_FILE dbf;
623
624 } dbNew = { NULL };
625
626
627 static Bool
newOpen(void)628 newOpen( void )
629 {
630 Str name;
631 int flags;
632
633 ASSERT( dbNew.dbf == NULL );
634 snprintf( name, MAXCHAR, ARTICLE_NEW_FILENAME_FMT, Cfg_spoolDir() );
635 flags = GDBM_WRCREAT | GDBM_FAST;
636
637 if ( ! ( dbNew.dbf = gdbm_open( name, 512, flags, 0644, Log_gdbm_fatal ) ) )
638 {
639 Log_err( "Error opening %s for r/w (%s)", name, errMsg() );
640 return FALSE;
641 }
642 Log_dbg( LOG_DBG_NEWSBASE, "%s opened for r/w", name );
643 return TRUE;
644 }
645
646 static Bool
newClose(Bool makeMain)647 newClose( Bool makeMain )
648 {
649 Str newName;
650
651 ASSERT( dbNew.dbf );
652 Log_dbg( LOG_DBG_NEWSBASE, "Closing new database" );
653 gdbm_close( dbNew.dbf );
654 dbNew.dbf = NULL;
655
656 snprintf( newName, MAXCHAR, ARTICLE_NEW_FILENAME_FMT, Cfg_spoolDir() );
657
658 if ( makeMain )
659 {
660 Str name;
661
662 ASSERT( db.dbf );
663 Db_close();
664 snprintf( name, MAXCHAR, ARTICLE_FILENAME_FMT, Cfg_spoolDir() );
665 if ( rename( newName, name ) != 0 )
666 {
667 Log_err( "Rename %s to %s failed: %s",
668 newName, name, strerror( errno ) );
669 return FALSE;
670 }
671 Log_dbg( LOG_DBG_NEWSBASE, "Renamed %s to %s", newName, name );
672 return Db_open();
673 }
674 else
675 {
676 if ( unlink( newName ) != 0 )
677 {
678 Log_err( "Unlink %s failed: %s", newName, strerror( errno ) );
679 return FALSE;
680 }
681 Log_dbg( LOG_DBG_NEWSBASE, "Deleted %s", newName );
682 return TRUE;
683 }
684 }
685
686 static Bool
newCopyArt(const char * msgId)687 newCopyArt( const char *msgId )
688 {
689 datum key, val;
690
691 ASSERT( db.dbf );
692 ASSERT( dbNew.dbf );
693 key.dptr = (void *)msgId;
694 key.dsize = strlen( msgId ) + 1;
695
696 val = gdbm_fetch( db.dbf, key );
697 if ( val.dptr != NULL )
698 {
699 Bool res;
700
701 res = ( gdbm_store( dbNew.dbf, key, val, GDBM_INSERT ) == 0 );
702 if ( ! res )
703 Log_err( "Could not store %s in new database (%s)",
704 msgId, errMsg() );
705 free( val.dptr );
706 return res;
707 }
708 Log_err( "%s not found in database", msgId );
709 return FALSE;
710 }
711
712 static Bool
newContains(const char * msgId)713 newContains( const char *msgId )
714 {
715 datum key;
716
717 ASSERT( dbNew.dbf );
718 key.dptr = (void*)msgId;
719 key.dsize = strlen( msgId ) + 1;
720 return gdbm_exists( dbNew.dbf, key );
721 }
722
723 Bool
Db_rebuild(void)724 Db_rebuild( void )
725 {
726 const Over *ov;
727 int i;
728 Str grp;
729 const char *msgId;
730 Bool err;
731
732 if ( ! Cont_firstGrp( grp ) )
733 return FALSE;
734 if ( ! newOpen() )
735 return FALSE;
736
737 Log_inf( "Rebuilding article database" );
738 err = FALSE;
739 do
740 {
741 if ( ! Grp_exists( grp ) )
742 Log_err( "Overview file for unknown group %s exists", grp );
743 else
744 {
745 Cont_read( grp );
746 for ( i = Cont_first(); i <= Cont_last(); ++i )
747 {
748 if ( ! Cont_validNumb( i ) )
749 continue;
750
751 if ( ( ov = Cont_get( i ) ) )
752 {
753 msgId = Ov_msgId( ov );
754 if ( msgId == NULL )
755 {
756 err = TRUE;
757 Log_err( "Overview in %s has no msg id", grp );
758 }
759 else if ( ! newContains( msgId ) )
760 err |= ! newCopyArt( msgId );
761 }
762 else
763 {
764 err = TRUE;
765 Log_err( "Overview %d not available in group %s", i, grp );
766 }
767 }
768 }
769 }
770 while ( Cont_nextGrp( grp ) );
771
772 return newClose( ! err );
773 }
774
775