1 /*******************WARNING*********************
2 
This is a *MODIFIED* version of Geoff Collyer's proof-of-concept NOV
implementation.
5 
6 It has been modified to support threading directly from a file handle
7 to a NNTP server without a temporary file.
8 
9 This is not a complete distribution.  We have only distributed enough
10 to support NN's needs.
11 
12 The original version came from world.std.com:/src/news/nov.dist.tar.Z
13 and was dated 11 Aug 1993.
14 
15 In any case, bugs you find here are probably my fault, as I've trimmed
16 a fair bit of unused code.
17 
18 -Peter Wemm  <peter@DIALix.oz.au>
19 */
20 
21 /*
22  * Copyright (c) Geoffrey Collyer 1992, 1993.
23  * All rights reserved.
24  * Written by Geoffrey Collyer.
25  * Thanks to UUNET Communications Services Inc for financial support.
26  *
27  * This software is not subject to any license of the American Telephone
28  * and Telegraph Company, the Regents of the University of California, or
29  * the Free Software Foundation.
30  *
31  * Permission is granted to anyone to use this software for any purpose on
32  * any computer system, and to alter it and redistribute it freely, subject
33  * to the following restrictions:
34  *
35  * 1. The authors are not responsible for the consequences of use of this
36  *    software, no matter how awful, even if they arise from flaws in it.
37  *
38  * 2. The origin of this software must not be misrepresented, either by
39  *    explicit claim or by omission.  Since few users ever read sources,
40  *    credits must appear in the documentation.
41  *
42  * 3. Altered versions must be plainly marked as such, and must not be
43  *    misrepresented as being the original software.  Since few users
44  *    ever read sources, credits must appear in the documentation.
45  *
46  * 4. This notice may not be removed or altered.
47  */
48 
49 
50 /*
51  * library to access news history adjunct data
52  */
53 
54 #include <stdlib.h>
55 #include <string.h>
56 #include <strings.h>
57 #include "config.h"
58 #include "global.h"
59 #include "awksplit.h"
60 #include "digest.h"
61 #include "hash.h"
62 #include "newsoverview.h"
63 #include "nntp.h"
64 #include "split.h"
65 
/* Default news spool root, used unless NEWS_DIRECTORY is already defined. */
#if !defined(NEWS_DIRECTORY)
#define NEWS_DIRECTORY	"/usr/spool/news"
#endif

/* Default name of the per-group overview file, unless already defined. */
#if !defined(OVFILENAME)
#define OVFILENAME ".overview"
#endif

/*
 * String equality: compares the first characters before calling strcmp().
 * Both arguments are evaluated more than once.
 */
#define	STREQ(a, b)	((a)[0] == (b)[0] && strcmp((a), (b)) == 0)

/* settings, changed through novartdir() and novfilename() */
static char    *newsarts = NEWS_DIRECTORY;	/* news spool */
static char    *overviewfiles = OVFILENAME;	/* overview */
79 static int      prsoverview(register struct novgroup *, register article_number, register article_number);
80 
81 #ifdef DO_NOV_DIGEST
82 static void     de_digest(struct novgroup *, struct novart *);
83 #endif
84 
85 void
novartdir(char * dir)86 novartdir(char *dir)
87 {
88     newsarts = (dir == NULL ? NEWS_DIRECTORY : dir);
89 }
90 
91 void
novfilename(char * name)92 novfilename(char *name)
93 {
94     overviewfiles = (name == NULL ? OVFILENAME : name);
95 }
96 
97 static struct novgroup *	/* malloced */
novnew(void)98 novnew(void)
99 {
100     register struct novgroup *gp = (struct novgroup *) malloc(sizeof *gp);
101 
102     if (gp != NULL) {
103 	gp->g_first = gp->g_curr = NULL;
104 	gp->g_msgids = gp->g_roots = NULL;
105 	gp->g_dir = NULL;
106 	gp->g_stream = NULL;
107     }
108     return gp;
109 }
110 
111 struct novgroup *		/* malloced cookie */
novopen(char * grp)112 novopen(char *grp)
113 {				/* change to group grp */
114     register struct novgroup *gp = novnew();
115     register char  *sgrp;
116     register char  *s;
117 
118     if (gp == NULL)
119 	return NULL;
120     sgrp = strsave(grp);
121     if (sgrp == NULL) {
122 	free((char *) gp);
123 	return NULL;
124     }
125     for (s = sgrp; *s != '\0'; s++)
126 	if (*s == '.')
127 	    *s = '/';
128     gp->g_dir = str3save(newsarts, "/", sgrp);
129     free(sgrp);
130     return gp;
131 }
132 
133 struct novgroup *
novstream(register FILE * fp)134 novstream(register FILE * fp)
135 {
136     register struct novgroup *gp = novnew();
137 
138     if (gp != NULL)
139 	gp->g_stream = fp;
140     return gp;
141 }
142 
143 /*
144  * novseek()
145  *	For local overview file, use binary search to find first line
146  *	which is at artnum or before.
147  *	Ripped off from inn1.4/nnrpd/newnews.c
148  */
149 static int
novseek(register FILE * fp,register article_number artnum)150 novseek(register FILE * fp, register article_number artnum)
151 {
152     char           *line;
153     long            upper;
154     long            lower;
155     long            middle;
156 
157     /* Read first line -- is it in our range? */
158     (void) fseek(fp, 0L, 0);
159     if ((line = fgetstr(fp)) == NULL)
160 	return 0;
161     if (atol(line) >= artnum) {
162 	(void) fseek(fp, 0L, 0);
163 	return 1;
164     }
165     /* Set search ranges and go. */
166     lower = 0;
167     (void) fseek(fp, 0L, 2);
168     upper = ftell(fp);
169     for (;;) {
170 	/* Seek to middle line. */
171 	middle = (upper + lower) / 2;
172 	(void) fseek(fp, middle, 0);
173 	while (++middle <= upper && getc(fp) != '\n')
174 	    continue;
175 
176 	if (middle >= upper)
177 	    break;
178 
179 	if ((line = fgetstr(fp)) != NULL && atol(line) > artnum)
180 	    upper = middle;
181 	else if (lower == middle)
182 	    break;
183 	else
184 	    lower = middle;
185     }
186 
187     /* Move to lower bound; we know this will always be the start of a line. */
188     (void) fseek(fp, lower, 0);
189     while ((line = fgetstr(fp)) != NULL)
190 	if (atol(line) >= artnum) {
191 	    (void) fseek(fp, lower, 0);
192 	    return 1;
193 	}
194     return 0;
195 }
196 
197 
198 struct novart  *
novall(register struct novgroup * gp,register article_number first,register article_number last)199 novall(register struct novgroup * gp, register article_number first, register article_number last)
200 {
201     if (gp->g_first == NULL)	/* new group? */
202 	(void) prsoverview(gp, first, last);
203     return gp->g_first;
204 }
205 
206 struct novart  *
novnext(register struct novgroup * gp)207 novnext(register struct novgroup * gp)
208  /* gp		cookie from novopen */
209 {
210     register struct novart *thisart;
211 
212     if (gp->g_first == NULL)	/* new group? */
213 	(void) prsoverview(gp, 1, 201);
214     thisart = gp->g_curr;
215     if (thisart != NULL)
216 	gp->g_curr = thisart->a_nxtnum;
217     return thisart;
218 }
219 
220 static void
freeart(register struct novart * art)221 freeart(register struct novart * art)
222 {
223     if (art->a_refs != NULL)
224 	free(art->a_refs);
225     if (art->a_parent != NULL)
226 	free(art->a_parent);
227     if (art->a_num != NULL)
228 	free(art->a_num);	/* the original input line, chopped */
229     free((char *) art);
230 }
231 
232 #define MAXFIELDS 9		/* last field is "other" fields */
233 #define DEFREFS 20
234 
235 #define PRSFAIL 0		/* disaster (out of memory, etc.) */
236 #define PRSOKAY 1
237 #define PRSBAD  2		/* bad syntax */
238 
239 static int
prsovline(register char * line,register struct novgroup * gp,register struct novart * art,register struct novart * prevart)240 prsovline(register char *line, register struct novgroup * gp, register struct novart * art, register struct novart * prevart)
241  /* line		malloced; will be chopped up */
242 {
243     register int    nf, nrefs, len;
244     char           *fields[MAXFIELDS], *refs[DEFREFS];
245     char          **refsp = refs;
246     static struct novart zart;
247 
248     *art = zart;		/* make freeart safe if we bail out early */
249     len = strlen(line);
250     if (len > 0 && line[len - 1] == '\n')
251 	line[len - 1] = '\0';	/* make field count straightforward */
252     nf = split(line, fields, MAXFIELDS, "\t");
253     if (nf < MAXFIELDS - 1)	/* only "others" fields are optional */
254 	return PRSBAD;		/* skip this line */
255     while (nf < MAXFIELDS)
256 	fields[nf++] = "";	/* fake missing fields */
257 
258     /*
259      * duplicate message-ids would confuse the threading code and anyway
260      * should not happen (now that relaynews suppresses multiple links within
261      * a group for the same article), so ignore any entries for duplicate
262      * message-ids.
263      */
264     if (hashfetch(gp->g_msgids, fields[4]) != NULL)
265 	return PRSBAD;
266 
267     art->a_parent = NULL;
268     art->a_refs = strsave(fields[5]);	/* fields[5] will be split below */
269     if (art->a_refs == NULL)
270 	return PRSFAIL;
271     if (art->a_refs[0] != '\0') {	/* at least one ref? */
272 	nrefs = awksplit(fields[5], &refsp, DEFREFS, "");
273 	if (refsp == NULL)
274 	    return PRSFAIL;
275 	if (nrefs > 0) {	/* last ref is parent */
276 	    if (refsp[nrefs - 1] == NULL)
277 		return PRSFAIL;
278 	    art->a_parent = strsave(refsp[nrefs - 1]);
279 	    if (art->a_parent == NULL)
280 		return PRSFAIL;
281 	    if (refsp != refs)
282 		free((char *) refsp);
283 	}
284     }
285     art->a_num = fields[0];	/* line */
286     art->a_subj = fields[1];
287     art->a_from = fields[2];
288     art->a_date = fields[3];
289     art->a_msgid = fields[4];
290     /* see above for fields[5] */
291     art->a_bytes = fields[6];
292     art->a_lines = fields[7];
293     art->a_others = fields[8];
294     art->a_nxtnum = NULL;
295 
296     if (!hashstore(gp->g_msgids, art->a_msgid, (char *) art))
297 	return PRSFAIL;
298     if (gp->g_first == NULL)
299 	gp->g_first = art;
300     if (prevart != NULL)
301 	prevart->a_nxtnum = art;
302     return PRSOKAY;
303 }
304 
305 static int
prsoverview(register struct novgroup * gp,register article_number first,register article_number last)306 prsoverview(register struct novgroup * gp, register article_number first, register article_number last)
307  /* gp			cookie from novopen */
308 {
309     register struct novart *art, *prevart = NULL;
310     register int    prssts;
311     unsigned        hsize;
312     char           *line;
313 
314     gp->g_curr = gp->g_first = NULL;
315     if (gp->g_dir == NULL && gp->g_stream == NULL)
316 	return 0;
317     if (gp->g_stream == NULL) {
318 	line = str3save(gp->g_dir, "/", overviewfiles);
319 	if (line == NULL)
320 	    return 0;
321 	gp->g_stream = fopen(line, "r");
322 	free(line);
323 	if (gp->g_stream == NULL)
324 	    return 0;
325     }
326     /* parse input and store in gp->g_msgids for later traversal */
327     hsize = (last - first) | 0x7f;
328     gp->g_msgids = hashcreate(hsize, (unsigned (*) ()) NULL);
329     if (gp->g_msgids == NULL) {
330 	if (gp->g_dir != NULL)	/* we opened the stream? */
331 	    (void) fclose(gp->g_stream);
332 	return 0;
333     }
334     if (!use_nntp) {
335 	if (!novseek(gp->g_stream, first))
336 	    goto done;
337     }
338     while ((line = fgetstr(gp->g_stream)) != NULL) {
339 	if (strcmp(line, ".") == 0)	/* EOF on a NNTP stream */
340 	    break;
341 	art = (struct novart *) malloc(sizeof *art);
342 	if (art == NULL || (prssts = prsovline(strsave(line), gp, art, prevart)) == PRSFAIL) {
343 	    if (gp->g_dir != NULL)	/* we opened the stream? */
344 		(void) fclose(gp->g_stream);
345 	    if (art != NULL)
346 		freeart(art);
347 	    return 0;
348 	}
349 	if (prssts == PRSOKAY)
350 	    prevart = art;
351 	else
352 	    freeart(art);
353     }
354 done:
355     if (gp->g_dir != NULL)	/* we opened the stream? */
356 	(void) fclose(gp->g_stream);
357     gp->g_curr = gp->g_first;
358 
359 #ifdef DO_NOV_DIGEST
360 
361     /*
362      * This is really horrible.  NOV doesn't break down digests (I don't
363      * think it should), but NN wants all the information up front. We have
364      * to find any digest and break it apart.
365      */
366     for (art = gp->g_first; art; art = art->a_nxtnum) {
367 	if (is_digest(art->a_subj))
368 	    de_digest(gp, art);
369     }
370 #endif
371 
372     return 1;
373 }
374 
375 #ifdef DO_NOV_DIGEST
376 static char    *build_nov_line(struct novart *, struct digest_header *, int);
377 static char    *detab_cp(register char *, register char *);
378 
379 static void
de_digest(struct novgroup * gp,struct novart * ap)380 de_digest(struct novgroup * gp, struct novart * ap)
381  /* gp			cookie from novopen */
382 {
383     register struct novart *art, *prevart;
384     news_header_buffer dgbuf;
385     int             cont, seq;
386     FILE           *fp;
387     char           *line;
388 
389 #ifdef NNTP
390     if (use_nntp) {
391 	if (atol(ap->a_num) == 0)
392 	    return;
393 	fp = nntp_get_article(atol(ap->a_num), 0);
394     } else
395 #endif				/* NNTP */
396 
397 	fp = open_file(ap->a_num, OPEN_READ);
398 
399     if (fp == NULL)
400 	return;
401 
402     cont = 1;
403     prevart = ap;
404     seq = 0;
405 
406     skip_digest_body(fp);
407     while (cont && (cont = get_digest_article(fp, dgbuf)) >= 0) {
408 	if (seq == 0) {
409 
410 #ifndef NO_MEMMOVE
411 	    memmove(ap->a_num + 1, ap->a_num,
412 		    ap->a_bytes - ap->a_num);
413 #else
414 	    bcopy(ap->a_num, ap->a_num + 1,
415 		  ap->a_bytes - ap->a_num);
416 #endif				/* NO_MEMMOVE */
417 
418 	    ap->a_num[0] = '-';
419 	    ap->a_subj++;
420 	    ap->a_from++;
421 	    ap->a_date++;
422 	    ap->a_msgid++;
423 	} else {
424 	    if ((art = (struct novart *) malloc(sizeof *art)) == NULL)
425 		break;
426 	    if ((line = build_nov_line(ap, &digest, seq)) == NULL) {
427 		free(art);
428 		break;
429 	    }
430 	    if (prsovline(line, gp, art, (struct novart *) NULL) != PRSOKAY) {
431 		if (art->a_num != line)
432 		    free(line);
433 		freeart(art);
434 		continue;
435 	    }
436 	    art->a_nxtnum = prevart->a_nxtnum;
437 	    prevart->a_nxtnum = art;
438 	    prevart = art;
439 	}
440 	seq++;
441     }
442     fclose(fp);
443 }
444 
445 static char    *
build_nov_line(struct novart * ap,struct digest_header * dp,int seq)446 build_nov_line(struct novart * ap, struct digest_header * dp, int seq)
447 {
448     char           *cp, *bp;
449     int             len, i;
450     char           *flds[10];
451 
452     flds[0] = dp->dg_subj;
453     flds[1] = dp->dg_from;
454 
455     if (dp->dg_date)
456 	flds[2] = dp->dg_date;
457     else
458 	flds[2] = ap->a_date;
459 
460     flds[3] = ap->a_msgid;
461     flds[4] = ap->a_refs;
462     flds[5] = ap->a_bytes;
463     flds[6] = ap->a_others;
464 
465     len = 64;
466     for (i = 0; i <= 6; i++) {
467 	if (flds[i])
468 	    len += strlen(flds[i]);
469 	else
470 	    flds[i] = "";
471     }
472     if ((bp = malloc(len)) == NULL)
473 	return (bp);
474 
475     cp = bp;
476     *cp++ = '0';
477     *cp++ = '\t';
478     cp = detab_cp(cp, flds[0]);
479     *cp++ = '\t';
480     cp = detab_cp(cp, flds[1]);
481     *cp++ = '\t';
482     cp = detab_cp(cp, flds[2]);
483     *cp++ = '\t';
484 
485     cp = detab_cp(cp, flds[3]);	/* need unique msgid */
486     sprintf(cp, ".%d\t", seq);
487     cp += strlen(cp);
488 
489     cp = detab_cp(cp, flds[4]);
490     *cp++ = '\t';
491 
492     cp = detab_cp(cp, flds[5]);	/* add position data to byte count */
493     sprintf(cp, ":%ld:%ld:%ld\t", (long) dp->dg_hpos,
494 	    (long) dp->dg_fpos - (long) dp->dg_hpos, (long) dp->dg_lpos);
495     cp += strlen(cp);
496 
497     sprintf(cp, "%d\t", --dp->dg_lines);
498     cp += strlen(cp);
499 
500     detab_cp(cp, flds[6]);
501 
502     return (bp);
503 }
504 
/*
 * detab_cp - copy the string src to dst, turning every tab into a space.
 *
 * dst must have room for strlen(src) + 1 bytes.  Returns a pointer to the
 * terminating NUL written into dst, so calls can be chained.
 */
static char    *
detab_cp(register char *dst, register char *src)
{
    register char   ch;

    for (; (ch = *src) != '\0'; src++, dst++)
	*dst = (ch == '\t') ? ' ' : ch;
    *dst = '\0';
    return (dst);
}
515 
516 #endif				/* DO_NOV_DIGEST */
517 
518 #ifdef THREAD
519 /*
520  * if this article has no parent, enter it in the roots hash table.
521  * if it has a parent, make this article the parent's first child,
522  * even it means making the existing first child our first sibling.
523  */
524 /* ARGSUSED */
525 static int
numvisit(char * key,char * data,char * hook)526 numvisit(char *key, char *data, char *hook)
527 {
528     register struct novart *art = (struct novart *) data, *parent = NULL;
529     register char  *msgid;
530     register struct novgroup *gp = (struct novgroup *) hook;
531 
532     if (gp->g_roots == NULL) {
533 	gp->g_roots = hashcreate(500, (unsigned (*) ()) NULL);
534 	if (gp->g_roots == NULL)/* better not happen */
535 	    return;
536     }
537     msgid = art->a_msgid;
538     if (art->a_parent != NULL)
539 	parent = (struct novart *) hashfetch(gp->g_msgids, art->a_parent);
540     if (parent != NULL) {
541 	if (parent->a_child1 != NULL) {
542 	    if (art->a_sibling != NULL)
543 		return;		/* sibling in use; better not happen */
544 	    art->a_sibling = parent->a_child1;
545 	}
546 	parent->a_child1 = msgid;
547     } else {			/* no parent - must be a root */
548 	art->a_parent = NULL;
549 	if (!hashstore(gp->g_roots, msgid, (char *) art))
550 	    return;		/* better not happen */
551     }
552 }
553 
554 
555 static void
novthread(register struct novgroup * gp)556 novthread(register struct novgroup * gp)
557 {
558     if (gp->g_first == NULL)	/* new group? */
559 	(void) prsoverview(gp, 1, 201);
560     /* build trees */
561     if (gp->g_first != NULL)
562 	hashwalk(gp->g_msgids, numvisit, (char *) gp);
563 }
564 
565 #endif
566 
567 void
novclose(register struct novgroup * gp)568 novclose(register struct novgroup * gp)
569 {
570     register struct novart *art, *next;
571 
572     hashdestroy(gp->g_msgids);
573     hashdestroy(gp->g_roots);
574     if (gp->g_dir != NULL)
575 	free(gp->g_dir);
576     for (art = gp->g_first; art != NULL; art = next) {
577 	next = art->a_nxtnum;
578 	freeart(art);
579     }
580 }
581