1 /*******************WARNING*********************
2
3 This is a *MODIFIED* version of Geoff Collyer's proof-of-concept NOV
4 implementation.
5
6 It has been modified to support threading directly from a file handle
7 to a NNTP server without a temporary file.
8
9 This is not a complete distribution. We have only distributed enough
10 to support NN's needs.
11
12 The original version came from world.std.com:/src/news/nov.dist.tar.Z
13 and was dated 11 Aug 1993.
14
15 In any case, bugs you find here are probably my fault, as I've trimmed
16 a fair bit of unused code.
17
18 -Peter Wemm <peter@DIALix.oz.au>
19 */
20
21 /*
22 * Copyright (c) Geoffrey Collyer 1992, 1993.
23 * All rights reserved.
24 * Written by Geoffrey Collyer.
25 * Thanks to UUNET Communications Services Inc for financial support.
26 *
27 * This software is not subject to any license of the American Telephone
28 * and Telegraph Company, the Regents of the University of California, or
29 * the Free Software Foundation.
30 *
31 * Permission is granted to anyone to use this software for any purpose on
32 * any computer system, and to alter it and redistribute it freely, subject
33 * to the following restrictions:
34 *
35 * 1. The authors are not responsible for the consequences of use of this
36 * software, no matter how awful, even if they arise from flaws in it.
37 *
38 * 2. The origin of this software must not be misrepresented, either by
39 * explicit claim or by omission. Since few users ever read sources,
40 * credits must appear in the documentation.
41 *
42 * 3. Altered versions must be plainly marked as such, and must not be
43 * misrepresented as being the original software. Since few users
44 * ever read sources, credits must appear in the documentation.
45 *
46 * 4. This notice may not be removed or altered.
47 */
48
49
50 /*
51 * library to access news history adjunct data
52 */
53
54 #include <stdlib.h>
55 #include <string.h>
56 #include <strings.h>
57 #include "config.h"
58 #include "global.h"
59 #include "awksplit.h"
60 #include "digest.h"
61 #include "hash.h"
62 #include "newsoverview.h"
63 #include "nntp.h"
64 #include "split.h"
65
66 #ifndef NEWS_DIRECTORY
67 #define NEWS_DIRECTORY "/usr/spool/news"
68 #endif
69
70 #ifndef OVFILENAME
71 #define OVFILENAME ".overview"
72 #endif
73
74 #define STREQ(a, b) (*(a) == *(b) && strcmp((a), (b)) == 0)
75
76 /* imports */
77 static char *newsarts = NEWS_DIRECTORY; /* news spool */
78 static char *overviewfiles = OVFILENAME; /* overview */
79 static int prsoverview(register struct novgroup *, register article_number, register article_number);
80
81 #ifdef DO_NOV_DIGEST
82 static void de_digest(struct novgroup *, struct novart *);
83 #endif
84
85 void
novartdir(char * dir)86 novartdir(char *dir)
87 {
88 newsarts = (dir == NULL ? NEWS_DIRECTORY : dir);
89 }
90
91 void
novfilename(char * name)92 novfilename(char *name)
93 {
94 overviewfiles = (name == NULL ? OVFILENAME : name);
95 }
96
97 static struct novgroup * /* malloced */
novnew(void)98 novnew(void)
99 {
100 register struct novgroup *gp = (struct novgroup *) malloc(sizeof *gp);
101
102 if (gp != NULL) {
103 gp->g_first = gp->g_curr = NULL;
104 gp->g_msgids = gp->g_roots = NULL;
105 gp->g_dir = NULL;
106 gp->g_stream = NULL;
107 }
108 return gp;
109 }
110
111 struct novgroup * /* malloced cookie */
novopen(char * grp)112 novopen(char *grp)
113 { /* change to group grp */
114 register struct novgroup *gp = novnew();
115 register char *sgrp;
116 register char *s;
117
118 if (gp == NULL)
119 return NULL;
120 sgrp = strsave(grp);
121 if (sgrp == NULL) {
122 free((char *) gp);
123 return NULL;
124 }
125 for (s = sgrp; *s != '\0'; s++)
126 if (*s == '.')
127 *s = '/';
128 gp->g_dir = str3save(newsarts, "/", sgrp);
129 free(sgrp);
130 return gp;
131 }
132
133 struct novgroup *
novstream(register FILE * fp)134 novstream(register FILE * fp)
135 {
136 register struct novgroup *gp = novnew();
137
138 if (gp != NULL)
139 gp->g_stream = fp;
140 return gp;
141 }
142
143 /*
144 * novseek()
145 * For local overview file, use binary search to find first line
146 * which is at artnum or before.
147 * Ripped off from inn1.4/nnrpd/newnews.c
148 */
149 static int
novseek(register FILE * fp,register article_number artnum)150 novseek(register FILE * fp, register article_number artnum)
151 {
152 char *line;
153 long upper;
154 long lower;
155 long middle;
156
157 /* Read first line -- is it in our range? */
158 (void) fseek(fp, 0L, 0);
159 if ((line = fgetstr(fp)) == NULL)
160 return 0;
161 if (atol(line) >= artnum) {
162 (void) fseek(fp, 0L, 0);
163 return 1;
164 }
165 /* Set search ranges and go. */
166 lower = 0;
167 (void) fseek(fp, 0L, 2);
168 upper = ftell(fp);
169 for (;;) {
170 /* Seek to middle line. */
171 middle = (upper + lower) / 2;
172 (void) fseek(fp, middle, 0);
173 while (++middle <= upper && getc(fp) != '\n')
174 continue;
175
176 if (middle >= upper)
177 break;
178
179 if ((line = fgetstr(fp)) != NULL && atol(line) > artnum)
180 upper = middle;
181 else if (lower == middle)
182 break;
183 else
184 lower = middle;
185 }
186
187 /* Move to lower bound; we know this will always be the start of a line. */
188 (void) fseek(fp, lower, 0);
189 while ((line = fgetstr(fp)) != NULL)
190 if (atol(line) >= artnum) {
191 (void) fseek(fp, lower, 0);
192 return 1;
193 }
194 return 0;
195 }
196
197
198 struct novart *
novall(register struct novgroup * gp,register article_number first,register article_number last)199 novall(register struct novgroup * gp, register article_number first, register article_number last)
200 {
201 if (gp->g_first == NULL) /* new group? */
202 (void) prsoverview(gp, first, last);
203 return gp->g_first;
204 }
205
206 struct novart *
novnext(register struct novgroup * gp)207 novnext(register struct novgroup * gp)
208 /* gp cookie from novopen */
209 {
210 register struct novart *thisart;
211
212 if (gp->g_first == NULL) /* new group? */
213 (void) prsoverview(gp, 1, 201);
214 thisart = gp->g_curr;
215 if (thisart != NULL)
216 gp->g_curr = thisart->a_nxtnum;
217 return thisart;
218 }
219
220 static void
freeart(register struct novart * art)221 freeart(register struct novart * art)
222 {
223 if (art->a_refs != NULL)
224 free(art->a_refs);
225 if (art->a_parent != NULL)
226 free(art->a_parent);
227 if (art->a_num != NULL)
228 free(art->a_num); /* the original input line, chopped */
229 free((char *) art);
230 }
231
232 #define MAXFIELDS 9 /* last field is "other" fields */
233 #define DEFREFS 20
234
235 #define PRSFAIL 0 /* disaster (out of memory, etc.) */
236 #define PRSOKAY 1
237 #define PRSBAD 2 /* bad syntax */
238
239 static int
prsovline(register char * line,register struct novgroup * gp,register struct novart * art,register struct novart * prevart)240 prsovline(register char *line, register struct novgroup * gp, register struct novart * art, register struct novart * prevart)
241 /* line malloced; will be chopped up */
242 {
243 register int nf, nrefs, len;
244 char *fields[MAXFIELDS], *refs[DEFREFS];
245 char **refsp = refs;
246 static struct novart zart;
247
248 *art = zart; /* make freeart safe if we bail out early */
249 len = strlen(line);
250 if (len > 0 && line[len - 1] == '\n')
251 line[len - 1] = '\0'; /* make field count straightforward */
252 nf = split(line, fields, MAXFIELDS, "\t");
253 if (nf < MAXFIELDS - 1) /* only "others" fields are optional */
254 return PRSBAD; /* skip this line */
255 while (nf < MAXFIELDS)
256 fields[nf++] = ""; /* fake missing fields */
257
258 /*
259 * duplicate message-ids would confuse the threading code and anyway
260 * should not happen (now that relaynews suppresses multiple links within
261 * a group for the same article), so ignore any entries for duplicate
262 * message-ids.
263 */
264 if (hashfetch(gp->g_msgids, fields[4]) != NULL)
265 return PRSBAD;
266
267 art->a_parent = NULL;
268 art->a_refs = strsave(fields[5]); /* fields[5] will be split below */
269 if (art->a_refs == NULL)
270 return PRSFAIL;
271 if (art->a_refs[0] != '\0') { /* at least one ref? */
272 nrefs = awksplit(fields[5], &refsp, DEFREFS, "");
273 if (refsp == NULL)
274 return PRSFAIL;
275 if (nrefs > 0) { /* last ref is parent */
276 if (refsp[nrefs - 1] == NULL)
277 return PRSFAIL;
278 art->a_parent = strsave(refsp[nrefs - 1]);
279 if (art->a_parent == NULL)
280 return PRSFAIL;
281 if (refsp != refs)
282 free((char *) refsp);
283 }
284 }
285 art->a_num = fields[0]; /* line */
286 art->a_subj = fields[1];
287 art->a_from = fields[2];
288 art->a_date = fields[3];
289 art->a_msgid = fields[4];
290 /* see above for fields[5] */
291 art->a_bytes = fields[6];
292 art->a_lines = fields[7];
293 art->a_others = fields[8];
294 art->a_nxtnum = NULL;
295
296 if (!hashstore(gp->g_msgids, art->a_msgid, (char *) art))
297 return PRSFAIL;
298 if (gp->g_first == NULL)
299 gp->g_first = art;
300 if (prevart != NULL)
301 prevart->a_nxtnum = art;
302 return PRSOKAY;
303 }
304
305 static int
prsoverview(register struct novgroup * gp,register article_number first,register article_number last)306 prsoverview(register struct novgroup * gp, register article_number first, register article_number last)
307 /* gp cookie from novopen */
308 {
309 register struct novart *art, *prevart = NULL;
310 register int prssts;
311 unsigned hsize;
312 char *line;
313
314 gp->g_curr = gp->g_first = NULL;
315 if (gp->g_dir == NULL && gp->g_stream == NULL)
316 return 0;
317 if (gp->g_stream == NULL) {
318 line = str3save(gp->g_dir, "/", overviewfiles);
319 if (line == NULL)
320 return 0;
321 gp->g_stream = fopen(line, "r");
322 free(line);
323 if (gp->g_stream == NULL)
324 return 0;
325 }
326 /* parse input and store in gp->g_msgids for later traversal */
327 hsize = (last - first) | 0x7f;
328 gp->g_msgids = hashcreate(hsize, (unsigned (*) ()) NULL);
329 if (gp->g_msgids == NULL) {
330 if (gp->g_dir != NULL) /* we opened the stream? */
331 (void) fclose(gp->g_stream);
332 return 0;
333 }
334 if (!use_nntp) {
335 if (!novseek(gp->g_stream, first))
336 goto done;
337 }
338 while ((line = fgetstr(gp->g_stream)) != NULL) {
339 if (strcmp(line, ".") == 0) /* EOF on a NNTP stream */
340 break;
341 art = (struct novart *) malloc(sizeof *art);
342 if (art == NULL || (prssts = prsovline(strsave(line), gp, art, prevart)) == PRSFAIL) {
343 if (gp->g_dir != NULL) /* we opened the stream? */
344 (void) fclose(gp->g_stream);
345 if (art != NULL)
346 freeart(art);
347 return 0;
348 }
349 if (prssts == PRSOKAY)
350 prevart = art;
351 else
352 freeart(art);
353 }
354 done:
355 if (gp->g_dir != NULL) /* we opened the stream? */
356 (void) fclose(gp->g_stream);
357 gp->g_curr = gp->g_first;
358
359 #ifdef DO_NOV_DIGEST
360
361 /*
362 * This is really horrible. NOV doesn't break down digests (I don't
363 * think it should), but NN wants all the information up front. We have
364 * to find any digest and break it apart.
365 */
366 for (art = gp->g_first; art; art = art->a_nxtnum) {
367 if (is_digest(art->a_subj))
368 de_digest(gp, art);
369 }
370 #endif
371
372 return 1;
373 }
374
375 #ifdef DO_NOV_DIGEST
376 static char *build_nov_line(struct novart *, struct digest_header *, int);
377 static char *detab_cp(register char *, register char *);
378
379 static void
de_digest(struct novgroup * gp,struct novart * ap)380 de_digest(struct novgroup * gp, struct novart * ap)
381 /* gp cookie from novopen */
382 {
383 register struct novart *art, *prevart;
384 news_header_buffer dgbuf;
385 int cont, seq;
386 FILE *fp;
387 char *line;
388
389 #ifdef NNTP
390 if (use_nntp) {
391 if (atol(ap->a_num) == 0)
392 return;
393 fp = nntp_get_article(atol(ap->a_num), 0);
394 } else
395 #endif /* NNTP */
396
397 fp = open_file(ap->a_num, OPEN_READ);
398
399 if (fp == NULL)
400 return;
401
402 cont = 1;
403 prevart = ap;
404 seq = 0;
405
406 skip_digest_body(fp);
407 while (cont && (cont = get_digest_article(fp, dgbuf)) >= 0) {
408 if (seq == 0) {
409
410 #ifndef NO_MEMMOVE
411 memmove(ap->a_num + 1, ap->a_num,
412 ap->a_bytes - ap->a_num);
413 #else
414 bcopy(ap->a_num, ap->a_num + 1,
415 ap->a_bytes - ap->a_num);
416 #endif /* NO_MEMMOVE */
417
418 ap->a_num[0] = '-';
419 ap->a_subj++;
420 ap->a_from++;
421 ap->a_date++;
422 ap->a_msgid++;
423 } else {
424 if ((art = (struct novart *) malloc(sizeof *art)) == NULL)
425 break;
426 if ((line = build_nov_line(ap, &digest, seq)) == NULL) {
427 free(art);
428 break;
429 }
430 if (prsovline(line, gp, art, (struct novart *) NULL) != PRSOKAY) {
431 if (art->a_num != line)
432 free(line);
433 freeart(art);
434 continue;
435 }
436 art->a_nxtnum = prevart->a_nxtnum;
437 prevart->a_nxtnum = art;
438 prevart = art;
439 }
440 seq++;
441 }
442 fclose(fp);
443 }
444
445 static char *
build_nov_line(struct novart * ap,struct digest_header * dp,int seq)446 build_nov_line(struct novart * ap, struct digest_header * dp, int seq)
447 {
448 char *cp, *bp;
449 int len, i;
450 char *flds[10];
451
452 flds[0] = dp->dg_subj;
453 flds[1] = dp->dg_from;
454
455 if (dp->dg_date)
456 flds[2] = dp->dg_date;
457 else
458 flds[2] = ap->a_date;
459
460 flds[3] = ap->a_msgid;
461 flds[4] = ap->a_refs;
462 flds[5] = ap->a_bytes;
463 flds[6] = ap->a_others;
464
465 len = 64;
466 for (i = 0; i <= 6; i++) {
467 if (flds[i])
468 len += strlen(flds[i]);
469 else
470 flds[i] = "";
471 }
472 if ((bp = malloc(len)) == NULL)
473 return (bp);
474
475 cp = bp;
476 *cp++ = '0';
477 *cp++ = '\t';
478 cp = detab_cp(cp, flds[0]);
479 *cp++ = '\t';
480 cp = detab_cp(cp, flds[1]);
481 *cp++ = '\t';
482 cp = detab_cp(cp, flds[2]);
483 *cp++ = '\t';
484
485 cp = detab_cp(cp, flds[3]); /* need unique msgid */
486 sprintf(cp, ".%d\t", seq);
487 cp += strlen(cp);
488
489 cp = detab_cp(cp, flds[4]);
490 *cp++ = '\t';
491
492 cp = detab_cp(cp, flds[5]); /* add position data to byte count */
493 sprintf(cp, ":%ld:%ld:%ld\t", (long) dp->dg_hpos,
494 (long) dp->dg_fpos - (long) dp->dg_hpos, (long) dp->dg_lpos);
495 cp += strlen(cp);
496
497 sprintf(cp, "%d\t", --dp->dg_lines);
498 cp += strlen(cp);
499
500 detab_cp(cp, flds[6]);
501
502 return (bp);
503 }
504
/*
 * Copy the NUL-terminated string src to dst, writing a space wherever
 * src has a tab.  dst is NUL-terminated; the return value points at that
 * terminator so that further text can be appended there.  The caller
 * must provide room for strlen(src) + 1 bytes.
 */
static char *
detab_cp(register char *dst, register char *src)
{
    char ch;

    for (; (ch = *src) != '\0'; src++)
        *dst++ = (ch == '\t') ? ' ' : ch;
    *dst = '\0';
    return dst;
}
515
516 #endif /* DO_NOV_DIGEST */
517
518 #ifdef THREAD
519 /*
520 * if this article has no parent, enter it in the roots hash table.
521 * if it has a parent, make this article the parent's first child,
522 * even it means making the existing first child our first sibling.
523 */
524 /* ARGSUSED */
525 static int
numvisit(char * key,char * data,char * hook)526 numvisit(char *key, char *data, char *hook)
527 {
528 register struct novart *art = (struct novart *) data, *parent = NULL;
529 register char *msgid;
530 register struct novgroup *gp = (struct novgroup *) hook;
531
532 if (gp->g_roots == NULL) {
533 gp->g_roots = hashcreate(500, (unsigned (*) ()) NULL);
534 if (gp->g_roots == NULL)/* better not happen */
535 return;
536 }
537 msgid = art->a_msgid;
538 if (art->a_parent != NULL)
539 parent = (struct novart *) hashfetch(gp->g_msgids, art->a_parent);
540 if (parent != NULL) {
541 if (parent->a_child1 != NULL) {
542 if (art->a_sibling != NULL)
543 return; /* sibling in use; better not happen */
544 art->a_sibling = parent->a_child1;
545 }
546 parent->a_child1 = msgid;
547 } else { /* no parent - must be a root */
548 art->a_parent = NULL;
549 if (!hashstore(gp->g_roots, msgid, (char *) art))
550 return; /* better not happen */
551 }
552 }
553
554
555 static void
novthread(register struct novgroup * gp)556 novthread(register struct novgroup * gp)
557 {
558 if (gp->g_first == NULL) /* new group? */
559 (void) prsoverview(gp, 1, 201);
560 /* build trees */
561 if (gp->g_first != NULL)
562 hashwalk(gp->g_msgids, numvisit, (char *) gp);
563 }
564
565 #endif
566
567 void
novclose(register struct novgroup * gp)568 novclose(register struct novgroup * gp)
569 {
570 register struct novart *art, *next;
571
572 hashdestroy(gp->g_msgids);
573 hashdestroy(gp->g_roots);
574 if (gp->g_dir != NULL)
575 free(gp->g_dir);
576 for (art = gp->g_first; art != NULL; art = next) {
577 next = art->a_nxtnum;
578 freeart(art);
579 }
580 }
581