1 /*
2  *	(c) Copyright 1990, Kim Fabricius Storm.  All rights reserved.
3  *      Copyright (c) 1996-2005 Michael T Pins.  All rights reserved.
4  *
5  *	Article header parsing.
6  */
7 
8 #include <stdlib.h>
9 #include <ctype.h>
10 #include "config.h"
11 #include "global.h"
12 #include "digest.h"
13 #include "more.h"
14 #include "news.h"
15 #include "nntp.h"
16 
17 #ifndef SUNOS4
18 #include <strings.h>
19 #else
20 #include <string.h>
21 #endif
22 
23 /* news.c */
24 
/* Maps one header line to the field of `news` that should receive its value. */
static char   **art_hdr_field(char *lp, int all);

/*
 * Number of additional attempts open_news_article() makes when the file of
 * a spooled article cannot be opened (0 means a single attempt).
 */
int             retry_on_error = 0;
29 
30 char           *
parse_header(FILE * f,char ** (* hdr_field)(),int modes,news_header_buffer hdrbuf)31 parse_header(FILE * f, char **(*hdr_field) (), int modes, news_header_buffer hdrbuf)
32 {
33     register char  *bp, *cp, **fptr;
34     int             siz, all, date_only;
35     long            pos;
36 
37     pos = ftell(f);
38 
39 /* read first NEWS_HEADER_BUFFER bytes (should be more than enough) */
40 
41     all = modes & GET_ALL_FIELDS;
42     date_only = modes & GET_DATE_ONLY;
43 
44     siz = fread(hdrbuf, sizeof(char), NEWS_HEADER_BUFFER, f);
45     if (siz <= 0) {
46 	hdrbuf[0] = NUL;
47 	return hdrbuf;
48     }
49     bp = hdrbuf;
50     bp[siz - 1] = NUL;
51 
52     /* decode subarticle header */
53     while (*bp) {
54 
55 	if (*bp == NL) {	/* empty line following header */
56 	    ++bp;
57 	    fseek(f, pos + (bp - hdrbuf), 0);
58 	    return bp;
59 	}
60 	if (bp[0] == SP && bp[1] == SP) {	/* An ugly hack so that NN
61 						 * can read */
62 	    bp += 2;		/* it's own FAQ... :-) (sorry Bill) */
63 	    continue;
64 	}
65 	if (date_only && *bp != 'D')
66 	    fptr = NULL;
67 	else if ((fptr = (*hdr_field) (bp, all))) {
68 	    while (*bp && *bp != ':' && isascii(*bp) && !isspace(*bp))
69 		bp++;
70 	    if (*bp)
71 		bp++;
72 	    while (*bp && isascii(*bp) && isspace(*bp) && *bp != NL)
73 		bp++;
74 	    *fptr = bp;
75 	}
76 
77 #ifdef NO_HEADER_SEPARATION_HACK
78 	else {
79 	    for (cp = bp; *cp && *cp != ':'; cp++) {
80 		if (!isascii(*cp))
81 		    break;
82 		if (*cp == '_' || *cp == '-')
83 		    continue;
84 		if (isalnum(*cp))
85 		    continue;
86 		break;
87 	    }
88 	    if (*cp != ':') {
89 		*bp = NL;
90 		pos--;
91 		continue;
92 	    }
93 	}
94 #endif
95 
96 	while (*bp && *bp != NL)
97 	    bp++;
98 
99 	/* Assume that continued lines are never empty! */
100 	if (fptr && bp == *fptr)
101 	    *fptr = NULL;
102 
103 	while (*bp) {		/* look for continued lines */
104 	    cp = bp + 1;
105 
106 	    if (!(*cp && isascii(*cp) && isspace(*cp) && *cp != NL)) {
107 		/* next line is empty or not indented */
108 		*bp++ = NUL;
109 		break;
110 	    }
111 	    *bp = SP;		/* substitute NL with SPACE */
112 	    bp = cp;
113 	    while (*bp && *bp != NL)
114 		bp++;
115 	}
116     }
117 
118     return bp;
119 }
120 
121 static char   **
art_hdr_field(register char * lp,int all)122 art_hdr_field(register char *lp, int all)
123 {
124 
125 #define check(name, lgt, field) \
126     if (isascii(lp[lgt]) && isspace(lp[lgt]) \
127 	&& strncasecmp(name, lp, lgt) == 0)\
128 	return &news.field
129 
130     switch (*lp++) {
131 
132 	    case 'A':
133 	    case 'a':
134 	    if (!all)
135 		break;
136 	    check("pproved:", 8, ng_appr);
137 	    break;
138 
139 	case 'B':
140 	case 'b':
141 	    check("ack-References:", 15, ng_bref);
142 	    break;
143 
144 	case 'C':
145 	case 'c':
146 	    check("ontrol:", 7, ng_control);
147 	    check("omment-To:", 10, ng_comment);
148 	    break;
149 
150 	case 'D':
151 	case 'd':
152 	    check("ate:", 4, ng_date);
153 	    if (!all)
154 		break;
155 	    check("ate-Received:", 13, ng_rdate);
156 	    check("istribution:", 12, ng_dist);
157 	    break;
158 
159 	case 'F':
160 	case 'f':
161 	    check("rom:", 4, ng_from);
162 	    if (!all)
163 		break;
164 	    check("ollowup-To:", 11, ng_follow);
165 	    break;
166 
167 	case 'K':
168 	case 'k':
169 	    if (!all)
170 		break;
171 	    check("eywords:", 8, ng_keyw);
172 	    break;
173 
174 	case 'L':
175 	case 'l':
176 	    check("ines:", 5, ng_xlines);
177 	    break;
178 
179 	case 'M':
180 	case 'm':
181 	    if (!all)
182 		break;
183 	    if (strncasecmp(lp, "essage-", 7))
184 		break;
185 	    lp += 7;
186 	    check("ID:", 3, ng_ident);
187 	    break;
188 
189 	case 'N':
190 	case 'n':
191 	    check("ewsgroups:", 10, ng_groups);
192 	    break;
193 
194 	case 'O':
195 	case 'o':
196 	    if (!all)
197 		break;
198 	    check("rganization:", 12, ng_org);
199 	    check("riginator:", 10, ng_origr);
200 	    break;
201 
202 	case 'P':
203 	case 'p':
204 	    if (!all)
205 		break;
206 	    check("ath:", 4, ng_path);
207 	    break;
208 
209 	case 'R':
210 	case 'r':
211 	    check("eferences:", 10, ng_ref);
212 	    check("eply-To:", 8, ng_reply);
213 	    break;
214 
215 	case 'S':
216 	case 's':
217 	    check("ubject:", 7, ng_subj);
218 	    check("ender:", 6, ng_sender);
219 	    if (!all)
220 		break;
221 	    check("ummary:", 7, ng_summ);
222 	    break;
223 
224 	case 'T':
225 	case 't':
226 	    check("itle:", 5, ng_subj);
227 	    break;
228 
229 	case 'X':
230 	case 'x':
231 	    check("ref:", 4, ng_xref);
232 	    break;
233     }
234 
235     return NULL;
236 
237 #undef check
238 }
239 
/*
 * Return 1 if LINE starts with one of the header fields that
 * art_hdr_field() recognizes in its restricted (all == 0) mode,
 * and 0 otherwise.
 */
int
is_header_line(char *line)
{
    char          **slot;

    slot = art_hdr_field(line, 0);
    if (slot)
	return 1;
    return 0;
}
245 
246 
247 FILE           *
open_news_article(article_header * art,int modes,news_header_buffer buffer1,news_header_buffer buffer2)248 open_news_article(article_header * art, int modes, news_header_buffer buffer1, news_header_buffer buffer2)
249 {
250 
251     char           *digest_buffer;
252     int             retry;
253     FILE           *f;
254     struct stat     statb;
255 
256 #ifndef DONT_COUNT_LINES
257     int             c;
258     off_t           digest_artlen = 0;
259 #endif				/* DONT_COUNT_LINES */
260 
261 #ifdef NNTP
262     int             lazy = 0;
263 #endif				/* NNTP */
264 
265 #ifndef DONT_COUNT_LINES
266 #ifdef NNTP
267     long            fpos;
268 #endif				/* NNTP */
269 #endif				/* DONT_COUNT_LINES */
270 
271     if (art->flag & A_FOLDER) {
272 	f = open_file(group_path_name, OPEN_READ);
273 	if (f == NULL)
274 	    return NULL;
275 	fseek(f, art->hpos, 0);
276 
277 #ifndef DONT_COUNT_LINES
278 	digest_artlen = art->lpos - art->fpos;
279 #endif				/* DONT_COUNT_LINES */
280     }
281 
282 #ifdef NNTP
283     else if (use_nntp) {
284 	lazy = (current_group->master_flag & M_ALWAYS_DIGEST) == 0
285 	    && (modes & LAZY_BODY) ? 1 : 0;
286 	f = nntp_get_article(art->a_number, lazy);
287 	if (f == NULL)
288 	    return NULL;
289     }
290 #endif				/* NNTP */
291 
292     else {
293 	sprintf(group_file_name, "%ld", art->a_number);
294 
295 	retry = retry_on_error;
296 	while ((f = open_file(group_path_name, OPEN_READ)) == NULL)
297 	    if (--retry < 0)
298 		return NULL;
299 
300 	/* necessary because empty files wreak havoc */
301 	if (fstat(fileno(f), &statb) < 0 ||
302 
303 #ifdef NOV
304 	    (art->lpos = statb.st_size, statb.st_size <= (off_t) 0)) {
305 #else
306 	    statb.st_size < art->lpos || statb.st_size <= (off_t) 0) {
307 #endif				/* NOV */
308 
309 	    fclose(f);
310 	    return who_am_i == I_AM_MASTER ? (FILE *) 1 : NULL;
311 	}
312     }
313 
314     digest_buffer = buffer1;
315 
316     if (modes & FILL_NEWS_HEADER) {
317 
318 	news.ng_from = NULL;
319 	news.ng_reply = NULL;
320 	news.ng_name = NULL;
321 	news.ng_subj = NULL;
322 	news.ng_groups = NULL;
323 	news.ng_ref = NULL;
324 	news.ng_bref = NULL;
325 	news.ng_sender = NULL;
326 
327 	news.ng_xlines = NULL;
328 	news.ng_xref = NULL;
329 
330 	if (modes & GET_ALL_FIELDS) {
331 	    news.ng_path = NULL;
332 	    news.ng_reply = NULL;
333 	    news.ng_ident = NULL;
334 	    news.ng_follow = NULL;
335 	    news.ng_keyw = NULL;
336 	    news.ng_dist = NULL;
337 	    news.ng_org = NULL;
338 	    news.ng_appr = NULL;
339 	    news.ng_summ = NULL;
340 	    news.ng_control = NULL;
341 	    news.ng_date = NULL;
342 	    news.ng_rdate = NULL;
343 	    news.ng_comment = NULL;
344 	    news.ng_origr = NULL;
345 	}
346 	if (modes & GET_DATE_ONLY)
347 	    news.ng_date = NULL;
348 
349 	(void) parse_header(f, art_hdr_field, modes, buffer1);
350 
351 	if (news.ng_from == NULL)
352 	    news.ng_from = news.ng_sender;
353 
354 #ifdef NOV
355 	/* fill in article positions..  new style.. */
356 	if ((art->flag & (A_FOLDER | A_DIGEST)) == 0) {
357 	    setpos(art, f);
358 	    news.ng_fpos = art->fpos;
359 	}
360 #else				/* NOV */
361 	if (modes & FILL_OFFSETS)	/* used only by old DB code */
362 	    art->fpos = news.ng_fpos = ftell(f);
363 #endif				/* NOV */
364 
365 	if (news.ng_xlines)
366 	    news.ng_lines = atoi(news.ng_xlines);
367 	else {
368 
369 #ifndef DONT_COUNT_LINES
370 
371 #ifdef NNTP
372 	    if (use_nntp && lazy && !(art->flag & (A_DIGEST | A_FOLDER))) {
373 		fpos = ftell(f);
374 		fclose(f);
375 		f = nntp_get_article(art->a_number, 2);
376 		if (f == NULL)
377 		    return NULL;
378 		lazy = 0;
379 		fseek(f, fpos, 0);
380 	    }
381 #endif				/* NNTP */
382 
383 	    news.ng_lines = 0;
384 	    while ((c = getc(f)) != EOF) {
385 		if (c == '\n')
386 		    news.ng_lines++;
387 		if (digest_artlen && --digest_artlen == 0)
388 		    break;
389 	    }
390 #else				/* DONT_COUNT_LINES */
391 	    news.ng_lines = -1;
392 #endif				/* DONT_COUNT_LINES */
393 	}
394 
395 	if (modes & FILL_OFFSETS) {
396 	    fseek(f, 0, 2);
397 	    news.ng_lpos = ftell(f);
398 	}
399 
400 #ifdef NNTP
401 	else if (use_nntp && (art->flag & (A_DIGEST | A_FOLDER)) == 0) {
402 	    fseek(f, 0, 2);
403 	    art->lpos = ftell(f);
404 	}
405 #endif
406 
407 	news.ng_flag = 0;
408 
409 	if (news.ng_appr)
410 	    news.ng_flag |= N_MODERATED;
411 
412 	if (modes & DIGEST_CHECK && is_digest(news.ng_subj))
413 	    news.ng_flag |= N_DIGEST;
414 
415 #ifdef NNTP
416 	if (use_nntp && lazy && news.ng_flag & N_DIGEST) {
417 	    fclose(f);
418 	    f = nntp_get_article(art->a_number, 2);
419 	    if (f == NULL)
420 		return NULL;
421 	}
422 #endif
423 
424 	digest_buffer = buffer2;
425     }
426 
427 #ifdef NNTP
428     else if (use_nntp && (art->flag & (A_DIGEST | A_FOLDER)) == 0) {
429 	fseek(f, 0, 2);
430 	art->lpos = ftell(f);
431     }
432 #endif
433 
434     if (modes & FILL_DIGEST_HEADER) {
435 	fseek(f, art->hpos, 0);
436 	parse_digest_header(f, modes & GET_ALL_FIELDS, digest_buffer);
437     }
438 
439 #ifdef NOV
440     else {
441 	/* fill in article positions..  new style.. */
442 	if ((art->flag & (A_FOLDER | A_DIGEST)) == 0) {
443 	    setpos(art, f);
444 	    news.ng_fpos = art->fpos;
445 	}
446     }
447 #endif				/* NOV */
448 
449     fseek(f, (modes & SKIP_HEADER) ? art->fpos : art->hpos, 0);
450 
451     return f;
452 }
453