1 /*
2 * (c) Copyright 1990, Kim Fabricius Storm. All rights reserved.
3 * Copyright (c) 1996-2005 Michael T Pins. All rights reserved.
4 *
5 * Article header parsing.
6 */
7
8 #include <stdlib.h>
9 #include <ctype.h>
10 #include "config.h"
11 #include "global.h"
12 #include "digest.h"
13 #include "more.h"
14 #include "news.h"
15 #include "nntp.h"
16
17 #ifndef SUNOS4
18 #include <strings.h>
19 #else
20 #include <string.h>
21 #endif
22
23 /* news.c */
24
/*
 * Forward declaration: art_hdr_field() is defined further down but is
 * passed to parse_header() by open_news_article() and called by
 * is_header_line().
 */
static char **art_hdr_field(register char *lp, int all);


/*
 * Number of additional attempts open_news_article() makes when opening
 * an article file fails (0 means a single attempt).
 */
int retry_on_error = 0;
29
30 char *
parse_header(FILE * f,char ** (* hdr_field)(),int modes,news_header_buffer hdrbuf)31 parse_header(FILE * f, char **(*hdr_field) (), int modes, news_header_buffer hdrbuf)
32 {
33 register char *bp, *cp, **fptr;
34 int siz, all, date_only;
35 long pos;
36
37 pos = ftell(f);
38
39 /* read first NEWS_HEADER_BUFFER bytes (should be more than enough) */
40
41 all = modes & GET_ALL_FIELDS;
42 date_only = modes & GET_DATE_ONLY;
43
44 siz = fread(hdrbuf, sizeof(char), NEWS_HEADER_BUFFER, f);
45 if (siz <= 0) {
46 hdrbuf[0] = NUL;
47 return hdrbuf;
48 }
49 bp = hdrbuf;
50 bp[siz - 1] = NUL;
51
52 /* decode subarticle header */
53 while (*bp) {
54
55 if (*bp == NL) { /* empty line following header */
56 ++bp;
57 fseek(f, pos + (bp - hdrbuf), 0);
58 return bp;
59 }
60 if (bp[0] == SP && bp[1] == SP) { /* An ugly hack so that NN
61 * can read */
62 bp += 2; /* it's own FAQ... :-) (sorry Bill) */
63 continue;
64 }
65 if (date_only && *bp != 'D')
66 fptr = NULL;
67 else if ((fptr = (*hdr_field) (bp, all))) {
68 while (*bp && *bp != ':' && isascii(*bp) && !isspace(*bp))
69 bp++;
70 if (*bp)
71 bp++;
72 while (*bp && isascii(*bp) && isspace(*bp) && *bp != NL)
73 bp++;
74 *fptr = bp;
75 }
76
77 #ifdef NO_HEADER_SEPARATION_HACK
78 else {
79 for (cp = bp; *cp && *cp != ':'; cp++) {
80 if (!isascii(*cp))
81 break;
82 if (*cp == '_' || *cp == '-')
83 continue;
84 if (isalnum(*cp))
85 continue;
86 break;
87 }
88 if (*cp != ':') {
89 *bp = NL;
90 pos--;
91 continue;
92 }
93 }
94 #endif
95
96 while (*bp && *bp != NL)
97 bp++;
98
99 /* Assume that continued lines are never empty! */
100 if (fptr && bp == *fptr)
101 *fptr = NULL;
102
103 while (*bp) { /* look for continued lines */
104 cp = bp + 1;
105
106 if (!(*cp && isascii(*cp) && isspace(*cp) && *cp != NL)) {
107 /* next line is empty or not indented */
108 *bp++ = NUL;
109 break;
110 }
111 *bp = SP; /* substitute NL with SPACE */
112 bp = cp;
113 while (*bp && *bp != NL)
114 bp++;
115 }
116 }
117
118 return bp;
119 }
120
121 static char **
art_hdr_field(register char * lp,int all)122 art_hdr_field(register char *lp, int all)
123 {
124
125 #define check(name, lgt, field) \
126 if (isascii(lp[lgt]) && isspace(lp[lgt]) \
127 && strncasecmp(name, lp, lgt) == 0)\
128 return &news.field
129
130 switch (*lp++) {
131
132 case 'A':
133 case 'a':
134 if (!all)
135 break;
136 check("pproved:", 8, ng_appr);
137 break;
138
139 case 'B':
140 case 'b':
141 check("ack-References:", 15, ng_bref);
142 break;
143
144 case 'C':
145 case 'c':
146 check("ontrol:", 7, ng_control);
147 check("omment-To:", 10, ng_comment);
148 break;
149
150 case 'D':
151 case 'd':
152 check("ate:", 4, ng_date);
153 if (!all)
154 break;
155 check("ate-Received:", 13, ng_rdate);
156 check("istribution:", 12, ng_dist);
157 break;
158
159 case 'F':
160 case 'f':
161 check("rom:", 4, ng_from);
162 if (!all)
163 break;
164 check("ollowup-To:", 11, ng_follow);
165 break;
166
167 case 'K':
168 case 'k':
169 if (!all)
170 break;
171 check("eywords:", 8, ng_keyw);
172 break;
173
174 case 'L':
175 case 'l':
176 check("ines:", 5, ng_xlines);
177 break;
178
179 case 'M':
180 case 'm':
181 if (!all)
182 break;
183 if (strncasecmp(lp, "essage-", 7))
184 break;
185 lp += 7;
186 check("ID:", 3, ng_ident);
187 break;
188
189 case 'N':
190 case 'n':
191 check("ewsgroups:", 10, ng_groups);
192 break;
193
194 case 'O':
195 case 'o':
196 if (!all)
197 break;
198 check("rganization:", 12, ng_org);
199 check("riginator:", 10, ng_origr);
200 break;
201
202 case 'P':
203 case 'p':
204 if (!all)
205 break;
206 check("ath:", 4, ng_path);
207 break;
208
209 case 'R':
210 case 'r':
211 check("eferences:", 10, ng_ref);
212 check("eply-To:", 8, ng_reply);
213 break;
214
215 case 'S':
216 case 's':
217 check("ubject:", 7, ng_subj);
218 check("ender:", 6, ng_sender);
219 if (!all)
220 break;
221 check("ummary:", 7, ng_summ);
222 break;
223
224 case 'T':
225 case 't':
226 check("itle:", 5, ng_subj);
227 break;
228
229 case 'X':
230 case 'x':
231 check("ref:", 4, ng_xref);
232 break;
233 }
234
235 return NULL;
236
237 #undef check
238 }
239
/*
 * Tell whether `line' starts with one of the header fields that
 * art_hdr_field() recognizes in its restricted (all == 0) mode.
 * Returns 1 when a field is recognized and 0 otherwise.
 */
int
is_header_line(char *line)
{
    char **slot = art_hdr_field(line, 0);

    return slot ? 1 : 0;
}
245
246
247 FILE *
open_news_article(article_header * art,int modes,news_header_buffer buffer1,news_header_buffer buffer2)248 open_news_article(article_header * art, int modes, news_header_buffer buffer1, news_header_buffer buffer2)
249 {
250
251 char *digest_buffer;
252 int retry;
253 FILE *f;
254 struct stat statb;
255
256 #ifndef DONT_COUNT_LINES
257 int c;
258 off_t digest_artlen = 0;
259 #endif /* DONT_COUNT_LINES */
260
261 #ifdef NNTP
262 int lazy = 0;
263 #endif /* NNTP */
264
265 #ifndef DONT_COUNT_LINES
266 #ifdef NNTP
267 long fpos;
268 #endif /* NNTP */
269 #endif /* DONT_COUNT_LINES */
270
271 if (art->flag & A_FOLDER) {
272 f = open_file(group_path_name, OPEN_READ);
273 if (f == NULL)
274 return NULL;
275 fseek(f, art->hpos, 0);
276
277 #ifndef DONT_COUNT_LINES
278 digest_artlen = art->lpos - art->fpos;
279 #endif /* DONT_COUNT_LINES */
280 }
281
282 #ifdef NNTP
283 else if (use_nntp) {
284 lazy = (current_group->master_flag & M_ALWAYS_DIGEST) == 0
285 && (modes & LAZY_BODY) ? 1 : 0;
286 f = nntp_get_article(art->a_number, lazy);
287 if (f == NULL)
288 return NULL;
289 }
290 #endif /* NNTP */
291
292 else {
293 sprintf(group_file_name, "%ld", art->a_number);
294
295 retry = retry_on_error;
296 while ((f = open_file(group_path_name, OPEN_READ)) == NULL)
297 if (--retry < 0)
298 return NULL;
299
300 /* necessary because empty files wreak havoc */
301 if (fstat(fileno(f), &statb) < 0 ||
302
303 #ifdef NOV
304 (art->lpos = statb.st_size, statb.st_size <= (off_t) 0)) {
305 #else
306 statb.st_size < art->lpos || statb.st_size <= (off_t) 0) {
307 #endif /* NOV */
308
309 fclose(f);
310 return who_am_i == I_AM_MASTER ? (FILE *) 1 : NULL;
311 }
312 }
313
314 digest_buffer = buffer1;
315
316 if (modes & FILL_NEWS_HEADER) {
317
318 news.ng_from = NULL;
319 news.ng_reply = NULL;
320 news.ng_name = NULL;
321 news.ng_subj = NULL;
322 news.ng_groups = NULL;
323 news.ng_ref = NULL;
324 news.ng_bref = NULL;
325 news.ng_sender = NULL;
326
327 news.ng_xlines = NULL;
328 news.ng_xref = NULL;
329
330 if (modes & GET_ALL_FIELDS) {
331 news.ng_path = NULL;
332 news.ng_reply = NULL;
333 news.ng_ident = NULL;
334 news.ng_follow = NULL;
335 news.ng_keyw = NULL;
336 news.ng_dist = NULL;
337 news.ng_org = NULL;
338 news.ng_appr = NULL;
339 news.ng_summ = NULL;
340 news.ng_control = NULL;
341 news.ng_date = NULL;
342 news.ng_rdate = NULL;
343 news.ng_comment = NULL;
344 news.ng_origr = NULL;
345 }
346 if (modes & GET_DATE_ONLY)
347 news.ng_date = NULL;
348
349 (void) parse_header(f, art_hdr_field, modes, buffer1);
350
351 if (news.ng_from == NULL)
352 news.ng_from = news.ng_sender;
353
354 #ifdef NOV
355 /* fill in article positions.. new style.. */
356 if ((art->flag & (A_FOLDER | A_DIGEST)) == 0) {
357 setpos(art, f);
358 news.ng_fpos = art->fpos;
359 }
360 #else /* NOV */
361 if (modes & FILL_OFFSETS) /* used only by old DB code */
362 art->fpos = news.ng_fpos = ftell(f);
363 #endif /* NOV */
364
365 if (news.ng_xlines)
366 news.ng_lines = atoi(news.ng_xlines);
367 else {
368
369 #ifndef DONT_COUNT_LINES
370
371 #ifdef NNTP
372 if (use_nntp && lazy && !(art->flag & (A_DIGEST | A_FOLDER))) {
373 fpos = ftell(f);
374 fclose(f);
375 f = nntp_get_article(art->a_number, 2);
376 if (f == NULL)
377 return NULL;
378 lazy = 0;
379 fseek(f, fpos, 0);
380 }
381 #endif /* NNTP */
382
383 news.ng_lines = 0;
384 while ((c = getc(f)) != EOF) {
385 if (c == '\n')
386 news.ng_lines++;
387 if (digest_artlen && --digest_artlen == 0)
388 break;
389 }
390 #else /* DONT_COUNT_LINES */
391 news.ng_lines = -1;
392 #endif /* DONT_COUNT_LINES */
393 }
394
395 if (modes & FILL_OFFSETS) {
396 fseek(f, 0, 2);
397 news.ng_lpos = ftell(f);
398 }
399
400 #ifdef NNTP
401 else if (use_nntp && (art->flag & (A_DIGEST | A_FOLDER)) == 0) {
402 fseek(f, 0, 2);
403 art->lpos = ftell(f);
404 }
405 #endif
406
407 news.ng_flag = 0;
408
409 if (news.ng_appr)
410 news.ng_flag |= N_MODERATED;
411
412 if (modes & DIGEST_CHECK && is_digest(news.ng_subj))
413 news.ng_flag |= N_DIGEST;
414
415 #ifdef NNTP
416 if (use_nntp && lazy && news.ng_flag & N_DIGEST) {
417 fclose(f);
418 f = nntp_get_article(art->a_number, 2);
419 if (f == NULL)
420 return NULL;
421 }
422 #endif
423
424 digest_buffer = buffer2;
425 }
426
427 #ifdef NNTP
428 else if (use_nntp && (art->flag & (A_DIGEST | A_FOLDER)) == 0) {
429 fseek(f, 0, 2);
430 art->lpos = ftell(f);
431 }
432 #endif
433
434 if (modes & FILL_DIGEST_HEADER) {
435 fseek(f, art->hpos, 0);
436 parse_digest_header(f, modes & GET_ALL_FIELDS, digest_buffer);
437 }
438
439 #ifdef NOV
440 else {
441 /* fill in article positions.. new style.. */
442 if ((art->flag & (A_FOLDER | A_DIGEST)) == 0) {
443 setpos(art, f);
444 news.ng_fpos = art->fpos;
445 }
446 }
447 #endif /* NOV */
448
449 fseek(f, (modes & SKIP_HEADER) ? art->fpos : art->hpos, 0);
450
451 return f;
452 }
453