1 /*
2 * process a single incoming article
3 */
4
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <sys/types.h>
9 #include <sys/timeb.h> /* solely for getindate call */
10 #include <unistd.h>
11 #include <time.h>
12
13 #include "libc.h"
14 #include "news.h"
15 #include "active.h"
16 #include "headers.h"
17 #include "relay.h"
18 #include "history.h"
19 #include "msgs.h"
20 #include "ngmatch.h"
21 #include "system.h"
22 #include "rmsgs.h"
23 #include "transmit.h"
24 #include "control.h"
25 #include "fileart.h"
26 #include "io.h"
27
#define DAY	(24L * 60L * 60L)	/* one day, in seconds */

/*
 * Clock slop, in seconds: reject() refuses an article whose Date: lies
 * more than this far in the future.  An hour would be the natural value,
 * but some sites (e.g. in Australia) emit local time incorrectly labelled
 * as GMT.  They really should fix their software; in the mean time, a
 * whole day of slop keeps their articles from being dropped.
 */
#define CLOCKSLOP	DAY

/*
 * COPYSIZE is the length in bytes of the bulk-copying buffer: the bigger
 * the better, though fewer than 3% of articles exceed 8192 bytes
 * (may 1988).  It holds header lines first, and later holds bytes of the
 * body.  The buffer is allocated once at the start and never deallocated.
 * A value supplied by the build (e.g. -DCOPYSIZE=...) takes precedence.
 */
#ifndef COPYSIZE
#ifdef SMALLMEM
#define COPYSIZE	BUFSIZ	/* conserve memory at the expense of speed */
#else
#define COPYSIZE	8192	/* big enough even for worst-case 4.2bsd blocks */
#endif				/* SMALLMEM */
#endif				/* COPYSIZE */
52
53 /* imports */
54 extern void decline(struct article *art);
55 extern void persistent(void *art, int code, const char *fmt, const char *arg);
56 extern void logaudit(struct article *art, int code, const char *fmt, const char *arg);
57 extern void canthappen(struct article *art, int code, const char *fmt, const char *arg);
58 extern int msgidok(register struct article *art);
59 extern void mkcopies(register struct article *art);
60 extern void wrhdrstrm(register struct article *art, char *hdr, register int hdrlen);
61 extern void flshdrstrm(register struct article *art);
62
63 /*
64 * Unlink all files in filelist, and optionally return article numbers.
65 * When removing a link, note any failure, but don't issue an error message.
66 * For one thing, cancel controls fail routinely because the article has been
67 * removed manually or never existed (a previous cancel arrived before its
68 * subject and generated a fake history entry).
69 */
70 STATIC statust
snuffmayreturn(filelist,artret)71 snuffmayreturn(filelist, artret)
72 char *filelist;
73 boolean artret; /* return article numbers & note unlink errors? */
74 {
75 register statust status = ST_OKAY;
76 register char *arts, *spacep, *slashp, *artnm;
77
78 /* this is a deadly tedious job and I really should automate it */
79 for (arts = filelist; arts != NULL && arts[0] != '\0';
80 arts = (spacep == NULL? NULL: spacep+1)) {
81 spacep = strchr(arts, ' ');
82 if (spacep != NULL)
83 spacep[0] = '\0'; /* will be restored below */
84 artnm = strsave(arts);
85 if (spacep != NULL)
86 spacep[0] = ' '; /* restore space */
87
88 slashp = strchr(artnm, FNDELIM);
89 if (slashp != NULL)
90 slashp[0] = '\0'; /* will be restored below */
91 if (artret)
92 /* prevartnum will complain on i/o error to active */
93 (void) prevartnum(artnm); /* return assigned # */
94 if (slashp != NULL)
95 slashp[0] = FNDELIM; /* restore slash */
96
97 mkfilenm(artnm);
98 if (unlink(artnm) < 0) {
99 persistent(NOART, '\0', "can't unlink", "");
100 status |= ST_ACCESS;
101 }
102 free(artnm);
103 }
104 return status;
105 }
106
107 statust
snufffiles(filelist)108 snufffiles(filelist) /* just unlink all files in filelist */
109 char *filelist;
110 {
111 /* don't return article numbers (NO) & return unlink errors */
112 return snuffmayreturn(filelist, NO);
113 }
114
115 /*
116 * "Uninstall" an article: remove art->a_files (permanent names) and
117 * a_tmpf (temporary name if a_unlink set), and return assigned article #'s.
118 * If a_unlink isn't set, a_tmpf is a copy of the first link in art->a_files.
119 * Must be called before history() is called (or after it has failed),
120 * else there will be a history entry for the article, but no spool files.
121 * insart() need not be called first.
122 */
123 void
uninsart(art)124 uninsart(art)
125 register struct article *art;
126 {
127 if (art->a_unlink && art->a_tmpf != NULL) {
128 (void) unlink(art->a_tmpf); /* I don't wanna know... */
129 art->a_unlink = NO;
130 }
131 /* return article numbers (YES) & ignore unlink errors */
132 (void) snuffmayreturn(art->a_files, YES);
133 }
134
135 /*
136 * If nothing has gone wrong yet,
137 * install the article on art->a_tmpf or art->a_files:
138 * The article should have been accepted and filed in copyart().
139 * Add history entries for the article. Log arrival.
140 * Transmit the article to our neighbours.
141 * Process control mess(age)es. ctlmsg can call transmit(fakeart,x)
142 * and generate log lines for cancels and ihave/sendme.
143 */
144 STATIC void
insart(art)145 insart(art)
146 register struct article *art;
147 {
148 if (!(art->a_status&(ST_DROPPED|ST_REFUSED|ST_NEEDATTN))) {
149 if (!art->a_filed) /* paranoia */
150 canthappen(art, 'i', "%s not filed by copyart!",
151 art->h.h_msgid);
152 if (opts.dupsokay) {
153 time_t now;
154
155 timestamp(stdout, &now);
156 if (printf(" %s + %s", /* TODO: special code for dup? */
157 sendersite(nullify(art->h.h_path)),
158 nullify(art->h.h_msgid)) == EOF)
159 fulldisk(art, "stdout");
160 } else
161 history(art, STARTLOG); /* history may be unwritable */
162 if (art->a_status&(ST_DROPPED|ST_REFUSED|ST_NEEDATTN)) {
163 uninsart(art); /* t'was; can't keep article */
164 (void) putchar('\n'); /* ends the log line */
165 } else {
166 /* transmit() writes system names on stdout */
167 transmit(art, opts.exclude);
168 (void) putchar('\n'); /* ends the log line */
169 ctlmsg(art); /* NCMP */
170 }
171 /* don't bother flushing stdout; it's only a log file */
172 }
173 art->a_status &= ~ST_REFUSED; /* refusal is quite casual & common */
174 }
175
176 /*
177 * print the leader of a refusal message about the article in "art".
178 */
179 void
prefuse(art)180 prefuse(art)
181 register struct article *art;
182 {
183 timestamp(stdout, (time_t *)NULL);
184 (void) printf(" %s - %s ", sendersite(nullify(art->h.h_path)),
185 nullify(art->h.h_msgid));
186 }
187
188 /*
189 * Reject articles. This can be arbitrarily picky.
190 * Only the headers are used to decide, so this can be called before
191 * the article is filed but after all the headers are read.
192 * Try to put the fastest tests first, especially if they often result
193 * in rejections.
194 */
195 void
reject(art)196 reject(art)
197 register struct article *art;
198 {
199 register struct headers *hdrs = &art->h;
200 register char *ngs = hdrs->h_ngs;
201 register char *errstr;
202 register time_t date;
203 static time_t now, datestale;
204 extern time_t getindate();
205
206 if (art->a_status&ST_REFUSED)
207 return; /* already rejected */
208 if (now == 0) {
209 now = time(&now);
210 datestale = now - opts.staledays*DAY;
211 }
212 errstr = hdrreq(hdrs);
213 if (errstr != NULL) {
214 prefuse(art);
215 (void) fputs(errstr, stdout);
216 #ifdef notdef
217 } else if (art->a_badhdr) {
218 prefuse(art);
219 (void) fputs("article \"header\" contains non-header line\n",
220 stdout);
221 #endif
222 } else if (!msgidok(art))
223 (void) putchar('\n'); /* msgidok complained; end log line */
224 else if (hdrs->h_approved == NULL && moderated(ngs)) {
225 prefuse(art);
226 /* lots of logaudit()s here ... */
227 (void) printf("unapproved article in moderated group(s) `%s'\n",
228 ngs);
229 } else if ((date =
230 getindate(hdrs->h_date, (struct timeb *)NULL)) == -1) {
231 prefuse(art);
232 (void) printf("unparsable Date: `%s'\n", hdrs->h_date);
233 } else if (date > now + CLOCKSLOP) {
234 prefuse(art);
235 (void) printf("Date: too far in the future: `%s'\n",
236 hdrs->h_date);
237 } else if (opts.staledays > 0 && date < datestale) {
238 prefuse(art);
239 (void) printf("ancient date `%s'\n", hdrs->h_date);
240 } else if (spacein(ngs)) {
241 prefuse(art);
242 (void) printf("space in groups `%s'\n", ngs);
243 } else if (alreadyseen(hdrs->h_msgid)) {
244 if (opts.dupsokay)
245 return;
246 prefuse(art);
247 (void) fputs("duplicate\n", stdout);
248 } else if (hopcount(hdrs->h_path) > 0 &&
249 !ngpatmat(oursys()->sy_trngs, ngs)) {
250 /*
251 * non-local article, with all bad groups.
252 * (local articles with bad groups will be bounced
253 * by fileart when the groups aren't in active.)
254 */
255 if (opts.histreject)
256 history(art, NOLOG);
257 prefuse(art);
258 (void) printf("no subscribed groups in `%s'\n", ngs);
259 } else
260 return; /* art was accepted */
261 decline(art);
262 }
263
264 /*
265 * The loop copies header lines from input to output or a
266 * header output cache. On exit, hdr will contain the first
267 * non-header line, if any, left over from the end of header copying.
268 *
269 * If the byte count is positive, read a line; if it doesn't return
270 * EOF and is a header, then adjust byte count, stash and munge headers.
271 * strlen(line) must be computed before hdrstash is called,
272 * as hdrstash (and thus hdrdigest) removes newlines.
273 *
274 * RFC 822 defines the message header as ending at a blank line, *not* at
275 * the first line that cannot syntactically be a header nor a header
276 * continuation. As a result of this stunning bit of brilliance, we can end
277 * up with non-header lines in the message header, though they are illegal.
278 *
279 * Don't complain if this article has already been refused.
280 *
281 * TODO: Cope with NULs in header input, which bugger fgets and friends
282 * and throw off our byte count, thus buggering unbatching.
283 */
284 char * /* first body line, from gethdr */
hdrcopy(art,in)285 hdrcopy(art, in)
286 register struct article *art;
287 FILE *in;
288 {
289 register char *hdr = NULL;
290 long limit = art->a_unread + SIZENUL;
291 int is_hdr = NO;
292
293 while (limit > SIZENUL && (hdr = gethdr(in, &limit, &is_hdr)) != NULL &&
294 is_hdr) {
295 hdrdigest(art, hdr, strlen(hdr));
296 hdr = NULL; /* freed inside gethdr */
297 }
298 /* If we read a body line, gethdr has adjusted limit appropriately. */
299 art->a_unread = limit - SIZENUL;
300 if (!is_hdr && hdr != NULL && *hdr != '\n' &&
301 !(art->a_status&ST_REFUSED)) {
302 register char *hdrnonl = strsave(hdr);
303
304 #ifdef notdef
305 art->a_badhdr = YES;
306 #endif
307 trim(hdrnonl);
308 decline(art);
309 prefuse(art);
310 /* transient(art, '-', ...); */
311 (void) printf(
312 "article \"header\" contains non-RFC-1036-header line `%s'\n",
313 hdrnonl);
314 free(hdrnonl);
315 }
316 /* if is_hdr, there is no body: header fills limit */
317 return (is_hdr? NULL: hdr);
318 }
319
320 /*
321 * If not yet uninstalled, and the disk filled (or the news system was found
322 * to be otherwise unwell), uninstall this article
323 * to remove any (zero-length) links and decrement the active article number.
324 * The ST_NEEDATTN status will prevent a history entry being generated later.
325 */
326 void
surveydamage(art,installedp)327 surveydamage(art, installedp)
328 register struct article *art;
329 register boolean *installedp;
330 {
331 if (art->a_unread > 0 && art->a_blvmax) {
332 char bytes[30];
333
334 (void) sprintf(bytes, "%ld", (long)art->a_unread);
335 logaudit(art, 'b', "short by %s bytes", bytes);
336 art->a_status |= ST_SHORT;
337 /* truncated input; NB.: don't uninstall this art. */
338 }
339 if (*installedp && art->a_status&ST_NEEDATTN) {
340 uninsart(art);
341 *installedp = NO;
342 }
343 #ifdef WATCHCORE
344 {
345 char stbot;
346 extern char *sbrk();
347
348 (void) printf("debugging memory use: top of data=%u",
349 (unsigned)sbrk(0));
350 (void) printf(", bottom of stack=%u\n", (unsigned)&stbot);
351 }
352 #endif
353 }
354
355 /*
356 * Copy article body.
357 * body will contain the first non-header line, if any,
358 * left over from the end of header copying. Write it.
359 * Copy at most COPYSIZE bytes of body at a time and exactly art->a_unread
360 * bytes in total, barring EOF or a full disk. Then "block" is no longer needed.
361 * Force the article to disk, mostly for the benefit of control message
362 * processing.
363 *
364 * The copying buffer, block, is static because it is used repeatedly
365 * and persists through most of execution, so dynamic allocation
366 * and deallocation seems wasteful, but also for the benefit
367 * of compilers for odd machines (e.g. PE, 370s) which make
368 * implementing "large" automatic arrays difficult.
369 *
370 * Some people think the loop is ugly; I'm not sure why.
371 */
372 STATIC void
cpybody(art,in,body)373 cpybody(art, in, body)
374 register struct article *art;
375 FILE *in;
376 register char *body;
377 {
378 register int readcnt;
379 register FILE *out = art->a_artf;
380 static char block[COPYSIZE];
381
382 if (body != NULL) { /* read too far? */
383 register int bodylen = strlen(body);
384
385 if (out != NULL && fwrite(body, 1, bodylen, out) != bodylen)
386 fulldisk(art, spoolnm(art));
387 art->a_charswritten += bodylen;
388 }
389 for (; art->a_unread > 0 && !(art->a_status&ST_NEEDATTN) && !feof(in) &&
390 (readcnt = fread(block, 1, (int)min(art->a_unread, COPYSIZE), in)) >
391 0; art->a_unread -= readcnt, art->a_charswritten += readcnt)
392 if (out != NULL && fwrite(block, 1, readcnt, out) != readcnt)
393 fulldisk(art, spoolnm(art));
394 if (out != NULL && fflush(out) == EOF)
395 fulldisk(art, spoolnm(art));
396 }
397
398 /*
399 * Copy the next charcnt bytes of "in" (may be not a disk file)
400 * to a permanent file under a (possibly) temporary name.
401 * After the headers are seen, accept or reject the article.
402 * Either uninstall the article described by art, or accept it and file it.
403 * If rejecting it, remove any links and give back assigned #'s
404 * (art->a_artf may still be open; arguably uninsart should close it).
405 * If rejected and the headers fit in core, no files will be opened.
406 * Must munge certain headers on the way & remember certain values.
407 * hdrmunge() or hdrdump() sets art->a_tmpf & art->a_artf.
408 * Unlink art->a_tmpf, if a temporary link.
409 */
410 /* ARGSUSED inname */
411 STATIC void
copyart(art,in,inname)412 copyart(art, in, inname)
413 register struct article *art;
414 register FILE *in;
415 const char *inname;
416 {
417 boolean installed = YES;
418 char *body = hdrcopy(art, in);
419 char bytehdr[64];
420
421 hdrdeflt(&art->h);
422 reject(art); /* duplicate, etc.? */
423 if (art->a_status&(ST_DROPPED|ST_REFUSED)) {
424 uninsart(art);
425 installed = NO;
426 } else {
427 fileart(art);
428 hdrdump(art);
429 }
430 cpybody(art, in, body); /* consume article body from input batch */
431 if (!(art->a_status&(ST_DROPPED|ST_REFUSED)))
432 mkcopies(art);
433 if (art->a_unlink) {
434 /* a_tmpf has had links made to it, so it can be removed. */
435 if (unlink(art->a_tmpf) < 0) {
436 persistent(art, 'f', "copyart can't unlink `%s'",
437 art->a_tmpf);
438 art->a_status |= ST_ACCESS;
439 }
440 art->a_unlink = NO; /* caution */
441 }
442 /* assertion: header values (art->h) can be forgotten here */
443 if (!(art->a_status&(ST_DROPPED|ST_REFUSED|ST_NEEDATTN))) {
444 /* fake Bytes: header for readers */
445 (void) sprintf(bytehdr, "Bytes: %ld\n",
446 (long)art->a_charswritten);
447 wrhdrstrm(art, bytehdr, strlen(bytehdr));
448 }
449 flshdrstrm(art);
450 surveydamage(art, &installed);
451 }
452
453 /*
454 * Copy the article on "in" to a temporary name in the news spool directory,
455 * unlink temp name; *or* copy into the final names, if known early enough.
456 * (Sets a_tmpf in or near hdrmunge() or hdrdump().)
457 * If the spool file opened, install the article it contains.
458 *
459 * copyart() may reject() the article, and may fill the disk.
460 * it calls fileart and logs rejected articles. it may call uninsart.
461 */
462 statust
cpinsart(in,inname,maxima,blvmax)463 cpinsart(in, inname, maxima, blvmax)
464 FILE *in;
465 register const char *inname;
466 long maxima;
467 boolean blvmax; /* believe maxima? */
468 {
469 struct article art;
470 register struct article *artp = &art;
471 register statust status;
472
473 artinit(artp);
474 artp->a_blvmax = blvmax;
475 artp->a_unread = maxima;
476 copyart(artp, in, inname);
477 if (artp->a_status&ST_REFUSED) {
478 /* no good ngs (in fileart) or reject()ed; not serious */
479 artp->a_status &= ~ST_REFUSED;
480 /* paranoia; shouldn't happen */
481 nnfclose(artp, &artp->a_artf, inname);
482 } else if (artp->a_artf == NULL) {
483 persistent(artp, 'f', "can't open spool file `%s'",
484 artp->a_tmpf);
485 } else {
486 nnfclose(artp, &artp->a_artf, inname);
487 insart(artp); /* logs accepted art.s during transmission */
488 if (artp->a_status&ST_JUNKED) { /* yer welcome, henry */
489 artp->a_status &= ~ST_JUNKED;
490 logaudit(artp, 'j', "junked due to groups `%s'",
491 artp->h.h_ngs);
492 }
493 }
494 status = artp->a_status;
495 artfree(artp);
496 return status;
497 }
498