1 /*
2  * process a single incoming article
3  */
4 
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <sys/types.h>
9 #include <sys/timeb.h>		/* solely for getindate call */
10 #include <unistd.h>
11 #include <time.h>
12 
13 #include "libc.h"
14 #include "news.h"
15 #include "active.h"
16 #include "headers.h"
17 #include "relay.h"
18 #include "history.h"
19 #include "msgs.h"
20 #include "ngmatch.h"
21 #include "system.h"
22 #include "rmsgs.h"
23 #include "transmit.h"
24 #include "control.h"
25 #include "fileart.h"
26 #include "io.h"
27 
/* number of seconds in one day */
#define DAY (24L*60L*60L)

/*
 * Seconds of slop permitted: article dates may be this many seconds in the
 * future.  An hour would be the natural value, were it not for sites
 * (e.g. in Australia) that emit local time incorrectly labelled as GMT.
 * They really should fix their software, but in the meantime, a day's
 * slop will prevent their articles from being dropped.
 */
#define CLOCKSLOP DAY

/*
 * COPYSIZE is the length of a bulk-copying buffer: the bigger the better,
 * though fewer than 3% of articles exceed 8192 bytes (May 1988).
 * It holds header lines first, and later holds bytes of the body.
 * This buffer is allocated once at the start and never deallocated.
 * COPYSIZE may be predefined by the build; otherwise SMALLMEM selects
 * the smaller BUFSIZ-sized buffer.
 */
#ifndef COPYSIZE
#ifdef SMALLMEM
#define COPYSIZE BUFSIZ		/* conserve memory at the expense of speed */
#else
#define COPYSIZE 8192		/* big enough even for worst-case 4.2bsd blocks */
#endif				/* SMALLMEM */
#endif				/* COPYSIZE */

/* imports: routines defined in other source files of the relay program */
extern void decline(struct article *art);
extern void persistent(void *art, int code, const char *fmt, const char *arg);
extern void logaudit(struct article *art, int code, const char *fmt, const char *arg);
extern void canthappen(struct article *art, int code, const char *fmt, const char *arg);
extern int msgidok(register struct article *art);
extern void mkcopies(register struct article *art);
extern void wrhdrstrm(register struct article *art, char *hdr, register int hdrlen);
extern void flshdrstrm(register struct article *art);
62 
63 /*
64  * Unlink all files in filelist, and optionally return article numbers.
65  * When removing a link, note any failure, but don't issue an error message.
66  * For one thing, cancel controls fail routinely because the article has been
67  * removed manually or never existed (a previous cancel arrived before its
68  * subject and generated a fake history entry).
69  */
70 STATIC statust
snuffmayreturn(filelist,artret)71 snuffmayreturn(filelist, artret)
72 char *filelist;
73 boolean artret;		/* return article numbers & note unlink errors? */
74 {
75 	register statust status = ST_OKAY;
76 	register char *arts, *spacep, *slashp, *artnm;
77 
78 	/* this is a deadly tedious job and I really should automate it */
79 	for (arts = filelist; arts != NULL && arts[0] != '\0';
80 	     arts = (spacep == NULL? NULL: spacep+1)) {
81 		spacep = strchr(arts, ' ');
82 		if (spacep != NULL)
83 			spacep[0] = '\0';	/* will be restored below */
84 		artnm = strsave(arts);
85 		if (spacep != NULL)
86 			spacep[0] = ' ';	/* restore space */
87 
88 		slashp = strchr(artnm, FNDELIM);
89 		if (slashp != NULL)
90 			slashp[0] = '\0';	/* will be restored below */
91 		if (artret)
92 			/* prevartnum will complain on i/o error to active */
93 			(void) prevartnum(artnm); /* return assigned # */
94 		if (slashp != NULL)
95 			slashp[0] = FNDELIM;	/* restore slash */
96 
97 		mkfilenm(artnm);
98 		if (unlink(artnm) < 0) {
99 			persistent(NOART, '\0', "can't unlink", "");
100 			status |= ST_ACCESS;
101 		}
102 		free(artnm);
103 	}
104 	return status;
105 }
106 
107 statust
snufffiles(filelist)108 snufffiles(filelist)		/* just unlink all files in filelist */
109 char *filelist;
110 {
111 	/* don't return article numbers (NO) & return unlink errors */
112 	return snuffmayreturn(filelist, NO);
113 }
114 
115 /*
116  * "Uninstall" an article: remove art->a_files (permanent names) and
117  * a_tmpf (temporary name if a_unlink set), and return assigned article #'s.
118  * If a_unlink isn't set, a_tmpf is a copy of the first link in art->a_files.
119  * Must be called before history() is called (or after it has failed),
120  * else there will be a history entry for the article, but no spool files.
121  * insart() need not be called first.
122  */
123 void
uninsart(art)124 uninsart(art)
125 register struct article *art;
126 {
127 	if (art->a_unlink && art->a_tmpf != NULL) {
128 		(void) unlink(art->a_tmpf);	/* I don't wanna know... */
129 		art->a_unlink = NO;
130 	}
131 	/* return article numbers (YES) & ignore unlink errors */
132 	(void) snuffmayreturn(art->a_files, YES);
133 }
134 
135 /*
136  * If nothing has gone wrong yet,
137  * install the article on art->a_tmpf or art->a_files:
138  * The article should have been accepted and filed in copyart().
139  * Add history entries for the article.  Log arrival.
140  * Transmit the article to our neighbours.
141  * Process control mess(age)es.  ctlmsg can call transmit(fakeart,x)
142  * and generate log lines for cancels and ihave/sendme.
143  */
144 STATIC void
insart(art)145 insart(art)
146 register struct article *art;
147 {
148 	if (!(art->a_status&(ST_DROPPED|ST_REFUSED|ST_NEEDATTN))) {
149 		if (!art->a_filed)			/* paranoia */
150 			canthappen(art, 'i', "%s not filed by copyart!",
151 				art->h.h_msgid);
152 		if (opts.dupsokay) {
153 			time_t now;
154 
155 			timestamp(stdout, &now);
156 			if (printf(" %s + %s", /* TODO: special code for dup? */
157 			    sendersite(nullify(art->h.h_path)),
158 			    nullify(art->h.h_msgid)) == EOF)
159 				fulldisk(art, "stdout");
160 		} else
161 			history(art, STARTLOG);	/* history may be unwritable */
162 		if (art->a_status&(ST_DROPPED|ST_REFUSED|ST_NEEDATTN)) {
163 			uninsart(art);		/* t'was; can't keep article */
164 			(void) putchar('\n');	/* ends the log line */
165 		} else {
166 			/* transmit() writes system names on stdout */
167 			transmit(art, opts.exclude);
168 			(void) putchar('\n');	/* ends the log line */
169 			ctlmsg(art);		/* NCMP */
170 		}
171 		/* don't bother flushing stdout; it's only a log file */
172 	}
173 	art->a_status &= ~ST_REFUSED;	/* refusal is quite casual & common */
174 }
175 
176 /*
177  * print the leader of a refusal message about the article in "art".
178  */
179 void
prefuse(art)180 prefuse(art)
181 register struct article *art;
182 {
183 	timestamp(stdout, (time_t *)NULL);
184 	(void) printf(" %s - %s ", sendersite(nullify(art->h.h_path)),
185 		nullify(art->h.h_msgid));
186 }
187 
/*
 * Reject articles.  This can be arbitrarily picky.
 * Only the headers are used to decide, so this can be called before
 * the article is filed but after all the headers are read.
 * Try to put the fastest tests first, especially if they often result
 * in rejections.
 *
 * Does nothing if art is already marked ST_REFUSED.  Otherwise the tests
 * below are tried in order; the first one that fails writes a refusal
 * line on stdout (via prefuse(), except that msgidok() issues its own
 * complaint) and the article is then passed to decline().  If every test
 * passes, or the article is a duplicate and opts.dupsokay is set, the
 * function returns without calling decline().
 */
void
reject(art)
register struct article *art;
{
	register struct headers *hdrs = &art->h;
	register char *ngs = hdrs->h_ngs;
	register char *errstr;
	register time_t date;
	/* computed on the first call only and reused by later calls */
	static time_t now, datestale;
	extern time_t getindate();

	if (art->a_status&ST_REFUSED)
		return;			/* already rejected */
	if (now == 0) {
		now = time(&now);
		/* dates before this are "ancient" (used if staledays > 0) */
		datestale = now - opts.staledays*DAY;
	}
	/* a non-NULL result from hdrreq is printed as the refusal reason */
	errstr = hdrreq(hdrs);
	if (errstr != NULL) {
		prefuse(art);
		(void) fputs(errstr, stdout);
#ifdef notdef
	} else if (art->a_badhdr) {
		prefuse(art);
		(void) fputs("article \"header\" contains non-header line\n",
			stdout);
#endif
	} else if (!msgidok(art))
		(void) putchar('\n');	/* msgidok complained; end log line */
	else if (hdrs->h_approved == NULL && moderated(ngs)) {
		prefuse(art);
		/* lots of logaudit()s here ... */
		(void) printf("unapproved article in moderated group(s) `%s'\n",
			ngs);
	} else if ((date =
	    getindate(hdrs->h_date, (struct timeb *)NULL)) == -1) {
		/* getindate returned -1: the Date: value could not be parsed */
		prefuse(art);
		(void) printf("unparsable Date: `%s'\n", hdrs->h_date);
	} else if (date > now + CLOCKSLOP) {
		prefuse(art);
		(void) printf("Date: too far in the future: `%s'\n",
			hdrs->h_date);
	} else if (opts.staledays > 0 && date < datestale) {
		prefuse(art);
		(void) printf("ancient date `%s'\n", hdrs->h_date);
	} else if (spacein(ngs)) {
		prefuse(art);
		(void) printf("space in groups `%s'\n", ngs);
	} else if (alreadyseen(hdrs->h_msgid)) {
		/* duplicates are let through silently when dupsokay is set */
		if (opts.dupsokay)
			return;
		prefuse(art);
		(void) fputs("duplicate\n", stdout);
	} else if (hopcount(hdrs->h_path) > 0 &&
	    !ngpatmat(oursys()->sy_trngs, ngs)) {
		/*
		 * non-local article, with all bad groups.
		 * (local articles with bad groups will be bounced
		 * by fileart when the groups aren't in active.)
		 */
		/* optionally record the refused article in history */
		if (opts.histreject)
			history(art, NOLOG);
		prefuse(art);
		(void) printf("no subscribed groups in `%s'\n", ngs);
	} else
		return;			/* art was accepted */
	/* every refusing branch above falls through to here */
	decline(art);
}
263 
264 /*
265  * The loop copies header lines from input to output or a
266  * header output cache.  On exit, hdr will contain the first
267  * non-header line, if any, left over from the end of header copying.
268  *
269  * If the byte count is positive, read a line; if it doesn't return
270  * EOF and is a header, then adjust byte count, stash and munge headers.
271  * strlen(line) must be computed before hdrstash is called,
272  * as hdrstash (and thus hdrdigest) removes newlines.
273  *
274  * RFC 822 defines the message header as ending at a blank line, *not* at
275  * the first line that cannot syntactically be a header nor a header
276  * continuation.  As a result of this stunning bit of brilliance, we can end
277  * up with non-header lines in the message header, though they are illegal.
278  *
279  * Don't complain if this article has already been refused.
280  *
281  * TODO: Cope with NULs in header input, which bugger fgets and friends
282  * and throw off our byte count, thus buggering unbatching.
283  */
284 char *					/* first body line, from gethdr */
hdrcopy(art,in)285 hdrcopy(art, in)
286 register struct article *art;
287 FILE *in;
288 {
289 	register char *hdr = NULL;
290 	long limit = art->a_unread + SIZENUL;
291 	int is_hdr = NO;
292 
293 	while (limit > SIZENUL && (hdr = gethdr(in, &limit, &is_hdr)) != NULL &&
294 	    is_hdr) {
295 	    	hdrdigest(art, hdr, strlen(hdr));
296 		hdr = NULL;			/* freed inside gethdr */
297 	}
298 	/* If we read a body line, gethdr has adjusted limit appropriately. */
299 	art->a_unread = limit - SIZENUL;
300 	if (!is_hdr && hdr != NULL && *hdr != '\n' &&
301 	    !(art->a_status&ST_REFUSED)) {
302 		register char *hdrnonl = strsave(hdr);
303 
304 #ifdef notdef
305 		art->a_badhdr = YES;
306 #endif
307 		trim(hdrnonl);
308 		decline(art);
309 		prefuse(art);
310 		/* transient(art, '-', ...); */
311 		(void) printf(
312 		"article \"header\" contains non-RFC-1036-header line `%s'\n",
313 			hdrnonl);
314 		free(hdrnonl);
315 	}
316 	/* if is_hdr, there is no body: header fills limit */
317 	return (is_hdr? NULL: hdr);
318 }
319 
320 /*
321  * If not yet uninstalled, and the disk filled (or the news system was found
322  * to be otherwise unwell), uninstall this article
323  * to remove any (zero-length) links and decrement the active article number.
324  * The ST_NEEDATTN status will prevent a history entry being generated later.
325  */
326 void
surveydamage(art,installedp)327 surveydamage(art, installedp)
328 register struct article *art;
329 register boolean *installedp;
330 {
331 	if (art->a_unread > 0 && art->a_blvmax) {
332 		char bytes[30];
333 
334 		(void) sprintf(bytes, "%ld", (long)art->a_unread);
335 		logaudit(art, 'b', "short by %s bytes", bytes);
336 		art->a_status |= ST_SHORT;
337 			/* truncated input; NB.: don't uninstall this art. */
338 	}
339 	if (*installedp && art->a_status&ST_NEEDATTN) {
340 		uninsart(art);
341 		*installedp = NO;
342 	}
343 #ifdef WATCHCORE
344 	{
345 		char stbot;
346 		extern char *sbrk();
347 
348 		(void) printf("debugging memory use: top of data=%u",
349 			(unsigned)sbrk(0));
350 		(void) printf(", bottom of stack=%u\n", (unsigned)&stbot);
351 	}
352 #endif
353 }
354 
355 /*
356  * Copy article body.
357  * body will contain the first non-header line, if any,
358  * left over from the end of header copying.  Write it.
359  * Copy at most COPYSIZE bytes of body at a time and exactly art->a_unread
360  * bytes in total, barring EOF or a full disk. Then "block" is no longer needed.
361  * Force the article to disk, mostly for the benefit of control message
362  * processing.
363  *
364  * The copying buffer, block, is static because it is used repeatedly
365  * and persists through most of execution, so dynamic allocation
366  * and deallocation seems wasteful, but also for the benefit
367  * of compilers for odd machines (e.g. PE, 370s) which make
368  * implementing "large" automatic arrays difficult.
369  *
370  * Some people think the loop is ugly; I'm not sure why.
371  */
372 STATIC void
cpybody(art,in,body)373 cpybody(art, in, body)
374 register struct article *art;
375 FILE *in;
376 register char *body;
377 {
378 	register int readcnt;
379 	register FILE *out = art->a_artf;
380 	static char block[COPYSIZE];
381 
382 	if (body != NULL) {			/* read too far? */
383 		register int bodylen = strlen(body);
384 
385 		if (out != NULL && fwrite(body, 1, bodylen, out) != bodylen)
386 			fulldisk(art, spoolnm(art));
387 		art->a_charswritten += bodylen;
388 	}
389 	for (; art->a_unread > 0 && !(art->a_status&ST_NEEDATTN) && !feof(in) &&
390 	    (readcnt = fread(block, 1, (int)min(art->a_unread, COPYSIZE), in)) >
391 	    0; art->a_unread -= readcnt, art->a_charswritten += readcnt)
392 		if (out != NULL && fwrite(block, 1, readcnt, out) != readcnt)
393 			fulldisk(art, spoolnm(art));
394 	if (out != NULL && fflush(out) == EOF)
395 		fulldisk(art, spoolnm(art));
396 }
397 
398 /*
399  * Copy the next charcnt bytes of "in" (may be not a disk file)
400  * to a permanent file under a (possibly) temporary name.
401  * After the headers are seen, accept or reject the article.
402  * Either uninstall the article described by art, or accept it and file it.
403  * If rejecting it, remove any links and give back assigned #'s
404  * (art->a_artf may still be open; arguably uninsart should close it).
405  * If rejected and the headers fit in core, no files will be opened.
406  * Must munge certain headers on the way & remember certain values.
407  * hdrmunge() or hdrdump() sets art->a_tmpf & art->a_artf.
408  * Unlink art->a_tmpf, if a temporary link.
409  */
410 /* ARGSUSED inname */
411 STATIC void
copyart(art,in,inname)412 copyart(art, in, inname)
413 register struct article *art;
414 register FILE *in;
415 const char *inname;
416 {
417 	boolean installed = YES;
418 	char *body = hdrcopy(art, in);
419 	char bytehdr[64];
420 
421 	hdrdeflt(&art->h);
422 	reject(art);				/* duplicate, etc.? */
423 	if (art->a_status&(ST_DROPPED|ST_REFUSED)) {
424 		uninsart(art);
425 		installed = NO;
426 	} else {
427 		fileart(art);
428 		hdrdump(art);
429 	}
430 	cpybody(art, in, body);	/* consume article body from input batch */
431 	if (!(art->a_status&(ST_DROPPED|ST_REFUSED)))
432 		mkcopies(art);
433 	if (art->a_unlink) {
434 		/* a_tmpf has had links made to it, so it can be removed. */
435 		if (unlink(art->a_tmpf) < 0) {
436 			persistent(art, 'f', "copyart can't unlink `%s'",
437 				art->a_tmpf);
438 			art->a_status |= ST_ACCESS;
439 		}
440 		art->a_unlink = NO;		/* caution */
441 	}
442 	/* assertion: header values (art->h) can be forgotten here */
443 	if (!(art->a_status&(ST_DROPPED|ST_REFUSED|ST_NEEDATTN))) {
444 		/* fake Bytes: header for readers */
445 		(void) sprintf(bytehdr, "Bytes: %ld\n",
446 			       (long)art->a_charswritten);
447 		wrhdrstrm(art, bytehdr, strlen(bytehdr));
448 	}
449 	flshdrstrm(art);
450 	surveydamage(art, &installed);
451 }
452 
453 /*
454  * Copy the article on "in" to a temporary name in the news spool directory,
455  * unlink temp name; *or* copy into the final names, if known early enough.
456  * (Sets a_tmpf in or near hdrmunge() or hdrdump().)
457  * If the spool file opened, install the article it contains.
458  *
459  * copyart() may reject() the article, and may fill the disk.
460  * it calls fileart and logs rejected articles.  it may call uninsart.
461  */
462 statust
cpinsart(in,inname,maxima,blvmax)463 cpinsart(in, inname, maxima, blvmax)
464 FILE *in;
465 register const char *inname;
466 long maxima;
467 boolean blvmax;				/* believe maxima? */
468 {
469 	struct article art;
470 	register struct article *artp = &art;
471 	register statust status;
472 
473 	artinit(artp);
474 	artp->a_blvmax = blvmax;
475 	artp->a_unread = maxima;
476 	copyart(artp, in, inname);
477 	if (artp->a_status&ST_REFUSED) {
478 		/* no good ngs (in fileart) or reject()ed; not serious */
479 		artp->a_status &= ~ST_REFUSED;
480 		/* paranoia; shouldn't happen */
481 		nnfclose(artp, &artp->a_artf, inname);
482 	} else if (artp->a_artf == NULL) {
483 		persistent(artp, 'f', "can't open spool file `%s'",
484 			artp->a_tmpf);
485 	} else {
486 		nnfclose(artp, &artp->a_artf, inname);
487 		insart(artp);	/* logs accepted art.s during transmission */
488 		if (artp->a_status&ST_JUNKED) {	/* yer welcome, henry */
489 			artp->a_status &= ~ST_JUNKED;
490 			logaudit(artp, 'j', "junked due to groups `%s'",
491 				 artp->h.h_ngs);
492 		}
493 	}
494 	status = artp->a_status;
495 	artfree(artp);
496 	return status;
497 }
498