1This is the pre-mmh version of sbr/m_getfld.c (dated 2008-12-26).
2The current version is still unbearable, but this one is original.
3Enjoy! :-)      -- 2012-04-01 markus schnalke <meillo@marmaro.de>
4
5/*
6 * m_getfld.c -- read/parse a message
7 *
8 * This code is Copyright (c) 2002, by the authors of nmh.  See the
9 * COPYRIGHT file in the root directory of the nmh distribution for
10 * complete copyright information.
11 */
12
13#include <h/mh.h>
14#include <h/mts.h>
15#include <h/utils.h>
16
17/* This module has a long and checkered history.  First, it didn't burst
18   maildrops correctly because it considered two CTRL-A:s in a row to be
19   an inter-message delimiter.  It really is four CTRL-A:s followed by a
20   newline.  Unfortunately, MMDF will convert this delimiter *inside* a
21   message to a CTRL-B followed by three CTRL-A:s and a newline.  This
22   caused the old version of m_getfld() to declare eom prematurely.  The
23   fix was a lot slower than
24
25		c == '\001' && peekc (iob) == '\001'
26
27   but it worked, and to increase generality, MBOX style maildrops could
28   be parsed as well.  Unfortunately the speed issue finally caught up with
29   us since this routine is at the very heart of MH.
30
31   To speed things up considerably, the routine Eom() was made an auxiliary
32   function called by the macro eom().  Unless we are bursting a maildrop,
33   the eom() macro returns FALSE saying we aren't at the end of the
34   message.
35
36   The next thing to do is to read the mts.conf file and initialize
37   delimiter[] and delimlen accordingly...
38
39   After mhl was made a built-in in msh, m_getfld() worked just fine
40   (using m_unknown() at startup).  Until one day: a message which was
41   the result of a bursting was shown. Then, since the burst boundaries
42   aren't CTRL-A:s, m_getfld() would blindly plunge on past the boundary.
43   Very sad.  The solution: introduce m_eomsbr().  This hook gets called
44   after the end of each line (since testing for eom involves an fseek()).
45   This worked fine, until one day: a message with no body portion arrived.
46   Then the
47
48		   while (eom (c = Getc (iob), iob))
49			continue;
50
51   loop caused m_getfld() to return FMTERR.  So, that logic was changed to
52   check for (*eom_action) and act accordingly.
53
54   This worked fine, until one day: someone didn't use four CTRL-A:s as
55   their delimiters.  So, the bullet got bit and we read mts.h and
56   continue to struggle on.  It's not that bad though, since the only time
57   the code gets executed is when inc (or msh) calls it, and both of these
58   have already called mts_init().
59
60   ------------------------
61   (Written by Van Jacobson for the mh6 m_getfld, January, 1986):
62
63   This routine was accounting for 60% of the cpu time used by most mh
64   programs.  I spent a bit of time tuning and it now accounts for <10%
65   of the time used.  Like any heavily tuned routine, it's a bit
66   complex and you want to be sure you understand everything that it's
67   doing before you start hacking on it.  Let me try to emphasize
68   that:  every line in this atrocity depends on every other line,
69   sometimes in subtle ways.  You should understand it all, in detail,
70   before trying to change any part.  If you do change it, test the
71   result thoroughly (I use a hand-constructed test file that exercises
72   all the ways a header name, header body, header continuation,
73   header-body separator, body line and body eom can align themselves
74   with respect to a buffer boundary).  "Minor" bugs in this routine
75   result in garbaged or lost mail.
76
77   If you hack on this and slow it down, I, my children and my
78   children's children will curse you.
79
80   This routine gets used on three different types of files: normal,
81   single msg files, "packed" unix or mmdf mailboxes (when used by inc)
82   and packed, directoried bulletin board files (when used by msh).
83   The biggest impact of different file types is in "eom" testing.  The
84   code has been carefully organized to test for eom at appropriate
85   times and at no other times (since the check is quite expensive).
86   I have tried to arrange things so that the eom check need only be
87   done on entry to this routine.  Since an eom can only occur after a
88   newline, this is easy to manage for header fields.  For the msg
89   body, we try to efficiently search the input buffer to see if it
90   contains the eom delimiter.  If it does, we take up to the
91   delimiter, otherwise we take everything in the buffer.  (The change
92   to the body eom/copy processing produced the most noticeable
93   performance difference, particularly for "inc" and "show".)
94
95   There are three qualitatively different things this routine busts
96   out of a message: field names, field text and msg bodies.  Field
97   names are typically short (~8 char) and the loop that extracts them
98   might terminate on a colon, newline or max width.  I considered
99   using a Vax "scanc" to locate the end of the field followed by a
100   "bcopy" but the routine call overhead on a Vax is too large for this
101   to work on short names.  If Berkeley ever makes "inline" part of the
102   C optimiser (so things like "scanc" turn into inline instructions) a
103   change here would be worthwhile.
104
105   Field text is typically 60 - 100 characters so there's (barely)
106   a win in doing a routine call to something that does a "locc"
107   followed by a "bmove".  About 30% of the fields have continuations
108   (usually the 822 "received:" lines) and each continuation generates
109   another routine call.  "Inline" would be a big win here, as well.
110
111   Messages, as of this writing, seem to come in two flavors: small
112   (~1K) and long (>2K).  Most messages have 400 - 600 bytes of headers
113   so message bodies average at least a few hundred characters.
114   Assuming your system uses reasonably sized stdio buffers (1K or
115   more), this routine should be able to remove the body in large
116   (>500 byte) chunks.  This makes the cost of a call to "bcopy"
117   small but there is a premium on checking for the eom in packed
118   maildrops.  The eom pattern is always a simple string so we can
119   construct an efficient pattern matcher for it (e.g., a Vax "matchc"
120   instruction).  Some thought went into recognizing the start of
121   an eom that has been split across two buffers.
122
123   This routine wants to deal with large chunks of data so, rather
124   than "getc" into a local buffer, it uses stdio's buffer.  If
125   you try to use it on a non-buffered file, you'll get what you
126   deserve.  This routine "knows" that struct FILEs have a _ptr
127   and a _cnt to describe the current state of the buffer and
128   it knows that _filbuf ignores the _ptr & _cnt and simply fills
129   the buffer.  If stdio on your system doesn't work this way, you
130   may have to make small changes in this routine.
131
132   This routine also "knows" that an EOF indication on a stream is
133   "sticky" (i.e., you will keep getting EOF until you reposition the
134   stream).  If your system doesn't work this way it is broken and you
135   should complain to the vendor.  As a consequence of the sticky
136   EOF, this routine will never return any kind of EOF status when
137   there is data in "name" or "buf".
138  */
139
140
141/*
142 * static prototypes
143 */
144static int m_Eom (int, FILE *);
145static unsigned char *matchc(int, char *, int, char *);
146static unsigned char *locc(int, unsigned char *, unsigned char);
147
/*
 * Getc() is a plain getc().
 *
 * eom(c,iob) tests whether the character c, just read from iob, marks
 * the end of the current message.  It is always false while msg_style
 * is MS_DEFAULT.  Otherwise it is true when c equals the first byte of
 * the delimiter and m_Eom() matches the rest, or when an eom_action
 * hook is installed and returns non-zero for c.
 */
148#define Getc(iob)	getc(iob)
149#define eom(c,iob)	(msg_style != MS_DEFAULT && \
150			 (((c) == *msg_delim && m_Eom(c,iob)) ||\
151			  (eom_action && (*eom_action)(c))))
152
/*
 * Table indexed by byte value, built by m_unknown().  For every byte
 * that occurs in the full delimiter string (excluding its first and
 * last characters) the entry points at the last position of that byte
 * inside fdelim; all other entries are NULL.  The BODY case of
 * m_getfld() uses it to spot a partial delimiter at the end of the
 * input buffer.
 */
153static unsigned char **pat_map;
154
155/*
156 * defined in sbr/m_msgdef.c = 0
157 * This is a disgusting hack for "inc" so it can know how many
158 * characters were stuffed in the buffer on the last call
159 * (see comments in uip/scansbr.c).
160 */
161extern int msg_count;
162
163/*
164 * defined in sbr/m_msgdef.c = MS_DEFAULT
165 */
166extern int msg_style;
167
168/*
169 * The "full" delimiter string for a packed maildrop consists
170 * of a newline followed by the actual delimiter.  E.g., the
171 * full string for a Unix maildrop would be: "\n\nFrom ".
172 * "Fdelim" points to the start of the full string and is used
173 * in the BODY case of the main routine to search the buffer for
174 * a possible eom.  Msg_delim points to the first character of
175 * the actual delim. string (i.e., fdelim+1).  Edelim
176 * points to the 2nd character of actual delimiter string.  It
177 * is used in m_Eom because the first character of the string
178 * has been read and matched before m_Eom is called.
179 */
180extern char *msg_delim;         /* defined in sbr/m_msgdef.c = "" */
181static unsigned char *fdelim;	/* start of the full delimiter string */
182static unsigned char *delimend;	/* m_unknown(): last character of the delimiter */
183static int fdelimlen;		/* m_unknown(): length of the full delimiter string */
184static unsigned char *edelim;	/* second character of the actual delimiter */
185static int edelimlen;		/* m_unknown(): length of the actual delimiter minus one */
186
/* Optional end-of-message hook installed by m_eomsbr(); called by eom(). */
187static int (*eom_action)(int) = NULL;
188
/*
 * Map the stdio internals this file pokes at (_ptr, _cnt, _filbuf)
 * onto the names used by particular stdio implementations.
 */
189#ifdef _FSTDIO
190# define _ptr    _p		/* Gag   */
191# define _cnt    _r		/* Retch */
192# define _filbuf __srget	/* Puke  */
193# define DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
194#endif
195
196#ifdef SCO_5_STDIO
197# define _ptr  __ptr
198# define _cnt  __cnt
199# define _base __base
200# define _filbuf(fp)  ((fp)->__cnt = 0, __filbuf(fp))
201# define DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
202#endif
203
204#ifndef DEFINED__FILBUF_TO_SOMETHING_SPECIFIC
205extern int  _filbuf(FILE*);
206#endif
207
208
/*
 * m_getfld -- read the next piece (field name + text, more field text,
 * or a chunk of body) of a message from iob.
 *
 * state  parser state: FLD (also FLDEOF and BODYEOF, which are handled
 *        identically) starts a new header field, FLDPLUS continues the
 *        text of the current field, BODY reads message body.  Any other
 *        value ends in adios().
 * name   receives the NUL-terminated field name when a new field is
 *        started (at most NAMESZ-1 characters are gathered).
 * buf    receives the field text or body data.
 * bufsz  size of buf.  In the BODY case a negative value means "copy up
 *        to -bufsz bytes and do not NUL-terminate".
 * iob    input stream; its buffer is read and its internal pointer and
 *        count are adjusted directly rather than through getc().
 *
 * Returns FILEEOF at end of file or end of message, FLD when a complete
 * field was read, FLDPLUS when buf filled up before the field ended,
 * BODY for body data, FMTERR or LENERR for a malformed or over-long
 * field name.  On the paths that go through "finish", and on the
 * FILEEOF and negative-bufsz returns, msg_count is set to the number of
 * bytes placed in buf.
 */
209int
210m_getfld (int state, unsigned char *name, unsigned char *buf,
211          int bufsz, FILE *iob)
212{
213    register unsigned char  *bp, *cp, *ep, *sp;
214    register int cnt, c, i, j;
215
    /*
     * Every call starts by consuming one character.  The cases below
     * that scan stdio's buffer directly back up over it again (the
     * "- 1" / "--" applied to the buffer pointer).
     */
216    if ((c = Getc(iob)) < 0) {
217	msg_count = 0;
218	*buf = 0;
219	return FILEEOF;
220    }
221    if (eom (c, iob)) {
222	if (! eom_action) {
223	    /* flush null messages */
224	    while ((c = Getc(iob)) >= 0 && eom (c, iob))
225		;
226	    if (c >= 0)
227		ungetc(c, iob);
228	}
229	msg_count = 0;
230	*buf = 0;
231	return FILEEOF;
232    }
233
234    switch (state) {
235	case FLDEOF:
236	case BODYEOF:
237	case FLD:
238	    if (c == '\n' || c == '-') {
239		/* we hit the header/body separator */
		/* skip the rest of the separator line */
240		while (c != '\n' && (c = Getc(iob)) >= 0)
241		    ;
242
243		if (c < 0 || (c = Getc(iob)) < 0 || eom (c, iob)) {
244		    if (! eom_action) {
245			/* flush null messages */
246			while ((c = Getc(iob)) >= 0 && eom (c, iob))
247			    ;
248			if (c >= 0)
249			    ungetc(c, iob);
250		    }
251		    msg_count = 0;
252		    *buf = 0;
253		    return FILEEOF;
254		}
255		state = BODY;
256		goto body;
257	    }
258	    /*
259	     * get the name of this component.  take characters up
260	     * to a ':', a newline or NAMESZ-1 characters, whichever
261	     * comes first.
262	     */
263	    cp = name;
264	    i = NAMESZ - 1;
265	    for (;;) {
		/*
		 * sp/bp start at the character already read by Getc;
		 * cnt is the number of buffered characters from there
		 * on and j is how many of them this pass may examine.
		 */
266#ifdef LINUX_STDIO
267		bp = sp = (unsigned char *) iob->_IO_read_ptr - 1;
268		j = (cnt = ((long) iob->_IO_read_end -
269			(long) iob->_IO_read_ptr)  + 1) < i ? cnt : i;
270#elif defined(__DragonFly__)
271		bp = sp = (unsigned char *) ((struct __FILE_public *)iob)->_p - 1;
272		j = (cnt = ((struct __FILE_public *)iob)->_r+1) < i ? cnt : i;
273#else
274		bp = sp = (unsigned char *) iob->_ptr - 1;
275		j = (cnt = iob->_cnt+1) < i ? cnt : i;
276#endif
277		while (--j >= 0 && (c = *bp++) != ':' && c != '\n')
278		    *cp++ = c;
279
		/* j is now the number of characters examined in this pass */
280		j = bp - sp;
281		if ((cnt -= j) <= 0) {
		    /* buffer exhausted: refill it (this reads one character) */
282#ifdef LINUX_STDIO
283		    iob->_IO_read_ptr = iob->_IO_read_end;
284		    if (__underflow(iob) == EOF) {
285#elif defined(__DragonFly__)
286		    if (__srget(iob) == EOF) {
287#else
288		    if (_filbuf(iob) == EOF) {
289#endif
290			*cp = *buf = 0;
291			advise (NULL, "eof encountered in field \"%s\"", name);
292			return FMTERR;
293		    }
294#ifdef LINUX_STDIO
295		iob->_IO_read_ptr++; /* NOT automatic in __underflow()! */
296#endif
297		} else {
		    /*
		     * Leave the stream one character past bp; the
		     * FLDPLUS code below backs up by one again.
		     */
298#ifdef LINUX_STDIO
299		    iob->_IO_read_ptr = bp + 1;
300#elif defined(__DragonFly__)
301		    ((struct __FILE_public *)iob)->_p = bp + 1;
302		    ((struct __FILE_public *)iob)->_r = cnt - 1;
303#else
304		    iob->_ptr = bp + 1;
305		    iob->_cnt = cnt - 1;
306#endif
307		}
308		if (c == ':')
309		    break;
310
311		/*
312		 * something went wrong.  possibilities are:
313		 *  . hit a newline (error)
314		 *  . got more than namesz chars. (error)
315		 *  . hit the end of the buffer. (loop)
316		 */
317		if (c == '\n') {
318		    /* We hit the end of the line without seeing ':' to
319		     * terminate the field name.  This is usually (always?)
320		     * spam.  But, blowing up is lame, especially when
321		     * scan(1)ing a folder with such messages.  Pretend such
322		     * lines are the first of the body (at least mutt also
323		     * handles it this way). */
324
325		    /* See if buf can hold this line, since we were assuming
326		     * we had a buffer of NAMESZ, not bufsz. */
327		    /* + 1 for the newline */
		    /* NOTE(review): j counts only the characters examined in
		     * the last pass of this loop; if the line was gathered
		     * across a buffer refill, name holds more than j - 1
		     * characters -- confirm this is intended. */
328		    if (bufsz < j + 1) {
329			/* No, it can't.  Oh well, guess we'll blow up. */
330			*cp = *buf = 0;
331			advise (NULL, "eol encountered in field \"%s\"", name);
332			state = FMTERR;
			/* NOTE(review): cp points into name here, yet
			 * "finish" computes msg_count as cp - buf. */
333			goto finish;
334		    }
335		    memcpy (buf, name, j - 1);
336		    buf[j - 1] = '\n';
337		    buf[j] = '\0';
338		    /* mhparse.c:get_content wants to find the position of the
339		     * body start, but it thinks there's a blank line between
340		     * the header and the body (naturally!), so seek back so
341		     * that things line up even though we don't have that
342		     * blank line in this case.  Simpler parsers (e.g. mhl)
343		     * get extra newlines, but that should be harmless enough,
344		     * right?  This is a corrupt message anyway. */
345		    fseek (iob, ftell (iob) - 2, SEEK_SET);
		    /* this return bypasses "finish", so msg_count is not updated */
346		    return BODY;
347		}
348		if ((i -= j) <= 0) {
349		    *cp = *buf = 0;
350		    advise (NULL, "field name \"%s\" exceeds %d bytes", name, NAMESZ - 2);
351		    state = LENERR;
		    /* NOTE(review): as above, cp points into name at "finish" */
352		    goto finish;
353		}
354	    }
355
	    /*
	     * strip trailing whitespace from the name.
	     * NOTE(review): *--cp is dereferenced before cp >= name is
	     * tested, so an empty name reads the byte before name[0].
	     */
356	    while (isspace (*--cp) && cp >= name)
357		;
358	    *++cp = 0;
359	    /* fall through */
360
361	case FLDPLUS:
362	    /*
363	     * get (more of) the text of a field.  take
364	     * characters up to the end of this field (newline
365	     * followed by non-blank) or bufsz-1 characters.
	     * Leaves state = FLD when the field ended and
	     * state = FLDPLUS when buf filled up first.
366	     */
367	    cp = buf; i = bufsz-1;
368	    for (;;) {
369#ifdef LINUX_STDIO
370		cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
371		bp = (unsigned char *) --iob->_IO_read_ptr;
372#elif defined(__DragonFly__)
373		cnt = ((struct __FILE_public *)iob)->_r++;
374		bp = (unsigned char *) --((struct __FILE_public *)iob)->_p;
375#else
376		cnt = iob->_cnt++;
377		bp = (unsigned char *) --iob->_ptr;
378#endif
		/* c = number of bytes to search: the smaller of input left and room in buf */
379		c = cnt < i ? cnt : i;
380		while ((ep = locc( c, bp, '\n' ))) {
381		    /*
382		     * if we hit the end of this field, return.
383		     */
384		    if ((j = *++ep) != ' ' && j != '\t') {
385#ifdef LINUX_STDIO
386			j = ep - (unsigned char *) iob->_IO_read_ptr;
387			memcpy (cp, iob->_IO_read_ptr, j);
388			iob->_IO_read_ptr = ep;
389#elif defined(__DragonFly__)
390			j = ep - (unsigned char *) ((struct __FILE_public *)iob)->_p;
391			memcpy (cp, ((struct __FILE_public *)iob)->_p, j);
392			((struct __FILE_public *)iob)->_p = ep;
393			((struct __FILE_public *)iob)->_r -= j;
394#else
395			j = ep - (unsigned char *) iob->_ptr;
396			memcpy (cp, iob->_ptr, j);
397			iob->_ptr = ep;
398			iob->_cnt -= j;
399#endif
400			cp += j;
401			state = FLD;
402			goto finish;
403		    }
		    /* continuation line: keep searching after this newline */
404		    c -= ep - bp;
405		    bp = ep;
406		}
407		/*
408		 * end of input or dest buffer - copy what we've found.
409		 */
410#ifdef LINUX_STDIO
411		c += bp - (unsigned char *) iob->_IO_read_ptr;
412		memcpy( cp, iob->_IO_read_ptr, c);
413#elif defined(__DragonFly__)
414		c += bp - (unsigned char *) ((struct __FILE_public *)iob)->_p;
415		memcpy( cp, ((struct __FILE_public *)iob)->_p, c);
416#else
417		c += bp - (unsigned char *) iob->_ptr;
418		memcpy( cp, iob->_ptr, c);
419#endif
420		i -= c;
421		cp += c;
422		if (i <= 0) {
423		    /* the dest buffer is full */
424#ifdef LINUX_STDIO
425		    iob->_IO_read_ptr += c;
426#elif defined(__DragonFly__)
427		    ((struct __FILE_public *)iob)->_r -= c;
428		    ((struct __FILE_public *)iob)->_p += c;
429#else
430		    iob->_cnt -= c;
431		    iob->_ptr += c;
432#endif
433		    state = FLDPLUS;
434		    break;
435		}
436		/*
437		 * There's one character left in the input buffer.
438		 * Copy it & fill the buffer.  If the last char
439		 * was a newline and the next char is not whitespace,
440		 * this is the end of the field.  Otherwise loop.
441		 */
442		--i;
443#ifdef LINUX_STDIO
444		*cp++ = j = *(iob->_IO_read_ptr + c);
445		iob->_IO_read_ptr = iob->_IO_read_end;
446		c = __underflow(iob);
447		iob->_IO_read_ptr++;    /* NOT automatic! */
448#elif defined(__DragonFly__)
449		*cp++ =j = *(((struct __FILE_public *)iob)->_p + c);
450		c = __srget(iob);
451#else
452		*cp++ = j = *(iob->_ptr + c);
453		c = _filbuf(iob);
454#endif
455                if (c == EOF ||
456		  ((j == '\0' || j == '\n') && c != ' ' && c != '\t')) {
457		    if (c != EOF) {
			/* put back the character the refill consumed */
458#ifdef LINUX_STDIO
459			--iob->_IO_read_ptr;
460#elif defined(__DragonFly__)
461			--((struct __FILE_public *)iob)->_p;
462			++((struct __FILE_public *)iob)->_r;
463#else
464			--iob->_ptr;
465			++iob->_cnt;
466#endif
467		    }
468		    state = FLD;
469		    break;
470		}
471	    }
472	    break;
473
474	case BODY:
475	body:
476	    /*
477	     * get the message body up to bufsz characters or the
478	     * end of the message.  Sleazy hack: if bufsz is negative
479	     * we assume that we were called to copy directly into
480	     * the output buffer and we don't add an eos.
481	     */
482	    i = (bufsz < 0) ? -bufsz : bufsz-1;
483#ifdef LINUX_STDIO
484	    bp = (unsigned char *) --iob->_IO_read_ptr;
485	    cnt = (long) iob->_IO_read_end - (long) iob->_IO_read_ptr;
486#elif defined(__DragonFly__)
487	    bp = (unsigned char *) --((struct __FILE_public *)iob)->_p;
488	    cnt = ++((struct __FILE_public *)iob)->_r;
489#else
490	    bp = (unsigned char *) --iob->_ptr;
491	    cnt = ++iob->_cnt;
492#endif
	    /* c = number of bytes to take: the smaller of input available and room in buf */
493	    c = (cnt < i ? cnt : i);
494	    if (msg_style != MS_DEFAULT && c > 1) {
495		/*
496		 * packed maildrop - only take up to the (possible)
497		 * start of the next message.  This "matchc" should
498		 * probably be a Boyer-Moore matcher for non-vaxen,
499		 * particularly since we have the alignment table
500		 * all built for the end-of-buffer test (next).
501		 * But our vax timings indicate that the "matchc"
502		 * instruction is 50% faster than a carefully coded
503		 * B.M. matcher for most strings.  (So much for elegant
504		 * algorithms vs. brute force.)  Since I (currently)
505		 * run MH on a vax, we use the matchc instruction. --vj
506		 */
		/*
		 * On a match ep points at the newline that starts the
		 * full delimiter, so take the data up to and including
		 * that newline.
		 */
507		if ((ep = matchc( fdelimlen, fdelim, c, bp )))
508		    c = ep - bp + 1;
509		else {
510		    /*
511		     * There's no delim in the buffer but there may be
512		     * a partial one at the end.  If so, we want to leave
513		     * it so the "eom" check on the next call picks it up.
514		     * Use a modified Boyer-Moore matcher to make this
515		     * check relatively cheap.  The first "if" figures
516		     * out what position in the pattern matches the last
517		     * character in the buffer.  The inner "while" matches
518		     * the pattern against the buffer, backwards starting
519		     * at that position.  Note that unless the buffer
520		     * ends with one of the characters in the pattern
521		     * (excluding the first and last), we do only one test.
522		     */
523		    ep = bp + c - 1;
524		    if ((sp = pat_map[*ep])) {
525			do {
526			    /* This if() is true unless (a) the buffer is too
527			     * small to contain this delimiter prefix, or
528			     * (b) it contains exactly enough chars for the
529			     * delimiter prefix.
530			     * For case (a) obviously we aren't going to match.
531			     * For case (b), if the buffer really contained exactly
532			     * a delim prefix, then the m_eom call at entry
533			     * should have found it.  Thus it's not a delim
534			     * and we know we won't get a match.
535			     */
536			    if (((sp - fdelim) + 2) <= c) {
537				cp = sp;
538				/* Unfortunately although fdelim has a preceding NUL
539				 * we can't use this as a sentinel in case the buffer
540				 * contains a NUL in exactly the wrong place (this
541				 * would cause us to run off the front of fdelim).
542				 */
543				while (*--ep == *--cp)
544				    if (cp < fdelim)
545					break;
546				if (cp < fdelim) {
547				    /* we matched the entire delim prefix,
548				     * so only take the buffer up to there.
549				     * we know ep >= bp -- check above prevents underrun
550				     */
551				    c = (ep - bp) + 2;
552				    break;
553				}
554			    }
555			    /* try matching one less char of delim string */
556			    ep = bp + c - 1;
557			} while (--sp > fdelim);
558		    }
559		}
560	    }
561	    memcpy( buf, bp, c );
562#ifdef LINUX_STDIO
563	    iob->_IO_read_ptr += c;
564#elif defined(__DragonFly__)
565	    ((struct __FILE_public *)iob)->_r -= c;
566	    ((struct __FILE_public *)iob)->_p += c;
567#else
568	    iob->_cnt -= c;
569	    iob->_ptr += c;
570#endif
571	    if (bufsz < 0) {
		/* raw copy requested: report the length, add no terminator */
572		msg_count = c;
573		return (state);
574	    }
575	    cp = buf + c;
576	    break;
577
578	default:
579	    adios (NULL, "m_getfld() called with bogus state of %d", state);
580    }
581finish:
    /* terminate at cp and record the length relative to buf */
582    *cp = 0;
583    msg_count = cp - buf;
584    return (state);
585}
586
587
588#ifdef RPATHS
589static char unixbuf[BUFSIZ] = "";
590#endif /* RPATHS */
591
592void
593m_unknown(FILE *iob)
594{
595    register int c;
596    register long pos;
597    char text[10];
598    register char *cp;
599    register char *delimstr;
600
601/*
602 * Figure out what the message delimitter string is for this
603 * maildrop.  (This used to be part of m_Eom but I didn't like
604 * the idea of an "if" statement that could only succeed on the
605 * first call to m_Eom getting executed on each call, i.e., at
606 * every newline in the message).
607 *
608 * If the first line of the maildrop is a Unix "From " line, we
609 * say the style is MBOX and eat the rest of the line.  Otherwise
610 * we say the style is MMDF and look for the delimiter string
611 * specified when nmh was built (or from the mts.conf file).
612 */
613
614    msg_style = MS_UNKNOWN;
615
616    pos = ftell (iob);
617    if (fread (text, sizeof(*text), 5, iob) == 5
618	    && strncmp (text, "From ", 5) == 0) {
619	msg_style = MS_MBOX;
620	delimstr = "\nFrom ";
621#ifndef	RPATHS
622	while ((c = getc (iob)) != '\n' && c >= 0)
623	    ;
624#else /* RPATHS */
625	cp = unixbuf;
626	while ((c = getc (iob)) != '\n' && cp - unixbuf < BUFSIZ - 1)
627	    *cp++ = c;
628	*cp = 0;
629#endif /* RPATHS */
630    } else {
631	/* not a Unix style maildrop */
632	fseek (iob, pos, SEEK_SET);
633	if (mmdlm2 == NULL || *mmdlm2 == 0)
634	    mmdlm2 = "\001\001\001\001\n";
635	delimstr = mmdlm2;
636	msg_style = MS_MMDF;
637    }
638    c = strlen (delimstr);
639    fdelim = (unsigned char *) mh_xmalloc((size_t) (c + 3));
640    *fdelim++ = '\0';
641    *fdelim = '\n';
642    msg_delim = (char *)fdelim+1;
643    edelim = (unsigned char *)msg_delim+1;
644    fdelimlen = c + 1;
645    edelimlen = c - 1;
646    strcpy (msg_delim, delimstr);
647    delimend = (unsigned char *)msg_delim + edelimlen;
648    if (edelimlen <= 1)
649	adios (NULL, "maildrop delimiter must be at least 2 bytes");
650    /*
651     * build a Boyer-Moore end-position map for the matcher in m_getfld.
652     * N.B. - we don't match just the first char (since it's the newline
653     * separator) or the last char (since the matchc would have found it
654     * if it was a real delim).
655     */
656    pat_map = (unsigned char **) calloc (256, sizeof(unsigned char *));
657
658    for (cp = (char *) fdelim + 1; cp < (char *) delimend; cp++ )
659	pat_map[(unsigned char)*cp] = (unsigned char *) cp;
660
661    if (msg_style == MS_MMDF) {
662	/* flush extra msg hdrs */
663	while ((c = Getc(iob)) >= 0 && eom (c, iob))
664	    ;
665	if (c >= 0)
666	    ungetc(c, iob);
667    }
668}
669
670
671void
672m_eomsbr (int (*action)(int))
673{
674    if ((eom_action = action)) {
675	msg_style = MS_MSH;
676	*msg_delim = 0;
677	fdelimlen = 1;
678	delimend = fdelim;
679    } else {
680	msg_style = MS_MMDF;
681	msg_delim = (char *)fdelim + 1;
682	fdelimlen = strlen((char *)fdelim);
683	delimend = (unsigned char *)(msg_delim + edelimlen);
684    }
685}
686
687
688/*
689 * test for msg delimiter string
690 */
691
692static int
693m_Eom (int c, FILE *iob)
694{
695    register long pos = 0L;
696    register int i;
697    char text[10];
698#ifdef RPATHS
699    register char *cp;
700#endif /* RPATHS */
701
702    pos = ftell (iob);
703    if ((i = fread (text, sizeof *text, edelimlen, iob)) != edelimlen
704	    || strncmp (text, (char *)edelim, edelimlen)) {
705	if (i == 0 && msg_style == MS_MBOX)
706	    /* the final newline in the (brain damaged) unix-format
707	     * maildrop is part of the delimitter - delete it.
708	     */
709	    return 1;
710
711#if 0
712	fseek (iob, pos, SEEK_SET);
713#endif
714
715	fseek (iob, (long)(pos-1), SEEK_SET);
716	getc (iob);		/* should be OK */
717	return 0;
718    }
719
720    if (msg_style == MS_MBOX) {
721#ifndef RPATHS
722	while ((c = getc (iob)) != '\n')
723	    if (c < 0)
724		break;
725#else /* RPATHS */
726	cp = unixbuf;
727	while ((c = getc (iob)) != '\n' && c >= 0 && cp - unixbuf < BUFSIZ - 1)
728	    *cp++ = c;
729	*cp = 0;
730#endif /* RPATHS */
731    }
732
733    return 1;
734}
735
736
737#ifdef RPATHS
738/*
739 * Return the Return-Path and Delivery-Date
740 * header information.
741 *
742 * Currently, I'm assuming that the "From " line
743 * takes one of the following forms.
744 *
745 * From sender date remote from host   (for UUCP delivery)
746 * From sender@host  date              (for sendmail delivery)
747 */
748
749int
750get_returnpath (char *rp, int rplen, char *dd, int ddlen)
751{
752    char *ap, *bp, *cp, *dp;
753
754    ap = unixbuf;
755    if (!(bp = cp = strchr(ap, ' ')))
756	return 0;
757
758    /*
759     * Check for "remote from" in envelope to see
760     * if this message uses UUCP style addressing
761     */
762    while ((cp = strchr(++cp, 'r'))) {
763	if (strncmp (cp, "remote from", 11) == 0) {
764	    cp = strrchr (cp, ' ');
765	    break;
766	}
767    }
768
769    /*
770     * Get the Return-Path information from
771     * the "From " envelope.
772     */
773    if (cp) {
774	/* return path for UUCP style addressing */
775	dp = strchr (++cp, '\n');
776	snprintf (rp, rplen, "%.*s!%.*s\n", (int)(dp - cp), cp, (int)(bp - ap), ap);
777    } else {
778	/* return path for standard domain addressing */
779	snprintf (rp, rplen, "%.*s\n", (int)(bp - ap), ap);
780    }
781
782    /*
783     * advance over the spaces to get to
784     * delivery date on envelope
785     */
786    while (*bp == ' ')
787	bp++;
788
789    /* Now get delivery date from envelope */
790    snprintf (dd, ddlen, "%.*s\n", 24, bp);
791
792    unixbuf[0] = 0;
793    return 1;
794}
795#endif /* RPATHS */
796
797
/*
 * Find the first occurrence of the "patln"-byte pattern "pat" within
 * the first "strln" bytes of "str".
 *
 * Returns a pointer to the start of the match, or 0 when there is no
 * match.  An empty pattern, or a pattern longer than the string, never
 * matches and is rejected without reading "str".
 */
static unsigned char *
matchc(int patln, char *pat, int strln, char *str)
{
	char *last;	/* last position at which a full match can start */
	char *cand;

	if (patln <= 0 || strln < patln)
		return 0;

	last = str + (strln - patln);
	for (cand = str; cand <= last; cand++) {
		/* cheap first-byte test before comparing the whole pattern */
		if (*cand == *pat && memcmp (cand, pat, (size_t) patln) == 0)
			return ((unsigned char *) cand);
	}
	return 0;
}
820
821
/*
 * Locate character "term" in the next "cnt" characters of "src".
 * If found, return the address of its first occurrence, otherwise
 * return 0.  A count that is zero or negative returns 0 without
 * reading "src".
 */

static unsigned char *
locc(int cnt, unsigned char *src, unsigned char term)
{
    if (cnt <= 0)
	return (unsigned char *)0;

    return (unsigned char *) memchr (src, term, (size_t) cnt);
}
834
835