xref: /minix/usr.bin/mail/head.c (revision 27852ebe)
1 /*	$NetBSD: head.c,v 1.24 2013/01/16 15:21:42 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 1980, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #ifndef lint
34 #if 0
35 static char sccsid[] = "@(#)head.c	8.2 (Berkeley) 4/20/95";
36 #else
37 __RCSID("$NetBSD: head.c,v 1.24 2013/01/16 15:21:42 christos Exp $");
38 #endif
39 #endif /* not lint */
40 
41 #include "rcv.h"
42 #include "extern.h"
43 
44 /*
45  * Mail -- a mail program
46  *
47  * Routines for processing and detecting headlines.
48  */
49 
/*
 * Check the string cp against the template tp, one character at a time.
 * Each recognised template character names the class of input character
 * it accepts:
 *	'a' lower case	'A' upper case	'0' digit	'O' digit or space
 *	' ' space	':' colon	'N' newline	'+' plus or minus
 * A template character outside that set is stepped over without
 * consuming any input.
 * Return 1 only if string and template run out together, 0 otherwise.
 */
static int
cmatch(const char *cp, const char *tp)
{
	for (; *cp != '\0' && *tp != '\0'; tp++) {
		const unsigned char ch = (unsigned char)*cp;
		int ok;

		switch (*tp) {
		case 'a':
			ok = islower(ch) != 0;
			break;
		case 'A':
			ok = isupper(ch) != 0;
			break;
		case '0':
			ok = isdigit(ch) != 0;
			break;
		case 'O':
			ok = (ch == ' ' || isdigit(ch) != 0);
			break;
		case ' ':
			ok = (ch == ' ');
			break;
		case ':':
			ok = (ch == ':');
			break;
		case 'N':
			ok = (ch == '\n');
			break;
		case '+':
			ok = (ch == '+' || ch == '-');
			break;
		default:
			/* Not a class character: advance the template only. */
			continue;
		}
		if (!ok)
			return 0;
		cp++;
	}

	/* Left-over input or left-over template means a length mismatch. */
	return *cp == '\0' && *tp == '\0';
}
99 
/*
 * Date layouts accepted by isdate().  The first entries follow the
 * date string produced by ctime(3) as documented in the manual, with
 * and without a time zone name before the year; the rest cover
 * RFC 822 style trailers.  The list ends with a { 0, NULL } sentinel.
 *
 * Each template is interpreted by cmatch(); its characters mean:
 *
 *	'A'	an upper case char
 *	'a'	a lower case char
 *	' '	a space
 *	'0'	a digit
 *	'O'	an optional digit, i.e. a digit or a space
 *	':'	a colon
 *	'N'	a new line
 *	'+'	a plus or minus sign
 */
struct cmatch_data {
	size_t		tlen;	/* template length, NUL not counted */
	const char	*tdata;	/* the template itself */
};

/* Expands to the "length, string" pair that initialises one entry. */
#define TSZ(a)	(sizeof(a) - 1), a

static const struct cmatch_data cmatch_data[] = {
	/* ctime(3) layouts */
	{ TSZ("Aaa Aaa O0 00:00:00 0000") },		/* BSD */
	{ TSZ("Aaa Aaa O0 00:00 0000") },		/* SysV */
	/* the same, with a three letter time zone before the year */
	{ TSZ("Aaa Aaa O0 00:00:00 AAA 0000") },	/* BSD */
	{ TSZ("Aaa Aaa O0 00:00 AAA 0000") },		/* SysV */

	/*
	 * RFC 822-alike From_ lines do not conform to RFC 4155, but seem
	 * to be used in the wild by UW-imap (MBX format plus).
	 */
	{ TSZ("Aaa Aaa O0 00:00:00 0000 +0000") },	/* UT offset */

	/*
	 * RFC 822 with a zone spec after the year:
	 *    1. military (one letter),
	 *    2. UT (two letters),
	 *    3. north america time zone strings (three letters).
	 * Strictly speaking 1. is too lenient, as some letters are not
	 * used.
	 */
	{ TSZ("Aaa Aaa O0 00:00:00 0000 A") },
	{ TSZ("Aaa Aaa O0 00:00:00 0000 AA") },
	{ TSZ("Aaa Aaa O0 00:00:00 0000 AAA") },

	{ 0, NULL },					/* end marker */
};
145 
146 static int
147 isdate(const char date[])
148 {
149 	static size_t cmatch_minlen = 0;
150 	struct cmatch_data const *cmdp;
151 	size_t dl = strlen(date);
152 
153 	if (cmatch_minlen == 0)
154 		for (cmdp = cmatch_data; cmdp->tdata != NULL; ++cmdp)
155 			cmatch_minlen = MIN(cmatch_minlen, cmdp->tlen);
156 
157 	if (dl < cmatch_minlen)
158 		return 0;
159 
160 	for (cmdp = cmatch_data; cmdp->tdata != NULL; ++cmdp)
161 		if (dl == cmdp->tlen && cmatch(date, cmdp->tdata))
162 			return 1;
163 
164 	return 0;
165 }
166 
167 static void
168 fail(const char linebuf[], const char reason[])
169 {
170 #ifndef FMT_PROG
171 	if (debug)
172 		(void)fprintf(stderr, "\"%s\"\nnot a header because %s\n",
173 		    linebuf, reason);
174 #endif
175 }
176 
/*
 * Copy the next liberal word from wp into wbuf and NUL terminate it.
 * A word runs up to the first white space character as judged by
 * is_WSP() (space or tab, going by the original description), except
 * that a double-quoted stretch is copied whole, quotes included, even
 * if it contains white space.  An unterminated quote runs to the end
 * of the string.
 *
 * Return a pointer to the start of the following word, or NULL if wp
 * was NULL or nothing but white space follows.  When wp is NULL, wbuf
 * is set to the empty string.
 *
 * No bound is applied to wbuf; the caller must provide enough room.
 */
static const char *
nextword(const char *wp, char *wbuf)
{
	char *out = wbuf;
	const char *rest;

	if (wp == NULL) {
		*out = '\0';
		return NULL;
	}

	while (*wp != '\0' && !is_WSP(*wp)) {
		const int opens_quote = (*wp == '"');

		*out++ = *wp++;
		if (!opens_quote)
			continue;

		/* Inside quotes: take everything up to the closing quote. */
		while (*wp != '\0' && *wp != '"')
			*out++ = *wp++;
		if (*wp == '"')
			*out++ = *wp++;
	}
	*out = '\0';

	rest = skip_WSP(wp);
	return (*rest != '\0') ? rest : NULL;
}
204 
/*
 * Append the string src, terminating NUL included, at the position
 * *space points to, then advance *space to just past the copied NUL so
 * that the next call continues from there.
 * Return a pointer to the start of the copy.
 *
 * The destination is not bounds checked; the caller must make sure it
 * is large enough.
 */
static char *
copyin(const char *src, char **space)
{
	char *const dst = *space;
	size_t i = 0;

	/* Copy first, test afterwards, so the NUL is copied as well. */
	do {
		dst[i] = src[i];
	} while (src[i++] != '\0');

	*space = dst + i;
	return dst;
}
223 
224 /*
225  * Split a headline into its useful components.
226  * Copy the line into dynamic string space, then set
227  * pointers into the copied line in the passed headline
228  * structure.  Actually, it scans.
229  *
230  * XXX - line[], pbuf[], and word[] must be LINESIZE in length or
231  * overflow can occur in nextword() or copyin().
232  */
233 PUBLIC void
234 parse(const char line[], struct headline *hl, char pbuf[])
235 {
236 	const char *cp;
237 	char *sp;
238 	char word[LINESIZE];
239 
240 	hl->l_from = NULL;
241 	hl->l_tty = NULL;
242 	hl->l_date = NULL;
243 	cp = line;
244 	sp = pbuf;
245 	/*
246 	 * Skip over "From" first.
247 	 */
248 	cp = nextword(cp, word);
249 	cp = nextword(cp, word);
250 	if (*word)
251 		hl->l_from = copyin(word, &sp);
252 	if (cp != NULL && cp[0] == 't' && cp[1] == 't' && cp[2] == 'y') {
253 		cp = nextword(cp, word);
254 		hl->l_tty = copyin(word, &sp);
255 	}
256 	if (cp != NULL)
257 		hl->l_date = copyin(cp, &sp);
258 }
259 
260 /*
261  * See if the passed line buffer is a mail header.
262  * Return true if yes.  Note the extreme pains to
263  * accommodate all funny formats.
264  */
265 PUBLIC int
266 ishead(const char linebuf[])
267 {
268 	const char *cp;
269 	struct headline hl;
270 	char parbuf[LINESIZE];
271 
272 	cp = linebuf;
273 	if (*cp++ != 'F' || *cp++ != 'r' || *cp++ != 'o' || *cp++ != 'm' ||
274 	    *cp++ != ' ')
275 		return 0;
276 	parse(linebuf, &hl, parbuf);
277 	if (hl.l_from == NULL || hl.l_date == NULL) {
278 		fail(linebuf, "No from or date field");
279 		return 0;
280 	}
281 	if (!isdate(hl.l_date)) {
282 		fail(linebuf, "Date field not legal date");
283 		return 0;
284 	}
285 	/*
286 	 * I guess we got it!
287 	 */
288 	return 1;
289 }
290