1 /* rfc822tok.c -- RFC 822/RFC 2822 tokenizer
2  *
3  * Copyright (c) 2012 Carnegie Mellon University.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in
14  *    the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * 3. The name "Carnegie Mellon University" must not be used to
18  *    endorse or promote products derived from this software without
19  *    prior written permission. For permission or any legal
20  *    details, please contact
21  *      Carnegie Mellon University
22  *      Center for Technology Transfer and Enterprise Creation
23  *      4615 Forbes Avenue
24  *      Suite 302
25  *      Pittsburgh, PA  15213
26  *      (412) 268-7393, fax: (412) 268-7395
27  *      innovation@andrew.cmu.edu
28  *
29  * 4. Redistributions of any form whatsoever must retain the following
30  *    acknowledgment:
31  *    "This product includes software developed by Computing Services
32  *     at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33  *
34  * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36  * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37  * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39  * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41  */
42 
43 #include <config.h>
44 #include <string.h>
45 #include <ctype.h>
46 #include <errno.h>
47 #include "rfc822tok.h"
48 
rfc822tok_init(rfc822tok_t * t,const char * base,unsigned int len,unsigned int flags)49 EXPORTED void rfc822tok_init(rfc822tok_t *t, const char *base,
50                              unsigned int len, unsigned int flags)
51 {
52     buf_init_ro(&t->buf, base, len);
53     t->ptr = base;
54     t->flags = flags;
55 }
56 
rfc822tok_init_buf(rfc822tok_t * t,const struct buf * b,unsigned int flags)57 EXPORTED void rfc822tok_init_buf(rfc822tok_t *t, const struct buf *b,
58                                  unsigned int flags)
59 {
60     rfc822tok_init(t, (b ? b->s : NULL), (b ? b->len : 0), flags);
61 }
62 
rfc822tok_fini(rfc822tok_t * t)63 EXPORTED void rfc822tok_fini(rfc822tok_t *t)
64 {
65     buf_free(&t->buf);
66 }
67 
/*
 * Decide whether `c` is a single-character token for tokenizer `t`.
 *
 * Returns 1 for the fixed RFC2822 specials ( ) < > [ ] : ; @ \ ,
 * and additionally for '.' when RFC822_SPECIAL_DOT is set in t->flags
 * and for '=' when RFC822_SPECIAL_EQUAL is set.  Returns 0 otherwise.
 */
static inline int is_special(rfc822tok_t *t, int c)
{
    /* membership in the fixed set, compared as a char */
    switch ((char)c) {
    case '(': case ')':
    case '<': case '>':
    case '[': case ']':
    case ':': case ';':
    case '@': case '\\': case ',':
    /* NUL has always been reported as special too: this set used to be
     * searched with strchr(), which also matches the string terminator */
    case '\0':
        return 1;
    default:
        break;
    }

    /* '.' and '=' are only special when the caller asked for it */
    if (c == '.')
        return (t->flags & RFC822_SPECIAL_DOT) ? 1 : 0;
    if (c == '=')
        return (t->flags & RFC822_SPECIAL_EQUAL) ? 1 : 0;

    return 0;
}
81 
rfc822tok_next(rfc822tok_t * t,char ** textp)82 EXPORTED int rfc822tok_next(rfc822tok_t *t, char **textp)
83 {
84     const char *p;
85     const char *end;
86     int comment_depth = 0;
87     static struct buf text = BUF_INITIALIZER;
88     int r;
89 
90     buf_reset(&text);
91     if (textp) *textp = NULL;
92     if (!t->buf.len)
93         return EOF;
94 
95     end = t->buf.s + t->buf.len;
96     p = t->ptr;
97     if (p >= end)
98         return EOF;
99 
100     /* skip any leading whitespace and comments */
101     for ( ; p < end ; p++) {
102         if (comment_depth) {
103             if (*p == '\\')
104                 p++;
105             else if (*p == ')')
106                 comment_depth--;
107             else if (*p == '(')
108                 comment_depth++;
109         }
110         else if (*p == '(') {
111             comment_depth++;
112         }
113         else if (!isspace(*p)) {
114             break;
115         }
116     }
117     if (comment_depth) {
118         r = -EINVAL;
119         goto out;
120     }
121     if (p >= end) {
122         r = EOF;
123         goto out;
124     }
125 
126     /* RFC2822 specials are single-char tokens */
127     if (is_special(t, *p)) {
128         r = *p++;
129         goto out;
130     }
131 
132     if (*p == '"') {
133         /* parse quoted-string per RFC2822 section 3.2.5 */
134         int in_quoted_pair = 0;
135         int in_quoted_string = 1;
136 
137         for (p++ ; p < end ; p++) {
138             if (*p == '\r' && p+1 < end && p[1] == '\n') {
139                 /* elide CRLF inside a quoted string */
140                 p++;
141                 /* a close reading of RFC2822 shows that \ is only
142                  * semantically invisible when part of a quoted-pair,
143                  * and CRLF is not part of a quoted-pair; so if we see a
144                  * dangling \ just before CRLF we need to include it in
145                  * the string */
146                 if (in_quoted_pair) {
147                     buf_putc(&text, '\\');
148                     in_quoted_pair = 0;
149                 }
150                 continue;
151             }
152             else if (in_quoted_pair) {
153                 in_quoted_pair = 0;
154                 buf_putc(&text, *p);
155             }
156             else if (*p == '\\') {
157                 in_quoted_pair = 1;
158                 continue;
159             }
160             else if (*p == '"') {
161                 in_quoted_string = 0;
162                 p++;
163                 break;
164             }
165             else {
166                 buf_putc(&text, *p);
167             }
168         }
169         r = RFC822_QSTRING;
170         if (in_quoted_string || in_quoted_pair)
171             r = -EINVAL;
172         goto out;
173     }
174 
175     /* anything else is an atom */
176     for ( ; p < end ; p++) {
177         if (isspace(*p) || *p == '(' || *p == '"' || is_special(t, *p))
178             break;
179         buf_putc(&text, *p);
180     }
181     r = RFC822_ATOM;
182 
183 out:
184     t->ptr = p;
185     if (textp) *textp = text.len ? (char *)buf_cstring(&text) : NULL;
186     return r;
187 }
188 
189 
190