1 /* rfc822tok.c -- RFC 822/RFC 2822 tokenizer
2 *
3 * Copyright (c) 2012 Carnegie Mellon University. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 *
17 * 3. The name "Carnegie Mellon University" must not be used to
18 * endorse or promote products derived from this software without
19 * prior written permission. For permission or any legal
20 * details, please contact
21 * Carnegie Mellon University
22 * Center for Technology Transfer and Enterprise Creation
23 * 4615 Forbes Avenue
24 * Suite 302
25 * Pittsburgh, PA 15213
26 * (412) 268-7393, fax: (412) 268-7395
27 * innovation@andrew.cmu.edu
28 *
29 * 4. Redistributions of any form whatsoever must retain the following
30 * acknowledgment:
31 * "This product includes software developed by Computing Services
32 * at Carnegie Mellon University (http://www.cmu.edu/computing/)."
33 *
34 * CARNEGIE MELLON UNIVERSITY DISCLAIMS ALL WARRANTIES WITH REGARD TO
35 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
36 * AND FITNESS, IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
37 * FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
38 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
39 * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
40 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
41 */
42
43 #include <config.h>
44 #include <string.h>
45 #include <ctype.h>
46 #include <errno.h>
47 #include "rfc822tok.h"
48
rfc822tok_init(rfc822tok_t * t,const char * base,unsigned int len,unsigned int flags)49 EXPORTED void rfc822tok_init(rfc822tok_t *t, const char *base,
50 unsigned int len, unsigned int flags)
51 {
52 buf_init_ro(&t->buf, base, len);
53 t->ptr = base;
54 t->flags = flags;
55 }
56
rfc822tok_init_buf(rfc822tok_t * t,const struct buf * b,unsigned int flags)57 EXPORTED void rfc822tok_init_buf(rfc822tok_t *t, const struct buf *b,
58 unsigned int flags)
59 {
60 rfc822tok_init(t, (b ? b->s : NULL), (b ? b->len : 0), flags);
61 }
62
rfc822tok_fini(rfc822tok_t * t)63 EXPORTED void rfc822tok_fini(rfc822tok_t *t)
64 {
65 buf_free(&t->buf);
66 }
67
/*
 * Report whether `c` is one of the single-character "special" tokens.
 *
 * Returns 1 for the fixed set ( ) < > [ ] : ; @ \ , and additionally for
 * '.' when t->flags has RFC822_SPECIAL_DOT set, and for '=' when
 * t->flags has RFC822_SPECIAL_EQUAL set.  Returns 0 for everything else,
 * including a NUL byte.  The double quote is deliberately absent from
 * the set; the caller handles quoted strings itself.
 */
static inline int is_special(rfc822tok_t *t, int c)
{
    /* strchr() treats the terminating NUL of the set as part of the
     * string, so searching for 0 would "succeed"; a NUL byte in the
     * input is not a special and must be rejected up front */
    if (c == '\0')
        return 0;
    /* These specials are defined by RFC2822 */
    if (strchr("()<>[]:;@\\,", c))
        return 1;
    /* ...except '.' sometimes is and sometimes isn't special */
    if (c == '.' && (t->flags & RFC822_SPECIAL_DOT))
        return 1;
    /* ...and '=' sometimes is and sometimes isn't */
    if (c == '=' && (t->flags & RFC822_SPECIAL_EQUAL))
        return 1;
    return 0;
}
81
rfc822tok_next(rfc822tok_t * t,char ** textp)82 EXPORTED int rfc822tok_next(rfc822tok_t *t, char **textp)
83 {
84 const char *p;
85 const char *end;
86 int comment_depth = 0;
87 static struct buf text = BUF_INITIALIZER;
88 int r;
89
90 buf_reset(&text);
91 if (textp) *textp = NULL;
92 if (!t->buf.len)
93 return EOF;
94
95 end = t->buf.s + t->buf.len;
96 p = t->ptr;
97 if (p >= end)
98 return EOF;
99
100 /* skip any leading whitespace and comments */
101 for ( ; p < end ; p++) {
102 if (comment_depth) {
103 if (*p == '\\')
104 p++;
105 else if (*p == ')')
106 comment_depth--;
107 else if (*p == '(')
108 comment_depth++;
109 }
110 else if (*p == '(') {
111 comment_depth++;
112 }
113 else if (!isspace(*p)) {
114 break;
115 }
116 }
117 if (comment_depth) {
118 r = -EINVAL;
119 goto out;
120 }
121 if (p >= end) {
122 r = EOF;
123 goto out;
124 }
125
126 /* RFC2822 specials are single-char tokens */
127 if (is_special(t, *p)) {
128 r = *p++;
129 goto out;
130 }
131
132 if (*p == '"') {
133 /* parse quoted-string per RFC2822 section 3.2.5 */
134 int in_quoted_pair = 0;
135 int in_quoted_string = 1;
136
137 for (p++ ; p < end ; p++) {
138 if (*p == '\r' && p+1 < end && p[1] == '\n') {
139 /* elide CRLF inside a quoted string */
140 p++;
141 /* a close reading of RFC2822 shows that \ is only
142 * semantically invisible when part of a quoted-pair,
143 * and CRLF is not part of a quoted-pair; so if we see a
144 * dangling \ just before CRLF we need to include it in
145 * the string */
146 if (in_quoted_pair) {
147 buf_putc(&text, '\\');
148 in_quoted_pair = 0;
149 }
150 continue;
151 }
152 else if (in_quoted_pair) {
153 in_quoted_pair = 0;
154 buf_putc(&text, *p);
155 }
156 else if (*p == '\\') {
157 in_quoted_pair = 1;
158 continue;
159 }
160 else if (*p == '"') {
161 in_quoted_string = 0;
162 p++;
163 break;
164 }
165 else {
166 buf_putc(&text, *p);
167 }
168 }
169 r = RFC822_QSTRING;
170 if (in_quoted_string || in_quoted_pair)
171 r = -EINVAL;
172 goto out;
173 }
174
175 /* anything else is an atom */
176 for ( ; p < end ; p++) {
177 if (isspace(*p) || *p == '(' || *p == '"' || is_special(t, *p))
178 break;
179 buf_putc(&text, *p);
180 }
181 r = RFC822_ATOM;
182
183 out:
184 t->ptr = p;
185 if (textp) *textp = text.len ? (char *)buf_cstring(&text) : NULL;
186 return r;
187 }
188
189
190