1 /*
2 **  Routines for headers: manipulation and checks.
3 */
4 
5 #include "portable/system.h"
6 
7 #include <ctype.h>
8 
9 #include "inn/libinn.h"
10 
11 
/*
**  Tell whether the argument is a valid header field name.
**
**  The maximal line length is assumed to have been checked beforehand.
**  Only the requirement of RFC 3977 is enforced:
**
**    o  The name [of a header field] consists of one or more printable
**       US-ASCII characters other than colon.
**
**  Returns false for a NULL pointer or an empty string.
*/
bool
IsValidHeaderName(const char *p)
{
    const unsigned char *cursor = (const unsigned char *) p;

    /* A missing or zero-length name is never valid. */
    if (cursor == NULL || *cursor == '\0')
        return false;

    /* Advance over every acceptable octet.  The scan stops at the first
     * colon or non-printable octet; the terminating NUL is itself not
     * printable, so it always ends the scan. */
    while (*cursor != ':' && isgraph(*cursor))
        cursor++;

    /* The name is valid only when nothing but the end of the string
     * stopped the scan. */
    return *cursor == '\0';
}
37 
38 
/*
**  Check whether the argument is a valid header field body.  It starts
**  after the space following the header field name and its colon.
**  Internationalized header fields encoded in UTF-8 are allowed.
**
**  The body is accepted when all of the following hold:
**
**    o  it is neither NULL nor empty, and is_valid_utf8() accepts it;
**    o  every line break is CRLF or a lone LF (a CR not followed by LF
**       is rejected);
**    o  every line break is immediately followed by SP or TAB, that is
**       to say it is a folding;
**    o  every content line, the last one included, holds at least one
**       octet other than SP and TAB.
**
**  Only octets of the string itself are examined, so the function can
**  safely be given a body that is not preceded by a header field name.
**
**  We currently assume the maximal line length has already been checked.
*/
bool
IsValidHeaderBody(const char *p)
{
    bool emptycontentline = true;

    /* Not NULL and not empty. */
    if (p == NULL || *p == '\0')
        return false;

    if (!is_valid_utf8(p))
        return false;

    for (; *p != '\0'; p++) {
        /* Skip SP and TAB. */
        if (ISWHITE(*p))
            continue;

        /* A CR is only legitimate as the first half of CRLF.  Look ahead
         * rather than behind, so that nothing located before the
         * beginning of the body is ever read. */
        if (*p == '\r') {
            if (p[1] != '\n')
                return false;
            p++;
        }

        if (*p == '\n') {
            /* Folding detected.  We expect CRLF or lone LF as some parts
             * of INN code internally remove CR.
             * Check that the line that has just been processed is not
             * "empty" and that the following character marks the
             * beginning of a continuation line. */
            if (emptycontentline || !ISWHITE(p[1]))
                return false;

            /* A continuation line begins.  This new line should also
             * have at least one octet other than SP or TAB, so we
             * re-initialize emptycontentline to true. */
            emptycontentline = true;
            continue;
        }

        /* Current header content line contains a (non-whitespace)
         * character. */
        emptycontentline = false;
    }

    /* The last content line must not be "empty" either. */
    return !emptycontentline;
}
90 
91 
/*
**  Tell whether the argument is a valid header field, that is to say a
**  non-empty header field name made of printable characters, a colon, a
**  space, and then a body accepted by IsValidHeaderBody().
**
**  The maximal line length is assumed to have been checked beforehand.
*/
bool
IsValidHeaderField(const char *p)
{
    /* Reject a missing or empty field, as well as a field whose name
     * would be empty because it begins with a colon. */
    if (p == NULL || *p == '\0' || *p == ':')
        return false;

    /* Walk through the header field name up to the colon ending it.
     * Every octet before the colon has to be printable; as NUL is not,
     * reaching the end of the string without a colon fails here too. */
    while (*p != ':') {
        if (!isgraph((unsigned char) *p))
            return false;
        p++;
    }

    /* A space must immediately follow the colon.  This test also covers
     * a field ending right after its colon. */
    if (p[1] != ' ')
        return false;

    /* What comes after the colon and the space is the body. */
    return IsValidHeaderBody(p + 2);
}
126 
127 
/*
**  Skip any amount of CFWS (comments and folding whitespace), the RFC 5322
**  grammar term for whitespace, CRLF pairs, and possibly nested comments
**  that may contain escaped parens.  Simple newlines are skipped as well
**  since we don't always deal with wire-format messages.  No attempt is
**  made to ensure that CRLF or a newline is followed by whitespace.
**
**  Returns a pointer to the first character that is not part of CFWS, or
**  to the terminating NUL when the whole string is CFWS or when a comment
**  is left unclosed.  A backslash ending the string inside a comment is
**  returned as is.
*/
const char *
skip_cfws(const char *p)
{
    int depth = 0; /* Number of comments currently open. */

    while (*p != '\0') {
        const char c = *p;

        if (c == '(') {
            depth++;
        } else if (c == ')') {
            /* A closing paren without a matching open one is not CFWS. */
            if (depth == 0)
                return p;
            depth--;
        } else if (c == '\\') {
            /* A quoted pair only makes sense inside a comment, and needs
             * a character to escape. */
            if (depth == 0 || p[1] == '\0')
                return p;
            /* Step over the backslash; the escaped character is then
             * consumed by the increment ending the loop body. */
            p++;
        } else if (c == '\r') {
            /* Outside comments, CR is only skipped as part of CRLF. */
            if (depth == 0 && p[1] != '\n')
                return p;
        } else if (c != ' ' && c != '\t' && c != '\n') {
            /* Any other character is comment text when a comment is
             * open, and the end of CFWS otherwise. */
            if (depth == 0)
                return p;
        }
        p++;
    }
    return p;
}
172 
173 
/*
**  Skip any amount of FWS (folding whitespace), the RFC 5322 grammar term
**  for whitespace and CRLF pairs.  Simple newlines are skipped as well
**  since we don't always deal with wire-format messages.  No attempt is
**  made to ensure that CRLF or a newline is followed by whitespace.
**
**  Returns a pointer to the first character that is not part of FWS, or
**  to the terminating NUL when the whole string is FWS.  A CR that is not
**  followed by LF is not skipped.
*/
const char *
skip_fws(const char *p)
{
    /* SP, TAB and LF are always skipped; CR is only skipped when it is
     * the first half of CRLF, the LF being consumed on the next turn. */
    while (*p == ' ' || *p == '\t' || *p == '\n'
           || (*p == '\r' && p[1] == '\n'))
        p++;

    return p;
}
200