1 /*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9 #include <proxygen/lib/http/experimental/RFC1867.h>
10 #include <proxygen/lib/utils/Logging.h>
11
12 using folly::IOBuf;
13 using folly::IOBufQueue;
14 using folly::StringPiece;
15 using folly::io::Cursor;
16 using std::string;
17
18 namespace {
19 // This is required to get HTTP1xCodec ready to parse a header block
20 const string kDummyGet("GET / HTTP/1.0");
21
22 enum class BoundaryResult { YES, NO, PARTIAL };
23
isBoundary(const IOBuf & buf,uint32_t offset,char const * boundary,size_t boundarylen)24 BoundaryResult isBoundary(const IOBuf& buf,
25 uint32_t offset,
26 char const* boundary,
27 size_t boundarylen) {
28 assert(offset <= buf.length());
29 const IOBuf* crtBuf = &buf;
30 do {
31 size_t crtLen = crtBuf->length() - offset;
32 const uint8_t* crtData = crtBuf->data() + offset;
33 size_t cmplen = std::min(crtLen, boundarylen);
34 if (memcmp(crtData, boundary, cmplen) == 0) {
35 if (cmplen == boundarylen) {
36 return BoundaryResult::YES;
37 } else {
38 // beginning of a partial match
39 boundary += cmplen;
40 boundarylen -= cmplen;
41 }
42 } else {
43 return BoundaryResult::NO;
44 }
45 offset = 0;
46 crtBuf = crtBuf->next();
47 } while (crtBuf != &buf);
48
49 return BoundaryResult::PARTIAL;
50 }
51
52 } // namespace
53
54 namespace proxygen {
55
onIngress(std::unique_ptr<IOBuf> data)56 std::unique_ptr<IOBuf> RFC1867Codec::onIngress(std::unique_ptr<IOBuf> data) {
57 static auto dummyBuf =
58 IOBuf::wrapBuffer(kDummyGet.data(), kDummyGet.length());
59 IOBufQueue result{IOBufQueue::cacheChainLength()};
60 bool foundBoundary = false;
61 BoundaryResult br = BoundaryResult::NO;
62
63 input_.append(std::move(data));
64 while (!input_.empty()) {
65 switch (state_) {
66 case ParserState::START:
67 // first time, must start with boundary without leading \n
68 br = isBoundary(
69 *input_.front(), 0, boundary_.data() + 1, boundary_.length() - 1);
70 if (br == BoundaryResult::NO) {
71 if (callback_) {
72 LOG(ERROR) << "Invalid starting sequence";
73 callback_->onError();
74 }
75 state_ = ParserState::ERROR;
76 return nullptr;
77 } else if (br == BoundaryResult::PARTIAL) {
78 return input_.move();
79 }
80 input_.trimStart(boundary_.length() - 1);
81 bytesProcessed_ += boundary_.length() - 1;
82 state_ = ParserState::HEADERS_START;
83 // fall through
84
85 case ParserState::HEADERS_START: {
86 if (input_.chainLength() < 3) {
87 return input_.move();
88 }
89 Cursor c(input_.front());
90 char firstTwo[2];
91 c.pull(firstTwo, 2);
92 // We have at least 3 chars available to read
93 uint8_t toTrim = 3;
94 if (memcmp(firstTwo, "--", 2) == 0) {
95 do {
96 auto ch = c.read<char>();
97 if (ch == '\n') {
98 input_.trimStart(toTrim);
99 state_ = ParserState::DONE;
100 } else if (ch == '\r') {
101 // Every \r we encounter is a char we must trim but we must
102 // make sure we have sufficient data available in input_ to
103 // keep reading (toTrim is always one pos ahead to handle the
104 // expected \n)
105 ++toTrim;
106 if (input_.chainLength() < toTrim) {
107 return input_.move();
108 }
109 } else {
110 state_ = ParserState::ERROR;
111 }
112 } while (state_ == ParserState::HEADERS_START);
113 break;
114 }
115 }
116 headerParser_.setParserPaused(false);
117 headerParser_.onIngress(*dummyBuf);
118 CHECK(!parseError_);
119 state_ = ParserState::HEADERS;
120 // fall through
121
122 case ParserState::HEADERS:
123 while (!parseError_ && input_.front() &&
124 state_ == ParserState::HEADERS) {
125 size_t bytesParsed = headerParser_.onIngress(*input_.front());
126 input_.trimStart(bytesParsed);
127 bytesProcessed_ += bytesParsed;
128 }
129 if (parseError_) {
130 if (callback_) {
131 LOG(ERROR) << "Error parsing header data: ";
132 VLOG(3) << IOBufPrinter::printHexFolly(input_.front());
133 callback_->onError();
134 }
135 state_ = ParserState::ERROR;
136 return nullptr;
137 }
138 break;
139
140 case ParserState::FIELD_DATA:
141 result = readToBoundary(foundBoundary);
142 value_.append(result.move());
143 if (!value_.empty() && callback_) {
144 if (callback_->onFieldData(value_.move(), bytesProcessed_) < 0) {
145 LOG(ERROR) << "Callback returned error";
146 state_ = ParserState::ERROR;
147 return nullptr;
148 }
149 }
150 if (foundBoundary) {
151 if (callback_) {
152 callback_->onFieldEnd(true, bytesProcessed_);
153 }
154 state_ = ParserState::HEADERS_START;
155 } else {
156 if (input_.chainLength() > 0) {
157 VLOG(5) << "Trailing input="
158 << IOBufPrinter::printHexFolly(input_.front());
159 }
160 return input_.move();
161 }
162 break;
163 case ParserState::DONE:
164 case ParserState::ERROR:
165 // abort, consume all input
166 return nullptr;
167 }
168 }
169 return nullptr;
170 }
171
onHeadersComplete(HTTPCodec::StreamID,std::unique_ptr<HTTPMessage> msg)172 void RFC1867Codec::onHeadersComplete(HTTPCodec::StreamID /*stream*/,
173 std::unique_ptr<HTTPMessage> msg) {
174 static const StringPiece kName("name", 4);
175 static const StringPiece kFilename("filename", 8);
176 static const StringPiece kFormData("form-data", 9);
177
178 const auto& contentDisp =
179 msg->getHeaders().getSingleOrEmpty(HTTP_HEADER_CONTENT_DISPOSITION);
180 string name;
181 folly::Optional<string> filename; // filename is optional
182 HTTPMessage::splitNameValuePieces(
183 contentDisp,
184 ';',
185 '=',
186 [&](folly::StringPiece parameter, folly::StringPiece value) {
187 // TODO: Trim whitespace first
188 // Strip quotes if present
189 if (value.size() >= 2 && value[0] == '\"' &&
190 value[value.size() - 1] == '\"') {
191 value.reset(value.data() + 1, value.size() - 2);
192 }
193 if (parameter == kName) {
194 name = value.str();
195 } else if (parameter == kFilename) {
196 filename = value.str();
197 } else if (parameter != kFormData) {
198 LOG(WARNING) << "Ignoring parameter " << parameter << " value \""
199 << value << '"';
200 }
201 });
202 if (name.empty()) {
203 if (callback_) {
204 LOG(ERROR) << "name empty";
205 callback_->onError();
206 }
207 state_ = ParserState::ERROR;
208 return;
209 } else {
210 state_ = ParserState::FIELD_DATA;
211 if (callback_ && callback_->onFieldStart(
212 name, filename, std::move(msg), bytesProcessed_) < 0) {
213 field_ = name;
214 LOG(WARNING) << "Callback returned error";
215 state_ = ParserState::ERROR;
216 }
217 }
218 }
219
readToBoundary(bool & foundBoundary)220 IOBufQueue RFC1867Codec::readToBoundary(bool& foundBoundary) {
221 IOBufQueue result{IOBufQueue::cacheChainLength()};
222 BoundaryResult boundaryResult = BoundaryResult::NO;
223
224 while (!input_.empty() && boundaryResult != BoundaryResult::PARTIAL) {
225 const IOBuf* head = input_.front();
226 uint64_t len = head->length();
227 const uint8_t* ptr = head->data();
228
229 /* iterate through first character matches */
230 while (len > 0 && (ptr = (const uint8_t*)memchr(ptr, boundary_[0], len))) {
231 /* calculate length after match */
232 uint64_t readlen = (ptr - head->data());
233 len = head->length() - readlen;
234 boundaryResult =
235 isBoundary(*head, readlen, boundary_.data(), boundary_.length());
236 if (boundaryResult == BoundaryResult::YES) {
237 CHECK(readlen < head->length());
238 bool hasCr = false;
239 if (readlen == 0 && pendingCR_) {
240 pendingCR_.reset();
241 }
242 if (readlen > 0) {
243 // If the last read char is a CR omit from result
244 Cursor c(head);
245 c.skip(readlen - 1);
246 uint8_t ch = c.read<uint8_t>();
247 if (ch == '\r') {
248 --readlen;
249 hasCr = true;
250 }
251 }
252 result.append(std::move(pendingCR_));
253 result.append(input_.split(readlen));
254 uint32_t trimLen = boundary_.length() + (hasCr ? 1 : 0);
255 input_.trimStart(trimLen);
256 bytesProcessed_ += readlen + trimLen;
257 foundBoundary = true;
258 return result;
259 } else if (boundaryResult == BoundaryResult::PARTIAL) {
260 break;
261 } else if (pendingCR_) {
262 // not a match, append pending CR to result
263 result.append(std::move(pendingCR_));
264 }
265
266 /* next character */
267 ptr++;
268 len--;
269 }
270 uint64_t resultLen = ptr ? ptr - head->data() : head->length();
271 // Put pendingCR_ in result if there was no partial match in head, or a
272 // partial match starting after the first character
273 if ((boundaryResult == BoundaryResult::NO || resultLen > 0) && pendingCR_) {
274 result.append(std::move(pendingCR_));
275 }
276 // the boundary does not start through resultLen, append it
277 // to result, except maybe the last char if it's a CR.
278 if (resultLen > 0 && head->data()[resultLen - 1] == '\r') {
279 result.append(input_.split(resultLen - 1));
280 CHECK(!pendingCR_);
281 pendingCR_ = input_.split(1);
282 } else {
283 result.append(input_.split(resultLen));
284 }
285 bytesProcessed_ += resultLen;
286 }
287
288 // reached the end but no boundary found
289 foundBoundary = false;
290
291 return result;
292 }
293
onIngressEOM()294 void RFC1867Codec::onIngressEOM() {
295 if (state_ == ParserState::FIELD_DATA) {
296 LOG(WARNING) << "Field not terminated by boundary";
297 if (callback_) {
298 callback_->onFieldEnd(false, bytesProcessed_);
299 }
300 }
301 if (state_ != ParserState::HEADERS_START && state_ != ParserState::ERROR &&
302 state_ != ParserState::DONE) {
303 if (callback_) {
304 LOG(ERROR) << "onIngressEOM with state_=" << (uint8_t)state_;
305 callback_->onError();
306 }
307 }
308 state_ = ParserState::START;
309 }
310
311 } // namespace proxygen
312