1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD-style license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 #include <proxygen/lib/http/experimental/RFC1867.h>
10 #include <proxygen/lib/utils/Logging.h>
11 
12 using folly::IOBuf;
13 using folly::IOBufQueue;
14 using folly::StringPiece;
15 using folly::io::Cursor;
16 using std::string;
17 
18 namespace {
// Request line fed to the header parser before each part's header block.
// This is required to get HTTP1xCodec ready to parse a header block
const string kDummyGet("GET / HTTP/1.0");

// Outcome of comparing a boundary string against buffered input:
//   YES     - every byte of the boundary matched
//   NO      - a mismatching byte was found
//   PARTIAL - all available bytes matched, but the input ended before the
//             whole boundary could be compared
enum class BoundaryResult { YES, NO, PARTIAL };
23 
isBoundary(const IOBuf & buf,uint32_t offset,char const * boundary,size_t boundarylen)24 BoundaryResult isBoundary(const IOBuf& buf,
25                           uint32_t offset,
26                           char const* boundary,
27                           size_t boundarylen) {
28   assert(offset <= buf.length());
29   const IOBuf* crtBuf = &buf;
30   do {
31     size_t crtLen = crtBuf->length() - offset;
32     const uint8_t* crtData = crtBuf->data() + offset;
33     size_t cmplen = std::min(crtLen, boundarylen);
34     if (memcmp(crtData, boundary, cmplen) == 0) {
35       if (cmplen == boundarylen) {
36         return BoundaryResult::YES;
37       } else {
38         // beginning of a partial match
39         boundary += cmplen;
40         boundarylen -= cmplen;
41       }
42     } else {
43       return BoundaryResult::NO;
44     }
45     offset = 0;
46     crtBuf = crtBuf->next();
47   } while (crtBuf != &buf);
48 
49   return BoundaryResult::PARTIAL;
50 }
51 
52 } // namespace
53 
54 namespace proxygen {
55 
onIngress(std::unique_ptr<IOBuf> data)56 std::unique_ptr<IOBuf> RFC1867Codec::onIngress(std::unique_ptr<IOBuf> data) {
57   static auto dummyBuf =
58       IOBuf::wrapBuffer(kDummyGet.data(), kDummyGet.length());
59   IOBufQueue result{IOBufQueue::cacheChainLength()};
60   bool foundBoundary = false;
61   BoundaryResult br = BoundaryResult::NO;
62 
63   input_.append(std::move(data));
64   while (!input_.empty()) {
65     switch (state_) {
66       case ParserState::START:
67         // first time, must start with boundary without leading \n
68         br = isBoundary(
69             *input_.front(), 0, boundary_.data() + 1, boundary_.length() - 1);
70         if (br == BoundaryResult::NO) {
71           if (callback_) {
72             LOG(ERROR) << "Invalid starting sequence";
73             callback_->onError();
74           }
75           state_ = ParserState::ERROR;
76           return nullptr;
77         } else if (br == BoundaryResult::PARTIAL) {
78           return input_.move();
79         }
80         input_.trimStart(boundary_.length() - 1);
81         bytesProcessed_ += boundary_.length() - 1;
82         state_ = ParserState::HEADERS_START;
83         // fall through
84 
85       case ParserState::HEADERS_START: {
86         if (input_.chainLength() < 3) {
87           return input_.move();
88         }
89         Cursor c(input_.front());
90         char firstTwo[2];
91         c.pull(firstTwo, 2);
92         // We have at least 3 chars available to read
93         uint8_t toTrim = 3;
94         if (memcmp(firstTwo, "--", 2) == 0) {
95           do {
96             auto ch = c.read<char>();
97             if (ch == '\n') {
98               input_.trimStart(toTrim);
99               state_ = ParserState::DONE;
100             } else if (ch == '\r') {
101               // Every \r we encounter is a char we must trim but we must
102               // make sure we have sufficient data available in input_ to
103               // keep reading (toTrim is always one pos ahead to handle the
104               // expected \n)
105               ++toTrim;
106               if (input_.chainLength() < toTrim) {
107                 return input_.move();
108               }
109             } else {
110               state_ = ParserState::ERROR;
111             }
112           } while (state_ == ParserState::HEADERS_START);
113           break;
114         }
115       }
116         headerParser_.setParserPaused(false);
117         headerParser_.onIngress(*dummyBuf);
118         CHECK(!parseError_);
119         state_ = ParserState::HEADERS;
120         // fall through
121 
122       case ParserState::HEADERS:
123         while (!parseError_ && input_.front() &&
124                state_ == ParserState::HEADERS) {
125           size_t bytesParsed = headerParser_.onIngress(*input_.front());
126           input_.trimStart(bytesParsed);
127           bytesProcessed_ += bytesParsed;
128         }
129         if (parseError_) {
130           if (callback_) {
131             LOG(ERROR) << "Error parsing header data: ";
132             VLOG(3) << IOBufPrinter::printHexFolly(input_.front());
133             callback_->onError();
134           }
135           state_ = ParserState::ERROR;
136           return nullptr;
137         }
138         break;
139 
140       case ParserState::FIELD_DATA:
141         result = readToBoundary(foundBoundary);
142         value_.append(result.move());
143         if (!value_.empty() && callback_) {
144           if (callback_->onFieldData(value_.move(), bytesProcessed_) < 0) {
145             LOG(ERROR) << "Callback returned error";
146             state_ = ParserState::ERROR;
147             return nullptr;
148           }
149         }
150         if (foundBoundary) {
151           if (callback_) {
152             callback_->onFieldEnd(true, bytesProcessed_);
153           }
154           state_ = ParserState::HEADERS_START;
155         } else {
156           if (input_.chainLength() > 0) {
157             VLOG(5) << "Trailing input="
158                     << IOBufPrinter::printHexFolly(input_.front());
159           }
160           return input_.move();
161         }
162         break;
163       case ParserState::DONE:
164       case ParserState::ERROR:
165         // abort, consume all input
166         return nullptr;
167     }
168   }
169   return nullptr;
170 }
171 
onHeadersComplete(HTTPCodec::StreamID,std::unique_ptr<HTTPMessage> msg)172 void RFC1867Codec::onHeadersComplete(HTTPCodec::StreamID /*stream*/,
173                                      std::unique_ptr<HTTPMessage> msg) {
174   static const StringPiece kName("name", 4);
175   static const StringPiece kFilename("filename", 8);
176   static const StringPiece kFormData("form-data", 9);
177 
178   const auto& contentDisp =
179       msg->getHeaders().getSingleOrEmpty(HTTP_HEADER_CONTENT_DISPOSITION);
180   string name;
181   folly::Optional<string> filename; // filename is optional
182   HTTPMessage::splitNameValuePieces(
183       contentDisp,
184       ';',
185       '=',
186       [&](folly::StringPiece parameter, folly::StringPiece value) {
187         // TODO: Trim whitespace first
188         // Strip quotes if present
189         if (value.size() >= 2 && value[0] == '\"' &&
190             value[value.size() - 1] == '\"') {
191           value.reset(value.data() + 1, value.size() - 2);
192         }
193         if (parameter == kName) {
194           name = value.str();
195         } else if (parameter == kFilename) {
196           filename = value.str();
197         } else if (parameter != kFormData) {
198           LOG(WARNING) << "Ignoring parameter " << parameter << " value \""
199                        << value << '"';
200         }
201       });
202   if (name.empty()) {
203     if (callback_) {
204       LOG(ERROR) << "name empty";
205       callback_->onError();
206     }
207     state_ = ParserState::ERROR;
208     return;
209   } else {
210     state_ = ParserState::FIELD_DATA;
211     if (callback_ && callback_->onFieldStart(
212                          name, filename, std::move(msg), bytesProcessed_) < 0) {
213       field_ = name;
214       LOG(WARNING) << "Callback returned error";
215       state_ = ParserState::ERROR;
216     }
217   }
218 }
219 
// Moves field data from input_ into the returned queue, stopping at the next
// occurrence of boundary_.
//
// @param foundBoundary  set to true if a complete boundary was found (the
//                       boundary, plus a '\r' directly before it, is trimmed
//                       from input_ and not returned); set to false otherwise
// @return the bytes that precede the boundary, or all bytes known not to be
//         part of a boundary when none was found
//
// A '\r' at the end of the consumed data is held back in pendingCR_ because
// it may turn out to directly precede a boundary. It is appended to the
// result once a non-matching candidate or more data is seen, and discarded
// if a boundary starts at the very beginning of the next buffer.
// bytesProcessed_ is advanced by the number of bytes removed from input_.
IOBufQueue RFC1867Codec::readToBoundary(bool& foundBoundary) {
  IOBufQueue result{IOBufQueue::cacheChainLength()};
  BoundaryResult boundaryResult = BoundaryResult::NO;

  // Process one head buffer per iteration; a partial match ends the scan
  // because more input is needed to resolve it.
  while (!input_.empty() && boundaryResult != BoundaryResult::PARTIAL) {
    const IOBuf* head = input_.front();
    uint64_t len = head->length();
    const uint8_t* ptr = head->data();

    /* iterate through first character matches */
    while (len > 0 && (ptr = (const uint8_t*)memchr(ptr, boundary_[0], len))) {
      /* calculate length after match */
      uint64_t readlen = (ptr - head->data());
      len = head->length() - readlen;
      // isBoundary follows the chain, so a boundary spanning several
      // buffers is still detected from here.
      boundaryResult =
          isBoundary(*head, readlen, boundary_.data(), boundary_.length());
      if (boundaryResult == BoundaryResult::YES) {
        CHECK(readlen < head->length());
        bool hasCr = false;
        // Boundary starts at the first byte of head: a held-back CR directly
        // precedes it, so it is dropped rather than returned as data.
        if (readlen == 0 && pendingCR_) {
          pendingCR_.reset();
        }
        if (readlen > 0) {
          // If the last read char is a CR omit from result
          Cursor c(head);
          c.skip(readlen - 1);
          uint8_t ch = c.read<uint8_t>();
          if (ch == '\r') {
            --readlen;
            hasCr = true;
          }
        }
        // pendingCR_ is either already reset above or is data that precedes
        // the bytes split off below.
        result.append(std::move(pendingCR_));
        result.append(input_.split(readlen));
        // Remove the boundary itself, plus the CR excluded from readlen.
        uint32_t trimLen = boundary_.length() + (hasCr ? 1 : 0);
        input_.trimStart(trimLen);
        bytesProcessed_ += readlen + trimLen;
        foundBoundary = true;
        return result;
      } else if (boundaryResult == BoundaryResult::PARTIAL) {
        // ptr stays at the start of the partial match
        break;
      } else if (pendingCR_) {
        // not a match, append pending CR to result
        result.append(std::move(pendingCR_));
      }

      /* next character */
      ptr++;
      len--;
    }
    // ptr is null when memchr found no further candidate (all of head is
    // data); otherwise it marks the partial match or the end of head.
    uint64_t resultLen = ptr ? ptr - head->data() : head->length();
    // Put pendingCR_ in result if there was no partial match in head, or a
    // partial match starting after the first character
    if ((boundaryResult == BoundaryResult::NO || resultLen > 0) && pendingCR_) {
      result.append(std::move(pendingCR_));
    }
    // the boundary does not start through resultLen, append it
    // to result, except maybe the last char if it's a CR.
    if (resultLen > 0 && head->data()[resultLen - 1] == '\r') {
      result.append(input_.split(resultLen - 1));
      CHECK(!pendingCR_);
      pendingCR_ = input_.split(1);
    } else {
      result.append(input_.split(resultLen));
    }
    // The held-back CR is counted here, as part of resultLen.
    bytesProcessed_ += resultLen;
  }

  // reached the end but no boundary found
  foundBoundary = false;

  return result;
}
293 
onIngressEOM()294 void RFC1867Codec::onIngressEOM() {
295   if (state_ == ParserState::FIELD_DATA) {
296     LOG(WARNING) << "Field not terminated by boundary";
297     if (callback_) {
298       callback_->onFieldEnd(false, bytesProcessed_);
299     }
300   }
301   if (state_ != ParserState::HEADERS_START && state_ != ParserState::ERROR &&
302       state_ != ParserState::DONE) {
303     if (callback_) {
304       LOG(ERROR) << "onIngressEOM with state_=" << (uint8_t)state_;
305       callback_->onError();
306     }
307   }
308   state_ = ParserState::START;
309 }
310 
311 } // namespace proxygen
312