1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "extensions/browser/api/web_request/form_data_parser.h"
6 
7 #include <stddef.h>
8 
9 #include <vector>
10 
11 #include "base/check.h"
12 #include "base/lazy_instance.h"
13 #include "base/notreached.h"
14 #include "base/stl_util.h"
15 #include "base/strings/string_util.h"
16 #include "base/values.h"
17 #include "net/base/escape.h"
18 #include "net/http/http_request_headers.h"
19 #if defined(OS_BSD)
20 #include <re2/re2.h>
21 #else
22 #include "third_party/re2/src/re2/re2.h"
23 #endif
24 
25 using base::DictionaryValue;
26 using base::ListValue;
27 using base::StringPiece;
28 using re2::RE2;
29 
30 namespace extensions {
31 
32 namespace {
33 
34 const char kContentDisposition[] = "content-disposition:";
35 const size_t kContentDispositionLength = base::size(kContentDisposition) - 1;
36 // kCharacterPattern is an allowed character in a URL encoding. Definition is
37 // from RFC 1738, end of section 2.2.
38 const char kCharacterPattern[] =
39     "(?:[a-zA-Z0-9$_.+!*'(),]|-|(?:%[a-fA-F0-9]{2}))";
40 const char kEscapeClosingQuote[] = "\\\\E";
41 
42 // A wrapper struct for static RE2 objects to be held as LazyInstance.
43 struct Patterns {
44   Patterns();
45   // Patterns is only instantiated as a leaky LazyInstance, so the destructor
46   // is never called.
47   ~Patterns() = delete;
48   const RE2 transfer_padding_pattern;
49   const RE2 crlf_pattern;
50   const RE2 closing_pattern;
51   const RE2 epilogue_pattern;
52   const RE2 crlf_free_pattern;
53   const RE2 preamble_pattern;
54   const RE2 header_pattern;
55   const RE2 content_disposition_pattern;
56   const RE2 name_pattern;
57   const RE2 value_pattern;
58   const RE2 unquote_pattern;
59   const RE2 url_encoded_pattern;
60   const RE2 content_type_octet_stream;
61 };
62 
Patterns()63 Patterns::Patterns()
64     : transfer_padding_pattern("[ \\t]*\\r\\n"),
65       crlf_pattern("\\r\\n"),
66       closing_pattern("--[ \\t]*"),
67       epilogue_pattern("|\\r\\n(?s:.)*"),
68       crlf_free_pattern("(?:[^\\r]|\\r+[^\\r\\n])*"),
69       preamble_pattern(".+?"),
70       header_pattern("[!-9;-~]+:(.|\\r\\n[\\t ])*\\r\\n"),
71       content_disposition_pattern(std::string("(?i:") + kContentDisposition +
72                                   ")"),
73       name_pattern("\\bname=\"([^\"]*)\""),
74       value_pattern("\\bfilename=\"([^\"]*)\""),
75       unquote_pattern(kEscapeClosingQuote),
76       url_encoded_pattern(std::string("(") + kCharacterPattern + "*)=(" +
77                           kCharacterPattern + "*)"),
78       content_type_octet_stream(
79           "Content-Type: application\\/octet-stream\\r\\n") {}
80 
81 base::LazyInstance<Patterns>::Leaky g_patterns = LAZY_INSTANCE_INITIALIZER;
82 
83 }  // namespace
84 
85 // Parses URLencoded forms, see
86 // http://www.w3.org/TR/REC-html40-971218/interact/forms.html#h-17.13.4.1 .
87 class FormDataParserUrlEncoded : public FormDataParser {
88  public:
89   FormDataParserUrlEncoded();
90   ~FormDataParserUrlEncoded() override;
91 
92   // Implementation of FormDataParser.
93   bool AllDataReadOK() override;
94   bool GetNextNameValue(Result* result) override;
95   bool SetSource(base::StringPiece source) override;
96 
97  private:
98   // Returns the pattern to match a single name-value pair. This could be even
99   // static, but then we would have to spend more code on initializing the
100   // cached pointer to g_patterns.Get().
pattern() const101   const RE2& pattern() const {
102     return patterns_->url_encoded_pattern;
103   }
104 
105   // Auxiliary constant for using RE2. Number of arguments for parsing
106   // name-value pairs (one for name, one for value).
107   static const size_t args_size_ = 2u;
108 
109   re2::StringPiece source_;
110   bool source_set_;
111   bool source_malformed_;
112 
113   // Auxiliary store for using RE2.
114   std::string name_;
115   std::string value_;
116   const RE2::Arg arg_name_;
117   const RE2::Arg arg_value_;
118   const RE2::Arg* args_[args_size_];
119 
120   // Caching the pointer to g_patterns.Get().
121   const Patterns* patterns_;
122 
123   DISALLOW_COPY_AND_ASSIGN(FormDataParserUrlEncoded);
124 };
125 
126 // The following class, FormDataParserMultipart, parses forms encoded as
127 // multipart, defined in RFCs 2388 (specific to forms), 2046 (multipart
128 // encoding) and 5322 (MIME-headers).
129 //
130 // Implementation details
131 //
132 // The original grammar from RFC 2046 is this, "multipart-body" being the root
133 // non-terminal:
134 //
135 // boundary := 0*69<bchars> bcharsnospace
136 // bchars := bcharsnospace / " "
137 // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / ","
138 //                  / "-" / "." / "/" / ":" / "=" / "?"
139 // dash-boundary := "--" boundary
140 // multipart-body := [preamble CRLF]
141 //                        dash-boundary transport-padding CRLF
142 //                        body-part *encapsulation
143 //                        close-delimiter transport-padding
144 //                        [CRLF epilogue]
145 // transport-padding := *LWSP-char
146 // encapsulation := delimiter transport-padding CRLF body-part
147 // delimiter := CRLF dash-boundary
148 // close-delimiter := delimiter "--"
149 // preamble := discard-text
150 // epilogue := discard-text
151 // discard-text := *(*text CRLF) *text
152 // body-part := MIME-part-headers [CRLF *OCTET]
153 // OCTET := <any 0-255 octet value>
154 //
155 // Uppercase non-terminals are defined in RFC 5234, Appendix B.1; i.e. CRLF,
156 // DIGIT, and ALPHA stand for "\r\n", '0'-'9' and the set of letters of the
157 // English alphabet, respectively.
158 // The non-terminal "text" is presumably just any text, excluding line breaks.
159 // The non-terminal "LWSP-char" is not directly defined in the original grammar
160 // but it means "linear whitespace", which is a space or a horizontal tab.
161 // The non-terminal "MIME-part-headers" is not discussed in RFC 2046, so we use
162 // the syntax for "optional fields" from Section 3.6.8 of RFC 5322:
163 //
164 // MIME-part-headers := field-name ":" unstructured CRLF
165 // field-name := 1*ftext
166 // ftext := %d33-57 /          ; Printable US-ASCII
167 //          %d59-126           ;  characters not including ":".
168 // Based on Section 2.2.1 of RFC 5322, "unstructured" matches any string which
169 // does not contain a CRLF sub-string, except for substrings "CRLF<space>" and
170 // "CRLF<horizontal tab>", which serve for "folding".
171 //
// The FormDataParserMultipart class reads the input source and tries to parse
// it according to the grammar above, rooted at the "multipart-body"
// non-terminal. This happens in stages:
175 //
176 // 1. The optional preamble and the initial dash-boundary with transport padding
177 // and a CRLF are read and ignored.
178 //
179 // 2. Repeatedly each body part is read. The body parts can either serve to
180 //    upload a file, or just a string of bytes.
181 // 2.a. The headers of that part are searched for the "content-disposition"
182 //      header, which contains the name of the value represented by that body
183 //      part. If the body-part is for file upload, that header also contains a
184 //      filename.
185 // 2.b. The "*OCTET" part of the body part is then read and passed as the value
186 //      of the name-value pair for body parts representing a string of bytes.
187 //      For body parts for uploading a file the "*OCTET" part is just ignored
188 //      and the filename is used for value instead.
189 //
190 // 3. The final close-delimiter and epilogue are read and ignored.
191 //
192 // IMPORTANT NOTE
193 // This parser supports sources split into multiple chunks. Therefore SetSource
194 // can be called multiple times if the source is spread over several chunks.
195 // However, the split may only occur inside a body part, right after the
196 // trailing CRLF of headers.
197 class FormDataParserMultipart : public FormDataParser {
198  public:
199   explicit FormDataParserMultipart(const std::string& boundary_separator);
200   ~FormDataParserMultipart() override;
201 
202   // Implementation of FormDataParser.
203   bool AllDataReadOK() override;
204   bool GetNextNameValue(Result* result) override;
205   bool SetSource(base::StringPiece source) override;
206 
207  private:
208   enum State {
209     STATE_INIT,      // No input read yet.
210     STATE_READY,     // Ready to call GetNextNameValue.
211     STATE_FINISHED,  // Read the input until the end.
212     STATE_SUSPEND,   // Waiting until a new |source_| is set.
213     STATE_ERROR
214   };
215 
216   // Produces a regexp to match the string "--" + |literal|. The idea is to
217   // represent "--" + |literal| as a "quoted pattern", a verbatim copy enclosed
218   // in "\\Q" and "\\E". The only catch is to watch out for occurrences of "\\E"
219   // inside |literal|. Those must be excluded from the quote and the backslash
220   // doubly escaped. For example, for literal == "abc\\Edef" the result is
221   // "\\Q--abc\\E\\\\E\\Qdef\\E".
222   static std::string CreateBoundaryPatternFromLiteral(
223       const std::string& literal);
224 
225   // Tests whether |input| has a prefix matching |pattern|.
226   static bool StartsWithPattern(const re2::StringPiece& input,
227                                 const RE2& pattern);
228 
229   // If |source_| starts with a header, seeks |source_| beyond the header. If
230   // the header is Content-Disposition, extracts |name| from "name=" and
231   // possibly |value| from "filename=" fields of that header. Only if the
232   // "name" or "filename" fields are found, then |name| or |value| are touched.
233   // Returns true iff |source_| is seeked forward. Sets |value_assigned|
234   // to true iff |value| has been assigned to. Sets |value_is_binary| to true if
235   // header has content-type: application/octet-stream.
236   bool TryReadHeader(base::StringPiece* name,
237                      base::StringPiece* value,
238                      bool* value_assigned,
239                      bool* value_is_binary);
240 
241   // Helper to GetNextNameValue. Expects that the input starts with a data
242   // portion of a body part. An attempt is made to read the input until the end
243   // of that body part. If |data| is not NULL, it is set to contain the data
244   // portion. Returns true iff the reading was successful.
245   bool FinishReadingPart(base::StringPiece* data);
246 
247   // These methods could be even static, but then we would have to spend more
248   // code on initializing the cached pointer to g_patterns.Get().
transfer_padding_pattern() const249   const RE2& transfer_padding_pattern() const {
250     return patterns_->transfer_padding_pattern;
251   }
crlf_pattern() const252   const RE2& crlf_pattern() const {
253     return patterns_->crlf_pattern;
254   }
closing_pattern() const255   const RE2& closing_pattern() const {
256     return patterns_->closing_pattern;
257   }
epilogue_pattern() const258   const RE2& epilogue_pattern() const {
259     return patterns_->epilogue_pattern;
260   }
crlf_free_pattern() const261   const RE2& crlf_free_pattern() const {
262     return patterns_->crlf_free_pattern;
263   }
preamble_pattern() const264   const RE2& preamble_pattern() const {
265     return patterns_->preamble_pattern;
266   }
header_pattern() const267   const RE2& header_pattern() const {
268     return patterns_->header_pattern;
269   }
content_disposition_pattern() const270   const RE2& content_disposition_pattern() const {
271     return patterns_->content_disposition_pattern;
272   }
name_pattern() const273   const RE2& name_pattern() const {
274     return patterns_->name_pattern;
275   }
value_pattern() const276   const RE2& value_pattern() const {
277     return patterns_->value_pattern;
278   }
279 
content_type_octet_stream() const280   const RE2& content_type_octet_stream() const {
281     return patterns_->content_type_octet_stream;
282   }
283 
284   // However, this is used in a static method so it needs to be static.
unquote_pattern()285   static const RE2& unquote_pattern() {
286     return g_patterns.Get().unquote_pattern;  // No caching g_patterns here.
287   }
288 
289   const RE2 dash_boundary_pattern_;
290 
291   // Because of initialisation dependency, |state_| needs to be declared after
292   // |dash_boundary_pattern_|.
293   State state_;
294 
295   // The parsed message can be split into multiple sources which we read
296   // sequentially.
297   re2::StringPiece source_;
298 
299   // Caching the pointer to g_patterns.Get().
300   const Patterns* patterns_;
301 
302   DISALLOW_COPY_AND_ASSIGN(FormDataParserMultipart);
303 };
304 
Result()305 FormDataParser::Result::Result() {}
~Result()306 FormDataParser::Result::~Result() {}
307 
SetBinaryValue(base::StringPiece str)308 void FormDataParser::Result::SetBinaryValue(base::StringPiece str) {
309   value_ = base::Value(
310       base::Value::BlobStorage(str.data(), str.data() + str.size()));
311 }
312 
SetStringValue(std::string str)313 void FormDataParser::Result::SetStringValue(std::string str) {
314   value_ = base::Value(std::move(str));
315 }
316 
~FormDataParser()317 FormDataParser::~FormDataParser() {}
318 
319 // static
Create(const net::HttpRequestHeaders & request_headers)320 std::unique_ptr<FormDataParser> FormDataParser::Create(
321     const net::HttpRequestHeaders& request_headers) {
322   std::string value;
323   const bool found =
324       request_headers.GetHeader(net::HttpRequestHeaders::kContentType, &value);
325   return CreateFromContentTypeHeader(found ? &value : NULL);
326 }
327 
328 // static
CreateFromContentTypeHeader(const std::string * content_type_header)329 std::unique_ptr<FormDataParser> FormDataParser::CreateFromContentTypeHeader(
330     const std::string* content_type_header) {
331   enum ParserChoice {URL_ENCODED, MULTIPART, ERROR_CHOICE};
332   ParserChoice choice = ERROR_CHOICE;
333   std::string boundary;
334 
335   if (content_type_header == NULL) {
336     choice = URL_ENCODED;
337   } else {
338     const std::string content_type(
339         content_type_header->substr(0, content_type_header->find(';')));
340 
341     if (base::EqualsCaseInsensitiveASCII(content_type,
342                                          "application/x-www-form-urlencoded")) {
343       choice = URL_ENCODED;
344     } else if (base::EqualsCaseInsensitiveASCII(content_type,
345                                                 "multipart/form-data")) {
346       static const char kBoundaryString[] = "boundary=";
347       size_t offset = content_type_header->find(kBoundaryString);
348       if (offset == std::string::npos) {
349         // Malformed header.
350         return std::unique_ptr<FormDataParser>();
351       }
352       offset += sizeof(kBoundaryString) - 1;
353       boundary = content_type_header->substr(
354           offset, content_type_header->find(';', offset));
355       if (!boundary.empty())
356         choice = MULTIPART;
357     }
358   }
359   // Other cases are unparseable, including when |content_type| is "text/plain".
360 
361   switch (choice) {
362     case URL_ENCODED:
363       return std::unique_ptr<FormDataParser>(new FormDataParserUrlEncoded());
364     case MULTIPART:
365       return std::unique_ptr<FormDataParser>(
366           new FormDataParserMultipart(boundary));
367     case ERROR_CHOICE:
368       return std::unique_ptr<FormDataParser>();
369   }
370   NOTREACHED();  // Some compilers do not believe this is unreachable.
371   return std::unique_ptr<FormDataParser>();
372 }
373 
FormDataParser()374 FormDataParser::FormDataParser() {}
375 
FormDataParserUrlEncoded()376 FormDataParserUrlEncoded::FormDataParserUrlEncoded()
377     : source_(NULL),
378       source_set_(false),
379       source_malformed_(false),
380       arg_name_(&name_),
381       arg_value_(&value_),
382       patterns_(g_patterns.Pointer()) {
383   args_[0] = &arg_name_;
384   args_[1] = &arg_value_;
385 }
386 
~FormDataParserUrlEncoded()387 FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {}
388 
AllDataReadOK()389 bool FormDataParserUrlEncoded::AllDataReadOK() {
390   // All OK means we read the whole source.
391   return source_set_ && source_.empty() && !source_malformed_;
392 }
393 
GetNextNameValue(Result * result)394 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {
395   if (!source_set_ || source_malformed_)
396     return false;
397 
398   bool success = RE2::ConsumeN(&source_, pattern(), args_, args_size_);
399   if (success) {
400     const net::UnescapeRule::Type kUnescapeRules =
401         net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
402 
403     std::string unescaped_name =
404         net::UnescapeBinaryURLComponent(name_, kUnescapeRules);
405     result->set_name(unescaped_name);
406     std::string unescaped_value =
407         net::UnescapeBinaryURLComponent(value_, kUnescapeRules);
408     const base::StringPiece unescaped_data(unescaped_value.data(),
409                                            unescaped_value.length());
410     if (base::IsStringUTF8(unescaped_data)) {
411       result->SetStringValue(std::move(unescaped_value));
412     } else {
413       result->SetBinaryValue(unescaped_data);
414     }
415   }
416   if (source_.length() > 0) {
417     if (source_[0] == '&')
418       source_.remove_prefix(1);  // Remove the leading '&'.
419     else
420       source_malformed_ = true;  // '&' missing between two name-value pairs.
421   }
422   return success && !source_malformed_;
423 }
424 
SetSource(base::StringPiece source)425 bool FormDataParserUrlEncoded::SetSource(base::StringPiece source) {
426   if (source_set_)
427     return false;  // We do not allow multiple sources for this parser.
428   source_.set(source.data(), source.size());
429   source_set_ = true;
430   source_malformed_ = false;
431   return true;
432 }
433 
434 // static
CreateBoundaryPatternFromLiteral(const std::string & literal)435 std::string FormDataParserMultipart::CreateBoundaryPatternFromLiteral(
436     const std::string& literal) {
437   static const char quote[] = "\\Q";
438   static const char unquote[] = "\\E";
439 
440   // The result always starts with opening the qoute and then "--".
441   std::string result("\\Q--");
442 
443   // This StringPiece is used below to record the next occurrence of "\\E" in
444   // |literal|.
445   re2::StringPiece seek_unquote(literal);
446   const char* copy_start = literal.data();
447   size_t copy_length = literal.size();
448 
449   // Find all "\\E" in |literal| and exclude them from the \Q...\E quote.
450   while (RE2::FindAndConsume(&seek_unquote, unquote_pattern())) {
451     copy_length = seek_unquote.data() - copy_start;
452     result.append(copy_start, copy_length);
453     result.append(kEscapeClosingQuote);
454     result.append(quote);
455     copy_start = seek_unquote.data();
456   }
457 
458   // Finish the last \Q...\E quote.
459   copy_length = (literal.data() + literal.size()) - copy_start;
460   result.append(copy_start, copy_length);
461   result.append(unquote);
462   return result;
463 }
464 
465 // static
StartsWithPattern(const re2::StringPiece & input,const RE2 & pattern)466 bool FormDataParserMultipart::StartsWithPattern(const re2::StringPiece& input,
467                                                 const RE2& pattern) {
468   return pattern.Match(input, 0, input.size(), RE2::ANCHOR_START, NULL, 0);
469 }
470 
FormDataParserMultipart(const std::string & boundary_separator)471 FormDataParserMultipart::FormDataParserMultipart(
472     const std::string& boundary_separator)
473     : dash_boundary_pattern_(
474           CreateBoundaryPatternFromLiteral(boundary_separator)),
475       state_(dash_boundary_pattern_.ok() ? STATE_INIT : STATE_ERROR),
476       patterns_(g_patterns.Pointer()) {}
477 
~FormDataParserMultipart()478 FormDataParserMultipart::~FormDataParserMultipart() {}
479 
AllDataReadOK()480 bool FormDataParserMultipart::AllDataReadOK() {
481   return state_ == STATE_FINISHED;
482 }
483 
FinishReadingPart(base::StringPiece * data)484 bool FormDataParserMultipart::FinishReadingPart(base::StringPiece* data) {
485   const char* data_start = source_.data();
486   while (!StartsWithPattern(source_, dash_boundary_pattern_)) {
487     if (!RE2::Consume(&source_, crlf_free_pattern()) ||
488         !RE2::Consume(&source_, crlf_pattern())) {
489       state_ = STATE_ERROR;
490       return false;
491     }
492   }
493   if (data != NULL) {
494     if (source_.data() == data_start) {
495       // No data in this body part.
496       state_ = STATE_ERROR;
497       return false;
498     }
499     // Subtract 2 for the trailing "\r\n".
500     *data = base::StringPiece(data_start, source_.data() - data_start - 2);
501   }
502 
503   // Finally, read the dash-boundary and either skip to the next body part, or
504   // finish reading the source.
505   CHECK(RE2::Consume(&source_, dash_boundary_pattern_));
506   if (StartsWithPattern(source_, closing_pattern())) {
507     CHECK(RE2::Consume(&source_, closing_pattern()));
508     if (RE2::Consume(&source_, epilogue_pattern()))
509       state_ = STATE_FINISHED;
510     else
511       state_ = STATE_ERROR;
512   } else {  // Next body part ahead.
513     if (!RE2::Consume(&source_, transfer_padding_pattern()))
514       state_ = STATE_ERROR;
515   }
516   return state_ != STATE_ERROR;
517 }
518 
GetNextNameValue(Result * result)519 bool FormDataParserMultipart::GetNextNameValue(Result* result) {
520   if (source_.empty() || state_ != STATE_READY)
521     return false;
522 
523   // 1. Read body-part headers.
524   base::StringPiece name;
525   base::StringPiece value;
526   bool value_assigned = false;
527   bool value_is_binary = false;
528   bool value_assigned_temp;
529   bool value_is_binary_temp;
530   while (TryReadHeader(&name, &value, &value_assigned_temp,
531                        &value_is_binary_temp)) {
532     value_is_binary |= value_is_binary_temp;
533     value_assigned |= value_assigned_temp;
534   }
535   if (name.empty() || state_ == STATE_ERROR) {
536     state_ = STATE_ERROR;
537     return false;
538   }
539 
540   // 2. Read the trailing CRLF after headers.
541   if (!RE2::Consume(&source_, crlf_pattern())) {
542     state_ = STATE_ERROR;
543     return false;
544   }
545 
546   // 3. Read the data of this body part, i.e., everything until the first
547   // dash-boundary.
548   bool return_value;
549   if (value_assigned && source_.empty()) {  // Wait for a new source?
550     return_value = true;
551     state_ = STATE_SUSPEND;
552   } else {
553     return_value = FinishReadingPart(value_assigned ? nullptr : &value);
554   }
555 
556   result->set_name(net::UnescapeBinaryURLComponent(name));
557   if (value_assigned) {
558     // Hold filename as value.
559     result->SetStringValue(value.as_string());
560   } else if (value_is_binary) {
561     result->SetBinaryValue(value);
562   } else {
563     result->SetStringValue(value.as_string());
564   }
565 
566   return return_value;
567 }
568 
SetSource(base::StringPiece source)569 bool FormDataParserMultipart::SetSource(base::StringPiece source) {
570   if (source.data() == NULL || !source_.empty())
571     return false;
572   source_.set(source.data(), source.size());
573 
574   switch (state_) {
575     case STATE_INIT:
576       // Seek behind the preamble.
577       while (!StartsWithPattern(source_, dash_boundary_pattern_)) {
578         if (!RE2::Consume(&source_, preamble_pattern())) {
579           state_ = STATE_ERROR;
580           break;
581         }
582       }
583       // Read dash-boundary, transfer padding, and CRLF.
584       if (state_ != STATE_ERROR) {
585         if (!RE2::Consume(&source_, dash_boundary_pattern_) ||
586             !RE2::Consume(&source_, transfer_padding_pattern()))
587           state_ = STATE_ERROR;
588         else
589           state_ = STATE_READY;
590       }
591       break;
592     case STATE_READY:  // Nothing to do.
593       break;
594     case STATE_SUSPEND:
595       state_ = FinishReadingPart(nullptr) ? STATE_READY : STATE_ERROR;
596       break;
597     default:
598       state_ = STATE_ERROR;
599   }
600   return state_ != STATE_ERROR;
601 }
602 
TryReadHeader(base::StringPiece * name,base::StringPiece * value,bool * value_assigned,bool * value_is_binary)603 bool FormDataParserMultipart::TryReadHeader(base::StringPiece* name,
604                                             base::StringPiece* value,
605                                             bool* value_assigned,
606                                             bool* value_is_binary) {
607   *value_assigned = false;
608   *value_is_binary = false;
609   // Support Content-Type: application/octet-stream.
610   // Form data with this content type is represented as string of bytes.
611   if (RE2::Consume(&source_, content_type_octet_stream())) {
612     *value_is_binary = true;
613     return true;
614   }
615   const char* header_start = source_.data();
616   if (!RE2::Consume(&source_, header_pattern()))
617     return false;
618   // (*) After this point we must return true, because we consumed one header.
619 
620   // Subtract 2 for the trailing "\r\n".
621   re2::StringPiece header(header_start, source_.data() - header_start - 2);
622 
623   if (!StartsWithPattern(header, content_disposition_pattern()))
624     return true;  // Skip headers that don't describe the content-disposition.
625 
626   re2::StringPiece groups[2];
627 
628   if (!name_pattern().Match(header,
629                             kContentDispositionLength, header.size(),
630                             RE2::UNANCHORED, groups, 2)) {
631     state_ = STATE_ERROR;
632     return true;  // See (*) for why true.
633   }
634   *name = base::StringPiece(groups[1].data(), groups[1].size());
635 
636   if (value_pattern().Match(header,
637                             kContentDispositionLength, header.size(),
638                             RE2::UNANCHORED, groups, 2)) {
639     *value = base::StringPiece(groups[1].data(), groups[1].size());
640     *value_assigned = true;
641   }
642   return true;
643 }
644 
645 }  // namespace extensions
646