1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "BodyUtil.h"
8 
9 #include "nsError.h"
10 #include "nsString.h"
11 #include "nsIGlobalObject.h"
12 #include "mozilla/Encoding.h"
13 
14 #include "nsCRT.h"
15 #include "nsCharSeparatedTokenizer.h"
16 #include "nsDOMString.h"
17 #include "nsNetUtil.h"
18 #include "nsReadableUtils.h"
19 #include "nsStreamUtils.h"
20 #include "nsStringStream.h"
21 #include "nsURLHelper.h"
22 
23 #include "js/ArrayBuffer.h"  // JS::NewArrayBufferWithContents
24 #include "js/JSON.h"
25 #include "mozilla/ErrorResult.h"
26 #include "mozilla/dom/Exceptions.h"
27 #include "mozilla/dom/FetchUtil.h"
28 #include "mozilla/dom/File.h"
29 #include "mozilla/dom/FormData.h"
30 #include "mozilla/dom/Headers.h"
31 #include "mozilla/dom/Promise.h"
32 
33 namespace mozilla::dom {
34 
35 namespace {
36 
37 // Reads over a CRLF and positions start after it.
PushOverLine(nsACString::const_iterator & aStart,const nsACString::const_iterator & aEnd)38 static bool PushOverLine(nsACString::const_iterator& aStart,
39                          const nsACString::const_iterator& aEnd) {
40   if (*aStart == nsCRT::CR && (aEnd - aStart > 1) && *(++aStart) == nsCRT::LF) {
41     ++aStart;  // advance to after CRLF
42     return true;
43   }
44 
45   return false;
46 }
47 
48 /**
49  * A simple multipart/form-data parser as defined in RFC 2388 and RFC 2046.
50  * This does not respect any encoding specified per entry, using UTF-8
51  * throughout. This is as the Fetch spec states in the consume body algorithm.
52  * Borrows some things from Necko's nsMultiMixedConv, but is simpler since
53  * unlike Necko we do not have to deal with receiving incomplete chunks of data.
54  *
55  * This parser will fail the entire parse on any invalid entry, so it will
56  * never return a partially filled FormData.
57  * The content-disposition header is used to figure out the name and filename
58  * entries. The inclusion of the filename parameter decides if the entry is
59  * inserted into the FormData as a string or a File.
60  *
61  * File blobs are copies of the underlying data string since we cannot adopt
62  * char* chunks embedded within the larger body without significant effort.
63  * FIXME(nsm): Bug 1127552 - We should add telemetry to calls to formData() and
64  * friends to figure out if Fetch ends up copying big blobs to see if this is
65  * worth optimizing.
66  */
67 class MOZ_STACK_CLASS FormDataParser {
68  private:
69   RefPtr<FormData> mFormData;
70   nsCString mMimeType;
71   nsCString mData;
72 
73   // Entry state, reset in START_PART.
74   nsCString mName;
75   nsCString mFilename;
76   nsCString mContentType;
77 
78   enum {
79     START_PART,
80     PARSE_HEADER,
81     PARSE_BODY,
82   } mState;
83 
84   nsIGlobalObject* mParentObject;
85 
86   // Reads over a boundary and sets start to the position after the end of the
87   // boundary. Returns false if no boundary is found immediately.
PushOverBoundary(const nsACString & aBoundaryString,nsACString::const_iterator & aStart,nsACString::const_iterator & aEnd)88   bool PushOverBoundary(const nsACString& aBoundaryString,
89                         nsACString::const_iterator& aStart,
90                         nsACString::const_iterator& aEnd) {
91     // We copy the end iterator to keep the original pointing to the real end
92     // of the string.
93     nsACString::const_iterator end(aEnd);
94     const char* beginning = aStart.get();
95     if (FindInReadable(aBoundaryString, aStart, end)) {
96       // We either should find the body immediately, or after 2 chars with the
97       // 2 chars being '-', everything else is failure.
98       if ((aStart.get() - beginning) == 0) {
99         aStart.advance(aBoundaryString.Length());
100         return true;
101       }
102 
103       if ((aStart.get() - beginning) == 2) {
104         if (*(--aStart) == '-' && *(--aStart) == '-') {
105           aStart.advance(aBoundaryString.Length() + 2);
106           return true;
107         }
108       }
109     }
110 
111     return false;
112   }
113 
ParseHeader(nsACString::const_iterator & aStart,nsACString::const_iterator & aEnd,bool * aWasEmptyHeader)114   bool ParseHeader(nsACString::const_iterator& aStart,
115                    nsACString::const_iterator& aEnd, bool* aWasEmptyHeader) {
116     nsAutoCString headerName, headerValue;
117     if (!FetchUtil::ExtractHeader(aStart, aEnd, headerName, headerValue,
118                                   aWasEmptyHeader)) {
119       return false;
120     }
121     if (*aWasEmptyHeader) {
122       return true;
123     }
124 
125     if (headerName.LowerCaseEqualsLiteral("content-disposition")) {
126       bool seenFormData = false;
127       for (const nsACString& token :
128            nsCCharSeparatedTokenizer(headerValue, ';').ToRange()) {
129         if (token.IsEmpty()) {
130           continue;
131         }
132 
133         if (token.EqualsLiteral("form-data")) {
134           seenFormData = true;
135           continue;
136         }
137 
138         if (seenFormData && StringBeginsWith(token, "name="_ns)) {
139           mName = StringTail(token, token.Length() - 5);
140           mName.Trim(" \"");
141           continue;
142         }
143 
144         if (seenFormData && StringBeginsWith(token, "filename="_ns)) {
145           mFilename = StringTail(token, token.Length() - 9);
146           mFilename.Trim(" \"");
147           continue;
148         }
149       }
150 
151       if (mName.IsVoid()) {
152         // Could not parse a valid entry name.
153         return false;
154       }
155     } else if (headerName.LowerCaseEqualsLiteral("content-type")) {
156       mContentType = headerValue;
157     }
158 
159     return true;
160   }
161 
162   // The end of a body is marked by a CRLF followed by the boundary. So the
163   // CRLF is part of the boundary and not the body, but any prior CRLFs are
164   // part of the body. This will position the iterator at the beginning of the
165   // boundary (after the CRLF).
ParseBody(const nsACString & aBoundaryString,nsACString::const_iterator & aStart,nsACString::const_iterator & aEnd)166   bool ParseBody(const nsACString& aBoundaryString,
167                  nsACString::const_iterator& aStart,
168                  nsACString::const_iterator& aEnd) {
169     const char* beginning = aStart.get();
170 
171     // Find the boundary marking the end of the body.
172     nsACString::const_iterator end(aEnd);
173     if (!FindInReadable(aBoundaryString, aStart, end)) {
174       return false;
175     }
176 
177     // We found a boundary, strip the just prior CRLF, and consider
178     // everything else the body section.
179     if (aStart.get() - beginning < 2) {
180       // Only the first entry can have a boundary right at the beginning. Even
181       // an empty body will have a CRLF before the boundary. So this is
182       // a failure.
183       return false;
184     }
185 
186     // Check that there is a CRLF right before the boundary.
187     aStart.advance(-2);
188 
189     // Skip optional hyphens.
190     if (*aStart == '-' && *(aStart.get() + 1) == '-') {
191       if (aStart.get() - beginning < 2) {
192         return false;
193       }
194 
195       aStart.advance(-2);
196     }
197 
198     if (*aStart != nsCRT::CR || *(aStart.get() + 1) != nsCRT::LF) {
199       return false;
200     }
201 
202     nsAutoCString body(beginning, aStart.get() - beginning);
203 
204     // Restore iterator to after the \r\n as we promised.
205     // We do not need to handle the extra hyphens case since our boundary
206     // parser in PushOverBoundary()
207     aStart.advance(2);
208 
209     if (!mFormData) {
210       mFormData = new FormData();
211     }
212 
213     NS_ConvertUTF8toUTF16 name(mName);
214 
215     if (mFilename.IsVoid()) {
216       ErrorResult rv;
217       mFormData->Append(name, NS_ConvertUTF8toUTF16(body), rv);
218       MOZ_ASSERT(!rv.Failed());
219     } else {
220       // Unfortunately we've to copy the data first since all our strings are
221       // going to free it. We also need fallible alloc, so we can't just use
222       // ToNewCString().
223       char* copy = static_cast<char*>(moz_xmalloc(body.Length()));
224       nsCString::const_iterator bodyIter, bodyEnd;
225       body.BeginReading(bodyIter);
226       body.EndReading(bodyEnd);
227       char* p = copy;
228       while (bodyIter != bodyEnd) {
229         *p++ = *bodyIter++;
230       }
231       p = nullptr;
232 
233       RefPtr<Blob> file = File::CreateMemoryFileWithCustomLastModified(
234           mParentObject, reinterpret_cast<void*>(copy), body.Length(),
235           NS_ConvertUTF8toUTF16(mFilename), NS_ConvertUTF8toUTF16(mContentType),
236           /* aLastModifiedDate */ 0);
237       if (NS_WARN_IF(!file)) {
238         return false;
239       }
240 
241       Optional<nsAString> dummy;
242       ErrorResult rv;
243       mFormData->Append(name, *file, dummy, rv);
244       if (NS_WARN_IF(rv.Failed())) {
245         rv.SuppressException();
246         return false;
247       }
248     }
249 
250     return true;
251   }
252 
253  public:
FormDataParser(const nsACString & aMimeType,const nsACString & aData,nsIGlobalObject * aParent)254   FormDataParser(const nsACString& aMimeType, const nsACString& aData,
255                  nsIGlobalObject* aParent)
256       : mMimeType(aMimeType),
257         mData(aData),
258         mState(START_PART),
259         mParentObject(aParent) {}
260 
Parse()261   bool Parse() {
262     if (mData.IsEmpty()) {
263       return false;
264     }
265 
266     // Determine boundary from mimetype.
267     const char* boundaryId = nullptr;
268     boundaryId = strstr(mMimeType.BeginWriting(), "boundary");
269     if (!boundaryId) {
270       return false;
271     }
272 
273     boundaryId = strchr(boundaryId, '=');
274     if (!boundaryId) {
275       return false;
276     }
277 
278     // Skip over '='.
279     boundaryId++;
280 
281     char* attrib = (char*)strchr(boundaryId, ';');
282     if (attrib) *attrib = '\0';
283 
284     nsAutoCString boundaryString(boundaryId);
285     if (attrib) *attrib = ';';
286 
287     boundaryString.Trim(" \"");
288 
289     if (boundaryString.Length() == 0) {
290       return false;
291     }
292 
293     nsACString::const_iterator start, end;
294     mData.BeginReading(start);
295     // This should ALWAYS point to the end of data.
296     // Helpers make copies.
297     mData.EndReading(end);
298 
299     while (start != end) {
300       switch (mState) {
301         case START_PART:
302           mName.SetIsVoid(true);
303           mFilename.SetIsVoid(true);
304           mContentType = "text/plain"_ns;
305 
306           // MUST start with boundary.
307           if (!PushOverBoundary(boundaryString, start, end)) {
308             return false;
309           }
310 
311           if (start != end && *start == '-') {
312             // End of data.
313             if (!mFormData) {
314               mFormData = new FormData();
315             }
316             return true;
317           }
318 
319           if (!PushOverLine(start, end)) {
320             return false;
321           }
322           mState = PARSE_HEADER;
323           break;
324 
325         case PARSE_HEADER:
326           bool emptyHeader;
327           if (!ParseHeader(start, end, &emptyHeader)) {
328             return false;
329           }
330 
331           if (emptyHeader && !PushOverLine(start, end)) {
332             return false;
333           }
334 
335           mState = emptyHeader ? PARSE_BODY : PARSE_HEADER;
336           break;
337 
338         case PARSE_BODY:
339           if (mName.IsVoid()) {
340             NS_WARNING(
341                 "No content-disposition header with a valid name was "
342                 "found. Failing at body parse.");
343             return false;
344           }
345 
346           if (!ParseBody(boundaryString, start, end)) {
347             return false;
348           }
349 
350           mState = START_PART;
351           break;
352 
353         default:
354           MOZ_CRASH("Invalid case");
355       }
356     }
357 
358     MOZ_ASSERT_UNREACHABLE("Should never reach here.");
359     return false;
360   }
361 
GetFormData()362   already_AddRefed<FormData> GetFormData() { return mFormData.forget(); }
363 };
364 }  // namespace
365 
366 // static
ConsumeArrayBuffer(JSContext * aCx,JS::MutableHandle<JSObject * > aValue,uint32_t aInputLength,uint8_t * aInput,ErrorResult & aRv)367 void BodyUtil::ConsumeArrayBuffer(JSContext* aCx,
368                                   JS::MutableHandle<JSObject*> aValue,
369                                   uint32_t aInputLength, uint8_t* aInput,
370                                   ErrorResult& aRv) {
371   JS::Rooted<JSObject*> arrayBuffer(aCx);
372   arrayBuffer = JS::NewArrayBufferWithContents(aCx, aInputLength,
373                                                reinterpret_cast<void*>(aInput));
374   if (!arrayBuffer) {
375     JS_ClearPendingException(aCx);
376     aRv.Throw(NS_ERROR_OUT_OF_MEMORY);
377     return;
378   }
379   aValue.set(arrayBuffer);
380 }
381 
382 // static
ConsumeBlob(nsIGlobalObject * aParent,const nsString & aMimeType,uint32_t aInputLength,uint8_t * aInput,ErrorResult & aRv)383 already_AddRefed<Blob> BodyUtil::ConsumeBlob(nsIGlobalObject* aParent,
384                                              const nsString& aMimeType,
385                                              uint32_t aInputLength,
386                                              uint8_t* aInput,
387                                              ErrorResult& aRv) {
388   RefPtr<Blob> blob = Blob::CreateMemoryBlob(
389       aParent, reinterpret_cast<void*>(aInput), aInputLength, aMimeType);
390 
391   if (!blob) {
392     aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
393     return nullptr;
394   }
395   return blob.forget();
396 }
397 
398 // static
ConsumeFormData(nsIGlobalObject * aParent,const nsCString & aMimeType,const nsCString & aStr,ErrorResult & aRv)399 already_AddRefed<FormData> BodyUtil::ConsumeFormData(nsIGlobalObject* aParent,
400                                                      const nsCString& aMimeType,
401                                                      const nsCString& aStr,
402                                                      ErrorResult& aRv) {
403   constexpr auto formDataMimeType = "multipart/form-data"_ns;
404 
405   // Allow semicolon separated boundary/encoding suffix like
406   // multipart/form-data; boundary= but disallow multipart/form-datafoobar.
407   bool isValidFormDataMimeType = StringBeginsWith(aMimeType, formDataMimeType);
408 
409   if (isValidFormDataMimeType &&
410       aMimeType.Length() > formDataMimeType.Length()) {
411     isValidFormDataMimeType = aMimeType[formDataMimeType.Length()] == ';';
412   }
413 
414   if (isValidFormDataMimeType) {
415     FormDataParser parser(aMimeType, aStr, aParent);
416     if (!parser.Parse()) {
417       aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
418       return nullptr;
419     }
420 
421     RefPtr<FormData> fd = parser.GetFormData();
422     MOZ_ASSERT(fd);
423     return fd.forget();
424   }
425 
426   constexpr auto urlDataMimeType = "application/x-www-form-urlencoded"_ns;
427   bool isValidUrlEncodedMimeType = StringBeginsWith(aMimeType, urlDataMimeType);
428 
429   if (isValidUrlEncodedMimeType &&
430       aMimeType.Length() > urlDataMimeType.Length()) {
431     isValidUrlEncodedMimeType = aMimeType[urlDataMimeType.Length()] == ';';
432   }
433 
434   if (isValidUrlEncodedMimeType) {
435     RefPtr<FormData> fd = new FormData(aParent);
436     DebugOnly<bool> status = URLParams::Parse(
437         aStr, [&fd](const nsAString& aName, const nsAString& aValue) {
438           ErrorResult rv;
439           fd->Append(aName, aValue, rv);
440           MOZ_ASSERT(!rv.Failed());
441           return true;
442         });
443     MOZ_ASSERT(status);
444 
445     return fd.forget();
446   }
447 
448   aRv.ThrowTypeError<MSG_BAD_FORMDATA>();
449   return nullptr;
450 }
451 
452 // static
ConsumeText(uint32_t aInputLength,uint8_t * aInput,nsString & aText)453 nsresult BodyUtil::ConsumeText(uint32_t aInputLength, uint8_t* aInput,
454                                nsString& aText) {
455   nsresult rv =
456       UTF_8_ENCODING->DecodeWithBOMRemoval(Span(aInput, aInputLength), aText);
457   if (NS_FAILED(rv)) {
458     return rv;
459   }
460   return NS_OK;
461 }
462 
463 // static
ConsumeJson(JSContext * aCx,JS::MutableHandle<JS::Value> aValue,const nsString & aStr,ErrorResult & aRv)464 void BodyUtil::ConsumeJson(JSContext* aCx, JS::MutableHandle<JS::Value> aValue,
465                            const nsString& aStr, ErrorResult& aRv) {
466   aRv.MightThrowJSException();
467 
468   JS::Rooted<JS::Value> json(aCx);
469   if (!JS_ParseJSON(aCx, aStr.get(), aStr.Length(), &json)) {
470     if (!JS_IsExceptionPending(aCx)) {
471       aRv.Throw(NS_ERROR_DOM_UNKNOWN_ERR);
472       return;
473     }
474 
475     JS::Rooted<JS::Value> exn(aCx);
476     DebugOnly<bool> gotException = JS_GetPendingException(aCx, &exn);
477     MOZ_ASSERT(gotException);
478 
479     JS_ClearPendingException(aCx);
480     aRv.ThrowJSException(aCx, exn);
481     return;
482   }
483 
484   aValue.set(json);
485 }
486 
487 }  // namespace mozilla::dom
488