1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4//
5
6/*
7Package multipart implements MIME multipart parsing, as defined in RFC
82046.
9
10The implementation is sufficient for HTTP (RFC 2388) and the multipart
11bodies generated by popular browsers.
12*/
13package multipart
14
15import (
16	"bufio"
17	"bytes"
18	"fmt"
19	"io"
20	"io/ioutil"
21	"mime"
22	"mime/quotedprintable"
23	"net/textproto"
24	"strings"
25)
26
// emptyParams is the shared, empty parameter map installed on a Part
// whose Content-Disposition header cannot be parsed. Because it is
// non-nil, the header is not parsed again on later lookups.
var emptyParams = map[string]string{}

// peekBufferSize is the size of the buffered reader placed in front of
// the input. It must be at least 76 for this package to work correctly:
// a "\r\n--separator_of_len_70-" sequence would otherwise fill the whole
// buffer, and it would not be safe to consume even a single byte from it.
const peekBufferSize = 4096
33
34// A Part represents a single part in a multipart body.
35type Part struct {
36	// The headers of the body, if any, with the keys canonicalized
37	// in the same fashion that the Go http.Request headers are.
38	// For example, "foo-bar" changes case to "Foo-Bar"
39	//
40	// As a special case, if the "Content-Transfer-Encoding" header
41	// has a value of "quoted-printable", that header is instead
42	// hidden from this map and the body is transparently decoded
43	// during Read calls.
44	Header textproto.MIMEHeader
45
46	mr *Reader
47
48	disposition       string
49	dispositionParams map[string]string
50
51	// r is either a reader directly reading from mr, or it's a
52	// wrapper around such a reader, decoding the
53	// Content-Transfer-Encoding
54	r io.Reader
55
56	n       int   // known data bytes waiting in mr.bufReader
57	total   int64 // total data bytes read already
58	err     error // error to return when n == 0
59	readErr error // read error observed from mr.bufReader
60}
61
62// FormName returns the name parameter if p has a Content-Disposition
63// of type "form-data".  Otherwise it returns the empty string.
64func (p *Part) FormName() string {
65	// See https://tools.ietf.org/html/rfc2183 section 2 for EBNF
66	// of Content-Disposition value format.
67	if p.dispositionParams == nil {
68		p.parseContentDisposition()
69	}
70	if p.disposition != "form-data" {
71		return ""
72	}
73	return p.dispositionParams["name"]
74}
75
76// FileName returns the filename parameter of the Part's
77// Content-Disposition header.
78func (p *Part) FileName() string {
79	if p.dispositionParams == nil {
80		p.parseContentDisposition()
81	}
82	return p.dispositionParams["filename"]
83}
84
85func (p *Part) parseContentDisposition() {
86	v := p.Header.Get("Content-Disposition")
87	var err error
88	p.disposition, p.dispositionParams, err = mime.ParseMediaType(v)
89	if err != nil {
90		p.dispositionParams = emptyParams
91	}
92}
93
94// NewReader creates a new multipart Reader reading from r using the
95// given MIME boundary.
96//
97// The boundary is usually obtained from the "boundary" parameter of
98// the message's "Content-Type" header. Use mime.ParseMediaType to
99// parse such headers.
100func NewReader(r io.Reader, boundary string) *Reader {
101	b := []byte("\r\n--" + boundary + "--")
102	return &Reader{
103		bufReader:        bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize),
104		nl:               b[:2],
105		nlDashBoundary:   b[:len(b)-2],
106		dashBoundaryDash: b[2:],
107		dashBoundary:     b[2 : len(b)-2],
108	}
109}
110
// stickyErrorReader is an io.Reader which never calls Read on its
// underlying Reader once an error has been seen. The io.Reader contract
// promises nothing about what Read returns after an error, yet this
// package does issue further Reads after one, so the first error is
// remembered and replayed.
type stickyErrorReader struct {
	r   io.Reader
	err error // first error returned by r, if any
}

// Read forwards to the underlying reader until that reader reports an
// error. From then on it returns 0 and that same error without touching
// the underlying reader again.
func (r *stickyErrorReader) Read(p []byte) (n int, _ error) {
	if r.err == nil {
		n, r.err = r.r.Read(p)
	}
	return n, r.err
}
128
129func newPart(mr *Reader) (*Part, error) {
130	bp := &Part{
131		Header: make(map[string][]string),
132		mr:     mr,
133	}
134	if err := bp.populateHeaders(); err != nil {
135		return nil, err
136	}
137	bp.r = partReader{bp}
138	const cte = "Content-Transfer-Encoding"
139	if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") {
140		bp.Header.Del(cte)
141		bp.r = quotedprintable.NewReader(bp.r)
142	}
143	return bp, nil
144}
145
146func (bp *Part) populateHeaders() error {
147	r := textproto.NewReader(bp.mr.bufReader)
148	header, err := r.ReadMIMEHeader()
149	if err == nil {
150		bp.Header = header
151	}
152	return err
153}
154
155// Read reads the body of a part, after its headers and before the
156// next part (if any) begins.
157func (p *Part) Read(d []byte) (n int, err error) {
158	return p.r.Read(d)
159}
160
161// partReader implements io.Reader by reading raw bytes directly from the
162// wrapped *Part, without doing any Transfer-Encoding decoding.
163type partReader struct {
164	p *Part
165}
166
167func (pr partReader) Read(d []byte) (int, error) {
168	p := pr.p
169	br := p.mr.bufReader
170
171	// Read into buffer until we identify some data to return,
172	// or we find a reason to stop (boundary or read error).
173	for p.n == 0 && p.err == nil {
174		peek, _ := br.Peek(br.Buffered())
175		p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr)
176		if p.n == 0 && p.err == nil {
177			// Force buffered I/O to read more into buffer.
178			_, p.readErr = br.Peek(len(peek) + 1)
179			if p.readErr == io.EOF {
180				p.readErr = io.ErrUnexpectedEOF
181			}
182		}
183	}
184
185	// Read out from "data to return" part of buffer.
186	if p.n == 0 {
187		return 0, p.err
188	}
189	n := len(d)
190	if n > p.n {
191		n = p.n
192	}
193	n, _ = br.Read(d[:n])
194	p.total += int64(n)
195	p.n -= n
196	if p.n == 0 {
197		return n, p.err
198	}
199	return n, nil
200}
201
202// scanUntilBoundary scans buf to identify how much of it can be safely
203// returned as part of the Part body.
204// dashBoundary is "--boundary".
205// nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in.
206// The comments below (and the name) assume "\n--boundary", but either is accepted.
207// total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized.
208// readErr is the read error, if any, that followed reading the bytes in buf.
209// scanUntilBoundary returns the number of data bytes from buf that can be
210// returned as part of the Part body and also the error to return (if any)
211// once those data bytes are done.
212func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) {
213	if total == 0 {
214		// At beginning of body, allow dashBoundary.
215		if bytes.HasPrefix(buf, dashBoundary) {
216			switch matchAfterPrefix(buf, dashBoundary, readErr) {
217			case -1:
218				return len(dashBoundary), nil
219			case 0:
220				return 0, nil
221			case +1:
222				return 0, io.EOF
223			}
224		}
225		if bytes.HasPrefix(dashBoundary, buf) {
226			return 0, readErr
227		}
228	}
229
230	// Search for "\n--boundary".
231	if i := bytes.Index(buf, nlDashBoundary); i >= 0 {
232		switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) {
233		case -1:
234			return i + len(nlDashBoundary), nil
235		case 0:
236			return i, nil
237		case +1:
238			return i, io.EOF
239		}
240	}
241	if bytes.HasPrefix(nlDashBoundary, buf) {
242		return 0, readErr
243	}
244
245	// Otherwise, anything up to the final \n is not part of the boundary
246	// and so must be part of the body.
247	// Also if the section from the final \n onward is not a prefix of the boundary,
248	// it too must be part of the body.
249	i := bytes.LastIndexByte(buf, nlDashBoundary[0])
250	if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) {
251		return i, nil
252	}
253	return len(buf), readErr
254}
255
// matchAfterPrefix checks whether buf should be considered to match the boundary.
// The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary",
// and the caller has verified already that bytes.HasPrefix(buf, prefix) is true.
//
// matchAfterPrefix returns +1 if the buffer does match the boundary,
// meaning the prefix is followed by a double dash (the closing
// delimiter), space, tab, cr, nl, or end of input.
// It returns -1 if the buffer definitely does NOT match the boundary,
// meaning the prefix is followed by some other character, or by a single
// dash that is not followed by a second one.
// For example, "--foobar" and "--foo-bar" do not match "--foo".
// It returns 0 if more input needs to be read to make the decision,
// meaning that readErr == nil and buf ends either right after the prefix
// or right after the prefix plus a single dash.
func matchAfterPrefix(buf, prefix []byte, readErr error) int {
	if len(buf) == len(prefix) {
		if readErr != nil {
			// End of input directly after the prefix counts as a match.
			return +1
		}
		return 0
	}

	switch c := buf[len(prefix)]; c {
	case ' ', '\t', '\r', '\n':
		return +1
	case '-':
		// A lone dash is not enough: only "--" starts the closing
		// delimiter. Body text such as "--boundary-x" must not end
		// the part.
		if len(buf) == len(prefix)+1 {
			if readErr != nil {
				// Input ended on prefix + "-": not a delimiter.
				return -1
			}
			return 0
		}
		if buf[len(prefix)+1] == '-' {
			return +1
		}
	}
	return -1
}
280
281func (p *Part) Close() error {
282	io.Copy(ioutil.Discard, p)
283	return nil
284}
285
286// Reader is an iterator over parts in a MIME multipart body.
287// Reader's underlying parser consumes its input as needed. Seeking
288// isn't supported.
289type Reader struct {
290	bufReader *bufio.Reader
291
292	currentPart *Part
293	partsRead   int
294
295	nl               []byte // "\r\n" or "\n" (set after seeing first boundary line)
296	nlDashBoundary   []byte // nl + "--boundary"
297	dashBoundaryDash []byte // "--boundary--"
298	dashBoundary     []byte // "--boundary"
299}
300
301// NextPart returns the next part in the multipart or an error.
302// When there are no more parts, the error io.EOF is returned.
303func (r *Reader) NextPart() (*Part, error) {
304	if r.currentPart != nil {
305		r.currentPart.Close()
306	}
307	if string(r.dashBoundary) == "--" {
308		return nil, fmt.Errorf("multipart: boundary is empty")
309	}
310	expectNewPart := false
311	for {
312		line, err := r.bufReader.ReadSlice('\n')
313
314		if err == io.EOF && r.isFinalBoundary(line) {
315			// If the buffer ends in "--boundary--" without the
316			// trailing "\r\n", ReadSlice will return an error
317			// (since it's missing the '\n'), but this is a valid
318			// multipart EOF so we need to return io.EOF instead of
319			// a fmt-wrapped one.
320			return nil, io.EOF
321		}
322		if err != nil {
323			return nil, fmt.Errorf("multipart: NextPart: %v", err)
324		}
325
326		if r.isBoundaryDelimiterLine(line) {
327			r.partsRead++
328			bp, err := newPart(r)
329			if err != nil {
330				return nil, err
331			}
332			r.currentPart = bp
333			return bp, nil
334		}
335
336		if r.isFinalBoundary(line) {
337			// Expected EOF
338			return nil, io.EOF
339		}
340
341		if expectNewPart {
342			return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line))
343		}
344
345		if r.partsRead == 0 {
346			// skip line
347			continue
348		}
349
350		// Consume the "\n" or "\r\n" separator between the
351		// body of the previous part and the boundary line we
352		// now expect will follow. (either a new part or the
353		// end boundary)
354		if bytes.Equal(line, r.nl) {
355			expectNewPart = true
356			continue
357		}
358
359		return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line)
360	}
361}
362
363// isFinalBoundary reports whether line is the final boundary line
364// indicating that all parts are over.
365// It matches `^--boundary--[ \t]*(\r\n)?$`
366func (mr *Reader) isFinalBoundary(line []byte) bool {
367	if !bytes.HasPrefix(line, mr.dashBoundaryDash) {
368		return false
369	}
370	rest := line[len(mr.dashBoundaryDash):]
371	rest = skipLWSPChar(rest)
372	return len(rest) == 0 || bytes.Equal(rest, mr.nl)
373}
374
375func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
376	// https://tools.ietf.org/html/rfc2046#section-5.1
377	//   The boundary delimiter line is then defined as a line
378	//   consisting entirely of two hyphen characters ("-",
379	//   decimal value 45) followed by the boundary parameter
380	//   value from the Content-Type header field, optional linear
381	//   whitespace, and a terminating CRLF.
382	if !bytes.HasPrefix(line, mr.dashBoundary) {
383		return false
384	}
385	rest := line[len(mr.dashBoundary):]
386	rest = skipLWSPChar(rest)
387
388	// On the first part, see our lines are ending in \n instead of \r\n
389	// and switch into that mode if so. This is a violation of the spec,
390	// but occurs in practice.
391	if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
392		mr.nl = mr.nl[1:]
393		mr.nlDashBoundary = mr.nlDashBoundary[1:]
394	}
395	return bytes.Equal(rest, mr.nl)
396}
397
// skipLWSPChar returns b with leading spaces and tabs removed.
// RFC 822 defines:
//    LWSP-char = SPACE / HTAB
//
// The result is a sub-slice of b; no bytes are copied.
func skipLWSPChar(b []byte) []byte {
	i := 0
	for i < len(b) && (b[i] == ' ' || b[i] == '\t') {
		i++
	}
	return b[i:]
}
407