1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
/*
Package multipart implements MIME multipart parsing, as defined in RFC
2046.

The implementation is sufficient for HTTP (RFC 2388) and the multipart
bodies generated by popular browsers.
*/
13package multipart
15import (
16	"bufio"
17	"bytes"
18	"fmt"
19	"io"
20	"mime"
21	"mime/quotedprintable"
22	"net/textproto"
23	"path/filepath"
24	"strings"
// emptyParams is the parameter map given to a Part whose
// Content-Disposition header could not be parsed. The code in this file
// only ever reads from it.
var emptyParams = map[string]string{}

// peekBufferSize is the size of the buffered reader that NewReader wraps
// around its input.
//
// It needs to be at least 76 for this package to work correctly:
// \r\n--separator_of_len_70- would otherwise fill the buffer, and it
// wouldn't be safe to consume a single byte from it.
const peekBufferSize = 4096
34// A Part represents a single part in a multipart body.
35type Part struct {
36	// The headers of the body, if any, with the keys canonicalized
37	// in the same fashion that the Go http.Request headers are.
38	// For example, "foo-bar" changes case to "Foo-Bar"
39	Header textproto.MIMEHeader
41	mr *Reader
43	disposition       string
44	dispositionParams map[string]string
46	// r is either a reader directly reading from mr, or it's a
47	// wrapper around such a reader, decoding the
48	// Content-Transfer-Encoding
49	r io.Reader
51	n       int   // known data bytes waiting in mr.bufReader
52	total   int64 // total data bytes read already
53	err     error // error to return when n == 0
54	readErr error // read error observed from mr.bufReader
57// FormName returns the name parameter if p has a Content-Disposition
58// of type "form-data".  Otherwise it returns the empty string.
59func (p *Part) FormName() string {
60	// See https://tools.ietf.org/html/rfc2183 section 2 for EBNF
61	// of Content-Disposition value format.
62	if p.dispositionParams == nil {
63		p.parseContentDisposition()
64	}
65	if p.disposition != "form-data" {
66		return ""
67	}
68	return p.dispositionParams["name"]
71// FileName returns the filename parameter of the Part's Content-Disposition
72// header. If not empty, the filename is passed through filepath.Base (which is
73// platform dependent) before being returned.
74func (p *Part) FileName() string {
75	if p.dispositionParams == nil {
76		p.parseContentDisposition()
77	}
78	filename := p.dispositionParams["filename"]
79	if filename == "" {
80		return ""
81	}
82	// RFC 7578, Section 4.2 requires that if a filename is provided, the
83	// directory path information must not be used.
84	return filepath.Base(filename)
87func (p *Part) parseContentDisposition() {
88	v := p.Header.Get("Content-Disposition")
89	var err error
90	p.disposition, p.dispositionParams, err = mime.ParseMediaType(v)
91	if err != nil {
92		p.dispositionParams = emptyParams
93	}
96// NewReader creates a new multipart Reader reading from r using the
97// given MIME boundary.
99// The boundary is usually obtained from the "boundary" parameter of
100// the message's "Content-Type" header. Use mime.ParseMediaType to
101// parse such headers.
102func NewReader(r io.Reader, boundary string) *Reader {
103	b := []byte("\r\n--" + boundary + "--")
104	return &Reader{
105		bufReader:        bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize),
106		nl:               b[:2],
107		nlDashBoundary:   b[:len(b)-2],
108		dashBoundaryDash: b[2:],
109		dashBoundary:     b[2 : len(b)-2],
110	}
// stickyErrorReader is an io.Reader that stops calling Read on its
// underlying Reader as soon as that Reader has returned an error. The
// io.Reader contract promises nothing about Read calls made after an
// error, yet this package does issue further Reads after one.
type stickyErrorReader struct {
	r   io.Reader
	err error // first error returned by r, if any
}

// Read forwards to the underlying reader until that reader reports an
// error. The error is remembered, and every later call returns 0 and that
// same error without touching the underlying reader.
func (r *stickyErrorReader) Read(p []byte) (n int, _ error) {
	if r.err == nil {
		n, r.err = r.r.Read(p)
	}
	return n, r.err
}
131func newPart(mr *Reader, rawPart bool) (*Part, error) {
132	bp := &Part{
133		Header: make(map[string][]string),
134		mr:     mr,
135	}
136	if err := bp.populateHeaders(); err != nil {
137		return nil, err
138	}
139	bp.r = partReader{bp}
141	// rawPart is used to switch between Part.NextPart and Part.NextRawPart.
142	if !rawPart {
143		const cte = "Content-Transfer-Encoding"
144		if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") {
145			bp.Header.Del(cte)
146			bp.r = quotedprintable.NewReader(bp.r)
147		}
148	}
149	return bp, nil
152func (bp *Part) populateHeaders() error {
153	r := textproto.NewReader(bp.mr.bufReader)
154	header, err := r.ReadMIMEHeader()
155	if err == nil {
156		bp.Header = header
157	}
158	return err
161// Read reads the body of a part, after its headers and before the
162// next part (if any) begins.
163func (p *Part) Read(d []byte) (n int, err error) {
164	return p.r.Read(d)
167// partReader implements io.Reader by reading raw bytes directly from the
168// wrapped *Part, without doing any Transfer-Encoding decoding.
169type partReader struct {
170	p *Part
173func (pr partReader) Read(d []byte) (int, error) {
174	p := pr.p
175	br := p.mr.bufReader
177	// Read into buffer until we identify some data to return,
178	// or we find a reason to stop (boundary or read error).
179	for p.n == 0 && p.err == nil {
180		peek, _ := br.Peek(br.Buffered())
181		p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr)
182		if p.n == 0 && p.err == nil {
183			// Force buffered I/O to read more into buffer.
184			_, p.readErr = br.Peek(len(peek) + 1)
185			if p.readErr == io.EOF {
186				p.readErr = io.ErrUnexpectedEOF
187			}
188		}
189	}
191	// Read out from "data to return" part of buffer.
192	if p.n == 0 {
193		return 0, p.err
194	}
195	n := len(d)
196	if n > p.n {
197		n = p.n
198	}
199	n, _ = br.Read(d[:n])
200	p.total += int64(n)
201	p.n -= n
202	if p.n == 0 {
203		return n, p.err
204	}
205	return n, nil
208// scanUntilBoundary scans buf to identify how much of it can be safely
209// returned as part of the Part body.
210// dashBoundary is "--boundary".
211// nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in.
212// The comments below (and the name) assume "\n--boundary", but either is accepted.
213// total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized.
214// readErr is the read error, if any, that followed reading the bytes in buf.
215// scanUntilBoundary returns the number of data bytes from buf that can be
216// returned as part of the Part body and also the error to return (if any)
217// once those data bytes are done.
218func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) {
219	if total == 0 {
220		// At beginning of body, allow dashBoundary.
221		if bytes.HasPrefix(buf, dashBoundary) {
222			switch matchAfterPrefix(buf, dashBoundary, readErr) {
223			case -1:
224				return len(dashBoundary), nil
225			case 0:
226				return 0, nil
227			case +1:
228				return 0, io.EOF
229			}
230		}
231		if bytes.HasPrefix(dashBoundary, buf) {
232			return 0, readErr
233		}
234	}
236	// Search for "\n--boundary".
237	if i := bytes.Index(buf, nlDashBoundary); i >= 0 {
238		switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) {
239		case -1:
240			return i + len(nlDashBoundary), nil
241		case 0:
242			return i, nil
243		case +1:
244			return i, io.EOF
245		}
246	}
247	if bytes.HasPrefix(nlDashBoundary, buf) {
248		return 0, readErr
249	}
251	// Otherwise, anything up to the final \n is not part of the boundary
252	// and so must be part of the body.
253	// Also if the section from the final \n onward is not a prefix of the boundary,
254	// it too must be part of the body.
255	i := bytes.LastIndexByte(buf, nlDashBoundary[0])
256	if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) {
257		return i, nil
258	}
259	return len(buf), readErr
// matchAfterPrefix checks whether buf should be considered to match the boundary.
// The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary",
// and the caller has verified already that bytes.HasPrefix(buf, prefix) is true.
//
// matchAfterPrefix returns +1 if the buffer does match the boundary,
// meaning the prefix is followed by a dash, space, tab, cr, nl, or end of input.
// It returns -1 if the buffer definitely does NOT match the boundary,
// meaning the prefix is followed by some other character.
// For example, "--foobar" does not match "--foo".
// It returns 0 if more input needs to be read to make the decision,
// meaning that len(buf) == len(prefix) and readErr == nil.
func matchAfterPrefix(buf, prefix []byte, readErr error) int {
	if len(buf) != len(prefix) {
		// The byte right after the prefix decides the outcome.
		switch buf[len(prefix)] {
		case ' ', '\t', '\r', '\n', '-':
			return +1
		}
		return -1
	}

	// Nothing follows the prefix in buf.
	if readErr == nil {
		return 0
	}
	return +1
}
287func (p *Part) Close() error {
288	io.Copy(io.Discard, p)
289	return nil
292// Reader is an iterator over parts in a MIME multipart body.
293// Reader's underlying parser consumes its input as needed. Seeking
294// isn't supported.
295type Reader struct {
296	bufReader *bufio.Reader
298	currentPart *Part
299	partsRead   int
301	nl               []byte // "\r\n" or "\n" (set after seeing first boundary line)
302	nlDashBoundary   []byte // nl + "--boundary"
303	dashBoundaryDash []byte // "--boundary--"
304	dashBoundary     []byte // "--boundary"
307// NextPart returns the next part in the multipart or an error.
308// When there are no more parts, the error io.EOF is returned.
310// As a special case, if the "Content-Transfer-Encoding" header
311// has a value of "quoted-printable", that header is instead
312// hidden and the body is transparently decoded during Read calls.
313func (r *Reader) NextPart() (*Part, error) {
314	return r.nextPart(false)
317// NextRawPart returns the next part in the multipart or an error.
318// When there are no more parts, the error io.EOF is returned.
320// Unlike NextPart, it does not have special handling for
321// "Content-Transfer-Encoding: quoted-printable".
322func (r *Reader) NextRawPart() (*Part, error) {
323	return r.nextPart(true)
326func (r *Reader) nextPart(rawPart bool) (*Part, error) {
327	if r.currentPart != nil {
328		r.currentPart.Close()
329	}
330	if string(r.dashBoundary) == "--" {
331		return nil, fmt.Errorf("multipart: boundary is empty")
332	}
333	expectNewPart := false
334	for {
335		line, err := r.bufReader.ReadSlice('\n')
337		if err == io.EOF && r.isFinalBoundary(line) {
338			// If the buffer ends in "--boundary--" without the
339			// trailing "\r\n", ReadSlice will return an error
340			// (since it's missing the '\n'), but this is a valid
341			// multipart EOF so we need to return io.EOF instead of
342			// a fmt-wrapped one.
343			return nil, io.EOF
344		}
345		if err != nil {
346			return nil, fmt.Errorf("multipart: NextPart: %v", err)
347		}
349		if r.isBoundaryDelimiterLine(line) {
350			r.partsRead++
351			bp, err := newPart(r, rawPart)
352			if err != nil {
353				return nil, err
354			}
355			r.currentPart = bp
356			return bp, nil
357		}
359		if r.isFinalBoundary(line) {
360			// Expected EOF
361			return nil, io.EOF
362		}
364		if expectNewPart {
365			return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line))
366		}
368		if r.partsRead == 0 {
369			// skip line
370			continue
371		}
373		// Consume the "\n" or "\r\n" separator between the
374		// body of the previous part and the boundary line we
375		// now expect will follow. (either a new part or the
376		// end boundary)
377		if bytes.Equal(line, r.nl) {
378			expectNewPart = true
379			continue
380		}
382		return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line)
383	}
386// isFinalBoundary reports whether line is the final boundary line
387// indicating that all parts are over.
388// It matches `^--boundary--[ \t]*(\r\n)?$`
389func (mr *Reader) isFinalBoundary(line []byte) bool {
390	if !bytes.HasPrefix(line, mr.dashBoundaryDash) {
391		return false
392	}
393	rest := line[len(mr.dashBoundaryDash):]
394	rest = skipLWSPChar(rest)
395	return len(rest) == 0 || bytes.Equal(rest, mr.nl)
398func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
399	// https://tools.ietf.org/html/rfc2046#section-5.1
400	//   The boundary delimiter line is then defined as a line
401	//   consisting entirely of two hyphen characters ("-",
402	//   decimal value 45) followed by the boundary parameter
403	//   value from the Content-Type header field, optional linear
404	//   whitespace, and a terminating CRLF.
405	if !bytes.HasPrefix(line, mr.dashBoundary) {
406		return false
407	}
408	rest := line[len(mr.dashBoundary):]
409	rest = skipLWSPChar(rest)
411	// On the first part, see our lines are ending in \n instead of \r\n
412	// and switch into that mode if so. This is a violation of the spec,
413	// but occurs in practice.
414	if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
415		mr.nl = mr.nl[1:]
416		mr.nlDashBoundary = mr.nlDashBoundary[1:]
417	}
418	return bytes.Equal(rest, mr.nl)
// skipLWSPChar returns b with leading spaces and tabs removed.
// RFC 822 defines:
//
//	LWSP-char = SPACE / HTAB
func skipLWSPChar(b []byte) []byte {
	start := 0
	for start < len(b) {
		if c := b[start]; c != ' ' && c != '\t' {
			break
		}
		start++
	}
	return b[start:]
}