1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
/*
Package multipart implements MIME multipart parsing, as defined in RFC
2046.

The implementation is sufficient for HTTP (RFC 2388) and the multipart
bodies generated by popular browsers.
*/
13package multipart
15import (
16	"bufio"
17	"bytes"
18	"fmt"
19	"io"
20	"io/ioutil"
21	"mime"
22	"net/textproto"
// emptyParams is the shared, non-nil, empty parameter map stored on a
// Part whose Content-Disposition header fails to parse. Because it is
// non-nil, the nil check in FormName and FileName treats the header as
// already parsed. Nothing in this file writes to it.
var emptyParams = map[string]string{}
27// A Part represents a single part in a multipart body.
28type Part struct {
29	// The headers of the body, if any, with the keys canonicalized
30	// in the same fashion that the Go http.Request headers are.
31	// i.e. "foo-bar" changes case to "Foo-Bar"
32	Header textproto.MIMEHeader
34	buffer    *bytes.Buffer
35	mr        *Reader
36	bytesRead int
38	disposition       string
39	dispositionParams map[string]string
41	// r is either a reader directly reading from mr, or it's a
42	// wrapper around such a reader, decoding the
43	// Content-Transfer-Encoding
44	r io.Reader
47// FormName returns the name parameter if p has a Content-Disposition
48// of type "form-data".  Otherwise it returns the empty string.
49func (p *Part) FormName() string {
50	// See http://tools.ietf.org/html/rfc2183 section 2 for EBNF
51	// of Content-Disposition value format.
52	if p.dispositionParams == nil {
53		p.parseContentDisposition()
54	}
55	if p.disposition != "form-data" {
56		return ""
57	}
58	return p.dispositionParams["name"]
61// FileName returns the filename parameter of the Part's
62// Content-Disposition header.
63func (p *Part) FileName() string {
64	if p.dispositionParams == nil {
65		p.parseContentDisposition()
66	}
67	return p.dispositionParams["filename"]
70func (p *Part) parseContentDisposition() {
71	v := p.Header.Get("Content-Disposition")
72	var err error
73	p.disposition, p.dispositionParams, err = mime.ParseMediaType(v)
74	if err != nil {
75		p.dispositionParams = emptyParams
76	}
79// NewReader creates a new multipart Reader reading from reader using the
80// given MIME boundary.
81func NewReader(reader io.Reader, boundary string) *Reader {
82	b := []byte("\r\n--" + boundary + "--")
83	return &Reader{
84		bufReader: bufio.NewReader(reader),
86		nl:               b[:2],
87		nlDashBoundary:   b[:len(b)-2],
88		dashBoundaryDash: b[2:],
89		dashBoundary:     b[2 : len(b)-2],
90	}
93func newPart(mr *Reader) (*Part, error) {
94	bp := &Part{
95		Header: make(map[string][]string),
96		mr:     mr,
97		buffer: new(bytes.Buffer),
98	}
99	if err := bp.populateHeaders(); err != nil {
100		return nil, err
101	}
102	bp.r = partReader{bp}
103	const cte = "Content-Transfer-Encoding"
104	if bp.Header.Get(cte) == "quoted-printable" {
105		bp.Header.Del(cte)
106		bp.r = newQuotedPrintableReader(bp.r)
107	}
108	return bp, nil
111func (bp *Part) populateHeaders() error {
112	r := textproto.NewReader(bp.mr.bufReader)
113	header, err := r.ReadMIMEHeader()
114	if err == nil {
115		bp.Header = header
116	}
117	return err
120// Read reads the body of a part, after its headers and before the
121// next part (if any) begins.
122func (p *Part) Read(d []byte) (n int, err error) {
123	return p.r.Read(d)
126// partReader implements io.Reader by reading raw bytes directly from the
127// wrapped *Part, without doing any Transfer-Encoding decoding.
128type partReader struct {
129	p *Part
132func (pr partReader) Read(d []byte) (n int, err error) {
133	p := pr.p
134	defer func() {
135		p.bytesRead += n
136	}()
137	if p.buffer.Len() >= len(d) {
138		// Internal buffer of unconsumed data is large enough for
139		// the read request.  No need to parse more at the moment.
140		return p.buffer.Read(d)
141	}
142	peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor
144	// Look for an immediate empty part without a leading \r\n
145	// before the boundary separator.  Some MIME code makes empty
146	// parts like this. Most browsers, however, write the \r\n
147	// before the subsequent boundary even for empty parts and
148	// won't hit this path.
149	if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) {
150		return 0, io.EOF
151	}
152	unexpectedEOF := err == io.EOF
153	if err != nil && !unexpectedEOF {
154		return 0, fmt.Errorf("multipart: Part Read: %v", err)
155	}
156	if peek == nil {
157		panic("nil peek buf")
158	}
160	// Search the peek buffer for "\r\n--boundary". If found,
161	// consume everything up to the boundary. If not, consume only
162	// as much of the peek buffer as cannot hold the boundary
163	// string.
164	nCopy := 0
165	foundBoundary := false
166	if idx := bytes.Index(peek, p.mr.nlDashBoundary); idx != -1 {
167		nCopy = idx
168		foundBoundary = true
169	} else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 {
170		nCopy = safeCount
171	} else if unexpectedEOF {
172		// If we've run out of peek buffer and the boundary
173		// wasn't found (and can't possibly fit), we must have
174		// hit the end of the file unexpectedly.
175		return 0, io.ErrUnexpectedEOF
176	}
177	if nCopy > 0 {
178		if _, err := io.CopyN(p.buffer, p.mr.bufReader, int64(nCopy)); err != nil {
179			return 0, err
180		}
181	}
182	n, err = p.buffer.Read(d)
183	if err == io.EOF && !foundBoundary {
184		// If the boundary hasn't been reached there's more to
185		// read, so don't pass through an EOF from the buffer
186		err = nil
187	}
188	return
191func (p *Part) Close() error {
192	io.Copy(ioutil.Discard, p)
193	return nil
196// Reader is an iterator over parts in a MIME multipart body.
197// Reader's underlying parser consumes its input as needed.  Seeking
198// isn't supported.
199type Reader struct {
200	bufReader *bufio.Reader
202	currentPart *Part
203	partsRead   int
205	nl               []byte // "\r\n" or "\n" (set after seeing first boundary line)
206	nlDashBoundary   []byte // nl + "--boundary"
207	dashBoundaryDash []byte // "--boundary--"
208	dashBoundary     []byte // "--boundary"
211// NextPart returns the next part in the multipart or an error.
212// When there are no more parts, the error io.EOF is returned.
213func (r *Reader) NextPart() (*Part, error) {
214	if r.currentPart != nil {
215		r.currentPart.Close()
216	}
218	expectNewPart := false
219	for {
220		line, err := r.bufReader.ReadSlice('\n')
221		if err == io.EOF && r.isFinalBoundary(line) {
222			// If the buffer ends in "--boundary--" without the
223			// trailing "\r\n", ReadSlice will return an error
224			// (since it's missing the '\n'), but this is a valid
225			// multipart EOF so we need to return io.EOF instead of
226			// a fmt-wrapped one.
227			return nil, io.EOF
228		}
229		if err != nil {
230			return nil, fmt.Errorf("multipart: NextPart: %v", err)
231		}
233		if r.isBoundaryDelimiterLine(line) {
234			r.partsRead++
235			bp, err := newPart(r)
236			if err != nil {
237				return nil, err
238			}
239			r.currentPart = bp
240			return bp, nil
241		}
243		if r.isFinalBoundary(line) {
244			// Expected EOF
245			return nil, io.EOF
246		}
248		if expectNewPart {
249			return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line))
250		}
252		if r.partsRead == 0 {
253			// skip line
254			continue
255		}
257		// Consume the "\n" or "\r\n" separator between the
258		// body of the previous part and the boundary line we
259		// now expect will follow. (either a new part or the
260		// end boundary)
261		if bytes.Equal(line, r.nl) {
262			expectNewPart = true
263			continue
264		}
266		return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line)
267	}
268	panic("unreachable")
271// isFinalBoundary returns whether line is the final boundary line
272// indiciating that all parts are over.
273// It matches `^--boundary--[ \t]*(\r\n)?$`
274func (mr *Reader) isFinalBoundary(line []byte) bool {
275	if !bytes.HasPrefix(line, mr.dashBoundaryDash) {
276		return false
277	}
278	rest := line[len(mr.dashBoundaryDash):]
279	rest = skipLWSPChar(rest)
280	return len(rest) == 0 || bytes.Equal(rest, mr.nl)
283func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) {
284	// http://tools.ietf.org/html/rfc2046#section-5.1
285	//   The boundary delimiter line is then defined as a line
286	//   consisting entirely of two hyphen characters ("-",
287	//   decimal value 45) followed by the boundary parameter
288	//   value from the Content-Type header field, optional linear
289	//   whitespace, and a terminating CRLF.
290	if !bytes.HasPrefix(line, mr.dashBoundary) {
291		return false
292	}
293	rest := line[len(mr.dashBoundary):]
294	rest = skipLWSPChar(rest)
296	// On the first part, see our lines are ending in \n instead of \r\n
297	// and switch into that mode if so.  This is a violation of the spec,
298	// but occurs in practice.
299	if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' {
300		mr.nl = mr.nl[1:]
301		mr.nlDashBoundary = mr.nlDashBoundary[1:]
302	}
303	return bytes.Equal(rest, mr.nl)
306// peekBufferIsEmptyPart returns whether the provided peek-ahead
307// buffer represents an empty part.  This is only called if we've not
308// already read any bytes in this part and checks for the case of MIME
309// software not writing the \r\n on empty parts. Some does, some
310// doesn't.
312// This checks that what follows the "--boundary" is actually the end
313// ("--boundary--" with optional whitespace) or optional whitespace
314// and then a newline, so we don't catch "--boundaryFAKE", in which
315// case the whole line is part of the data.
316func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool {
317	// End of parts case.
318	// Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)`
319	if bytes.HasPrefix(peek, mr.dashBoundaryDash) {
320		rest := peek[len(mr.dashBoundaryDash):]
321		rest = skipLWSPChar(rest)
322		return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0
323	}
324	if !bytes.HasPrefix(peek, mr.dashBoundary) {
325		return false
326	}
327	// Test whether rest matches `^[ \t]*\r\n`)
328	rest := peek[len(mr.dashBoundary):]
329	rest = skipLWSPChar(rest)
330	return bytes.HasPrefix(rest, mr.nl)
// skipLWSPChar returns the suffix of b that remains after dropping any
// leading spaces and tabs. The result shares b's backing array.
// RFC 822 defines:
//    LWSP-char = SPACE / HTAB
func skipLWSPChar(b []byte) []byte {
	i := 0
	for i < len(b) && (b[i] == ' ' || b[i] == '\t') {
		i++
	}
	return b[i:]
}