1// Copyright 2010 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4// 5 6/* 7Package multipart implements MIME multipart parsing, as defined in RFC 82046. 9 10The implementation is sufficient for HTTP (RFC 2388) and the multipart 11bodies generated by popular browsers. 12*/ 13package multipart 14 15import ( 16 "bufio" 17 "bytes" 18 "fmt" 19 "io" 20 "io/ioutil" 21 "mime" 22 "net/textproto" 23) 24 25var emptyParams = make(map[string]string) 26 27// A Part represents a single part in a multipart body. 28type Part struct { 29 // The headers of the body, if any, with the keys canonicalized 30 // in the same fashion that the Go http.Request headers are. 31 // For example, "foo-bar" changes case to "Foo-Bar" 32 // 33 // As a special case, if the "Content-Transfer-Encoding" header 34 // has a value of "quoted-printable", that header is instead 35 // hidden from this map and the body is transparently decoded 36 // during Read calls. 37 Header textproto.MIMEHeader 38 39 buffer *bytes.Buffer 40 mr *Reader 41 bytesRead int 42 43 disposition string 44 dispositionParams map[string]string 45 46 // r is either a reader directly reading from mr, or it's a 47 // wrapper around such a reader, decoding the 48 // Content-Transfer-Encoding 49 r io.Reader 50} 51 52// FormName returns the name parameter if p has a Content-Disposition 53// of type "form-data". Otherwise it returns the empty string. 54func (p *Part) FormName() string { 55 // See http://tools.ietf.org/html/rfc2183 section 2 for EBNF 56 // of Content-Disposition value format. 57 if p.dispositionParams == nil { 58 p.parseContentDisposition() 59 } 60 if p.disposition != "form-data" { 61 return "" 62 } 63 return p.dispositionParams["name"] 64} 65 66// FileName returns the filename parameter of the Part's 67// Content-Disposition header. 68func (p *Part) FileName() string { 69 if p.dispositionParams == nil { 70 p.parseContentDisposition() 71 } 72 return p.dispositionParams["filename"] 73} 74 75func (p *Part) parseContentDisposition() { 76 v := p.Header.Get("Content-Disposition") 77 var err error 78 p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) 79 if err != nil { 80 p.dispositionParams = emptyParams 81 } 82} 83 84// NewReader creates a new multipart Reader reading from reader using the 85// given MIME boundary. 86func NewReader(reader io.Reader, boundary string) *Reader { 87 b := []byte("\r\n--" + boundary + "--") 88 return &Reader{ 89 bufReader: bufio.NewReader(reader), 90 91 nl: b[:2], 92 nlDashBoundary: b[:len(b)-2], 93 dashBoundaryDash: b[2:], 94 dashBoundary: b[2 : len(b)-2], 95 } 96} 97 98func newPart(mr *Reader) (*Part, error) { 99 bp := &Part{ 100 Header: make(map[string][]string), 101 mr: mr, 102 buffer: new(bytes.Buffer), 103 } 104 if err := bp.populateHeaders(); err != nil { 105 return nil, err 106 } 107 bp.r = partReader{bp} 108 const cte = "Content-Transfer-Encoding" 109 if bp.Header.Get(cte) == "quoted-printable" { 110 bp.Header.Del(cte) 111 bp.r = newQuotedPrintableReader(bp.r) 112 } 113 return bp, nil 114} 115 116func (bp *Part) populateHeaders() error { 117 r := textproto.NewReader(bp.mr.bufReader) 118 header, err := r.ReadMIMEHeader() 119 if err == nil { 120 bp.Header = header 121 } 122 return err 123} 124 125// Read reads the body of a part, after its headers and before the 126// next part (if any) begins. 127func (p *Part) Read(d []byte) (n int, err error) { 128 return p.r.Read(d) 129} 130 131// partReader implements io.Reader by reading raw bytes directly from the 132// wrapped *Part, without doing any Transfer-Encoding decoding. 133type partReader struct { 134 p *Part 135} 136 137func (pr partReader) Read(d []byte) (n int, err error) { 138 p := pr.p 139 defer func() { 140 p.bytesRead += n 141 }() 142 if p.buffer.Len() >= len(d) { 143 // Internal buffer of unconsumed data is large enough for 144 // the read request. No need to parse more at the moment. 145 return p.buffer.Read(d) 146 } 147 peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor 148 149 // Look for an immediate empty part without a leading \r\n 150 // before the boundary separator. Some MIME code makes empty 151 // parts like this. Most browsers, however, write the \r\n 152 // before the subsequent boundary even for empty parts and 153 // won't hit this path. 154 if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) { 155 return 0, io.EOF 156 } 157 unexpectedEOF := err == io.EOF 158 if err != nil && !unexpectedEOF { 159 return 0, fmt.Errorf("multipart: Part Read: %v", err) 160 } 161 if peek == nil { 162 panic("nil peek buf") 163 } 164 165 // Search the peek buffer for "\r\n--boundary". If found, 166 // consume everything up to the boundary. If not, consume only 167 // as much of the peek buffer as cannot hold the boundary 168 // string. 169 nCopy := 0 170 foundBoundary := false 171 if idx := bytes.Index(peek, p.mr.nlDashBoundary); idx != -1 { 172 nCopy = idx 173 foundBoundary = true 174 } else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 { 175 nCopy = safeCount 176 } else if unexpectedEOF { 177 // If we've run out of peek buffer and the boundary 178 // wasn't found (and can't possibly fit), we must have 179 // hit the end of the file unexpectedly. 180 return 0, io.ErrUnexpectedEOF 181 } 182 if nCopy > 0 { 183 if _, err := io.CopyN(p.buffer, p.mr.bufReader, int64(nCopy)); err != nil { 184 return 0, err 185 } 186 } 187 n, err = p.buffer.Read(d) 188 if err == io.EOF && !foundBoundary { 189 // If the boundary hasn't been reached there's more to 190 // read, so don't pass through an EOF from the buffer 191 err = nil 192 } 193 return 194} 195 196func (p *Part) Close() error { 197 io.Copy(ioutil.Discard, p) 198 return nil 199} 200 201// Reader is an iterator over parts in a MIME multipart body. 202// Reader's underlying parser consumes its input as needed. Seeking 203// isn't supported. 204type Reader struct { 205 bufReader *bufio.Reader 206 207 currentPart *Part 208 partsRead int 209 210 nl []byte // "\r\n" or "\n" (set after seeing first boundary line) 211 nlDashBoundary []byte // nl + "--boundary" 212 dashBoundaryDash []byte // "--boundary--" 213 dashBoundary []byte // "--boundary" 214} 215 216// NextPart returns the next part in the multipart or an error. 217// When there are no more parts, the error io.EOF is returned. 218func (r *Reader) NextPart() (*Part, error) { 219 if r.currentPart != nil { 220 r.currentPart.Close() 221 } 222 223 expectNewPart := false 224 for { 225 line, err := r.bufReader.ReadSlice('\n') 226 if err == io.EOF && r.isFinalBoundary(line) { 227 // If the buffer ends in "--boundary--" without the 228 // trailing "\r\n", ReadSlice will return an error 229 // (since it's missing the '\n'), but this is a valid 230 // multipart EOF so we need to return io.EOF instead of 231 // a fmt-wrapped one. 232 return nil, io.EOF 233 } 234 if err != nil { 235 return nil, fmt.Errorf("multipart: NextPart: %v", err) 236 } 237 238 if r.isBoundaryDelimiterLine(line) { 239 r.partsRead++ 240 bp, err := newPart(r) 241 if err != nil { 242 return nil, err 243 } 244 r.currentPart = bp 245 return bp, nil 246 } 247 248 if r.isFinalBoundary(line) { 249 // Expected EOF 250 return nil, io.EOF 251 } 252 253 if expectNewPart { 254 return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) 255 } 256 257 if r.partsRead == 0 { 258 // skip line 259 continue 260 } 261 262 // Consume the "\n" or "\r\n" separator between the 263 // body of the previous part and the boundary line we 264 // now expect will follow. (either a new part or the 265 // end boundary) 266 if bytes.Equal(line, r.nl) { 267 expectNewPart = true 268 continue 269 } 270 271 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) 272 } 273} 274 275// isFinalBoundary reports whether line is the final boundary line 276// indicating that all parts are over. 277// It matches `^--boundary--[ \t]*(\r\n)?$` 278func (mr *Reader) isFinalBoundary(line []byte) bool { 279 if !bytes.HasPrefix(line, mr.dashBoundaryDash) { 280 return false 281 } 282 rest := line[len(mr.dashBoundaryDash):] 283 rest = skipLWSPChar(rest) 284 return len(rest) == 0 || bytes.Equal(rest, mr.nl) 285} 286 287func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { 288 // http://tools.ietf.org/html/rfc2046#section-5.1 289 // The boundary delimiter line is then defined as a line 290 // consisting entirely of two hyphen characters ("-", 291 // decimal value 45) followed by the boundary parameter 292 // value from the Content-Type header field, optional linear 293 // whitespace, and a terminating CRLF. 294 if !bytes.HasPrefix(line, mr.dashBoundary) { 295 return false 296 } 297 rest := line[len(mr.dashBoundary):] 298 rest = skipLWSPChar(rest) 299 300 // On the first part, see our lines are ending in \n instead of \r\n 301 // and switch into that mode if so. This is a violation of the spec, 302 // but occurs in practice. 303 if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { 304 mr.nl = mr.nl[1:] 305 mr.nlDashBoundary = mr.nlDashBoundary[1:] 306 } 307 return bytes.Equal(rest, mr.nl) 308} 309 310// peekBufferIsEmptyPart reports whether the provided peek-ahead 311// buffer represents an empty part. It is called only if we've not 312// already read any bytes in this part and checks for the case of MIME 313// software not writing the \r\n on empty parts. Some does, some 314// doesn't. 315// 316// This checks that what follows the "--boundary" is actually the end 317// ("--boundary--" with optional whitespace) or optional whitespace 318// and then a newline, so we don't catch "--boundaryFAKE", in which 319// case the whole line is part of the data. 320func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool { 321 // End of parts case. 322 // Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)` 323 if bytes.HasPrefix(peek, mr.dashBoundaryDash) { 324 rest := peek[len(mr.dashBoundaryDash):] 325 rest = skipLWSPChar(rest) 326 return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0 327 } 328 if !bytes.HasPrefix(peek, mr.dashBoundary) { 329 return false 330 } 331 // Test whether rest matches `^[ \t]*\r\n`) 332 rest := peek[len(mr.dashBoundary):] 333 rest = skipLWSPChar(rest) 334 return bytes.HasPrefix(rest, mr.nl) 335} 336 337// skipLWSPChar returns b with leading spaces and tabs removed. 338// RFC 822 defines: 339// LWSP-char = SPACE / HTAB 340func skipLWSPChar(b []byte) []byte { 341 for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { 342 b = b[1:] 343 } 344 return b 345} 346