1// Copyright 2010 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4// 5 6/* 7Package multipart implements MIME multipart parsing, as defined in RFC 82046. 9 10The implementation is sufficient for HTTP (RFC 2388) and the multipart 11bodies generated by popular browsers. 12*/ 13package multipart 14 15import ( 16 "bufio" 17 "bytes" 18 "fmt" 19 "io" 20 "mime" 21 "mime/quotedprintable" 22 "net/textproto" 23 "path/filepath" 24 "strings" 25) 26 27var emptyParams = make(map[string]string) 28 29// This constant needs to be at least 76 for this package to work correctly. 30// This is because \r\n--separator_of_len_70- would fill the buffer and it 31// wouldn't be safe to consume a single byte from it. 32const peekBufferSize = 4096 33 34// A Part represents a single part in a multipart body. 35type Part struct { 36 // The headers of the body, if any, with the keys canonicalized 37 // in the same fashion that the Go http.Request headers are. 38 // For example, "foo-bar" changes case to "Foo-Bar" 39 Header textproto.MIMEHeader 40 41 mr *Reader 42 43 disposition string 44 dispositionParams map[string]string 45 46 // r is either a reader directly reading from mr, or it's a 47 // wrapper around such a reader, decoding the 48 // Content-Transfer-Encoding 49 r io.Reader 50 51 n int // known data bytes waiting in mr.bufReader 52 total int64 // total data bytes read already 53 err error // error to return when n == 0 54 readErr error // read error observed from mr.bufReader 55} 56 57// FormName returns the name parameter if p has a Content-Disposition 58// of type "form-data". Otherwise it returns the empty string. 59func (p *Part) FormName() string { 60 // See https://tools.ietf.org/html/rfc2183 section 2 for EBNF 61 // of Content-Disposition value format. 62 if p.dispositionParams == nil { 63 p.parseContentDisposition() 64 } 65 if p.disposition != "form-data" { 66 return "" 67 } 68 return p.dispositionParams["name"] 69} 70 71// FileName returns the filename parameter of the Part's Content-Disposition 72// header. If not empty, the filename is passed through filepath.Base (which is 73// platform dependent) before being returned. 74func (p *Part) FileName() string { 75 if p.dispositionParams == nil { 76 p.parseContentDisposition() 77 } 78 filename := p.dispositionParams["filename"] 79 if filename == "" { 80 return "" 81 } 82 // RFC 7578, Section 4.2 requires that if a filename is provided, the 83 // directory path information must not be used. 84 return filepath.Base(filename) 85} 86 87func (p *Part) parseContentDisposition() { 88 v := p.Header.Get("Content-Disposition") 89 var err error 90 p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) 91 if err != nil { 92 p.dispositionParams = emptyParams 93 } 94} 95 96// NewReader creates a new multipart Reader reading from r using the 97// given MIME boundary. 98// 99// The boundary is usually obtained from the "boundary" parameter of 100// the message's "Content-Type" header. Use mime.ParseMediaType to 101// parse such headers. 102func NewReader(r io.Reader, boundary string) *Reader { 103 b := []byte("\r\n--" + boundary + "--") 104 return &Reader{ 105 bufReader: bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize), 106 nl: b[:2], 107 nlDashBoundary: b[:len(b)-2], 108 dashBoundaryDash: b[2:], 109 dashBoundary: b[2 : len(b)-2], 110 } 111} 112 113// stickyErrorReader is an io.Reader which never calls Read on its 114// underlying Reader once an error has been seen. (the io.Reader 115// interface's contract promises nothing about the return values of 116// Read calls after an error, yet this package does do multiple Reads 117// after error) 118type stickyErrorReader struct { 119 r io.Reader 120 err error 121} 122 123func (r *stickyErrorReader) Read(p []byte) (n int, _ error) { 124 if r.err != nil { 125 return 0, r.err 126 } 127 n, r.err = r.r.Read(p) 128 return n, r.err 129} 130 131func newPart(mr *Reader, rawPart bool) (*Part, error) { 132 bp := &Part{ 133 Header: make(map[string][]string), 134 mr: mr, 135 } 136 if err := bp.populateHeaders(); err != nil { 137 return nil, err 138 } 139 bp.r = partReader{bp} 140 141 // rawPart is used to switch between Part.NextPart and Part.NextRawPart. 142 if !rawPart { 143 const cte = "Content-Transfer-Encoding" 144 if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") { 145 bp.Header.Del(cte) 146 bp.r = quotedprintable.NewReader(bp.r) 147 } 148 } 149 return bp, nil 150} 151 152func (bp *Part) populateHeaders() error { 153 r := textproto.NewReader(bp.mr.bufReader) 154 header, err := r.ReadMIMEHeader() 155 if err == nil { 156 bp.Header = header 157 } 158 return err 159} 160 161// Read reads the body of a part, after its headers and before the 162// next part (if any) begins. 163func (p *Part) Read(d []byte) (n int, err error) { 164 return p.r.Read(d) 165} 166 167// partReader implements io.Reader by reading raw bytes directly from the 168// wrapped *Part, without doing any Transfer-Encoding decoding. 169type partReader struct { 170 p *Part 171} 172 173func (pr partReader) Read(d []byte) (int, error) { 174 p := pr.p 175 br := p.mr.bufReader 176 177 // Read into buffer until we identify some data to return, 178 // or we find a reason to stop (boundary or read error). 179 for p.n == 0 && p.err == nil { 180 peek, _ := br.Peek(br.Buffered()) 181 p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr) 182 if p.n == 0 && p.err == nil { 183 // Force buffered I/O to read more into buffer. 184 _, p.readErr = br.Peek(len(peek) + 1) 185 if p.readErr == io.EOF { 186 p.readErr = io.ErrUnexpectedEOF 187 } 188 } 189 } 190 191 // Read out from "data to return" part of buffer. 192 if p.n == 0 { 193 return 0, p.err 194 } 195 n := len(d) 196 if n > p.n { 197 n = p.n 198 } 199 n, _ = br.Read(d[:n]) 200 p.total += int64(n) 201 p.n -= n 202 if p.n == 0 { 203 return n, p.err 204 } 205 return n, nil 206} 207 208// scanUntilBoundary scans buf to identify how much of it can be safely 209// returned as part of the Part body. 210// dashBoundary is "--boundary". 211// nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in. 212// The comments below (and the name) assume "\n--boundary", but either is accepted. 213// total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized. 214// readErr is the read error, if any, that followed reading the bytes in buf. 215// scanUntilBoundary returns the number of data bytes from buf that can be 216// returned as part of the Part body and also the error to return (if any) 217// once those data bytes are done. 218func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) { 219 if total == 0 { 220 // At beginning of body, allow dashBoundary. 221 if bytes.HasPrefix(buf, dashBoundary) { 222 switch matchAfterPrefix(buf, dashBoundary, readErr) { 223 case -1: 224 return len(dashBoundary), nil 225 case 0: 226 return 0, nil 227 case +1: 228 return 0, io.EOF 229 } 230 } 231 if bytes.HasPrefix(dashBoundary, buf) { 232 return 0, readErr 233 } 234 } 235 236 // Search for "\n--boundary". 237 if i := bytes.Index(buf, nlDashBoundary); i >= 0 { 238 switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) { 239 case -1: 240 return i + len(nlDashBoundary), nil 241 case 0: 242 return i, nil 243 case +1: 244 return i, io.EOF 245 } 246 } 247 if bytes.HasPrefix(nlDashBoundary, buf) { 248 return 0, readErr 249 } 250 251 // Otherwise, anything up to the final \n is not part of the boundary 252 // and so must be part of the body. 253 // Also if the section from the final \n onward is not a prefix of the boundary, 254 // it too must be part of the body. 255 i := bytes.LastIndexByte(buf, nlDashBoundary[0]) 256 if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) { 257 return i, nil 258 } 259 return len(buf), readErr 260} 261 262// matchAfterPrefix checks whether buf should be considered to match the boundary. 263// The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary", 264// and the caller has verified already that bytes.HasPrefix(buf, prefix) is true. 265// 266// matchAfterPrefix returns +1 if the buffer does match the boundary, 267// meaning the prefix is followed by a dash, space, tab, cr, nl, or end of input. 268// It returns -1 if the buffer definitely does NOT match the boundary, 269// meaning the prefix is followed by some other character. 270// For example, "--foobar" does not match "--foo". 271// It returns 0 more input needs to be read to make the decision, 272// meaning that len(buf) == len(prefix) and readErr == nil. 273func matchAfterPrefix(buf, prefix []byte, readErr error) int { 274 if len(buf) == len(prefix) { 275 if readErr != nil { 276 return +1 277 } 278 return 0 279 } 280 c := buf[len(prefix)] 281 if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '-' { 282 return +1 283 } 284 return -1 285} 286 287func (p *Part) Close() error { 288 io.Copy(io.Discard, p) 289 return nil 290} 291 292// Reader is an iterator over parts in a MIME multipart body. 293// Reader's underlying parser consumes its input as needed. Seeking 294// isn't supported. 295type Reader struct { 296 bufReader *bufio.Reader 297 298 currentPart *Part 299 partsRead int 300 301 nl []byte // "\r\n" or "\n" (set after seeing first boundary line) 302 nlDashBoundary []byte // nl + "--boundary" 303 dashBoundaryDash []byte // "--boundary--" 304 dashBoundary []byte // "--boundary" 305} 306 307// NextPart returns the next part in the multipart or an error. 308// When there are no more parts, the error io.EOF is returned. 309// 310// As a special case, if the "Content-Transfer-Encoding" header 311// has a value of "quoted-printable", that header is instead 312// hidden and the body is transparently decoded during Read calls. 313func (r *Reader) NextPart() (*Part, error) { 314 return r.nextPart(false) 315} 316 317// NextRawPart returns the next part in the multipart or an error. 318// When there are no more parts, the error io.EOF is returned. 319// 320// Unlike NextPart, it does not have special handling for 321// "Content-Transfer-Encoding: quoted-printable". 322func (r *Reader) NextRawPart() (*Part, error) { 323 return r.nextPart(true) 324} 325 326func (r *Reader) nextPart(rawPart bool) (*Part, error) { 327 if r.currentPart != nil { 328 r.currentPart.Close() 329 } 330 if string(r.dashBoundary) == "--" { 331 return nil, fmt.Errorf("multipart: boundary is empty") 332 } 333 expectNewPart := false 334 for { 335 line, err := r.bufReader.ReadSlice('\n') 336 337 if err == io.EOF && r.isFinalBoundary(line) { 338 // If the buffer ends in "--boundary--" without the 339 // trailing "\r\n", ReadSlice will return an error 340 // (since it's missing the '\n'), but this is a valid 341 // multipart EOF so we need to return io.EOF instead of 342 // a fmt-wrapped one. 343 return nil, io.EOF 344 } 345 if err != nil { 346 return nil, fmt.Errorf("multipart: NextPart: %v", err) 347 } 348 349 if r.isBoundaryDelimiterLine(line) { 350 r.partsRead++ 351 bp, err := newPart(r, rawPart) 352 if err != nil { 353 return nil, err 354 } 355 r.currentPart = bp 356 return bp, nil 357 } 358 359 if r.isFinalBoundary(line) { 360 // Expected EOF 361 return nil, io.EOF 362 } 363 364 if expectNewPart { 365 return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) 366 } 367 368 if r.partsRead == 0 { 369 // skip line 370 continue 371 } 372 373 // Consume the "\n" or "\r\n" separator between the 374 // body of the previous part and the boundary line we 375 // now expect will follow. (either a new part or the 376 // end boundary) 377 if bytes.Equal(line, r.nl) { 378 expectNewPart = true 379 continue 380 } 381 382 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) 383 } 384} 385 386// isFinalBoundary reports whether line is the final boundary line 387// indicating that all parts are over. 388// It matches `^--boundary--[ \t]*(\r\n)?$` 389func (mr *Reader) isFinalBoundary(line []byte) bool { 390 if !bytes.HasPrefix(line, mr.dashBoundaryDash) { 391 return false 392 } 393 rest := line[len(mr.dashBoundaryDash):] 394 rest = skipLWSPChar(rest) 395 return len(rest) == 0 || bytes.Equal(rest, mr.nl) 396} 397 398func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { 399 // https://tools.ietf.org/html/rfc2046#section-5.1 400 // The boundary delimiter line is then defined as a line 401 // consisting entirely of two hyphen characters ("-", 402 // decimal value 45) followed by the boundary parameter 403 // value from the Content-Type header field, optional linear 404 // whitespace, and a terminating CRLF. 405 if !bytes.HasPrefix(line, mr.dashBoundary) { 406 return false 407 } 408 rest := line[len(mr.dashBoundary):] 409 rest = skipLWSPChar(rest) 410 411 // On the first part, see our lines are ending in \n instead of \r\n 412 // and switch into that mode if so. This is a violation of the spec, 413 // but occurs in practice. 414 if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { 415 mr.nl = mr.nl[1:] 416 mr.nlDashBoundary = mr.nlDashBoundary[1:] 417 } 418 return bytes.Equal(rest, mr.nl) 419} 420 421// skipLWSPChar returns b with leading spaces and tabs removed. 422// RFC 822 defines: 423// LWSP-char = SPACE / HTAB 424func skipLWSPChar(b []byte) []byte { 425 for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { 426 b = b[1:] 427 } 428 return b 429} 430