// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//

/*
Package multipart implements MIME multipart parsing, as defined in RFC
2046.

The implementation is sufficient for HTTP (RFC 2388) and the multipart
bodies generated by popular browsers.
*/
package multipart

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"io/ioutil"
	"mime"
	"mime/quotedprintable"
	"net/textproto"
	"strings"
)

// emptyParams is the non-nil placeholder stored in Part.dispositionParams
// when the Content-Disposition header cannot be parsed, so that the nil
// check used for lazy parsing treats the header as already processed.
// It is shared between Parts; the code in this file only reads from it.
var emptyParams = make(map[string]string)

// peekBufferSize is the size of the bufio.Reader that NewReader wraps
// around the input.
//
// This constant needs to be at least 76 for this package to work correctly.
// This is because \r\n--separator_of_len_70- would fill the buffer and it
// wouldn't be safe to consume a single byte from it.
const peekBufferSize = 4096

// A Part represents a single part in a multipart body.
type Part struct {
	// The headers of the body, if any, with the keys canonicalized
	// in the same fashion that the Go http.Request headers are.
	// For example, "foo-bar" changes case to "Foo-Bar"
	//
	// As a special case, if the "Content-Transfer-Encoding" header
	// has a value of "quoted-printable", that header is instead
	// hidden from this map and the body is transparently decoded
	// during Read calls.
	Header textproto.MIMEHeader

	// mr is the Reader this part belongs to; the part's body bytes are
	// read from mr.bufReader.
	mr *Reader

	// disposition and dispositionParams hold the parsed
	// Content-Disposition header. They are filled in lazily by
	// parseContentDisposition; dispositionParams == nil means the header
	// has not been parsed yet.
	disposition       string
	dispositionParams map[string]string

	// r is either a reader directly reading from mr, or it's a
	// wrapper around such a reader, decoding the
	// Content-Transfer-Encoding
	r io.Reader

	n       int   // known data bytes waiting in mr.bufReader
	total   int64 // total data bytes read already
	err     error // error to return when n == 0
	readErr error // read error observed from mr.bufReader
}

// FormName returns the name parameter if p has a Content-Disposition
// of type "form-data".  Otherwise it returns the empty string.
64func (p *Part) FormName() string { 65 // See https://tools.ietf.org/html/rfc2183 section 2 for EBNF 66 // of Content-Disposition value format. 67 if p.dispositionParams == nil { 68 p.parseContentDisposition() 69 } 70 if p.disposition != "form-data" { 71 return "" 72 } 73 return p.dispositionParams["name"] 74} 75 76// FileName returns the filename parameter of the Part's 77// Content-Disposition header. 78func (p *Part) FileName() string { 79 if p.dispositionParams == nil { 80 p.parseContentDisposition() 81 } 82 return p.dispositionParams["filename"] 83} 84 85func (p *Part) parseContentDisposition() { 86 v := p.Header.Get("Content-Disposition") 87 var err error 88 p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) 89 if err != nil { 90 p.dispositionParams = emptyParams 91 } 92} 93 94// NewReader creates a new multipart Reader reading from r using the 95// given MIME boundary. 96// 97// The boundary is usually obtained from the "boundary" parameter of 98// the message's "Content-Type" header. Use mime.ParseMediaType to 99// parse such headers. 100func NewReader(r io.Reader, boundary string) *Reader { 101 b := []byte("\r\n--" + boundary + "--") 102 return &Reader{ 103 bufReader: bufio.NewReaderSize(&stickyErrorReader{r: r}, peekBufferSize), 104 nl: b[:2], 105 nlDashBoundary: b[:len(b)-2], 106 dashBoundaryDash: b[2:], 107 dashBoundary: b[2 : len(b)-2], 108 } 109} 110 111// stickyErrorReader is an io.Reader which never calls Read on its 112// underlying Reader once an error has been seen. 
(the io.Reader 113// interface's contract promises nothing about the return values of 114// Read calls after an error, yet this package does do multiple Reads 115// after error) 116type stickyErrorReader struct { 117 r io.Reader 118 err error 119} 120 121func (r *stickyErrorReader) Read(p []byte) (n int, _ error) { 122 if r.err != nil { 123 return 0, r.err 124 } 125 n, r.err = r.r.Read(p) 126 return n, r.err 127} 128 129func newPart(mr *Reader) (*Part, error) { 130 bp := &Part{ 131 Header: make(map[string][]string), 132 mr: mr, 133 } 134 if err := bp.populateHeaders(); err != nil { 135 return nil, err 136 } 137 bp.r = partReader{bp} 138 const cte = "Content-Transfer-Encoding" 139 if strings.EqualFold(bp.Header.Get(cte), "quoted-printable") { 140 bp.Header.Del(cte) 141 bp.r = quotedprintable.NewReader(bp.r) 142 } 143 return bp, nil 144} 145 146func (bp *Part) populateHeaders() error { 147 r := textproto.NewReader(bp.mr.bufReader) 148 header, err := r.ReadMIMEHeader() 149 if err == nil { 150 bp.Header = header 151 } 152 return err 153} 154 155// Read reads the body of a part, after its headers and before the 156// next part (if any) begins. 157func (p *Part) Read(d []byte) (n int, err error) { 158 return p.r.Read(d) 159} 160 161// partReader implements io.Reader by reading raw bytes directly from the 162// wrapped *Part, without doing any Transfer-Encoding decoding. 163type partReader struct { 164 p *Part 165} 166 167func (pr partReader) Read(d []byte) (int, error) { 168 p := pr.p 169 br := p.mr.bufReader 170 171 // Read into buffer until we identify some data to return, 172 // or we find a reason to stop (boundary or read error). 173 for p.n == 0 && p.err == nil { 174 peek, _ := br.Peek(br.Buffered()) 175 p.n, p.err = scanUntilBoundary(peek, p.mr.dashBoundary, p.mr.nlDashBoundary, p.total, p.readErr) 176 if p.n == 0 && p.err == nil { 177 // Force buffered I/O to read more into buffer. 
178 _, p.readErr = br.Peek(len(peek) + 1) 179 if p.readErr == io.EOF { 180 p.readErr = io.ErrUnexpectedEOF 181 } 182 } 183 } 184 185 // Read out from "data to return" part of buffer. 186 if p.n == 0 { 187 return 0, p.err 188 } 189 n := len(d) 190 if n > p.n { 191 n = p.n 192 } 193 n, _ = br.Read(d[:n]) 194 p.total += int64(n) 195 p.n -= n 196 if p.n == 0 { 197 return n, p.err 198 } 199 return n, nil 200} 201 202// scanUntilBoundary scans buf to identify how much of it can be safely 203// returned as part of the Part body. 204// dashBoundary is "--boundary". 205// nlDashBoundary is "\r\n--boundary" or "\n--boundary", depending on what mode we are in. 206// The comments below (and the name) assume "\n--boundary", but either is accepted. 207// total is the number of bytes read out so far. If total == 0, then a leading "--boundary" is recognized. 208// readErr is the read error, if any, that followed reading the bytes in buf. 209// scanUntilBoundary returns the number of data bytes from buf that can be 210// returned as part of the Part body and also the error to return (if any) 211// once those data bytes are done. 212func scanUntilBoundary(buf, dashBoundary, nlDashBoundary []byte, total int64, readErr error) (int, error) { 213 if total == 0 { 214 // At beginning of body, allow dashBoundary. 215 if bytes.HasPrefix(buf, dashBoundary) { 216 switch matchAfterPrefix(buf, dashBoundary, readErr) { 217 case -1: 218 return len(dashBoundary), nil 219 case 0: 220 return 0, nil 221 case +1: 222 return 0, io.EOF 223 } 224 } 225 if bytes.HasPrefix(dashBoundary, buf) { 226 return 0, readErr 227 } 228 } 229 230 // Search for "\n--boundary". 
231 if i := bytes.Index(buf, nlDashBoundary); i >= 0 { 232 switch matchAfterPrefix(buf[i:], nlDashBoundary, readErr) { 233 case -1: 234 return i + len(nlDashBoundary), nil 235 case 0: 236 return i, nil 237 case +1: 238 return i, io.EOF 239 } 240 } 241 if bytes.HasPrefix(nlDashBoundary, buf) { 242 return 0, readErr 243 } 244 245 // Otherwise, anything up to the final \n is not part of the boundary 246 // and so must be part of the body. 247 // Also if the section from the final \n onward is not a prefix of the boundary, 248 // it too must be part of the body. 249 i := bytes.LastIndexByte(buf, nlDashBoundary[0]) 250 if i >= 0 && bytes.HasPrefix(nlDashBoundary, buf[i:]) { 251 return i, nil 252 } 253 return len(buf), readErr 254} 255 256// matchAfterPrefix checks whether buf should be considered to match the boundary. 257// The prefix is "--boundary" or "\r\n--boundary" or "\n--boundary", 258// and the caller has verified already that bytes.HasPrefix(buf, prefix) is true. 259// 260// matchAfterPrefix returns +1 if the buffer does match the boundary, 261// meaning the prefix is followed by a dash, space, tab, cr, nl, or end of input. 262// It returns -1 if the buffer definitely does NOT match the boundary, 263// meaning the prefix is followed by some other character. 264// For example, "--foobar" does not match "--foo". 265// It returns 0 more input needs to be read to make the decision, 266// meaning that len(buf) == len(prefix) and readErr == nil. 267func matchAfterPrefix(buf, prefix []byte, readErr error) int { 268 if len(buf) == len(prefix) { 269 if readErr != nil { 270 return +1 271 } 272 return 0 273 } 274 c := buf[len(prefix)] 275 if c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '-' { 276 return +1 277 } 278 return -1 279} 280 281func (p *Part) Close() error { 282 io.Copy(ioutil.Discard, p) 283 return nil 284} 285 286// Reader is an iterator over parts in a MIME multipart body. 287// Reader's underlying parser consumes its input as needed. 
Seeking 288// isn't supported. 289type Reader struct { 290 bufReader *bufio.Reader 291 292 currentPart *Part 293 partsRead int 294 295 nl []byte // "\r\n" or "\n" (set after seeing first boundary line) 296 nlDashBoundary []byte // nl + "--boundary" 297 dashBoundaryDash []byte // "--boundary--" 298 dashBoundary []byte // "--boundary" 299} 300 301// NextPart returns the next part in the multipart or an error. 302// When there are no more parts, the error io.EOF is returned. 303func (r *Reader) NextPart() (*Part, error) { 304 if r.currentPart != nil { 305 r.currentPart.Close() 306 } 307 if string(r.dashBoundary) == "--" { 308 return nil, fmt.Errorf("multipart: boundary is empty") 309 } 310 expectNewPart := false 311 for { 312 line, err := r.bufReader.ReadSlice('\n') 313 314 if err == io.EOF && r.isFinalBoundary(line) { 315 // If the buffer ends in "--boundary--" without the 316 // trailing "\r\n", ReadSlice will return an error 317 // (since it's missing the '\n'), but this is a valid 318 // multipart EOF so we need to return io.EOF instead of 319 // a fmt-wrapped one. 320 return nil, io.EOF 321 } 322 if err != nil { 323 return nil, fmt.Errorf("multipart: NextPart: %v", err) 324 } 325 326 if r.isBoundaryDelimiterLine(line) { 327 r.partsRead++ 328 bp, err := newPart(r) 329 if err != nil { 330 return nil, err 331 } 332 r.currentPart = bp 333 return bp, nil 334 } 335 336 if r.isFinalBoundary(line) { 337 // Expected EOF 338 return nil, io.EOF 339 } 340 341 if expectNewPart { 342 return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) 343 } 344 345 if r.partsRead == 0 { 346 // skip line 347 continue 348 } 349 350 // Consume the "\n" or "\r\n" separator between the 351 // body of the previous part and the boundary line we 352 // now expect will follow. 
(either a new part or the 353 // end boundary) 354 if bytes.Equal(line, r.nl) { 355 expectNewPart = true 356 continue 357 } 358 359 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) 360 } 361} 362 363// isFinalBoundary reports whether line is the final boundary line 364// indicating that all parts are over. 365// It matches `^--boundary--[ \t]*(\r\n)?$` 366func (mr *Reader) isFinalBoundary(line []byte) bool { 367 if !bytes.HasPrefix(line, mr.dashBoundaryDash) { 368 return false 369 } 370 rest := line[len(mr.dashBoundaryDash):] 371 rest = skipLWSPChar(rest) 372 return len(rest) == 0 || bytes.Equal(rest, mr.nl) 373} 374 375func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { 376 // https://tools.ietf.org/html/rfc2046#section-5.1 377 // The boundary delimiter line is then defined as a line 378 // consisting entirely of two hyphen characters ("-", 379 // decimal value 45) followed by the boundary parameter 380 // value from the Content-Type header field, optional linear 381 // whitespace, and a terminating CRLF. 382 if !bytes.HasPrefix(line, mr.dashBoundary) { 383 return false 384 } 385 rest := line[len(mr.dashBoundary):] 386 rest = skipLWSPChar(rest) 387 388 // On the first part, see our lines are ending in \n instead of \r\n 389 // and switch into that mode if so. This is a violation of the spec, 390 // but occurs in practice. 391 if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { 392 mr.nl = mr.nl[1:] 393 mr.nlDashBoundary = mr.nlDashBoundary[1:] 394 } 395 return bytes.Equal(rest, mr.nl) 396} 397 398// skipLWSPChar returns b with leading spaces and tabs removed. 399// RFC 822 defines: 400// LWSP-char = SPACE / HTAB 401func skipLWSPChar(b []byte) []byte { 402 for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { 403 b = b[1:] 404 } 405 return b 406} 407