1// Copyright 2010 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4// 5 6/* 7Package multipart implements MIME multipart parsing, as defined in RFC 82046. 9 10The implementation is sufficient for HTTP (RFC 2388) and the multipart 11bodies generated by popular browsers. 12*/ 13package multipart 14 15import ( 16 "bufio" 17 "bytes" 18 "fmt" 19 "io" 20 "io/ioutil" 21 "mime" 22 "net/textproto" 23) 24 25var emptyParams = make(map[string]string) 26 27// A Part represents a single part in a multipart body. 28type Part struct { 29 // The headers of the body, if any, with the keys canonicalized 30 // in the same fashion that the Go http.Request headers are. 31 // i.e. "foo-bar" changes case to "Foo-Bar" 32 Header textproto.MIMEHeader 33 34 buffer *bytes.Buffer 35 mr *Reader 36 bytesRead int 37 38 disposition string 39 dispositionParams map[string]string 40 41 // r is either a reader directly reading from mr, or it's a 42 // wrapper around such a reader, decoding the 43 // Content-Transfer-Encoding 44 r io.Reader 45} 46 47// FormName returns the name parameter if p has a Content-Disposition 48// of type "form-data". Otherwise it returns the empty string. 49func (p *Part) FormName() string { 50 // See http://tools.ietf.org/html/rfc2183 section 2 for EBNF 51 // of Content-Disposition value format. 52 if p.dispositionParams == nil { 53 p.parseContentDisposition() 54 } 55 if p.disposition != "form-data" { 56 return "" 57 } 58 return p.dispositionParams["name"] 59} 60 61// FileName returns the filename parameter of the Part's 62// Content-Disposition header. 63func (p *Part) FileName() string { 64 if p.dispositionParams == nil { 65 p.parseContentDisposition() 66 } 67 return p.dispositionParams["filename"] 68} 69 70func (p *Part) parseContentDisposition() { 71 v := p.Header.Get("Content-Disposition") 72 var err error 73 p.disposition, p.dispositionParams, err = mime.ParseMediaType(v) 74 if err != nil { 75 p.dispositionParams = emptyParams 76 } 77} 78 79// NewReader creates a new multipart Reader reading from reader using the 80// given MIME boundary. 81func NewReader(reader io.Reader, boundary string) *Reader { 82 b := []byte("\r\n--" + boundary + "--") 83 return &Reader{ 84 bufReader: bufio.NewReader(reader), 85 86 nl: b[:2], 87 nlDashBoundary: b[:len(b)-2], 88 dashBoundaryDash: b[2:], 89 dashBoundary: b[2 : len(b)-2], 90 } 91} 92 93func newPart(mr *Reader) (*Part, error) { 94 bp := &Part{ 95 Header: make(map[string][]string), 96 mr: mr, 97 buffer: new(bytes.Buffer), 98 } 99 if err := bp.populateHeaders(); err != nil { 100 return nil, err 101 } 102 bp.r = partReader{bp} 103 const cte = "Content-Transfer-Encoding" 104 if bp.Header.Get(cte) == "quoted-printable" { 105 bp.Header.Del(cte) 106 bp.r = newQuotedPrintableReader(bp.r) 107 } 108 return bp, nil 109} 110 111func (bp *Part) populateHeaders() error { 112 r := textproto.NewReader(bp.mr.bufReader) 113 header, err := r.ReadMIMEHeader() 114 if err == nil { 115 bp.Header = header 116 } 117 return err 118} 119 120// Read reads the body of a part, after its headers and before the 121// next part (if any) begins. 122func (p *Part) Read(d []byte) (n int, err error) { 123 return p.r.Read(d) 124} 125 126// partReader implements io.Reader by reading raw bytes directly from the 127// wrapped *Part, without doing any Transfer-Encoding decoding. 128type partReader struct { 129 p *Part 130} 131 132func (pr partReader) Read(d []byte) (n int, err error) { 133 p := pr.p 134 defer func() { 135 p.bytesRead += n 136 }() 137 if p.buffer.Len() >= len(d) { 138 // Internal buffer of unconsumed data is large enough for 139 // the read request. No need to parse more at the moment. 140 return p.buffer.Read(d) 141 } 142 peek, err := p.mr.bufReader.Peek(4096) // TODO(bradfitz): add buffer size accessor 143 144 // Look for an immediate empty part without a leading \r\n 145 // before the boundary separator. Some MIME code makes empty 146 // parts like this. Most browsers, however, write the \r\n 147 // before the subsequent boundary even for empty parts and 148 // won't hit this path. 149 if p.bytesRead == 0 && p.mr.peekBufferIsEmptyPart(peek) { 150 return 0, io.EOF 151 } 152 unexpectedEOF := err == io.EOF 153 if err != nil && !unexpectedEOF { 154 return 0, fmt.Errorf("multipart: Part Read: %v", err) 155 } 156 if peek == nil { 157 panic("nil peek buf") 158 } 159 160 // Search the peek buffer for "\r\n--boundary". If found, 161 // consume everything up to the boundary. If not, consume only 162 // as much of the peek buffer as cannot hold the boundary 163 // string. 164 nCopy := 0 165 foundBoundary := false 166 if idx := bytes.Index(peek, p.mr.nlDashBoundary); idx != -1 { 167 nCopy = idx 168 foundBoundary = true 169 } else if safeCount := len(peek) - len(p.mr.nlDashBoundary); safeCount > 0 { 170 nCopy = safeCount 171 } else if unexpectedEOF { 172 // If we've run out of peek buffer and the boundary 173 // wasn't found (and can't possibly fit), we must have 174 // hit the end of the file unexpectedly. 175 return 0, io.ErrUnexpectedEOF 176 } 177 if nCopy > 0 { 178 if _, err := io.CopyN(p.buffer, p.mr.bufReader, int64(nCopy)); err != nil { 179 return 0, err 180 } 181 } 182 n, err = p.buffer.Read(d) 183 if err == io.EOF && !foundBoundary { 184 // If the boundary hasn't been reached there's more to 185 // read, so don't pass through an EOF from the buffer 186 err = nil 187 } 188 return 189} 190 191func (p *Part) Close() error { 192 io.Copy(ioutil.Discard, p) 193 return nil 194} 195 196// Reader is an iterator over parts in a MIME multipart body. 197// Reader's underlying parser consumes its input as needed. Seeking 198// isn't supported. 199type Reader struct { 200 bufReader *bufio.Reader 201 202 currentPart *Part 203 partsRead int 204 205 nl []byte // "\r\n" or "\n" (set after seeing first boundary line) 206 nlDashBoundary []byte // nl + "--boundary" 207 dashBoundaryDash []byte // "--boundary--" 208 dashBoundary []byte // "--boundary" 209} 210 211// NextPart returns the next part in the multipart or an error. 212// When there are no more parts, the error io.EOF is returned. 213func (r *Reader) NextPart() (*Part, error) { 214 if r.currentPart != nil { 215 r.currentPart.Close() 216 } 217 218 expectNewPart := false 219 for { 220 line, err := r.bufReader.ReadSlice('\n') 221 if err == io.EOF && r.isFinalBoundary(line) { 222 // If the buffer ends in "--boundary--" without the 223 // trailing "\r\n", ReadSlice will return an error 224 // (since it's missing the '\n'), but this is a valid 225 // multipart EOF so we need to return io.EOF instead of 226 // a fmt-wrapped one. 227 return nil, io.EOF 228 } 229 if err != nil { 230 return nil, fmt.Errorf("multipart: NextPart: %v", err) 231 } 232 233 if r.isBoundaryDelimiterLine(line) { 234 r.partsRead++ 235 bp, err := newPart(r) 236 if err != nil { 237 return nil, err 238 } 239 r.currentPart = bp 240 return bp, nil 241 } 242 243 if r.isFinalBoundary(line) { 244 // Expected EOF 245 return nil, io.EOF 246 } 247 248 if expectNewPart { 249 return nil, fmt.Errorf("multipart: expecting a new Part; got line %q", string(line)) 250 } 251 252 if r.partsRead == 0 { 253 // skip line 254 continue 255 } 256 257 // Consume the "\n" or "\r\n" separator between the 258 // body of the previous part and the boundary line we 259 // now expect will follow. (either a new part or the 260 // end boundary) 261 if bytes.Equal(line, r.nl) { 262 expectNewPart = true 263 continue 264 } 265 266 return nil, fmt.Errorf("multipart: unexpected line in Next(): %q", line) 267 } 268 panic("unreachable") 269} 270 271// isFinalBoundary returns whether line is the final boundary line 272// indiciating that all parts are over. 273// It matches `^--boundary--[ \t]*(\r\n)?$` 274func (mr *Reader) isFinalBoundary(line []byte) bool { 275 if !bytes.HasPrefix(line, mr.dashBoundaryDash) { 276 return false 277 } 278 rest := line[len(mr.dashBoundaryDash):] 279 rest = skipLWSPChar(rest) 280 return len(rest) == 0 || bytes.Equal(rest, mr.nl) 281} 282 283func (mr *Reader) isBoundaryDelimiterLine(line []byte) (ret bool) { 284 // http://tools.ietf.org/html/rfc2046#section-5.1 285 // The boundary delimiter line is then defined as a line 286 // consisting entirely of two hyphen characters ("-", 287 // decimal value 45) followed by the boundary parameter 288 // value from the Content-Type header field, optional linear 289 // whitespace, and a terminating CRLF. 290 if !bytes.HasPrefix(line, mr.dashBoundary) { 291 return false 292 } 293 rest := line[len(mr.dashBoundary):] 294 rest = skipLWSPChar(rest) 295 296 // On the first part, see our lines are ending in \n instead of \r\n 297 // and switch into that mode if so. This is a violation of the spec, 298 // but occurs in practice. 299 if mr.partsRead == 0 && len(rest) == 1 && rest[0] == '\n' { 300 mr.nl = mr.nl[1:] 301 mr.nlDashBoundary = mr.nlDashBoundary[1:] 302 } 303 return bytes.Equal(rest, mr.nl) 304} 305 306// peekBufferIsEmptyPart returns whether the provided peek-ahead 307// buffer represents an empty part. This is only called if we've not 308// already read any bytes in this part and checks for the case of MIME 309// software not writing the \r\n on empty parts. Some does, some 310// doesn't. 311// 312// This checks that what follows the "--boundary" is actually the end 313// ("--boundary--" with optional whitespace) or optional whitespace 314// and then a newline, so we don't catch "--boundaryFAKE", in which 315// case the whole line is part of the data. 316func (mr *Reader) peekBufferIsEmptyPart(peek []byte) bool { 317 // End of parts case. 318 // Test whether peek matches `^--boundary--[ \t]*(?:\r\n|$)` 319 if bytes.HasPrefix(peek, mr.dashBoundaryDash) { 320 rest := peek[len(mr.dashBoundaryDash):] 321 rest = skipLWSPChar(rest) 322 return bytes.HasPrefix(rest, mr.nl) || len(rest) == 0 323 } 324 if !bytes.HasPrefix(peek, mr.dashBoundary) { 325 return false 326 } 327 // Test whether rest matches `^[ \t]*\r\n`) 328 rest := peek[len(mr.dashBoundary):] 329 rest = skipLWSPChar(rest) 330 return bytes.HasPrefix(rest, mr.nl) 331} 332 333// skipLWSPChar returns b with leading spaces and tabs removed. 334// RFC 822 defines: 335// LWSP-char = SPACE / HTAB 336func skipLWSPChar(b []byte) []byte { 337 for len(b) > 0 && (b[0] == ' ' || b[0] == '\t') { 338 b = b[1:] 339 } 340 return b 341} 342