1// Copyright 2010 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package mime 6 7import ( 8 "bytes" 9 "errors" 10 "fmt" 11 "strings" 12 "unicode" 13) 14 15// FormatMediaType serializes mediatype t and the parameters 16// param as a media type conforming to RFC 2045 and RFC 2616. 17// The type and parameter names are written in lower-case. 18// When any of the arguments result in a standard violation then 19// FormatMediaType returns the empty string. 20func FormatMediaType(t string, param map[string]string) string { 21 slash := strings.Index(t, "/") 22 if slash == -1 { 23 return "" 24 } 25 major, sub := t[:slash], t[slash+1:] 26 if !isToken(major) || !isToken(sub) { 27 return "" 28 } 29 var b bytes.Buffer 30 b.WriteString(strings.ToLower(major)) 31 b.WriteByte('/') 32 b.WriteString(strings.ToLower(sub)) 33 34 for attribute, value := range param { 35 b.WriteByte(';') 36 b.WriteByte(' ') 37 if !isToken(attribute) { 38 return "" 39 } 40 b.WriteString(strings.ToLower(attribute)) 41 b.WriteByte('=') 42 if isToken(value) { 43 b.WriteString(value) 44 continue 45 } 46 47 b.WriteByte('"') 48 offset := 0 49 for index, character := range value { 50 if character == '"' || character == '\r' { 51 b.WriteString(value[offset:index]) 52 offset = index 53 b.WriteByte('\\') 54 } 55 if character&0x80 != 0 { 56 return "" 57 } 58 } 59 b.WriteString(value[offset:]) 60 b.WriteByte('"') 61 } 62 return b.String() 63} 64 65func checkMediaTypeDisposition(s string) error { 66 typ, rest := consumeToken(s) 67 if typ == "" { 68 return errors.New("mime: no media type") 69 } 70 if rest == "" { 71 return nil 72 } 73 if !strings.HasPrefix(rest, "/") { 74 return errors.New("mime: expected slash after first token") 75 } 76 subtype, rest := consumeToken(rest[1:]) 77 if subtype == "" { 78 return errors.New("mime: expected token after slash") 79 } 80 if rest != "" { 81 return errors.New("mime: unexpected content after media subtype") 82 } 83 return nil 84} 85 86// ParseMediaType parses a media type value and any optional 87// parameters, per RFC 1521. Media types are the values in 88// Content-Type and Content-Disposition headers (RFC 2183). 89// On success, ParseMediaType returns the media type converted 90// to lowercase and trimmed of white space and a non-nil map. 91// The returned map, params, maps from the lowercase 92// attribute to the attribute value with its case preserved. 93func ParseMediaType(v string) (mediatype string, params map[string]string, err error) { 94 i := strings.Index(v, ";") 95 if i == -1 { 96 i = len(v) 97 } 98 mediatype = strings.TrimSpace(strings.ToLower(v[0:i])) 99 100 err = checkMediaTypeDisposition(mediatype) 101 if err != nil { 102 return "", nil, err 103 } 104 105 params = make(map[string]string) 106 107 // Map of base parameter name -> parameter name -> value 108 // for parameters containing a '*' character. 109 // Lazily initialized. 110 var continuation map[string]map[string]string 111 112 v = v[i:] 113 for len(v) > 0 { 114 v = strings.TrimLeftFunc(v, unicode.IsSpace) 115 if len(v) == 0 { 116 break 117 } 118 key, value, rest := consumeMediaParam(v) 119 if key == "" { 120 if strings.TrimSpace(rest) == ";" { 121 // Ignore trailing semicolons. 122 // Not an error. 123 return 124 } 125 // Parse error. 126 return "", nil, errors.New("mime: invalid media parameter") 127 } 128 129 pmap := params 130 if idx := strings.Index(key, "*"); idx != -1 { 131 baseName := key[:idx] 132 if continuation == nil { 133 continuation = make(map[string]map[string]string) 134 } 135 var ok bool 136 if pmap, ok = continuation[baseName]; !ok { 137 continuation[baseName] = make(map[string]string) 138 pmap = continuation[baseName] 139 } 140 } 141 if _, exists := pmap[key]; exists { 142 // Duplicate parameter name is bogus. 143 return "", nil, errors.New("mime: duplicate parameter name") 144 } 145 pmap[key] = value 146 v = rest 147 } 148 149 // Stitch together any continuations or things with stars 150 // (i.e. RFC 2231 things with stars: "foo*0" or "foo*") 151 var buf bytes.Buffer 152 for key, pieceMap := range continuation { 153 singlePartKey := key + "*" 154 if v, ok := pieceMap[singlePartKey]; ok { 155 decv := decode2231Enc(v) 156 params[key] = decv 157 continue 158 } 159 160 buf.Reset() 161 valid := false 162 for n := 0; ; n++ { 163 simplePart := fmt.Sprintf("%s*%d", key, n) 164 if v, ok := pieceMap[simplePart]; ok { 165 valid = true 166 buf.WriteString(v) 167 continue 168 } 169 encodedPart := simplePart + "*" 170 if v, ok := pieceMap[encodedPart]; ok { 171 valid = true 172 if n == 0 { 173 buf.WriteString(decode2231Enc(v)) 174 } else { 175 decv, _ := percentHexUnescape(v) 176 buf.WriteString(decv) 177 } 178 } else { 179 break 180 } 181 } 182 if valid { 183 params[key] = buf.String() 184 } 185 } 186 187 return 188} 189 190func decode2231Enc(v string) string { 191 sv := strings.SplitN(v, "'", 3) 192 if len(sv) != 3 { 193 return "" 194 } 195 // TODO: ignoring lang in sv[1] for now. If anybody needs it we'll 196 // need to decide how to expose it in the API. But I'm not sure 197 // anybody uses it in practice. 198 charset := strings.ToLower(sv[0]) 199 if charset != "us-ascii" && charset != "utf-8" { 200 // TODO: unsupported encoding 201 return "" 202 } 203 encv, _ := percentHexUnescape(sv[2]) 204 return encv 205} 206 207func isNotTokenChar(r rune) bool { 208 return !isTokenChar(r) 209} 210 211// consumeToken consumes a token from the beginning of provided 212// string, per RFC 2045 section 5.1 (referenced from 2183), and return 213// the token consumed and the rest of the string. Returns ("", v) on 214// failure to consume at least one character. 215func consumeToken(v string) (token, rest string) { 216 notPos := strings.IndexFunc(v, isNotTokenChar) 217 if notPos == -1 { 218 return v, "" 219 } 220 if notPos == 0 { 221 return "", v 222 } 223 return v[0:notPos], v[notPos:] 224} 225 226// consumeValue consumes a "value" per RFC 2045, where a value is 227// either a 'token' or a 'quoted-string'. On success, consumeValue 228// returns the value consumed (and de-quoted/escaped, if a 229// quoted-string) and the rest of the string. On failure, returns 230// ("", v). 231func consumeValue(v string) (value, rest string) { 232 if !strings.HasPrefix(v, `"`) && !strings.HasPrefix(v, `'`) { 233 return consumeToken(v) 234 } 235 236 leadQuote := rune(v[0]) 237 238 // parse a quoted-string 239 rest = v[1:] // consume the leading quote 240 buffer := new(bytes.Buffer) 241 var idx int 242 var r rune 243 var nextIsLiteral bool 244 for idx, r = range rest { 245 switch { 246 case nextIsLiteral: 247 buffer.WriteRune(r) 248 nextIsLiteral = false 249 case r == leadQuote: 250 return buffer.String(), rest[idx+1:] 251 case r == '\\': 252 nextIsLiteral = true 253 case r != '\r' && r != '\n': 254 buffer.WriteRune(r) 255 default: 256 return "", v 257 } 258 } 259 return "", v 260} 261 262func consumeMediaParam(v string) (param, value, rest string) { 263 rest = strings.TrimLeftFunc(v, unicode.IsSpace) 264 if !strings.HasPrefix(rest, ";") { 265 return "", "", v 266 } 267 268 rest = rest[1:] // consume semicolon 269 rest = strings.TrimLeftFunc(rest, unicode.IsSpace) 270 param, rest = consumeToken(rest) 271 param = strings.ToLower(param) 272 if param == "" { 273 return "", "", v 274 } 275 276 rest = strings.TrimLeftFunc(rest, unicode.IsSpace) 277 if !strings.HasPrefix(rest, "=") { 278 return "", "", v 279 } 280 rest = rest[1:] // consume equals sign 281 rest = strings.TrimLeftFunc(rest, unicode.IsSpace) 282 value, rest = consumeValue(rest) 283 if value == "" { 284 return "", "", v 285 } 286 return param, value, rest 287} 288 289func percentHexUnescape(s string) (string, error) { 290 // Count %, check that they're well-formed. 291 percents := 0 292 for i := 0; i < len(s); { 293 if s[i] != '%' { 294 i++ 295 continue 296 } 297 percents++ 298 if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { 299 s = s[i:] 300 if len(s) > 3 { 301 s = s[0:3] 302 } 303 return "", fmt.Errorf("mime: bogus characters after %%: %q", s) 304 } 305 i += 3 306 } 307 if percents == 0 { 308 return s, nil 309 } 310 311 t := make([]byte, len(s)-2*percents) 312 j := 0 313 for i := 0; i < len(s); { 314 switch s[i] { 315 case '%': 316 t[j] = unhex(s[i+1])<<4 | unhex(s[i+2]) 317 j++ 318 i += 3 319 default: 320 t[j] = s[i] 321 j++ 322 i++ 323 } 324 } 325 return string(t), nil 326} 327 328func ishex(c byte) bool { 329 switch { 330 case '0' <= c && c <= '9': 331 return true 332 case 'a' <= c && c <= 'f': 333 return true 334 case 'A' <= c && c <= 'F': 335 return true 336 } 337 return false 338} 339 340func unhex(c byte) byte { 341 switch { 342 case '0' <= c && c <= '9': 343 return c - '0' 344 case 'a' <= c && c <= 'f': 345 return c - 'a' + 10 346 case 'A' <= c && c <= 'F': 347 return c - 'A' + 10 348 } 349 return 0 350} 351