1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package mime
6
7import (
8	"bytes"
9	"errors"
10	"fmt"
11	"sort"
12	"strings"
13	"unicode"
14)
15
16// FormatMediaType serializes mediatype t and the parameters
17// param as a media type conforming to RFC 2045 and RFC 2616.
18// The type and parameter names are written in lower-case.
19// When any of the arguments result in a standard violation then
20// FormatMediaType returns the empty string.
21func FormatMediaType(t string, param map[string]string) string {
22	var b bytes.Buffer
23	if slash := strings.Index(t, "/"); slash == -1 {
24		if !isToken(t) {
25			return ""
26		}
27		b.WriteString(strings.ToLower(t))
28	} else {
29		major, sub := t[:slash], t[slash+1:]
30		if !isToken(major) || !isToken(sub) {
31			return ""
32		}
33		b.WriteString(strings.ToLower(major))
34		b.WriteByte('/')
35		b.WriteString(strings.ToLower(sub))
36	}
37
38	attrs := make([]string, 0, len(param))
39	for a := range param {
40		attrs = append(attrs, a)
41	}
42	sort.Strings(attrs)
43
44	for _, attribute := range attrs {
45		value := param[attribute]
46		b.WriteByte(';')
47		b.WriteByte(' ')
48		if !isToken(attribute) {
49			return ""
50		}
51		b.WriteString(strings.ToLower(attribute))
52		b.WriteByte('=')
53		if isToken(value) {
54			b.WriteString(value)
55			continue
56		}
57
58		b.WriteByte('"')
59		offset := 0
60		for index, character := range value {
61			if character == '"' || character == '\\' {
62				b.WriteString(value[offset:index])
63				offset = index
64				b.WriteByte('\\')
65			}
66			if character&0x80 != 0 {
67				return ""
68			}
69		}
70		b.WriteString(value[offset:])
71		b.WriteByte('"')
72	}
73	return b.String()
74}
75
76func checkMediaTypeDisposition(s string) error {
77	typ, rest := consumeToken(s)
78	if typ == "" {
79		return errors.New("mime: no media type")
80	}
81	if rest == "" {
82		return nil
83	}
84	if !strings.HasPrefix(rest, "/") {
85		return errors.New("mime: expected slash after first token")
86	}
87	subtype, rest := consumeToken(rest[1:])
88	if subtype == "" {
89		return errors.New("mime: expected token after slash")
90	}
91	if rest != "" {
92		return errors.New("mime: unexpected content after media subtype")
93	}
94	return nil
95}
96
// ErrInvalidMediaParameter is returned by ParseMediaType if
// the media type value was found but there was an error parsing
// the optional parameters.
var ErrInvalidMediaParameter = errors.New("mime: invalid media parameter")
101
102// ParseMediaType parses a media type value and any optional
103// parameters, per RFC 1521.  Media types are the values in
104// Content-Type and Content-Disposition headers (RFC 2183).
105// On success, ParseMediaType returns the media type converted
106// to lowercase and trimmed of white space and a non-nil map.
107// If there is an error parsing the optional parameter,
108// the media type will be returned along with the error
109// ErrInvalidMediaParameter.
110// The returned map, params, maps from the lowercase
111// attribute to the attribute value with its case preserved.
112func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
113	i := strings.Index(v, ";")
114	if i == -1 {
115		i = len(v)
116	}
117	mediatype = strings.TrimSpace(strings.ToLower(v[0:i]))
118
119	err = checkMediaTypeDisposition(mediatype)
120	if err != nil {
121		return "", nil, err
122	}
123
124	params = make(map[string]string)
125
126	// Map of base parameter name -> parameter name -> value
127	// for parameters containing a '*' character.
128	// Lazily initialized.
129	var continuation map[string]map[string]string
130
131	v = v[i:]
132	for len(v) > 0 {
133		v = strings.TrimLeftFunc(v, unicode.IsSpace)
134		if len(v) == 0 {
135			break
136		}
137		key, value, rest := consumeMediaParam(v)
138		if key == "" {
139			if strings.TrimSpace(rest) == ";" {
140				// Ignore trailing semicolons.
141				// Not an error.
142				return
143			}
144			// Parse error.
145			return mediatype, nil, ErrInvalidMediaParameter
146		}
147
148		pmap := params
149		if idx := strings.Index(key, "*"); idx != -1 {
150			baseName := key[:idx]
151			if continuation == nil {
152				continuation = make(map[string]map[string]string)
153			}
154			var ok bool
155			if pmap, ok = continuation[baseName]; !ok {
156				continuation[baseName] = make(map[string]string)
157				pmap = continuation[baseName]
158			}
159		}
160		if _, exists := pmap[key]; exists {
161			// Duplicate parameter name is bogus.
162			return "", nil, errors.New("mime: duplicate parameter name")
163		}
164		pmap[key] = value
165		v = rest
166	}
167
168	// Stitch together any continuations or things with stars
169	// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
170	var buf bytes.Buffer
171	for key, pieceMap := range continuation {
172		singlePartKey := key + "*"
173		if v, ok := pieceMap[singlePartKey]; ok {
174			if decv, ok := decode2231Enc(v); ok {
175				params[key] = decv
176			}
177			continue
178		}
179
180		buf.Reset()
181		valid := false
182		for n := 0; ; n++ {
183			simplePart := fmt.Sprintf("%s*%d", key, n)
184			if v, ok := pieceMap[simplePart]; ok {
185				valid = true
186				buf.WriteString(v)
187				continue
188			}
189			encodedPart := simplePart + "*"
190			v, ok := pieceMap[encodedPart]
191			if !ok {
192				break
193			}
194			valid = true
195			if n == 0 {
196				if decv, ok := decode2231Enc(v); ok {
197					buf.WriteString(decv)
198				}
199			} else {
200				decv, _ := percentHexUnescape(v)
201				buf.WriteString(decv)
202			}
203		}
204		if valid {
205			params[key] = buf.String()
206		}
207	}
208
209	return
210}
211
212func decode2231Enc(v string) (string, bool) {
213	sv := strings.SplitN(v, "'", 3)
214	if len(sv) != 3 {
215		return "", false
216	}
217	// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
218	// need to decide how to expose it in the API. But I'm not sure
219	// anybody uses it in practice.
220	charset := strings.ToLower(sv[0])
221	if len(charset) == 0 {
222		return "", false
223	}
224	if charset != "us-ascii" && charset != "utf-8" {
225		// TODO: unsupported encoding
226		return "", false
227	}
228	encv, err := percentHexUnescape(sv[2])
229	if err != nil {
230		return "", false
231	}
232	return encv, true
233}
234
// isNotTokenChar reports whether r is not a token character. It is the
// negation of isTokenChar, in a form usable as a predicate for
// strings.IndexFunc.
func isNotTokenChar(r rune) bool {
	return !isTokenChar(r)
}
238
239// consumeToken consumes a token from the beginning of provided
240// string, per RFC 2045 section 5.1 (referenced from 2183), and return
241// the token consumed and the rest of the string. Returns ("", v) on
242// failure to consume at least one character.
243func consumeToken(v string) (token, rest string) {
244	notPos := strings.IndexFunc(v, isNotTokenChar)
245	if notPos == -1 {
246		return v, ""
247	}
248	if notPos == 0 {
249		return "", v
250	}
251	return v[0:notPos], v[notPos:]
252}
253
254// consumeValue consumes a "value" per RFC 2045, where a value is
255// either a 'token' or a 'quoted-string'.  On success, consumeValue
256// returns the value consumed (and de-quoted/escaped, if a
257// quoted-string) and the rest of the string. On failure, returns
258// ("", v).
259func consumeValue(v string) (value, rest string) {
260	if v == "" {
261		return
262	}
263	if v[0] != '"' {
264		return consumeToken(v)
265	}
266
267	// parse a quoted-string
268	buffer := new(bytes.Buffer)
269	for i := 1; i < len(v); i++ {
270		r := v[i]
271		if r == '"' {
272			return buffer.String(), v[i+1:]
273		}
274		// When MSIE sends a full file path (in "intranet mode"), it does not
275		// escape backslashes: "C:\dev\go\foo.txt", not "C:\\dev\\go\\foo.txt".
276		//
277		// No known MIME generators emit unnecessary backslash escapes
278		// for simple token characters like numbers and letters.
279		//
280		// If we see an unnecessary backslash escape, assume it is from MSIE
281		// and intended as a literal backslash. This makes Go servers deal better
282		// with MSIE without affecting the way they handle conforming MIME
283		// generators.
284		if r == '\\' && i+1 < len(v) && !isTokenChar(rune(v[i+1])) {
285			buffer.WriteByte(v[i+1])
286			i++
287			continue
288		}
289		if r == '\r' || r == '\n' {
290			return "", v
291		}
292		buffer.WriteByte(v[i])
293	}
294	// Did not find end quote.
295	return "", v
296}
297
298func consumeMediaParam(v string) (param, value, rest string) {
299	rest = strings.TrimLeftFunc(v, unicode.IsSpace)
300	if !strings.HasPrefix(rest, ";") {
301		return "", "", v
302	}
303
304	rest = rest[1:] // consume semicolon
305	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
306	param, rest = consumeToken(rest)
307	param = strings.ToLower(param)
308	if param == "" {
309		return "", "", v
310	}
311
312	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
313	if !strings.HasPrefix(rest, "=") {
314		return "", "", v
315	}
316	rest = rest[1:] // consume equals sign
317	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
318	value, rest2 := consumeValue(rest)
319	if value == "" && rest2 == rest {
320		return "", "", v
321	}
322	rest = rest2
323	return param, value, rest
324}
325
326func percentHexUnescape(s string) (string, error) {
327	// Count %, check that they're well-formed.
328	percents := 0
329	for i := 0; i < len(s); {
330		if s[i] != '%' {
331			i++
332			continue
333		}
334		percents++
335		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
336			s = s[i:]
337			if len(s) > 3 {
338				s = s[0:3]
339			}
340			return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
341		}
342		i += 3
343	}
344	if percents == 0 {
345		return s, nil
346	}
347
348	t := make([]byte, len(s)-2*percents)
349	j := 0
350	for i := 0; i < len(s); {
351		switch s[i] {
352		case '%':
353			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
354			j++
355			i += 3
356		default:
357			t[j] = s[i]
358			j++
359			i++
360		}
361	}
362	return string(t), nil
363}
364
// ishex reports whether c is an ASCII hexadecimal digit
// (0-9, a-f or A-F).
func ishex(c byte) bool {
	isDigit := '0' <= c && c <= '9'
	isLower := 'a' <= c && c <= 'f'
	isUpper := 'A' <= c && c <= 'F'
	return isDigit || isLower || isUpper
}
376
// unhex returns the numeric value (0-15) of the ASCII hexadecimal
// digit c. Any byte that is not a hexadecimal digit yields 0.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return 10 + (c - 'a')
	}
	if 'A' <= c && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}
388