1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package mime
6
7import (
8	"bytes"
9	"errors"
10	"fmt"
11	"strings"
12	"unicode"
13)
14
15// FormatMediaType serializes mediatype t and the parameters
16// param as a media type conforming to RFC 2045 and RFC 2616.
17// The type and parameter names are written in lower-case.
18// When any of the arguments result in a standard violation then
19// FormatMediaType returns the empty string.
20func FormatMediaType(t string, param map[string]string) string {
21	slash := strings.Index(t, "/")
22	if slash == -1 {
23		return ""
24	}
25	major, sub := t[:slash], t[slash+1:]
26	if !isToken(major) || !isToken(sub) {
27		return ""
28	}
29	var b bytes.Buffer
30	b.WriteString(strings.ToLower(major))
31	b.WriteByte('/')
32	b.WriteString(strings.ToLower(sub))
33
34	for attribute, value := range param {
35		b.WriteByte(';')
36		b.WriteByte(' ')
37		if !isToken(attribute) {
38			return ""
39		}
40		b.WriteString(strings.ToLower(attribute))
41		b.WriteByte('=')
42		if isToken(value) {
43			b.WriteString(value)
44			continue
45		}
46
47		b.WriteByte('"')
48		offset := 0
49		for index, character := range value {
50			if character == '"' || character == '\r' {
51				b.WriteString(value[offset:index])
52				offset = index
53				b.WriteByte('\\')
54			}
55			if character&0x80 != 0 {
56				return ""
57			}
58		}
59		b.WriteString(value[offset:])
60		b.WriteByte('"')
61	}
62	return b.String()
63}
64
65func checkMediaTypeDisposition(s string) error {
66	typ, rest := consumeToken(s)
67	if typ == "" {
68		return errors.New("mime: no media type")
69	}
70	if rest == "" {
71		return nil
72	}
73	if !strings.HasPrefix(rest, "/") {
74		return errors.New("mime: expected slash after first token")
75	}
76	subtype, rest := consumeToken(rest[1:])
77	if subtype == "" {
78		return errors.New("mime: expected token after slash")
79	}
80	if rest != "" {
81		return errors.New("mime: unexpected content after media subtype")
82	}
83	return nil
84}
85
86// ParseMediaType parses a media type value and any optional
87// parameters, per RFC 1521.  Media types are the values in
88// Content-Type and Content-Disposition headers (RFC 2183).
89// On success, ParseMediaType returns the media type converted
90// to lowercase and trimmed of white space and a non-nil map.
91// The returned map, params, maps from the lowercase
92// attribute to the attribute value with its case preserved.
93func ParseMediaType(v string) (mediatype string, params map[string]string, err error) {
94	i := strings.Index(v, ";")
95	if i == -1 {
96		i = len(v)
97	}
98	mediatype = strings.TrimSpace(strings.ToLower(v[0:i]))
99
100	err = checkMediaTypeDisposition(mediatype)
101	if err != nil {
102		return "", nil, err
103	}
104
105	params = make(map[string]string)
106
107	// Map of base parameter name -> parameter name -> value
108	// for parameters containing a '*' character.
109	// Lazily initialized.
110	var continuation map[string]map[string]string
111
112	v = v[i:]
113	for len(v) > 0 {
114		v = strings.TrimLeftFunc(v, unicode.IsSpace)
115		if len(v) == 0 {
116			break
117		}
118		key, value, rest := consumeMediaParam(v)
119		if key == "" {
120			if strings.TrimSpace(rest) == ";" {
121				// Ignore trailing semicolons.
122				// Not an error.
123				return
124			}
125			// Parse error.
126			return "", nil, errors.New("mime: invalid media parameter")
127		}
128
129		pmap := params
130		if idx := strings.Index(key, "*"); idx != -1 {
131			baseName := key[:idx]
132			if continuation == nil {
133				continuation = make(map[string]map[string]string)
134			}
135			var ok bool
136			if pmap, ok = continuation[baseName]; !ok {
137				continuation[baseName] = make(map[string]string)
138				pmap = continuation[baseName]
139			}
140		}
141		if _, exists := pmap[key]; exists {
142			// Duplicate parameter name is bogus.
143			return "", nil, errors.New("mime: duplicate parameter name")
144		}
145		pmap[key] = value
146		v = rest
147	}
148
149	// Stitch together any continuations or things with stars
150	// (i.e. RFC 2231 things with stars: "foo*0" or "foo*")
151	var buf bytes.Buffer
152	for key, pieceMap := range continuation {
153		singlePartKey := key + "*"
154		if v, ok := pieceMap[singlePartKey]; ok {
155			decv := decode2231Enc(v)
156			params[key] = decv
157			continue
158		}
159
160		buf.Reset()
161		valid := false
162		for n := 0; ; n++ {
163			simplePart := fmt.Sprintf("%s*%d", key, n)
164			if v, ok := pieceMap[simplePart]; ok {
165				valid = true
166				buf.WriteString(v)
167				continue
168			}
169			encodedPart := simplePart + "*"
170			if v, ok := pieceMap[encodedPart]; ok {
171				valid = true
172				if n == 0 {
173					buf.WriteString(decode2231Enc(v))
174				} else {
175					decv, _ := percentHexUnescape(v)
176					buf.WriteString(decv)
177				}
178			} else {
179				break
180			}
181		}
182		if valid {
183			params[key] = buf.String()
184		}
185	}
186
187	return
188}
189
190func decode2231Enc(v string) string {
191	sv := strings.SplitN(v, "'", 3)
192	if len(sv) != 3 {
193		return ""
194	}
195	// TODO: ignoring lang in sv[1] for now. If anybody needs it we'll
196	// need to decide how to expose it in the API. But I'm not sure
197	// anybody uses it in practice.
198	charset := strings.ToLower(sv[0])
199	if charset != "us-ascii" && charset != "utf-8" {
200		// TODO: unsupported encoding
201		return ""
202	}
203	encv, _ := percentHexUnescape(sv[2])
204	return encv
205}
206
207func isNotTokenChar(r rune) bool {
208	return !isTokenChar(r)
209}
210
211// consumeToken consumes a token from the beginning of provided
212// string, per RFC 2045 section 5.1 (referenced from 2183), and return
213// the token consumed and the rest of the string.  Returns ("", v) on
214// failure to consume at least one character.
215func consumeToken(v string) (token, rest string) {
216	notPos := strings.IndexFunc(v, isNotTokenChar)
217	if notPos == -1 {
218		return v, ""
219	}
220	if notPos == 0 {
221		return "", v
222	}
223	return v[0:notPos], v[notPos:]
224}
225
226// consumeValue consumes a "value" per RFC 2045, where a value is
227// either a 'token' or a 'quoted-string'.  On success, consumeValue
228// returns the value consumed (and de-quoted/escaped, if a
229// quoted-string) and the rest of the string.  On failure, returns
230// ("", v).
231func consumeValue(v string) (value, rest string) {
232	if !strings.HasPrefix(v, `"`) && !strings.HasPrefix(v, `'`) {
233		return consumeToken(v)
234	}
235
236	leadQuote := rune(v[0])
237
238	// parse a quoted-string
239	rest = v[1:] // consume the leading quote
240	buffer := new(bytes.Buffer)
241	var idx int
242	var r rune
243	var nextIsLiteral bool
244	for idx, r = range rest {
245		switch {
246		case nextIsLiteral:
247			buffer.WriteRune(r)
248			nextIsLiteral = false
249		case r == leadQuote:
250			return buffer.String(), rest[idx+1:]
251		case r == '\\':
252			nextIsLiteral = true
253		case r != '\r' && r != '\n':
254			buffer.WriteRune(r)
255		default:
256			return "", v
257		}
258	}
259	return "", v
260}
261
262func consumeMediaParam(v string) (param, value, rest string) {
263	rest = strings.TrimLeftFunc(v, unicode.IsSpace)
264	if !strings.HasPrefix(rest, ";") {
265		return "", "", v
266	}
267
268	rest = rest[1:] // consume semicolon
269	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
270	param, rest = consumeToken(rest)
271	param = strings.ToLower(param)
272	if param == "" {
273		return "", "", v
274	}
275
276	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
277	if !strings.HasPrefix(rest, "=") {
278		return "", "", v
279	}
280	rest = rest[1:] // consume equals sign
281	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
282	value, rest = consumeValue(rest)
283	if value == "" {
284		return "", "", v
285	}
286	return param, value, rest
287}
288
289func percentHexUnescape(s string) (string, error) {
290	// Count %, check that they're well-formed.
291	percents := 0
292	for i := 0; i < len(s); {
293		if s[i] != '%' {
294			i++
295			continue
296		}
297		percents++
298		if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) {
299			s = s[i:]
300			if len(s) > 3 {
301				s = s[0:3]
302			}
303			return "", fmt.Errorf("mime: bogus characters after %%: %q", s)
304		}
305		i += 3
306	}
307	if percents == 0 {
308		return s, nil
309	}
310
311	t := make([]byte, len(s)-2*percents)
312	j := 0
313	for i := 0; i < len(s); {
314		switch s[i] {
315		case '%':
316			t[j] = unhex(s[i+1])<<4 | unhex(s[i+2])
317			j++
318			i += 3
319		default:
320			t[j] = s[i]
321			j++
322			i++
323		}
324	}
325	return string(t), nil
326}
327
// ishex reports whether c is an ASCII hexadecimal digit:
// 0-9, a-f or A-F.
func ishex(c byte) bool {
	digit := '0' <= c && c <= '9'
	lower := 'a' <= c && c <= 'f'
	upper := 'A' <= c && c <= 'F'
	return digit || lower || upper
}
339
// unhex returns the value, 0 through 15, of the hexadecimal digit c.
// Upper and lower case letters are both accepted; any byte that is
// not a hexadecimal digit yields 0.
func unhex(c byte) byte {
	if '0' <= c && c <= '9' {
		return c - '0'
	}
	if 'a' <= c && c <= 'f' {
		return 10 + (c - 'a')
	}
	if 'A' <= c && c <= 'F' {
		return 10 + (c - 'A')
	}
	return 0
}
351