1package quotedprintable
2
3import (
4	"bytes"
5	"encoding/base64"
6	"errors"
7	"fmt"
8	"io"
9	"strings"
10	"unicode"
11	"unicode/utf8"
12)
13
// WordEncoder is an RFC 2047 encoded-word encoder. Its value is the letter
// identifying the encoding scheme inside the encoded-word.
type WordEncoder byte

// Encoding schemes usable for encoded-words.
const (
	// BEncoding is the Base64 encoding scheme defined by RFC 2045.
	BEncoding WordEncoder = 'b'
	// QEncoding is the Q-encoding scheme defined by RFC 2047.
	QEncoding WordEncoder = 'q'
)
23
// errInvalidWord reports that the input is not a well-formed RFC 2047
// encoded-word.
var errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
27
28// Encode returns the encoded-word form of s. If s is ASCII without special
29// characters, it is returned unchanged. The provided charset is the IANA
30// charset name of s. It is case insensitive.
31func (e WordEncoder) Encode(charset, s string) string {
32	if !needsEncoding(s) {
33		return s
34	}
35	return e.encodeWord(charset, s)
36}
37
// needsEncoding reports whether s contains a rune that is neither a tab nor a
// printable ASCII character (space through '~').
func needsEncoding(s string) bool {
	for _, r := range s {
		if r == '\t' {
			continue
		}
		if r < ' ' || r > '~' {
			return true
		}
	}
	return false
}
46
47// encodeWord encodes a string into an encoded-word.
48func (e WordEncoder) encodeWord(charset, s string) string {
49	buf := getBuffer()
50	defer putBuffer(buf)
51
52	buf.WriteString("=?")
53	buf.WriteString(charset)
54	buf.WriteByte('?')
55	buf.WriteByte(byte(e))
56	buf.WriteByte('?')
57
58	if e == BEncoding {
59		w := base64.NewEncoder(base64.StdEncoding, buf)
60		io.WriteString(w, s)
61		w.Close()
62	} else {
63		enc := make([]byte, 3)
64		for i := 0; i < len(s); i++ {
65			b := s[i]
66			switch {
67			case b == ' ':
68				buf.WriteByte('_')
69			case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_':
70				buf.WriteByte(b)
71			default:
72				enc[0] = '='
73				enc[1] = upperhex[b>>4]
74				enc[2] = upperhex[b&0x0f]
75				buf.Write(enc)
76			}
77		}
78	}
79	buf.WriteString("?=")
80	return buf.String()
81}
82
83const upperhex = "0123456789ABCDEF"
84
// A WordDecoder decodes MIME headers containing RFC 2047 encoded-words.
type WordDecoder struct {
	// CharsetReader, if non-nil, defines a function to generate
	// charset-conversion readers, converting from the provided
	// charset into UTF-8.
	// Charsets are always lower-case. utf-8, iso-8859-1 and us-ascii charsets
	// are handled by default.
	// One of the CharsetReader's result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
95
96// Decode decodes an encoded-word. If word is not a valid RFC 2047 encoded-word,
97// word is returned unchanged.
98func (d *WordDecoder) Decode(word string) (string, error) {
99	fields := strings.Split(word, "?") // TODO: remove allocation?
100	if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 {
101		return "", errInvalidWord
102	}
103
104	content, err := decode(fields[2][0], fields[3])
105	if err != nil {
106		return "", err
107	}
108
109	buf := getBuffer()
110	defer putBuffer(buf)
111
112	if err := d.convert(buf, fields[1], content); err != nil {
113		return "", err
114	}
115
116	return buf.String(), nil
117}
118
// DecodeHeader decodes all encoded-words of the given string and returns the
// result, leaving the text around them untouched. Sequences that start like an
// encoded-word but are malformed or cannot be decoded are copied as is.
//
// A non-nil error is returned only when the content of an encoded-word cannot
// be converted to UTF-8: the charset is not handled by default and
// CharsetReader of d is nil, CharsetReader returns an error, or reading from
// the reader it returned fails.
func (d *WordDecoder) DecodeHeader(header string) (string, error) {
	// If there is no encoded-word, returns before creating a buffer.
	i := strings.Index(header, "=?")
	if i == -1 {
		return header, nil
	}

	buf := getBuffer()
	defer putBuffer(buf)

	// Copy the text preceding the first candidate encoded-word.
	buf.WriteString(header[:i])
	header = header[i:]

	// betweenWords is true when the last thing consumed was a successfully
	// decoded encoded-word.
	betweenWords := false
	for {
		// Each break below leaves the unparsed remainder in header; it is
		// appended verbatim after the loop.
		start := strings.Index(header, "=?")
		if start == -1 {
			break
		}
		cur := start + len("=?")

		// The charset runs up to the next '?'.
		i := strings.Index(header[cur:], "?")
		if i == -1 {
			break
		}
		charset := header[cur : cur+i]
		cur += i + len("?")

		// There must be room left for the encoding byte, a '?' and the
		// closing "?=".
		if len(header) < cur+len("Q??=") {
			break
		}
		encoding := header[cur]
		cur++

		if header[cur] != '?' {
			break
		}
		cur++

		// The encoded text runs up to the closing "?=".
		j := strings.Index(header[cur:], "?=")
		if j == -1 {
			break
		}
		text := header[cur : cur+j]
		end := cur + j + len("?=")

		content, err := decode(encoding, text)
		if err != nil {
			// Not a decodable encoded-word: emit everything up to and
			// including this "=?" and resume scanning right after it.
			betweenWords = false
			buf.WriteString(header[:start+2])
			header = header[start+2:]
			continue
		}

		// Write characters before the encoded-word. White-space and newline
		// characters separating two encoded-words must be deleted.
		if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) {
			buf.WriteString(header[:start])
		}

		if err := d.convert(buf, charset, content); err != nil {
			return "", err
		}

		header = header[end:]
		betweenWords = true
	}

	// Append whatever follows the last encoded-word handled by the loop.
	if len(header) > 0 {
		buf.WriteString(header)
	}

	return buf.String(), nil
}
195
196func decode(encoding byte, text string) ([]byte, error) {
197	switch encoding {
198	case 'B', 'b':
199		return base64.StdEncoding.DecodeString(text)
200	case 'Q', 'q':
201		return qDecode(text)
202	}
203	return nil, errInvalidWord
204}
205
206func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error {
207	switch {
208	case strings.EqualFold("utf-8", charset):
209		buf.Write(content)
210	case strings.EqualFold("iso-8859-1", charset):
211		for _, c := range content {
212			buf.WriteRune(rune(c))
213		}
214	case strings.EqualFold("us-ascii", charset):
215		for _, c := range content {
216			if c >= utf8.RuneSelf {
217				buf.WriteRune(unicode.ReplacementChar)
218			} else {
219				buf.WriteByte(c)
220			}
221		}
222	default:
223		if d.CharsetReader == nil {
224			return fmt.Errorf("mime: unhandled charset %q", charset)
225		}
226		r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content))
227		if err != nil {
228			return err
229		}
230		if _, err = buf.ReadFrom(r); err != nil {
231			return err
232		}
233	}
234	return nil
235}
236
// hasNonWhitespace reports whether s (assumed to be ASCII) contains anything
// other than spaces, tabs, line feeds and carriage returns.
func hasNonWhitespace(s string) bool {
	// Encoded-words can only be separated by linear white space, which does
	// not include vertical tabs (\v).
	const linearWhitespace = " \t\n\r"
	return strings.Trim(s, linearWhitespace) != ""
}
251
252// qDecode decodes a Q encoded string.
253func qDecode(s string) ([]byte, error) {
254	dec := make([]byte, len(s))
255	n := 0
256	for i := 0; i < len(s); i++ {
257		switch c := s[i]; {
258		case c == '_':
259			dec[n] = ' '
260		case c == '=':
261			if i+2 >= len(s) {
262				return nil, errInvalidWord
263			}
264			b, err := readHexByte(s[i+1], s[i+2])
265			if err != nil {
266				return nil, err
267			}
268			dec[n] = b
269			i += 2
270		case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t':
271			dec[n] = c
272		default:
273			return nil, errInvalidWord
274		}
275		n++
276	}
277
278	return dec[:n], nil
279}
280