1// Copyright 2012 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package quotedprintable implements quoted-printable encoding as specified by
6// RFC 2045.
7package quotedprintable
8
9import (
10	"bufio"
11	"bytes"
12	"fmt"
13	"io"
14)
15
// Reader is a quoted-printable decoder.
//
// It reads its input one line at a time through a bufio.Reader and hands
// out the decoded bytes of the current line on each call to Read.
type Reader struct {
	br   *bufio.Reader
	rerr error  // last read error
	line []byte // to be consumed before more of br
}
22
23// NewReader returns a quoted-printable reader, decoding from r.
24func NewReader(r io.Reader) *Reader {
25	return &Reader{
26		br: bufio.NewReader(r),
27	}
28}
29
// fromHex converts a single ASCII hexadecimal digit to its numeric value
// (0-15). Any other byte yields an error naming the offending byte.
func fromHex(b byte) (byte, error) {
	if '0' <= b && b <= '9' {
		return b - '0', nil
	}
	if 'A' <= b && b <= 'F' {
		return 10 + (b - 'A'), nil
	}
	// Lowercase digits are accepted too, tolerating badly encoded input.
	if 'a' <= b && b <= 'f' {
		return 10 + (b - 'a'), nil
	}
	return 0, fmt.Errorf("quotedprintable: invalid hex byte 0x%02x", b)
}
42
43func readHexByte(v []byte) (b byte, err error) {
44	if len(v) < 2 {
45		return 0, io.ErrUnexpectedEOF
46	}
47	var hb, lb byte
48	if hb, err = fromHex(v[0]); err != nil {
49		return 0, err
50	}
51	if lb, err = fromHex(v[1]); err != nil {
52		return 0, err
53	}
54	return hb<<4 | lb, nil
55}
56
// isQPDiscardWhitespace reports whether r is one of the characters that is
// trimmed from the end of an input line: space, tab, carriage return or
// line feed.
func isQPDiscardWhitespace(r rune) bool {
	return r == ' ' || r == '\t' || r == '\r' || r == '\n'
}
64
// Byte sequences the decoder matches against the ends of input lines.
var (
	// crlf is a carriage return followed by a line feed.
	crlf = []byte{'\r', '\n'}
	// lf is a bare line feed.
	lf = []byte{'\n'}
	// softSuffix is the '=' that ends a line carrying a soft line break.
	softSuffix = []byte{'='}
)
70
// Read reads and decodes quoted-printable data from the underlying reader.
//
// Decoded bytes are written to p and their count is returned in n. Input is
// handled one line at a time: a line is fetched into r.line, normalized, and
// then decoded byte by byte until either p is full (n, nil is returned) or
// the line is exhausted and another one must be fetched. An error from the
// underlying reader is remembered in r.rerr and only returned once every
// byte of the line that came with it has been consumed.
func (r *Reader) Read(p []byte) (n int, err error) {
	// Deviations from RFC 2045:
	// 1. in addition to "=\r\n", "=\n" is also treated as soft line break.
	// 2. it will pass through a '\r' or '\n' not preceded by '=', consistent
	//    with other broken QP encoders & decoders.
	// 3. it accepts soft line-break (=) at end of message (issue 15486); i.e.
	//    the final byte read from the underlying reader is allowed to be '=',
	//    and it will be silently ignored.
	// 4. it takes = as literal = if not followed by two hex digits
	//    but not at end of line (issue 13219).
	for len(p) > 0 {
		if len(r.line) == 0 {
			// The current line is used up: report any pending read error,
			// otherwise fetch and normalize the next line.
			if r.rerr != nil {
				return n, r.rerr
			}
			// NOTE(review): per the bufio documentation ReadSlice returns
			// bufio.ErrBufferFull when a line does not fit in its buffer;
			// that error would be stored in rerr like any other — confirm
			// this is the intended handling of over-long lines.
			r.line, r.rerr = r.br.ReadSlice('\n')

			// Does the line end in CRLF instead of just LF?
			hasLF := bytes.HasSuffix(r.line, lf)
			hasCR := bytes.HasSuffix(r.line, crlf)
			wholeLine := r.line
			// Drop trailing spaces, tabs, CRs and LFs from the line.
			r.line = bytes.TrimRightFunc(wholeLine, isQPDiscardWhitespace)
			if bytes.HasSuffix(r.line, softSuffix) {
				// Soft line break: remove the trailing '=' and emit no line
				// ending. rightStripped is whatever was trimmed after the '='.
				rightStripped := wholeLine[len(r.line):]
				r.line = r.line[:len(r.line)-1]
				// The '=' must be directly followed by LF or CRLF. The only
				// other accepted form is an '=' that ends the input (nothing
				// trimmed, EOF reached) with at least one byte before it on
				// the line. Anything else is recorded as an error, which is
				// returned after the rest of the line has been consumed.
				if !bytes.HasPrefix(rightStripped, lf) && !bytes.HasPrefix(rightStripped, crlf) &&
					!(len(rightStripped) == 0 && len(r.line) > 0 && r.rerr == io.EOF) {
					r.rerr = fmt.Errorf("quotedprintable: invalid bytes after =: %q", rightStripped)
				}
			} else if hasLF {
				// Hard line break: put back the line ending the input used,
				// now without the whitespace that preceded it.
				if hasCR {
					r.line = append(r.line, '\r', '\n')
				} else {
					r.line = append(r.line, '\n')
				}
			}
			// Re-enter the loop: the normalized line may be empty.
			continue
		}
		b := r.line[0]

		switch {
		case b == '=':
			// Escape sequence: '=' followed by two hex digits.
			b, err = readHexByte(r.line[1:])
			if err != nil {
				if len(r.line) >= 2 && r.line[1] != '\r' && r.line[1] != '\n' {
					// Take the = as a literal =.
					b = '='
					break
				}
				return n, err
			}
			r.line = r.line[2:] // 2 of the 3; other 1 is done below
		case b == '\t' || b == '\r' || b == '\n':
			// Tab, CR and LF are passed through unchanged.
			break
		case b >= 0x80:
			// As an extension to RFC 2045, we accept
			// values >= 0x80 without complaint. Issue 22597.
			break
		case b < ' ' || b > '~':
			// Any remaining byte outside ' '..'~' is rejected.
			return n, fmt.Errorf("quotedprintable: invalid unescaped byte 0x%02x in body", b)
		}
		// Emit the decoded byte and advance past the input byte.
		p[0] = b
		p = p[1:]
		r.line = r.line[1:]
		n++
	}
	return n, nil
}
140