1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package hpack implements HPACK, a compression format for
6// efficiently representing HTTP header fields in the context of HTTP/2.
7//
8// See http://tools.ietf.org/html/draft-ietf-httpbis-header-compression-09
9package hpack
10
11import (
12	"bytes"
13	"errors"
14	"fmt"
15)
16
17// A DecodingError is something the spec defines as a decoding error.
18type DecodingError struct {
19	Err error
20}
21
22func (de DecodingError) Error() string {
23	return fmt.Sprintf("decoding error: %v", de.Err)
24}
25
26// An InvalidIndexError is returned when an encoder references a table
27// entry before the static table or after the end of the dynamic table.
28type InvalidIndexError int
29
30func (e InvalidIndexError) Error() string {
31	return fmt.Sprintf("invalid indexed representation index %d", int(e))
32}
33
34// A HeaderField is a name-value pair. Both the name and value are
35// treated as opaque sequences of octets.
36type HeaderField struct {
37	Name, Value string
38
39	// Sensitive means that this header field should never be
40	// indexed.
41	Sensitive bool
42}
43
44// IsPseudo reports whether the header field is an http2 pseudo header.
45// That is, it reports whether it starts with a colon.
46// It is not otherwise guaranteed to be a valid pseudo header field,
47// though.
48func (hf HeaderField) IsPseudo() bool {
49	return len(hf.Name) != 0 && hf.Name[0] == ':'
50}
51
52func (hf HeaderField) String() string {
53	var suffix string
54	if hf.Sensitive {
55		suffix = " (sensitive)"
56	}
57	return fmt.Sprintf("header field %q = %q%s", hf.Name, hf.Value, suffix)
58}
59
60// Size returns the size of an entry per RFC 7541 section 4.1.
61func (hf HeaderField) Size() uint32 {
62	// http://http2.github.io/http2-spec/compression.html#rfc.section.4.1
63	// "The size of the dynamic table is the sum of the size of
64	// its entries. The size of an entry is the sum of its name's
65	// length in octets (as defined in Section 5.2), its value's
66	// length in octets (see Section 5.2), plus 32.  The size of
67	// an entry is calculated using the length of the name and
68	// value without any Huffman encoding applied."
69
70	// This can overflow if somebody makes a large HeaderField
71	// Name and/or Value by hand, but we don't care, because that
72	// won't happen on the wire because the encoding doesn't allow
73	// it.
74	return uint32(len(hf.Name) + len(hf.Value) + 32)
75}
76
77// A Decoder is the decoding context for incremental processing of
78// header blocks.
79type Decoder struct {
80	dynTab dynamicTable
81	emit   func(f HeaderField)
82
83	emitEnabled bool // whether calls to emit are enabled
84	maxStrLen   int  // 0 means unlimited
85
86	// buf is the unparsed buffer. It's only written to
87	// saveBuf if it was truncated in the middle of a header
88	// block. Because it's usually not owned, we can only
89	// process it under Write.
90	buf []byte // not owned; only valid during Write
91
92	// saveBuf is previous data passed to Write which we weren't able
93	// to fully parse before. Unlike buf, we own this data.
94	saveBuf bytes.Buffer
95}
96
97// NewDecoder returns a new decoder with the provided maximum dynamic
98// table size. The emitFunc will be called for each valid field
99// parsed, in the same goroutine as calls to Write, before Write returns.
100func NewDecoder(maxDynamicTableSize uint32, emitFunc func(f HeaderField)) *Decoder {
101	d := &Decoder{
102		emit:        emitFunc,
103		emitEnabled: true,
104	}
105	d.dynTab.table.init()
106	d.dynTab.allowedMaxSize = maxDynamicTableSize
107	d.dynTab.setMaxSize(maxDynamicTableSize)
108	return d
109}
110
111// ErrStringLength is returned by Decoder.Write when the max string length
112// (as configured by Decoder.SetMaxStringLength) would be violated.
113var ErrStringLength = errors.New("hpack: string too long")
114
115// SetMaxStringLength sets the maximum size of a HeaderField name or
116// value string. If a string exceeds this length (even after any
117// decompression), Write will return ErrStringLength.
118// A value of 0 means unlimited and is the default from NewDecoder.
119func (d *Decoder) SetMaxStringLength(n int) {
120	d.maxStrLen = n
121}
122
123// SetEmitFunc changes the callback used when new header fields
124// are decoded.
125// It must be non-nil. It does not affect EmitEnabled.
126func (d *Decoder) SetEmitFunc(emitFunc func(f HeaderField)) {
127	d.emit = emitFunc
128}
129
130// SetEmitEnabled controls whether the emitFunc provided to NewDecoder
131// should be called. The default is true.
132//
133// This facility exists to let servers enforce MAX_HEADER_LIST_SIZE
134// while still decoding and keeping in-sync with decoder state, but
135// without doing unnecessary decompression or generating unnecessary
136// garbage for header fields past the limit.
137func (d *Decoder) SetEmitEnabled(v bool) { d.emitEnabled = v }
138
139// EmitEnabled reports whether calls to the emitFunc provided to NewDecoder
140// are currently enabled. The default is true.
141func (d *Decoder) EmitEnabled() bool { return d.emitEnabled }
142
143// TODO: add method *Decoder.Reset(maxSize, emitFunc) to let callers re-use Decoders and their
144// underlying buffers for garbage reasons.
145
146func (d *Decoder) SetMaxDynamicTableSize(v uint32) {
147	d.dynTab.setMaxSize(v)
148}
149
150// SetAllowedMaxDynamicTableSize sets the upper bound that the encoded
151// stream (via dynamic table size updates) may set the maximum size
152// to.
153func (d *Decoder) SetAllowedMaxDynamicTableSize(v uint32) {
154	d.dynTab.allowedMaxSize = v
155}
156
157type dynamicTable struct {
158	// http://http2.github.io/http2-spec/compression.html#rfc.section.2.3.2
159	table          headerFieldTable
160	size           uint32 // in bytes
161	maxSize        uint32 // current maxSize
162	allowedMaxSize uint32 // maxSize may go up to this, inclusive
163}
164
165func (dt *dynamicTable) setMaxSize(v uint32) {
166	dt.maxSize = v
167	dt.evict()
168}
169
170func (dt *dynamicTable) add(f HeaderField) {
171	dt.table.addEntry(f)
172	dt.size += f.Size()
173	dt.evict()
174}
175
176// If we're too big, evict old stuff.
177func (dt *dynamicTable) evict() {
178	var n int
179	for dt.size > dt.maxSize && n < dt.table.len() {
180		dt.size -= dt.table.ents[n].Size()
181		n++
182	}
183	dt.table.evictOldest(n)
184}
185
186func (d *Decoder) maxTableIndex() int {
187	// This should never overflow. RFC 7540 Section 6.5.2 limits the size of
188	// the dynamic table to 2^32 bytes, where each entry will occupy more than
189	// one byte. Further, the staticTable has a fixed, small length.
190	return d.dynTab.table.len() + staticTable.len()
191}
192
193func (d *Decoder) at(i uint64) (hf HeaderField, ok bool) {
194	// See Section 2.3.3.
195	if i == 0 {
196		return
197	}
198	if i <= uint64(staticTable.len()) {
199		return staticTable.ents[i-1], true
200	}
201	if i > uint64(d.maxTableIndex()) {
202		return
203	}
204	// In the dynamic table, newer entries have lower indices.
205	// However, dt.ents[0] is the oldest entry. Hence, dt.ents is
206	// the reversed dynamic table.
207	dt := d.dynTab.table
208	return dt.ents[dt.len()-(int(i)-staticTable.len())], true
209}
210
211// Decode decodes an entire block.
212//
213// TODO: remove this method and make it incremental later? This is
214// easier for debugging now.
215func (d *Decoder) DecodeFull(p []byte) ([]HeaderField, error) {
216	var hf []HeaderField
217	saveFunc := d.emit
218	defer func() { d.emit = saveFunc }()
219	d.emit = func(f HeaderField) { hf = append(hf, f) }
220	if _, err := d.Write(p); err != nil {
221		return nil, err
222	}
223	if err := d.Close(); err != nil {
224		return nil, err
225	}
226	return hf, nil
227}
228
229func (d *Decoder) Close() error {
230	if d.saveBuf.Len() > 0 {
231		d.saveBuf.Reset()
232		return DecodingError{errors.New("truncated headers")}
233	}
234	return nil
235}
236
237func (d *Decoder) Write(p []byte) (n int, err error) {
238	if len(p) == 0 {
239		// Prevent state machine CPU attacks (making us redo
240		// work up to the point of finding out we don't have
241		// enough data)
242		return
243	}
244	// Only copy the data if we have to. Optimistically assume
245	// that p will contain a complete header block.
246	if d.saveBuf.Len() == 0 {
247		d.buf = p
248	} else {
249		d.saveBuf.Write(p)
250		d.buf = d.saveBuf.Bytes()
251		d.saveBuf.Reset()
252	}
253
254	for len(d.buf) > 0 {
255		err = d.parseHeaderFieldRepr()
256		if err == errNeedMore {
257			// Extra paranoia, making sure saveBuf won't
258			// get too large. All the varint and string
259			// reading code earlier should already catch
260			// overlong things and return ErrStringLength,
261			// but keep this as a last resort.
262			const varIntOverhead = 8 // conservative
263			if d.maxStrLen != 0 && int64(len(d.buf)) > 2*(int64(d.maxStrLen)+varIntOverhead) {
264				return 0, ErrStringLength
265			}
266			d.saveBuf.Write(d.buf)
267			return len(p), nil
268		}
269		if err != nil {
270			break
271		}
272	}
273	return len(p), err
274}
275
276// errNeedMore is an internal sentinel error value that means the
277// buffer is truncated and we need to read more data before we can
278// continue parsing.
279var errNeedMore = errors.New("need more data")
280
281type indexType int
282
283const (
284	indexedTrue indexType = iota
285	indexedFalse
286	indexedNever
287)
288
289func (v indexType) indexed() bool   { return v == indexedTrue }
290func (v indexType) sensitive() bool { return v == indexedNever }
291
292// returns errNeedMore if there isn't enough data available.
293// any other error is fatal.
294// consumes d.buf iff it returns nil.
295// precondition: must be called with len(d.buf) > 0
296func (d *Decoder) parseHeaderFieldRepr() error {
297	b := d.buf[0]
298	switch {
299	case b&128 != 0:
300		// Indexed representation.
301		// High bit set?
302		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.1
303		return d.parseFieldIndexed()
304	case b&192 == 64:
305		// 6.2.1 Literal Header Field with Incremental Indexing
306		// 0b10xxxxxx: top two bits are 10
307		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.1
308		return d.parseFieldLiteral(6, indexedTrue)
309	case b&240 == 0:
310		// 6.2.2 Literal Header Field without Indexing
311		// 0b0000xxxx: top four bits are 0000
312		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.2
313		return d.parseFieldLiteral(4, indexedFalse)
314	case b&240 == 16:
315		// 6.2.3 Literal Header Field never Indexed
316		// 0b0001xxxx: top four bits are 0001
317		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.2.3
318		return d.parseFieldLiteral(4, indexedNever)
319	case b&224 == 32:
320		// 6.3 Dynamic Table Size Update
321		// Top three bits are '001'.
322		// http://http2.github.io/http2-spec/compression.html#rfc.section.6.3
323		return d.parseDynamicTableSizeUpdate()
324	}
325
326	return DecodingError{errors.New("invalid encoding")}
327}
328
329// (same invariants and behavior as parseHeaderFieldRepr)
330func (d *Decoder) parseFieldIndexed() error {
331	buf := d.buf
332	idx, buf, err := readVarInt(7, buf)
333	if err != nil {
334		return err
335	}
336	hf, ok := d.at(idx)
337	if !ok {
338		return DecodingError{InvalidIndexError(idx)}
339	}
340	d.buf = buf
341	return d.callEmit(HeaderField{Name: hf.Name, Value: hf.Value})
342}
343
344// (same invariants and behavior as parseHeaderFieldRepr)
345func (d *Decoder) parseFieldLiteral(n uint8, it indexType) error {
346	buf := d.buf
347	nameIdx, buf, err := readVarInt(n, buf)
348	if err != nil {
349		return err
350	}
351
352	var hf HeaderField
353	wantStr := d.emitEnabled || it.indexed()
354	if nameIdx > 0 {
355		ihf, ok := d.at(nameIdx)
356		if !ok {
357			return DecodingError{InvalidIndexError(nameIdx)}
358		}
359		hf.Name = ihf.Name
360	} else {
361		hf.Name, buf, err = d.readString(buf, wantStr)
362		if err != nil {
363			return err
364		}
365	}
366	hf.Value, buf, err = d.readString(buf, wantStr)
367	if err != nil {
368		return err
369	}
370	d.buf = buf
371	if it.indexed() {
372		d.dynTab.add(hf)
373	}
374	hf.Sensitive = it.sensitive()
375	return d.callEmit(hf)
376}
377
378func (d *Decoder) callEmit(hf HeaderField) error {
379	if d.maxStrLen != 0 {
380		if len(hf.Name) > d.maxStrLen || len(hf.Value) > d.maxStrLen {
381			return ErrStringLength
382		}
383	}
384	if d.emitEnabled {
385		d.emit(hf)
386	}
387	return nil
388}
389
390// (same invariants and behavior as parseHeaderFieldRepr)
391func (d *Decoder) parseDynamicTableSizeUpdate() error {
392	// RFC 7541, sec 4.2: This dynamic table size update MUST occur at the
393	// beginning of the first header block following the change to the dynamic table size.
394	if d.dynTab.size > 0 {
395		return DecodingError{errors.New("dynamic table size update MUST occur at the beginning of a header block")}
396	}
397
398	buf := d.buf
399	size, buf, err := readVarInt(5, buf)
400	if err != nil {
401		return err
402	}
403	if size > uint64(d.dynTab.allowedMaxSize) {
404		return DecodingError{errors.New("dynamic table size update too large")}
405	}
406	d.dynTab.setMaxSize(uint32(size))
407	d.buf = buf
408	return nil
409}
410
411var errVarintOverflow = DecodingError{errors.New("varint integer overflow")}
412
413// readVarInt reads an unsigned variable length integer off the
414// beginning of p. n is the parameter as described in
415// http://http2.github.io/http2-spec/compression.html#rfc.section.5.1.
416//
417// n must always be between 1 and 8.
418//
419// The returned remain buffer is either a smaller suffix of p, or err != nil.
420// The error is errNeedMore if p doesn't contain a complete integer.
421func readVarInt(n byte, p []byte) (i uint64, remain []byte, err error) {
422	if n < 1 || n > 8 {
423		panic("bad n")
424	}
425	if len(p) == 0 {
426		return 0, p, errNeedMore
427	}
428	i = uint64(p[0])
429	if n < 8 {
430		i &= (1 << uint64(n)) - 1
431	}
432	if i < (1<<uint64(n))-1 {
433		return i, p[1:], nil
434	}
435
436	origP := p
437	p = p[1:]
438	var m uint64
439	for len(p) > 0 {
440		b := p[0]
441		p = p[1:]
442		i += uint64(b&127) << m
443		if b&128 == 0 {
444			return i, p, nil
445		}
446		m += 7
447		if m >= 63 { // TODO: proper overflow check. making this up.
448			return 0, origP, errVarintOverflow
449		}
450	}
451	return 0, origP, errNeedMore
452}
453
454// readString decodes an hpack string from p.
455//
456// wantStr is whether s will be used. If false, decompression and
457// []byte->string garbage are skipped if s will be ignored
458// anyway. This does mean that huffman decoding errors for non-indexed
459// strings past the MAX_HEADER_LIST_SIZE are ignored, but the server
460// is returning an error anyway, and because they're not indexed, the error
461// won't affect the decoding state.
462func (d *Decoder) readString(p []byte, wantStr bool) (s string, remain []byte, err error) {
463	if len(p) == 0 {
464		return "", p, errNeedMore
465	}
466	isHuff := p[0]&128 != 0
467	strLen, p, err := readVarInt(7, p)
468	if err != nil {
469		return "", p, err
470	}
471	if d.maxStrLen != 0 && strLen > uint64(d.maxStrLen) {
472		return "", nil, ErrStringLength
473	}
474	if uint64(len(p)) < strLen {
475		return "", p, errNeedMore
476	}
477	if !isHuff {
478		if wantStr {
479			s = string(p[:strLen])
480		}
481		return s, p[strLen:], nil
482	}
483
484	if wantStr {
485		buf := bufPool.Get().(*bytes.Buffer)
486		buf.Reset() // don't trust others
487		defer bufPool.Put(buf)
488		if err := huffmanDecode(buf, d.maxStrLen, p[:strLen]); err != nil {
489			buf.Reset()
490			return "", nil, err
491		}
492		s = buf.String()
493		buf.Reset() // be nice to GC
494	}
495	return s, p[strLen:], nil
496}
497