package brotli

import "encoding/binary"

/* Copyright 2015 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* Function for fast encoding of an input fragment, independently of the input
   history. This function uses one-pass processing: when we find a backward
   match, we immediately emit the corresponding command and literal codes to
   the bit stream.

   Adapted from the CompressFragment() function in
   https://github.com/google/snappy/blob/master/snappy.cc */

const maxDistance_compress_fragment = 262128

/* hash5 computes a (64-shift)-bit hash of the first 5 bytes at p. The 8-byte
   little-endian load is shifted left by 24 so that only the low 5 bytes
   influence the result. */
func hash5(p []byte, shift uint) uint32 {
	var h uint64 = (binary.LittleEndian.Uint64(p) << 24) * uint64(kHashMul32)
	return uint32(h >> shift)
}

/* hashBytesAtOffset5 computes the same hash as hash5 for the 5 bytes starting
   at byte "offset" (0..3) of the little-endian value v. */
func hashBytesAtOffset5(v uint64, offset int, shift uint) uint32 {
	assert(offset >= 0)
	assert(offset <= 3)
	{
		var h uint64 = ((v >> uint(8*offset)) << 24) * uint64(kHashMul32)
		return uint32(h >> shift)
	}
}

/* isMatch5 reports whether the 5-byte sequences at p1 and p2 are equal. */
func isMatch5(p1 []byte, p2 []byte) bool {
	return binary.LittleEndian.Uint32(p1) == binary.LittleEndian.Uint32(p2) &&
		p1[4] == p2[4]
}
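
/* Illustrative sketch, not part of the original source: with a 9-bit hash
   table the shift is 64-9 = 55, so hash5 maps any 5-byte window into
   [0, 1<<9). The trailing padding only satisfies the 8-byte unaligned load. */
func exampleHash5() uint32 {
	window := []byte{'h', 'e', 'l', 'l', 'o', 0, 0, 0}
	return hash5(window, 64-9)
}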

/* Builds a literal prefix code into "depths" and "bits" based on the statistics
   of the "input" string and stores it into the bit stream.
   Note that the prefix code here is built from the pre-LZ77 input, therefore
   we can only approximate the statistics of the actual literal stream.
   Moreover, for long inputs we build a histogram from a sample of the input
   and thus have to assign a non-zero depth to each literal.
   Returns the estimated compression ratio, in millibytes per character, for
   encoding the given input with the generated code. */
func buildAndStoreLiteralPrefixCode(input []byte, input_size uint, depths []byte, bits []uint16, storage_ix *uint, storage []byte) uint {
	var histogram [256]uint32
	var histogram_total uint
	var i uint
	if input_size < 1<<15 {
		for i = 0; i < input_size; i++ {
			histogram[input[i]]++
		}

		histogram_total = input_size
		for i = 0; i < 256; i++ {
			/* We weigh the first 11 samples with weight 3 to account for the
			   balancing effect of the LZ77 phase on the histogram. */
			var adjust uint32 = 2 * brotli_min_uint32_t(histogram[i], 11)
			histogram[i] += adjust
			histogram_total += uint(adjust)
		}
	} else {
		const kSampleRate uint = 29
		for i = 0; i < input_size; i += kSampleRate {
			histogram[input[i]]++
		}

		histogram_total = (input_size + kSampleRate - 1) / kSampleRate
		for i = 0; i < 256; i++ {
			/* We add 1 to each population count to avoid 0 bit depths (since this is
			   only a sample and we don't know if the symbol appears or not), and we
			   weigh the first 11 samples with weight 3 to account for the balancing
			   effect of the LZ77 phase on the histogram (more frequent symbols are
			   more likely to be in backward references instead of as literals). */
			var adjust uint32 = 1 + 2*brotli_min_uint32_t(histogram[i], 11)
			histogram[i] += adjust
			histogram_total += uint(adjust)
		}
	}

	buildAndStoreHuffmanTreeFast(histogram[:], histogram_total, /* max_bits = */ 8, depths, bits, storage_ix, storage)
	{
		var literal_ratio uint = 0
		for i = 0; i < 256; i++ {
			if histogram[i] != 0 {
				literal_ratio += uint(histogram[i] * uint32(depths[i]))
			}
		}

		/* Estimated encoding ratio, millibytes per symbol. */
		return (literal_ratio * 125) / histogram_total
	}
}
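
/* Editorial note, not in the original source: the returned ratio is the
   average code length in bits times 125, i.e. millibytes per literal, so a
   value of 980 corresponds to roughly 7.84 bits per byte.
   shouldUseUncompressedMode below uses that threshold to flag blocks whose
   literal stream is effectively incompressible. */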

/* Builds a command and distance prefix code (each 64 symbols) into "depth" and
   "bits" based on "histogram" and stores it into the bit stream. */
func buildAndStoreCommandPrefixCode1(histogram []uint32, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
	/* Tree size for building a tree over 64 symbols is 2 * 64 + 1. */
	var tree [129]huffmanTree
	var cmd_depth = [numCommandSymbols]byte{0}
	var cmd_bits [64]uint16

	createHuffmanTree(histogram, 64, 15, tree[:], depth)
	createHuffmanTree(histogram[64:], 64, 14, tree[:], depth[64:])

	/* We have to jump through a few hoops here in order to compute
	   the command bits because the symbols are in a different order than in
	   the full alphabet. This looks complicated, but having the symbols
	   in this order in the command bits saves a few branches in the Emit*
	   functions. */
	copy(cmd_depth[:], depth[:24])
	copy(cmd_depth[24:][:], depth[40:][:8])
	copy(cmd_depth[32:][:], depth[24:][:8])
	copy(cmd_depth[40:][:], depth[48:][:8])
	copy(cmd_depth[48:][:], depth[32:][:8])
	copy(cmd_depth[56:][:], depth[56:][:8])
	convertBitDepthsToSymbols(cmd_depth[:], 64, cmd_bits[:])
	copy(bits, cmd_bits[:24])
	copy(bits[24:], cmd_bits[32:][:8])
	copy(bits[32:], cmd_bits[48:][:8])
	copy(bits[40:], cmd_bits[24:][:8])
	copy(bits[48:], cmd_bits[40:][:8])
	copy(bits[56:], cmd_bits[56:][:8])
	convertBitDepthsToSymbols(depth[64:], 64, bits[64:])
	{
		/* Create the bit length array for the full command alphabet. */
		var i uint
		for i = 0; i < 64; i++ { /* only the first 64 values were used */
			cmd_depth[i] = 0
		}
		copy(cmd_depth[:], depth[:8])
		copy(cmd_depth[64:][:], depth[8:][:8])
		copy(cmd_depth[128:][:], depth[16:][:8])
		copy(cmd_depth[192:][:], depth[24:][:8])
		copy(cmd_depth[384:][:], depth[32:][:8])
		for i = 0; i < 8; i++ {
			cmd_depth[128+8*i] = depth[40+i]
			cmd_depth[256+8*i] = depth[48+i]
			cmd_depth[448+8*i] = depth[56+i]
		}

		storeHuffmanTree(cmd_depth[:], numCommandSymbols, tree[:], storage_ix, storage)
	}

	storeHuffmanTree(depth[64:], 64, tree[:], storage_ix, storage)
}
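
/* Editorial note, not in the original source: one concrete instance of the
   permutation above is depth[40:48], which is staged at cmd_depth[24:32]; the
   resulting codes cmd_bits[24:32] are then copied back into bits[40:48], so
   bits[i] always ends up holding the code for depth[i]. The detour changes
   only which canonical bit patterns the symbols receive. */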

/* REQUIRES: insertlen < 6210 */
func emitInsertLen1(insertlen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	if insertlen < 6 {
		var code uint = insertlen + 40
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		histo[code]++
	} else if insertlen < 130 {
		var tail uint = insertlen - 2
		var nbits uint32 = log2FloorNonZero(tail) - 1
		var prefix uint = tail >> nbits
		var inscode uint = uint((nbits << 1) + uint32(prefix) + 42)
		writeBits(uint(depth[inscode]), uint64(bits[inscode]), storage_ix, storage)
		writeBits(uint(nbits), uint64(tail)-(uint64(prefix)<<nbits), storage_ix, storage)
		histo[inscode]++
	} else if insertlen < 2114 {
		var tail uint = insertlen - 66
		var nbits uint32 = log2FloorNonZero(tail)
		var code uint = uint(nbits + 50)
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		writeBits(uint(nbits), uint64(tail)-(uint64(uint(1))<<nbits), storage_ix, storage)
		histo[code]++
	} else {
		writeBits(uint(depth[61]), uint64(bits[61]), storage_ix, storage)
		writeBits(12, uint64(insertlen)-2114, storage_ix, storage)
		histo[61]++
	}
}
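
/* Worked example (editorial, not in the original source): insertlen = 100
   takes the second branch: tail = 98, nbits = log2FloorNonZero(98)-1 = 5,
   prefix = 98>>5 = 3, so inscode = (5<<1)+3+42 = 55 and the 5 extra bits
   carry 98-(3<<5) = 2. */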

func emitLongInsertLen(insertlen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	if insertlen < 22594 {
		writeBits(uint(depth[62]), uint64(bits[62]), storage_ix, storage)
		writeBits(14, uint64(insertlen)-6210, storage_ix, storage)
		histo[62]++
	} else {
		writeBits(uint(depth[63]), uint64(bits[63]), storage_ix, storage)
		writeBits(24, uint64(insertlen)-22594, storage_ix, storage)
		histo[63]++
	}
}

func emitCopyLen1(copylen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	if copylen < 10 {
		writeBits(uint(depth[copylen+14]), uint64(bits[copylen+14]), storage_ix, storage)
		histo[copylen+14]++
	} else if copylen < 134 {
		var tail uint = copylen - 6
		var nbits uint32 = log2FloorNonZero(tail) - 1
		var prefix uint = tail >> nbits
		var code uint = uint((nbits << 1) + uint32(prefix) + 20)
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		writeBits(uint(nbits), uint64(tail)-(uint64(prefix)<<nbits), storage_ix, storage)
		histo[code]++
	} else if copylen < 2118 {
		var tail uint = copylen - 70
		var nbits uint32 = log2FloorNonZero(tail)
		var code uint = uint(nbits + 28)
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		writeBits(uint(nbits), uint64(tail)-(uint64(uint(1))<<nbits), storage_ix, storage)
		histo[code]++
	} else {
		writeBits(uint(depth[39]), uint64(bits[39]), storage_ix, storage)
		writeBits(24, uint64(copylen)-2118, storage_ix, storage)
		histo[39]++
	}
}
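
/* Worked example (editorial, not in the original source): copylen = 12 takes
   the second branch: tail = 6, nbits = log2FloorNonZero(6)-1 = 1,
   prefix = 6>>1 = 3, so code = (1<<1)+3+20 = 25 and the single extra bit
   carries 6-(3<<1) = 0. */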

func emitCopyLenLastDistance1(copylen uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	if copylen < 12 {
		writeBits(uint(depth[copylen-4]), uint64(bits[copylen-4]), storage_ix, storage)
		histo[copylen-4]++
	} else if copylen < 72 {
		var tail uint = copylen - 8
		var nbits uint32 = log2FloorNonZero(tail) - 1
		var prefix uint = tail >> nbits
		var code uint = uint((nbits << 1) + uint32(prefix) + 4)
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		writeBits(uint(nbits), uint64(tail)-(uint64(prefix)<<nbits), storage_ix, storage)
		histo[code]++
	} else if copylen < 136 {
		var tail uint = copylen - 8
		var code uint = (tail >> 5) + 30
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		writeBits(5, uint64(tail)&31, storage_ix, storage)
		writeBits(uint(depth[64]), uint64(bits[64]), storage_ix, storage)
		histo[code]++
		histo[64]++
	} else if copylen < 2120 {
		var tail uint = copylen - 72
		var nbits uint32 = log2FloorNonZero(tail)
		var code uint = uint(nbits + 28)
		writeBits(uint(depth[code]), uint64(bits[code]), storage_ix, storage)
		writeBits(uint(nbits), uint64(tail)-(uint64(uint(1))<<nbits), storage_ix, storage)
		writeBits(uint(depth[64]), uint64(bits[64]), storage_ix, storage)
		histo[code]++
		histo[64]++
	} else {
		writeBits(uint(depth[39]), uint64(bits[39]), storage_ix, storage)
		writeBits(24, uint64(copylen)-2120, storage_ix, storage)
		writeBits(uint(depth[64]), uint64(bits[64]), storage_ix, storage)
		histo[39]++
		histo[64]++
	}
}
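
/* Editorial note, not in the original source: symbol 64 is the first entry of
   the distance half of the combined alphabet, i.e. distance code 0 ("repeat
   the last distance"). The branches above that use plain copy-length codes
   emit depth[64]/bits[64] explicitly, while the combined codes in the first
   two branches imply the repeated distance. */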

func emitDistance1(distance uint, depth []byte, bits []uint16, histo []uint32, storage_ix *uint, storage []byte) {
	var d uint = distance + 3
	var nbits uint32 = log2FloorNonZero(d) - 1
	var prefix uint = (d >> nbits) & 1
	var offset uint = (2 + prefix) << nbits
	var distcode uint = uint(2*(nbits-1) + uint32(prefix) + 80)
	writeBits(uint(depth[distcode]), uint64(bits[distcode]), storage_ix, storage)
	writeBits(uint(nbits), uint64(d)-uint64(offset), storage_ix, storage)
	histo[distcode]++
}
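
/* Worked example (editorial, not in the original source): distance = 10 gives
   d = 13, nbits = log2FloorNonZero(13)-1 = 2, prefix = (13>>2)&1 = 1,
   offset = (2+1)<<2 = 12, so distcode = 2*(2-1)+1+80 = 83 and the 2 extra
   bits carry 13-12 = 1. */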

func emitLiterals(input []byte, len uint, depth []byte, bits []uint16, storage_ix *uint, storage []byte) {
	var j uint
	for j = 0; j < len; j++ {
		var lit byte = input[j]
		writeBits(uint(depth[lit]), uint64(bits[lit]), storage_ix, storage)
	}
}

/* REQUIRES: len <= 1 << 24. */
func storeMetaBlockHeader1(len uint, is_uncompressed bool, storage_ix *uint, storage []byte) {
	var nibbles uint = 6

	/* ISLAST */
	writeBits(1, 0, storage_ix, storage)

	if len <= 1<<16 {
		nibbles = 4
	} else if len <= 1<<20 {
		nibbles = 5
	}

	writeBits(2, uint64(nibbles)-4, storage_ix, storage)
	writeBits(nibbles*4, uint64(len)-1, storage_ix, storage)

	/* ISUNCOMPRESSED */
	writeSingleBit(is_uncompressed, storage_ix, storage)
}
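
/* Worked example (editorial, not in the original source): len = 70000 is
   above 1<<16 but at most 1<<20, so MNIBBLES = 5: the header is ISLAST = 0
   (1 bit), MNIBBLES-4 = 1 (2 bits), MLEN-1 = 69999 (20 bits), then the
   ISUNCOMPRESSED bit. */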

func updateBits(n_bits uint, bits uint32, pos uint, array []byte) {
	for n_bits > 0 {
		var byte_pos uint = pos >> 3
		var n_unchanged_bits uint = pos & 7
		var n_changed_bits uint = brotli_min_size_t(n_bits, 8-n_unchanged_bits)
		var total_bits uint = n_unchanged_bits + n_changed_bits
		var mask uint32 = (^((1 << total_bits) - 1)) | ((1 << n_unchanged_bits) - 1)
		var unchanged_bits uint32 = uint32(array[byte_pos]) & mask
		var changed_bits uint32 = bits & ((1 << n_changed_bits) - 1)
		array[byte_pos] = byte(changed_bits<<n_unchanged_bits | unchanged_bits)
		n_bits -= n_changed_bits
		bits >>= n_changed_bits
		pos += n_changed_bits
	}
}
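
/* Illustrative sketch, not part of the original source: patch a 4-bit field
   that starts at bit 6 of a two-byte buffer, crossing the byte boundary. */
func exampleUpdateBits() []byte {
	array := []byte{0xFF, 0xFF}
	updateBits(4, 0x0, 6, array) /* clears bits 6..9 */
	return array                 /* {0x3F, 0xFC}: the other 12 bits survive */
}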

func rewindBitPosition1(new_storage_ix uint, storage_ix *uint, storage []byte) {
	var bitpos uint = new_storage_ix & 7
	var mask uint = (1 << bitpos) - 1
	storage[new_storage_ix>>3] &= byte(mask)
	*storage_ix = new_storage_ix
}

var shouldMergeBlock_kSampleRate uint = 43

func shouldMergeBlock(data []byte, len uint, depths []byte) bool {
	var histo [256]uint
	var i uint
	for i = 0; i < len; i += shouldMergeBlock_kSampleRate {
		histo[data[i]]++
	}
	{
		var total uint = (len + shouldMergeBlock_kSampleRate - 1) / shouldMergeBlock_kSampleRate
		var r float64 = (fastLog2(total)+0.5)*float64(total) + 200
		for i = 0; i < 256; i++ {
			r -= float64(histo[i]) * (float64(depths[i]) + fastLog2(histo[i]))
		}

		return r >= 0.0
	}
}
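
/* Editorial note, not in the original source: rearranging the test, the block
   is merged when sum(histo[i]*depths[i]) <= total*H + total/2 + 200, where H
   is the entropy of the sampled bytes. In words: keep extending the current
   meta-block as long as its existing literal code is within half a bit per
   sample (plus ~200 bits of slack for the avoided block header) of an ideal
   code for the new data. */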

func shouldUseUncompressedMode(metablock_start []byte, next_emit []byte, insertlen uint, literal_ratio uint) bool {
	var compressed uint = uint(-cap(next_emit) + cap(metablock_start))
	if compressed*50 > insertlen {
		return false
	} else {
		return literal_ratio > 980
	}
}
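
/* Editorial note, not in the original source: the cap() arithmetic above is
   the Go translation of C pointer subtraction. metablock_start and next_emit
   are re-slices of the same backing array, so cap(metablock_start)-cap(next_emit)
   is the byte distance between the two positions, i.e. how much of the
   meta-block has already been covered by emitted commands. */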

func emitUncompressedMetaBlock1(begin []byte, end []byte, storage_ix_start uint, storage_ix *uint, storage []byte) {
	var len uint = uint(-cap(end) + cap(begin))
	rewindBitPosition1(storage_ix_start, storage_ix, storage)
	storeMetaBlockHeader1(uint(len), true, storage_ix, storage)
	*storage_ix = (*storage_ix + 7) &^ 7
	copy(storage[*storage_ix>>3:], begin[:len])
	*storage_ix += uint(len << 3)
	storage[*storage_ix>>3] = 0
}

var kCmdHistoSeed = [128]uint32{
	0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
}

var compressFragmentFastImpl_kFirstBlockSize uint = 3 << 15
var compressFragmentFastImpl_kMergeBlockSize uint = 1 << 16

func compressFragmentFastImpl(in []byte, input_size uint, is_last bool, table []int, table_bits uint, cmd_depth []byte, cmd_bits []uint16, cmd_code_numbits *uint, cmd_code []byte, storage_ix *uint, storage []byte) {
	var cmd_histo [128]uint32
	var ip_end int

	/* "next_emit" is the index of the first byte that is not covered by a
	   previous copy. Bytes between next_emit and the start of the next copy
	   or the end of the input will be emitted as literal bytes. */
	var next_emit int = 0

	var base_ip int = 0
	var input int = 0
	const kInputMarginBytes uint = windowGap
	const kMinMatchLen uint = 5

	/* Save the start of the first block for position and distance
	   computations. */
	var metablock_start int = input

	var block_size uint = brotli_min_size_t(input_size, compressFragmentFastImpl_kFirstBlockSize)
	var total_block_size uint = block_size

	/* Save the bit position of the MLEN field of the meta-block header, so
	   that we can update it later if we decide to extend this meta-block. */
	var mlen_storage_ix uint = *storage_ix + 3

	var lit_depth [256]byte
	var lit_bits [256]uint16
	var literal_ratio uint
	var ip int
	var last_distance int
	var shift uint = 64 - table_bits

	storeMetaBlockHeader1(block_size, false, storage_ix, storage)

	/* No block splits, no contexts. */
	writeBits(13, 0, storage_ix, storage)

	literal_ratio = buildAndStoreLiteralPrefixCode(in[input:], block_size, lit_depth[:], lit_bits[:], storage_ix, storage)
	{
		/* Store the pre-compressed command and distance prefix codes. */
		var i uint
		for i = 0; i+7 < *cmd_code_numbits; i += 8 {
			writeBits(8, uint64(cmd_code[i>>3]), storage_ix, storage)
		}
	}

	writeBits(*cmd_code_numbits&7, uint64(cmd_code[*cmd_code_numbits>>3]), storage_ix, storage)

	/* Initialize the command and distance histograms. We will gather
	   statistics of command and distance codes during the processing
	   of this block and use it to update the command and distance
	   prefix codes for the next block. */
emit_commands:
	copy(cmd_histo[:], kCmdHistoSeed[:])

	/* "ip" is the input pointer. */
	ip = input

	last_distance = -1
	ip_end = int(uint(input) + block_size)

	if block_size >= kInputMarginBytes {
		/* For the last block, we need to keep a 16 bytes margin so that we can
		   be sure that all distances are at most window size - 16.
		   For all other blocks, we only need to keep a margin of 5 bytes so
		   that we don't go over the block size with a copy. */
		var len_limit uint = brotli_min_size_t(block_size-kMinMatchLen, input_size-kInputMarginBytes)
		var ip_limit int = int(uint(input) + len_limit)

		var next_hash uint32
		ip++
		for next_hash = hash5(in[ip:], shift); ; {
			var skip uint32 = 32
			var next_ip int = ip
			/* Step 1: Scan forward in the input looking for a 5-byte-long match.
			   If we get close to exhausting the input then goto emit_remainder.

			   Heuristic match skipping: If 32 bytes are scanned with no matches
			   found, start looking only at every other byte. If 32 more bytes are
			   scanned, look at every third byte, etc.. When a match is found,
			   immediately go back to looking at every byte. This is a small loss
			   (~5% performance, ~0.1% density) for compressible data due to more
			   bookkeeping, but for non-compressible data (such as JPEG) it's a huge
			   win since the compressor quickly "realizes" the data is incompressible
			   and doesn't bother looking for matches everywhere.

			   The "skip" variable keeps track of how many bytes there are since the
			   last match; dividing it by 32 (i.e. right-shifting by five) gives the
			   number of bytes to move ahead for each iteration. */

			var candidate int
			assert(next_emit < ip)

		trawl:
			for {
				var hash uint32 = next_hash
				var bytes_between_hash_lookups uint32 = skip >> 5
				skip++
				assert(hash == hash5(in[next_ip:], shift))
				ip = next_ip
				next_ip = int(uint32(ip) + bytes_between_hash_lookups)
				if next_ip > ip_limit {
					goto emit_remainder
				}

				next_hash = hash5(in[next_ip:], shift)
				candidate = ip - last_distance
				if isMatch5(in[ip:], in[candidate:]) {
					if candidate < ip {
						table[hash] = int(ip - base_ip)
						break
					}
				}

				candidate = base_ip + table[hash]
				assert(candidate >= base_ip)
				assert(candidate < ip)

				table[hash] = int(ip - base_ip)
				if isMatch5(in[ip:], in[candidate:]) {
					break
				}
			}

			/* Check copy distance. If candidate is not feasible, continue search.
			   Checking is done outside of hot loop to reduce overhead. */
			if ip-candidate > maxDistance_compress_fragment {
				goto trawl
			}

			/* Step 2: Emit the found match together with the literal bytes from
			   "next_emit" to the bit stream, and then see if we can find a next match
			   immediately afterwards. Repeat until we find no match for the input
			   without emitting some literal bytes. */
			{
				var base int = ip
				var matched uint = 5 + findMatchLengthWithLimit(in[candidate+5:], in[ip+5:], uint(ip_end-ip)-5)
				var distance int = int(base - candidate) /* > 0 */

				/* We have a 5-byte match at ip, and we need to emit bytes in
				   [next_emit, ip). */
				var insert uint = uint(base - next_emit)
				ip += int(matched)
				if insert < 6210 {
					emitInsertLen1(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
				} else if shouldUseUncompressedMode(in[metablock_start:], in[next_emit:], insert, literal_ratio) {
					emitUncompressedMetaBlock1(in[metablock_start:], in[base:], mlen_storage_ix-3, storage_ix, storage)
					input_size -= uint(base - input)
					input = base
					next_emit = input
					goto next_block
				} else {
					emitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
				}

				emitLiterals(in[next_emit:], insert, lit_depth[:], lit_bits[:], storage_ix, storage)
				if distance == last_distance {
					writeBits(uint(cmd_depth[64]), uint64(cmd_bits[64]), storage_ix, storage)
					cmd_histo[64]++
				} else {
					emitDistance1(uint(distance), cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
					last_distance = distance
				}

				emitCopyLenLastDistance1(matched, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)

				next_emit = ip
				if ip >= ip_limit {
					goto emit_remainder
				}

				/* We could immediately start working at ip now, but to improve
				   compression we first update "table" with the hashes of some positions
				   within the last copy. */
				{
					var input_bytes uint64 = binary.LittleEndian.Uint64(in[ip-3:])
					var prev_hash uint32 = hashBytesAtOffset5(input_bytes, 0, shift)
					var cur_hash uint32 = hashBytesAtOffset5(input_bytes, 3, shift)
					table[prev_hash] = int(ip - base_ip - 3)
					prev_hash = hashBytesAtOffset5(input_bytes, 1, shift)
					table[prev_hash] = int(ip - base_ip - 2)
					prev_hash = hashBytesAtOffset5(input_bytes, 2, shift)
					table[prev_hash] = int(ip - base_ip - 1)

					candidate = base_ip + table[cur_hash]
					table[cur_hash] = int(ip - base_ip)
				}
			}

			for isMatch5(in[ip:], in[candidate:]) {
				/* We have a 5-byte match at ip, and no need to emit any literal
				   bytes prior to ip. */
				var base int = ip
				var matched uint = 5 + findMatchLengthWithLimit(in[candidate+5:], in[ip+5:], uint(ip_end-ip)-5)
				if ip-candidate > maxDistance_compress_fragment {
					break
				}
				ip += int(matched)
				last_distance = int(base - candidate) /* > 0 */
				emitCopyLen1(matched, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
				emitDistance1(uint(last_distance), cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)

				next_emit = ip
				if ip >= ip_limit {
					goto emit_remainder
				}

				/* We could immediately start working at ip now, but to improve
				   compression we first update "table" with the hashes of some positions
				   within the last copy. */
				{
					var input_bytes uint64 = binary.LittleEndian.Uint64(in[ip-3:])
					var prev_hash uint32 = hashBytesAtOffset5(input_bytes, 0, shift)
					var cur_hash uint32 = hashBytesAtOffset5(input_bytes, 3, shift)
					table[prev_hash] = int(ip - base_ip - 3)
					prev_hash = hashBytesAtOffset5(input_bytes, 1, shift)
					table[prev_hash] = int(ip - base_ip - 2)
					prev_hash = hashBytesAtOffset5(input_bytes, 2, shift)
					table[prev_hash] = int(ip - base_ip - 1)

					candidate = base_ip + table[cur_hash]
					table[cur_hash] = int(ip - base_ip)
				}
			}

			ip++
			next_hash = hash5(in[ip:], shift)
		}
	}

emit_remainder:
	assert(next_emit <= ip_end)
	input += int(block_size)
	input_size -= block_size
	block_size = brotli_min_size_t(input_size, compressFragmentFastImpl_kMergeBlockSize)

	/* Decide if we want to continue this meta-block instead of emitting the
	   last insert-only command. */
	if input_size > 0 && total_block_size+block_size <= 1<<20 && shouldMergeBlock(in[input:], block_size, lit_depth[:]) {
		assert(total_block_size > 1<<16)

		/* Update the size of the current meta-block and continue emitting commands.
		   We can do this because the current size and the new size both have 5
		   nibbles. */
		total_block_size += block_size

		updateBits(20, uint32(total_block_size-1), mlen_storage_ix, storage)
		goto emit_commands
	}

	/* Emit the remaining bytes as literals. */
	if next_emit < ip_end {
		var insert uint = uint(ip_end - next_emit)
		if insert < 6210 {
			emitInsertLen1(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
			emitLiterals(in[next_emit:], insert, lit_depth[:], lit_bits[:], storage_ix, storage)
		} else if shouldUseUncompressedMode(in[metablock_start:], in[next_emit:], insert, literal_ratio) {
			emitUncompressedMetaBlock1(in[metablock_start:], in[ip_end:], mlen_storage_ix-3, storage_ix, storage)
		} else {
			emitLongInsertLen(insert, cmd_depth, cmd_bits, cmd_histo[:], storage_ix, storage)
			emitLiterals(in[next_emit:], insert, lit_depth[:], lit_bits[:], storage_ix, storage)
		}
	}

	next_emit = ip_end

	/* If we have more data, write a new meta-block header and prefix codes and
	   then continue emitting commands. */
next_block:
	if input_size > 0 {
		metablock_start = input
		block_size = brotli_min_size_t(input_size, compressFragmentFastImpl_kFirstBlockSize)
		total_block_size = block_size

		/* Save the bit position of the MLEN field of the meta-block header, so that
		   we can update it later if we decide to extend this meta-block. */
		mlen_storage_ix = *storage_ix + 3

		storeMetaBlockHeader1(block_size, false, storage_ix, storage)

		/* No block splits, no contexts. */
		writeBits(13, 0, storage_ix, storage)

		literal_ratio = buildAndStoreLiteralPrefixCode(in[input:], block_size, lit_depth[:], lit_bits[:], storage_ix, storage)
		buildAndStoreCommandPrefixCode1(cmd_histo[:], cmd_depth, cmd_bits, storage_ix, storage)
		goto emit_commands
	}

	if !is_last {
		/* If this is not the last block, update the command and distance prefix
		   codes for the next block and store the compressed forms. */
		cmd_code[0] = 0
		*cmd_code_numbits = 0
		buildAndStoreCommandPrefixCode1(cmd_histo[:], cmd_depth, cmd_bits, cmd_code_numbits, cmd_code)
	}
}

/* Compresses "input" string to the "*storage" buffer as one or more complete
   meta-blocks, and updates the "*storage_ix" bit position.

   If "is_last" is true, emits an additional empty last meta-block.

   "cmd_depth" and "cmd_bits" contain the command and distance prefix codes
   (see comment in encode.h) used for the encoding of this input fragment.
   If "is_last" is false, they are updated to reflect the statistics
   of this input fragment, to be used for the encoding of the next fragment.

   "*cmd_code_numbits" is the number of bits of the compressed representation
   of the command and distance prefix codes, and "cmd_code" is an array of
   at least "(*cmd_code_numbits + 7) >> 3" size that contains the compressed
   command and distance prefix codes. If "is_last" is false, these are also
   updated to represent the updated "cmd_depth" and "cmd_bits".

   REQUIRES: "input_size" is greater than zero, or "is_last" is true.
   REQUIRES: "input_size" is less than or equal to the maximal meta-block size (1 << 24).
   REQUIRES: all elements in "table[0..table_size-1]" are initialized to zero.
   REQUIRES: "table_size" is a power of two with an odd (9, 11, 13, 15) exponent.
   OUTPUT: maximal copy distance <= |input_size|
   OUTPUT: maximal copy distance <= BROTLI_MAX_BACKWARD_LIMIT(18) */
func compressFragmentFast(input []byte, input_size uint, is_last bool, table []int, table_size uint, cmd_depth []byte, cmd_bits []uint16, cmd_code_numbits *uint, cmd_code []byte, storage_ix *uint, storage []byte) {
	var initial_storage_ix uint = *storage_ix
	var table_bits uint = uint(log2FloorNonZero(table_size))

	if input_size == 0 {
		assert(is_last)
		writeBits(1, 1, storage_ix, storage) /* islast */
		writeBits(1, 1, storage_ix, storage) /* isempty */
		*storage_ix = (*storage_ix + 7) &^ 7
		return
	}

	compressFragmentFastImpl(input, input_size, is_last, table, table_bits, cmd_depth, cmd_bits, cmd_code_numbits, cmd_code, storage_ix, storage)

	/* If the output is larger than a single uncompressed block, rewrite it. */
	if *storage_ix-initial_storage_ix > 31+(input_size<<3) {
		emitUncompressedMetaBlock1(input, input[input_size:], initial_storage_ix, storage_ix, storage)
	}

	if is_last {
		writeBits(1, 1, storage_ix, storage) /* islast */
		writeBits(1, 1, storage_ix, storage) /* isempty */
		*storage_ix = (*storage_ix + 7) &^ 7
	}
}
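
/* Illustrative sketch, not part of the original API: the shape of a call to
   compressFragmentFast for a single final fragment. The zeroed command-code
   state below only keeps the sketch self-contained; the real encoder seeds
   cmd_depth/cmd_bits/cmd_code with a valid default prefix code before the
   first fragment, so this is not a recipe for a conformant stream. The
   storage sizing is likewise an assumption, not a documented bound. */
func exampleCompressFragmentFast(input []byte) []byte {
	var table [1 << 9]int /* power-of-two size with 9-bit exponent, zeroed */
	var cmd_depth [128]byte
	var cmd_bits [128]uint16
	var cmd_code [512]byte
	var cmd_code_numbits uint
	storage := make([]byte, 2*len(input)+503)
	var storage_ix uint
	compressFragmentFast(input, uint(len(input)), true, table[:], uint(len(table)), cmd_depth[:], cmd_bits[:], &cmd_code_numbits, cmd_code[:], &storage_ix, storage)
	return storage[:(storage_ix+7)>>3]
}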