1package brotli
2
3import "math"
4
5/* Copyright 2013 Google Inc. All Rights Reserved.
6
7   Distributed under MIT license.
8   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
9*/
10
11func initialEntropyCodesDistance(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramDistance) {
12	var seed uint32 = 7
13	var block_length uint = length / num_histograms
14	var i uint
15	clearHistogramsDistance(histograms, num_histograms)
16	for i = 0; i < num_histograms; i++ {
17		var pos uint = length * i / num_histograms
18		if i != 0 {
19			pos += uint(myRand(&seed) % uint32(block_length))
20		}
21
22		if pos+stride >= length {
23			pos = length - stride - 1
24		}
25
26		histogramAddVectorDistance(&histograms[i], data[pos:], stride)
27	}
28}
29
30func randomSampleDistance(seed *uint32, data []uint16, length uint, stride uint, sample *histogramDistance) {
31	var pos uint = 0
32	if stride >= length {
33		stride = length
34	} else {
35		pos = uint(myRand(seed) % uint32(length-stride+1))
36	}
37
38	histogramAddVectorDistance(sample, data[pos:], stride)
39}
40
41func refineEntropyCodesDistance(data []uint16, length uint, stride uint, num_histograms uint, histograms []histogramDistance) {
42	var iters uint = kIterMulForRefining*length/stride + kMinItersForRefining
43	var seed uint32 = 7
44	var iter uint
45	iters = ((iters + num_histograms - 1) / num_histograms) * num_histograms
46	for iter = 0; iter < iters; iter++ {
47		var sample histogramDistance
48		histogramClearDistance(&sample)
49		randomSampleDistance(&seed, data, length, stride, &sample)
50		histogramAddHistogramDistance(&histograms[iter%num_histograms], &sample)
51	}
52}
53
54/* Assigns a block id from the range [0, num_histograms) to each data element
55   in data[0..length) and fills in block_id[0..length) with the assigned values.
56   Returns the number of blocks, i.e. one plus the number of block switches. */
57func findBlocksDistance(data []uint16, length uint, block_switch_bitcost float64, num_histograms uint, histograms []histogramDistance, insert_cost []float64, cost []float64, switch_signal []byte, block_id []byte) uint {
58	var data_size uint = histogramDataSizeDistance()
59	var bitmaplen uint = (num_histograms + 7) >> 3
60	var num_blocks uint = 1
61	var i uint
62	var j uint
63	assert(num_histograms <= 256)
64	if num_histograms <= 1 {
65		for i = 0; i < length; i++ {
66			block_id[i] = 0
67		}
68
69		return 1
70	}
71
72	for i := 0; i < int(data_size*num_histograms); i++ {
73		insert_cost[i] = 0
74	}
75	for i = 0; i < num_histograms; i++ {
76		insert_cost[i] = fastLog2(uint(uint32(histograms[i].total_count_)))
77	}
78
79	for i = data_size; i != 0; {
80		i--
81		for j = 0; j < num_histograms; j++ {
82			insert_cost[i*num_histograms+j] = insert_cost[j] - bitCost(uint(histograms[j].data_[i]))
83		}
84	}
85
86	for i := 0; i < int(num_histograms); i++ {
87		cost[i] = 0
88	}
89	for i := 0; i < int(length*bitmaplen); i++ {
90		switch_signal[i] = 0
91	}
92
93	/* After each iteration of this loop, cost[k] will contain the difference
94	   between the minimum cost of arriving at the current byte position using
95	   entropy code k, and the minimum cost of arriving at the current byte
96	   position. This difference is capped at the block switch cost, and if it
97	   reaches block switch cost, it means that when we trace back from the last
98	   position, we need to switch here. */
99	for i = 0; i < length; i++ {
100		var byte_ix uint = i
101		var ix uint = byte_ix * bitmaplen
102		var insert_cost_ix uint = uint(data[byte_ix]) * num_histograms
103		var min_cost float64 = 1e99
104		var block_switch_cost float64 = block_switch_bitcost
105		var k uint
106		for k = 0; k < num_histograms; k++ {
107			/* We are coding the symbol in data[byte_ix] with entropy code k. */
108			cost[k] += insert_cost[insert_cost_ix+k]
109
110			if cost[k] < min_cost {
111				min_cost = cost[k]
112				block_id[byte_ix] = byte(k)
113			}
114		}
115
116		/* More blocks for the beginning. */
117		if byte_ix < 2000 {
118			block_switch_cost *= 0.77 + 0.07*float64(byte_ix)/2000
119		}
120
121		for k = 0; k < num_histograms; k++ {
122			cost[k] -= min_cost
123			if cost[k] >= block_switch_cost {
124				var mask byte = byte(1 << (k & 7))
125				cost[k] = block_switch_cost
126				assert(k>>3 < bitmaplen)
127				switch_signal[ix+(k>>3)] |= mask
128				/* Trace back from the last position and switch at the marked places. */
129			}
130		}
131	}
132	{
133		var byte_ix uint = length - 1
134		var ix uint = byte_ix * bitmaplen
135		var cur_id byte = block_id[byte_ix]
136		for byte_ix > 0 {
137			var mask byte = byte(1 << (cur_id & 7))
138			assert(uint(cur_id)>>3 < bitmaplen)
139			byte_ix--
140			ix -= bitmaplen
141			if switch_signal[ix+uint(cur_id>>3)]&mask != 0 {
142				if cur_id != block_id[byte_ix] {
143					cur_id = block_id[byte_ix]
144					num_blocks++
145				}
146			}
147
148			block_id[byte_ix] = cur_id
149		}
150	}
151
152	return num_blocks
153}
154
155var remapBlockIdsDistance_kInvalidId uint16 = 256
156
157func remapBlockIdsDistance(block_ids []byte, length uint, new_id []uint16, num_histograms uint) uint {
158	var next_id uint16 = 0
159	var i uint
160	for i = 0; i < num_histograms; i++ {
161		new_id[i] = remapBlockIdsDistance_kInvalidId
162	}
163
164	for i = 0; i < length; i++ {
165		assert(uint(block_ids[i]) < num_histograms)
166		if new_id[block_ids[i]] == remapBlockIdsDistance_kInvalidId {
167			new_id[block_ids[i]] = next_id
168			next_id++
169		}
170	}
171
172	for i = 0; i < length; i++ {
173		block_ids[i] = byte(new_id[block_ids[i]])
174		assert(uint(block_ids[i]) < num_histograms)
175	}
176
177	assert(uint(next_id) <= num_histograms)
178	return uint(next_id)
179}
180
181func buildBlockHistogramsDistance(data []uint16, length uint, block_ids []byte, num_histograms uint, histograms []histogramDistance) {
182	var i uint
183	clearHistogramsDistance(histograms, num_histograms)
184	for i = 0; i < length; i++ {
185		histogramAddDistance(&histograms[block_ids[i]], uint(data[i]))
186	}
187}
188
189var clusterBlocksDistance_kInvalidIndex uint32 = math.MaxUint32
190
191func clusterBlocksDistance(data []uint16, length uint, num_blocks uint, block_ids []byte, split *blockSplit) {
192	var histogram_symbols []uint32 = make([]uint32, num_blocks)
193	var block_lengths []uint32 = make([]uint32, num_blocks)
194	var expected_num_clusters uint = clustersPerBatch * (num_blocks + histogramsPerBatch - 1) / histogramsPerBatch
195	var all_histograms_size uint = 0
196	var all_histograms_capacity uint = expected_num_clusters
197	var all_histograms []histogramDistance = make([]histogramDistance, all_histograms_capacity)
198	var cluster_size_size uint = 0
199	var cluster_size_capacity uint = expected_num_clusters
200	var cluster_size []uint32 = make([]uint32, cluster_size_capacity)
201	var num_clusters uint = 0
202	var histograms []histogramDistance = make([]histogramDistance, brotli_min_size_t(num_blocks, histogramsPerBatch))
203	var max_num_pairs uint = histogramsPerBatch * histogramsPerBatch / 2
204	var pairs_capacity uint = max_num_pairs + 1
205	var pairs []histogramPair = make([]histogramPair, pairs_capacity)
206	var pos uint = 0
207	var clusters []uint32
208	var num_final_clusters uint
209	var new_index []uint32
210	var i uint
211	var sizes = [histogramsPerBatch]uint32{0}
212	var new_clusters = [histogramsPerBatch]uint32{0}
213	var symbols = [histogramsPerBatch]uint32{0}
214	var remap = [histogramsPerBatch]uint32{0}
215
216	for i := 0; i < int(num_blocks); i++ {
217		block_lengths[i] = 0
218	}
219	{
220		var block_idx uint = 0
221		for i = 0; i < length; i++ {
222			assert(block_idx < num_blocks)
223			block_lengths[block_idx]++
224			if i+1 == length || block_ids[i] != block_ids[i+1] {
225				block_idx++
226			}
227		}
228
229		assert(block_idx == num_blocks)
230	}
231
232	for i = 0; i < num_blocks; i += histogramsPerBatch {
233		var num_to_combine uint = brotli_min_size_t(num_blocks-i, histogramsPerBatch)
234		var num_new_clusters uint
235		var j uint
236		for j = 0; j < num_to_combine; j++ {
237			var k uint
238			histogramClearDistance(&histograms[j])
239			for k = 0; uint32(k) < block_lengths[i+j]; k++ {
240				histogramAddDistance(&histograms[j], uint(data[pos]))
241				pos++
242			}
243
244			histograms[j].bit_cost_ = populationCostDistance(&histograms[j])
245			new_clusters[j] = uint32(j)
246			symbols[j] = uint32(j)
247			sizes[j] = 1
248		}
249
250		num_new_clusters = histogramCombineDistance(histograms, sizes[:], symbols[:], new_clusters[:], []histogramPair(pairs), num_to_combine, num_to_combine, histogramsPerBatch, max_num_pairs)
251		if all_histograms_capacity < (all_histograms_size + num_new_clusters) {
252			var _new_size uint
253			if all_histograms_capacity == 0 {
254				_new_size = all_histograms_size + num_new_clusters
255			} else {
256				_new_size = all_histograms_capacity
257			}
258			var new_array []histogramDistance
259			for _new_size < (all_histograms_size + num_new_clusters) {
260				_new_size *= 2
261			}
262			new_array = make([]histogramDistance, _new_size)
263			if all_histograms_capacity != 0 {
264				copy(new_array, all_histograms[:all_histograms_capacity])
265			}
266
267			all_histograms = new_array
268			all_histograms_capacity = _new_size
269		}
270
271		brotli_ensure_capacity_uint32_t(&cluster_size, &cluster_size_capacity, cluster_size_size+num_new_clusters)
272		for j = 0; j < num_new_clusters; j++ {
273			all_histograms[all_histograms_size] = histograms[new_clusters[j]]
274			all_histograms_size++
275			cluster_size[cluster_size_size] = sizes[new_clusters[j]]
276			cluster_size_size++
277			remap[new_clusters[j]] = uint32(j)
278		}
279
280		for j = 0; j < num_to_combine; j++ {
281			histogram_symbols[i+j] = uint32(num_clusters) + remap[symbols[j]]
282		}
283
284		num_clusters += num_new_clusters
285		assert(num_clusters == cluster_size_size)
286		assert(num_clusters == all_histograms_size)
287	}
288
289	histograms = nil
290
291	max_num_pairs = brotli_min_size_t(64*num_clusters, (num_clusters/2)*num_clusters)
292	if pairs_capacity < max_num_pairs+1 {
293		pairs = nil
294		pairs = make([]histogramPair, (max_num_pairs + 1))
295	}
296
297	clusters = make([]uint32, num_clusters)
298	for i = 0; i < num_clusters; i++ {
299		clusters[i] = uint32(i)
300	}
301
302	num_final_clusters = histogramCombineDistance(all_histograms, cluster_size, histogram_symbols, clusters, pairs, num_clusters, num_blocks, maxNumberOfBlockTypes, max_num_pairs)
303	pairs = nil
304	cluster_size = nil
305
306	new_index = make([]uint32, num_clusters)
307	for i = 0; i < num_clusters; i++ {
308		new_index[i] = clusterBlocksDistance_kInvalidIndex
309	}
310	pos = 0
311	{
312		var next_index uint32 = 0
313		for i = 0; i < num_blocks; i++ {
314			var histo histogramDistance
315			var j uint
316			var best_out uint32
317			var best_bits float64
318			histogramClearDistance(&histo)
319			for j = 0; uint32(j) < block_lengths[i]; j++ {
320				histogramAddDistance(&histo, uint(data[pos]))
321				pos++
322			}
323
324			if i == 0 {
325				best_out = histogram_symbols[0]
326			} else {
327				best_out = histogram_symbols[i-1]
328			}
329			best_bits = histogramBitCostDistanceDistance(&histo, &all_histograms[best_out])
330			for j = 0; j < num_final_clusters; j++ {
331				var cur_bits float64 = histogramBitCostDistanceDistance(&histo, &all_histograms[clusters[j]])
332				if cur_bits < best_bits {
333					best_bits = cur_bits
334					best_out = clusters[j]
335				}
336			}
337
338			histogram_symbols[i] = best_out
339			if new_index[best_out] == clusterBlocksDistance_kInvalidIndex {
340				new_index[best_out] = next_index
341				next_index++
342			}
343		}
344	}
345
346	clusters = nil
347	all_histograms = nil
348	brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, num_blocks)
349	brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, num_blocks)
350	{
351		var cur_length uint32 = 0
352		var block_idx uint = 0
353		var max_type byte = 0
354		for i = 0; i < num_blocks; i++ {
355			cur_length += block_lengths[i]
356			if i+1 == num_blocks || histogram_symbols[i] != histogram_symbols[i+1] {
357				var id byte = byte(new_index[histogram_symbols[i]])
358				split.types[block_idx] = id
359				split.lengths[block_idx] = cur_length
360				max_type = brotli_max_uint8_t(max_type, id)
361				cur_length = 0
362				block_idx++
363			}
364		}
365
366		split.num_blocks = block_idx
367		split.num_types = uint(max_type) + 1
368	}
369
370	new_index = nil
371	block_lengths = nil
372	histogram_symbols = nil
373}
374
375func splitByteVectorDistance(data []uint16, length uint, literals_per_histogram uint, max_histograms uint, sampling_stride_length uint, block_switch_cost float64, params *encoderParams, split *blockSplit) {
376	var data_size uint = histogramDataSizeDistance()
377	var num_histograms uint = length/literals_per_histogram + 1
378	var histograms []histogramDistance
379	if num_histograms > max_histograms {
380		num_histograms = max_histograms
381	}
382
383	if length == 0 {
384		split.num_types = 1
385		return
386	} else if length < kMinLengthForBlockSplitting {
387		brotli_ensure_capacity_uint8_t(&split.types, &split.types_alloc_size, split.num_blocks+1)
388		brotli_ensure_capacity_uint32_t(&split.lengths, &split.lengths_alloc_size, split.num_blocks+1)
389		split.num_types = 1
390		split.types[split.num_blocks] = 0
391		split.lengths[split.num_blocks] = uint32(length)
392		split.num_blocks++
393		return
394	}
395
396	histograms = make([]histogramDistance, num_histograms)
397
398	/* Find good entropy codes. */
399	initialEntropyCodesDistance(data, length, sampling_stride_length, num_histograms, histograms)
400
401	refineEntropyCodesDistance(data, length, sampling_stride_length, num_histograms, histograms)
402	{
403		var block_ids []byte = make([]byte, length)
404		var num_blocks uint = 0
405		var bitmaplen uint = (num_histograms + 7) >> 3
406		var insert_cost []float64 = make([]float64, (data_size * num_histograms))
407		var cost []float64 = make([]float64, num_histograms)
408		var switch_signal []byte = make([]byte, (length * bitmaplen))
409		var new_id []uint16 = make([]uint16, num_histograms)
410		var iters uint
411		if params.quality < hqZopflificationQuality {
412			iters = 3
413		} else {
414			iters = 10
415		}
416		/* Find a good path through literals with the good entropy codes. */
417
418		var i uint
419		for i = 0; i < iters; i++ {
420			num_blocks = findBlocksDistance(data, length, block_switch_cost, num_histograms, histograms, insert_cost, cost, switch_signal, block_ids)
421			num_histograms = remapBlockIdsDistance(block_ids, length, new_id, num_histograms)
422			buildBlockHistogramsDistance(data, length, block_ids, num_histograms, histograms)
423		}
424
425		insert_cost = nil
426		cost = nil
427		switch_signal = nil
428		new_id = nil
429		histograms = nil
430		clusterBlocksDistance(data, length, num_blocks, block_ids, split)
431		block_ids = nil
432	}
433}
434