1package brotli
2
3/* Copyright 2013 Google Inc. All Rights Reserved.
4
5   Distributed under MIT license.
6   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
7*/
8
9/* Functions to estimate the bit cost of Huffman trees. */
10func shannonEntropy(population []uint32, size uint, total *uint) float64 {
11	var sum uint = 0
12	var retval float64 = 0
13	var population_end []uint32 = population[size:]
14	var p uint
15	for -cap(population) < -cap(population_end) {
16		p = uint(population[0])
17		population = population[1:]
18		sum += p
19		retval -= float64(p) * fastLog2(p)
20	}
21
22	if sum != 0 {
23		retval += float64(sum) * fastLog2(sum)
24	}
25	*total = sum
26	return retval
27}
28
29func bitsEntropy(population []uint32, size uint) float64 {
30	var sum uint
31	var retval float64 = shannonEntropy(population, size, &sum)
32	if retval < float64(sum) {
33		/* At least one bit per literal is needed. */
34		retval = float64(sum)
35	}
36
37	return retval
38}
39
40const kOneSymbolHistogramCost float64 = 12
41const kTwoSymbolHistogramCost float64 = 20
42const kThreeSymbolHistogramCost float64 = 28
43const kFourSymbolHistogramCost float64 = 37
44
45func populationCostLiteral(histogram *histogramLiteral) float64 {
46	var data_size uint = histogramDataSizeLiteral()
47	var count int = 0
48	var s [5]uint
49	var bits float64 = 0.0
50	var i uint
51	if histogram.total_count_ == 0 {
52		return kOneSymbolHistogramCost
53	}
54
55	for i = 0; i < data_size; i++ {
56		if histogram.data_[i] > 0 {
57			s[count] = i
58			count++
59			if count > 4 {
60				break
61			}
62		}
63	}
64
65	if count == 1 {
66		return kOneSymbolHistogramCost
67	}
68
69	if count == 2 {
70		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
71	}
72
73	if count == 3 {
74		var histo0 uint32 = histogram.data_[s[0]]
75		var histo1 uint32 = histogram.data_[s[1]]
76		var histo2 uint32 = histogram.data_[s[2]]
77		var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
78		return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
79	}
80
81	if count == 4 {
82		var histo [4]uint32
83		var h23 uint32
84		var histomax uint32
85		for i = 0; i < 4; i++ {
86			histo[i] = histogram.data_[s[i]]
87		}
88
89		/* Sort */
90		for i = 0; i < 4; i++ {
91			var j uint
92			for j = i + 1; j < 4; j++ {
93				if histo[j] > histo[i] {
94					var tmp uint32 = histo[j]
95					histo[j] = histo[i]
96					histo[i] = tmp
97				}
98			}
99		}
100
101		h23 = histo[2] + histo[3]
102		histomax = brotli_max_uint32_t(h23, histo[0])
103		return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
104	}
105	{
106		var max_depth uint = 1
107		var depth_histo = [codeLengthCodes]uint32{0}
108		/* In this loop we compute the entropy of the histogram and simultaneously
109		   build a simplified histogram of the code length codes where we use the
110		   zero repeat code 17, but we don't use the non-zero repeat code 16. */
111
112		var log2total float64 = fastLog2(histogram.total_count_)
113		for i = 0; i < data_size; {
114			if histogram.data_[i] > 0 {
115				var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
116				/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
117				   = log2(total_count) - log2(count(symbol)) */
118
119				var depth uint = uint(log2p + 0.5)
120				/* Approximate the bit depth by round(-log2(P(symbol))) */
121				bits += float64(histogram.data_[i]) * log2p
122
123				if depth > 15 {
124					depth = 15
125				}
126
127				if depth > max_depth {
128					max_depth = depth
129				}
130
131				depth_histo[depth]++
132				i++
133			} else {
134				var reps uint32 = 1
135				/* Compute the run length of zeros and add the appropriate number of 0
136				   and 17 code length codes to the code length code histogram. */
137
138				var k uint
139				for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
140					reps++
141				}
142
143				i += uint(reps)
144				if i == data_size {
145					/* Don't add any cost for the last zero run, since these are encoded
146					   only implicitly. */
147					break
148				}
149
150				if reps < 3 {
151					depth_histo[0] += reps
152				} else {
153					reps -= 2
154					for reps > 0 {
155						depth_histo[repeatZeroCodeLength]++
156
157						/* Add the 3 extra bits for the 17 code length code. */
158						bits += 3
159
160						reps >>= 3
161					}
162				}
163			}
164		}
165
166		/* Add the estimated encoding cost of the code length code histogram. */
167		bits += float64(18 + 2*max_depth)
168
169		/* Add the entropy of the code length code histogram. */
170		bits += bitsEntropy(depth_histo[:], codeLengthCodes)
171	}
172
173	return bits
174}
175
176func populationCostCommand(histogram *histogramCommand) float64 {
177	var data_size uint = histogramDataSizeCommand()
178	var count int = 0
179	var s [5]uint
180	var bits float64 = 0.0
181	var i uint
182	if histogram.total_count_ == 0 {
183		return kOneSymbolHistogramCost
184	}
185
186	for i = 0; i < data_size; i++ {
187		if histogram.data_[i] > 0 {
188			s[count] = i
189			count++
190			if count > 4 {
191				break
192			}
193		}
194	}
195
196	if count == 1 {
197		return kOneSymbolHistogramCost
198	}
199
200	if count == 2 {
201		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
202	}
203
204	if count == 3 {
205		var histo0 uint32 = histogram.data_[s[0]]
206		var histo1 uint32 = histogram.data_[s[1]]
207		var histo2 uint32 = histogram.data_[s[2]]
208		var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
209		return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
210	}
211
212	if count == 4 {
213		var histo [4]uint32
214		var h23 uint32
215		var histomax uint32
216		for i = 0; i < 4; i++ {
217			histo[i] = histogram.data_[s[i]]
218		}
219
220		/* Sort */
221		for i = 0; i < 4; i++ {
222			var j uint
223			for j = i + 1; j < 4; j++ {
224				if histo[j] > histo[i] {
225					var tmp uint32 = histo[j]
226					histo[j] = histo[i]
227					histo[i] = tmp
228				}
229			}
230		}
231
232		h23 = histo[2] + histo[3]
233		histomax = brotli_max_uint32_t(h23, histo[0])
234		return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
235	}
236	{
237		var max_depth uint = 1
238		var depth_histo = [codeLengthCodes]uint32{0}
239		/* In this loop we compute the entropy of the histogram and simultaneously
240		   build a simplified histogram of the code length codes where we use the
241		   zero repeat code 17, but we don't use the non-zero repeat code 16. */
242
243		var log2total float64 = fastLog2(histogram.total_count_)
244		for i = 0; i < data_size; {
245			if histogram.data_[i] > 0 {
246				var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
247				/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
248				   = log2(total_count) - log2(count(symbol)) */
249
250				var depth uint = uint(log2p + 0.5)
251				/* Approximate the bit depth by round(-log2(P(symbol))) */
252				bits += float64(histogram.data_[i]) * log2p
253
254				if depth > 15 {
255					depth = 15
256				}
257
258				if depth > max_depth {
259					max_depth = depth
260				}
261
262				depth_histo[depth]++
263				i++
264			} else {
265				var reps uint32 = 1
266				/* Compute the run length of zeros and add the appropriate number of 0
267				   and 17 code length codes to the code length code histogram. */
268
269				var k uint
270				for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
271					reps++
272				}
273
274				i += uint(reps)
275				if i == data_size {
276					/* Don't add any cost for the last zero run, since these are encoded
277					   only implicitly. */
278					break
279				}
280
281				if reps < 3 {
282					depth_histo[0] += reps
283				} else {
284					reps -= 2
285					for reps > 0 {
286						depth_histo[repeatZeroCodeLength]++
287
288						/* Add the 3 extra bits for the 17 code length code. */
289						bits += 3
290
291						reps >>= 3
292					}
293				}
294			}
295		}
296
297		/* Add the estimated encoding cost of the code length code histogram. */
298		bits += float64(18 + 2*max_depth)
299
300		/* Add the entropy of the code length code histogram. */
301		bits += bitsEntropy(depth_histo[:], codeLengthCodes)
302	}
303
304	return bits
305}
306
307func populationCostDistance(histogram *histogramDistance) float64 {
308	var data_size uint = histogramDataSizeDistance()
309	var count int = 0
310	var s [5]uint
311	var bits float64 = 0.0
312	var i uint
313	if histogram.total_count_ == 0 {
314		return kOneSymbolHistogramCost
315	}
316
317	for i = 0; i < data_size; i++ {
318		if histogram.data_[i] > 0 {
319			s[count] = i
320			count++
321			if count > 4 {
322				break
323			}
324		}
325	}
326
327	if count == 1 {
328		return kOneSymbolHistogramCost
329	}
330
331	if count == 2 {
332		return kTwoSymbolHistogramCost + float64(histogram.total_count_)
333	}
334
335	if count == 3 {
336		var histo0 uint32 = histogram.data_[s[0]]
337		var histo1 uint32 = histogram.data_[s[1]]
338		var histo2 uint32 = histogram.data_[s[2]]
339		var histomax uint32 = brotli_max_uint32_t(histo0, brotli_max_uint32_t(histo1, histo2))
340		return kThreeSymbolHistogramCost + 2*(float64(histo0)+float64(histo1)+float64(histo2)) - float64(histomax)
341	}
342
343	if count == 4 {
344		var histo [4]uint32
345		var h23 uint32
346		var histomax uint32
347		for i = 0; i < 4; i++ {
348			histo[i] = histogram.data_[s[i]]
349		}
350
351		/* Sort */
352		for i = 0; i < 4; i++ {
353			var j uint
354			for j = i + 1; j < 4; j++ {
355				if histo[j] > histo[i] {
356					var tmp uint32 = histo[j]
357					histo[j] = histo[i]
358					histo[i] = tmp
359				}
360			}
361		}
362
363		h23 = histo[2] + histo[3]
364		histomax = brotli_max_uint32_t(h23, histo[0])
365		return kFourSymbolHistogramCost + 3*float64(h23) + 2*(float64(histo[0])+float64(histo[1])) - float64(histomax)
366	}
367	{
368		var max_depth uint = 1
369		var depth_histo = [codeLengthCodes]uint32{0}
370		/* In this loop we compute the entropy of the histogram and simultaneously
371		   build a simplified histogram of the code length codes where we use the
372		   zero repeat code 17, but we don't use the non-zero repeat code 16. */
373
374		var log2total float64 = fastLog2(histogram.total_count_)
375		for i = 0; i < data_size; {
376			if histogram.data_[i] > 0 {
377				var log2p float64 = log2total - fastLog2(uint(histogram.data_[i]))
378				/* Compute -log2(P(symbol)) = -log2(count(symbol)/total_count) =
379				   = log2(total_count) - log2(count(symbol)) */
380
381				var depth uint = uint(log2p + 0.5)
382				/* Approximate the bit depth by round(-log2(P(symbol))) */
383				bits += float64(histogram.data_[i]) * log2p
384
385				if depth > 15 {
386					depth = 15
387				}
388
389				if depth > max_depth {
390					max_depth = depth
391				}
392
393				depth_histo[depth]++
394				i++
395			} else {
396				var reps uint32 = 1
397				/* Compute the run length of zeros and add the appropriate number of 0
398				   and 17 code length codes to the code length code histogram. */
399
400				var k uint
401				for k = i + 1; k < data_size && histogram.data_[k] == 0; k++ {
402					reps++
403				}
404
405				i += uint(reps)
406				if i == data_size {
407					/* Don't add any cost for the last zero run, since these are encoded
408					   only implicitly. */
409					break
410				}
411
412				if reps < 3 {
413					depth_histo[0] += reps
414				} else {
415					reps -= 2
416					for reps > 0 {
417						depth_histo[repeatZeroCodeLength]++
418
419						/* Add the 3 extra bits for the 17 code length code. */
420						bits += 3
421
422						reps >>= 3
423					}
424				}
425			}
426		}
427
428		/* Add the estimated encoding cost of the code length code histogram. */
429		bits += float64(18 + 2*max_depth)
430
431		/* Add the entropy of the code length code histogram. */
432		bits += bitsEntropy(depth_histo[:], codeLengthCodes)
433	}
434
435	return bits
436}
437