1 /*
2 Copyright 2011 Google Inc. All Rights Reserved.
3 
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7 
8     http://www.apache.org/licenses/LICENSE-2.0
9 
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 
16 Author: lode.vandevenne@gmail.com (Lode Vandevenne)
17 Author: jyrki.alakuijala@gmail.com (Jyrki Alakuijala)
18 */
19 
20 /*
21 Bounded package merge algorithm, based on the paper
22 "A Fast and Space-Economical Algorithm for Length-Limited Coding
23 Jyrki Katajainen, Alistair Moffat, Andrew Turpin".
24 */
25 
26 #include "katajainen.h"
27 #include <assert.h>
28 #include <stdlib.h>
29 #include <limits.h>
30 
31 typedef struct Node Node;
32 
33 /*
34 Nodes forming chains. Also used to represent leaves.
35 */
36 struct Node {
37   size_t weight;  /* Total weight (symbol count) of this chain. */
38   Node* tail;  /* Previous node(s) of this chain, or 0 if none. */
39   int count;  /* Leaf symbol index, or number of leaves before this chain. */
40 };
41 
42 /*
43 Memory pool for nodes.
44 */
45 typedef struct NodePool {
46   Node* next;  /* Pointer to a free node in the pool. */
47 } NodePool;
48 
49 /*
50 Initializes a chain node with the given values and marks it as in use.
51 */
InitNode(size_t weight,int count,Node * tail,Node * node)52 static void InitNode(size_t weight, int count, Node* tail, Node* node) {
53   node->weight = weight;
54   node->count = count;
55   node->tail = tail;
56 }
57 
58 /*
59 Performs a Boundary Package-Merge step. Puts a new chain in the given list. The
60 new chain is, depending on the weights, a leaf or a combination of two chains
61 from the previous list.
62 lists: The lists of chains.
63 maxbits: Number of lists.
64 leaves: The leaves, one per symbol.
65 numsymbols: Number of leaves.
66 pool: the node memory pool.
67 index: The index of the list in which a new chain or leaf is required.
68 */
BoundaryPM(Node * (* lists)[2],Node * leaves,int numsymbols,NodePool * pool,int index)69 static void BoundaryPM(Node* (*lists)[2], Node* leaves, int numsymbols,
70                        NodePool* pool, int index) {
71   Node* newchain;
72   Node* oldchain;
73   int lastcount = lists[index][1]->count;  /* Count of last chain of list. */
74 
75   if (index == 0 && lastcount >= numsymbols) return;
76 
77   newchain = pool->next++;
78   oldchain = lists[index][1];
79 
80   /* These are set up before the recursive calls below, so that there is a list
81   pointing to the new node, to let the garbage collection know it's in use. */
82   lists[index][0] = oldchain;
83   lists[index][1] = newchain;
84 
85   if (index == 0) {
86     /* New leaf node in list 0. */
87     InitNode(leaves[lastcount].weight, lastcount + 1, 0, newchain);
88   } else {
89     size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight;
90     if (lastcount < numsymbols && sum > leaves[lastcount].weight) {
91       /* New leaf inserted in list, so count is incremented. */
92       InitNode(leaves[lastcount].weight, lastcount + 1, oldchain->tail,
93           newchain);
94     } else {
95       InitNode(sum, lastcount, lists[index - 1][1], newchain);
96       /* Two lookahead chains of previous list used up, create new ones. */
97       BoundaryPM(lists, leaves, numsymbols, pool, index - 1);
98       BoundaryPM(lists, leaves, numsymbols, pool, index - 1);
99     }
100   }
101 }
102 
BoundaryPMFinal(Node * (* lists)[2],Node * leaves,int numsymbols,NodePool * pool,int index)103 static void BoundaryPMFinal(Node* (*lists)[2],
104     Node* leaves, int numsymbols, NodePool* pool, int index) {
105   int lastcount = lists[index][1]->count;  /* Count of last chain of list. */
106 
107   size_t sum = lists[index - 1][0]->weight + lists[index - 1][1]->weight;
108 
109   if (lastcount < numsymbols && sum > leaves[lastcount].weight) {
110     Node* newchain = pool->next;
111     Node* oldchain = lists[index][1]->tail;
112 
113     lists[index][1] = newchain;
114     newchain->count = lastcount + 1;
115     newchain->tail = oldchain;
116   } else {
117     lists[index][1]->tail = lists[index - 1][1];
118   }
119 }
120 
121 /*
122 Initializes each list with as lookahead chains the two leaves with lowest
123 weights.
124 */
InitLists(NodePool * pool,const Node * leaves,int maxbits,Node * (* lists)[2])125 static void InitLists(
126     NodePool* pool, const Node* leaves, int maxbits, Node* (*lists)[2]) {
127   int i;
128   Node* node0 = pool->next++;
129   Node* node1 = pool->next++;
130   InitNode(leaves[0].weight, 1, 0, node0);
131   InitNode(leaves[1].weight, 2, 0, node1);
132   for (i = 0; i < maxbits; i++) {
133     lists[i][0] = node0;
134     lists[i][1] = node1;
135   }
136 }
137 
138 /*
139 Converts result of boundary package-merge to the bitlengths. The result in the
140 last chain of the last list contains the amount of active leaves in each list.
141 chain: Chain to extract the bit length from (last chain from last list).
142 */
ExtractBitLengths(Node * chain,Node * leaves,unsigned * bitlengths)143 static void ExtractBitLengths(Node* chain, Node* leaves, unsigned* bitlengths) {
144   int counts[16] = {0};
145   unsigned end = 16;
146   unsigned ptr = 15;
147   unsigned value = 1;
148   Node* node;
149   int val;
150 
151   for (node = chain; node; node = node->tail) {
152     counts[--end] = node->count;
153   }
154 
155   val = counts[15];
156   while (ptr >= end) {
157     for (; val > counts[ptr - 1]; val--) {
158       bitlengths[leaves[val - 1].count] = value;
159     }
160     ptr--;
161     value++;
162   }
163 }
164 
165 /*
166 Comparator for sorting the leaves. Has the function signature for qsort.
167 */
LeafComparator(const void * a,const void * b)168 static int LeafComparator(const void* a, const void* b) {
169   return ((const Node*)a)->weight - ((const Node*)b)->weight;
170 }
171 
ZopfliLengthLimitedCodeLengths(const size_t * frequencies,int n,int maxbits,unsigned * bitlengths)172 int ZopfliLengthLimitedCodeLengths(
173     const size_t* frequencies, int n, int maxbits, unsigned* bitlengths) {
174   NodePool pool;
175   int i;
176   int numsymbols = 0;  /* Amount of symbols with frequency > 0. */
177   int numBoundaryPMRuns;
178   Node* nodes;
179 
180   /* Array of lists of chains. Each list requires only two lookahead chains at
181   a time, so each list is a array of two Node*'s. */
182   Node* (*lists)[2];
183 
184   /* One leaf per symbol. Only numsymbols leaves will be used. */
185   Node* leaves = (Node*)malloc(n * sizeof(*leaves));
186 
187   /* Initialize all bitlengths at 0. */
188   for (i = 0; i < n; i++) {
189     bitlengths[i] = 0;
190   }
191 
192   /* Count used symbols and place them in the leaves. */
193   for (i = 0; i < n; i++) {
194     if (frequencies[i]) {
195       leaves[numsymbols].weight = frequencies[i];
196       leaves[numsymbols].count = i;  /* Index of symbol this leaf represents. */
197       numsymbols++;
198     }
199   }
200 
201   /* Check special cases and error conditions. */
202   if ((1 << maxbits) < numsymbols) {
203     free(leaves);
204     return 1;  /* Error, too few maxbits to represent symbols. */
205   }
206   if (numsymbols == 0) {
207     free(leaves);
208     return 0;  /* No symbols at all. OK. */
209   }
210   if (numsymbols == 1) {
211     bitlengths[leaves[0].count] = 1;
212     free(leaves);
213     return 0;  /* Only one symbol, give it bitlength 1, not 0. OK. */
214   }
215   if (numsymbols == 2) {
216     bitlengths[leaves[0].count]++;
217     bitlengths[leaves[1].count]++;
218     free(leaves);
219     return 0;
220   }
221 
222   /* Sort the leaves from lightest to heaviest. Add count into the same
223   variable for stable sorting. */
224   for (i = 0; i < numsymbols; i++) {
225     if (leaves[i].weight >=
226         ((size_t)1 << (sizeof(leaves[0].weight) * CHAR_BIT - 9))) {
227       free(leaves);
228       return 1;  /* Error, we need 9 bits for the count. */
229     }
230     leaves[i].weight = (leaves[i].weight << 9) | leaves[i].count;
231   }
232   qsort(leaves, numsymbols, sizeof(Node), LeafComparator);
233   for (i = 0; i < numsymbols; i++) {
234     leaves[i].weight >>= 9;
235   }
236 
237   if (numsymbols - 1 < maxbits) {
238     maxbits = numsymbols - 1;
239   }
240 
241   /* Initialize node memory pool. */
242   nodes = (Node*)malloc(maxbits * 2 * numsymbols * sizeof(Node));
243   pool.next = nodes;
244 
245   lists = (Node* (*)[2])malloc(maxbits * sizeof(*lists));
246   InitLists(&pool, leaves, maxbits, lists);
247 
248   /* In the last list, 2 * numsymbols - 2 active chains need to be created. Two
249   are already created in the initialization. Each BoundaryPM run creates one. */
250   numBoundaryPMRuns = 2 * numsymbols - 4;
251   for (i = 0; i < numBoundaryPMRuns - 1; i++) {
252     BoundaryPM(lists, leaves, numsymbols, &pool, maxbits - 1);
253   }
254   BoundaryPMFinal(lists, leaves, numsymbols, &pool, maxbits - 1);
255 
256   ExtractBitLengths(lists[maxbits - 1][1], leaves, bitlengths);
257 
258   free(lists);
259   free(leaves);
260   free(nodes);
261   return 0;  /* OK. */
262 }
263