1 /*
2  * index.c: Implementation of index.h.
3  */
4 
5 #include "agedu.h"
6 #include "trie.h"
7 #include "index.h"
8 #include "alloc.h"
9 
10 #define alignof(typ) ( offsetof(struct { char c; typ t; }, t) )
11 
12 #define PADDING(x, mod) ( ((mod) - ((x) % (mod))) % (mod) )
13 
14 struct avlnode {
15     off_t children[2], element;
16     int maxdepth;		       /* maximum depth of this subtree */
17     unsigned long long totalsize;
18 };
19 
20 /*
21  * Determine the maximum depth of an AVL tree containing a certain
22  * number of nodes.
23  */
index_maxdepth(int nodecount)24 static int index_maxdepth(int nodecount)
25 {
26     int b, c, maxdepth;
27 
28     /*
29      * Model the tree growing at maximum imbalance. We do this by
30      * determining the number of nodes in the most unbalanced
31      * (i.e. smallest) tree of any given depth, and stopping when
32      * that's larger than nodecount.
33      */
34     maxdepth = 1;
35     b = 0;
36     c = 1;
37     while (b <= nodecount) {
38 	int tmp;
39 
40 	tmp = 1 + b + c;
41 	b = c;
42 	c = tmp;
43 	maxdepth++;
44     }
45 
46     return maxdepth;
47 }
48 
index_initial_size(off_t currentsize,int nodecount)49 off_t index_initial_size(off_t currentsize, int nodecount)
50 {
51     currentsize += PADDING(currentsize, alignof(off_t));
52     currentsize += nodecount * sizeof(off_t);
53     currentsize += PADDING(currentsize, alignof(struct avlnode));
54 
55     return currentsize;
56 }
57 
58 /* ----------------------------------------------------------------------
59  * Functions to build the index.
60  */
61 
62 struct indexbuild {
63     void *t;
64     int n, nnodes;
65     struct avlnode *nodes;
66     off_t *roots;
67     struct avlnode *currroot;
68     struct avlnode *firstmutable;
69 };
70 
71 #define ELEMENT(t,offset) \
72     ((struct trie_file *) ((offset) ? ((char *)(t) + (offset)) : NULL))
73 #define NODE(t,offset) \
74     ((struct avlnode *) ((offset) ? ((char *)(t) + (offset)) : NULL))
75 #define OFFSET(t,node) \
76     ((node) ? (off_t)((const char *)node - (const char *)t) : (off_t)0)
77 #define MAXDEPTH(node) ((node) ? (node)->maxdepth : 0)
78 
indexbuild_new(void * t,off_t startoff,int nodecount,size_t * delta)79 indexbuild *indexbuild_new(void *t, off_t startoff, int nodecount,
80 			   size_t *delta)
81 {
82     indexbuild *ib = snew(indexbuild);
83 
84     ib->t = t;
85     startoff += PADDING(startoff, alignof(off_t));
86     ib->roots = (off_t *)((char *)t + startoff);
87     trie_set_index_offset(t, startoff);
88     startoff += nodecount * sizeof(off_t);
89     startoff += PADDING(startoff, alignof(struct avlnode));
90     ib->nodes = (struct avlnode *)((char *)t + startoff);
91     ib->nnodes = ib->n = 0;
92     ib->currroot = NULL;
93     ib->firstmutable = ib->nodes + ib->nnodes;
94 
95     if (delta)
96 	*delta = sizeof(struct avlnode) * (1 + index_maxdepth(nodecount));
97 
98     return ib;
99 }
100 
101 /*
102  * Return a mutable node, which is n or a copy of n if n is
103  * non-NULL.
104  */
avl_makemutable(indexbuild * ib,struct avlnode * n)105 static struct avlnode *avl_makemutable(indexbuild *ib, struct avlnode *n)
106 {
107     struct avlnode *newnode;
108 
109     if (n && n >= ib->firstmutable)
110 	return n;		       /* already mutable */
111 
112     newnode = ib->nodes + ib->nnodes++;
113     if (n)
114 	*newnode = *n;		       /* structure copy */
115     return newnode;
116 }
117 
118 /*
119  * Fix the annotations in a tree node.
120  */
avl_fix(indexbuild * ib,struct avlnode * n)121 static void avl_fix(indexbuild *ib, struct avlnode *n)
122 {
123     /*
124      * Make sure the max depth field is right.
125      */
126     n->maxdepth = 1 + max(MAXDEPTH(NODE(ib->t, n->children[0])),
127 			  MAXDEPTH(NODE(ib->t, n->children[1])));
128 
129     n->totalsize =
130 	(ELEMENT(ib->t, n->element)->size +
131 	 (n->children[0] ? NODE(ib->t, n->children[0])->totalsize : 0) +
132 	 (n->children[1] ? NODE(ib->t, n->children[1])->totalsize : 0));
133 }
134 
avl_insert(indexbuild * ib,struct avlnode * n,off_t node)135 static struct avlnode *avl_insert(indexbuild *ib, struct avlnode *n,
136 				  off_t node)
137 {
138     struct trie_file *newfile;
139     struct trie_file *oldfile;
140     int subtree;
141     struct avlnode *nn;
142 
143     /*
144      * Recursion bottoming out: if the subtree we're inserting
145      * into is null, just construct and return a fresh node.
146      */
147     if (!n) {
148 	n = avl_makemutable(ib, NULL);
149 	n->children[0] = n->children[1] = 0;
150 	n->element = node;
151 	avl_fix(ib, n);
152 	return n;
153     }
154 
155     /*
156      * Otherwise, we have to insert into an existing tree.
157      */
158 
159     /*
160      * Determine which subtree to insert this node into. Ties
161      * aren't important, so we just break them any old way.
162      */
163     newfile = (struct trie_file *)((char *)ib->t + node);
164     oldfile = (struct trie_file *)((char *)ib->t + n->element);
165     if (newfile->atime > oldfile->atime)
166 	subtree = 1;
167     else
168 	subtree = 0;
169 
170     /*
171      * Construct a copy of the node we're looking at.
172      */
173     n = avl_makemutable(ib, n);
174 
175     /*
176      * Recursively insert into the next subtree down.
177      */
178     nn = avl_insert(ib, NODE(ib->t, n->children[subtree]), node);
179     n->children[subtree] = OFFSET(ib->t, nn);
180 
181     /*
182      * Rebalance if necessary, to ensure that our node's children
183      * differ in maximum depth by at most one. Of course, the
184      * subtree we've just modified will be the deeper one if so.
185      */
186     if (MAXDEPTH(NODE(ib->t, n->children[subtree])) >
187 	MAXDEPTH(NODE(ib->t, n->children[1-subtree])) + 1) {
188 	struct avlnode *p, *q;
189 
190 	/*
191 	 * There are two possible cases, one of which requires a
192 	 * single tree rotation and the other requires two. It all
193 	 * depends on which subtree of the next node down (here p)
194 	 * is the taller. (It turns out that they can't both be
195 	 * the same height: any tree which has just increased in
196 	 * depth must have one subtree strictly taller than the
197 	 * other.)
198 	 */
199 	p = NODE(ib->t, n->children[subtree]);
200 	assert(p >= ib->firstmutable);
201 	if (MAXDEPTH(NODE(ib->t, p->children[subtree])) >=
202 	    MAXDEPTH(NODE(ib->t, p->children[1-subtree]))) {
203 	    /*
204 	     *     n                       p
205 	     *    / \                     / \
206 	     *  [k]  p         ->        n  [k+1]
207 	     *      / \                 / \
208 	     *    [k] [k+1]           [k] [k]
209 	     */
210 	    n->children[subtree] = p->children[1-subtree];
211 	    p->children[1-subtree] = OFFSET(ib->t, n);
212 	    avl_fix(ib, n);
213 	    n = p;
214 	} else {
215 	    q = NODE(ib->t, p->children[1-subtree]);
216 	    assert(q >= ib->firstmutable);
217 	    p->children[1-subtree] = OFFSET(ib->t, q);
218 	    /*
219 	     *     n               n                 q
220 	     *    / \             / \              /   \
221 	     *  [k]  p    ==    [k]  p      ->    n     p
222 	     *      / \             / \          / \   / \
223 	     *  [k+1] [k]          q  [k]      [k]  \ /  [k]
224 	     *                    / \         [k-1,k] [k-1,k]
225 	     *              [k-1,k] [k-1,k]
226 	     */
227 	    n->children[subtree] = q->children[1-subtree];
228 	    p->children[1-subtree] = q->children[subtree];
229 	    q->children[1-subtree] = OFFSET(ib->t, n);
230 	    q->children[subtree] = OFFSET(ib->t, p);
231 	    avl_fix(ib, n);
232 	    avl_fix(ib, p);
233 	    n = q;
234 	}
235     }
236 
237     /*
238      * Fix up our maximum depth field.
239      */
240     avl_fix(ib, n);
241 
242     /*
243      * Done.
244      */
245     return n;
246 }
247 
indexbuild_add(indexbuild * ib,const struct trie_file * tf)248 void indexbuild_add(indexbuild *ib, const struct trie_file *tf)
249 {
250     off_t node = OFFSET(ib->t, tf);
251     ib->currroot = avl_insert(ib, ib->currroot, node);
252     ib->roots[ib->n++] = 0;
253 }
254 
indexbuild_tag(indexbuild * ib)255 void indexbuild_tag(indexbuild *ib)
256 {
257     if (ib->n > 0)
258 	ib->roots[ib->n - 1] = OFFSET(ib->t, ib->currroot);
259     ib->firstmutable = ib->nodes + ib->nnodes;
260 }
261 
indexbuild_rebase(indexbuild * ib,void * t)262 void indexbuild_rebase(indexbuild *ib, void *t)
263 {
264     ptrdiff_t diff = (unsigned char *)t - (unsigned char *)(ib->t);
265 
266     ib->t = t;
267     ib->nodes = (struct avlnode *)((unsigned char *)ib->nodes + diff);
268     ib->roots = (off_t *)((unsigned char *)ib->roots + diff);
269     if (ib->currroot)
270 	ib->currroot = (struct avlnode *)
271 	    ((unsigned char *)ib->currroot + diff);
272     ib->firstmutable = (struct avlnode *)((unsigned char *)ib->firstmutable + diff);
273 }
274 
indexbuild_realsize(indexbuild * ib)275 off_t indexbuild_realsize(indexbuild *ib)
276 {
277     return OFFSET(ib->t, (ib->nodes + ib->nnodes));
278 }
279 
indexbuild_free(indexbuild * ib)280 void indexbuild_free(indexbuild *ib)
281 {
282     assert(ib->n == trie_count(ib->t));
283     sfree(ib);
284 }
285 
index_has_root(const void * t,int n)286 int index_has_root(const void *t, int n)
287 {
288     const off_t *roots;
289 
290     roots = (const off_t *)((const char *)t + trie_get_index_offset(t));
291 
292     if (n == 0)
293 	return 1;
294     if (n < 0 || n >= trie_count(t) || !roots[n-1])
295 	return 0;
296     return 1;
297 }
298 
index_query(const void * t,int n,unsigned long long at)299 unsigned long long index_query(const void *t, int n, unsigned long long at)
300 {
301     const off_t *roots;
302     const struct avlnode *node;
303     unsigned long count;
304     unsigned long long ret;
305 
306     roots = (const off_t *)((const char *)t + trie_get_index_offset(t));
307 
308     if (n < 1)
309 	return 0;
310     count = trie_count(t);
311     if (n > count)
312 	n = count;
313 
314     assert(roots[n-1]);
315     node = NODE(t, roots[n-1]);
316 
317     ret = 0;
318 
319     while (node) {
320 	const struct trie_file *tf = ELEMENT(t, node->element);
321 	const struct avlnode *left = NODE(t, node->children[0]);
322 	const struct avlnode *right = NODE(t, node->children[1]);
323 
324 	if (at <= tf->atime) {
325 	    node = left;
326 	} else {
327 	    if (left)
328 		ret += left->totalsize;
329 	    ret += tf->size;
330 	    node = right;
331 	}
332     }
333 
334     return ret;
335 }
336 
index_order_stat(const void * t,double f)337 unsigned long long index_order_stat(const void *t, double f)
338 {
339     const off_t *roots;
340     const struct avlnode *node;
341     unsigned long count;
342     unsigned long long size;
343 
344     roots = (const off_t *)((const char *)t + trie_get_index_offset(t));
345     count = trie_count(t);
346     assert(roots[count-1]);
347     node = NODE(t, roots[count-1]);
348 
349     size = node->totalsize * f;
350     if (size > node->totalsize) {
351         /*
352          * This can happen in principle if node->totalsize is so
353          * enormous (bigger than 2^53 bytes) that it can't be
354          * represented faithfully in a 'double'. Then it might be
355          * rounded _up_ by the conversion to double, and if
356          * multiplication by f doesn't make it smaller again, end up
357          * just bigger than node->totalsize by the time we convert
358          * back to an integer.
359          *
360          * It takes a huge filesystem to make that happen in reality -
361          * but a corrupt index file could also trip the same case, so
362          * we should at least handle it gracefully.
363          */
364         size = node->totalsize;
365     }
366 
367     while (1) {
368 	const struct trie_file *tf = ELEMENT(t, node->element);
369 	const struct avlnode *left = NODE(t, node->children[0]);
370 	const struct avlnode *right = NODE(t, node->children[1]);
371 
372 	if (left && size < left->totalsize) {
373 	    node = left;
374 	} else if (!right ||
375                    size < (left ? left->totalsize : 0) + tf->size) {
376 	    return tf->atime;
377 	} else {
378 	    if (left)
379 		size -= left->totalsize;
380 	    size -= tf->size;
381 	    node = right;
382 	}
383     }
384 }
385