1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; fill-column: 160 -*- */
2 /*
3  * Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
4  *
5  * This library is free software: you can redistribute it and/or modify it
6  * under the terms of the GNU Lesser General Public License as published by
7  * the Free Software Foundation.
8  *
9  * This library is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
11  * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
12  * for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public License
15  * along with this library. If not, see <http://www.gnu.org/licenses/>.
16  *
17  * Authors: Michael Zucchi <notzed@ximian.com>
18  */
19 
20 #include "evolution-data-server-config.h"
21 
22 #include <ctype.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <unistd.h>
29 #include <sys/stat.h>
30 #include <sys/types.h>
31 
32 #include <glib/gstdio.h>
33 
34 #include "camel-block-file.h"
35 #include "camel-mempool.h"
36 #include "camel-object.h"
37 #include "camel-partition-table.h"
38 #include "camel-text-index.h"
39 
40 #define w(x)
41 #define io(x)
42 #define d(x) /*(printf ("%s (%d):%s: ",  __FILE__, __LINE__, __PRETTY_FUNCTION__),(x))*/
43 
44 /* cursor debug */
45 #define c(x)
46 
47 #define CAMEL_TEXT_INDEX_MAX_WORDLEN  (36)
48 
49 #define CAMEL_TEXT_INDEX_LOCK(kf, lock) \
50 	(g_rec_mutex_lock (&((CamelTextIndex *) kf)->priv->lock))
51 #define CAMEL_TEXT_INDEX_UNLOCK(kf, lock) \
52 	(g_rec_mutex_unlock (&((CamelTextIndex *) kf)->priv->lock))
53 
54 static gint text_index_compress_nosync (CamelIndex *idx);
55 
56 /* ********************************************************************** */
57 
58 struct _CamelTextIndexNamePrivate {
59 	GString *buffer;
60 	camel_key_t nameid;
61 	CamelMemPool *pool;
62 };
63 
64 CamelTextIndexName *camel_text_index_name_new (CamelTextIndex *idx, const gchar *name, camel_key_t nameid);
65 
66 /* ****************************** */
67 
68 struct _CamelTextIndexCursorPrivate {
69 	camel_block_t first;
70 	camel_block_t next;
71 
72 	gint record_index;
73 
74 	gsize record_count;
75 	camel_key_t *records;
76 
77 	gchar *current;
78 };
79 
80 CamelTextIndexCursor *camel_text_index_cursor_new (CamelTextIndex *idx, camel_block_t data);
81 
82 /* ****************************** */
83 
84 struct _CamelTextIndexKeyCursorPrivate {
85 	CamelKeyTable *table;
86 
87 	camel_key_t keyid;
88 	guint flags;
89 	camel_block_t data;
90 	gchar *current;
91 };
92 
93 CamelTextIndexKeyCursor *camel_text_index_key_cursor_new (CamelTextIndex *idx, CamelKeyTable *table);
94 
95 /* ********************************************************************** */
96 
97 #define CAMEL_TEXT_INDEX_VERSION "TEXT.000"
98 #define CAMEL_TEXT_INDEX_KEY_VERSION "KEYS.000"
99 
100 struct _CamelTextIndexPrivate {
101 	CamelBlockFile *blocks;
102 	CamelKeyFile *links;
103 
104 	CamelKeyTable *word_index;
105 	CamelPartitionTable *word_hash;
106 
107 	CamelKeyTable *name_index;
108 	CamelPartitionTable *name_hash;
109 
110 	/* Cache of words to write */
111 	guint word_cache_limit;
112 	GQueue word_cache;
113 	GHashTable *words;
114 	GRecMutex lock;
115 };
116 
117 /* Root block of text index */
118 struct _CamelTextIndexRoot {
119 	struct _CamelBlockRoot root;
120 
121 	/* FIXME: the index root could contain a pointer to the hash root */
122 	camel_block_t word_index_root; /* a keyindex containing the keyid -> word mapping */
123 	camel_block_t word_hash_root; /* a partitionindex containing word -> keyid mapping */
124 
125 	camel_block_t name_index_root; /* same, for names */
126 	camel_block_t name_hash_root;
127 
128 	guint32 words;		/* total words */
129 	guint32 names;		/* total names */
130 	guint32 deleted;	/* deleted names */
131 	guint32 keys;		/* total key 'chunks' written, used with deleted to determine fragmentation */
132 };
133 
134 struct _CamelTextIndexWord {
135 	camel_block_t data;	/* where the data starts */
136 	camel_key_t wordid;
137 	gchar *word;
138 	guint used;
139 	camel_key_t names[32];
140 };
141 
142 /* ********************************************************************** */
143 /* CamelTextIndex */
144 /* ********************************************************************** */
145 
G_DEFINE_TYPE_WITH_PRIVATE(CamelTextIndex,camel_text_index,CAMEL_TYPE_INDEX)146 G_DEFINE_TYPE_WITH_PRIVATE (CamelTextIndex, camel_text_index, CAMEL_TYPE_INDEX)
147 
148 static void
149 text_index_dispose (GObject *object)
150 {
151 	CamelTextIndexPrivate *priv;
152 
153 	priv = CAMEL_TEXT_INDEX (object)->priv;
154 
155 	/* Only run this the first time. */
156 	if (priv->word_index != NULL)
157 		camel_index_sync (CAMEL_INDEX (object));
158 
159 	g_clear_object (&priv->word_index);
160 	g_clear_object (&priv->word_hash);
161 	g_clear_object (&priv->name_index);
162 	g_clear_object (&priv->name_hash);
163 	g_clear_object (&priv->blocks);
164 	g_clear_object (&priv->links);
165 
166 	/* Chain up to parent's dispose () method. */
167 	G_OBJECT_CLASS (camel_text_index_parent_class)->dispose (object);
168 }
169 
170 static void
text_index_finalize(GObject * object)171 text_index_finalize (GObject *object)
172 {
173 	CamelTextIndexPrivate *priv;
174 
175 	priv = CAMEL_TEXT_INDEX (object)->priv;
176 
177 	g_warn_if_fail (g_queue_is_empty (&priv->word_cache));
178 	g_warn_if_fail (g_hash_table_size (priv->words) == 0);
179 
180 	g_hash_table_destroy (priv->words);
181 
182 	g_rec_mutex_clear (&priv->lock);
183 
184 	/* Chain up to parent's finalize () method. */
185 	G_OBJECT_CLASS (camel_text_index_parent_class)->finalize (object);
186 }
187 
188 /* call locked */
189 static void
text_index_add_name_to_word(CamelIndex * idx,const gchar * word,camel_key_t nameid)190 text_index_add_name_to_word (CamelIndex *idx,
191                              const gchar *word,
192                              camel_key_t nameid)
193 {
194 	struct _CamelTextIndexWord *w;
195 	CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
196 	camel_key_t wordid;
197 	camel_block_t data;
198 	struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (p->blocks);
199 
200 	w = g_hash_table_lookup (p->words, word);
201 	if (w == NULL) {
202 		GQueue trash = G_QUEUE_INIT;
203 		GList *link;
204 		guint length;
205 
206 		wordid = camel_partition_table_lookup (p->word_hash, word);
207 		if (wordid == 0) {
208 			data = 0;
209 			wordid = camel_key_table_add (p->word_index, word, 0, 0);
210 			if (wordid == 0) {
211 				g_warning (
212 					"Could not create key entry for word '%s': %s\n",
213 					word, g_strerror (errno));
214 				return;
215 			}
216 			if (camel_partition_table_add (p->word_hash, word, wordid) == -1) {
217 				g_warning (
218 					"Could not create hash entry for word '%s': %s\n",
219 					word, g_strerror (errno));
220 				return;
221 			}
222 			rb->words++;
223 			camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks));
224 		} else {
225 			data = camel_key_table_lookup (p->word_index, wordid, NULL, NULL);
226 			if (data == 0) {
227 				g_warning (
228 					"Could not find key entry for word '%s': %s\n",
229 					word, g_strerror (errno));
230 				return;
231 			}
232 		}
233 
234 		w = g_malloc0 (sizeof (*w));
235 		w->word = g_strdup (word);
236 		w->wordid = wordid;
237 		w->used = 1;
238 		w->data = data;
239 
240 		w->names[0] = nameid;
241 		g_hash_table_insert (p->words, w->word, w);
242 		g_queue_push_head (&p->word_cache, w);
243 
244 		length = p->word_cache.length;
245 		link = g_queue_peek_tail_link (&p->word_cache);
246 
247 		while (link != NULL && length > p->word_cache_limit) {
248 			struct _CamelTextIndexWord *ww = link->data;
249 
250 			io (printf ("writing key file entry '%s' [%x]\n", ww->word, ww->data));
251 			if (camel_key_file_write (p->links, &ww->data, ww->used, ww->names) != -1) {
252 				io (printf ("  new data [%x]\n", ww->data));
253 				rb->keys++;
254 				camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks));
255 				/* if this call fails - we still point to the old data - not fatal */
256 				camel_key_table_set_data (
257 					p->word_index, ww->wordid, ww->data);
258 				g_hash_table_remove (p->words, ww->word);
259 				g_queue_push_tail (&trash, link);
260 				link->data = NULL;
261 				g_free (ww->word);
262 				g_free (ww);
263 				length--;
264 			}
265 
266 			link = g_list_previous (link);
267 		}
268 
269 		/* Remove deleted words from the cache. */
270 		while ((link = g_queue_pop_head (&trash)) != NULL)
271 			g_queue_delete_link (&p->word_cache, link);
272 
273 	} else {
274 		g_queue_remove (&p->word_cache, w);
275 		g_queue_push_head (&p->word_cache, w);
276 		w->names[w->used] = nameid;
277 		w->used++;
278 		if (w->used == G_N_ELEMENTS (w->names)) {
279 			io (printf ("writing key file entry '%s' [%x]\n", w->word, w->data));
280 			if (camel_key_file_write (p->links, &w->data, w->used, w->names) != -1) {
281 				rb->keys++;
282 				camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks));
283 				/* if this call fails - we still point to the old data - not fatal */
284 				camel_key_table_set_data (
285 					p->word_index, w->wordid, w->data);
286 			}
287 			/* FIXME: what to on error?  lost data? */
288 			w->used = 0;
289 		}
290 	}
291 }
292 
293 static gint
text_index_sync(CamelIndex * idx)294 text_index_sync (CamelIndex *idx)
295 {
296 	CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
297 	struct _CamelTextIndexWord *ww;
298 	struct _CamelTextIndexRoot *rb;
299 	gint ret = 0, wfrag, nfrag;
300 
301 	d (printf ("sync: blocks = %p\n", p->blocks));
302 
303 	if (p->blocks == NULL || p->links == NULL
304 	    || p->word_index == NULL || p->word_hash == NULL
305 	    || p->name_index == NULL || p->name_hash == NULL)
306 		return 0;
307 
308 	rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (p->blocks);
309 
310 	/* sync/flush word cache */
311 
312 	CAMEL_TEXT_INDEX_LOCK (idx, lock);
313 
314 	/* we sync, bump down the cache limits since we dont need them for reading */
315 	camel_block_file_set_cache_limit (p->blocks, 128);
316 	/* this doesn't really need to be dropped, its only used in updates anyway */
317 	p->word_cache_limit = 1024;
318 
319 	while ((ww = g_queue_pop_head (&p->word_cache))) {
320 		if (ww->used > 0) {
321 			io (printf ("writing key file entry '%s' [%x]\n", ww->word, ww->data));
322 			if (camel_key_file_write (p->links, &ww->data, ww->used, ww->names) != -1) {
323 				io (printf ("  new data [%x]\n", ww->data));
324 				rb->keys++;
325 				camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks));
326 				camel_key_table_set_data (
327 					p->word_index, ww->wordid, ww->data);
328 			} else {
329 				ret = -1;
330 			}
331 			ww->used = 0;
332 		}
333 		g_hash_table_remove (p->words, ww->word);
334 		g_free (ww->word);
335 		g_free (ww);
336 	}
337 
338 	if (camel_key_table_sync (p->word_index) == -1
339 	    || camel_key_table_sync (p->name_index) == -1
340 	    || camel_partition_table_sync (p->word_hash) == -1
341 	    || camel_partition_table_sync (p->name_hash) == -1)
342 		ret = -1;
343 
344 	/* only do the frag/compress check if we did some new writes on this index */
345 	wfrag = rb->words ? (((rb->keys - rb->words) * 100)/ rb->words) : 0;
346 	nfrag = rb->names ? ((rb->deleted * 100) / rb->names) : 0;
347 	d (printf ("  words = %d, keys = %d\n", rb->words, rb->keys));
348 
349 	if (ret == 0) {
350 		if (wfrag > 30 || nfrag > 20)
351 			ret = text_index_compress_nosync (idx);
352 	}
353 
354 	ret = ret == -1 ? ret : camel_block_file_sync (p->blocks);
355 
356 	CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
357 
358 	return ret;
359 }
360 
361 static void
tmp_name(const gchar * in,gchar * o,gsize o_len)362 tmp_name (const gchar *in,
363           gchar *o,
364           gsize o_len)
365 {
366 	gchar *s;
367 
368 	s = strrchr (in, '/');
369 	if (s) {
370 		memcpy (o, in, s - in + 1);
371 		memcpy (o + (s - in + 1), ".#", 2);
372 		strcpy (o + (s - in + 3), s + 1);
373 	} else {
374 		g_snprintf (o, o_len, ".#%s", in);
375 	}
376 }
377 
378 static gint
text_index_compress(CamelIndex * idx)379 text_index_compress (CamelIndex *idx)
380 {
381 	gint ret;
382 
383 	CAMEL_TEXT_INDEX_LOCK (idx, lock);
384 
385 	ret = camel_index_sync (idx);
386 	if (ret != -1)
387 		ret = text_index_compress_nosync (idx);
388 
389 	CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
390 
391 	return ret;
392 }
393 
394 /* Attempt to recover index space by compressing the indices */
395 static gint
text_index_compress_nosync(CamelIndex * idx)396 text_index_compress_nosync (CamelIndex *idx)
397 {
398 	CamelTextIndex *newidx;
399 	CamelTextIndexPrivate *newp, *oldp;
400 	camel_key_t oldkeyid, newkeyid;
401 	GHashTable *remap;
402 	guint deleted;
403 	camel_block_t data, newdata;
404 	gint i, ret = -1;
405 	gchar *name = NULL;
406 	guint flags;
407 	gchar *newpath, *savepath, *oldpath;
408 	gsize count, newcount;
409 	camel_key_t *records, newrecords[256];
410 	struct _CamelTextIndexRoot *rb;
411 
412 	i = strlen (idx->path) + 16;
413 	oldpath = alloca (i);
414 	newpath = alloca (i);
415 	savepath = alloca (i);
416 
417 	g_strlcpy (oldpath, idx->path, i);
418 	oldpath[strlen (oldpath) - strlen (".index")] = 0;
419 
420 	tmp_name (oldpath, newpath, i);
421 	g_snprintf (savepath, i, "%s~", oldpath);
422 
423 	d (printf ("Old index: %s\n", idx->path));
424 	d (printf ("Old path: %s\n", oldpath));
425 	d (printf ("New: %s\n", newpath));
426 	d (printf ("Save: %s\n", savepath));
427 
428 	newidx = camel_text_index_new (newpath, O_RDWR | O_CREAT);
429 	if (newidx == NULL)
430 		return -1;
431 
432 	newp = CAMEL_TEXT_INDEX (newidx)->priv;
433 	oldp = CAMEL_TEXT_INDEX (idx)->priv;
434 
435 	CAMEL_TEXT_INDEX_LOCK (idx, lock);
436 
437 	rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (newp->blocks);
438 
439 	rb->words = 0;
440 	rb->names = 0;
441 	rb->deleted = 0;
442 	rb->keys = 0;
443 
444 	/* Process:
445 	 * For each name we still have:
446 	 * Add it to the new index & setup remap table
447 	 *
448 	 * For each word:
449 	 * Copy word's data to a new file
450 	 * Add new word to index (*) (can we just copy blocks?) */
451 
452 	/* Copy undeleted names to new index file, creating new indices */
453 	io (printf ("Copying undeleted names to new file\n"));
454 	remap = g_hash_table_new (NULL, NULL);
455 	oldkeyid = 0;
456 	deleted = 0;
457 	while ((oldkeyid = camel_key_table_next (oldp->name_index, oldkeyid, &name, &flags, &data))) {
458 		if ((flags&1) == 0) {
459 			io (printf ("copying name '%s'\n", name));
460 			newkeyid = camel_key_table_add (
461 				newp->name_index, name, data, flags);
462 			if (newkeyid == 0)
463 				goto fail;
464 			rb->names++;
465 			camel_partition_table_add (
466 				newp->name_hash, name, newkeyid);
467 			g_hash_table_insert (remap, GINT_TO_POINTER (oldkeyid), GINT_TO_POINTER (newkeyid));
468 		} else {
469 			io (printf ("deleted name '%s'\n", name));
470 		}
471 		g_free (name);
472 		name = NULL;
473 		deleted |= flags;
474 	}
475 
476 	/* Copy word data across, remapping/deleting and create new index for it */
477 	/* We re-block the data into 256 entry lots while we're at it, since we only
478 	 * have to do 1 at a time and its cheap */
479 	oldkeyid = 0;
480 	while ((oldkeyid = camel_key_table_next (oldp->word_index, oldkeyid, &name, &flags, &data))) {
481 		io (printf ("copying word '%s'\n", name));
482 		newdata = 0;
483 		newcount = 0;
484 		if (data) {
485 			rb->words++;
486 			rb->keys++;
487 		}
488 		while (data) {
489 			if (camel_key_file_read (oldp->links, &data, &count, &records) == -1) {
490 				io (printf ("could not read from old keys at %d for word '%s'\n", (gint) data, name));
491 				goto fail;
492 			}
493 			for (i = 0; i < count; i++) {
494 				newkeyid = (camel_key_t) GPOINTER_TO_INT (g_hash_table_lookup (remap, GINT_TO_POINTER (records[i])));
495 				if (newkeyid) {
496 					newrecords[newcount++] = newkeyid;
497 					if (newcount == G_N_ELEMENTS (newrecords)) {
498 						if (camel_key_file_write (newp->links, &newdata, newcount, newrecords) == -1) {
499 							g_free (records);
500 							goto fail;
501 						}
502 						newcount = 0;
503 					}
504 				}
505 			}
506 			g_free (records);
507 		}
508 
509 		if (newcount > 0) {
510 			if (camel_key_file_write (newp->links, &newdata, newcount, newrecords) == -1)
511 				goto fail;
512 		}
513 
514 		if (newdata != 0) {
515 			newkeyid = camel_key_table_add (
516 				newp->word_index, name, newdata, flags);
517 			if (newkeyid == 0)
518 				goto fail;
519 			camel_partition_table_add (
520 				newp->word_hash, name, newkeyid);
521 		}
522 		g_free (name);
523 		name = NULL;
524 	}
525 
526 	camel_block_file_touch_block (newp->blocks, camel_block_file_get_root_block (newp->blocks));
527 
528 	if (camel_index_sync (CAMEL_INDEX (newidx)) == -1)
529 		goto fail;
530 
531 	/* Rename underlying files to match */
532 	ret = camel_index_rename (idx, savepath);
533 	if (ret == -1)
534 		goto fail;
535 
536 	/* If this fails, we'll pick up something during restart? */
537 	ret = camel_index_rename ((CamelIndex *) newidx, oldpath);
538 
539 #define myswap(a, b) { gpointer tmp = a; a = b; b = tmp; }
540 	/* Poke the private data across to the new object */
541 	/* And change the fd's over, etc? */
542 	/* Yes: This is a hack */
543 	myswap (newp->blocks, oldp->blocks);
544 	myswap (newp->links, oldp->links);
545 	myswap (newp->word_index, oldp->word_index);
546 	myswap (newp->word_hash, oldp->word_hash);
547 	myswap (newp->name_index, oldp->name_index);
548 	myswap (newp->name_hash, oldp->name_hash);
549 	myswap (((CamelIndex *) newidx)->path, ((CamelIndex *) idx)->path);
550 #undef myswap
551 
552 	ret = 0;
553 fail:
554 	CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
555 
556 	camel_index_delete ((CamelIndex *) newidx);
557 
558 	g_object_unref (newidx);
559 	g_free (name);
560 	g_hash_table_destroy (remap);
561 
562 	/* clean up temp files always */
563 	g_snprintf (savepath, i, "%s~.index", oldpath);
564 	g_unlink (savepath);
565 	g_snprintf (newpath, i, "%s.data", savepath);
566 	g_unlink (newpath);
567 
568 	return ret;
569 }
570 
571 static gint
text_index_delete(CamelIndex * idx)572 text_index_delete (CamelIndex *idx)
573 {
574 	CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
575 	gint ret = 0;
576 
577 	if (camel_block_file_delete (p->blocks) == -1)
578 		ret = -1;
579 	if (camel_key_file_delete (p->links) == -1)
580 		ret = -1;
581 
582 	return ret;
583 }
584 
585 static gint
text_index_rename(CamelIndex * idx,const gchar * path)586 text_index_rename (CamelIndex *idx,
587                    const gchar *path)
588 {
589 	CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
590 	gchar *newlink, *newblock;
591 	gsize newlink_len, newblock_len;
592 	gint err, ret;
593 
594 	CAMEL_TEXT_INDEX_LOCK (idx, lock);
595 
596 	newblock_len = strlen (path) + 8;
597 	newblock = alloca (newblock_len);
598 	g_snprintf (newblock, newblock_len, "%s.index", path);
599 	ret = camel_block_file_rename (p->blocks, newblock);
600 	if (ret == -1) {
601 		CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
602 		return -1;
603 	}
604 
605 	newlink_len = strlen (path) + 16;
606 	newlink = alloca (newlink_len);
607 	g_snprintf (newlink, newlink_len, "%s.index.data", path);
608 	ret = camel_key_file_rename (p->links, newlink);
609 	if (ret == -1) {
610 		err = errno;
611 		camel_block_file_rename (p->blocks, idx->path);
612 		CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
613 		errno = err;
614 		return -1;
615 	}
616 
617 	g_free (idx->path);
618 	idx->path = g_strdup (newblock);
619 
620 	CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
621 
622 	return 0;
623 }
624 
625 static gint
text_index_has_name(CamelIndex * idx,const gchar * name)626 text_index_has_name (CamelIndex *idx,
627                      const gchar *name)
628 {
629 	CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
630 
631 	return camel_partition_table_lookup (p->name_hash, name) != 0;
632 }
633 
634 static CamelIndexName *
text_index_add_name(CamelIndex * idx,const gchar * name)635 text_index_add_name (CamelIndex *idx,
636                      const gchar *name)
637 {
638 	CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
639 	camel_key_t keyid;
640 	CamelIndexName *idn;
641 	struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (p->blocks);
642 
643 	CAMEL_TEXT_INDEX_LOCK (idx, lock);
644 
645 	/* if we're adding words, up the cache limits a lot */
646 	if (p->word_cache_limit < 8192) {
647 		camel_block_file_set_cache_limit (p->blocks, 1024);
648 		p->word_cache_limit = 8192;
649 	}
650 
651 	/* If we have it already replace it */
652 	keyid = camel_partition_table_lookup (p->name_hash, name);
653 	if (keyid != 0) {
654 		/* TODO: We could just update the partition table's
655 		 * key pointer rather than having to delete it */
656 		rb->deleted++;
657 		camel_key_table_set_flags (p->name_index, keyid, 1, 1);
658 		camel_partition_table_remove (p->name_hash, name);
659 	}
660 
661 	keyid = camel_key_table_add (p->name_index, name, 0, 0);
662 	if (keyid != 0) {
663 		camel_partition_table_add (p->name_hash, name, keyid);
664 		rb->names++;
665 	}
666 
667 	camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks));
668 
669 	/* TODO: if keyid == 0, we had a failure, we should somehow flag that, but for
670 	 * now just return a valid object but discard its results, see text_index_write_name */
671 
672 	CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
673 
674 	idn = (CamelIndexName *) camel_text_index_name_new ((CamelTextIndex *) idx, name, keyid);
675 
676 	return idn;
677 }
678 
679 /* call locked */
680 static void
hash_write_word(gchar * word,gpointer data,CamelIndexName * idn)681 hash_write_word (gchar *word,
682                  gpointer data,
683                  CamelIndexName *idn)
684 {
685 	CamelTextIndexName *tin = (CamelTextIndexName *) idn;
686 
687 	text_index_add_name_to_word (idn->index, word, tin->priv->nameid);
688 }
689 
690 static gint
text_index_write_name(CamelIndex * idx,CamelIndexName * idn)691 text_index_write_name (CamelIndex *idx,
692                        CamelIndexName *idn)
693 {
694 	/* force 'flush' of any outstanding data */
695 	camel_index_name_add_buffer (idn, NULL, 0);
696 
697 	/* see text_index_add_name for when this can be 0 */
698 	if (((CamelTextIndexName *) idn)->priv->nameid != 0) {
699 		CAMEL_TEXT_INDEX_LOCK (idx, lock);
700 
701 		g_hash_table_foreach (idn->words, (GHFunc) hash_write_word, idn);
702 
703 		CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
704 	}
705 
706 	return 0;
707 }
708 
709 static CamelIndexCursor *
text_index_find_name(CamelIndex * idx,const gchar * name)710 text_index_find_name (CamelIndex *idx,
711                       const gchar *name)
712 {
713 	/* what was this for, umm */
714 	return NULL;
715 }
716 
717 static void
text_index_delete_name(CamelIndex * idx,const gchar * name)718 text_index_delete_name (CamelIndex *idx,
719                         const gchar *name)
720 {
721 	CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
722 	camel_key_t keyid;
723 	struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (p->blocks);
724 
725 	d (printf ("Delete name: %s\n", name));
726 
727 	/* probably doesn't really need locking, but oh well */
728 	CAMEL_TEXT_INDEX_LOCK (idx, lock);
729 
730 	/* We just mark the key deleted, and remove it from the hash table */
731 	keyid = camel_partition_table_lookup (p->name_hash, name);
732 	if (keyid != 0) {
733 		rb->deleted++;
734 		camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks));
735 		camel_key_table_set_flags (p->name_index, keyid, 1, 1);
736 		camel_partition_table_remove (p->name_hash, name);
737 	}
738 
739 	CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
740 }
741 
742 static CamelIndexCursor *
text_index_find(CamelIndex * idx,const gchar * word)743 text_index_find (CamelIndex *idx,
744                  const gchar *word)
745 {
746 	CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
747 	camel_key_t keyid;
748 	camel_block_t data = 0;
749 	guint flags;
750 	CamelIndexCursor *idc;
751 
752 	CAMEL_TEXT_INDEX_LOCK (idx, lock);
753 
754 	keyid = camel_partition_table_lookup (p->word_hash, word);
755 	if (keyid != 0) {
756 		data = camel_key_table_lookup (
757 			p->word_index, keyid, NULL, &flags);
758 		if (flags & 1)
759 			data = 0;
760 	}
761 
762 	CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
763 
764 	idc = (CamelIndexCursor *) camel_text_index_cursor_new ((CamelTextIndex *) idx, data);
765 
766 	return idc;
767 }
768 
769 static CamelIndexCursor *
text_index_words(CamelIndex * idx)770 text_index_words (CamelIndex *idx)
771 {
772 	CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
773 
774 	return (CamelIndexCursor *) camel_text_index_key_cursor_new ((CamelTextIndex *) idx, p->word_index);
775 }
776 
777 static void
camel_text_index_class_init(CamelTextIndexClass * class)778 camel_text_index_class_init (CamelTextIndexClass *class)
779 {
780 	GObjectClass *object_class;
781 	CamelIndexClass *index_class;
782 
783 	object_class = G_OBJECT_CLASS (class);
784 	object_class->dispose = text_index_dispose;
785 	object_class->finalize = text_index_finalize;
786 
787 	index_class = CAMEL_INDEX_CLASS (class);
788 	index_class->sync = text_index_sync;
789 	index_class->compress = text_index_compress;
790 	index_class->delete_ = text_index_delete;
791 	index_class->rename = text_index_rename;
792 	index_class->has_name = text_index_has_name;
793 	index_class->add_name = text_index_add_name;
794 	index_class->write_name = text_index_write_name;
795 	index_class->find_name = text_index_find_name;
796 	index_class->delete_name = text_index_delete_name;
797 	index_class->find = text_index_find;
798 	index_class->words = text_index_words;
799 }
800 
801 static void
camel_text_index_init(CamelTextIndex * text_index)802 camel_text_index_init (CamelTextIndex *text_index)
803 {
804 	text_index->priv = camel_text_index_get_instance_private (text_index);
805 
806 	g_queue_init (&text_index->priv->word_cache);
807 	text_index->priv->words = g_hash_table_new (g_str_hash, g_str_equal);
808 
809 	/* This cache size and the block cache size have been tuned for
810 	 * about the best with moderate memory usage.  Doubling the memory
811 	 * usage barely affects performance. */
812 	text_index->priv->word_cache_limit = 4096; /* 1024 = 128K */
813 
814 	g_rec_mutex_init (&text_index->priv->lock);
815 }
816 
817 static gchar *
text_index_normalize(CamelIndex * idx,const gchar * in,gpointer data)818 text_index_normalize (CamelIndex *idx,
819                       const gchar *in,
820                       gpointer data)
821 {
822 	gchar *word;
823 
824 	/* Sigh, this is really expensive */
825 	/*g_utf8_normalize (in, strlen (in), G_NORMALIZE_ALL);*/
826 	word = g_utf8_strdown (in, -1);
827 
828 	return word;
829 }
830 
831 CamelTextIndex *
camel_text_index_new(const gchar * path,gint flags)832 camel_text_index_new (const gchar *path,
833                       gint flags)
834 {
835 	CamelTextIndex *idx = g_object_new (CAMEL_TYPE_TEXT_INDEX, NULL);
836 	CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
837 	struct _CamelTextIndexRoot *rb;
838 	gchar *link;
839 	gsize link_len;
840 	CamelBlock *bl;
841 
842 	camel_index_construct ((CamelIndex *) idx, path, flags);
843 	camel_index_set_normalize ((CamelIndex *) idx, text_index_normalize, NULL);
844 
845 	p->blocks = camel_block_file_new (
846 		idx->parent.path, flags, CAMEL_TEXT_INDEX_VERSION, CAMEL_BLOCK_SIZE);
847 	if (p->blocks == NULL)
848 		goto fail;
849 
850 	link_len = strlen (idx->parent.path) + 7;
851 	link = alloca (link_len);
852 	g_snprintf (link, link_len, "%s.data", idx->parent.path);
853 	p->links = camel_key_file_new (link, flags, CAMEL_TEXT_INDEX_KEY_VERSION);
854 
855 	if (p->links == NULL)
856 		goto fail;
857 
858 	rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (p->blocks);
859 
860 	if (rb->word_index_root == 0) {
861 		bl = camel_block_file_new_block (p->blocks);
862 
863 		if (bl == NULL)
864 			goto fail;
865 
866 		rb->word_index_root = bl->id;
867 		camel_block_file_unref_block (p->blocks, bl);
868 		camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks));
869 	}
870 
871 	if (rb->word_hash_root == 0) {
872 		bl = camel_block_file_new_block (p->blocks);
873 
874 		if (bl == NULL)
875 			goto fail;
876 
877 		rb->word_hash_root = bl->id;
878 		camel_block_file_unref_block (p->blocks, bl);
879 		camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks));
880 	}
881 
882 	if (rb->name_index_root == 0) {
883 		bl = camel_block_file_new_block (p->blocks);
884 
885 		if (bl == NULL)
886 			goto fail;
887 
888 		rb->name_index_root = bl->id;
889 		camel_block_file_unref_block (p->blocks, bl);
890 		camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks));
891 	}
892 
893 	if (rb->name_hash_root == 0) {
894 		bl = camel_block_file_new_block (p->blocks);
895 
896 		if (bl == NULL)
897 			goto fail;
898 
899 		rb->name_hash_root = bl->id;
900 		camel_block_file_unref_block (p->blocks, bl);
901 		camel_block_file_touch_block (p->blocks, camel_block_file_get_root_block (p->blocks));
902 	}
903 
904 	p->word_index = camel_key_table_new (p->blocks, rb->word_index_root);
905 	p->word_hash = camel_partition_table_new (p->blocks, rb->word_hash_root);
906 	p->name_index = camel_key_table_new (p->blocks, rb->name_index_root);
907 	p->name_hash = camel_partition_table_new (p->blocks, rb->name_hash_root);
908 
909 	if (p->word_index == NULL || p->word_hash == NULL
910 	    || p->name_index == NULL || p->name_hash == NULL) {
911 		g_object_unref (idx);
912 		idx = NULL;
913 	}
914 
915 	return idx;
916 
917 fail:
918 	g_object_unref (idx);
919 	return NULL;
920 }
921 
922 /* returns 0 if the index exists, is valid, and synced, -1 otherwise */
923 gint
camel_text_index_check(const gchar * path)924 camel_text_index_check (const gchar *path)
925 {
926 	gchar *block, *key;
927 	gsize block_len, key_len;
928 	CamelBlockFile *blocks;
929 	CamelKeyFile *keys;
930 
931 	block_len = strlen (path) + 7;
932 	block = alloca (block_len);
933 	g_snprintf (block, block_len, "%s.index", path);
934 	blocks = camel_block_file_new (block, O_RDONLY, CAMEL_TEXT_INDEX_VERSION, CAMEL_BLOCK_SIZE);
935 	if (blocks == NULL) {
936 		io (printf ("Check failed: No block file: %s\n", g_strerror (errno)));
937 		return -1;
938 	}
939 	key_len = strlen (path) + 12;
940 	key = alloca (key_len);
941 	g_snprintf (key, key_len, "%s.index.data", path);
942 	keys = camel_key_file_new (key, O_RDONLY, CAMEL_TEXT_INDEX_KEY_VERSION);
943 	if (keys == NULL) {
944 		io (printf ("Check failed: No key file: %s\n", g_strerror (errno)));
945 		g_object_unref (blocks);
946 		return -1;
947 	}
948 
949 	g_object_unref (keys);
950 	g_object_unref (blocks);
951 
952 	return 0;
953 }
954 
955 gint
camel_text_index_rename(const gchar * old,const gchar * new)956 camel_text_index_rename (const gchar *old,
957                          const gchar *new)
958 {
959 	gchar *oldname, *newname;
960 	gsize oldname_len, newname_len;
961 	gint err;
962 
963 	/* TODO: camel_text_index_rename should find out if we have an active index and use that instead */
964 
965 	oldname_len = strlen (old) + 12;
966 	newname_len = strlen (new) + 12;
967 	oldname = alloca (oldname_len);
968 	newname = alloca (newname_len);
969 	g_snprintf (oldname, oldname_len, "%s.index", old);
970 	g_snprintf (newname, newname_len, "%s.index", new);
971 
972 	if (g_rename (oldname, newname) == -1 && errno != ENOENT)
973 		return -1;
974 
975 	g_snprintf (oldname, oldname_len, "%s.index.data", old);
976 	g_snprintf (newname, newname_len, "%s.index.data", new);
977 
978 	if (g_rename (oldname, newname) == -1 && errno != ENOENT) {
979 		err = errno;
980 		g_snprintf (oldname, oldname_len, "%s.index", old);
981 		g_snprintf (newname, newname_len, "%s.index", new);
982 		if (g_rename (newname, oldname) == -1) {
983 			g_warning (
984 				"%s: Failed to rename '%s' to '%s': %s",
985 				G_STRFUNC, newname, oldname, g_strerror (errno));
986 		}
987 		errno = err;
988 		return -1;
989 	}
990 
991 	return 0;
992 }
993 
994 gint
camel_text_index_remove(const gchar * old)995 camel_text_index_remove (const gchar *old)
996 {
997 	gchar *block, *key;
998 	gsize block_len, key_len;
999 	gint ret = 0;
1000 
1001 	/* TODO: needs to poke any active indices to remain unlinked */
1002 
1003 	block_len = strlen (old) + 12;
1004 	block = alloca (block_len);
1005 	key_len = strlen (old) + 12;
1006 	key = alloca (key_len);
1007 	g_snprintf (block, block_len, "%s.index", old);
1008 	g_snprintf (key, key_len, "%s.index.data", old);
1009 
1010 	if (g_unlink (block) == -1 && errno != ENOENT && errno != ENOTDIR)
1011 		ret = -1;
1012 	if (g_unlink (key) == -1 && errno != ENOENT && errno != ENOTDIR)
1013 		ret = -1;
1014 
1015 	if (ret == 0)
1016 		errno = 0;
1017 
1018 	return ret;
1019 }
1020 
1021 /* Debug */
1022 void
camel_text_index_info(CamelTextIndex * idx)1023 camel_text_index_info (CamelTextIndex *idx)
1024 {
1025 	CamelTextIndexPrivate *p = idx->priv;
1026 	struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *) camel_block_file_get_root (p->blocks);
1027 	gint frag;
1028 
1029 	printf ("Path: '%s'\n", idx->parent.path);
1030 	printf ("Version: %u\n", idx->parent.version);
1031 	printf ("Flags: %08x\n", idx->parent.flags);
1032 	printf ("Total words: %u\n", rb->words);
1033 	printf ("Total names: %u\n", rb->names);
1034 	printf ("Total deleted: %u\n", rb->deleted);
1035 	printf ("Total key blocks: %u\n", rb->keys);
1036 
1037 	if (rb->words > 0) {
1038 		frag = ((rb->keys - rb->words) * 100)/ rb->words;
1039 		printf ("Word fragmentation: %d%%\n", frag);
1040 	}
1041 
1042 	if (rb->names > 0) {
1043 		frag = (rb->deleted * 100)/ rb->names;
1044 		printf ("Name fragmentation: %d%%\n", frag);
1045 	}
1046 }
1047 
1048 /* #define DUMP_RAW */
1049 
1050 #ifdef DUMP_RAW
1051 enum { KEY_ROOT = 1, KEY_DATA = 2, PARTITION_MAP = 4, PARTITION_DATA = 8 };
1052 
1053 static void
add_type(GHashTable * map,camel_block_t id,gint type)1054 add_type (GHashTable *map,
1055           camel_block_t id,
1056           gint type)
1057 {
1058 	camel_block_t old;
1059 
1060 	old = g_hash_table_lookup (map, id);
1061 	if (old == type)
1062 		return;
1063 
1064 	if (old != 0 && old != type)
1065 		g_warning ("block %x redefined as type %d, already type %d\n", id, type, old);
1066 	g_hash_table_insert (map, id, GINT_TO_POINTER (type | old));
1067 }
1068 
1069 static void
add_partition(GHashTable * map,CamelBlockFile * blocks,camel_block_t id)1070 add_partition (GHashTable *map,
1071                CamelBlockFile *blocks,
1072                camel_block_t id)
1073 {
1074 	CamelBlock *bl;
1075 	CamelPartitionMapBlock *pm;
1076 	gint i;
1077 
1078 	while (id) {
1079 		add_type (map, id, PARTITION_MAP);
1080 		bl = camel_block_file_get_block (blocks, id);
1081 		if (bl == NULL) {
1082 			g_warning ("couldn't get parition: %x\n", id);
1083 			return;
1084 		}
1085 
1086 		pm = (CamelPartitionMapBlock *) &bl->data;
1087 		if (pm->used > G_N_ELEMENTS (pm->partition)) {
1088 			g_warning ("Partition block %x invalid\n", id);
1089 			camel_block_file_unref_block (blocks, bl);
1090 			return;
1091 		}
1092 
1093 		for (i = 0; i < pm->used; i++)
1094 			add_type (map, pm->partition[i].blockid, PARTITION_DATA);
1095 
1096 		id = pm->next;
1097 		camel_block_file_unref_block (blocks, bl);
1098 	}
1099 }
1100 
1101 static void
add_keys(GHashTable * map,CamelBlockFile * blocks,camel_block_t id)1102 add_keys (GHashTable *map,
1103           CamelBlockFile *blocks,
1104           camel_block_t id)
1105 {
1106 	CamelBlock *rbl, *bl;
1107 	CamelKeyRootBlock *root;
1108 	CamelKeyBlock *kb;
1109 
1110 	add_type (map, id, KEY_ROOT);
1111 	rbl = camel_block_file_get_block (blocks, id);
1112 	if (rbl == NULL) {
1113 		g_warning ("couldn't get key root: %x\n", id);
1114 		return;
1115 	}
1116 	root = (CamelKeyRootBlock *) &rbl->data;
1117 	id = root->first;
1118 
1119 	while (id) {
1120 		add_type (map, id, KEY_DATA);
1121 		bl = camel_block_file_get_block (blocks, id);
1122 		if (bl == NULL) {
1123 			g_warning ("couldn't get key: %x\n", id);
1124 			break;
1125 		}
1126 
1127 		kb = (CamelKeyBlock *) &bl->data;
1128 		id = kb->next;
1129 		camel_block_file_unref_block (blocks, bl);
1130 	}
1131 
1132 	camel_block_file_unref_block (blocks, rbl);
1133 }
1134 
1135 static void
dump_raw(GHashTable * map,gchar * path)1136 dump_raw (GHashTable *map,
1137           gchar *path)
1138 {
1139 	gchar buf[1024];
1140 	gchar line[256];
1141 	gchar *p, c, *e, *a, *o;
1142 	gint v, n, len, i, type;
1143 	gchar hex[16] = "0123456789ABCDEF";
1144 	gint fd;
1145 	camel_block_t id, total;
1146 
1147 	fd = g_open (path, O_RDONLY | O_BINARY, 0);
1148 	if (fd == -1)
1149 		return;
1150 
1151 	total = 0;
1152 	while ((len = read (fd, buf, 1024)) == 1024) {
1153 		id = total;
1154 
1155 		type = g_hash_table_lookup (map, id);
1156 		switch (type) {
1157 		case 0:
1158 			printf (" - unknown -\n");
1159 			break;
1160 		default:
1161 			printf (" - invalid -\n");
1162 			break;
1163 		case KEY_ROOT: {
1164 			CamelKeyRootBlock *r = (CamelKeyRootBlock *) buf;
1165 			printf ("Key root:\n");
1166 			printf ("First: %08x     Last: %08x     Free: %08x\n", r->first, r->last, r->free);
1167 		} break;
1168 		case KEY_DATA: {
1169 			CamelKeyBlock *k = (CamelKeyBlock *) buf;
1170 			printf ("Key data:\n");
1171 			printf ("Next: %08x      Used: %u\n", k->next, k->used);
1172 			for (i = 0; i < k->used; i++) {
1173 				if (i == 0)
1174 					len = sizeof (k->u.keydata);
1175 				else
1176 					len = k->u.keys[i - 1].offset;
1177 				len -= k->u.keys[i].offset;
1178 				printf (
1179 					"[%03d]: %08x %5d %06x %3d '%.*s'\n", i,
1180 					k->u.keys[i].data, k->u.keys[i].offset, k->u.keys[i].flags,
1181 					len, len, k->u.keydata + k->u.keys[i].offset);
1182 			}
1183 		} break;
1184 		case PARTITION_MAP: {
1185 			CamelPartitionMapBlock *m = (CamelPartitionMapBlock *) buf;
1186 			printf ("Partition map\n");
1187 			printf ("Next: %08x      Used: %u\n", m->next, m->used);
1188 			for (i = 0; i < m->used; i++) {
1189 				printf ("[%03d]: %08x -> %08x\n", i, m->partition[i].hashid, m->partition[i].blockid);
1190 			}
1191 		} break;
1192 		case PARTITION_DATA: {
1193 			CamelPartitionKeyBlock *k = (CamelPartitionKeyBlock *) buf;
1194 			printf ("Partition data\n");
1195 			printf ("Used: %u\n", k->used);
1196 		} break;
1197 		}
1198 
1199 		printf ("--raw--\n");
1200 
1201 		len = 1024;
1202 		p = buf;
1203 		do {
1204 			g_snprintf (line, sizeof (line), "%08x:                                                                      ", total);
1205 			total += 16;
1206 			o = line + 10;
1207 			a = o + 16 * 2 + 2;
1208 			i = 0;
1209 			while (len && i < 16) {
1210 				c = *p++;
1211 				*a++ = isprint (c)?c:'.';
1212 				*o++ = hex[(c>>4)&0x0f];
1213 				*o++ = hex[c&0x0f];
1214 				i++;
1215 				if (i == 8)
1216 					*o++ = ' ';
1217 				len--;
1218 			}
1219 			*a = 0;
1220 			printf ("%s\n", line);
1221 		} while (len);
1222 		printf ("\n");
1223 	}
1224 	close (fd);
1225 }
1226 #endif
1227 
1228 /* Debug */
1229 void
camel_text_index_dump(CamelTextIndex * idx)1230 camel_text_index_dump (CamelTextIndex *idx)
1231 {
1232 	CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
1233 #ifndef DUMP_RAW
1234 	camel_key_t keyid;
1235 	gchar *word = NULL;
1236 	const gchar *name;
1237 	guint flags;
1238 	camel_block_t data;
1239 
1240 	/* Iterate over all names in the file first */
1241 
1242 	printf ("UID's in index\n");
1243 
1244 	keyid = 0;
1245 	while ((keyid = camel_key_table_next (p->name_index, keyid, &word, &flags, &data))) {
1246 		if ((flags & 1) == 0)
1247 			printf (" %s\n", word);
1248 		else
1249 			printf (" %s (deleted)\n", word);
1250 		g_free (word);
1251 		word = NULL;
1252 	}
1253 
1254 	g_clear_pointer (&word, g_free);
1255 
1256 	printf ("Word's in index\n");
1257 
1258 	keyid = 0;
1259 	while ((keyid = camel_key_table_next (p->word_index, keyid, &word, &flags, &data))) {
1260 		CamelIndexCursor *idc;
1261 
1262 		printf ("Word: '%s':\n", word);
1263 
1264 		idc = camel_index_find ((CamelIndex *) idx, word);
1265 		while ((name = camel_index_cursor_next (idc))) {
1266 			printf (" %s", name);
1267 		}
1268 		printf ("\n");
1269 		g_object_unref (idc);
1270 		g_free (word);
1271 		word = NULL;
1272 	}
1273 
1274 	g_clear_pointer (&word, g_free);
1275 #else
1276 	/* a more low-level dump routine */
1277 	GHashTable *block_type = g_hash_table_new (NULL, NULL);
1278 	camel_block_t id;
1279 	struct stat st;
1280 	gint type;
1281 
1282 	add_keys (block_type, p->blocks, p->word_index->rootid);
1283 	add_keys (block_type, p->blocks, p->name_index->rootid);
1284 
1285 	add_partition (block_type, p->blocks, p->word_hash->rootid);
1286 	add_partition (block_type, p->blocks, p->name_hash->rootid);
1287 
1288 	dump_raw (block_type, p->blocks->path);
1289 	g_hash_table_destroy (block_type);
1290 #endif
1291 }
1292 
1293 /* more debug stuff */
1294 void
camel_text_index_validate(CamelTextIndex * idx)1295 camel_text_index_validate (CamelTextIndex *idx)
1296 {
1297 	CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
1298 	camel_key_t keyid;
1299 	gchar *word = NULL;
1300 	const gchar *name;
1301 	guint flags;
1302 	camel_block_t data;
1303 	gchar *oldword;
1304 	camel_key_t *records;
1305 	gsize count;
1306 
1307 	GHashTable *names, *deleted, *words, *keys, *name_word, *word_word;
1308 
1309 	names = g_hash_table_new (NULL, NULL);
1310 	deleted = g_hash_table_new (NULL, NULL);
1311 
1312 	name_word = g_hash_table_new (g_str_hash, g_str_equal);
1313 
1314 	words = g_hash_table_new (NULL, NULL);
1315 	keys = g_hash_table_new (NULL, NULL);
1316 
1317 	word_word = g_hash_table_new (g_str_hash, g_str_equal);
1318 
1319 	/* Iterate over all names in the file first */
1320 
1321 	printf ("Checking UID consistency\n");
1322 
1323 	keyid = 0;
1324 	while ((keyid = camel_key_table_next (p->name_index, keyid, &word, &flags, &data))) {
1325 		if ((oldword = g_hash_table_lookup (names, GINT_TO_POINTER (keyid))) != NULL
1326 		    || (oldword = g_hash_table_lookup (deleted, GINT_TO_POINTER (keyid))) != NULL) {
1327 			printf ("Warning, name '%s' duplicates key (%x) with name '%s'\n", word, keyid, oldword);
1328 			g_free (word);
1329 		} else {
1330 			g_hash_table_insert (name_word, word, GINT_TO_POINTER (1));
1331 			if ((flags & 1) == 0) {
1332 				g_hash_table_insert (names, GINT_TO_POINTER (keyid), word);
1333 			} else {
1334 				g_hash_table_insert (deleted, GINT_TO_POINTER (keyid), word);
1335 			}
1336 		}
1337 
1338 		word = NULL;
1339 	}
1340 
1341 	g_clear_pointer (&word, g_free);
1342 
1343 	printf ("Checking WORD member consistency\n");
1344 
1345 	keyid = 0;
1346 	while ((keyid = camel_key_table_next (p->word_index, keyid, &word, &flags, &data))) {
1347 		CamelIndexCursor *idc;
1348 		GHashTable *used;
1349 
1350 		/* first, check for duplicates of keyid, and data */
1351 		if ((oldword = g_hash_table_lookup (words, GINT_TO_POINTER (keyid))) != NULL) {
1352 			printf ("Warning, word '%s' duplicates key (%x) with name '%s'\n", word, keyid, oldword);
1353 			g_free (word);
1354 			word = oldword;
1355 		} else {
1356 			g_hash_table_insert (words, GINT_TO_POINTER (keyid), word);
1357 		}
1358 
1359 		if (data == 0) {
1360 			/* This may not be an issue if things have been removed over time,
1361 			 * though it is a problem if its a fresh index */
1362 			printf ("Word '%s' has no data associated with it\n", word);
1363 		} else {
1364 			if ((oldword = g_hash_table_lookup (keys, GUINT_TO_POINTER (data))) != NULL) {
1365 				printf ("Warning, word '%s' duplicates data (%x) with name '%s'\n", word, data, oldword);
1366 			} else {
1367 				g_hash_table_insert (keys, GUINT_TO_POINTER (data), word);
1368 			}
1369 		}
1370 
1371 		if (g_hash_table_lookup (word_word, word) != NULL) {
1372 			printf ("Warning, word '%s' occurs more than once\n", word);
1373 		} else {
1374 			g_hash_table_insert (word_word, word, word);
1375 		}
1376 
1377 		used = g_hash_table_new (g_str_hash, g_str_equal);
1378 
1379 		idc = camel_index_find ((CamelIndex *) idx, word);
1380 		while ((name = camel_index_cursor_next (idc))) {
1381 			if (g_hash_table_lookup (name_word, name) == NULL) {
1382 				printf ("word '%s' references non-existant name '%s'\n", word, name);
1383 			}
1384 			if (g_hash_table_lookup (used, name) != NULL) {
1385 				printf ("word '%s' uses word '%s' more than once\n", word, name);
1386 			} else {
1387 				g_hash_table_insert (used, g_strdup (name), (gpointer) 1);
1388 			}
1389 		}
1390 		g_object_unref (idc);
1391 
1392 		g_hash_table_foreach (used, (GHFunc) g_free, NULL);
1393 		g_hash_table_destroy (used);
1394 
1395 		printf ("word '%s'\n", word);
1396 
1397 		while (data) {
1398 			printf (" data %x ", data);
1399 			if (camel_key_file_read (p->links, &data, &count, &records) == -1) {
1400 				printf ("Warning, read failed for word '%s', at data '%u'\n", word, data);
1401 				data = 0;
1402 			} else {
1403 				printf ("(%d)\n", (gint) count);
1404 				g_free (records);
1405 			}
1406 		}
1407 
1408 		word = NULL;
1409 	}
1410 
1411 	g_clear_pointer (&word, g_free);
1412 
1413 	g_hash_table_destroy (names);
1414 	g_hash_table_destroy (deleted);
1415 	g_hash_table_destroy (keys);
1416 
1417 	g_hash_table_foreach (words, (GHFunc) g_free, NULL);
1418 	g_hash_table_destroy (words);
1419 
1420 	g_hash_table_foreach (name_word, (GHFunc) g_free, NULL);
1421 	g_hash_table_destroy (name_word);
1422 
1423 	g_hash_table_foreach (word_word, (GHFunc) g_free, NULL);
1424 	g_hash_table_destroy (word_word);
1425 }
1426 
1427 /* ********************************************************************** */
1428 /* CamelTextIndexName */
1429 /* ********************************************************************** */
1430 
G_DEFINE_TYPE_WITH_PRIVATE(CamelTextIndexName,camel_text_index_name,CAMEL_TYPE_INDEX_NAME)1431 G_DEFINE_TYPE_WITH_PRIVATE (CamelTextIndexName, camel_text_index_name, CAMEL_TYPE_INDEX_NAME)
1432 
1433 static void
1434 text_index_name_finalize (GObject *object)
1435 {
1436 	CamelTextIndexNamePrivate *priv;
1437 
1438 	priv = CAMEL_TEXT_INDEX_NAME (object)->priv;
1439 
1440 	g_hash_table_destroy (CAMEL_TEXT_INDEX_NAME (object)->parent.words);
1441 
1442 	g_string_free (priv->buffer, TRUE);
1443 	camel_mempool_destroy (priv->pool);
1444 
1445 	/* Chain up to parent's finalize() method. */
1446 	G_OBJECT_CLASS (camel_text_index_name_parent_class)->finalize (object);
1447 }
1448 
1449 static void
text_index_name_add_word(CamelIndexName * idn,const gchar * word)1450 text_index_name_add_word (CamelIndexName *idn,
1451                           const gchar *word)
1452 {
1453 	CamelTextIndexNamePrivate *p = ((CamelTextIndexName *) idn)->priv;
1454 
1455 	if (g_hash_table_lookup (idn->words, word) == NULL) {
1456 		gchar *w = camel_mempool_strdup (p->pool, word);
1457 
1458 		g_hash_table_insert (idn->words, w, w);
1459 	}
1460 }
1461 
1462 /* Why?
1463  * Because it doesn't hang/loop forever on bad data
1464  * Used to clean up utf8 before it gets further */
1465 
1466 static inline guint32
camel_utf8_next(const guchar ** ptr,const guchar * ptrend)1467 camel_utf8_next (const guchar **ptr,
1468                  const guchar *ptrend)
1469 {
1470 	register guchar *p = (guchar *) * ptr;
1471 	register guint c;
1472 	register guint32 v;
1473 	gint l;
1474 
1475 	if (p == ptrend)
1476 		return 0;
1477 
1478 	while ((c = *p++)) {
1479 		if (c < 0x80) {
1480 			*ptr = p;
1481 			return c;
1482 		} else if ((c&0xe0) == 0xc0) {
1483 			v = c & 0x1f;
1484 			l = 1;
1485 		} else if ((c&0xf0) == 0xe0) {
1486 			v = c & 0x0f;
1487 			l = 2;
1488 		} else if ((c&0xf8) == 0xf0) {
1489 			v = c & 0x07;
1490 			l = 3;
1491 		} else if ((c&0xfc) == 0xf8) {
1492 			v = c & 0x03;
1493 			l = 4;
1494 		} else if ((c&0xfe) == 0xfc) {
1495 			v = c & 0x01;
1496 			l = 5;
1497 		} else
1498 			/* Invalid, ignore and look for next start gchar if room */
1499 			if (p == ptrend) {
1500 				return 0;
1501 			} else {
1502 				continue;
1503 			}
1504 
1505 		/* bad data or truncated buffer */
1506 		if (p + l > ptrend)
1507 			return 0;
1508 
1509 		while (l && ((c = *p) & 0xc0) == 0x80) {
1510 			p++;
1511 			l--;
1512 			v = (v << 6) | (c & 0x3f);
1513 		}
1514 
1515 		/* valid gchar */
1516 		if (l == 0) {
1517 			*ptr = p;
1518 			return v;
1519 		}
1520 
1521 		/* else look for a start gchar again */
1522 	}
1523 
1524 	return 0;
1525 }
1526 
1527 static gsize
text_index_name_add_buffer(CamelIndexName * idn,const gchar * buffer,gsize len)1528 text_index_name_add_buffer (CamelIndexName *idn,
1529                             const gchar *buffer,
1530                             gsize len)
1531 {
1532 	CamelTextIndexNamePrivate *p = CAMEL_TEXT_INDEX_NAME (idn)->priv;
1533 	const guchar *ptr, *ptrend;
1534 	guint32 c;
1535 	gchar utf8[8];
1536 	gint utf8len;
1537 
1538 	if (buffer == NULL) {
1539 		if (p->buffer->len) {
1540 			camel_index_name_add_word (idn, p->buffer->str);
1541 			g_string_truncate (p->buffer, 0);
1542 		}
1543 		return 0;
1544 	}
1545 
1546 	ptr = (const guchar *) buffer;
1547 	ptrend = (const guchar *) buffer + len;
1548 	while ((c = camel_utf8_next (&ptr, ptrend))) {
1549 		if (g_unichar_isalnum (c)) {
1550 			c = g_unichar_tolower (c);
1551 			utf8len = g_unichar_to_utf8 (c, utf8);
1552 			utf8[utf8len] = 0;
1553 			g_string_append (p->buffer, utf8);
1554 		} else {
1555 			if (p->buffer->len > 0 && p->buffer->len <= CAMEL_TEXT_INDEX_MAX_WORDLEN) {
1556 				text_index_name_add_word (idn, p->buffer->str);
1557 				/*camel_index_name_add_word (idn, p->buffer->str);*/
1558 			}
1559 
1560 			g_string_truncate (p->buffer, 0);
1561 		}
1562 	}
1563 
1564 	return 0;
1565 }
1566 
1567 static void
camel_text_index_name_class_init(CamelTextIndexNameClass * class)1568 camel_text_index_name_class_init (CamelTextIndexNameClass *class)
1569 {
1570 	GObjectClass *object_class;
1571 	CamelIndexNameClass *index_name_class;
1572 
1573 	object_class = G_OBJECT_CLASS (class);
1574 	object_class->finalize = text_index_name_finalize;
1575 
1576 	index_name_class = CAMEL_INDEX_NAME_CLASS (class);
1577 	index_name_class->add_word = text_index_name_add_word;
1578 	index_name_class->add_buffer = text_index_name_add_buffer;
1579 }
1580 
1581 static void
camel_text_index_name_init(CamelTextIndexName * text_index_name)1582 camel_text_index_name_init (CamelTextIndexName *text_index_name)
1583 {
1584 	text_index_name->priv = camel_text_index_name_get_instance_private (text_index_name);
1585 
1586 	text_index_name->parent.words = g_hash_table_new (
1587 		g_str_hash, g_str_equal);
1588 
1589 	text_index_name->priv->buffer = g_string_new ("");
1590 	text_index_name->priv->pool =
1591 		camel_mempool_new (256, 128, CAMEL_MEMPOOL_ALIGN_BYTE);
1592 }
1593 
1594 CamelTextIndexName *
camel_text_index_name_new(CamelTextIndex * idx,const gchar * name,camel_key_t nameid)1595 camel_text_index_name_new (CamelTextIndex *idx,
1596                            const gchar *name,
1597                            camel_key_t nameid)
1598 {
1599 	CamelTextIndexName *idn = g_object_new (CAMEL_TYPE_TEXT_INDEX_NAME, NULL);
1600 	CamelIndexName *cin = &idn->parent;
1601 	CamelTextIndexNamePrivate *p = CAMEL_TEXT_INDEX_NAME (idn)->priv;
1602 
1603 	cin->index = g_object_ref (idx);
1604 	cin->name = camel_mempool_strdup (p->pool, name);
1605 	p->nameid = nameid;
1606 
1607 	return idn;
1608 }
1609 
1610 /* ********************************************************************** */
1611 /* CamelTextIndexCursor */
1612 /* ********************************************************************** */
1613 
G_DEFINE_TYPE_WITH_PRIVATE(CamelTextIndexCursor,camel_text_index_cursor,CAMEL_TYPE_INDEX_CURSOR)1614 G_DEFINE_TYPE_WITH_PRIVATE (CamelTextIndexCursor, camel_text_index_cursor, CAMEL_TYPE_INDEX_CURSOR)
1615 
1616 static void
1617 text_index_cursor_finalize (GObject *object)
1618 {
1619 	CamelTextIndexCursorPrivate *priv;
1620 
1621 	priv = CAMEL_TEXT_INDEX_CURSOR (object)->priv;
1622 
1623 	g_free (priv->records);
1624 	g_free (priv->current);
1625 
1626 	/* Chain up to parent's finalize() method. */
1627 	G_OBJECT_CLASS (camel_text_index_cursor_parent_class)->finalize (object);
1628 }
1629 
1630 static const gchar *
text_index_cursor_next(CamelIndexCursor * idc)1631 text_index_cursor_next (CamelIndexCursor *idc)
1632 {
1633 	CamelTextIndexCursorPrivate *p = CAMEL_TEXT_INDEX_CURSOR (idc)->priv;
1634 	CamelTextIndexPrivate *tip = CAMEL_TEXT_INDEX (idc->index)->priv;
1635 	guint flags;
1636 
1637 	c (printf ("Going to next cursor for word with data '%08x' next %08x\n", p->first, p->next));
1638 
1639 	do {
1640 		while (p->record_index >= p->record_count) {
1641 			g_free (p->records);
1642 			p->records = NULL;
1643 			p->record_index = 0;
1644 			p->record_count = 0;
1645 			if (p->next == 0)
1646 				return NULL;
1647 			if (camel_key_file_read (tip->links, &p->next, &p->record_count, &p->records) == -1)
1648 				return NULL;
1649 		}
1650 
1651 		g_free (p->current);
1652 		p->current = NULL;
1653 		flags = 0;
1654 
1655 		camel_key_table_lookup (
1656 			tip->name_index, p->records[p->record_index],
1657 			&p->current, &flags);
1658 		if (flags & 1) {
1659 			g_free (p->current);
1660 			p->current = NULL;
1661 		}
1662 		p->record_index++;
1663 	} while (p->current == NULL);
1664 
1665 	return p->current;
1666 }
1667 
1668 static void
camel_text_index_cursor_class_init(CamelTextIndexCursorClass * class)1669 camel_text_index_cursor_class_init (CamelTextIndexCursorClass *class)
1670 {
1671 	GObjectClass *object_class;
1672 	CamelIndexCursorClass *index_cursor_class;
1673 
1674 	object_class = G_OBJECT_CLASS (class);
1675 	object_class->finalize = text_index_cursor_finalize;
1676 
1677 	index_cursor_class = CAMEL_INDEX_CURSOR_CLASS (class);
1678 	index_cursor_class->next = text_index_cursor_next;
1679 }
1680 
1681 static void
camel_text_index_cursor_init(CamelTextIndexCursor * text_index_cursor)1682 camel_text_index_cursor_init (CamelTextIndexCursor *text_index_cursor)
1683 {
1684 	text_index_cursor->priv = camel_text_index_cursor_get_instance_private (text_index_cursor);
1685 }
1686 
1687 CamelTextIndexCursor *
camel_text_index_cursor_new(CamelTextIndex * idx,camel_block_t data)1688 camel_text_index_cursor_new (CamelTextIndex *idx,
1689                              camel_block_t data)
1690 {
1691 	CamelTextIndexCursor *idc = g_object_new (CAMEL_TYPE_TEXT_INDEX_CURSOR, NULL);
1692 	CamelIndexCursor *cic = &idc->parent;
1693 	CamelTextIndexCursorPrivate *p = CAMEL_TEXT_INDEX_CURSOR (idc)->priv;
1694 
1695 	cic->index = g_object_ref (idx);
1696 	p->first = data;
1697 	p->next = data;
1698 	p->record_count = 0;
1699 	p->record_index = 0;
1700 
1701 	return idc;
1702 }
1703 
1704 /* ********************************************************************** */
1705 /* CamelTextIndexKeyCursor */
1706 /* ********************************************************************** */
1707 
G_DEFINE_TYPE_WITH_PRIVATE(CamelTextIndexKeyCursor,camel_text_index_key_cursor,CAMEL_TYPE_INDEX_CURSOR)1708 G_DEFINE_TYPE_WITH_PRIVATE (CamelTextIndexKeyCursor, camel_text_index_key_cursor, CAMEL_TYPE_INDEX_CURSOR)
1709 
1710 static void
1711 text_index_key_cursor_dispose (GObject *object)
1712 {
1713 	CamelTextIndexKeyCursorPrivate *priv;
1714 
1715 	priv = CAMEL_TEXT_INDEX_KEY_CURSOR (object)->priv;
1716 	g_clear_object (&priv->table);
1717 
1718 	/* Chain up parent's dispose() method. */
1719 	G_OBJECT_CLASS (camel_text_index_key_cursor_parent_class)->dispose (object);
1720 }
1721 
1722 static void
text_index_key_cursor_finalize(GObject * object)1723 text_index_key_cursor_finalize (GObject *object)
1724 {
1725 	CamelTextIndexKeyCursorPrivate *priv;
1726 
1727 	priv = CAMEL_TEXT_INDEX_KEY_CURSOR (object)->priv;
1728 
1729 	g_free (priv->current);
1730 
1731 	/* Chain up to parent's finalize() method. */
1732 	G_OBJECT_CLASS (camel_text_index_key_cursor_parent_class)->finalize (object);
1733 }
1734 
1735 static const gchar *
text_index_key_cursor_next(CamelIndexCursor * idc)1736 text_index_key_cursor_next (CamelIndexCursor *idc)
1737 {
1738 	CamelTextIndexKeyCursorPrivate *p = CAMEL_TEXT_INDEX_KEY_CURSOR (idc)->priv;
1739 
1740 	c (printf ("Going to next cursor for keyid %08x\n", p->keyid));
1741 
1742 	g_free (p->current);
1743 	p->current = NULL;
1744 
1745 	while ((p->keyid = camel_key_table_next (p->table, p->keyid, &p->current, &p->flags, &p->data))) {
1746 		if ((p->flags & 1) == 0) {
1747 			return p->current;
1748 		} else {
1749 			g_free (p->current);
1750 			p->current = NULL;
1751 		}
1752 	}
1753 
1754 	return NULL;
1755 }
1756 
1757 static void
camel_text_index_key_cursor_class_init(CamelTextIndexKeyCursorClass * class)1758 camel_text_index_key_cursor_class_init (CamelTextIndexKeyCursorClass *class)
1759 {
1760 	GObjectClass *object_class;
1761 	CamelIndexCursorClass *index_cursor_class;
1762 
1763 	object_class = G_OBJECT_CLASS (class);
1764 	object_class->dispose = text_index_key_cursor_dispose;
1765 	object_class->finalize = text_index_key_cursor_finalize;
1766 
1767 	index_cursor_class = CAMEL_INDEX_CURSOR_CLASS (class);
1768 	index_cursor_class->next = text_index_key_cursor_next;
1769 }
1770 
1771 static void
camel_text_index_key_cursor_init(CamelTextIndexKeyCursor * text_index_key_cursor)1772 camel_text_index_key_cursor_init (CamelTextIndexKeyCursor *text_index_key_cursor)
1773 {
1774 	text_index_key_cursor->priv = camel_text_index_key_cursor_get_instance_private (text_index_key_cursor);
1775 
1776 	text_index_key_cursor->priv->keyid = 0;
1777 	text_index_key_cursor->priv->flags = 0;
1778 	text_index_key_cursor->priv->data = 0;
1779 	text_index_key_cursor->priv->current = NULL;
1780 }
1781 
1782 CamelTextIndexKeyCursor *
camel_text_index_key_cursor_new(CamelTextIndex * idx,CamelKeyTable * table)1783 camel_text_index_key_cursor_new (CamelTextIndex *idx,
1784                                  CamelKeyTable *table)
1785 {
1786 	CamelTextIndexKeyCursor *idc = g_object_new (CAMEL_TYPE_TEXT_INDEX_KEY_CURSOR, NULL);
1787 	CamelIndexCursor *cic = &idc->parent;
1788 	CamelTextIndexKeyCursorPrivate *p = CAMEL_TEXT_INDEX_KEY_CURSOR (idc)->priv;
1789 
1790 	cic->index = g_object_ref (idx);
1791 	p->table = g_object_ref (table);
1792 
1793 	return idc;
1794 }
1795 
1796 /* ********************************************************************** */
1797 
1798 #define m(x)
1799 
1800 #if 0
1801 
1802 struct _CamelIndexRoot {
1803 	struct _CamelBlockRoot root;
1804 
1805 	camel_block_t word_root; /* a keyindex containing the keyid -> word mapping */
1806 	camel_block_t word_hash_root; /* a partitionindex containing word -> keyid mapping */
1807 
1808 	camel_block_t name_root; /* same, for names */
1809 	camel_block_t name_hash_root;
1810 };
1811 
1812 gchar wordbuffer[] = "This is a buffer of multiple words.  Some of the words are duplicates"
1813 " while other words are the same, some are in difFerenT Different different case cAsE casE,"
1814 " with,with:with;with-with'with\"'\"various punctuation as well.  So much for those Words. and 10"
1815 " numbers in a row too 1,2,3,4,5,6,7,8,9,10!  Yay!.";
1816 
1817 gint
1818 main (gint argc,
1819       gchar **argv)
1820 {
1821 #if 0
1822 	CamelBlockFile *bs;
1823 	CamelKeyTable *ki;
1824 	CamelPartitionTable *cpi;
1825 	CamelBlock *keyroot, *partroot;
1826 	struct _CamelIndexRoot *root;
1827 	FILE *fp;
1828 	gchar line[256], *key;
1829 	camel_key_t keyid;
1830 	gint index = 0, flags, data;
1831 #endif
1832 	CamelIndex *idx;
1833 	CamelIndexName *idn;
1834 	CamelIndexCursor *idc;
1835 	const gchar *word;
1836 	gint i;
1837 
1838 	printf ("Camel text index tester!\n");
1839 
1840 	camel_init (NULL, 0);
1841 
1842 	idx = (CamelIndex *) camel_text_index_new ("textindex", O_CREAT | O_RDWR | O_TRUNC);
1843 
1844 #if 1
1845 	camel_index_compress (idx);
1846 
1847 	return 0;
1848 #endif
1849 
1850 	for (i = 0; i < 100; i++) {
1851 		gchar name[16];
1852 
1853 		g_snprintf (name, sizeof (name), "%d", i);
1854 		printf ("Adding words to name '%s'\n", name);
1855 		idn = camel_index_add_name (idx, name);
1856 		camel_index_name_add_buffer (idn, wordbuffer, sizeof (wordbuffer) - 1);
1857 		camel_index_write_name (idx, idn);
1858 		g_object_unref (idn);
1859 	}
1860 
1861 	printf ("Looking up which names contain word 'word'\n");
1862 	idc = camel_index_find (idx, "words");
1863 	while ((word = camel_index_cursor_next (idc)) != NULL) {
1864 		printf (" name is '%s'\n", word);
1865 	}
1866 	g_object_unref (idc);
1867 	printf ("done.\n");
1868 
1869 	printf ("Looking up which names contain word 'truncate'\n");
1870 	idc = camel_index_find (idx, "truncate");
1871 	while ((word = camel_index_cursor_next (idc)) != NULL) {
1872 		printf (" name is '%s'\n", word);
1873 	}
1874 	g_object_unref (idc);
1875 	printf ("done.\n");
1876 
1877 	camel_index_sync (idx);
1878 	g_object_unref (idx);
1879 
1880 #if 0
1881 	bs = camel_block_file_new ("blocks", "TESTINDX", CAMEL_BLOCK_SIZE);
1882 
1883 	root = (struct _CamelIndexRoot *) camel_block_file_get_root (bs);
1884 	if (root->word_root == 0) {
1885 		keyroot = camel_block_file_new_block (bs);
1886 		root->word_root = keyroot->id;
1887 		camel_block_file_touch_block (bs, camel_block_file_get_root_block (bs));
1888 	}
1889 	if (root->word_hash_root == 0) {
1890 		partroot = camel_block_file_new_block (bs);
1891 		root->word_hash_root = partroot->id;
1892 		camel_block_file_touch_block (bs, camel_block_file_get_root_block (bs));
1893 	}
1894 
1895 	ki = camel_key_table_new (bs, root->word_root);
1896 	cpi = camel_partition_table_new (bs, root->word_hash_root);
1897 
1898 	fp = fopen ("/usr/dict/words", "r");
1899 	if (fp == NULL) {
1900 		perror ("fopen");
1901 		return 1;
1902 	}
1903 
1904 	while (fgets (line, sizeof (line), fp) != NULL) {
1905 		line[strlen (line) - 1] = 0;
1906 
1907 		/* see if its already there */
1908 		keyid = camel_partition_table_lookup (cpi, line);
1909 		if (keyid == 0) {
1910 			m (printf ("Adding word '%s' %d\n", line, index));
1911 
1912 			keyid = camel_key_table_add (ki, line, index, 0);
1913 			m (printf (" key = %08x\n", keyid));
1914 
1915 			camel_partition_table_add (cpi, line, keyid);
1916 
1917 			m (printf ("Lookup word '%s'\n", line));
1918 			keyid = camel_partition_table_lookup (cpi, line);
1919 			m (printf (" key = %08x\n", keyid));
1920 		}
1921 
1922 		m (printf ("Lookup key %08x\n", keyid));
1923 
1924 		camel_key_table_set_flags (ki, keyid, index, 1);
1925 
1926 		data = camel_key_table_lookup (ki, keyid, &key, &flags);
1927 		m (printf (" word = '%s' %d %04x\n", key, data, flags));
1928 
1929 		g_return_val_if_fail (data == index && strcmp (key, line) == 0, -1);
1930 
1931 		g_free (key);
1932 
1933 		index++;
1934 	}
1935 
1936 	printf ("Scanning again\n");
1937 	fseek (fp, SEEK_SET, 0);
1938 	index = 0;
1939 	while (fgets (line, sizeof (line), fp) != NULL) {
1940 		line[strlen (line) - 1] = 0;
1941 		m (printf ("Lookup word '%s' %d\n", line, index));
1942 		keyid = camel_partition_table_lookup (cpi, line);
1943 		m (printf (" key = %08d\n", keyid));
1944 
1945 		m (printf ("Lookup key %08x\n", keyid));
1946 		data = camel_key_table_lookup (ki, keyid, &key, &flags);
1947 		m (printf (" word = '%s' %d\n", key, data));
1948 
1949 		g_return_val_if_fail (data == index && strcmp (key, line) == 0, -1);
1950 
1951 		g_free (key);
1952 
1953 		index++;
1954 	}
1955 	fclose (fp);
1956 
1957 	printf ("Freeing partition index\n");
1958 	camel_partition_table_free (cpi);
1959 
1960 	printf ("Syncing block file\n");
1961 	camel_block_file_sync (bs);
1962 #endif
1963 	return 0;
1964 }
1965 
1966 #endif
1967