1 /* This file is part of the Zebra server.
2    Copyright (C) 2004-2013 Index Data
3 
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8 
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13 
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 
18 */
19 
20 /*
21  * TODO:
22  *   Reduction to lower categories in isamc_merge
23  */
24 #if HAVE_CONFIG_H
25 #include <config.h>
26 #endif
27 #include <stdlib.h>
28 #include <assert.h>
29 #include <string.h>
30 #include <stdio.h>
31 
32 #include <yaz/log.h>
33 #include <yaz/xmalloc.h>
34 #include "isamc-p.h"
35 
36 static void flush_block (ISAMC is, int cat);
37 static void release_fc (ISAMC is, int cat);
38 static void init_fc (ISAMC is, int cat);
39 
40 #define ISAMC_FREELIST_CHUNK 0
41 
42 #define SMALL_TEST 0
43 
/*
 * Fill *m with the default ISAMC settings: the built-in block category
 * table, no codec callbacks, no compare/log callbacks, debug level 1 and
 * max_blocks_mem 10.
 */
void isamc_getmethod (ISAMC_M *m)
{
    /* One row per block category.  isamc_open logs the members as
       bsize, ifill, mfill, mblocks and stops at the first row whose
       mblocks is 0 (presumably the column order used here -- confirm
       against the struct declaration). */
    static struct ISAMC_filecat_s default_categories[] = {
#if SMALL_TEST
        {    32,     28,      0,  3 },
        {    64,     54,     30,  0 },
#else
        {    64,     56,     40,  5 },
        {   128,    120,    100,  10 },
        {   512,    490,    350,  10 },
        {  2048,   1900,   1700,  10 },
        {  8192,   8000,   7900,  10 },
        { 32768,  32000,  31000,  0 },
#endif
    };

    m->filecat = default_categories;
    m->max_blocks_mem = 10;
    m->debug = 1;

    /* callbacks are left unset */
    m->compare_item = NULL;
    m->log_item = NULL;

    m->codec.start = NULL;
    m->codec.stop = NULL;
    m->codec.reset = NULL;
    m->codec.encode = NULL;
    m->codec.decode  = NULL;
}
75 
isamc_open(BFiles bfs,const char * name,int writeflag,ISAMC_M * method)76 ISAMC isamc_open (BFiles bfs, const char *name, int writeflag, ISAMC_M *method)
77 {
78     ISAMC is;
79     ISAMC_filecat filecat;
80     int i = 0;
81     int max_buf_size = 0;
82 
83     is = (ISAMC) xmalloc (sizeof(*is));
84 
85     is->method = (ISAMC_M *) xmalloc (sizeof(*is->method));
86     memcpy (is->method, method, sizeof(*method));
87     filecat = is->method->filecat;
88     assert (filecat);
89 
90     /* determine number of block categories */
91     if (is->method->debug)
92         yaz_log (YLOG_LOG, "isc: bsize  ifill  mfill mblocks");
93     do
94     {
95         if (is->method->debug)
96             yaz_log (YLOG_LOG, "isc:%6d %6d %6d %6d",
97                   filecat[i].bsize, filecat[i].ifill,
98                   filecat[i].mfill, filecat[i].mblocks);
99         if (max_buf_size < filecat[i].mblocks * filecat[i].bsize)
100             max_buf_size = filecat[i].mblocks * filecat[i].bsize;
101     } while (filecat[i++].mblocks);
102     is->no_files = i;
103     is->max_cat = --i;
104     /* max_buf_size is the larget buffer to be used during merge */
105     max_buf_size = (1 + max_buf_size / filecat[i].bsize) * filecat[i].bsize;
106     if (max_buf_size < (1+is->method->max_blocks_mem) * filecat[i].bsize)
107         max_buf_size = (1+is->method->max_blocks_mem) * filecat[i].bsize;
108     if (is->method->debug)
109         yaz_log (YLOG_LOG, "isc: max_buf_size %d", max_buf_size);
110 
111     assert (is->no_files > 0);
112     is->files = (ISAMC_file) xmalloc (sizeof(*is->files)*is->no_files);
113     if (writeflag)
114     {
115         is->merge_buf = (char *) xmalloc (max_buf_size+256);
116 	memset (is->merge_buf, 0, max_buf_size+256);
117     }
118     else
119         is->merge_buf = NULL;
120     for (i = 0; i<is->no_files; i++)
121     {
122         is->files[i].bf = 0;
123         is->files[i].head_is_dirty = 0;
124         is->files[i].head.lastblock = 1;
125         is->files[i].head.freelist = 0;
126 	is->files[i].alloc_entries_num = 0;
127 	is->files[i].alloc_entries_max =
128 	    is->method->filecat[i].bsize / sizeof(zint) - 1;
129 	is->files[i].alloc_buf = (char *)
130 	    xmalloc (is->method->filecat[i].bsize);
131         is->files[i].no_writes = 0;
132         is->files[i].no_reads = 0;
133         is->files[i].no_skip_writes = 0;
134         is->files[i].no_allocated = 0;
135         is->files[i].no_released = 0;
136         is->files[i].no_remap = 0;
137 	is->files[i].no_forward = 0;
138 	is->files[i].no_backward = 0;
139 	is->files[i].sum_forward = 0;
140 	is->files[i].sum_backward = 0;
141 	is->files[i].no_next = 0;
142 	is->files[i].no_prev = 0;
143 
144         init_fc (is, i);
145     }
146 
147     for (i = 0; i<is->no_files; i++)
148     {
149         char fname[FILENAME_MAX];
150         int r;
151 
152         sprintf (fname, "%s%c", name, i+'A');
153         is->files[i].bf = bf_open (bfs, fname, is->method->filecat[i].bsize,
154                                    writeflag);
155         if (!is->files[i].bf)
156         {
157             isamc_close(is);
158             return 0;
159         }
160         r = bf_read(is->files[i].bf, 0, 0, sizeof(ISAMC_head),
161                      &is->files[i].head);
162         if (r == -1)
163         {
164             isamc_close(is);
165             return 0;
166         }
167     }
168     return is;
169 }
170 
isamc_block_used(ISAMC is,int type)171 zint isamc_block_used (ISAMC is, int type)
172 {
173     if (type < 0 || type >= is->no_files)
174 	return -1;
175     return is->files[type].head.lastblock-1;
176 }
177 
/*
 * Return the configured block size (filecat bsize) of category `type`,
 * or -1 when `type` is outside 0 .. no_files-1.
 */
int isamc_block_size (ISAMC is, int type)
{
    ISAMC_filecat categories = is->method->filecat;

    if (type >= 0 && type < is->no_files)
        return categories[type].bsize;
    return -1;
}
185 
/*
 * Close an ISAMC handle: log the statistics when debug is on, hand the
 * cached free blocks of every category back to the free list, write back
 * dirty headers, close the block files and free all memory owned by the
 * handle.  Always returns 0.
 */
int isamc_close (ISAMC is)
{
    int cat;

    if (is->method->debug)
    {
        /* read-direction statistics, one line per category */
        yaz_log (YLOG_LOG, "isc:    next    forw   mid-f    prev   backw   mid-b");
        for (cat = 0; cat < is->no_files; cat++)
        {
            double mid_forward = 0.0;
            double mid_backward = 0.0;

            if (is->files[cat].no_forward)
                mid_forward = (double) is->files[cat].sum_forward /
                    is->files[cat].no_forward;
            if (is->files[cat].no_backward)
                mid_backward = (double) is->files[cat].sum_backward /
                    is->files[cat].no_backward;
            yaz_log (YLOG_LOG, "isc:%8d%8d%8.1f%8d%8d%8.1f",
                     is->files[cat].no_next,
                     is->files[cat].no_forward,
                     mid_forward,
                     is->files[cat].no_prev,
                     is->files[cat].no_backward,
                     mid_backward);
        }
        yaz_log (YLOG_LOG, "isc:  writes   reads skipped   alloc released  remap");
    }

    for (cat = 0; cat < is->no_files; cat++)
    {
        /* empty the in-memory free block cache first, so the counters
           logged below and the header written below include it */
        release_fc (is, cat);
        if (is->method->debug)
            yaz_log (YLOG_LOG, "isc:%8d%8d%8d%8d%8d%8d",
                     is->files[cat].no_writes,
                     is->files[cat].no_reads,
                     is->files[cat].no_skip_writes,
                     is->files[cat].no_allocated,
                     is->files[cat].no_released,
                     is->files[cat].no_remap);
        if (is->files[cat].bf)
        {
            if (is->files[cat].head_is_dirty)
                bf_write (is->files[cat].bf, 0, 0, sizeof(ISAMC_head),
                          &is->files[cat].head);
            flush_block (is, cat);
            bf_close (is->files[cat].bf);
        }
        xfree(is->files[cat].fc_list);
        xfree(is->files[cat].alloc_buf);
    }

    xfree (is->files);
    xfree (is->merge_buf);
    xfree (is->method);
    xfree (is);
    return 0;
}
236 
/*
 * Read block `pos` of category `cat` into dst and count the read.
 * Returns whatever bf_read returns (offset 0 and length 0 are passed;
 * presumably that selects the whole block -- confirm against bf_read).
 */
int isamc_read_block (ISAMC is, int cat, zint pos, char *dst)
{
    is->files[cat].no_reads += 1;
    return bf_read (is->files[cat].bf, pos, 0, 0, dst);
}
242 
/*
 * Write src to block `pos` of category `cat` and count the write; the
 * call is logged when the debug level is above 2.
 * Returns whatever bf_write returns.
 */
int isamc_write_block (ISAMC is, int cat, zint pos, char *src)
{
    is->files[cat].no_writes += 1;
    if (is->method->debug > 2)
    {
        yaz_log (YLOG_LOG, "isc: write_block %d " ZINT_FORMAT, cat, pos);
    }
    return bf_write (is->files[cat].bf, pos, 0, 0, src);
}
250 
/*
 * Write a data block whose payload starts at src.
 *
 * The block header (next-block pointer followed by the used size) is
 * stored in the ISAMC_BLOCK_OFFSET_N bytes immediately BEFORE src, so
 * the caller must own that much writable space in front of src.  The
 * size stored is offset + ISAMC_BLOCK_OFFSET_N.
 * Returns the result of isamc_write_block.
 */
int isamc_write_dblock (ISAMC is, int cat, zint pos, char *src,
                      zint nextpos, int offset)
{
    ISAMC_BLOCK_SIZE size = offset + ISAMC_BLOCK_OFFSET_N;
    char *block_start = src - ISAMC_BLOCK_OFFSET_N;

    if (is->method->debug > 2)
        yaz_log (YLOG_LOG, "isc: write_dblock. size=%d nextpos=" ZINT_FORMAT,
              (int) size, nextpos);

    /* header: next pointer first, then the size */
    memcpy (block_start, &nextpos, sizeof(nextpos));
    memcpy (block_start + sizeof(nextpos), &size, sizeof(size));
    return isamc_write_block (is, cat, pos, block_start);
}
263 
264 #if ISAMC_FREELIST_CHUNK
/*
 * (ISAMC_FREELIST_CHUNK variant.)  Write the buffered free-list chunk of
 * category `cat` back to the current free-list head block, if there is a
 * head block and the buffer holds entries, then mark the buffer empty.
 */
static void flush_block (ISAMC is, int cat)
{
    char *abuf = is->files[cat].alloc_buf;
    zint block = is->files[cat].head.freelist;
    if (block && is->files[cat].alloc_entries_num)
    {
        /* Store the entry counter with its own size.  The chain link
           starts at abuf + sizeof(int) (see alloc_block/release_block),
           so copying sizeof(zint) bytes here would read past the counter
           and overwrite the start of that link. */
        memcpy (abuf, &is->files[cat].alloc_entries_num,
                sizeof(is->files[cat].alloc_entries_num));
        bf_write (is->files[cat].bf, block, 0, 0, abuf);
        is->files[cat].alloc_entries_num = 0;
    }
}
276 
alloc_block(ISAMC is,int cat)277 static zint alloc_block (ISAMC is, int cat)
278 {
279     zint block = is->files[cat].head.freelist;
280     char *abuf = is->files[cat].alloc_buf;
281 
282     (is->files[cat].no_allocated)++;
283 
284     if (!block)
285     {
286         block = (is->files[cat].head.lastblock)++;   /* no free list */
287 	is->files[cat].head_is_dirty = 1;
288     }
289     else
290     {
291 	if (!is->files[cat].alloc_entries_num) /* read first time */
292 	{
293 	    bf_read (is->files[cat].bf, block, 0, 0, abuf);
294 	    memcpy (&is->files[cat].alloc_entries_num, abuf,
295 		    sizeof(is->files[cat].alloc_entries_num));
296 	    assert (is->files[cat].alloc_entries_num > 0);
297 	}
298 	/* have some free blocks now */
299 	assert (is->files[cat].alloc_entries_num > 0);
300 	is->files[cat].alloc_entries_num--;
301 	if (!is->files[cat].alloc_entries_num)  /* last one in block? */
302 	{
303 	    memcpy (&is->files[cat].head.freelist, abuf + sizeof(int),
304 		    sizeof(zint));
305 	    is->files[cat].head_is_dirty = 1;
306 
307 	    if (is->files[cat].head.freelist)
308 	    {
309 		bf_read (is->files[cat].bf, is->files[cat].head.freelist,
310 			 0, 0, abuf);
311 		memcpy (&is->files[cat].alloc_entries_num, abuf,
312 			sizeof(is->files[cat].alloc_entries_num));
313 		assert (is->files[cat].alloc_entries_num);
314 	    }
315 	}
316 	else
317 	    memcpy (&block, abuf + sizeof(zint) + sizeof(int) *
318 		    is->files[cat].alloc_entries_num, sizeof(zint));
319     }
320     return block;
321 }
322 
/*
 * (ISAMC_FREELIST_CHUNK variant.)  Put block `pos` of category `cat` on
 * the free list.
 *
 * Released blocks are collected in the chunk buffered in alloc_buf: an
 * int entry count followed by zint slots, slot 0 holding the link to the
 * previous free-list head.  When the buffer is empty (or has just been
 * written out because it was full), `pos` itself becomes the new
 * free-list head and starts a new chunk.
 */
static void release_block (ISAMC is, int cat, zint pos)
{
    char *chunk = is->files[cat].alloc_buf;
    zint head_block = is->files[cat].head.freelist;

    is->files[cat].no_released += 1;

    /* a free list exists but its head chunk is not buffered yet */
    if (head_block && is->files[cat].alloc_entries_num == 0)
    {
        bf_read (is->files[cat].bf, head_block, 0, 0, chunk);
        memcpy (&is->files[cat].alloc_entries_num, chunk,
                sizeof(is->files[cat].alloc_entries_num));
        assert (is->files[cat].alloc_entries_num > 0);
    }
    assert (is->files[cat].alloc_entries_num <= is->files[cat].alloc_entries_max);

    /* chunk full: write it to its block and start over */
    if (is->files[cat].alloc_entries_num == is->files[cat].alloc_entries_max)
    {
        assert (head_block);
        memcpy (chunk, &is->files[cat].alloc_entries_num, sizeof(int));
        bf_write (is->files[cat].bf, head_block, 0, 0, chunk);
        is->files[cat].alloc_entries_num = 0;
    }

    if (is->files[cat].alloc_entries_num)
    {
        /* append pos to the buffered chunk */
        memcpy (chunk + sizeof(int) +
                is->files[cat].alloc_entries_num*sizeof(zint),
                &pos, sizeof(zint));
    }
    else
    {
        /* new chunk: link to the old head, pos becomes the head */
        memcpy (chunk + sizeof(int), &head_block, sizeof(zint));
        is->files[cat].head.freelist = pos;
        is->files[cat].head_is_dirty = 1;
    }
    is->files[cat].alloc_entries_num++;
}
359 #else
/*
 * Nothing to flush in this variant: release_block writes each free-list
 * link straight to the file, so no free-list state is buffered.
 */
static void flush_block (ISAMC is, int cat)
{
    (void) is;
    (void) cat;
}
363 
alloc_block(ISAMC is,int cat)364 static zint alloc_block (ISAMC is, int cat)
365 {
366     zint block;
367     char buf[sizeof(zint)];
368 
369     is->files[cat].head_is_dirty = 1;
370     (is->files[cat].no_allocated)++;
371     if ((block = is->files[cat].head.freelist))
372     {
373         bf_read (is->files[cat].bf, block, 0, sizeof(zint), buf);
374         memcpy (&is->files[cat].head.freelist, buf, sizeof(zint));
375     }
376     else
377         block = (is->files[cat].head.lastblock)++;
378     return block;
379 }
380 
/*
 * Push block `pos` of category `cat` onto the free list: the previous
 * head is written into the first sizeof(zint) bytes of block `pos`, and
 * `pos` becomes the new head.  The header is marked dirty.
 */
static void release_block (ISAMC is, int cat, zint pos)
{
    char link[sizeof(zint)];

    /* remember the old head before it is replaced */
    memcpy (link, &is->files[cat].head.freelist, sizeof(zint));

    is->files[cat].head.freelist = pos;
    is->files[cat].head_is_dirty = 1;
    (is->files[cat].no_released)++;

    bf_write (is->files[cat].bf, pos, 0, sizeof(zint), link);
}
391 #endif
392 
isamc_alloc_block(ISAMC is,int cat)393 zint isamc_alloc_block (ISAMC is, int cat)
394 {
395     zint block = 0;
396 
397     if (is->files[cat].fc_list)
398     {
399         int j;
400 	zint nb;
401         for (j = 0; j < is->files[cat].fc_max; j++)
402             if ((nb = is->files[cat].fc_list[j]) && (!block || nb < block))
403             {
404                 is->files[cat].fc_list[j] = 0;
405 		block = nb;
406                 break;
407             }
408     }
409     if (!block)
410         block = alloc_block (is, cat);
411     if (is->method->debug > 3)
412         yaz_log (YLOG_LOG, "isc: alloc_block in cat %d: " ZINT_FORMAT, cat, block);
413     return block;
414 }
415 
/*
 * Release block `pos` of category `cat`.
 *
 * The block is parked in the first empty slot of the in-memory free
 * block cache (fc_list); when the cache is absent or full it is handed
 * to release_block instead.  The call is logged when the debug level is
 * above 3.
 */
void isamc_release_block (ISAMC is, int cat, zint pos)
{
    zint *cache = is->files[cat].fc_list;

    if (is->method->debug > 3)
        yaz_log (YLOG_LOG, "isc: release_block in cat %d:" ZINT_FORMAT, cat, pos);

    if (cache)
    {
        int slot;

        for (slot = 0; slot < is->files[cat].fc_max; slot++)
        {
            if (cache[slot])
                continue;       /* occupied */
            cache[slot] = pos;
            return;
        }
    }
    release_block (is, cat, pos);
}
432 
/*
 * Create the free block cache of category `cat`: 100 slots, all set to
 * 0 (0 marks an empty slot).
 */
static void init_fc (ISAMC is, int cat)
{
    const int slots = 100;
    int k;

    is->files[cat].fc_list = (zint *)
        xmalloc (sizeof(*is->files[0].fc_list) * slots);
    is->files[cat].fc_max = slots;
    for (k = 0; k < slots; k++)
        is->files[cat].fc_list[k] = 0;
}
443 
/*
 * Empty the free block cache of category `cat`: every non-zero slot is
 * passed to release_block and then cleared.  Slots are visited from the
 * highest index down to 0.
 */
static void release_fc (ISAMC is, int cat)
{
    int slot;

    for (slot = is->files[cat].fc_max - 1; slot >= 0; slot--)
    {
        zint cached = is->files[cat].fc_list[slot];

        if (!cached)
            continue;
        release_block (is, cat, cached);
        is->files[cat].fc_list[slot] = 0;
    }
}
456 
/*
 * Close a read position: call the codec's stop callback with the decoder
 * state, then free the block buffer and the position itself.
 */
void isamc_pp_close (ISAMC_PP pp)
{
    (*pp->is->method->codec.stop)(pp->decodeClientData);
    xfree (pp->buf);
    xfree (pp);
}
465 
isamc_pp_open(ISAMC is,ISAM_P ipos)466 ISAMC_PP isamc_pp_open (ISAMC is, ISAM_P ipos)
467 {
468     ISAMC_PP pp = (ISAMC_PP) xmalloc (sizeof(*pp));
469     char *src;
470 
471     pp->cat = (int) isamc_type(ipos);
472     pp->pos = isamc_block(ipos);
473 
474     src = pp->buf = (char *) xmalloc (is->method->filecat[pp->cat].bsize);
475 
476     pp->next = 0;
477     pp->size = 0;
478     pp->offset = 0;
479     pp->is = is;
480     pp->decodeClientData = (*is->method->codec.start)();
481     pp->deleteFlag = 0;
482     pp->numKeys = 0;
483 
484     if (pp->pos)
485     {
486         src = pp->buf;
487         isamc_read_block (is, pp->cat, pp->pos, src);
488         memcpy (&pp->next, src, sizeof(pp->next));
489         src += sizeof(pp->next);
490         memcpy (&pp->size, src, sizeof(pp->size));
491         src += sizeof(pp->size);
492         memcpy (&pp->numKeys, src, sizeof(pp->numKeys));
493         src += sizeof(pp->numKeys);
494 	if (pp->next == pp->pos)
495 	{
496 	    yaz_log(YLOG_FATAL|YLOG_LOG, "pp->next = " ZINT_FORMAT, pp->next);
497 	    yaz_log(YLOG_FATAL|YLOG_LOG, "pp->pos = " ZINT_FORMAT, pp->pos);
498 	    assert (pp->next != pp->pos);
499 	}
500         pp->offset = src - pp->buf;
501         assert (pp->offset == ISAMC_BLOCK_OFFSET_1);
502         if (is->method->debug > 2)
503             yaz_log (YLOG_LOG, "isc: read_block size=%d %d " ZINT_FORMAT " next="
504 		  ZINT_FORMAT, pp->size, pp->cat, pp->pos, pp->next);
505     }
506     return pp;
507 }
508 
509 /* returns non-zero if item could be read; 0 otherwise */
isamc_pp_read(ISAMC_PP pp,void * buf)510 int isamc_pp_read (ISAMC_PP pp, void *buf)
511 {
512     char *cp = buf;
513     return isamc_read_item (pp, &cp);
514 }
515 
516 /* read one item from file - decode and store it in *dst.
517    Returns
518      0 if end-of-file
519      1 if item could be read ok and NO boundary
520      2 if item could be read ok and boundary */
isamc_read_item(ISAMC_PP pp,char ** dst)521 int isamc_read_item (ISAMC_PP pp, char **dst)
522 {
523     ISAMC is = pp->is;
524     const char *src = pp->buf + pp->offset;
525 
526     if (pp->offset >= pp->size)
527     {
528 	if (!pp->next)
529 	{
530 	    pp->pos = 0;
531 	    return 0; /* end of file */
532 	}
533 	if (pp->next > pp->pos)
534 	{
535 	    if (pp->next == pp->pos + 1)
536 		is->files[pp->cat].no_next++;
537 	    else
538 	    {
539 		is->files[pp->cat].no_forward++;
540 		is->files[pp->cat].sum_forward += pp->next - pp->pos;
541 	    }
542 	}
543 	else
544 	{
545 	    if (pp->next + 1 == pp->pos)
546 		is->files[pp->cat].no_prev++;
547 	    else
548 	    {
549 		is->files[pp->cat].no_backward++;
550 		is->files[pp->cat].sum_backward += pp->pos - pp->next;
551 	    }
552 	}
553 	/* out new block position */
554         pp->pos = pp->next;
555         src = pp->buf;
556 	/* read block and save 'next' and 'size' entry */
557         isamc_read_block (is, pp->cat, pp->pos, pp->buf);
558         memcpy (&pp->next, src, sizeof(pp->next));
559         src += sizeof(pp->next);
560         memcpy (&pp->size, src, sizeof(pp->size));
561         src += sizeof(pp->size);
562         /* assume block is non-empty */
563         assert (src - pp->buf == ISAMC_BLOCK_OFFSET_N);
564 
565 	if (pp->next == pp->pos)
566 	{
567 	    yaz_log(YLOG_FATAL|YLOG_LOG, "pp->next = " ZINT_FORMAT, pp->next);
568 	    yaz_log(YLOG_FATAL|YLOG_LOG, "pp->pos = " ZINT_FORMAT, pp->pos);
569 	    assert (pp->next != pp->pos);
570 	}
571 
572         if (pp->deleteFlag)
573             isamc_release_block (is, pp->cat, pp->pos);
574         (*is->method->codec.decode)(pp->decodeClientData, dst, &src);
575         pp->offset = src - pp->buf;
576         if (is->method->debug > 2)
577             yaz_log (YLOG_LOG, "isc: read_block size=%d %d " ZINT_FORMAT " next="
578 		  ZINT_FORMAT, pp->size, pp->cat, pp->pos, pp->next);
579         return 2;
580     }
581     (*is->method->codec.decode)(pp->decodeClientData, dst, &src);
582     pp->offset = src - pp->buf;
583     return 1;
584 }
585 
isamc_pp_num(ISAMC_PP pp)586 zint isamc_pp_num (ISAMC_PP pp)
587 {
588     return pp->numKeys;
589 }
590 
591 /*
592  * Local variables:
593  * c-basic-offset: 4
594  * c-file-style: "Stroustrup"
595  * indent-tabs-mode: nil
596  * End:
597  * vim: shiftwidth=4 tabstop=8 expandtab
598  */
599 
600