1 /***************************************************************** 2 * SQUID - a library of functions for biological sequence analysis 3 * Copyright (C) 1992-2002 Washington University School of Medicine 4 * 5 * This source code is freely distributed under the terms of the 6 * GNU General Public License. See the files COPYRIGHT and LICENSE 7 * for details. 8 *****************************************************************/ 9 10 #ifndef GSIH_INCLUDED 11 #define GSIH_INCLUDED 12 13 /* gsi.h 14 * Database indexing (GSI format support) 15 * RCS $Id: gsi.h 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: gsi.h,v 1.3 2001/08/04 20:15:42 eddy Exp) 16 * 17 * A GSI (generic sequence index) file is composed of 18 * recnum + nfiles + 1 records. Each record contains 19 * three fields; key, file number, and disk offset. 20 * Record 0 contains: 21 * [ "GSI" ] [ nfiles ] [ recnum ] 22 * Records 1..nfiles map file names to file numbers, and contain: 23 * [ filename ] [ file number, 1..nfiles ] [ 0 (unused) ] 24 * Records nfiles+1 to recnum+nfiles+1 provide disk offset 25 * and file number indices for every key: 26 * [ key ] [ file number ] [ offset] 27 * 28 * Because the file is binary, we take some (but not 29 * complete) care to improve portability amongst platforms. 30 * This means using network order integers (see ntohl()) 31 * and defining types for 16 and 32 bit integers. 32 * 33 * Because we use 32-bit offsets, ftell(), and fseek(), 34 * there is an implicit 2 Gb file size maximum. 35 * AFAIK neither ANSI C nor POSIX provide a portable solution 36 * to this problem. fsetpos(), fgetpos() use an 37 * opaque fpos_t datatype that we can't write portably 38 * to a disk file. Suggestions welcomed. 39 */ 40 #define GSI_KEYSIZE 32 /* keys are 32 bytes long */ 41 #define GSI_RECSIZE 38 /* 32 + 2 + 4 bytes */ 42 #define SQD_UINT16_MAX 65535 /* 2^16-1 */ 43 #define SQD_UINT32_MAX 4294967295U/* 2^32-1 */ 44 45 struct gsi_s { 46 FILE *gsifp; /* open GSI index file */ 47 sqd_uint16 nfiles; /* number of files = 16 bit int */ 48 sqd_uint32 recnum; /* number of records = 32 bit int */ 49 }; 50 typedef struct gsi_s GSIFILE; 51 52 struct gsikey_s { 53 char key[GSI_KEYSIZE]; 54 sqd_uint16 filenum; 55 sqd_uint32 offset; 56 }; 57 struct gsiindex_s { 58 char **filenames; 59 int *fmt; 60 sqd_uint16 nfiles; 61 62 struct gsikey_s *elems; 63 int nkeys; 64 }; 65 66 67 /* from gsi.c 68 */ 69 extern GSIFILE *GSIOpen(char *gsifile); 70 extern int GSIGetRecord(GSIFILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint32 *f3); 71 extern int GSIGetOffset(GSIFILE *gsi, char *key, char *sqfile, 72 int *fmt, long *ret_offset); 73 extern void GSIClose(GSIFILE *gsi); 74 extern struct gsiindex_s *GSIAllocIndex(void); 75 extern void GSIFreeIndex(struct gsiindex_s *g); 76 extern void GSIAddFileToIndex(struct gsiindex_s *g, char *filename, int fmt); 77 extern void GSIAddKeyToIndex(struct gsiindex_s *g, char *key, int filenum, long offset); 78 extern void GSISortIndex(struct gsiindex_s *g); 79 extern void GSIWriteIndex(FILE *fp, struct gsiindex_s *g); 80 extern void GSIWriteHeader(FILE *fp, int nfiles, long nkeys); 81 extern int GSIWriteFileRecord(FILE *fp, char *fname, int idx, int fmt); 82 extern int GSIWriteKeyRecord(FILE *fp, char *key, int fileidx, long offset); 83 84 #endif /*GSIH_INCLUDED*/ 85