1 /* Simple sequence indices: 2 * Fast sequence record lookup in large files by keywords, such 3 * as names or accessions. 4 */ 5 #ifndef eslSSI_INCLUDED 6 #define eslSSI_INCLUDED 7 #include "esl_config.h" 8 9 #ifdef HAVE_SYS_TYPES_H 10 #include <sys/types.h> 11 #endif 12 #ifdef HAVE_STDINT_H 13 #include <stdint.h> 14 #endif 15 #ifdef HAVE_INTTYPES_H 16 #include <inttypes.h> 17 #endif 18 19 #define eslSSI_MAXFILES 32767 /* 2^15-1 */ 20 #define eslSSI_MAXKEYS 2147483647L /* 2^31-1 */ 21 #define eslSSI_MAXRAM 256 /* >256MB indices trigger external sort */ 22 23 #ifndef HAVE_FSEEKO 24 #define fseeko fseek 25 #define ftello ftell 26 #endif 27 28 /* ESL_SSI 29 * Using an existing SSI index file. 30 */ 31 typedef struct { 32 FILE *fp; /* open SSI index file */ 33 uint32_t flags; /* optional behavior flags */ 34 uint32_t offsz; /* sizeof(off_t)'s in the SSI file */ 35 uint16_t nfiles; /* number of files = 16 bit int */ 36 uint64_t nprimary; /* number of primary keys */ 37 uint64_t nsecondary; /* number of secondary keys */ 38 uint32_t flen; /* length of filenames (inc '\0') */ 39 uint32_t plen; /* length of primary keys (inc '\0') */ 40 uint32_t slen; /* length of secondary keys (inc '\0') */ 41 uint32_t frecsize; /* # bytes in a file record */ 42 uint32_t precsize; /* # bytes in a primary key record */ 43 uint32_t srecsize; /* # bytes in a secondary key record */ 44 off_t foffset; /* disk offset, start of file records */ 45 off_t poffset; /* disk offset, start of pri key recs */ 46 off_t soffset; /* disk offset, start of sec key recs */ 47 48 49 /* File information: */ 50 char **filename; /* list of file names [0..nfiles-1] */ 51 uint32_t *fileformat; /* file formats */ 52 uint32_t *fileflags; /* optional per-file behavior flags */ 53 uint32_t *bpl; /* bytes per line in file */ 54 uint32_t *rpl; /* residues per line in file */ 55 } ESL_SSI; 56 57 /* Flags for the <ssi->fileflags> bit vectors. */ 58 #define eslSSI_FASTSUBSEQ (1<<0) /* we can do fast subseq lookup calculations on this file */ 59 60 61 /* ESL_NEWSSI 62 * Used to create a new SSI index. 63 */ 64 typedef struct { /* Primary key data: */ 65 char *key; /* key name */ 66 uint16_t fnum; /* file number */ 67 off_t r_off; /* record offset */ 68 off_t d_off; /* data offset */ 69 int64_t len; /* sequence length */ 70 } ESL_PKEY; 71 72 typedef struct { /* Secondary key data: */ 73 char *key; /* secondary key name */ 74 char *pkey; /* primary key name */ 75 } ESL_SKEY; 76 77 typedef struct { 78 char *ssifile; /* name of the SSI file we're creating */ 79 FILE *ssifp; /* open SSI file being created */ 80 int external; /* TRUE if pkeys and skeys are on disk */ 81 int max_ram; /* threshold in MB to trigger extern sort */ 82 83 char **filenames; 84 uint32_t *fileformat; 85 uint32_t *bpl; 86 uint32_t *rpl; 87 uint32_t flen; /* length of longest filename, inc '\0' */ 88 uint16_t nfiles; /* can store up to 2^15-1 (32767) files */ 89 90 ESL_PKEY *pkeys; 91 uint32_t plen; /* length of longest pkey, including '\0' */ 92 uint64_t nprimary; /* can store up to 2^63-1 = 9.2e18 keys */ 93 char *ptmpfile; /* primary key tmpfile name, for extern sort */ 94 FILE *ptmp; /* handle on open ptmpfile */ 95 96 ESL_SKEY *skeys; 97 uint32_t slen; /* length of longest skey, including '\0' */ 98 uint64_t nsecondary; 99 char *stmpfile; /* secondary key tmpfile name, for extern sort */ 100 FILE *stmp; /* handle on open ptmpfile */ 101 102 char errbuf[eslERRBUFSIZE]; 103 } ESL_NEWSSI; 104 105 106 #define eslSSI_FCHUNK 16 /* chunk size for file name reallocation */ 107 #define eslSSI_KCHUNK 128 /* and for key reallocation */ 108 109 110 /* 1. Using (reading) SSI indices */ 111 extern int esl_ssi_Open(const char *filename, ESL_SSI **ret_ssi); 112 extern void esl_ssi_Close(ESL_SSI *ssi); 113 extern int esl_ssi_FindName(ESL_SSI *ssi, const char *key, 114 uint16_t *ret_fh, off_t *ret_roff, off_t *opt_doff, int64_t *opt_L); 115 extern int esl_ssi_FindNumber(ESL_SSI *ssi, int64_t nkey, 116 uint16_t *opt_fh, off_t *opt_roff, off_t *opt_doff, int64_t *opt_L, char **opt_pkey); 117 extern int esl_ssi_FindSubseq(ESL_SSI *ssi, const char *key, int64_t requested_start, 118 uint16_t *ret_fh, off_t *ret_roff, off_t *ret_doff, int64_t *ret_L, int64_t *ret_actual_start); 119 extern int esl_ssi_FileInfo(ESL_SSI *ssi, uint16_t fh, char **ret_filename, int *ret_format); 120 121 122 123 /* 2. Creating (writing) SSI indices. */ 124 extern int esl_newssi_Open(const char *ssifile, int allow_overwrite, ESL_NEWSSI **ret_newssi); 125 extern int esl_newssi_AddFile (ESL_NEWSSI *ns, const char *filename, int fmt, uint16_t *ret_fh); 126 extern int esl_newssi_SetSubseq(ESL_NEWSSI *ns, uint16_t fh, uint32_t bpl, uint32_t rpl); 127 extern int esl_newssi_AddKey (ESL_NEWSSI *ns, const char *key, uint16_t fh, off_t r_off, off_t d_off, int64_t L); 128 extern int esl_newssi_AddAlias (ESL_NEWSSI *ns, const char *alias, const char *key); 129 extern int esl_newssi_Write (ESL_NEWSSI *ns); 130 extern void esl_newssi_Close (ESL_NEWSSI *ns); 131 132 133 /* 3. Portable binary i/o. */ 134 extern void esl_byteswap(char *swap, int nbytes); 135 extern uint16_t esl_ntoh16(uint16_t netshort); 136 extern uint32_t esl_ntoh32(uint32_t netlong); 137 extern uint64_t esl_ntoh64(uint64_t net_int64); 138 extern uint16_t esl_hton16(uint16_t hostshort); 139 extern uint32_t esl_hton32(uint32_t hostlong); 140 extern uint64_t esl_hton64(uint64_t host_int64); 141 extern int esl_fread_u16(FILE *fp, uint16_t *ret_result); 142 extern int esl_fread_u32(FILE *fp, uint32_t *ret_result); 143 extern int esl_fread_u64(FILE *fp, uint64_t *ret_result); 144 extern int esl_fread_i16(FILE *fp, int16_t *ret_result); 145 extern int esl_fread_i32(FILE *fp, int32_t *ret_result); 146 extern int esl_fread_i64(FILE *fp, int64_t *ret_result); 147 extern int esl_fwrite_u16(FILE *fp, uint16_t n); 148 extern int esl_fwrite_u32(FILE *fp, uint32_t n); 149 extern int esl_fwrite_u64(FILE *fp, uint64_t n); 150 extern int esl_fwrite_i16(FILE *fp, int16_t n); 151 extern int esl_fwrite_i32(FILE *fp, int32_t n); 152 extern int esl_fwrite_i64(FILE *fp, int64_t n); 153 extern int esl_fread_offset(FILE *fp, int mode, off_t *ret_offset); 154 extern int esl_fwrite_offset(FILE *fp, off_t offset); 155 156 #endif /* eslSSI_INCLUDED */ 157