1 /* Simple sequence indices:
2  * Fast sequence record lookup in large files by keywords, such
3  * as names or accessions.
4  */
5 #ifndef eslSSI_INCLUDED
6 #define eslSSI_INCLUDED
7 #include "esl_config.h"
8 
9 #ifdef HAVE_SYS_TYPES_H
10 #include <sys/types.h>
11 #endif
12 #ifdef HAVE_STDINT_H
13 #include <stdint.h>
14 #endif
15 #ifdef HAVE_INTTYPES_H
16 #include <inttypes.h>
17 #endif
18 
/* Capacity limits and defaults for SSI indices. */
#define eslSSI_MAXFILES  32767         /* 2^15-1                                        */
#define eslSSI_MAXKEYS   2147483647L   /* 2^31-1                                        */
#define eslSSI_MAXRAM    256           /* in MB: indices above this trigger extern sort */
22 
/* If the build configuration does not define HAVE_FSEEKO, route the
 * names fseeko/ftello to the ISO C functions fseek/ftell instead.
 */
#if !defined(HAVE_FSEEKO)
#  define fseeko fseek
#  define ftello ftell
#endif
27 
/* ESL_SSI
 * State for using (reading from) an existing SSI index file.
 *
 * The first group of fields describes the index as a whole; the
 * second group holds one entry per file, indexed [0..nfiles-1].
 */
typedef struct {
  /* Index-wide information: */
  FILE      *fp;            /* the open SSI index file                   */
  uint32_t   flags;         /* optional behavior flags                   */
  uint32_t   offsz;         /* sizeof(off_t)'s in the SSI file           */
  uint16_t   nfiles;        /* number of files (a 16-bit integer)        */
  uint64_t   nprimary;      /* number of primary keys                    */
  uint64_t   nsecondary;    /* number of secondary keys                  */
  uint32_t   flen;          /* filename length, including '\0'           */
  uint32_t   plen;          /* primary key length, including '\0'        */
  uint32_t   slen;          /* secondary key length, including '\0'      */
  uint32_t   frecsize;      /* bytes in one file record                  */
  uint32_t   precsize;      /* bytes in one primary key record           */
  uint32_t   srecsize;      /* bytes in one secondary key record         */
  off_t      foffset;       /* disk offset where file records start      */
  off_t      poffset;       /* disk offset where primary key recs start  */
  off_t      soffset;       /* disk offset where secondary key recs start*/

  /* Per-file information: */
  char     **filename;      /* file names, [0..nfiles-1]                 */
  uint32_t  *fileformat;    /* file formats                              */
  uint32_t  *fileflags;     /* optional per-file behavior flags          */
  uint32_t  *bpl;           /* bytes per line in each file               */
  uint32_t  *rpl;           /* residues per line in each file            */
} ESL_SSI;
56 
/* Bit flags stored in the <ssi->fileflags> bit vectors.
 *   eslSSI_FASTSUBSEQ : fast subseq lookup calculations are possible on this file
 */
#define eslSSI_FASTSUBSEQ   (1 << 0)
59 
60 
61 /* ESL_NEWSSI
62  * Used to create a new SSI index.
63  */
/* ESL_PKEY
 * Data kept for one primary key.
 */
typedef struct {
  char      *key;      /* name of the key  */
  uint16_t   fnum;     /* file number      */
  off_t      r_off;    /* record offset    */
  off_t      d_off;    /* data offset      */
  int64_t    len;      /* sequence length  */
} ESL_PKEY;
71 
/* ESL_SKEY
 * Data kept for one secondary key: its own name, paired with the name
 * of a primary key.
 */
typedef struct {
  char  *key;     /* name of the secondary key */
  char  *pkey;    /* name of the primary key   */
} ESL_SKEY;
76 
77 typedef struct {
78   char       *ssifile;		/* name of the SSI file we're creating    */
79   FILE       *ssifp;		/* open SSI file being created            */
80   int         external;	        /* TRUE if pkeys and skeys are on disk    */
81   int         max_ram;	        /* threshold in MB to trigger extern sort */
82 
83   char      **filenames;
84   uint32_t   *fileformat;
85   uint32_t   *bpl;
86   uint32_t   *rpl;
87   uint32_t    flen;		/* length of longest filename, inc '\0' */
88   uint16_t    nfiles;		/* can store up to 2^15-1 (32767) files */
89 
90   ESL_PKEY   *pkeys;
91   uint32_t    plen;	        /* length of longest pkey, including '\0'    */
92   uint64_t    nprimary;		/* can store up to 2^63-1 = 9.2e18 keys      */
93   char       *ptmpfile;		/* primary key tmpfile name, for extern sort */
94   FILE       *ptmp;	        /* handle on open ptmpfile */
95 
96   ESL_SKEY   *skeys;
97   uint32_t    slen;        	/* length of longest skey, including '\0' */
98   uint64_t    nsecondary;
99   char       *stmpfile;		/* secondary key tmpfile name, for extern sort */
100   FILE       *stmp;	        /* handle on open ptmpfile */
101 
102   char        errbuf[eslERRBUFSIZE];
103 } ESL_NEWSSI;
104 
105 
/* Chunk sizes used for reallocation. */
#define eslSSI_FCHUNK   16     /* chunk size when reallocating file names */
#define eslSSI_KCHUNK   128    /* chunk size when reallocating keys       */
108 
109 
110 /* 1. Using (reading) SSI indices */
111 extern int  esl_ssi_Open(const char *filename, ESL_SSI **ret_ssi);
112 extern void esl_ssi_Close(ESL_SSI *ssi);
113 extern int  esl_ssi_FindName(ESL_SSI *ssi, const char *key,
114 			     uint16_t *ret_fh, off_t *ret_roff, off_t *opt_doff, int64_t *opt_L);
115 extern int  esl_ssi_FindNumber(ESL_SSI *ssi, int64_t nkey,
116 			       uint16_t *opt_fh, off_t *opt_roff, off_t *opt_doff, int64_t *opt_L, char **opt_pkey);
117 extern int  esl_ssi_FindSubseq(ESL_SSI *ssi, const char *key, int64_t requested_start,
118 			       uint16_t *ret_fh, off_t *ret_roff, off_t *ret_doff, int64_t *ret_L, int64_t *ret_actual_start);
119 extern int  esl_ssi_FileInfo(ESL_SSI *ssi, uint16_t fh, char **ret_filename, int *ret_format);
120 
121 
122 
123 /* 2. Creating (writing) SSI indices. */
124 extern int  esl_newssi_Open(const char *ssifile, int allow_overwrite, ESL_NEWSSI **ret_newssi);
125 extern int  esl_newssi_AddFile  (ESL_NEWSSI *ns, const char *filename, int fmt, uint16_t *ret_fh);
126 extern int  esl_newssi_SetSubseq(ESL_NEWSSI *ns, uint16_t fh, uint32_t bpl, uint32_t rpl);
127 extern int  esl_newssi_AddKey   (ESL_NEWSSI *ns, const char *key, uint16_t fh, off_t r_off, off_t d_off, int64_t L);
128 extern int  esl_newssi_AddAlias (ESL_NEWSSI *ns, const char *alias, const char *key);
129 extern int  esl_newssi_Write    (ESL_NEWSSI *ns);
130 extern void esl_newssi_Close    (ESL_NEWSSI *ns);
131 
132 
/* 3. Portable binary i/o.
 *
 * Declarations are grouped by family below. What each function does is
 * defined by the implementation file, not by this header.
 */

/* Byte swapping on a raw buffer of <nbytes> bytes. */
extern void     esl_byteswap(char *swap, int nbytes);

/* ntoh / hton conversions for 16-, 32- and 64-bit unsigned integers. */
extern uint16_t esl_ntoh16(uint16_t netshort);
extern uint32_t esl_ntoh32(uint32_t netlong);
extern uint64_t esl_ntoh64(uint64_t net_int64);
extern uint16_t esl_hton16(uint16_t hostshort);
extern uint32_t esl_hton32(uint32_t hostlong);
extern uint64_t esl_hton64(uint64_t host_int64);

/* Reading fixed-width integers from <fp>; the value goes to <*ret_result>. */
extern int esl_fread_u16(FILE *fp, uint16_t *ret_result);
extern int esl_fread_u32(FILE *fp, uint32_t *ret_result);
extern int esl_fread_u64(FILE *fp, uint64_t *ret_result);
extern int esl_fread_i16(FILE *fp, int16_t  *ret_result);
extern int esl_fread_i32(FILE *fp, int32_t  *ret_result);
extern int esl_fread_i64(FILE *fp, int64_t  *ret_result);

/* Writing fixed-width integers <n> to <fp>. */
extern int esl_fwrite_u16(FILE *fp, uint16_t n);
extern int esl_fwrite_u32(FILE *fp, uint32_t n);
extern int esl_fwrite_u64(FILE *fp, uint64_t n);
extern int esl_fwrite_i16(FILE *fp, int16_t  n);
extern int esl_fwrite_i32(FILE *fp, int32_t  n);
extern int esl_fwrite_i64(FILE *fp, int64_t  n);

/* Reading and writing off_t file offsets. */
extern int esl_fread_offset(FILE *fp, int mode, off_t *ret_offset);
extern int esl_fwrite_offset(FILE *fp, off_t offset);
155 
156 #endif /* eslSSI_INCLUDED */
157