1 /* Unaligned NCBI sequence file i/o.
2  */
3 #ifndef eslSQIO_NCBI_INCLUDED
4 #define eslSQIO_NCBI_INCLUDED
5 #include "esl_config.h"
6 
7 #include <stdio.h>
8 #ifdef HAVE_SYS_TYPES_H
9 #include <sys/types.h>
10 #endif
11 
12 #include "esl_sq.h"
13 #include "esl_sqio.h"
14 
15 /* forward declaration: this header only uses a pointer to struct esl_sqio_s */
16 struct esl_sqio_s;
17 
18 /* maximum residue count when reading a block: 1 meg (1024 * 1024) */
19 #define MAX_RESIDUE_COUNT (1024 * 1024)
20 
21 #define MAX_DB_VOLUMES   100   /* number of elements in the vols[] array of ESL_SQNCBI_DATA */
22 
23 /* ESL_SQNCBI_VOLUME:
24  * Information for one volume: its name, its sequence number range, and its .pin index offsets.
25  */
26 typedef struct esl_sqncbi_vol_s {
27   char      *name;                 /* name of the volume                       */
28 
29   uint32_t   start_seq;            /* starting sequence number                 */
30   uint32_t   end_seq;              /* ending sequence number                   */
31 
32   uint32_t   hdr_off;              /* disk offset in .pin to header index      */
33   uint32_t   seq_off;              /* disk offset in .pin to sequence index    */
34   uint32_t   amb_off;              /* disk offset in .pin to ambiguity index   */
35 } ESL_SQNCBI_VOLUME;
36 
37 /* ESL_SQNCBI_DATA:
38  * An open sequence file for reading: file pointers, database metadata, loaded index blocks, volume table, and state for the current header and sequence.
39  */
40 typedef struct esl_sqncbi_s {
41   FILE      *fppin;                /* Open .pin file ptr                       */
42   FILE      *fpphr;                /* Open .phr file ptr                       */
43   FILE      *fppsq;                /* Open .psq file ptr                       */
44   char       errbuf[eslERRBUFSIZE];/* parse error mesg.  Size must match msa.h */
45 
46   char      *title;                /* database title                           */
47   int        version;              /* database version                         */
48   char      *timestamp;            /* time stamp of database creation          */
49 
50   uint32_t   num_seq;              /* number of sequences in the database      */
51   uint64_t   total_res;            /* total number of residues                 */
52   uint32_t   max_seq;              /* longest sequence in the database         */
53 
54   uint32_t   hdr_off;              /* disk offset in .pin to header index      */
55   uint32_t   seq_off;              /* disk offset in .pin to sequence index    */
56   uint32_t   amb_off;              /* disk offset in .pin to ambiguity index   */
57 
58   int        index;                /* current sequence index in the database   */
59   uint32_t   vol_index;            /* current volume index (-1 if no volumes); NOTE(review): field is unsigned, so -1 would be stored as UINT32_MAX -- confirm how callers test it */
60   uint32_t   roff;                 /* record offset (start of header)          */
61   uint32_t   hoff;                 /* offset to last byte of header            */
62   uint32_t   doff;                 /* data offset (start of sequence data)     */
63   uint32_t   eoff;                 /* offset to last byte of sequence          */
64 
65   uint32_t   index_start;          /* start of indexes currently loaded        */
66   uint32_t   index_end;            /* end of indexes currently loaded          */
67   uint32_t  *hdr_indexes;          /* block of header indexes from .pin        */
68   uint32_t  *seq_indexes;          /* block of sequence indexes from .pin      */
69   uint32_t  *amb_indexes;          /* block of ambiguity indexes from .pin     */
70 
71   /* volume information */
72   uint32_t   volumes;              /* number of volumes                        */
73   ESL_SQNCBI_VOLUME vols[MAX_DB_VOLUMES]; /* per-volume information, fixed-size array */
74 
75   /* information for the current header */
76   unsigned char *hdr_buf;          /* buffer for holding unparsed header       */
77   unsigned char *hdr_ptr;          /* current parser position                  */
78   int            hdr_alloced;      /* size of the allocated buffer             */
79 
80   char          *name_ptr;         /* pointer to name NOT NULL TERMINATED      */
81   int32_t        name_size;        /* length of the name                       */
82   char          *acc_ptr;          /* pointer to accession NOT NULL TERMINATED */
83   int32_t        acc_size;         /* length of the accession                  */
84   int32_t        int_id;           /* integer sequence id                      */
85   char          *str_id_ptr;       /* pointer to id NOT NULL TERMINATED        */
86   int32_t        str_id_size;      /* length of the id                         */
87 
88   /* information on the current sequence */
89   uint32_t       seq_apos;         /* position of ambiguity table              */
90   uint32_t       seq_alen;         /* size of ambiguity table                  */
91   uint32_t       seq_cpos;         /* current position in ambiguity table      */
92   int32_t        seq_L;            /* true sequence length                     */
93 
94   /* alphabet used to convert ncbi to hmmer to ascii */
95   int            alphatype;        /* amino or dna                             */
96   char          *alphasym;         /* string of residues                       */
97 
98 } ESL_SQNCBI_DATA;
99 
100 /* Declared here, defined elsewhere. Presumably opens <seqfile> as an NCBI database in <format> and attaches it to <sqfp>; return value semantics are not visible from this header -- confirm against the implementation. */
101 extern int  esl_sqncbi_Open(char *seqfile, int format, struct esl_sqio_s *sqfp);
102 
103 #endif /*eslSQIO_NCBI_INCLUDED*/
104 
105