1 /* Memory-efficient multiple sequence alignment i/o from Pfam format
2  *
3  * Legacy interface, now that ESL_MSAFILE is rewritten.  Still need
4  * to support --small option in various tools, so the necessary parts
5  * of the old interface were moved here.
6  *
7  * To-do:
8  *   :: add memory-efficient interface in ESL_MSAFILE
9  *   :: add memory-efficient ESL_MSA w/ API
10  *   :: add space-efficient MSA file format
11  */
12 #ifndef eslMSAFILE2_INCLUDED
13 #define eslMSAFILE2_INCLUDED
14 #include "esl_config.h"
15 
16 #include "easel.h"
17 #include "esl_alphabet.h"	/* digital alphabet                                                   */
18 #include "esl_keyhash.h"	/* string hashes, for mapping unique seq names                        */
19 #include "esl_msa.h"		/* ESL_MSA structure                                                  */
20 #include "esl_msafile.h"	/* preferred msafile interface, inc. fmt codes shared w/ ESL_MSAFILE2 */
21 #include "esl_ssi.h"        	/* indexing large flatfiles on disk                                   */
22 
23 
24 /* Object: ESL_MSAFILE2
25  *
26  * Defines an alignment file that we open for reading,
27  * in our legacy version. See ESL_MSAFILE (esl_msafile.c) for the
28  * preferred version.
29  */
30 typedef struct {
31   FILE *f;                      /* open file pointer                         */
32   char *fname;			/* name of file. used for diagnostic output  */
33   int   linenumber;		/* what line are we on in the file           */
34   char  errbuf[eslERRBUFSIZE];  /* buffer for holding parse error info       */
35 
36   char *buf;			/* buffer for line input w/ sre_fgets()      */
37   int   buflen;			/* current allocated length for buf          */
38 
39   int   do_gzip;		/* TRUE if f is "gzip -dc |" (will pclose(f))*/
40   int   do_stdin;		/* TRUE if f is stdin (won't close f)        */
41   int   format;			/* format of alignment file we're reading    */
42 
43   int   do_digital;		/* TRUE to digitize seqs directly into ax    */
44   const ESL_ALPHABET *abc;	/* digitized input  */
45 
46   ESL_SSI *ssi;		        /* open SSI index file; or NULL, if none.    */
47 
48   ESL_MSA *msa_cache;		/* occasional lookahead at next MSA; GuessAlphabet() */
49 } ESL_MSAFILE2;
50 
51 
52 
53 /* 1. The ESL_MSAFILE2 object */
54 extern int  esl_msafile2_Open(const char *filename, const char *env, ESL_MSAFILE2 **ret_afp);
55 extern int  esl_msafile2_OpenDigital(const ESL_ALPHABET *abc, const char *filename, const char *env, ESL_MSAFILE2 **ret_afp);
56 extern void esl_msafile2_Close(ESL_MSAFILE2 *afp);
57 
58 /* 2. Memory efficient reading/writing in Pfam format */
59 extern int   esl_msafile2_ReadInfoPfam(ESL_MSAFILE2 *afp, FILE *listfp, ESL_ALPHABET *abc, int64_t known_alen, char *known_rf, char *known_ss_cons, ESL_MSA **ret_msa,
60 				       int *opt_nseq, int64_t *opt_alen, int *opt_ngs, int *opt_maxname, int *opt_maxgf, int *opt_maxgc, int *opt_maxgr,
61 				       double ***opt_abc_ct, double ***opt_pp_ct, double ****opt_bp_ct, int **opt_spos_ct, int **opt_epos_ct);
62 extern int   esl_msafile2_RegurgitatePfam(ESL_MSAFILE2 *afp, FILE *ofp, int maxname, int maxgf, int maxgc, int maxgr,
63 					  int do_header, int do_trailer, int do_blanks, int do_comments, int do_gf,
64 					  int do_gs, int do_gc, int do_gr, int do_aseq, ESL_KEYHASH *seqs2regurg, ESL_KEYHASH *seqs2skip,
65 					  int *useme, int *add2me, int exp_alen, char gapchar2add, int *opt_nseq_read, int *opt_nseq_written);
66 
67 #endif //eslMSAFILE2_INCLUDED
68 
69