1 /* Memory-efficient multiple sequence alignment i/o from Pfam format 2 * 3 * Legacy interface, now that ESL_MSAFILE is rewritten. Still need 4 * to support --small option in various tools, so the necessary parts 5 * of the old interface were moved here. 6 * 7 * To-do: 8 * :: add memory-efficient interface in ESL_MSAFILE 9 * :: add memory-efficient ESL_MSA w/ API 10 * :: add space-efficient MSA file format 11 */ 12 #ifndef eslMSAFILE2_INCLUDED 13 #define eslMSAFILE2_INCLUDED 14 #include "esl_config.h" 15 16 #include "easel.h" 17 #include "esl_alphabet.h" /* digital alphabet */ 18 #include "esl_keyhash.h" /* string hashes, for mapping unique seq names */ 19 #include "esl_msa.h" /* ESL_MSA structure */ 20 #include "esl_msafile.h" /* preferred msafile interface, inc. fmt codes shared w/ ESL_MSAFILE2 */ 21 #include "esl_ssi.h" /* indexing large flatfiles on disk */ 22 23 24 /* Object: ESL_MSAFILE2 25 * 26 * Defines an alignment file that we open for reading, 27 * in our legacy version. See ESL_MSAFILE (esl_msafile.c) for the 28 * preferred version. 29 */ 30 typedef struct { 31 FILE *f; /* open file pointer */ 32 char *fname; /* name of file. used for diagnostic output */ 33 int linenumber; /* what line are we on in the file */ 34 char errbuf[eslERRBUFSIZE]; /* buffer for holding parse error info */ 35 36 char *buf; /* buffer for line input w/ sre_fgets() */ 37 int buflen; /* current allocated length for buf */ 38 39 int do_gzip; /* TRUE if f is "gzip -dc |" (will pclose(f))*/ 40 int do_stdin; /* TRUE if f is stdin (won't close f) */ 41 int format; /* format of alignment file we're reading */ 42 43 int do_digital; /* TRUE to digitize seqs directly into ax */ 44 const ESL_ALPHABET *abc; /* digitized input */ 45 46 ESL_SSI *ssi; /* open SSI index file; or NULL, if none. */ 47 48 ESL_MSA *msa_cache; /* occasional lookahead at next MSA; GuessAlphabet() */ 49 } ESL_MSAFILE2; 50 51 52 53 /* 1. The ESL_MSAFILE2 object */ 54 extern int esl_msafile2_Open(const char *filename, const char *env, ESL_MSAFILE2 **ret_afp); 55 extern int esl_msafile2_OpenDigital(const ESL_ALPHABET *abc, const char *filename, const char *env, ESL_MSAFILE2 **ret_afp); 56 extern void esl_msafile2_Close(ESL_MSAFILE2 *afp); 57 58 /* 2. Memory efficient reading/writing in Pfam format */ 59 extern int esl_msafile2_ReadInfoPfam(ESL_MSAFILE2 *afp, FILE *listfp, ESL_ALPHABET *abc, int64_t known_alen, char *known_rf, char *known_ss_cons, ESL_MSA **ret_msa, 60 int *opt_nseq, int64_t *opt_alen, int *opt_ngs, int *opt_maxname, int *opt_maxgf, int *opt_maxgc, int *opt_maxgr, 61 double ***opt_abc_ct, double ***opt_pp_ct, double ****opt_bp_ct, int **opt_spos_ct, int **opt_epos_ct); 62 extern int esl_msafile2_RegurgitatePfam(ESL_MSAFILE2 *afp, FILE *ofp, int maxname, int maxgf, int maxgc, int maxgr, 63 int do_header, int do_trailer, int do_blanks, int do_comments, int do_gf, 64 int do_gs, int do_gc, int do_gr, int do_aseq, ESL_KEYHASH *seqs2regurg, ESL_KEYHASH *seqs2skip, 65 int *useme, int *add2me, int exp_alen, char gapchar2add, int *opt_nseq_read, int *opt_nseq_written); 66 67 #endif //eslMSAFILE2_INCLUDED 68 69