1 /* Calculates pairwise %id for all aligned sequence pairs in MSA
2 */
3 #include "esl_config.h"
4
5 #include <stdlib.h>
6 #include <stdio.h>
7
8 #include "easel.h"
9 #include "esl_distance.h"
10 #include "esl_getopts.h"
11 #include "esl_msa.h"
12 #include "esl_msafile.h"
13
14 static ESL_OPTIONS options[] = {
15 /* name type default env range togs reqs incomp help docgroup */
16 { "-h", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "help; show brief info on version and usage", 1 },
17 { "--informat", eslARG_STRING, NULL, NULL, NULL, NULL, NULL, NULL, "specify the input MSA file is in format <s>", 0 },
18 { "--outformat", eslARG_STRING, "Clustal", NULL, NULL, NULL, NULL, NULL, "write the output MSA in format <s>", 0 },
19 { "--noheader", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "no header", 0 },
20 { "--dna", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "use DNA alphabet", 0 },
21 { "--rna", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "use RNA alphabet", 0 },
22 { "--amino", eslARG_NONE, FALSE, NULL, NULL, NULL, NULL, NULL, "use protein alphabet", 0 },
23 { 0,0,0,0,0,0,0,0,0,0 },
24 };
25
26 static char banner[] = "calculate pairwise %id for each seq pair in an MSA";
27 static char usage[] = "[options] <msafile>";
28
29 int
main(int argc,char ** argv)30 main(int argc, char **argv)
31 {
32 ESL_GETOPTS *go = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
33 char *msafile = esl_opt_GetArg(go, 1);
34 ESL_ALPHABET *abc = NULL;
35 int infmt = eslMSAFILE_UNKNOWN;
36 ESL_MSAFILE *afp = NULL;
37 ESL_MSA *msa = NULL;
38 FILE *ofp = stdout;
39 int nali = 0;
40 int namewidth;
41 double pid;
42 double pmatch;
43 int nid, n;
44 int nmatch, m;
45 int i,j;
46 int header = TRUE;
47 int status;
48
49 /* allow user to assert the input MSA alphabet */
50 if (esl_opt_GetBoolean(go, "--rna")) abc = esl_alphabet_Create(eslRNA);
51 else if (esl_opt_GetBoolean(go, "--dna")) abc = esl_alphabet_Create(eslDNA);
52 else if (esl_opt_GetBoolean(go, "--amino")) abc = esl_alphabet_Create(eslAMINO);
53
54 if (esl_opt_GetBoolean(go, "--noheader")) header = FALSE;
55
56 /* allow user to assert the input MSA format */
57 if (esl_opt_IsOn(go, "--informat") &&
58 (infmt = esl_msafile_EncodeFormat(esl_opt_GetString(go, "--informat"))) == eslMSAFILE_UNKNOWN)
59 esl_fatal("%s is not a valid MSA file format for --informat", esl_opt_GetString(go, "--informat"));
60
61 /* digital open */
62 if ( ( status = esl_msafile_Open(&abc, msafile, NULL, infmt, NULL, &afp)) != eslOK)
63 esl_msafile_OpenFailure(afp, status);
64
65 if (header) fprintf(ofp, "# seqname1 seqname2 %%id nid denomid %%match nmatch denommatch\n");
66 while ((status = esl_msafile_Read(afp, &msa)) == eslOK)
67 {
68 nali++;
69
70 namewidth = esl_str_GetMaxWidth(msa->sqname, msa->nseq);
71
72 for (i = 0; i < msa->nseq; i++)
73 for (j = i+1; j < msa->nseq; j++)
74 {
75 esl_dst_XPairId (abc, msa->ax[i], msa->ax[j], &pid, &nid, &n);
76 esl_dst_XPairMatch(abc, msa->ax[i], msa->ax[j], &pmatch, &nmatch, &m);
77 fprintf(ofp, "%-*s %-*s %6.2f %6d %6d %6.2f %6d %6d\n", namewidth, msa->sqname[i], namewidth, msa->sqname[j], pid*100.0, nid, n, pmatch*100.0, nmatch, m);
78 }
79
80 esl_msa_Destroy(msa);
81 }
82 if (nali == 0 || status != eslEOF) esl_msafile_ReadFailure(afp, status);
83
84 esl_msafile_Close(afp);
85 esl_alphabet_Destroy(abc);
86 esl_getopts_Destroy(go);
87 return 0;
88 }
89
90