1 /* @source needleall application
2 **
3 ** Many-to-many pairwise alignment
4 **
5 **
6 ** This program is free software; you can redistribute it and/or
7 ** modify it under the terms of the GNU General Public License
8 ** as published by the Free Software Foundation; either version 2
9 ** of the License, or (at your option) any later version.
10 **
11 ** This program is distributed in the hope that it will be useful,
12 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 ** GNU General Public License for more details.
15 **
16 ** You should have received a copy of the GNU General Public License
17 ** along with this program; if not, write to the Free Software
18 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
19 ******************************************************************************/
20 
21 
22 
23 
24 #include "emboss.h"
25 #include <limits.h>
26 #include <math.h>
27 
28 
29 
30 
31 /* @prog needleall ***********************************************************
32 **
33 ** Many-to-many pairwise sequence alignment using Needleman-Wunsch algorithm
34 **
35 ******************************************************************************/
36 
main(int argc,char ** argv)37 int main(int argc, char **argv)
38 {
39     AjPAlign align;
40     AjPSeqall seqall;
41     AjPSeqset seqset;
42     const AjPSeq seqa;
43     AjPSeq seqb;
44     AjPStr alga;
45     AjPStr algb;
46     AjPStr ss;
47     AjPFile errorf;
48 
49     ajuint lena;
50     ajuint lenb;
51     ajuint k;
52 
53     const char *p;
54     const char *q;
55 
56     ajint start1 = 0;
57     ajint start2 = 0;
58 
59     ajint *compass;
60     float* ix;
61     float* iy;
62     float* m;
63 
64     AjPMatrixf matrix;
65     AjPSeqCvt cvt = 0;
66     float **sub;
67 
68     float gapopen;
69     float gapextend;
70     float endgapopen;
71     float endgapextend;
72     float minscore;
73     size_t maxarr = 1000;  /* arbitrary. realloc'd if needed */
74     size_t len;
75 
76     float score;
77 
78     AjBool dobrief = ajTrue;
79     AjBool endweight = ajFalse; /*whether end gap penalties should be applied*/
80 
81     float id   = 0.;
82     float sim  = 0.;
83     float idx  = 0.;
84     float simx = 0.;
85 
86     AjPStr tmpstr = NULL;
87 
88     embInit("needleall", argc, argv);
89 
90     matrix    = ajAcdGetMatrixf("datafile");
91     seqset    = ajAcdGetSeqset("asequence");
92     ajSeqsetTrim(seqset);
93     seqall    = ajAcdGetSeqall("bsequence");
94     gapopen   = ajAcdGetFloat("gapopen");
95     gapextend = ajAcdGetFloat("gapextend");
96     endgapopen   = ajAcdGetFloat("endopen");
97     endgapextend = ajAcdGetFloat("endextend");
98     minscore = ajAcdGetFloat("minscore");
99     dobrief   = ajAcdGetBoolean("brief");
100     endweight   = ajAcdGetBoolean("endweight");
101     align     = ajAcdGetAlign("outfile");
102     errorf    = ajAcdGetOutfile("errfile");
103 
104     gapopen = ajRoundFloat(gapopen, 8);
105     gapextend = ajRoundFloat(gapextend, 8);
106 
107     AJCNEW0(compass, maxarr);
108     AJCNEW0(m, maxarr);
109     AJCNEW0(ix, maxarr);
110     AJCNEW0(iy, maxarr);
111 
112     alga  = ajStrNew();
113     algb  = ajStrNew();
114     ss = ajStrNew();
115 
116     sub = ajMatrixfGetMatrix(matrix);
117     cvt = ajMatrixfGetCvt(matrix);
118 
119     while(ajSeqallNext(seqall,&seqb))
120     {
121         ajSeqTrim(seqb);
122         lenb = ajSeqGetLen(seqb);
123 
124         for(k=0;k<ajSeqsetGetSize(seqset);k++)
125         {
126             seqa = ajSeqsetGetseqSeq(seqset, k);
127             lena = ajSeqGetLen(seqa);
128 
129 
130             if(lenb > (LONG_MAX/(size_t)(lena+1)))
131                 ajDie("Sequences too big.");
132 
133             len = (size_t)lena*(size_t)lenb;
134 
135             if(len>maxarr)
136             {
137                 AJCRESIZETRY0(compass,(size_t)maxarr,len);
138                 if(!compass)
139                     ajDie("Sequences too big, memory allocation failed");
140                 AJCRESIZETRY0(m,(size_t)maxarr,len);
141                 if(!m)
142                     ajDie("Sequences too big, memory allocation failed");
143                 AJCRESIZETRY0(ix,(size_t)maxarr,len);
144                 if(!ix)
145                     ajDie("Sequences too big, memory allocation failed");
146                 AJCRESIZETRY0(iy,(size_t)maxarr,len);
147                 if(!iy)
148                     ajDie("Sequences too big, memory allocation failed");
149                 maxarr=len;
150             }
151 
152 
153             p = ajSeqGetSeqC(seqa);
154             q = ajSeqGetSeqC(seqb);
155 
156             ajStrAssignC(&alga,"");
157             ajStrAssignC(&algb,"");
158 
159             score = embAlignPathCalcWithEndGapPenalties(p, q, lena, lenb,
160                     gapopen, gapextend, endgapopen, endgapextend,
161                     &start1, &start2, sub, cvt, m, ix, iy,
162                     compass, ajFalse, endweight);
163 
164             embAlignWalkNWMatrixUsingCompass(p, q, &alga, &algb,
165                     lena, lenb, &start1, &start2, compass);
166 
167             if (score > minscore){
168                 if(!ajAlignFormatShowsSequences(align))
169                 {
170                     ajAlignDefineCC(align, ajStrGetPtr(alga),
171                             ajStrGetPtr(algb), ajSeqGetNameC(seqa),
172                             ajSeqGetNameC(seqb));
173                     ajAlignSetScoreR(align, score);
174                 }
175                 else
176                 {
177                     embAlignReportGlobal(align, seqa, seqb, alga, algb,
178                             start1, start2,
179                             gapopen, gapextend,
180                             score, matrix,
181                             ajSeqGetOffset(seqa), ajSeqGetOffset(seqb));
182                 }
183 
184                 if(!dobrief)
185                 {
186                     embAlignCalcSimilarity(alga,algb,sub,cvt,lena,lenb,&id,
187                             &sim, &idx, &simx);
188                     ajFmtPrintS(&tmpstr,"Longest_Identity = %5.2f%%\n",
189                             id);
190                     ajFmtPrintAppS(&tmpstr,"Longest_Similarity = %5.2f%%\n",
191                             sim);
192                     ajFmtPrintAppS(&tmpstr,"Shortest_Identity = %5.2f%%\n",
193                             idx);
194                     ajFmtPrintAppS(&tmpstr,"Shortest_Similarity = %5.2f%%",
195                             simx);
196                     ajAlignSetSubHeaderApp(align, tmpstr);
197                 }
198                 ajAlignWrite(align);
199                 ajAlignReset(align);
200             }
201             else
202                 ajFmtPrintF(errorf,
203                         "Alignment score (%.1f) is less than minimum score"
204                         "(%.1f) for sequences %s vs %s\n",
205                         score, minscore, ajSeqGetNameC(seqa),
206                         ajSeqGetNameC(seqb));
207         }
208     }
209 
210 
211     if(!ajAlignFormatShowsSequences(align))
212     {
213         ajMatrixfDel(&matrix);
214     }
215 
216     ajAlignClose(align);
217     ajAlignDel(&align);
218     ajFileClose(&errorf);
219 
220 
221     ajSeqallDel(&seqall);
222     ajSeqsetDel(&seqset);
223     ajSeqDel(&seqb);
224 
225     AJFREE(compass);
226     AJFREE(ix);
227     AJFREE(iy);
228     AJFREE(m);
229 
230     ajStrDel(&alga);
231     ajStrDel(&algb);
232     ajStrDel(&ss);
233     ajStrDel(&tmpstr);
234 
235     embExit();
236 
237     return 0;
238 }
239