1 /* @source needleall application
2 **
3 ** Many-to-many pairwise alignment
4 **
5 **
6 ** This program is free software; you can redistribute it and/or
7 ** modify it under the terms of the GNU General Public License
8 ** as published by the Free Software Foundation; either version 2
9 ** of the License, or (at your option) any later version.
10 **
11 ** This program is distributed in the hope that it will be useful,
12 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ** GNU General Public License for more details.
15 **
16 ** You should have received a copy of the GNU General Public License
17 ** along with this program; if not, write to the Free Software
18 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 ******************************************************************************/
20
21
22
23
24 #include "emboss.h"
25 #include <limits.h>
26 #include <math.h>
27
28
29
30
31 /* @prog needleall ***********************************************************
32 **
33 ** Many-to-many pairwise sequence alignment using Needleman-Wunsch algorithm
34 **
35 ******************************************************************************/
36
main(int argc,char ** argv)37 int main(int argc, char **argv)
38 {
39 AjPAlign align;
40 AjPSeqall seqall;
41 AjPSeqset seqset;
42 const AjPSeq seqa;
43 AjPSeq seqb;
44 AjPStr alga;
45 AjPStr algb;
46 AjPStr ss;
47 AjPFile errorf;
48
49 ajuint lena;
50 ajuint lenb;
51 ajuint k;
52
53 const char *p;
54 const char *q;
55
56 ajint start1 = 0;
57 ajint start2 = 0;
58
59 ajint *compass;
60 float* ix;
61 float* iy;
62 float* m;
63
64 AjPMatrixf matrix;
65 AjPSeqCvt cvt = 0;
66 float **sub;
67
68 float gapopen;
69 float gapextend;
70 float endgapopen;
71 float endgapextend;
72 float minscore;
73 size_t maxarr = 1000; /* arbitrary. realloc'd if needed */
74 size_t len;
75
76 float score;
77
78 AjBool dobrief = ajTrue;
79 AjBool endweight = ajFalse; /*whether end gap penalties should be applied*/
80
81 float id = 0.;
82 float sim = 0.;
83 float idx = 0.;
84 float simx = 0.;
85
86 AjPStr tmpstr = NULL;
87
88 embInit("needleall", argc, argv);
89
90 matrix = ajAcdGetMatrixf("datafile");
91 seqset = ajAcdGetSeqset("asequence");
92 ajSeqsetTrim(seqset);
93 seqall = ajAcdGetSeqall("bsequence");
94 gapopen = ajAcdGetFloat("gapopen");
95 gapextend = ajAcdGetFloat("gapextend");
96 endgapopen = ajAcdGetFloat("endopen");
97 endgapextend = ajAcdGetFloat("endextend");
98 minscore = ajAcdGetFloat("minscore");
99 dobrief = ajAcdGetBoolean("brief");
100 endweight = ajAcdGetBoolean("endweight");
101 align = ajAcdGetAlign("outfile");
102 errorf = ajAcdGetOutfile("errfile");
103
104 gapopen = ajRoundFloat(gapopen, 8);
105 gapextend = ajRoundFloat(gapextend, 8);
106
107 AJCNEW0(compass, maxarr);
108 AJCNEW0(m, maxarr);
109 AJCNEW0(ix, maxarr);
110 AJCNEW0(iy, maxarr);
111
112 alga = ajStrNew();
113 algb = ajStrNew();
114 ss = ajStrNew();
115
116 sub = ajMatrixfGetMatrix(matrix);
117 cvt = ajMatrixfGetCvt(matrix);
118
119 while(ajSeqallNext(seqall,&seqb))
120 {
121 ajSeqTrim(seqb);
122 lenb = ajSeqGetLen(seqb);
123
124 for(k=0;k<ajSeqsetGetSize(seqset);k++)
125 {
126 seqa = ajSeqsetGetseqSeq(seqset, k);
127 lena = ajSeqGetLen(seqa);
128
129
130 if(lenb > (LONG_MAX/(size_t)(lena+1)))
131 ajDie("Sequences too big.");
132
133 len = (size_t)lena*(size_t)lenb;
134
135 if(len>maxarr)
136 {
137 AJCRESIZETRY0(compass,(size_t)maxarr,len);
138 if(!compass)
139 ajDie("Sequences too big, memory allocation failed");
140 AJCRESIZETRY0(m,(size_t)maxarr,len);
141 if(!m)
142 ajDie("Sequences too big, memory allocation failed");
143 AJCRESIZETRY0(ix,(size_t)maxarr,len);
144 if(!ix)
145 ajDie("Sequences too big, memory allocation failed");
146 AJCRESIZETRY0(iy,(size_t)maxarr,len);
147 if(!iy)
148 ajDie("Sequences too big, memory allocation failed");
149 maxarr=len;
150 }
151
152
153 p = ajSeqGetSeqC(seqa);
154 q = ajSeqGetSeqC(seqb);
155
156 ajStrAssignC(&alga,"");
157 ajStrAssignC(&algb,"");
158
159 score = embAlignPathCalcWithEndGapPenalties(p, q, lena, lenb,
160 gapopen, gapextend, endgapopen, endgapextend,
161 &start1, &start2, sub, cvt, m, ix, iy,
162 compass, ajFalse, endweight);
163
164 embAlignWalkNWMatrixUsingCompass(p, q, &alga, &algb,
165 lena, lenb, &start1, &start2, compass);
166
167 if (score > minscore){
168 if(!ajAlignFormatShowsSequences(align))
169 {
170 ajAlignDefineCC(align, ajStrGetPtr(alga),
171 ajStrGetPtr(algb), ajSeqGetNameC(seqa),
172 ajSeqGetNameC(seqb));
173 ajAlignSetScoreR(align, score);
174 }
175 else
176 {
177 embAlignReportGlobal(align, seqa, seqb, alga, algb,
178 start1, start2,
179 gapopen, gapextend,
180 score, matrix,
181 ajSeqGetOffset(seqa), ajSeqGetOffset(seqb));
182 }
183
184 if(!dobrief)
185 {
186 embAlignCalcSimilarity(alga,algb,sub,cvt,lena,lenb,&id,
187 &sim, &idx, &simx);
188 ajFmtPrintS(&tmpstr,"Longest_Identity = %5.2f%%\n",
189 id);
190 ajFmtPrintAppS(&tmpstr,"Longest_Similarity = %5.2f%%\n",
191 sim);
192 ajFmtPrintAppS(&tmpstr,"Shortest_Identity = %5.2f%%\n",
193 idx);
194 ajFmtPrintAppS(&tmpstr,"Shortest_Similarity = %5.2f%%",
195 simx);
196 ajAlignSetSubHeaderApp(align, tmpstr);
197 }
198 ajAlignWrite(align);
199 ajAlignReset(align);
200 }
201 else
202 ajFmtPrintF(errorf,
203 "Alignment score (%.1f) is less than minimum score"
204 "(%.1f) for sequences %s vs %s\n",
205 score, minscore, ajSeqGetNameC(seqa),
206 ajSeqGetNameC(seqb));
207 }
208 }
209
210
211 if(!ajAlignFormatShowsSequences(align))
212 {
213 ajMatrixfDel(&matrix);
214 }
215
216 ajAlignClose(align);
217 ajAlignDel(&align);
218 ajFileClose(&errorf);
219
220
221 ajSeqallDel(&seqall);
222 ajSeqsetDel(&seqset);
223 ajSeqDel(&seqb);
224
225 AJFREE(compass);
226 AJFREE(ix);
227 AJFREE(iy);
228 AJFREE(m);
229
230 ajStrDel(&alga);
231 ajStrDel(&algb);
232 ajStrDel(&ss);
233 ajStrDel(&tmpstr);
234
235 embExit();
236
237 return 0;
238 }
239