1 /* @source ajalign ************************************************************
2 **
3 ** AJAX ALIGN (ajax alignment output) functions
4 **
5 ** These functions align AJAX sequences and report them in a variety
6 ** of formats.
7 **
8 ** @author Copyright (C) 2001 Peter Rice, LION Bioscience Ltd.
9 ** @version $Revision: 1.132 $
10 ** @modified $Date: 2012/07/02 16:44:15 $ by $Author: rice $
11 ** @@
12 **
13 ** This library is free software; you can redistribute it and/or
14 ** modify it under the terms of the GNU Lesser General Public
15 ** License as published by the Free Software Foundation; either
16 ** version 2.1 of the License, or (at your option) any later version.
17 **
18 ** This library is distributed in the hope that it will be useful,
19 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 ** Lesser General Public License for more details.
22 **
23 ** You should have received a copy of the GNU Lesser General Public
24 ** License along with this library; if not, write to the Free Software
25 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
26 ** MA 02110-1301, USA.
27 **
28 ******************************************************************************/
29
30
31 #include "ajlib.h"
32
33 #include "ajalign.h"
34 #include "ajbase.h"
35 #include "ajarr.h"
36 #include "ajutil.h"
37 #include "ajnam.h"
38 #include "ajmath.h"
39 #include "ajseqwrite.h"
40
41 #include <string.h>
42 #include <ctype.h>
43 #include <stddef.h>
44 #include <stdarg.h>
45 #include <float.h>
46 #include <limits.h>
47 #include <math.h>
48
49
50
51
52 /* @datastatic AlignPData *****************************************************
53 **
54 ** Ajax Align Data object.
55 **
56 ** Holds definition of feature alignment data. Stored in AjPAlign structure as
57 ** an AjPList.
58 **
59 ** @alias AlignSData
60 ** @alias AlignOData
61 **
62 ** @attr Start [ajint*] Start position for each sequence
63 ** @attr End [ajint*] End position for each sequence
64 ** @attr Len [ajint*] Original length for each sequence
65 ** @attr Offset [ajint*] Offset for numbering start of each sequence
66 ** @attr Offend [ajint*] Offset for numbering end of each sequence
67 ** @attr SubOffset [ajint*] Offset of used subsequence within given sequence
68 ** generally for cases where SeqExternal is set
69 ** in the AjPAlign object that includes this
70 ** AlignPData object. Used for fetching the
71 ** actual sequence from the stored sequence,
72 ** but ignored when calculating position.
73 ** @attr Rev [AjBool*] Reverse each sequence (so far not used)
74 ** @attr RealSeq [AjPSeq*] Sequences usually as copies of data from the
75 ** application unless SeqExternal in the AjPAlign
76 ** object is true
77 ** @attr Seq [const AjPSeq*] Pointer to sequence in RealSeq
78 ** unless SeqExternal in the AjPAlign
79 ** object is true when pointer is to
80 ** sequence elsewhere
81 ** @attr LenAli [ajint] Length of the alignment
82 ** @attr NumId [ajint] Number of identical positions, usually calculated
83 ** @attr NumSim [ajint] Number of similar positions, usually calculated
84 ** @attr NumGap [ajint] Number of gap positions, usually calculated
85 ** @attr Score [AjPStr] Score to be reported - stored as a string for output
86 ** set by functions using ajint or float input
87 ** @attr Nseqs [ajint] Number of sequences, size of each array. This is
88 ** duplicated in the AjPAlign object but is useful
89 ** here for constructor and destructor efficiency.
90 ** @attr Padding [char[4]] Padding to alignment boundary
91 ** @@
92 ******************************************************************************/
93
94 typedef struct AlignSData
95 {
96 ajint* Start;
97 ajint* End;
98 ajint* Len;
99 ajint* Offset;
100 ajint* Offend;
101 ajint* SubOffset;
102 AjBool* Rev;
103 AjPSeq* RealSeq;
104 const AjPSeq* Seq;
105 ajint LenAli;
106 ajint NumId;
107 ajint NumSim;
108 ajint NumGap;
109 AjPStr Score;
110 ajint Nseqs;
111 char Padding[4];
112 } AlignOData;
113
114 #define AlignPData AlignOData*
115
116
117
118
119 /* @datastatic AlignPFormat ***************************************************
120 **
121 ** Ajax Align Formats
122 **
123 ** List of alignment output formats available
124 **
125 ** @alias AlignSFormat
126 ** @alias AlignOFormat
127 **
128 ** @attr Name [const char*] format name
129 ** @attr Desc [const char*] Format description
130 ** @attr Alias [AjBool] Name is an alias for an identical definition
131 ** @attr Nucleotide [AjBool] ajTrue if format can use nucleotide sequences
132 ** @attr Protein [AjBool] ajTrue if format can use protein sequences
133 ** @attr Showheader [AjBool] ajTrue if header appears in output
134 ** @attr Showseqs [AjBool] ajTrue if sequences appear in output
135 ** @attr Padding [ajint] Padding to alignment boundary
136 ** @attr Minseq [ajint] Minimum number of sequences, 2 for pairwise
137 ** @attr Maxseq [ajint] Maximum number of sequences, 2 for pairwise
138 ** @attr Write [void function] Function to write alignment
139 ** @@
140 ******************************************************************************/
141
142 typedef struct AlignSFormat
143 {
144 const char *Name;
145 const char *Desc;
146 AjBool Alias;
147 AjBool Nucleotide;
148 AjBool Protein;
149 AjBool Showheader;
150 AjBool Showseqs;
151 ajint Padding;
152 ajint Minseq;
153 ajint Maxseq;
154 void (*Write) (AjPAlign thys);
155 } AlignOFormat;
156
157 #define AlignPFormat AlignOFormat*
158
159
160
161 static ajint alignCigar(const AjPSeq qryseq, const AjPSeq refseq,
162 ajint qstart, ajint qend,
163 ajint rstart,
164 AjPStr *mseq, AjPStr* cigar, AjBool seqExternal);
165 static void alignConsStats(AjPAlign thys, ajint iali, AjPStr *cons,
166 ajint* retident, ajint* retsim, ajint* retgap,
167 ajint* retlen);
168 static AjBool alignConsSet(const AjPAlign thys, ajint iali, ajint seqnum,
169 AjPStr *cons);
170 static AlignPData alignData(const AjPAlign thys, ajint iali);
171 static void alignDataDel(AlignPData* pthys, AjBool external);
172 static AlignPData alignDataNew(ajint nseqs, AjBool external);
173 static void alignDataSetSequence(AlignPData thys, const AjPSeq seq,
174 ajint i, AjBool external);
175 static void alignDiff(AjPStr* pmark, const AjPStr seq, char idchar);
176 static ajint alignLen(const AjPAlign thys, ajint iali);
177 static const AjPSeq alignSeq(const AjPAlign thys, ajint iseq, ajint iali);
178 static ajint alignSeqBegin(const AlignPData thys, ajint iseq);
179 static ajint alignSeqEnd(const AlignPData thys, ajint iseq);
180 static ajint alignSeqGapBegin(const AlignPData data, ajint iseq);
181 static ajint alignSeqGapEnd(const AlignPData data, ajint iseq);
182 static ajint alignSeqIncrement(const AlignPData thys, ajint iseq);
183 static const AjPStr alignSeqName(const AjPAlign thys, const AjPSeq seq);
184 static AjBool alignSeqRev(const AlignPData thys, ajint iseq);
185 static const AjPSeq* alignSeqs(const AjPAlign thys, ajint iali);
186 static void alignSame(AjPStr* pmark, const AjPStr seq, char idchar);
187 static void alignSim(AjPStr* pmark, const char idch, const char simch,
188 const char misch, const char gapch);
189 static float alignTotweight(const AjPAlign thys, ajint iali);
190 static void alignTraceData(const AjPAlign thys);
191
192 /*static void alignWriteBam(AjPAlign thys);*/
193 static void alignWriteClustal(AjPAlign thys);
194 static void alignWriteFasta(AjPAlign thys);
195 static void alignWriteMark(AjPAlign thys, ajint iali, ajint markx);
196 static void alignWriteMarkX0(AjPAlign thys);
197 static void alignWriteMarkX1(AjPAlign thys);
198 static void alignWriteMarkX2(AjPAlign thys);
199 static void alignWriteMarkX3(AjPAlign thys);
200 static void alignWriteMarkX10(AjPAlign thys);
201 static void alignWriteMatch(AjPAlign thys);
202 static void alignWriteMega(AjPAlign thys);
203 static void alignWriteMeganon(AjPAlign thys);
204 static void alignWriteMsf(AjPAlign thys);
205 static void alignWriteNexus(AjPAlign thys);
206 static void alignWriteNexusnon(AjPAlign thys);
207 static void alignWritePhylip(AjPAlign thys);
208 static void alignWritePhylipnon(AjPAlign thys);
209 static void alignWriteSam(AjPAlign thys);
210 static void alignWriteScore(AjPAlign thys);
211 static void alignWriteSelex(AjPAlign thys);
212 static void alignWriteSeqformat(AjPAlign thys, ajint iali,
213 const char* sqfmt);
214 static void alignWriteSimple(AjPAlign thys);
215 static void alignWriteSrs(AjPAlign thys);
216 static void alignWriteSrsAny(AjPAlign thys,
217 ajint imax, AjBool mark);
218 static void alignWriteSrsPair(AjPAlign thys);
219 static void alignWriteTCoffee(AjPAlign thys);
220 static void alignWriteTrace(AjPAlign thys);
221 static void alignWriteTreecon(AjPAlign thys);
222 static void alignWriteHeaderNum(AjPAlign thys, ajint iali);
223
224
225
226
227 /* @funclist alignFormat ******************************************************
228 **
229 ** Functions to write alignments
230 **
231 ******************************************************************************/
232
233 static AlignOFormat alignFormat[] =
234 {
235 /* name description function */
236 /* Alias nucleic protein header min max zero */
237 /* standard sequence formats */
238 {"unknown", "Unknown format",
239 AJFALSE, AJFALSE, AJFALSE, AJTRUE, AJTRUE, 0, 0, 0, &alignWriteSimple},
240 {"fasta", "Fasta format sequence",
241 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWriteFasta},
242 {"a2m", "A2M (fasta) format sequence", /* same as fasta */
243 AJTRUE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWriteFasta},
244 {"msf", "MSF format sequence",
245 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWriteMsf},
246 {"clustal", "clustalw format sequence",
247 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWriteClustal},
248 {"mega", "Mega format sequence",
249 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWriteMega},
250 {"meganon", "Mega non-interleaved format sequence",
251 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWriteMeganon},
252 {"nexus", "nexus/paup format sequence",
253 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWriteNexus},
254 {"paup", "nexus/paup format sequence (alias)",
255 AJTRUE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWriteNexus},
256 {"nexusnon", "nexus/paup non-interleaved format sequence",
257 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWriteNexusnon},
258 {"paupnon", "nexus/paup non-interleaved format sequence (alias)",
259 AJTRUE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWriteNexusnon},
260 {"phylip", "phylip format sequence",
261 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWritePhylip},
262 {"phylipnon", "phylip non-interleaved format sequence",
263 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWritePhylipnon},
264 {"selex", "SELEX format sequence",
265 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWriteSelex},
266 {"treecon", "Treecon format sequence",
267 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWriteTreecon},
268 /* trace for debug */
269 {"debug", "Debugging trace of full internal data content",
270 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJTRUE, 0, 0, 0, &alignWriteTrace},
271 /* alignment formats */
272 {"markx0", "Pearson MARKX0 format",
273 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJTRUE, 0, 2, 2, &alignWriteMarkX0},
274 {"markx1", "Pearson MARKX1 format",
275 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJTRUE, 0, 2, 2, &alignWriteMarkX1},
276 {"markx2", "Pearson MARKX2 format",
277 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJTRUE, 0, 2, 2, &alignWriteMarkX2},
278 {"markx3", "Pearson MARKX3 format",
279 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJTRUE, 0, 2, 2, &alignWriteMarkX3},
280 {"markx10", "Pearson MARKX10 format",
281 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJTRUE, 0, 2, 2, &alignWriteMarkX10},
282 {"match","Start and end of matches between sequence pairs",
283 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJFALSE, 0, 2, 2, &alignWriteMatch},
284 {"multiple", "Simple multiple alignment",
285 AJTRUE, AJTRUE, AJTRUE, AJTRUE, AJTRUE, 0, 0, 0, &alignWriteSimple},
286 {"pair", "Simple pairwise alignment",
287 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJTRUE, 0, 2, 2, &alignWriteSimple},
288 {"simple", "Simple multiple alignment",
289 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJTRUE, 0, 0, 0, &alignWriteSimple},
290 {"sam", "Sequence alignent/map (SAM) format",
291 AJFALSE, AJTRUE, AJFALSE, AJFALSE, AJTRUE, 0, 2, 2, &alignWriteSam},
292 /*
293 {"bam", "Binary sequence alignent/map (BAM) format",
294 AJFALSE, AJTRUE, AJFALSE, AJFALSE, AJTRUE, 0, 2, 2, &alignWriteBam},
295 */
296 {"score", "Score values for pairs of sequences",
297 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJFALSE, 0, 2, 2, &alignWriteScore},
298 {"srs", "Simple multiple sequence format for SRS",
299 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJTRUE, 0, 0, 0, &alignWriteSrs},
300 {"srspair", "Simple pairwise sequence format for SRS",
301 AJFALSE, AJTRUE, AJTRUE, AJTRUE, AJTRUE, 0, 2, 2, &alignWriteSrsPair},
302 {"tcoffee", "TCOFFEE program format",
303 AJFALSE, AJTRUE, AJTRUE, AJFALSE, AJTRUE, 0, 0, 0, &alignWriteTCoffee},
304 {NULL, NULL,
305 AJFALSE, AJFALSE, AJFALSE, AJFALSE, AJTRUE, 0, 0, 0, NULL}
306 };
307
308 static char alignDefformat[] = "simple";
309
310
311
312
313 /* pair only works if the alignment is defined as 2 sequences */
314
315 /* other formats to be defined:
316 **
317 ** markx1 .. markx10 as for FASTA (code in matcher.c)
318 ** blast to be implemented
319 */
320
321
322
323
324 /* @funcstatic alignWriteTrace ************************************************
325 **
326 ** Writes an alignment in Trace format
327 **
328 ** @param [u] thys [AjPAlign] Alignment object
329 ** @return [void]
330 **
331 ** @release 2.1.0
332 ** @@
333 ******************************************************************************/
334
alignWriteTrace(AjPAlign thys)335 static void alignWriteTrace(AjPAlign thys)
336 {
337 const AjPSeq seq = NULL;
338 ajint nali;
339 ajint iali;
340 ajint iseq;
341 ajint nseq;
342
343 ajint identity = 0;
344 ajint similarity = 0;
345 ajint gaps = 0;
346 ajint seqlen = 0;
347
348 AlignPData* pdata = NULL;
349 AlignPData data = NULL;
350
351 AjPStr tmpstr = NULL;
352 AjPFile outf = NULL;
353
354 nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
355 ajFmtPrintF(thys->File, "Trace output\n");
356 ajFmtPrintF(thys->File, "============\n");
357
358 ajFmtPrintF(thys->File,
359 "a: Type:'%S' Formatstr:'%S' Format:%d\n",
360 thys->Type, thys->Formatstr, thys->Format);
361
362 ajFmtPrintF(thys->File,
363 "b: File:%F\n",
364 thys->File);
365
366 ajFmtPrintF(thys->File,
367 "Show: ShowAcc:%B ShowDes:%B ShowUsa:%B\n",
368 thys->Showacc, thys->Showdes, thys->Showusa);
369
370 ajFmtPrintF(thys->File,
371 "Booleans: Multi:%B Global:%B SeqOnly:%B SeqExternal:%B\n",
372 thys->Multi, thys->Global, thys->SeqOnly, thys->SeqExternal);
373
374 ajFmtPrintF(thys->File,
375 "Numbers: NMin:%d NMax:%d Nseqs:%d Count:%d Width:%d\n",
376 thys->Nmin, thys->Nmax, thys->Nseqs,
377 thys->Count, thys->Width);
378
379 ajFmtPrintF(thys->File,
380 "Matrices: IMatrix:'%S'(%d) FMatrix:'%S'(%d)\n",
381 ajMatrixGetName(thys->IMatrix),
382 ajMatrixGetSize(thys->IMatrix),
383 ajMatrixfGetName(thys->FMatrix),
384 ajMatrixfGetSize(thys->FMatrix));
385
386 ajFmtPrintF(thys->File,
387 "Strings: Matrix:'%S' GapPen:'%S' ExtPen:'%S'\n",
388 thys->Matrix, thys->GapPen, thys->ExtPen);
389
390 ajFmtPrintF(thys->File,
391 "Header: '%S'\n",
392 thys->Header);
393 ajFmtPrintF(thys->File,
394 "SubHeader: '%S'\n",
395 thys->SubHeader);
396 ajFmtPrintF(thys->File,
397 "Tail: '%S'\n",
398 thys->Tail);
399 ajFmtPrintF(thys->File,
400 "SubTail: '%S'\n",
401 thys->Tail);
402
403 ajFmtPrintF(thys->File,
404 "Key: seqlen/len offset> start..end <offend (suboffset) rev "
405 "Begin..End GapBegin..End\n");
406
407 for(iali=0; iali<nali; iali++)
408 {
409 data = pdata[iali];
410 ajFmtPrintF(thys->File,
411 "\nalign%d: Nseqs:%d LenAli:%d "
412 "NumId:%d NumSim:%d NumGap:%d Score:'%S'\n",
413 iali, data->Nseqs, data->LenAli,
414 data->NumId, data->NumSim, data->NumGap,
415 data->Score);
416
417 alignConsStats(thys, iali, &tmpstr,
418 &identity, &similarity, &gaps, &seqlen);
419
420 ajAlignSetStats(thys, iali, seqlen, identity, similarity, gaps, NULL);
421 ajFmtPrintF(thys->File,
422 "fixed%d: Nseqs:%d LenAli:%d "
423 "NumId:%d NumSim:%d NumGap:%d Score:'%S'\n",
424 iali, data->Nseqs, data->LenAli,
425 data->NumId, data->NumSim, data->NumGap,
426 data->Score);
427 ajAlignSetSubStandard(thys, iali);
428 outf = thys->File;
429
430 /* turn off printing of the header, keep the calculation */
431 thys->File = NULL;
432 alignWriteHeaderNum(thys,iali);
433 thys->File = outf;
434
435 nseq = thys->Nseqs;
436
437 for(iseq=0; iseq < nseq; iseq++)
438 {
439 seq = alignSeq(thys, iseq, iali);
440 ajFmtPrintF(thys->File,
441 "Num%d.%d: %d/%d %d> %d..%d <%d (%d) Rev:%B %d..%d "
442 "%d..%d\n",
443 iali, iseq,
444 (ajSeqGetLen(seq) - ajSeqCountGaps(seq)),
445 data->Len[iseq],
446 data->Offset[iseq],
447 data->Start[iseq], data->End[iseq],
448 data->Offend[iseq], data->SubOffset[iseq],
449 data->Rev[iseq],
450 alignSeqBegin(data, iseq), alignSeqEnd(data, iseq),
451 alignSeqGapBegin(data, iseq),
452 alignSeqGapEnd(data, iseq));
453
454 if(ajSeqGetLen(seq) <= 40)
455 ajFmtPrintF(thys->File,
456 "Seq%d.%d: %d '%S'\n",
457 iali, iseq, ajSeqGetLen(seq), ajSeqGetSeqS(seq));
458 else
459 {
460 ajStrAssignSubS(&tmpstr, ajSeqGetSeqS(seq), -20, -1);
461 ajFmtPrintF(thys->File,
462 "Seq%d.%d: %d '%20.20S...%20S'\n",
463 iali, iseq, ajSeqGetLen(seq),
464 ajSeqGetSeqS(seq), tmpstr);
465 }
466 }
467 }
468
469 ajStrDel(&tmpstr);
470
471 AJFREE(pdata);
472
473 return;
474 }
475
476
477
478
479 /* @funcstatic alignWriteClustal **********************************************
480 **
481 ** Writes an alignment in ClustalW format
482 **
483 ** @param [u] thys [AjPAlign] Alignment object
484 ** @return [void]
485 **
486 ** @release 6.1.0
487 ** @@
488 ******************************************************************************/
489
alignWriteClustal(AjPAlign thys)490 static void alignWriteClustal(AjPAlign thys)
491 {
492 alignWriteSeqformat(thys, 0, "clustal");
493
494 return;
495 }
496
497
498
499
500 /* @funcstatic alignWriteMega *************************************************
501 **
502 ** Writes an alignment in Mega format
503 **
504 ** @param [u] thys [AjPAlign] Alignment object
505 ** @return [void]
506 **
507 ** @release 6.1.0
508 ** @@
509 ******************************************************************************/
510
alignWriteMega(AjPAlign thys)511 static void alignWriteMega(AjPAlign thys)
512 {
513 alignWriteSeqformat(thys, 0, "mega");
514
515 return;
516 }
517
518
519
520
521 /* @funcstatic alignWriteMeganon **********************************************
522 **
523 ** Writes an alignment in Mega non-interleaved format
524 **
525 ** @param [u] thys [AjPAlign] Alignment object
526 ** @return [void]
527 **
528 ** @release 6.1.0
529 ** @@
530 ******************************************************************************/
531
alignWriteMeganon(AjPAlign thys)532 static void alignWriteMeganon(AjPAlign thys)
533 {
534 alignWriteSeqformat(thys, 0, "meganon");
535
536 return;
537 }
538
539
540
541
542 /* @funcstatic alignWriteMsf **************************************************
543 **
544 ** Writes an alignment in MSF format
545 **
546 ** @param [u] thys [AjPAlign] Alignment object
547 ** @return [void]
548 **
549 ** @release 2.1.0
550 ** @@
551 ******************************************************************************/
552
alignWriteMsf(AjPAlign thys)553 static void alignWriteMsf(AjPAlign thys)
554 {
555 alignWriteSeqformat(thys, 0, "msf");
556
557 return;
558 }
559
560
561
562
563 /* @funcstatic alignWriteNexus ************************************************
564 **
565 ** Writes an alignment in Nexus/PAUP interleaved format
566 **
567 ** @param [u] thys [AjPAlign] Alignment object
568 ** @return [void]
569 **
570 ** @release 6.1.0
571 ** @@
572 ******************************************************************************/
573
alignWriteNexus(AjPAlign thys)574 static void alignWriteNexus(AjPAlign thys)
575 {
576 alignWriteSeqformat(thys, 0, "nexus");
577
578 return;
579 }
580
581
582
583
584 /* @funcstatic alignWriteNexusnon *********************************************
585 **
586 ** Writes an alignment in Nexus/PAUP non-interleaved format
587 **
588 ** @param [u] thys [AjPAlign] Alignment object
589 ** @return [void]
590 **
591 ** @release 6.1.0
592 ** @@
593 ******************************************************************************/
594
alignWriteNexusnon(AjPAlign thys)595 static void alignWriteNexusnon(AjPAlign thys)
596 {
597 alignWriteSeqformat(thys, 0, "nexusnon");
598
599 return;
600 }
601
602
603
604
605 /* @funcstatic alignWritePhylip ***********************************************
606 **
607 ** Writes an alignment in Phylip interleaved format
608 **
609 ** @param [u] thys [AjPAlign] Alignment object
610 ** @return [void]
611 **
612 ** @release 6.1.0
613 ** @@
614 ******************************************************************************/
615
alignWritePhylip(AjPAlign thys)616 static void alignWritePhylip(AjPAlign thys)
617 {
618 alignWriteSeqformat(thys, 0, "phylip");
619
620 return;
621 }
622
623
624
625
626 /* @funcstatic alignWritePhylipnon ********************************************
627 **
628 ** Writes an alignment in Phylip non-interleaved format
629 **
630 ** @param [u] thys [AjPAlign] Alignment object
631 ** @return [void]
632 **
633 ** @release 6.1.0
634 ** @@
635 ******************************************************************************/
636
alignWritePhylipnon(AjPAlign thys)637 static void alignWritePhylipnon(AjPAlign thys)
638 {
639 alignWriteSeqformat(thys, 0, "phylipnon");
640
641 return;
642 }
643
644
645
646
647 /* @funcstatic alignWriteSelex ************************************************
648 **
649 ** Writes an alignment in SELEX format
650 **
651 ** @param [u] thys [AjPAlign] Alignment object
652 ** @return [void]
653 **
654 ** @release 6.1.0
655 ** @@
656 ******************************************************************************/
657
alignWriteSelex(AjPAlign thys)658 static void alignWriteSelex(AjPAlign thys)
659 {
660 alignWriteSeqformat(thys, 0, "selex");
661
662 return;
663 }
664
665
666
667
668 /* @funcstatic alignWriteTreecon **********************************************
669 **
670 ** Writes an alignment in Treecon format
671 **
672 ** @param [u] thys [AjPAlign] Alignment object
673 ** @return [void]
674 **
675 ** @release 6.1.0
676 ** @@
677 ******************************************************************************/
678
alignWriteTreecon(AjPAlign thys)679 static void alignWriteTreecon(AjPAlign thys)
680 {
681 alignWriteSeqformat(thys, 0, "treecon");
682
683 return;
684 }
685
686
687
688
689 /* @funcstatic alignWriteFasta ************************************************
690 **
691 ** Writes an alignment in MSF format
692 **
693 ** @param [u] thys [AjPAlign] Alignment object
694 ** @return [void]
695 **
696 ** @release 2.1.0
697 ** @@
698 ******************************************************************************/
699
alignWriteFasta(AjPAlign thys)700 static void alignWriteFasta(AjPAlign thys)
701 {
702 alignWriteSeqformat(thys, 0, "fasta");
703
704 return;
705 }
706
707
708
709
710 /* @funcstatic alignWriteSeqformat ********************************************
711 **
712 ** Writes an alignment in a sequence format.
713 ** Usually called for only one alignment, ignoring any
714 ** sub-alignments as they would overwrite.
715 **
716 ** @param [u] thys [AjPAlign] Alignment object
717 ** @param [r] iali [ajint] Alignment number
718 ** @param [r] sqfmt [const char*] Sequence output format
719 ** @return [void]
720 **
721 ** @release 3.0.0
722 ** @@
723 ******************************************************************************/
724
alignWriteSeqformat(AjPAlign thys,ajint iali,const char * sqfmt)725 static void alignWriteSeqformat(AjPAlign thys, ajint iali, const char* sqfmt)
726 {
727 AlignPData data = NULL;
728
729 AjPSeq seq = NULL;
730 AjPSeqout seqout;
731
732 ajint iseq;
733 ajint istart;
734 ajint iend;
735 ajint ilen;
736 AjPStr tmpstr = NULL;
737
738 seqout = ajSeqoutNewFile(thys->File);
739
740 thys->SeqOnly = ajTrue;
741
742 ajStrAssignC(&seqout->Formatstr, sqfmt);
743 seqout->File = thys->File;
744
745 ajSeqoutOpen(seqout);
746
747 ajListPeekNumber(thys->Data, 0, (void**) &data);
748 ilen = data->LenAli;
749
750 for(iseq=0; iseq < thys->Nseqs; iseq++)
751 {
752 seq = ajSeqNewSeq(alignSeq(thys, iseq, iali));
753 istart = data->SubOffset[iseq];
754 iend = istart + ilen - 1;
755 ajStrAssignSubS(&tmpstr, ajSeqGetSeqS(seq), istart, iend);
756 ajSeqAssignSeqS(seq, tmpstr);
757 ajSeqoutWriteSeq(seqout, seq);
758 ajSeqDel(&seq);
759 ajStrDel(&tmpstr);
760 }
761
762 ajSeqoutClose(seqout);
763 seqout->File = NULL;
764
765 ajSeqoutDel(&seqout);
766
767 return;
768 }
769
770
771
772
773 /* @funcstatic alignWriteMarkX0 ***********************************************
774 **
775 ** Writes an alignment in Fasta MarkX 0 format.
776 **
777 ** This is the standard default output format for FASTA programs.
778 **
779 ** @param [u] thys [AjPAlign] Alignment object
780 ** @return [void]
781 **
782 ** @release 2.1.0
783 ** @@
784 ******************************************************************************/
785
alignWriteMarkX0(AjPAlign thys)786 static void alignWriteMarkX0(AjPAlign thys)
787 {
788 ajint iali;
789 ajint nali;
790
791 nali = (ajuint) ajListGetLength(thys->Data);
792
793 for(iali=0; iali < nali; iali++)
794 alignWriteMark(thys, iali, 0);
795
796 return;
797 }
798
799
800
801
802 /* @funcstatic alignWriteMarkX1 ***********************************************
803 **
804 ** Writes an alignment in Fasta MarkX 1 format.
805 **
806 ** This is an alternative output format for FASTA programs in which
807 ** identities are not marked.
808 ** Instead conservative replacements are denoted by 'x'
809 ** and non-conservative substitutions by 'X'.
810 **
811 ** @param [u] thys [AjPAlign] Alignment object
812 ** @return [void]
813 **
814 ** @release 2.1.0
815 ** @@
816 ******************************************************************************/
817
alignWriteMarkX1(AjPAlign thys)818 static void alignWriteMarkX1(AjPAlign thys)
819 {
820 ajint iali;
821 ajint nali;
822
823 nali = (ajuint) ajListGetLength(thys->Data);
824
825 for(iali=0; iali < nali; iali++)
826 alignWriteMark(thys, iali, 1);
827
828 return;
829 }
830
831
832
833
834 /* @funcstatic alignWriteMarkX2 ***********************************************
835 **
836 ** Writes an alignment in Fasta MarkX 2 format
837 **
838 ** This is an alternative output format for FASTA programs in which
839 ** the residues in the second sequence are only shown if they are
840 ** different from the first.
841 **
842 ** @param [u] thys [AjPAlign] Alignment object
843 ** @return [void]
844 **
845 ** @release 2.1.0
846 ** @@
847 ******************************************************************************/
848
alignWriteMarkX2(AjPAlign thys)849 static void alignWriteMarkX2(AjPAlign thys)
850 {
851 ajint iali;
852 ajint nali;
853
854 nali = (ajuint) ajListGetLength(thys->Data);
855
856 for(iali=0; iali < nali; iali++)
857 alignWriteMark(thys, iali, 2);
858
859 return;
860 }
861
862
863
864
865 /* @funcstatic alignWriteMarkX3 ***********************************************
866 **
867 ** Writes an alignment in Fasta MarkX 3 format
868 **
869 ** This is an alternative output format for FASTA programs in which
870 ** the aligned library sequences are displayed in FASTA format
871 ** These can be used to build a primitive multiple alignment.
872 **
873 ** @param [u] thys [AjPAlign] Alignment object
874 ** @return [void]
875 **
876 ** @release 2.1.0
877 ** @@
878 ******************************************************************************/
879
alignWriteMarkX3(AjPAlign thys)880 static void alignWriteMarkX3(AjPAlign thys)
881 {
882 ajint iali;
883 ajint nali;
884
885 nali = (ajuint) ajListGetLength(thys->Data);
886
887 for(iali=0; iali < nali; iali++)
888 alignWriteMark(thys, iali, 3);
889
890 return;
891 }
892
893
894
895
896 /* @funcstatic alignWriteMarkX10 **********************************************
897 **
898 ** Writes an alignment in Fasta MarkX 10 format
899 **
900 ** @param [u] thys [AjPAlign] Alignment object
901 ** @return [void]
902 **
903 ** @release 2.1.0
904 ** @@
905 ******************************************************************************/
906
alignWriteMarkX10(AjPAlign thys)907 static void alignWriteMarkX10(AjPAlign thys)
908 {
909 ajint iali;
910 ajint nali;
911
912 nali = (ajuint) ajListGetLength(thys->Data);
913
914 for(iali=0; iali < nali; iali++)
915 alignWriteMark(thys, iali, 10);
916
917 return;
918 }
919
920
921
922
923 /* @funcstatic alignWriteMark *************************************************
924 **
925 ** Writes a pairwise alignment in a Fasta MarkX format.
926 **
927 ** For now, seems to work with 0, 1, 2, 3 or 10.
928 ** FASTA 3.4 has 4, 5, 6 and 9 as possible options
929 ** but most seem to make no difference on pairwise comparisons.
930 **
931 ** 0: identities marked with ':' similarities with '.'
932 **
933 ** 1: differences marked with 'X'
934 **
935 ** 2: identities marked with '.', differences with seq2 character
936 **
937 ** 3: fasta format sequences with no description
938 **
939 ** 10: aligned regions with associated data
940 **
941 ** @param [u] thys [AjPAlign] Alignment object
942 ** @param [r] iali [ajint] Alignment number
943 ** @param [r] markx [ajint] Markup type (as defined in Bill Pearson's
944 ** FASTA suite
945 ** @return [void]
946 **
947 ** @release 2.1.0
948 ** @@
949 ** Note: Modified 30-nov-04 (pmr) to replace the original code which had nasty
950 ** dependencies on global variable settings in the original - which we had
951 ** failed to reproduce in all possible circumstances, with code derived from
952 ** the existing and working EMBOSS alignment formats.
953 ******************************************************************************/
954
alignWriteMark(AjPAlign thys,ajint iali,ajint markx)955 static void alignWriteMark(AjPAlign thys, ajint iali, ajint markx)
956 {
957
958 AjPFile outf;
959 int nseq;
960
961 const AjPStr seq = NULL;
962 const AjPSeq seqp = NULL;
963
964 AlignPData data = NULL;
965
966 ajint iseq;
967 ajint i;
968 ajint j;
969 ajint jj;
970 ajint kk;
971 ajint istart;
972 ajint iend;
973 ajint ilen;
974 ajint jstart;
975 ajint jend;
976 ajint iwidth = 50;
977 const char* cp;
978
979 AjPStr tmpstr = NULL;
980 AjPStr mrkstr = NULL;
981 AjPStr mrkcons = NULL;
982 AjPStr cons = NULL;
983
984 ajint identity = 0;
985 ajint similarity = 0;
986 ajint gaps = 0;
987 ajint seqlen = 0;
988
989 ajint* ipos = NULL;
990 ajint* incs = NULL;
991 AjBool* num = NULL;
992 ajint jpos;
993 ajint icnt;
994 ajint increment = 1;
995
996 AjPStr tmphdr = NULL;
997 AjPStr tmpnum = NULL;
998 AjBool lastline = ajFalse;
999
1000 AjBool isnuc = ajFalse;
1001 static ajint icount = 0;
1002 float ident = 0.0;
1003
1004 ajDebug("alignWriteMark iali:%d markx:%d\n", iali, markx);
1005
1006 icount++;
1007
1008 nseq = thys->Nseqs;
1009 outf = thys->File;
1010
1011 if(thys->Width)
1012 iwidth = thys->Width;
1013
1014 ajListPeekNumber(thys->Data, iali, (void**) &data);
1015
1016 AJCNEW0(ipos, nseq);
1017 AJCNEW0(incs, nseq);
1018 AJCNEW0(num, nseq);
1019
1020 ajStrDel(&cons);
1021 ajStrDel(&tmphdr);
1022
1023 ilen = data->LenAli;
1024
1025 alignConsStats(thys, iali, &cons,
1026 &identity, &similarity, &gaps, &seqlen);
1027
1028 ajAlignSetStats(thys, iali, seqlen, identity, similarity, gaps, NULL);
1029 ajAlignSetSubStandard(thys, iali);
1030 alignWriteHeaderNum(thys,iali);
1031 isnuc = ajSeqIsNuc(data->Seq[0]);
1032 ident = (float) data->NumId / (float) data->LenAli;
1033
1034 /*ajDebug("# Consens: '%S'\n\n", cons);*/
1035
1036 if(markx == 1)
1037 alignSim(&cons, ' ', ' ', 'X', ' ');
1038 else if(markx == 2)
1039 {
1040 seq = ajSeqGetSeqS(data->Seq[0]);
1041 alignConsSet(thys, iali, 1, &cons);
1042 ajDebug("alignWriteMark(%d)\nseq0:%S\ncons:%S\nseq1:%S\n",
1043 markx, seq, cons, ajSeqGetSeqS(data->Seq[1]));
1044 alignDiff(&cons, seq, '.');
1045 ajDebug("alignWriteMark(%d)\nseq0:%S\ncons:%S\nseq1:%S\n",
1046 markx, seq, cons, ajSeqGetSeqS(data->Seq[1]));
1047 }
1048 else
1049 alignSim(&cons, ':', '.', ' ', ' ');
1050
1051 /*ajDebug("# Modcons: '%S'\n\n", cons);*/
1052 ajDebug("# nseq:%d\n", nseq);
1053
1054 ajDebug("# AliData [%d] len %d \n", iali, ilen);
1055 for(iseq=0; iseq < nseq; iseq++)
1056 {
1057 incs[iseq] = alignSeqIncrement(data, iseq);
1058 ipos[iseq] = alignSeqBegin(data, iseq)-incs[iseq];
1059 /*ajDebug("# Seq[%d]'%S'\n",
1060 iseq, ajSeqGetSeqS(data->Seq[iseq]));*/
1061 }
1062
1063 /*
1064 for(iseq=0; iseq < nseq; iseq++)
1065 {
1066 ajDebug("# Seq[%d] Len:%d Start:%d End:%d Rev:%B Off:%d\n",
1067 iseq, data->Len[iseq],
1068 data->Start[iseq], data->End[iseq],
1069 data->Rev[iseq],
1070 data->Offset[iseq]);
1071 }
1072 */
1073
1074 if(markx==10)
1075 {
1076 if(isnuc)
1077 ajFmtPrintF(outf,">>>%s, %d nt vs %s, %d nt\n",
1078 ajSeqGetNameC(data->Seq[0]), data->Len[0],
1079 ajSeqGetNameC(data->Seq[1]), data->Len[1]);
1080 else
1081 ajFmtPrintF(outf,">>>%s, %d aa vs %s, %d aa\n",
1082 ajSeqGetNameC(data->Seq[0]), data->Len[0],
1083 ajSeqGetNameC(data->Seq[1]), data->Len[1]);
1084
1085 ajFmtPrintF(outf,"; mp_name: EMBOSS\n");
1086 ajFmtPrintF(outf,"; mp_ver: %S\n", ajNamValueVersion());
1087 ajFmtPrintF(outf,"; pg_name: %S\n", ajUtilGetProgram());
1088 ajFmtPrintF(outf,"; pg_ver: %S\n", ajNamValueVersion());
1089 ajFmtPrintF(outf,"; pg_matrix: %S\n", thys->Matrix);
1090 ajFmtPrintF(outf,"; pg_gap-pen: -%S -%S\n",
1091 thys->GapPen, thys->ExtPen);
1092 ajFmtPrintF(outf,">>#%d\n", icount);
1093 ajFmtPrintF(outf,"; sw_score: %S\n", data->Score);
1094 ident = (float) data->NumId / (float) data->LenAli;
1095 ajFmtPrintF(outf,"; sw_ident: %5.3f\n", ident);
1096 ajFmtPrintF(outf,"; sw_overlap: %d\n", data->LenAli);
1097 }
1098
1099 if(markx <= 2)
1100 {
1101 for(i=0; i < ilen; i += iwidth)
1102 {
1103 for(iseq=0; iseq < nseq; iseq++)
1104 {
1105 seqp = data->Seq[iseq];
1106 seq = ajSeqGetSeqS(seqp);
1107 istart = i + data->SubOffset[iseq];
1108 iend = (istart+iwidth-1);
1109
1110 if(iend >= (data->SubOffset[iseq]+ilen-1))
1111 {
1112 iend = data->SubOffset[iseq]+ilen-1;
1113 lastline = ajTrue;
1114 }
1115
1116 increment = incs[iseq];
1117 ajStrAssignSubS(&tmpstr, seq, istart, iend);
1118 ajStrAssignSubS(&mrkcons, cons,
1119 istart - data->SubOffset[iseq],
1120 iend - data->SubOffset[iseq]);
1121
1122 ajStrExchangeCC(&tmpstr, ".", "-");
1123 icnt = ajStrGetLen(tmpstr)
1124 - (size_t) ajStrCalcCountK(tmpstr, '-')
1125 - (size_t) ajStrCalcCountK(tmpstr, ' ');
1126
1127 /* number the top sequence */
1128 if(!iseq)
1129 {
1130 jpos = ipos[iseq];
1131 ajStrAssignClear(&tmpnum);
1132 jj=7;
1133 cp = ajStrGetPtr(tmpstr);
1134
1135 while(*cp)
1136 {
1137 jj ++;
1138
1139 if((*cp != '-') && (*cp != ' '))
1140 {
1141 jpos += increment;
1142
1143 if(jpos%10 == 0)
1144 {
1145 ajFmtPrintAppS(&tmpnum,"%*d",jj,jpos);
1146 jj=0;
1147 num[iseq] = ajTrue;
1148 }
1149 }
1150 cp++;
1151 }
1152
1153 if(lastline && !num[iseq])
1154 {
1155 ajFmtPrintAppS(&tmpnum,"%*d",jj,jpos);
1156 jj=0;
1157 }
1158
1159 if(jj)
1160 ajStrAppendCountK(&tmpnum, ' ', jj);
1161
1162 ajFmtPrintF(outf, "%S\n", tmpnum);
1163 }
1164
1165 if(!iseq)
1166 ajStrAssignS(&mrkstr, tmpstr);
1167 else
1168 alignSame(&mrkstr, tmpstr, ' ');
1169
1170 if(markx==2 && nseq==2 && iseq==1)
1171 {
1172 ajFmtPrintF(outf,
1173 "%6.6S %S\n",
1174 alignSeqName(thys, seqp), mrkcons);
1175 }
1176 else
1177 {
1178 if(nseq==2 && iseq==1)
1179 ajFmtPrintF(outf,
1180 " %S\n",
1181 mrkcons);
1182
1183 if(ajStrGetLen(tmpstr))
1184 {
1185 ajFmtPrintF(outf,
1186 "%6.6S %S\n",
1187 alignSeqName(thys, seqp),
1188 tmpstr);
1189 }
1190 else
1191 ajFmtPrintF(outf,
1192 "%6.6S\n",
1193 alignSeqName(thys, seqp));
1194
1195 /* number the bottom sequence */
1196 if(iseq+1 == nseq)
1197 {
1198 jpos = ipos[iseq];
1199 ajStrAssignClear(&tmpnum);
1200 jj=7;
1201 kk=7;
1202 cp = ajStrGetPtr(tmpstr);
1203
1204 while(*cp)
1205 {
1206 jj++;
1207 if((*cp != '-') && (*cp != ' '))
1208 {
1209 kk = jj;
1210 jpos += increment;
1211
1212 if(jpos%10 == 0)
1213 {
1214 ajFmtPrintAppS(&tmpnum,"%*d",jj,jpos);
1215 jj=0;
1216 kk=0;
1217 num[iseq] = ajTrue;
1218 }
1219 }
1220
1221 cp++;
1222 }
1223
1224 if(lastline && !num[iseq])
1225 {
1226 ajFmtPrintAppS(&tmpnum,"%*d",kk,jpos);
1227 jj = jj - kk;
1228 }
1229
1230 if(jj)
1231 ajStrAppendCountK(&tmpnum, ' ', jj);
1232
1233 ajFmtPrintF(outf, "%S\n", tmpnum);
1234 }
1235
1236 if(increment > 0)
1237 ipos[iseq] += icnt;
1238 else
1239 ipos[iseq] -= icnt;
1240 }
1241 }
1242
1243 if(nseq > 2) /* 3 or more seqs, markup under */
1244 ajFmtPrintF(outf,
1245 " %S\n",
1246 mrkcons);
1247
1248 ajFmtPrintF(outf, "\n");
1249 }
1250 }
1251 else
1252 {
1253 for(iseq=0; iseq < nseq; iseq++)
1254 {
1255 seqp = data->Seq[iseq];
1256 seq = ajSeqGetSeqS(seqp);
1257 istart = data->SubOffset[iseq];
1258 iend = istart + ilen - 1;
1259 ajStrAssignSubS(&tmpstr, seq, istart, iend);
1260 ajStrAssignSubS(&mrkcons, cons,
1261 istart - data->SubOffset[iseq],
1262 iend - data->SubOffset[iseq]);
1263
1264 ajStrExchangeCC(&tmpstr, ".", "-");
1265 icnt = ajStrGetLen(tmpstr)
1266 - (size_t) ajStrCalcCountK(tmpstr, '-')
1267 - (size_t) ajStrCalcCountK(tmpstr, ' ');
1268
1269 if(!iseq)
1270 ajStrAssignS(&mrkstr, tmpstr);
1271 else
1272 alignSame(&mrkstr, tmpstr, ' ');
1273
1274 if(markx==3)
1275 {
1276 ajFmtPrintF(outf,
1277 ">%S ..\n",
1278 alignSeqName(thys, seqp));
1279 }
1280 else if(markx==10)
1281 {
1282 ajFmtPrintF(outf,">%S ..\n", alignSeqName(thys, seqp));
1283 ajFmtPrintF(outf,"; sq_len: %d\n",
1284 data->Len[iseq]);
1285
1286 if(ajSeqIsNuc(data->Seq[iseq]))
1287 ajFmtPrintF(outf,"; sq_type: D\n");
1288 else
1289 ajFmtPrintF(outf,"; sq_type: p\n");
1290
1291 ajFmtPrintF(outf,"; al_start: %d\n",
1292 data->Offset[iseq] + data->Start[iseq]);
1293 ajFmtPrintF(outf,"; al_stop: %d\n",
1294 data->Offset[iseq] + data->Start[iseq] + icnt - 1);
1295 ajFmtPrintF(outf,"; al_display_start: %d\n",
1296 data->Offset[iseq] + data->Start[iseq]);
1297 }
1298 else
1299 {
1300 }
1301
1302 for(j=0; j < ilen; j += iwidth)
1303 {
1304 jstart = j + data->SubOffset[iseq];
1305 jend = AJMIN(data->SubOffset[iseq]+ilen-1, jstart+iwidth-1);
1306 ajStrAssignSubS(&tmpstr, seq, jstart, jend);
1307
1308 if(ajStrGetLen(tmpstr))
1309 {
1310 ajFmtPrintF(outf,
1311 "%S\n",
1312 tmpstr);
1313 }
1314 }
1315
1316 ipos[iseq] += icnt;
1317 }
1318
1319 }
1320
1321 ajStrDel(&cons);
1322 ajStrDel(&mrkcons);
1323 ajStrDel(&mrkstr);
1324 ajStrDel(&tmpstr);
1325 ajStrDel(&tmphdr);
1326 ajStrDel(&tmpnum);
1327 AJFREE(ipos);
1328 AJFREE(incs);
1329 AJFREE(num);
1330
1331 return;
1332 }
1333
1334
1335
1336
1337
1338 /* @funcstatic alignWriteMatch ************************************************
1339 **
1340 ** Writes an alignment in Match format
1341 **
1342 ** @param [u] thys [AjPAlign] Alignment object
1343 ** @return [void]
1344 **
1345 ** @release 2.4.0
1346 ** @@
1347 ******************************************************************************/
1348
alignWriteMatch(AjPAlign thys)1349 static void alignWriteMatch(AjPAlign thys)
1350 {
1351 AjPFile outf;
1352 const AjPSeq seq1;
1353 const AjPSeq seq2;
1354 ajint nali;
1355 ajint iali;
1356 ajint len0;
1357 char fwd0 = '+';
1358 char fwd1 = '+';
1359 ajint endgaps = 0;
1360 ajint begingaps = 0;
1361
1362 AlignPData* pdata = NULL;
1363 AlignPData data = NULL;
1364
1365 outf = thys->File;
1366 nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
1367
1368 ajAlignWriteHeader(thys);
1369
1370 for(iali=0; iali<nali; iali++)
1371 {
1372 data = pdata[iali];
1373 len0 = data->End[0] - data->Start[0] + 1;
1374
1375 if(alignSeqRev(data, 0))
1376 fwd0 = '-';
1377
1378 if(alignSeqRev(data, 1))
1379 fwd1 = '-';
1380
1381 begingaps = alignSeqGapBegin(data,0) + alignSeqGapBegin(data,1);
1382 endgaps = alignSeqGapEnd(data,0) + alignSeqGapEnd(data,1);
1383 seq1 = alignSeq(thys, 0, iali);
1384 seq2 = alignSeq(thys, 1, iali);
1385
1386 if(thys->Global)
1387 {
1388 len0 = len0 - endgaps - begingaps;
1389 ajFmtPrintF(outf,
1390 "%6d %-15.15S %c %8d..%-8d %-15.15S %c %8d..%d\n",
1391 len0, alignSeqName(thys, seq1), fwd0,
1392 alignSeqBegin(data,0) + alignSeqGapBegin(data,1),
1393 alignSeqEnd(data,0) - endgaps -
1394 alignSeqGapBegin(data,0),
1395 alignSeqName(thys, seq2), fwd1,
1396 alignSeqBegin(data,1) + alignSeqGapBegin(data,0),
1397 alignSeqEnd(data,1) - endgaps -
1398 alignSeqGapBegin(data,1));
1399 }
1400 else
1401 {
1402 ajFmtPrintF(outf,
1403 "%6d %-15.15S %c %8d..%-8d %-15.15S %c %8d..%d\n",
1404 len0, alignSeqName(thys, seq1), fwd0,
1405 alignSeqBegin(data,0),
1406 alignSeqEnd(data,0),
1407 alignSeqName(thys, seq2), fwd1,
1408 alignSeqBegin(data,1),
1409 alignSeqEnd(data,1));
1410 }
1411 }
1412
1413 AJFREE(pdata);
1414
1415 return;
1416 }
1417
1418
1419
1420
1421 /* @funcstatic alignWriteSimple ***********************************************
1422 **
1423 ** Writes an alignment in Simple format
1424 **
1425 ** @param [u] thys [AjPAlign] Alignment object
1426 ** @return [void]
1427 **
1428 ** @release 2.1.0
1429 ** @@
1430 ******************************************************************************/
1431
alignWriteSimple(AjPAlign thys)1432 static void alignWriteSimple(AjPAlign thys)
1433 {
1434 AjPFile outf;
1435 int nseq;
1436 int nali;
1437
1438 const AjPStr seq = NULL;
1439
1440 AlignPData* pdata = NULL;
1441 AlignPData data = NULL;
1442
1443 ajint iali;
1444 ajint iseq;
1445 ajint i;
1446 ajint istart;
1447 ajint iend;
1448 ajint ilen;
1449 ajint iwidth = 50;
1450
1451 AjPStr tmpstr = NULL;
1452 AjPStr mrkstr = NULL;
1453 AjPStr mrkcons = NULL;
1454 AjPStr cons = NULL;
1455
1456 ajint identity = 0;
1457 ajint similarity = 0;
1458 ajint gaps = 0;
1459 ajint seqlen = 0;
1460 ajint icnt;
1461
1462 ajint* ipos = NULL;
1463 ajint* incs = NULL;
1464
1465 AjPStr tmphdr = NULL;
1466
1467 ajDebug("alignWriteSimple\n");
1468
1469
1470 nseq = thys->Nseqs;
1471 outf = thys->File;
1472
1473 if(thys->Width)
1474 iwidth = thys->Width;
1475
1476 nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
1477
1478 AJCNEW0(ipos, nseq);
1479 AJCNEW0(incs, nseq);
1480
1481 for(iali=0; iali<nali; iali++)
1482 {
1483 ajStrDel(&cons);
1484 ajStrDel(&tmphdr);
1485
1486 data = pdata[iali];
1487 ilen = data->LenAli;
1488
1489 alignConsStats(thys, iali, &cons,
1490 &identity, &similarity, &gaps, &seqlen);
1491
1492 ajAlignSetStats(thys, iali, seqlen, identity, similarity, gaps, NULL);
1493 ajAlignSetSubStandard(thys, iali);
1494 alignWriteHeaderNum(thys, iali);
1495
1496 /*ajDebug("# Consens: '%S'\n\n", cons);*/
1497
1498 alignSim(&cons, '|', ':', '.', ' ');
1499
1500 /*ajDebug("# Modcons: '%S'\n\n", cons);*/
1501 ajDebug("# Nali:%d nseq:%d\n", nali, nseq);
1502
1503 ajDebug("# AliData [%d] len %d \n", iali, ilen);
1504
1505 for(iseq=0; iseq < nseq; iseq++)
1506 {
1507 incs[iseq] = alignSeqIncrement(data, iseq);
1508 ipos[iseq] = alignSeqBegin(data, iseq)-incs[iseq];
1509 /* ajDebug("# Seq[%d]'%S'\n",
1510 iseq, ajSeqGetSeqS(data->Seq[iseq]));*/
1511 }
1512
1513 /*
1514 for(iseq=0; iseq < nseq; iseq++)
1515 {
1516 ajDebug("# Seq[%d] Len:%d Start:%d End:%d Rev:%B\n",
1517 iseq, data->Len[iseq],
1518 data->Start[iseq], data->End[iseq],
1519 data->Rev[iseq]);
1520 }
1521 */
1522
1523 for(i=0; i < ilen; i += iwidth)
1524 {
1525 for(iseq=0; iseq < nseq; iseq++)
1526 {
1527 seq = ajSeqGetSeqS(data->Seq[iseq]);
1528 istart = i + data->SubOffset[iseq];
1529 iend = AJMIN(data->SubOffset[iseq]+ilen-1, istart+iwidth-1);
1530 ajStrAssignSubS(&tmpstr, seq, istart, iend);
1531 ajStrAssignSubS(&mrkcons, cons,
1532 istart - data->SubOffset[iseq],
1533 iend - data->SubOffset[iseq]);
1534
1535 ajStrExchangeCC(&tmpstr, ".", "-");
1536 icnt = incs[iseq] * (ajStrGetLen(tmpstr)
1537 - (size_t)ajStrCalcCountK(tmpstr, '-')
1538 - (size_t)ajStrCalcCountK(tmpstr, ' '));
1539
1540 if(!iseq)
1541 ajStrAssignS(&mrkstr, tmpstr);
1542 else
1543 alignSame(&mrkstr, tmpstr, ' ');
1544
1545
1546 if(nseq==2 && iseq==1) /* 2 seqs, markup between them */
1547 ajFmtPrintF(outf,
1548 " %S\n",
1549 mrkcons);
1550
1551 if(ajStrGetLen(tmpstr))
1552 {
1553 ajFmtPrintF(outf,
1554 "%-13.13S %6d %S %6d\n",
1555 alignSeqName(thys, data->Seq[iseq]),
1556 ipos[iseq]+incs[iseq], tmpstr,
1557 ipos[iseq]+icnt);
1558 }
1559 else
1560 ajFmtPrintF(outf,
1561 "%-13.13S\n",
1562 alignSeqName(thys, data->Seq[iseq]));
1563
1564 ipos[iseq] += icnt;
1565 }
1566
1567 if(nseq > 2) /* 3 or more seqs, markup under */
1568 ajFmtPrintF(outf,
1569 " %S\n",
1570 mrkcons);
1571
1572 ajFmtPrintF(outf, "\n");
1573 }
1574 }
1575
1576
1577 ajStrDel(&cons);
1578 ajStrDel(&mrkcons);
1579 ajStrDel(&mrkstr);
1580 ajStrDel(&tmpstr);
1581 AJFREE(ipos);
1582 AJFREE(incs);
1583 AJFREE(pdata);
1584
1585 return;
1586 }
1587
1588
1589
1590
1591 /* @funcstatic alignWriteScore ************************************************
1592 **
1593 ** Writes an alignment in Score-only format
1594 **
1595 ** @param [u] thys [AjPAlign] Alignment object
1596 ** @return [void]
1597 **
1598 ** @release 2.1.0
1599 ** @@
1600 ******************************************************************************/
1601
alignWriteScore(AjPAlign thys)1602 static void alignWriteScore(AjPAlign thys)
1603 {
1604 AjPFile outf;
1605 ajint nali;
1606 ajint iali;
1607 AlignPData* pdata = NULL;
1608 AlignPData data = NULL;
1609 const AjPSeq seq1;
1610 const AjPSeq seq2;
1611
1612 outf = thys->File;
1613 nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
1614
1615 for(iali=0; iali<nali; iali++)
1616 {
1617 data = pdata[iali];
1618 seq1 = data->Seq[0];
1619 seq2 = data->Seq[1];
1620
1621 if(ajStrGetLen(data->Score))
1622 ajFmtPrintF(outf, "%S %S %d (%S)\n",
1623 alignSeqName(thys, seq1),
1624 alignSeqName(thys, seq2),
1625 data->LenAli, data->Score);
1626 else
1627 ajFmtPrintF(outf, "%S %S %d\n",
1628 alignSeqName(thys, seq1),
1629 alignSeqName(thys, seq2),
1630 data->LenAli);
1631 }
1632
1633 AJFREE(pdata);
1634
1635 return;
1636 }
1637
1638
1639
1640
1641 /* @funcstatic alignWriteSam **************************************************
1642 **
1643 ** Writes an alignment in sequence alignment/map (SAM) format
1644 **
1645 ** @param [u] thys [AjPAlign] Alignment object
1646 ** @return [void]
1647 **
1648 ** @release 6.2.0
1649 ** @@
1650 ******************************************************************************/
1651
alignWriteSam(AjPAlign thys)1652 static void alignWriteSam(AjPAlign thys)
1653 {
1654 AjPFile outf;
1655 ajint nali;
1656 ajint iali;
1657 ajint iseq;
1658 ajuint j;
1659 ajuint qrystart;
1660 ajuint qryend;
1661 ajuint qualstart;
1662 ajuint qualend;
1663 ajint refstart;
1664
1665 ajint nmismatches;
1666 AlignPData* pdata = NULL;
1667 AlignPData data = NULL;
1668
1669 ajint refpos = 0; /* 1-based leftmost position on the reference sequence */
1670
1671 const AjPSeq refseq;
1672 const AjPSeq qryseq;
1673 AjPStr seqacc = NULL;
1674
1675 ajuint flag = 0; /* bitwise flag, section 2.2.2 in SAM specification */
1676 ajuint mapq = 255; /* mapping quality, 255 means not available */
1677 AjPStr cigar = NULL; /* extended CIGAR string */
1678 AjPStr mrnm = NULL; /* mate reference sequence name */
1679 ajuint mpos = 0; /* leftmost mate position of the clipped sequence */
1680 ajuint isize = 0; /* inferred insert size */
1681
1682 AjPStr argstr;
1683 AjPStr mseq = NULL; /* match region of the query sequence */
1684
1685 mseq = ajStrNew();
1686 mrnm = ajStrNewC("*");/* TODO: mate sequences not yet supported */
1687
1688 outf = thys->File;
1689 nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
1690
1691 if(!thys->Count++)
1692 {
1693 ajFmtPrintF(outf, "@HD\tVN:1.3\n");
1694
1695 /* Program record */
1696 argstr = ajStrNewS(ajUtilGetCmdline());
1697 ajStrExchangeKK(&argstr, '\n', ' ');
1698 ajFmtPrintF(outf, "@PG\tID:%S\tVN:%S\tCL:%S\n",
1699 ajUtilGetProgram(), ajNamValueVersion(), argstr);
1700 ajStrDel(&argstr);
1701 }
1702
1703 for(iali=0; iali<nali; iali++)
1704 {
1705 data = pdata[iali];
1706 refseq = data->Seq[thys->RefSeq];
1707
1708 refpos = alignSeqBegin(data, thys->RefSeq) + 1
1709 - alignSeqIncrement(data, thys->RefSeq);
1710
1711 for(iseq=0; iseq < data->Nseqs; iseq++)
1712 {
1713 if(iseq==thys->RefSeq)
1714 continue;
1715
1716 qryseq = data->Seq[iseq];
1717
1718 if(ajSeqIsReversed(qryseq))
1719 flag |= 0x0010;
1720
1721 if(thys->Global ||thys->SeqExternal)
1722 {
1723 qrystart = alignSeqBegin(data,iseq) -
1724 alignSeqIncrement(data,iseq);
1725 qryend = alignSeqEnd(data,iseq);
1726 refstart = refpos-1;
1727 }
1728 else
1729 {
1730 qrystart = 0;
1731 qryend = data->LenAli;
1732 refstart = 0;
1733 }
1734
1735 qualstart = qrystart;
1736 qualend = AJMIN(qryend, qryseq->Qualsize);
1737
1738 if(qryseq->Accuracy)
1739 {
1740 ajStrAssignClear(&seqacc);
1741
1742 /* ASCII-33 gives the Phred base quality */
1743 for(j=qualstart;j< qualend;j++)
1744 ajStrAppendK(&seqacc, (char) (33 + qryseq->Accuracy[j]));
1745
1746 }
1747 else
1748 ajStrAssignC(&seqacc,"*");
1749
1750 ajStrAssignClear(&mseq);
1751 ajStrAssignClear(&cigar);
1752
1753 /* get CIGAR, match region of the query sequence and #mismatches */
1754 nmismatches = alignCigar(qryseq, refseq,
1755 qrystart,
1756 qryend,
1757 refstart,
1758 &mseq, &cigar, thys->SeqExternal);
1759
1760 ajFmtPrintF(outf, "%S\t%u\t%S\t%d\t%d\t%S\t%S\t%u\t%u\t%S\t%S\t"
1761 "AS:i:%S\tNM:i:%d\n",
1762 alignSeqName(thys, qryseq),
1763 flag,
1764 alignSeqName(thys, refseq),
1765 refpos,
1766 mapq,
1767 cigar,
1768 mrnm,
1769 mpos,
1770 isize,
1771 mseq,
1772 seqacc,
1773 data->Score,
1774 nmismatches);
1775 }
1776 }
1777
1778 AJFREE(pdata);
1779 ajStrDel(&cigar);
1780 ajStrDel(&mrnm);
1781 ajStrDel(&seqacc);
1782 ajStrDel(&mseq);
1783
1784 return;
1785 }
1786
1787
1788
1789
1790 /* #funcstatic alignWriteBam ***************************************************
1791 **
1792 ** Writes an alignment in binary sequence alignment/map (BAM) format
1793 **
1794 ** #param [u] thys [AjPAlign] Alignment object
1795 ** #return [void]
1796 ** ##
1797 ******************************************************************************/
1798 /*
1799 //static void alignWriteBam(AjPAlign thys)
1800 //{
1801 // ajint nali;
1802 // ajint iali;
1803 // AlignPData* pdata = NULL;
1804 //
1805 //
1806 // nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
1807 //
1808 // for(iali=0; iali<nali; iali++)
1809 // {
1810 // }
1811 //
1812 // AJFREE(pdata);
1813 //
1814 // return;
1815 //}
1816 */
1817
1818
1819
1820
1821 /* @funcstatic alignWriteSrs **************************************************
1822 **
1823 ** Writes an alignment in SRS format
1824 **
1825 ** @param [u] thys [AjPAlign] Alignment object
1826 ** @return [void]
1827 **
1828 ** @release 2.1.0
1829 ** @@
1830 ******************************************************************************/
1831
alignWriteSrs(AjPAlign thys)1832 static void alignWriteSrs(AjPAlign thys)
1833 {
1834 alignWriteSrsAny(thys, 0, ajFalse);
1835
1836 return;
1837 }
1838
1839
1840
1841
1842 /* @funcstatic alignWriteSrsPair **********************************************
1843 **
1844 ** Writes an alignment in SrsPair format
1845 **
1846 ** @param [u] thys [AjPAlign] Alignment object
1847 ** @return [void]
1848 **
1849 ** @release 2.1.0
1850 ** @@
1851 ******************************************************************************/
1852
alignWriteSrsPair(AjPAlign thys)1853 static void alignWriteSrsPair(AjPAlign thys)
1854 {
1855 alignWriteSrsAny(thys, 2, ajTrue);
1856
1857 return;
1858 }
1859
1860
1861
1862
1863 /* @funcstatic alignWriteSrsAny ***********************************************
1864 **
1865 ** Writes an alignment in SRS format (with switches for pairwise and general
1866 ** formatting)
1867 **
1868 ** @param [u] thys [AjPAlign] Alignment object
1869 ** @param [r] imax [ajint] Maximum number of sequences (0 for unknown)
1870 ** @param [r] mark [AjBool] Markup the alignment
1871 ** @return [void]
1872 **
1873 ** @release 2.1.0
1874 ** @@
1875 ******************************************************************************/
1876
alignWriteSrsAny(AjPAlign thys,ajint imax,AjBool mark)1877 static void alignWriteSrsAny(AjPAlign thys, ajint imax, AjBool mark)
1878 {
1879 AjPFile outf;
1880 int nseq;
1881 int nali;
1882
1883 const AjPStr seq = NULL;
1884
1885 AlignPData* pdata = NULL;
1886 AlignPData data = NULL;
1887
1888 ajint iali;
1889 ajint iseq;
1890 ajint i;
1891 ajint istart;
1892 ajint iend;
1893 ajint ilen;
1894
1895 AjPStr tmpstr = NULL;
1896 AjPStr mrkstr = NULL;
1897 AjPStr mrkcons = NULL;
1898 AjPStr cons = NULL;
1899
1900 ajint iwidth = 50;
1901 ajint identity = 0;
1902 ajint similarity = 0;
1903 ajint gaps=0;
1904 ajint seqlen=0;
1905 ajint icnt;
1906
1907 ajint* ipos = NULL;
1908 ajint* incs = NULL;
1909 AjBool pair = ajFalse;
1910
1911 AjPStr tmphdr = NULL;
1912
1913
1914 outf = thys->File;
1915 nseq = thys->Nseqs;
1916
1917 if(nseq < 1)
1918 {
1919 ajAlignWriteHeader(thys);
1920
1921 return;
1922 }
1923
1924 if(thys->Width)
1925 iwidth = thys->Width;
1926
1927 /*
1928 ** pair: if true, change consensus into |:. markup
1929 ** and put markup line between sequences
1930 */
1931
1932 if(imax == 2 && nseq == 2 && mark)
1933 pair = ajTrue;
1934
1935 nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
1936
1937 AJCNEW0(ipos, nseq);
1938 AJCNEW0(incs, nseq);
1939
1940 for(iali=0; iali<nali; iali++)
1941 {
1942 data = pdata[iali];
1943 ilen = data->LenAli;
1944
1945 ajStrDel(&cons);
1946 ajStrDel(&tmphdr);
1947 alignConsStats(thys, iali, &cons,
1948 &identity, &similarity, &gaps, &seqlen);
1949
1950 ajAlignSetStats(thys, iali, seqlen, identity, similarity, gaps,
1951 NULL);
1952 ajAlignSetSubStandard(thys, iali);
1953 alignWriteHeaderNum(thys, iali);
1954
1955 /*ajDebug("# Consens: '%S'\n\n", cons);*/
1956
1957 if(pair)
1958 alignSim(&cons, '|', ':', '.', ' ');
1959
1960 /*ajDebug("# Modcons: '%S'\n\n", cons);*/
1961 ajDebug("# Nali:%d nseq:%d\n", nali, nseq);
1962
1963 ajDebug("# AliData [%d] len %d \n", iali, ilen);
1964
1965 for(iseq=0; iseq < nseq; iseq++)
1966 {
1967 incs[iseq] = alignSeqIncrement(data, iseq);
1968 ipos[iseq] = alignSeqBegin(data, iseq)-incs[iseq];
1969 /*ajDebug("# Seq[%d] Off:%d Sta:%d End:%d '%S'\n",
1970 iseq, data->Offset[iseq], data->Start[iseq],
1971 data->End[iseq], ajSeqGetSeqS(data->Seq[iseq]));*/
1972 }
1973
1974 /*
1975 for(iseq=0; iseq < nseq; iseq++)
1976 ajDebug("# Seq[%d] Start:%d End:%d Rev:%B\n",
1977 iseq, data->Start[iseq], data->End[iseq],
1978 data->Rev[iseq]);
1979 */
1980
1981 for(i=0; i < ilen; i += iwidth)
1982 {
1983 for(iseq=0; iseq < nseq; iseq++)
1984 {
1985 seq = ajSeqGetSeqS(data->Seq[iseq]);
1986 istart = i + data->SubOffset[iseq];
1987 iend = AJMIN(data->SubOffset[iseq]+ilen-1, istart+iwidth-1);
1988 ajStrAssignSubS(&tmpstr, seq, istart, iend);
1989 ajStrAssignSubS(&mrkcons, cons,
1990 istart - data->SubOffset[iseq],
1991 iend - data->SubOffset[iseq]);
1992
1993 ajStrExchangeCC(&tmpstr, ".", "-");
1994 icnt = incs[iseq] * (ajStrGetLen(tmpstr)
1995 - (size_t)ajStrCalcCountK(tmpstr, '-')
1996 - (size_t)ajStrCalcCountK(tmpstr, ' '));
1997
1998 if(!iseq)
1999 ajStrAssignS(&mrkstr, tmpstr);
2000 else
2001 alignSame(&mrkstr, tmpstr, ' ');
2002
2003 if(pair && iseq==1) /* 2 seqs, markup between them */
2004 ajFmtPrintF(outf,
2005 " %S\n",
2006 mrkcons);
2007
2008 if(ajStrGetLen(tmpstr))
2009 {
2010 if(icnt)
2011 ajFmtPrintF(outf,
2012 "%-13.13S %6d %S %6d\n",
2013 alignSeqName(thys, data->Seq[iseq]),
2014 ipos[iseq]+incs[iseq], tmpstr,
2015 ipos[iseq]+icnt);
2016 else
2017 ajFmtPrintF(outf,
2018 "%-13.13S %6d %S %6d\n",
2019 alignSeqName(thys, data->Seq[iseq]),
2020 ipos[iseq], tmpstr,
2021 ipos[iseq]);
2022 }
2023 else
2024 ajFmtPrintF(outf,
2025 "%-13.13S\n",
2026 alignSeqName(thys, data->Seq[iseq]));
2027
2028 ipos[iseq] += icnt;
2029 }
2030
2031 if(mark && !pair) /* 3 or more seqs, markup under */
2032 ajFmtPrintF(outf,
2033 " %S\n",
2034 mrkcons);
2035
2036 ajFmtPrintF(outf, "\n");
2037 }
2038 }
2039
2040 ajStrDel(&mrkcons);
2041 ajStrDel(&tmpstr);
2042 ajStrDel(&mrkstr);
2043 ajStrDel(&cons);
2044 AJFREE(ipos);
2045 AJFREE(incs);
2046 AJFREE(pdata);
2047
2048 return;
2049 }
2050
2051
2052
2053
2054 /* @funcstatic alignWriteTCoffee **********************************************
2055 **
2056 ** Writes an alignment as a T-COFFEE library
2057 **
2058 ** @param [u] thys [AjPAlign] Alignment object
2059 ** @return [void]
2060 **
2061 ** @release 2.8.0
2062 ** @@
2063 ******************************************************************************/
2064
alignWriteTCoffee(AjPAlign thys)2065 static void alignWriteTCoffee (AjPAlign thys)
2066 {
2067 AjPFile outf = thys->File;
2068 int nseq;
2069 int nali;
2070 AlignPData* pdata = NULL;
2071 AlignPData data = NULL;
2072 ajint iali;
2073 ajint iseq;
2074 ajint i;
2075 ajint s1,s2;
2076 ajint n1,n2;
2077 ajint ilen;
2078 AjPStr sseq= NULL;
2079 const AjPStr sseq1= NULL;
2080 const AjPStr sseq2= NULL;
2081 ajint pidentity=0;
2082
2083 ajDebug("alignWriteTCoffee\n");
2084
2085 nseq = thys->Nseqs;
2086 nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
2087 thys->SeqOnly = ajTrue; /* suppress output of tail */
2088
2089 /* print header */
2090 ajFmtPrintF(outf, "%d\n", nseq); /* number of sequences */
2091
2092 for(iseq=0; iseq < nseq; iseq++) {
2093 ajStrAssignS(&sseq,ajSeqGetSeqS(pdata[0]->Seq[iseq]));
2094 ajStrRemoveWhite(&sseq);
2095 ajStrExchangeCC(&sseq, "-","");
2096 /* <seqname> <seqlen> <sequence> */
2097 ajFmtPrintF(outf, "%S %d %S\n",
2098 alignSeqName(thys, pdata[0]->Seq[iseq]),
2099 ajStrGetLen(sseq),
2100 sseq);
2101 ajStrDel(&sseq);
2102 }
2103
2104
2105 for(iali = 0; iali<nali; iali++)
2106 {
2107 data = pdata[iali];
2108 ilen = data->LenAli;
2109 pidentity = (int)(0.5 + 100.0 *
2110 (float) data->NumId / (float) data->LenAli);
2111 ajFmtPrintF(outf, "! score=%S\n",data->Score);
2112 ajFmtPrintF(outf, "! matrix=%S\n",thys->Matrix);
2113 ajFmtPrintF(outf, "! gapopen=%S gapext=%S\n",
2114 thys->GapPen,thys->ExtPen);
2115
2116 /* go through all pairwise alignments */
2117 for(s1=0; s1<nseq-1; s1++)
2118 {
2119 sseq1 = ajSeqGetSeqS(pdata[0]->Seq[s1]);
2120
2121 for(s2=s1+1; s2<nseq; s2++)
2122 {
2123 ajFmtPrintF(outf, "#%d %d\n", s1+1, s2+1);
2124 n1 = 1; n2 = 1;
2125 sseq2 = ajSeqGetSeqS(pdata[0]->Seq[s2]);
2126
2127 for(i=0; i<ilen; i++)
2128 {
2129 /* check if position is ungapped */
2130 if(ajStrGetCharPos(sseq1,i)>='A' && ajStrGetCharPos(sseq2,i)>='A')
2131 {
2132 /* output aligned pair,
2133 sequence weight,
2134 residue weight,
2135 match weight
2136 (format as guessed from the T-coffee docs) */
2137 ajFmtPrintF(outf, "%d %d %d %d %d\n",
2138 n1, n2, pidentity, 1, 0);
2139 }
2140
2141 if(ajStrGetCharPos(sseq1,i)>='A')
2142 n1++;
2143
2144 if(ajStrGetCharPos(sseq2,i)>='A')
2145 n2++;
2146 }
2147 }
2148 }
2149 }
2150
2151 /* write footer */
2152 ajFmtPrintF(outf, "! SEQ_1_TO_N\n");
2153
2154 AJFREE(pdata);
2155
2156 return;
2157 }
2158
2159
2160
2161
2162 /* @func ajAlignDefine ********************************************************
2163 **
2164 ** Defines a sequence set as an alignment. The sequences are stored internally
2165 ** and may be edited by alignment processing.
2166 **
2167 ** @param [u] thys [AjPAlign] Alignment object
2168 ** @param [u] seqset [AjPSeqset] Sequence set object
2169 ** @return [AjBool] ajTrue on success
2170 **
2171 ** @release 2.1.0
2172 ** @@
2173 ******************************************************************************/
2174
ajAlignDefine(AjPAlign thys,AjPSeqset seqset)2175 AjBool ajAlignDefine(AjPAlign thys, AjPSeqset seqset)
2176 {
2177 AlignPData data = NULL;
2178 ajint i;
2179 const AjPSeq seq = NULL;
2180
2181 if(!thys->Nseqs)
2182 thys->Nseqs = ajSeqsetGetSize(seqset);
2183
2184
2185 data = alignDataNew(thys->Nseqs, thys->SeqExternal);
2186
2187
2188 for(i=0; i < thys->Nseqs; i++)
2189 {
2190 seq = ajSeqsetGetseqSeq(seqset, i);
2191 alignDataSetSequence(data, seq, i, thys->SeqExternal);
2192 }
2193
2194 data->LenAli = ajSeqsetGetLen(seqset);
2195
2196 ajListPushAppend(thys->Data, data);
2197
2198 return ajTrue;
2199 }
2200
2201
2202
2203
2204 /* @func ajAlignDefineSS ******************************************************
2205 **
2206 ** Defines a sequence pair as an alignment. Either new copies of the
2207 ** sequences or direct references to them are made depending on the value
2208 ** of the SeqExternal attribute of the alignment object.
2209 **
2210 ** @param [u] thys [AjPAlign] Alignment object
2211 ** @param [r] seqa [const AjPSeq] Sequence object
2212 ** @param [r] seqb [const AjPSeq] Second sequence object
2213 ** @return [AjBool] ajTrue on success
2214 **
2215 ** @release 2.1.0
2216 ** @@
2217 ******************************************************************************/
2218
ajAlignDefineSS(AjPAlign thys,const AjPSeq seqa,const AjPSeq seqb)2219 AjBool ajAlignDefineSS(AjPAlign thys, const AjPSeq seqa, const AjPSeq seqb)
2220 {
2221 AlignPData data = NULL;
2222
2223 if(!thys->Nseqs)
2224 thys->Nseqs = 2;
2225
2226 if(thys->Nseqs != 2)
2227 {
2228 ajErr("ajAlignDefineSS called with %d sequences in alignment",
2229 thys->Nseqs);
2230 }
2231
2232 data = alignDataNew(2, thys->SeqExternal);
2233
2234 ajDebug("ajAlignDefineSS '%S' '%S'\n",
2235 ajSeqGetNameS(seqa), ajSeqGetNameS(seqb));
2236
2237 alignDataSetSequence(data, seqa, 0, thys->SeqExternal);
2238
2239 if(!thys->SeqExternal && !ajSeqIsTrimmed(data->RealSeq[0]))
2240 {
2241 ajSeqTrim(data->RealSeq[0]);
2242 }
2243
2244 alignDataSetSequence(data, seqb, 1, thys->SeqExternal);
2245
2246 if(!thys->SeqExternal && !ajSeqIsTrimmed(data->RealSeq[1]))
2247 {
2248 ajSeqTrim(data->RealSeq[1]);
2249 }
2250
2251 data->LenAli = AJMIN(ajSeqGetLen(seqa), ajSeqGetLen(seqb));
2252
2253 ajListPushAppend(thys->Data, data);
2254
2255 return ajTrue;
2256 }
2257
2258
2259
2260
2261 /* @func ajAlignDefineCC ******************************************************
2262 **
2263 ** Defines a pair of char* strings as an alignment
2264 **
2265 ** @param [u] thys [AjPAlign] Alignment object
2266 ** @param [r] seqa [const char*] First sequence
2267 ** @param [r] seqb [const char*] Second sequence
2268 ** @param [r] namea [const char*] Name of first sequence
2269 ** @param [r] nameb [const char*] Name of second sequence
2270 ** @return [AjBool] ajTrue on success
2271 **
2272 ** @release 2.9.0
2273 ** @@
2274 ******************************************************************************/
2275
ajAlignDefineCC(AjPAlign thys,const char * seqa,const char * seqb,const char * namea,const char * nameb)2276 AjBool ajAlignDefineCC(AjPAlign thys, const char* seqa, const char* seqb,
2277 const char* namea, const char* nameb)
2278 {
2279 AlignPData data = NULL;
2280
2281 AjPStr tmpstr = NULL;
2282
2283 if(!thys->Nseqs)
2284 thys->Nseqs = 2;
2285
2286 data = alignDataNew(2, AJFALSE);
2287
2288 ajStrAssignC(&tmpstr, seqa);
2289 data->Start[0] = 1;
2290 data->End[0] = ajStrGetLen(tmpstr);
2291 data->Len[0] = ajStrGetLen(tmpstr) - ajSeqstrCountGaps(tmpstr);
2292 data->Offset[0] = 0;
2293 data->Offend[0] = 0;
2294 data->SubOffset[0] = 0;
2295 data->Rev[0] = ajFalse;
2296
2297 /* no external option - we do need to create the AjPSeqs */
2298
2299 data->RealSeq[0] = ajSeqNewNameC(seqa, namea);
2300 ajSeqGapStandard(data->RealSeq[0], '-');
2301 data->Seq[0] = data->RealSeq[0];
2302
2303 ajStrAssignC(&tmpstr, seqb);
2304 data->Start[1] = 1;
2305 data->End[1] = ajStrGetLen(tmpstr);
2306 data->Len[1] = ajStrGetLen(tmpstr) - ajSeqstrCountGaps(tmpstr);
2307 data->Offset[1] = 0;
2308 data->Offend[1] = 0;
2309 data->SubOffset[1] = 0;
2310 data->Rev[1] = ajFalse;
2311
2312 data->RealSeq[1] = ajSeqNewNameC(seqb, nameb);
2313 ajSeqGapStandard(data->RealSeq[1], '-');
2314 data->Seq[1] = data->RealSeq[1];
2315
2316 data->LenAli = AJMIN(strlen(seqa), strlen(seqb));
2317
2318 ajDebug("ajAlignDefineCC %s %d %s %d\n",
2319 namea, ajSeqGetLen(data->Seq[0]),
2320 nameb, ajSeqGetLen(data->Seq[1]));
2321 ajListPushAppend(thys->Data, data);
2322
2323 ajStrDel(&tmpstr);
2324
2325 return ajTrue;
2326 }
2327
2328
2329
2330
2331 /* @func ajAlignDel ***********************************************************
2332 **
2333 ** Destructor for Alignment objects
2334 **
2335 ** @param [d] pthys [AjPAlign*] Alignment object reference
2336 ** @return [void]
2337 ** @category delete [AjPAlign] Default destructor
2338 **
2339 ** @release 2.1.0
2340 ** @@
2341 ******************************************************************************/
2342
ajAlignDel(AjPAlign * pthys)2343 void ajAlignDel(AjPAlign* pthys)
2344 {
2345 AlignPData data = NULL;
2346 AjPAlign thys;
2347
2348 thys = *pthys;
2349
2350 if(!thys)
2351 return;
2352
2353 ajDebug("ajAlignDel %d seqs\n", thys->Nseqs);
2354 ajStrDel(&thys->Formatstr);
2355 ajStrDel(&thys->Type);
2356 ajStrDel(&thys->Header);
2357 ajStrDel(&thys->SubHeader);
2358 ajStrDel(&thys->Tail);
2359 ajStrDel(&thys->SubTail);
2360 ajStrDel(&thys->Matrix);
2361 ajStrDel(&thys->GapPen);
2362 ajStrDel(&thys->ExtPen);
2363
2364 ajMatrixDel(&thys->IMatrix);
2365 ajMatrixfDel(&thys->FMatrix);
2366
2367 while(ajListPop(thys->Data, (void**) &data))
2368 alignDataDel(&data, thys->SeqExternal);
2369
2370 ajListFree(&thys->Data);
2371
2372 AJFREE(*pthys);
2373
2374 return;
2375 }
2376
2377
2378
2379
2380 /* @func ajAlignReset *********************************************************
2381 **
2382 ** Reset for Alignment objects
2383 **
2384 ** @param [w] thys [AjPAlign] Alignment object reference
2385 ** @return [void]
2386 ** @category modify [AjPAlign] Resets ready for reuse.
2387 **
2388 ** @release 2.1.0
2389 ** @@
2390 ******************************************************************************/
2391
ajAlignReset(AjPAlign thys)2392 void ajAlignReset(AjPAlign thys)
2393 {
2394 AlignPData data = NULL;
2395
2396 while(ajListPop(thys->Data, (void**) &data))
2397 alignDataDel(&data, thys->SeqExternal);
2398
2399 ajListFree(&thys->Data);
2400 thys->Data = ajListNew();
2401
2402 thys->Nseqs = 0;
2403
2404 return;
2405 }
2406
2407
2408
2409
2410 /* @func ajAlignOpen **********************************************************
2411 **
2412 ** Opens a new align file
2413 **
2414 ** @param [u] thys [AjPAlign] Alignment object
2415 ** @param [r] name [const AjPStr] File name
2416 ** @return [AjBool] ajTrue on success
2417 **
2418 ** @release 2.1.0
2419 ** @@
2420 ******************************************************************************/
2421
ajAlignOpen(AjPAlign thys,const AjPStr name)2422 AjBool ajAlignOpen(AjPAlign thys, const AjPStr name)
2423 {
2424 if(!ajAlignValid(thys))
2425 return ajFalse;
2426
2427 thys->File = ajFileNewOutNameS(name);
2428
2429 if(thys->File)
2430 return ajTrue;
2431
2432 return ajFalse;
2433 }
2434
2435
2436
2437
2438 /* @func ajAlignFormatDefault *************************************************
2439 **
2440 ** Sets the default format for an alignment
2441 **
2442 ** @param [w] pformat [AjPStr*] Default format returned
2443 ** @return [AjBool] ajTrue if format was returned
2444 **
2445 ** @release 2.1.0
2446 ** @@
2447 ******************************************************************************/
2448
ajAlignFormatDefault(AjPStr * pformat)2449 AjBool ajAlignFormatDefault(AjPStr* pformat)
2450 {
2451 if(ajStrGetLen(*pformat))
2452 ajDebug("... output format '%S'\n", *pformat);
2453 else
2454 {
2455 /* ajStrAssignEmptyC(pformat, alignFormat[0].Name);*/
2456 ajStrAssignEmptyC(pformat, "gff"); /* use the real name */
2457 ajDebug("... output format not set, default to '%S'\n", *pformat);
2458 }
2459
2460 return ajTrue;
2461 }
2462
2463
2464
2465
2466 /* @func ajAlignGetLen ********************************************************
2467 **
2468 ** Returns the filename for an alignment. If the alignment has more than one
2469 ** sub-alignment, returns the total.
2470 **
2471 ** @param [r] thys [const AjPAlign] Alignment object.
2472 ** @return [ajint] Alignment length.
2473 **
2474 ** @release 4.1.0
2475 ** @@
2476 ******************************************************************************/
2477
ajAlignGetLen(const AjPAlign thys)2478 ajint ajAlignGetLen(const AjPAlign thys)
2479 {
2480 ajint ret = 0;
2481 ajint i;
2482 ajint nali;
2483
2484 AlignPData* pdata = NULL;
2485 AlignPData data = NULL;
2486
2487 if(!thys)
2488 return 0;
2489
2490 if(!thys->Data)
2491 return 0;
2492
2493 nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
2494
2495 for(i=0; i<nali; i++)
2496 {
2497 data = pdata[i];
2498 ret += data->LenAli;
2499 }
2500
2501 AJFREE(pdata);
2502
2503 return ret;
2504 }
2505
2506
2507
2508
2509 /* @func ajAlignGetFilename ***************************************************
2510 **
2511 ** Returns the filename for an alignment.
2512 **
2513 ** @param [r] thys [const AjPAlign] Alignment object.
2514 ** @return [const char*] Filename.
2515 **
2516 ** @release 4.0.0
2517 ** @@
2518 ******************************************************************************/
2519
ajAlignGetFilename(const AjPAlign thys)2520 const char * ajAlignGetFilename(const AjPAlign thys)
2521 {
2522 if(!thys)
2523 return NULL;
2524
2525 if(!thys->File)
2526 return NULL;
2527
2528 return ajFileGetPrintnameC(thys->File);
2529 }
2530
2531
2532
2533
2534 /* @func ajAlignGetFormat *****************************************************
2535 **
2536 ** Returns the sequence format for an alignment.
2537 **
2538 ** @param [r] thys [const AjPAlign] Alignment object.
2539 ** @return [const AjPStr] Alignment format
2540 **
2541 ** @release 4.0.0
2542 ** @@
2543 ******************************************************************************/
2544
ajAlignGetFormat(const AjPAlign thys)2545 const AjPStr ajAlignGetFormat(const AjPAlign thys)
2546 {
2547 if(!thys)
2548 return NULL;
2549
2550 return thys->Formatstr;
2551 }
2552
2553
2554
2555
2556 /* @func ajAlignFormatShowsSequences ******************************************
2557 **
2558 ** Returns whether current alignment format includes sequences in output
2559 **
2560 ** @param [r] thys [const AjPAlign] Alignment object
2561 ** @return [AjBool] true if sequences appear in output
2562 **
2563 ** @release 6.2.0
2564 ** @@
2565 ******************************************************************************/
2566
ajAlignFormatShowsSequences(const AjPAlign thys)2567 AjBool ajAlignFormatShowsSequences(const AjPAlign thys)
2568 {
2569 if(!thys)
2570 return AJTRUE;
2571
2572 return alignFormat[thys->Format].Showseqs;
2573 }
2574
2575
2576
2577
2578 /* @func ajAlignFindFormat ****************************************************
2579 **
2580 ** Looks for the specified align format in the internal definitions and
2581 ** returns the index.
2582 **
2583 ** @param [r] format [const AjPStr] Format required.
2584 ** @param [w] iformat [ajint*] Index
2585 ** @return [AjBool] ajTrue on success.
2586 **
2587 ** @release 2.1.0
2588 ** @@
2589 ******************************************************************************/
2590
ajAlignFindFormat(const AjPStr format,ajint * iformat)2591 AjBool ajAlignFindFormat(const AjPStr format, ajint* iformat)
2592 {
2593 AjPStr tmpformat = NULL;
2594 ajint i = 0;
2595
2596 ajDebug("ajAlignFindFormat '%S'\n", format);
2597
2598 if(!ajStrGetLen(format))
2599 return ajFalse;
2600
2601 ajStrAssignS(&tmpformat, format);
2602 ajStrFmtLower(&tmpformat);
2603
2604 while(alignFormat[i].Name)
2605 {
2606 if(ajStrMatchCaseC(tmpformat, alignFormat[i].Name))
2607 {
2608 *iformat = i;
2609 ajStrDel(&tmpformat);
2610 ajDebug("... found at %d\n", i);
2611
2612 return ajTrue;
2613 }
2614 i++;
2615 }
2616
2617 ajDebug("... not found\n");
2618 ajStrDel(&tmpformat);
2619 return ajFalse;
2620 }
2621
2622
2623
2624
2625 /* @func ajAlignValid *********************************************************
2626 **
2627 ** Test for an alignment object.
2628 **
2629 ** Checks the format works with the number of sequences.
2630 ** Checks the format works with the type (protein or nucleotide).
2631 **
2632 ** @param [u] thys [AjPAlign] Alignment object
2633 ** @return [AjBool] ajTrue on success
2634 **
2635 ** @release 2.1.0
2636 ** @@
2637 ******************************************************************************/
2638
ajAlignValid(AjPAlign thys)2639 AjBool ajAlignValid(AjPAlign thys)
2640 {
2641 ajDebug("ajAlignValid format '%S' %d Nmin %d Nmax %d\n",
2642 thys->Formatstr, thys->Format, thys->Nmin, thys->Nmax);
2643
2644 if(!thys->Format) /* test acdc-alignbadformat */
2645 {
2646 if(ajStrGetLen(thys->Formatstr))
2647 {
2648 if(!ajAlignFindFormat(thys->Formatstr, &thys->Format))
2649 {
2650 ajErr("Unknown alignment format '%S'", thys->Formatstr);
2651 return ajFalse;
2652 }
2653 }
2654 else
2655 {
2656 ajStrAssignC(&thys->Formatstr,alignDefformat);
2657 ajAlignFindFormat(thys->Formatstr, &thys->Format);
2658 }
2659 }
2660
2661 if( alignFormat[thys->Format].Minseq &&
2662 thys->Nmin < alignFormat[thys->Format].Minseq)
2663 {
2664 ajErr("Alignment format %s specifies at least %d sequences, "
2665 "alignment has only %d",
2666 alignFormat[thys->Format].Name,
2667 alignFormat[thys->Format].Minseq, thys->Nmin);
2668 return ajFalse;
2669 }
2670
2671 if( alignFormat[thys->Format].Maxseq &&
2672 thys->Nmax > alignFormat[thys->Format].Maxseq)
2673 {
2674 ajErr("Alignment format %s specifies at most %d sequences, "
2675 "alignment has %d",
2676 alignFormat[thys->Format].Name,
2677 alignFormat[thys->Format].Maxseq, thys->Nmax);
2678 return ajFalse;
2679 }
2680
2681 if(thys->Width < 10)
2682 {
2683 ajWarn("Alignment width (-awidth=%d) too narrow, reset to 10",
2684 thys->Width);
2685 thys->Width=10;
2686 }
2687
2688 return ajTrue;
2689 }
2690
2691
2692
2693
2694 /* @func ajAlignNew ***********************************************************
2695 **
2696 ** Constructor for an alignment object
2697 **
2698 ** @return [AjPAlign] New Alignment object
2699 ** @category new [AjPAlign] Default constructor
2700 **
2701 ** @release 2.1.0
2702 ** @@
2703 ******************************************************************************/
2704
ajAlignNew(void)2705 AjPAlign ajAlignNew(void)
2706 {
2707 AjPAlign pthis;
2708
2709 AJNEW0(pthis);
2710
2711 pthis->Count = 0;
2712 pthis->Formatstr = ajStrNew();
2713 pthis->Format = 0;
2714 pthis->File = NULL;
2715 pthis->Data = ajListNew();
2716 pthis->RefSeq = 0;
2717
2718 return pthis;
2719 }
2720
2721
2722
2723
2724 /* @func ajAlignWrite *********************************************************
2725 **
2726 ** Writes an alignment file
2727 **
2728 ** @param [u] thys [AjPAlign] Alignment object
2729 ** @return [void]
2730 ** @category output [AjPAlign] Master alignment output routine
2731 **
2732 ** @release 2.1.0
2733 ** @@
2734 ******************************************************************************/
2735
ajAlignWrite(AjPAlign thys)2736 void ajAlignWrite(AjPAlign thys)
2737 {
2738 ajDebug("ajAlignWrite\n");
2739
2740 ajAlignTraceT(thys, "ajAlignWrite start");
2741
2742 if(!thys->Format)
2743 if(!ajAlignFindFormat(thys->Formatstr, &thys->Format))
2744 ajErr("unknown align format '%S'", thys->Formatstr);
2745
2746 ajDebug("ajAlignWrite %d '%s'\n",
2747 thys->Format, alignFormat[thys->Format].Name);
2748
2749 ajAlignSetType(thys);
2750
2751 /* Calling funclist alignFormat() */
2752
2753 (*alignFormat[thys->Format].Write)(thys);
2754
2755 return;
2756 }
2757
2758
2759
2760
2761 /* @func ajAlignClose *********************************************************
2762 **
2763 ** Closes an alignment
2764 **
2765 ** @param [u] thys [AjPAlign] Alignment object
2766 ** @return [void]
2767 **
2768 ** @release 2.1.0
2769 ** @@
2770 ******************************************************************************/
2771
ajAlignClose(AjPAlign thys)2772 void ajAlignClose(AjPAlign thys)
2773 {
2774 ajDebug("ajAlignClose '%F'\n", thys->File);
2775
2776 if(!thys->SeqOnly)
2777 ajAlignWriteTail(thys);
2778
2779 ajFileClose(&thys->File);
2780
2781 return;
2782 }
2783
2784
2785
2786
2787 /* @func ajAlignWriteHeader ***************************************************
2788 **
2789 ** Writes an alignment header.
2790 **
2791 ** @param [u] thys [AjPAlign] Alignment object. Internal count is updated
2792 ** to avoid duplicate headers
2793 ** @return [void]
2794 ** @category output [AjPAlign] Master header output routine
2795 **
2796 ** @release 2.1.0
2797 ** @@
2798 ******************************************************************************/
2799
ajAlignWriteHeader(AjPAlign thys)2800 void ajAlignWriteHeader(AjPAlign thys)
2801 {
2802 alignWriteHeaderNum(thys, 0);
2803
2804 return;
2805 }
2806
2807
2808
2809
2810 /* @funcstatic alignWriteHeaderNum ********************************************
2811 **
2812 ** Writes an alignment header.
2813 **
2814 ** @param [u] thys [AjPAlign] Alignment object. Internal count is updated
2815 ** to avoid duplicate headers
2816 ** @param [r] iali [ajint] Alignment number
2817 ** @return [void]
2818 ** @category output [AjPAlign] Master header output routine
2819 **
2820 ** @release 6.0.0
2821 ** @@
2822 ******************************************************************************/
2823
alignWriteHeaderNum(AjPAlign thys,ajint iali)2824 static void alignWriteHeaderNum(AjPAlign thys, ajint iali)
2825 {
2826 AjPFile outf;
2827 AjPStr tmpstr = NULL;
2828 AjBool doSingle;
2829 ajint i;
2830 const AjPSeq seq;
2831
2832 if(!alignFormat[thys->Format].Showheader)
2833 return;
2834
2835 outf = thys->File;
2836 doSingle = ajFalse; /* turned off for now - always multi format */
2837
2838 if(!thys->Count)
2839 {
2840 ajFmtPrintF(outf, "########################################\n");
2841 ajFmtPrintF(outf, "# Program: %S\n", ajUtilGetProgram());
2842 ajFmtPrintF(outf, "# Rundate: %D\n", ajTimeRefTodayFmt("report"));
2843 ajFmtPrintF(outf, "# Commandline: %S\n", ajUtilGetProgram());
2844 ajStrAssignS(&tmpstr, ajUtilGetCmdline());
2845
2846 if(ajStrGetLen(tmpstr))
2847 {
2848 ajStrExchangeCC(&tmpstr, "\n", "\1# ");
2849 ajStrExchangeCC(&tmpstr, "\1", "\n");
2850 ajFmtPrintF(outf, "# %S\n", tmpstr);
2851 }
2852
2853 ajStrAssignS(&tmpstr, ajUtilGetInputs());
2854
2855 if(ajStrGetLen(tmpstr))
2856 {
2857 ajStrExchangeCC(&tmpstr, "\n", "\1# ");
2858 ajStrExchangeCC(&tmpstr, "\1", "\n");
2859 ajFmtPrintF(outf, "# %S\n", tmpstr);
2860 }
2861
2862 ajFmtPrintF(outf, "# Align_format: %S\n", thys->Formatstr);
2863 ajFmtPrintF(outf, "# Report_file: %F\n", outf);
2864
2865 if(!doSingle || thys->Multi)
2866 ajFmtPrintF(outf, "########################################\n");
2867 else
2868 ajFmtPrintF(outf, "#\n");
2869 }
2870
2871 if(!doSingle || thys->Multi)
2872 ajFmtPrintF(outf, "\n#=======================================\n#\n");
2873
2874 ajFmtPrintF(outf, "# Aligned_sequences: %d\n", thys->Nseqs);
2875
2876 for(i=0; i < thys->Nseqs; i++)
2877 {
2878 ajStrAssignClear(&tmpstr);
2879 seq = alignSeq(thys,i, iali);
2880
2881 if(thys->Showacc)
2882 ajFmtPrintAppS(&tmpstr, " (%S)",
2883 ajSeqGetAccS(seq));
2884 if(thys->Showdes)
2885 ajFmtPrintAppS(&tmpstr, " %S",
2886 ajSeqGetDescS(seq));
2887 ajFmtPrintF(outf, "# %d: %S%S\n",
2888 i+1, alignSeqName(thys, seq), tmpstr);
2889 }
2890
2891 if(ajStrGetLen(thys->Matrix))
2892 ajFmtPrintF(outf, "# Matrix: %S\n", thys->Matrix);
2893
2894 if(ajStrGetLen(thys->GapPen))
2895 ajFmtPrintF(outf, "# Gap_penalty: %S\n", thys->GapPen);
2896
2897 if(ajStrGetLen(thys->ExtPen))
2898 ajFmtPrintF(outf, "# Extend_penalty: %S\n", thys->ExtPen);
2899
2900 if(ajStrGetLen(thys->Header))
2901 {
2902 ajStrAssignS(&tmpstr, thys->Header);
2903 ajStrExchangeCC(&tmpstr, "\n", "\1# ");
2904 ajStrExchangeCC(&tmpstr, "\1", "\n");
2905 ajFmtPrintF(outf, "#\n");
2906 ajFmtPrintF(outf, "# %S\n", tmpstr);
2907 ajFmtPrintF(outf, "#\n");
2908 }
2909
2910 if(ajStrGetLen(thys->SubHeader))
2911 {
2912 ajStrAssignS(&tmpstr, thys->SubHeader);
2913 ajStrExchangeCC(&tmpstr, "\n", "\1# ");
2914 ajStrExchangeCC(&tmpstr, "\1", "\n");
2915 ajFmtPrintF(outf, "#\n");
2916 ajFmtPrintF(outf, "# %S\n", tmpstr);
2917 ajFmtPrintF(outf, "#\n");
2918 ajStrDel(&thys->SubHeader);
2919 }
2920
2921 if(!doSingle || thys->Multi)
2922 ajFmtPrintF(outf, "#=======================================\n\n");
2923 else
2924 ajFmtPrintF(outf, "########################################\n\n");
2925
2926 ++thys->Count;
2927
2928 ajStrDel(&tmpstr);
2929
2930 return;
2931 }
2932
2933
2934
2935
2936 /* @func ajAlignWriteTail *****************************************************
2937 **
2938 ** Writes an alignment tail
2939 **
2940 ** @param [u] thys [AjPAlign] Alignment object
2941 ** @return [void]
2942 ** @category output [AjPAlign] Master footer output routine
2943 **
2944 ** @release 2.1.0
2945 ** @@
2946 ******************************************************************************/
2947
ajAlignWriteTail(AjPAlign thys)2948 void ajAlignWriteTail(AjPAlign thys)
2949 {
2950 AjPFile outf;
2951 AjPStr tmpstr = NULL;
2952 AjBool doSingle;
2953
2954 if(!alignFormat[thys->Format].Showheader)
2955 return;
2956
2957 outf = thys->File;
2958 doSingle = ajFalse; /* turned off for now - always multi format */
2959
2960 ajFmtPrintF(outf, "\n");
2961
2962 if(!doSingle || thys->Multi)
2963 ajFmtPrintF(outf, "#---------------------------------------\n");
2964 else
2965 ajFmtPrintF(outf, "\n########################################\n");
2966
2967 if(ajStrGetLen(thys->SubTail))
2968 {
2969 ajStrAssignS(&tmpstr, thys->SubTail);
2970 ajStrExchangeCC(&tmpstr, "\n", "\1# ");
2971 ajStrExchangeCC(&tmpstr, "\1", "\n");
2972 ajFmtPrintF(outf, "#\n");
2973 ajFmtPrintF(outf, "# %S\n", tmpstr);
2974 ajFmtPrintF(outf, "#\n");
2975 ajStrDel(&thys->SubTail);
2976 }
2977
2978 if(ajStrGetLen(thys->Tail))
2979 {
2980 ajStrAssignS(&tmpstr, thys->Tail);
2981 ajStrExchangeCC(&tmpstr, "\n", "\1# ");
2982 ajStrExchangeCC(&tmpstr, "\1", "\n");
2983 ajFmtPrintF(outf, "#\n");
2984 ajFmtPrintF(outf, "# %S\n", tmpstr);
2985 ajFmtPrintF(outf, "#\n");
2986 }
2987
2988 if(!doSingle || thys->Multi)
2989 ajFmtPrintF(outf, "#---------------------------------------\n");
2990 else
2991 ajFmtPrintF(outf, "########################################\n");
2992
2993 ajStrDel(&tmpstr);
2994
2995 return;
2996 }
2997
2998
2999
3000
3001 /* @func ajAlignSetHeader *****************************************************
3002 **
3003 ** Defines an alignment header
3004 **
3005 ** @param [u] thys [AjPAlign] Alignment object
3006 ** @param [r] header [const AjPStr] Align header with embedded newlines
3007 ** @return [void]
3008 **
3009 ** @release 2.1.0
3010 ** @@
3011 ******************************************************************************/
3012
ajAlignSetHeader(AjPAlign thys,const AjPStr header)3013 void ajAlignSetHeader(AjPAlign thys, const AjPStr header)
3014 {
3015 ajStrAssignS(&thys->Header, header);
3016
3017 ajDebug("ajAlignSetHeader len %d '%S'\n",
3018 ajStrGetLen(thys->Header), header);
3019
3020 return;
3021 }
3022
3023
3024
3025
3026 /* @func ajAlignSetHeaderC ****************************************************
3027 **
3028 ** Defines an alignment header
3029 **
3030 ** @param [u] thys [AjPAlign] Alignment object
3031 ** @param [r] header [const char*] Align header with embedded newlines
3032 ** @return [void]
3033 **
3034 ** @release 2.1.0
3035 ** @@
3036 ******************************************************************************/
3037
ajAlignSetHeaderC(AjPAlign thys,const char * header)3038 void ajAlignSetHeaderC(AjPAlign thys, const char* header)
3039 {
3040 ajStrAssignC(&thys->Header, header);
3041
3042 ajDebug("ajAlignSetHeaderC len %d '%S'\n",
3043 ajStrGetLen(thys->Header), header);
3044
3045 return;
3046 }
3047
3048
3049
3050
3051 /* @func ajAlignSetHeaderApp **************************************************
3052 **
3053 ** Appends to an alignment header
3054 **
3055 ** @param [u] thys [AjPAlign] Alignment object
3056 ** @param [r] header [const AjPStr] Align header with embedded newlines
3057 ** @return [void]
3058 **
3059 ** @release 2.1.0
3060 ** @@
3061 ******************************************************************************/
3062
ajAlignSetHeaderApp(AjPAlign thys,const AjPStr header)3063 void ajAlignSetHeaderApp(AjPAlign thys, const AjPStr header)
3064 {
3065 if(ajStrGetLen(thys->Header) && ajStrGetCharLast(thys->Header) != '\n')
3066 ajStrAppendC(&thys->Header, "\n");
3067
3068 ajStrAppendS(&thys->Header, header);
3069
3070 ajDebug("ajAlignSetHeaderApp len %d '%S'\n",
3071 ajStrGetLen(thys->Header), header);
3072
3073 return;
3074 }
3075
3076
3077
3078
3079 /* @func ajAlignSetTail *******************************************************
3080 **
3081 ** Defines an alignment tail
3082 **
3083 ** @param [u] thys [AjPAlign] Alignment object
3084 ** @param [r] tail [const AjPStr] Align tail with embedded newlines
3085 ** @return [void]
3086 **
3087 ** @release 2.1.0
3088 ** @@
3089 ******************************************************************************/
3090
ajAlignSetTail(AjPAlign thys,const AjPStr tail)3091 void ajAlignSetTail(AjPAlign thys, const AjPStr tail)
3092 {
3093 ajStrAssignS(&thys->Tail, tail);
3094
3095 ajDebug("ajAlignSetTail len %d '%S'\n",
3096 ajStrGetLen(thys->Tail), tail);
3097
3098 return;
3099 }
3100
3101
3102
3103
3104 /* @func ajAlignSetTailC ******************************************************
3105 **
3106 ** Defines an alignment tail
3107 **
3108 ** @param [u] thys [AjPAlign] Alignment object
3109 ** @param [r] tail [const char*] Align tail with embedded newlines
3110 ** @return [void]
3111 **
3112 ** @release 2.1.0
3113 ** @@
3114 ******************************************************************************/
3115
ajAlignSetTailC(AjPAlign thys,const char * tail)3116 void ajAlignSetTailC(AjPAlign thys, const char* tail)
3117 {
3118 ajStrAssignC(&thys->Tail, tail);
3119
3120 ajDebug("ajAlignSetTailC len %d '%S'\n",
3121 ajStrGetLen(thys->Tail), tail);
3122
3123 return;
3124 }
3125
3126
3127
3128
3129 /* @func ajAlignSetTailApp ****************************************************
3130 **
3131 ** Appends to an alignment tail
3132 **
3133 ** @param [u] thys [AjPAlign] Alignment object
3134 ** @param [r] tail [const AjPStr] Align tail with embedded newlines
3135 ** @return [void]
3136 **
3137 ** @release 2.1.0
3138 ** @@
3139 ******************************************************************************/
3140
ajAlignSetTailApp(AjPAlign thys,const AjPStr tail)3141 void ajAlignSetTailApp(AjPAlign thys, const AjPStr tail)
3142 {
3143 if(ajStrGetLen(thys->Tail) && ajStrGetCharLast(thys->Tail) != '\n')
3144 ajStrAppendC(&thys->Tail, "\n");
3145
3146 ajStrAppendS(&thys->Tail, tail);
3147
3148 ajDebug("ajAlignSetTailApp len %d '%S'\n",
3149 ajStrGetLen(thys->Tail), tail);
3150
3151 return;
3152 }
3153
3154
3155
3156
3157 /* @func ajAlignSetSubTail ****************************************************
3158 **
3159 ** Defines an alignment tail
3160 **
3161 ** @param [u] thys [AjPAlign] Alignment object
3162 ** @param [r] tail [const AjPStr] Align tail with embedded newlines
3163 ** @return [void]
3164 **
3165 ** @release 2.9.0
3166 ** @@
3167 ******************************************************************************/
3168
ajAlignSetSubTail(AjPAlign thys,const AjPStr tail)3169 void ajAlignSetSubTail(AjPAlign thys, const AjPStr tail)
3170 {
3171 ajStrAssignS(&thys->SubTail, tail);
3172
3173 ajDebug("ajAlignSetSubTail len %d '%S'\n",
3174 ajStrGetLen(thys->SubTail), tail);
3175
3176 return;
3177 }
3178
3179
3180
3181
3182 /* @func ajAlignSetSubTailC ***************************************************
3183 **
3184 ** Defines an alignment tail
3185 **
3186 ** @param [u] thys [AjPAlign] Alignment object
3187 ** @param [r] tail [const char*] Align tail with embedded newlines
3188 ** @return [void]
3189 **
3190 ** @release 2.9.0
3191 ** @@
3192 ******************************************************************************/
3193
ajAlignSetSubTailC(AjPAlign thys,const char * tail)3194 void ajAlignSetSubTailC(AjPAlign thys, const char* tail)
3195 {
3196 ajStrAssignC(&thys->SubTail, tail);
3197
3198 ajDebug("ajAlignSetSubTailC len %d '%S'\n",
3199 ajStrGetLen(thys->SubTail), tail);
3200
3201 return;
3202 }
3203
3204
3205
3206
3207 /* @func ajAlignSetSubTailApp *************************************************
3208 **
3209 ** Appends to an alignment tail
3210 **
3211 ** @param [u] thys [AjPAlign] Alignment object
3212 ** @param [r] tail [const AjPStr] Align tail with embedded newlines
3213 ** @return [void]
3214 **
3215 ** @release 2.9.0
3216 ** @@
3217 ******************************************************************************/
3218
ajAlignSetSubTailApp(AjPAlign thys,const AjPStr tail)3219 void ajAlignSetSubTailApp(AjPAlign thys, const AjPStr tail)
3220 {
3221 if(ajStrGetLen(thys->SubTail) && ajStrGetCharLast(thys->SubTail) != '\n')
3222 ajStrAppendC(&thys->SubTail, "\n");
3223
3224 ajStrAppendS(&thys->SubTail, tail);
3225
3226 ajDebug("ajAlignSetSubTailApp len %d '%S'\n",
3227 ajStrGetLen(thys->SubTail), tail);
3228
3229 return;
3230 }
3231
3232
3233
3234
3235 /* @func ajAlignSetSubHeader **************************************************
3236 **
3237 ** Defines an alignment subheader (cleared after printing so it can
3238 ** be set again for the next alignment)
3239 **
3240 ** @param [u] thys [AjPAlign] Alignment object
3241 ** @param [r] subheader [const AjPStr] Align subheader with embedded newlines
3242 ** @return [void]
3243 **
3244 ** @release 2.1.0
3245 ** @@
3246 ******************************************************************************/
3247
ajAlignSetSubHeader(AjPAlign thys,const AjPStr subheader)3248 void ajAlignSetSubHeader(AjPAlign thys, const AjPStr subheader)
3249 {
3250 ajStrAssignS(&thys->SubHeader, subheader);
3251
3252 ajDebug("ajAlignSetSubHeader len %d '%S'\n",
3253 ajStrGetLen(thys->SubHeader), subheader);
3254
3255 return;
3256 }
3257
3258
3259
3260
3261 /* @func ajAlignSetSubHeaderC *************************************************
3262 **
3263 ** Defines an alignment header (cleared after printing so it can
3264 ** be set again for the next alignment)
3265 **
3266 ** @param [u] thys [AjPAlign] Alignment object
3267 ** @param [r] subheader [const char*] Align subheader with embedded newlines
3268 ** @return [void]
3269 **
3270 ** @release 2.1.0
3271 ** @@
3272 ******************************************************************************/
3273
ajAlignSetSubHeaderC(AjPAlign thys,const char * subheader)3274 void ajAlignSetSubHeaderC(AjPAlign thys, const char* subheader)
3275 {
3276 ajStrAssignC(&thys->Header, subheader);
3277
3278 ajDebug("ajAlignSetSubHeaderC len %d '%S'\n",
3279 ajStrGetLen(thys->SubHeader), subheader);
3280
3281 return;
3282 }
3283
3284
3285
3286
3287 /* @func ajAlignSetSubHeaderApp ***********************************************
3288 **
3289 ** Appends to an alignment subheader (cleared after printing so it can
3290 ** be set again for the next alignment)
3291 **
3292 ** @param [u] thys [AjPAlign] Alignment object
3293 ** @param [r] subheader [const AjPStr] Align subheader with embedded newlines
3294 ** @return [void]
3295 **
3296 ** @release 2.5.1
3297 ** @@
3298 ******************************************************************************/
3299
ajAlignSetSubHeaderApp(AjPAlign thys,const AjPStr subheader)3300 void ajAlignSetSubHeaderApp(AjPAlign thys, const AjPStr subheader)
3301 {
3302 if(ajStrGetLen(thys->SubHeader) &&
3303 ajStrGetCharLast(thys->SubHeader) != '\n')
3304 ajStrAppendC(&thys->SubHeader, "\n");
3305
3306 ajStrAppendS(&thys->SubHeader, subheader);
3307
3308 ajDebug("ajAlignSetSubHeaderApp len %d '%S'\n",
3309 ajStrGetLen(thys->SubHeader), subheader);
3310
3311 return;
3312 }
3313
3314
3315
3316
3317 /* @func ajAlignSetSubHeaderPre ***********************************************
3318 **
3319 ** Prepends to an alignment subheader (cleared after printing so it can
3320 ** be set again for the next alignment)
3321 **
3322 ** @param [u] thys [AjPAlign] Alignment object
3323 ** @param [r] subheader [const AjPStr] Align subheader with embedded newlines
3324 ** @return [void]
3325 **
3326 ** @release 2.5.1
3327 ** @@
3328 ******************************************************************************/
3329
ajAlignSetSubHeaderPre(AjPAlign thys,const AjPStr subheader)3330 void ajAlignSetSubHeaderPre(AjPAlign thys, const AjPStr subheader)
3331 {
3332 if(ajStrGetLen(thys->SubHeader) && ajStrGetCharLast(subheader) != '\n')
3333 ajStrInsertC(&thys->SubHeader, 0, "\n");
3334
3335 ajStrInsertS(&thys->SubHeader, 0, subheader);
3336
3337 ajDebug("ajAlignSetSubHeaderPre len %d '%S'\n",
3338 ajStrGetLen(thys->SubHeader), subheader);
3339
3340 return;
3341 }
3342
3343
3344
3345
3346 /* @func ajAlignSetMatrixName *************************************************
3347 **
3348 ** Defines an alignment matrix
3349 **
3350 ** @param [u] thys [AjPAlign] Alignment object
3351 ** @param [r] matrix [const AjPStr] Matrix name
3352 ** @return [void]
3353 **
3354 ** @release 2.1.0
3355 ** @@
3356 ******************************************************************************/
3357
ajAlignSetMatrixName(AjPAlign thys,const AjPStr matrix)3358 void ajAlignSetMatrixName(AjPAlign thys, const AjPStr matrix)
3359 {
3360 ajAlignSetMatrixNameC(thys, ajStrGetPtr(matrix));
3361
3362 return;
3363 }
3364
3365
3366
3367
3368 /* @func ajAlignSetMatrixNameC ************************************************
3369 **
3370 ** Defines an alignment matrix
3371 **
3372 ** @param [u] thys [AjPAlign] Alignment object
3373 ** @param [r] matrix [const char*] Matrix name
3374 ** @return [void]
3375 **
3376 ** @release 2.1.0
3377 ** @@
3378 ******************************************************************************/
3379
ajAlignSetMatrixNameC(AjPAlign thys,const char * matrix)3380 void ajAlignSetMatrixNameC(AjPAlign thys, const char* matrix)
3381 {
3382 ajStrAssignC(&thys->Matrix, matrix);
3383
3384 return;
3385 }
3386
3387
3388
3389
3390 /* @func ajAlignSetMatrixInt **************************************************
3391 **
3392 ** Defines an alignment matrix
3393 **
3394 ** @param [u] thys [AjPAlign] Alignment object
3395 ** @param [u] matrix [AjPMatrix] Matrix object
3396 ** @return [void]
3397 **
3398 ** @release 2.1.0
3399 ** @@
3400 ******************************************************************************/
3401
ajAlignSetMatrixInt(AjPAlign thys,AjPMatrix matrix)3402 void ajAlignSetMatrixInt(AjPAlign thys, AjPMatrix matrix)
3403 {
3404 if(!thys->IMatrix)
3405 {
3406 thys->IMatrix = matrix;
3407 ajAlignSetMatrixName(thys, ajMatrixGetName(matrix));
3408 }
3409
3410 if(thys->FMatrix)
3411 ajMatrixfDel(&thys->FMatrix);
3412
3413 return;
3414 }
3415
3416
3417
3418
3419 /* @func ajAlignSetMatrixFloat ************************************************
3420 **
3421 ** Defines an alignment matrix
3422 **
3423 ** @param [u] thys [AjPAlign] Alignment object
3424 ** @param [u] matrix [AjPMatrixf] Matrix (floating point version) object
3425 ** @return [void]
3426 **
3427 ** @release 2.1.0
3428 ** @@
3429 ******************************************************************************/
3430
ajAlignSetMatrixFloat(AjPAlign thys,AjPMatrixf matrix)3431 void ajAlignSetMatrixFloat(AjPAlign thys, AjPMatrixf matrix)
3432 {
3433 if(!thys->FMatrix)
3434 {
3435 thys->FMatrix = matrix;
3436 ajAlignSetMatrixName(thys, ajMatrixfGetName(matrix));
3437 }
3438
3439 if(thys->IMatrix)
3440 ajMatrixDel(&thys->IMatrix);
3441
3442 return;
3443 }
3444
3445
3446
3447
3448 /* @func ajAlignSetGapI *******************************************************
3449 **
3450 ** Defines alignment gap penalties
3451 **
3452 ** @param [u] thys [AjPAlign] Alignment object
3453 ** @param [r] gappen [ajint] Gap penalty
3454 ** @param [r] extpen [ajint] Gap extension penalty
3455 ** @return [void]
3456 **
3457 ** @release 2.1.0
3458 ** @@
3459 ******************************************************************************/
3460
ajAlignSetGapI(AjPAlign thys,ajint gappen,ajint extpen)3461 void ajAlignSetGapI(AjPAlign thys, ajint gappen, ajint extpen)
3462 {
3463 AjPStr tmpstr = NULL;
3464
3465 ajFmtPrintS(&tmpstr, "%d", gappen);
3466 ajStrAssignS(&thys->GapPen, tmpstr);
3467
3468 ajFmtPrintS(&tmpstr, "%d", extpen);
3469 ajStrAssignS(&thys->ExtPen, tmpstr);
3470
3471 ajStrDel(&tmpstr);
3472
3473 return;
3474 }
3475
3476
3477
3478
3479 /* @func ajAlignSetGapR *******************************************************
3480 **
3481 ** Defines alignment gap penalties
3482 **
3483 ** @param [u] thys [AjPAlign] Alignment object
3484 ** @param [r] gappen [float] Gap penalty
3485 ** @param [r] extpen [float] Gap extension penalty
3486 ** @return [void]
3487 ** @@
3488 **
3489 ** @release 2.1.0
3490 *****************************************************************************/
3491
ajAlignSetGapR(AjPAlign thys,float gappen,float extpen)3492 void ajAlignSetGapR(AjPAlign thys, float gappen, float extpen)
3493 {
3494 AjPStr tmpstr = NULL;
3495
3496 ajint precision = 3;
3497 ajint i;
3498
3499 ajFmtPrintS(&tmpstr, "%.*f", precision, gappen);
3500
3501 for(i=1; i<precision; i++)
3502 {
3503 if(ajStrGetCharLast(tmpstr) != '0') break;
3504 ajStrCutEnd(&tmpstr, 1);
3505 }
3506
3507 ajStrAssignS(&thys->GapPen, tmpstr);
3508
3509 ajFmtPrintS(&tmpstr, "%.*f", precision, extpen);
3510
3511 for(i=1; i<precision; i++)
3512 {
3513 if(ajStrGetCharLast(tmpstr) != '0') break;
3514 ajStrCutEnd(&tmpstr, 1);
3515 }
3516
3517 ajStrAssignS(&thys->ExtPen, tmpstr);
3518
3519 ajStrDel(&tmpstr);
3520 return;
3521 }
3522
3523
3524
3525
3526 /* @func ajAlignSetScoreI *****************************************************
3527 **
3528 ** Defines alignment score
3529 **
3530 ** @param [u] thys [AjPAlign] Alignment object
3531 ** @param [r] score [ajint] score
3532 ** @return [void]
3533 **
3534 ** @release 2.1.0
3535 ** @@
3536 ******************************************************************************/
3537
ajAlignSetScoreI(AjPAlign thys,ajint score)3538 void ajAlignSetScoreI(AjPAlign thys, ajint score)
3539 {
3540 AjPStr tmpstr = NULL;
3541 AlignPData data = NULL;
3542
3543 ajListPeekLast(thys->Data, (void**) &data);
3544 ajFmtPrintS(&tmpstr, "%d", score);
3545 ajStrAssignS(&data->Score, tmpstr);
3546
3547 ajDebug("ajAlignSetScoreI: %d '%S' %d\n",
3548 score, data->Score, data->LenAli);
3549
3550 ajStrDel(&tmpstr);
3551
3552 return;
3553 }
3554
3555
3556
3557
3558 /* @func ajAlignSetScoreL *****************************************************
3559 **
3560 ** Defines alignment score
3561 **
3562 ** @param [u] thys [AjPAlign] Alignment object
3563 ** @param [r] score [ajlong] score
3564 ** @return [void]
3565 **
3566 ** @release 4.0.0
3567 ** @@
3568 ******************************************************************************/
3569
ajAlignSetScoreL(AjPAlign thys,ajlong score)3570 void ajAlignSetScoreL(AjPAlign thys, ajlong score)
3571 {
3572 AjPStr tmpstr = NULL;
3573 AlignPData data = NULL;
3574
3575 ajListPeekLast(thys->Data, (void**) &data);
3576 ajFmtPrintS(&tmpstr, "%Ld", score);
3577 ajStrAssignS(&data->Score, tmpstr);
3578
3579 ajDebug("ajAlignSetScoreI: %Ld '%S' %d\n",
3580 score, data->Score, data->LenAli);
3581 ajStrDel(&tmpstr);
3582
3583 return;
3584 }
3585
3586
3587
3588
3589 /* @func ajAlignSetScoreR *****************************************************
3590 **
3591 ** Defines alignment score
3592 **
3593 ** @param [u] thys [AjPAlign] Alignment object
3594 ** @param [r] score [float] score
3595 ** @return [void]
3596 **
3597 ** @release 2.1.0
3598 ** @@
3599 ******************************************************************************/
3600
ajAlignSetScoreR(AjPAlign thys,float score)3601 void ajAlignSetScoreR(AjPAlign thys, float score)
3602 {
3603 AjPStr tmpstr = NULL;
3604
3605 ajint precision = 3;
3606 ajint i;
3607 AlignPData data = NULL;
3608
3609 ajListPeekLast(thys->Data, (void**) &data);
3610 ajFmtPrintS(&tmpstr, "%.*f", precision, score);
3611
3612 for(i=1; i<precision; i++)
3613 {
3614 if(ajStrGetCharLast(tmpstr) != '0') break;
3615 ajStrCutEnd(&tmpstr, 1);
3616 }
3617
3618 ajStrAssignS(&data->Score, tmpstr);
3619
3620 ajStrDel(&tmpstr);
3621
3622 return;
3623 }
3624
3625
3626
3627
3628 /* @func ajAlignSetStats ******************************************************
3629 **
3630 ** Sets standard properties for an alignment subheader. These are:
3631 ** Length, Identity, Gaps, Similarity, Score
3632 **
3633 ** @param [u] thys [AjPAlign] Alignment object
3634 ** @param [r] iali [ajint] Alignment number
3635 ** @param [r] len [ajint] Alignment length
3636 ** @param [r] ident [ajint] Number of identities
3637 ** @param [r] sim [ajint] Number of similarities
3638 ** @param [r] gaps [ajint] Number of gaps
3639 ** @param [r] score [const AjPStr] Alignment score (as saved by
3640 ** ajAlignSetScoreI or ajAlignSetScoreR)
3641 ** @return [void]
3642 **
3643 ** @release 2.1.0
3644 ******************************************************************************/
3645
ajAlignSetStats(AjPAlign thys,ajint iali,ajint len,ajint ident,ajint sim,ajint gaps,const AjPStr score)3646 void ajAlignSetStats(AjPAlign thys, ajint iali, ajint len,
3647 ajint ident, ajint sim, ajint gaps,
3648 const AjPStr score)
3649 {
3650 AlignPData* pdata = NULL;
3651 AlignPData data = NULL;
3652 ajint nali;
3653
3654 nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
3655
3656 if(iali < 0)
3657 data = pdata[nali-1];
3658 else
3659 data = pdata[iali];
3660
3661 /* ajDebug("ajAlignSetStats iali:%d len:%d id:%d "
3662 "sim:%d gap:%d score:'%S'\n",
3663 iali, len, ident, sim, gaps, score); */
3664
3665 data->LenAli = len;
3666
3667 if(len > 0)
3668 {
3669 if(ident >= 0)
3670 data->NumId = ident;
3671 else
3672 data->NumId = -1;
3673
3674 if(sim >= 0)
3675 data->NumSim = sim;
3676 else
3677 data->NumSim = -1;
3678
3679 if(gaps >= 0)
3680 data->NumGap = gaps;
3681 else
3682 data->NumGap = -1;
3683 }
3684
3685 if(ajStrGetLen(score))
3686 ajStrAssignS(&data->Score, score);
3687
3688 AJFREE(pdata);
3689
3690 return;
3691 }
3692
3693
3694
3695
3696 /* @func ajAlignSetSubStandard ************************************************
3697 **
3698 ** Sets standard subheader using the properties for an alignment.
3699 ** These are:
3700 ** Length, Identity, Gaps, Similarity, Score
3701 **
3702 ** @param [u] thys [AjPAlign] Alignment object
3703 ** @param [r] iali [ajint] Alignment number (or -1 for the latest)
3704 ** @return [void]
3705 **
3706 ** @release 2.1.0
3707 ******************************************************************************/
3708
ajAlignSetSubStandard(AjPAlign thys,ajint iali)3709 void ajAlignSetSubStandard(AjPAlign thys, ajint iali)
3710 {
3711 AjPStr tmphdr = NULL;
3712 AlignPData* pdata = NULL;
3713 AlignPData data = NULL;
3714 ajint nali;
3715 float pct;
3716
3717 nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
3718
3719 if(iali < 0)
3720 data = pdata[nali-1];
3721 else
3722 data = pdata[iali];
3723
3724 ajFmtPrintAppS(&tmphdr, "Length: %d\n", data->LenAli);
3725
3726 if(data->LenAli > 0)
3727 {
3728 if(data->NumId >= 0)
3729 {
3730 pct = (float)100. * (float) data->NumId / (float) data->LenAli;
3731 ajFmtPrintAppS(&tmphdr, "Identity: %5d/%d (%4.1f%%)\n",
3732 data->NumId, data->LenAli, pct);
3733 }
3734
3735 if(data->NumSim >= 0)
3736 {
3737 pct = (float)100. * (float) data->NumSim / (float) data->LenAli;
3738 ajFmtPrintAppS(&tmphdr, "Similarity: %5d/%d (%4.1f%%)\n",
3739 data->NumSim, data->LenAli, pct);
3740 }
3741
3742 if(data->NumGap >= 0)
3743 {
3744 pct = (float)100. * (float) data->NumGap / (float) data->LenAli;
3745 ajFmtPrintAppS(&tmphdr, "Gaps: %5d/%d (%4.1f%%)\n",
3746 data->NumGap, data->LenAli, pct);
3747 }
3748 }
3749
3750 if(ajStrGetLen(data->Score))
3751 ajFmtPrintAppS(&tmphdr, "Score: %S\n", data->Score);
3752
3753 ajAlignSetSubHeaderPre(thys, tmphdr);
3754
3755 ajStrDel(&tmphdr);
3756 AJFREE(pdata);
3757
3758 return;
3759 }
3760
3761
3762
3763
3764 /* @funcstatic alignSeqs ******************************************************
3765 **
3766 ** Returns the sequences for the nth alignment
3767 **
3768 ** @param [r] thys [const AjPAlign] Alignment object
3769 ** @param [r] iali [ajint] Alignment number
3770 ** @return [const AjPSeq*] Pointer to the internal sequence array
3771 **
3772 ** @release 2.1.0
3773 ******************************************************************************/
3774
alignSeqs(const AjPAlign thys,ajint iali)3775 static const AjPSeq* alignSeqs(const AjPAlign thys, ajint iali)
3776 {
3777 AlignPData data = NULL;
3778
3779 ajListPeekNumber(thys->Data, iali, (void**) &data);
3780
3781 return data->Seq;
3782 }
3783
3784
3785
3786
3787 /* @funcstatic alignSeq *******************************************************
3788 **
3789 ** Returns the nth sequence for an alignment
3790 **
3791 ** @param [r] thys [const AjPAlign] Alignment object
3792 ** @param [r] iseq [ajint] Sequence number
3793 ** @param [r] iali [ajint] Alignment number
3794 ** @return [const AjPSeq] Pointer to the internal sequence
3795 **
3796 ** @release 2.1.0
3797 ******************************************************************************/
3798
alignSeq(const AjPAlign thys,ajint iseq,ajint iali)3799 static const AjPSeq alignSeq(const AjPAlign thys, ajint iseq, ajint iali)
3800 {
3801 AlignPData data = NULL;
3802
3803 ajListPeekNumber(thys->Data, iali, (void **) &data);
3804
3805 return data->Seq[iseq];
3806 }
3807
3808
3809
3810
3811 /* @funcstatic alignSeqBegin **************************************************
3812 **
3813 ** Returns the start position for the nth sequence for an alignment.
3814 **
3815 ** Reverse direction is tested and the reverse complement numbering is used.
3816 **
3817 ** @param [r] data [const AlignPData] Alignment data object
3818 ** @param [r] iseq [ajint] Sequence number
3819 ** @return [ajint] Start position
3820 **
3821 ** @release 3.0.0
3822 ******************************************************************************/
3823
alignSeqBegin(const AlignPData data,ajint iseq)3824 static ajint alignSeqBegin(const AlignPData data, ajint iseq)
3825 {
3826 ajint ret = 0;
3827
3828 if(data->Rev[iseq])
3829 {
3830 ret = data->Len[iseq] - data->Offend[iseq]
3831 - data->Start[iseq] + 1;
3832 ajDebug(" alignSeqBegin %d rev:%B len:%d Offend:%d Start:%d ret:%d\n",
3833 iseq, data->Rev[iseq], data->Len[iseq], data->Offend[iseq],
3834 data->Start[iseq], ret);
3835 }
3836 else
3837 {
3838 ret = data->Offset[iseq] + data->Start[iseq];
3839 ajDebug(" alignSeqBegin %d rev:%B Offset:%d Start:%d ret:%d\n",
3840 iseq, data->Rev[iseq], data->Offset[iseq],
3841 data->Start[iseq], ret);
3842 }
3843
3844 return ret;
3845 }
3846
3847
3848
3849
3850 /* @funcstatic alignSeqEnd ****************************************************
3851 **
3852 ** Returns the end position for the nth sequence for an alignment.
3853 **
3854 ** Reverse direction is tested and the reverse complement numbering is used.
3855 **
3856 ** @param [r] data [const AlignPData] Alignment data object
3857 ** @param [r] iseq [ajint] Sequence number
3858 ** @return [ajint] Start position
3859 **
3860 ** @release 3.0.0
3861 ******************************************************************************/
3862
alignSeqEnd(const AlignPData data,ajint iseq)3863 static ajint alignSeqEnd(const AlignPData data, ajint iseq)
3864 {
3865 ajint ret = 0;
3866 ajint iend;
3867
3868 if(data->End[iseq])
3869 iend = data->End[iseq];
3870 else
3871 iend = data->Len[iseq];
3872
3873 if(data->Rev[iseq])
3874 ret = data->Len[iseq] - data->Offend[iseq] - iend + 1;
3875 else
3876 ret = data->Offset[iseq] + iend;
3877
3878 return ret;
3879 }
3880
3881
3882
3883
3884 /* @funcstatic alignSeqIncrement **********************************************
3885 **
3886 ** Returns the position increment for the nth sequence for an alignment.
3887 **
3888 ** Reverse direction gives a value of -1 for counting down.
3889 **
3890 ** Forward direction gives +1.
3891 **
3892 ** @param [r] data [const AlignPData] Alignment data object
3893 ** @param [r] iseq [ajint] Sequence number
3894 ** @return [ajint] Increment for position counting
3895 **
3896 ** @release 3.0.0
3897 ******************************************************************************/
3898
alignSeqIncrement(const AlignPData data,ajint iseq)3899 static ajint alignSeqIncrement(const AlignPData data, ajint iseq)
3900 {
3901 ajint ret = 0;
3902
3903 if(data->Rev[iseq])
3904 ret = -1;
3905 else
3906 ret = 1;
3907
3908 return ret;
3909 }
3910
3911
3912
3913
3914 /* @funcstatic alignSeqGapBegin ***********************************************
3915 **
3916 ** Counts the gaps at the start of the nth sequence for an alignment.
3917 **
3918 ** Reverse direction is tested and the reverse complement numbering is used.
3919 **
3920 ** @param [r] data [const AlignPData] Alignment data object
3921 ** @param [r] iseq [ajint] Sequence number
3922 ** @return [ajint] Start position
3923 **
3924 ** @release 3.0.0
3925 ******************************************************************************/
3926
alignSeqGapBegin(const AlignPData data,ajint iseq)3927 static ajint alignSeqGapBegin(const AlignPData data, ajint iseq)
3928 {
3929 ajint ret = 0;
3930 static char testchars[] = "-~.? "; /* all known gap characters */
3931 const char* cp;
3932 ajint i;
3933
3934 if(data->Rev[iseq])
3935 ret = data->Offend[iseq];
3936 else
3937 ret = data->Offset[iseq];
3938
3939 cp = ajSeqGetSeqC(data->Seq[iseq]);
3940 i = strspn(cp+data->Start[iseq]-1, testchars);
3941
3942 return ret+i;
3943 }
3944
3945
3946
3947
3948 /* @funcstatic alignSeqGapEnd *************************************************
3949 **
3950 ** Counts the gaps at the end of the nth sequence for an alignment.
3951 **
3952 ** Reverse direction is tested and the reverse complement numbering is used.
3953 **
3954 ** @param [r] data [const AlignPData] Alignment data object
3955 ** @param [r] iseq [ajint] Sequence number
3956 ** @return [ajint] End position
3957 **
3958 ** @release 3.0.0
3959 ******************************************************************************/
3960
alignSeqGapEnd(const AlignPData data,ajint iseq)3961 static ajint alignSeqGapEnd(const AlignPData data, ajint iseq)
3962 {
3963 ajint ret = 0;
3964 static char testchars[] = "-~.? "; /* all known gap characters */
3965 const char* cp;
3966 const char* cpstart;
3967 ajint i;
3968
3969 if(data->Rev[iseq])
3970 ret = data->Offset[iseq];
3971 else
3972 ret = data->Offend[iseq];
3973
3974 cpstart = ajSeqGetSeqC(data->Seq[iseq]);
3975 cp = cpstart + data->End[iseq];
3976
3977 for(i=0; cp > cpstart; cp--)
3978 {
3979 if(!strchr(testchars, *cp))
3980 return ret+i-1;
3981 i++;
3982 }
3983
3984 return ret+data->End[iseq];
3985 }
3986
3987
3988
3989
3990 /* @funcstatic alignSeqRev ****************************************************
3991 **
3992 ** Returns the direction for the nth sequence for an alignment.
3993 **
3994 ** @param [r] data [const AlignPData] Alignment data object
3995 ** @param [r] iseq [ajint] Sequence number
3996 ** @return [AjBool] ajTrue if sequence is reversed
3997 **
3998 ** @release 3.0.0
3999 ******************************************************************************/
4000
alignSeqRev(const AlignPData data,ajint iseq)4001 static AjBool alignSeqRev(const AlignPData data, ajint iseq)
4002 {
4003 AjBool ret = ajFalse;
4004
4005 if(data->Rev[iseq])
4006 ret = ajTrue;
4007 else
4008 ret = ajFalse;
4009
4010 return ret;
4011 }
4012
4013
4014
4015
4016 /* @funcstatic alignData ******************************************************
4017 **
4018 ** Returns the nth data structure for an alignment
4019 **
4020 ** @param [r] thys [const AjPAlign] Alignment object
4021 ** @param [r] iali [ajint] Alignment number
4022 ** @return [AlignPData] Pointer to the internal alignment structure
4023 **
4024 ** @release 2.1.0
4025 ******************************************************************************/
4026
alignData(const AjPAlign thys,ajint iali)4027 static AlignPData alignData(const AjPAlign thys, ajint iali)
4028 {
4029 AlignPData data = NULL;
4030
4031 ajListPeekNumber(thys->Data, iali, (void**) &data);
4032
4033 return data;
4034 }
4035
4036
4037
4038
4039 /* @funcstatic alignLen *******************************************************
4040 **
4041 ** Returns the length of the nth sequence for an alignment
4042 **
4043 ** @param [r] thys [const AjPAlign] Alignment object
4044 ** @param [r] iali [ajint] Alignment number
4045 ** @return [ajint] Length of the internal sequence
4046 **
4047 ** @release 2.1.0
4048 ******************************************************************************/
4049
alignLen(const AjPAlign thys,ajint iali)4050 static ajint alignLen(const AjPAlign thys, ajint iali)
4051 {
4052 AlignPData data = NULL;
4053
4054 ajListPeekNumber(thys->Data, iali, (void**) &data);
4055
4056 return data->LenAli;
4057 }
4058
4059
4060
4061
4062 /* @func ajAlignSetType *******************************************************
4063 **
4064 ** Sets the align type (if it is not set already)
4065 **
4066 ** @param [u] thys [AjPAlign] Alignment object
4067 ** @return [void]
4068 **
4069 ** @release 2.1.0
4070 ** @@
4071 ******************************************************************************/
4072
ajAlignSetType(AjPAlign thys)4073 void ajAlignSetType(AjPAlign thys)
4074 {
4075 const AjPSeq seq;
4076 ajint i;
4077
4078 ajDebug("ajAlignSetType '%S'\n",
4079 thys->Type);
4080
4081 if(ajStrGetLen(thys->Type))
4082 return;
4083
4084 if(!thys->Nseqs)
4085 return;
4086
4087 for(i=0; i < thys->Nseqs; i++)
4088 {
4089 ajDebug("Calling alignSeq d 0\n", i);
4090 seq = alignSeq(thys, i, 0);
4091
4092 if(ajStrGetLen(seq->Type))
4093 {
4094 ajStrAssignS(&thys->Type, seq->Type);
4095 return;
4096 }
4097 }
4098
4099 ajDebug("testing alignSeq 0 0\n", i);
4100
4101 if(ajSeqIsNuc(alignSeq(thys, 0, 0)))
4102 ajStrAssignC(&thys->Type, "N");
4103 else
4104 ajStrAssignC(&thys->Type, "P");
4105
4106 return;
4107 }
4108
4109
4110
4111
4112 /* @func ajAlignSetExternal ***************************************************
4113 **
4114 ** Sets the align object to use external sequence references, which are
4115 ** to be copied pointers rather than clones of the whole sequence.
4116 **
4117 ** Intended for alignments of large sequences where there is no need to
4118 ** keep many copies. An example is the EMBOSS application wordmatch.
4119 **
4120 ** @param [u] thys [AjPAlign] Alignment object
4121 ** @param [r] external [AjBool] If true, do not make copies of sequence data
4122 ** and do not delete internal sequence data
4123 ** @return [void]
4124 **
4125 ** @release 2.4.0
4126 ** @@
4127 ******************************************************************************/
4128
ajAlignSetExternal(AjPAlign thys,AjBool external)4129 void ajAlignSetExternal(AjPAlign thys, AjBool external)
4130 {
4131 ajDebug("ajAlignSetExternal old:%B new:%B\n",
4132 thys->SeqExternal, external);
4133
4134 thys->SeqExternal = external;
4135
4136 return;
4137 }
4138
4139
4140
4141
4142 /* @func ajAlignSetRefSeqIndx *************************************************
4143 **
4144 ** Sets the index of the reference sequence.
4145 **
4146 ** @param [u] thys [AjPAlign] Alignment object
4147 ** @param [r] refseq [ajint] index of the reference sequence
4148 ** @return [void]
4149 **
4150 ** @release 6.3.0
4151 ** @@
4152 ******************************************************************************/
4153
ajAlignSetRefSeqIndx(AjPAlign thys,ajint refseq)4154 void ajAlignSetRefSeqIndx(AjPAlign thys, ajint refseq)
4155 {
4156
4157 thys->RefSeq = refseq;
4158
4159 return;
4160 }
4161
4162
4163
4164
4165 /* @func ajAlignSetRange ******************************************************
4166 **
4167 ** Sets the alignment range in each sequence, but only for a
4168 ** pairwise alignment
4169 **
4170 ** @param [u] thys [AjPAlign] Alignment object
4171 ** @param [r] start1 [ajint] Start in sequence 1
4172 ** @param [r] end1 [ajint] End in sequence 1
4173 ** @param [r] len1 [ajint] Length of sequence 1
4174 ** @param [r] off1 [ajint] Offset of sequence 1
4175 ** @param [r] start2 [ajint] Start in sequence 2
4176 ** @param [r] end2 [ajint] End in sequence 2
4177 ** @param [r] len2 [ajint] Length of sequence 2
4178 ** @param [r] off2 [ajint] Offset of sequence 2
4179 ** @return [AjBool] ajTrue on success. Failure also writes an error message.
4180 **
4181 ** @release 2.3.0
4182 ** @@
4183 ******************************************************************************/
4184
ajAlignSetRange(AjPAlign thys,ajint start1,ajint end1,ajint len1,ajint off1,ajint start2,ajint end2,ajint len2,ajint off2)4185 AjBool ajAlignSetRange(AjPAlign thys,
4186 ajint start1, ajint end1, ajint len1, ajint off1,
4187 ajint start2, ajint end2, ajint len2, ajint off2)
4188 {
4189 AlignPData data = NULL;
4190 ajint nali;
4191
4192 ajDebug("ajAlignSetRange %d..%d (%d) %d..%d (%d)\n",
4193 start1, end1, len1, start2, end2, len2);
4194
4195 if(thys->Nseqs != 2)
4196 {
4197 ajErr("ajAlignSetRange requires alignment of 2 sequences only");
4198 return ajFalse;
4199 }
4200
4201 ajListPeekLast(thys->Data, (void**) &data);
4202
4203 nali = (ajuint) ajListGetLength(thys->Data);
4204 ajDebug("nali:%d set range %d\n", nali, nali-1);
4205
4206 data->Start[0] = start1;
4207 data->End[0] = end1;
4208 data->Len[0] = len1;
4209 data->Offset[0] = off1;
4210 data->Rev[0] = ajFalse;
4211
4212 data->Start[1] = start2;
4213 data->End[1] = end2;
4214 data->Len[1] = len2;
4215 data->Offset[1] = off2;
4216 data->Rev[1] = ajFalse;
4217
4218 if(thys->SeqExternal)
4219 {
4220 data->LenAli = (end1 - start1) + 1;
4221
4222 if(data->LenAli < (end2 - start2 + 1))
4223 data->LenAli = (end2 - start2) + 1;
4224
4225 ajDebug("len: %d\n", data->LenAli);
4226 }
4227
4228 return ajTrue;
4229 }
4230
4231
4232
4233
4234 /* @func ajAlignSetSubRange ***************************************************
4235 **
4236 ** Sets the local alignment sub range in each sequence, but only for a
4237 ** pairwise alignment.
4238 **
4239 ** This sets the SubOffset in addition to the Start and End values,
4240 ** for use where the alignment has a large sequence, but only part of
4241 ** it is to be reported.
4242 **
4243 ** The usual example is where there are many matches in a long sequence,
4244 ** defined with SeqExternal so we use pointers to one original to avoid
4245 ** making multiple copies in memory while building the AjPAlign and AlignPData
4246 ** structure.
4247 **
4248 ** Resets the alignment length to be the length of the longest subsequence
4249 **
4250 ** @param [u] thys [AjPAlign] Alignment object
4251 ** @param [r] substart1 [ajint] Subsequence offset in sequence 1
4252 ** @param [r] start1 [ajint] Subsequence start in sequence 1
4253 ** @param [r] end1 [ajint] Subsequence end in sequence 1
4254 ** @param [r] rev1 [AjBool] ajTrue if sequence 1 is reversed
4255 ** @param [r] len1 [ajint] Length of sequence 1
4256 ** @param [r] substart2 [ajint] Subsequence offset in sequence 2
4257 ** @param [r] start2 [ajint] Subsequence start in sequence 2
4258 ** @param [r] end2 [ajint] Subsequence end in sequence 2
4259 ** @param [r] rev2 [AjBool] ajTrue if sequence 2 is reversed
4260 ** @param [r] len2 [ajint] Length of sequence 2
4261 ** @return [AjBool] ajTrue on success. Failure also writes an error message.
4262 **
4263 ** @release 2.5.1
4264 ** @@
4265 ******************************************************************************/
4266
ajAlignSetSubRange(AjPAlign thys,ajint substart1,ajint start1,ajint end1,AjBool rev1,ajint len1,ajint substart2,ajint start2,ajint end2,AjBool rev2,ajint len2)4267 AjBool ajAlignSetSubRange(AjPAlign thys,
4268 ajint substart1, ajint start1,
4269 ajint end1, AjBool rev1, ajint len1,
4270 ajint substart2, ajint start2,
4271 ajint end2, AjBool rev2, ajint len2)
4272 {
4273 AlignPData data = NULL;
4274 ajint nali;
4275
4276 ajDebug("ajAlignSetSubRange %d(%d)..%d (%d) %d(%d)..%d (%d)\n",
4277 start1, substart1, end1, len1,
4278 start2, substart2, end2, len2);
4279
4280 if(thys->Nseqs != 2)
4281 {
4282 ajErr("ajAlignSetSubRange requires alignment of 2 sequences only");
4283
4284 return ajFalse;
4285 }
4286
4287 ajListPeekLast(thys->Data, (void**) &data);
4288
4289 nali = (ajuint) ajListGetLength(thys->Data);
4290 ajDebug("nali:%d set range %d\n", nali, nali-1);
4291
4292 data->SubOffset[0] = substart1;
4293 data->Start[0] = start1;
4294 data->End[0] = end1;
4295 data->Len[0] = len1;
4296 data->Offset[0] = substart1;
4297 data->Offend[0] = len1 - (substart1 + end1 - start1 + 1);
4298 data->Rev[0] = rev1;
4299
4300 data->SubOffset[1] = substart2;
4301 data->Start[1] = start2;
4302 data->End[1] = end2;
4303 data->Len[1] = len2;
4304 data->Offset[1] = substart2;
4305 data->Offend[1] = len2 - (substart2 + end2 - start2 + 1);
4306 data->Rev[1] = rev2;
4307
4308 data->LenAli = (end1 - start1) + 1;
4309
4310 if(data->LenAli < (end2 - start2 + 1))
4311 {
4312 data->LenAli = (end2 - start2) + 1;
4313 }
4314
4315 ajDebug("len: %d\n", data->LenAli);
4316
4317 return ajTrue;
4318 }
4319
4320
4321
4322
4323 /* @funcstatic alignSeqName ***************************************************
4324 **
4325 ** Returns the sequence name or USA depending on the setting in the
4326 ** Alignment object (derived from the ACD and command line -ausa option)
4327 **
4328 ** @param [r] thys [const AjPAlign] Alignment object
4329 ** @param [r] seq [const AjPSeq] Sequence object
4330 ** @return [const AjPStr] Sequence name for this alignment
4331 **
4332 ** @release 2.1.0
4333 ******************************************************************************/
4334
alignSeqName(const AjPAlign thys,const AjPSeq seq)4335 static const AjPStr alignSeqName(const AjPAlign thys, const AjPSeq seq)
4336 {
4337 if(thys->Showusa)
4338 return ajSeqGetUsaS(seq);
4339
4340 return ajSeqGetNameS(seq);
4341 }
4342
4343
4344
4345
4346 /* @funcstatic alignDataDel ***************************************************
4347 **
4348 ** Deletes an alignment data structure
4349 **
4350 ** @param [d] pthys [AlignPData*] Alignment data structure
4351 ** @param [r] external [AjBool] Sequence is a pointer to an external
4352 ** object, do not delete.
4353 ** @return [void]
4354 **
4355 ** @release 2.1.0
4356 ******************************************************************************/
4357
alignDataDel(AlignPData * pthys,AjBool external)4358 static void alignDataDel(AlignPData* pthys, AjBool external)
4359 {
4360 AlignPData thys;
4361 ajint i;
4362 void *freeptr = NULL;
4363
4364 thys = *pthys;
4365
4366 AJFREE(thys->Start);
4367 AJFREE(thys->End);
4368 AJFREE(thys->Len);
4369 AJFREE(thys->Offset);
4370 AJFREE(thys->Offend);
4371 AJFREE(thys->SubOffset);
4372 AJFREE(thys->Rev);
4373
4374 ajStrDel(&thys->Score);
4375 ajDebug("alignDataDel NSeqs: %d External: %B\n",
4376 thys->Nseqs, external);
4377
4378 if(!external)
4379 for(i=0;i<thys->Nseqs;++i)
4380 {
4381 ajDebug("alignDataDel seq[%d] %S %d\n",
4382 i, ajSeqGetNameS(thys->Seq[i]), ajSeqGetLen(thys->Seq[i]));
4383 ajSeqDel(&thys->RealSeq[i]);
4384 }
4385
4386 AJFREE(thys->RealSeq);
4387 freeptr = (void *)thys->Seq;
4388 AJFREE(freeptr);
4389 AJFREE(*pthys);
4390
4391 return;
4392 }
4393
4394
4395
4396
4397 /* @funcstatic alignDataNew ***************************************************
4398 **
4399 ** Creates and initialises an alignment data object
4400 ** with a specified number of sequences
4401 **
4402 ** @param [r] nseqs [ajint] Number of sequences alignment data object will
4403 ** store
4404 ** @param [r] external [AjBool] Sequence is a pointer to an external
4405 ** object, no need for an own copy.
4406 ** @return [AlignPData] Pointer to the new alignment data object
4407 **
4408 ** @release 6.2.0
4409 ******************************************************************************/
4410
alignDataNew(ajint nseqs,AjBool external)4411 static AlignPData alignDataNew(ajint nseqs, AjBool external)
4412 {
4413 AlignPData data = NULL;
4414
4415 AJNEW0(data);
4416
4417 data->Nseqs = nseqs;
4418 AJCNEW0(data->Start, nseqs);
4419 AJCNEW0(data->End, nseqs);
4420 AJCNEW0(data->Len, nseqs);
4421 AJCNEW0(data->Offset, nseqs);
4422 AJCNEW0(data->Offend, nseqs);
4423 AJCNEW0(data->SubOffset, nseqs);
4424 AJCNEW0(data->Rev, nseqs);
4425 AJCNEW0(data->Seq, nseqs);
4426 if(!external)
4427 AJCNEW0(data->RealSeq, nseqs);
4428
4429 return data;
4430 }
4431
4432
4433
4434
4435 /* @funcstatic alignDataSetSequence *******************************************
4436 **
4437 ** Sets specified sequence in the alignment data object
4438 **
4439 ** Note: In pre EMBOSS-6.3 the external option is set only by wordmatch
4440 ** and seqmatchall, both producing exact multiple matches and calling
4441 ** ajAlignSetSubRange after calling ajAlignDefineSS which repeats the settings
4442 ** made in this function when called via ajAlignDefineSS.
4443 ** Since ajSeqCountGaps function is time consuming for long sequences,
4444 ** function was modified to return quickly when the external option is true.
4445 ** After release 6.3 this function call hierarchy should be revised.
4446 **
4447 **
4448 ** @param [u] thys [AlignPData] Alignment data object
4449 ** @param [r] seq [const AjPSeq] Input sequence, either a new copy is made
4450 ** or a direct reference is made depending on
4451 ** the external parameter
4452 ** @param [r] iseq [ajint] Index of the sequence
4453 ** @param [r] external [AjBool] Sequence is a pointer to an external
4454 ** object
4455 ** @return [void]
4456 **
4457 ** @release 6.2.0
4458 ******************************************************************************/
4459
alignDataSetSequence(AlignPData thys,const AjPSeq seq,ajint iseq,AjBool external)4460 static void alignDataSetSequence(AlignPData thys, const AjPSeq seq, ajint iseq,
4461 AjBool external)
4462 {
4463 if(external)
4464 {
4465 thys->Seq[iseq] = seq;
4466 return;
4467 }
4468
4469 thys->Start[iseq] = ajSeqGetBegin(seq);
4470 thys->End[iseq] = ajSeqGetEnd(seq);
4471 thys->Len[iseq] = ajSeqGetLen(seq) + ajSeqGetOffset(seq)
4472 + ajSeqGetOffend(seq) - ajSeqCountGaps(seq);
4473 thys->Offset[iseq] = ajSeqGetOffset(seq);
4474 thys->Offend[iseq] = ajSeqGetOffend(seq);
4475 thys->SubOffset[iseq] = 0;
4476 thys->Rev[iseq] = ajSeqIsReversed(seq);
4477
4478 thys->RealSeq[iseq] = ajSeqNewSeq(seq);
4479 ajSeqGapStandard(thys->RealSeq[iseq], '-');
4480 thys->Seq[iseq] = thys->RealSeq[iseq];
4481
4482 return;
4483 }
4484
4485
4486
4487
4488 /* @funcstatic alignDiff ******************************************************
4489 **
4490 ** Blank out identities between two strings
4491 **
4492 ** @param [w] pmark [AjPStr*] Mark string with spaces for identities
4493 ** @param [r] seq [const AjPStr] String (sequence) to compare
4494 ** @param [r] idchar [char] Character for identities
4495 ** @return [void]
4496 **
4497 ** @release 2.1.0
4498 ******************************************************************************/
4499
alignDiff(AjPStr * pmark,const AjPStr seq,char idchar)4500 static void alignDiff(AjPStr* pmark, const AjPStr seq, char idchar)
4501 {
4502 ajuint i;
4503 ajuint ilen;
4504 char c;
4505 char d;
4506
4507 ilen = ajStrGetLen(seq);
4508
4509 ajStrGetuniqueStr(pmark);
4510
4511 if(ajStrGetLen(*pmark) < ilen)
4512 ilen = ajStrGetLen(*pmark);
4513
4514 for(i=0; i < ilen; i++)
4515 {
4516 c = ajStrGetCharPos(*pmark, i);
4517
4518 if(c == ' ')
4519 continue;
4520
4521 if(c == '-')
4522 continue;
4523
4524 d = ajStrGetCharPos(seq, i);
4525
4526 if(d == ' ')
4527 continue;
4528
4529 if(d == '-')
4530 continue;
4531
4532 if(toupper((int)c) == toupper((int)d))
4533 ajStrPasteCountK(pmark, i, idchar, 1);
4534 }
4535
4536 return;
4537 }
4538
4539
4540
4541
4542 /* @funcstatic alignSame ******************************************************
4543 **
4544 ** Blank out differences between two strings
4545 **
4546 ** @param [w] pmark [AjPStr*] Mark string with spaces for differences
4547 ** @param [r] seq [const AjPStr] String (sequence) to compare
4548 ** @param [r] diffchar [char] Character for differences
4549 ** @return [void]
4550 **
4551 ** @release 3.0.0
4552 ******************************************************************************/
4553
alignSame(AjPStr * pmark,const AjPStr seq,char diffchar)4554 static void alignSame(AjPStr* pmark, const AjPStr seq, char diffchar)
4555 {
4556 ajuint i;
4557 ajuint ilen;
4558 char c;
4559 char d;
4560
4561 ilen = ajStrGetLen(seq);
4562
4563 ajStrGetuniqueStr(pmark);
4564
4565 if(ajStrGetLen(*pmark) < ilen)
4566 ilen = ajStrGetLen(*pmark);
4567
4568 for(i=0; i < ilen; i++)
4569 {
4570 c = ajStrGetCharPos(*pmark, i);
4571
4572 if(c == ' ')
4573 continue;
4574
4575 d = ajStrGetCharPos(seq, i);
4576
4577 if(toupper((int)c) == toupper((int)d))
4578 continue;
4579
4580 ajStrPasteCountK(pmark, i, diffchar, 1);
4581 }
4582
4583 return;
4584 }
4585
4586
4587
4588
4589 /* @funcstatic alignSim *******************************************************
4590 **
4591 ** Convert upper case (identical) positions to an identity character,
4592 ** and lower case (similar) positions to a similarity character
4593 **
4594 ** @param [w] pmark [AjPStr*] Mark string with spaces for differences
4595 ** @param [r] idch [const char] Identity character
4596 ** @param [r] simch [const char] Similarity character
4597 ** @param [r] misch [const char] Mismatch character
4598 ** @param [r] gapch [const char] Gap character
4599 ** @return [void]
4600 **
4601 ** @release 2.1.0
4602 ******************************************************************************/
4603
alignSim(AjPStr * pmark,const char idch,const char simch,const char misch,const char gapch)4604 static void alignSim(AjPStr* pmark, const char idch, const char simch,
4605 const char misch, const char gapch)
4606 {
4607 ajint i;
4608 ajint ilen;
4609 char c;
4610
4611 /*ajDebug("alignSim '%S'\n", *pmark);*/
4612
4613 ajStrGetuniqueStr(pmark);
4614
4615 ilen = ajStrGetLen(*pmark);
4616
4617 for(i=0; i < ilen; i++)
4618 {
4619 c = ajStrGetCharPos(*pmark, i);
4620
4621 if(tolower((int)c) == '#')
4622 ajStrPasteCountK(pmark, i, misch, 1);
4623 else if(isupper((int)c))
4624 ajStrPasteCountK(pmark, i, idch, 1);
4625 else if(islower((int)c))
4626 ajStrPasteCountK(pmark, i, simch, 1);
4627 else
4628 ajStrPasteCountK(pmark, i,gapch, 1);
4629 }
4630
4631 /*ajDebug(" result '%S'\n", *pmark);*/
4632
4633 return;
4634 }
4635
4636
4637
4638
4639 /* @funcstatic alignConsStats *************************************************
4640 **
4641 ** Calculates alignment statistics (and a consensus).
4642 **
4643 ** @param [u] thys [AjPAlign] Alignment object. Matrix will be defined
4644 ** if not already set.
4645 ** @param [r] iali [ajint] alignment number
4646 ** @param [w] cons [AjPStr*] the created consensus sequence
4647 ** @param [w] retident [ajint*] number of residues identical in all sequences
4648 ** @param [w] retsim [ajint*] number of residues similar in all sequences
4649 ** @param [w] retgap [ajint*] number of residues with a gap in 1 sequence
4650 ** @param [w] retlen [ajint*] length of the alignment
4651 ** @return [void]
4652 **
4653 ** @release 2.1.0
4654 ******************************************************************************/
4655
alignConsStats(AjPAlign thys,ajint iali,AjPStr * cons,ajint * retident,ajint * retsim,ajint * retgap,ajint * retlen)4656 static void alignConsStats(AjPAlign thys, ajint iali, AjPStr *cons,
4657 ajint* retident, ajint* retsim, ajint* retgap,
4658 ajint* retlen)
4659 {
4660 ajint imat; /* iterate over identical and matching arrays */
4661 ajint iseq; /* iterate over sequences (outer loop) */
4662 ajint jseq; /* iterate over sequences (inner loop) */
4663
4664
4665 ajint **matrix = NULL;
4666 float **fmatrix = NULL;
4667
4668 ajint m1 = 0;
4669 ajint m2 = 0;
4670
4671 ajint matsize;
4672 ajint matchingmaxindex;
4673 ajint identicalmaxindex;
4674 ajint nseqs;
4675 ajint mlen;
4676
4677 float max;
4678 float contri = 0;
4679 float contrj = 0;
4680
4681 AjPSeqCvt cvt = 0;
4682
4683 AjPFloat posScore = NULL; /* cumulative similarity scores by sequence */
4684 /* for matching all other sequences */
4685 float *identical; /* cum. weight for each valid character */
4686 float *matching; /* cum. weight for matching this character */
4687 ajint highindex; /* position of highest score in posScore */
4688 ajint kkpos; /* alignment position loop variable */
4689 ajint kipos; /* alignment position kkpos + iseq start */
4690 ajint kjpos; /* alignment position kkpos + jseq start */
4691 ajint khpos; /* alignment position in highindex */
4692
4693 float himatch = 0.0; /* highest match score (often used) */
4694
4695 const char **seqcharptr;
4696 char res;
4697 char nocon;
4698 char gapch;
4699 float fplural;
4700 float fplurality = 51.0;
4701 float ident;
4702 AjBool isident;
4703 AjBool issim;
4704 AjBool isgap;
4705 const AjPSeq* seqs;
4706 ajint numres; /* number of residues (not spaces) */
4707 AlignPData data = NULL;
4708 void *freeptr;
4709
4710
4711 data = alignData(thys, iali);
4712
4713 if(!thys->IMatrix && !thys->FMatrix)
4714 {
4715 if(ajSeqIsNuc(alignSeq(thys, 0, iali)))
4716 ajStrAssignC(&thys->Matrix, "EDNAFULL");
4717 else
4718 ajStrAssignC(&thys->Matrix, "EBLOSUM62");
4719
4720 thys->IMatrix = ajMatrixNewFile(thys->Matrix);
4721 }
4722
4723 *retident=0;
4724 *retsim=0;
4725 *retgap=0;
4726
4727 seqs = alignSeqs(thys, iali);
4728 nseqs = thys->Nseqs;
4729 mlen = alignLen(thys, iali);
4730 fplural = alignTotweight(thys, 0) * fplurality / (float)100.;
4731 ident = alignTotweight(thys, 0);
4732
4733 ajDebug("alignConsStats ali:%d mlen:%d\n", iali, mlen);
4734
4735 /* ajDebug("fplural:%.1f ident:%.1f mlen: %d\n",
4736 fplural, ident, mlen); */
4737
4738 if(thys->IMatrix)
4739 {
4740 matrix = ajMatrixGetMatrix(thys->IMatrix);
4741 cvt = ajMatrixGetCvt(thys->IMatrix); /* return conversion table */
4742 matsize = ajMatrixGetSize(thys->IMatrix);
4743 }
4744 else
4745 {
4746 fmatrix = ajMatrixfGetMatrix(thys->FMatrix);
4747 cvt = ajMatrixfGetCvt(thys->FMatrix); /* return conversion table */
4748 matsize = ajMatrixfGetSize(thys->FMatrix);
4749 }
4750
4751 AJCNEW(seqcharptr,nseqs);
4752 AJCNEW(identical,matsize);
4753 AJCNEW(matching,matsize);
4754
4755 posScore = ajFloatNew();
4756
4757 gapch = '-';
4758 nocon = '#';
4759
4760 for(iseq=0;iseq<nseqs;iseq++) /* get sequence as string */
4761 seqcharptr[iseq] = ajSeqGetSeqC(alignSeq(thys, iseq, iali));
4762
4763 /* For each position in the alignment, calculate consensus character */
4764
4765 for(kkpos=0; kkpos< mlen; kkpos++)
4766 {
4767 res = gapch;
4768
4769 isident = ajFalse;
4770 issim = ajFalse;
4771 isgap = ajFalse;
4772
4773 /*
4774 ** reset identities and +ve matches
4775 */
4776
4777 for(imat=0;imat<matsize;imat++)
4778 {
4779 identical[imat] = 0.0; /* weights of all sequence chars in col. */
4780 matching[imat] = 0.0;
4781 }
4782
4783 /*
4784 ** reset the posScore array
4785 */
4786
4787 for(iseq=0;iseq<nseqs;iseq++)
4788 ajFloatPut(&posScore,iseq,0.);
4789
4790 /*
4791 ** generate scores (identical, posScore) for columns
4792 */
4793
4794 for(iseq=0;iseq<nseqs;iseq++)
4795 {
4796 kipos = kkpos + data->SubOffset[iseq];
4797 m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[iseq][kipos]);
4798
4799 if(m1 < 0)
4800 {
4801 ajUser("Bad character for matrix iseq:%d kipos:%d %x\n",
4802 iseq, kipos, (int) seqcharptr[iseq][kipos]);
4803 m1 = 0;
4804 }
4805
4806 if(m1)
4807 identical[m1] += seqs[iseq]->Weight;
4808
4809 for(jseq=iseq+1;jseq<nseqs;jseq++)
4810 {
4811 kjpos = kkpos + data->SubOffset[jseq];
4812 m2 = ajSeqcvtGetCodeK(cvt,seqcharptr[jseq][kjpos]);
4813
4814 if(m2 < 0)
4815 {
4816 ajDebug("Bad character for matrix jseq:%d kjpos:%d %x\n",
4817 jseq, kjpos, (int) seqcharptr[jseq][kjpos]);
4818 m2 = 0;
4819 }
4820
4821 if(m1 && m2)
4822 {
4823 if(matrix)
4824 {
4825 contri = (float)matrix[m1][m2]*seqs[jseq]->Weight
4826 +ajFloatGet(posScore,iseq);
4827 contrj = (float)matrix[m1][m2]*seqs[iseq]->Weight
4828 +ajFloatGet(posScore,jseq);
4829 }
4830 else
4831 {
4832 contri = fmatrix[m1][m2]*seqs[jseq]->Weight
4833 +ajFloatGet(posScore,iseq);
4834 contrj = fmatrix[m1][m2]*seqs[iseq]->Weight
4835 +ajFloatGet(posScore,jseq);
4836 }
4837
4838 ajFloatPut(&posScore,iseq,contri);
4839 ajFloatPut(&posScore,jseq,contrj);
4840 }
4841 }
4842 }
4843
4844 /*
4845 ** highindex is the highest scoring position (seq no.) in posScore
4846 ** for 2 sequences this appears to be usually 0
4847 */
4848
4849 highindex = -1;
4850 max = -FLT_MAX;
4851 numres = 0;
4852 for(iseq=0;iseq<nseqs;iseq++)
4853 {
4854 kipos = kkpos + data->SubOffset[iseq];
4855
4856 if(seqcharptr[iseq][kipos] != ' ' &&
4857 seqcharptr[iseq][kipos] != '-')
4858 numres++;
4859
4860 if(ajFloatGet(posScore,iseq) > max)
4861 {
4862 highindex = iseq;
4863 max = ajFloatGet(posScore,iseq);
4864 }
4865 }
4866
4867 /* highindex is now set */
4868
4869 /*
4870 ** find +ve matches in the column
4871 ** m1 is non-zero for a valid character in iseq
4872 ** m2 is non-zero for a valid character in jseq
4873 */
4874
4875 for(iseq=0;iseq<nseqs;iseq++)
4876 {
4877 kipos = kkpos + data->SubOffset[iseq];
4878 m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[iseq][kipos]);
4879
4880 if(m1 < 0)
4881 {
4882 ajDebug("Bad character for matrix iseq:%d kipos:%d %x\n",
4883 iseq, kipos, (int) seqcharptr[iseq][kipos]);
4884 m1 = 0;
4885 }
4886
4887 if(E_FPZERO(matching[m1],U_FEPS))
4888 { /* first time we have met this character */
4889 for(jseq=0;jseq<nseqs;jseq++) /* all (other) sequences */
4890 {
4891 kjpos = kkpos + data->SubOffset[jseq];
4892 m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[jseq][kjpos]);
4893
4894 if(m2 < 0)
4895 {
4896 ajDebug("Bad character for matrix jseq:%d kjpos:%d "
4897 "%x\n",
4898 jseq, kjpos, (int) seqcharptr[jseq][kjpos]);
4899 m2 = 0;
4900 }
4901
4902 if(matrix)
4903 {
4904 /* 'matching' if positive */
4905 if(m1 && m2 && matrix[m1][m2] > 0)
4906 matching[m1] += seqs[jseq]->Weight;
4907 }
4908 else
4909 if(m1 && m2 && fmatrix[m1][m2] > 0.0)
4910 matching[m1] += seqs[jseq]->Weight;
4911
4912
4913 /*
4914 if( iseq != jseq)
4915 /# skip the sequence we are on #/
4916 {
4917 m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[jseq][kjpos]);
4918 if(matrix)
4919 {
4920 if(m1 && m2 && matrix[m1][m2] > 0)
4921 /# 'matching' if positive #/
4922 {
4923 matching[m1] += seqs[jseq]->Weight;
4924 }
4925 }
4926 else
4927 {
4928 if(m1 && m2 && fmatrix[m1][m2] > 0.0)
4929 {
4930 matching[m1] += seqs[jseq]->Weight;
4931 }
4932 }
4933 }
4934 */
4935 }
4936 }
4937 }
4938
4939 matchingmaxindex = 0; /* get max matching and identical */
4940 identicalmaxindex = 0;
4941
4942 for(iseq=0;iseq<nseqs;iseq++)
4943 {
4944 kipos = kkpos + data->SubOffset[iseq];
4945 m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[iseq][kipos]);
4946
4947 if(identical[m1] > identical[identicalmaxindex])
4948 identicalmaxindex= m1;
4949 }
4950
4951 for(iseq=0;iseq<nseqs;iseq++)
4952 {
4953 kipos = kkpos + data->SubOffset[iseq];
4954 m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[iseq][kipos]);
4955
4956 if(matching[m1] > matching[matchingmaxindex])
4957 matchingmaxindex= m1;
4958 else if(E_FPEQ(matching[m1],matching[matchingmaxindex],U_FEPS))
4959 {
4960 if(identical[m1] > identical[matchingmaxindex])
4961 matchingmaxindex= m1;
4962 }
4963
4964 if(seqcharptr[iseq][kipos] == '-' ||
4965 seqcharptr[iseq][kipos] == ' ')
4966 isgap=ajTrue;
4967 }
4968
4969 /*
4970 ajDebug("index[%d] ident:%d matching:%d high:%d\n",
4971 kpos,
4972 identicalmaxindex,
4973 matchingmaxindex, highindex);
4974 */
4975 khpos = kkpos + data->SubOffset[highindex];
4976 himatch = matching[ajSeqcvtGetCodeK(cvt,seqcharptr[highindex][khpos])];
4977
4978 if(thys->IMatrix)
4979 {
4980 /* debugstr1 = ajMatrixGetLabelNum(thys->IMatrix, identicalmaxindex-1);
4981 debugstr2 = ajMatrixGetLabelNum(thys->IMatrix, matchingmaxindex-1);
4982 */
4983
4984 /*ajDebug("index[%d] ident:%d '%S' %.1f matching:%d '%S' %.1f %.1f "
4985 "high:%d '%c' %.1f\n",
4986 kkpos,
4987 identicalmaxindex,
4988 debugstr1,
4989 identical[identicalmaxindex],
4990 matchingmaxindex,
4991 debugstr2,
4992 matching[matchingmaxindex],
4993 himatch,
4994 highindex, seqcharptr[highindex][khpos],
4995 seqs[highindex]->Weight); */
4996 }
4997 else
4998 {
4999 /*debugstr1 = ajMatrixfGetLabelNum(thys->FMatrix,
5000 identicalmaxindex-1);
5001 debugstr2 = ajMatrixfGetLabelNum(thys->FMatrix,
5002 matchingmaxindex-1);*/
5003 /* ajDebug("index[%d] ident:%d '%S' %.1f matching:%d '%S' %.1f "
5004 "%.1f "
5005 "high:%d '%c' %.1f\n",
5006 kkpos,
5007 identicalmaxindex,
5008 debugstr1,
5009 identical[identicalmaxindex],
5010 matchingmaxindex,
5011 debugstr2,
5012 matching[matchingmaxindex],
5013 himatch,
5014 highindex, seqcharptr[highindex][khpos],
5015 seqs[highindex]->Weight); */
5016 }
5017
5018 if(identical[identicalmaxindex] >= ident)
5019 isident=ajTrue;
5020
5021 if(matching[matchingmaxindex] >= fplural)
5022 issim=ajTrue;
5023
5024 /* plurality check */
5025 if(himatch >= fplural)
5026 if(seqcharptr[highindex][khpos] != '-')
5027 res = toupper((int)seqcharptr[highindex][khpos]);
5028
5029
5030 if(E_FPEQ(himatch,seqs[highindex]->Weight,U_FEPS))
5031 {
5032 if(numres > 1)
5033 res = nocon;
5034 else
5035 res = gapch;
5036 }
5037
5038 if(issim && ! isident)
5039 res = tolower((int)res);
5040
5041 ajStrAppendK(cons,res);
5042
5043 if(isident)
5044 ++*retident;
5045
5046 if(issim)
5047 ++*retsim;
5048
5049 if(isgap)
5050 ++*retgap;
5051
5052 /* ajDebug("id:%b sim:%b gap:%b res:%c '",
5053 isident, issim, isgap, res); */
5054 for(iseq=0; iseq<nseqs; iseq++)
5055 {
5056 kipos = kkpos + data->SubOffset[iseq];
5057 /* ajDebug("%c", seqcharptr[iseq][kipos]); */
5058 }
5059 /* ajDebug("\n"); */
5060 }
5061
5062 *retlen = alignLen(thys, iali);
5063
5064 /* ajDebug("ret ident:%d sim:%d gap:%d len:%d\n",
5065 *retident, *retsim, *retgap, *retlen); */
5066
5067 freeptr = (void *) seqcharptr;
5068 AJFREE(freeptr);
5069
5070 AJFREE(matching);
5071 AJFREE(identical);
5072 ajFloatDel(&posScore);
5073
5074 return;
5075 }
5076
5077
5078
5079
5080 /* @funcstatic alignTotweight *************************************************
5081 **
5082 ** Returns the total weight for all sequences in an alignment.
5083 **
5084 ** @param [r] thys [const AjPAlign] Alignment object
5085 ** @param [r] iali [ajint] Alignment number
5086 ** @return [float] Total weight for all sequences
5087 **
5088 ** @release 2.1.0
5089 ******************************************************************************/
5090
alignTotweight(const AjPAlign thys,ajint iali)5091 static float alignTotweight(const AjPAlign thys, ajint iali)
5092 {
5093 ajint i;
5094 const AjPSeq seq = NULL;
5095 float ret = 0.0;
5096
5097 for(i=0; i < thys->Nseqs; i++)
5098 {
5099 seq = alignSeq(thys, i, iali);
5100 ret += seq->Weight;
5101 }
5102
5103 return ret;
5104 }
5105
5106
5107
5108
5109 /* @func ajAlignTraceT ********************************************************
5110 **
5111 ** Reports an AjPAlign object to debug output
5112 **
5113 ** @param [r] thys [const AjPAlign] alignment object
5114 ** @param [r] title [const char*] Trace report title
5115 ** @return [void]
5116 **
5117 ** @release 3.0.0
5118 ******************************************************************************/
5119
ajAlignTraceT(const AjPAlign thys,const char * title)5120 void ajAlignTraceT(const AjPAlign thys, const char* title)
5121 {
5122 ajDebug("%s\n",title);
5123 ajAlignTrace(thys);
5124
5125 return;
5126 }
5127
5128
5129
5130
5131 /* @func ajAlignTrace *********************************************************
5132 **
5133 ** Reports an AjPAlign object to debug output
5134 **
5135 ** @param [r] thys [const AjPAlign] alignment object
5136 ** @return [void]
5137 **
5138 ** @release 2.1.0
5139 ******************************************************************************/
5140
ajAlignTrace(const AjPAlign thys)5141 void ajAlignTrace(const AjPAlign thys)
5142 {
5143 ajDebug("AjAlignTrace\n");
5144 ajDebug("============\n");
5145 ajDebug("Type: '%S'\n", thys->Type);
5146 ajDebug("Formatstr: '%S'\n", thys->Formatstr);
5147 ajDebug("Format: %d\n", thys->Format);
5148 ajDebug("File: '%F'\n", thys->File);
5149 ajDebug("Header: '%S'\n", thys->Header);
5150 ajDebug("SubHeader: '%S'\n", thys->SubHeader);
5151 ajDebug("Tail: '%S'\n", thys->Tail);
5152 ajDebug("SubTail: '%S'\n", thys->SubTail);
5153 ajDebug("Showusa: %B\n", thys->Showusa);
5154 ajDebug("Multi: %B\n", thys->Multi);
5155 ajDebug("Global: %B\n", thys->Global);
5156
5157 alignTraceData(thys);
5158
5159 ajDebug("Nseqs: %d\n", thys->Nseqs);
5160 ajDebug("WidthNmin: %d\n", thys->Nmin);
5161 ajDebug("Nmax: %d\n", thys->Nmax);
5162 ajDebug("Width: %d\n", thys->Width);
5163 ajDebug("Count: %d\n", thys->Count);
5164
5165 ajDebug("IMatrix: %x\n", thys->IMatrix);
5166 ajDebug("FMatrix: %x\n", thys->FMatrix);
5167 ajDebug("Matrix: '%S'\n", thys->Matrix);
5168 ajDebug("GapPen: '%S'\n", thys->GapPen);
5169 ajDebug("ExtPen: '%S'\n", thys->ExtPen);
5170 ajDebug("SeqOnly: %B\n", thys->SeqOnly);
5171 ajDebug("SeqExternal: %B\n", thys->SeqExternal);
5172
5173 return;
5174 }
5175
5176
5177
5178
5179 /* @funcstatic alignTraceData *************************************************
5180 **
5181 ** Report alignment internal list data to debug output
5182 **
5183 ** @param [r] thys [const AjPAlign] Alignment object
5184 ** @return [void]
5185 **
5186 ** @release 2.1.0
5187 ******************************************************************************/
5188
alignTraceData(const AjPAlign thys)5189 static void alignTraceData(const AjPAlign thys)
5190 {
5191 AlignPData* pdata = NULL;
5192 AlignPData data = NULL;
5193 ajint nali;
5194 ajint iali;
5195 ajint i;
5196 ajint nseq;
5197 AjPStr tmpstr = NULL;
5198
5199 nseq = thys->Nseqs;
5200
5201 nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
5202 ajDebug("Data list length: %d\n", nali);
5203
5204 if(!nali)
5205 return;
5206
5207 for(iali=0; iali<nali; iali++)
5208 {
5209 data = pdata[iali];
5210
5211 ajDebug("%d LenAli: %d\n", iali, data->LenAli);
5212 ajDebug("%d NumId: %d\n", iali, data->NumId);
5213 ajDebug("%d NumSim: %d\n", iali, data->NumSim);
5214 ajDebug("%d NumGap: %d\n", iali, data->NumGap);
5215 ajDebug("%d Score: '%S'\n", iali, data->Score);
5216
5217 ajDebug("%d Start: {", iali);
5218
5219 for(i=0; i < nseq; i++)
5220 ajDebug(" %d", data->Start[i]);
5221
5222 ajDebug(" }\n");
5223
5224 ajDebug("%d End: {", iali);
5225
5226 for(i=0; i < nseq; i++)
5227 ajDebug(" %d", data->End[i]);
5228
5229 ajDebug(" }\n");
5230
5231 ajDebug("%d Len: {", iali);
5232
5233 for(i=0; i < nseq; i++)
5234 ajDebug(" %d", data->Len[i]);
5235
5236 ajDebug(" }\n");
5237
5238 ajDebug("%d Offset: {", iali);
5239
5240 for(i=0; i < nseq; i++)
5241 ajDebug(" %d", data->Offset[i]);
5242
5243 ajDebug(" }\n");
5244
5245 ajDebug("%d SubOffset: {", iali);
5246
5247 for(i=0; i < nseq; i++)
5248 ajDebug(" %d", data->SubOffset[i]);
5249
5250 ajDebug(" }\n");
5251
5252 ajDebug("%d Rev: {", iali);
5253
5254 for(i=0; i < nseq; i++)
5255 ajDebug(" %b", data->Rev[i]);
5256
5257 ajDebug(" }\n");
5258
5259 ajDebug("%d Seq: {\n", iali);
5260
5261 for(i=0; i < nseq; i++)
5262 if(ajSeqGetLen(data->Seq[i]) > 40)
5263 {
5264 ajStrAssignSubS(&tmpstr, ajSeqGetSeqS(data->Seq[i]), -20, -1);
5265 ajDebug("%6d [%d] '%20.20S...%20S'\n",
5266 ajSeqGetLen(data->Seq[i]), i,
5267 ajSeqGetSeqS(data->Seq[i]),
5268 tmpstr);
5269 }
5270 else
5271 ajDebug(" %d '%S'\n", i, ajSeqGetSeqS(data->Seq[i]));
5272
5273 ajDebug(" }\n");
5274 }
5275 AJFREE(pdata);
5276 ajStrDel(&tmpstr);
5277
5278 return;
5279 }
5280
5281
5282
5283
5284 /* @func ajAlignPrintFormat ***************************************************
5285 **
5286 ** Reports the internal data structures
5287 **
5288 ** @param [u] outf [AjPFile] Output file
5289 ** @param [r] full [AjBool] Full report (usually ajFalse)
5290 ** @return [void]
5291 **
5292 ** @release 2.5.0
5293 ** @@
5294 ******************************************************************************/
5295
ajAlignPrintFormat(AjPFile outf,AjBool full)5296 void ajAlignPrintFormat(AjPFile outf, AjBool full)
5297 {
5298 ajint i = 0;
5299
5300 ajFmtPrintF(outf, "\n");
5301 ajFmtPrintF(outf, "# Alignment output formats\n");
5302 ajFmtPrintF(outf, "# Name Format name (or alias)\n");
5303 ajFmtPrintF(outf, "# Minseq Minimum number of sequences\n");
5304 ajFmtPrintF(outf, "# Maxseq Minimum number of sequences\n");
5305 ajFmtPrintF(outf, "# Nuc Valid for nucleotide sequences\n");
5306 ajFmtPrintF(outf, "# Pro Valid for protein sequences\n");
5307 ajFmtPrintF(outf, "# Header Include standard header/footer blocks\n");
5308 ajFmtPrintF(outf, "# Desc Format description\n");
5309 ajFmtPrintF(outf, "# Name Alias Nuc Nuc Pro Minseq Maxseq "
5310 "Description\n");
5311 ajFmtPrintF(outf, "\n");
5312 ajFmtPrintF(outf, "AFormat {\n");
5313
5314 for(i=0; alignFormat[i].Name; i++)
5315 {
5316 if(full || !alignFormat[i].Alias)
5317 ajFmtPrintF(outf, " %-12s %5B %3B %3B %3B %6d %6d \"%s\"\n",
5318 alignFormat[i].Name,
5319 alignFormat[i].Alias,
5320 alignFormat[i].Nucleotide,
5321 alignFormat[i].Protein,
5322 alignFormat[i].Showheader,
5323 alignFormat[i].Minseq,
5324 alignFormat[i].Maxseq,
5325 alignFormat[i].Desc);
5326 }
5327
5328 ajFmtPrintF(outf, "}\n\n");
5329
5330 return;
5331 }
5332
5333
5334
5335
5336 /* @func ajAlignPrintbookFormat ***********************************************
5337 **
5338 ** Reports the alignment format internals as Docbook text
5339 **
5340 ** @param [u] outf [AjPFile] Output file
5341 ** @return [void]
5342 **
5343 ** @release 6.2.0
5344 ** @@
5345 ******************************************************************************/
5346
ajAlignPrintbookFormat(AjPFile outf)5347 void ajAlignPrintbookFormat(AjPFile outf)
5348 {
5349 ajint i = 0;
5350 ajint j = 0;
5351 AjPStr namestr = NULL;
5352 AjPList fmtlist;
5353 AjPStr* names;
5354
5355 fmtlist = ajListstrNew();
5356
5357 ajFmtPrintF(outf, "<para>The supported alignment formats are summarised "
5358 "in the table below. The columns are as follows: "
5359 "<emphasis>Output format</emphasis> (format name), "
5360 "<emphasis>Nuc</emphasis> (\"true\" indicates nucleotide "
5361 "sequence data may be represented), "
5362 "<emphasis>Pro</emphasis> (\"true\" indicates protein "
5363 "sequence data may be represented, "
5364 "<emphasis>Header</emphasis> (whether the standard EMBOSS "
5365 "alignment header is included), "
5366 "<emphasis>Minseq</emphasis> "
5367 "(minimum sequences in alignment), "
5368 "<emphasis>Maxseq</emphasis> "
5369 "(maximum sequences in alignment) and "
5370 "<emphasis>Description</emphasis> (short description of "
5371 "the format).</para> \n\n");
5372
5373 ajFmtPrintF(outf, "<table frame=\"box\" rules=\"cols\">\n");
5374 ajFmtPrintF(outf, " <caption>Alignment formats</caption>\n");
5375 ajFmtPrintF(outf, " <thead>\n");
5376 ajFmtPrintF(outf, " <tr align=\"center\">\n");
5377 ajFmtPrintF(outf, " <th>Output Format</th>\n");
5378 ajFmtPrintF(outf, " <th>Nuc</th>\n");
5379 ajFmtPrintF(outf, " <th>Pro</th>\n");
5380 ajFmtPrintF(outf, " <th>Header</th>\n");
5381 ajFmtPrintF(outf, " <th>Minseq</th>\n");
5382 ajFmtPrintF(outf, " <th>Maxseq</th>\n");
5383 ajFmtPrintF(outf, " <th>Description</th>\n");
5384 ajFmtPrintF(outf, " </tr>\n");
5385 ajFmtPrintF(outf, " </thead>\n");
5386 ajFmtPrintF(outf, " <tbody>\n");
5387
5388 for(i=1; alignFormat[i].Name; i++)
5389 {
5390 if(!alignFormat[i].Alias)
5391 {
5392 namestr = ajStrNewC(alignFormat[i].Name);
5393 ajListPush(fmtlist, namestr);
5394 namestr = NULL;
5395 }
5396 }
5397
5398 ajListSort(fmtlist, &ajStrVcmp);
5399 ajListstrToarray(fmtlist, &names);
5400
5401 for(i=0; names[i]; i++)
5402 {
5403 for(j=0; alignFormat[j].Name; j++)
5404 {
5405 if(ajStrMatchC(names[i],alignFormat[j].Name))
5406 {
5407 ajFmtPrintF(outf, " <tr>\n");
5408 ajFmtPrintF(outf, " <td>%s</td>\n",
5409 alignFormat[j].Name);
5410 ajFmtPrintF(outf, " <td>%B</td>\n",
5411 alignFormat[j].Nucleotide);
5412 ajFmtPrintF(outf, " <td>%B</td>\n",
5413 alignFormat[j].Protein);
5414 ajFmtPrintF(outf, " <td>%B</td>\n",
5415 alignFormat[j].Showheader);
5416 ajFmtPrintF(outf, " <td>%d</td>\n",
5417 alignFormat[j].Minseq);
5418 ajFmtPrintF(outf, " <td>%d</td>\n",
5419 alignFormat[j].Maxseq);
5420 ajFmtPrintF(outf, " <td>%s</td>\n",
5421 alignFormat[j].Desc);
5422 ajFmtPrintF(outf, " </tr>\n");
5423 }
5424 }
5425 }
5426
5427 ajFmtPrintF(outf, " </tbody>\n");
5428 ajFmtPrintF(outf, "</table>\n");
5429 ajStrDel(&namestr);
5430
5431 names = NULL;
5432 ajListstrFreeData(&fmtlist);
5433
5434 return;
5435 }
5436
5437
5438
5439
5440 /* @func ajAlignPrinthtmlFormat ***********************************************
5441 **
5442 ** Reports the internal data structures
5443 **
5444 ** @param [u] outf [AjPFile] Output file
5445 ** @return [void]
5446 **
5447 ** @release 6.4.0
5448 ** @@
5449 ******************************************************************************/
5450
ajAlignPrinthtmlFormat(AjPFile outf)5451 void ajAlignPrinthtmlFormat(AjPFile outf)
5452 {
5453 ajint i = 0;
5454
5455 ajFmtPrintF(outf, "<table border=3>");
5456 ajFmtPrintF(outf, "<tr><th>Alignment Format</th><th>Alias</th>\n");
5457 ajFmtPrintF(outf, "<th>Nuc</th><th>Pro</th><th>Showheader</th>\n");
5458 ajFmtPrintF(outf, "<th>Minseq</th><th>Maxseq</th>\n");
5459 ajFmtPrintF(outf, "<th>Description</th></tr>\n");
5460 ajFmtPrintF(outf, "\n");
5461 ajFmtPrintF(outf, "# alignment output formats\n");
5462 ajFmtPrintF(outf, "# Name Format name (or alias)\n");
5463 ajFmtPrintF(outf, "# Minseq Minimum number of sequences\n");
5464 ajFmtPrintF(outf, "# Maxseq Minimum number of sequences\n");
5465 ajFmtPrintF(outf, "# Nuc Valid for nucleotide sequences\n");
5466 ajFmtPrintF(outf, "# Pro Valid for protein sequences\n");
5467 ajFmtPrintF(outf, "# Header Include standard header/footer blocks\n");
5468 ajFmtPrintF(outf, "# Desc Format description\n");
5469 ajFmtPrintF(outf, "# Name Alias Nuc Nuc Pro Minseq Maxseq "
5470 "Description\n");
5471 ajFmtPrintF(outf, "\n");
5472 ajFmtPrintF(outf, "AFormat {\n");
5473
5474 for(i=0; alignFormat[i].Name; i++)
5475 {
5476 if(!alignFormat[i].Alias)
5477 ajFmtPrintF(outf, "<tr><td>\n%-12s\n</td><td>%5B</td><td>%3B</td>"
5478 "<td>%3B</td><td>%3B</td><td>%6d</td><td>%6d</td>"
5479 "<td>\"%s\"</td></tr>\n",
5480 alignFormat[i].Name,
5481 alignFormat[i].Alias,
5482 alignFormat[i].Nucleotide,
5483 alignFormat[i].Protein,
5484 alignFormat[i].Showheader,
5485 alignFormat[i].Minseq,
5486 alignFormat[i].Maxseq,
5487 alignFormat[i].Desc);
5488 }
5489
5490 ajFmtPrintF(outf, "}\n\n");
5491
5492 return;
5493 }
5494
5495
5496
5497
5498 /* @func ajAlignPrintwikiFormat ***********************************************
5499 **
5500 ** Reports the alignment format internals as wikitext
5501 **
5502 ** @param [u] outf [AjPFile] Output file
5503 ** @return [void]
5504 **
5505 ** @release 6.2.0
5506 ** @@
5507 ******************************************************************************/
5508
ajAlignPrintwikiFormat(AjPFile outf)5509 void ajAlignPrintwikiFormat(AjPFile outf)
5510 {
5511 ajint i = 0;
5512 ajint j = 0;
5513 AjPStr namestr = NULL;
5514
5515 ajFmtPrintF(outf, "{| class=\"wikitable sortable\" border=\"2\"\n");
5516 ajFmtPrintF(outf, "|-\n");
5517 ajFmtPrintF(outf, "!Format!!Nuc!!Pro!!Header!!Min||Max!!"
5518 "class=\"unsortable\"|Description\n");
5519
5520 for(i=1; alignFormat[i].Name; i++)
5521 {
5522 if(!alignFormat[i].Alias)
5523 {
5524 ajFmtPrintF(outf, "|-\n");
5525 ajStrAssignC(&namestr, alignFormat[i].Name);
5526
5527
5528 for(j=i+1; alignFormat[j].Name; j++)
5529 {
5530 if(alignFormat[j].Write == alignFormat[i].Write)
5531 {
5532 ajFmtPrintAppS(&namestr, "<br>%s", alignFormat[j].Name);
5533 if(!alignFormat[j].Alias)
5534 {
5535 ajWarn("Align output format '%s' same as '%s' "
5536 "but not alias",
5537 alignFormat[j].Name,
5538 alignFormat[i].Name);
5539 }
5540 }
5541 }
5542 ajFmtPrintF(outf, "|%S||%B||%B||%B||%d||%d||%s\n",
5543 namestr,
5544 alignFormat[i].Nucleotide,
5545 alignFormat[i].Protein,
5546 alignFormat[i].Showheader,
5547 alignFormat[i].Minseq,
5548 alignFormat[i].Maxseq,
5549 alignFormat[i].Desc);
5550 }
5551 }
5552
5553 ajFmtPrintF(outf, "|}\n");
5554
5555 ajStrDel(&namestr);
5556
5557 return;
5558 }
5559
5560
5561
5562
5563 /* @funcstatic alignConsSet ***************************************************
5564 **
5565 ** Sets consensus to be a copy of a numbered sequence
5566 **
5567 ** @param [r] thys [const AjPAlign] Alignment object
5568 ** @param [r] iali [ajint] alignment number
5569 ** @param [r] numseq [ajint] Sequence number to use as the consensus
5570 ** @param [w] cons [AjPStr*] the created consensus sequence
5571 ** @return [AjBool] True on success
5572 **
5573 ** @release 5.0.0
5574 ******************************************************************************/
5575
alignConsSet(const AjPAlign thys,ajint iali,ajint numseq,AjPStr * cons)5576 static AjBool alignConsSet(const AjPAlign thys, ajint iali,
5577 ajint numseq, AjPStr *cons)
5578 {
5579 ajint nseqs;
5580 ajint mlen;
5581
5582 ajint kkpos; /* alignment position loop variable */
5583
5584 const char *seqcharptr;
5585 char gapch;
5586 AlignPData data = NULL;
5587 const AjPSeq* seqs;
5588 const AjPSeq seq;
5589
5590 data = alignData(thys, iali);
5591 seqs = alignSeqs(thys, iali);
5592
5593 nseqs = thys->Nseqs;
5594 mlen = data->LenAli;
5595
5596 ajDebug("alignConsSet iali:%d numseq:%d nseqs:%d mlen:%d\n",
5597 iali, numseq, nseqs, mlen);
5598
5599 gapch = '-';
5600
5601 seq = seqs[numseq];
5602 seqcharptr = ajSeqGetSeqC(seq);
5603
5604 ajStrAssignClear(cons);
5605
5606 /* For each position in the alignment, calculate consensus character */
5607
5608 for(kkpos=0; kkpos<data->SubOffset[0]; kkpos++)
5609 ajStrAppendK(cons, gapch);
5610
5611 for(kkpos=0; kkpos< mlen; kkpos++)
5612 ajStrAppendK(cons,seqcharptr[kkpos+data->SubOffset[numseq]]);
5613
5614 /* ajDebug("ret ident:%d sim:%d gap:%d len:%d\n",
5615 *retident, *retsim, *retgap, *retlen); */
5616
5617 return ajTrue;
5618 }
5619
5620
5621
5622
5623 /* @func ajAlignConsAmbig *****************************************************
5624 **
5625 ** Calculates alignment consensus of ambiguity characters from a sequence set.
5626 **
5627 ** @param [r] thys [const AjPSeqset] Sequence set.
5628 ** @param [w] cons [AjPStr*] the created consensus sequence
5629 ** @return [AjBool] ajTrue on success
5630 **
5631 ** @release 6.0.0
5632 ******************************************************************************/
5633
ajAlignConsAmbig(const AjPSeqset thys,AjPStr * cons)5634 AjBool ajAlignConsAmbig(const AjPSeqset thys, AjPStr *cons)
5635 {
5636 if(ajSeqsetIsNuc(thys))
5637 return ajAlignConsAmbigNuc(thys, cons);
5638
5639 return ajAlignConsAmbigProt(thys, cons);
5640 }
5641
5642
5643
5644
5645 /* @func ajAlignConsAmbigNuc **************************************************
5646 **
5647 ** Calculates alignment consensus of ambiguity characters from a sequence set.
5648 **
5649 ** @param [r] thys [const AjPSeqset] Sequence set.
5650 ** @param [w] cons [AjPStr*] the created consensus sequence
5651 ** @return [AjBool] ajTrue on success
5652 **
5653 ** @release 6.0.0
5654 ******************************************************************************/
5655
ajAlignConsAmbigNuc(const AjPSeqset thys,AjPStr * cons)5656 AjBool ajAlignConsAmbigNuc(const AjPSeqset thys, AjPStr *cons)
5657 {
5658 ajint iseq; /* iterate over sequences (outer loop) */
5659
5660 ajint nseqs;
5661 ajint mlen;
5662 AjBool isgap;
5663 ajint kipos; /* alignment position */
5664
5665 const char **seqcharptr;
5666 char res;
5667 ajuint binres;
5668
5669 void *freeptr = NULL;
5670
5671 nseqs = thys->Size;
5672 mlen = thys->Len;
5673
5674 AJCNEW(seqcharptr,nseqs);
5675
5676 /* ajDebug("fplural:%.2f ident:%.1f mlen: %d\n",
5677 fplural, ident, mlen); */
5678
5679 for(iseq=0;iseq<nseqs;iseq++) /* get sequence as string */
5680 seqcharptr[iseq] = ajSeqsetGetseqSeqC(thys, iseq);
5681
5682 /* For each position in the alignment, calculate consensus character */
5683
5684 for(kipos=0; kipos< mlen; kipos++)
5685 {
5686 binres = 0;
5687 isgap = ajFalse;
5688
5689 for(iseq=0;iseq<nseqs;iseq++)
5690 {
5691 if(seqcharptr[iseq][kipos] == ' ' ||
5692 seqcharptr[iseq][kipos] == '-')
5693 isgap = ajTrue;
5694
5695 binres |= ajBaseAlphaToBin(seqcharptr[iseq][kipos]);
5696 }
5697
5698 res = ajBaseBinToAlpha(binres);
5699
5700 if(isgap)
5701 res = (char)tolower((int)res);
5702
5703 ajStrAppendK(cons,res);
5704 }
5705
5706 freeptr = (void *) seqcharptr;
5707 AJFREE(freeptr);
5708
5709 return ajTrue;
5710 }
5711
5712
5713
5714
5715 /* @func ajAlignConsAmbigProt *************************************************
5716 **
5717 ** Calculates alignment consensus of ambiguity characters from a sequence set.
5718 **
5719 ** @param [r] thys [const AjPSeqset] Sequence set.
5720 ** @param [w] cons [AjPStr*] the created consensus sequence
5721 ** @return [AjBool] ajTrue on success
5722 **
5723 ** @release 6.0.0
5724 ******************************************************************************/
5725
ajAlignConsAmbigProt(const AjPSeqset thys,AjPStr * cons)5726 AjBool ajAlignConsAmbigProt(const AjPSeqset thys, AjPStr *cons)
5727 {
5728 ajint iseq; /* iterate over sequences (outer loop) */
5729
5730 ajint nseqs;
5731 ajint mlen;
5732 AjBool isgap;
5733 ajint kipos; /* alignment position */
5734
5735 const char **seqcharptr;
5736 char res;
5737 ajuint binres;
5738
5739 void *freeptr = NULL;
5740
5741 nseqs = thys->Size;
5742 mlen = thys->Len;
5743
5744 AJCNEW(seqcharptr,nseqs);
5745
5746 /* ajDebug("fplural:%.2f ident:%.1f mlen: %d\n",
5747 fplural, ident, mlen); */
5748
5749 for(iseq=0;iseq<nseqs;iseq++) /* get sequence as string */
5750 seqcharptr[iseq] = ajSeqsetGetseqSeqC(thys, iseq);
5751
5752 /* For each position in the alignment, calculate consensus character */
5753
5754 for(kipos=0; kipos< mlen; kipos++)
5755 {
5756 binres = 0;
5757 isgap = ajFalse;
5758
5759 for(iseq=0;iseq<nseqs;iseq++)
5760 {
5761 if(seqcharptr[iseq][kipos] == ' ' ||
5762 seqcharptr[iseq][kipos] == '-')
5763 isgap = ajTrue;
5764 binres |= ajResidueAlphaToBin(seqcharptr[iseq][kipos]);
5765 }
5766
5767 res = ajResidueBinToAlpha(binres);
5768
5769 if(isgap)
5770 res = (char)tolower((int)res);
5771
5772 ajStrAppendK(cons,res);
5773 }
5774
5775 freeptr = (void *) seqcharptr;
5776 AJFREE(freeptr);
5777
5778 return ajTrue;
5779 }
5780
5781
5782
5783
5784 /* @func ajAlignConsStats *****************************************************
5785 **
5786 ** Calculates alignment statistics (and a consensus) from a sequence set.
5787 **
5788 ** @param [r] thys [const AjPSeqset] Sequence set.
5789 ** @param [w] mymatrix [AjPMatrix] User-defined matrix, or NULL for the default
5790 ** @param [w] cons [AjPStr*] the created consensus sequence
5791 ** @param [w] retident [ajint*] number of residues identical in all sequences
5792 ** @param [w] retsim [ajint*] number of residues similar in all sequences
5793 ** @param [w] retgap [ajint*] number of residues with a gap in 1 sequence
5794 ** @param [w] retlen [ajint*] length of the alignment
5795 ** @return [AjBool] ajTrue on success
5796 **
5797 ** @release 2.9.0
5798 ******************************************************************************/
5799
ajAlignConsStats(const AjPSeqset thys,AjPMatrix mymatrix,AjPStr * cons,ajint * retident,ajint * retsim,ajint * retgap,ajint * retlen)5800 AjBool ajAlignConsStats(const AjPSeqset thys, AjPMatrix mymatrix, AjPStr *cons,
5801 ajint* retident, ajint* retsim, ajint* retgap,
5802 ajint* retlen)
5803 {
5804 AjPMatrix imatrix = NULL;
5805 AjPStr matname = NULL;
5806 ajint imat; /* iterate over identical and matching arrays */
5807 ajint iseq; /* iterate over sequences (outer loop) */
5808 ajint jseq; /* iterate over sequences (inner loop) */
5809
5810 ajint **matrix = NULL;
5811 float **fmatrix = NULL;
5812 ajint m1 = 0;
5813 ajint m2 = 0;
5814 ajint matsize;
5815 ajint matchingmaxindex;
5816 ajint identicalmaxindex;
5817 ajint nseqs;
5818 ajint mlen;
5819
5820 float max;
5821 float contri = 0;
5822 float contrj = 0;
5823
5824 AjPSeqCvt cvt = 0;
5825 AjPFloat posScore=NULL; /* cumulative similarity scores by sequence */
5826 /* for matching all other sequences */
5827 float *identical; /* cum. weight for each valid character */
5828 float *matching; /* cum. weight for matching this character */
5829 ajint highindex; /* position of highest score in posScore */
5830 ajint kkpos; /* alignment position loop variable */
5831 ajint kipos; /* alignment position kkpos + iseq start */
5832 ajint kjpos; /* alignment position kkpos + jseq start */
5833 ajint khpos; /* alignment position in highindex */
5834
5835 float himatch = 0.0; /* highest match score (often used) */
5836
5837 const char **seqcharptr;
5838 char res;
5839 char nocon;
5840 char gapch;
5841 float fplural;
5842 float fplurality = 51.0;
5843 float ident;
5844 AjBool isident;
5845 AjBool issim;
5846 AjBool isgap;
5847 const AjPSeq* seqs;
5848 ajint numres; /* number of residues (not spaces) */
5849 void *freeptr;
5850
5851 if(mymatrix)
5852 imatrix = mymatrix;
5853
5854 if(!imatrix)
5855 {
5856 if(ajSeqsetIsNuc(thys))
5857 ajStrAssignC(&matname, "EDNAFULL");
5858 else
5859 ajStrAssignC(&matname, "EBLOSUM62");
5860 imatrix = ajMatrixNewFile(matname);
5861 }
5862
5863 ajStrDel(&matname);
5864
5865 *retident = 0;
5866 *retsim = 0;
5867 *retgap = 0;
5868
5869 nseqs = thys->Size;
5870 mlen = thys->Len;
5871 fplural = ajSeqsetGetTotweight(thys) * fplurality / (float)100.;
5872 ident = ajSeqsetGetTotweight(thys);
5873
5874 /* ajDebug("fplural:%.2f ident:%.1f mlen: %d\n",
5875 fplural, ident, mlen); */
5876
5877 matrix = ajMatrixGetMatrix(imatrix);
5878 cvt = ajMatrixGetCvt(imatrix); /* return conversion table */
5879 matsize = ajMatrixGetSize(imatrix);
5880
5881 AJCNEW(seqs,nseqs);
5882 AJCNEW(seqcharptr,nseqs);
5883 AJCNEW(identical,matsize);
5884 AJCNEW(matching,matsize);
5885
5886 posScore = ajFloatNew();
5887
5888 gapch = '-';
5889 nocon = 'x';
5890
5891 for(iseq=0;iseq<nseqs;iseq++) /* get sequence as string */
5892 {
5893 seqcharptr[iseq] = ajSeqsetGetseqSeqC(thys, iseq);
5894 seqs[iseq] = ajSeqsetGetseqSeq(thys, iseq);
5895 }
5896
5897 /* For each position in the alignment, calculate consensus character */
5898
5899 for(kkpos=0; kkpos< mlen; kkpos++)
5900 {
5901 res = gapch;
5902
5903 isident = ajFalse;
5904 issim = ajFalse;
5905 isgap = ajFalse;
5906
5907 /*
5908 ** reset identities and +ve matches
5909 */
5910
5911 for(imat=0;imat<matsize;imat++)
5912 {
5913 identical[imat] = 0.0; /* weights of all sequence chars in col. */
5914 matching[imat] = 0.0;
5915 }
5916
5917 /*
5918 ** reset the posScore array
5919 */
5920
5921 for(iseq=0;iseq<nseqs;iseq++)
5922 ajFloatPut(&posScore,iseq,0.);
5923
5924 /*
5925 ** generate scores (identical, posScore) for columns
5926 */
5927
5928 for(iseq=0;iseq<nseqs;iseq++)
5929 {
5930 kipos = kkpos;
5931 m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[iseq][kipos]);
5932
5933 if(m1)
5934 identical[m1] += seqs[iseq]->Weight;
5935
5936 for(jseq=iseq+1;jseq<nseqs;jseq++)
5937 {
5938 kjpos = kkpos;
5939 m2 = ajSeqcvtGetCodeK(cvt,seqcharptr[jseq][kjpos]);
5940
5941 if(m1 && m2)
5942 {
5943 if(matrix)
5944 {
5945 contri = (float)matrix[m1][m2]*seqs[jseq]->Weight
5946 +ajFloatGet(posScore,iseq);
5947 contrj = (float)matrix[m1][m2]*seqs[iseq]->Weight
5948 +ajFloatGet(posScore,jseq);
5949 }
5950 else
5951 {
5952 contri = fmatrix[m1][m2]*seqs[jseq]->Weight
5953 +ajFloatGet(posScore,iseq);
5954 contrj = fmatrix[m1][m2]*seqs[iseq]->Weight
5955 +ajFloatGet(posScore,jseq);
5956 }
5957
5958 ajFloatPut(&posScore,iseq,contri);
5959 ajFloatPut(&posScore,jseq,contrj);
5960 }
5961 }
5962 }
5963
5964 /*
5965 ** highindex is the highest scoring position (seq no.) in posScore
5966 ** for 2 sequences this appears to be usually 0
5967 */
5968
5969 highindex = -1;
5970 max = -FLT_MAX;
5971 numres = 0;
5972
5973 for(iseq=0;iseq<nseqs;iseq++)
5974 {
5975 kipos = kkpos;
5976
5977 if(seqcharptr[iseq][kipos] != ' ' &&
5978 seqcharptr[iseq][kipos] != '-')
5979 numres++;
5980
5981 if(ajFloatGet(posScore,iseq) > max)
5982 {
5983 highindex = iseq;
5984 max = ajFloatGet(posScore,iseq);
5985 }
5986 }
5987
5988 /* highindex is now set */
5989
5990 /*
5991 ** find +ve matches in the column
5992 ** m1 is non-zero for a valid character in iseq
5993 ** m2 is non-zero for a valid character in jseq
5994 */
5995
5996 for(iseq=0;iseq<nseqs;iseq++)
5997 {
5998 kipos = kkpos;
5999 m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[iseq][kipos]);
6000
6001 if(E_FPZERO(matching[m1],U_FEPS))
6002 { /* first time we have met this character */
6003 for(jseq=0;jseq<nseqs;jseq++) /* all (other) sequences */
6004 {
6005 kjpos = kkpos;
6006 m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[jseq][kjpos]);
6007
6008 if(matrix)
6009 {
6010 if(m1 && m2 && matrix[m1][m2] > 0)
6011 { /* 'matching' if positive */
6012 matching[m1] += seqs[jseq]->Weight;
6013 }
6014 }
6015 else
6016 {
6017 if(m1 && m2 && fmatrix[m1][m2] > 0.0)
6018 {
6019 matching[m1] += seqs[jseq]->Weight;
6020 }
6021 }
6022
6023 }
6024 }
6025 }
6026
6027 matchingmaxindex = 0; /* get max matching and identical */
6028 identicalmaxindex = 0;
6029
6030 for(iseq=0;iseq<nseqs;iseq++)
6031 {
6032 kipos = kkpos;
6033 m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[iseq][kipos]);
6034
6035 if(identical[m1] > identical[identicalmaxindex])
6036 identicalmaxindex= m1;
6037 }
6038
6039 for(iseq=0;iseq<nseqs;iseq++)
6040 {
6041 kipos = kkpos;
6042 m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[iseq][kipos]);
6043
6044 if(matching[m1] > matching[matchingmaxindex])
6045 {
6046 matchingmaxindex= m1;
6047 }
6048 else if(E_FPEQ(matching[m1],matching[matchingmaxindex],U_FEPS))
6049 {
6050 if(identical[m1] > identical[matchingmaxindex])
6051 matchingmaxindex= m1;
6052 }
6053
6054 if(seqcharptr[iseq][kipos] == '-' ||
6055 seqcharptr[iseq][kipos] == ' ')
6056 isgap=ajTrue;
6057 }
6058
6059 khpos = kkpos;
6060 himatch = matching[ajSeqcvtGetCodeK(cvt,seqcharptr[highindex][khpos])];
6061
6062 if(identical[identicalmaxindex] >= ident)
6063 isident=ajTrue;
6064
6065 if(matching[matchingmaxindex] >= fplural)
6066 issim=ajTrue;
6067
6068 /* plurality check */
6069 res = gapch;
6070 if(himatch >= fplural)
6071 if(seqcharptr[highindex][khpos] != '-')
6072 res = toupper((int)seqcharptr[highindex][khpos]);
6073
6074 if(nseqs > 1 && E_FPEQ(himatch,seqs[highindex]->Weight,U_FEPS))
6075 {
6076 if(numres > 1)
6077 res = nocon;
6078 else
6079 res = gapch;
6080 }
6081
6082 if(issim && ! isident)
6083 res = tolower((int)res);
6084
6085 ajStrAppendK(cons,res);
6086 if(isident) ++*retident;
6087 if(issim) ++*retsim;
6088 if(isgap) ++*retgap;
6089
6090 /* ajDebug("id:%b sim:%b gap:%b res:%c '",
6091 isident, issim, isgap, res); */
6092 for(iseq=0; iseq<nseqs; iseq++)
6093 {
6094 kipos = kkpos;
6095 /* ajDebug("%c", seqcharptr[iseq][kipos]); */
6096 }
6097 /* ajDebug("'\n"); */
6098 }
6099
6100 *retlen = ajSeqsetGetLen(thys);
6101
6102 /* ajDebug("ret ident:%d sim:%d gap:%d len:%d\n",
6103 *retident, *retsim, *retgap, *retlen); */
6104
6105
6106 freeptr = (void *) seqs;
6107 AJFREE(freeptr);
6108
6109 freeptr = (void *) seqcharptr;
6110 AJFREE(freeptr);
6111
6112 AJFREE(matching);
6113 AJFREE(identical);
6114 ajFloatDel(&posScore);
6115 ajMatrixDel(&imatrix);
6116
6117 return ajTrue;
6118 }
6119
6120
6121
6122
6123 /* @funcstatic alignCigar *****************************************************
6124 **
6125 ** Return CIGAR string for a pairwise alignment, as well as match region
6126 ** of the query sequence and the number of mismatches
6127 **
6128 ** @param [r] qryseq [const AjPSeq] query sequence
6129 ** @param [r] refseq [const AjPSeq] reference sequence
6130 ** @param [r] qstart [ajint] query sequence start point
6131 ** @param [r] qend [ajint] query sequence end point
6132 ** @param [r] rstart [ajint] reference sequence start point
6133 ** @param [w] qseq [AjPStr*] query sequence
6134 ** @param [w] cigar [AjPStr*] CIGAR string
6135 ** @param [r] seqexternal [AjBool] Sequences are external references,
6136 ** ignore their offsets and use qstart and qend
6137 ** @return [ajint] number of mismatches
6138 **
6139 ** @release 6.3.0
6140 ******************************************************************************/
6141
alignCigar(const AjPSeq qryseq,const AjPSeq refseq,ajint qstart,ajint qend,ajint rstart,AjPStr * qseq,AjPStr * cigar,AjBool seqexternal)6142 static ajint alignCigar(const AjPSeq qryseq, const AjPSeq refseq,
6143 ajint qstart, ajint qend,
6144 ajint rstart,
6145 AjPStr *qseq, AjPStr* cigar, AjBool seqexternal)
6146 {
6147 const char* qryseqc;
6148 const char* refseqc;
6149
6150 static char gapchars[] = "-~.? "; /* all known gap characters */
6151
6152 ajint m=0;
6153 ajint i=0; /* insertions to the reference sequence */
6154 ajint d=0; /* deletions from the reference sequence */
6155 ajuint qoffset=0;
6156 ajuint qoffend=0;
6157 ajint nmismatches=0;
6158 ajint qseqlen;
6159
6160 qseqlen = ajSeqGetLen(qryseq);
6161 qryseqc = ajSeqGetSeqC(qryseq);
6162 refseqc = ajSeqGetSeqC(refseq);
6163
6164 if(seqexternal)
6165 {
6166 qoffset = qstart;
6167 qoffend = qend;
6168 }
6169 else
6170 {
6171 qoffset = ajSeqGetOffset(qryseq);
6172 qoffend = ajSeqGetOffend(qryseq);
6173 }
6174
6175 if(ajSeqIsReversed(qryseq)&&qoffend>0)
6176 ajFmtPrintAppS(cigar, "%dH",qoffend);
6177 else
6178 if(qoffset>0)
6179 ajFmtPrintAppS(cigar, "%dH",qoffset);
6180
6181
6182 for(; qstart<qend; qstart++, rstart++)
6183 {
6184 if(strchr(gapchars,qryseqc[qstart]))
6185 {
6186 d++;
6187
6188 if(m>0)
6189 {
6190 ajFmtPrintAppS(cigar, "%uM",m);
6191 m=0;
6192 }
6193
6194 if(i>0)
6195 {
6196 ajFmtPrintAppS(cigar, "%uI",d);
6197 i=0;
6198 }
6199 }
6200 else if(strchr(gapchars,refseqc[rstart]))
6201 {
6202 i++;
6203
6204 if(m>0)
6205 {
6206 ajFmtPrintAppS(cigar, "%uM",m);
6207 m=0;
6208 }
6209
6210 if(d>0)
6211 {
6212 ajFmtPrintAppS(cigar, "%uD",i);
6213 d=0;
6214 }
6215
6216 ajStrAppendK(qseq,qryseqc[qstart]);
6217
6218 }
6219 else /* match */
6220 {
6221 m++;
6222 ajStrAppendK(qseq,qryseqc[qstart]);
6223
6224 if(toupper((int)qryseqc[qstart])!=toupper((int)refseqc[rstart]))
6225 nmismatches ++;
6226
6227 if(d>0)
6228 {
6229 ajFmtPrintAppS(cigar, "%uD",d);
6230 d=0;
6231
6232 }
6233
6234 if(i>0)
6235 {
6236 ajFmtPrintAppS(cigar, "%uI",i);
6237 i=0;
6238 }
6239 }
6240 }
6241
6242 if(m>0)
6243 ajFmtPrintAppS(cigar, "%uM",m);
6244 else if(d>0)
6245 ajFmtPrintAppS(cigar, "%uD",d);
6246 else if(i>0)
6247 ajFmtPrintAppS(cigar, "%uI",i);
6248
6249 if(ajSeqIsReversed(qryseq)&&qend>qseqlen)
6250 ajFmtPrintAppS(cigar, "%dH",qseqlen-qend);
6251 else if(qend<qseqlen)
6252 ajFmtPrintAppS(cigar, "%dH",qseqlen-qend);
6253
6254 return nmismatches;
6255 }
6256
6257
6258
6259
6260 /* @func ajAlignExit **********************************************************
6261 **
6262 ** Cleans up report processing internal memory
6263 **
6264 ** @return [void]
6265 **
6266 ** @release 4.0.0
6267 ** @@
6268 ******************************************************************************/
6269
ajAlignExit(void)6270 void ajAlignExit(void)
6271 {
6272 return;
6273 }
6274