1 /* @source ajalign ************************************************************
2 **
3 ** AJAX ALIGN (ajax alignment output) functions
4 **
5 ** These functions align AJAX sequences and report them in a variety
6 ** of formats.
7 **
8 ** @author Copyright (C) 2001 Peter Rice, LION Bioscience Ltd.
9 ** @version $Revision: 1.132 $
10 ** @modified $Date: 2012/07/02 16:44:15 $ by $Author: rice $
11 ** @@
12 **
13 ** This library is free software; you can redistribute it and/or
14 ** modify it under the terms of the GNU Lesser General Public
15 ** License as published by the Free Software Foundation; either
16 ** version 2.1 of the License, or (at your option) any later version.
17 **
18 ** This library is distributed in the hope that it will be useful,
19 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21 ** Lesser General Public License for more details.
22 **
23 ** You should have received a copy of the GNU Lesser General Public
24 ** License along with this library; if not, write to the Free Software
25 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
26 ** MA  02110-1301,  USA.
27 **
28 ******************************************************************************/
29 
30 
31 #include "ajlib.h"
32 
33 #include "ajalign.h"
34 #include "ajbase.h"
35 #include "ajarr.h"
36 #include "ajutil.h"
37 #include "ajnam.h"
38 #include "ajmath.h"
39 #include "ajseqwrite.h"
40 
41 #include <string.h>
42 #include <ctype.h>
43 #include <stddef.h>
44 #include <stdarg.h>
45 #include <float.h>
46 #include <limits.h>
47 #include <math.h>
48 
49 
50 
51 
52 /* @datastatic AlignPData *****************************************************
53 **
54 ** Ajax Align Data object.
55 **
56 ** Holds definition of feature alignment data. Stored in AjPAlign structure as
57 ** an AjPList.
58 **
59 ** @alias AlignSData
60 ** @alias AlignOData
61 **
62 ** @attr Start [ajint*] Start position for each sequence
63 ** @attr End [ajint*] End position for each sequence
64 ** @attr Len [ajint*] Original length for each sequence
65 ** @attr Offset [ajint*] Offset for numbering start of each sequence
66 ** @attr Offend [ajint*] Offset for numbering end of each sequence
67 ** @attr SubOffset [ajint*] Offset of used subsequence within given sequence
68 **                          generally for cases where SeqExternal is set
69 **                          in the AjPAlign object that includes this
70 **                          AlignPData object. Used for fetching the
71 **                          actual sequence from the stored sequence,
72 **                          but ignored when calculating position.
73 ** @attr Rev [AjBool*] Reverse each sequence (so far not used)
74 ** @attr RealSeq [AjPSeq*] Sequences usually as copies of data from the
75 **                     application unless SeqExternal in the AjPAlign
76 **                     object is true
77 ** @attr Seq [const AjPSeq*] Pointer to sequence in RealSeq
78 **                           unless SeqExternal in the AjPAlign
79 **                           object is true when pointer is to
80 **                           sequence elsewhere
81 ** @attr LenAli [ajint] Length of the alignment
82 ** @attr NumId [ajint] Number of identical positions, usually calculated
83 ** @attr NumSim [ajint] Number of similar positions, usually calculated
84 ** @attr NumGap [ajint] Number of gap positions, usually calculated
85 ** @attr Score [AjPStr] Score to be reported - stored as a string for output
86 **                      set by functions using ajint or float input
87 ** @attr Nseqs [ajint] Number of sequences, size of each array. This is
88 **                     duplicated in the AjPAlign object but is useful
89 **                     here for constructor and destructor efficiency.
90 ** @attr Padding [char[4]] Padding to alignment boundary
91 ** @@
92 ******************************************************************************/
93 
94 typedef struct AlignSData
95 {
96     ajint* Start;
97     ajint* End;
98     ajint* Len;
99     ajint* Offset;
100     ajint* Offend;
101     ajint* SubOffset;
102     AjBool* Rev;
103     AjPSeq* RealSeq;
104     const AjPSeq* Seq;
105     ajint LenAli;
106     ajint NumId;
107     ajint NumSim;
108     ajint NumGap;
109     AjPStr Score;
110     ajint  Nseqs;
111     char Padding[4];
112 } AlignOData;
113 
114 #define AlignPData AlignOData*
115 
116 
117 
118 
119 /* @datastatic AlignPFormat ***************************************************
120 **
121 ** Ajax Align Formats
122 **
123 ** List of alignment output formats available
124 **
125 ** @alias AlignSFormat
126 ** @alias AlignOFormat
127 **
128 ** @attr Name [const char*] format name
129 ** @attr Desc [const char*] Format description
130 ** @attr Alias [AjBool] Name is an alias for an identical definition
131 ** @attr Nucleotide [AjBool] ajTrue if format can use nucleotide sequences
132 ** @attr Protein [AjBool] ajTrue if format can use protein sequences
133 ** @attr Showheader [AjBool] ajTrue if header appears in output
134 ** @attr Showseqs [AjBool] ajTrue if sequences appear in output
135 ** @attr Padding [ajint] Padding to alignment boundary
136 ** @attr Minseq [ajint] Minimum number of sequences, 2 for pairwise
137 ** @attr Maxseq [ajint] Maximum number of sequences, 2 for pairwise
138 ** @attr Write [void function] Function to write alignment
139 ** @@
140 ******************************************************************************/
141 
142 typedef struct AlignSFormat
143 {
144     const char *Name;
145     const char *Desc;
146     AjBool Alias;
147     AjBool Nucleotide;
148     AjBool Protein;
149     AjBool Showheader;
150     AjBool Showseqs;
151     ajint  Padding;
152     ajint  Minseq;
153     ajint  Maxseq;
154     void (*Write) (AjPAlign thys);
155 } AlignOFormat;
156 
157 #define AlignPFormat AlignOFormat*
158 
159 
160 
161 static ajint      alignCigar(const AjPSeq qryseq, const AjPSeq refseq,
162 	                     ajint qstart, ajint qend,
163                              ajint rstart,
164                              AjPStr *mseq, AjPStr* cigar, AjBool seqExternal);
165 static void       alignConsStats(AjPAlign thys, ajint iali, AjPStr *cons,
166 				 ajint* retident, ajint* retsim, ajint* retgap,
167 				 ajint* retlen);
168 static AjBool     alignConsSet(const AjPAlign thys, ajint iali, ajint seqnum,
169 			       AjPStr *cons);
170 static AlignPData alignData(const AjPAlign thys, ajint iali);
171 static void       alignDataDel(AlignPData* pthys, AjBool external);
172 static AlignPData alignDataNew(ajint nseqs, AjBool external);
173 static void       alignDataSetSequence(AlignPData thys, const AjPSeq seq,
174                     ajint i, AjBool external);
175 static void       alignDiff(AjPStr* pmark, const AjPStr seq, char idchar);
176 static ajint      alignLen(const AjPAlign thys, ajint iali);
177 static const AjPSeq alignSeq(const AjPAlign thys, ajint iseq, ajint iali);
178 static ajint      alignSeqBegin(const AlignPData thys, ajint iseq);
179 static ajint      alignSeqEnd(const AlignPData thys, ajint iseq);
180 static ajint      alignSeqGapBegin(const AlignPData data, ajint iseq);
181 static ajint      alignSeqGapEnd(const AlignPData data, ajint iseq);
182 static ajint      alignSeqIncrement(const AlignPData thys, ajint iseq);
183 static const AjPStr alignSeqName(const AjPAlign thys, const AjPSeq seq);
184 static AjBool     alignSeqRev(const AlignPData thys, ajint iseq);
185 static const AjPSeq* alignSeqs(const AjPAlign thys, ajint iali);
186 static void       alignSame(AjPStr* pmark, const AjPStr seq, char idchar);
187 static void       alignSim(AjPStr* pmark, const char idch, const char simch,
188 			   const char misch, const char gapch);
189 static float      alignTotweight(const AjPAlign thys, ajint iali);
190 static void       alignTraceData(const AjPAlign thys);
191 
192 /*static void       alignWriteBam(AjPAlign thys);*/
193 static void       alignWriteClustal(AjPAlign thys);
194 static void       alignWriteFasta(AjPAlign thys);
195 static void       alignWriteMark(AjPAlign thys, ajint iali, ajint markx);
196 static void       alignWriteMarkX0(AjPAlign thys);
197 static void       alignWriteMarkX1(AjPAlign thys);
198 static void       alignWriteMarkX2(AjPAlign thys);
199 static void       alignWriteMarkX3(AjPAlign thys);
200 static void       alignWriteMarkX10(AjPAlign thys);
201 static void       alignWriteMatch(AjPAlign thys);
202 static void       alignWriteMega(AjPAlign thys);
203 static void       alignWriteMeganon(AjPAlign thys);
204 static void       alignWriteMsf(AjPAlign thys);
205 static void       alignWriteNexus(AjPAlign thys);
206 static void       alignWriteNexusnon(AjPAlign thys);
207 static void       alignWritePhylip(AjPAlign thys);
208 static void       alignWritePhylipnon(AjPAlign thys);
209 static void       alignWriteSam(AjPAlign thys);
210 static void       alignWriteScore(AjPAlign thys);
211 static void       alignWriteSelex(AjPAlign thys);
212 static void       alignWriteSeqformat(AjPAlign thys, ajint iali,
213 				      const char* sqfmt);
214 static void       alignWriteSimple(AjPAlign thys);
215 static void       alignWriteSrs(AjPAlign thys);
216 static void       alignWriteSrsAny(AjPAlign thys,
217 				   ajint imax, AjBool mark);
218 static void       alignWriteSrsPair(AjPAlign thys);
219 static void       alignWriteTCoffee(AjPAlign thys);
220 static void       alignWriteTrace(AjPAlign thys);
221 static void       alignWriteTreecon(AjPAlign thys);
222 static void       alignWriteHeaderNum(AjPAlign thys, ajint iali);
223 
224 
225 
226 
227 /* @funclist alignFormat ******************************************************
228 **
229 ** Functions to write alignments
230 **
231 ******************************************************************************/
232 
233 static AlignOFormat alignFormat[] =
234 {
235   /* name       description       function */
236   /*   Alias    nucleic  protein  header min max zero */
237   /* standard sequence formats */
238   {"unknown",   "Unknown format",
239      AJFALSE, AJFALSE, AJFALSE, AJTRUE,  AJTRUE,  0, 0, 0, &alignWriteSimple},
240   {"fasta",     "Fasta format sequence",
241      AJFALSE, AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWriteFasta},
242   {"a2m",     "A2M (fasta) format sequence", /* same as fasta */
243      AJTRUE,  AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWriteFasta},
244   {"msf",       "MSF format sequence",
245      AJFALSE, AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWriteMsf},
246   {"clustal",   "clustalw format sequence",
247      AJFALSE, AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWriteClustal},
248   {"mega",       "Mega format sequence",
249      AJFALSE, AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWriteMega},
250   {"meganon",       "Mega non-interleaved format sequence",
251      AJFALSE, AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWriteMeganon},
252   {"nexus",   "nexus/paup format sequence",
253      AJFALSE, AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWriteNexus},
254   {"paup",    "nexus/paup format sequence (alias)",
255      AJTRUE,  AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWriteNexus},
256   {"nexusnon",   "nexus/paup non-interleaved format sequence",
257      AJFALSE, AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWriteNexusnon},
258   {"paupnon",    "nexus/paup non-interleaved format sequence (alias)",
259      AJTRUE,  AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWriteNexusnon},
260   {"phylip",   "phylip format sequence",
261      AJFALSE, AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWritePhylip},
262   {"phylipnon", "phylip non-interleaved format sequence",
263      AJFALSE, AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWritePhylipnon},
264   {"selex",       "SELEX format sequence",
265      AJFALSE, AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWriteSelex},
266   {"treecon",       "Treecon format sequence",
267      AJFALSE, AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWriteTreecon},
268   /* trace  for debug */
269   {"debug", "Debugging trace of full internal data content",
270       AJFALSE, AJTRUE,  AJTRUE,  AJTRUE,  AJTRUE, 0,  0, 0, &alignWriteTrace},
271   /* alignment formats */
272   {"markx0",    "Pearson MARKX0 format",
273      AJFALSE, AJTRUE,  AJTRUE,  AJTRUE,  AJTRUE, 0,  2, 2, &alignWriteMarkX0},
274   {"markx1",    "Pearson MARKX1 format",
275      AJFALSE, AJTRUE,  AJTRUE,  AJTRUE,  AJTRUE, 0,  2, 2, &alignWriteMarkX1},
276   {"markx2",    "Pearson MARKX2 format",
277      AJFALSE, AJTRUE,  AJTRUE,  AJTRUE,  AJTRUE, 0,  2, 2, &alignWriteMarkX2},
278   {"markx3",    "Pearson MARKX3 format",
279      AJFALSE, AJTRUE,  AJTRUE,  AJTRUE,  AJTRUE, 0,  2, 2, &alignWriteMarkX3},
280   {"markx10",   "Pearson MARKX10 format",
281      AJFALSE, AJTRUE,  AJTRUE,  AJTRUE,  AJTRUE, 0, 2, 2, &alignWriteMarkX10},
282   {"match","Start and end of matches between sequence pairs",
283      AJFALSE, AJTRUE,  AJTRUE,  AJTRUE, AJFALSE, 0,  2, 2, &alignWriteMatch},
284   {"multiple",  "Simple multiple alignment",
285      AJTRUE,  AJTRUE,  AJTRUE,  AJTRUE,  AJTRUE, 0,  0, 0, &alignWriteSimple},
286   {"pair",      "Simple pairwise alignment",
287      AJFALSE, AJTRUE,  AJTRUE,  AJTRUE,  AJTRUE, 0,  2, 2, &alignWriteSimple},
288   {"simple",    "Simple multiple alignment",
289      AJFALSE, AJTRUE,  AJTRUE,  AJTRUE,  AJTRUE, 0,  0, 0, &alignWriteSimple},
290   {"sam",       "Sequence alignent/map (SAM) format",
291      AJFALSE, AJTRUE,  AJFALSE,  AJFALSE, AJTRUE, 0,  2, 2, &alignWriteSam},
292 /*
293   {"bam",       "Binary sequence alignent/map (BAM) format",
294      AJFALSE, AJTRUE,  AJFALSE,  AJFALSE, AJTRUE, 0,  2, 2, &alignWriteBam},
295 */
296   {"score",     "Score values for pairs of sequences",
297      AJFALSE, AJTRUE,  AJTRUE,  AJTRUE, AJFALSE, 0,  2, 2, &alignWriteScore},
298   {"srs",       "Simple multiple sequence format for SRS",
299      AJFALSE, AJTRUE,  AJTRUE,  AJTRUE,  AJTRUE, 0,  0, 0, &alignWriteSrs},
300   {"srspair",   "Simple pairwise sequence format for SRS",
301      AJFALSE, AJTRUE,  AJTRUE,  AJTRUE,  AJTRUE, 0,  2, 2, &alignWriteSrsPair},
302   {"tcoffee",   "TCOFFEE program format",
303      AJFALSE, AJTRUE,  AJTRUE,  AJFALSE, AJTRUE, 0, 0, 0, &alignWriteTCoffee},
304   {NULL, NULL,
305      AJFALSE, AJFALSE, AJFALSE, AJFALSE, AJTRUE, 0, 0, 0, NULL}
306 };
307 
308 static char alignDefformat[] = "simple";
309 
310 
311 
312 
313 /* pair only works if the alignment is defined as 2 sequences */
314 
315 /* other formats to be defined:
316 **
317 ** markx1 .. markx10 as for FASTA (code in matcher.c)
318 ** blast to be implemented
319 */
320 
321 
322 
323 
324 /* @funcstatic alignWriteTrace ************************************************
325 **
326 ** Writes an alignment in Trace format
327 **
328 ** @param [u] thys [AjPAlign] Alignment object
329 ** @return [void]
330 **
331 ** @release 2.1.0
332 ** @@
333 ******************************************************************************/
334 
alignWriteTrace(AjPAlign thys)335 static void alignWriteTrace(AjPAlign thys)
336 {
337     const AjPSeq seq = NULL;
338     ajint nali;
339     ajint iali;
340     ajint iseq;
341     ajint nseq;
342 
343     ajint identity   = 0;
344     ajint similarity = 0;
345     ajint gaps       = 0;
346     ajint seqlen     = 0;
347 
348     AlignPData* pdata = NULL;
349     AlignPData data = NULL;
350 
351     AjPStr tmpstr    = NULL;
352     AjPFile outf = NULL;
353 
354     nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
355     ajFmtPrintF(thys->File, "Trace output\n");
356     ajFmtPrintF(thys->File, "============\n");
357 
358     ajFmtPrintF(thys->File,
359 		"a: Type:'%S' Formatstr:'%S' Format:%d\n",
360 		thys->Type, thys->Formatstr, thys->Format);
361 
362     ajFmtPrintF(thys->File,
363 		"b: File:%F\n",
364 		thys->File);
365 
366     ajFmtPrintF(thys->File,
367 		"Show:     ShowAcc:%B  ShowDes:%B  ShowUsa:%B\n",
368 		thys->Showacc, thys->Showdes, thys->Showusa);
369 
370    ajFmtPrintF(thys->File,
371 		"Booleans: Multi:%B  Global:%B  SeqOnly:%B  SeqExternal:%B\n",
372 		thys->Multi, thys->Global, thys->SeqOnly, thys->SeqExternal);
373 
374     ajFmtPrintF(thys->File,
375 		"Numbers:  NMin:%d  NMax:%d  Nseqs:%d  Count:%d  Width:%d\n",
376 		thys->Nmin, thys->Nmax, thys->Nseqs,
377 		thys->Count, thys->Width);
378 
379     ajFmtPrintF(thys->File,
380 		"Matrices: IMatrix:'%S'(%d)  FMatrix:'%S'(%d)\n",
381 		ajMatrixGetName(thys->IMatrix),
382                 ajMatrixGetSize(thys->IMatrix),
383 		ajMatrixfGetName(thys->FMatrix),
384                 ajMatrixfGetSize(thys->FMatrix));
385 
386     ajFmtPrintF(thys->File,
387 		"Strings:  Matrix:'%S'  GapPen:'%S'  ExtPen:'%S'\n",
388 		thys->Matrix, thys->GapPen, thys->ExtPen);
389 
390     ajFmtPrintF(thys->File,
391 		"Header: '%S'\n",
392 		thys->Header);
393     ajFmtPrintF(thys->File,
394 		"SubHeader: '%S'\n",
395 		thys->SubHeader);
396     ajFmtPrintF(thys->File,
397 		"Tail: '%S'\n",
398 		thys->Tail);
399     ajFmtPrintF(thys->File,
400 		"SubTail: '%S'\n",
401 		thys->Tail);
402 
403     ajFmtPrintF(thys->File,
404 		"Key: seqlen/len offset> start..end <offend (suboffset) rev "
405 		"Begin..End GapBegin..End\n");
406 
407     for(iali=0; iali<nali; iali++)
408     {
409 	data = pdata[iali];
410 	ajFmtPrintF(thys->File,
411 		    "\nalign%d: Nseqs:%d  LenAli:%d  "
412 		    "NumId:%d  NumSim:%d  NumGap:%d  Score:'%S'\n",
413 		    iali, data->Nseqs, data->LenAli,
414 		    data->NumId, data->NumSim, data->NumGap,
415 		    data->Score);
416 
417 	alignConsStats(thys, iali, &tmpstr,
418 		       &identity, &similarity, &gaps, &seqlen);
419 
420 	ajAlignSetStats(thys, iali, seqlen, identity, similarity, gaps, NULL);
421 	ajFmtPrintF(thys->File,
422 		    "fixed%d: Nseqs:%d  LenAli:%d  "
423 		    "NumId:%d  NumSim:%d  NumGap:%d  Score:'%S'\n",
424 		    iali, data->Nseqs, data->LenAli,
425 		    data->NumId, data->NumSim, data->NumGap,
426 		    data->Score);
427 	ajAlignSetSubStandard(thys, iali);
428 	outf = thys->File;
429 
430 	/* turn off printing of the header, keep the calculation */
431 	thys->File = NULL;
432 	alignWriteHeaderNum(thys,iali);
433 	thys->File = outf;
434 
435 	nseq = thys->Nseqs;
436 
437 	for(iseq=0; iseq < nseq; iseq++)
438 	{
439 	    seq = alignSeq(thys, iseq, iali);
440 	    ajFmtPrintF(thys->File,
441 			"Num%d.%d: %d/%d %d> %d..%d <%d (%d) Rev:%B %d..%d "
442                         "%d..%d\n",
443 			iali, iseq,
444 			(ajSeqGetLen(seq) - ajSeqCountGaps(seq)),
445 			data->Len[iseq],
446 			data->Offset[iseq],
447 			data->Start[iseq], data->End[iseq],
448 			data->Offend[iseq], data->SubOffset[iseq],
449 			data->Rev[iseq],
450 			alignSeqBegin(data, iseq), alignSeqEnd(data, iseq),
451 			alignSeqGapBegin(data, iseq),
452 			alignSeqGapEnd(data, iseq));
453 
454 	    if(ajSeqGetLen(seq) <= 40)
455 		ajFmtPrintF(thys->File,
456 			    "Seq%d.%d: %d '%S'\n",
457 			    iali, iseq, ajSeqGetLen(seq), ajSeqGetSeqS(seq));
458 	    else
459 	    {
460 		ajStrAssignSubS(&tmpstr, ajSeqGetSeqS(seq), -20, -1);
461 		ajFmtPrintF(thys->File,
462 			    "Seq%d.%d: %d '%20.20S...%20S'\n",
463 			    iali, iseq, ajSeqGetLen(seq),
464 			    ajSeqGetSeqS(seq), tmpstr);
465 	    }
466 	}
467     }
468 
469     ajStrDel(&tmpstr);
470 
471     AJFREE(pdata);
472 
473     return;
474 }
475 
476 
477 
478 
479 /* @funcstatic alignWriteClustal **********************************************
480 **
481 ** Writes an alignment in ClustalW format
482 **
483 ** @param [u] thys [AjPAlign] Alignment object
484 ** @return [void]
485 **
486 ** @release 6.1.0
487 ** @@
488 ******************************************************************************/
489 
alignWriteClustal(AjPAlign thys)490 static void alignWriteClustal(AjPAlign thys)
491 {
492     alignWriteSeqformat(thys, 0, "clustal");
493 
494     return;
495 }
496 
497 
498 
499 
500 /* @funcstatic alignWriteMega *************************************************
501 **
502 ** Writes an alignment in Mega format
503 **
504 ** @param [u] thys [AjPAlign] Alignment object
505 ** @return [void]
506 **
507 ** @release 6.1.0
508 ** @@
509 ******************************************************************************/
510 
alignWriteMega(AjPAlign thys)511 static void alignWriteMega(AjPAlign thys)
512 {
513     alignWriteSeqformat(thys, 0, "mega");
514 
515     return;
516 }
517 
518 
519 
520 
521 /* @funcstatic alignWriteMeganon **********************************************
522 **
523 ** Writes an alignment in Mega non-interleaved format
524 **
525 ** @param [u] thys [AjPAlign] Alignment object
526 ** @return [void]
527 **
528 ** @release 6.1.0
529 ** @@
530 ******************************************************************************/
531 
alignWriteMeganon(AjPAlign thys)532 static void alignWriteMeganon(AjPAlign thys)
533 {
534     alignWriteSeqformat(thys, 0, "meganon");
535 
536     return;
537 }
538 
539 
540 
541 
542 /* @funcstatic alignWriteMsf **************************************************
543 **
544 ** Writes an alignment in MSF format
545 **
546 ** @param [u] thys [AjPAlign] Alignment object
547 ** @return [void]
548 **
549 ** @release 2.1.0
550 ** @@
551 ******************************************************************************/
552 
alignWriteMsf(AjPAlign thys)553 static void alignWriteMsf(AjPAlign thys)
554 {
555     alignWriteSeqformat(thys, 0, "msf");
556 
557     return;
558 }
559 
560 
561 
562 
563 /* @funcstatic alignWriteNexus ************************************************
564 **
565 ** Writes an alignment in Nexus/PAUP interleaved format
566 **
567 ** @param [u] thys [AjPAlign] Alignment object
568 ** @return [void]
569 **
570 ** @release 6.1.0
571 ** @@
572 ******************************************************************************/
573 
alignWriteNexus(AjPAlign thys)574 static void alignWriteNexus(AjPAlign thys)
575 {
576     alignWriteSeqformat(thys, 0, "nexus");
577 
578     return;
579 }
580 
581 
582 
583 
584 /* @funcstatic alignWriteNexusnon *********************************************
585 **
586 ** Writes an alignment in Nexus/PAUP non-interleaved format
587 **
588 ** @param [u] thys [AjPAlign] Alignment object
589 ** @return [void]
590 **
591 ** @release 6.1.0
592 ** @@
593 ******************************************************************************/
594 
alignWriteNexusnon(AjPAlign thys)595 static void alignWriteNexusnon(AjPAlign thys)
596 {
597     alignWriteSeqformat(thys, 0, "nexusnon");
598 
599     return;
600 }
601 
602 
603 
604 
605 /* @funcstatic alignWritePhylip ***********************************************
606 **
607 ** Writes an alignment in Phylip interleaved format
608 **
609 ** @param [u] thys [AjPAlign] Alignment object
610 ** @return [void]
611 **
612 ** @release 6.1.0
613 ** @@
614 ******************************************************************************/
615 
alignWritePhylip(AjPAlign thys)616 static void alignWritePhylip(AjPAlign thys)
617 {
618     alignWriteSeqformat(thys, 0, "phylip");
619 
620     return;
621 }
622 
623 
624 
625 
626 /* @funcstatic alignWritePhylipnon ********************************************
627 **
628 ** Writes an alignment in Phylip non-interleaved format
629 **
630 ** @param [u] thys [AjPAlign] Alignment object
631 ** @return [void]
632 **
633 ** @release 6.1.0
634 ** @@
635 ******************************************************************************/
636 
alignWritePhylipnon(AjPAlign thys)637 static void alignWritePhylipnon(AjPAlign thys)
638 {
639     alignWriteSeqformat(thys, 0, "phylipnon");
640 
641     return;
642 }
643 
644 
645 
646 
647 /* @funcstatic alignWriteSelex ************************************************
648 **
649 ** Writes an alignment in SELEX format
650 **
651 ** @param [u] thys [AjPAlign] Alignment object
652 ** @return [void]
653 **
654 ** @release 6.1.0
655 ** @@
656 ******************************************************************************/
657 
alignWriteSelex(AjPAlign thys)658 static void alignWriteSelex(AjPAlign thys)
659 {
660     alignWriteSeqformat(thys, 0, "selex");
661 
662     return;
663 }
664 
665 
666 
667 
668 /* @funcstatic alignWriteTreecon **********************************************
669 **
670 ** Writes an alignment in Treecon format
671 **
672 ** @param [u] thys [AjPAlign] Alignment object
673 ** @return [void]
674 **
675 ** @release 6.1.0
676 ** @@
677 ******************************************************************************/
678 
alignWriteTreecon(AjPAlign thys)679 static void alignWriteTreecon(AjPAlign thys)
680 {
681     alignWriteSeqformat(thys, 0, "treecon");
682 
683     return;
684 }
685 
686 
687 
688 
689 /* @funcstatic alignWriteFasta ************************************************
690 **
691 ** Writes an alignment in MSF format
692 **
693 ** @param [u] thys [AjPAlign] Alignment object
694 ** @return [void]
695 **
696 ** @release 2.1.0
697 ** @@
698 ******************************************************************************/
699 
alignWriteFasta(AjPAlign thys)700 static void alignWriteFasta(AjPAlign thys)
701 {
702     alignWriteSeqformat(thys, 0, "fasta");
703 
704     return;
705 }
706 
707 
708 
709 
710 /* @funcstatic alignWriteSeqformat ********************************************
711 **
712 ** Writes an alignment in a sequence format.
713 ** Usually called for only one alignment, ignoring any
714 ** sub-alignments as they would overwrite.
715 **
716 ** @param [u] thys [AjPAlign] Alignment object
717 ** @param [r] iali [ajint] Alignment number
718 ** @param [r] sqfmt [const char*] Sequence output format
719 ** @return [void]
720 **
721 ** @release 3.0.0
722 ** @@
723 ******************************************************************************/
724 
alignWriteSeqformat(AjPAlign thys,ajint iali,const char * sqfmt)725 static void alignWriteSeqformat(AjPAlign thys, ajint iali, const char* sqfmt)
726 {
727     AlignPData data = NULL;
728 
729     AjPSeq seq = NULL;
730     AjPSeqout seqout;
731 
732     ajint iseq;
733     ajint istart;
734     ajint iend;
735     ajint ilen;
736     AjPStr tmpstr  = NULL;
737 
738     seqout = ajSeqoutNewFile(thys->File);
739 
740     thys->SeqOnly = ajTrue;
741 
742     ajStrAssignC(&seqout->Formatstr, sqfmt);
743     seqout->File = thys->File;
744 
745     ajSeqoutOpen(seqout);
746 
747     ajListPeekNumber(thys->Data, 0, (void**) &data);
748     ilen = data->LenAli;
749 
750     for(iseq=0; iseq < thys->Nseqs; iseq++)
751     {
752 	seq = ajSeqNewSeq(alignSeq(thys, iseq, iali));
753 	istart = data->SubOffset[iseq];
754 	iend = istart + ilen - 1;
755 	ajStrAssignSubS(&tmpstr, ajSeqGetSeqS(seq), istart, iend);
756 	ajSeqAssignSeqS(seq, tmpstr);
757 	ajSeqoutWriteSeq(seqout, seq);
758 	ajSeqDel(&seq);
759 	ajStrDel(&tmpstr);
760     }
761 
762     ajSeqoutClose(seqout);
763     seqout->File = NULL;
764 
765     ajSeqoutDel(&seqout);
766 
767     return;
768 }
769 
770 
771 
772 
773 /* @funcstatic alignWriteMarkX0 ***********************************************
774 **
775 ** Writes an alignment in Fasta MarkX 0 format.
776 **
777 ** This is the standard default output format for FASTA programs.
778 **
779 ** @param [u] thys [AjPAlign] Alignment object
780 ** @return [void]
781 **
782 ** @release 2.1.0
783 ** @@
784 ******************************************************************************/
785 
alignWriteMarkX0(AjPAlign thys)786 static void alignWriteMarkX0(AjPAlign thys)
787 {
788     ajint iali;
789     ajint nali;
790 
791     nali = (ajuint) ajListGetLength(thys->Data);
792 
793     for(iali=0; iali < nali; iali++)
794 	alignWriteMark(thys, iali, 0);
795 
796     return;
797 }
798 
799 
800 
801 
802 /* @funcstatic alignWriteMarkX1 ***********************************************
803 **
804 ** Writes an alignment in Fasta MarkX 1 format.
805 **
806 ** This is an alternative output format for FASTA programs in which
807 ** identities are not marked.
808 ** Instead conservative replacements are denoted by 'x'
809 ** and non-conservative substitutions by 'X'.
810 **
811 ** @param [u] thys [AjPAlign] Alignment object
812 ** @return [void]
813 **
814 ** @release 2.1.0
815 ** @@
816 ******************************************************************************/
817 
alignWriteMarkX1(AjPAlign thys)818 static void alignWriteMarkX1(AjPAlign thys)
819 {
820     ajint iali;
821     ajint nali;
822 
823     nali = (ajuint) ajListGetLength(thys->Data);
824 
825     for(iali=0; iali < nali; iali++)
826 	alignWriteMark(thys, iali, 1);
827 
828     return;
829 }
830 
831 
832 
833 
834 /* @funcstatic alignWriteMarkX2 ***********************************************
835 **
836 ** Writes an alignment in Fasta MarkX 2 format
837 **
838 ** This is an alternative output format for FASTA programs in which
839 ** the residues in the second sequence are only shown if they are
840 ** different from the first.
841 **
842 ** @param [u] thys [AjPAlign] Alignment object
843 ** @return [void]
844 **
845 ** @release 2.1.0
846 ** @@
847 ******************************************************************************/
848 
alignWriteMarkX2(AjPAlign thys)849 static void alignWriteMarkX2(AjPAlign thys)
850 {
851     ajint iali;
852     ajint nali;
853 
854     nali = (ajuint) ajListGetLength(thys->Data);
855 
856     for(iali=0; iali < nali; iali++)
857 	alignWriteMark(thys, iali, 2);
858 
859     return;
860 }
861 
862 
863 
864 
865 /* @funcstatic alignWriteMarkX3 ***********************************************
866 **
867 ** Writes an alignment in Fasta MarkX 3 format
868 **
869 ** This is an alternative output format for FASTA programs in which
870 ** the aligned library sequences are displayed in FASTA format
871 ** These can be used to build a primitive multiple alignment.
872 **
873 ** @param [u] thys [AjPAlign] Alignment object
874 ** @return [void]
875 **
876 ** @release 2.1.0
877 ** @@
878 ******************************************************************************/
879 
alignWriteMarkX3(AjPAlign thys)880 static void alignWriteMarkX3(AjPAlign thys)
881 {
882     ajint iali;
883     ajint nali;
884 
885     nali = (ajuint) ajListGetLength(thys->Data);
886 
887     for(iali=0; iali < nali; iali++)
888 	alignWriteMark(thys, iali, 3);
889 
890     return;
891 }
892 
893 
894 
895 
896 /* @funcstatic alignWriteMarkX10 **********************************************
897 **
898 ** Writes an alignment in Fasta MarkX 10 format
899 **
900 ** @param [u] thys [AjPAlign] Alignment object
901 ** @return [void]
902 **
903 ** @release 2.1.0
904 ** @@
905 ******************************************************************************/
906 
alignWriteMarkX10(AjPAlign thys)907 static void alignWriteMarkX10(AjPAlign thys)
908 {
909     ajint iali;
910     ajint nali;
911 
912     nali = (ajuint) ajListGetLength(thys->Data);
913 
914     for(iali=0; iali < nali; iali++)
915 	alignWriteMark(thys, iali, 10);
916 
917     return;
918 }
919 
920 
921 
922 
923 /* @funcstatic alignWriteMark *************************************************
924 **
925 ** Writes a pairwise alignment in a Fasta MarkX format.
926 **
927 ** For now, seems to work with 0, 1, 2, 3 or 10.
928 ** FASTA 3.4 has 4, 5, 6 and 9 as possible options
929 ** but most seem to make no difference on pairwise comparisons.
930 **
931 ** 0: identities marked with ':' similarities with '.'
932 **
933 ** 1: differences marked with 'X'
934 **
935 ** 2: identities marked with '.', differences with seq2 character
936 **
937 ** 3: fasta format sequences with no description
938 **
939 ** 10: aligned regions with associated data
940 **
941 ** @param [u] thys [AjPAlign] Alignment object
942 ** @param [r] iali [ajint] Alignment number
943 ** @param [r] markx [ajint] Markup type (as defined in Bill Pearson's
944 **                          FASTA suite
945 ** @return [void]
946 **
947 ** @release 2.1.0
948 ** @@
949 ** Note: Modified 30-nov-04 (pmr) to replace the original code which had nasty
950 ** dependencies on global variable settings in the original - which we had
951 ** failed to reproduce in all possible circumstances, with code derived from
952 ** the existing and working EMBOSS alignment formats.
953 ******************************************************************************/
954 
alignWriteMark(AjPAlign thys,ajint iali,ajint markx)955 static void alignWriteMark(AjPAlign thys, ajint iali, ajint markx)
956 {
957 
958     AjPFile outf;
959     int nseq;
960 
961     const AjPStr seq = NULL;
962     const AjPSeq seqp = NULL;
963 
964     AlignPData data = NULL;
965 
966     ajint iseq;
967     ajint i;
968     ajint j;
969     ajint jj;
970     ajint kk;
971     ajint istart;
972     ajint iend;
973     ajint ilen;
974     ajint jstart;
975     ajint jend;
976     ajint iwidth = 50;
977     const char* cp;
978 
979     AjPStr tmpstr  = NULL;
980     AjPStr mrkstr  = NULL;
981     AjPStr mrkcons = NULL;
982     AjPStr cons    = NULL;
983 
984     ajint identity   = 0;
985     ajint similarity = 0;
986     ajint gaps       = 0;
987     ajint seqlen     = 0;
988 
989     ajint* ipos = NULL;
990     ajint* incs = NULL;
991     AjBool* num = NULL;
992     ajint jpos;
993     ajint icnt;
994     ajint increment = 1;
995 
996     AjPStr tmphdr = NULL;
997     AjPStr tmpnum = NULL;
998     AjBool lastline = ajFalse;
999 
1000     AjBool isnuc = ajFalse;
1001     static ajint icount = 0;
1002     float ident = 0.0;
1003 
1004     ajDebug("alignWriteMark iali:%d markx:%d\n", iali, markx);
1005 
1006     icount++;
1007 
1008     nseq = thys->Nseqs;
1009     outf = thys->File;
1010 
1011     if(thys->Width)
1012 	iwidth = thys->Width;
1013 
1014     ajListPeekNumber(thys->Data, iali, (void**) &data);
1015 
1016     AJCNEW0(ipos, nseq);
1017     AJCNEW0(incs, nseq);
1018     AJCNEW0(num, nseq);
1019 
1020     ajStrDel(&cons);
1021     ajStrDel(&tmphdr);
1022 
1023     ilen = data->LenAli;
1024 
1025     alignConsStats(thys, iali, &cons,
1026 		   &identity, &similarity, &gaps, &seqlen);
1027 
1028     ajAlignSetStats(thys, iali, seqlen, identity, similarity, gaps, NULL);
1029     ajAlignSetSubStandard(thys, iali);
1030     alignWriteHeaderNum(thys,iali);
1031     isnuc = ajSeqIsNuc(data->Seq[0]);
1032     ident = (float) data->NumId / (float) data->LenAli;
1033 
1034     /*ajDebug("# Consens: '%S'\n\n", cons);*/
1035 
1036     if(markx == 1)
1037 	alignSim(&cons, ' ', ' ', 'X', ' ');
1038     else if(markx == 2)
1039     {
1040 	seq = ajSeqGetSeqS(data->Seq[0]);
1041 	alignConsSet(thys, iali, 1, &cons);
1042 	ajDebug("alignWriteMark(%d)\nseq0:%S\ncons:%S\nseq1:%S\n",
1043 		markx, seq, cons, ajSeqGetSeqS(data->Seq[1]));
1044 	alignDiff(&cons, seq, '.');
1045 	ajDebug("alignWriteMark(%d)\nseq0:%S\ncons:%S\nseq1:%S\n",
1046 		markx, seq, cons, ajSeqGetSeqS(data->Seq[1]));
1047     }
1048     else
1049 	alignSim(&cons, ':', '.', ' ', ' ');
1050 
1051     /*ajDebug("# Modcons: '%S'\n\n", cons);*/
1052     ajDebug("# nseq:%d\n", nseq);
1053 
1054     ajDebug("# AliData [%d] len %d \n", iali, ilen);
1055     for(iseq=0; iseq < nseq; iseq++)
1056     {
1057 	incs[iseq] = alignSeqIncrement(data, iseq);
1058 	ipos[iseq] = alignSeqBegin(data, iseq)-incs[iseq];
1059 	/*ajDebug("#   Seq[%d]'%S'\n",
1060 	  iseq, ajSeqGetSeqS(data->Seq[iseq]));*/
1061     }
1062 
1063     /*
1064     for(iseq=0; iseq < nseq; iseq++)
1065     {
1066 	ajDebug("#   Seq[%d]  Len:%d Start:%d End:%d Rev:%B Off:%d\n",
1067 		iseq, data->Len[iseq],
1068 		data->Start[iseq], data->End[iseq],
1069 		data->Rev[iseq],
1070 		data->Offset[iseq]);
1071     }
1072     */
1073 
1074     if(markx==10)
1075     {
1076 	if(isnuc)
1077 	    ajFmtPrintF(outf,">>>%s, %d nt vs %s, %d nt\n",
1078 			ajSeqGetNameC(data->Seq[0]), data->Len[0],
1079 			ajSeqGetNameC(data->Seq[1]), data->Len[1]);
1080 	else
1081 	    ajFmtPrintF(outf,">>>%s, %d aa vs %s, %d aa\n",
1082 			ajSeqGetNameC(data->Seq[0]), data->Len[0],
1083 			ajSeqGetNameC(data->Seq[1]), data->Len[1]);
1084 
1085 	ajFmtPrintF(outf,"; mp_name: EMBOSS\n");
1086 	ajFmtPrintF(outf,"; mp_ver: %S\n", ajNamValueVersion());
1087 	ajFmtPrintF(outf,"; pg_name: %S\n", ajUtilGetProgram());
1088 	ajFmtPrintF(outf,"; pg_ver: %S\n", ajNamValueVersion());
1089 	ajFmtPrintF(outf,"; pg_matrix: %S\n", thys->Matrix);
1090 	ajFmtPrintF(outf,"; pg_gap-pen: -%S -%S\n",
1091 		    thys->GapPen, thys->ExtPen);
1092 	ajFmtPrintF(outf,">>#%d\n", icount);
1093 	ajFmtPrintF(outf,"; sw_score: %S\n", data->Score);
1094 	ident = (float) data->NumId / (float) data->LenAli;
1095 	ajFmtPrintF(outf,"; sw_ident: %5.3f\n", ident);
1096 	ajFmtPrintF(outf,"; sw_overlap: %d\n", data->LenAli);
1097     }
1098 
1099     if(markx <= 2)
1100     {
1101 	for(i=0; i < ilen; i += iwidth)
1102 	{
1103 	    for(iseq=0; iseq < nseq; iseq++)
1104 	    {
1105 	        seqp = data->Seq[iseq];
1106 		seq = ajSeqGetSeqS(seqp);
1107 		istart = i + data->SubOffset[iseq];
1108 		iend = (istart+iwidth-1);
1109 
1110 		if(iend >= (data->SubOffset[iseq]+ilen-1))
1111 		{
1112 		    iend = data->SubOffset[iseq]+ilen-1;
1113 		    lastline = ajTrue;
1114 		}
1115 
1116 		increment = incs[iseq];
1117 		ajStrAssignSubS(&tmpstr, seq, istart, iend);
1118 		ajStrAssignSubS(&mrkcons, cons,
1119 			    istart - data->SubOffset[iseq],
1120 			    iend - data->SubOffset[iseq]);
1121 
1122 		ajStrExchangeCC(&tmpstr, ".", "-");
1123 		icnt = ajStrGetLen(tmpstr)
1124 		  - (size_t) ajStrCalcCountK(tmpstr, '-')
1125 		  - (size_t) ajStrCalcCountK(tmpstr, ' ');
1126 
1127 		/* number the top sequence */
1128 		if(!iseq)
1129 		{
1130 		    jpos = ipos[iseq];
1131 		    ajStrAssignClear(&tmpnum);
1132 		    jj=7;
1133 		    cp = ajStrGetPtr(tmpstr);
1134 
1135 		    while(*cp)
1136                     {
1137 			jj ++;
1138 
1139 			if((*cp != '-') && (*cp != ' '))
1140 			{
1141 			    jpos += increment;
1142 
1143 			    if(jpos%10 == 0)
1144 			    {
1145 				ajFmtPrintAppS(&tmpnum,"%*d",jj,jpos);
1146 				jj=0;
1147 				num[iseq] = ajTrue;
1148 			    }
1149 			}
1150 			cp++;
1151 		    }
1152 
1153 		    if(lastline && !num[iseq])
1154 		    {
1155 			ajFmtPrintAppS(&tmpnum,"%*d",jj,jpos);
1156 			jj=0;
1157 		    }
1158 
1159 		    if(jj)
1160 			ajStrAppendCountK(&tmpnum, ' ', jj);
1161 
1162 		    ajFmtPrintF(outf, "%S\n", tmpnum);
1163 		}
1164 
1165 		if(!iseq)
1166 		    ajStrAssignS(&mrkstr, tmpstr);
1167 		else
1168 		    alignSame(&mrkstr, tmpstr, ' ');
1169 
1170 		if(markx==2 && nseq==2 && iseq==1)
1171                 {
1172 		    ajFmtPrintF(outf,
1173 				"%6.6S %S\n",
1174 				alignSeqName(thys, seqp), mrkcons);
1175                 }
1176 		else
1177 		{
1178 		    if(nseq==2 && iseq==1)
1179 			ajFmtPrintF(outf,
1180 				    "       %S\n",
1181 				    mrkcons);
1182 
1183 		    if(ajStrGetLen(tmpstr))
1184 		    {
1185 			ajFmtPrintF(outf,
1186 				    "%6.6S %S\n",
1187 				    alignSeqName(thys, seqp),
1188 				    tmpstr);
1189 		    }
1190 		    else
1191 			ajFmtPrintF(outf,
1192 				    "%6.6S\n",
1193 				    alignSeqName(thys, seqp));
1194 
1195 		    /* number the bottom sequence */
1196 		    if(iseq+1 == nseq)
1197 		    {
1198 			jpos = ipos[iseq];
1199 			ajStrAssignClear(&tmpnum);
1200 			jj=7;
1201 			kk=7;
1202 			cp = ajStrGetPtr(tmpstr);
1203 
1204 			while(*cp)
1205                         {
1206 			    jj++;
1207 			    if((*cp != '-') && (*cp != ' '))
1208 			    {
1209 				kk = jj;
1210 				jpos += increment;
1211 
1212 				if(jpos%10 == 0)
1213 				{
1214 				    ajFmtPrintAppS(&tmpnum,"%*d",jj,jpos);
1215 				    jj=0;
1216 				    kk=0;
1217 				    num[iseq] = ajTrue;
1218 				}
1219 			    }
1220 
1221 			    cp++;
1222 			}
1223 
1224 			if(lastline && !num[iseq])
1225                         {
1226 			    ajFmtPrintAppS(&tmpnum,"%*d",kk,jpos);
1227 			    jj = jj - kk;
1228 			}
1229 
1230 			if(jj)
1231 			    ajStrAppendCountK(&tmpnum, ' ', jj);
1232 
1233 			ajFmtPrintF(outf, "%S\n", tmpnum);
1234 		    }
1235 
1236 		    if(increment > 0)
1237 			ipos[iseq] += icnt;
1238 		    else
1239 			ipos[iseq] -= icnt;
1240 		}
1241 	    }
1242 
1243 	    if(nseq > 2) 	    /* 3 or more seqs, markup under */
1244 		ajFmtPrintF(outf,
1245 			    "                     %S\n",
1246 			    mrkcons);
1247 
1248 	    ajFmtPrintF(outf, "\n");
1249 	}
1250     }
1251     else
1252     {
1253 	for(iseq=0; iseq < nseq; iseq++)
1254 	{
1255 	    seqp = data->Seq[iseq];
1256 	    seq = ajSeqGetSeqS(seqp);
1257 	    istart = data->SubOffset[iseq];
1258 	    iend = istart + ilen - 1;
1259 	    ajStrAssignSubS(&tmpstr, seq, istart, iend);
1260 	    ajStrAssignSubS(&mrkcons, cons,
1261 			istart - data->SubOffset[iseq],
1262 			iend - data->SubOffset[iseq]);
1263 
1264 	    ajStrExchangeCC(&tmpstr, ".", "-");
1265 	    icnt = ajStrGetLen(tmpstr)
1266 	      - (size_t) ajStrCalcCountK(tmpstr, '-')
1267 	      - (size_t) ajStrCalcCountK(tmpstr, ' ');
1268 
1269 	    if(!iseq)
1270 		ajStrAssignS(&mrkstr, tmpstr);
1271 	    else
1272 		alignSame(&mrkstr, tmpstr, ' ');
1273 
1274 	    if(markx==3)
1275 	    {
1276 		ajFmtPrintF(outf,
1277 			    ">%S ..\n",
1278 			    alignSeqName(thys, seqp));
1279 	    }
1280 	    else if(markx==10)
1281 	    {
1282 		ajFmtPrintF(outf,">%S ..\n", alignSeqName(thys, seqp));
1283 		ajFmtPrintF(outf,"; sq_len: %d\n",
1284 			    data->Len[iseq]);
1285 
1286 		if(ajSeqIsNuc(data->Seq[iseq]))
1287 		    ajFmtPrintF(outf,"; sq_type: D\n");
1288 		else
1289 		    ajFmtPrintF(outf,"; sq_type: p\n");
1290 
1291 		ajFmtPrintF(outf,"; al_start: %d\n",
1292 			    data->Offset[iseq] + data->Start[iseq]);
1293 		ajFmtPrintF(outf,"; al_stop: %d\n",
1294 			    data->Offset[iseq] + data->Start[iseq] + icnt - 1);
1295 		ajFmtPrintF(outf,"; al_display_start: %d\n",
1296 			    data->Offset[iseq] + data->Start[iseq]);
1297 	    }
1298 	    else
1299 	    {
1300 	    }
1301 
1302 	    for(j=0; j < ilen; j += iwidth)
1303 	    {
1304 		jstart = j + data->SubOffset[iseq];
1305 		jend = AJMIN(data->SubOffset[iseq]+ilen-1, jstart+iwidth-1);
1306 		ajStrAssignSubS(&tmpstr, seq, jstart, jend);
1307 
1308 		if(ajStrGetLen(tmpstr))
1309 		{
1310 		    ajFmtPrintF(outf,
1311 				"%S\n",
1312 				tmpstr);
1313 		}
1314 	    }
1315 
1316 	    ipos[iseq] += icnt;
1317 	}
1318 
1319     }
1320 
1321     ajStrDel(&cons);
1322     ajStrDel(&mrkcons);
1323     ajStrDel(&mrkstr);
1324     ajStrDel(&tmpstr);
1325     ajStrDel(&tmphdr);
1326     ajStrDel(&tmpnum);
1327     AJFREE(ipos);
1328     AJFREE(incs);
1329     AJFREE(num);
1330 
1331     return;
1332 }
1333 
1334 
1335 
1336 
1337 
1338 /* @funcstatic alignWriteMatch ************************************************
1339 **
1340 ** Writes an alignment in Match format
1341 **
1342 ** @param [u] thys [AjPAlign] Alignment object
1343 ** @return [void]
1344 **
1345 ** @release 2.4.0
1346 ** @@
1347 ******************************************************************************/
1348 
alignWriteMatch(AjPAlign thys)1349 static void alignWriteMatch(AjPAlign thys)
1350 {
1351     AjPFile outf;
1352     const AjPSeq seq1;
1353     const AjPSeq seq2;
1354     ajint nali;
1355     ajint iali;
1356     ajint len0;
1357     char fwd0 = '+';
1358     char fwd1 = '+';
1359     ajint endgaps = 0;
1360     ajint begingaps = 0;
1361 
1362     AlignPData* pdata = NULL;
1363     AlignPData data = NULL;
1364 
1365     outf = thys->File;
1366     nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
1367 
1368     ajAlignWriteHeader(thys);
1369 
1370     for(iali=0; iali<nali; iali++)
1371     {
1372 	data = pdata[iali];
1373 	len0 = data->End[0] - data->Start[0] + 1;
1374 
1375 	if(alignSeqRev(data, 0))
1376 	    fwd0 = '-';
1377 
1378 	if(alignSeqRev(data, 1))
1379 	    fwd1 = '-';
1380 
1381 	begingaps = alignSeqGapBegin(data,0) + alignSeqGapBegin(data,1);
1382 	endgaps = alignSeqGapEnd(data,0) + alignSeqGapEnd(data,1);
1383 	seq1 = alignSeq(thys, 0, iali);
1384 	seq2 = alignSeq(thys, 1, iali);
1385 
1386 	if(thys->Global)
1387 	{
1388 	    len0 = len0 - endgaps - begingaps;
1389 	    ajFmtPrintF(outf,
1390 			"%6d %-15.15S %c %8d..%-8d %-15.15S %c %8d..%d\n",
1391 			len0, alignSeqName(thys, seq1),  fwd0,
1392 			alignSeqBegin(data,0) + alignSeqGapBegin(data,1),
1393 			alignSeqEnd(data,0) - endgaps -
1394                         alignSeqGapBegin(data,0),
1395 			alignSeqName(thys, seq2), fwd1,
1396 			alignSeqBegin(data,1) + alignSeqGapBegin(data,0),
1397 			alignSeqEnd(data,1) - endgaps -
1398                         alignSeqGapBegin(data,1));
1399 	}
1400 	else
1401 	{
1402 	    ajFmtPrintF(outf,
1403 			"%6d %-15.15S %c %8d..%-8d %-15.15S %c %8d..%d\n",
1404 			len0, alignSeqName(thys, seq1),  fwd0,
1405 			alignSeqBegin(data,0),
1406 			alignSeqEnd(data,0),
1407 			alignSeqName(thys, seq2), fwd1,
1408 			alignSeqBegin(data,1),
1409 			alignSeqEnd(data,1));
1410 	}
1411     }
1412 
1413     AJFREE(pdata);
1414 
1415     return;
1416 }
1417 
1418 
1419 
1420 
1421 /* @funcstatic alignWriteSimple ***********************************************
1422 **
1423 ** Writes an alignment in Simple format
1424 **
1425 ** @param [u] thys [AjPAlign] Alignment object
1426 ** @return [void]
1427 **
1428 ** @release 2.1.0
1429 ** @@
1430 ******************************************************************************/
1431 
alignWriteSimple(AjPAlign thys)1432 static void alignWriteSimple(AjPAlign thys)
1433 {
1434     AjPFile outf;
1435     int nseq;
1436     int nali;
1437 
1438     const AjPStr seq = NULL;
1439 
1440     AlignPData* pdata = NULL;
1441     AlignPData data = NULL;
1442 
1443     ajint iali;
1444     ajint iseq;
1445     ajint i;
1446     ajint istart;
1447     ajint iend;
1448     ajint ilen;
1449     ajint iwidth = 50;
1450 
1451     AjPStr tmpstr  = NULL;
1452     AjPStr mrkstr  = NULL;
1453     AjPStr mrkcons = NULL;
1454     AjPStr cons    = NULL;
1455 
1456     ajint identity   = 0;
1457     ajint similarity = 0;
1458     ajint gaps       = 0;
1459     ajint seqlen     = 0;
1460     ajint icnt;
1461 
1462     ajint* ipos   = NULL;
1463     ajint* incs = NULL;
1464 
1465     AjPStr tmphdr = NULL;
1466 
1467     ajDebug("alignWriteSimple\n");
1468 
1469 
1470     nseq = thys->Nseqs;
1471     outf = thys->File;
1472 
1473     if(thys->Width)
1474 	iwidth = thys->Width;
1475 
1476     nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
1477 
1478     AJCNEW0(ipos, nseq);
1479     AJCNEW0(incs, nseq);
1480 
1481     for(iali=0; iali<nali; iali++)
1482     {
1483 	ajStrDel(&cons);
1484 	ajStrDel(&tmphdr);
1485 
1486 	data = pdata[iali];
1487 	ilen = data->LenAli;
1488 
1489 	alignConsStats(thys, iali, &cons,
1490 		       &identity, &similarity, &gaps, &seqlen);
1491 
1492 	ajAlignSetStats(thys, iali, seqlen, identity, similarity, gaps, NULL);
1493 	ajAlignSetSubStandard(thys, iali);
1494 	alignWriteHeaderNum(thys, iali);
1495 
1496 	/*ajDebug("# Consens: '%S'\n\n", cons);*/
1497 
1498 	alignSim(&cons, '|', ':', '.', ' ');
1499 
1500 	/*ajDebug("# Modcons: '%S'\n\n", cons);*/
1501 	ajDebug("# Nali:%d nseq:%d\n", nali, nseq);
1502 
1503 	ajDebug("# AliData [%d] len %d \n", iali, ilen);
1504 
1505 	for(iseq=0; iseq < nseq; iseq++)
1506 	{
1507 	    incs[iseq] = alignSeqIncrement(data, iseq);
1508 	    ipos[iseq] = alignSeqBegin(data, iseq)-incs[iseq];
1509 	    /* ajDebug("#   Seq[%d]'%S'\n",
1510 	       iseq, ajSeqGetSeqS(data->Seq[iseq]));*/
1511 	}
1512 
1513 	/*
1514 	for(iseq=0; iseq < nseq; iseq++)
1515 	{
1516 	    ajDebug("#   Seq[%d]  Len:%d Start:%d End:%d Rev:%B\n",
1517 		    iseq, data->Len[iseq],
1518 		    data->Start[iseq], data->End[iseq],
1519 		    data->Rev[iseq]);
1520 	}
1521 	*/
1522 
1523 	for(i=0; i < ilen; i += iwidth)
1524 	{
1525 	    for(iseq=0; iseq < nseq; iseq++)
1526 	    {
1527 		seq = ajSeqGetSeqS(data->Seq[iseq]);
1528 		istart = i + data->SubOffset[iseq];
1529 		iend = AJMIN(data->SubOffset[iseq]+ilen-1, istart+iwidth-1);
1530 		ajStrAssignSubS(&tmpstr, seq, istart, iend);
1531 		ajStrAssignSubS(&mrkcons, cons,
1532 			    istart - data->SubOffset[iseq],
1533 			    iend - data->SubOffset[iseq]);
1534 
1535 		ajStrExchangeCC(&tmpstr, ".", "-");
1536 		icnt = incs[iseq] * (ajStrGetLen(tmpstr)
1537 				     - (size_t)ajStrCalcCountK(tmpstr, '-')
1538 				     - (size_t)ajStrCalcCountK(tmpstr, ' '));
1539 
1540 		if(!iseq)
1541 		    ajStrAssignS(&mrkstr, tmpstr);
1542 		else
1543 		    alignSame(&mrkstr, tmpstr, ' ');
1544 
1545 
1546 		if(nseq==2 && iseq==1) /* 2 seqs, markup between them */
1547 		    ajFmtPrintF(outf,
1548 				"                     %S\n",
1549 				mrkcons);
1550 
1551 		if(ajStrGetLen(tmpstr))
1552 		{
1553 		    ajFmtPrintF(outf,
1554 				"%-13.13S %6d %S %6d\n",
1555 				alignSeqName(thys, data->Seq[iseq]),
1556 				ipos[iseq]+incs[iseq], tmpstr,
1557 				ipos[iseq]+icnt);
1558 		}
1559 		else
1560 		    ajFmtPrintF(outf,
1561 				"%-13.13S\n",
1562 				alignSeqName(thys, data->Seq[iseq]));
1563 
1564 		ipos[iseq] += icnt;
1565 	    }
1566 
1567 	    if(nseq > 2) 	    /* 3 or more seqs, markup under */
1568 		ajFmtPrintF(outf,
1569 			    "                     %S\n",
1570 			    mrkcons);
1571 
1572 	    ajFmtPrintF(outf, "\n");
1573 	}
1574     }
1575 
1576 
1577     ajStrDel(&cons);
1578     ajStrDel(&mrkcons);
1579     ajStrDel(&mrkstr);
1580     ajStrDel(&tmpstr);
1581     AJFREE(ipos);
1582     AJFREE(incs);
1583     AJFREE(pdata);
1584 
1585     return;
1586 }
1587 
1588 
1589 
1590 
1591 /* @funcstatic alignWriteScore ************************************************
1592 **
1593 ** Writes an alignment in Score-only format
1594 **
1595 ** @param [u] thys [AjPAlign] Alignment object
1596 ** @return [void]
1597 **
1598 ** @release 2.1.0
1599 ** @@
1600 ******************************************************************************/
1601 
alignWriteScore(AjPAlign thys)1602 static void alignWriteScore(AjPAlign thys)
1603 {
1604     AjPFile outf;
1605     ajint nali;
1606     ajint iali;
1607     AlignPData* pdata = NULL;
1608     AlignPData data = NULL;
1609     const AjPSeq seq1;
1610     const AjPSeq seq2;
1611 
1612     outf = thys->File;
1613     nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
1614 
1615     for(iali=0; iali<nali; iali++)
1616     {
1617 	data = pdata[iali];
1618 	seq1 = data->Seq[0];
1619 	seq2 = data->Seq[1];
1620 
1621 	if(ajStrGetLen(data->Score))
1622 	    ajFmtPrintF(outf, "%S %S %d (%S)\n",
1623 			alignSeqName(thys, seq1),
1624                         alignSeqName(thys, seq2),
1625 			data->LenAli, data->Score);
1626 	else
1627 	    ajFmtPrintF(outf, "%S %S %d\n",
1628 			alignSeqName(thys, seq1),
1629                         alignSeqName(thys, seq2),
1630 			data->LenAli);
1631     }
1632 
1633     AJFREE(pdata);
1634 
1635     return;
1636 }
1637 
1638 
1639 
1640 
1641 /* @funcstatic alignWriteSam **************************************************
1642 **
1643 ** Writes an alignment in sequence alignment/map (SAM) format
1644 **
1645 ** @param [u] thys [AjPAlign] Alignment object
1646 ** @return [void]
1647 **
1648 ** @release 6.2.0
1649 ** @@
1650 ******************************************************************************/
1651 
alignWriteSam(AjPAlign thys)1652 static void alignWriteSam(AjPAlign thys)
1653 {
1654     AjPFile outf;
1655     ajint nali;
1656     ajint iali;
1657     ajint iseq;
1658     ajuint j;
1659     ajuint qrystart;
1660     ajuint qryend;
1661     ajuint qualstart;
1662     ajuint qualend;
1663     ajint refstart;
1664 
1665     ajint nmismatches;
1666     AlignPData* pdata = NULL;
1667     AlignPData data = NULL;
1668 
1669     ajint refpos = 0; /* 1-based leftmost position on the reference sequence */
1670 
1671     const AjPSeq refseq;
1672     const AjPSeq qryseq;
1673     AjPStr seqacc = NULL;
1674 
1675     ajuint flag = 0;     /* bitwise flag, section 2.2.2 in SAM specification */
1676     ajuint mapq = 255;   /* mapping quality, 255 means not available */
1677     AjPStr cigar = NULL; /* extended CIGAR string */
1678     AjPStr mrnm = NULL;  /* mate reference sequence name */
1679     ajuint mpos = 0;     /* leftmost mate position of the clipped sequence */
1680     ajuint isize = 0;    /* inferred insert size */
1681 
1682     AjPStr argstr;
1683     AjPStr mseq = NULL;  /* match region of the query sequence */
1684 
1685     mseq = ajStrNew();
1686     mrnm = ajStrNewC("*");/* TODO: mate sequences not yet supported */
1687 
1688     outf = thys->File;
1689     nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
1690 
1691     if(!thys->Count++)
1692     {
1693         ajFmtPrintF(outf, "@HD\tVN:1.3\n");
1694 
1695         /* Program record */
1696         argstr = ajStrNewS(ajUtilGetCmdline());
1697         ajStrExchangeKK(&argstr, '\n', ' ');
1698         ajFmtPrintF(outf, "@PG\tID:%S\tVN:%S\tCL:%S\n",
1699                     ajUtilGetProgram(), ajNamValueVersion(), argstr);
1700         ajStrDel(&argstr);
1701     }
1702 
1703     for(iali=0; iali<nali; iali++)
1704     {
1705 	data = pdata[iali];
1706 	refseq = data->Seq[thys->RefSeq];
1707 
1708 	refpos = alignSeqBegin(data, thys->RefSeq) + 1
1709 			- alignSeqIncrement(data, thys->RefSeq);
1710 
1711 	for(iseq=0; iseq < data->Nseqs; iseq++)
1712 	{
1713 	    if(iseq==thys->RefSeq)
1714 		continue;
1715 
1716 	    qryseq = data->Seq[iseq];
1717 
1718 	    if(ajSeqIsReversed(qryseq))
1719 		flag |= 0x0010;
1720 
1721 	    if(thys->Global ||thys->SeqExternal)
1722 	    {
1723 		qrystart = alignSeqBegin(data,iseq) -
1724 			alignSeqIncrement(data,iseq);
1725 		qryend = alignSeqEnd(data,iseq);
1726 		refstart = refpos-1;
1727 	    }
1728 	    else
1729 	    {
1730 		qrystart = 0;
1731 		qryend = data->LenAli;
1732 		refstart = 0;
1733 	    }
1734 
1735 	    qualstart = qrystart;
1736 	    qualend = AJMIN(qryend, qryseq->Qualsize);
1737 
1738 	    if(qryseq->Accuracy)
1739 	    {
1740 		ajStrAssignClear(&seqacc);
1741 
1742 		/* ASCII-33 gives the Phred base quality */
1743 		for(j=qualstart;j< qualend;j++)
1744 		    ajStrAppendK(&seqacc, (char) (33 + qryseq->Accuracy[j]));
1745 
1746 	    }
1747 	    else
1748 		ajStrAssignC(&seqacc,"*");
1749 
1750 	    ajStrAssignClear(&mseq);
1751 	    ajStrAssignClear(&cigar);
1752 
1753 	    /* get CIGAR, match region of the query sequence and #mismatches */
1754 	    nmismatches = alignCigar(qryseq, refseq,
1755 	                             qrystart,
1756 	                             qryend,
1757 	                             refstart,
1758 	                             &mseq, &cigar, thys->SeqExternal);
1759 
1760 	    ajFmtPrintF(outf, "%S\t%u\t%S\t%d\t%d\t%S\t%S\t%u\t%u\t%S\t%S\t"
1761 		    "AS:i:%S\tNM:i:%d\n",
1762 		    alignSeqName(thys, qryseq),
1763 		    flag,
1764 		    alignSeqName(thys, refseq),
1765 		    refpos,
1766 		    mapq,
1767 		    cigar,
1768 		    mrnm,
1769 		    mpos,
1770 		    isize,
1771 		    mseq,
1772 		    seqacc,
1773 		    data->Score,
1774 		    nmismatches);
1775 	}
1776     }
1777 
1778     AJFREE(pdata);
1779     ajStrDel(&cigar);
1780     ajStrDel(&mrnm);
1781     ajStrDel(&seqacc);
1782     ajStrDel(&mseq);
1783 
1784     return;
1785 }
1786 
1787 
1788 
1789 
1790 /* #funcstatic alignWriteBam ***************************************************
1791 **
1792 ** Writes an alignment in binary sequence alignment/map (BAM) format
1793 **
1794 ** #param [u] thys [AjPAlign] Alignment object
1795 ** #return [void]
1796 ** ##
1797 ******************************************************************************/
1798 /*
1799 //static void alignWriteBam(AjPAlign thys)
1800 //{
1801 //    ajint nali;
1802 //    ajint iali;
1803 //    AlignPData* pdata = NULL;
1804 //
1805 //
1806 //    nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
1807 //
1808 //    for(iali=0; iali<nali; iali++)
1809 //    {
1810 //    }
1811 //
1812 //    AJFREE(pdata);
1813 //
1814 //    return;
1815 //}
1816 */
1817 
1818 
1819 
1820 
1821 /* @funcstatic alignWriteSrs **************************************************
1822 **
1823 ** Writes an alignment in SRS format
1824 **
1825 ** @param [u] thys [AjPAlign] Alignment object
1826 ** @return [void]
1827 **
1828 ** @release 2.1.0
1829 ** @@
1830 ******************************************************************************/
1831 
alignWriteSrs(AjPAlign thys)1832 static void alignWriteSrs(AjPAlign thys)
1833 {
1834     alignWriteSrsAny(thys, 0, ajFalse);
1835 
1836     return;
1837 }
1838 
1839 
1840 
1841 
1842 /* @funcstatic alignWriteSrsPair **********************************************
1843 **
1844 ** Writes an alignment in SrsPair format
1845 **
1846 ** @param [u] thys [AjPAlign] Alignment object
1847 ** @return [void]
1848 **
1849 ** @release 2.1.0
1850 ** @@
1851 ******************************************************************************/
1852 
alignWriteSrsPair(AjPAlign thys)1853 static void alignWriteSrsPair(AjPAlign thys)
1854 {
1855     alignWriteSrsAny(thys, 2, ajTrue);
1856 
1857     return;
1858 }
1859 
1860 
1861 
1862 
1863 /* @funcstatic alignWriteSrsAny ***********************************************
1864 **
1865 ** Writes an alignment in SRS format (with switches for pairwise and general
1866 ** formatting)
1867 **
1868 ** @param [u] thys [AjPAlign] Alignment object
1869 ** @param [r] imax [ajint] Maximum number of sequences (0 for unknown)
1870 ** @param [r] mark [AjBool] Markup the alignment
1871 ** @return [void]
1872 **
1873 ** @release 2.1.0
1874 ** @@
1875 ******************************************************************************/
1876 
alignWriteSrsAny(AjPAlign thys,ajint imax,AjBool mark)1877 static void alignWriteSrsAny(AjPAlign thys, ajint imax, AjBool mark)
1878 {
1879     AjPFile outf;
1880     int nseq;
1881     int nali;
1882 
1883     const AjPStr seq = NULL;
1884 
1885     AlignPData* pdata = NULL;
1886     AlignPData data = NULL;
1887 
1888     ajint iali;
1889     ajint iseq;
1890     ajint i;
1891     ajint istart;
1892     ajint iend;
1893     ajint ilen;
1894 
1895     AjPStr tmpstr  = NULL;
1896     AjPStr mrkstr  = NULL;
1897     AjPStr mrkcons = NULL;
1898     AjPStr cons    = NULL;
1899 
1900     ajint iwidth     = 50;
1901     ajint identity   = 0;
1902     ajint similarity = 0;
1903     ajint gaps=0;
1904     ajint seqlen=0;
1905     ajint icnt;
1906 
1907     ajint* ipos = NULL;
1908     ajint* incs = NULL;
1909     AjBool pair = ajFalse;
1910 
1911     AjPStr tmphdr = NULL;
1912 
1913 
1914     outf = thys->File;
1915     nseq = thys->Nseqs;
1916 
1917     if(nseq < 1)
1918     {
1919 	ajAlignWriteHeader(thys);
1920 
1921 	return;
1922     }
1923 
1924     if(thys->Width)
1925 	iwidth = thys->Width;
1926 
1927     /*
1928      ** pair: if true, change consensus into |:. markup
1929      ** and put markup line between sequences
1930      */
1931 
1932     if(imax == 2 && nseq == 2 && mark)
1933 	pair = ajTrue;
1934 
1935     nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
1936 
1937     AJCNEW0(ipos, nseq);
1938     AJCNEW0(incs, nseq);
1939 
1940     for(iali=0; iali<nali; iali++)
1941     {
1942 	data = pdata[iali];
1943 	ilen = data->LenAli;
1944 
1945 	ajStrDel(&cons);
1946 	ajStrDel(&tmphdr);
1947 	alignConsStats(thys, iali, &cons,
1948 		       &identity, &similarity, &gaps, &seqlen);
1949 
1950 	ajAlignSetStats(thys, iali, seqlen, identity, similarity, gaps,
1951 			NULL);
1952 	ajAlignSetSubStandard(thys, iali);
1953 	alignWriteHeaderNum(thys, iali);
1954 
1955 	/*ajDebug("# Consens: '%S'\n\n", cons);*/
1956 
1957 	if(pair)
1958 	    alignSim(&cons, '|', ':', '.', ' ');
1959 
1960 	/*ajDebug("# Modcons: '%S'\n\n", cons);*/
1961 	ajDebug("# Nali:%d nseq:%d\n", nali, nseq);
1962 
1963 	ajDebug("# AliData [%d] len %d \n", iali, ilen);
1964 
1965 	for(iseq=0; iseq < nseq; iseq++)
1966 	{
1967 	    incs[iseq] = alignSeqIncrement(data, iseq);
1968 	    ipos[iseq] = alignSeqBegin(data, iseq)-incs[iseq];
1969 	    /*ajDebug("#   Seq[%d] Off:%d Sta:%d End:%d '%S'\n",
1970 		    iseq, data->Offset[iseq], data->Start[iseq],
1971 		    data->End[iseq], ajSeqGetSeqS(data->Seq[iseq]));*/
1972 	}
1973 
1974 	/*
1975 	for(iseq=0; iseq < nseq; iseq++)
1976 	    ajDebug("#   Seq[%d] Start:%d End:%d Rev:%B\n",
1977 		    iseq, data->Start[iseq], data->End[iseq],
1978 		    data->Rev[iseq]);
1979 	*/
1980 
1981 	for(i=0; i < ilen; i += iwidth)
1982 	{
1983 	    for(iseq=0; iseq < nseq; iseq++)
1984 	    {
1985 		seq = ajSeqGetSeqS(data->Seq[iseq]);
1986 		istart = i + data->SubOffset[iseq];
1987 		iend = AJMIN(data->SubOffset[iseq]+ilen-1, istart+iwidth-1);
1988 		ajStrAssignSubS(&tmpstr, seq, istart, iend);
1989 		ajStrAssignSubS(&mrkcons, cons,
1990 			    istart - data->SubOffset[iseq],
1991 			    iend - data->SubOffset[iseq]);
1992 
1993 		ajStrExchangeCC(&tmpstr, ".", "-");
1994 		icnt = incs[iseq] * (ajStrGetLen(tmpstr)
1995 				     - (size_t)ajStrCalcCountK(tmpstr, '-')
1996 				     - (size_t)ajStrCalcCountK(tmpstr, ' '));
1997 
1998 		if(!iseq)
1999 		    ajStrAssignS(&mrkstr, tmpstr);
2000 		else
2001 		    alignSame(&mrkstr, tmpstr, ' ');
2002 
2003 		if(pair && iseq==1)  /* 2 seqs, markup between them */
2004 		    ajFmtPrintF(outf,
2005 				"                     %S\n",
2006 				mrkcons);
2007 
2008 		if(ajStrGetLen(tmpstr))
2009 		{
2010 		    if(icnt)
2011 			ajFmtPrintF(outf,
2012 				    "%-13.13S %6d %S %6d\n",
2013 				    alignSeqName(thys, data->Seq[iseq]),
2014 				    ipos[iseq]+incs[iseq], tmpstr,
2015 				    ipos[iseq]+icnt);
2016 		    else
2017 			ajFmtPrintF(outf,
2018 				    "%-13.13S %6d %S %6d\n",
2019 				    alignSeqName(thys, data->Seq[iseq]),
2020 				    ipos[iseq], tmpstr,
2021 				    ipos[iseq]);
2022 		}
2023 		else
2024 		    ajFmtPrintF(outf,
2025 				"%-13.13S\n",
2026 				alignSeqName(thys, data->Seq[iseq]));
2027 
2028 		ipos[iseq] += icnt;
2029 	    }
2030 
2031 	    if(mark && !pair)	    /* 3 or more seqs, markup under */
2032 		ajFmtPrintF(outf,
2033 			    "                     %S\n",
2034 			    mrkcons);
2035 
2036 	    ajFmtPrintF(outf, "\n");
2037 	}
2038     }
2039 
2040     ajStrDel(&mrkcons);
2041     ajStrDel(&tmpstr);
2042     ajStrDel(&mrkstr);
2043     ajStrDel(&cons);
2044     AJFREE(ipos);
2045     AJFREE(incs);
2046     AJFREE(pdata);
2047 
2048     return;
2049 }
2050 
2051 
2052 
2053 
2054 /* @funcstatic alignWriteTCoffee **********************************************
2055 **
2056 ** Writes an alignment as a T-COFFEE library
2057 **
2058 ** @param [u] thys [AjPAlign] Alignment object
2059 ** @return [void]
2060 **
2061 ** @release 2.8.0
2062 ** @@
2063 ******************************************************************************/
2064 
alignWriteTCoffee(AjPAlign thys)2065 static void alignWriteTCoffee (AjPAlign thys)
2066 {
2067     AjPFile outf = thys->File;
2068     int nseq;
2069     int nali;
2070     AlignPData* pdata = NULL;
2071     AlignPData data = NULL;
2072     ajint iali;
2073     ajint iseq;
2074     ajint i;
2075     ajint s1,s2;
2076     ajint n1,n2;
2077     ajint ilen;
2078     AjPStr sseq= NULL;
2079     const AjPStr sseq1= NULL;
2080     const AjPStr sseq2= NULL;
2081     ajint pidentity=0;
2082 
2083     ajDebug("alignWriteTCoffee\n");
2084 
2085     nseq = thys->Nseqs;
2086     nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
2087     thys->SeqOnly = ajTrue; /* suppress output of tail */
2088 
2089     /* print header */
2090     ajFmtPrintF(outf, "%d\n", nseq); /* number of sequences */
2091 
2092     for(iseq=0; iseq < nseq; iseq++) {
2093 	ajStrAssignS(&sseq,ajSeqGetSeqS(pdata[0]->Seq[iseq]));
2094 	ajStrRemoveWhite(&sseq);
2095 	ajStrExchangeCC(&sseq, "-","");
2096 	/* <seqname> <seqlen> <sequence> */
2097 	ajFmtPrintF(outf, "%S %d %S\n",
2098 		    alignSeqName(thys, pdata[0]->Seq[iseq]),
2099 		    ajStrGetLen(sseq),
2100 		    sseq);
2101 	ajStrDel(&sseq);
2102     }
2103 
2104 
2105     for(iali = 0; iali<nali; iali++)
2106     {
2107 	data = pdata[iali];
2108 	ilen = data->LenAli;
2109 	pidentity = (int)(0.5 + 100.0 *
2110 			  (float) data->NumId / (float) data->LenAli);
2111 	ajFmtPrintF(outf, "! score=%S\n",data->Score);
2112 	ajFmtPrintF(outf, "! matrix=%S\n",thys->Matrix);
2113 	ajFmtPrintF(outf, "! gapopen=%S gapext=%S\n",
2114 		    thys->GapPen,thys->ExtPen);
2115 
2116 	/* go through all pairwise alignments */
2117 	for(s1=0; s1<nseq-1; s1++)
2118 	{
2119 	    sseq1 = ajSeqGetSeqS(pdata[0]->Seq[s1]);
2120 
2121 	    for(s2=s1+1; s2<nseq; s2++)
2122 	    {
2123 		ajFmtPrintF(outf, "#%d %d\n", s1+1, s2+1);
2124 		n1 = 1; n2 = 1;
2125 		sseq2 = ajSeqGetSeqS(pdata[0]->Seq[s2]);
2126 
2127 		for(i=0; i<ilen; i++)
2128 		{
2129 		    /* check if position is ungapped */
2130 		    if(ajStrGetCharPos(sseq1,i)>='A' && ajStrGetCharPos(sseq2,i)>='A')
2131 		    {
2132 			/* output aligned pair,
2133 			   sequence weight,
2134 			   residue weight,
2135 			   match weight
2136 			   (format as guessed from the T-coffee docs) */
2137 			ajFmtPrintF(outf, "%d %d %d %d %d\n",
2138 				     n1, n2, pidentity, 1, 0);
2139 		    }
2140 
2141 		    if(ajStrGetCharPos(sseq1,i)>='A')
2142 			n1++;
2143 
2144 		    if(ajStrGetCharPos(sseq2,i)>='A')
2145 			n2++;
2146 		}
2147 	    }
2148 	}
2149     }
2150 
2151     /* write footer */
2152     ajFmtPrintF(outf, "! SEQ_1_TO_N\n");
2153 
2154     AJFREE(pdata);
2155 
2156     return;
2157 }
2158 
2159 
2160 
2161 
2162 /* @func ajAlignDefine ********************************************************
2163 **
2164 ** Defines a sequence set as an alignment. The sequences are stored internally
2165 ** and may be edited by alignment processing.
2166 **
2167 ** @param [u] thys [AjPAlign] Alignment object
2168 ** @param [u] seqset [AjPSeqset] Sequence set object
2169 ** @return [AjBool] ajTrue on success
2170 **
2171 ** @release 2.1.0
2172 ** @@
2173 ******************************************************************************/
2174 
ajAlignDefine(AjPAlign thys,AjPSeqset seqset)2175 AjBool ajAlignDefine(AjPAlign thys, AjPSeqset seqset)
2176 {
2177     AlignPData data = NULL;
2178     ajint i;
2179     const AjPSeq seq = NULL;
2180 
2181     if(!thys->Nseqs)
2182 	thys->Nseqs = ajSeqsetGetSize(seqset);
2183 
2184 
2185     data = alignDataNew(thys->Nseqs, thys->SeqExternal);
2186 
2187 
2188     for(i=0; i < thys->Nseqs; i++)
2189     {
2190 	seq = ajSeqsetGetseqSeq(seqset, i);
2191 	alignDataSetSequence(data, seq, i, thys->SeqExternal);
2192     }
2193 
2194     data->LenAli = ajSeqsetGetLen(seqset);
2195 
2196     ajListPushAppend(thys->Data, data);
2197 
2198     return ajTrue;
2199 }
2200 
2201 
2202 
2203 
2204 /* @func ajAlignDefineSS ******************************************************
2205 **
2206 ** Defines a sequence pair as an alignment. Either new copies of the
2207 ** sequences or direct references to them are made depending on the value
2208 ** of the SeqExternal attribute of the alignment object.
2209 **
2210 ** @param [u] thys [AjPAlign] Alignment object
2211 ** @param [r] seqa [const AjPSeq] Sequence object
2212 ** @param [r] seqb [const AjPSeq] Second sequence object
2213 ** @return [AjBool] ajTrue on success
2214 **
2215 ** @release 2.1.0
2216 ** @@
2217 ******************************************************************************/
2218 
ajAlignDefineSS(AjPAlign thys,const AjPSeq seqa,const AjPSeq seqb)2219 AjBool ajAlignDefineSS(AjPAlign thys, const AjPSeq seqa, const AjPSeq seqb)
2220 {
2221     AlignPData data = NULL;
2222 
2223     if(!thys->Nseqs)
2224 	thys->Nseqs = 2;
2225 
2226     if(thys->Nseqs != 2)
2227     {
2228 	ajErr("ajAlignDefineSS called with %d sequences in alignment",
2229 	      thys->Nseqs);
2230     }
2231 
2232     data = alignDataNew(2, thys->SeqExternal);
2233 
2234     ajDebug("ajAlignDefineSS '%S' '%S'\n",
2235 	    ajSeqGetNameS(seqa), ajSeqGetNameS(seqb));
2236 
2237     alignDataSetSequence(data, seqa, 0, thys->SeqExternal);
2238 
2239     if(!thys->SeqExternal && !ajSeqIsTrimmed(data->RealSeq[0]))
2240     {
2241 	    ajSeqTrim(data->RealSeq[0]);
2242     }
2243 
2244     alignDataSetSequence(data, seqb, 1, thys->SeqExternal);
2245 
2246     if(!thys->SeqExternal && !ajSeqIsTrimmed(data->RealSeq[1]))
2247     {
2248 	    ajSeqTrim(data->RealSeq[1]);
2249     }
2250 
2251     data->LenAli = AJMIN(ajSeqGetLen(seqa), ajSeqGetLen(seqb));
2252 
2253     ajListPushAppend(thys->Data, data);
2254 
2255     return ajTrue;
2256 }
2257 
2258 
2259 
2260 
2261 /* @func ajAlignDefineCC ******************************************************
2262 **
2263 ** Defines a pair of char* strings as an alignment
2264 **
2265 ** @param [u] thys [AjPAlign] Alignment object
2266 ** @param [r] seqa [const char*] First sequence
2267 ** @param [r] seqb [const char*] Second sequence
2268 ** @param [r] namea [const char*] Name of first sequence
2269 ** @param [r] nameb [const char*] Name of second sequence
2270 ** @return [AjBool] ajTrue on success
2271 **
2272 ** @release 2.9.0
2273 ** @@
2274 ******************************************************************************/
2275 
ajAlignDefineCC(AjPAlign thys,const char * seqa,const char * seqb,const char * namea,const char * nameb)2276 AjBool ajAlignDefineCC(AjPAlign thys, const char* seqa, const char* seqb,
2277 		       const char* namea, const char* nameb)
2278 {
2279     AlignPData data = NULL;
2280 
2281     AjPStr tmpstr = NULL;
2282 
2283     if(!thys->Nseqs)
2284 	thys->Nseqs = 2;
2285 
2286     data = alignDataNew(2, AJFALSE);
2287 
2288     ajStrAssignC(&tmpstr, seqa);
2289     data->Start[0] = 1;
2290     data->End[0] = ajStrGetLen(tmpstr);
2291     data->Len[0] = ajStrGetLen(tmpstr) - ajSeqstrCountGaps(tmpstr);
2292     data->Offset[0] = 0;
2293     data->Offend[0] = 0;
2294     data->SubOffset[0] = 0;
2295     data->Rev[0] = ajFalse;
2296 
2297     /* no external option - we do need to create the AjPSeqs */
2298 
2299     data->RealSeq[0] = ajSeqNewNameC(seqa, namea);
2300     ajSeqGapStandard(data->RealSeq[0], '-');
2301     data->Seq[0] = data->RealSeq[0];
2302 
2303     ajStrAssignC(&tmpstr, seqb);
2304     data->Start[1] = 1;
2305     data->End[1] = ajStrGetLen(tmpstr);
2306     data->Len[1] = ajStrGetLen(tmpstr) - ajSeqstrCountGaps(tmpstr);
2307     data->Offset[1] = 0;
2308     data->Offend[1] = 0;
2309     data->SubOffset[1] = 0;
2310     data->Rev[1] = ajFalse;
2311 
2312     data->RealSeq[1] = ajSeqNewNameC(seqb, nameb);
2313     ajSeqGapStandard(data->RealSeq[1], '-');
2314     data->Seq[1] = data->RealSeq[1];
2315 
2316     data->LenAli = AJMIN(strlen(seqa), strlen(seqb));
2317 
2318     ajDebug("ajAlignDefineCC %s %d %s %d\n",
2319 	    namea, ajSeqGetLen(data->Seq[0]),
2320 	    nameb, ajSeqGetLen(data->Seq[1]));
2321     ajListPushAppend(thys->Data, data);
2322 
2323     ajStrDel(&tmpstr);
2324 
2325     return ajTrue;
2326 }
2327 
2328 
2329 
2330 
2331 /* @func ajAlignDel ***********************************************************
2332 **
2333 ** Destructor for Alignment objects
2334 **
2335 ** @param [d] pthys [AjPAlign*] Alignment object reference
2336 ** @return [void]
2337 ** @category delete [AjPAlign] Default destructor
2338 **
2339 ** @release 2.1.0
2340 ** @@
2341 ******************************************************************************/
2342 
ajAlignDel(AjPAlign * pthys)2343 void ajAlignDel(AjPAlign* pthys)
2344 {
2345     AlignPData data = NULL;
2346     AjPAlign thys;
2347 
2348     thys = *pthys;
2349 
2350     if(!thys)
2351         return;
2352 
2353     ajDebug("ajAlignDel %d seqs\n", thys->Nseqs);
2354     ajStrDel(&thys->Formatstr);
2355     ajStrDel(&thys->Type);
2356     ajStrDel(&thys->Header);
2357     ajStrDel(&thys->SubHeader);
2358     ajStrDel(&thys->Tail);
2359     ajStrDel(&thys->SubTail);
2360     ajStrDel(&thys->Matrix);
2361     ajStrDel(&thys->GapPen);
2362     ajStrDel(&thys->ExtPen);
2363 
2364     ajMatrixDel(&thys->IMatrix);
2365     ajMatrixfDel(&thys->FMatrix);
2366 
2367     while(ajListPop(thys->Data, (void**) &data))
2368 	alignDataDel(&data, thys->SeqExternal);
2369 
2370     ajListFree(&thys->Data);
2371 
2372     AJFREE(*pthys);
2373 
2374     return;
2375 }
2376 
2377 
2378 
2379 
2380 /* @func ajAlignReset *********************************************************
2381 **
2382 ** Reset for Alignment objects
2383 **
2384 ** @param [w] thys [AjPAlign] Alignment object reference
2385 ** @return [void]
2386 ** @category modify [AjPAlign] Resets ready for reuse.
2387 **
2388 ** @release 2.1.0
2389 ** @@
2390 ******************************************************************************/
2391 
ajAlignReset(AjPAlign thys)2392 void ajAlignReset(AjPAlign thys)
2393 {
2394     AlignPData data = NULL;
2395 
2396     while(ajListPop(thys->Data, (void**) &data))
2397 	alignDataDel(&data, thys->SeqExternal);
2398 
2399     ajListFree(&thys->Data);
2400     thys->Data = ajListNew();
2401 
2402     thys->Nseqs = 0;
2403 
2404     return;
2405 }
2406 
2407 
2408 
2409 
2410 /* @func ajAlignOpen **********************************************************
2411 **
2412 ** Opens a new align file
2413 **
2414 ** @param [u] thys [AjPAlign] Alignment object
2415 ** @param [r] name [const AjPStr] File name
2416 ** @return [AjBool] ajTrue on success
2417 **
2418 ** @release 2.1.0
2419 ** @@
2420 ******************************************************************************/
2421 
ajAlignOpen(AjPAlign thys,const AjPStr name)2422 AjBool ajAlignOpen(AjPAlign thys, const AjPStr name)
2423 {
2424     if(!ajAlignValid(thys))
2425 	return ajFalse;
2426 
2427     thys->File = ajFileNewOutNameS(name);
2428 
2429     if(thys->File)
2430 	return ajTrue;
2431 
2432     return ajFalse;
2433 }
2434 
2435 
2436 
2437 
2438 /* @func ajAlignFormatDefault *************************************************
2439 **
2440 ** Sets the default format for an alignment
2441 **
2442 ** @param [w] pformat [AjPStr*] Default format returned
2443 ** @return [AjBool] ajTrue if format was returned
2444 **
2445 ** @release 2.1.0
2446 ** @@
2447 ******************************************************************************/
2448 
ajAlignFormatDefault(AjPStr * pformat)2449 AjBool ajAlignFormatDefault(AjPStr* pformat)
2450 {
2451     if(ajStrGetLen(*pformat))
2452 	ajDebug("... output format '%S'\n", *pformat);
2453     else
2454     {
2455 	/* ajStrAssignEmptyC(pformat, alignFormat[0].Name);*/
2456 	ajStrAssignEmptyC(pformat, "gff");	/* use the real name */
2457 	ajDebug("... output format not set, default to '%S'\n", *pformat);
2458     }
2459 
2460     return ajTrue;
2461 }
2462 
2463 
2464 
2465 
2466 /* @func ajAlignGetLen ********************************************************
2467 **
2468 ** Returns the filename for an alignment. If the alignment has more than one
2469 ** sub-alignment, returns the total.
2470 **
2471 ** @param [r] thys [const AjPAlign] Alignment object.
2472 ** @return [ajint] Alignment length.
2473 **
2474 ** @release 4.1.0
2475 ** @@
2476 ******************************************************************************/
2477 
ajAlignGetLen(const AjPAlign thys)2478 ajint ajAlignGetLen(const AjPAlign thys)
2479 {
2480     ajint ret = 0;
2481     ajint i;
2482     ajint nali;
2483 
2484     AlignPData* pdata = NULL;
2485     AlignPData data = NULL;
2486 
2487     if(!thys)
2488 	return 0;
2489 
2490     if(!thys->Data)
2491 	return 0;
2492 
2493     nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
2494 
2495     for(i=0; i<nali; i++)
2496     {
2497 	data = pdata[i];
2498 	ret += data->LenAli;
2499     }
2500 
2501     AJFREE(pdata);
2502 
2503     return ret;
2504 }
2505 
2506 
2507 
2508 
2509 /* @func ajAlignGetFilename ***************************************************
2510 **
2511 ** Returns the filename for an alignment.
2512 **
2513 ** @param [r] thys [const AjPAlign] Alignment object.
2514 ** @return [const char*] Filename.
2515 **
2516 ** @release 4.0.0
2517 ** @@
2518 ******************************************************************************/
2519 
ajAlignGetFilename(const AjPAlign thys)2520 const char * ajAlignGetFilename(const AjPAlign thys)
2521 {
2522     if(!thys)
2523 	return NULL;
2524 
2525     if(!thys->File)
2526 	return NULL;
2527 
2528     return ajFileGetPrintnameC(thys->File);
2529 }
2530 
2531 
2532 
2533 
2534 /* @func ajAlignGetFormat *****************************************************
2535 **
2536 ** Returns the sequence format for an alignment.
2537 **
2538 ** @param [r] thys [const AjPAlign] Alignment object.
2539 ** @return [const AjPStr] Alignment format
2540 **
2541 ** @release 4.0.0
2542 ** @@
2543 ******************************************************************************/
2544 
ajAlignGetFormat(const AjPAlign thys)2545 const AjPStr ajAlignGetFormat(const AjPAlign thys)
2546 {
2547     if(!thys)
2548 	return NULL;
2549 
2550     return thys->Formatstr;
2551 }
2552 
2553 
2554 
2555 
2556 /* @func ajAlignFormatShowsSequences ******************************************
2557 **
2558 ** Returns whether current alignment format includes sequences in output
2559 **
2560 ** @param [r] thys [const AjPAlign] Alignment object
2561 ** @return [AjBool] true if sequences appear in output
2562 **
2563 ** @release 6.2.0
2564 ** @@
2565 ******************************************************************************/
2566 
ajAlignFormatShowsSequences(const AjPAlign thys)2567 AjBool ajAlignFormatShowsSequences(const AjPAlign thys)
2568 {
2569     if(!thys)
2570     return AJTRUE;
2571 
2572     return alignFormat[thys->Format].Showseqs;
2573 }
2574 
2575 
2576 
2577 
2578 /* @func ajAlignFindFormat ****************************************************
2579 **
2580 ** Looks for the specified align format in the internal definitions and
2581 ** returns the index.
2582 **
2583 ** @param [r] format [const AjPStr] Format required.
2584 ** @param [w] iformat [ajint*] Index
2585 ** @return [AjBool] ajTrue on success.
2586 **
2587 ** @release 2.1.0
2588 ** @@
2589 ******************************************************************************/
2590 
ajAlignFindFormat(const AjPStr format,ajint * iformat)2591 AjBool ajAlignFindFormat(const AjPStr format, ajint* iformat)
2592 {
2593     AjPStr tmpformat = NULL;
2594     ajint i = 0;
2595 
2596     ajDebug("ajAlignFindFormat '%S'\n", format);
2597 
2598     if(!ajStrGetLen(format))
2599 	return ajFalse;
2600 
2601     ajStrAssignS(&tmpformat, format);
2602     ajStrFmtLower(&tmpformat);
2603 
2604     while(alignFormat[i].Name)
2605     {
2606 	if(ajStrMatchCaseC(tmpformat, alignFormat[i].Name))
2607 	{
2608 	    *iformat = i;
2609 	    ajStrDel(&tmpformat);
2610 	    ajDebug("... found at %d\n", i);
2611 
2612 	    return ajTrue;
2613 	}
2614 	i++;
2615     }
2616 
2617     ajDebug("... not found\n");
2618     ajStrDel(&tmpformat);
2619     return ajFalse;
2620 }
2621 
2622 
2623 
2624 
2625 /* @func ajAlignValid *********************************************************
2626 **
2627 ** Test for an alignment object.
2628 **
2629 ** Checks the format works with the number of sequences.
2630 ** Checks the format works with the type (protein or nucleotide).
2631 **
2632 ** @param [u] thys [AjPAlign] Alignment object
2633 ** @return [AjBool] ajTrue on success
2634 **
2635 ** @release 2.1.0
2636 ** @@
2637 ******************************************************************************/
2638 
ajAlignValid(AjPAlign thys)2639 AjBool ajAlignValid(AjPAlign thys)
2640 {
2641     ajDebug("ajAlignValid format '%S' %d Nmin %d Nmax %d\n",
2642 	    thys->Formatstr, thys->Format, thys->Nmin, thys->Nmax);
2643 
2644     if(!thys->Format)			/* test acdc-alignbadformat */
2645     {
2646         if(ajStrGetLen(thys->Formatstr))
2647         {
2648             if(!ajAlignFindFormat(thys->Formatstr, &thys->Format))
2649             {
2650                 ajErr("Unknown alignment format '%S'", thys->Formatstr);
2651                 return ajFalse;
2652             }
2653         }
2654         else
2655         {
2656             ajStrAssignC(&thys->Formatstr,alignDefformat);
2657             ajAlignFindFormat(thys->Formatstr, &thys->Format);
2658         }
2659     }
2660 
2661     if( alignFormat[thys->Format].Minseq  &&
2662        thys->Nmin < alignFormat[thys->Format].Minseq)
2663     {
2664 	ajErr("Alignment format %s specifies at least %d sequences, "
2665 	      "alignment has only %d",
2666 	      alignFormat[thys->Format].Name,
2667               alignFormat[thys->Format].Minseq, thys->Nmin);
2668 	return ajFalse;
2669     }
2670 
2671     if( alignFormat[thys->Format].Maxseq  &&
2672        thys->Nmax > alignFormat[thys->Format].Maxseq)
2673     {
2674 	ajErr("Alignment format %s specifies at most %d sequences, "
2675 	      "alignment has %d",
2676 	      alignFormat[thys->Format].Name,
2677               alignFormat[thys->Format].Maxseq, thys->Nmax);
2678 	return ajFalse;
2679     }
2680 
2681     if(thys->Width < 10)
2682     {
2683 	ajWarn("Alignment width (-awidth=%d) too narrow, reset to 10",
2684 	       thys->Width);
2685 	thys->Width=10;
2686     }
2687 
2688     return ajTrue;
2689 }
2690 
2691 
2692 
2693 
2694 /* @func ajAlignNew ***********************************************************
2695 **
2696 ** Constructor for an alignment object
2697 **
2698 ** @return [AjPAlign] New Alignment object
2699 ** @category new [AjPAlign] Default constructor
2700 **
2701 ** @release 2.1.0
2702 ** @@
2703 ******************************************************************************/
2704 
ajAlignNew(void)2705 AjPAlign ajAlignNew(void)
2706 {
2707     AjPAlign pthis;
2708 
2709     AJNEW0(pthis);
2710 
2711     pthis->Count     = 0;
2712     pthis->Formatstr = ajStrNew();
2713     pthis->Format    = 0;
2714     pthis->File      = NULL;
2715     pthis->Data      = ajListNew();
2716     pthis->RefSeq    = 0;
2717 
2718     return pthis;
2719 }
2720 
2721 
2722 
2723 
2724 /* @func ajAlignWrite *********************************************************
2725 **
2726 ** Writes an alignment file
2727 **
2728 ** @param [u] thys [AjPAlign] Alignment object
2729 ** @return [void]
2730 ** @category output [AjPAlign] Master alignment output routine
2731 **
2732 ** @release 2.1.0
2733 ** @@
2734 ******************************************************************************/
2735 
ajAlignWrite(AjPAlign thys)2736 void ajAlignWrite(AjPAlign thys)
2737 {
2738     ajDebug("ajAlignWrite\n");
2739 
2740     ajAlignTraceT(thys, "ajAlignWrite start");
2741 
2742     if(!thys->Format)
2743 	if(!ajAlignFindFormat(thys->Formatstr, &thys->Format))
2744 	    ajErr("unknown align format '%S'", thys->Formatstr);
2745 
2746     ajDebug("ajAlignWrite %d '%s'\n",
2747 	    thys->Format, alignFormat[thys->Format].Name);
2748 
2749     ajAlignSetType(thys);
2750 
2751     /* Calling funclist alignFormat() */
2752 
2753     (*alignFormat[thys->Format].Write)(thys);
2754 
2755     return;
2756 }
2757 
2758 
2759 
2760 
2761 /* @func ajAlignClose *********************************************************
2762 **
2763 ** Closes an alignment
2764 **
2765 ** @param [u] thys [AjPAlign] Alignment object
2766 ** @return [void]
2767 **
2768 ** @release 2.1.0
2769 ** @@
2770 ******************************************************************************/
2771 
ajAlignClose(AjPAlign thys)2772 void ajAlignClose(AjPAlign thys)
2773 {
2774     ajDebug("ajAlignClose '%F'\n", thys->File);
2775 
2776     if(!thys->SeqOnly)
2777 	ajAlignWriteTail(thys);
2778 
2779     ajFileClose(&thys->File);
2780 
2781     return;
2782 }
2783 
2784 
2785 
2786 
2787 /* @func ajAlignWriteHeader ***************************************************
2788 **
2789 ** Writes an alignment header.
2790 **
2791 ** @param [u] thys [AjPAlign] Alignment object. Internal count is updated
2792 **                                  to avoid duplicate headers
2793 ** @return [void]
2794 ** @category output [AjPAlign] Master header output routine
2795 **
2796 ** @release 2.1.0
2797 ** @@
2798 ******************************************************************************/
2799 
ajAlignWriteHeader(AjPAlign thys)2800 void ajAlignWriteHeader(AjPAlign thys)
2801 {
2802     alignWriteHeaderNum(thys, 0);
2803 
2804     return;
2805 }
2806 
2807 
2808 
2809 
2810 /* @funcstatic alignWriteHeaderNum ********************************************
2811 **
2812 ** Writes an alignment header.
2813 **
2814 ** @param [u] thys [AjPAlign] Alignment object. Internal count is updated
2815 **                                  to avoid duplicate headers
2816 ** @param [r] iali [ajint] Alignment number
2817 ** @return [void]
2818 ** @category output [AjPAlign] Master header output routine
2819 **
2820 ** @release 6.0.0
2821 ** @@
2822 ******************************************************************************/
2823 
alignWriteHeaderNum(AjPAlign thys,ajint iali)2824 static void alignWriteHeaderNum(AjPAlign thys, ajint iali)
2825 {
2826     AjPFile outf;
2827     AjPStr tmpstr = NULL;
2828     AjBool doSingle;
2829     ajint i;
2830     const AjPSeq seq;
2831 
2832     if(!alignFormat[thys->Format].Showheader)
2833         return;
2834 
2835     outf     = thys->File;
2836     doSingle = ajFalse;	/* turned off for now - always multi format */
2837 
2838     if(!thys->Count)
2839     {
2840 	ajFmtPrintF(outf, "########################################\n");
2841 	ajFmtPrintF(outf, "# Program: %S\n", ajUtilGetProgram());
2842 	ajFmtPrintF(outf, "# Rundate: %D\n", ajTimeRefTodayFmt("report"));
2843 	ajFmtPrintF(outf, "# Commandline: %S\n", ajUtilGetProgram());
2844 	ajStrAssignS(&tmpstr, ajUtilGetCmdline());
2845 
2846 	if(ajStrGetLen(tmpstr))
2847 	{
2848 	    ajStrExchangeCC(&tmpstr, "\n", "\1#    ");
2849 	    ajStrExchangeCC(&tmpstr, "\1", "\n");
2850 	    ajFmtPrintF(outf, "#    %S\n", tmpstr);
2851 	}
2852 
2853 	ajStrAssignS(&tmpstr, ajUtilGetInputs());
2854 
2855 	if(ajStrGetLen(tmpstr))
2856 	{
2857 	    ajStrExchangeCC(&tmpstr, "\n", "\1#    ");
2858 	    ajStrExchangeCC(&tmpstr, "\1", "\n");
2859 	    ajFmtPrintF(outf, "#    %S\n", tmpstr);
2860 	}
2861 
2862 	ajFmtPrintF(outf, "# Align_format: %S\n", thys->Formatstr);
2863 	ajFmtPrintF(outf, "# Report_file: %F\n", outf);
2864 
2865 	if(!doSingle || thys->Multi)
2866 	    ajFmtPrintF(outf, "########################################\n");
2867 	else
2868 	    ajFmtPrintF(outf, "#\n");
2869     }
2870 
2871     if(!doSingle || thys->Multi)
2872 	ajFmtPrintF(outf, "\n#=======================================\n#\n");
2873 
2874     ajFmtPrintF(outf, "# Aligned_sequences: %d\n", thys->Nseqs);
2875 
2876     for(i=0; i < thys->Nseqs; i++)
2877     {
2878 	ajStrAssignClear(&tmpstr);
2879 	seq = alignSeq(thys,i, iali);
2880 
2881 	if(thys->Showacc)
2882 	    ajFmtPrintAppS(&tmpstr, " (%S)",
2883 			   ajSeqGetAccS(seq));
2884 	if(thys->Showdes)
2885 	    ajFmtPrintAppS(&tmpstr, " %S",
2886 			   ajSeqGetDescS(seq));
2887 	ajFmtPrintF(outf, "# %d: %S%S\n",
2888 		    i+1, alignSeqName(thys, seq), tmpstr);
2889     }
2890 
2891     if(ajStrGetLen(thys->Matrix))
2892 	ajFmtPrintF(outf, "# Matrix: %S\n", thys->Matrix);
2893 
2894     if(ajStrGetLen(thys->GapPen))
2895 	ajFmtPrintF(outf, "# Gap_penalty: %S\n", thys->GapPen);
2896 
2897     if(ajStrGetLen(thys->ExtPen))
2898 	ajFmtPrintF(outf, "# Extend_penalty: %S\n", thys->ExtPen);
2899 
2900     if(ajStrGetLen(thys->Header))
2901     {
2902 	ajStrAssignS(&tmpstr, thys->Header);
2903 	ajStrExchangeCC(&tmpstr, "\n", "\1# ");
2904 	ajStrExchangeCC(&tmpstr, "\1", "\n");
2905 	ajFmtPrintF(outf, "#\n");
2906 	ajFmtPrintF(outf, "# %S\n", tmpstr);
2907 	ajFmtPrintF(outf, "#\n");
2908     }
2909 
2910     if(ajStrGetLen(thys->SubHeader))
2911     {
2912 	ajStrAssignS(&tmpstr, thys->SubHeader);
2913 	ajStrExchangeCC(&tmpstr, "\n", "\1# ");
2914 	ajStrExchangeCC(&tmpstr, "\1", "\n");
2915 	ajFmtPrintF(outf, "#\n");
2916 	ajFmtPrintF(outf, "# %S\n", tmpstr);
2917 	ajFmtPrintF(outf, "#\n");
2918 	ajStrDel(&thys->SubHeader);
2919     }
2920 
2921     if(!doSingle || thys->Multi)
2922 	ajFmtPrintF(outf, "#=======================================\n\n");
2923     else
2924 	ajFmtPrintF(outf, "########################################\n\n");
2925 
2926     ++thys->Count;
2927 
2928     ajStrDel(&tmpstr);
2929 
2930     return;
2931 }
2932 
2933 
2934 
2935 
2936 /* @func ajAlignWriteTail *****************************************************
2937 **
2938 ** Writes an alignment tail
2939 **
2940 ** @param [u] thys [AjPAlign] Alignment object
2941 ** @return [void]
2942 ** @category output [AjPAlign] Master footer output routine
2943 **
2944 ** @release 2.1.0
2945 ** @@
2946 ******************************************************************************/
2947 
ajAlignWriteTail(AjPAlign thys)2948 void ajAlignWriteTail(AjPAlign thys)
2949 {
2950     AjPFile outf;
2951     AjPStr tmpstr = NULL;
2952     AjBool doSingle;
2953 
2954     if(!alignFormat[thys->Format].Showheader)
2955         return;
2956 
2957     outf     = thys->File;
2958     doSingle = ajFalse; /* turned off for now - always multi format */
2959 
2960     ajFmtPrintF(outf, "\n");
2961 
2962     if(!doSingle || thys->Multi)
2963 	ajFmtPrintF(outf, "#---------------------------------------\n");
2964     else
2965 	ajFmtPrintF(outf, "\n########################################\n");
2966 
2967     if(ajStrGetLen(thys->SubTail))
2968     {
2969 	ajStrAssignS(&tmpstr, thys->SubTail);
2970 	ajStrExchangeCC(&tmpstr, "\n", "\1# ");
2971 	ajStrExchangeCC(&tmpstr, "\1", "\n");
2972 	ajFmtPrintF(outf, "#\n");
2973 	ajFmtPrintF(outf, "# %S\n", tmpstr);
2974 	ajFmtPrintF(outf, "#\n");
2975 	ajStrDel(&thys->SubTail);
2976     }
2977 
2978     if(ajStrGetLen(thys->Tail))
2979     {
2980 	ajStrAssignS(&tmpstr, thys->Tail);
2981 	ajStrExchangeCC(&tmpstr, "\n", "\1# ");
2982 	ajStrExchangeCC(&tmpstr, "\1", "\n");
2983 	ajFmtPrintF(outf, "#\n");
2984 	ajFmtPrintF(outf, "# %S\n", tmpstr);
2985 	ajFmtPrintF(outf, "#\n");
2986     }
2987 
2988     if(!doSingle || thys->Multi)
2989 	ajFmtPrintF(outf, "#---------------------------------------\n");
2990     else
2991 	ajFmtPrintF(outf, "########################################\n");
2992 
2993     ajStrDel(&tmpstr);
2994 
2995     return;
2996 }
2997 
2998 
2999 
3000 
3001 /* @func ajAlignSetHeader *****************************************************
3002 **
3003 ** Defines an alignment header
3004 **
3005 ** @param [u] thys [AjPAlign] Alignment object
3006 ** @param [r] header [const AjPStr] Align header with embedded newlines
3007 ** @return [void]
3008 **
3009 ** @release 2.1.0
3010 ** @@
3011 ******************************************************************************/
3012 
ajAlignSetHeader(AjPAlign thys,const AjPStr header)3013 void ajAlignSetHeader(AjPAlign thys, const AjPStr header)
3014 {
3015     ajStrAssignS(&thys->Header, header);
3016 
3017     ajDebug("ajAlignSetHeader len %d '%S'\n",
3018 	    ajStrGetLen(thys->Header), header);
3019 
3020     return;
3021 }
3022 
3023 
3024 
3025 
3026 /* @func ajAlignSetHeaderC ****************************************************
3027 **
3028 ** Defines an alignment header
3029 **
3030 ** @param [u] thys [AjPAlign] Alignment object
3031 ** @param [r] header [const char*] Align header with embedded newlines
3032 ** @return [void]
3033 **
3034 ** @release 2.1.0
3035 ** @@
3036 ******************************************************************************/
3037 
ajAlignSetHeaderC(AjPAlign thys,const char * header)3038 void ajAlignSetHeaderC(AjPAlign thys, const char* header)
3039 {
3040     ajStrAssignC(&thys->Header, header);
3041 
3042     ajDebug("ajAlignSetHeaderC len %d '%S'\n",
3043 	    ajStrGetLen(thys->Header), header);
3044 
3045     return;
3046 }
3047 
3048 
3049 
3050 
3051 /* @func ajAlignSetHeaderApp **************************************************
3052 **
3053 ** Appends to an alignment header
3054 **
3055 ** @param [u] thys [AjPAlign] Alignment object
3056 ** @param [r] header [const AjPStr] Align header with embedded newlines
3057 ** @return [void]
3058 **
3059 ** @release 2.1.0
3060 ** @@
3061 ******************************************************************************/
3062 
ajAlignSetHeaderApp(AjPAlign thys,const AjPStr header)3063 void ajAlignSetHeaderApp(AjPAlign thys, const AjPStr header)
3064 {
3065     if(ajStrGetLen(thys->Header) && ajStrGetCharLast(thys->Header) != '\n')
3066 	ajStrAppendC(&thys->Header, "\n");
3067 
3068     ajStrAppendS(&thys->Header, header);
3069 
3070     ajDebug("ajAlignSetHeaderApp len %d '%S'\n",
3071 	    ajStrGetLen(thys->Header), header);
3072 
3073     return;
3074 }
3075 
3076 
3077 
3078 
3079 /* @func ajAlignSetTail *******************************************************
3080 **
3081 ** Defines an alignment tail
3082 **
3083 ** @param [u] thys [AjPAlign] Alignment object
3084 ** @param [r] tail [const AjPStr] Align tail with embedded newlines
3085 ** @return [void]
3086 **
3087 ** @release 2.1.0
3088 ** @@
3089 ******************************************************************************/
3090 
ajAlignSetTail(AjPAlign thys,const AjPStr tail)3091 void ajAlignSetTail(AjPAlign thys, const AjPStr tail)
3092 {
3093     ajStrAssignS(&thys->Tail, tail);
3094 
3095     ajDebug("ajAlignSetTail len %d '%S'\n",
3096 	    ajStrGetLen(thys->Tail), tail);
3097 
3098     return;
3099 }
3100 
3101 
3102 
3103 
3104 /* @func ajAlignSetTailC ******************************************************
3105 **
3106 ** Defines an alignment tail
3107 **
3108 ** @param [u] thys [AjPAlign] Alignment object
3109 ** @param [r] tail [const char*] Align tail with embedded newlines
3110 ** @return [void]
3111 **
3112 ** @release 2.1.0
3113 ** @@
3114 ******************************************************************************/
3115 
ajAlignSetTailC(AjPAlign thys,const char * tail)3116 void ajAlignSetTailC(AjPAlign thys, const char* tail)
3117 {
3118     ajStrAssignC(&thys->Tail, tail);
3119 
3120     ajDebug("ajAlignSetTailC len %d '%S'\n",
3121 	    ajStrGetLen(thys->Tail), tail);
3122 
3123     return;
3124 }
3125 
3126 
3127 
3128 
3129 /* @func ajAlignSetTailApp ****************************************************
3130 **
3131 ** Appends to an alignment tail
3132 **
3133 ** @param [u] thys [AjPAlign] Alignment object
3134 ** @param [r] tail [const AjPStr] Align tail with embedded newlines
3135 ** @return [void]
3136 **
3137 ** @release 2.1.0
3138 ** @@
3139 ******************************************************************************/
3140 
ajAlignSetTailApp(AjPAlign thys,const AjPStr tail)3141 void ajAlignSetTailApp(AjPAlign thys, const AjPStr tail)
3142 {
3143     if(ajStrGetLen(thys->Tail) && ajStrGetCharLast(thys->Tail) != '\n')
3144 	ajStrAppendC(&thys->Tail, "\n");
3145 
3146     ajStrAppendS(&thys->Tail, tail);
3147 
3148     ajDebug("ajAlignSetTailApp len %d '%S'\n",
3149 	    ajStrGetLen(thys->Tail), tail);
3150 
3151     return;
3152 }
3153 
3154 
3155 
3156 
3157 /* @func ajAlignSetSubTail ****************************************************
3158 **
3159 ** Defines an alignment tail
3160 **
3161 ** @param [u] thys [AjPAlign] Alignment object
3162 ** @param [r] tail [const AjPStr] Align tail with embedded newlines
3163 ** @return [void]
3164 **
3165 ** @release 2.9.0
3166 ** @@
3167 ******************************************************************************/
3168 
ajAlignSetSubTail(AjPAlign thys,const AjPStr tail)3169 void ajAlignSetSubTail(AjPAlign thys, const AjPStr tail)
3170 {
3171     ajStrAssignS(&thys->SubTail, tail);
3172 
3173     ajDebug("ajAlignSetSubTail len %d '%S'\n",
3174 	    ajStrGetLen(thys->SubTail), tail);
3175 
3176     return;
3177 }
3178 
3179 
3180 
3181 
3182 /* @func ajAlignSetSubTailC ***************************************************
3183 **
3184 ** Defines an alignment tail
3185 **
3186 ** @param [u] thys [AjPAlign] Alignment object
3187 ** @param [r] tail [const char*] Align tail with embedded newlines
3188 ** @return [void]
3189 **
3190 ** @release 2.9.0
3191 ** @@
3192 ******************************************************************************/
3193 
ajAlignSetSubTailC(AjPAlign thys,const char * tail)3194 void ajAlignSetSubTailC(AjPAlign thys, const char* tail)
3195 {
3196     ajStrAssignC(&thys->SubTail, tail);
3197 
3198     ajDebug("ajAlignSetSubTailC len %d '%S'\n",
3199 	    ajStrGetLen(thys->SubTail), tail);
3200 
3201     return;
3202 }
3203 
3204 
3205 
3206 
3207 /* @func ajAlignSetSubTailApp *************************************************
3208 **
3209 ** Appends to an alignment tail
3210 **
3211 ** @param [u] thys [AjPAlign] Alignment object
3212 ** @param [r] tail [const AjPStr] Align tail with embedded newlines
3213 ** @return [void]
3214 **
3215 ** @release 2.9.0
3216 ** @@
3217 ******************************************************************************/
3218 
ajAlignSetSubTailApp(AjPAlign thys,const AjPStr tail)3219 void ajAlignSetSubTailApp(AjPAlign thys, const AjPStr tail)
3220 {
3221     if(ajStrGetLen(thys->SubTail) && ajStrGetCharLast(thys->SubTail) != '\n')
3222 	ajStrAppendC(&thys->SubTail, "\n");
3223 
3224     ajStrAppendS(&thys->SubTail, tail);
3225 
3226     ajDebug("ajAlignSetSubTailApp len %d '%S'\n",
3227 	    ajStrGetLen(thys->SubTail), tail);
3228 
3229     return;
3230 }
3231 
3232 
3233 
3234 
3235 /* @func ajAlignSetSubHeader **************************************************
3236 **
3237 ** Defines an alignment subheader (cleared after printing so it can
3238 ** be set again for the next alignment)
3239 **
3240 ** @param [u] thys [AjPAlign] Alignment object
3241 ** @param [r] subheader [const AjPStr] Align subheader with embedded newlines
3242 ** @return [void]
3243 **
3244 ** @release 2.1.0
3245 ** @@
3246 ******************************************************************************/
3247 
ajAlignSetSubHeader(AjPAlign thys,const AjPStr subheader)3248 void ajAlignSetSubHeader(AjPAlign thys, const AjPStr subheader)
3249 {
3250   ajStrAssignS(&thys->SubHeader, subheader);
3251 
3252   ajDebug("ajAlignSetSubHeader len %d '%S'\n",
3253 	  ajStrGetLen(thys->SubHeader), subheader);
3254 
3255   return;
3256 }
3257 
3258 
3259 
3260 
3261 /* @func ajAlignSetSubHeaderC *************************************************
3262 **
3263 ** Defines an alignment header (cleared after printing so it can
3264 ** be set again for the next alignment)
3265 **
3266 ** @param [u] thys [AjPAlign] Alignment object
3267 ** @param [r] subheader [const char*] Align subheader with embedded newlines
3268 ** @return [void]
3269 **
3270 ** @release 2.1.0
3271 ** @@
3272 ******************************************************************************/
3273 
ajAlignSetSubHeaderC(AjPAlign thys,const char * subheader)3274 void ajAlignSetSubHeaderC(AjPAlign thys, const char* subheader)
3275 {
3276     ajStrAssignC(&thys->Header, subheader);
3277 
3278     ajDebug("ajAlignSetSubHeaderC len %d '%S'\n",
3279 	    ajStrGetLen(thys->SubHeader), subheader);
3280 
3281     return;
3282 }
3283 
3284 
3285 
3286 
3287 /* @func ajAlignSetSubHeaderApp ***********************************************
3288 **
3289 ** Appends to an alignment subheader (cleared after printing so it can
3290 ** be set again for the next alignment)
3291 **
3292 ** @param [u] thys [AjPAlign] Alignment object
3293 ** @param [r] subheader [const AjPStr] Align subheader with embedded newlines
3294 ** @return [void]
3295 **
3296 ** @release 2.5.1
3297 ** @@
3298 ******************************************************************************/
3299 
ajAlignSetSubHeaderApp(AjPAlign thys,const AjPStr subheader)3300 void ajAlignSetSubHeaderApp(AjPAlign thys, const AjPStr subheader)
3301 {
3302     if(ajStrGetLen(thys->SubHeader) &&
3303        ajStrGetCharLast(thys->SubHeader) != '\n')
3304 	ajStrAppendC(&thys->SubHeader, "\n");
3305 
3306     ajStrAppendS(&thys->SubHeader, subheader);
3307 
3308     ajDebug("ajAlignSetSubHeaderApp len %d '%S'\n",
3309 	    ajStrGetLen(thys->SubHeader), subheader);
3310 
3311     return;
3312 }
3313 
3314 
3315 
3316 
3317 /* @func ajAlignSetSubHeaderPre ***********************************************
3318 **
3319 ** Prepends to an alignment subheader (cleared after printing so it can
3320 ** be set again for the next alignment)
3321 **
3322 ** @param [u] thys [AjPAlign] Alignment object
3323 ** @param [r] subheader [const AjPStr] Align subheader with embedded newlines
3324 ** @return [void]
3325 **
3326 ** @release 2.5.1
3327 ** @@
3328 ******************************************************************************/
3329 
ajAlignSetSubHeaderPre(AjPAlign thys,const AjPStr subheader)3330 void ajAlignSetSubHeaderPre(AjPAlign thys, const AjPStr subheader)
3331 {
3332     if(ajStrGetLen(thys->SubHeader) && ajStrGetCharLast(subheader) != '\n')
3333 	ajStrInsertC(&thys->SubHeader, 0, "\n");
3334 
3335     ajStrInsertS(&thys->SubHeader, 0, subheader);
3336 
3337     ajDebug("ajAlignSetSubHeaderPre len %d '%S'\n",
3338 	    ajStrGetLen(thys->SubHeader), subheader);
3339 
3340     return;
3341 }
3342 
3343 
3344 
3345 
3346 /* @func ajAlignSetMatrixName *************************************************
3347 **
3348 ** Defines an alignment matrix
3349 **
3350 ** @param [u] thys [AjPAlign] Alignment object
3351 ** @param [r] matrix [const AjPStr] Matrix name
3352 ** @return [void]
3353 **
3354 ** @release 2.1.0
3355 ** @@
3356 ******************************************************************************/
3357 
ajAlignSetMatrixName(AjPAlign thys,const AjPStr matrix)3358 void ajAlignSetMatrixName(AjPAlign thys, const AjPStr matrix)
3359 {
3360     ajAlignSetMatrixNameC(thys, ajStrGetPtr(matrix));
3361 
3362     return;
3363 }
3364 
3365 
3366 
3367 
3368 /* @func ajAlignSetMatrixNameC ************************************************
3369 **
3370 ** Defines an alignment matrix
3371 **
3372 ** @param [u] thys [AjPAlign] Alignment object
3373 ** @param [r] matrix [const char*] Matrix name
3374 ** @return [void]
3375 **
3376 ** @release 2.1.0
3377 ** @@
3378 ******************************************************************************/
3379 
ajAlignSetMatrixNameC(AjPAlign thys,const char * matrix)3380 void ajAlignSetMatrixNameC(AjPAlign thys, const char* matrix)
3381 {
3382     ajStrAssignC(&thys->Matrix, matrix);
3383 
3384     return;
3385 }
3386 
3387 
3388 
3389 
3390 /* @func ajAlignSetMatrixInt **************************************************
3391 **
3392 ** Defines an alignment matrix
3393 **
3394 ** @param [u] thys [AjPAlign] Alignment object
3395 ** @param [u] matrix [AjPMatrix] Matrix object
3396 ** @return [void]
3397 **
3398 ** @release 2.1.0
3399 ** @@
3400 ******************************************************************************/
3401 
ajAlignSetMatrixInt(AjPAlign thys,AjPMatrix matrix)3402 void ajAlignSetMatrixInt(AjPAlign thys, AjPMatrix matrix)
3403 {
3404     if(!thys->IMatrix)
3405     {
3406 	thys->IMatrix = matrix;
3407 	ajAlignSetMatrixName(thys, ajMatrixGetName(matrix));
3408     }
3409 
3410     if(thys->FMatrix)
3411 	ajMatrixfDel(&thys->FMatrix);
3412 
3413     return;
3414 }
3415 
3416 
3417 
3418 
3419 /* @func ajAlignSetMatrixFloat ************************************************
3420 **
3421 ** Defines an alignment matrix
3422 **
3423 ** @param [u] thys [AjPAlign] Alignment object
3424 ** @param [u] matrix [AjPMatrixf] Matrix (floating point version) object
3425 ** @return [void]
3426 **
3427 ** @release 2.1.0
3428 ** @@
3429 ******************************************************************************/
3430 
ajAlignSetMatrixFloat(AjPAlign thys,AjPMatrixf matrix)3431 void ajAlignSetMatrixFloat(AjPAlign thys, AjPMatrixf matrix)
3432 {
3433     if(!thys->FMatrix)
3434     {
3435 	thys->FMatrix = matrix;
3436 	ajAlignSetMatrixName(thys, ajMatrixfGetName(matrix));
3437     }
3438 
3439     if(thys->IMatrix)
3440 	ajMatrixDel(&thys->IMatrix);
3441 
3442     return;
3443 }
3444 
3445 
3446 
3447 
3448 /* @func ajAlignSetGapI *******************************************************
3449 **
3450 ** Defines alignment gap penalties
3451 **
3452 ** @param [u] thys [AjPAlign] Alignment object
3453 ** @param [r] gappen [ajint] Gap penalty
3454 ** @param [r] extpen [ajint] Gap extension penalty
3455 ** @return [void]
3456 **
3457 ** @release 2.1.0
3458 ** @@
3459 ******************************************************************************/
3460 
ajAlignSetGapI(AjPAlign thys,ajint gappen,ajint extpen)3461 void ajAlignSetGapI(AjPAlign thys, ajint gappen, ajint extpen)
3462 {
3463     AjPStr tmpstr = NULL;
3464 
3465     ajFmtPrintS(&tmpstr, "%d", gappen);
3466     ajStrAssignS(&thys->GapPen, tmpstr);
3467 
3468     ajFmtPrintS(&tmpstr, "%d", extpen);
3469     ajStrAssignS(&thys->ExtPen, tmpstr);
3470 
3471     ajStrDel(&tmpstr);
3472 
3473     return;
3474 }
3475 
3476 
3477 
3478 
3479 /* @func ajAlignSetGapR *******************************************************
3480  **
3481  ** Defines alignment gap penalties
3482  **
3483  ** @param [u] thys [AjPAlign] Alignment object
3484  ** @param [r] gappen [float] Gap penalty
3485  ** @param [r] extpen [float] Gap extension penalty
3486  ** @return [void]
3487  ** @@
3488 **
3489 ** @release 2.1.0
3490  *****************************************************************************/
3491 
ajAlignSetGapR(AjPAlign thys,float gappen,float extpen)3492 void ajAlignSetGapR(AjPAlign thys, float gappen, float extpen)
3493 {
3494     AjPStr tmpstr = NULL;
3495 
3496     ajint precision = 3;
3497     ajint i;
3498 
3499     ajFmtPrintS(&tmpstr, "%.*f", precision, gappen);
3500 
3501     for(i=1; i<precision; i++)
3502     {
3503 	if(ajStrGetCharLast(tmpstr) != '0') break;
3504 	ajStrCutEnd(&tmpstr, 1);
3505     }
3506 
3507     ajStrAssignS(&thys->GapPen, tmpstr);
3508 
3509     ajFmtPrintS(&tmpstr, "%.*f", precision, extpen);
3510 
3511     for(i=1; i<precision; i++)
3512     {
3513 	if(ajStrGetCharLast(tmpstr) != '0') break;
3514 	ajStrCutEnd(&tmpstr, 1);
3515     }
3516 
3517     ajStrAssignS(&thys->ExtPen, tmpstr);
3518 
3519     ajStrDel(&tmpstr);
3520     return;
3521 }
3522 
3523 
3524 
3525 
3526 /* @func ajAlignSetScoreI *****************************************************
3527 **
3528 ** Defines alignment score
3529 **
3530 ** @param [u] thys [AjPAlign] Alignment object
3531 ** @param [r] score [ajint] score
3532 ** @return [void]
3533 **
3534 ** @release 2.1.0
3535 ** @@
3536 ******************************************************************************/
3537 
ajAlignSetScoreI(AjPAlign thys,ajint score)3538 void ajAlignSetScoreI(AjPAlign thys, ajint score)
3539 {
3540     AjPStr tmpstr = NULL;
3541     AlignPData data = NULL;
3542 
3543     ajListPeekLast(thys->Data, (void**) &data);
3544     ajFmtPrintS(&tmpstr, "%d", score);
3545     ajStrAssignS(&data->Score, tmpstr);
3546 
3547     ajDebug("ajAlignSetScoreI: %d '%S' %d\n",
3548 	    score, data->Score, data->LenAli);
3549 
3550     ajStrDel(&tmpstr);
3551 
3552     return;
3553 }
3554 
3555 
3556 
3557 
3558 /* @func ajAlignSetScoreL *****************************************************
3559 **
3560 ** Defines alignment score
3561 **
3562 ** @param [u] thys [AjPAlign] Alignment object
3563 ** @param [r] score [ajlong] score
3564 ** @return [void]
3565 **
3566 ** @release 4.0.0
3567 ** @@
3568 ******************************************************************************/
3569 
ajAlignSetScoreL(AjPAlign thys,ajlong score)3570 void ajAlignSetScoreL(AjPAlign thys, ajlong score)
3571 {
3572     AjPStr tmpstr = NULL;
3573     AlignPData data = NULL;
3574 
3575     ajListPeekLast(thys->Data, (void**) &data);
3576     ajFmtPrintS(&tmpstr, "%Ld", score);
3577     ajStrAssignS(&data->Score, tmpstr);
3578 
3579     ajDebug("ajAlignSetScoreI: %Ld '%S' %d\n",
3580 	    score, data->Score, data->LenAli);
3581     ajStrDel(&tmpstr);
3582 
3583     return;
3584 }
3585 
3586 
3587 
3588 
3589 /* @func ajAlignSetScoreR *****************************************************
3590 **
3591 ** Defines alignment score
3592 **
3593 ** @param [u] thys [AjPAlign] Alignment object
3594 ** @param [r] score [float] score
3595 ** @return [void]
3596 **
3597 ** @release 2.1.0
3598 ** @@
3599 ******************************************************************************/
3600 
ajAlignSetScoreR(AjPAlign thys,float score)3601 void ajAlignSetScoreR(AjPAlign thys, float score)
3602 {
3603     AjPStr tmpstr = NULL;
3604 
3605     ajint precision = 3;
3606     ajint i;
3607     AlignPData data = NULL;
3608 
3609     ajListPeekLast(thys->Data, (void**) &data);
3610     ajFmtPrintS(&tmpstr, "%.*f", precision, score);
3611 
3612     for(i=1; i<precision; i++)
3613     {
3614 	if(ajStrGetCharLast(tmpstr) != '0') break;
3615 	ajStrCutEnd(&tmpstr, 1);
3616     }
3617 
3618     ajStrAssignS(&data->Score, tmpstr);
3619 
3620     ajStrDel(&tmpstr);
3621 
3622     return;
3623 }
3624 
3625 
3626 
3627 
3628 /* @func ajAlignSetStats ******************************************************
3629 **
3630 ** Sets standard properties for an alignment subheader. These are:
3631 ** Length, Identity, Gaps, Similarity, Score
3632 **
3633 ** @param [u] thys [AjPAlign] Alignment object
3634 ** @param [r] iali [ajint] Alignment number
3635 ** @param [r] len [ajint] Alignment length
3636 ** @param [r] ident [ajint] Number of identities
3637 ** @param [r] sim [ajint] Number of similarities
3638 ** @param [r] gaps [ajint] Number of gaps
3639 ** @param [r] score [const AjPStr] Alignment score (as saved by
3640 **                           ajAlignSetScoreI or ajAlignSetScoreR)
3641 ** @return [void]
3642 **
3643 ** @release 2.1.0
3644 ******************************************************************************/
3645 
ajAlignSetStats(AjPAlign thys,ajint iali,ajint len,ajint ident,ajint sim,ajint gaps,const AjPStr score)3646 void ajAlignSetStats(AjPAlign thys, ajint iali, ajint len,
3647 		     ajint ident, ajint sim, ajint gaps,
3648 		     const AjPStr score)
3649 {
3650     AlignPData* pdata = NULL;
3651     AlignPData data = NULL;
3652     ajint nali;
3653 
3654     nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
3655 
3656     if(iali < 0)
3657 	data = pdata[nali-1];
3658     else
3659 	data = pdata[iali];
3660 
3661     /* ajDebug("ajAlignSetStats iali:%d len:%d id:%d "
3662                "sim:%d gap:%d score:'%S'\n",
3663 	    iali, len, ident, sim, gaps, score); */
3664 
3665     data->LenAli = len;
3666 
3667     if(len > 0)
3668     {
3669 	if(ident >= 0)
3670 	    data->NumId = ident;
3671 	else
3672 	    data->NumId = -1;
3673 
3674 	if(sim >= 0)
3675 	    data->NumSim = sim;
3676 	else
3677 	    data->NumSim = -1;
3678 
3679 	if(gaps >= 0)
3680 	    data->NumGap = gaps;
3681 	else
3682 	    data->NumGap = -1;
3683     }
3684 
3685     if(ajStrGetLen(score))
3686 	ajStrAssignS(&data->Score, score);
3687 
3688     AJFREE(pdata);
3689 
3690     return;
3691 }
3692 
3693 
3694 
3695 
3696 /* @func ajAlignSetSubStandard ************************************************
3697 **
3698 ** Sets standard subheader using the properties for an alignment.
3699 ** These are:
3700 ** Length, Identity, Gaps, Similarity, Score
3701 **
3702 ** @param [u] thys [AjPAlign] Alignment object
3703 ** @param [r] iali [ajint] Alignment number (or -1 for the latest)
3704 ** @return [void]
3705 **
3706 ** @release 2.1.0
3707 ******************************************************************************/
3708 
ajAlignSetSubStandard(AjPAlign thys,ajint iali)3709 void ajAlignSetSubStandard(AjPAlign thys, ajint iali)
3710 {
3711     AjPStr tmphdr = NULL;
3712     AlignPData* pdata = NULL;
3713     AlignPData data = NULL;
3714     ajint nali;
3715     float pct;
3716 
3717     nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
3718 
3719     if(iali < 0)
3720 	data = pdata[nali-1];
3721     else
3722 	data = pdata[iali];
3723 
3724     ajFmtPrintAppS(&tmphdr, "Length: %d\n", data->LenAli);
3725 
3726     if(data->LenAli > 0)
3727     {
3728 	if(data->NumId >= 0)
3729 	{
3730 	    pct = (float)100. * (float) data->NumId / (float) data->LenAli;
3731 	    ajFmtPrintAppS(&tmphdr, "Identity:   %5d/%d (%4.1f%%)\n",
3732 			   data->NumId, data->LenAli, pct);
3733 	}
3734 
3735 	if(data->NumSim >= 0)
3736 	{
3737 	    pct = (float)100. * (float) data->NumSim / (float) data->LenAli;
3738 	    ajFmtPrintAppS(&tmphdr, "Similarity: %5d/%d (%4.1f%%)\n",
3739 			   data->NumSim, data->LenAli, pct);
3740 	}
3741 
3742 	if(data->NumGap >= 0)
3743 	{
3744 	    pct = (float)100. * (float) data->NumGap / (float) data->LenAli;
3745 	    ajFmtPrintAppS(&tmphdr, "Gaps:       %5d/%d (%4.1f%%)\n",
3746 			   data->NumGap, data->LenAli, pct);
3747 	}
3748     }
3749 
3750     if(ajStrGetLen(data->Score))
3751 	ajFmtPrintAppS(&tmphdr, "Score: %S\n", data->Score);
3752 
3753     ajAlignSetSubHeaderPre(thys, tmphdr);
3754 
3755     ajStrDel(&tmphdr);
3756     AJFREE(pdata);
3757 
3758     return;
3759 }
3760 
3761 
3762 
3763 
3764 /* @funcstatic alignSeqs ******************************************************
3765 **
3766 ** Returns the sequences for the nth alignment
3767 **
3768 ** @param [r] thys [const AjPAlign] Alignment object
3769 ** @param [r] iali [ajint] Alignment number
3770 ** @return [const AjPSeq*] Pointer to the internal sequence array
3771 **
3772 ** @release 2.1.0
3773 ******************************************************************************/
3774 
alignSeqs(const AjPAlign thys,ajint iali)3775 static const AjPSeq* alignSeqs(const AjPAlign thys, ajint iali)
3776 {
3777     AlignPData data = NULL;
3778 
3779     ajListPeekNumber(thys->Data, iali, (void**) &data);
3780 
3781     return data->Seq;
3782 }
3783 
3784 
3785 
3786 
3787 /* @funcstatic alignSeq *******************************************************
3788 **
3789 ** Returns the nth sequence for an alignment
3790 **
3791 ** @param [r] thys [const AjPAlign] Alignment object
3792 ** @param [r] iseq [ajint] Sequence number
3793 ** @param [r] iali [ajint] Alignment number
3794 ** @return [const AjPSeq] Pointer to the internal sequence
3795 **
3796 ** @release 2.1.0
3797 ******************************************************************************/
3798 
alignSeq(const AjPAlign thys,ajint iseq,ajint iali)3799 static const AjPSeq alignSeq(const AjPAlign thys, ajint iseq, ajint iali)
3800 {
3801     AlignPData data = NULL;
3802 
3803     ajListPeekNumber(thys->Data, iali, (void **) &data);
3804 
3805     return data->Seq[iseq];
3806 }
3807 
3808 
3809 
3810 
3811 /* @funcstatic alignSeqBegin **************************************************
3812 **
3813 ** Returns the start position for the nth sequence for an alignment.
3814 **
3815 ** Reverse direction is tested and the reverse complement numbering is used.
3816 **
3817 ** @param [r] data [const AlignPData] Alignment data object
3818 ** @param [r] iseq [ajint] Sequence number
3819 ** @return [ajint] Start position
3820 **
3821 ** @release 3.0.0
3822 ******************************************************************************/
3823 
alignSeqBegin(const AlignPData data,ajint iseq)3824 static ajint alignSeqBegin(const AlignPData data, ajint iseq)
3825 {
3826     ajint ret = 0;
3827 
3828     if(data->Rev[iseq])
3829     {
3830 	ret = data->Len[iseq] - data->Offend[iseq]
3831 	    - data->Start[iseq] + 1;
3832 	ajDebug(" alignSeqBegin %d rev:%B len:%d Offend:%d Start:%d ret:%d\n",
3833 		iseq, data->Rev[iseq], data->Len[iseq], data->Offend[iseq],
3834 		data->Start[iseq], ret);
3835     }
3836     else
3837     {
3838 	ret = data->Offset[iseq] + data->Start[iseq];
3839 	ajDebug(" alignSeqBegin %d rev:%B Offset:%d Start:%d ret:%d\n",
3840 		iseq, data->Rev[iseq], data->Offset[iseq],
3841 		data->Start[iseq], ret);
3842     }
3843 
3844     return ret;
3845 }
3846 
3847 
3848 
3849 
3850 /* @funcstatic alignSeqEnd ****************************************************
3851 **
3852 ** Returns the end position for the nth sequence for an alignment.
3853 **
3854 ** Reverse direction is tested and the reverse complement numbering is used.
3855 **
3856 ** @param [r] data [const AlignPData] Alignment data object
3857 ** @param [r] iseq [ajint] Sequence number
3858 ** @return [ajint] Start position
3859 **
3860 ** @release 3.0.0
3861 ******************************************************************************/
3862 
alignSeqEnd(const AlignPData data,ajint iseq)3863 static ajint alignSeqEnd(const AlignPData data, ajint iseq)
3864 {
3865     ajint ret = 0;
3866     ajint iend;
3867 
3868     if(data->End[iseq])
3869 	iend = data->End[iseq];
3870     else
3871 	iend = data->Len[iseq];
3872 
3873     if(data->Rev[iseq])
3874 	ret = data->Len[iseq] - data->Offend[iseq] - iend + 1;
3875     else
3876 	ret = data->Offset[iseq] + iend;
3877 
3878     return ret;
3879 }
3880 
3881 
3882 
3883 
3884 /* @funcstatic alignSeqIncrement **********************************************
3885 **
3886 ** Returns the position increment for the nth sequence for an alignment.
3887 **
3888 ** Reverse direction gives a value of -1 for counting down.
3889 **
3890 ** Forward direction gives +1.
3891 **
3892 ** @param [r] data [const AlignPData] Alignment data object
3893 ** @param [r] iseq [ajint] Sequence number
3894 ** @return [ajint] Increment for position counting
3895 **
3896 ** @release 3.0.0
3897 ******************************************************************************/
3898 
alignSeqIncrement(const AlignPData data,ajint iseq)3899 static ajint alignSeqIncrement(const AlignPData data, ajint iseq)
3900 {
3901     ajint ret = 0;
3902 
3903     if(data->Rev[iseq])
3904 	ret = -1;
3905     else
3906 	ret = 1;
3907 
3908     return ret;
3909 }
3910 
3911 
3912 
3913 
3914 /* @funcstatic alignSeqGapBegin ***********************************************
3915 **
3916 ** Counts the gaps at the start of the nth sequence for an alignment.
3917 **
3918 ** Reverse direction is tested and the reverse complement numbering is used.
3919 **
3920 ** @param [r] data [const AlignPData] Alignment data object
3921 ** @param [r] iseq [ajint] Sequence number
3922 ** @return [ajint] Start position
3923 **
3924 ** @release 3.0.0
3925 ******************************************************************************/
3926 
alignSeqGapBegin(const AlignPData data,ajint iseq)3927 static ajint alignSeqGapBegin(const AlignPData data, ajint iseq)
3928 {
3929     ajint ret = 0;
3930     static char testchars[] = "-~.? "; /* all known gap characters */
3931     const char* cp;
3932     ajint i;
3933 
3934     if(data->Rev[iseq])
3935 	ret = data->Offend[iseq];
3936     else
3937 	ret = data->Offset[iseq];
3938 
3939     cp = ajSeqGetSeqC(data->Seq[iseq]);
3940     i = strspn(cp+data->Start[iseq]-1, testchars);
3941 
3942     return ret+i;
3943 }
3944 
3945 
3946 
3947 
3948 /* @funcstatic alignSeqGapEnd *************************************************
3949 **
3950 ** Counts the gaps at the end of the nth sequence for an alignment.
3951 **
3952 ** Reverse direction is tested and the reverse complement numbering is used.
3953 **
3954 ** @param [r] data [const AlignPData] Alignment data object
3955 ** @param [r] iseq [ajint] Sequence number
3956 ** @return [ajint] End position
3957 **
3958 ** @release 3.0.0
3959 ******************************************************************************/
3960 
alignSeqGapEnd(const AlignPData data,ajint iseq)3961 static ajint alignSeqGapEnd(const AlignPData data, ajint iseq)
3962 {
3963     ajint ret = 0;
3964     static char testchars[] = "-~.? "; /* all known gap characters */
3965     const char* cp;
3966     const char* cpstart;
3967     ajint i;
3968 
3969     if(data->Rev[iseq])
3970 	ret = data->Offset[iseq];
3971     else
3972 	ret = data->Offend[iseq];
3973 
3974     cpstart = ajSeqGetSeqC(data->Seq[iseq]);
3975     cp = cpstart + data->End[iseq];
3976 
3977     for(i=0; cp > cpstart; cp--)
3978     {
3979 	if(!strchr(testchars, *cp))
3980 	    return ret+i-1;
3981 	i++;
3982     }
3983 
3984     return ret+data->End[iseq];
3985 }
3986 
3987 
3988 
3989 
3990 /* @funcstatic alignSeqRev ****************************************************
3991 **
3992 ** Returns the direction for the nth sequence for an alignment.
3993 **
3994 ** @param [r] data [const AlignPData] Alignment data object
3995 ** @param [r] iseq [ajint] Sequence number
3996 ** @return [AjBool] ajTrue if sequence is reversed
3997 **
3998 ** @release 3.0.0
3999 ******************************************************************************/
4000 
alignSeqRev(const AlignPData data,ajint iseq)4001 static AjBool alignSeqRev(const AlignPData data, ajint iseq)
4002 {
4003     AjBool ret = ajFalse;
4004 
4005     if(data->Rev[iseq])
4006 	ret = ajTrue;
4007     else
4008 	ret = ajFalse;
4009 
4010     return ret;
4011 }
4012 
4013 
4014 
4015 
4016 /* @funcstatic alignData ******************************************************
4017 **
4018 ** Returns the nth data structure for an alignment
4019 **
4020 ** @param [r] thys [const AjPAlign] Alignment object
4021 ** @param [r] iali [ajint] Alignment number
4022 ** @return [AlignPData] Pointer to the internal alignment structure
4023 **
4024 ** @release 2.1.0
4025 ******************************************************************************/
4026 
alignData(const AjPAlign thys,ajint iali)4027 static AlignPData alignData(const AjPAlign thys, ajint iali)
4028 {
4029     AlignPData data = NULL;
4030 
4031     ajListPeekNumber(thys->Data, iali, (void**) &data);
4032 
4033     return data;
4034 }
4035 
4036 
4037 
4038 
4039 /* @funcstatic alignLen *******************************************************
4040 **
4041 ** Returns the length of the nth sequence for an alignment
4042 **
4043 ** @param [r] thys [const AjPAlign] Alignment object
4044 ** @param [r] iali [ajint] Alignment number
4045 ** @return [ajint] Length of the internal sequence
4046 **
4047 ** @release 2.1.0
4048 ******************************************************************************/
4049 
alignLen(const AjPAlign thys,ajint iali)4050 static ajint alignLen(const AjPAlign thys, ajint iali)
4051 {
4052     AlignPData data = NULL;
4053 
4054     ajListPeekNumber(thys->Data, iali, (void**) &data);
4055 
4056     return data->LenAli;
4057 }
4058 
4059 
4060 
4061 
4062 /* @func ajAlignSetType *******************************************************
4063 **
4064 ** Sets the align type (if it is not set already)
4065 **
4066 ** @param [u] thys [AjPAlign] Alignment object
4067 ** @return [void]
4068 **
4069 ** @release 2.1.0
4070 ** @@
4071 ******************************************************************************/
4072 
ajAlignSetType(AjPAlign thys)4073 void ajAlignSetType(AjPAlign thys)
4074 {
4075     const AjPSeq seq;
4076     ajint i;
4077 
4078     ajDebug("ajAlignSetType '%S'\n",
4079 	    thys->Type);
4080 
4081     if(ajStrGetLen(thys->Type))
4082 	return;
4083 
4084     if(!thys->Nseqs)
4085 	return;
4086 
4087     for(i=0; i < thys->Nseqs; i++)
4088     {
4089 	ajDebug("Calling alignSeq d 0\n", i);
4090 	seq = alignSeq(thys, i, 0);
4091 
4092 	if(ajStrGetLen(seq->Type))
4093 	{
4094 	    ajStrAssignS(&thys->Type, seq->Type);
4095 	    return;
4096 	}
4097     }
4098 
4099     ajDebug("testing alignSeq 0 0\n", i);
4100 
4101     if(ajSeqIsNuc(alignSeq(thys, 0, 0)))
4102 	ajStrAssignC(&thys->Type, "N");
4103     else
4104 	ajStrAssignC(&thys->Type, "P");
4105 
4106     return;
4107 }
4108 
4109 
4110 
4111 
4112 /* @func ajAlignSetExternal ***************************************************
4113 **
4114 ** Sets the align object to use external sequence references, which are
4115 ** to be copied pointers rather than clones of the whole sequence.
4116 **
4117 ** Intended for alignments of large sequences where there is no need to
4118 ** keep many copies. An example is the EMBOSS application wordmatch.
4119 **
4120 ** @param [u] thys [AjPAlign] Alignment object
4121 ** @param [r] external [AjBool] If true, do not make copies of sequence data
4122 **                              and do not delete internal sequence data
4123 ** @return [void]
4124 **
4125 ** @release 2.4.0
4126 ** @@
4127 ******************************************************************************/
4128 
ajAlignSetExternal(AjPAlign thys,AjBool external)4129 void ajAlignSetExternal(AjPAlign thys, AjBool external)
4130 {
4131     ajDebug("ajAlignSetExternal old:%B new:%B\n",
4132 	    thys->SeqExternal, external);
4133 
4134     thys->SeqExternal = external;
4135 
4136     return;
4137 }
4138 
4139 
4140 
4141 
4142 /* @func ajAlignSetRefSeqIndx *************************************************
4143 **
4144 ** Sets the index of the reference sequence.
4145 **
4146 ** @param [u] thys [AjPAlign] Alignment object
4147 ** @param [r] refseq [ajint] index of the reference sequence
4148 ** @return [void]
4149 **
4150 ** @release 6.3.0
4151 ** @@
4152 ******************************************************************************/
4153 
ajAlignSetRefSeqIndx(AjPAlign thys,ajint refseq)4154 void ajAlignSetRefSeqIndx(AjPAlign thys, ajint refseq)
4155 {
4156 
4157     thys->RefSeq = refseq;
4158 
4159     return;
4160 }
4161 
4162 
4163 
4164 
4165 /* @func ajAlignSetRange ******************************************************
4166 **
4167 ** Sets the alignment range in each sequence, but only for a
4168 ** pairwise alignment
4169 **
4170 ** @param [u] thys [AjPAlign] Alignment object
4171 ** @param [r] start1 [ajint] Start in sequence 1
4172 ** @param [r] end1 [ajint] End in sequence 1
4173 ** @param [r] len1 [ajint] Length of sequence 1
4174 ** @param [r] off1 [ajint] Offset of sequence 1
4175 ** @param [r] start2 [ajint] Start in sequence 2
4176 ** @param [r] end2 [ajint] End in sequence 2
4177 ** @param [r] len2 [ajint] Length of sequence 2
4178 ** @param [r] off2 [ajint] Offset of sequence 2
4179 ** @return [AjBool] ajTrue on success. Failure also writes an error message.
4180 **
4181 ** @release 2.3.0
4182 ** @@
4183 ******************************************************************************/
4184 
ajAlignSetRange(AjPAlign thys,ajint start1,ajint end1,ajint len1,ajint off1,ajint start2,ajint end2,ajint len2,ajint off2)4185 AjBool ajAlignSetRange(AjPAlign thys,
4186 		       ajint start1, ajint end1, ajint len1, ajint off1,
4187 		       ajint start2, ajint end2, ajint len2, ajint off2)
4188 {
4189     AlignPData data = NULL;
4190     ajint nali;
4191 
4192     ajDebug("ajAlignSetRange %d..%d (%d) %d..%d (%d)\n",
4193 	    start1, end1, len1, start2, end2, len2);
4194 
4195     if(thys->Nseqs != 2)
4196     {
4197 	ajErr("ajAlignSetRange requires alignment of 2 sequences only");
4198 	return ajFalse;
4199     }
4200 
4201     ajListPeekLast(thys->Data, (void**) &data);
4202 
4203     nali = (ajuint) ajListGetLength(thys->Data);
4204     ajDebug("nali:%d set range %d\n", nali, nali-1);
4205 
4206     data->Start[0]  = start1;
4207     data->End[0]    = end1;
4208     data->Len[0]    = len1;
4209     data->Offset[0] = off1;
4210     data->Rev[0]    = ajFalse;
4211 
4212     data->Start[1]  = start2;
4213     data->End[1]    = end2;
4214     data->Len[1]    = len2;
4215     data->Offset[1] = off2;
4216     data->Rev[1]    = ajFalse;
4217 
4218     if(thys->SeqExternal)
4219     {
4220 	data->LenAli = (end1 - start1) + 1;
4221 
4222 	if(data->LenAli < (end2 - start2 + 1))
4223 	    data->LenAli = (end2 - start2) + 1;
4224 
4225 	ajDebug("len:  %d\n", data->LenAli);
4226     }
4227 
4228     return ajTrue;
4229 }
4230 
4231 
4232 
4233 
4234 /* @func ajAlignSetSubRange ***************************************************
4235 **
4236 ** Sets the local alignment sub range in each sequence, but only for a
4237 ** pairwise alignment.
4238 **
4239 ** This sets the SubOffset in addition to the Start and End values,
4240 ** for use where the alignment has a large sequence, but only part of
4241 ** it is to be reported.
4242 **
4243 ** The usual example is where there are many matches in a long sequence,
4244 ** defined with SeqExternal so we use pointers to one original to avoid
4245 ** making multiple copies in memory while building the AjPAlign and AlignPData
4246 ** structure.
4247 **
4248 ** Resets the alignment length to be the length of the longest subsequence
4249 **
4250 ** @param [u] thys [AjPAlign] Alignment object
4251 ** @param [r] substart1 [ajint] Subsequence offset in sequence 1
4252 ** @param [r] start1 [ajint] Subsequence start in sequence 1
4253 ** @param [r] end1 [ajint] Subsequence end in sequence 1
4254 ** @param [r] rev1 [AjBool] ajTrue if sequence 1 is reversed
4255 ** @param [r] len1 [ajint] Length of sequence 1
4256 ** @param [r] substart2 [ajint] Subsequence offset in sequence 2
4257 ** @param [r] start2 [ajint] Subsequence start in sequence 2
4258 ** @param [r] end2 [ajint] Subsequence end in sequence 2
4259 ** @param [r] rev2 [AjBool] ajTrue if sequence 2 is reversed
4260 ** @param [r] len2 [ajint] Length of sequence 2
4261 ** @return [AjBool] ajTrue on success. Failure also writes an error message.
4262 **
4263 ** @release 2.5.1
4264 ** @@
4265 ******************************************************************************/
4266 
ajAlignSetSubRange(AjPAlign thys,ajint substart1,ajint start1,ajint end1,AjBool rev1,ajint len1,ajint substart2,ajint start2,ajint end2,AjBool rev2,ajint len2)4267 AjBool ajAlignSetSubRange(AjPAlign thys,
4268 			  ajint substart1, ajint start1,
4269 			  ajint end1, AjBool rev1, ajint len1,
4270 			  ajint substart2, ajint start2,
4271 			  ajint end2, AjBool rev2, ajint len2)
4272 {
4273     AlignPData data = NULL;
4274     ajint nali;
4275 
4276     ajDebug("ajAlignSetSubRange %d(%d)..%d (%d) %d(%d)..%d (%d)\n",
4277 	    start1, substart1, end1, len1,
4278 	    start2, substart2, end2, len2);
4279 
4280     if(thys->Nseqs != 2)
4281     {
4282 	ajErr("ajAlignSetSubRange requires alignment of 2 sequences only");
4283 
4284 	return ajFalse;
4285     }
4286 
4287     ajListPeekLast(thys->Data, (void**) &data);
4288 
4289     nali = (ajuint) ajListGetLength(thys->Data);
4290     ajDebug("nali:%d set range %d\n", nali, nali-1);
4291 
4292     data->SubOffset[0] = substart1;
4293     data->Start[0]     = start1;
4294     data->End[0]       = end1;
4295     data->Len[0]       = len1;
4296     data->Offset[0]    = substart1;
4297     data->Offend[0]    = len1 - (substart1 + end1 - start1 + 1);
4298     data->Rev[0]       = rev1;
4299 
4300     data->SubOffset[1] = substart2;
4301     data->Start[1]     = start2;
4302     data->End[1]       = end2;
4303     data->Len[1]       = len2;
4304     data->Offset[1]    = substart2;
4305     data->Offend[1]    = len2 - (substart2 + end2 - start2 + 1);
4306     data->Rev[1]       = rev2;
4307 
4308 	data->LenAli = (end1 - start1) + 1;
4309 
4310 	if(data->LenAli < (end2 - start2 + 1))
4311 	{
4312 	    data->LenAli = (end2 - start2) + 1;
4313 	}
4314 
4315 	ajDebug("len:  %d\n", data->LenAli);
4316 
4317     return ajTrue;
4318 }
4319 
4320 
4321 
4322 
4323 /* @funcstatic alignSeqName ***************************************************
4324 **
4325 ** Returns the sequence name or USA depending on the setting in the
4326 ** Alignment object (derived from the ACD and command line -ausa option)
4327 **
4328 ** @param [r] thys [const AjPAlign] Alignment object
4329 ** @param [r] seq [const AjPSeq] Sequence object
4330 ** @return [const AjPStr] Sequence name for this alignment
4331 **
4332 ** @release 2.1.0
4333 ******************************************************************************/
4334 
alignSeqName(const AjPAlign thys,const AjPSeq seq)4335 static const AjPStr alignSeqName(const AjPAlign thys, const AjPSeq seq)
4336 {
4337     if(thys->Showusa)
4338     return ajSeqGetUsaS(seq);
4339 
4340     return ajSeqGetNameS(seq);
4341 }
4342 
4343 
4344 
4345 
4346 /* @funcstatic alignDataDel ***************************************************
4347 **
4348 ** Deletes an alignment data structure
4349 **
4350 ** @param [d] pthys [AlignPData*] Alignment data structure
4351 ** @param [r] external [AjBool] Sequence is a pointer to an external
4352 **                              object, do not delete.
4353 ** @return [void]
4354 **
4355 ** @release 2.1.0
4356 ******************************************************************************/
4357 
alignDataDel(AlignPData * pthys,AjBool external)4358 static void alignDataDel(AlignPData* pthys, AjBool external)
4359 {
4360     AlignPData thys;
4361     ajint i;
4362     void *freeptr = NULL;
4363 
4364     thys = *pthys;
4365 
4366     AJFREE(thys->Start);
4367     AJFREE(thys->End);
4368     AJFREE(thys->Len);
4369     AJFREE(thys->Offset);
4370     AJFREE(thys->Offend);
4371     AJFREE(thys->SubOffset);
4372     AJFREE(thys->Rev);
4373 
4374     ajStrDel(&thys->Score);
4375     ajDebug("alignDataDel NSeqs: %d External: %B\n",
4376 	    thys->Nseqs, external);
4377 
4378     if(!external)
4379 	for(i=0;i<thys->Nseqs;++i)
4380 	{
4381 	    ajDebug("alignDataDel seq[%d] %S %d\n",
4382 		    i, ajSeqGetNameS(thys->Seq[i]), ajSeqGetLen(thys->Seq[i]));
4383 	    ajSeqDel(&thys->RealSeq[i]);
4384         }
4385 
4386     AJFREE(thys->RealSeq);
4387     freeptr = (void *)thys->Seq;
4388     AJFREE(freeptr);
4389     AJFREE(*pthys);
4390 
4391     return;
4392 }
4393 
4394 
4395 
4396 
4397 /* @funcstatic alignDataNew ***************************************************
4398 **
4399 ** Creates and initialises an alignment data object
4400 ** with a specified number of sequences
4401 **
4402 ** @param [r] nseqs [ajint] Number of sequences alignment data object will
4403 **                          store
4404 ** @param [r] external [AjBool] Sequence is a pointer to an external
4405 **                              object, no need for an own copy.
4406 ** @return [AlignPData] Pointer to the new alignment data object
4407 **
4408 ** @release 6.2.0
4409 ******************************************************************************/
4410 
alignDataNew(ajint nseqs,AjBool external)4411 static AlignPData alignDataNew(ajint nseqs, AjBool external)
4412 {
4413     AlignPData data = NULL;
4414 
4415     AJNEW0(data);
4416 
4417     data->Nseqs = nseqs;
4418     AJCNEW0(data->Start, nseqs);
4419     AJCNEW0(data->End, nseqs);
4420     AJCNEW0(data->Len, nseqs);
4421     AJCNEW0(data->Offset, nseqs);
4422     AJCNEW0(data->Offend, nseqs);
4423     AJCNEW0(data->SubOffset, nseqs);
4424     AJCNEW0(data->Rev, nseqs);
4425     AJCNEW0(data->Seq, nseqs);
4426     if(!external)
4427         AJCNEW0(data->RealSeq, nseqs);
4428 
4429     return data;
4430 }
4431 
4432 
4433 
4434 
4435 /* @funcstatic alignDataSetSequence *******************************************
4436 **
4437 ** Sets specified sequence in the alignment data object
4438 **
4439 ** Note: In pre EMBOSS-6.3 the external option is set only by wordmatch
4440 ** and seqmatchall, both producing exact multiple matches and calling
4441 ** ajAlignSetSubRange after calling ajAlignDefineSS which repeats the settings
4442 ** made in this function when called via ajAlignDefineSS.
4443 ** Since ajSeqCountGaps function is time consuming for long sequences,
4444 ** function was modified to return quickly when the external option is true.
4445 ** After release 6.3 this function call hierarchy should be revised.
4446 **
4447 **
4448 ** @param [u] thys [AlignPData] Alignment data object
4449 ** @param [r] seq [const AjPSeq] Input sequence, either a new copy is made
4450 **                               or a direct reference is made depending on
4451 **                               the external parameter
4452 ** @param [r] iseq [ajint] Index of the sequence
4453 ** @param [r] external [AjBool] Sequence is a pointer to an external
4454 **                              object
4455 ** @return [void]
4456 **
4457 ** @release 6.2.0
4458 ******************************************************************************/
4459 
alignDataSetSequence(AlignPData thys,const AjPSeq seq,ajint iseq,AjBool external)4460 static void alignDataSetSequence(AlignPData thys, const AjPSeq seq, ajint iseq,
4461         AjBool external)
4462 {
4463     if(external)
4464     {
4465         thys->Seq[iseq] = seq;
4466         return;
4467     }
4468 
4469     thys->Start[iseq] = ajSeqGetBegin(seq);
4470     thys->End[iseq]   = ajSeqGetEnd(seq);
4471     thys->Len[iseq]   = ajSeqGetLen(seq) + ajSeqGetOffset(seq)
4472                         + ajSeqGetOffend(seq) - ajSeqCountGaps(seq);
4473     thys->Offset[iseq] = ajSeqGetOffset(seq);
4474     thys->Offend[iseq] = ajSeqGetOffend(seq);
4475     thys->SubOffset[iseq] = 0;
4476     thys->Rev[iseq]    = ajSeqIsReversed(seq);
4477 
4478     thys->RealSeq[iseq] = ajSeqNewSeq(seq);
4479     ajSeqGapStandard(thys->RealSeq[iseq], '-');
4480     thys->Seq[iseq] = thys->RealSeq[iseq];
4481 
4482     return;
4483 }
4484 
4485 
4486 
4487 
4488 /* @funcstatic alignDiff ******************************************************
4489 **
4490 ** Blank out identities between two strings
4491 **
4492 ** @param [w] pmark [AjPStr*] Mark string with spaces for identities
4493 ** @param [r] seq [const AjPStr] String (sequence) to compare
4494 ** @param [r] idchar [char] Character for identities
4495 ** @return [void]
4496 **
4497 ** @release 2.1.0
4498 ******************************************************************************/
4499 
alignDiff(AjPStr * pmark,const AjPStr seq,char idchar)4500 static void alignDiff(AjPStr* pmark, const AjPStr seq, char idchar)
4501 {
4502     ajuint i;
4503     ajuint ilen;
4504     char c;
4505     char d;
4506 
4507     ilen = ajStrGetLen(seq);
4508 
4509     ajStrGetuniqueStr(pmark);
4510 
4511     if(ajStrGetLen(*pmark) < ilen)
4512 	ilen = ajStrGetLen(*pmark);
4513 
4514     for(i=0; i < ilen; i++)
4515     {
4516 	c = ajStrGetCharPos(*pmark, i);
4517 
4518 	if(c == ' ')
4519             continue;
4520 
4521 	if(c == '-')
4522             continue;
4523 
4524 	d = ajStrGetCharPos(seq, i);
4525 
4526 	if(d == ' ')
4527             continue;
4528 
4529 	if(d == '-')
4530             continue;
4531 
4532 	if(toupper((int)c) == toupper((int)d))
4533 	    ajStrPasteCountK(pmark, i, idchar, 1);
4534     }
4535 
4536     return;
4537 }
4538 
4539 
4540 
4541 
4542 /* @funcstatic alignSame ******************************************************
4543 **
4544 ** Blank out differences between two strings
4545 **
4546 ** @param [w] pmark [AjPStr*] Mark string with spaces for differences
4547 ** @param [r] seq [const AjPStr] String (sequence) to compare
4548 ** @param [r] diffchar [char] Character for differences
4549 ** @return [void]
4550 **
4551 ** @release 3.0.0
4552 ******************************************************************************/
4553 
alignSame(AjPStr * pmark,const AjPStr seq,char diffchar)4554 static void alignSame(AjPStr* pmark, const AjPStr seq, char diffchar)
4555 {
4556     ajuint i;
4557     ajuint ilen;
4558     char c;
4559     char d;
4560 
4561     ilen = ajStrGetLen(seq);
4562 
4563     ajStrGetuniqueStr(pmark);
4564 
4565     if(ajStrGetLen(*pmark) < ilen)
4566 	ilen = ajStrGetLen(*pmark);
4567 
4568     for(i=0; i < ilen; i++)
4569     {
4570 	c = ajStrGetCharPos(*pmark, i);
4571 
4572 	if(c == ' ')
4573             continue;
4574 
4575 	d = ajStrGetCharPos(seq, i);
4576 
4577 	if(toupper((int)c) == toupper((int)d))
4578             continue;
4579 
4580 	ajStrPasteCountK(pmark, i, diffchar, 1);
4581     }
4582 
4583     return;
4584 }
4585 
4586 
4587 
4588 
4589 /* @funcstatic alignSim *******************************************************
4590 **
4591 ** Convert upper case (identical) positions to an identity character,
4592 ** and lower case (similar) positions to a similarity character
4593 **
4594 ** @param [w] pmark [AjPStr*] Mark string with spaces for differences
4595 ** @param [r] idch [const char] Identity character
4596 ** @param [r] simch [const char] Similarity character
4597 ** @param [r] misch [const char] Mismatch character
4598 ** @param [r] gapch [const char] Gap character
4599 ** @return [void]
4600 **
4601 ** @release 2.1.0
4602 ******************************************************************************/
4603 
alignSim(AjPStr * pmark,const char idch,const char simch,const char misch,const char gapch)4604 static void alignSim(AjPStr* pmark, const char idch, const char simch,
4605 		     const char misch, const char gapch)
4606 {
4607     ajint i;
4608     ajint ilen;
4609     char c;
4610 
4611     /*ajDebug("alignSim '%S'\n", *pmark);*/
4612 
4613     ajStrGetuniqueStr(pmark);
4614 
4615     ilen = ajStrGetLen(*pmark);
4616 
4617     for(i=0; i < ilen; i++)
4618     {
4619 	c = ajStrGetCharPos(*pmark, i);
4620 
4621 	if(tolower((int)c) == '#')
4622 	    ajStrPasteCountK(pmark, i, misch, 1);
4623 	else if(isupper((int)c))
4624 	    ajStrPasteCountK(pmark, i, idch, 1);
4625 	else if(islower((int)c))
4626 	    ajStrPasteCountK(pmark, i, simch, 1);
4627 	else
4628 	    ajStrPasteCountK(pmark, i,gapch, 1);
4629     }
4630 
4631     /*ajDebug("  result '%S'\n", *pmark);*/
4632 
4633     return;
4634 }
4635 
4636 
4637 
4638 
4639 /* @funcstatic alignConsStats *************************************************
4640 **
4641 ** Calculates alignment statistics (and a consensus).
4642 **
4643 ** @param [u] thys [AjPAlign] Alignment object. Matrix will be defined
4644 **                            if not already set.
4645 ** @param [r] iali [ajint] alignment number
4646 ** @param [w] cons [AjPStr*] the created consensus sequence
4647 ** @param [w] retident [ajint*] number of residues identical in all sequences
4648 ** @param [w] retsim   [ajint*] number of residues similar in all sequences
4649 ** @param [w] retgap   [ajint*] number of residues with a gap in 1 sequence
4650 ** @param [w] retlen [ajint*] length of the alignment
4651 ** @return [void]
4652 **
4653 ** @release 2.1.0
4654 ******************************************************************************/
4655 
alignConsStats(AjPAlign thys,ajint iali,AjPStr * cons,ajint * retident,ajint * retsim,ajint * retgap,ajint * retlen)4656 static void alignConsStats(AjPAlign thys, ajint iali, AjPStr *cons,
4657 			   ajint* retident, ajint* retsim, ajint* retgap,
4658 			   ajint* retlen)
4659 {
4660     ajint   imat;     /* iterate over identical and matching arrays */
4661     ajint   iseq;	     /* iterate over sequences (outer loop) */
4662     ajint   jseq;	     /* iterate over sequences (inner loop) */
4663 
4664 
4665     ajint   **matrix  = NULL;
4666     float   **fmatrix = NULL;
4667 
4668     ajint   m1 = 0;
4669     ajint   m2 = 0;
4670 
4671     ajint   matsize;
4672     ajint   matchingmaxindex;
4673     ajint   identicalmaxindex;
4674     ajint   nseqs;
4675     ajint   mlen;
4676 
4677     float   max;
4678     float   contri = 0;
4679     float   contrj = 0;
4680 
4681     AjPSeqCvt cvt  = 0;
4682 
4683     AjPFloat  posScore = NULL; /* cumulative similarity scores by sequence */
4684     /* for matching all other sequences */
4685     float   *identical;	    /* cum. weight for each valid character */
4686     float   *matching;	 /* cum. weight for matching this character */
4687     ajint   highindex;	   /* position of highest score in posScore */
4688     ajint   kkpos;		/* alignment position loop variable */
4689     ajint   kipos;	   /* alignment position kkpos + iseq start */
4690     ajint   kjpos;	   /* alignment position kkpos + jseq start */
4691     ajint   khpos;		 /* alignment position in highindex */
4692 
4693     float himatch = 0.0;	/* highest match score (often used) */
4694 
4695     const char **seqcharptr;
4696     char res;
4697     char nocon;
4698     char gapch;
4699     float fplural;
4700     float fplurality = 51.0;
4701     float ident;
4702     AjBool isident;
4703     AjBool issim;
4704     AjBool isgap;
4705     const AjPSeq* seqs;
4706     ajint numres;		 /* number of residues (not spaces) */
4707     AlignPData data = NULL;
4708     void *freeptr;
4709 
4710 
4711     data = alignData(thys, iali);
4712 
4713     if(!thys->IMatrix && !thys->FMatrix)
4714     {
4715 	if(ajSeqIsNuc(alignSeq(thys, 0, iali)))
4716 	    ajStrAssignC(&thys->Matrix, "EDNAFULL");
4717 	else
4718 	    ajStrAssignC(&thys->Matrix, "EBLOSUM62");
4719 
4720 	thys->IMatrix = ajMatrixNewFile(thys->Matrix);
4721     }
4722 
4723     *retident=0;
4724     *retsim=0;
4725     *retgap=0;
4726 
4727     seqs    = alignSeqs(thys, iali);
4728     nseqs   = thys->Nseqs;
4729     mlen    = alignLen(thys, iali);
4730     fplural = alignTotweight(thys, 0) * fplurality / (float)100.;
4731     ident   = alignTotweight(thys, 0);
4732 
4733     ajDebug("alignConsStats ali:%d mlen:%d\n", iali, mlen);
4734 
4735     /* ajDebug("fplural:%.1f ident:%.1f mlen: %d\n",
4736 	    fplural, ident, mlen); */
4737 
4738     if(thys->IMatrix)
4739     {
4740 	matrix  = ajMatrixGetMatrix(thys->IMatrix);
4741 	cvt     = ajMatrixGetCvt(thys->IMatrix); /* return conversion table */
4742 	matsize = ajMatrixGetSize(thys->IMatrix);
4743     }
4744     else
4745     {
4746 	fmatrix = ajMatrixfGetMatrix(thys->FMatrix);
4747 	cvt     = ajMatrixfGetCvt(thys->FMatrix); /* return conversion table */
4748 	matsize = ajMatrixfGetSize(thys->FMatrix);
4749     }
4750 
4751     AJCNEW(seqcharptr,nseqs);
4752     AJCNEW(identical,matsize);
4753     AJCNEW(matching,matsize);
4754 
4755     posScore = ajFloatNew();
4756 
4757     gapch = '-';
4758     nocon = '#';
4759 
4760     for(iseq=0;iseq<nseqs;iseq++)	/* get sequence as string */
4761 	seqcharptr[iseq] = ajSeqGetSeqC(alignSeq(thys, iseq, iali));
4762 
4763     /* For each position in the alignment, calculate consensus character */
4764 
4765     for(kkpos=0; kkpos< mlen; kkpos++)
4766     {
4767 	res = gapch;
4768 
4769 	isident = ajFalse;
4770 	issim   = ajFalse;
4771 	isgap   = ajFalse;
4772 
4773 	/*
4774 	 ** reset identities and +ve matches
4775 	 */
4776 
4777 	for(imat=0;imat<matsize;imat++)
4778 	{
4779 	    identical[imat] = 0.0; /* weights of all sequence chars in col. */
4780 	    matching[imat]  = 0.0;
4781 	}
4782 
4783 	/*
4784 	 ** reset the posScore array
4785 	 */
4786 
4787 	for(iseq=0;iseq<nseqs;iseq++)
4788 	    ajFloatPut(&posScore,iseq,0.);
4789 
4790 	/*
4791 	 ** generate scores (identical, posScore) for columns
4792 	 */
4793 
4794 	for(iseq=0;iseq<nseqs;iseq++)
4795 	{
4796 	    kipos = kkpos + data->SubOffset[iseq];
4797 	    m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[iseq][kipos]);
4798 
4799 	    if(m1 < 0)
4800 	    {
4801 		ajUser("Bad character for matrix iseq:%d kipos:%d %x\n",
4802 			iseq, kipos, (int) seqcharptr[iseq][kipos]);
4803 		m1 = 0;
4804 	    }
4805 
4806 	    if(m1)
4807 		identical[m1] += seqs[iseq]->Weight;
4808 
4809 	    for(jseq=iseq+1;jseq<nseqs;jseq++)
4810 	    {
4811 		kjpos = kkpos + data->SubOffset[jseq];
4812 		m2 = ajSeqcvtGetCodeK(cvt,seqcharptr[jseq][kjpos]);
4813 
4814 		if(m2 < 0)
4815 		{
4816 		    ajDebug("Bad character for matrix jseq:%d kjpos:%d %x\n",
4817 			    jseq, kjpos, (int) seqcharptr[jseq][kjpos]);
4818 		    m2 = 0;
4819 		}
4820 
4821 		if(m1 && m2)
4822 		{
4823 		    if(matrix)
4824 		    {
4825 			contri = (float)matrix[m1][m2]*seqs[jseq]->Weight
4826 			  +ajFloatGet(posScore,iseq);
4827 			contrj = (float)matrix[m1][m2]*seqs[iseq]->Weight
4828 			  +ajFloatGet(posScore,jseq);
4829 		    }
4830 		    else
4831 		    {
4832 			contri = fmatrix[m1][m2]*seqs[jseq]->Weight
4833 			  +ajFloatGet(posScore,iseq);
4834 			contrj = fmatrix[m1][m2]*seqs[iseq]->Weight
4835 			  +ajFloatGet(posScore,jseq);
4836 		    }
4837 
4838 		    ajFloatPut(&posScore,iseq,contri);
4839 		    ajFloatPut(&posScore,jseq,contrj);
4840 		}
4841 	    }
4842 	}
4843 
4844 	/*
4845 	 ** highindex is the highest scoring position (seq no.) in posScore
4846 	 ** for 2 sequences this appears to be usually 0
4847 	 */
4848 
4849 	highindex = -1;
4850 	max       = -FLT_MAX;
4851 	numres    = 0;
4852 	for(iseq=0;iseq<nseqs;iseq++)
4853 	{
4854 	    kipos = kkpos + data->SubOffset[iseq];
4855 
4856 	    if(seqcharptr[iseq][kipos] != ' ' &&
4857 	       seqcharptr[iseq][kipos] != '-')
4858 		numres++;
4859 
4860 	    if(ajFloatGet(posScore,iseq) > max)
4861 	    {
4862 		highindex = iseq;
4863 		max       = ajFloatGet(posScore,iseq);
4864 	    }
4865 	}
4866 
4867 	/* highindex is now set */
4868 
4869 	/*
4870 	 ** find +ve matches in the column
4871 	 ** m1 is non-zero for a valid character in iseq
4872 	 ** m2 is non-zero for a valid character in jseq
4873 	 */
4874 
4875 	for(iseq=0;iseq<nseqs;iseq++)
4876 	{
4877 	    kipos = kkpos + data->SubOffset[iseq];
4878 	    m1    = ajSeqcvtGetCodeK(cvt, seqcharptr[iseq][kipos]);
4879 
4880 	    if(m1 < 0)
4881 	    {
4882 		ajDebug("Bad character for matrix iseq:%d kipos:%d %x\n",
4883 			iseq, kipos, (int) seqcharptr[iseq][kipos]);
4884 		m1 = 0;
4885 	    }
4886 
4887 	    if(E_FPZERO(matching[m1],U_FEPS))
4888 	    {   /* first time we have met this character */
4889 		for(jseq=0;jseq<nseqs;jseq++) /* all (other) sequences */
4890 		{
4891 		    kjpos = kkpos + data->SubOffset[jseq];
4892 		    m2    = ajSeqcvtGetCodeK(cvt, seqcharptr[jseq][kjpos]);
4893 
4894 		    if(m2 < 0)
4895 		    {
4896 			ajDebug("Bad character for matrix jseq:%d kjpos:%d "
4897                                 "%x\n",
4898 				jseq, kjpos, (int) seqcharptr[jseq][kjpos]);
4899 			m2 = 0;
4900 		    }
4901 
4902 		    if(matrix)
4903 		    {
4904 			/* 'matching' if positive */
4905 			if(m1 && m2 && matrix[m1][m2] > 0)
4906 			    matching[m1] += seqs[jseq]->Weight;
4907 		    }
4908 		    else
4909 			if(m1 && m2 && fmatrix[m1][m2] > 0.0)
4910 			    matching[m1] += seqs[jseq]->Weight;
4911 
4912 
4913 		    /*
4914 		       if( iseq != jseq)
4915 		       /# skip the sequence we are on #/
4916 		       {
4917 		       m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[jseq][kjpos]);
4918 		       if(matrix)
4919 		       {
4920 		       if(m1 && m2 && matrix[m1][m2] > 0)
4921 		       /# 'matching' if positive #/
4922 		       {
4923 		       matching[m1] += seqs[jseq]->Weight;
4924 		       }
4925 		       }
4926 		       else
4927 		       {
4928 		       if(m1 && m2 && fmatrix[m1][m2] > 0.0)
4929 		       {
4930 		       matching[m1] += seqs[jseq]->Weight;
4931 		       }
4932 		       }
4933 		       }
4934 		       */
4935 		}
4936 	    }
4937 	}
4938 
4939 	matchingmaxindex  = 0;	  /* get max matching and identical */
4940 	identicalmaxindex = 0;
4941 
4942 	for(iseq=0;iseq<nseqs;iseq++)
4943 	{
4944 	    kipos = kkpos + data->SubOffset[iseq];
4945 	    m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[iseq][kipos]);
4946 
4947 	    if(identical[m1] > identical[identicalmaxindex])
4948 		identicalmaxindex= m1;
4949 	}
4950 
4951 	for(iseq=0;iseq<nseqs;iseq++)
4952 	{
4953 	    kipos = kkpos + data->SubOffset[iseq];
4954 	    m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[iseq][kipos]);
4955 
4956 	    if(matching[m1] > matching[matchingmaxindex])
4957 		matchingmaxindex= m1;
4958 	    else if(E_FPEQ(matching[m1],matching[matchingmaxindex],U_FEPS))
4959 	    {
4960 		if(identical[m1] > identical[matchingmaxindex])
4961 		    matchingmaxindex= m1;
4962 	    }
4963 
4964 	    if(seqcharptr[iseq][kipos] == '-' ||
4965 	       seqcharptr[iseq][kipos] == ' ')
4966 		isgap=ajTrue;
4967 	}
4968 
4969 	/*
4970 	   ajDebug("index[%d] ident:%d matching:%d high:%d\n",
4971 	   kpos,
4972 	   identicalmaxindex,
4973 	   matchingmaxindex, highindex);
4974 	   */
4975 	khpos   = kkpos + data->SubOffset[highindex];
4976 	himatch = matching[ajSeqcvtGetCodeK(cvt,seqcharptr[highindex][khpos])];
4977 
4978 	if(thys->IMatrix)
4979 	{
4980 /*	    debugstr1 = ajMatrixGetLabelNum(thys->IMatrix, identicalmaxindex-1);
4981 	    debugstr2 = ajMatrixGetLabelNum(thys->IMatrix, matchingmaxindex-1);
4982 */
4983 
4984 	    /*ajDebug("index[%d] ident:%d '%S' %.1f matching:%d '%S' %.1f %.1f "
4985 		    "high:%d '%c' %.1f\n",
4986 		    kkpos,
4987 		    identicalmaxindex,
4988 		    debugstr1,
4989 		    identical[identicalmaxindex],
4990 		    matchingmaxindex,
4991 		    debugstr2,
4992 		    matching[matchingmaxindex],
4993 		    himatch,
4994 		    highindex, seqcharptr[highindex][khpos],
4995 		    seqs[highindex]->Weight); */
4996 	}
4997 	else
4998 	{
4999 	    /*debugstr1 = ajMatrixfGetLabelNum(thys->FMatrix,
5000                                                identicalmaxindex-1);
5001               debugstr2 = ajMatrixfGetLabelNum(thys->FMatrix,
5002                                                matchingmaxindex-1);*/
5003 	    /* ajDebug("index[%d] ident:%d '%S' %.1f matching:%d '%S' %.1f "
5004                        "%.1f "
5005 		    "high:%d '%c' %.1f\n",
5006 		    kkpos,
5007 		    identicalmaxindex,
5008 		    debugstr1,
5009 		    identical[identicalmaxindex],
5010 		    matchingmaxindex,
5011 		    debugstr2,
5012 		    matching[matchingmaxindex],
5013 		    himatch,
5014 		    highindex, seqcharptr[highindex][khpos],
5015 		    seqs[highindex]->Weight); */
5016 	}
5017 
5018 	if(identical[identicalmaxindex] >= ident)
5019 	    isident=ajTrue;
5020 
5021 	if(matching[matchingmaxindex] >= fplural)
5022 	    issim=ajTrue;
5023 
5024 	/* plurality check */
5025 	if(himatch >= fplural)
5026 	    if(seqcharptr[highindex][khpos] != '-')
5027 		res = toupper((int)seqcharptr[highindex][khpos]);
5028 
5029 
5030 	if(E_FPEQ(himatch,seqs[highindex]->Weight,U_FEPS))
5031 	{
5032 	    if(numres > 1)
5033 		res = nocon;
5034 	    else
5035 		res = gapch;
5036 	}
5037 
5038 	if(issim && ! isident)
5039 	    res = tolower((int)res);
5040 
5041 	ajStrAppendK(cons,res);
5042 
5043 	if(isident)
5044 	    ++*retident;
5045 
5046 	if(issim)
5047 	    ++*retsim;
5048 
5049 	if(isgap)
5050 	    ++*retgap;
5051 
5052 	/* ajDebug("id:%b sim:%b gap:%b res:%c '",
5053                    isident, issim, isgap, res); */
5054 	for(iseq=0; iseq<nseqs; iseq++)
5055 	{
5056 	    kipos = kkpos + data->SubOffset[iseq];
5057 	    /* ajDebug("%c", seqcharptr[iseq][kipos]); */
5058 	}
5059 	/* ajDebug("\n"); */
5060     }
5061 
5062     *retlen = alignLen(thys, iali);
5063 
5064     /* ajDebug("ret ident:%d sim:%d gap:%d len:%d\n",
5065 	    *retident, *retsim, *retgap, *retlen); */
5066 
5067     freeptr = (void *) seqcharptr;
5068     AJFREE(freeptr);
5069 
5070     AJFREE(matching);
5071     AJFREE(identical);
5072     ajFloatDel(&posScore);
5073 
5074     return;
5075 }
5076 
5077 
5078 
5079 
5080 /* @funcstatic alignTotweight *************************************************
5081 **
5082 ** Returns the total weight for all sequences in an alignment.
5083 **
5084 ** @param [r] thys [const AjPAlign] Alignment object
5085 ** @param [r] iali [ajint] Alignment number
5086 ** @return [float] Total weight for all sequences
5087 **
5088 ** @release 2.1.0
5089 ******************************************************************************/
5090 
alignTotweight(const AjPAlign thys,ajint iali)5091 static float alignTotweight(const AjPAlign thys, ajint iali)
5092 {
5093     ajint i;
5094     const AjPSeq seq = NULL;
5095     float ret  = 0.0;
5096 
5097     for(i=0; i < thys->Nseqs; i++)
5098     {
5099 	seq = alignSeq(thys, i, iali);
5100 	ret += seq->Weight;
5101     }
5102 
5103     return ret;
5104 }
5105 
5106 
5107 
5108 
5109 /* @func ajAlignTraceT ********************************************************
5110 **
5111 ** Reports an AjPAlign object to debug output
5112 **
5113 ** @param [r] thys [const AjPAlign] alignment object
5114 ** @param [r] title [const char*] Trace report title
5115 ** @return [void]
5116 **
5117 ** @release 3.0.0
5118 ******************************************************************************/
5119 
ajAlignTraceT(const AjPAlign thys,const char * title)5120 void ajAlignTraceT(const AjPAlign thys, const char* title)
5121 {
5122     ajDebug("%s\n",title);
5123     ajAlignTrace(thys);
5124 
5125     return;
5126 }
5127 
5128 
5129 
5130 
5131 /* @func ajAlignTrace *********************************************************
5132 **
5133 ** Reports an AjPAlign object to debug output
5134 **
5135 ** @param [r] thys [const AjPAlign] alignment object
5136 ** @return [void]
5137 **
5138 ** @release 2.1.0
5139 ******************************************************************************/
5140 
ajAlignTrace(const AjPAlign thys)5141 void ajAlignTrace(const AjPAlign thys)
5142 {
5143     ajDebug("AjAlignTrace\n");
5144     ajDebug("============\n");
5145     ajDebug("Type: '%S'\n", thys->Type);
5146     ajDebug("Formatstr: '%S'\n", thys->Formatstr);
5147     ajDebug("Format: %d\n", thys->Format);
5148     ajDebug("File: '%F'\n", thys->File);
5149     ajDebug("Header: '%S'\n", thys->Header);
5150     ajDebug("SubHeader: '%S'\n", thys->SubHeader);
5151     ajDebug("Tail: '%S'\n", thys->Tail);
5152     ajDebug("SubTail: '%S'\n", thys->SubTail);
5153     ajDebug("Showusa: %B\n", thys->Showusa);
5154     ajDebug("Multi: %B\n", thys->Multi);
5155     ajDebug("Global: %B\n", thys->Global);
5156 
5157     alignTraceData(thys);
5158 
5159     ajDebug("Nseqs: %d\n", thys->Nseqs);
5160     ajDebug("WidthNmin: %d\n", thys->Nmin);
5161     ajDebug("Nmax: %d\n", thys->Nmax);
5162     ajDebug("Width: %d\n", thys->Width);
5163     ajDebug("Count: %d\n", thys->Count);
5164 
5165     ajDebug("IMatrix: %x\n", thys->IMatrix);
5166     ajDebug("FMatrix: %x\n", thys->FMatrix);
5167     ajDebug("Matrix: '%S'\n", thys->Matrix);
5168     ajDebug("GapPen: '%S'\n", thys->GapPen);
5169     ajDebug("ExtPen: '%S'\n", thys->ExtPen);
5170     ajDebug("SeqOnly: %B\n", thys->SeqOnly);
5171     ajDebug("SeqExternal: %B\n", thys->SeqExternal);
5172 
5173     return;
5174 }
5175 
5176 
5177 
5178 
5179 /* @funcstatic alignTraceData *************************************************
5180 **
5181 ** Report alignment internal list data to debug output
5182 **
5183 ** @param [r] thys [const AjPAlign] Alignment object
5184 ** @return [void]
5185 **
5186 ** @release 2.1.0
5187 ******************************************************************************/
5188 
alignTraceData(const AjPAlign thys)5189 static void alignTraceData(const AjPAlign thys)
5190 {
5191     AlignPData* pdata = NULL;
5192     AlignPData data = NULL;
5193     ajint nali;
5194     ajint iali;
5195     ajint i;
5196     ajint nseq;
5197     AjPStr tmpstr = NULL;
5198 
5199     nseq = thys->Nseqs;
5200 
5201     nali = (ajuint) ajListToarray(thys->Data, (void***) &pdata);
5202     ajDebug("Data list length: %d\n", nali);
5203 
5204     if(!nali)
5205 	return;
5206 
5207     for(iali=0; iali<nali; iali++)
5208     {
5209 	data = pdata[iali];
5210 
5211 	ajDebug("%d LenAli: %d\n", iali, data->LenAli);
5212 	ajDebug("%d NumId: %d\n", iali, data->NumId);
5213 	ajDebug("%d NumSim: %d\n", iali, data->NumSim);
5214 	ajDebug("%d NumGap: %d\n", iali, data->NumGap);
5215 	ajDebug("%d Score: '%S'\n", iali, data->Score);
5216 
5217 	ajDebug("%d Start: {", iali);
5218 
5219 	for(i=0; i < nseq; i++)
5220 	    ajDebug(" %d", data->Start[i]);
5221 
5222 	ajDebug(" }\n");
5223 
5224 	ajDebug("%d End: {", iali);
5225 
5226 	for(i=0; i < nseq; i++)
5227 	    ajDebug(" %d", data->End[i]);
5228 
5229 	ajDebug(" }\n");
5230 
5231 	ajDebug("%d Len: {", iali);
5232 
5233 	for(i=0; i < nseq; i++)
5234 	    ajDebug(" %d", data->Len[i]);
5235 
5236 	ajDebug(" }\n");
5237 
5238 	ajDebug("%d Offset: {", iali);
5239 
5240 	for(i=0; i < nseq; i++)
5241 	    ajDebug(" %d", data->Offset[i]);
5242 
5243 	ajDebug(" }\n");
5244 
5245 	ajDebug("%d SubOffset: {", iali);
5246 
5247 	for(i=0; i < nseq; i++)
5248 	    ajDebug(" %d", data->SubOffset[i]);
5249 
5250 	ajDebug(" }\n");
5251 
5252 	ajDebug("%d Rev: {", iali);
5253 
5254 	for(i=0; i < nseq; i++)
5255 	    ajDebug(" %b", data->Rev[i]);
5256 
5257 	ajDebug(" }\n");
5258 
5259 	ajDebug("%d Seq: {\n", iali);
5260 
5261 	for(i=0; i < nseq; i++)
5262 	    if(ajSeqGetLen(data->Seq[i]) > 40)
5263 	    {
5264 		ajStrAssignSubS(&tmpstr, ajSeqGetSeqS(data->Seq[i]), -20, -1);
5265 		ajDebug("%6d [%d] '%20.20S...%20S'\n",
5266 			ajSeqGetLen(data->Seq[i]), i,
5267 			ajSeqGetSeqS(data->Seq[i]),
5268 			tmpstr);
5269 	    }
5270 	    else
5271 		ajDebug("  %d '%S'\n", i, ajSeqGetSeqS(data->Seq[i]));
5272 
5273 	ajDebug("  }\n");
5274     }
5275     AJFREE(pdata);
5276     ajStrDel(&tmpstr);
5277 
5278     return;
5279 }
5280 
5281 
5282 
5283 
5284 /* @func ajAlignPrintFormat ***************************************************
5285 **
5286 ** Reports the internal data structures
5287 **
5288 ** @param [u] outf [AjPFile] Output file
5289 ** @param [r] full [AjBool] Full report (usually ajFalse)
5290 ** @return [void]
5291 **
5292 ** @release 2.5.0
5293 ** @@
5294 ******************************************************************************/
5295 
ajAlignPrintFormat(AjPFile outf,AjBool full)5296 void ajAlignPrintFormat(AjPFile outf, AjBool full)
5297 {
5298     ajint i = 0;
5299 
5300     ajFmtPrintF(outf, "\n");
5301     ajFmtPrintF(outf, "# Alignment output formats\n");
5302     ajFmtPrintF(outf, "# Name    Format name (or alias)\n");
5303     ajFmtPrintF(outf, "# Minseq  Minimum number of sequences\n");
5304     ajFmtPrintF(outf, "# Maxseq  Minimum number of sequences\n");
5305     ajFmtPrintF(outf, "# Nuc     Valid for nucleotide sequences\n");
5306     ajFmtPrintF(outf, "# Pro     Valid for protein sequences\n");
5307     ajFmtPrintF(outf, "# Header  Include standard header/footer blocks\n");
5308     ajFmtPrintF(outf, "# Desc    Format description\n");
5309     ajFmtPrintF(outf, "# Name         Alias Nuc Nuc Pro Minseq Maxseq "
5310 		"Description\n");
5311     ajFmtPrintF(outf, "\n");
5312     ajFmtPrintF(outf, "AFormat {\n");
5313 
5314     for(i=0; alignFormat[i].Name; i++)
5315     {
5316 	if(full || !alignFormat[i].Alias)
5317 	    ajFmtPrintF(outf, "  %-12s %5B %3B %3B %3B  %6d %6d \"%s\"\n",
5318 			alignFormat[i].Name,
5319 			alignFormat[i].Alias,
5320 			alignFormat[i].Nucleotide,
5321 			alignFormat[i].Protein,
5322 			alignFormat[i].Showheader,
5323 			alignFormat[i].Minseq,
5324 			alignFormat[i].Maxseq,
5325 			alignFormat[i].Desc);
5326     }
5327 
5328     ajFmtPrintF(outf, "}\n\n");
5329 
5330     return;
5331 }
5332 
5333 
5334 
5335 
5336 /* @func ajAlignPrintbookFormat ***********************************************
5337 **
5338 ** Reports the alignment format internals as Docbook text
5339 **
5340 ** @param [u] outf [AjPFile] Output file
5341 ** @return [void]
5342 **
5343 ** @release 6.2.0
5344 ** @@
5345 ******************************************************************************/
5346 
ajAlignPrintbookFormat(AjPFile outf)5347 void ajAlignPrintbookFormat(AjPFile outf)
5348 {
5349     ajint i = 0;
5350     ajint j = 0;
5351     AjPStr namestr = NULL;
5352     AjPList fmtlist;
5353     AjPStr* names;
5354 
5355     fmtlist = ajListstrNew();
5356 
5357     ajFmtPrintF(outf, "<para>The supported alignment formats are summarised "
5358                 "in the table below. The columns are as follows: "
5359                 "<emphasis>Output format</emphasis> (format name), "
5360                 "<emphasis>Nuc</emphasis> (\"true\" indicates nucleotide "
5361                 "sequence data may be represented), "
5362                 "<emphasis>Pro</emphasis> (\"true\" indicates protein "
5363                 "sequence data may be represented, "
5364                 "<emphasis>Header</emphasis> (whether the standard EMBOSS "
5365                 "alignment header is included), "
5366                 "<emphasis>Minseq</emphasis> "
5367                 "(minimum sequences in alignment), "
5368                 "<emphasis>Maxseq</emphasis> "
5369                 "(maximum sequences in alignment) and "
5370                 "<emphasis>Description</emphasis> (short description of "
5371                 "the format).</para> \n\n");
5372 
5373     ajFmtPrintF(outf, "<table frame=\"box\" rules=\"cols\">\n");
5374     ajFmtPrintF(outf, "  <caption>Alignment formats</caption>\n");
5375     ajFmtPrintF(outf, "  <thead>\n");
5376     ajFmtPrintF(outf, "    <tr align=\"center\">\n");
5377     ajFmtPrintF(outf, "      <th>Output Format</th>\n");
5378     ajFmtPrintF(outf, "      <th>Nuc</th>\n");
5379     ajFmtPrintF(outf, "      <th>Pro</th>\n");
5380     ajFmtPrintF(outf, "      <th>Header</th>\n");
5381     ajFmtPrintF(outf, "      <th>Minseq</th>\n");
5382     ajFmtPrintF(outf, "      <th>Maxseq</th>\n");
5383     ajFmtPrintF(outf, "      <th>Description</th>\n");
5384     ajFmtPrintF(outf, "    </tr>\n");
5385     ajFmtPrintF(outf, "  </thead>\n");
5386     ajFmtPrintF(outf, "  <tbody>\n");
5387 
5388     for(i=1; alignFormat[i].Name; i++)
5389     {
5390 	if(!alignFormat[i].Alias)
5391         {
5392             namestr = ajStrNewC(alignFormat[i].Name);
5393             ajListPush(fmtlist, namestr);
5394             namestr = NULL;
5395         }
5396     }
5397 
5398     ajListSort(fmtlist, &ajStrVcmp);
5399     ajListstrToarray(fmtlist, &names);
5400 
5401     for(i=0; names[i]; i++)
5402     {
5403         for(j=0; alignFormat[j].Name; j++)
5404         {
5405             if(ajStrMatchC(names[i],alignFormat[j].Name))
5406             {
5407                 ajFmtPrintF(outf, "    <tr>\n");
5408                 ajFmtPrintF(outf, "      <td>%s</td>\n",
5409                             alignFormat[j].Name);
5410                 ajFmtPrintF(outf, "      <td>%B</td>\n",
5411                             alignFormat[j].Nucleotide);
5412                 ajFmtPrintF(outf, "      <td>%B</td>\n",
5413                             alignFormat[j].Protein);
5414                 ajFmtPrintF(outf, "      <td>%B</td>\n",
5415                             alignFormat[j].Showheader);
5416                 ajFmtPrintF(outf, "      <td>%d</td>\n",
5417                             alignFormat[j].Minseq);
5418                 ajFmtPrintF(outf, "      <td>%d</td>\n",
5419                             alignFormat[j].Maxseq);
5420                 ajFmtPrintF(outf, "      <td>%s</td>\n",
5421                             alignFormat[j].Desc);
5422                 ajFmtPrintF(outf, "    </tr>\n");
5423             }
5424         }
5425     }
5426 
5427     ajFmtPrintF(outf, "  </tbody>\n");
5428     ajFmtPrintF(outf, "</table>\n");
5429     ajStrDel(&namestr);
5430 
5431     names = NULL;
5432     ajListstrFreeData(&fmtlist);
5433 
5434     return;
5435 }
5436 
5437 
5438 
5439 
5440 /* @func ajAlignPrinthtmlFormat ***********************************************
5441 **
5442 ** Reports the internal data structures
5443 **
5444 ** @param [u] outf [AjPFile] Output file
5445 ** @return [void]
5446 **
5447 ** @release 6.4.0
5448 ** @@
5449 ******************************************************************************/
5450 
ajAlignPrinthtmlFormat(AjPFile outf)5451 void ajAlignPrinthtmlFormat(AjPFile outf)
5452 {
5453     ajint i = 0;
5454 
5455     ajFmtPrintF(outf, "<table border=3>");
5456     ajFmtPrintF(outf, "<tr><th>Alignment Format</th><th>Alias</th>\n");
5457     ajFmtPrintF(outf, "<th>Nuc</th><th>Pro</th><th>Showheader</th>\n");
5458     ajFmtPrintF(outf, "<th>Minseq</th><th>Maxseq</th>\n");
5459     ajFmtPrintF(outf, "<th>Description</th></tr>\n");
5460     ajFmtPrintF(outf, "\n");
5461     ajFmtPrintF(outf, "# alignment output formats\n");
5462     ajFmtPrintF(outf, "# Name    Format name (or alias)\n");
5463     ajFmtPrintF(outf, "# Minseq  Minimum number of sequences\n");
5464     ajFmtPrintF(outf, "# Maxseq  Minimum number of sequences\n");
5465     ajFmtPrintF(outf, "# Nuc     Valid for nucleotide sequences\n");
5466     ajFmtPrintF(outf, "# Pro     Valid for protein sequences\n");
5467     ajFmtPrintF(outf, "# Header  Include standard header/footer blocks\n");
5468     ajFmtPrintF(outf, "# Desc    Format description\n");
5469     ajFmtPrintF(outf, "# Name         Alias Nuc Nuc Pro Minseq Maxseq "
5470 		"Description\n");
5471     ajFmtPrintF(outf, "\n");
5472     ajFmtPrintF(outf, "AFormat {\n");
5473 
5474     for(i=0; alignFormat[i].Name; i++)
5475     {
5476 	if(!alignFormat[i].Alias)
5477 	    ajFmtPrintF(outf, "<tr><td>\n%-12s\n</td><td>%5B</td><td>%3B</td>"
5478                         "<td>%3B</td><td>%3B</td><td>%6d</td><td>%6d</td>"
5479                         "<td>\"%s\"</td></tr>\n",
5480 			alignFormat[i].Name,
5481 			alignFormat[i].Alias,
5482 			alignFormat[i].Nucleotide,
5483 			alignFormat[i].Protein,
5484 			alignFormat[i].Showheader,
5485 			alignFormat[i].Minseq,
5486 			alignFormat[i].Maxseq,
5487 			alignFormat[i].Desc);
5488     }
5489 
5490     ajFmtPrintF(outf, "}\n\n");
5491 
5492     return;
5493 }
5494 
5495 
5496 
5497 
5498 /* @func ajAlignPrintwikiFormat ***********************************************
5499 **
5500 ** Reports the alignment format internals as wikitext
5501 **
5502 ** @param [u] outf [AjPFile] Output file
5503 ** @return [void]
5504 **
5505 ** @release 6.2.0
5506 ** @@
5507 ******************************************************************************/
5508 
ajAlignPrintwikiFormat(AjPFile outf)5509 void ajAlignPrintwikiFormat(AjPFile outf)
5510 {
5511     ajint i = 0;
5512     ajint j = 0;
5513     AjPStr namestr = NULL;
5514 
5515     ajFmtPrintF(outf, "{| class=\"wikitable sortable\" border=\"2\"\n");
5516     ajFmtPrintF(outf, "|-\n");
5517     ajFmtPrintF(outf, "!Format!!Nuc!!Pro!!Header!!Min||Max!!"
5518                 "class=\"unsortable\"|Description\n");
5519 
5520     for(i=1; alignFormat[i].Name; i++)
5521     {
5522         if(!alignFormat[i].Alias)
5523         {
5524             ajFmtPrintF(outf, "|-\n");
5525             ajStrAssignC(&namestr, alignFormat[i].Name);
5526 
5527 
5528             for(j=i+1; alignFormat[j].Name; j++)
5529             {
5530                 if(alignFormat[j].Write == alignFormat[i].Write)
5531                 {
5532                     ajFmtPrintAppS(&namestr, "<br>%s", alignFormat[j].Name);
5533                     if(!alignFormat[j].Alias)
5534                     {
5535                         ajWarn("Align output format '%s' same as '%s' "
5536                                "but not alias",
5537                                alignFormat[j].Name,
5538                                alignFormat[i].Name);
5539                     }
5540                 }
5541             }
5542             ajFmtPrintF(outf, "|%S||%B||%B||%B||%d||%d||%s\n",
5543 			namestr,
5544 			alignFormat[i].Nucleotide,
5545 			alignFormat[i].Protein,
5546 			alignFormat[i].Showheader,
5547 			alignFormat[i].Minseq,
5548 			alignFormat[i].Maxseq,
5549 			alignFormat[i].Desc);
5550         }
5551     }
5552 
5553     ajFmtPrintF(outf, "|}\n");
5554 
5555     ajStrDel(&namestr);
5556 
5557     return;
5558 }
5559 
5560 
5561 
5562 
5563 /* @funcstatic alignConsSet ***************************************************
5564 **
5565 ** Sets consensus to be a copy of a numbered sequence
5566 **
5567 ** @param [r] thys [const AjPAlign] Alignment object
5568 ** @param [r] iali [ajint] alignment number
5569 ** @param [r] numseq [ajint] Sequence number to use as the consensus
5570 ** @param [w] cons [AjPStr*] the created consensus sequence
5571 ** @return [AjBool] True on success
5572 **
5573 ** @release 5.0.0
5574 ******************************************************************************/
5575 
alignConsSet(const AjPAlign thys,ajint iali,ajint numseq,AjPStr * cons)5576 static AjBool alignConsSet(const AjPAlign thys, ajint iali,
5577 			   ajint numseq, AjPStr *cons)
5578 {
5579     ajint   nseqs;
5580     ajint   mlen;
5581 
5582     ajint   kkpos;		/* alignment position loop variable */
5583 
5584     const char *seqcharptr;
5585     char gapch;
5586     AlignPData data = NULL;
5587     const AjPSeq* seqs;
5588     const AjPSeq seq;
5589 
5590     data = alignData(thys, iali);
5591     seqs = alignSeqs(thys, iali);
5592 
5593     nseqs   = thys->Nseqs;
5594     mlen    = data->LenAli;
5595 
5596     ajDebug("alignConsSet iali:%d numseq:%d nseqs:%d mlen:%d\n",
5597 	    iali, numseq, nseqs, mlen);
5598 
5599     gapch = '-';
5600 
5601     seq = seqs[numseq];
5602     seqcharptr = ajSeqGetSeqC(seq);
5603 
5604     ajStrAssignClear(cons);
5605 
5606     /* For each position in the alignment, calculate consensus character */
5607 
5608     for(kkpos=0; kkpos<data->SubOffset[0]; kkpos++)
5609 	ajStrAppendK(cons, gapch);
5610 
5611     for(kkpos=0; kkpos< mlen; kkpos++)
5612 	ajStrAppendK(cons,seqcharptr[kkpos+data->SubOffset[numseq]]);
5613 
5614     /* ajDebug("ret ident:%d sim:%d gap:%d len:%d\n",
5615 	    *retident, *retsim, *retgap, *retlen); */
5616 
5617     return ajTrue;
5618 }
5619 
5620 
5621 
5622 
5623 /* @func ajAlignConsAmbig *****************************************************
5624 **
5625 ** Calculates alignment consensus of ambiguity characters from a sequence set.
5626 **
5627 ** @param [r] thys [const AjPSeqset] Sequence set.
5628 ** @param [w] cons [AjPStr*] the created consensus sequence
5629 ** @return [AjBool] ajTrue on success
5630 **
5631 ** @release 6.0.0
5632 ******************************************************************************/
5633 
ajAlignConsAmbig(const AjPSeqset thys,AjPStr * cons)5634 AjBool ajAlignConsAmbig(const AjPSeqset thys, AjPStr *cons)
5635 {
5636     if(ajSeqsetIsNuc(thys))
5637         return ajAlignConsAmbigNuc(thys, cons);
5638 
5639     return ajAlignConsAmbigProt(thys, cons);
5640 }
5641 
5642 
5643 
5644 
5645 /* @func ajAlignConsAmbigNuc **************************************************
5646 **
5647 ** Calculates alignment consensus of ambiguity characters from a sequence set.
5648 **
5649 ** @param [r] thys [const AjPSeqset] Sequence set.
5650 ** @param [w] cons [AjPStr*] the created consensus sequence
5651 ** @return [AjBool] ajTrue on success
5652 **
5653 ** @release 6.0.0
5654 ******************************************************************************/
5655 
ajAlignConsAmbigNuc(const AjPSeqset thys,AjPStr * cons)5656 AjBool ajAlignConsAmbigNuc(const AjPSeqset thys, AjPStr *cons)
5657 {
5658     ajint   iseq;	     /* iterate over sequences (outer loop) */
5659 
5660     ajint   nseqs;
5661     ajint   mlen;
5662     AjBool  isgap;
5663     ajint   kipos;	   /* alignment position */
5664 
5665     const char **seqcharptr;
5666     char res;
5667     ajuint binres;
5668 
5669     void *freeptr = NULL;
5670 
5671     nseqs   = thys->Size;
5672     mlen    = thys->Len;
5673 
5674     AJCNEW(seqcharptr,nseqs);
5675 
5676     /* ajDebug("fplural:%.2f ident:%.1f mlen: %d\n",
5677 	    fplural, ident, mlen); */
5678 
5679     for(iseq=0;iseq<nseqs;iseq++)	/* get sequence as string */
5680 	seqcharptr[iseq] =  ajSeqsetGetseqSeqC(thys, iseq);
5681 
5682     /* For each position in the alignment, calculate consensus character */
5683 
5684     for(kipos=0; kipos< mlen; kipos++)
5685     {
5686         binres = 0;
5687         isgap = ajFalse;
5688 
5689 	for(iseq=0;iseq<nseqs;iseq++)
5690 	{
5691 	    if(seqcharptr[iseq][kipos] == ' ' ||
5692 	       seqcharptr[iseq][kipos] == '-')
5693                 isgap = ajTrue;
5694 
5695             binres |= ajBaseAlphaToBin(seqcharptr[iseq][kipos]);
5696 	}
5697 
5698         res = ajBaseBinToAlpha(binres);
5699 
5700         if(isgap)
5701             res = (char)tolower((int)res);
5702 
5703 	ajStrAppendK(cons,res);
5704     }
5705 
5706     freeptr = (void *) seqcharptr;
5707     AJFREE(freeptr);
5708 
5709     return ajTrue;
5710 }
5711 
5712 
5713 
5714 
5715 /* @func ajAlignConsAmbigProt *************************************************
5716 **
5717 ** Calculates alignment consensus of ambiguity characters from a sequence set.
5718 **
5719 ** @param [r] thys [const AjPSeqset] Sequence set.
5720 ** @param [w] cons [AjPStr*] the created consensus sequence
5721 ** @return [AjBool] ajTrue on success
5722 **
5723 ** @release 6.0.0
5724 ******************************************************************************/
5725 
ajAlignConsAmbigProt(const AjPSeqset thys,AjPStr * cons)5726 AjBool ajAlignConsAmbigProt(const AjPSeqset thys, AjPStr *cons)
5727 {
5728     ajint   iseq;	     /* iterate over sequences (outer loop) */
5729 
5730     ajint   nseqs;
5731     ajint   mlen;
5732     AjBool  isgap;
5733     ajint   kipos;	   /* alignment position */
5734 
5735     const char **seqcharptr;
5736     char res;
5737     ajuint binres;
5738 
5739     void *freeptr = NULL;
5740 
5741     nseqs   = thys->Size;
5742     mlen    = thys->Len;
5743 
5744     AJCNEW(seqcharptr,nseqs);
5745 
5746     /* ajDebug("fplural:%.2f ident:%.1f mlen: %d\n",
5747 	    fplural, ident, mlen); */
5748 
5749     for(iseq=0;iseq<nseqs;iseq++)	/* get sequence as string */
5750 	seqcharptr[iseq] =  ajSeqsetGetseqSeqC(thys, iseq);
5751 
5752     /* For each position in the alignment, calculate consensus character */
5753 
5754     for(kipos=0; kipos< mlen; kipos++)
5755     {
5756         binres = 0;
5757         isgap = ajFalse;
5758 
5759 	for(iseq=0;iseq<nseqs;iseq++)
5760 	{
5761 	    if(seqcharptr[iseq][kipos] == ' ' ||
5762 	       seqcharptr[iseq][kipos] == '-')
5763                 isgap = ajTrue;
5764             binres |= ajResidueAlphaToBin(seqcharptr[iseq][kipos]);
5765 	}
5766 
5767         res = ajResidueBinToAlpha(binres);
5768 
5769         if(isgap)
5770             res = (char)tolower((int)res);
5771 
5772 	ajStrAppendK(cons,res);
5773     }
5774 
5775     freeptr = (void *) seqcharptr;
5776     AJFREE(freeptr);
5777 
5778     return ajTrue;
5779 }
5780 
5781 
5782 
5783 
5784 /* @func ajAlignConsStats *****************************************************
5785 **
5786 ** Calculates alignment statistics (and a consensus) from a sequence set.
5787 **
5788 ** @param [r] thys [const AjPSeqset] Sequence set.
5789 ** @param [w] mymatrix [AjPMatrix] User-defined matrix, or NULL for the default
5790 ** @param [w] cons [AjPStr*] the created consensus sequence
5791 ** @param [w] retident [ajint*] number of residues identical in all sequences
5792 ** @param [w] retsim   [ajint*] number of residues similar in all sequences
5793 ** @param [w] retgap   [ajint*] number of residues with a gap in 1 sequence
5794 ** @param [w] retlen [ajint*] length of the alignment
5795 ** @return [AjBool] ajTrue on success
5796 **
5797 ** @release 2.9.0
5798 ******************************************************************************/
5799 
ajAlignConsStats(const AjPSeqset thys,AjPMatrix mymatrix,AjPStr * cons,ajint * retident,ajint * retsim,ajint * retgap,ajint * retlen)5800 AjBool ajAlignConsStats(const AjPSeqset thys, AjPMatrix mymatrix, AjPStr *cons,
5801 			 ajint* retident, ajint* retsim, ajint* retgap,
5802 			 ajint* retlen)
5803 {
5804     AjPMatrix imatrix = NULL;
5805     AjPStr matname    = NULL;
5806     ajint   imat;     /* iterate over identical and matching arrays */
5807     ajint   iseq;	     /* iterate over sequences (outer loop) */
5808     ajint   jseq;	     /* iterate over sequences (inner loop) */
5809 
5810     ajint   **matrix  = NULL;
5811     float   **fmatrix = NULL;
5812     ajint   m1 = 0;
5813     ajint   m2 = 0;
5814     ajint   matsize;
5815     ajint   matchingmaxindex;
5816     ajint   identicalmaxindex;
5817     ajint   nseqs;
5818     ajint   mlen;
5819 
5820     float   max;
5821     float   contri = 0;
5822     float   contrj = 0;
5823 
5824     AjPSeqCvt cvt = 0;
5825     AjPFloat  posScore=NULL; /* cumulative similarity scores by sequence */
5826     /* for matching all other sequences */
5827     float   *identical;	    /* cum. weight for each valid character */
5828     float   *matching;	 /* cum. weight for matching this character */
5829     ajint   highindex;	   /* position of highest score in posScore */
5830     ajint   kkpos;		/* alignment position loop variable */
5831     ajint   kipos;	   /* alignment position kkpos + iseq start */
5832     ajint   kjpos;	   /* alignment position kkpos + jseq start */
5833     ajint   khpos;		 /* alignment position in highindex */
5834 
5835     float himatch = 0.0;	/* highest match score (often used) */
5836 
5837     const char **seqcharptr;
5838     char res;
5839     char nocon;
5840     char gapch;
5841     float fplural;
5842     float fplurality = 51.0;
5843     float ident;
5844     AjBool isident;
5845     AjBool issim;
5846     AjBool isgap;
5847     const AjPSeq* seqs;
5848     ajint numres;		 /* number of residues (not spaces) */
5849     void *freeptr;
5850 
5851     if(mymatrix)
5852 	imatrix = mymatrix;
5853 
5854     if(!imatrix)
5855     {
5856 	if(ajSeqsetIsNuc(thys))
5857 	    ajStrAssignC(&matname, "EDNAFULL");
5858 	else
5859 	    ajStrAssignC(&matname, "EBLOSUM62");
5860 	imatrix = ajMatrixNewFile(matname);
5861     }
5862 
5863     ajStrDel(&matname);
5864 
5865     *retident = 0;
5866     *retsim   = 0;
5867     *retgap   = 0;
5868 
5869     nseqs   = thys->Size;
5870     mlen    = thys->Len;
5871     fplural = ajSeqsetGetTotweight(thys) * fplurality / (float)100.;
5872     ident   = ajSeqsetGetTotweight(thys);
5873 
5874     /* ajDebug("fplural:%.2f ident:%.1f mlen: %d\n",
5875 	    fplural, ident, mlen); */
5876 
5877     matrix  = ajMatrixGetMatrix(imatrix);
5878     cvt     = ajMatrixGetCvt(imatrix);	/* return conversion table */
5879     matsize = ajMatrixGetSize(imatrix);
5880 
5881     AJCNEW(seqs,nseqs);
5882     AJCNEW(seqcharptr,nseqs);
5883     AJCNEW(identical,matsize);
5884     AJCNEW(matching,matsize);
5885 
5886     posScore = ajFloatNew();
5887 
5888     gapch = '-';
5889     nocon = 'x';
5890 
5891     for(iseq=0;iseq<nseqs;iseq++)	/* get sequence as string */
5892     {
5893 	seqcharptr[iseq] =  ajSeqsetGetseqSeqC(thys, iseq);
5894 	seqs[iseq]       =  ajSeqsetGetseqSeq(thys, iseq);
5895     }
5896 
5897     /* For each position in the alignment, calculate consensus character */
5898 
5899     for(kkpos=0; kkpos< mlen; kkpos++)
5900     {
5901 	res = gapch;
5902 
5903 	isident = ajFalse;
5904 	issim   = ajFalse;
5905 	isgap   = ajFalse;
5906 
5907 	/*
5908 	 ** reset identities and +ve matches
5909 	 */
5910 
5911 	for(imat=0;imat<matsize;imat++)
5912 	{
5913 	    identical[imat] = 0.0; /* weights of all sequence chars in col. */
5914 	    matching[imat] = 0.0;
5915 	}
5916 
5917 	/*
5918 	 ** reset the posScore array
5919 	 */
5920 
5921 	for(iseq=0;iseq<nseqs;iseq++)
5922 	    ajFloatPut(&posScore,iseq,0.);
5923 
5924 	/*
5925 	 ** generate scores (identical, posScore) for columns
5926 	 */
5927 
5928 	for(iseq=0;iseq<nseqs;iseq++)
5929 	{
5930 	    kipos = kkpos;
5931 	    m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[iseq][kipos]);
5932 
5933 	    if(m1)
5934 		identical[m1] += seqs[iseq]->Weight;
5935 
5936 	    for(jseq=iseq+1;jseq<nseqs;jseq++)
5937 	    {
5938 		kjpos = kkpos;
5939 		m2 = ajSeqcvtGetCodeK(cvt,seqcharptr[jseq][kjpos]);
5940 
5941 		if(m1 && m2)
5942 		{
5943 		    if(matrix)
5944 		    {
5945 			contri = (float)matrix[m1][m2]*seqs[jseq]->Weight
5946 			  +ajFloatGet(posScore,iseq);
5947 			contrj = (float)matrix[m1][m2]*seqs[iseq]->Weight
5948 			  +ajFloatGet(posScore,jseq);
5949 		    }
5950 		    else
5951 		    {
5952 			contri = fmatrix[m1][m2]*seqs[jseq]->Weight
5953 			  +ajFloatGet(posScore,iseq);
5954 			contrj = fmatrix[m1][m2]*seqs[iseq]->Weight
5955 			  +ajFloatGet(posScore,jseq);
5956 		    }
5957 
5958 		    ajFloatPut(&posScore,iseq,contri);
5959 		    ajFloatPut(&posScore,jseq,contrj);
5960 		}
5961 	    }
5962 	}
5963 
5964 	/*
5965 	 ** highindex is the highest scoring position (seq no.) in posScore
5966 	 ** for 2 sequences this appears to be usually 0
5967 	 */
5968 
5969 	highindex = -1;
5970 	max       = -FLT_MAX;
5971 	numres    = 0;
5972 
5973 	for(iseq=0;iseq<nseqs;iseq++)
5974 	{
5975 	    kipos = kkpos;
5976 
5977             if(seqcharptr[iseq][kipos] != ' ' &&
5978 	       seqcharptr[iseq][kipos] != '-')
5979 		numres++;
5980 
5981 	    if(ajFloatGet(posScore,iseq) > max)
5982 	    {
5983 		highindex = iseq;
5984 		max       = ajFloatGet(posScore,iseq);
5985 	    }
5986 	}
5987 
5988 	/* highindex is now set */
5989 
5990 	/*
5991 	 ** find +ve matches in the column
5992 	 ** m1 is non-zero for a valid character in iseq
5993 	 ** m2 is non-zero for a valid character in jseq
5994 	 */
5995 
5996 	for(iseq=0;iseq<nseqs;iseq++)
5997 	{
5998 	    kipos = kkpos;
5999 	    m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[iseq][kipos]);
6000 
6001 	    if(E_FPZERO(matching[m1],U_FEPS))
6002 	    {   /* first time we have met this character */
6003 		for(jseq=0;jseq<nseqs;jseq++) /* all (other) sequences */
6004 		{
6005 		    kjpos = kkpos;
6006 		    m2    = ajSeqcvtGetCodeK(cvt, seqcharptr[jseq][kjpos]);
6007 
6008 		    if(matrix)
6009 		    {
6010 			if(m1 && m2 && matrix[m1][m2] > 0)
6011 			{		/* 'matching' if positive */
6012 			    matching[m1] += seqs[jseq]->Weight;
6013 			}
6014 		    }
6015 		    else
6016 		    {
6017 			if(m1 && m2 && fmatrix[m1][m2] > 0.0)
6018 			{
6019 			    matching[m1] += seqs[jseq]->Weight;
6020 			}
6021 		    }
6022 
6023 		}
6024 	    }
6025 	}
6026 
6027 	matchingmaxindex  = 0;	  /* get max matching and identical */
6028 	identicalmaxindex = 0;
6029 
6030 	for(iseq=0;iseq<nseqs;iseq++)
6031 	{
6032 	    kipos = kkpos;
6033 	    m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[iseq][kipos]);
6034 
6035 	    if(identical[m1] > identical[identicalmaxindex])
6036 		identicalmaxindex= m1;
6037 	}
6038 
6039 	for(iseq=0;iseq<nseqs;iseq++)
6040 	{
6041 	    kipos = kkpos;
6042 	    m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[iseq][kipos]);
6043 
6044 	    if(matching[m1] > matching[matchingmaxindex])
6045 	    {
6046 		matchingmaxindex= m1;
6047 	    }
6048 	    else if(E_FPEQ(matching[m1],matching[matchingmaxindex],U_FEPS))
6049 	    {
6050 		if(identical[m1] > identical[matchingmaxindex])
6051 		    matchingmaxindex= m1;
6052 	    }
6053 
6054 	    if(seqcharptr[iseq][kipos] == '-' ||
6055 	       seqcharptr[iseq][kipos] == ' ')
6056 		isgap=ajTrue;
6057 	}
6058 
6059 	khpos = kkpos;
6060 	himatch = matching[ajSeqcvtGetCodeK(cvt,seqcharptr[highindex][khpos])];
6061 
6062 	if(identical[identicalmaxindex] >= ident)
6063             isident=ajTrue;
6064 
6065 	if(matching[matchingmaxindex] >= fplural)
6066             issim=ajTrue;
6067 
6068 	/* plurality check */
6069 	res = gapch;
6070 	if(himatch >= fplural)
6071 	    if(seqcharptr[highindex][khpos] != '-')
6072 		res = toupper((int)seqcharptr[highindex][khpos]);
6073 
6074 	if(nseqs > 1 && E_FPEQ(himatch,seqs[highindex]->Weight,U_FEPS))
6075 	{
6076 	    if(numres > 1)
6077 		res = nocon;
6078 	    else
6079 		res = gapch;
6080 	}
6081 
6082 	if(issim && ! isident)
6083 	    res = tolower((int)res);
6084 
6085 	ajStrAppendK(cons,res);
6086 	if(isident) ++*retident;
6087 	if(issim) ++*retsim;
6088 	if(isgap) ++*retgap;
6089 
6090 	/* ajDebug("id:%b sim:%b gap:%b res:%c '",
6091 	           isident, issim, isgap, res); */
6092 	for(iseq=0; iseq<nseqs; iseq++)
6093 	{
6094 	    kipos = kkpos;
6095 	    /* ajDebug("%c", seqcharptr[iseq][kipos]); */
6096 	}
6097 	/* ajDebug("'\n");	 */
6098     }
6099 
6100     *retlen = ajSeqsetGetLen(thys);
6101 
6102     /* ajDebug("ret ident:%d sim:%d gap:%d len:%d\n",
6103 	    *retident, *retsim, *retgap, *retlen); */
6104 
6105 
6106     freeptr = (void *) seqs;
6107     AJFREE(freeptr);
6108 
6109     freeptr = (void *) seqcharptr;
6110     AJFREE(freeptr);
6111 
6112     AJFREE(matching);
6113     AJFREE(identical);
6114     ajFloatDel(&posScore);
6115     ajMatrixDel(&imatrix);
6116 
6117     return ajTrue;
6118 }
6119 
6120 
6121 
6122 
6123 /* @funcstatic alignCigar *****************************************************
6124 **
6125 ** Return CIGAR string for a pairwise alignment, as well as match region
6126 ** of the query sequence and the number of mismatches
6127 **
6128 ** @param [r] qryseq [const AjPSeq] query sequence
6129 ** @param [r] refseq [const AjPSeq] reference sequence
6130 ** @param [r] qstart [ajint] query sequence start point
6131 ** @param [r] qend [ajint] query sequence end point
6132 ** @param [r] rstart [ajint] reference sequence start point
6133 ** @param [w] qseq [AjPStr*] query sequence
6134 ** @param [w] cigar [AjPStr*] CIGAR string
6135 ** @param [r] seqexternal [AjBool] Sequences are external references,
6136 **              ignore their offsets and use qstart and qend
6137 ** @return [ajint] number of mismatches
6138 **
6139 ** @release 6.3.0
6140 ******************************************************************************/
6141 
alignCigar(const AjPSeq qryseq,const AjPSeq refseq,ajint qstart,ajint qend,ajint rstart,AjPStr * qseq,AjPStr * cigar,AjBool seqexternal)6142 static ajint alignCigar(const AjPSeq qryseq, const AjPSeq refseq,
6143 	                ajint qstart, ajint qend,
6144 	                ajint rstart,
6145 	                AjPStr *qseq, AjPStr* cigar, AjBool seqexternal)
6146 {
6147     const char* qryseqc;
6148     const char* refseqc;
6149 
6150     static char gapchars[] = "-~.? "; /* all known gap characters */
6151 
6152     ajint m=0;
6153     ajint i=0; /* insertions to the reference sequence */
6154     ajint d=0; /* deletions from the reference sequence */
6155     ajuint qoffset=0;
6156     ajuint qoffend=0;
6157     ajint nmismatches=0;
6158     ajint qseqlen;
6159 
6160     qseqlen = ajSeqGetLen(qryseq);
6161     qryseqc = ajSeqGetSeqC(qryseq);
6162     refseqc = ajSeqGetSeqC(refseq);
6163 
6164     if(seqexternal)
6165     {
6166 	qoffset = qstart;
6167 	qoffend = qend;
6168     }
6169     else
6170     {
6171 	qoffset = ajSeqGetOffset(qryseq);
6172 	qoffend = ajSeqGetOffend(qryseq);
6173     }
6174 
6175     if(ajSeqIsReversed(qryseq)&&qoffend>0)
6176 	ajFmtPrintAppS(cigar, "%dH",qoffend);
6177     else
6178 	if(qoffset>0)
6179 	    ajFmtPrintAppS(cigar, "%dH",qoffset);
6180 
6181 
6182     for(; qstart<qend; qstart++, rstart++)
6183     {
6184 	if(strchr(gapchars,qryseqc[qstart]))
6185 	{
6186 	    d++;
6187 
6188 	    if(m>0)
6189 	    {
6190 		ajFmtPrintAppS(cigar, "%uM",m);
6191 		m=0;
6192 	    }
6193 
6194 	    if(i>0)
6195 	    {
6196 		ajFmtPrintAppS(cigar, "%uI",d);
6197 		i=0;
6198 	    }
6199 	}
6200 	else if(strchr(gapchars,refseqc[rstart]))
6201 	{
6202 	    i++;
6203 
6204 	    if(m>0)
6205 	    {
6206 		ajFmtPrintAppS(cigar, "%uM",m);
6207 		m=0;
6208 	    }
6209 
6210 	    if(d>0)
6211 	    {
6212 		ajFmtPrintAppS(cigar, "%uD",i);
6213 		d=0;
6214 	    }
6215 
6216 	    ajStrAppendK(qseq,qryseqc[qstart]);
6217 
6218 	}
6219 	else /* match */
6220 	{
6221 	    m++;
6222 	    ajStrAppendK(qseq,qryseqc[qstart]);
6223 
6224 	    if(toupper((int)qryseqc[qstart])!=toupper((int)refseqc[rstart]))
6225 		nmismatches ++;
6226 
6227 	    if(d>0)
6228 	    {
6229 		ajFmtPrintAppS(cigar, "%uD",d);
6230 		d=0;
6231 
6232 	    }
6233 
6234 	    if(i>0)
6235 	    {
6236 		ajFmtPrintAppS(cigar, "%uI",i);
6237 		i=0;
6238 	    }
6239 	}
6240     }
6241 
6242     if(m>0)
6243 	ajFmtPrintAppS(cigar, "%uM",m);
6244     else if(d>0)
6245 	ajFmtPrintAppS(cigar, "%uD",d);
6246     else if(i>0)
6247 	ajFmtPrintAppS(cigar, "%uI",i);
6248 
6249     if(ajSeqIsReversed(qryseq)&&qend>qseqlen)
6250 	ajFmtPrintAppS(cigar, "%dH",qseqlen-qend);
6251     else if(qend<qseqlen)
6252 	    ajFmtPrintAppS(cigar, "%dH",qseqlen-qend);
6253 
6254     return nmismatches;
6255 }
6256 
6257 
6258 
6259 
6260 /* @func ajAlignExit **********************************************************
6261 **
6262 ** Cleans up report processing internal memory
6263 **
6264 ** @return [void]
6265 **
6266 ** @release 4.0.0
6267 ** @@
6268 ******************************************************************************/
6269 
ajAlignExit(void)6270 void ajAlignExit(void)
6271 {
6272     return;
6273 }
6274