1 /* @source ajassemwrite *******************************************************
2 **
3 ** AJAX assembly writing functions
4 **
5 ** @author Copyright (C) 2010 Peter Rice
6 ** @version $Revision: 1.50 $
7 ** @modified Oct 25 2010 pmr First AJAX version
8 ** @modified $Date: 2012/07/05 12:04:37 $ by $Author: rice $
9 ** @@
10 **
11 ** This library is free software; you can redistribute it and/or
12 ** modify it under the terms of the GNU Lesser General Public
13 ** License as published by the Free Software Foundation; either
14 ** version 2.1 of the License, or (at your option) any later version.
15 **
16 ** This library is distributed in the hope that it will be useful,
17 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ** Lesser General Public License for more details.
20 **
21 ** You should have received a copy of the GNU Lesser General Public
22 ** License along with this library; if not, write to the Free Software
23 ** Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
24 ** MA 02110-1301, USA.
25 **
26 ******************************************************************************/
27
28
29 #include "ajlib.h"
30
31 #include "ajassem.h"
32 #include "ajassemwrite.h"
33 #include "ajseqbam.h"
34 #include "ajfileio.h"
35 #include "ajseq.h"
36 #include "ajutil.h"
37 #include "ajnam.h"
38 #include "ajbamindex.h"
39
40 #include <ctype.h>
41
42 #ifdef WIN32
43 #define strdup _strdup
44 #endif
45
46 static AjPStr assemoutSamLinetxt = NULL;
47
48 static AjBool assemoutWriteList(AjPFile outf, const AjPAssem assem);
49
50 static AjBool assemoutWriteBam(AjPFile outf, const AjPAssem assem);
51 static AjBool assemoutWriteMaf(AjPFile outf, const AjPAssem assem);
52 static AjBool assemoutWriteSam(AjPFile outf, const AjPAssem assem);
53 static AjBool assemoutWriteNextList(AjPOutfile outfile, const AjPAssem assem);
54 static AjBool assemoutWriteNextBam(AjPOutfile outfile, const AjPAssem assem);
55 static AjBool assemoutWriteNextMaf(AjPOutfile outfile, const AjPAssem assem);
56 static AjBool assemoutWriteNextSam(AjPOutfile outfile, const AjPAssem assem);
57 static AjBool assemoutWriteSamAlignment(AjPFile outf, const AjPAssemRead r,
58 AjPAssemContig const* contigs,
59 ajint ncontigs);
60 static AjBool assemoutWriteBamAlignment(AjPSeqBamBgzf gzfile,
61 const AjPAssemRead r,
62 AjPSeqBam bam);
63
64 static AjPStr assemoutMakeCigar(const char* contig, const char* readseq);
65
66 static const AjPStr assemSAMGetReadgroupHeaderlines(const AjPAssem assem);
67 static void assemoutBamIndex(AjPOutfile outf);
68
69
70
71
72 /* @datastatic AssemPOutFormat ************************************************
73 **
74 ** Assembly output formats data structure
75 **
76 ** @alias AssemSoutFormat
77 ** @alias AssemOOutFormat
78 **
79 ** @attr Name [const char*] Format name
80 ** @attr Desc [const char*] Format description
81 ** @attr Write [AjBool function] Output function, returns ajTrue on success
82 ** @attr WriteNext [AjBool function] Partial output function
83 ** @attr Cleanup [void function] Function to write remaining lines on closing
84 ** @@
85 ******************************************************************************/
86
87 typedef struct AssemSOutFormat
88 {
89 const char *Name;
90 const char *Desc;
91 AjBool (*Write) (AjPFile outf, const AjPAssem assem);
92 AjBool (*WriteNext) (AjPOutfile outfile, const AjPAssem assem);
93 void (*Cleanup) (AjPOutfile outf);
94 } AssemOOutFormat;
95
96 #define AssemPOutFormat AssemOOutFormat*
97
98
99 static AssemOOutFormat assemoutFormatDef[] =
100 {
101 /* "Name", "Description" */
102 /* WriteFunction WriteNextFunction */
103
104 {"bam", "BAM assembly format",
105 &assemoutWriteBam, &assemoutWriteNextBam, &assemoutBamIndex},
106 {"list", "Identifier only",
107 &assemoutWriteList, &assemoutWriteNextList, NULL},
108 {"maf", "MIRA assembly format",
109 &assemoutWriteMaf, &assemoutWriteNextMaf, NULL},
110 {"sam", "SAM assembly format",
111 &assemoutWriteSam, &assemoutWriteNextSam, NULL},
112
113 {NULL, NULL, NULL, NULL, NULL}
114 };
115
116
117
118
119 /* @filesection ajassem *******************************************************
120 **
121 ** @nam1rule aj Function belongs to the AJAX library.
122 **
123 ******************************************************************************/
124
125
126
127
128
129 /* @datasection [AjPFile] Assembly data output ********************************
130 **
131 ** Function is for manipulating assembly data objects
132 **
133 ** @nam2rule Assemout Assembly data output
134 **
135 ******************************************************************************/
136
137
138
139
140 /* @section assembly data outputs *********************************************
141 **
142 ** These functions write the assembly data provided by the first argument
143 **
144 ** @fdata [AjPFile]
145 **
146 ** @nam3rule Write Write assembly data
147 ** @nam4rule Format Use a named format
148 ** @nam4rule Next Write next block of data
149 **
150 ** @argrule Write outf [AjPOutfile] Output file
151 ** @argrule Write assem [const AjPAssem] Assembly data
152 ** @argrule Format fmt [const AjPStr] Format name
153 **
154 ** @valrule * [AjBool] true on success
155 **
156 ** @fcategory output
157 **
158 ******************************************************************************/
159
160
161
162
163 /* @func ajAssemoutWrite ******************************************************
164 **
165 ** Write assembly data in a named format
166 **
167 ** @param [u] outf [AjPOutfile] Output file
168 ** @param [r] assem [const AjPAssem] Assembly data object
169 **
170 ** @return [AjBool] True on success
171 **
172 **
173 ** @release 6.5.0
174 ******************************************************************************/
175
ajAssemoutWrite(AjPOutfile outf,const AjPAssem assem)176 AjBool ajAssemoutWrite(AjPOutfile outf, const AjPAssem assem)
177 {
178 ajuint i = ajOutfileGetFormatindex(outf);
179 AjPFile outfile = ajOutfileGetFile(outf);
180
181 ajDebug("ajAssemoutWrite %d\n",i);
182 return (*assemoutFormatDef[i].Write)(outfile, assem);
183 }
184
185
186
187
188 /* @func ajAssemoutWriteNext **************************************************
189 **
190 ** Write latest chunk of assembly data in a named format
191 **
192 ** @param [u] outf [AjPOutfile] Output file
193 ** @param [r] assem [const AjPAssem] Assembly data object
194 **
195 ** @return [AjBool] True on success
196 **
197 **
198 ** @release 6.5.0
199 ******************************************************************************/
200
ajAssemoutWriteNext(AjPOutfile outf,const AjPAssem assem)201 AjBool ajAssemoutWriteNext(AjPOutfile outf, const AjPAssem assem)
202 {
203 ajuint i = ajOutfileGetFormatindex(outf);
204
205 ajDebug("ajAssemoutWriteNext %d\n",i);
206 if(!outf->Cleanup && assemoutFormatDef[i].Cleanup)
207 outf->Cleanup = assemoutFormatDef[i].Cleanup;
208
209 return (*assemoutFormatDef[i].WriteNext)(outf, assem);
210 }
211
212
213
214
215 /* @funcstatic assemoutWriteBam ***********************************************
216 **
217 ** Writes an assembly in binary alignment/map (BAM) format.
218 **
219 ** The sort order is "unsorted". Samtools can re-sort the file.
220 **
221 ** @param [u] outf [AjPFile] Output file object.
222 ** @param [r] assem [const AjPAssem] Assembly object
223 ** @return [AjBool] True on success
224 **
225 ** @release 6.5.0
226 ** @@
227 ******************************************************************************/
228
assemoutWriteBam(AjPFile outf,const AjPAssem assem)229 static AjBool assemoutWriteBam(AjPFile outf, const AjPAssem assem)
230 {
231 AjPSeqBamHeader header = NULL;
232 AjPAssemContig c = NULL;
233 AjPSeqBam bam;
234 AjPAssemRead r = NULL;
235 AjPAssemContig* contigs = NULL;
236 AjPAssemTag t = NULL;
237 AjIList j = NULL;
238 AjPSeqBamBgzf gzfile = NULL;
239 AjPStr headertext=NULL;
240 const AjPStr rgheadertext=NULL;
241 AjBool ret = ajTrue;
242 ajint i=0;
243 ajulong ncontigs=0UL;
244
245
246 if(ajListGetLength(assem->ContigsOrder))
247 ncontigs = ajListToarray(assem->ContigsOrder, (void***)&contigs);
248 else
249 ncontigs = ajTableToarrayValues(assem->Contigs, (void***)&contigs);
250
251 AJNEW0(bam);
252 bam->m_data=10;
253 AJCNEW0(bam->data, bam->m_data);
254
255 gzfile = ajSeqBamBgzfNew(ajFileGetFileptr(outf), "w");
256
257 ajFmtPrintS(&headertext, "@HD\tVN:1.3\tSO:%s\n",
258 ajAssemGetSortorderC(assem));
259 header = ajSeqBamHeaderNewN((ajuint) ncontigs);
260
261 while (contigs[i]) /* contigs */
262 {
263 c = contigs[i];
264
265 if(ajStrMatchC(c->Name, "*"))
266 {
267 i++;
268 continue;
269 }
270
271 header->target_name[i] = strdup(ajStrGetPtr(c->Name));
272 header->target_len[i++] = c->Length;
273
274 ajFmtPrintAppS(&headertext, "@SQ\tSN:%S\tLN:%d",
275 c->Name, c->Length);
276
277 if(c->URI)
278 ajFmtPrintAppS(&headertext, "\tUR:%S", c->URI);
279
280 if(c->MD5)
281 ajFmtPrintAppS(&headertext, "\tM5:%S", c->MD5);
282
283 if(c->Species)
284 ajFmtPrintAppS(&headertext, "\tSP:%S", c->Species);
285
286 ajFmtPrintAppS(&headertext, "\n");
287
288
289 j = ajListIterNewread(c->Tags);
290 while (!ajListIterDone(j))
291 {
292 t = ajListIterGet(j);
293 ajFmtPrintAppS(&headertext,
294 "@CO\t%S %u %u %S\n", t->Name, t->x1, t->y1,
295 t->Comment);
296 }
297 ajListIterDel(&j);
298 }
299
300 rgheadertext = assemSAMGetReadgroupHeaderlines(assem);
301 if(rgheadertext)
302 ajStrAppendS(&headertext, rgheadertext);
303
304 ajSeqBamHeaderSetTextC(header, ajStrGetPtr(headertext));
305 ajSeqBamHeaderWrite(gzfile, header);
306
307 j = ajListIterNewread(assem->Reads);
308
309 while (!ajListIterDone(j)) /* reads */
310 {
311 r = ajListIterGet(j);
312 assemoutWriteBamAlignment(gzfile, r, bam);
313 }
314
315 ajListIterDel(&j);
316
317 ajSeqBamBgzfClose(gzfile);
318 ajSeqBamHeaderDel(&header);
319 ajStrDel(&headertext);
320
321 AJFREE(contigs);
322 AJFREE(bam->data);
323 AJFREE(bam);
324
325 ajBamIndexBuild(ajFileGetNameC(outf));
326
327 return ret;
328 }
329
330
331
332
333 /* @funcstatic assemoutBamIndex ***********************************************
334 **
335 ** Closes and idexes a BAM output file
336 **
337 ** @param [u] outf [AjPOutfile] Output file object.
338 ** @return [void]
339 **
340 ** @release 6.5.0
341 ** @@
342 ******************************************************************************/
343
assemoutBamIndex(AjPOutfile outf)344 static void assemoutBamIndex(AjPOutfile outf)
345 {
346 char *fname = ajCharNewC(ajFileGetNameC(ajOutfileGetFile(outf)));
347 AjPSeqBamBgzf gzfile = outf->OutData;
348
349 ajSeqBamBgzfClose(gzfile);
350 ajFileClose(&outf->File);
351
352 ajBamIndexBuild(fname);
353
354 AJFREE(fname);
355
356 return;
357 }
358
359
360
361
362 /* @funcstatic assemoutWriteNextBam *******************************************
363 **
364 ** Writes an assembly in binary alignment/map (BAM) format.
365 **
366 ** The sort order is "unsorted". Samtools can re-sort the file.
367 **
368 ** @param [u] outfile [AjPOutfile] Output file object.
369 ** @param [r] assem [const AjPAssem] Assembly object
370 ** @return [AjBool] True on success
371 **
372 ** @release 6.5.0
373 ** @@
374 ******************************************************************************/
375
assemoutWriteNextBam(AjPOutfile outfile,const AjPAssem assem)376 static AjBool assemoutWriteNextBam(AjPOutfile outfile, const AjPAssem assem)
377 {
378 AjPFile outf = ajOutfileGetFile(outfile);
379 AjPSeqBamHeader header = NULL;
380 AjPAssemContig c = NULL;
381 AjPSeqBam bam;
382 AjPAssemRead r = NULL;
383 AjPAssemContig* contigs = NULL;
384 AjPAssemTag t = NULL;
385 AjIList j = NULL;
386 AjPSeqBamBgzf gzfile = NULL;
387 AjPStr headertext=NULL;
388 const AjPStr rgheadertext=NULL;
389 AjBool ret = ajTrue;
390 ajint i=0;
391 ajulong ncontigs=0UL;
392
393 if(!outf) return ajFalse;
394 if(!assem) return ajFalse;
395
396 if(!assem->Hasdata)
397 {
398 if(ajListGetLength(assem->ContigsOrder))
399 ncontigs = ajListToarray(assem->ContigsOrder, (void***)&contigs);
400 else
401 ncontigs = ajTableToarrayValues(assem->Contigs, (void***)&contigs);
402
403 ajFmtPrintS(&headertext, "@HD\tVN:1.3\tSO:%s\n",
404 ajAssemGetSortorderC(assem));
405 header = ajSeqBamHeaderNewN((ajuint) ncontigs);
406
407 gzfile = ajSeqBamBgzfNew(ajFileGetFileptr(outf), "w");
408 outfile->OutData = gzfile;
409
410 while (contigs[i]) /* contigs */
411 {
412 c = contigs[i];
413
414 if(ajStrMatchC(c->Name, "*"))
415 {
416 i++;
417 continue;
418 }
419
420 header->target_name[i] = strdup(ajStrGetPtr(c->Name));
421 header->target_len[i++] = c->Length;
422
423 ajFmtPrintAppS(&headertext, "@SQ\tSN:%S\tLN:%d",
424 c->Name, c->Length);
425
426 if(c->URI)
427 ajFmtPrintAppS(&headertext, "\tUR:%S", c->URI);
428
429 if(c->MD5)
430 ajFmtPrintAppS(&headertext, "\tM5:%S", c->MD5);
431
432 if(c->Species)
433 ajFmtPrintAppS(&headertext, "\tSP:%S", c->Species);
434
435 ajFmtPrintAppS(&headertext, "\n");
436
437
438 j = ajListIterNewread(c->Tags);
439 while (!ajListIterDone(j))
440 {
441 t = ajListIterGet(j);
442 ajFmtPrintAppS(&headertext,
443 "@CO\t%S %u %u %S\n", t->Name, t->x1, t->y1,
444 t->Comment);
445 }
446 ajListIterDel(&j);
447 }
448
449 rgheadertext = assemSAMGetReadgroupHeaderlines(assem);
450 if(rgheadertext)
451 ajStrAppendS(&headertext, rgheadertext);
452
453 ajSeqBamHeaderSetTextC(header, ajStrGetPtr(headertext));
454 ajSeqBamHeaderWrite(gzfile, header);
455
456 ajSeqBamHeaderDel(&header);
457 ajStrDel(&headertext);
458
459 AJFREE(contigs);
460
461 if(!assem->BamHeader)
462 return ajTrue;
463 }
464
465 /* data */
466
467 gzfile = outfile->OutData;
468
469 AJNEW0(bam);
470 bam->m_data=10;
471 AJCNEW0(bam->data, bam->m_data);
472
473 j = ajListIterNewread(assem->Reads);
474
475 while (!ajListIterDone(j)) /* reads */
476 {
477 r = ajListIterGet(j);
478 assemoutWriteBamAlignment(gzfile, r, bam);
479 }
480
481 ajListIterDel(&j);
482
483 AJFREE(bam->data);
484 AJFREE(bam);
485
486 /* ajSeqBamBgzfClose(gzfile);*/
487
488 return ret;
489 }
490
491
492
493
494 /* @funcstatic assemoutWriteBamAlignment **************************************
495 **
496 ** Write individual alignment records of assembly data in BAM format
497 **
498 ** @param [u] gzfile [AjPSeqBamBgzf] Output file
499 ** @param [r] r [const AjPAssemRead] Read object with alignment information
500 ** @param [u] bam [AjPSeqBam] samtools data structure to store
501 ** alignment data before it is written in BAM format
502 **
503 ** @return [AjBool] True on success
504 **
505 ** @release 6.5.0
506 **
507 ******************************************************************************/
508
assemoutWriteBamAlignment(AjPSeqBamBgzf gzfile,const AjPAssemRead r,AjPSeqBam bam)509 static AjBool assemoutWriteBamAlignment(AjPSeqBamBgzf gzfile,
510 const AjPAssemRead r,
511 AjPSeqBam bam)
512 {
513 AjPSeqBamCore c;
514 AjPAssemTag tag;
515 unsigned char *dpos;
516 const char *s;
517 ajuint ilen;
518 ajuint slen;
519 ajuint i;
520 AjIList l = NULL;
521
522 /* optional fields */
523 ajint tagvalsize = 0;
524 const unsigned char* tagval = 0;
525 ajint intval =0;
526
527 /* processing cigar strings*/
528 char *t;
529 int op;
530 long x;
531
532
533 unsigned char bam_nt16_table[256] =
534 {
535 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
536 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
537 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
538 1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15,
539 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
540 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15,
541 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
542 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15,
543 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
544 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
545 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
546 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
547 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
548 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
549 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
550 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
551 };
552
553 /* bam_write1 for each alignment */
554
555 c = &bam->core;
556
557 ilen = ajStrGetLen(r->Seq);
558
559 c->tid = (int) r->Reference;
560
561 if(r->Flag & BAM_FREVERSE)
562 c->pos = r->y1-1;
563 else
564 c->pos = r->x1-1; /* BAM format is zero based;
565 -1 is translated to 0, meaning unmapped */
566 c->bin = 0;
567 c->qual = r->MapQ;
568 c->l_qname = 1 + ajStrGetLen(r->Name);
569 c->flag = r->Flag;
570 c->n_cigar = 0;
571 c->l_qseq = ilen;
572 c->mtid = (int) r->Rnext;
573 c->mpos = (int) r->Pnext-1;
574 c->isize = r->Tlen;
575
576
577 /* get cigar string length */
578 s = ajStrGetPtr(r->Cigar);
579 if (strcmp(s,"*")) /* '*' means unavailable */
580 {
581 for (; *s; ++s)
582 {
583 if ((isalpha((int)*s)) || (*s=='='))
584 ++c->n_cigar;
585 else
586 if (!isdigit((int)*s))
587 ajWarn("invalid CIGAR character: %c\n", *s);
588 }
589 }
590
591
592 bam->data_len = c->n_cigar*4 + c->l_qname +
593 (ilen + 1)/2 + ilen;
594
595 /* allocation for optional tags are made as they are appended */
596
597 if(bam->data_len > bam->m_data)
598 {
599 AJCRESIZE0(bam->data,bam->m_data, bam->data_len);
600 bam->m_data = bam->data_len;
601 }
602
603 dpos = bam->data;
604
605 /* copy query name to bam->data */
606 memcpy(dpos, ajStrGetPtr(r->Name), c->l_qname);
607 dpos += c->l_qname;
608
609 /* copy cigar string to bam->data */
610 s = ajStrGetPtr(r->Cigar);
611 for (i = 0; i != c->n_cigar; ++i)
612 {
613 x = strtol(s, &t, 10);
614 op = toupper((int)*t);
615 if (op == 'M') op = BAM_CMATCH;
616 else if (op == 'I') op = BAM_CINS;
617 else if (op == 'D') op = BAM_CDEL;
618 else if (op == 'N') op = BAM_CREF_SKIP;
619 else if (op == 'S') op = BAM_CSOFT_CLIP;
620 else if (op == 'H') op = BAM_CHARD_CLIP;
621 else if (op == 'P') op = BAM_CPAD;
622 else if (op == '=') op = BAM_CEQUAL;
623 else if (op == 'X') op = BAM_CDIFF;
624 else ajWarn("invalid CIGAR operation: %c",op);
625 s = t + 1;
626 ((ajuint*)dpos)[i] = x << BAM_CIGAR_SHIFT | op;
627 }
628
629 if (*s && c->n_cigar)
630 ajWarn("unmatched CIGAR operation: %c", *s);
631
632 c->bin = ajSeqBamReg2bin(c->pos, ajSeqBamCalend(c, MAJSEQBAMCIGAR(bam)));
633
634 dpos += c->n_cigar*4;
635
636
637 /* copy sequence string to bam->data */
638 s = ajStrGetPtr(r->Seq);
639 slen = (ilen+1)/2;
640 for (i = 0; i < slen; ++i)
641 dpos[i] = 0;
642 for (i = 0; i < ilen; ++i)
643 dpos[i/2] |= bam_nt16_table[(ajuint)s[i]] << 4*(1-i%2);
644 dpos += slen;
645
646 /* copy quality values to bam->data */
647 if(r->SeqQ && !ajStrMatchC(r->SeqQ, "*"))
648 {
649 s = ajStrGetPtr(r->SeqQ);
650
651 for(i=0;i<ilen;i++)
652 dpos[i]= s[i]-33;
653
654 }
655 else
656 for(i=0;i<ilen;i++)
657 dpos[i]= 0xff;
658
659
660
661 l = ajListIterNewread(r->Tags);
662 bam->l_aux=0;
663 while (!ajListIterDone(l))
664 {
665
666 tag = ajListIterGet(l);
667
668 /* TODO: array type 'B' and other types */
669
670 if(tag->type == 'i' || tag->type == 'I')
671 {
672 tagvalsize = 4;
673 ajStrToInt(tag->Comment, &intval);
674 tagval = (unsigned char*)&intval;
675 }
676 else if(tag->type =='s' || tag->type =='S')
677 {
678 tagvalsize = 2;
679 ajStrToInt(tag->Comment, &intval);
680 tagval = (unsigned char*)&intval;
681 }
682 else if(tag->type =='c' || tag->type =='C')
683 {
684 tagvalsize = 1;
685 ajStrToInt(tag->Comment, &intval);
686 tagval = (unsigned char*)&intval;
687 }
688 else if(tag->type =='A')
689 {
690 tagvalsize = 1;
691 tagval = (const unsigned char*)ajStrGetPtr(tag->Comment);
692 }
693 else if(tag->type =='Z')
694 {
695 tagvalsize = ajStrGetLen(tag->Comment)+1;
696 tagval = (const unsigned char*)ajStrGetPtr(tag->Comment);
697 }
698 else
699 {
700 ajWarn("tag type '%c' not yet supported",tag->type);
701 continue;
702 }
703 ajSeqBamAuxAppend(bam,
704 ajStrGetPtr(tag->Name),
705 tag->type,
706 tagvalsize,
707 tagval);
708
709
710 }
711 ajListIterDel(&l);
712
713
714
715 ajSeqBamWrite(gzfile, bam);
716
717 return ajTrue;
718 }
719
720
721
722
723 /* @funcstatic assemoutWriteMaf ***********************************************
724 **
725 ** Write assembly data in Mira Assembly Format
726 **
727 ** @param [u] outf [AjPFile] Output file
728 ** @param [r] assem [const AjPAssem] Assembly data object
729 **
730 ** @return [AjBool] True on success
731 **
732 **
733 ** @release 6.5.0
734 ******************************************************************************/
735
assemoutWriteMaf(AjPFile outf,const AjPAssem assem)736 static AjBool assemoutWriteMaf(AjPFile outf, const AjPAssem assem)
737 {
738 AjPAssemContig c = NULL;
739 AjPAssemRead r = NULL;
740 AjPAssemTag t = NULL;
741 ajint i = 0;
742 ajint nreads = 0;
743 AjIList j = NULL;
744 AjIList k = NULL;
745 AjIList reads = NULL;
746 AjPAssemContig* contigs = NULL;
747
748 if(!outf) return ajFalse;
749 if(!assem) return ajFalse;
750
751 ajTableToarrayValues(assem->Contigs, (void***)&contigs);
752 reads = ajListIterNewread(assem->Reads);
753
754 for (;contigs[i];i++)
755 {
756 c = contigs[i];
757 ajFmtPrintF(outf, "CO\t%S\n", c->Name);
758 ajFmtPrintF(outf, "NR\t%d\n", c->Nreads);
759 ajFmtPrintF(outf, "LC\t%d\n", c->Length);
760
761 j = ajListIterNewread(c->Tags);
762 while (!ajListIterDone(j))
763 {
764 t = ajListIterGet(j);
765 ajFmtPrintF(outf, "CT\t%S %u %u %S\n", t->Name, t->x1, t->y1,
766 t->Comment);
767 }
768 ajListIterDel(&j);
769
770 ajFmtPrintF(outf, "CS\t%S\n", c->Consensus);
771 ajFmtPrintF(outf, "CQ\t%S\n", c->ConsensusQ);
772
773 ajFmtPrintF(outf, "\\\\\n");
774
775 while (nreads++ < c->Nreads && !ajListIterDone(reads))
776 {
777 r = ajListIterGet(reads);
778
779 if(r->Reference !=i)
780 ajErr("different reference/contig number(%d) than expected(%d)"
781 "\nreads were expected to be sorted by contigs");
782
783 ajFmtPrintF(outf, "RD\t%S\n", r->Name);
784 ajFmtPrintF(outf, "RS\t%S\n", r->Seq);
785 ajFmtPrintF(outf, "RQ\t%S\n", r->SeqQ);
786 ajFmtPrintF(outf, "TN\t%S\n", r->Template);
787
788 if(r->Direction)
789 ajFmtPrintF(outf, "DI\t%c\n", r->Direction);
790
791 if(r->TemplateSizeMin)
792 ajFmtPrintF(outf, "TF\t%d\n", r->TemplateSizeMin);
793
794 if(r->TemplateSizeMax)
795 ajFmtPrintF(outf, "TT\t%d\n", r->TemplateSizeMax);
796
797 if(r->File)
798 ajFmtPrintF(outf, "SF\t%S\n", r->File);
799
800 if(r->VectorLeft)
801 ajFmtPrintF(outf, "SL\t%d\n", r->VectorLeft);
802
803 if(r->VectorRight)
804 ajFmtPrintF(outf, "SR\t%d\n", r->VectorRight);
805
806 if(r->QualLeft)
807 ajFmtPrintF(outf, "QL\t%d\n", r->QualLeft);
808
809 if(r->QualRight)
810 ajFmtPrintF(outf, "QR\t%d\n", r->QualRight);
811
812 if(r->ClipLeft)
813 ajFmtPrintF(outf, "SL\t%d\n", r->ClipLeft);
814
815 if(r->ClipRight)
816 ajFmtPrintF(outf, "SR\t%d\n", r->ClipRight);
817
818 k = ajListIterNewread(r->Tags);
819
820 while (!ajListIterDone(k))
821 {
822 t = ajListIterGet(k);
823
824 ajFmtPrintF(outf, "RT\t%S %u %u\n", t->Name,
825 t->x1, t->y1);
826 }
827
828 ajListIterDel(&k);
829
830 ajFmtPrintF(outf, "ST\t%S\n", r->Technology);
831 ajFmtPrintF(outf, "ER\n");
832 ajFmtPrintF(outf, "AT\t%u %u %u %u\n", r->x1, r->y1, r->x2, r->y2);
833 }
834
835 ajListIterDel(&j);
836 ajFmtPrintF(outf, "//\n");
837 ajFmtPrintF(outf, "EC\n");
838 }
839
840 AJFREE(contigs);
841 ajListIterDel(&reads);
842
843 return ajTrue;
844 }
845
846
847
848
849 /* @funcstatic assemoutWriteNextMaf *******************************************
850 **
851 ** Write latest chunk of assembly data in Mira Assembly Format
852 **
853 ** @param [u] outfile [AjPOutfile] Output file
854 ** @param [r] assem [const AjPAssem] Assembly data object
855 **
856 ** @return [AjBool] True on success
857 **
858 **
859 ** @release 6.5.0
860 ******************************************************************************/
861
assemoutWriteNextMaf(AjPOutfile outfile,const AjPAssem assem)862 static AjBool assemoutWriteNextMaf(AjPOutfile outfile, const AjPAssem assem)
863 {
864 AjPFile outf = ajOutfileGetFile(outfile);
865 AjPAssemContig c = NULL;
866 AjPAssemRead r = NULL;
867 AjPAssemTag t = NULL;
868 ajint i = 0;
869 ajint nreads = 0;
870 AjIList j = NULL;
871 AjIList k = NULL;
872 AjIList reads = NULL;
873 AjPAssemContig* contigs = NULL;
874
875 if(!outf) return ajFalse;
876 if(!assem) return ajFalse;
877
878 if(!assem->Hasdata)
879 {
880 ajTableToarrayValues(assem->Contigs, (void***)&contigs);
881 for (;contigs[i];i++)
882 {
883 c = contigs[i];
884 ajFmtPrintF(outf, "CO\t%S\n", c->Name);
885 ajFmtPrintF(outf, "NR\t%d\n", c->Nreads);
886 ajFmtPrintF(outf, "LC\t%d\n", c->Length);
887
888 j = ajListIterNewread(c->Tags);
889 while (!ajListIterDone(j))
890 {
891 t = ajListIterGet(j);
892 ajFmtPrintF(outf, "CT\t%S %u %u %S\n", t->Name, t->x1, t->y1,
893 t->Comment);
894 }
895 ajListIterDel(&j);
896
897 ajFmtPrintF(outf, "CS\t%S\n", c->Consensus);
898 ajFmtPrintF(outf, "CQ\t%S\n", c->ConsensusQ);
899
900 ajFmtPrintF(outf, "\\\\\n");
901 }
902 AJFREE(contigs);
903
904 return ajTrue;
905 }
906
907 /* data */
908
909 reads = ajListIterNewread(assem->Reads);
910
911 while (!ajListIterDone(reads))
912 {
913 nreads++;
914 r = ajListIterGet(reads);
915
916 if(r->Reference !=i)
917 ajErr("different reference/contig number(%d) than expected(%d)"
918 "\nreads were expected to be sorted by contigs");
919
920 ajFmtPrintF(outf, "RD\t%S\n", r->Name);
921 ajFmtPrintF(outf, "RS\t%S\n", r->Seq);
922 ajFmtPrintF(outf, "RQ\t%S\n", r->SeqQ);
923 ajFmtPrintF(outf, "TN\t%S\n", r->Template);
924
925 if(r->Direction)
926 ajFmtPrintF(outf, "DI\t%c\n", r->Direction);
927
928 if(r->TemplateSizeMin)
929 ajFmtPrintF(outf, "TF\t%d\n", r->TemplateSizeMin);
930
931 if(r->TemplateSizeMax)
932 ajFmtPrintF(outf, "TT\t%d\n", r->TemplateSizeMax);
933
934 if(r->File)
935 ajFmtPrintF(outf, "SF\t%S\n", r->File);
936
937 if(r->VectorLeft)
938 ajFmtPrintF(outf, "SL\t%d\n", r->VectorLeft);
939
940 if(r->VectorRight)
941 ajFmtPrintF(outf, "SR\t%d\n", r->VectorRight);
942
943 if(r->QualLeft)
944 ajFmtPrintF(outf, "QL\t%d\n", r->QualLeft);
945
946 if(r->QualRight)
947 ajFmtPrintF(outf, "QR\t%d\n", r->QualRight);
948
949 if(r->ClipLeft)
950 ajFmtPrintF(outf, "SL\t%d\n", r->ClipLeft);
951
952 if(r->ClipRight)
953 ajFmtPrintF(outf, "SR\t%d\n", r->ClipRight);
954
955 k = ajListIterNewread(r->Tags);
956
957 while (!ajListIterDone(k))
958 {
959 t = ajListIterGet(k);
960
961 ajFmtPrintF(outf, "RT\t%S %u %u\n", t->Name,
962 t->x1, t->y1);
963 }
964
965 ajListIterDel(&k);
966
967 ajFmtPrintF(outf, "ST\t%S\n", r->Technology);
968 ajFmtPrintF(outf, "ER\n");
969 ajFmtPrintF(outf, "AT\t%u %u %u %u\n", r->x1, r->y1, r->x2, r->y2);
970 }
971
972 ajListIterDel(&j);
973 ajListIterDel(&reads);
974
975 if(!nreads)
976 {
977 ajFmtPrintF(outf, "//\n");
978 ajFmtPrintF(outf, "EC\n");
979 }
980
981
982 return ajTrue;
983 }
984
985
986
987
988 /* @funcstatic assemoutMakeCigar **********************************************
989 **
990 ** Given a reference sequence region and a read sequence returns CIGAR string
991 ** representing the alignment.
992 **
993 ** Based on make_cigar function in Peter Cock's maf2sam project
994 **
995 ** We have a similar function in ajalign.c, called alignCigar(),
996 ** we should aim using that function rather than having two similar functions
997 **
998 ** @param [r] contig [const char*] Reference sequence region
999 ** @param [r] readseq [const char*] Read sequence
1000 **
1001 ** @return [AjPStr] CIGAR string
1002 **
1003 **
1004 ** @release 6.5.0
1005 ******************************************************************************/
1006
assemoutMakeCigar(const char * contig,const char * readseq)1007 static AjPStr assemoutMakeCigar(const char* contig, const char* readseq)
1008 {
1009 AjPStr cigar = NULL;
1010 ajint count =0;
1011
1012 char mode = '\0';
1013 char c;
1014 char r;
1015
1016 c = contig[0];
1017 r = readseq[0];
1018
1019 cigar = ajStrNewC("");
1020
1021 for(;c!=0; c=*++contig, r=*++readseq)
1022 {
1023
1024 if(c=='*' && r=='*')
1025 continue;
1026 else if(c!='*' && r!='*')
1027 {
1028 /* alignment match/mismatch */
1029 if (mode!='M')
1030 {
1031 if(count)
1032 ajFmtPrintAppS(&cigar, "%d%c", count, mode);
1033 mode = 'M';
1034 count = 1;
1035 }
1036 else
1037 count++;
1038 }
1039 else if(c=='*')
1040 {
1041 if (mode!='I')
1042 {
1043 if(count)
1044 ajFmtPrintAppS(&cigar, "%d%c", count, mode);
1045 mode = 'I';
1046 count = 1;
1047 }
1048 else
1049 count++;
1050 }
1051 else if(r=='*')
1052 {
1053 if (mode!='D')
1054 {
1055 if(count)
1056 ajFmtPrintAppS(&cigar, "%d%c", count, mode);
1057 mode = 'D';
1058 count = 1;
1059 }
1060 else
1061 count++;
1062 }
1063 else
1064 ajErr("something wrong!!!");
1065 }
1066
1067 if(count)
1068 ajFmtPrintAppS(&cigar, "%d%c", count, mode);
1069
1070 return cigar;
1071 }
1072
1073
1074
1075
1076 /* @funcstatic assemoutWriteSam ***********************************************
1077 **
1078 ** Write assembly data in SAM format
1079 **
1080 ** @param [u] outf [AjPFile] Output file
1081 ** @param [r] assem [const AjPAssem] Assembly data object
1082 **
1083 ** @return [AjBool] True on success
1084 **
1085 **
1086 ** @release 6.5.0
1087 ******************************************************************************/
1088
assemoutWriteSam(AjPFile outf,const AjPAssem assem)1089 static AjBool assemoutWriteSam(AjPFile outf, const AjPAssem assem)
1090 {
1091 AjPAssemContig c = NULL;
1092 AjPAssemRead r = NULL;
1093 AjPAssemTag t = NULL;
1094 AjPAssemContig* contigs = NULL;
1095 AjIList j = NULL;
1096 AjPStr argstr = NULL;
1097 const AjPStr headertext = NULL;
1098 ajint n = 0;
1099 ajint i = 0;
1100 AjBool ret = ajTrue;
1101
1102 if(!outf || !assem)
1103 return ajFalse;
1104
1105 ajDebug("assemoutWriteSam: # of contigs = %d\n", n);
1106
1107 ajFmtPrintF(outf, "@HD\tVN:1.3\tSO:%s\n", ajAssemGetSortorderC(assem));
1108
1109 /* Program record */
1110 argstr = ajStrNewS(ajUtilGetCmdline());
1111 ajStrExchangeKK(&argstr, '\n', ' ');
1112 ajFmtPrintF(outf, "@PG\tID:%S\tVN:%S\tCL:%S\n",
1113 ajUtilGetProgram(), ajNamValueVersion(), argstr);
1114 ajStrDel(&argstr);
1115
1116
1117 if(ajListGetLength(assem->ContigsOrder))
1118 ajListToarray(assem->ContigsOrder, (void***)&contigs);
1119 else
1120 ajTableToarrayValues(assem->Contigs, (void***)&contigs);
1121
1122 while (contigs[i]) /* contigs */
1123 {
1124 c = contigs[i++];
1125
1126 if(!ajStrMatchC(c->Name, "*"))
1127 {
1128 ajFmtPrintF(outf, "@SQ\tSN:%S\tLN:%d", c->Name, c->Length);
1129
1130 if(c->URI)
1131 ajFmtPrintF(outf, "\tUR:%S", c->URI);
1132
1133 if(c->MD5)
1134 ajFmtPrintF(outf, "\tM5:%S", c->MD5);
1135
1136 if(c->Species)
1137 ajFmtPrintF(outf, "\tSP:%S", c->Species);
1138
1139 ajFmtPrintF(outf, "\n");
1140
1141 j = ajListIterNewread(c->Tags);
1142 while (!ajListIterDone(j))
1143 {
1144 t = ajListIterGet(j);
1145 ajFmtPrintF(outf, "@CO\t%S %u %u %S\n", t->Name, t->x1, t->y1,
1146 t->Comment);
1147 }
1148 ajListIterDel(&j);
1149 }
1150 }
1151
1152 headertext = assemSAMGetReadgroupHeaderlines(assem);
1153 if(headertext)
1154 ajFmtPrintF(outf,"%S", headertext);
1155
1156 j = ajListIterNewread(assem->Reads);
1157
1158 while (!ajListIterDone(j)) /* reads */
1159 {
1160 r = ajListIterGet(j);
1161 assemoutWriteSamAlignment(outf, r, contigs, i);
1162 }
1163
1164 ajListIterDel(&j);
1165 AJFREE(contigs);
1166
1167 return ret;
1168 }
1169
1170
1171
1172
1173 /* @funcstatic assemoutWriteNextSam *******************************************
1174 **
1175 ** Write latest chunk of assembly data in SAM format
1176 **
1177 ** @param [u] outfile [AjPOutfile] Output file
1178 ** @param [r] assem [const AjPAssem] Assembly data object
1179 **
1180 ** @return [AjBool] True on success
1181 **
1182 **
1183 ** @release 6.5.0
1184 ******************************************************************************/
1185
assemoutWriteNextSam(AjPOutfile outfile,const AjPAssem assem)1186 static AjBool assemoutWriteNextSam(AjPOutfile outfile, const AjPAssem assem)
1187 {
1188 AjPFile outf = ajOutfileGetFile(outfile);
1189 AjPAssemContig c = NULL;
1190 AjPAssemRead r = NULL;
1191 AjPAssemTag t = NULL;
1192 AjPAssemContig* contigs = NULL;
1193 AjIList j = NULL;
1194 AjPStr argstr = NULL;
1195 const AjPStr headertext = NULL;
1196 ajint n = 0;
1197 ajulong i = 0UL;
1198 AjBool ret = ajTrue;
1199
1200 if(!outf || !assem)
1201 return ajFalse;
1202
1203 ajDebug("assemoutWriteSam: # of contigs = %d\n", n);
1204
1205 if(!assem->Hasdata)
1206 {
1207 ajFmtPrintF(outf, "@HD\tVN:1.3\tSO:%s\n", ajAssemGetSortorderC(assem));
1208
1209 /* Program record */
1210 argstr = ajStrNewS(ajUtilGetCmdline());
1211 ajStrExchangeKK(&argstr, '\n', ' ');
1212 ajFmtPrintF(outf, "@PG\tID:%S\tVN:%S\tCL:%S\n",
1213 ajUtilGetProgram(), ajNamValueVersion(), argstr);
1214 ajStrDel(&argstr);
1215
1216
1217 if(ajListGetLength(assem->ContigsOrder))
1218 ajListToarray(assem->ContigsOrder, (void***)&contigs);
1219 else
1220 ajTableToarrayValues(assem->Contigs, (void***)&contigs);
1221
1222 while (contigs[i]) /* contigs */
1223 {
1224 c = contigs[i++];
1225
1226 if(!ajStrMatchC(c->Name, "*"))
1227 {
1228 ajFmtPrintF(outf, "@SQ\tSN:%S\tLN:%d", c->Name, c->Length);
1229
1230 if(c->URI)
1231 ajFmtPrintF(outf, "\tUR:%S", c->URI);
1232
1233 if(c->MD5)
1234 ajFmtPrintF(outf, "\tM5:%S", c->MD5);
1235
1236 if(c->Species)
1237 ajFmtPrintF(outf, "\tSP:%S", c->Species);
1238
1239 ajFmtPrintF(outf, "\n");
1240
1241 j = ajListIterNewread(c->Tags);
1242 while (!ajListIterDone(j))
1243 {
1244 t = ajListIterGet(j);
1245 ajFmtPrintF(outf, "@CO\t%S %u %u %S\n",
1246 t->Name, t->x1, t->y1,
1247 t->Comment);
1248 }
1249 ajListIterDel(&j);
1250 }
1251 }
1252
1253 headertext = assemSAMGetReadgroupHeaderlines(assem);
1254 if(headertext)
1255 ajFmtPrintF(outf,"%S", headertext);
1256
1257 AJFREE(contigs);
1258
1259 if(!assem->BamHeader)
1260 return ajTrue;
1261 }
1262
1263
1264 /* data */
1265
1266 j = ajListIterNewread(assem->Reads);
1267 if(ajListGetLength(assem->ContigsOrder))
1268 i = ajListToarray(assem->ContigsOrder, (void***)&contigs);
1269 else
1270 i = ajTableToarrayValues(assem->Contigs, (void***)&contigs);
1271
1272 while (!ajListIterDone(j)) /* reads */
1273 {
1274 r = ajListIterGet(j);
1275 assemoutWriteSamAlignment(outf, r, contigs, (ajuint) i);
1276 }
1277
1278 ajListIterDel(&j);
1279 AJFREE(contigs);
1280
1281 return ret;
1282 }
1283
1284
1285
1286
1287 /* @funcstatic assemSAMGetReadgroupHeaderlines ********************************
1288 **
1289 ** Returns read-group header lines as text for the given assembly
1290 **
1291 ** @param [r] assem [const AjPAssem] assembly object
1292 ** @return [const AjPStr] read-group header lines
1293 **
1294 **
1295 ** @release 6.5.0
1296 ******************************************************************************/
1297
assemSAMGetReadgroupHeaderlines(const AjPAssem assem)1298 static const AjPStr assemSAMGetReadgroupHeaderlines(const AjPAssem assem)
1299 {
1300 AjPStr* rgids = NULL;
1301 const AjPAssemReadgroup rg = NULL;
1302 ajint i =0;
1303
1304 ajTableToarrayKeys(assem->Readgroups, (void***)&rgids);
1305
1306 ajStrAssignC(&assemoutSamLinetxt, "");
1307
1308 while (rgids[i]) /* read groups */
1309 {
1310 rg = ajTableFetchS(assem->Readgroups, rgids[i++]);
1311 ajFmtPrintAppS(&assemoutSamLinetxt, "@RG\tID:%S", rg->ID);
1312
1313 if(rg->CN)
1314 ajFmtPrintAppS(&assemoutSamLinetxt, "\tCN:%S", rg->CN);
1315
1316 if(rg->Desc)
1317 ajFmtPrintAppS(&assemoutSamLinetxt, "\tDS:%S", rg->Desc);
1318
1319 if(rg->Date)
1320 ajFmtPrintAppS(&assemoutSamLinetxt, "\tDT:%S", rg->Date);
1321
1322 if(rg->FlowOrder)
1323 ajFmtPrintAppS(&assemoutSamLinetxt, "\tFO:%S", rg->FlowOrder);
1324
1325 if(rg->KeySeq)
1326 ajFmtPrintAppS(&assemoutSamLinetxt, "\tKS:%S", rg->KeySeq);
1327
1328 if(rg->Library)
1329 ajFmtPrintAppS(&assemoutSamLinetxt, "\tLB:%S", rg->Library);
1330
1331 if(rg->Programs)
1332 ajFmtPrintAppS(&assemoutSamLinetxt, "\tPG:%S", rg->Programs);
1333
1334 if(rg->Isize)
1335 ajFmtPrintAppS(&assemoutSamLinetxt, "\tPI:%d", rg->Isize);
1336
1337 if(rg->Platform)
1338 ajFmtPrintAppS(&assemoutSamLinetxt, "\tPL:%s",
1339 ajAssemreadgroupGetPlatformname(rg));
1340
1341 if(rg->Unit)
1342 ajFmtPrintAppS(&assemoutSamLinetxt, "\tPU:%S", rg->Unit);
1343
1344 if(rg->Sample)
1345 ajFmtPrintAppS(&assemoutSamLinetxt, "\tSM:%S", rg->Sample);
1346
1347 ajStrAppendC(&assemoutSamLinetxt, "\n");
1348
1349 }
1350
1351 AJFREE(rgids);
1352
1353 return assemoutSamLinetxt;
1354 }
1355
1356
1357
1358
1359 /* @funcstatic assemoutWriteSamAlignment **************************************
1360 **
1361 ** Write individual alignment records of assembly data in SAM format
1362 **
1363 ** @param [u] outf [AjPFile] Output file
1364 ** @param [r] r [const AjPAssemRead] Read object with alignment information
1365 ** @param [r] contigs [AjPAssemContig const*] Contigs/refseqs the read
1366 ** aligns to
1367 ** @param [r] ncontigs [ajint] Number of contigs
1368 **
1369 ** @return [AjBool] True on success
1370 **
1371 **
1372 ** @release 6.5.0
1373 ******************************************************************************/
1374
assemoutWriteSamAlignment(AjPFile outf,const AjPAssemRead r,AjPAssemContig const * contigs,ajint ncontigs)1375 static AjBool assemoutWriteSamAlignment(AjPFile outf, const AjPAssemRead r,
1376 AjPAssemContig const * contigs,
1377 ajint ncontigs)
1378 {
1379 AjPAssemTag t = NULL;
1380 AjIList l = NULL;
1381 AjPStr qualstr = NULL;
1382 AjPStr tmp = NULL;
1383 ajint POS = 0;
1384 AjPStr CIGAR = NULL;
1385 const char* RNEXT = NULL;
1386 AjPStr SEQ = NULL;
1387 AjPStr QUAL = NULL;
1388 AjPStr SEQunpadded = NULL;
1389 AjPStr QUALunpadded = NULL;
1390 AjPStr consensus = NULL;
1391 AjBool rc= ajFalse;
1392 AjBool ret = ajTrue;
1393 const char* refseq = NULL;
1394 const AjPAssemContig contig = NULL;
1395
1396 ajuint k = 0;
1397
1398 if(r->Reference>=ncontigs)
1399 ajDie("assemoutWriteSamAlignment: reference sequence number"
1400 " '%d' is larger than or equal to known number of reference"
1401 " sequences '%d'. Problem while processing read '%S'.",
1402 r->Reference,
1403 ncontigs,
1404 r->Name);
1405
1406 contig = (r->Reference==-1 ? NULL : contigs[r->Reference]);
1407
1408 ajStrAssignRef(&SEQ, r->Seq);
1409 consensus = contig==NULL? NULL : contig->Consensus;
1410
1411 if (r->Rnext==-1)
1412 RNEXT= "*";
1413 else if(r->Rnext==r->Reference)
1414 RNEXT = "=";
1415 else
1416 RNEXT = ajStrGetPtr(contigs[r->Rnext]->Name);
1417
1418 if (r->Flag & BAM_FREVERSE)
1419 {
1420 rc = ajTrue;
1421 qualstr = ajStrNewS(r->SeqQ);
1422
1423 if(!r->Reversed)
1424 {
1425 ajStrReverse(&qualstr);
1426 ajSeqstrReverse(&SEQ);
1427 }
1428
1429 QUAL = qualstr;
1430 POS = r->y1;
1431 ajStrAssignSubS(&tmp, SEQ,
1432 ajStrGetLen(r->Seq) - r->y2,
1433 ajStrGetLen(r->Seq) - r->x2
1434 );
1435
1436 }
1437 else
1438 {
1439 rc= ajFalse;
1440 POS = r->x1;
1441 QUAL = r->SeqQ;
1442 ajStrAssignSubS(&tmp, SEQ,
1443 r->x2-1,
1444 r->y2-1
1445 );
1446 }
1447
1448 if(r->Cigar==NULL && consensus)
1449 {
1450 refseq = ajStrGetPtr(consensus) + (rc ? r->y1-1 : r->x1-1);
1451
1452 CIGAR = assemoutMakeCigar(refseq, ajStrGetPtr(tmp));
1453
1454 SEQunpadded = ajStrNewRes(ajStrGetLen(SEQ));
1455 QUALunpadded = ajStrNewRes(ajStrGetLen(SEQ));
1456
1457 for(k=0; k< ajStrGetLen(SEQ); k++)
1458 {
1459 if (ajStrGetCharPos(SEQ, k) == '*')
1460 continue;
1461
1462 ajStrAppendK(&SEQunpadded, ajStrGetCharPos(SEQ, k));
1463 ajStrAppendK(&QUALunpadded, ajStrGetCharPos(QUAL, k));
1464 }
1465
1466 ajDebug("cigar: %S\n", CIGAR);
1467
1468 ajStrAssignS(&tmp, CIGAR);
1469
1470 if(rc)
1471 {
1472 if(r->y2 < (ajint)ajStrGetLen(SEQ))
1473 ajFmtPrintS(&CIGAR, "%dS%S",
1474 ajStrGetLen(SEQ) - r->y2, tmp);
1475 if(r->x2 > 1)
1476 ajFmtPrintAppS(&CIGAR, "%dS", r->x2 - 1);
1477 }
1478 else
1479 {
1480 if(r->x2 > 1)
1481 ajFmtPrintS(&CIGAR, "%dS%S", r->x2 - 1, tmp);
1482 if(r->y2 < (ajint)ajStrGetLen(SEQ))
1483 ajFmtPrintAppS(&CIGAR, "%dS",
1484 ajStrGetLen(SEQ) - r->y2);
1485 }
1486 ajStrDel(&tmp);
1487 }
1488 else if(r->Cigar==NULL)
1489 {
1490 ajErr("both CIGAR string and consensus sequence not available");
1491 ret = ajFalse;
1492 ajStrAssignK(&CIGAR, '*');
1493 }
1494 else if(!ajStrGetLen(r->Cigar))
1495 ajStrAssignK(&CIGAR, '*');
1496 else if(ajStrGetLen(r->Cigar))
1497 {
1498 if(!ajStrGetLen(SEQ))
1499 ajStrAssignK(&SEQ, '*');
1500
1501 if(!ajStrGetLen(QUAL))
1502 ajStrAssignK(&QUAL, '*');
1503 }
1504
1505 ajStrDel(&tmp);
1506
1507 ajFmtPrintF(outf, "%S\t%d\t%s\t%d\t%d\t%S\t%s\t%Ld\t%d\t%S\t%S",
1508 r->Name,
1509 r->Flag,
1510 (contig==NULL ? "*" : ajStrGetPtr(contig->Name)),
1511 POS,
1512 r->MapQ,
1513 (CIGAR ? CIGAR : r->Cigar),
1514 RNEXT,
1515 r->Pnext,
1516 r->Tlen,
1517 (r->Cigar ? SEQ : SEQunpadded),
1518 (r->Cigar ? QUAL : QUALunpadded));
1519
1520 l = ajListIterNewread(r->Tags);
1521 while (!ajListIterDone(l))
1522 {
1523 t = ajListIterGet(l);
1524
1525 /* TODO: array type, 'B' */
1526
1527 /* In SAM, all single integer types are mapped to int32_t [SAM spec] */
1528 ajFmtPrintF(outf, "\t%S:%c:",
1529 t->Name,
1530 (t->type == 'c' || t->type == 'C' ||
1531 t->type == 's' || t->type == 'S'
1532 || t->type == 'I') ? 'i' : t->type
1533 );
1534
1535 if(t->x1 || t->y1)
1536 ajFmtPrintF(outf, " %u %u", t->x1, t->y1);
1537
1538 if(t->Comment && ajStrGetLen(t->Comment)>0)
1539 ajFmtPrintF(outf, "%S", t->Comment);
1540
1541 }
1542 ajListIterDel(&l);
1543
1544 ajFmtPrintF(outf, "\n");
1545
1546 if(qualstr)
1547 ajStrDel(&qualstr);
1548
1549 ajStrDel(&SEQ);
1550 ajStrDel(&CIGAR);
1551 ajStrDel(&SEQunpadded);
1552 ajStrDel(&QUALunpadded);
1553
1554 return ret;
1555 }
1556
1557
1558
1559
1560 /* @funcstatic assemoutWriteList **********************************************
1561 **
1562 ** Write an assembly as the simple id
1563 **
1564 ** @param [u] outf [AjPFile] Output file
1565 ** @param [r] assem [const AjPAssem] Assembly object
1566 **
1567 ** @return [AjBool] True on success
1568 **
1569 **
1570 ** @release 6.4.0
1571 ******************************************************************************/
1572
assemoutWriteList(AjPFile outf,const AjPAssem assem)1573 static AjBool assemoutWriteList(AjPFile outf, const AjPAssem assem)
1574 {
1575 if(!outf) return ajFalse;
1576 if(!assem) return ajFalse;
1577
1578 if(ajStrGetLen(assem->Db))
1579 ajFmtPrintF(outf, "%S:%S\n", assem->Db, assem->Id);
1580 else
1581 ajFmtPrintF(outf, "%S\n", assem->Id);
1582
1583 return ajTrue;
1584 }
1585
1586
1587
1588
1589 /* @funcstatic assemoutWriteNextList ******************************************
1590 **
1591 ** Write an assembly as the simple id
1592 **
1593 ** @param [u] outfile [AjPOutfile] Output file
1594 ** @param [r] assem [const AjPAssem] Assembly object
1595 **
1596 ** @return [AjBool] True on success
1597 **
1598 **
1599 ** @release 6.4.0
1600 ******************************************************************************/
1601
assemoutWriteNextList(AjPOutfile outfile,const AjPAssem assem)1602 static AjBool assemoutWriteNextList(AjPOutfile outfile, const AjPAssem assem)
1603 {
1604 AjPFile outf = ajOutfileGetFile(outfile);
1605
1606 if(!outf) return ajFalse;
1607 if(!assem) return ajFalse;
1608
1609 if(!assem->Hasdata)
1610 return ajTrue;
1611
1612 if(ajStrGetLen(assem->Db))
1613 ajFmtPrintF(outf, "%S:%S\n", assem->Db, assem->Id);
1614 else
1615 ajFmtPrintF(outf, "%S\n", assem->Id);
1616
1617 return ajTrue;
1618 }
1619
1620
1621
1622
1623 /* @datasection [none] Miscellaneous functions ********************************
1624 **
1625 ** Functions to initialise and clean up internals
1626 **
1627 ** @nam2rule Assemout Assembly output internals
1628 **
1629 ******************************************************************************/
1630
1631
1632
1633
1634 /* @section Printing **********************************************************
1635 **
1636 ** Printing details of the internals to a file
1637 **
1638 ** @fdata [none]
1639 **
1640 ** @nam2rule Assemoutprint
1641 **
1642 ** @fcategory output
1643 **
1644 ******************************************************************************/
1645
1646
1647
1648
1649 /* @section Print *************************************************************
1650 **
1651 ** Printing to a file
1652 **
1653 ** @fdata [none]
1654 **
1655 ** @nam3rule Book Print as docbook table
1656 ** @nam3rule Html Print as html table
1657 ** @nam3rule Wiki Print as wiki table
1658 ** @nam3rule Text Print as text
1659 **
1660 ** @argrule * outf [AjPFile] output file
1661 ** @argrule Text full [AjBool] Print all details
1662 **
1663 ** @valrule * [void]
1664 **
1665 ** @fcategory cast
1666 **
1667 ******************************************************************************/
1668
1669
1670
1671
1672 /* @func ajAssemoutprintBook **************************************************
1673 **
1674 ** Reports the assembly format internals as Docbook text
1675 **
1676 ** @param [u] outf [AjPFile] Output file
1677 ** @return [void]
1678 **
1679 ** @release 6.4.0
1680 ** @@
1681 ******************************************************************************/
1682
ajAssemoutprintBook(AjPFile outf)1683 void ajAssemoutprintBook(AjPFile outf)
1684 {
1685 ajint i = 0;
1686
1687 ajFmtPrintF(outf, "<para>The supported assembly output "
1688 "formats are summarised "
1689 "in the table below. The columns are as follows: "
1690 "<emphasis>Output format</emphasis> (format name), "
1691 "<emphasis>Description</emphasis> (short description of "
1692 "the format).</para> \n\n");
1693
1694 ajFmtPrintF(outf, "<table frame=\"box\" rules=\"cols\">\n");
1695 ajFmtPrintF(outf, " <caption>Assembly output formats</caption>\n");
1696 ajFmtPrintF(outf, " <thead>\n");
1697 ajFmtPrintF(outf, " <tr align=\"center\">\n");
1698 ajFmtPrintF(outf, " <th>Output Format</th>\n");
1699 ajFmtPrintF(outf, " <th>Description</th>\n");
1700 ajFmtPrintF(outf, " </tr>\n");
1701 ajFmtPrintF(outf, " </thead>\n");
1702 ajFmtPrintF(outf, " <tbody>\n");
1703
1704 for(i=0; assemoutFormatDef[i].Name; i++)
1705 {
1706 ajFmtPrintF(outf, " <tr>\n");
1707 ajFmtPrintF(outf, " <td>%s</td>\n",
1708 assemoutFormatDef[i].Name);
1709 ajFmtPrintF(outf, " <td>%s</td>\n",
1710 assemoutFormatDef[i].Desc);
1711 }
1712
1713 ajFmtPrintF(outf, " </tbody>\n");
1714 ajFmtPrintF(outf, "</table>\n");
1715
1716 return;
1717 }
1718
1719
1720
1721
1722 /* @func ajAssemoutprintHtml **************************************************
1723 **
1724 ** Reports the internal data structures
1725 **
1726 ** @param [u] outf [AjPFile] Output file
1727 ** @return [void]
1728 **
1729 ** @release 6.4.0
1730 ** @@
1731 ******************************************************************************/
1732
ajAssemoutprintHtml(AjPFile outf)1733 void ajAssemoutprintHtml(AjPFile outf)
1734 {
1735 ajint i = 0;
1736
1737 ajFmtPrintF(outf, "<table border=3>");
1738 ajFmtPrintF(outf, "<tr><th>Assembly Format</th>\n");
1739 ajFmtPrintF(outf, "<th>Description</th></tr>\n");
1740 ajFmtPrintF(outf, "\n");
1741 ajFmtPrintF(outf, "# Assembly output formats\n");
1742 ajFmtPrintF(outf, "# Name Format name (or alias)\n");
1743 ajFmtPrintF(outf, "# Desc Format description\n");
1744 ajFmtPrintF(outf, "# Name Description\n");
1745 ajFmtPrintF(outf, "\n");
1746 ajFmtPrintF(outf, "AssemFormat {\n");
1747
1748 for(i=0; assemoutFormatDef[i].Name; i++)
1749 {
1750 ajFmtPrintF(outf, "<tr><td>\n%-12s\n"
1751 "<td>\"%s\"</td></tr>\n",
1752 assemoutFormatDef[i].Name,
1753 assemoutFormatDef[i].Desc);
1754 }
1755
1756 ajFmtPrintF(outf, "}\n\n");
1757
1758 return;
1759 }
1760
1761
1762
1763
1764 /* @func ajAssemoutprintText **************************************************
1765 **
1766 ** Reports the internal data structures
1767 **
1768 ** @param [u] outf [AjPFile] Output file
1769 ** @param [r] full [AjBool] Full report (usually ajFalse)
1770 ** @return [void]
1771 **
1772 ** @release 6.4.0
1773 ** @@
1774 ******************************************************************************/
1775
ajAssemoutprintText(AjPFile outf,AjBool full)1776 void ajAssemoutprintText(AjPFile outf, AjBool full)
1777 {
1778 ajint i = 0;
1779
1780 ajFmtPrintF(outf, "\n");
1781 ajFmtPrintF(outf, "# Assembly output formats\n");
1782 ajFmtPrintF(outf, "# Name Format name (or alias)\n");
1783 ajFmtPrintF(outf, "# Desc Format description\n");
1784 ajFmtPrintF(outf, "# Name Description\n");
1785 ajFmtPrintF(outf, "\n");
1786 ajFmtPrintF(outf, "AssemFormat {\n");
1787
1788 for(i=0; assemoutFormatDef[i].Name; i++)
1789 {
1790 if(full)
1791 ajFmtPrintF(outf, " %-12s \"%s\"\n",
1792 assemoutFormatDef[i].Name,
1793 assemoutFormatDef[i].Desc);
1794 }
1795
1796 ajFmtPrintF(outf, "}\n\n");
1797
1798 return;
1799 }
1800
1801
1802
1803
1804 /* @func ajAssemoutprintWiki **************************************************
1805 **
1806 ** Reports the asembly output format internals as wikitext
1807 **
1808 ** @param [u] outf [AjPFile] Output file
1809 ** @return [void]
1810 **
1811 ** @release 6.4.0
1812 ** @@
1813 ******************************************************************************/
1814
ajAssemoutprintWiki(AjPFile outf)1815 void ajAssemoutprintWiki(AjPFile outf)
1816 {
1817 ajint i = 0;
1818 ajint j = 0;
1819 AjPStr namestr = NULL;
1820
1821 ajFmtPrintF(outf, "{| class=\"wikitable sortable\" border=\"2\"\n");
1822 ajFmtPrintF(outf, "|-\n");
1823 ajFmtPrintF(outf, "!Format!!"
1824 "class=\"unsortable\"|Description\n");
1825
1826 for(i=0; assemoutFormatDef[i].Name; i++)
1827 {
1828 ajFmtPrintF(outf, "|-\n");
1829 ajStrAssignC(&namestr, assemoutFormatDef[i].Name);
1830
1831 for(j=i+1; assemoutFormatDef[j].Name; j++)
1832 {
1833 if(assemoutFormatDef[j].Write == assemoutFormatDef[i].Write)
1834 {
1835 ajFmtPrintAppS(&namestr, "<br>%s",
1836 assemoutFormatDef[j].Name);
1837 ajWarn("Assembly output format '%s' same as '%s' "
1838 "but not alias",
1839 assemoutFormatDef[j].Name,
1840 assemoutFormatDef[i].Name);
1841 }
1842 }
1843 ajFmtPrintF(outf, "|%S||%s\n",
1844 namestr,
1845 assemoutFormatDef[i].Desc);
1846 }
1847
1848 ajFmtPrintF(outf, "|}\n");
1849
1850 ajStrDel(&namestr);
1851
1852 return;
1853 }
1854
1855
1856
1857
1858 /* @datasection [none] Output formats *****************************************
1859 **
1860 ** Output formats internals
1861 **
1862 ** @nam2rule Assemoutformat Assembly output format specific
1863 **
1864 ******************************************************************************/
1865
1866
1867
1868
1869 /* @section cast **************************************************************
1870 **
1871 ** Values for output formats
1872 **
1873 ** @fdata [none]
1874 **
1875 ** @nam3rule Find Return index to named format
1876 ** @nam3rule Test Test format value
1877 **
1878 ** @argrule * format [const AjPStr] Format name
1879 ** @argrule Find iformat [ajint*] Index matching format name
1880 **
1881 ** @valrule * [AjBool] True if found
1882 **
1883 ** @fcategory cast
1884 **
1885 ******************************************************************************/
1886
1887
1888
1889
1890 /* @func ajAssemoutformatFind *************************************************
1891 **
1892 ** Looks for the specified format(s) in the internal definitions and
1893 ** returns the index.
1894 **
1895 ** Sets iformat as the recognised format, and returns ajTrue.
1896 **
1897 ** @param [r] format [const AjPStr] Format required.
1898 ** @param [w] iformat [ajint*] Index
1899 ** @return [AjBool] ajTrue on success.
1900 **
1901 ** @release 6.4.0
1902 ** @@
1903 ******************************************************************************/
1904
ajAssemoutformatFind(const AjPStr format,ajint * iformat)1905 AjBool ajAssemoutformatFind(const AjPStr format, ajint* iformat)
1906 {
1907 AjPStr tmpformat = NULL;
1908 ajuint i = 0;
1909
1910 /* ajDebug("ajAssemoutformatFind '%S'\n", format); */
1911 if(!ajStrGetLen(format))
1912 return ajFalse;
1913
1914 ajStrAssignS(&tmpformat, format);
1915 ajStrFmtLower(&tmpformat);
1916
1917 for(i=0; assemoutFormatDef[i].Name; i++)
1918 {
1919 /* ajDebug("test %d '%s' \n", i, assemoutFormatDef[i].Name); */
1920 if(ajStrMatchCaseC(tmpformat, assemoutFormatDef[i].Name))
1921 {
1922 *iformat = i;
1923 ajStrDel(&tmpformat);
1924 /* ajDebug("found '%s' at %d\n", assemoutFormatDef[i].Name, i); */
1925 return ajTrue;
1926 }
1927 }
1928
1929 ajErr("Unknown output format '%S'", format);
1930
1931 ajStrDel(&tmpformat);
1932
1933 *iformat = 0;
1934
1935 return ajFalse;
1936 }
1937
1938
1939
1940
1941 /* @func ajAssemoutformatTest *************************************************
1942 **
1943 ** Tests whether a named assembly output format is known
1944 **
1945 ** @param [r] format [const AjPStr] Format
1946 ** @return [AjBool] ajTrue if formats was accepted
1947 **
1948 ** @release 6.4.0
1949 ** @@
1950 ******************************************************************************/
1951
ajAssemoutformatTest(const AjPStr format)1952 AjBool ajAssemoutformatTest(const AjPStr format)
1953 {
1954 ajint i;
1955
1956 for (i=0; assemoutFormatDef[i].Name; i++)
1957 if(ajStrMatchCaseC(format, assemoutFormatDef[i].Name))
1958 return ajTrue;
1959
1960 return ajFalse;
1961 }
1962
1963
1964
1965
1966 /* @datasection [none] Miscellaneous functions ********************************
1967 **
1968 ** Functions to initialise and clean up internals
1969 **
1970 ** @nam2rule Assemout Assembly output internals
1971 **
1972 ******************************************************************************/
1973
1974
1975
1976
1977 /* @section Miscellaneous *****************************************************
1978 **
1979 ** Functions to initialise and clean up internals
1980 **
1981 ** @fdata [none]
1982 **
1983 ** @nam3rule Exit Clean up and exit
1984 **
1985 ** @valrule * [void]
1986 **
1987 ** @fcategory misc
1988 **
1989 ******************************************************************************/
1990
1991
1992
1993
1994 /* @func ajAssemoutExit *******************************************************
1995 **
1996 ** Cleans up assembly output internal memory
1997 **
1998 ** @return [void]
1999 **
2000 ** @release 6.4.0
2001 ** @@
2002 ******************************************************************************/
2003
ajAssemoutExit(void)2004 void ajAssemoutExit(void)
2005 {
2006 ajStrDel(&assemoutSamLinetxt);
2007
2008 return;
2009 }
2010