1 #ifndef CODE_Transcript
2 #define CODE_Transcript
3 
4 #include "IncludeDefine.h"
5 #include "Parameters.h"
6 #include "Variation.h"
7 #include "Genome.h"
8 #include <set>
9 
10 class Transcript {
11 public:
12     uint exons[MAX_N_EXONS][EX_SIZE]; //coordinates of all exons: r-start, g-start, length
13     vector <array<uint32,2>> cigar; //new way to record alignments, with CIGAR operations. For now, only used by splice-graph
14 
15     uint shiftSJ[MAX_N_EXONS][2]; //shift of the SJ coordinates due to genomic micro-repeats
16     int canonSJ[MAX_N_EXONS]; //canonicity of each junction
17     uint8 sjAnnot[MAX_N_EXONS]; //anotated or not
18     uint8 sjStr[MAX_N_EXONS]; //strand of the junction
19     uint intronMotifs[3];
20     uint8 sjMotifStrand;
21     bool sjYes;
22 
23     uint nExons; //number of exons in the read transcript
24 
25     //variables from ReadAlign
26     uint *readLengthOriginal, *readLength;
27     uint Lread, readLengthPairOriginal;
28     uint iRead; //read identifier
29     uint readNmates;
30     char *readName;
31 
32     int iFrag; //frag number of the transcript, if the the transcript contains only one frag
33 
34     //loci
35     uint rStart, roStart, rLength, gStart, gLength, cStart; //read, original read, and genomic start/length, chromosome start
36     uint Chr,Str,roStr; //chromosome and strand and original read Strand
37     uint32 haploType; //haplotype index for diploid genome
38 
39     bool primaryFlag;
40 
41     uint nMatch;//min number of matches
42     uint nMM;//max number of mismatches
43     uint mappedLength; //total mapped length, sum of lengths of all blocks(exons)
44 
45     uint extendL; //extension length
46     intScore maxScore; //maximum Score
47 
48     uint nGap, lGap; //number of genomic gaps (>alignIntronMin) and their total length
49     uint nDel; //number of genomic deletions (ie genomic gaps)
50     uint nIns; //number of (ie read gaps)
51     uint lDel; //total genomic deletion length
52     uint lIns; //total genomic insertion length
53 
54     uint nUnique, nAnchor; //number of unique pieces in the alignment, number of anchor pieces in the alignment
55 
56     vector <int32> varInd;
57     vector <int32> varGenCoord, varReadCoord ;
58     vector <char> varAllele;
59 
60     //annotations
61     std::set <uint32> alignGenes;
62 
63     Transcript(); //resets to 0
64     void reset(); //reset to 0
65     void resetMapG(); // reset map to 0
66     void resetMapG(uint); // reset map to 0 for Lread bases
67     void add(Transcript*); // add
68     intScore alignScore(char **Read1, char *G, Parameters &P);
69     int variationAdjust(const Genome &mapGen, char *R);
70     string generateCigarP(); //generates CIGAR
71     void peOverlapSEtoPE(uint* mSta, const Transcript &t);
72     void extractSpliceJunctions(vector<array<uint64,2>> &sjOut, bool &annotYes);
73 
74     uint64 chrStartLengthExtended();
75 
76     bool transformGenome(Genome &genOut, Transcript & A);
77     bool convertGenomeCigar(Genome &genOut, Transcript & A);
78 
79 private:
80 
81 };
82 
83 #endif
84