1 #include "Transcriptome.h"
2 #include "streamFuns.h"
3 #include "GlobalVariables.h"
4 #include "ErrorWarning.h"
5 #include "serviceFuns.cpp"
6
Transcriptome(Parameters & Pin)7 Transcriptome::Transcriptome (Parameters &Pin) : P(Pin){
8
9 if (!P.quant.yes)
10 return;
11
12 trInfoDir = P.pGe.sjdbGTFfile=="-" ? P.pGe.gDir : P.sjdbInsert.outDir; //if GTF file is given at the mapping stage, it's always used for transcript info
13
14 ifstream &geStream = ifstrOpen(trInfoDir+"/geneInfo.tab", ERROR_OUT, "SOLUTION: utilize --sjdbGTFfile /path/to/annotations.gtf option at the genome generation step or mapping step", P);
15 geStream >> nGe;
16 geID.resize(nGe);
17 geName.resize(nGe);
18 geBiotype.resize(nGe);
19 geStream.ignore(999,'\n');
20 for (uint ii=0;ii<nGe;ii++) {
21 string line1;
22 getline(geStream,line1);
23 istringstream stream1(line1);
24 stream1 >> geID[ii] >> geName[ii] >> geBiotype[ii];
25 };
26 geStream.close();
27
28 if ( P.quant.trSAM.yes || P.quant.gene.yes ) {//load exon-transcript structures
29 //load tr and ex info
30 ifstream & trinfo = ifstrOpen(trInfoDir+"/transcriptInfo.tab", ERROR_OUT, "SOLUTION: utilize --sjdbGTFfile /path/to/annotantions.gtf option at the genome generation step or mapping step",P);
31 trinfo >> nTr;
32 trS=new uint [nTr];
33 trE=new uint [nTr];
34 trEmax=new uint [nTr];
35 trExI=new uint32 [nTr];
36 trExN=new uint16 [nTr];
37 trStr=new uint8 [nTr];
38 trID.resize(nTr);
39 trGene=new uint32 [nTr];
40 trLen=new uint32 [nTr];
41
42 for (uint32 itr=0; itr<nTr; itr++) {
43 uint16 str1;
44 trinfo >> trID[itr] >> trS[itr] >> trE[itr] >> trEmax[itr] >> str1 >> trExN[itr] >> trExI[itr] >> trGene[itr];
45 trStr[itr]=str1;
46
47 if (!trinfo.good()) {
48 ostringstream errOut;
49 errOut <<"EXITING because of FATAL GENOME INDEX FILE error: transcriptInfo.tab is corrupt, or is incompatible with the current STAR version\n";
50 errOut <<"SOLUTION: re-generate genome index";
51 exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_GENOME_FILES, P);
52 };
53
54 };
55 P.inOut->logMain << "Loaded transcript database, nTr="<<nTr<<endl;
56 trinfo.close();
57
58 ifstream & exinfo = ifstrOpen(trInfoDir+"/exonInfo.tab", ERROR_OUT, "SOLUTION: utilize --sjdbGTFfile /path/to/annotantions.gtf option at the genome generation step or mapping step", P);
59 exinfo >> nEx;
60 exSE = new uint32 [2*nEx];
61 exLenCum = new uint32 [nEx];
62 for (uint32 iex=0; iex<nEx; iex++) {
63 exinfo >> exSE[2*iex] >> exSE[2*iex+1] >> exLenCum[iex]; //reading all elements one after another
64 };
65 for (uint32 ii=0;ii<nTr;ii++) {
66 uint32 iex1=trExI[ii]+trExN[ii]-1; //last exon of the transcript
67 trLen[ii]=exLenCum[iex1]+exSE[2*iex1+1]-exSE[2*iex1]+1;
68 };
69
70 P.inOut->logMain << "Loaded exon database, nEx="<<nEx<<endl;
71 exinfo.close();
72 };
73 //load exon-gene structures
74 if ( P.quant.geCount.yes ) {
75 ifstream & exinfo = ifstrOpen(trInfoDir+"/exonGeTrInfo.tab", ERROR_OUT, "SOLUTION: utilize --sjdbGTFfile /path/to/annotantions.gtf option at the genome generation step or mapping step", P);
76 exinfo >> exG.nEx;
77 exG.s=new uint64[exG.nEx];
78 exG.e=new uint64[exG.nEx];
79 exG.eMax=new uint64[exG.nEx];
80 exG.str=new uint8[exG.nEx];
81 exG.g=new uint32[exG.nEx];
82 exG.t=new uint32[exG.nEx];
83 for (uint ii=0;ii<exG.nEx;ii++) {
84 int str1;
85 exinfo >> exG.s[ii] >> exG.e[ii] >> str1 >> exG.g[ii] >> exG.t[ii];
86 exG.str[ii] = (uint8) str1;
87 };
88 exinfo.close();
89 //calculate eMax
90 exG.eMax[0]=exG.e[0];
91 for (uint iex=1;iex<exG.nEx;iex++) {
92 exG.eMax[iex]=max(exG.eMax[iex-1],exG.e[iex]);
93 };
94 };
95
96 if ( P.quant.geneFull.yes ) {
97 ifstream & exinfo = ifstrOpen(trInfoDir+"/exonGeTrInfo.tab", ERROR_OUT, "SOLUTION: utilize --sjdbGTFfile /path/to/annotantions.gtf option at the genome generation step or mapping step", P);
98 exinfo >> exG.nEx;
99
100 geneFull.s=new uint64[nGe];
101 geneFull.e=new uint64[nGe];
102 geneFull.eMax=new uint64[nGe];
103 geneFull.g=new uint32[nGe];
104 geneFull.str=new uint8[nGe];
105
106 for (uint ig=0;ig<nGe;ig++) {
107 geneFull.s[ig]=-1;//largest uint64
108 geneFull.e[ig]=0;
109 };
110
111 for (uint ii=0;ii<exG.nEx;ii++) {
112 uint64 s1,e1,str1,g1,t1;
113 exinfo >> s1 >> e1 >> str1 >> g1 >> t1;
114 geneFull.s[g1]=min(geneFull.s[g1],s1);
115 geneFull.e[g1]=max(geneFull.e[g1],e1);
116 geneFull.str[g1] = (uint8) str1;
117 };
118 exinfo.close();
119
120 uint64 *gF=new uint64 [4*nGe];
121 for (uint ii=0;ii<nGe;ii++) {
122 gF[4*ii] = geneFull.s[ii];
123 gF[4*ii+1] = geneFull.e[ii];
124 gF[4*ii+2] = geneFull.str[ii];
125 gF[4*ii+3] = ii;
126 };
127
128 qsort((void*) gF, nGe, sizeof(uint64)*4, funCompareArrays<uint64,2>);
129
130 for (uint ii=0;ii<nGe;ii++) {
131 geneFull.s[ii] = gF[4*ii];
132 geneFull.e[ii] = gF[4*ii+1];
133 geneFull.str[ii] = gF[4*ii+2];
134 geneFull.g[ii] = gF[4*ii+3];
135 };
136
137 //calculate eMax
138 geneFull.eMax[0]=geneFull.e[0];
139 for (uint iex=1;iex<nGe;iex++) {
140 geneFull.eMax[iex]=max(geneFull.eMax[iex-1],geneFull.e[iex]);
141 };
142 };
143
144 };
145
quantsAllocate()146 void Transcriptome::quantsAllocate() {
147 if ( P.quant.geCount.yes ) {
148 quants = new Quantifications (nGe);
149 };
150 };
151
quantsOutput()152 void Transcriptome::quantsOutput() {
153 ofstream qOut(P.quant.geCount.outFile);
154 qOut << "N_unmapped";
155 for (int itype=0; itype<quants->geneCounts.nType; itype++) {
156 qOut << "\t" <<g_statsAll.unmappedAll;
157 };
158 qOut << "\n";
159
160 qOut << "N_multimapping";
161 for (int itype=0; itype<quants->geneCounts.nType; itype++){
162 qOut << "\t" <<quants->geneCounts.cMulti;
163 };
164 qOut << "\n";
165
166 qOut << "N_noFeature";
167 for (int itype=0; itype<quants->geneCounts.nType; itype++){
168 qOut << "\t" <<quants->geneCounts.cNone[itype];
169 };
170 qOut << "\n";
171
172 qOut << "N_ambiguous";
173 for (int itype=0; itype<quants->geneCounts.nType; itype++) {
174 qOut << "\t" <<quants->geneCounts.cAmbig[itype];
175 };
176 qOut << "\n";
177
178 for (uint32 ig=0; ig<nGe; ig++) {
179 qOut << geID[ig];
180 for (int itype=0; itype<quants->geneCounts.nType; itype++) {
181 qOut << "\t" <<quants->geneCounts.gCount[itype][ig];
182 };
183 qOut << "\n";
184 };
185 qOut.close();
186 };
187