1 #include "Genome.h"
2 #include "SuffixArrayFuns.h"
3 #include "PackedArray.h"
4 #include "ErrorWarning.h"
5 #include "streamFuns.h"
6 #include "SharedMemory.h"
7 #include "genomeScanFastaFiles.h"
8 
9 //addresses with respect to shmStart of several genome values
10 #define SHM_sizeG 0
11 #define SHM_sizeSA 8
12 #define SHM_startG 16
13 // #define SHM_startSA 24
14 //
15 // //first available byt of the shm
16 // #define SHM_startSHM 32
17 
genomeLoad()18 void Genome::genomeLoad(){//allocate and load Genome
19 
20     time_t rawtime;
21     time ( &rawtime );
22     *(P.inOut->logStdOut) << timeMonthDayTime(rawtime) << " ..... loading genome\n" <<flush;
23 
24     uint *shmNG=NULL, *shmNSA=NULL;   //pointers to shm stored values , *shmSG, *shmSSA
25     uint64 shmSize=0;//, shmStartG=0; shmStartSA=0;
26 
27     uint L=200,K=6;
28 
29     Parameters P1;
30 
31     //some initializations before reading the parameters
32     GstrandBit=0;
33 
34     ifstream parFile((pGe.gDir+("/genomeParameters.txt")).c_str());
35     if (parFile.good()) {
36         P.inOut->logMain << "Reading genome generation parameters:\n";
37 
38         //read genome internal parameters
39         while (parFile.good()) {
40             string word1;
41             parFile >> word1;
42             if (word1=="###") {
43                 parFile >> word1;
44                 if (word1=="GstrandBit") {
45                     uint gsb1=0;
46                     parFile >> gsb1;
47                     GstrandBit=(uint8) gsb1;
48                     P.inOut->logMain << "### GstrandBit=" << (uint) GstrandBit <<"\n";
49                 } else {
50                     P.inOut->logMain << "### " <<word1;
51                     getline(parFile,word1);
52                     P.inOut->logMain <<word1<<"\n";
53                 };
54             };
55         };
56         parFile.clear();
57         parFile.seekg(0,ios::beg);//rewind
58 
59 
60         P1.inOut = P.inOut;
61         P1.scanAllLines(parFile,3,-1);
62         parFile.close();
63     } else {
64         ostringstream errOut;
65         errOut << "EXITING because of FATAL ERROR: could not open genome file "<< pGe.gDir+("/genomeParameters.txt") << endl;
66         errOut << "SOLUTION: check that the path to genome files, specified in --genomeDir is correct and the files are present, and have user read permsissions\n" <<flush;
67         exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_GENOME_FILES, P);
68     };
69 
70     //check genome version
71     if (P1.versionGenome.size()==0) {//
72         ostringstream errOut;
73         errOut << "EXITING because of FATAL ERROR: read no value for the versionGenome parameter from genomeParameters.txt file\n";
74         errOut << "SOLUTION: please re-generate genome from scratch with the latest version of STAR\n";
75         exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_GENOME_FILES, P);
76     } else if (P1.versionGenome == P.versionGenome) {//
77         P.inOut->logMain << "Genome version is compatible with current STAR\n";
78     } else {
79         ostringstream errOut;
80         errOut << "EXITING because of FATAL ERROR: Genome version: " << P1.versionGenome << " is INCOMPATIBLE with running STAR version: "<< STAR_VERSION <<"\n";
81         errOut << "SOLUTION: please re-generate genome from scratch with running version of STAR, or with version: " << P.versionGenome <<"\n";
82         exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_GENOME_FILES, P);
83     };
84 
85     //find chr starts from files
86     chrInfoLoad();
87 
88     //check if sjdbInfo.txt exists => genome was generated with junctions
89     bool sjdbInfoExists=false;
90     struct stat sjdb1;
91     if ( stat( (pGe.gDir+"/sjdbInfo.txt").c_str(), &sjdb1) == 0 ) {//file exists
92         sjdbInfoExists=true;
93     };
94 
95     if ( P.sjdbInsert.yes && sjdbInfoExists && P1.pGe.sjdbInsertSave=="") {
96         //if sjdbInsert, and genome had junctions, and genome is old - it should be re-generated with new STAR
97         ostringstream errOut;
98         errOut << "EXITING because of FATAL ERROR: old Genome is INCOMPATIBLE with on the fly junction insertion\n";
99         errOut << "SOLUTION: please re-generate genome from scratch with the latest version of STAR\n";
100         exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_GENOME_FILES, P);
101     };
102 
103     //record required genome parameters in P
104     pGe.gSAindexNbases=P1.pGe.gSAindexNbases;
105     pGe.gChrBinNbits=P1.pGe.gChrBinNbits;
106     genomeChrBinNbases=1LLU<<pGe.gChrBinNbits;
107     pGe.gSAsparseD=P1.pGe.gSAsparseD;
108 
109     if (P1.pGe.gFileSizes.size()>0){//genomeFileSize was recorded in the genomeParameters file, copy the values to P
110         pGe.gFileSizes = P1.pGe.gFileSizes;
111     };
112 
113     if (P.parArray.at(pGe.sjdbOverhang_par)->inputLevel==0 && P1.pGe.sjdbOverhang>0) {
114         //if --sjdbOverhang was not defined by user and it was defined >0 at the genome generation step, then use pGe.sjdbOverhang from the genome generation step
115         pGe.sjdbOverhang=P1.pGe.sjdbOverhang;
116         P.inOut->logMain << "--sjdbOverhang = " << pGe.sjdbOverhang << " taken from the generated genome\n";
117     } else if (sjdbInfoExists && P.parArray.at(pGe.sjdbOverhang_par)->inputLevel>0 && pGe.sjdbOverhang!=P1.pGe.sjdbOverhang) {
118         //if pGe.sjdbOverhang was defined at the genome generation step,the mapping step value has to agree with it
119         ostringstream errOut;
120         errOut << "EXITING because of fatal PARAMETERS error: present --sjdbOverhang="<<pGe.sjdbOverhang << " is not equal to the value at the genome generation step ="<< P1.pGe.sjdbOverhang << "\n";
121         errOut << "SOLUTION: \n" <<flush;
122         exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_GENOME_FILES, P);
123     };
124     sjdbOverhang = pGe.sjdbOverhang;
125     sjdbLength = pGe.sjdbOverhang==0 ? 0 : pGe.sjdbOverhang*2+1;
126 
127     pGe.gType=1;
128     if (P1.pGe.gTypeString=="SuperTranscriptome")
129         pGe.gType=101;
130 
131     P.inOut->logMain << "Started loading the genome: " << asctime (localtime ( &rawtime ))<<"\n"<<flush;
132 
133     ifstream GenomeIn, SAin, SAiIn;
134 
135     if (pGe.gFileSizes.size() < 2) {//no size info available
136         pGe.gFileSizes.push_back(0);
137         pGe.gFileSizes.push_back(0);
138     };
139     nGenome = OpenStream("Genome",GenomeIn,pGe.gFileSizes.at(0));
140     nSAbyte = OpenStream("SA",SAin,pGe.gFileSizes.at(1));
141     OpenStream("SAindex",SAiIn,1); //we do not need SAiIn siz, using a dummy value here to prevent from reading its size from the disk
142 
143     uint SAiInBytes=0;
144     SAiInBytes += fstreamReadBig(SAiIn,(char*) &pGe.gSAindexNbases, sizeof(pGe.gSAindexNbases));
145     genomeSAindexStart = new uint[pGe.gSAindexNbases+1];
146     SAiInBytes += fstreamReadBig(SAiIn,(char*) genomeSAindexStart, sizeof(genomeSAindexStart[0])*(pGe.gSAindexNbases+1));
147     nSAi=genomeSAindexStart[pGe.gSAindexNbases];
148     P.inOut->logMain << "Read from SAindex: pGe.gSAindexNbases=" << pGe.gSAindexNbases <<"  nSAi="<< nSAi <<endl;
149 
150     /////////////////////////////////// at this point all array sizes should be known: calculate packed array lengths
151     if (GstrandBit==0) {//not defined before
152         GstrandBit = (uint) floor(log(nGenome)/log(2))+1;
153         if (GstrandBit<32)
154             GstrandBit=32; //TODO: use simple access function for SA
155     };
156 
157     GstrandMask = ~(1LLU<<GstrandBit);
158     nSA=(nSAbyte*8)/(GstrandBit+1);
159     SA.defineBits(GstrandBit+1,nSA);
160 
161     SAiMarkNbit=GstrandBit+1;
162     SAiMarkAbsentBit=GstrandBit+2;
163 
164     SAiMarkNmaskC=1LLU << SAiMarkNbit;
165     SAiMarkNmask=~SAiMarkNmaskC;
166     SAiMarkAbsentMaskC=1LLU << SAiMarkAbsentBit;
167     SAiMarkAbsentMask=~SAiMarkAbsentMaskC;
168 
169     SAi.defineBits(GstrandBit+3,nSAi);
170 
171     P.inOut->logMain << "nGenome=" << nGenome << ";  nSAbyte=" << nSAbyte <<endl<< flush;
172     P.inOut->logMain <<"GstrandBit="<<int(GstrandBit)<<"   SA number of indices="<<nSA<<endl<<flush;
173 
174     shmSize=SA.lengthByte + nGenome+L+L+SHM_startG+8;
175     shmSize+= SAi.lengthByte;
176 
177     if ((pGe.gLoad=="LoadAndKeep" ||
178          pGe.gLoad=="LoadAndRemove" ||
179          pGe.gLoad=="LoadAndExit" ||
180          pGe.gLoad=="Remove") && sharedMemory == NULL) {
181 
182         bool unloadLast = pGe.gLoad=="LoadAndRemove";
183         try {
184             sharedMemory = new SharedMemory(shmKey, unloadLast);
185             sharedMemory->SetErrorStream(P.inOut->logStdOut);
186 
187             if (!sharedMemory->NeedsAllocation())
188             P.inOut->logMain <<"Found genome in shared memory\n"<<flush;
189 
190             if (pGe.gLoad=="Remove") {//kill the genome and exit
191                 if (sharedMemory->NeedsAllocation()) {//did not find genome in shared memory, nothing to kill
192                 ostringstream errOut;
193                 errOut << "EXITING: Did not find the genome in memory, did not remove any genomes from shared memory\n";
194                 exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_GENOME_FILES, P);
195                 } else {
196                             sharedMemory->Clean();
197                     P.inOut->logMain <<"DONE: removed the genome from shared memory\n"<<flush;
198                             return;
199                 };
200             };
201 
202             if (sharedMemory->NeedsAllocation()){
203                 P.inOut->logMain <<"Allocating shared memory for genome\n"<<flush;
204                 sharedMemory->Allocate(shmSize);
205             };
206         } catch (const SharedMemoryException & exc){
207             HandleSharedMemoryException(exc, shmSize);
208         };
209 
210         shmStart = (char*) sharedMemory->GetMapped();
211         shmNG= (uint*) (shmStart+SHM_sizeG);
212         shmNSA= (uint*) (shmStart+SHM_sizeSA);
213 
214         if (!sharedMemory->IsAllocator()) {
215             // genome is in shared memory or being loaded
216             // wait for the process that will populate it
217             // and record the sizes
218             uint iwait=0;
219             while (*shmNG != nGenome) {
220                 iwait++;
221                 P.inOut->logMain <<"Another job is still loading the genome, sleeping for 1 min\n" <<flush;
222                 sleep(60);
223                 if (iwait==100) {
224                     ostringstream errOut;
225                     errOut << "EXITING because of FATAL ERROR: waited too long for the other job to finish loading the genome" << strerror(errno) << "\n" <<flush;
226                     errOut << "SOLUTION: remove the shared memory chunk by running STAR with --genomeLoad Remove, and restart STAR" <<flush;
227                     exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_GENOME_LOADING_WAITED_TOO_LONG, P);
228                 };
229             };
230             if (nSAbyte!=*shmNSA){
231                 ostringstream errOut;
232                 errOut << "EXITING because of FATAL ERROR: the SA file size did not match what we found in shared memory" << "\n" << flush;
233                 errOut << "SOLUTION: remove the shared memory chunk by running STAR with --genomeLoad Remove, and restart STAR" << flush;
234                 exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INCONSISTENT_DATA, P);
235             };
236             P.inOut->logMain << "Using shared memory for genome. key=0x" <<hex<<shmKey<<dec<< ";   shmid="<< sharedMemory->GetId() <<endl<<flush;
237         };
238         G1=shmStart+SHM_startG;
239         SA.pointArray(G1+nGenome+L+L);
240         char* shmNext=SA.charArray+nSAbyte;
241 
242         SAi.pointArray(shmNext);
243         shmNext += SAi.lengthByte;
244     } else if (pGe.gLoad=="NoSharedMemory") {// simply allocate memory, do not use shared memory
245         genomeInsertL=0;
246         genomeInsertChrIndFirst=nChrReal;
247         if (pGe.gFastaFiles.at(0)!="-") {//will insert sequences in the genome, now estimate the extra size
248            uint oldlen=chrStart.back();//record the old length
249            genomeInsertL=genomeScanFastaFiles(P, G, false, *this)-oldlen;
250         };
251 
252         try {
253             if (P.sjdbInsert.pass1 || P.sjdbInsert.pass2) {
254                 //reserve extra memory for insertion at the 1st and/or 2nd step
255                 nGenomeInsert=nGenome+genomeInsertL;
256                 nSAinsert=nSA+2*genomeInsertL;
257 
258                 nGenomePass1=nGenomeInsert;
259                 nSApass1=nSAinsert;
260                 if (P.sjdbInsert.pass1) {
261                     nGenomePass1+=P.limitSjdbInsertNsj*sjdbLength;
262                     nSApass1+=2*P.limitSjdbInsertNsj*sjdbLength;
263                 };
264 
265                 nGenomePass2=nGenomePass1;
266                 nSApass2=nSApass1;
267                 if (P.sjdbInsert.pass2) {
268                     nGenomePass2+=P.limitSjdbInsertNsj*sjdbLength;
269                     nSApass2+=2*P.limitSjdbInsertNsj*sjdbLength;
270                 };
271 
272                 G1=new char[nGenomePass2+L+L];
273 
274                 SApass2.defineBits(GstrandBit+1,nSApass2);
275                 SApass2.allocateArray();
276 
277                 SApass1.defineBits(GstrandBit+1,nSApass1);
278                 SApass1.pointArray(SApass2.charArray+SApass2.lengthByte-SApass1.lengthByte);
279 
280                 SAinsert.defineBits(GstrandBit+1,nSAinsert);
281                 SAinsert.pointArray(SApass1.charArray+SApass1.lengthByte-SAinsert.lengthByte);
282 
283                 SA.pointArray(SAinsert.charArray+SAinsert.lengthByte-SA.lengthByte);
284             } else {//no sjdb insertions
285                 if (genomeInsertL==0) {// no sequence insertion, simple allocation
286                     G1=new char[nGenome+L+L];
287                     SA.allocateArray();
288                 } else {
289                     G1=new char[nGenome+L+L+genomeInsertL];
290                     SAinsert.defineBits(GstrandBit+1,nSA+2*genomeInsertL);//TODO: re-define GstrandBit if necessary
291                     SAinsert.allocateArray();
292                     SA.pointArray(SAinsert.charArray+SAinsert.lengthByte-SA.lengthByte);
293                 };
294             };
295             SAi.allocateArray();
296             P.inOut->logMain <<"Shared memory is not used for genomes. Allocated a private copy of the genome.\n"<<flush;
297         } catch (exception & exc) {
298             ostringstream errOut;
299             errOut <<"EXITING: fatal error trying to allocate genome arrays, exception thrown: "<<exc.what()<<endl;
300             errOut <<"Possible cause 1: not enough RAM. Check if you have enough RAM " << nGenome+L+L+SA.lengthByte+SAi.lengthByte+2000000000 << " bytes\n";
301             errOut <<"Possible cause 2: not enough virtual memory allowed with ulimit. SOLUTION: run ulimit -v " <<  nGenome+L+L+SA.lengthByte+SAi.lengthByte+2000000000<<endl <<flush;
302             exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_MEMORY_ALLOCATION, P);
303         };
304     };
305 
306     G=G1+L;
307 
308     bool isAllocatorProcess = sharedMemory != NULL && sharedMemory->IsAllocator();
309 
310     if (pGe.gLoad=="NoSharedMemory" || isAllocatorProcess) {//load genome and SAs from files
311         //load genome
312         P.inOut->logMain <<"Genome file size: "<<nGenome <<" bytes; state: good=" <<GenomeIn.good()\
313                 <<" eof="<<GenomeIn.eof()<<" fail="<<GenomeIn.fail()<<" bad="<<GenomeIn.bad()<<"\n"<<flush;
314         P.inOut->logMain <<"Loading Genome ... " << flush;
315         uint64 genomeReadBytesN=fstreamReadBig(GenomeIn,G,nGenome);
316         P.inOut->logMain <<"done! state: good=" <<GenomeIn.good()\
317                          <<" eof="<<GenomeIn.eof()<<" fail="<<GenomeIn.fail()<<" bad="<<GenomeIn.bad()<<"; loaded "<<genomeReadBytesN<<" bytes\n" << flush;
318         GenomeIn.close();
319 
320         for (uint ii=0;ii<L;ii++) {// attach a tail with the largest symbol
321             G1[ii]=K-1;
322             G[nGenome+ii]=K-1;
323         };
324 
325         //load SAs
326         P.inOut->logMain <<"SA file size: "<<SA.lengthByte <<" bytes; state: good=" <<SAin.good()\
327                 <<" eof="<<SAin.eof()<<" fail="<<SAin.fail()<<" bad="<<SAin.bad()<<"\n"<<flush;
328         P.inOut->logMain <<"Loading SA ... " << flush;
329         genomeReadBytesN=fstreamReadBig(SAin,SA.charArray, SA.lengthByte);
330         P.inOut->logMain <<"done! state: good=" <<SAin.good()\
331                 <<" eof="<<SAin.eof()<<" fail="<<SAin.fail()<<" bad="<<SAin.bad()<<"; loaded "<<genomeReadBytesN<<" bytes\n" << flush;
332         SAin.close();
333 
334         P.inOut->logMain <<"Loading SAindex ... " << flush;
335         SAiInBytes +=fstreamReadBig(SAiIn,SAi.charArray, SAi.lengthByte);
336         P.inOut->logMain <<"done: "<<SAiInBytes<<" bytes\n" << flush;
337     };
338 
339     SAiIn.close();
340 
341     if ((pGe.gLoad=="LoadAndKeep" ||
342          pGe.gLoad=="LoadAndRemove" ||
343          pGe.gLoad=="LoadAndExit") && isAllocatorProcess )
344     {
345         //record sizes. This marks the end of genome loading
346         *shmNG=nGenome;
347         *shmNSA=nSAbyte;
348     };
349 
350     time ( &rawtime );
351     P.inOut->logMain << "Finished loading the genome: " << asctime (localtime ( &rawtime )) <<"\n"<<flush;
352 
353     #ifdef COMPILE_FOR_MAC
354     {
355         uint sum1=0;
356         for (uint ii=0;ii<nGenome; ii++) sum1 +=  (uint) (unsigned char) G[ii];
357         P.inOut->logMain << "Sum of all Genome bytes: " <<sum1 <<"\n"<<flush;
358         sum1=0;
359         for (uint ii=0;ii<SA.lengthByte; ii++) sum1 +=  (uint) (unsigned char) SA.charArray[ii];
360         P.inOut->logMain << "Sum of all SA bytes: " <<sum1 <<"\n"<<flush;
361         sum1=0;
362         for (uint ii=0;ii<SAi.lengthByte; ii++) sum1 +=  (uint) (unsigned char) SAi.charArray[ii];
363         P.inOut->logMain << "Sum of all SAi bytes: " <<sum1 <<"\n"<<flush;
364     };
365     #endif
366 
367     if (pGe.gLoad=="LoadAndExit") {
368 	uint shmSum=0;
369 	for (uint ii=0;ii<shmSize;ii++) shmSum+=shmStart[ii];
370         P.inOut->logMain << "pGe.gLoad=LoadAndExit: completed, the genome is loaded and kept in RAM, EXITING now.\n"<<flush;
371         return;
372     };
373 
374     Genome::insertSequences();
375 
376     Genome::chrBinFill();
377 
378     Genome::loadSJDB(pGe.gDir);
379 
380     //check and redefine some parameters
381     //max intron size
382     if (P.alignIntronMax==0 && P.alignMatesGapMax==0) {
383         P.inOut->logMain << "alignIntronMax=alignMatesGapMax=0, the max intron size will be approximately determined by (2^winBinNbits)*winAnchorDistNbins=" \
384                 << (1LLU<<P.winBinNbits)*P.winAnchorDistNbins <<endl;
385     } else {
386         //redefine winBinNbits
387         P.winBinNbits = (uint) floor( log2( max( max(4LLU,P.alignIntronMax), (P.alignMatesGapMax==0 ? 1000LLU : P.alignMatesGapMax) ) /4 ) + 0.5);
388         P.winBinNbits = max( P.winBinNbits, (uint) floor(log2(nGenome/40000+1)+0.5) );
389         //ISSUE - to be fixed in STAR3: if alignIntronMax>0 but alignMatesGapMax==0, winBinNbits will be defined by alignIntronMax
390         P.inOut->logMain << "To accommodate alignIntronMax="<<P.alignIntronMax<<" redefined winBinNbits="<< P.winBinNbits <<endl;
391     };
392 
393     if (P.winBinNbits > pGe.gChrBinNbits) {
394        P.inOut->logMain << "winBinNbits=" <<P.winBinNbits <<" > " << "pGe.gChrBinNbits=" << pGe.gChrBinNbits << "   redefining:\n";
395        P.winBinNbits=pGe.gChrBinNbits;
396        P.inOut->logMain << "winBinNbits=" <<P.winBinNbits <<endl;
397     };
398 
399 
400     if (P.alignIntronMax==0 && P.alignMatesGapMax==0) {
401     } else {
402         //redefine winFlankNbins,winAnchorDistNbins
403         P.winFlankNbins=max(P.alignIntronMax,P.alignMatesGapMax)/(1LLU<<P.winBinNbits)+1;
404         P.winAnchorDistNbins=2*P.winFlankNbins;
405         P.inOut->logMain << "To accommodate alignIntronMax="<<P.alignIntronMax<<" and alignMatesGapMax="<<P.alignMatesGapMax<<\
406                 ", redefined winFlankNbins="<<P.winFlankNbins<<" and winAnchorDistNbins="<<P.winAnchorDistNbins<<endl;
407     };
408 
409     P.winBinChrNbits=pGe.gChrBinNbits-P.winBinNbits;
410     P.winBinN = nGenome/(1LLU << P.winBinNbits)+1;//this may be changed later
411 
412     if (pGe.gType==101) {//SuperTranscriptome
413 		superTr = new SuperTranscriptome (P);
414         superTr->load(G, chrStart, chrLength);
415 
416         //genomeOut
417         P.pGeOut.gDir=pGe.gDir+"/fullGenome/";
418         genomeOut.convYes=true;
419         genomeOut.gapsAreJunctions=true;
420         genomeOut.convFile=pGe.gDir+"/fullGenome/conversionToFullGenome.tsv";
421 
422         genomeOut.g = new Genome(P,P.pGeOut);
423         genomeOut.g->genomeOut=genomeOut;
424         genomeOut.g->genomeOutLoad();
425 
426         //debug checks
427 //         for (auto &b : genomeOut.g->genomeOut.convBlocks) {
428 //             uint64 g1=*(genomeOut.g->G+b[2]);
429 //             uint64 g2=*(G+b[0]);
430 //             if (g1!=g2)
431 //                 cout <<b[0]<<" "<<b[1]<<" "<<b[2]<<endl;
432 //         };
433     };
434 
435 
436     if (P1.pGe.transform.typeString=="Haploid") {
437         pGe.transform.type=1;
438     } else if (P1.pGe.transform.typeString=="Diploid") {
439         pGe.transform.type=2;
440     } else {//TODO check for wrong values
441         pGe.transform.type=0;
442     };
443 
444     if (pGe.transform.outYes) {
445         if (pGe.transform.type == 0) {
446             exitWithError("EXITING because of FATAL INPUT ERROR: outTransformOutput is set, but the genome " +pGe.gDir + " was generated without transformation\n"
447                            + "SOLUTION: use the default--outTransformOutput None, or re-generate the genome with transformation options.\n"
448                           ,std::cerr, P.inOut->logMain, EXIT_CODE_MEMORY_ALLOCATION, P);
449 
450         } else {//transform genome coordinates
451             //genomeOut
452             P.pGeOut.gDir=pGe.gDir+"/OriginalGenome/";
453             genomeOut.convYes=true;
454             genomeOut.gapsAreJunctions=false;
455             genomeOut.convFile=pGe.gDir+"/transformGenomeBlocks.tsv";
456 
457 
458             genomeOut.g = new Genome(P,P.pGeOut);
459             genomeOut.g->genomeOut=genomeOut;
460             genomeOut.g->genomeOutLoad();
461         };
462     };
463 
464     if (P.pGe.gLoad=="LoadAndExit" || P.pGe.gLoad=="Remove") {
465         exit(0);
466     };
467 };
468 
469 ////////////////////////////////////////////////////////////////////////////////////////////////////////////
470 //
loadSJDB(string & genDir)471 void Genome::loadSJDB(string &genDir)
472 {
473     //splice junctions database
474     if (nGenome==chrStart[nChrReal]) {//no sjdb
475         sjdbN=0;
476         sjGstart=chrStart[nChrReal]+1; //not sure why I need that
477     } else {//there are sjdb chromosomes
478         ifstream sjdbInfo((genDir+"/sjdbInfo.txt").c_str());
479         if (sjdbInfo.fail()) {
480             ostringstream errOut;
481             errOut << "EXITING because of FATAL error, could not open file " << (genDir+"/sjdbInfo.txt") <<"\n";
482             errOut << "SOLUTION: check that the path to genome files, specified in --genomeDir is correct and the files are present, and have user read permsissions\n" <<flush;
483             exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
484         };
485 
486 
487         sjdbInfo >> sjdbN >> pGe.sjdbOverhang;
488         P.inOut->logMain << "Processing splice junctions database sjdbN=" <<sjdbN<<",   pGe.sjdbOverhang=" <<pGe.sjdbOverhang <<" \n";
489 
490         sjChrStart=nChrReal;
491         sjGstart=chrStart[sjChrStart];
492 
493         //fill the sj-db to genome translation array
494         sjDstart=new uint [sjdbN];
495         sjAstart=new uint [sjdbN];
496         sjdbStart=new uint [sjdbN];
497         sjdbEnd=new uint [sjdbN];
498 
499         sjdbMotif=new uint8 [sjdbN];
500         sjdbShiftLeft=new uint8 [sjdbN];
501         sjdbShiftRight=new uint8 [sjdbN];
502         sjdbStrand=new uint8 [sjdbN];
503 
504         for (uint ii=0;ii<sjdbN;ii++) {//get the info about junctions from sjdbInfo.txt
505             {
506                 uint16 d1,d2,d3,d4;
507                 sjdbInfo >> sjdbStart[ii] >> sjdbEnd[ii] >> d1 >> d2 >> d3 >> d4;
508                 sjdbMotif[ii]      = (uint8) d1;
509                 sjdbShiftLeft[ii]  = (uint8) d2;
510                 sjdbShiftRight[ii] = (uint8) d3;
511                 sjdbStrand[ii] = (uint8) d4;
512             };
513             sjDstart[ii]   = sjdbStart[ii]  - pGe.sjdbOverhang;
514             sjAstart[ii]   = sjdbEnd[ii] + 1;
515             if (sjdbMotif[ii]==0) {//shinon-canonical junctions back to their true coordinates
516                 sjDstart[ii] += sjdbShiftLeft[ii];
517                 sjAstart[ii] += sjdbShiftLeft[ii];
518             };
519         };
520     };
521 };