1 #include "Genome.h"
2 #include "SuffixArrayFuns.h"
3 #include "PackedArray.h"
4 #include "ErrorWarning.h"
5 #include "streamFuns.h"
6 #include "SharedMemory.h"
7 #include "genomeScanFastaFiles.h"
8 
9 #include <time.h>
10 #include <cmath>
11 #include <unistd.h>
12 #include <sys/stat.h>
13 
14 //arbitrary number for ftok function
15 #define SHM_projectID 23
16 
Genome(Parameters & P,ParametersGenome & pGe)17 Genome::Genome (Parameters &P, ParametersGenome &pGe): shmStart(NULL), P(P), pGe(pGe), sharedMemory(NULL)
18 {
19     shmKey=ftok(pGe.gDir.c_str(),SHM_projectID);
20     genomeOut.g=this;//will change if genomeOut is different from genomeMain
21     genomeOut.convYes=false;
22     sjdbOverhang = pGe.sjdbOverhang; //will be re-defined later if another value was used for the generated genome
23     sjdbLength = pGe.sjdbOverhang==0 ? 0 : pGe.sjdbOverhang*2+1;
24 };
25 
26 // Genome::~Genome()
27 // {
28 //     if (sharedMemory != NULL)
29 //         delete sharedMemory;
30 //     sharedMemory = NULL;
31 // }
32 
freeMemory()33 void Genome::freeMemory(){//free big chunks of memory used by genome and suffix array
34 
35     if (pGe.gLoad=="NoSharedMemory") {//can deallocate only for non-shared memory
36         delete[] G1;
37         G1=NULL;
38         SA.deallocateArray();
39         SApass2.deallocateArray();
40         SAi.deallocateArray();
41     };
42 };
43 
OpenStream(string name,ifstream & stream,uint size)44 uint Genome::OpenStream(string name, ifstream & stream, uint size)
45 {
46     stream.open((pGe.gDir+ "/" +name).c_str(), ios::binary);
47     if (!stream.good()) {
48         ostringstream errOut;
49         errOut << "EXITING because of FATAL ERROR: could not open genome file: "<< pGe.gDir << "/" << name <<"\n";
50         errOut << "SOLUTION: check that the path to genome files, specified in --genomeDir is correct and the files are present, and have user read permissions\n" <<flush;
51         exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_GENOME_FILES, P);
52     };
53 
54 
55     if (size>0) {
56         P.inOut->logMain << name << ": size given as a parameter = " << size <<"\n";
57     } else {
58         P.inOut->logMain << "Checking " << name << " size";
59         stream.seekg (0, ios::end);
60         int64 size1 = stream.tellg();
61         if (size1<=0) {
62             ostringstream errOut;
63             errOut << "EXITING because of FATAL ERROR: failed reading from genome file: "<< pGe.gDir << "/" << name <<"\n";
64             errOut << "SOLUTION: re-generate the genome index\n";
65             exitWithError(errOut.str(),std::cerr, P.inOut->logMain, 1, P);
66         };
67         size=(uint) size1;
68         stream.clear();
69         stream.seekg (0, ios::beg);
70         P.inOut->logMain << "file size: "<< size <<" bytes; state: good=" <<stream.good()\
71                 <<" eof="<<stream.eof()<<" fail="<<stream.fail()<<" bad="<<stream.bad()<<"\n"<<flush;
72     };
73 
74     return size;
75 };
76 
77 
78 
HandleSharedMemoryException(const SharedMemoryException & exc,uint64 shmSize)79 void Genome::HandleSharedMemoryException(const SharedMemoryException & exc, uint64 shmSize)
80 {
81     ostringstream errOut;
82     errOut << "Shared memory error: " << exc.GetErrorCode() << ", errno: " << strerror(exc.GetErrorDetail()) << "(" << errno << ")" << endl;
83 
84     int exitCode = EXIT_CODE_SHM;
85     switch (exc.GetErrorCode())
86     {
87         case EOPENFAILED:
88             errOut << "EXITING because of FATAL ERROR: problems with shared memory: error from shmget() or shm_open()." << endl << flush;
89             errOut << "SOLUTION: check shared memory settings as explained in STAR manual, OR run STAR with --genomeLoad NoSharedMemory to avoid using shared memory" << endl << flush;
90             break;
91         case EEXISTS:
92             errOut << "EXITING: fatal error from shmget() trying to allocate shared memory piece." << endl;
93             errOut << "Possible cause 1: not enough RAM. Check if you have enough RAM of at least " << shmSize+2000000000 << " bytes" << endl;
94             errOut << "Possible cause 2: not enough virtual memory allowed with ulimit. SOLUTION: run ulimit -v " <<  shmSize+2000000000 << endl;
95             errOut << "Possible cause 3: allowed shared memory size is not large enough. SOLUTIONS: (i) consult STAR manual on how to increase shared memory allocation; " \
96             "(ii) ask your system administrator to increase shared memory allocation; (iii) run STAR with --genomeLoad NoSharedMemory" << endl<<flush;
97             break;
98         case EFTRUNCATE:
99             errOut << "EXITING: fatal error from ftruncate() error shared memory."  << endl;
100             errOut << "Possible cause 1: not enough RAM. Check if you have enough RAM of at least " << shmSize+2000000000 << " bytes" << endl << flush;
101             exitCode = EXIT_CODE_MEMORY_ALLOCATION;
102             break;
103         case EMAPFAILED:
104             errOut << "EXITING because of FATAL ERROR: problems with shared memory: error from shmat() while trying to get address of the shared memory piece." << endl << flush;
105             errOut << "SOLUTION: check shared memory settings as explained in STAR manual, OR run STAR with --genomeLoad NoSharedMemory to avoid using shared memory" << endl << flush;
106             break;
107         case ECLOSE:
108             errOut << "EXITING because of FATAL ERROR: could not close the shared memory object." << endl << flush;
109             break;
110         case EUNLINK:
111             #ifdef POSIX_SHARED_MEM
112             errOut << "EXITING because of FATAL ERROR:  could not delete the shared object." << endl << flush;
113             #else
114             errOut << "EXITING because of FATAL ERROR: problems with shared memory: error from shmctl() while trying to remove shared memory piece." << endl << flush;
115             errOut << "SOLUTION: check shared memory settings as explained in STAR manual, OR run STAR with --genomeLoad NoSharedMemory to avoid using shared memory" << endl << flush;
116             #endif
117             break;
118         default:
119             errOut << "EXITING because of FATAL ERROR: There was an issue with the shared memory allocation. Try running STAR with --genomeLoad NoSharedMemory to avoid using shared memory.";
120             break;
121     }
122 
123     try
124     {
125         if (sharedMemory != NULL)
126             sharedMemory->Clean();
127     }
128     catch(...)
129     {}
130 
131     exitWithError(errOut.str(),std::cerr, P.inOut->logMain, exitCode, P);
132 };
133 
134 //////////////////////////////////////////////////////////////////////////////////////////
chrInfoLoad()135 void Genome::chrInfoLoad() {//find chrStart,Length,nChr from Genome G
136 
137     //load chr names
138     ifstream chrStreamIn ( (pGe.gDir+"/chrName.txt").c_str() );
139     if (chrStreamIn.fail()) {
140         ostringstream errOut;
141         errOut << "EXITING because of FATAL error, could not open file " << (pGe.gDir+"/chrName.txt") <<"\n";
142         errOut << "SOLUTION: re-generate genome files with STAR --runMode genomeGenerate\n";
143         exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
144     };
145 
146     char chrInChar[1000];
147 
148     while (chrStreamIn.good()) {
149         string chrIn;
150         chrStreamIn.getline(chrInChar,1000);
151         chrIn=chrInChar;
152         if (chrIn=="") break;
153         chrName.push_back(chrIn);
154     };
155     chrStreamIn.close();
156     nChrReal=chrName.size();
157 
158     P.inOut->logMain << "Number of real (reference) chromosomes= " << nChrReal <<"\n"<<flush;
159     chrStart.resize(nChrReal+1);
160     chrLength.resize(nChrReal);
161 
162     //load chr lengths
163     chrStreamIn.open( (pGe.gDir+"/chrLength.txt").c_str() );
164     if (chrStreamIn.fail()) {
165         ostringstream errOut;
166         errOut << "EXITING because of FATAL error, could not open file " << (pGe.gDir+"/chrLength.txt") <<"\n";
167         errOut << "SOLUTION: re-generate genome files with STAR --runMode genomeGenerate\n";
168         exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
169     };
170 
171     for  (uint ii=0;ii<nChrReal;ii++) {
172         chrStreamIn >> chrLength[ii];
173     };
174     chrStreamIn.close();
175 
176     //load chr starts
177     chrStreamIn.open( (pGe.gDir+"/chrStart.txt").c_str() );
178     if (chrStreamIn.fail()) {
179         ostringstream errOut;
180         errOut << "EXITING because of FATAL error, could not open file " << (pGe.gDir+"/chrStart.txt") <<"\n";
181         errOut << "SOLUTION: re-generate genome files with STAR --runMode genomeGenerate\n";
182         exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
183     };
184 
185     for  (uint ii=0;ii<=nChrReal;ii++) {
186         chrStreamIn >> chrStart[ii];
187     };
188     chrStreamIn.close();
189 
190     //log
191     for (uint ii=0; ii<nChrReal;ii++) {
192         P.inOut->logMain << ii+1 <<"\t"<< chrName[ii] <<"\t"<<chrLength[ii]<<"\t"<<chrStart[ii]<<"\n"<<flush;
193         chrNameIndex[chrName[ii]]=ii;
194     };
195 };
196 
197 //////////////////////////////////////////////////////////
chrBinFill()198 void Genome::chrBinFill() {
199     chrBinN = chrStart[nChrReal]/genomeChrBinNbases+1;
200     chrBin = new uint [chrBinN];
201     for (uint ii=0, ichr=1; ii<chrBinN; ++ii) {
202         if (ii*genomeChrBinNbases>=chrStart[ichr]) ichr++;
203         chrBin[ii]=ichr-1;
204     };
205 };
206 
207 //////////////////////////////////////////////////////////
genomeSequenceAllocate(uint64 nGenomeIn,uint64 & nG1allocOut,char * & Gout,char * & G1out)208 void Genome::genomeSequenceAllocate(uint64 nGenomeIn, uint64 &nG1allocOut, char*& Gout, char*& G1out)
209 {
210     nG1allocOut=(nGenomeIn + 100)*2; //extra 100 bytes at the beginning, just in case
211 
212     if (P.limitGenomeGenerateRAM < (nG1allocOut+nG1allocOut/3)) {//allocate nG1alloc/3 for SA generation
213         ostringstream errOut;
214         errOut <<"EXITING because of FATAL PARAMETER ERROR: limitGenomeGenerateRAM="<< (P.limitGenomeGenerateRAM) <<"is too small for your genome\n";
215         errOut <<"SOLUTION: please specify --limitGenomeGenerateRAM not less than "<< nG1allocOut+nG1allocOut/3 <<" and make that much RAM available \n";
216         exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
217     };
218 
219     G1out=new char[nG1allocOut];
220     Gout=G1out+100;
221 
222     memset(G1out,GENOME_spacingChar,nG1allocOut);//initialize to K-1 all bytes
223 };
224