1 #include "Genome.h"
2 #include "SuffixArrayFuns.h"
3 #include "PackedArray.h"
4 #include "ErrorWarning.h"
5 #include "streamFuns.h"
6 #include "SharedMemory.h"
7 #include "genomeScanFastaFiles.h"
8
9 #include <time.h>
10 #include <cmath>
11 #include <unistd.h>
12 #include <sys/stat.h>
13
14 //arbitrary number for ftok function
15 #define SHM_projectID 23
16
Genome(Parameters & P,ParametersGenome & pGe)17 Genome::Genome (Parameters &P, ParametersGenome &pGe): shmStart(NULL), P(P), pGe(pGe), sharedMemory(NULL)
18 {
19 shmKey=ftok(pGe.gDir.c_str(),SHM_projectID);
20 genomeOut.g=this;//will change if genomeOut is different from genomeMain
21 genomeOut.convYes=false;
22 sjdbOverhang = pGe.sjdbOverhang; //will be re-defined later if another value was used for the generated genome
23 sjdbLength = pGe.sjdbOverhang==0 ? 0 : pGe.sjdbOverhang*2+1;
24 };
25
26 // Genome::~Genome()
27 // {
28 // if (sharedMemory != NULL)
29 // delete sharedMemory;
30 // sharedMemory = NULL;
31 // }
32
freeMemory()33 void Genome::freeMemory(){//free big chunks of memory used by genome and suffix array
34
35 if (pGe.gLoad=="NoSharedMemory") {//can deallocate only for non-shared memory
36 delete[] G1;
37 G1=NULL;
38 SA.deallocateArray();
39 SApass2.deallocateArray();
40 SAi.deallocateArray();
41 };
42 };
43
OpenStream(string name,ifstream & stream,uint size)44 uint Genome::OpenStream(string name, ifstream & stream, uint size)
45 {
46 stream.open((pGe.gDir+ "/" +name).c_str(), ios::binary);
47 if (!stream.good()) {
48 ostringstream errOut;
49 errOut << "EXITING because of FATAL ERROR: could not open genome file: "<< pGe.gDir << "/" << name <<"\n";
50 errOut << "SOLUTION: check that the path to genome files, specified in --genomeDir is correct and the files are present, and have user read permissions\n" <<flush;
51 exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_GENOME_FILES, P);
52 };
53
54
55 if (size>0) {
56 P.inOut->logMain << name << ": size given as a parameter = " << size <<"\n";
57 } else {
58 P.inOut->logMain << "Checking " << name << " size";
59 stream.seekg (0, ios::end);
60 int64 size1 = stream.tellg();
61 if (size1<=0) {
62 ostringstream errOut;
63 errOut << "EXITING because of FATAL ERROR: failed reading from genome file: "<< pGe.gDir << "/" << name <<"\n";
64 errOut << "SOLUTION: re-generate the genome index\n";
65 exitWithError(errOut.str(),std::cerr, P.inOut->logMain, 1, P);
66 };
67 size=(uint) size1;
68 stream.clear();
69 stream.seekg (0, ios::beg);
70 P.inOut->logMain << "file size: "<< size <<" bytes; state: good=" <<stream.good()\
71 <<" eof="<<stream.eof()<<" fail="<<stream.fail()<<" bad="<<stream.bad()<<"\n"<<flush;
72 };
73
74 return size;
75 };
76
77
78
HandleSharedMemoryException(const SharedMemoryException & exc,uint64 shmSize)79 void Genome::HandleSharedMemoryException(const SharedMemoryException & exc, uint64 shmSize)
80 {
81 ostringstream errOut;
82 errOut << "Shared memory error: " << exc.GetErrorCode() << ", errno: " << strerror(exc.GetErrorDetail()) << "(" << errno << ")" << endl;
83
84 int exitCode = EXIT_CODE_SHM;
85 switch (exc.GetErrorCode())
86 {
87 case EOPENFAILED:
88 errOut << "EXITING because of FATAL ERROR: problems with shared memory: error from shmget() or shm_open()." << endl << flush;
89 errOut << "SOLUTION: check shared memory settings as explained in STAR manual, OR run STAR with --genomeLoad NoSharedMemory to avoid using shared memory" << endl << flush;
90 break;
91 case EEXISTS:
92 errOut << "EXITING: fatal error from shmget() trying to allocate shared memory piece." << endl;
93 errOut << "Possible cause 1: not enough RAM. Check if you have enough RAM of at least " << shmSize+2000000000 << " bytes" << endl;
94 errOut << "Possible cause 2: not enough virtual memory allowed with ulimit. SOLUTION: run ulimit -v " << shmSize+2000000000 << endl;
95 errOut << "Possible cause 3: allowed shared memory size is not large enough. SOLUTIONS: (i) consult STAR manual on how to increase shared memory allocation; " \
96 "(ii) ask your system administrator to increase shared memory allocation; (iii) run STAR with --genomeLoad NoSharedMemory" << endl<<flush;
97 break;
98 case EFTRUNCATE:
99 errOut << "EXITING: fatal error from ftruncate() error shared memory." << endl;
100 errOut << "Possible cause 1: not enough RAM. Check if you have enough RAM of at least " << shmSize+2000000000 << " bytes" << endl << flush;
101 exitCode = EXIT_CODE_MEMORY_ALLOCATION;
102 break;
103 case EMAPFAILED:
104 errOut << "EXITING because of FATAL ERROR: problems with shared memory: error from shmat() while trying to get address of the shared memory piece." << endl << flush;
105 errOut << "SOLUTION: check shared memory settings as explained in STAR manual, OR run STAR with --genomeLoad NoSharedMemory to avoid using shared memory" << endl << flush;
106 break;
107 case ECLOSE:
108 errOut << "EXITING because of FATAL ERROR: could not close the shared memory object." << endl << flush;
109 break;
110 case EUNLINK:
111 #ifdef POSIX_SHARED_MEM
112 errOut << "EXITING because of FATAL ERROR: could not delete the shared object." << endl << flush;
113 #else
114 errOut << "EXITING because of FATAL ERROR: problems with shared memory: error from shmctl() while trying to remove shared memory piece." << endl << flush;
115 errOut << "SOLUTION: check shared memory settings as explained in STAR manual, OR run STAR with --genomeLoad NoSharedMemory to avoid using shared memory" << endl << flush;
116 #endif
117 break;
118 default:
119 errOut << "EXITING because of FATAL ERROR: There was an issue with the shared memory allocation. Try running STAR with --genomeLoad NoSharedMemory to avoid using shared memory.";
120 break;
121 }
122
123 try
124 {
125 if (sharedMemory != NULL)
126 sharedMemory->Clean();
127 }
128 catch(...)
129 {}
130
131 exitWithError(errOut.str(),std::cerr, P.inOut->logMain, exitCode, P);
132 };
133
134 //////////////////////////////////////////////////////////////////////////////////////////
chrInfoLoad()135 void Genome::chrInfoLoad() {//find chrStart,Length,nChr from Genome G
136
137 //load chr names
138 ifstream chrStreamIn ( (pGe.gDir+"/chrName.txt").c_str() );
139 if (chrStreamIn.fail()) {
140 ostringstream errOut;
141 errOut << "EXITING because of FATAL error, could not open file " << (pGe.gDir+"/chrName.txt") <<"\n";
142 errOut << "SOLUTION: re-generate genome files with STAR --runMode genomeGenerate\n";
143 exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
144 };
145
146 char chrInChar[1000];
147
148 while (chrStreamIn.good()) {
149 string chrIn;
150 chrStreamIn.getline(chrInChar,1000);
151 chrIn=chrInChar;
152 if (chrIn=="") break;
153 chrName.push_back(chrIn);
154 };
155 chrStreamIn.close();
156 nChrReal=chrName.size();
157
158 P.inOut->logMain << "Number of real (reference) chromosomes= " << nChrReal <<"\n"<<flush;
159 chrStart.resize(nChrReal+1);
160 chrLength.resize(nChrReal);
161
162 //load chr lengths
163 chrStreamIn.open( (pGe.gDir+"/chrLength.txt").c_str() );
164 if (chrStreamIn.fail()) {
165 ostringstream errOut;
166 errOut << "EXITING because of FATAL error, could not open file " << (pGe.gDir+"/chrLength.txt") <<"\n";
167 errOut << "SOLUTION: re-generate genome files with STAR --runMode genomeGenerate\n";
168 exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
169 };
170
171 for (uint ii=0;ii<nChrReal;ii++) {
172 chrStreamIn >> chrLength[ii];
173 };
174 chrStreamIn.close();
175
176 //load chr starts
177 chrStreamIn.open( (pGe.gDir+"/chrStart.txt").c_str() );
178 if (chrStreamIn.fail()) {
179 ostringstream errOut;
180 errOut << "EXITING because of FATAL error, could not open file " << (pGe.gDir+"/chrStart.txt") <<"\n";
181 errOut << "SOLUTION: re-generate genome files with STAR --runMode genomeGenerate\n";
182 exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
183 };
184
185 for (uint ii=0;ii<=nChrReal;ii++) {
186 chrStreamIn >> chrStart[ii];
187 };
188 chrStreamIn.close();
189
190 //log
191 for (uint ii=0; ii<nChrReal;ii++) {
192 P.inOut->logMain << ii+1 <<"\t"<< chrName[ii] <<"\t"<<chrLength[ii]<<"\t"<<chrStart[ii]<<"\n"<<flush;
193 chrNameIndex[chrName[ii]]=ii;
194 };
195 };
196
197 //////////////////////////////////////////////////////////
chrBinFill()198 void Genome::chrBinFill() {
199 chrBinN = chrStart[nChrReal]/genomeChrBinNbases+1;
200 chrBin = new uint [chrBinN];
201 for (uint ii=0, ichr=1; ii<chrBinN; ++ii) {
202 if (ii*genomeChrBinNbases>=chrStart[ichr]) ichr++;
203 chrBin[ii]=ichr-1;
204 };
205 };
206
207 //////////////////////////////////////////////////////////
genomeSequenceAllocate(uint64 nGenomeIn,uint64 & nG1allocOut,char * & Gout,char * & G1out)208 void Genome::genomeSequenceAllocate(uint64 nGenomeIn, uint64 &nG1allocOut, char*& Gout, char*& G1out)
209 {
210 nG1allocOut=(nGenomeIn + 100)*2; //extra 100 bytes at the beginning, just in case
211
212 if (P.limitGenomeGenerateRAM < (nG1allocOut+nG1allocOut/3)) {//allocate nG1alloc/3 for SA generation
213 ostringstream errOut;
214 errOut <<"EXITING because of FATAL PARAMETER ERROR: limitGenomeGenerateRAM="<< (P.limitGenomeGenerateRAM) <<"is too small for your genome\n";
215 errOut <<"SOLUTION: please specify --limitGenomeGenerateRAM not less than "<< nG1allocOut+nG1allocOut/3 <<" and make that much RAM available \n";
216 exitWithError(errOut.str(),std::cerr, P.inOut->logMain, EXIT_CODE_INPUT_FILES, P);
217 };
218
219 G1out=new char[nG1allocOut];
220 Gout=G1out+100;
221
222 memset(G1out,GENOME_spacingChar,nG1allocOut);//initialize to K-1 all bytes
223 };
224