1 #include <sys/types.h>
2 #include <sys/stat.h>
3
4 #include "IncludeDefine.h"
5 #include "Parameters.h"
6 #include "SequenceFuns.h"
7 #include "Genome.h"
8 #include "Chain.h"
9 #include "ReadAlignChunk.h"
10 #include "ReadAlign.h"
11 #include "Stats.h"
12 #include "genomeGenerate.h"
13 #include "outputSJ.h"
14 #include "ThreadControl.h"
15 #include "GlobalVariables.h"
16 #include "TimeFunctions.h"
17 #include "ErrorWarning.h"
18 #include "sysRemoveDir.h"
19 #include "BAMfunctions.h"
20 #include "bamSortByCoordinate.h"
21 #include "Transcriptome.h"
22 #include "signalFromBAM.h"
23 #include "mapThreadsSpawn.h"
24 #include "SjdbClass.h"
25 #include "sjdbInsertJunctions.h"
26 #include "Variation.h"
27 #include "Solo.h"
28 #include "samHeaders.h"
29
30 #include "twoPassRunPass1.h"
31
32 #include <htslib/sam.h>
33 #include "parametersDefault.xxd"
34
usage(int usageType)35 void usage(int usageType) {
36 cout << "Usage: STAR [options]... --genomeDir /path/to/genome/index/ --readFilesIn R1.fq R2.fq\n";
37 cout <<"Spliced Transcripts Alignment to a Reference (c) Alexander Dobin, 2009-2020\n\n";
38 cout << "STAR version=" << STAR_VERSION << "\n";
39 cout << "STAR compilation time,server,dir=" << COMPILATION_TIME_PLACE << "\n";
40 cout << "For more details see:\n";
41 cout << "<https://github.com/alexdobin/STAR>\n";
42 cout << "<https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf>\n";
43
44 if (usageType==0) {//brief
45 cout << "\nTo list all parameters, run STAR --help\n";
46 } else if (usageType==1) {//full
47 cout.write(reinterpret_cast<char *> (parametersDefault),
48 parametersDefault_len);
49 };
50 exit(0);
51 };
52
main(int argInN,char * argIn[])53 int main(int argInN, char* argIn[]) {
54 // If no argument is given, or the first argument is either '-h' or '--help', run usage()
55 if (argInN == 1) {
56 usage(0);
57 } else if (argInN == 2 && (strcmp("-h",argIn[1]) == 0 || strcmp ("--help",argIn[1]) == 0 )) {
58 usage(1);
59 };
60
61 time(&g_statsAll.timeStart);
62
63 ///////////////////////////////////////////////////////////////////////
64 ///////////////////////////////////////////// Parameters
65 Parameters P; //all parameters
66 P.inputParameters(argInN, argIn);
67
68 *(P.inOut->logStdOut) << "\t" << P.commandLine << '\n';
69 *(P.inOut->logStdOut) << "\tSTAR version: " << STAR_VERSION << " compiled: " << COMPILATION_TIME_PLACE << '\n';
70 *(P.inOut->logStdOut) << timeMonthDayTime(g_statsAll.timeStart) << " ..... started STAR run\n" <<flush;
71
72 //runMode
73 if ( P.runMode == "alignReads" || P.runMode == "soloCellFiltering" ){
74 //continue
75 } else if ( P.runMode=="genomeGenerate" ) {
76 {//normal genome generation
77 Genome genomeMain(P, P.pGe);
78 genomeMain.genomeGenerate();
79 };
80
81 if (P.pGe.transform.type>0) {//generate original genome, in addition to the transfomed generated above
82 P.pGe.transform.type = 0;
83 P.pGe.transform.typeString = "None";
84 P.pGe.transform.vcfFile = "-";
85 P.pGe.gDir += "/OriginalGenome/";
86 Genome genomeOrig(P, P.pGe);
87 genomeOrig.genomeGenerate();
88 };
89
90 sysRemoveDir (P.outFileTmp);
91 P.inOut->logMain << "DONE: Genome generation, EXITING\n" << flush;
92 exit(0);
93 } else if ( P.runMode == "liftOver" ) {
94 for (uint ii=0; ii<P.pGe.gChainFiles.size();ii++) {
95 Chain chain(P,P.pGe.gChainFiles.at(ii));
96 chain.liftOverGTF(P.pGe.sjdbGTFfile,P.outFileNamePrefix+"GTFliftOver_"+to_string(ii+1)+".gtf");
97 P.inOut->logMain << "DONE: lift-over of GTF file, EXITING\n" << flush;
98 exit(0);
99 };
100 } else {
101 P.inOut->logMain << "EXITING because of INPUT ERROR: unknown value of input parameter runMode=" <<P.runMode<<endl<<flush;
102 exit(1);
103 };
104
105 //transcripome placeholder
106 Transcriptome *transcriptomeMain=NULL;
107
108 //this will execute --runMode soloCellFiltering and exit
109 Solo soloCellFilter(P, *transcriptomeMain);
110
111 ////////////////////////////////////////////////////////////////////////
112 ///////////////////////////////// Genome
113 Genome genomeMain (P, P.pGe);
114 genomeMain.genomeLoad();
115
116 genomeMain.Var=new Variation(P, genomeMain.chrStart, genomeMain.chrNameIndex);
117
118 SjdbClass sjdbLoci;
119
120 if (P.sjdbInsert.pass1) {
121 Genome genomeMain1=genomeMain;//not sure if I need to create the copy - genomeMain1 below should not be changed
122 sjdbInsertJunctions(P, genomeMain, genomeMain1, sjdbLoci);
123 };
124
125 /////////////////////////////////////////////////////////////////////////////////////////////////START
126 if (P.runThreadN>1) {
127 g_threadChunks.threadArray=new pthread_t[P.runThreadN];
128 pthread_mutex_init(&g_threadChunks.mutexInRead, NULL);
129 pthread_mutex_init(&g_threadChunks.mutexOutSAM, NULL);
130 pthread_mutex_init(&g_threadChunks.mutexOutBAM1, NULL);
131 pthread_mutex_init(&g_threadChunks.mutexOutUnmappedFastx, NULL);
132 pthread_mutex_init(&g_threadChunks.mutexOutFilterBySJout, NULL);
133 pthread_mutex_init(&g_threadChunks.mutexStats, NULL);
134 pthread_mutex_init(&g_threadChunks.mutexBAMsortBins, NULL);
135 pthread_mutex_init(&g_threadChunks.mutexError, NULL);
136 };
137
138 g_statsAll.progressReportHeader(P.inOut->logProgress);
139
140 /////////////////////////////////////////////////////////////////////////////
141 //////////////////////////////////// 2-pass 1st pass
142 twoPassRunPass1(P, genomeMain, transcriptomeMain, sjdbLoci);
143
144 if ( P.quant.yes ) {//load transcriptome
145 transcriptomeMain=new Transcriptome(P);
146 };
147
148 //initialize Stats
149 g_statsAll.resetN();
150 time(&g_statsAll.timeStartMap);
151 *P.inOut->logStdOut << timeMonthDayTime(g_statsAll.timeStartMap) << " ..... started mapping\n" <<flush;
152 g_statsAll.timeLastReport=g_statsAll.timeStartMap;
153
154 //SAM headers
155 samHeaders(P, *genomeMain.genomeOut.g, *transcriptomeMain);
156
157 //initialize chimeric parameters here - note that chimeric parameters require samHeader
158 P.pCh.initialize(&P);
159
160 // this does not seem to work at the moment
161 // P.inOut->logMain << "mlock value="<<mlockall(MCL_CURRENT|MCL_FUTURE) <<"\n"<<flush;
162
163 // prepare chunks and spawn mapping threads
164 ReadAlignChunk *RAchunk[P.runThreadN];
165 for (int ii=0;ii<P.runThreadN;ii++) {
166 RAchunk[ii]=new ReadAlignChunk(P, genomeMain, transcriptomeMain, ii);
167 };
168
169 if (P.runRestart.type!=1)
170 mapThreadsSpawn(P, RAchunk);
171
172 if (P.outFilterBySJoutStage==1) {//completed stage 1, go to stage 2
173 P.inOut->logMain << "Completed stage 1 mapping of outFilterBySJout mapping\n"<<flush;
174 outputSJ(RAchunk,P);//collapse novel junctions
175 P.readFilesIndex=-1;
176
177 P.outFilterBySJoutStage=2;
178 if (P.outBAMcoord) {
179 for (int it=0; it<P.runThreadN; it++) {//prepare the unmapped bin
180 RAchunk[it]->chunkOutBAMcoord->coordUnmappedPrepareBySJout();
181 };
182 };
183
184 mapThreadsSpawn(P, RAchunk);
185 };
186
187 //close some BAM files
188 if (P.inOut->outBAMfileUnsorted!=NULL) {
189 bgzf_flush(P.inOut->outBAMfileUnsorted);
190 bgzf_close(P.inOut->outBAMfileUnsorted);
191 };
192 if (P.inOut->outQuantBAMfile!=NULL) {
193 bgzf_flush(P.inOut->outQuantBAMfile);
194 bgzf_close(P.inOut->outQuantBAMfile);
195 };
196
197 if (P.outBAMcoord && P.limitBAMsortRAM==0) {//make it equal ot the genome size
198 P.limitBAMsortRAM=genomeMain.nGenome+genomeMain.SA.lengthByte+genomeMain.SAi.lengthByte;
199 };
200
201 time(&g_statsAll.timeFinishMap);
202 *P.inOut->logStdOut << timeMonthDayTime(g_statsAll.timeFinishMap) << " ..... finished mapping\n" <<flush;
203
204 //no need for genome anymore, free the memory
205 genomeMain.freeMemory();
206
207 //aggregate output junctions
208 //collapse splice junctions from different threads/chunks, and output them
209 if (P.runRestart.type!=1 && P.outSJ.yes)
210 outputSJ(RAchunk,P);
211
212 //solo counts
213 Solo soloMain(RAchunk,P,*RAchunk[0]->chunkTr);
214 soloMain.processAndOutput();
215
216 if ( P.quant.geCount.yes ) {//output gene quantifications
217 for (int ichunk=1; ichunk<P.runThreadN; ichunk++) {//sum counts from all chunks into 0th chunk
218 RAchunk[0]->chunkTr->quants->addQuants(*(RAchunk[ichunk]->chunkTr->quants));
219 };
220 RAchunk[0]->chunkTr->quantsOutput();
221 };
222
223 if (P.runThreadN>1 && P.outSAMorder=="PairedKeepInputOrder") {//concatenate Aligned.* files
224 RAchunk[0]->chunkFilesCat(P.inOut->outSAM, P.outFileTmp + "/Aligned.out.sam.chunk", g_threadChunks.chunkOutN);
225 };
226
227 bamSortByCoordinate(P, RAchunk, *genomeMain.genomeOut.g, soloMain);
228
229 //wiggle output
230 if (P.outWigFlags.yes) {
231 *(P.inOut->logStdOut) << timeMonthDayTime() << " ..... started wiggle output\n" <<flush;
232 P.inOut->logMain << timeMonthDayTime() << " ..... started wiggle output\n" <<flush;
233 string wigOutFileNamePrefix=P.outFileNamePrefix + "Signal";
234 signalFromBAM(P.outBAMfileCoordName, wigOutFileNamePrefix, P);
235 };
236
237 g_statsAll.writeLines(P.inOut->outChimJunction, P.pCh.outJunctionFormat, "#", STAR_VERSION + string(" ") + P.commandLine);
238
239 g_statsAll.progressReport(P.inOut->logProgress);
240 P.inOut->logProgress << "ALL DONE!\n"<<flush;
241 P.inOut->logFinal.open((P.outFileNamePrefix + "Log.final.out").c_str());
242 g_statsAll.reportFinal(P.inOut->logFinal);
243 *P.inOut->logStdOut << timeMonthDayTime(g_statsAll.timeFinish) << " ..... finished successfully\n" <<flush;
244
245 P.inOut->logMain << "ALL DONE!\n" << flush;
246 if (P.outTmpKeep=="None") {
247 sysRemoveDir (P.outFileTmp);
248 };
249
250 P.closeReadsFiles();//this will kill the readFilesCommand processes if necessary
251 //genomeMain.~Genome(); //need explicit call because of the 'delete P.inOut' below, which will destroy P.inOut->logStdOut
252 if (genomeMain.sharedMemory != NULL) {//need explicit call because this destructor will write to files which are deleted by 'delete P.inOut' below
253 delete genomeMain.sharedMemory;
254 genomeMain.sharedMemory = NULL;
255 };
256
257 delete P.inOut; //to close files
258
259 return 0;
260 };
261