1 #include <sys/types.h>
2 #include <sys/stat.h>
3 
4 #include "IncludeDefine.h"
5 #include "Parameters.h"
6 #include "SequenceFuns.h"
7 #include "Genome.h"
8 #include "Chain.h"
9 #include "ReadAlignChunk.h"
10 #include "ReadAlign.h"
11 #include "Stats.h"
12 #include "genomeGenerate.h"
13 #include "outputSJ.h"
14 #include "ThreadControl.h"
15 #include "GlobalVariables.h"
16 #include "TimeFunctions.h"
17 #include "ErrorWarning.h"
18 #include "sysRemoveDir.h"
19 #include "BAMfunctions.h"
20 #include "bamSortByCoordinate.h"
21 #include "Transcriptome.h"
22 #include "signalFromBAM.h"
23 #include "mapThreadsSpawn.h"
24 #include "SjdbClass.h"
25 #include "sjdbInsertJunctions.h"
26 #include "Variation.h"
27 #include "Solo.h"
28 #include "samHeaders.h"
29 
30 #include "twoPassRunPass1.h"
31 
32 #include <htslib/sam.h>
33 #include "parametersDefault.xxd"
34 
usage(int usageType)35 void usage(int usageType) {
36     cout << "Usage: STAR  [options]... --genomeDir /path/to/genome/index/   --readFilesIn R1.fq R2.fq\n";
37     cout <<"Spliced Transcripts Alignment to a Reference (c) Alexander Dobin, 2009-2020\n\n";
38     cout << "STAR version=" << STAR_VERSION << "\n";
39     cout << "STAR compilation time,server,dir=" << COMPILATION_TIME_PLACE << "\n";
40     cout << "For more details see:\n";
41     cout << "<https://github.com/alexdobin/STAR>\n";
42     cout << "<https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf>\n";
43 
44     if (usageType==0) {//brief
45         cout << "\nTo list all parameters, run STAR --help\n";
46     } else if (usageType==1) {//full
47         cout.write(reinterpret_cast<char *> (parametersDefault),
48                    parametersDefault_len);
49     };
50     exit(0);
51 };
52 
main(int argInN,char * argIn[])53 int main(int argInN, char* argIn[]) {
54     // If no argument is given, or the first argument is either '-h' or '--help', run usage()
55     if (argInN == 1) {
56         usage(0);
57     } else if (argInN == 2 && (strcmp("-h",argIn[1]) == 0 || strcmp ("--help",argIn[1]) == 0 )) {
58         usage(1);
59     };
60 
61     time(&g_statsAll.timeStart);
62 
63     ///////////////////////////////////////////////////////////////////////
64     ///////////////////////////////////////////// Parameters
65     Parameters P; //all parameters
66     P.inputParameters(argInN, argIn);
67 
68     *(P.inOut->logStdOut) << "\t" << P.commandLine << '\n';
69     *(P.inOut->logStdOut) << "\tSTAR version: " << STAR_VERSION << "   compiled: " << COMPILATION_TIME_PLACE << '\n';
70     *(P.inOut->logStdOut) << timeMonthDayTime(g_statsAll.timeStart) << " ..... started STAR run\n"  <<flush;
71 
72     //runMode
73     if ( P.runMode == "alignReads" || P.runMode == "soloCellFiltering" ){
74         //continue
75     } else if ( P.runMode=="genomeGenerate" ) {
76         {//normal genome generation
77             Genome genomeMain(P, P.pGe);
78             genomeMain.genomeGenerate();
79         };
80 
81         if (P.pGe.transform.type>0) {//generate original genome, in addition to the transfomed generated above
82             P.pGe.transform.type = 0;
83             P.pGe.transform.typeString = "None";
84             P.pGe.transform.vcfFile = "-";
85             P.pGe.gDir += "/OriginalGenome/";
86             Genome genomeOrig(P, P.pGe);
87             genomeOrig.genomeGenerate();
88         };
89 
90         sysRemoveDir (P.outFileTmp);
91         P.inOut->logMain << "DONE: Genome generation, EXITING\n" << flush;
92         exit(0);
93     } else if ( P.runMode == "liftOver" ) {
94         for (uint ii=0; ii<P.pGe.gChainFiles.size();ii++) {
95             Chain chain(P,P.pGe.gChainFiles.at(ii));
96             chain.liftOverGTF(P.pGe.sjdbGTFfile,P.outFileNamePrefix+"GTFliftOver_"+to_string(ii+1)+".gtf");
97             P.inOut->logMain << "DONE: lift-over of GTF file, EXITING\n" << flush;
98             exit(0);
99         };
100     } else {
101         P.inOut->logMain << "EXITING because of INPUT ERROR: unknown value of input parameter runMode=" <<P.runMode<<endl<<flush;
102         exit(1);
103     };
104 
105     //transcripome placeholder
106     Transcriptome *transcriptomeMain=NULL;
107 
108     //this will execute --runMode soloCellFiltering and exit
109     Solo soloCellFilter(P, *transcriptomeMain);
110 
111     ////////////////////////////////////////////////////////////////////////
112     ///////////////////////////////// Genome
113     Genome genomeMain (P, P.pGe);
114     genomeMain.genomeLoad();
115 
116     genomeMain.Var=new Variation(P, genomeMain.chrStart, genomeMain.chrNameIndex);
117 
118     SjdbClass sjdbLoci;
119 
120     if (P.sjdbInsert.pass1) {
121         Genome genomeMain1=genomeMain;//not sure if I need to create the copy - genomeMain1 below should not be changed
122         sjdbInsertJunctions(P, genomeMain, genomeMain1, sjdbLoci);
123     };
124 
125 /////////////////////////////////////////////////////////////////////////////////////////////////START
126     if (P.runThreadN>1) {
127         g_threadChunks.threadArray=new pthread_t[P.runThreadN];
128         pthread_mutex_init(&g_threadChunks.mutexInRead, NULL);
129         pthread_mutex_init(&g_threadChunks.mutexOutSAM, NULL);
130         pthread_mutex_init(&g_threadChunks.mutexOutBAM1, NULL);
131         pthread_mutex_init(&g_threadChunks.mutexOutUnmappedFastx, NULL);
132         pthread_mutex_init(&g_threadChunks.mutexOutFilterBySJout, NULL);
133         pthread_mutex_init(&g_threadChunks.mutexStats, NULL);
134         pthread_mutex_init(&g_threadChunks.mutexBAMsortBins, NULL);
135         pthread_mutex_init(&g_threadChunks.mutexError, NULL);
136     };
137 
138     g_statsAll.progressReportHeader(P.inOut->logProgress);
139 
140     /////////////////////////////////////////////////////////////////////////////
141     //////////////////////////////////// 2-pass 1st pass
142     twoPassRunPass1(P, genomeMain, transcriptomeMain, sjdbLoci);
143 
144     if ( P.quant.yes ) {//load transcriptome
145         transcriptomeMain=new Transcriptome(P);
146     };
147 
148     //initialize Stats
149     g_statsAll.resetN();
150     time(&g_statsAll.timeStartMap);
151     *P.inOut->logStdOut << timeMonthDayTime(g_statsAll.timeStartMap) << " ..... started mapping\n" <<flush;
152     g_statsAll.timeLastReport=g_statsAll.timeStartMap;
153 
154     //SAM headers
155     samHeaders(P, *genomeMain.genomeOut.g, *transcriptomeMain);
156 
157     //initialize chimeric parameters here - note that chimeric parameters require samHeader
158     P.pCh.initialize(&P);
159 
160     // this does not seem to work at the moment
161     // P.inOut->logMain << "mlock value="<<mlockall(MCL_CURRENT|MCL_FUTURE) <<"\n"<<flush;
162 
163     // prepare chunks and spawn mapping threads
164     ReadAlignChunk *RAchunk[P.runThreadN];
165     for (int ii=0;ii<P.runThreadN;ii++) {
166         RAchunk[ii]=new ReadAlignChunk(P, genomeMain, transcriptomeMain, ii);
167     };
168 
169     if (P.runRestart.type!=1)
170         mapThreadsSpawn(P, RAchunk);
171 
172     if (P.outFilterBySJoutStage==1) {//completed stage 1, go to stage 2
173         P.inOut->logMain << "Completed stage 1 mapping of outFilterBySJout mapping\n"<<flush;
174         outputSJ(RAchunk,P);//collapse novel junctions
175         P.readFilesIndex=-1;
176 
177         P.outFilterBySJoutStage=2;
178         if (P.outBAMcoord) {
179             for (int it=0; it<P.runThreadN; it++) {//prepare the unmapped bin
180                 RAchunk[it]->chunkOutBAMcoord->coordUnmappedPrepareBySJout();
181             };
182         };
183 
184         mapThreadsSpawn(P, RAchunk);
185     };
186 
187     //close some BAM files
188     if (P.inOut->outBAMfileUnsorted!=NULL) {
189         bgzf_flush(P.inOut->outBAMfileUnsorted);
190         bgzf_close(P.inOut->outBAMfileUnsorted);
191     };
192     if (P.inOut->outQuantBAMfile!=NULL) {
193         bgzf_flush(P.inOut->outQuantBAMfile);
194         bgzf_close(P.inOut->outQuantBAMfile);
195     };
196 
197     if (P.outBAMcoord && P.limitBAMsortRAM==0) {//make it equal ot the genome size
198         P.limitBAMsortRAM=genomeMain.nGenome+genomeMain.SA.lengthByte+genomeMain.SAi.lengthByte;
199     };
200 
201     time(&g_statsAll.timeFinishMap);
202     *P.inOut->logStdOut << timeMonthDayTime(g_statsAll.timeFinishMap) << " ..... finished mapping\n" <<flush;
203 
204     //no need for genome anymore, free the memory
205     genomeMain.freeMemory();
206 
207     //aggregate output junctions
208     //collapse splice junctions from different threads/chunks, and output them
209     if (P.runRestart.type!=1 && P.outSJ.yes)
210         outputSJ(RAchunk,P);
211 
212     //solo counts
213     Solo soloMain(RAchunk,P,*RAchunk[0]->chunkTr);
214     soloMain.processAndOutput();
215 
216     if ( P.quant.geCount.yes ) {//output gene quantifications
217         for (int ichunk=1; ichunk<P.runThreadN; ichunk++) {//sum counts from all chunks into 0th chunk
218             RAchunk[0]->chunkTr->quants->addQuants(*(RAchunk[ichunk]->chunkTr->quants));
219         };
220         RAchunk[0]->chunkTr->quantsOutput();
221     };
222 
223     if (P.runThreadN>1 && P.outSAMorder=="PairedKeepInputOrder") {//concatenate Aligned.* files
224         RAchunk[0]->chunkFilesCat(P.inOut->outSAM, P.outFileTmp + "/Aligned.out.sam.chunk", g_threadChunks.chunkOutN);
225     };
226 
227     bamSortByCoordinate(P, RAchunk, *genomeMain.genomeOut.g, soloMain);
228 
229     //wiggle output
230     if (P.outWigFlags.yes) {
231         *(P.inOut->logStdOut) << timeMonthDayTime() << " ..... started wiggle output\n" <<flush;
232         P.inOut->logMain << timeMonthDayTime() << " ..... started wiggle output\n" <<flush;
233         string wigOutFileNamePrefix=P.outFileNamePrefix + "Signal";
234         signalFromBAM(P.outBAMfileCoordName, wigOutFileNamePrefix, P);
235     };
236 
237     g_statsAll.writeLines(P.inOut->outChimJunction, P.pCh.outJunctionFormat, "#", STAR_VERSION + string("   ") + P.commandLine);
238 
239     g_statsAll.progressReport(P.inOut->logProgress);
240     P.inOut->logProgress  << "ALL DONE!\n"<<flush;
241     P.inOut->logFinal.open((P.outFileNamePrefix + "Log.final.out").c_str());
242     g_statsAll.reportFinal(P.inOut->logFinal);
243     *P.inOut->logStdOut << timeMonthDayTime(g_statsAll.timeFinish) << " ..... finished successfully\n" <<flush;
244 
245     P.inOut->logMain  << "ALL DONE!\n" << flush;
246     if (P.outTmpKeep=="None") {
247         sysRemoveDir (P.outFileTmp);
248     };
249 
250     P.closeReadsFiles();//this will kill the readFilesCommand processes if necessary
251     //genomeMain.~Genome(); //need explicit call because of the 'delete P.inOut' below, which will destroy P.inOut->logStdOut
252     if (genomeMain.sharedMemory != NULL) {//need explicit call because this destructor will write to files which are deleted by 'delete P.inOut' below
253         delete genomeMain.sharedMemory;
254         genomeMain.sharedMemory = NULL;
255     };
256 
257     delete P.inOut; //to close files
258 
259     return 0;
260 };
261