1 #include "bamSortByCoordinate.h"
2 #include "BAMfunctions.h"
3 #include "BAMbinSortByCoordinate.h"
4 #include "BAMbinSortUnmapped.h"
5 #include "ErrorWarning.h"
6 #include "bam_cat.h"
7 
bamSortByCoordinate(Parameters & P,ReadAlignChunk ** RAchunk,Genome & genome,Solo & solo)8 void bamSortByCoordinate (Parameters &P, ReadAlignChunk **RAchunk, Genome &genome, Solo &solo) {
9     if (P.outBAMcoord) {//sort BAM if needed
10         *P.inOut->logStdOut << timeMonthDayTime() << " ..... started sorting BAM\n" <<flush;
11         P.inOut->logMain << timeMonthDayTime() << " ..... started sorting BAM\n" <<flush;
12         uint32 nBins=P.outBAMcoordNbins;
13 
14         //check max size needed for sorting
15         uint maxMem=0;
16         for (uint32 ibin=0; ibin<nBins-1; ibin++) {//check all bins
17             uint binS=0;
18             for (int it=0; it<P.runThreadN; it++) {//collect sizes from threads
19                 binS += RAchunk[it]->chunkOutBAMcoord->binTotalBytes[ibin]+24*RAchunk[it]->chunkOutBAMcoord->binTotalN[ibin];
20             };
21             if (binS>maxMem) maxMem=binS;
22         };
23         P.inOut->logMain << "Max memory needed for sorting = "<<maxMem<<endl;
24         if (maxMem>P.limitBAMsortRAM) {
25             ostringstream errOut;
26             errOut <<"EXITING because of fatal ERROR: not enough memory for BAM sorting: \n";
27             errOut <<"SOLUTION: re-run STAR with at least --limitBAMsortRAM " <<maxMem+1000000000;
28             exitWithError(errOut.str(), std::cerr, P.inOut->logMain, EXIT_CODE_PARAMETER, P);
29         } else if(maxMem==0) {
30             P.inOut->logMain << "WARNING: nothing to sort - no output alignments" <<endl;
31             BGZF *bgzfOut;
32             bgzfOut=bgzf_open(P.outBAMfileCoordName.c_str(),("w"+to_string((long long) P.outBAMcompression)).c_str());
33             if (bgzfOut==NULL) {
34                 ostringstream errOut;
35                 errOut <<"EXITING because of fatal ERROR: could not open output bam file: " << P.outBAMfileCoordName << "\n";
36                 errOut <<"SOLUTION: check that the disk is not full, increase the max number of open files with Linux command ulimit -n before running STAR";
37                 exitWithError(errOut.str(), std::cerr, P.inOut->logMain, EXIT_CODE_PARAMETER, P);
38             };
39             outBAMwriteHeader(bgzfOut,P.samHeaderSortedCoord,genome.chrNameAll,genome.chrLengthAll);
40             bgzf_close(bgzfOut);
41         } else {//sort
42             uint totalMem=0;
43             #pragma omp parallel num_threads(P.outBAMsortingThreadNactual)
44             #pragma omp for schedule (dynamic,1)
45             for (uint32 ibin1=0; ibin1<nBins; ibin1++) {
46                 uint32 ibin=nBins-1-ibin1;//reverse order to start with the last bin - unmapped reads
47 
48                 uint binN=0, binS=0;
49                 for (int it=0; it<P.runThreadN; it++) {//collect sizes from threads
50                     binN += RAchunk[it]->chunkOutBAMcoord->binTotalN[ibin];
51                     binS += RAchunk[it]->chunkOutBAMcoord->binTotalBytes[ibin];
52                 };
53 
54                 if (binS==0) continue; //empty bin
55 
56                 if (ibin == nBins-1) {//last bin for unmapped reads
57                     BAMbinSortUnmapped(ibin,P.runThreadN,P.outBAMsortTmpDir, P, genome, solo);
58                 } else {
59                     uint newMem=binS+binN*24;
60                     bool boolWait=true;
61                     while (boolWait) {
62                         #pragma omp critical
63                         if (totalMem+newMem < P.limitBAMsortRAM) {
64                             boolWait=false;
65                             totalMem+=newMem;
66                         };
67                         usleep(10000);
68                     };
69                     BAMbinSortByCoordinate(ibin,binN,binS,P.runThreadN,P.outBAMsortTmpDir, P, genome, solo);
70                     #pragma omp critical
71                     totalMem-=newMem;//"release" RAM
72                 };
73             };
74 
75             //concatenate all BAM files, using bam_cat
76             char **bamBinNames = new char* [nBins];
77             vector <string> bamBinNamesV;
78             for (uint32 ibin=0; ibin<nBins; ibin++) {
79 
80                 bamBinNamesV.push_back(P.outBAMsortTmpDir+"/b"+std::to_string((uint) ibin));
81                 struct stat buffer;
82                 if (stat (bamBinNamesV.back().c_str(), &buffer) != 0) {//check if file exists
83                     bamBinNamesV.pop_back();
84                 };
85             };
86             for (uint32 ibin=0; ibin<bamBinNamesV.size(); ibin++) {
87                     bamBinNames[ibin] = (char*) bamBinNamesV.at(ibin).c_str();
88             };
89             bam_cat(bamBinNamesV.size(), bamBinNames, 0, P.outBAMfileCoordName.c_str());
90         };
91     };
92 };
93