1 #include "bamSortByCoordinate.h"
2 #include "BAMfunctions.h"
3 #include "BAMbinSortByCoordinate.h"
4 #include "BAMbinSortUnmapped.h"
5 #include "ErrorWarning.h"
6 #include "bam_cat.h"
7
bamSortByCoordinate(Parameters & P,ReadAlignChunk ** RAchunk,Genome & genome,Solo & solo)8 void bamSortByCoordinate (Parameters &P, ReadAlignChunk **RAchunk, Genome &genome, Solo &solo) {
9 if (P.outBAMcoord) {//sort BAM if needed
10 *P.inOut->logStdOut << timeMonthDayTime() << " ..... started sorting BAM\n" <<flush;
11 P.inOut->logMain << timeMonthDayTime() << " ..... started sorting BAM\n" <<flush;
12 uint32 nBins=P.outBAMcoordNbins;
13
14 //check max size needed for sorting
15 uint maxMem=0;
16 for (uint32 ibin=0; ibin<nBins-1; ibin++) {//check all bins
17 uint binS=0;
18 for (int it=0; it<P.runThreadN; it++) {//collect sizes from threads
19 binS += RAchunk[it]->chunkOutBAMcoord->binTotalBytes[ibin]+24*RAchunk[it]->chunkOutBAMcoord->binTotalN[ibin];
20 };
21 if (binS>maxMem) maxMem=binS;
22 };
23 P.inOut->logMain << "Max memory needed for sorting = "<<maxMem<<endl;
24 if (maxMem>P.limitBAMsortRAM) {
25 ostringstream errOut;
26 errOut <<"EXITING because of fatal ERROR: not enough memory for BAM sorting: \n";
27 errOut <<"SOLUTION: re-run STAR with at least --limitBAMsortRAM " <<maxMem+1000000000;
28 exitWithError(errOut.str(), std::cerr, P.inOut->logMain, EXIT_CODE_PARAMETER, P);
29 } else if(maxMem==0) {
30 P.inOut->logMain << "WARNING: nothing to sort - no output alignments" <<endl;
31 BGZF *bgzfOut;
32 bgzfOut=bgzf_open(P.outBAMfileCoordName.c_str(),("w"+to_string((long long) P.outBAMcompression)).c_str());
33 if (bgzfOut==NULL) {
34 ostringstream errOut;
35 errOut <<"EXITING because of fatal ERROR: could not open output bam file: " << P.outBAMfileCoordName << "\n";
36 errOut <<"SOLUTION: check that the disk is not full, increase the max number of open files with Linux command ulimit -n before running STAR";
37 exitWithError(errOut.str(), std::cerr, P.inOut->logMain, EXIT_CODE_PARAMETER, P);
38 };
39 outBAMwriteHeader(bgzfOut,P.samHeaderSortedCoord,genome.chrNameAll,genome.chrLengthAll);
40 bgzf_close(bgzfOut);
41 } else {//sort
42 uint totalMem=0;
43 #pragma omp parallel num_threads(P.outBAMsortingThreadNactual)
44 #pragma omp for schedule (dynamic,1)
45 for (uint32 ibin1=0; ibin1<nBins; ibin1++) {
46 uint32 ibin=nBins-1-ibin1;//reverse order to start with the last bin - unmapped reads
47
48 uint binN=0, binS=0;
49 for (int it=0; it<P.runThreadN; it++) {//collect sizes from threads
50 binN += RAchunk[it]->chunkOutBAMcoord->binTotalN[ibin];
51 binS += RAchunk[it]->chunkOutBAMcoord->binTotalBytes[ibin];
52 };
53
54 if (binS==0) continue; //empty bin
55
56 if (ibin == nBins-1) {//last bin for unmapped reads
57 BAMbinSortUnmapped(ibin,P.runThreadN,P.outBAMsortTmpDir, P, genome, solo);
58 } else {
59 uint newMem=binS+binN*24;
60 bool boolWait=true;
61 while (boolWait) {
62 #pragma omp critical
63 if (totalMem+newMem < P.limitBAMsortRAM) {
64 boolWait=false;
65 totalMem+=newMem;
66 };
67 usleep(10000);
68 };
69 BAMbinSortByCoordinate(ibin,binN,binS,P.runThreadN,P.outBAMsortTmpDir, P, genome, solo);
70 #pragma omp critical
71 totalMem-=newMem;//"release" RAM
72 };
73 };
74
75 //concatenate all BAM files, using bam_cat
76 char **bamBinNames = new char* [nBins];
77 vector <string> bamBinNamesV;
78 for (uint32 ibin=0; ibin<nBins; ibin++) {
79
80 bamBinNamesV.push_back(P.outBAMsortTmpDir+"/b"+std::to_string((uint) ibin));
81 struct stat buffer;
82 if (stat (bamBinNamesV.back().c_str(), &buffer) != 0) {//check if file exists
83 bamBinNamesV.pop_back();
84 };
85 };
86 for (uint32 ibin=0; ibin<bamBinNamesV.size(); ibin++) {
87 bamBinNames[ibin] = (char*) bamBinNamesV.at(ibin).c_str();
88 };
89 bam_cat(bamBinNamesV.size(), bamBinNames, 0, P.outBAMfileCoordName.c_str());
90 };
91 };
92 };
93