1 #include "BAMbinSortByCoordinate.h"
2 #include "ErrorWarning.h"
3 #include "serviceFuns.cpp"
4 #include "BAMfunctions.h"
5 #include "SequenceFuns.h"
6 
BAMbinSortByCoordinate(uint32 iBin,uint binN,uint binS,uint nThreads,string dirBAMsort,Parameters & P,Genome & genome,Solo & solo)7 void BAMbinSortByCoordinate(uint32 iBin, uint binN, uint binS, uint nThreads, string dirBAMsort, Parameters &P, Genome &genome, Solo &solo) {
8 
9     if (binS==0) return; //nothing to do for empty bins
10     //allocate arrays
11     char *bamIn=new char[binS+1];
12     uint *startPos=new uint[binN*3];
13 
14     uint bamInBytes=0;
15     //load all aligns
16     for (uint it=0; it<nThreads; it++) {
17         string bamInFile=dirBAMsort+to_string(it)+"/"+to_string((uint) iBin);
18         ifstream bamInStream;
19         bamInStream.open(bamInFile.c_str(),std::ios::binary | std::ios::ate);//open at the end to get file size
20         int64 s1=bamInStream.tellg();
21         if (s1>0)         {
22             bamInStream.seekg(std::ios::beg);
23             bamInStream.read(bamIn+bamInBytes,s1);//read the whole file
24         } else if (s1<0) {
25             ostringstream errOut;
26             errOut << "EXITING because of FATAL ERROR: failed reading from temporary file: " << dirBAMsort+to_string(it)+"/"+to_string((uint) iBin);
27             exitWithError(errOut.str(),std::cerr, P.inOut->logMain, 1, P);
28         };
29         bamInBytes += bamInStream.gcount();
30         bamInStream.close();
31         remove(bamInFile.c_str());
32     };
33     if (bamInBytes!=binS) {
34         ostringstream errOut;
35         errOut << "EXITING because of FATAL ERROR: number of bytes expected from the BAM bin does not agree with the actual size on disk: ";
36         errOut << "Expected bin size=" <<binS <<" ; size on disk="<< bamInBytes <<" ; bin number="<< iBin <<"\n";
37         exitWithError(errOut.str(),std::cerr, P.inOut->logMain, 1, P);
38     };
39 
40     //extract coordinates
41 
42     for (uint ib=0,ia=0;ia<binN;ia++) {
43         uint32 *bamIn32=(uint32*) (bamIn+ib);
44         startPos[ia*3]  =( ((uint) bamIn32[1]) << 32) | ( (uint)bamIn32[2] );
45         startPos[ia*3+2]=ib;
46         ib+=bamIn32[0]+sizeof(uint32);//note that size of the BAM record does not include the size record itself
47         startPos[ia*3+1]=*( (uint*) (bamIn+ib) ); //read order
48         ib+=sizeof(uint);
49     };
50 
51     //sort
52     qsort((void*) startPos, binN, sizeof(uint)*3, funCompareArrays<uint,3>);
53 
54     BGZF *bgzfBin;
55     bgzfBin=bgzf_open((dirBAMsort+"/b"+to_string((uint) iBin)).c_str(),("w"+to_string((long long) P.outBAMcompression)).c_str());
56     if (bgzfBin==NULL) {
57         ostringstream errOut;
58         errOut <<"EXITING because of fatal ERROR: could not open temporary bam file: " << dirBAMsort+"/b"+to_string((uint) iBin) << "\n";
59         errOut <<"SOLUTION: check that the disk is not full, increase the max number of open files with Linux command ulimit -n before running STAR";
60         exitWithError(errOut.str(), std::cerr, P.inOut->logMain, EXIT_CODE_PARAMETER, P);
61     };
62 
63     outBAMwriteHeader(bgzfBin,P.samHeaderSortedCoord,genome.chrNameAll,genome.chrLengthAll);
64     //send ordered aligns to bgzf one-by-one
65     char bam1[BAM_ATTR_MaxSize];//temp array
66     for (uint ia=0;ia<binN;ia++) {
67         char* bam0=bamIn+startPos[ia*3+2];
68         uint32 size0=*((uint32*) bam0)+sizeof(uint32);
69 
70         if (solo.pSolo.samAttrYes)
71             solo.soloFeat[solo.pSolo.featureInd[solo.pSolo.samAttrFeature]]->addBAMtags(bam0,size0,bam1);
72 
73         bgzf_write(bgzfBin, bam0, size0);
74     };
75 
76     bgzf_flush(bgzfBin);
77     bgzf_close(bgzfBin);
78     //release memory
79     delete [] bamIn;
80     delete [] startPos;
81 };
82