1 #include "BAMbinSortByCoordinate.h"
2 #include "ErrorWarning.h"
3 #include "serviceFuns.cpp"
4 #include "BAMfunctions.h"
5 #include "SequenceFuns.h"
6
BAMbinSortByCoordinate(uint32 iBin,uint binN,uint binS,uint nThreads,string dirBAMsort,Parameters & P,Genome & genome,Solo & solo)7 void BAMbinSortByCoordinate(uint32 iBin, uint binN, uint binS, uint nThreads, string dirBAMsort, Parameters &P, Genome &genome, Solo &solo) {
8
9 if (binS==0) return; //nothing to do for empty bins
10 //allocate arrays
11 char *bamIn=new char[binS+1];
12 uint *startPos=new uint[binN*3];
13
14 uint bamInBytes=0;
15 //load all aligns
16 for (uint it=0; it<nThreads; it++) {
17 string bamInFile=dirBAMsort+to_string(it)+"/"+to_string((uint) iBin);
18 ifstream bamInStream;
19 bamInStream.open(bamInFile.c_str(),std::ios::binary | std::ios::ate);//open at the end to get file size
20 int64 s1=bamInStream.tellg();
21 if (s1>0) {
22 bamInStream.seekg(std::ios::beg);
23 bamInStream.read(bamIn+bamInBytes,s1);//read the whole file
24 } else if (s1<0) {
25 ostringstream errOut;
26 errOut << "EXITING because of FATAL ERROR: failed reading from temporary file: " << dirBAMsort+to_string(it)+"/"+to_string((uint) iBin);
27 exitWithError(errOut.str(),std::cerr, P.inOut->logMain, 1, P);
28 };
29 bamInBytes += bamInStream.gcount();
30 bamInStream.close();
31 remove(bamInFile.c_str());
32 };
33 if (bamInBytes!=binS) {
34 ostringstream errOut;
35 errOut << "EXITING because of FATAL ERROR: number of bytes expected from the BAM bin does not agree with the actual size on disk: ";
36 errOut << "Expected bin size=" <<binS <<" ; size on disk="<< bamInBytes <<" ; bin number="<< iBin <<"\n";
37 exitWithError(errOut.str(),std::cerr, P.inOut->logMain, 1, P);
38 };
39
40 //extract coordinates
41
42 for (uint ib=0,ia=0;ia<binN;ia++) {
43 uint32 *bamIn32=(uint32*) (bamIn+ib);
44 startPos[ia*3] =( ((uint) bamIn32[1]) << 32) | ( (uint)bamIn32[2] );
45 startPos[ia*3+2]=ib;
46 ib+=bamIn32[0]+sizeof(uint32);//note that size of the BAM record does not include the size record itself
47 startPos[ia*3+1]=*( (uint*) (bamIn+ib) ); //read order
48 ib+=sizeof(uint);
49 };
50
51 //sort
52 qsort((void*) startPos, binN, sizeof(uint)*3, funCompareArrays<uint,3>);
53
54 BGZF *bgzfBin;
55 bgzfBin=bgzf_open((dirBAMsort+"/b"+to_string((uint) iBin)).c_str(),("w"+to_string((long long) P.outBAMcompression)).c_str());
56 if (bgzfBin==NULL) {
57 ostringstream errOut;
58 errOut <<"EXITING because of fatal ERROR: could not open temporary bam file: " << dirBAMsort+"/b"+to_string((uint) iBin) << "\n";
59 errOut <<"SOLUTION: check that the disk is not full, increase the max number of open files with Linux command ulimit -n before running STAR";
60 exitWithError(errOut.str(), std::cerr, P.inOut->logMain, EXIT_CODE_PARAMETER, P);
61 };
62
63 outBAMwriteHeader(bgzfBin,P.samHeaderSortedCoord,genome.chrNameAll,genome.chrLengthAll);
64 //send ordered aligns to bgzf one-by-one
65 char bam1[BAM_ATTR_MaxSize];//temp array
66 for (uint ia=0;ia<binN;ia++) {
67 char* bam0=bamIn+startPos[ia*3+2];
68 uint32 size0=*((uint32*) bam0)+sizeof(uint32);
69
70 if (solo.pSolo.samAttrYes)
71 solo.soloFeat[solo.pSolo.featureInd[solo.pSolo.samAttrFeature]]->addBAMtags(bam0,size0,bam1);
72
73 bgzf_write(bgzfBin, bam0, size0);
74 };
75
76 bgzf_flush(bgzfBin);
77 bgzf_close(bgzfBin);
78 //release memory
79 delete [] bamIn;
80 delete [] startPos;
81 };
82