1 #include "SoloFeature.h" 2 #include "streamFuns.h" 3 //#include "TimeFunctions.h" 4 //#include "SequenceFuns.h" 5 //#include "Stats.h" 6 //#include "GlobalVariables.h" 7 redistributeReadsByCB()8void SoloFeature::redistributeReadsByCB() 9 {//redistribute reads in files by CB - each file with the approximately the same number of reads, each CB is on one file only 10 11 /* SoloFeature vars that have to be setup: 12 * nCB 13 * readFeatSum->cbReadCount[] 14 */ 15 16 //find boundaries for cells 17 uint64 nReadRec=std::accumulate(readFeatSum->cbReadCount.begin(), readFeatSum->cbReadCount.end(), 0LLU); 18 //for ( auto &cbrc : readFeatSum->cbReadCount ) 19 // nReadRec += cbrc; 20 21 uint64 nReadRecBin=nReadRec/pSolo.redistrReadsNfiles; 22 23 P.inOut->logMain << " Redistributing reads into "<< pSolo.redistrReadsNfiles <<"files; nReadRec="<< nReadRec <<"; nReadRecBin="<< nReadRecBin <<endl; 24 25 redistrFilesCBfirst.push_back(0); 26 redistrFilesCBindex.resize(nCB); 27 uint64 nreads=0; 28 uint32 ind=0; 29 for (uint32 icb=0; icb<nCB; icb++){ 30 redistrFilesCBindex[icb]=ind; 31 nreads += readFeatSum->cbReadCount[indCB[icb]]; 32 if (nreads>=nReadRecBin) { 33 ind++; 34 redistrFilesCBfirst.push_back(icb+1); 35 redistrFilesNreads.push_back(nreads); 36 nreads=0; 37 }; 38 }; 39 if (nreads>0) { 40 redistrFilesCBfirst.push_back(nCB); 41 redistrFilesNreads.push_back(nreads); 42 }; 43 44 //open output files 45 redistrFilesStreams.resize(redistrFilesNreads.size()); 46 for (uint32 ii=0; ii<redistrFilesNreads.size(); ii++) { 47 //open file with flagDelete=true 48 redistrFilesStreams[ii] = &fstrOpen(P.outFileTmp + "solo"+SoloFeatureTypes::Names[featureType]+"_redistr_"+std::to_string(ii), ERROR_OUT, P, true); 49 }; 50 51 //main cycle 52 for (int ii=0; ii<P.runThreadN; ii++) { 53 readFeatAll[ii]->streamReads->clear();//this is needed if eof was reached before 54 readFeatAll[ii]->streamReads->seekg(0,ios::beg); 55 56 while ( true ) { 57 string line1; 58 getline(*readFeatAll[ii]->streamReads,line1); 59 if (line1.empty()) { 60 break; 61 }; 62 63 istringstream line1stream(line1); 64 uint64 cb1, umi; 65 line1stream >> umi >> cb1 >> cb1; 66 if (featureType==SoloFeatureTypes::SJ) 67 line1stream >> cb1; 68 line1stream >> cb1; 69 70 *redistrFilesStreams[redistrFilesCBindex[indCBwl[cb1]]] << line1 <<'\n'; 71 72 }; 73 //TODO: delete streamReads files one by one to save disk space 74 }; 75 76 //close files 77 //for (uint32 ii=0; ii<pSolo.redistrReadsNfiles; ii++) 78 // redistrFilesStreams[ii]->flush(); 79 }; 80 81