1 #include "SoloFeature.h"
2 #include "streamFuns.h"
3 //#include "TimeFunctions.h"
4 //#include "SequenceFuns.h"
5 //#include "Stats.h"
6 //#include "GlobalVariables.h"
7 
redistributeReadsByCB()8 void SoloFeature::redistributeReadsByCB()
9 {//redistribute reads in files by CB - each file with the approximately the same number of reads, each CB is on one file only
10 
11     /* SoloFeature vars that have to be setup:
12      * nCB
13      * readFeatSum->cbReadCount[]
14     */
15 
16     //find boundaries for cells
17     uint64 nReadRec=std::accumulate(readFeatSum->cbReadCount.begin(), readFeatSum->cbReadCount.end(), 0LLU);
18     //for ( auto &cbrc : readFeatSum->cbReadCount )
19     //    nReadRec += cbrc;
20 
21     uint64 nReadRecBin=nReadRec/pSolo.redistrReadsNfiles;
22 
23     P.inOut->logMain << "     Redistributing reads into "<< pSolo.redistrReadsNfiles <<"files; nReadRec="<< nReadRec <<";   nReadRecBin="<< nReadRecBin <<endl;
24 
25     redistrFilesCBfirst.push_back(0);
26     redistrFilesCBindex.resize(nCB);
27     uint64 nreads=0;
28     uint32 ind=0;
29     for (uint32 icb=0; icb<nCB; icb++){
30         redistrFilesCBindex[icb]=ind;
31         nreads += readFeatSum->cbReadCount[indCB[icb]];
32         if (nreads>=nReadRecBin) {
33             ind++;
34             redistrFilesCBfirst.push_back(icb+1);
35             redistrFilesNreads.push_back(nreads);
36             nreads=0;
37         };
38     };
39     if (nreads>0) {
40         redistrFilesCBfirst.push_back(nCB);
41         redistrFilesNreads.push_back(nreads);
42     };
43 
44     //open output files
45     redistrFilesStreams.resize(redistrFilesNreads.size());
46     for (uint32 ii=0; ii<redistrFilesNreads.size(); ii++) {
47         //open file with flagDelete=true
48         redistrFilesStreams[ii] = &fstrOpen(P.outFileTmp + "solo"+SoloFeatureTypes::Names[featureType]+"_redistr_"+std::to_string(ii), ERROR_OUT, P, true);
49     };
50 
51     //main cycle
52     for (int ii=0; ii<P.runThreadN; ii++) {
53         readFeatAll[ii]->streamReads->clear();//this is needed if eof was reached before
54         readFeatAll[ii]->streamReads->seekg(0,ios::beg);
55 
56         while ( true ) {
57             string line1;
58             getline(*readFeatAll[ii]->streamReads,line1);
59             if (line1.empty()) {
60                 break;
61             };
62 
63             istringstream line1stream(line1);
64             uint64 cb1, umi;
65             line1stream >> umi >> cb1 >> cb1;
66             if (featureType==SoloFeatureTypes::SJ)
67                 line1stream >> cb1;
68             line1stream >> cb1;
69 
70             *redistrFilesStreams[redistrFilesCBindex[indCBwl[cb1]]] << line1 <<'\n';
71 
72         };
73         //TODO: delete streamReads files one by one to save disk space
74     };
75 
76     //close files
77     //for (uint32 ii=0; ii<pSolo.redistrReadsNfiles; ii++)
78     //    redistrFilesStreams[ii]->flush();
79 };
80 
81