1 #include "SoloFeature.h"
2 #include "serviceFuns.cpp"
3 #include <math.h>
4 
cellFiltering()5 void SoloFeature::cellFiltering()
6 {
7 
8     if (pSolo.cellFilter.type[0]=="None" ||  nCB<1) {//no filtering, or no cells to filter
9         return;
10     };
11 
12     switch(featureType) {
13         default:
14             return; //filtering not done for other features
15 
16         case SoloFeatureTypes::Velocyto: {
17             filteredCells.reset(nCB);
18 
19             //Velocyto: use filter from Gene
20             SoloFeature &soFeGe= *soloFeatAll[pSolo.featureInd[SoloFeatureTypes::Gene]];
21             for (uint32 ic=0; ic<soFeGe.nCB; ic++) {
22                 if (soFeGe.filteredCells.filtVecBool[ic] && indCBwl[soFeGe.indCB[ic]] != (uint32)-1) {
23                     //this cell passde Gene filtering, and is detected in Velocyto
24                     filteredCells.filtVecBool[indCBwl[soFeGe.indCB[ic]]] = true;
25                 };
26             };
27 
28             nUMIperCBsorted=nUMIperCB;
29             std::sort( nUMIperCBsorted.begin(), nUMIperCBsorted.end(), [](const uint32_t &u1, const uint32_t &u2) {return u1>u2;} ); //sort descending
30             break;
31         };
32         case SoloFeatureTypes::Gene:
33         case SoloFeatureTypes::GeneFull:
34         case -1: {//undefined: matrix loaded from file
35 
36             ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// cell calling (filtering)
37             //do the filtering
38 
39             //simple filtering first
40             nUMIperCBsorted=nUMIperCB;
41             std::sort( nUMIperCBsorted.begin(), nUMIperCBsorted.end(), [](const uint32_t &u1, const uint32_t &u2) {return u1>u2;} ); //sort descending
42 
43             uint32 nUMImax=0, nUMImin=0;
44             if (pSolo.cellFilter.type[0]=="TopCells") {
45                 nUMImin = nUMIperCBsorted[min(nCB-1,pSolo.cellFilter.topCells)];
46             } else {//other filtering types require simple filtering first
47                 //find robust max
48                 uint32 maxind=int(round(pSolo.cellFilter.knee.nExpectedCells*(1.0-pSolo.cellFilter.knee.maxPercentile)));//cell number for robust max
49                 nUMImax = nUMIperCBsorted[min(nCB-1,maxind)];//robust estimate of the max UMI
50                 nUMImin = int(round(nUMImax/pSolo.cellFilter.knee.maxMinRatio));
51             };
52             nUMImin=max(nUMImin,(uint32) 1);//cannot be zero
53 
54             filteredCells.reset(nCB); //all stats to 0
55 
56             for (uint32 icb=0; icb<nCB; icb++) {
57                 if (nUMIperCB[icb]>=nUMImin) {
58                     filteredCells.filtVecBool[icb]=true;
59                     filteredCells.nCellsSimple++;
60                 };
61             };
62 
63             P.inOut->logMain << "cellFiltering: simple: nUMImax="<< nUMImax <<"; nUMImin="<< nUMImin <<"; nCellsSimple="<< filteredCells.nCellsSimple <<endl;
64 
65             if (pSolo.cellFilter.type[0]=="EmptyDrops_CR") {
66                 emptyDrops_CR();
67             };
68         };
69     };
70     //filtering is done: filtVecBool=true for kept cells
71 
72 
73     //calculate filtered statistics
74 
75     bool *geneDetected = new bool[featuresNumber]; //=true if a gene was detected in at least one cell
76     memset((void*) geneDetected, 0, featuresNumber);
77 
78     for (uint32 icb=0; icb<nCB; icb++) {
79         if (filteredCells.filtVecBool[icb]) {
80 
81             filteredCells.nCells++;
82 
83             filteredCells.nUMIinCells += nUMIperCB[icb]; //nUMIperCB was calculated for umiDedup-main
84 
85             filteredCells.nReadInCells += nReadPerCBtotal[icb];
86             filteredCells.nReadInCellsUnique += nReadPerCBunique[icb];
87             filteredCells.nReadPerCellUnique.push_back(nReadPerCBunique[icb]);
88 
89             uint32 ng1 = 0; //number of genes detected in this cell
90             for (uint32 ig=0; ig<nGenePerCB[icb]; ig++) {
91                 uint32 indG1=countCellGeneUMIindex[icb]+ig*countMatStride;
92                 if (countCellGeneUMI[indG1 + pSolo.umiDedup.countInd.main] > 0) {
93                     geneDetected[countCellGeneUMI[indG1]] = true; //gene is present if it's count > 0 for
94                     ng1++;
95                 };
96             };
97 
98             filteredCells.nGeneInCells += ng1; //had to recalculate this since some gene counts could be 0
99             filteredCells.nGenePerCell.push_back(ng1);
100         };
101     };
102 
103     if (filteredCells.nCells==0) {//all stats were already set to 0
104         return;
105     };
106 
107     filteredCells.nGeneDetected=0;
108     for (uint32 ii=0; ii<featuresNumber; ii++) {
109         if (geneDetected[ii])
110             filteredCells.nGeneDetected++;
111     };
112 
113     filteredCells.meanUMIperCell = filteredCells.nUMIinCells / filteredCells.nCells;
114     filteredCells.meanReadPerCellUnique = filteredCells.nReadInCellsUnique / filteredCells.nCells;
115     filteredCells.meanGenePerCell = filteredCells.nGeneInCells / filteredCells.nCells;
116 
117     sort(filteredCells.nReadPerCellUnique.begin(), filteredCells.nReadPerCellUnique.end());
118     sort(filteredCells.nGenePerCell.begin(), filteredCells.nGenePerCell.end());
119 
120     filteredCells.medianUMIperCell = nUMIperCBsorted[filteredCells.nCells/2];
121     filteredCells.medianGenePerCell = filteredCells.nGenePerCell[filteredCells.nCells/2];
122     filteredCells.medianReadPerCellUnique = filteredCells.nReadPerCellUnique[filteredCells.nCells/2];
123 
124     //////////////////////////////////////////////////////////////////output filtered matrix
125     outputResults(true, outputPrefixFiltered);
126 
127     return;
128 };
129