1 #include "SoloFeature.h"
2 #include "serviceFuns.cpp"
3 #include <math.h>
4
cellFiltering()5 void SoloFeature::cellFiltering()
6 {
7
8 if (pSolo.cellFilter.type[0]=="None" || nCB<1) {//no filtering, or no cells to filter
9 return;
10 };
11
12 switch(featureType) {
13 default:
14 return; //filtering not done for other features
15
16 case SoloFeatureTypes::Velocyto: {
17 filteredCells.reset(nCB);
18
19 //Velocyto: use filter from Gene
20 SoloFeature &soFeGe= *soloFeatAll[pSolo.featureInd[SoloFeatureTypes::Gene]];
21 for (uint32 ic=0; ic<soFeGe.nCB; ic++) {
22 if (soFeGe.filteredCells.filtVecBool[ic] && indCBwl[soFeGe.indCB[ic]] != (uint32)-1) {
23 //this cell passde Gene filtering, and is detected in Velocyto
24 filteredCells.filtVecBool[indCBwl[soFeGe.indCB[ic]]] = true;
25 };
26 };
27
28 nUMIperCBsorted=nUMIperCB;
29 std::sort( nUMIperCBsorted.begin(), nUMIperCBsorted.end(), [](const uint32_t &u1, const uint32_t &u2) {return u1>u2;} ); //sort descending
30 break;
31 };
32 case SoloFeatureTypes::Gene:
33 case SoloFeatureTypes::GeneFull:
34 case -1: {//undefined: matrix loaded from file
35
36 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// cell calling (filtering)
37 //do the filtering
38
39 //simple filtering first
40 nUMIperCBsorted=nUMIperCB;
41 std::sort( nUMIperCBsorted.begin(), nUMIperCBsorted.end(), [](const uint32_t &u1, const uint32_t &u2) {return u1>u2;} ); //sort descending
42
43 uint32 nUMImax=0, nUMImin=0;
44 if (pSolo.cellFilter.type[0]=="TopCells") {
45 nUMImin = nUMIperCBsorted[min(nCB-1,pSolo.cellFilter.topCells)];
46 } else {//other filtering types require simple filtering first
47 //find robust max
48 uint32 maxind=int(round(pSolo.cellFilter.knee.nExpectedCells*(1.0-pSolo.cellFilter.knee.maxPercentile)));//cell number for robust max
49 nUMImax = nUMIperCBsorted[min(nCB-1,maxind)];//robust estimate of the max UMI
50 nUMImin = int(round(nUMImax/pSolo.cellFilter.knee.maxMinRatio));
51 };
52 nUMImin=max(nUMImin,(uint32) 1);//cannot be zero
53
54 filteredCells.reset(nCB); //all stats to 0
55
56 for (uint32 icb=0; icb<nCB; icb++) {
57 if (nUMIperCB[icb]>=nUMImin) {
58 filteredCells.filtVecBool[icb]=true;
59 filteredCells.nCellsSimple++;
60 };
61 };
62
63 P.inOut->logMain << "cellFiltering: simple: nUMImax="<< nUMImax <<"; nUMImin="<< nUMImin <<"; nCellsSimple="<< filteredCells.nCellsSimple <<endl;
64
65 if (pSolo.cellFilter.type[0]=="EmptyDrops_CR") {
66 emptyDrops_CR();
67 };
68 };
69 };
70 //filtering is done: filtVecBool=true for kept cells
71
72
73 //calculate filtered statistics
74
75 bool *geneDetected = new bool[featuresNumber]; //=true if a gene was detected in at least one cell
76 memset((void*) geneDetected, 0, featuresNumber);
77
78 for (uint32 icb=0; icb<nCB; icb++) {
79 if (filteredCells.filtVecBool[icb]) {
80
81 filteredCells.nCells++;
82
83 filteredCells.nUMIinCells += nUMIperCB[icb]; //nUMIperCB was calculated for umiDedup-main
84
85 filteredCells.nReadInCells += nReadPerCBtotal[icb];
86 filteredCells.nReadInCellsUnique += nReadPerCBunique[icb];
87 filteredCells.nReadPerCellUnique.push_back(nReadPerCBunique[icb]);
88
89 uint32 ng1 = 0; //number of genes detected in this cell
90 for (uint32 ig=0; ig<nGenePerCB[icb]; ig++) {
91 uint32 indG1=countCellGeneUMIindex[icb]+ig*countMatStride;
92 if (countCellGeneUMI[indG1 + pSolo.umiDedup.countInd.main] > 0) {
93 geneDetected[countCellGeneUMI[indG1]] = true; //gene is present if it's count > 0 for
94 ng1++;
95 };
96 };
97
98 filteredCells.nGeneInCells += ng1; //had to recalculate this since some gene counts could be 0
99 filteredCells.nGenePerCell.push_back(ng1);
100 };
101 };
102
103 if (filteredCells.nCells==0) {//all stats were already set to 0
104 return;
105 };
106
107 filteredCells.nGeneDetected=0;
108 for (uint32 ii=0; ii<featuresNumber; ii++) {
109 if (geneDetected[ii])
110 filteredCells.nGeneDetected++;
111 };
112
113 filteredCells.meanUMIperCell = filteredCells.nUMIinCells / filteredCells.nCells;
114 filteredCells.meanReadPerCellUnique = filteredCells.nReadInCellsUnique / filteredCells.nCells;
115 filteredCells.meanGenePerCell = filteredCells.nGeneInCells / filteredCells.nCells;
116
117 sort(filteredCells.nReadPerCellUnique.begin(), filteredCells.nReadPerCellUnique.end());
118 sort(filteredCells.nGenePerCell.begin(), filteredCells.nGenePerCell.end());
119
120 filteredCells.medianUMIperCell = nUMIperCBsorted[filteredCells.nCells/2];
121 filteredCells.medianGenePerCell = filteredCells.nGenePerCell[filteredCells.nCells/2];
122 filteredCells.medianReadPerCellUnique = filteredCells.nReadPerCellUnique[filteredCells.nCells/2];
123
124 //////////////////////////////////////////////////////////////////output filtered matrix
125 outputResults(true, outputPrefixFiltered);
126
127 return;
128 };
129