1 #ifndef H_SoloFeature 2 #define H_SoloFeature 3 4 #include <fstream> 5 #include <unordered_map> 6 #include <unordered_set> 7 8 #include "IncludeDefine.h" 9 #include "ReadAlignChunk.h" 10 #include "Transcriptome.h" 11 12 #include "SoloCommon.h" 13 #include "SoloRead.h" 14 #include "ReadAlignChunk.h" 15 16 #include "SoloFilteredCells.h" 17 18 class SoloFeature { 19 private: 20 Parameters &P; 21 ReadAlignChunk **RAchunk; 22 Transcriptome &Trans; 23 24 const int32 featureType; 25 SoloFeature **soloFeatAll; 26 27 static const uint32 umiArrayStride=3; 28 enum {rguG, rguU, rguR}; 29 uint32 rguStride; 30 31 public: 32 ParametersSolo &pSolo; 33 34 SoloReadFeature *readFeatSum, **readFeatAll; 35 SoloReadBarcode *readBarSum; 36 37 uint64 nReadsMapped, nReadsInput; //total number of mapped reads 38 uint32 nCB; 39 uint32 featuresNumber; //number of features (i.e. genes, SJs, etc) 40 41 uint32 *rGeneUMI;//mapped reads sorted by CB 42 uint32 *rCBn;//number of reads for detected CBs in the whitelist 43 uint32 **rCBp;//array of pointers to each CB sub-array 44 45 vector<uint32> indCB;//index of detected CBs in the whitelist 46 vector<uint32> indCBwl; //reverse of indCB: index of WL CBs in detected CB list 47 vector<uint32> nUMIperCB, nUMIperCBsorted;//number of UMIs per CB, and the same sorted (descendant) 48 vector<uint32> nGenePerCB;//number of genes (with >0 UMIs) per CB 49 vector<uint32> nReadPerCB;//number of reads per CB. With multimappers: all aligns per CB 50 vector<uint32> nReadPerCBunique, nReadPerCBtotal; //number of unique and multiple reads per CB 51 52 vector<uint32> countCellGeneUMI;//sparsified matrix for the counts, each entry is: geneID count1 count2 ... countNcounts 53 vector<uint32> countCellGeneUMIindex;//index of CBs in the count matrix 54 uint32 countMatStride; //number of counts per entry in the count matrix 55 56 struct { 57 vector<double> m; 58 vector<uint32> i; 59 uint32 s; 60 } countMatMult; 61 62 vector<unordered_map<uint32, unordered_set<uint64>>> cbFeatureUMImap; //for SmartSeq counting 63 64 string outputPrefix, outputPrefixFiltered; 65 66 SoloFilteredCells filteredCells; 67 68 array<vector<uint64>,2> sjAll; 69 70 vector<readInfoStruct> readInfo; //corrected CB/UMI information for each read 71 72 vector<uint32> redistrFilesCBindex, redistrFilesCBfirst; //redistr file for each CB, CB boundaries in redistributed files 73 vector<uint64> redistrFilesNreads; //number of reads in each file 74 vector <fstream*> redistrFilesStreams; 75 76 SoloFeature(Parameters &Pin, ReadAlignChunk **RAchunk, Transcriptome &inTrans, int32 feTy, SoloReadBarcode *readBarSumIn, SoloFeature **soloFeatAll); 77 void processRecords(); 78 void sumThreads(); 79 void countSmartSeq(); 80 void countCBgeneUMI(); 81 void countVelocyto(); 82 void quantTranscript(); 83 84 void collapseUMI(uint32 iCB, uint32 *umiArray); 85 void collapseUMI_CR(uint32 iCB, uint32 *umiArray); 86 void collapseUMIall(uint32 iCB, uint32 *umiArray); 87 uint32 umiArrayCorrect_CR (const uint32 nU0, uintUMI *umiArr, const bool readInfoRec, const bool nUMIyes, unordered_map <uintUMI,uintUMI> &umiCorr); 88 uint32 umiArrayCorrect_Directional(const uint32 nU0, uintUMI *umiArr, const bool readInfoRec, const bool nUMIyes, unordered_map <uintUMI,uintUMI> &umiCorr, const int32 dirCountAdd); 89 uint32 umiArrayCorrect_Graph (const uint32 nU0, uintUMI *umiArr, const bool readInfoRec, const bool nUMIyes, unordered_map <uintUMI,uintUMI> &umiCorr); 90 91 void outputResults(bool cellFilterYes, string outputPrefixMat); 92 void addBAMtags(char *&bam0, uint32 &size0, char* bam1); 93 void statsOutput(); 94 void redistributeReadsByCB(); 95 96 void cellFiltering(); 97 void emptyDrops_CR(); 98 void loadRawMatrix(); 99 }; 100 101 #endif 102