1 #ifndef CODE_ParametersSolo 2 #define CODE_ParametersSolo 3 4 #include <array> 5 6 #include "IncludeDefine.h" 7 #include "SoloBarcode.h" 8 #include "SoloFeatureTypes.h" 9 10 class Parameters; 11 class ParametersSolo; 12 13 class UMIdedup { 14 public: 15 const static uint32 tN = 6; 16 array<string,tN> typeNames { {"NoDedup", "Exact", "1MM_All", "1MM_Directional", "1MM_CR", "1MM_Directional_UMItools"} }; 17 enum typeI : int32 { NoDedup=0, Exact=1, All=2, Directional=3, CR=4, Directional_UMItools=5 }; 18 19 struct { 20 uint32_t N; 21 array<bool,tN> B; 22 bool &NoDedup=B[0], &Exact=B[1], &All=B[2], &Directional=B[3], &CR=B[4], &Directional_UMItools=B[5]; 23 } yes; 24 25 struct { 26 //uint32_t N; 27 array<uint32_t,tN> I; 28 uint32_t &NoDedup=I[0], &Exact=I[1], &All=I[2], &Directional=I[3], &CR=I[4], &Directional_UMItools=I[5]; 29 uint32_t main; //index for SAM/stats/filtering output 30 } countInd; //index in the countCellGennUMI 31 32 vector<string> typesIn; //UMIdedup types from user options 33 vector<int32> types; //the above converted to typeI numbers 34 int32 typeMain; //the type to be used in SAM/stats/filtering output - for now just types[0] 35 36 void initialize(ParametersSolo *pS); 37 38 //protected: 39 // int it; 40 }; 41 42 class MultiMappers { 43 public: 44 const static uint32 tN = 5; 45 array<string,tN> typeNames { {"Unique", "Uniform", "Rescue", "PropUnique", "EM"} }; 46 enum typeI : int32 { Unique=0, Uniform=1, Rescue=2, PropUnique=3, EM=4 }; 47 48 struct { 49 bool multi; //if multimappers are requested 50 uint32_t N; 51 array<bool,tN> B; 52 bool &Unique=B[0], &Uniform=B[1], &Rescue=B[2], &PropUnique=B[3], &EM=B[4] ; 53 } yes; 54 55 struct { 56 //uint32_t N; 57 array<uint32_t,tN> I; 58 uint32_t &Unique=I[0], &Uniform=I[1], &Rescue=I[2], &PropUnique=I[3], &EM=I[4]; 59 uint32_t main; //index for SAM/stats/filtering output 60 } countInd; //index in the countCellGennUMI 61 62 vector<string> typesIn; //UMIdedup types from user options 63 vector<int32> types; //the above converted to typeI numbers 64 int32 typeMain; //the type to be used in SAM/stats/filtering output - for now just types[0] 65 66 void initialize(ParametersSolo *pS); 67 }; 68 69 class ParametersSolo { 70 public: 71 Parameters *pP; 72 bool yes; 73 74 //chemistry, library etc 75 string typeStr; 76 enum SoloTypes : int32 {None=0, CB_UMI_Simple=1, CB_UMI_Complex=2, CB_samTagOut=3, SmartSeq=4}; 77 SoloTypes type; 78 string strandStr; 79 int32 strand; 80 81 uint32 barcodeRead, barcodeReadIn;//which read is the barcode read = 0,1,2? 82 uint32 barcodeStart, barcodeEnd;//start/end of barcode sequence on barcodeRead 83 bool barcodeReadSeparate; 84 85 //simple barcodes 86 uint32 cbS, cbL; //cell barcode start,length 87 uint32 umiS, umiL; //umi start,length 88 uint32 bL, cbumiL; //total barcode sequene length, CB+UMI length. Former does may not be equal to the latter 89 90 vector<string> cbPositionStr; 91 string umiPositionStr; 92 93 //complex barcodes 94 vector<SoloBarcode> cbV; 95 SoloBarcode umiV; //single UMI 96 bool adapterYes; //anchor? 97 string adapterSeq; //anchor sequence 98 uint32 adapterMismatchesNmax;//max number of mismatches in the anchor 99 100 //input from SAM files 101 vector<string> samAtrrBarcodeSeq, samAtrrBarcodeQual; 102 103 //whitelist - general 104 uint64 cbWLsize; 105 bool cbWLyes; 106 vector<string> soloCBwhitelist; 107 vector <uint64> cbWL; 108 vector<string> cbWLstr; 109 110 MultiMappers multiMap; 111 112 //features 113 vector<string> featureIn;//string of requested features 114 vector<uint32> features; 115 uint32 nFeatures;//=features.size(), number of requested features 116 117 array<bool,SoloFeatureTypes::N> featureYes; //which features are requested 118 array<bool,SoloFeatureTypes::N> readInfoYes;//which features will need readInfo (for now only Gene and GeneFull) 119 array<bool,SoloFeatureTypes::N> readIndexYes;//which features will need recording of readIndex (for now only Gene and GeneFull, for multimappers) 120 array<int32,SoloFeatureTypes::N> featureInd;//index of each feature - skips unrequested features 121 122 //filtering 123 char QSbase,QSmax;//quality score base and cutoff 124 125 #ifdef MATCH_CellRanger 126 double cbMinP;//for CBs with non-exact matching to WL, min posterior probability 127 #else 128 float cbMinP;//for CBs with non-exact matching to WL, min posterior probability 129 #endif 130 131 //cell filtering 132 struct { 133 vector<string> type; 134 uint32 topCells; 135 136 struct { 137 double nExpectedCells; 138 double maxPercentile; 139 double maxMinRatio; 140 } knee; 141 142 struct { 143 uint32 indMin, indMax; //min/max cell index, sorted by UMI counts,for empty cells 144 uint32 umiMin; 145 double umiMinFracMedian; 146 uint32 candMaxN; 147 double FDR; 148 uint32 simN; 149 } eDcr;//EmptyDrops-CellRanger 150 151 } cellFilter; 152 153 //CB match 154 struct { 155 string type; 156 bool mm1; //1 mismatch allowed 157 bool mm1_multi; //1 mismatch, multiple matches to WL allowed 158 bool oneExact; //CBs require at least one exact match 159 bool mm1_multi_pc; //use psedocounts while calculating probabilities of multi-matches 160 bool mm1_multi_Nbase; //allow multimatching to WL for CBs with N-bases 161 } CBmatchWL; 162 163 //UMIdedup 164 UMIdedup umiDedup; 165 166 //multi-gene umi 167 struct { 168 vector<string> type; 169 bool MultiGeneUMI = false; 170 bool MultiGeneUMI_All = false; 171 bool yes = false; //true for non-CR 172 bool MultiGeneUMI_CR = false; 173 } umiFiltering; 174 175 //clusters 176 string clusterCBfile; 177 178 //output 179 vector<string> outFileNames; 180 struct { 181 string featuresGeneField3; 182 } outFormat; 183 184 bool samAttrYes;//post-processed SAM attributes: error-corrected CB and UMI 185 int32 samAttrFeature;//which feature to use for error correction 186 187 //processing 188 uint32 redistrReadsNfiles; //numer of files to resditribute reads into 189 190 //constants 191 uint32 umiMaskLow, umiMaskHigh; //low/high half bit-mask or UMIs 192 193 void initialize(Parameters *pPin); 194 void umiSwapHalves(uint32 &umi); 195 void complexWLstrings(); 196 void cellFiltering(); 197 }; 198 #endif 199