1 #ifndef CODE_ParametersSolo
2 #define CODE_ParametersSolo
3 
4 #include <array>
5 
6 #include "IncludeDefine.h"
7 #include "SoloBarcode.h"
8 #include "SoloFeatureTypes.h"
9 
10 class Parameters;
11 class ParametersSolo;
12 
13 class UMIdedup {
14 public:
15     const static uint32 tN = 6;
16     array<string,tN> typeNames { {"NoDedup", "Exact", "1MM_All", "1MM_Directional", "1MM_CR", "1MM_Directional_UMItools"} };
17     enum typeI : int32 { NoDedup=0, Exact=1, All=2, Directional=3, CR=4, Directional_UMItools=5 };
18 
19     struct {
20         uint32_t N;
21         array<bool,tN> B;
22         bool &NoDedup=B[0], &Exact=B[1], &All=B[2], &Directional=B[3], &CR=B[4], &Directional_UMItools=B[5];
23     } yes;
24 
25     struct {
26         //uint32_t N;
27         array<uint32_t,tN> I;
28         uint32_t &NoDedup=I[0], &Exact=I[1], &All=I[2], &Directional=I[3], &CR=I[4], &Directional_UMItools=I[5];
29         uint32_t main; //index for SAM/stats/filtering output
30     } countInd; //index in the countCellGennUMI
31 
32     vector<string> typesIn; //UMIdedup types from user options
33     vector<int32> types; //the above converted to typeI numbers
34     int32 typeMain; //the type to be used in SAM/stats/filtering output - for now just types[0]
35 
36     void initialize(ParametersSolo *pS);
37 
38 //protected:
39 //    int it;
40 };
41 
42 class MultiMappers {
43 public:
44     const static uint32 tN = 5;
45     array<string,tN> typeNames { {"Unique", "Uniform", "Rescue", "PropUnique", "EM"} };
46     enum typeI : int32 { Unique=0, Uniform=1, Rescue=2, PropUnique=3, EM=4 };
47 
48     struct {
49         bool multi; //if multimappers are requested
50         uint32_t N;
51         array<bool,tN> B;
52         bool &Unique=B[0], &Uniform=B[1], &Rescue=B[2], &PropUnique=B[3], &EM=B[4] ;
53     } yes;
54 
55     struct {
56         //uint32_t N;
57         array<uint32_t,tN> I;
58         uint32_t &Unique=I[0], &Uniform=I[1], &Rescue=I[2], &PropUnique=I[3], &EM=I[4];
59         uint32_t main; //index for SAM/stats/filtering output
60     } countInd; //index in the countCellGennUMI
61 
62     vector<string> typesIn; //UMIdedup types from user options
63     vector<int32> types; //the above converted to typeI numbers
64     int32 typeMain; //the type to be used in SAM/stats/filtering output - for now just types[0]
65 
66     void initialize(ParametersSolo *pS);
67 };
68 
69 class ParametersSolo {
70 public:
71     Parameters *pP;
72     bool yes;
73 
74     //chemistry, library etc
75     string typeStr;
76     enum SoloTypes : int32 {None=0, CB_UMI_Simple=1, CB_UMI_Complex=2, CB_samTagOut=3, SmartSeq=4};
77     SoloTypes type;
78     string strandStr;
79     int32 strand;
80 
81     uint32 barcodeRead, barcodeReadIn;//which read is the barcode read = 0,1,2?
82     uint32 barcodeStart, barcodeEnd;//start/end of barcode sequence on barcodeRead
83     bool barcodeReadSeparate;
84 
85     //simple barcodes
86     uint32 cbS, cbL; //cell barcode start,length
87     uint32 umiS, umiL; //umi start,length
88     uint32 bL, cbumiL; //total barcode sequene length, CB+UMI length. Former does may not be equal to the latter
89 
90     vector<string> cbPositionStr;
91     string umiPositionStr;
92 
93     //complex barcodes
94     vector<SoloBarcode> cbV;
95     SoloBarcode umiV; //single UMI
96     bool adapterYes; //anchor?
97     string adapterSeq; //anchor sequence
98     uint32 adapterMismatchesNmax;//max number of mismatches in the anchor
99 
100     //input from SAM files
101     vector<string> samAtrrBarcodeSeq, samAtrrBarcodeQual;
102 
103     //whitelist - general
104     uint64 cbWLsize;
105     bool cbWLyes;
106     vector<string> soloCBwhitelist;
107     vector <uint64> cbWL;
108     vector<string> cbWLstr;
109 
110     MultiMappers multiMap;
111 
112     //features
113     vector<string> featureIn;//string of requested features
114     vector<uint32> features;
115     uint32 nFeatures;//=features.size(), number of requested features
116 
117     array<bool,SoloFeatureTypes::N> featureYes; //which features are requested
118     array<bool,SoloFeatureTypes::N> readInfoYes;//which features will need readInfo (for now only Gene and GeneFull)
119     array<bool,SoloFeatureTypes::N> readIndexYes;//which features will need recording of readIndex (for now only Gene and GeneFull, for multimappers)
120     array<int32,SoloFeatureTypes::N> featureInd;//index of each feature - skips unrequested features
121 
122     //filtering
123     char QSbase,QSmax;//quality score base and cutoff
124 
125     #ifdef MATCH_CellRanger
126     double cbMinP;//for CBs with non-exact matching to WL, min posterior probability
127     #else
128     float cbMinP;//for CBs with non-exact matching to WL, min posterior probability
129     #endif
130 
131     //cell filtering
132     struct {
133         vector<string> type;
134         uint32 topCells;
135 
136         struct {
137             double nExpectedCells;
138             double maxPercentile;
139             double maxMinRatio;
140         } knee;
141 
142         struct {
143             uint32 indMin, indMax; //min/max cell index, sorted by UMI counts,for empty cells
144             uint32 umiMin;
145             double umiMinFracMedian;
146             uint32 candMaxN;
147             double FDR;
148             uint32 simN;
149         } eDcr;//EmptyDrops-CellRanger
150 
151     } cellFilter;
152 
153     //CB match
154     struct {
155         string type;
156         bool mm1; //1 mismatch allowed
157         bool mm1_multi; //1 mismatch, multiple matches to WL allowed
158         bool oneExact; //CBs require at least one exact match
159         bool mm1_multi_pc; //use psedocounts while calculating probabilities of multi-matches
160         bool mm1_multi_Nbase; //allow multimatching to WL for CBs with N-bases
161     } CBmatchWL;
162 
163     //UMIdedup
164     UMIdedup umiDedup;
165 
166     //multi-gene umi
167     struct {
168         vector<string> type;
169         bool MultiGeneUMI       = false;
170         bool MultiGeneUMI_All   = false;
171         bool yes                = false; //true for non-CR
172         bool MultiGeneUMI_CR    = false;
173     } umiFiltering;
174 
175     //clusters
176     string clusterCBfile;
177 
178     //output
179     vector<string> outFileNames;
180     struct {
181     	string featuresGeneField3;
182     } outFormat;
183 
184     bool samAttrYes;//post-processed SAM attributes: error-corrected CB and UMI
185     int32 samAttrFeature;//which feature to use for error correction
186 
187     //processing
188     uint32 redistrReadsNfiles; //numer of files to resditribute reads into
189 
190     //constants
191     uint32 umiMaskLow, umiMaskHigh; //low/high half bit-mask or UMIs
192 
193     void initialize(Parameters *pPin);
194     void umiSwapHalves(uint32 &umi);
195     void complexWLstrings();
196     void cellFiltering();
197 };
198 #endif
199