1 #ifndef PARAMETERS_DEF
2 #define PARAMETERS_DEF
3 
4 #include "IncludeDefine.h"
5 #include "InOutStreams.h"
6 #include "ParameterInfo.h"
7 #include <map>
8 #include "TimeFunctions.h"
9 #include <unistd.h>
10 #include <signal.h>
11 #include "ParametersChimeric.h"
12 #include "ParametersSolo.h"
13 #include "ParametersClip.h"
14 #include "ParametersGenome.h"
15 #include <vector>
16 #include <array>
17 #include <unordered_set>
18 
// Parameters: one class declaring the program's run-time settings together with
// a few pieces of global state (see the "not really parameters" section below).
// - Every member is public; there is no private/protected section in this declaration.
// - No data member has an in-class initializer; whether Parameters() assigns
//   defaults is not visible in this header (the constructor is only declared here).
// - Unqualified vector/string/array/ifstream are used, so a using-declaration or
//   using-directive from one of the included headers is presumably in effect - confirm.
// - Several members come in pairs: a raw input string/vector (often named "in",
//   "mode" or "...In") next to typed fields (bool/int). NOTE(review): the typed
//   fields look like the parsed form of the raw input - confirm in the implementation.
19 class Parameters {
20 
21     public:
22         vector <ParameterInfoBase*> parArray, parArrayInitial; //raw pointers; ownership/lifetime not visible in this header
23         vector <string> parameterInputName;
24 
25         string commandLine, commandLineFull;
26 
27         //version
28         string versionGenome;
29 
30         //system parameters
31         string sysShell; //shell for executing system commands
32 
33         // run parameters
34         string runMode;
35         vector<string> runModeIn;
36         int runThreadN;
37         mode_t runDirPerm; //presumably the parsed form of runDirPermIn - confirm
38         string runDirPermIn; //permission for directories created at run-time
39         int runRNGseed; //random number generator seed
40 
41         struct {
42             int32 type;//0 no restart, 1 no mapping - restart from _STARtmp files
43         } runRestart; //restart options - in development
44 
45         //parameters
46         vector <string> parametersFiles;
47 
48         //input
49         string inputBAMfile;
50 
51         //genome
52         char genomeNumToNT[6]; //fixed 6-element table; presumably numeric code -> nucleotide character - confirm
53         ParametersGenome pGe, pGeOut;
54 
55         //binning,windows,anchors
56         uint winBinChrNbits, winBinNbits, winAnchorDistNbins, winFlankNbins, winBinN;
57         uint winAnchorMultimapNmax; //max number of alignments for anchors
58         double winReadCoverageRelativeMin;
59         uint winReadCoverageBasesMin;
60 
61         //read parameters
62         vector <string> readFilesType;
63         int readFilesTypeN;
64         string readFilesPrefix, readFilesPrefixFinal;
65         vector <string> readFilesIn, readFilesInTmp;
66         uint32 readFilesN;
67         vector <vector <string> > readFilesNames;
68         vector <string> readFilesCommand;
69         vector <string> readFilesManifest;
70 
71         string readFilesCommandString; //actual command string
72         int readFilesIndex;
73         pid_t readFilesCommandPID[MAX_N_MATES]; //array sized by MAX_N_MATES, which is defined outside this file
74 
75         uint readMapNumber;
76         uint iReadAll;
77         uint readNmates, readNends;
78         string readMatesLengthsIn;
79         uint32 readQualityScoreBase;
80 
81         vector <string> readNameSeparator;
82         vector <char> readNameSeparatorChar;
83 
84         string outSAMreadID;
85         bool outSAMreadIDnumber;
86 
87         //new: structure for readFiles parameters
88         struct {
89             vector<string> samAttrKeepIn; //input vector of SAM tags to keep, if readFilesType=SAMtag
90             std::unordered_set<uint16_t> samAttrKeep; //16-bit keys; presumably the two tag characters packed into one integer - confirm
91             bool samAttrKeepAll, samAttrKeepNone;
92         } readFiles;
93 
94         ParametersClip pClip;
95 
96         //align parameters
97         uint alignSJoverhangMin,alignSJDBoverhangMin,alignSplicedMateMapLmin; //min SJ donor/acceptor length
98         double alignSplicedMateMapLminOverLmate;
99         uint alignWindowsPerReadNmax; //max number of alignment windows per read
100         uint alignTranscriptsPerWindowNmax; //maximum number of transcripts recorded per window
101         uint alignTranscriptsPerReadNmax;   //max number of alignments per read
102         uint alignIntronMin;//min length to call a gap an intron
103         uint alignIntronMax;//max length to call
104         uint alignMatesGapMax;//max gap between the mates (if paired-end)
105         vector <int32> alignSJstitchMismatchNmax;
106 
107         //         struct {
108         //             string strandString;
109         //             int32 strand;
110         //         } pReads;
111 
112         struct {
113             string in;
114             bool yes;
115         } alignSoftClipAtReferenceEnds;
116 
117         struct {
118             string in;
119             bool ext[2][2]; //2x2 flags; the meaning of each index is not visible in this header
120         } alignEndsType;
121 
122         struct {
123             vector<string> in;
124             int nBasesMax;
125             bool concordantPair;
126         } alignEndsProtrude;
127 
128         struct {
129             string in;
130             bool flushRight;
131         } alignInsertionFlush;
132 
133 
134         //seed parameters
135         uint seedMultimapNmax; //max number of multiple alignments per piece
136         uint seedSearchLmax; //max length of the seed
137         uint seedPerReadNmax; //max number of pieces per Read
138         uint seedPerWindowNmax; //max number of aligns per window
139         uint seedNoneLociPerWindow; //max number of aligns from one piece per window
140         uint seedSearchStartLmax;
141         double seedSearchStartLmaxOverLread; //length of split start points
142         uint64 seedSplitMin, seedMapMin;
143 
144         //chunk parameters
145         uint chunkInSizeBytes,chunkInSizeBytesArray,chunkOutBAMsizeBytes;
146 
147         //output
148         string outFileNamePrefix, outStd;
149         string outTmpDir, outTmpKeep;
150         string outLogFileName;
151 
152         //SAM output
153         string outBAMfileCoordName, outBAMfileUnsortedName, outQuantBAMfileName;
154         string samHeader, samHeaderHD, samHeaderSortedCoord, samHeaderExtra;
155         string outSAMmode,  outSAMorder, outSAMprimaryFlag;
156         vector<string> outSAMattributes, outSAMheaderHD, outSAMheaderPG;
157         vector<string> outSAMattrRGline,outSAMattrRGlineSplit,outSAMattrRG;
158         uint outSAMmultNmax,outSAMattrIHstart;
159         string outSAMheaderCommentFile;
160         int outSAMmapqUnique;
161 
162         struct {
163             string in;
164             uint32 type;
165         } outSAMstrandField;
166 
167         int outSAMtlen;
168 
        //one bool per two-letter attribute name; the same anonymous struct type is instantiated twice
        //(outSAMattrPresent and outSAMattrPresentQuant). Presumably a flag is true when that tag is requested - confirm
169         struct {bool NH,HI,AS,NM,MD,nM,jM,jI,RG,XS,rB,vG,vA,vW,ha,ch,MC,CR,CY,UR,UY,CB,UB,GX,GN,sM,sS,sQ,cN;} outSAMattrPresent, outSAMattrPresentQuant;
170 
171         vector <int> outSAMattrOrder, outSAMattrOrderQuant;
172         int outBAMcompression;
173         vector <string> outSAMtype;
174         bool outBAMunsorted, outBAMcoord, outSAMbool;
175         uint32 outBAMcoordNbins;
176         uint32 outBAMsortingBinsN;//user-defined number of bins for sorting
177         string outBAMsortTmpDir;
178 
179 //         string bamRemoveDuplicatesType;
180 //         uint bamRemoveDuplicatesMate2basesN;
181         struct {
182             string mode;
183             bool yes;
184             bool markMulti;
185             uint mate2basesN;
186         } removeDuplicates;
187 
188         int outBAMsortingThreadN, outBAMsortingThreadNactual;
189         uint64 *outBAMsortingBinStart; //genomic starts for bins for sorting BAM files; raw pointer, allocation/ownership not visible in this header
190         uint16 outSAMflagOR, outSAMflagAND;
191 
192         struct {
193             vector <string> mode;
194             bool yes;
195             bool within;//output unmapped reads within SAM/BAM files
196             bool keepPairs;//keep mates together
197         } outSAMunmapped;
198 
199         struct {
200             vector <string> mode;
201             bool yes;
202             bool KeepOnlyAddedReferences;
203             bool KeepAllAddedReferences;
204         } outSAMfilter;
205 
206         struct {
207             string mode;
208             bool random;
209         } outMultimapperOrder;
210 
211         struct {
212             bool yes;
213             uint NbasesMin;
214             double MMp;
215         } peOverlap;
216 
217         string outReadsUnmapped;
218         int outQSconversionAdd;
219         string outFileTmp;
220 
221         //output filtering
222         uint outFilterMismatchNmax;
223         double outFilterMismatchNoverLmax, outFilterMismatchNoverReadLmax; //max proportion of all MM within all bases
224 
225         uint outFilterMatchNmin,outFilterMultimapNmax;//min number of matches
226         double outFilterScoreMinOverLread, outFilterMatchNminOverLread;//normalized to read length
227         intScore outFilterScoreMin,outFilterMultimapScoreRange;//min score to output
228         string outFilterIntronMotifs,outFilterIntronStrands;
229         string outFilterType; //type of filtering
230         int outFilterBySJoutStage; //indicates the stage of filtering by SJout
231 
232         struct {
233             vector<string> type;
234             bool yes;
235         } outSJ;
236 
237         //output filtering SJs
238         string outSJfilterReads;
239         vector <int32> outSJfilterCountUniqueMin, outSJfilterCountTotalMin;
240         vector <int32> outSJfilterOverhangMin;
241         vector <int32> outSJfilterDistToOtherSJmin; //min allowed distance to other SJ's donor/acceptor
242         vector <int32> outSJfilterIntronMaxVsReadN;
243 
244         //wiggle output
245         vector <string> outWigType, outWigStrand, outWigNorm;
246         string outWigReferencesPrefix;
247         struct {
248             bool yes;
249             bool strand;
250             int type;
251             int format;
252             int norm;
253         } outWigFlags;
254 
255         //2-pass
256 //         uint twoPass.pass1readsN, twoPass.sjLimit;
257 //         string twoPass.dir,twopassSJpass1file;
258         struct {
259             bool yes; //true in 2-pass mode
260             bool pass2; //true if now running the 2nd pass
261             uint pass1readsN;
262             int pass1readsN_par; //signed counterpart of pass1readsN; presumably the raw user input - confirm
263             string dir;
264             string pass1sjFile;
265             string mode;
266         } twoPass;
267 
268         //inserting junctions on the fly
269         struct {
270             bool yes; //insert?
271             bool pass1;//insert on the 1st pass?
272             bool pass2;//insert on the 2nd pass?
273             string outDir;
274         } sjdbInsert;
275 
276         //storage limits
277         uint64 limitGenomeGenerateRAM;
278         vector<uint64> limitIObufferSize; //max size of the in/out buffer, bytes
279         uint64 limitOutSAMoneReadBytes;
280         uint64 limitOutSJoneRead, limitOutSJcollapsed;
281         uint64 limitBAMsortRAM;
282         uint64 limitSjdbInsertNsj;
283         uint64 limitNreadsSoft;
284 
285         // penalties
286         intScore scoreGap, scoreGapNoncan, scoreGapGCAG, scoreGapATAC, scoreDelBase, scoreDelOpen, scoreInsBase, scoreInsOpen;
287         intScore scoreStitchSJshift;//Max negative score when (original comment is cut off here)
288         double scoreGenomicLengthLog2scale;
289 
290         //quantification parameters
291         //input
292 
293         struct {
294           bool yes; //if any quantification is done
295           vector <string> mode; //quantification mode input string
296 
297           struct {
298               bool yes;
299               bool bamYes;
300               bool indel;
301               bool softClip;
302               bool singleEnd;
303               int bamCompression;
304               string ban;
305           } trSAM;
306 
307           struct {
308               bool yes;
309               string outFile;
310           } geCount;
311 
312           struct {
313               bool yes;
314           } geneFull;
315 
316           struct {
317               bool yes;
318           } gene;
319 
320         } quant;
321 
322         //variation parameters
323         struct {
324             bool yes;
325             string vcfFile;
326         } var;
327 
328         struct {
329             bool yes;
330             bool SAMtag;
331             string outputMode;
332         } wasp;
333 
334         //solo
335         ParametersSolo pSolo;
336 
337         //chimeric
338         ParametersChimeric pCh;
339 
340         //splitting
341         uint maxNsplit;
342 
343         //not really parameters, but global variables:
344         array<vector<uint64>,2> sjAll; //fixed pair of vectors; what each of the two holds is not visible in this header
345         uint64 sjNovelN, *sjNovelStart, *sjNovelEnd; //novel junctions collapsed and filtered; the two pointers are raw, ownership not visible here
346 
347     ////////////////////// CLEAN-UP needed
    //the members below are indented differently but are still in the public section opened above
348     InOutStreams *inOut; //main input output streams; raw pointer, ownership not visible in this header
349 
350     uint Lread;
351 
    //member functions: declared here only, all definitions live outside this header
352     Parameters(); //default constructor
353     int readParsFromFile(ifstream*, ofstream*, int); //read parameters from one file; the parameters are unnamed here, see the definition for their roles
354     int readPars(); // read parameters from all files
355     int scanOneLine (string &lineIn, int inputLevel, int inputLevelRequested); //lineIn is a non-const reference; the meaning of the int result is set by the definition
356     void scanAllLines (istream &streamIn, int inputLevel, int inputLevelRequested); //stream counterpart of scanOneLine with the same two level arguments
357     void inputParameters (int argInN, char* argIn[]); //input parameters: default, from files, from command line
358     void openReadsFiles(); //presumably opens the read input files - confirm in the definition
359     void readFilesInit(); //presumably sets up the readFiles* members - confirm in the definition
360     void closeReadsFiles(); //presumably the counterpart of openReadsFiles - confirm in the definition
    //both arguments are taken by value (copied on each call), and their names shadow the data members
    //readFilesCommandString and readFilesNames; note the member readFilesNames is vector<vector<string>>, the argument is vector<string>
361     void readSAMheader(const string readFilesCommandString, const vector<string> readFilesNames);
362     void samAttributes();
363     void samAttrRequiresBAM(bool attrYes, string attrTag); //attrTag is taken by value
364 };
365 #endif  // Parameters.h
366