1 #ifndef PARAMETERS_DEF 2 #define PARAMETERS_DEF 3 4 #include "IncludeDefine.h" 5 #include "InOutStreams.h" 6 #include "ParameterInfo.h" 7 #include <map> 8 #include "TimeFunctions.h" 9 #include <unistd.h> 10 #include <signal.h> 11 #include "ParametersChimeric.h" 12 #include "ParametersSolo.h" 13 #include "ParametersClip.h" 14 #include "ParametersGenome.h" 15 #include <vector> 16 #include <array> 17 #include <unordered_set> 18 19 class Parameters { 20 21 public: 22 vector <ParameterInfoBase*> parArray, parArrayInitial; 23 vector <string> parameterInputName; 24 25 string commandLine, commandLineFull; 26 27 //version 28 string versionGenome; 29 30 //system parameters 31 string sysShell; //shell for executing system commands 32 33 // run parameters 34 string runMode; 35 vector<string> runModeIn; 36 int runThreadN; 37 mode_t runDirPerm; 38 string runDirPermIn; //permission for directores created at run-time 39 int runRNGseed; //random number generator seed 40 41 struct { 42 int32 type;//0 no restart, 1 no mapping - restart from _STARtmp files 43 } runRestart; //restart options - in development 44 45 //parameters 46 vector <string> parametersFiles; 47 48 //input 49 string inputBAMfile; 50 51 //genome 52 char genomeNumToNT[6]; 53 ParametersGenome pGe, pGeOut; 54 55 //binning,windows,anchors 56 uint winBinChrNbits, winBinNbits, winAnchorDistNbins, winFlankNbins, winBinN; 57 uint winAnchorMultimapNmax; //max number of alignments for anchors 58 double winReadCoverageRelativeMin; 59 uint winReadCoverageBasesMin; 60 61 //read parameters 62 vector <string> readFilesType; 63 int readFilesTypeN; 64 string readFilesPrefix, readFilesPrefixFinal; 65 vector <string> readFilesIn, readFilesInTmp; 66 uint32 readFilesN; 67 vector <vector <string> > readFilesNames; 68 vector <string> readFilesCommand; 69 vector <string> readFilesManifest; 70 71 string readFilesCommandString; //actual command string 72 int readFilesIndex; 73 pid_t readFilesCommandPID[MAX_N_MATES]; 74 75 uint readMapNumber; 76 uint iReadAll; 77 uint readNmates, readNends; 78 string readMatesLengthsIn; 79 uint32 readQualityScoreBase; 80 81 vector <string> readNameSeparator; 82 vector <char> readNameSeparatorChar; 83 84 string outSAMreadID; 85 bool outSAMreadIDnumber; 86 87 //new: structure for readFiles parameters 88 struct { 89 vector<string> samAttrKeepIn; //input vector of SAM tags to keep, if readFilesType=SAMtag 90 std::unordered_set<uint16_t> samAttrKeep; 91 bool samAttrKeepAll, samAttrKeepNone; 92 } readFiles; 93 94 ParametersClip pClip; 95 96 //align parameters 97 uint alignSJoverhangMin,alignSJDBoverhangMin,alignSplicedMateMapLmin; //min SJ donor/acceptor length 98 double alignSplicedMateMapLminOverLmate; 99 uint alignWindowsPerReadNmax; //max number of alignment windows per read 100 uint alignTranscriptsPerWindowNmax; //maximum number of transcripts recorded per window 101 uint alignTranscriptsPerReadNmax; //max number of alignments per read 102 uint alignIntronMin;//min length to call a gap an intron 103 uint alignIntronMax;//max length to call 104 uint alignMatesGapMax;//max gap between the mates (if paired-end) 105 vector <int32> alignSJstitchMismatchNmax; 106 107 // struct { 108 // string strandString; 109 // int32 strand; 110 // } pReads; 111 112 struct { 113 string in; 114 bool yes; 115 } alignSoftClipAtReferenceEnds; 116 117 struct { 118 string in; 119 bool ext[2][2]; 120 } alignEndsType; 121 122 struct { 123 vector<string> in; 124 int nBasesMax; 125 bool concordantPair; 126 } alignEndsProtrude; 127 128 struct { 129 string in; 130 bool flushRight; 131 } alignInsertionFlush; 132 133 134 //seed parameters 135 uint seedMultimapNmax; //max number of multiple alignments per piece 136 uint seedSearchLmax; //max length of the seed 137 uint seedPerReadNmax; //max number of pieces per Read 138 uint seedPerWindowNmax; //max number of aligns per window 139 uint seedNoneLociPerWindow; //max number of aligns from one piece per window 140 uint seedSearchStartLmax; 141 double seedSearchStartLmaxOverLread; //length of split start points 142 uint64 seedSplitMin, seedMapMin; 143 144 //chunk parameters 145 uint chunkInSizeBytes,chunkInSizeBytesArray,chunkOutBAMsizeBytes; 146 147 //output 148 string outFileNamePrefix, outStd; 149 string outTmpDir, outTmpKeep; 150 string outLogFileName; 151 152 //SAM output 153 string outBAMfileCoordName, outBAMfileUnsortedName, outQuantBAMfileName; 154 string samHeader, samHeaderHD, samHeaderSortedCoord, samHeaderExtra; 155 string outSAMmode, outSAMorder, outSAMprimaryFlag; 156 vector<string> outSAMattributes, outSAMheaderHD, outSAMheaderPG; 157 vector<string> outSAMattrRGline,outSAMattrRGlineSplit,outSAMattrRG; 158 uint outSAMmultNmax,outSAMattrIHstart; 159 string outSAMheaderCommentFile; 160 int outSAMmapqUnique; 161 162 struct { 163 string in; 164 uint32 type; 165 } outSAMstrandField; 166 167 int outSAMtlen; 168 169 struct {bool NH,HI,AS,NM,MD,nM,jM,jI,RG,XS,rB,vG,vA,vW,ha,ch,MC,CR,CY,UR,UY,CB,UB,GX,GN,sM,sS,sQ,cN;} outSAMattrPresent, outSAMattrPresentQuant; 170 171 vector <int> outSAMattrOrder, outSAMattrOrderQuant; 172 int outBAMcompression; 173 vector <string> outSAMtype; 174 bool outBAMunsorted, outBAMcoord, outSAMbool; 175 uint32 outBAMcoordNbins; 176 uint32 outBAMsortingBinsN;//user-defined number of bins for sorting 177 string outBAMsortTmpDir; 178 179 // string bamRemoveDuplicatesType; 180 // uint bamRemoveDuplicatesMate2basesN; 181 struct { 182 string mode; 183 bool yes; 184 bool markMulti; 185 uint mate2basesN; 186 } removeDuplicates; 187 188 int outBAMsortingThreadN, outBAMsortingThreadNactual; 189 uint64 *outBAMsortingBinStart; //genomic starts for bins for sorting BAM files 190 uint16 outSAMflagOR, outSAMflagAND; 191 192 struct { 193 vector <string> mode; 194 bool yes; 195 bool within;//output unmapped reads within SAM/BAM files 196 bool keepPairs;//keep mates together 197 } outSAMunmapped; 198 199 struct { 200 vector <string> mode; 201 bool yes; 202 bool KeepOnlyAddedReferences; 203 bool KeepAllAddedReferences; 204 } outSAMfilter; 205 206 struct { 207 string mode; 208 bool random; 209 } outMultimapperOrder; 210 211 struct { 212 bool yes; 213 uint NbasesMin; 214 double MMp; 215 } peOverlap; 216 217 string outReadsUnmapped; 218 int outQSconversionAdd; 219 string outFileTmp; 220 221 //output filtering 222 uint outFilterMismatchNmax; 223 double outFilterMismatchNoverLmax, outFilterMismatchNoverReadLmax; //max proportion of all MM within all bases 224 225 uint outFilterMatchNmin,outFilterMultimapNmax;//min number of matches 226 double outFilterScoreMinOverLread, outFilterMatchNminOverLread;//normalzied to read length 227 intScore outFilterScoreMin,outFilterMultimapScoreRange;//min score to output 228 string outFilterIntronMotifs,outFilterIntronStrands; 229 string outFilterType; //type of filtering 230 int outFilterBySJoutStage; //indicates the stage of filtering by SJout 231 232 struct { 233 vector<string> type; 234 bool yes; 235 } outSJ; 236 237 //output filtering SJs 238 string outSJfilterReads; 239 vector <int32> outSJfilterCountUniqueMin, outSJfilterCountTotalMin; 240 vector <int32> outSJfilterOverhangMin; 241 vector <int32> outSJfilterDistToOtherSJmin; //min allowed distance to other SJ's donor/acceptor 242 vector <int32> outSJfilterIntronMaxVsReadN; 243 244 //wiggle output 245 vector <string> outWigType, outWigStrand, outWigNorm; 246 string outWigReferencesPrefix; 247 struct { 248 bool yes; 249 bool strand; 250 int type; 251 int format; 252 int norm; 253 } outWigFlags; 254 255 //2-pass 256 // uint twoPass.pass1readsN, twoPass.sjLimit; 257 // string twoPass.dir,twopassSJpass1file; 258 struct { 259 bool yes; //true in 2-pass mode 260 bool pass2; //true if now running the 2nd pass 261 uint pass1readsN; 262 int pass1readsN_par; 263 string dir; 264 string pass1sjFile; 265 string mode; 266 } twoPass; 267 268 //inserting junctions on the fly 269 struct { 270 bool yes; //insert? 271 bool pass1;//insert on the 1st pass? 272 bool pass2;//insert on the 2nd pass? 273 string outDir; 274 } sjdbInsert; 275 276 //storage limits 277 uint64 limitGenomeGenerateRAM; 278 vector<uint64> limitIObufferSize; //max size of the in/out buffer, bytes 279 uint64 limitOutSAMoneReadBytes; 280 uint64 limitOutSJoneRead, limitOutSJcollapsed; 281 uint64 limitBAMsortRAM; 282 uint64 limitSjdbInsertNsj; 283 uint64 limitNreadsSoft; 284 285 // penalties 286 intScore scoreGap, scoreGapNoncan, scoreGapGCAG, scoreGapATAC, scoreDelBase, scoreDelOpen, scoreInsBase, scoreInsOpen; 287 intScore scoreStitchSJshift;//Max negative score when 288 double scoreGenomicLengthLog2scale; 289 290 //quantification parameters 291 //input 292 293 struct { 294 bool yes; //if any quantification is done 295 vector <string> mode; //quantification mode input string 296 297 struct { 298 bool yes; 299 bool bamYes; 300 bool indel; 301 bool softClip; 302 bool singleEnd; 303 int bamCompression; 304 string ban; 305 } trSAM; 306 307 struct { 308 bool yes; 309 string outFile; 310 } geCount; 311 312 struct { 313 bool yes; 314 } geneFull; 315 316 struct { 317 bool yes; 318 } gene; 319 320 } quant; 321 322 //variation parameters 323 struct { 324 bool yes; 325 string vcfFile; 326 } var; 327 328 struct { 329 bool yes; 330 bool SAMtag; 331 string outputMode; 332 } wasp; 333 334 //solo 335 ParametersSolo pSolo; 336 337 //chimeric 338 ParametersChimeric pCh; 339 340 //splitting 341 uint maxNsplit; 342 343 //not really parameters, but global variables: 344 array<vector<uint64>,2> sjAll; 345 uint64 sjNovelN, *sjNovelStart, *sjNovelEnd; //novel junctions collapased and filtered 346 347 ////////////////////// CLEAN-UP needed 348 InOutStreams *inOut; //main input output streams 349 350 uint Lread; 351 352 Parameters(); 353 int readParsFromFile(ifstream*, ofstream*, int); //read parameters from one file 354 int readPars(); // read parameters from all files 355 int scanOneLine (string &lineIn, int inputLevel, int inputLevelRequested); 356 void scanAllLines (istream &streamIn, int inputLevel, int inputLevelRequested); 357 void inputParameters (int argInN, char* argIn[]); //input parameters: default, from files, from command line 358 void openReadsFiles(); 359 void readFilesInit(); 360 void closeReadsFiles(); 361 void readSAMheader(const string readFilesCommandString, const vector<string> readFilesNames); 362 void samAttributes(); 363 void samAttrRequiresBAM(bool attrYes, string attrTag); 364 }; 365 #endif // Parameters.h 366