1  /*==========================================================================
2              RazerS - Fast Read Mapping with Controlled Loss Rate
3                    http://www.seqan.de/projects/razers.html
4 
5  ============================================================================
6   Copyright (C) 2008 by David Weese
7 
8   This program is free software; you can redistribute it and/or
9   modify it under the terms of the GNU Lesser General Public
10   License as published by the Free Software Foundation; either
11   version 3 of the License, or (at your options) any later version.
12 
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16   Lesser General Public License for more details.
17 
18   You should have received a copy of the GNU General Public License
19   along with this program.  If not, see <http://www.gnu.org/licenses/>.
20  ==========================================================================*/
21 
22 #define SEQAN_PROFILE					// enable time measuring
23 //#define SEQAN_DEBUG_SWIFT				// test SWIFT correctness and print bucket parameters
24 //#define RAZERS_DEBUG					// print verification regions
25 #define RAZERS_PRUNE_QGRAM_INDEX		// ignore highly abundant q-grams
26 #define RAZERS_CONCATREADS				// use <ConcatDirect> StringSet to store reads
27 #define RAZERS_MEMOPT					// optimize memory consumption
28 #define RAZERS_MASK_READS				// remove matches with max-hits optimal hits on-the-fly
29 //#define NO_PARAM_CHOOSER				// disable loss-rate parameter choosing
30 //#define RAZERS_PARALLEL				// parallelize using Intel's Threading Building Blocks
31 #define RAZERS_MATEPAIRS				// enable paired-end matching
32 //#define RAZERS_DIRECT_MAQ_MAPPING
33 //#define SEQAN_USE_SSE2_WORDS			// use SSE2 128-bit integers for MyersBitVector
34 //#define RAZERS_DONTMASKDUPLICATES
35 #define RAZERS_NOOUTERREADGAPS			// enforce the alignment of the first and last base (determines the lakes)
36 #define RAZERS_OPENADDRESSING
37 
38 #include <seqan/platform.h>
39 #ifdef PLATFORM_WINDOWS
40 	#define SEQAN_DEFAULT_TMPDIR "C:\\TEMP\\"
41 #else
42 	#define SEQAN_DEFAULT_TMPDIR "./"
43 #endif
44 
45 #include <seqan/misc/misc_cmdparser.h>
46 #include "razers.h"
47 #include "outputFormat.h"
48 #include "paramChooser.h"
49 #include <seqan/file.h>
50 #include <seqan/store.h>
51 
52 #ifdef RAZERS_PARALLEL
53 #include "razers_parallel.h"
54 #endif
55 
56 #ifdef RAZERS_MATEPAIRS
57 #include "razers_matepairs.h"
58 #endif
59 
60 
61 #include <iostream>
62 #include <sstream>
63 
64 using namespace std;
65 using namespace seqan;
66 
67 
68 //////////////////////////////////////////////////////////////////////////////
69 // Read a list of genome file names
70 template<typename TSpec>
getGenomeFileNameList(CharString filename,StringSet<CharString> & genomeFileNames,RazerSOptions<TSpec> & options)71 int getGenomeFileNameList(CharString filename, StringSet<CharString> & genomeFileNames, RazerSOptions<TSpec> &options)
72 {
73 	ifstream file;
74 	file.open(toCString(filename),ios_base::in | ios_base::binary);
75 	if(!file.is_open())
76 		return RAZERS_GENOME_FAILED;
77 
78 	clear(genomeFileNames);
79 	char c = _streamGet(file);
80 	if (c != '>' && c != '@')	//if file does not start with a fasta header --> list of multiple reference genome files
81 	{
82 		if(options._debugLevel >=1)
83 			cout << endl << "Reading multiple genome files:" <<endl;
84 
85 /*		//locations of genome files are relative to list file's location
86 		string tempGenomeFile(filename);
87 		size_t lastPos = tempGenomeFile.find_last_of('/') + 1;
88 		if (lastPos == tempGenomeFile.npos) lastPos = tempGenomeFile.find_last_of('\\') + 1;
89 		if (lastPos == tempGenomeFile.npos) lastPos = 0;
90 		string filePrefix = tempGenomeFile.substr(0,lastPos);*/
91 
92 		unsigned i = 1;
93 		while(!_streamEOF(file))
94 		{
95 			_parseSkipWhitespace(file, c);
96 			appendValue(genomeFileNames,_parseReadFilepath(file,c));
97 			//CharString currentGenomeFile(filePrefix);
98 			//append(currentGenomeFile,_parseReadFilepath(file,c));
99 			//appendValue(genomeFileNames,currentGenomeFile);
100 			if(options._debugLevel >=2)
101 				cout <<"Genome file #"<< i <<": " << genomeFileNames[length(genomeFileNames)-1] << endl;
102 			++i;
103 			_parseSkipWhitespace(file, c);
104 		}
105 		if(options._debugLevel >=1)
106 			cout << i-1 << " genome files total." <<endl;
107 	}
108 	else		//if file starts with a fasta header --> regular one-genome-file input
109 		appendValue(genomeFileNames, filename, Exact());
110 	file.close();
111 	return 0;
112 
113 }
114 
115 //////////////////////////////////////////////////////////////////////////////
116 // Main read mapper function
117 template <typename TSpec>
mapReads(StringSet<CharString> & genomeFileNames,StringSet<CharString> & readFileNames,CharString & errorPrbFileName,RazerSOptions<TSpec> & options)118 int mapReads(
119 	StringSet<CharString> & genomeFileNames,
120 	StringSet<CharString> & readFileNames,	// NULL terminated list of one/two read files (single/mate-pairs)
121 	CharString & errorPrbFileName,
122 	RazerSOptions<TSpec> &options)
123 {
124 	FragmentStore<>			store;			// stores all of the tables
125 
126 	MultiFasta				genomeSet;
127 	String<String<unsigned short> > 	stats;		// needed for mapping quality calculation
128 
129 	// dump configuration in verbose mode
130 	if (options._debugLevel >= 1)
131 	{
132 		CharString bitmap;
133 		Shape<Dna, GenericShape> shape;
134 		stringToShape(shape, options.shape);
135 		shapeToString(bitmap, shape);
136 
137 		cerr << "___SETTINGS____________" << endl;
138 		cerr << "Genome file:                     \t" << genomeFileNames[0] << endl;
139 		if (empty(readFileNames[1]))
140 			cerr << "Read file:                       \t" << readFileNames[0] << endl;
141 		else
142 		{
143 			cerr << "Read files:                      \t" << readFileNames[0] << endl;
144 			for (unsigned i = 1; i < length(readFileNames); ++i)
145 				cerr << "                                 \t" << readFileNames[i] << endl;
146 		}
147 		cerr << "Compute forward matches:         \t";
148 		if (options.forward)	cerr << "YES" << endl;
149 		else				cerr << "NO" << endl;
150 		cerr << "Compute reverse matches:         \t";
151 		if (options.reverse)		cerr << "YES" << endl;
152 		else				cerr << "NO" << endl;
153 		cerr << "Error rate:                      \t" << options.errorRate << endl;
154 		cerr << "Minimal threshold:               \t" << options.threshold << endl;
155 		cerr << "Shape:                           \t" << bitmap << endl;
156 		cerr << "Repeat threshold:                \t" << options.repeatLength << endl;
157 		cerr << "Overabundance threshold:         \t" << options.abundanceCut << endl;
158 		cerr << "Taboo length:                    \t" << options.tabooLength << endl;
159 		cerr << endl;
160 	}
161 
162 	// circumvent numerical obstacles
163 	options.errorRate += 0.0000001;
164 
165 	//////////////////////////////////////////////////////////////////////////////
166 	// Step 1: Load fasta files and determine genome file type
167 	SEQAN_PROTIMESTART(load_time);
168 
169 #ifdef RAZERS_MATEPAIRS
170 	if (length(readFileNames) == 2)
171 	{
172 		if (!loadReads(store, toCString(readFileNames[0]), toCString(readFileNames[1]), options)) {
173 		//if (!loadReads(readSet, readQualities, readNames, readFileNames[0], readFileNames[1], options)) {
174 			cerr << "Failed to load reads" << endl;
175 			return RAZERS_READS_FAILED;
176 		}
177 	}
178 	else
179 #endif
180 	{
181 		if (!loadReads(store, toCString(readFileNames[0]), options)) {
182 			cerr << "Failed to load reads" << endl;
183 			return RAZERS_READS_FAILED;
184 		}
185 	}
186 
187 	if (options._debugLevel >= 1) cerr << lengthSum(store.readSeqStore) << " bps of " << length(store.readSeqStore) << " reads loaded." << endl;
188 	options.timeLoadFiles = SEQAN_PROTIMEDIFF(load_time);
189 
190 	if (options._debugLevel >= 1)
191 		cerr << "Loading reads took               \t" << options.timeLoadFiles << " seconds" << endl;
192 
193 	if (length(genomeFileNames) == 1)
194 	{
195 		int result = getGenomeFileNameList(genomeFileNames[0], genomeFileNames, options);
196 		if (result == RAZERS_GENOME_FAILED)
197 		{
198 			cerr << "Failed to open genome file " << genomeFileNames[0] << endl;
199 			return result;
200 		}
201 	}
202 
203 	//////////////////////////////////////////////////////////////////////////////
204 	// Step 2: Find matches using SWIFT
205 #ifdef RAZERS_PARALLEL
206     typedef typename RazerSOptions<TSpec>::TMutex TMutex;
207     options.patternMutex = new TMutex[length(readSet)];
208 #endif
209 
210 	String< Pair<CharString, unsigned> > gnoToFileMap; //map to retrieve genome filename and sequence number within that file
211 	int error = mapReads(store, genomeFileNames, gnoToFileMap, stats, options);
212 	if (error != 0)
213 	{
214 		switch (error)
215 		{
216 			case RAZERS_GENOME_FAILED:
217 				cerr << "Failed to load genomes" << endl;
218 				break;
219 
220 			case RAZERS_INVALID_SHAPE:
221 				cerr << "Invalid Shape" << endl;
222 				break;
223 		}
224 		return error;
225 	}
226 
227 #ifdef RAZERS_PARALLEL
228     delete[] options.patternMutex;
229 #endif
230 
231 	//////////////////////////////////////////////////////////////////////////////
232 	// Step 3: Remove duplicates and output matches
233 	if (!options.spec.DONT_DUMP_RESULTS)
234 		dumpMatches(store, genomeFileNames, gnoToFileMap, stats, readFileNames[0], errorPrbFileName, options);
235 
236 	return 0;
237 }
238 
239 //////////////////////////////////////////////////////////////////////////////
240 // Command line parsing and parameter choosing
main(int argc,const char * argv[])241 int main(int argc, const char *argv[])
242 {
243 	RazerSOptions<>			options;
244 	ParamChooserOptions		pm_options;
245 
246 #ifdef RAZERS_MATEPAIRS
247 	const unsigned			maxFiles = 3;
248 #else
249 	const unsigned			maxFiles = 2;
250 #endif
251 	StringSet<CharString>	genomeFileNames;
252 	StringSet<CharString>	readFileNames;
253 	CharString				errorPrbFileName;
254 
255 
256 	// Change defaults
257 	options.forward = false;
258 	options.reverse = false;
259 
260 	CommandLineParser parser;
261 	string rev = "$Revision: 4739 $";
262 	addVersionLine(parser, "RazerS version 1.1 20090909 [" + rev.substr(11, 4) + "]");
263 
264 	//////////////////////////////////////////////////////////////////////////////
265 	// Define options
266 	addTitleLine(parser, "***********************************************************");
267 	addTitleLine(parser, "*** RazerS - Fast Read Mapping with Sensitivity Control ***");
268 	addTitleLine(parser, "***          (c) Copyright 2009 by David Weese          ***");
269 	addTitleLine(parser, "***********************************************************");
270 	addUsageLine(parser, "[OPTION]... <GENOME FILE> <READS FILE>");
271 #ifdef RAZERS_MATEPAIRS
272 	addUsageLine(parser, "[OPTION]... <GENOME FILE> <MP-READS FILE1> <MP-READS FILE2>");
273 #endif
274 	addSection(parser, "Main Options:");
275 	addOption(parser, CommandLineOption("f",  "forward",           "only compute forward matches", OptionType::Boolean));
276 	addOption(parser, CommandLineOption("r",  "reverse",           "only compute reverse complement matches", OptionType::Boolean));
277 	addOption(parser, CommandLineOption("i",  "percent-identity",  "set the percent identity threshold", OptionType::Double | OptionType::Label, 100 - (100.0 * options.errorRate)));
278 #ifndef NO_PARAM_CHOOSER
279 	addOption(parser, CommandLineOption("rr", "recognition-rate",  "set the percent recognition rate", OptionType::Double | OptionType::Label, 100 - (100.0 * pm_options.optionLossRate)));
280 	addOption(parser, addArgumentText(CommandLineOption("pd", "param-dir",         "folder containing user-computed parameter files (optional)", OptionType::String | OptionType::Label), "DIR"));
281 #endif
282 	addOption(parser, CommandLineOption("id", "indels",            "allow indels (default: mismatches only)", OptionType::Boolean));
283 #ifdef RAZERS_MATEPAIRS
284 	addOption(parser, CommandLineOption("ll", "library-length",    "mate-pair library length", OptionType::Int | OptionType::Label, options.libraryLength));
285 	addOption(parser, CommandLineOption("le", "library-error",     "mate-pair library length tolerance", OptionType::Int | OptionType::Label, options.libraryError));
286 #endif
287 	addOption(parser, CommandLineOption("m",  "max-hits",          "output only NUM of the best hits", OptionType::Int | OptionType::Label, options.maxHits));
288 	addOption(parser, CommandLineOption("",   "unique",            "output only unique best matches (-m 1 -dr 0 -pa)", OptionType::Boolean));
289 	addOption(parser, CommandLineOption("tr", "trim-reads",        "trim reads to given length (default off)", OptionType::Int | OptionType::Label));
290 	addOption(parser, addArgumentText(CommandLineOption("o",  "output",            "change output filename (default <READS FILE>.result)", OptionType::String), "FILE"));
291 	addOption(parser, CommandLineOption("v",  "verbose",           "verbose mode", OptionType::Boolean));
292 	addOption(parser, CommandLineOption("vv", "vverbose",          "very verbose mode", OptionType::Boolean));
293 	addSection(parser, "Output Format Options:");
294 	addOption(parser, CommandLineOption("a",  "alignment",         "dump the alignment for each match", OptionType::Boolean));
295 	addOption(parser, CommandLineOption("pa", "purge-ambiguous",   "purge reads with more than max-hits best matches", OptionType::Boolean));
296 	addOption(parser, CommandLineOption("dr", "distance-range",    "only consider matches with at most NUM more errors compared to the best (default output all)", OptionType::Int | OptionType::Label));
297 	addOption(parser, CommandLineOption("of", "output-format",     "set output format", OptionType::Int | OptionType::Label, options.outputFormat));
298 	addHelpLine(parser, "0 = Razer format");
299 	addHelpLine(parser, "1 = enhanced Fasta format");
300 	addHelpLine(parser, "2 = Eland format");
301 	addHelpLine(parser, "3 = Gff format");
302 	addHelpLine(parser, "4 = Sam format");
303 	addHelpLine(parser, "5 = Amos AFG format");
304 	addOption(parser, CommandLineOption("gn", "genome-naming",     "select how genomes are named", OptionType::Int | OptionType::Label, options.genomeNaming));
305 	addHelpLine(parser, "0 = use Fasta id");
306 	addHelpLine(parser, "1 = enumerate beginning with 1");
307 	addOption(parser, CommandLineOption("rn", "read-naming",       "select how reads are named", OptionType::Int | OptionType::Label, options.readNaming));
308 	addHelpLine(parser, "0 = use Fasta id");
309 	addHelpLine(parser, "1 = enumerate beginning with 1");
310 	addHelpLine(parser, "2 = use the read sequence (only for short reads!)");
311 	addHelpLine(parser, "3 = use the Fasta id, do NOT append '/L' or '/R' for mate pairs");
312 	addOption(parser, CommandLineOption("so", "sort-order",        "select how matches are sorted", OptionType::Int | OptionType::Label, options.sortOrder));
313 	addHelpLine(parser, "0 = 1. read number, 2. genome position");
314 	addHelpLine(parser, "1 = 1. genome position, 2. read number");
315 	addOption(parser, CommandLineOption("pf", "position-format",   "select begin/end position numbering", OptionType::Int | OptionType::Label, options.sortOrder));
316 	addHelpLine(parser, "0 = gap space");
317 	addHelpLine(parser, "1 = position space");
318 	addSection(parser, "Filtration Options:");
319 	addOption(parser, addArgumentText(CommandLineOption("s",  "shape",             "set k-mer shape", OptionType::String | OptionType::Label, options.shape), "BITSTRING"));
320 	addOption(parser, CommandLineOption("t",  "threshold",         "set minimum k-mer threshold", OptionType::Int | OptionType::Label, options.threshold));
321 	addOption(parser, CommandLineOption("oc", "overabundance-cut", "set k-mer overabundance cut ratio", OptionType::Int | OptionType::Label, options.abundanceCut));
322 	addOption(parser, CommandLineOption("rl", "repeat-length",     "set simple-repeat length threshold", OptionType::Int | OptionType::Label, options.repeatLength));
323 	addOption(parser, CommandLineOption("tl", "taboo-length",      "set taboo length", OptionType::Int | OptionType::Label, options.tabooLength));
324 #ifdef RAZERS_DIRECT_MAQ_MAPPING
325 	addOption(parser, CommandLineOption("lm", "low-memory",        "decrease memory usage at the expense of runtime", OptionType::Boolean));
326 	addSection(parser, "Mapping Quality Options:");
327 	addOption(parser, CommandLineOption("mq", "mapping-quality",   "switch on mapping quality mode", OptionType::Boolean));
328 	addOption(parser, CommandLineOption("nbi","no-below-id",       "do not report matches with seed identity < percent id", OptionType::Boolean));
329 	addOption(parser, CommandLineOption("qsl","mq-seed-length",    "seed length used for mapping quality assignment", OptionType::Int | OptionType::Label, options.artSeedLength));
330 #endif
331 	addSection(parser, "Verification Options:");
332 	addOption(parser, CommandLineOption("mN", "match-N",           "\'N\' matches with all other characters", OptionType::Boolean));
333 	addOption(parser, addArgumentText(CommandLineOption("ed", "error-distr",       "write error distribution to FILE", OptionType::String), "FILE"));
334 
335 	bool stop = !parse(parser, argc, argv, cerr);
336 
337 	//////////////////////////////////////////////////////////////////////////////
338 	// Extract options
339 	getOptionValueLong(parser, "forward", options.forward);
340 	getOptionValueLong(parser, "reverse", options.reverse);
341 	getOptionValueLong(parser, "percent-identity", options.errorRate);
342 #ifndef NO_PARAM_CHOOSER
343 	getOptionValueLong(parser, "recognition-rate", pm_options.optionLossRate);
344 	getOptionValueLong(parser, "param-dir", pm_options.paramFolder);
345 #endif
346 	getOptionValueLong(parser, "indels", options.hammingOnly);
347 	options.hammingOnly = !options.hammingOnly;
348 #ifdef RAZERS_MATEPAIRS
349 	getOptionValueLong(parser, "library-length", options.libraryLength);
350 	getOptionValueLong(parser, "library-error", options.libraryError);
351 #endif
352 	getOptionValueLong(parser, "max-hits", options.maxHits);
353 	getOptionValueLong(parser, "purge-ambiguous", options.purgeAmbiguous);
354 	getOptionValueLong(parser, "distance-range", options.distanceRange);
355 	if (isSetLong(parser, "distance-range")) options.distanceRange++;
356 	getOptionValueLong(parser, "alignment", options.dumpAlignment);
357 	getOptionValueLong(parser, "output", options.output);
358 	getOptionValueLong(parser, "output-format", options.outputFormat);
359 	getOptionValueLong(parser, "sort-order", options.sortOrder);
360 	getOptionValueLong(parser, "genome-naming", options.genomeNaming);
361 	getOptionValueLong(parser, "read-naming", options.readNaming);
362 	getOptionValueLong(parser, "position-format", options.positionFormat);
363 	getOptionValueLong(parser, "shape", options.shape);
364 	getOptionValueLong(parser, "threshold", options.threshold);
365 	getOptionValueLong(parser, "overabundance-cut", options.abundanceCut);
366 	getOptionValueLong(parser, "repeat-length", options.repeatLength);
367 #ifdef RAZERS_DIRECT_MAQ_MAPPING
368 	getOptionValueLong(parser, "quality-in-header", options.fastaIdQual);
369 	getOptionValueLong(parser, "mapping-quality", options.maqMapping);
370 	getOptionValueLong(parser, "no-below-id", options.noBelowIdentity);
371 	getOptionValueLong(parser, "mq-seed-length", options.artSeedLength);
372 	getOptionValueLong(parser, "seed-mism-quality", options.maxMismatchQualSum);
373 	getOptionValueLong(parser, "total-mism-quality", options.absMaxQualSumErrors);
374 	getOptionValueLong(parser, "low-memory", options.lowMemory);
375 #endif
376 	getOptionValueLong(parser, "trim-reads", options.trimLength);
377 	getOptionValueLong(parser, "taboo-length", options.tabooLength);
378 	getOptionValueLong(parser, "match-N", options.matchN);
379 	getOptionValueLong(parser, "error-distr", errorPrbFileName);
380 	if (isSetLong(parser, "help") || isSetLong(parser, "version")) return 0;	// print help or version and exit
381 	if (isSetLong(parser, "verbose")) options._debugLevel = max(options._debugLevel, 1);
382 	if (isSetLong(parser, "vverbose")) options._debugLevel = max(options._debugLevel, 3);
383 	if (isSetLong(parser, "unique"))
384 	{
385 		options.maxHits = 1;
386 		options.distanceRange = 1;
387 		options.purgeAmbiguous = true;
388 	}
389 	if (!options.forward && !options.reverse)  // enable both per default
390 	{
391 		options.forward = true;
392 		options.reverse = true;
393 	}
394 	appendValue(genomeFileNames, getArgumentValue(parser, 0));
395 	for (unsigned i = 1; i < argumentCount(parser) && i < maxFiles; ++i)
396 		appendValue(readFileNames, getArgumentValue(parser, i), Generous());
397 
398 	//////////////////////////////////////////////////////////////////////////////
399 	// Check options
400 	if ((options.errorRate < 50 || options.errorRate > 100) && (stop = true))
401 		cerr << "Percent identity threshold must be a value between 50 and 100" << endl;
402 	if ((pm_options.optionLossRate < 80 || pm_options.optionLossRate > 100) && (stop = true))
403 		cerr << "Recognition rate must be a value between 80 and 100" << endl;
404 #ifdef RAZERS_MATEPAIRS
405 	if ((options.libraryLength <= 0) && (stop = true))
406 		cerr << "Library length must be a value greater 0" << endl;
407 	if ((options.libraryError <= 0) && (stop = true))
408 		cerr << "Library error must be a value greater or equal 0" << endl;
409 #endif
410 	if ((options.maxHits < 1) && (stop = true))
411 		cerr << "Maximum hits threshold must be greater than 0" << endl;
412 	if ((options.outputFormat > 5) && (stop = true))
413 		cerr << "Invalid output format option." << endl;
414 	if ((options.sortOrder > 1) && (stop = true))
415 		cerr << "Invalid sort order options." << endl;
416 	if ((options.genomeNaming > 1) && (stop = true))
417 		cerr << "Invalid genome naming options." << endl;
418 	if ((options.readNaming > 3) && (stop = true))
419 		cerr << "Invalid read naming options." << endl;
420 	if ((options.positionFormat > 1) && (stop = true))
421 		cerr << "Invalid position format options." << endl;
422 	if ((options.threshold < 1) && (stop = true))
423 		cerr << "Threshold must be a value greater than 0" << endl;
424 	if (isSetLong(parser, "shape"))
425 	{
426 		unsigned ones = 0;
427 		unsigned zeros = 0;
428 		for(unsigned i = 0; i < length(options.shape); ++i)
429 			switch (options.shape[i])
430 			{
431 				case '0':
432 					++zeros;
433 					break;
434 				case '1':
435 					++ones;
436 					break;
437 				default:
438 					cerr << "Shape must be a binary string" << endl;
439 					stop = true;
440 					i = length(options.shape);
441 			}
442 		if ((ones == 0 || ones > 31) && !stop)
443 		{
444 			cerr << "Invalid Shape" << endl;
445 			stop = true;
446 		}
447         unsigned maxOnes = 14;
448 #ifdef RAZERS_OPENADDRESSING
449         maxOnes = 31;
450 #endif
451         if ((ones < 7 || ones > maxOnes) && !stop)
452             cerr << "Warning: Shape should contain at least 7 and at most " << maxOnes << " '1's" << endl;
453 	}
454 	if ((options.abundanceCut <= 0 || options.abundanceCut > 1) && (stop = true))
455 		cerr << "Overabundance cut ratio must be a value >0 and <=1. Set to 1 to disable." << endl;
456 	if ((options.repeatLength <= 1) && (stop = true))
457 		cerr << "Repeat length must be a value greater than 1" << endl;
458 #ifdef RAZERS_DIRECT_MAQ_MAPPING
459 	if ((options.artSeedLength < 24) && (stop = true))
460 		cerr << "Minimum seed length is 24" << endl;
461 	if ((options.maxMismatchQualSum < 0) && (stop = true))
462 		cerr << "Max seed mismatch quality needs to be 0 or a positive value" << endl;
463 	if ((options.absMaxQualSumErrors < 0) && (stop = true))
464 		cerr << "Max total mismatch quality needs to be 0 or a positive value" << endl;
465 #endif
466 	if ((options.trimLength != 0 && options.trimLength < 14) && (stop = true))
467 		cerr << "Minimum read length is 14" << endl;
468 	if ((options.tabooLength < 1) && (stop = true))
469 		cerr << "Taboo length must be a value greater than 0" << endl;
470 	if (argumentCount(parser) == 2)
471 		options.libraryLength = -1;		// only 1 readset -> disable mate-pair mapping
472 	if ((argumentCount(parser) > maxFiles) && (stop = true))
473 		cerr << "More than " << maxFiles << " input files specified." << endl;
474 	if ((argumentCount(parser) < 2) && (stop = true))
475 	{
476 		if (argc > 1)
477 			cerr << "Less than 2 input files specified." << endl;
478 		else
479 		{
480 			shortHelp(parser, cerr);	// print short help and exit
481 			return 0;
482 		}
483 	}
484 
485 	options.errorRate = (100.0 - options.errorRate) / 100.0;
486 	pm_options.optionLossRate = (ParamChooserOptions::TFloat)(100.0 - pm_options.optionLossRate) / 100.0;
487 	if (stop)
488 	{
489 		cerr << "Exiting ..." << endl;
490 		return RAZERS_INVALID_OPTIONS;
491 	}
492 
493 	//////////////////////////////////////////////////////////////////////////////
494 	// get read length
495 	int readLength = estimateReadLength(toCString(readFileNames[0]));
496 	if (readLength == RAZERS_READS_FAILED)
497 	{
498 		cerr << "Failed to open reads file " << readFileNames[0] << endl;
499 		cerr << "Exiting ..." << endl;
500 		return RAZERS_READS_FAILED;
501 	}
502 	if (readLength == 0) {
503 		cerr << "Failed to read the first read sequence." << endl;
504 		cerr << "Exiting ..." << endl;
505 		return RAZERS_READS_FAILED;
506 	}
507 
508 	if (options.trimLength > readLength)
509 		options.trimLength = readLength;
510 
511 #ifndef NO_PARAM_CHOOSER
512 	if (!(isSetLong(parser, "shape") || isSetLong(parser, "threshold")))
513 	{
514 		if (options.lowMemory) pm_options.maxWeight = 13;
515 		pm_options.verbose = (options._debugLevel >= 1);
516 		pm_options.optionErrorRate = (ParamChooserOptions::TFloat)options.errorRate;
517 		if (options.hammingOnly)
518 		{
519 			pm_options.optionProbINSERT = (ParamChooserOptions::TFloat)0.0;
520 			pm_options.optionProbDELETE = (ParamChooserOptions::TFloat)0.0;
521 		}
522 		else
523 		{
524 			pm_options.optionProbINSERT = (ParamChooserOptions::TFloat)0.01;	//this number is basically without meaning, any value > 0 will lead to
525 			pm_options.optionProbDELETE = (ParamChooserOptions::TFloat)0.01;	//edit distance parameter choosing
526 		}
527 
528 		if (empty(pm_options.paramFolder))
529 		{
530 			string razersFolder = argv[0];
531 			size_t lastPos = razersFolder.find_last_of('/') + 1;
532 			if (lastPos == razersFolder.npos + 1) lastPos = razersFolder.find_last_of('\\') + 1;
533 			if (lastPos == razersFolder.npos + 1) lastPos = 0;
534 			razersFolder.erase(lastPos);
535 			pm_options.paramFolderPath = razersFolder;
536 		}
537 		if (options.trimLength > 0) readLength = options.trimLength;
538 		if (readLength > 0)
539 		{
540 /*			if(options.maqMapping && readLength != options.artSeedLength)
541 				pm_options.totalN = options.artSeedLength;
542 			else*/
543 				pm_options.totalN = readLength;
544 			if (options._debugLevel >= 1)
545 				cerr << "___PARAMETER_CHOOSING__" << endl;
546 			if (!chooseParams(options,pm_options))
547 			{
548 				if (pm_options.verbose)
549 					cerr << "Couldn't find preprocessed parameter files. Please configure manually (options --shape and --threshold)." << endl;
550 				cerr << "Using default configurations (shape = " << options.shape << " and q-gram lemma)." << endl;
551 			}
552 			if (options._debugLevel >= 1) cerr << endl;
553 		} else
554 		{
555 			cerr << "Failed to load reads" << endl;
556 			cerr << "Exiting ..." << endl;
557 			return RAZERS_READS_FAILED;
558 		}
559 	}
560 #endif
561 
562 #ifdef RAZERS_PARALLEL
563 	tbb::task_scheduler_init scheduler;
564 #endif
565 
566 	int result = mapReads(genomeFileNames, readFileNames, errorPrbFileName, options);
567 	if (result != 0)
568 		cerr << "Exiting ..." << endl;
569 	return result;
570 }
571