1 package align2;
2 
3 import java.io.File;
4 import java.io.PrintStream;
5 import java.util.ArrayList;
6 import java.util.Arrays;
7 import java.util.BitSet;
8 import java.util.Random;
9 
10 import dna.AminoAcid;
11 import dna.ChromosomeArray;
12 import dna.Data;
13 import dna.FastaToChromArrays2;
14 import fileIO.ReadWrite;
15 import fileIO.SummaryFile;
16 import fileIO.TextStreamWriter;
17 import shared.Parse;
18 import shared.Parser;
19 import shared.PreParser;
20 import shared.ReadStats;
21 import shared.Shared;
22 import shared.Timer;
23 import shared.Tools;
24 import stream.FASTQ;
25 import stream.FastaReadInputStream;
26 import stream.Read;
27 import structures.ByteBuilder;
28 
29 public final class RandomReads3 {
30 
31 	/*--------------------------------------------------------------*/
32 	/*----------------        Initialization        ----------------*/
33 	/*--------------------------------------------------------------*/
34 
35 
main(String[] args)36 	public static void main(String[] args){
37 
38 		{//Preparse block for help, config files, and outstream
39 			PreParser pp=new PreParser(args, new Object() { }.getClass().getEnclosingClass(), false);
40 			args=pp.args;
41 			outstream=pp.outstream;
42 		}
43 
44 		Timer t=new Timer();
45 
46 //		FASTQ.ADD_PAIRNUM_TO_CUSTOM_ID=false;
47 
48 		FastaReadInputStream.MIN_READ_LEN=1;
49 		Data.GENOME_BUILD=-1;
50 		int build=1;
51 		String ref=null;
52 		String out1=null;
53 		String out2=null;
54 
55 		long maxReads=0;
56 		int minlen=150;
57 		int maxlen=150;
58 		int midlen=-1;
59 
60 		int minInsLen=1;
61 		int minSubLen=2;
62 		int minDelLen=1;
63 		int minNLen=1;
64 
65 		int maxInsLen=12;
66 		int maxSubLen=12;
67 		int maxDelLen=400;
68 		int maxNLen=1;
69 
70 		int minChrom=-1;
71 		int maxChrom=-1;
72 
73 		int maxSnps=3;
74 		int maxInss=2;
75 		int maxDels=2;
76 		int maxSubs=2;
77 		int maxNs=0;
78 
79 		float snpRate=0;
80 		float insRate=0;
81 		float delRate=0;
82 		float subRate=0;
83 		float nRate=0;
84 		PERFECT_READ_RATIO=0;
85 
86 //		float snpRate=0.4f;
87 //		float insRate=0.2f;
88 //		float delRate=0.2f;
89 //		float subRate=0.2f;
90 //		float nRate=0.2f;
91 //		PERFECT_READ_RATIO=0.5f;
92 
93 		String pbadapter=null;
94 		String fragadapter1="AGATCGGAAGAGCACACGTCTGAACTCCAGTCACTAGCTTATCTCGTATGCCGTCTTCTGC";
95 		String fragadapter2="AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT";
96 
97 		long seed2=Long.MIN_VALUE;
98 
99 		int minQuality=20;
100 		int midQuality=28;
101 		int maxQuality=36;
102 		int minInsert=-1, maxInsert=-1, insertDev=-1;
103 
104 		boolean paired=false;
105 		String prefix_=null;
106 
107 		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
108 		ReadWrite.USE_BGZIP=ReadWrite.USE_UNBGZIP=ReadWrite.PREFER_BGZIP=true;
109 
110 		float targetCov=-1;
111 //		sdfg
112 		for(int i=0; i<args.length; i++){
113 			final String arg=args[i];
114 			final String[] split=arg.split("=");
115 			final String a=split[0].toLowerCase();
116 			final String b=split.length>1 ? split[1] : null;
117 
118 //			int x=-1;
119 //			try {
120 //				x=Parse.parseIntKMG(b);
121 //			} catch (NumberFormatException e) {}
122 
123 			if(Parser.parseZip(arg, a, b)){
124 				//do nothing
125 			}else if(Parser.parseQuality(arg, a, b)){
126 				//do nothing
127 			}else if(a.equals("coverage")){
128 				targetCov=Float.parseFloat(b);
129 			}else if(a.equals("simplenames") || a.equals("simple") || a.equals("tagsimple")){
130 				FASTQ.TAG_CUSTOM_SIMPLE=Parse.parseBoolean(b);
131 			}else if(a.equals("reads")){
132 				maxReads=Parse.parseIntKMG(b);
133 			}else if(a.equals("len") || a.equals("length") || a.equals("readlen") || a.equals("readlength")){
134 				minlen=maxlen=Parse.parseIntKMG(b);
135 			}else if(a.equals("append") || a.equals("app")){
136 				append=ReadStats.append=Parse.parseBoolean(b);
137 			}else if(a.equals("overwrite") || a.equals("ow")){
138 				overwrite=Parse.parseBoolean(b);
139 			}else if(a.equals("minlen") || a.equals("minlength")){
140 				minlen=Parse.parseIntKMG(b);
141 				maxlen=Tools.max(minlen, maxlen);
142 			}else if(a.equals("maxlen") || a.equals("maxlength")){
143 				maxlen=Parse.parseIntKMG(b);
144 				minlen=Tools.min(minlen, maxlen);
145 			}else if(a.equals("midlen") || a.equals("midlength")){
146 				midlen=Parse.parseIntKMG(b);
147 			}else if(a.equals("pbadapter") || a.equals("pacbioadapter")){
148 				pbadapter=b;
149 			}else if(a.equals("fragadapter") || a.equals("fragadapter1")){
150 				fragadapter1=b;
151 			}else if(a.equals("fragadapter2")){
152 				fragadapter2=b;
153 			}else if(a.equals("amp")){
154 				AMP=Parse.parseIntKMG(b);
155 			}else if(a.equals("snprate")){
156 				snpRate=Float.parseFloat(b);
157 			}else if(a.equals("subrate")){
158 				subRate=Float.parseFloat(b);
159 			}else if(a.equals("delrate")){
160 				delRate=Float.parseFloat(b);
161 			}else if(a.equals("insrate")){
162 				insRate=Float.parseFloat(b);
163 			}else if(a.equals("nrate")){
164 				nRate=Float.parseFloat(b);
165 			}else if(a.equals("maxsnps")){
166 				maxSnps=Parse.parseIntKMG(b);
167 			}else if(a.equals("maxdels")){
168 				maxDels=Parse.parseIntKMG(b);
169 			}else if(a.equals("maxsubs")){
170 				maxSubs=Parse.parseIntKMG(b);
171 			}else if(a.equals("maxinss") || a.equals("maxins")){
172 				maxInss=Parse.parseIntKMG(b);
173 			}else if(a.equals("banns")){
174 				BAN_NS=Parse.parseBoolean(b);
175 			}else if(a.equals("maxns")){
176 				maxNs=Parse.parseIntKMG(b);
177 			}else if(a.startsWith("maxdellen")){
178 				maxDelLen=Parse.parseIntKMG(b);
179 			}else if(a.startsWith("maxsublen")){
180 				maxSubLen=Parse.parseIntKMG(b);
181 			}else if(a.startsWith("maxinslen")){
182 				maxInsLen=Parse.parseIntKMG(b);
183 			}else if(a.startsWith("maxnlen")){
184 				maxNLen=Parse.parseIntKMG(b);
185 			}else if(a.startsWith("mindellen")){
186 				minDelLen=Parse.parseIntKMG(b);
187 			}else if(a.startsWith("minsublen")){
188 				minSubLen=Parse.parseIntKMG(b);
189 			}else if(a.startsWith("mininslen")){
190 				minInsLen=Parse.parseIntKMG(b);
191 			}else if(a.startsWith("minnlen")){
192 				minNLen=Parse.parseIntKMG(b);
193 			}else if(a.equals("fastawrap") || a.equals("wrap")){
194 				Shared.FASTA_WRAP=Parse.parseIntKMG(b);
195 			}else if(a.startsWith("seed")){
196 				seed2=Long.parseLong(b);
197 			}else if(a.equals("ref") || a.equals("reference")){
198 				ref=b;
199 			}else if(a.equals("path")){
200 				Data.setPath(b);
201 			}else if(a.equals("nodisk")){
202 				assert(false) : "'nodisk' has not been implemented; please remove that flag.";
203 				RefToIndex.NODISK=NODISK=Parse.parseBoolean(b);
204 			}else if(a.equals("s") || a.startsWith("snp")){
205 				maxSnps=Parse.parseIntKMG(b);
206 				snpRate=1;
207 			}else if(a.equals("i") || a.startsWith("ins")){
208 				int x=Parse.parseIntKMG(b);
209 				maxInss=(x>0 ? 1 : 0);
210 				maxInsLen=Parse.parseIntKMG(b);
211 				insRate=1;
212 			}else if(a.equals("d") || a.startsWith("del")){
213 				int x=Parse.parseIntKMG(b);
214 				maxDels=(x>0 ? 1 : 0);
215 				maxDelLen=Parse.parseIntKMG(b);
216 				delRate=1;
217 			}else if(a.equals("u") || a.startsWith("sub")){
218 				int x=Parse.parseIntKMG(b);
219 				maxSubs=(x>0 ? 1 : 0);
220 				maxSubLen=Parse.parseIntKMG(b);
221 				subRate=1;
222 			}else if(a.equals("n")){
223 				maxNs=Parse.parseIntKMG(b);
224 				nRate=1;
225 				minNLen=maxNLen=1;
226 			}else if(a.startsWith("minchrom")){
227 				minChrom=Parse.parseIntKMG(b);
228 			}else if(a.equals("int") || a.equals("interleaved") || a.equals("interleave")){
229 				OUTPUT_INTERLEAVED=Parse.parseBoolean(b);
230 				if(OUTPUT_INTERLEAVED){paired=true;}
231 			}else if(a.equals("biasedsnps")){
232 				BIASED_SNPS=Parse.parseBoolean(b);
233 			}else if(a.startsWith("maxchrom")){
234 				maxChrom=Parse.parseIntKMG(b);
235 			}else if(a.startsWith("build") || a.startsWith("genome")){
236 				build=Parse.parseIntKMG(b);
237 //				assert(false) : "Set genome to "+x;
238 			}else if(a.startsWith("minq")){
239 				minQuality=Parse.parseIntKMG(b);
240 				midQuality=Tools.max(midQuality,  minQuality);
241 				maxQuality=Tools.max(maxQuality,  minQuality);
242 			}else if(a.startsWith("midq")){
243 				midQuality=Parse.parseIntKMG(b);
244 			}else if(a.startsWith("maxq")){
245 				maxQuality=Parse.parseIntKMG(b);
246 				midQuality=Tools.min(midQuality,  maxQuality);
247 				minQuality=Tools.min(minQuality,  maxQuality);
248 			}else if(a.equals("q")){
249 				minQuality=midQuality=maxQuality=Parse.parseIntKMG(b);
250 			}else if(a.equals("qv") || a.equals("variance") || a.equals("qvariance")){
251 				qVariance=Parse.parseIntKMG(b);
252 			}else if(a.equals("mininsert")){
253 				minInsert=Parse.parseIntKMG(b);
254 			}else if(a.equals("maxinsert")){
255 				maxInsert=Parse.parseIntKMG(b);
256 			}else if(a.equals("readlengthdev") || a.equals("readlengthsd")){
257 				readLengthDev=Parse.parseIntKMG(b);
258 			}else if(a.equals("linearlength")){
259 				LINEAR_LENGTH=Parse.parseBoolean(b);
260 				BELL_LENGTH=!LINEAR_LENGTH;
261 			}else if(a.equals("belllength") || a.equals("gaussianlength")){
262 				BELL_LENGTH=Parse.parseBoolean(b);
263 				LINEAR_LENGTH=!BELL_LENGTH;
264 			}else if(a.startsWith("minmid")){
265 				mateMiddleMin=Parse.parseIntKMG(b);
266 			}else if(a.startsWith("maxmid")){
267 				mateMiddleMax=Parse.parseIntKMG(b);
268 			}else if(a.startsWith("paired")){
269 				paired=Parse.parseBoolean(b);
270 			}else if(a.startsWith("superflat")){
271 				SUPERFLAT_DIST=Parse.parseBoolean(b);
272 			}else if(a.startsWith("exponential")){
273 				if(b==null){EXP_DIST=true;}
274 				else{
275 					char c=b.charAt(0);
276 					if(Tools.isDigit(c) || c=='.'){
277 						EXP_DIST=true;
278 						EXP_LAMDA=Double.parseDouble(b);
279 					}else{
280 						EXP_DIST=Parse.parseBoolean(b);
281 					}
282 				}
283 			}else if(a.startsWith("triang")){
284 				if(Parse.parseBoolean(b)){
285 					SUPERFLAT_DIST=FLAT_DIST=BELL_DIST=false;
286 				}
287 			}else if(a.startsWith("flat")){
288 				FLAT_DIST=Parse.parseBoolean(b);
289 			}else if(a.startsWith("bell") || a.startsWith("gauss") || a.startsWith("round")){
290 				BELL_DIST=Parse.parseBoolean(b);
291 			}else if(a.equals("illuminanames")){
292 				ILLUMINA_NAMES=Parse.parseBoolean(b);
293 			}else if(a.equals("insertnames") || a.equals("renamebyinsert")){
294 				INSERT_NAMES=Parse.parseBoolean(b);
295 			}else if(a.startsWith("unique")){
296 				USE_UNIQUE_SNPS=Parse.parseBoolean(b);
297 			}else if(a.startsWith("adderrors") || a.startsWith("usequality")){
298 				ADD_ERRORS_FROM_QUALITY=Parse.parseBoolean(b);
299 			}else if(a.equals("pacbio")){
300 				if(b!=null && (b.charAt(0)=='.' || Tools.isDigit(b.charAt(0)))){
301 					pbMinErrorRate=pbMaxErrorRate=Float.parseFloat(b);
302 					ADD_PACBIO_ERRORS=pbMinErrorRate>0;
303 				}else{
304 					ADD_PACBIO_ERRORS=Parse.parseBoolean(b);
305 				}
306 				if(ADD_PACBIO_ERRORS){ADD_ERRORS_FROM_QUALITY=false;}
307 			}else if(a.equals("pbmin") || a.equals("pbminrate")){
308 				pbMinErrorRate=Float.parseFloat(b);
309 			}else if(a.equals("pbmax") || a.equals("pbmaxrate")){
310 				pbMaxErrorRate=Float.parseFloat(b);
311 			}else if(a.startsWith("midpad")){
312 				midPad=Parse.parseIntKMG(b);
313 			}else if(a.startsWith("randomscaffold")){
314 				RANDOM_SCAFFOLD=Parse.parseBoolean(b);
315 			}else if(a.startsWith("metagenome")){
316 				METAGENOME=Parse.parseBoolean(b);
317 			}else if(a.startsWith("replacenoref")){
318 				REPLACE_NOREF=Parse.parseBoolean(b);
319 			}else if(a.equals("out") || a.equals("out1")){
320 				out1=b;
321 			}else if(a.equals("out2")){
322 				out2=b;
323 			}else if(a.equals("verbose")){
324 				verbose=Parse.parseBoolean(b);
325 			}else if(a.equals("ext") || a.equals("extension")){
326 				fileExt=b;
327 				if(fileExt==null){fileExt=".fq.gz";}
328 				if(!fileExt.startsWith(".")){fileExt="."+fileExt;}
329 			}else if(a.equals("perfect")){
330 				PERFECT_READ_RATIO=(Parse.parseBoolean(b) ? 1 : Float.parseFloat(b));
331 			}else if(a.equals("singlescaffold")){
332 				FORCE_SINGLE_SCAFFOLD=Parse.parseBoolean(b);
333 			}else if(a.equals("samestrand")){
334 				mateSameStrand=Parse.parseBoolean(b);
335 			}else if(a.equals("minoverlap") || a.equals("overlap")){
336 				MIN_SCAFFOLD_OVERLAP=Parse.parseIntKMG(b);
337 			}else if(a.equals("prefix")){
338 				prefix_=b;
339 			}else if(a.equals("slashes") || a.equals("addslashes") || a.equals("slash") || a.equals("addslash")){
340 				addslash=FASTQ.ADD_SLASH_PAIRNUM_TO_CUSTOM_ID=Parse.parseBoolean(b);
341 			}else if(a.equals("addpairnum") || a.equals("pairnum") || a.equals("addcolon")){
342 				FASTQ.ADD_PAIRNUM_TO_CUSTOM_ID=Parse.parseBoolean(b);
343 			}else if(a.equals("slashspace") || a.equals("spaceslash")){
344 				spaceslash=Parse.parseBoolean(b);
345 				FASTQ.SPACE_SLASH=spaceslash;
346 			}else if(a.equals("in")){
347 				in1=(b==null || b.equalsIgnoreCase("null") ? null : b);
348 			}else{throw new RuntimeException("Unknown parameter "+args[i]);}
349 
350 		}
351 //		assert(false) : OUTPUT_INTERLEAVED;
352 		assert(build>=0) : "Please specify a genome.";
353 
354 		if(minInsert>-1){mateMiddleMin=minInsert-2*maxlen;}else{mateMiddleMin=Tools.max(mateMiddleMin, -2*minlen);}
355 		if(maxInsert>-1){mateMiddleMax=maxInsert-2*minlen;}
356 
357 
358 		if(spaceslash){
359 			slash1=" /1";
360 			slash2=" /2";
361 		}else if(addslash){
362 			slash1="/1";
363 			slash2="/2";
364 		}
365 
366 		if(insertDev>-1){
367 			mateMiddleDev=insertDev;
368 		}else{
369 			mateMiddleDev=Tools.absdif(mateMiddleMax, mateMiddleMin)/6;
370 		}
371 
372 		if(readLengthDev>-1){
373 			//do nothing
374 		}else{
375 			readLengthDev=Tools.absdif(minlen, maxlen)/4;
376 		}
377 
378 		assert(pbMaxErrorRate>=pbMinErrorRate) : "pbMaxErrorRate must be >= pbMinErrorRate";
379 
380 		ArrayList<ChromosomeArray> chromlist=null;
381 		if(ref!=null){
382 			chromlist=writeRef(ref, build);
383 		}
384 
385 		Data.setGenome(build);
386 		if(minChrom<1){minChrom=1;}
387 		if(maxChrom<1){maxChrom=Data.numChroms;}
388 
389 		if(chromlist==null){
390 			Data.loadChromosomes(minChrom, maxChrom);
391 		}else{
392 			assert(chromlist.size()==maxChrom-minChrom+1) : chromlist.size()+", "+minChrom+", "+maxChrom;
393 			for(ChromosomeArray cha : chromlist){
394 				Data.chromosomePlusMatrix[cha.chromosome]=cha;
395 			}
396 		}
397 		if(Shared.TRIM_RNAME){Data.trimScaffoldNames();}
398 		if(targetCov>0){
399 			long glen=genomeLength();
400 			float target=(2*glen*targetCov)/(minlen+maxlen);
401 			if(paired){target*=0.5f;}
402 			maxReads=(long)target;
403 		}
404 
405 		if(maxReads<1){
406 			outstream.println("No reads to generate; quitting.");
407 			return;
408 		}
409 
410 		RandomReads3 rr=(seed2==Long.MIN_VALUE ? new RandomReads3(paired) :
411 			new RandomReads3((seed2==-1 ? System.nanoTime() : seed2), paired));
412 		rr.prefix=prefix_;
413 		if(pbadapter!=null){
414 			rr.pbadapter1=pbadapter.getBytes();
415 			rr.pbadapter2=AminoAcid.reverseComplementBases(rr.pbadapter1);
416 			rr.pbadapter1=rr.pbadapter2; //For PacBio, since adapters never appear in plus configuration
417 		}
418 		if(fragadapter1!=null){
419 			rr.fragadapter1=Tools.toAdapters(fragadapter1, -1);
420 			rr.fragadapter2=fragadapter2==null ? rr.fragadapter1 : Tools.toAdapters(fragadapter2, -1);
421 		}
422 
423 		if(REPLACE_NOREF){
424 			for(int chrom=minChrom; chrom<=maxChrom; chrom++){
425 				ChromosomeArray cha=Data.getChromosome(chrom);
426 				final byte[] array=cha.array;
427 				final byte n='N';
428 				for(int i=1; i<array.length; i++){
429 					if(array[i]==n){
430 						array[i]=AminoAcid.numberToBase[rr.randyNoref.nextInt()&3];
431 					}
432 				}
433 			}
434 		}
435 
436 		if(PERFECT_READ_RATIO>=1){
437 			snpRate=insRate=delRate=subRate=0;
438 			maxSnps=maxInss=maxDels=maxSubs=maxNs=0;
439 		}
440 
441 		if(delRate<=0 || maxDelLen<=0 || maxDels<=0){
442 			delRate=0;
443 			maxDelLen=minDelLen=maxDels=0;
444 		}
445 		if(insRate<=0 || maxInsLen<=0 || maxInss<=0){
446 			insRate=0;
447 			maxInsLen=minInsLen=maxInss=0;
448 		}
449 		if(subRate<=0 || maxSubLen<=0 || maxSubs<=0){
450 			subRate=0;
451 			maxSubLen=minSubLen=maxSubs=0;
452 		}
453 		if(snpRate<=0 || maxSnps<=0){
454 			snpRate=0;
455 			maxSnps=0;
456 		}
457 		if(nRate<=0 || maxNLen<=0 || maxNs<=0){
458 			nRate=0;
459 			maxNLen=minNLen=maxNs=0;
460 		}
461 
462 		outstream.println("snpRate="+snpRate+", max="+maxSnps+", unique="+USE_UNIQUE_SNPS);
463 		outstream.println("insRate="+insRate+", max="+maxInss+", len=("+minInsLen+"-"+maxInsLen+")");
464 		outstream.println("delRate="+delRate+", max="+maxDels+", len=("+minDelLen+"-"+maxDelLen+")");
465 		outstream.println("subRate="+subRate+", max="+maxSubs+", len=("+minSubLen+"-"+maxSubLen+")");
466 		outstream.println("nRate  ="+nRate+", max="+maxNs+", len=("+minNLen+"-"+maxNLen+")");
467 		outstream.println("genome="+Data.GENOME_BUILD);
468 		outstream.println("PERFECT_READ_RATIO="+PERFECT_READ_RATIO);
469 		outstream.println("ADD_ERRORS_FROM_QUALITY="+ADD_ERRORS_FROM_QUALITY);
470 		outstream.println("REPLACE_NOREF="+REPLACE_NOREF);
471 		outstream.println("paired="+paired);
472 		outstream.println("read length="+(minlen==maxlen ? ""+minlen : minlen+"-"+maxlen));
473 		outstream.println("reads="+maxReads);
474 		if(paired){
475 			outstream.println("insert size="+(mateMiddleMin+2*minlen)+"-"+(mateMiddleMax+2*maxlen));
476 		}
477 
478 //		assert(false) : OUTPUT_INTERLEAVED;
479 		String fname1="reads_B"+Data.GENOME_BUILD+"_"+maxReads+"x"+maxlen+"bp_"
480 			+(maxSnps==0 || snpRate==0 ? 0 : maxSnps)+"S_"+(maxInss==0 || insRate==0 ? 0 : +maxInsLen)+"I_"+(maxDels==0 || delRate==0 ? 0 : maxDelLen)+"D_"+
481 			(maxSubs==0 || subRate==0 ? 0 : maxSubLen)+"U_"+
482 			(maxNs==0 || nRate==0 ? 0 : maxNs)+"N"/*+"_chr"+minChrom+"-"+maxChrom*/+(paired ? (OUTPUT_INTERLEAVED ? "_interleaved" : "_1") : "")+fileExt;
483 
484 		String fname2=(!paired || OUTPUT_INTERLEAVED) ? null : "reads_B"+Data.GENOME_BUILD+"_"+maxReads+"x"+maxlen+"bp_"
485 			+(maxSnps==0 || snpRate==0 ? 0 : maxSnps)+"S_"+(maxInss==0 || insRate==0 ? 0 : +maxInsLen)+"I_"+(maxDels==0 || delRate==0 ? 0 : maxDelLen)+"D_"+
486 			(maxSubs==0 || subRate==0 ? 0 : maxSubLen)+"U_"+
487 			(maxNs==0 || nRate==0 ? 0 : maxNs)+"N"/*+"_chr"+minChrom+"-"+maxChrom*/+"_2"+fileExt;
488 
489 		if(out1!=null){
490 			fname1=out1;
491 			fname2=out2;
492 			if(out1!=null && out2==null && out1.contains("#")){
493 				fname1=out1.replaceFirst("#", "1");
494 				fname2=!paired ? null : out1.replaceFirst("#", "2");
495 			}
496 		}
497 
498 		if(fname2!=null){OUTPUT_INTERLEAVED=false;}
499 //		assert(false) : out+", "+fname1+", "+fname2;
500 		rr.writeRandomReadsX(maxReads, minlen, maxlen, midlen,
501 				maxSnps, maxInss, maxDels, maxSubs, maxNs,
502 				snpRate, insRate, delRate, subRate, nRate,
503 				minInsLen, minDelLen, minSubLen, minNLen,
504 				maxInsLen, maxDelLen, maxSubLen, maxNLen,
505 				minChrom, maxChrom, minQuality, midQuality, maxQuality, fname1, fname2);
506 
507 		t.stop();
508 		outstream.println("Wrote "+fname1);
509 		if(fname2!=null){outstream.println("Wrote "+fname2);}
510 		outstream.println("Time: \t"+t);
511 
512 		//Close the print stream if it was redirected
513 		Shared.closeStream(outstream);
514 	}
515 
writeRef(String reference, int build)516 	private static ArrayList<ChromosomeArray> writeRef(String reference, int build){
517 		ArrayList<ChromosomeArray> chromlist=null;
518 		if(reference!=null){
519 			{
520 				File f=new File(reference);
521 				if(!f.exists() || !f.isFile() || !f.canRead()){throw new RuntimeException("Cannot read file "+f.getAbsolutePath());}
522 			}
523 			{
524 				String s=align2.IndexMaker4.fname(1, 1, 13, 1);
525 				String dir=new File(s).getParent();
526 				dir=dir.replace('\\', '/');
527 				dir=dir.replace("ref/index/", "ref/genome/");
528 				String sf=dir+"/summary.txt";
529 				if(!NODISK && new File(sf).exists() && SummaryFile.compare(sf, reference)){
530 					//do nothing
531 					outstream.println("NOTE:\tIgnoring reference file because it already appears to have been processed.");
532 					outstream.println("NOTE:\tIf you wish to regenerate the index, please manually delete "+dir+"/summary.txt");
533 					return null;
534 				}
535 				File f=new File(dir);
536 				if(f.exists()){
537 					File[] f2=f.listFiles();
538 					if(f2!=null && f2.length>0){
539 						if(overwrite){
540 							outstream.println("NOTE:\tDeleting contents of "+dir+" because reference is specified and overwrite="+overwrite);
541 							for(File f3 : f2){
542 								if(f3.isFile()){
543 									String f3n=f3.getName();
544 									if((f3n.contains(".chrom") || f3n.endsWith(".txt") || f3n.endsWith(".txt.gz")) && !f3n.endsWith("list.txt")){
545 										f3.delete();
546 									}
547 								}
548 							}
549 						}else{
550 							outstream.println(Arrays.toString(f2));
551 							throw new RuntimeException("\nThere is already a reference at location '"+f.getAbsolutePath()+"'.  " +
552 									"Please delete it (and the associated index), or use a different build ID, " +
553 									"or remove the 'reference=' parameter from the command line, or set overwrite=true.");
554 						}
555 					}
556 				}
557 				dir=dir.replace("ref/genome/", "ref/index/");
558 				f=new File(dir);
559 				if(f.exists()){
560 					File[] f2=f.listFiles();
561 					if(f2!=null && f2.length>0){
562 						if(overwrite){
563 							outstream.println("NOTE:\tDeleting contents of "+dir+" because reference is specified and overwrite="+overwrite);
564 							for(File f3 : f2){
565 								if(f3.isFile()){f3.delete();}
566 							}
567 						}else{
568 							throw new RuntimeException("\nThere is already an index at location '"+f.getAbsolutePath()+"'.  " +
569 									"Please delete it, or use a different build ID, or remove the 'reference=' parameter from the command line.");
570 						}
571 					}
572 				}
573 			}
574 
575 			outstream.println("Writing reference.");
576 
577 			int oldzl=ReadWrite.ZIPLEVEL;
578 			ReadWrite.ZIPLEVEL=Tools.max(4, ReadWrite.ZIPLEVEL);
579 
580 			int minScaf=-1;
581 			int maxChromLen=-1;
582 			boolean genScaffoldInfo=true;
583 
584 			maxChromLen=maxChromLen>0 ? maxChromLen : FastaToChromArrays2.MAX_LENGTH;
585 			minScaf=minScaf>-1 ? minScaf : FastaToChromArrays2.MIN_SCAFFOLD;
586 			midPad=midPad>-1 ? midPad : FastaToChromArrays2.MID_PADDING;
587 			String[] ftcaArgs=new String[] {reference, ""+build, "writeinthread=false", "genscaffoldinfo="+genScaffoldInfo, "retain", "waitforwriting=false",
588 					"gz="+(Data.CHROMGZ), "maxlen="+maxChromLen,
589 							"writechroms="+(!NODISK), "minscaf="+minScaf, "midpad="+midPad, "nodisk="+NODISK};
590 
591 			chromlist=FastaToChromArrays2.main2(ftcaArgs);
592 
593 			ReadWrite.ZIPLEVEL=oldzl;
594 		}
595 		return chromlist;
596 	}
597 
598 
RandomReads3(boolean paired_)599 	public RandomReads3(boolean paired_){
600 		this(getSeed(), paired_);
601 	}
602 
RandomReads3(long seed, boolean paired_)603 	public RandomReads3(long seed, boolean paired_){
604 		if(randomChrom==null){
605 			synchronized(getClass()){
606 				if(randomChrom==null){
607 					randomChrom=fillRandomChrom();
608 				}
609 			}
610 		}
611 		randy=new Random(seed+1);
612 		randy2=new Random(seed+2);
613 		randyMutationType=new Random(seed+3);
614 		randyQual=new Random(seed+5);
615 		randyAdapter=new Random(seed+25);
616 		paired=paired_;
617 
618 		randyPerfectRead=new Random(seed+20);
619 		randyLength=new Random(seed+21);
620 		randyAmp=new Random(seed+22);
621 
622 		if(paired){
623 			randyMate=new Random(seed+6);
624 			randy2Mate=new Random(seed+7);
625 			randyMutationTypeMate=new Random(seed+8);
626 			randyQualMate=new Random(seed+10);
627 			randyAdapterMate=new Random(seed+30);
628 		}else{
629 			randyMate=null;
630 			randy2Mate=null;
631 			randyMutationTypeMate=null;
632 			randyQualMate=null;
633 			randyAdapterMate=null;
634 		}
635 
636 		if(REPLACE_NOREF){
637 			randyNoref=new Random(seed+31);
638 		}else{
639 			randyNoref=null;
640 		}
641 
642 		if(METAGENOME){
643 			makeMetagenomeProbs(randy);
644 		}
645 	}
646 
addErrorsFromQuality(Read r, Random randy)647 	private final static void addErrorsFromQuality(Read r, Random randy){
648 		addErrorsFromQuality(r, randy, 0, r.length());
649 	}
650 
addErrorsFromQuality(Read r, Random rand, final int from, final int to)651 	private final static void addErrorsFromQuality(Read r, Random rand, final int from, final int to){
652 		final byte[] quals=r.quality, bases=r.bases;
653 		for(int i=from; i<to; i++){
654 			final byte q=(quals==null ? 30 : quals[i]);
655 			if(AminoAcid.isFullyDefined(bases[i]) && rand.nextFloat()<QualityTools.PROB_ERROR[q]){
656 				int old=AminoAcid.baseToNumber[bases[i]];
657 				bases[i]=AminoAcid.numberToBase[(old+rand.nextInt(3)+1)&3];
658 			}
659 		}
660 	}
661 
addFragAdapter(Read r, final int loc, final byte[][] adapters, final Random rand)662 	public static void addFragAdapter(Read r, final int loc, final byte[][] adapters, final Random rand){
663 		final byte[] bases=r.bases;
664 		final byte[] quals=r.quality;
665 		final int initial=(bases==null ? 0 : bases.length);
666 		final byte[] adapter=adapters[rand.nextInt(adapters.length)];
667 
668 		if(initial>0 && loc>=0 && loc<initial){
669 
670 			final int lim=Tools.min(initial, adapter.length+loc);
671 			for(int i=loc, j=0; i<lim; i++, j++){
672 				if(AminoAcid.isFullyDefined(bases[i])){bases[i]=adapter[j];}
673 			}
674 			for(int i=lim; i<initial; i++){
675 				if(AminoAcid.isFullyDefined(bases[i])){
676 					bases[i]=AminoAcid.numberToBase[rand.nextInt(4)];
677 				}
678 			}
679 		}
680 		if(ADD_ERRORS_FROM_QUALITY){
681 			addErrorsFromQuality(r, rand, loc, bases.length);
682 		}
683 	}
684 
addPBAdapter(byte[] bases, int[] locs, int readlen, Random rand, byte[] adapter)685 	public static byte[] addPBAdapter(byte[] bases, int[] locs, int readlen, Random rand, byte[] adapter){
686 //		outstream.println("Adding adapter "+new String(adapter));
687 		assert(readlen<=bases.length);
688 		int mod=Tools.max((readlen+1)/2, readlen-30-adapter.length);
689 		int index=rand.nextInt(mod);
690 		index-=adapter.length/2;
691 		for(int i=0, j=index; i<adapter.length; i++, j++){
692 			if(j>=0 && j<bases.length){bases[j]=adapter[i];}
693 		}
694 		return bases;
695 	}
696 
addSNP(byte[] bases, int[] locs, int readlen, Random rand)697 	public static byte[] addSNP(byte[] bases, int[] locs, int readlen, Random rand){
698 		assert(readlen<=bases.length);
699 		int index=rand.nextInt(readlen);
700 		byte old=bases[index];
701 		byte oldNum=AminoAcid.baseToNumber[old];
702 		if(oldNum<0){oldNum=0; return bases;}
703 		int num;
704 		if(BIASED_SNPS && rand.nextInt(3)>0){
705 			num=(oldNum^3);
706 		}else{
707 			num=(oldNum+rand.nextInt(3)+1)&3;
708 		}
709 		assert(num>=0 && num<=3 && num!=oldNum);
710 		bases[index]=AminoAcid.numberToBase[num];
711 		return bases;
712 	}
713 
addSNP(byte[] bases, int[] locs, int readlen, Random rand, BitSet bits)714 	public static byte[] addSNP(byte[] bases, int[] locs, int readlen, Random rand, BitSet bits){
715 		assert(readlen<=bases.length);
716 		int index=rand.nextInt(readlen);
717 
718 		while(bits.get(index)){
719 			index=rand.nextInt(readlen);
720 		}
721 		bits.set(index);
722 
723 		byte old=bases[index];
724 		byte oldNum=AminoAcid.baseToNumber[old];
725 		if(oldNum<0){oldNum=0; return bases;}
726 		int num;
727 		if(BIASED_SNPS && rand.nextInt(3)>0){
728 			num=(oldNum^3);
729 		}else{
730 			num=(oldNum+rand.nextInt(3)+1)&3;
731 		}
732 		assert(num>=0 && num<=3 && num!=oldNum) : num+", "+oldNum;
733 		bases[index]=AminoAcid.numberToBase[num];
734 		return bases;
735 	}
736 
737 
addSUB(byte[] bases, int[] locs, int minlen, int maxlen, int readlen, Random rand)738 	public static byte[] addSUB(byte[] bases, int[] locs, int minlen, int maxlen, int readlen, Random rand){
739 		assert(readlen<=bases.length) : readlen+", "+bases.length;
740 		assert(minlen>=1);
741 		assert(maxlen>=minlen);
742 
743 //		int len=minlen+randy2.nextInt(maxlen-minlen+1);
744 		int len=minlen+Tools.min(rand.nextInt(maxlen-minlen+1), rand.nextInt(maxlen-minlen+1), rand.nextInt(maxlen-minlen+1));
745 //		int len=minlen+Tools.min(rand.nextInt(maxlen-minlen+1), rand.nextInt(maxlen-minlen+1));
746 
747 		assert(len>=minlen);
748 		assert(len<=maxlen);
749 
750 //		outstream.println(minlen+", "+maxlen+", "+readlen+", "+s.length());
751 
752 		int index=rand.nextInt(readlen-len+1);
753 
754 		int lim=index+len-1;
755 
756 		{//Change first and last to anything except old
757 			int i=index;
758 
759 			byte old=bases[i];
760 			if(AminoAcid.isFullyDefined(old)){
761 				byte oldNum=AminoAcid.baseToNumber[old];
762 				int num=(oldNum+rand.nextInt(3)+1)&3;
763 				assert(num>=0 && num<=3 && num!=oldNum);
764 				byte base=AminoAcid.numberToBase[num];
765 				bases[i]=base;
766 			}
767 
768 			i=lim;
769 			old=bases[i];
770 			if(AminoAcid.isFullyDefined(old)){
771 				byte oldNum=AminoAcid.baseToNumber[old];
772 				int num=(oldNum+rand.nextInt(3)+1)&3;
773 				assert(num>=0 && num<=3 && num!=oldNum);
774 				byte base=AminoAcid.numberToBase[num];
775 				bases[i]=base;
776 			}
777 		}
778 
779 		for(int i=index+1; i<lim; i++){ //Change middles to anything
780 			byte old=bases[i];
781 			if(AminoAcid.isFullyDefined(old)){
782 				byte oldNum=AminoAcid.baseToNumber[old];
783 				int num=(oldNum+rand.nextInt(4))&3;
784 				assert(num>=0 && num<=3);
785 				byte base=AminoAcid.numberToBase[num];
786 				bases[i]=base;
787 			}
788 		}
789 		return bases;
790 	}
791 
792 
addN(byte[] bases, int[] locs, int minlen, int maxlen, int readlen, Random rand, BitSet bits)793 	public static byte[] addN(byte[] bases, int[] locs, int minlen, int maxlen, int readlen, Random rand, BitSet bits){
794 		assert(readlen<=bases.length) : readlen+", "+bases.length;
795 		assert(minlen>=1);
796 		assert(maxlen>=minlen);
797 
798 //		int len=minlen+randy2.nextInt(maxlen-minlen+1);
799 		int len=minlen+Tools.min(rand.nextInt(maxlen-minlen+1), rand.nextInt(maxlen-minlen+1), rand.nextInt(maxlen-minlen+1));
800 
801 		assert(len>=minlen);
802 		assert(len<=maxlen);
803 
804 //		outstream.println(minlen+", "+maxlen+", "+readlen+", "+s.length());
805 
806 		int index=rand.nextInt(readlen-len+1);
807 		if(bits!=null){
808 			int trials=40;
809 			while(bits.get(index) && (trials--)>0){
810 				index=rand.nextInt(readlen-len+1);
811 			}
812 			bits.set(index);
813 		}
814 
815 		int lim=index+len-1;
816 
817 		for(int i=index; i<=lim; i++){bases[i]='N';}
818 
819 		return bases;
820 	}
821 
addInsertion(byte[] bases, int[] locs, int minlen, int maxlen, int readlen, int[] dif, Random rand)822 	public static byte[] addInsertion(byte[] bases, int[] locs, int minlen, int maxlen, int readlen, int[] dif, Random rand){
823 		assert(readlen<=bases.length) : readlen+", "+bases.length;
824 		assert(minlen>0);
825 		assert(maxlen>=minlen);
826 
827 //		int len=minlen+randy2.nextInt(maxlen-minlen+1);
828 		int len=minlen+Tools.min(rand.nextInt(maxlen-minlen+1), rand.nextInt(maxlen-minlen+1), rand.nextInt(maxlen-minlen+1));
829 //		int len=minlen+Tools.min(rand.nextInt(maxlen-minlen+1), rand.nextInt(maxlen-minlen+1));
830 
831 		len=Tools.min(len, readlen-dif[1]-2);
832 		if(len<1){return bases;}
833 
834 		dif[0]-=len;
835 		dif[1]+=len;
836 
837 		int index=rand.nextInt(readlen-len+1); //Assures that all inserted bases will be within the read
838 
839 //		outstream.println("Added insertion "+len+" at "+index);
840 
841 		byte[] bases2=new byte[bases.length+len];
842 		for(int i=0; i<index; i++){bases2[i]=bases[i];}
843 
844 		for(int i=bases.length-1, j=bases2.length-1; i>=index; i--, j--){
845 //			if(verbose){
846 //				outstream.println("i="+i+", bases.length="+bases.length+", j="+j+", bases2.length="+bases2.length+", locs.length="+locs.length+"\n"+Arrays.toString(locs));
847 //			}
848 			if(j<locs.length){locs[j]=locs[i];}
849 			bases2[j]=bases[i];
850 		}
851 
852 //		for(int i=bases.length-1; i>=index; i--){
853 //			bases2[i+len]=bases[i];
854 ////			locs[i+len]=locs[i];
855 //		}
856 //		final int locfill=locs[(index==0 ? 0 : index-1)];
857 		for(int i=index; i<index+len; i++){
858 			int x=rand.nextInt(4);
859 			byte b=AminoAcid.numberToBase[x];
860 			bases2[i]=b;
861 //			locs[i]=locfill;
862 			locs[i]=-1;
863 		}
864 
865 		return bases2;
866 	}
867 
makeDelsa(int dels, int minlen, int maxlen, Random rand)868 	public static int[] makeDelsa(int dels, int minlen, int maxlen, Random rand){
869 		if(dels<1){return null;}
870 		assert(minlen>0);
871 		assert(maxlen>=minlen);
872 		int[] delsa=new int[dels];
873 		for(int i=0; i<delsa.length; i++){
874 //			int len=minlen+Tools.min(rand.nextInt(maxlen-minlen+1), rand.nextInt(maxlen-minlen+1), rand.nextInt(maxlen-minlen+1));
875 			int len=minlen+Tools.min(rand.nextInt(maxlen-minlen+1), rand.nextInt(maxlen-minlen+1));
876 			delsa[i]=len;
877 		}
878 		return delsa;
879 	}
880 
addDeletion(byte[] bases, int[] locs, int len, int readlen, int[] dif, Random rand)881 	public static byte[] addDeletion(byte[] bases, int[] locs, int len, int readlen, int[] dif, Random rand){
882 		assert(bases.length>=readlen+len) : "bases.len="+bases.length+", readlen="+readlen+", len="+len+", dif="+Arrays.toString(dif);
883 		assert(len>0);
884 
885 		dif[0]+=len;
886 
887 //		int index=randy2.nextInt(s.length()-len);
888 		int index=1+rand.nextInt(readlen-1); //Assures there will never be a deletion of the first base, which would not technically be a deletion.
889 
890 //		outstream.println("Added deletion "+len+" at "+index);
891 
892 		byte[] bases2=new byte[bases.length-len];
893 		for(int i=0; i<index; i++){bases2[i]=bases[i];}
894 		for(int i=index; i<bases2.length; i++){
895 			bases2[i]=bases[i+len];
896 			locs[i]=locs[i+len];
897 		}
898 
899 		return bases2;
900 	}
901 
randomChrom(Read r0, int minChrom, int maxChrom)902 	public int randomChrom(Read r0, int minChrom, int maxChrom){
903 		if(r0!=null){return r0.chrom;}
904 
905 		int x=-1;
906 		int chrom=-1;
907 
908 		assert(minChrom<=maxChrom) : minChrom+", "+maxChrom;
909 		while(chrom<minChrom || chrom>maxChrom){
910 			x=randy.nextInt();
911 			chrom=randomChrom[(x&0x7FFFFFFF)%randomChrom.length];
912 		}
913 		return chrom;
914 	}
915 
randomStrand(Read r0, int minChrom, int maxChrom, boolean sameStrandMate)916 	public int randomStrand(Read r0, int minChrom, int maxChrom, boolean sameStrandMate){
917 		if(r0!=null){
918 			return sameStrandMate ? r0.strand() : r0.strand()^1;
919 		}
920 		return randy.nextInt()&1;
921 	}
922 
randomLoc(Read r0, int chrom, int readlen, int minMiddle, int maxMiddle, int strand)923 	public int randomLoc(Read r0, int chrom, int readlen, int minMiddle, int maxMiddle, int strand){
924 
925 		if(r0!=null){
926 			return randomLocPaired(r0, chrom, readlen, minMiddle, maxMiddle, strand);
927 		}
928 		return randomLocSingle(chrom, readlen);
929 	}
930 
randomLocPaired(Read r0, int chrom, int readlen, int minMiddle, int maxMiddle, int strand)931 	public int randomLocPaired(Read r0, int chrom, int readlen, int minMiddle, int maxMiddle, int strand){
932 		assert(r0!=null);
933 
934 		final int midRange=maxMiddle-minMiddle+1;
935 		final int middle0=(maxMiddle+minMiddle)/2;
936 		int middle;
937 		if(SUPERFLAT_DIST){
938 			//		outstream.print(other.numericID);
939 			middle=((int)(r0.numericID%midRange))+minMiddle;
940 			//		outstream.println("\t"+middle);
941 		}else if(FLAT_DIST){
942 			middle=randyMate.nextInt(midRange)+minMiddle;
943 		}else if(BELL_DIST){
944 
945 			double g=randyMate.nextGaussian();
946 			middle=(int)Math.round((g*mateMiddleDev)+middle0);
947 			while(middle<minMiddle || middle>maxMiddle){
948 				g=randyMate.nextGaussian();
949 				middle=(int)Math.round((g*mateMiddleDev)+middle0);
950 			}
951 
952 //			double g=2*randyMate.nextDouble()-1;
953 //			middle=(int)Math.round((g*mateMiddleDev)+middle0);
954 //			while(middle<minMiddle || middle>maxMiddle){
955 //				g=2*randyMate.nextDouble()-1;
956 //				middle=(int)Math.round((g*mateMiddleDev)+middle0);
957 //			}
958 
959 //			System.out.println(g);
960 			/*
961 			nextGaussian() has mean 0 and stdev 1
962 			 */
963 		}else{
964 			middle=(randyMate.nextInt(midRange)+randyMate.nextInt(midRange))/2+minMiddle;
965 		}
966 
967 		if(EXP_DIST){
968 			double d=999999;
969 			int mid=middle;
970 			while(d>64){
971 				d=Tools.exponential(randy, EXP_LAMDA);
972 			}
973 //			middle=(int)Math.round((1*middle+(((middle+readlen*2)*(d))-(readlen*2)))/2);
974 			middle=(int)Math.round((middle+readlen*2)*(0.4*(d*1.4+0.2)+0.6)-(readlen*2));
975 		}
976 
977 		//	outstream.println(sameStrand+": "+other.strand+" -> "+strand);
978 		int x;
979 		if(r0.strand()==Shared.PLUS){
980 			x=r0.stop+middle+1;
981 		}else{
982 			x=r0.start-middle-readlen;
983 		}
984 		return x;
985 	}
986 
randomLocSingle(int chrom, int readlen)987 	public int randomLocSingle(int chrom, int readlen){
988 
989 		ChromosomeArray cha=Data.getChromosome(chrom);
990 		byte[] array=cha.array;
991 		if(readlen>=(cha.maxIndex-cha.minIndex)){return -1;}
992 
993 		int loc=-1;
994 		for(int i=0; loc<0 && i<24; i++){
995 			loc=randy.nextInt(cha.maxIndex-readlen);
996 			for(int j=0; j<readlen; j++){
997 				if(!AminoAcid.isFullyDefined(array[j+loc])){
998 					loc=-1;
999 					break;
1000 				}
1001 			}
1002 		}
1003 		return loc;
1004 	}
1005 
genomeLength()1006 	private static long genomeLength(){
1007 		long sum=0;
1008 		for(int i=1; i<Data.scaffoldLengths.length; i++){
1009 			int[] clen=Data.scaffoldLengths[i];
1010 			for(int slen : clen){
1011 				sum+=slen;
1012 			}
1013 		}
1014 		return sum;
1015 	}
1016 
randomScaffoldLoc(int chrom, int readlen)1017 	public int[] randomScaffoldLoc(int chrom, int readlen){
1018 		int[] locs=Data.scaffoldLocs[chrom];
1019 		int[] lengths=Data.scaffoldLengths[chrom];
1020 
1021 		int scaf=randy.nextInt(locs.length);
1022 		int loc=locs[scaf];
1023 		int scaflen=lengths[scaf];
1024 		int start;
1025 		if(readlen>=scaflen){
1026 			readlen=scaflen;
1027 			start=loc;
1028 		}else{
1029 			start=loc+randy.nextInt(scaflen-readlen);
1030 		}
1031 		return new int[] {start, readlen};
1032 	}
1033 
makeMetagenomeProbs(Random randy)1034 	public static void makeMetagenomeProbs(Random randy){
1035 		int[][] lengths=Data.scaffoldLengths;
1036 		int chroms=lengths.length;
1037 		chromProbs=new double[chroms];
1038 		scafProbs=new double[chroms][];
1039 		double sum=0;
1040 		for(int chrom=1; chrom<chroms; chrom++){
1041 			int[] slens=lengths[chrom];
1042 			int max=slens.length;
1043 			scafProbs[chrom]=new double[max];
1044 			for(int snum=0; snum<max; snum++){
1045 				double d=exponential(randy, 2.5);
1046 				scafProbs[chrom][snum]=d;
1047 				sum+=d;
1048 //				outstream.println("d="+d+", sum="+sum);
1049 			}
1050 //			outstream.println(Arrays.toString(scafProbs[chrom]));
1051 		}
1052 		final double mult=1/sum;
1053 //		outstream.println("sum="+sum+", mult="+mult);
1054 		sum=0;
1055 		for(int chrom=1; chrom<chroms; chrom++){
1056 			final int max=scafProbs[chrom].length;
1057 			for(int snum=0; snum<max; snum++){
1058 				double d=scafProbs[chrom][snum]*mult;
1059 				sum+=d;
1060 //				outstream.println("d="+d+", sum="+sum);
1061 				scafProbs[chrom][snum]=sum;
1062 			}
1063 //			outstream.println(Arrays.toString(scafProbs[chrom]));
1064 //			outstream.println("sum="+sum);
1065 			chromProbs[chrom]=sum;
1066 		}
1067 	}
1068 
exponential(Random rand, double width)1069 	public static double exponential(Random rand, double width){
1070 		double d=rand.nextDouble();
1071 		d=d*width-width+1;
1072 		double exp=Math.pow(10, d)/10;
1073 		return exp;
1074 	}
1075 
randomScaffoldLocMetagenome(int readlen)1076 	public int[] randomScaffoldLocMetagenome(int readlen){
1077 		double d=randy.nextDouble();
1078 
1079 		int chrom=0, scaf=0;
1080 		while(d>chromProbs[chrom]){
1081 //			outstream.println(chrom+", "+d+", "+chromProbs[chrom]);
1082 			chrom++;
1083 		}
1084 		while(d>scafProbs[chrom][scaf]){scaf++;}
1085 
1086 		int[] locs=Data.scaffoldLocs[chrom];
1087 		int[] lengths=Data.scaffoldLengths[chrom];
1088 
1089 		int loc=locs[scaf];
1090 		int scaflen=lengths[scaf];
1091 		int start;
1092 		if(readlen>=scaflen){
1093 			readlen=scaflen;
1094 			start=loc;
1095 		}else{
1096 			start=loc+randy.nextInt(scaflen-readlen);
1097 		}
1098 		return new int[] {chrom, start, readlen};
1099 	}
1100 
writeRandomReadsX(long numReads, int minlen, int maxlen, int midlen, int maxSnps, int maxInss, int maxDels, int maxSubs, int maxNs, float snpRate, float insRate, float delRate, float subRate, float nRate, int minInsLen, int minDelLen, int minSubLen, int minNLen, int maxInsLen, int maxDelLen, int maxSubLen, int maxNLen, int minChrom, int maxChrom, int minQual, int midQual, int maxQual, String fname1, String fname2)1101 	public void writeRandomReadsX(long numReads, int minlen, int maxlen, int midlen,
1102 			int maxSnps, int maxInss, int maxDels, int maxSubs, int maxNs,
1103 			float snpRate, float insRate, float delRate, float subRate, float nRate,
1104 			int minInsLen, int minDelLen, int minSubLen, int minNLen,
1105 			int maxInsLen, int maxDelLen, int maxSubLen, int maxNLen,
1106 			int minChrom, int maxChrom,
1107 			int minQual, int midQual, int maxQual, String fname1, String fname2){
1108 		FASTQ.TAG_CUSTOM=(prefix==null && !ILLUMINA_NAMES && !INSERT_NAMES);
1109 
1110 		TextStreamWriter tsw1=new TextStreamWriter(fname1, overwrite, false, true);
1111 		tsw1.start();
1112 		TextStreamWriter tsw2=null;
1113 		if(fname2!=null){
1114 			assert(!fname2.equalsIgnoreCase(fname1));
1115 			tsw2=new TextStreamWriter(fname2, overwrite, false, true);
1116 			tsw2.start();
1117 		}
1118 
1119 		assert(minQual<=midQual);
1120 		assert(midQual<=maxQual);
1121 		assert(minQual>=0 && maxQual<60);
1122 
1123 		final int maxQualP=maxQual;//Tools.max(35, maxQual);
1124 		final int midQualP=midQual;//30;
1125 		final int minQualP=minQual;//Tools.min(25, maxQual);
1126 
1127 		final BitSet bits=new BitSet(maxlen+1);
1128 		final int[] locs=new int[(int)Tools.min(300000000, maxlen+(maxDelLen*(long)maxDels))];
1129 
1130 		Read lastRead=null;
1131 		int ampLevel=0;
1132 		int ampLength=2000;
1133 
1134 		for(int i=0; i<numReads; i++){
1135 
1136 			final boolean perfect=randyPerfectRead.nextFloat()<PERFECT_READ_RATIO;
1137 
1138 			final byte baseQuality;
1139 			final byte slant;
1140 			{
1141 //				byte baseSlant=(perfect ? (byte)5 : (byte)(maxQual-minQual+1));
1142 //				slant=(byte)((randyQual.nextInt(baseSlant)+randyQual.nextInt(baseSlant)+1)/2);
1143 //				if(randyQual.nextBoolean()){
1144 //					int range=(perfect ? maxQualP-midQualP+1 : maxQual-midQual+1);
1145 //					int delta=Tools.min(randyQual.nextInt(range), randyQual.nextInt(range));
1146 //					baseQuality=(byte)((perfect ? midQualP : midQual)+delta);
1147 //				}else{
1148 //					int range=perfect ? midQualP-minQualP+1 : midQual-minQual+1;
1149 //					int delta=randyQual.nextInt(range);
1150 //					baseQuality=(byte)((perfect ? midQualP : midQual)-delta);
1151 //				}
1152 
1153 				byte baseSlant=(byte)(maxQual-minQual+1);
1154 				slant=(byte)((randyQual.nextInt(baseSlant)+randyQual.nextInt(baseSlant)+1)/2);
1155 				if(Tools.nextBoolean(randyQual)){
1156 					int range=maxQual-midQual+1;
1157 					int delta=Tools.min(randyQual.nextInt(range), randyQual.nextInt(range));
1158 					baseQuality=(byte)(midQual+delta);
1159 				}else{
1160 					int range=midQual-minQual+1;
1161 					int delta=randyQual.nextInt(range);
1162 					baseQuality=(byte)(midQual-delta);
1163 				}
1164 			}
1165 
1166 			int forceChrom=-1, forceLoc=-1;
1167 
1168 
1169 			if(AMP>1 && lastRead!=null){
1170 				if(ampLevel>0){
1171 					forceChrom=lastRead.chrom;
1172 					//						forceLoc=lastRead.start+4-randyAmp.nextInt(9);
1173 					//						forceLoc=lastRead.start+10-randyAmp.nextInt(21);
1174 
1175 					int a=ampLength;
1176 					int b=a*2+1;
1177 					int mode=randyAmp.nextInt(100);
1178 					if(mode>96){
1179 						forceLoc=lastRead.start+a-randyAmp.nextInt(b);
1180 					}else if(mode>85){
1181 						forceLoc=lastRead.start+a-(randyAmp.nextInt(b)+randyAmp.nextInt(b))/2;
1182 					}else if(mode>30){
1183 						forceLoc=lastRead.start+a-(randyAmp.nextInt(b)+randyAmp.nextInt(b)+randyAmp.nextInt(b))/3;
1184 					}else{
1185 						forceLoc=lastRead.start+a-(randyAmp.nextInt(b)+randyAmp.nextInt(b)+randyAmp.nextInt(b)+randyAmp.nextInt(b))/4;
1186 					}
1187 				}else{
1188 
1189 					ampLevel=0;
1190 					int a1=AMP;
1191 					if(randyAmp.nextInt(30)==0){a1*=7;}
1192 
1193 					if(randyAmp.nextInt(3)>0){
1194 						ampLevel=Tools.min(randyAmp.nextInt(a1), randyAmp.nextInt(a1));
1195 					}else{
1196 						double log=Math.log10(a1*7);
1197 						ampLevel=(int)Math.round(Math.pow(10, randyAmp.nextDouble()*log));
1198 					}
1199 
1200 					ampLength=500+randyAmp.nextInt(3001)+(int)Tools.min(1000+randyAmp.nextInt(20000), 400*Tools.exponential(randyAmp, 0.8d));
1201 
1202 				}
1203 			}
1204 
1205 
1206 			Read r1=makeRead(null, minlen, maxlen, midlen, minChrom, maxChrom,
1207 					maxSnps, maxInss, maxDels, maxSubs, maxNs,
1208 					snpRate, insRate, delRate, subRate, nRate,
1209 					minInsLen, minDelLen, minSubLen, minNLen,
1210 					maxInsLen, maxDelLen, maxSubLen, maxNLen,
1211 					mateMiddleMin, mateMiddleMax, mateSameStrand,
1212 					minQual, midQual, maxQual, baseQuality, slant,
1213 					perfect, nextReadID, locs, bits, forceChrom, forceLoc);
1214 
1215 //			assert(false) : r1;
1216 			if(paired && r1!=null){
1217 
1218 				Read r2=null;
1219 				for(int tries=0; r2==null && tries<100; tries++){
1220 					r2=makeRead(r1, minlen, maxlen, midlen, minChrom, maxChrom,
1221 							maxSnps, maxInss, maxDels, maxSubs, maxNs,
1222 							snpRate, insRate, delRate, subRate, nRate,
1223 							minInsLen, minDelLen, minSubLen, minNLen,
1224 							maxInsLen, maxDelLen, maxSubLen, maxNLen,
1225 							mateMiddleMin, mateMiddleMax, mateSameStrand,
1226 							minQual, midQual, maxQual, baseQuality, slant,
1227 							perfect, nextReadID, locs, bits, -1, -1);
1228 				}
1229 
1230 				if(r2!=null){
1231 					if(FORCE_SINGLE_SCAFFOLD){
1232 						int scaf1=Data.scaffoldIndex(r1.chrom, (r1.start+r1.stop)/2);
1233 						int scaf2=Data.scaffoldIndex(r2.chrom, (r2.start+r2.stop)/2);
1234 						if(scaf1!=scaf2){
1235 							r1=r2=null;
1236 						}
1237 					}
1238 				}
1239 
1240 				if(r2!=null){
1241 					r1.mate=r2;
1242 					r2.mate=r1;
1243 					if(fragadapter1!=null){
1244 						r1.setMapped(true);
1245 						r2.setMapped(true);
1246 						int x=Read.insertSizeMapped(r1, r2, false);
1247 						if(x>0 && x<r1.length()){
1248 							addFragAdapter(r1, x, fragadapter1, randyAdapter);
1249 						}
1250 						if(x<r2.length()){
1251 							addFragAdapter(r2, x, fragadapter2, randyAdapterMate);
1252 						}
1253 					}
1254 				}else{
1255 					r1=null;
1256 				}
1257 
1258 //				outstream.println(r.strand()+"\t"+r.insertSize());
1259 			}
1260 			if(r1!=null){
1261 				final Read r2=r1.mate;
1262 				processSpecialNames(r1);
1263 
1264 				tsw1.println(r1);
1265 				if(r2!=null){
1266 					r2.setPairnum(1);
1267 					if(tsw2!=null){tsw2.println(r2);}
1268 					else{tsw1.println(r2);}
1269 				}
1270 				nextReadID++;
1271 			}else{
1272 				i--;
1273 			}
1274 //			outstream.println(r1.toFastq()+"\n"+r1.id);
1275 			ampLevel=Tools.max(0, ampLevel-1);
1276 			if(ampLevel==0){lastRead=null;}
1277 
1278 			if(lastRead==null){lastRead=r1;}
1279 //			outstream.println("Made "+r.start+" ~ "+r.stop+" = "+(r.stop-r.start));
1280 		}
1281 		tsw1.poison();
1282 		if(tsw2!=null){tsw2.poison();}
1283 	}
1284 
1285 
1286 
makeRandomReadsX(int numReads, int minlen, int maxlen, int midlen, int maxSnps, int maxInss, int maxDels, int maxSubs, int maxNs, float snpRate, float insRate, float delRate, float subRate, float nRate, int minInsLen, int minDelLen, int minSubLen, int minNLen, int maxInsLen, int maxDelLen, int maxSubLen, int maxNLen, int minChrom, int maxChrom, int minQual, int midQual, int maxQual)1287 	public ArrayList<Read> makeRandomReadsX(int numReads, int minlen, int maxlen, int midlen,
1288 			int maxSnps, int maxInss, int maxDels, int maxSubs, int maxNs,
1289 			float snpRate, float insRate, float delRate, float subRate, float nRate,
1290 			int minInsLen, int minDelLen, int minSubLen, int minNLen,
1291 			int maxInsLen, int maxDelLen, int maxSubLen, int maxNLen,
1292 			int minChrom, int maxChrom,
1293 			int minQual, int midQual, int maxQual){
1294 		FASTQ.TAG_CUSTOM=(prefix==null && !ILLUMINA_NAMES && !INSERT_NAMES);
1295 
1296 		assert(minQual<=midQual);
1297 		assert(midQual<=maxQual);
1298 		assert(minQual>=0 && maxQual<60);
1299 
1300 		if(bits_cached==null){bits_cached=new BitSet(maxlen+1);}
1301 		if(locs_cached==null || locs_cached.length<Tools.min(300000000, maxlen+(maxDelLen*(long)maxDels))){
1302 			locs_cached=new int[(int)Tools.min(300000000, maxlen+(maxDelLen*(long)maxDels))];
1303 		}
1304 		final BitSet bits=bits_cached;
1305 		final int[] locs=locs_cached;
1306 		final ArrayList<Read> list=new ArrayList<Read>(numReads);
1307 
1308 		Read lastRead=null;
1309 		int ampLevel=0;
1310 		int ampLength=2000;
1311 
1312 		for(int i=0; i<numReads; i++){
1313 
1314 			final boolean perfect=randyPerfectRead.nextFloat()<PERFECT_READ_RATIO;
1315 
1316 			final byte baseQuality;
1317 			final byte slant;
1318 			{
1319 //				byte baseSlant=(perfect ? (byte)5 : (byte)(maxQual-minQual+1));
1320 //				slant=(byte)((randyQual.nextInt(baseSlant)+randyQual.nextInt(baseSlant)+1)/2);
1321 //				if(randyQual.nextBoolean()){
1322 //					int range=(perfect ? maxQualP-midQualP+1 : maxQual-midQual+1);
1323 //					int delta=Tools.min(randyQual.nextInt(range), randyQual.nextInt(range));
1324 //					baseQuality=(byte)((perfect ? midQualP : midQual)+delta);
1325 //				}else{
1326 //					int range=perfect ? midQualP-minQualP+1 : midQual-minQual+1;
1327 //					int delta=randyQual.nextInt(range);
1328 //					baseQuality=(byte)((perfect ? midQualP : midQual)-delta);
1329 //				}
1330 
1331 				byte baseSlant=(byte)(maxQual-minQual+1);
1332 				slant=(byte)((randyQual.nextInt(baseSlant)+randyQual.nextInt(baseSlant)+1)/2);
1333 				if(Tools.nextBoolean(randyQual)){
1334 					int range=maxQual-midQual+1;
1335 					int delta=Tools.min(randyQual.nextInt(range), randyQual.nextInt(range));
1336 					baseQuality=(byte)(midQual+delta);
1337 				}else{
1338 					int range=midQual-minQual+1;
1339 					int delta=randyQual.nextInt(range);
1340 					baseQuality=(byte)(midQual-delta);
1341 				}
1342 			}
1343 
1344 			int forceChrom=-1, forceLoc=-1;
1345 			if(AMP>1 && lastRead!=null){
1346 				if(ampLevel>0){
1347 					forceChrom=lastRead.chrom;
1348 //					forceLoc=lastRead.start+4-randyAmp.nextInt(9);
1349 //					forceLoc=lastRead.start+10-randyAmp.nextInt(21);
1350 
1351 					int a=ampLength;
1352 					int b=a*2+1;
1353 					if(Tools.nextBoolean(randyAmp)){
1354 						forceLoc=lastRead.start+a-randyAmp.nextInt(b);
1355 					}else{
1356 //						if(randyAmp.nextBoolean()){
1357 //							forceLoc=lastRead.start+a-(randyAmp.nextInt(b)+randyAmp.nextInt(b))/2;
1358 //						}else{
1359 							forceLoc=lastRead.start+a-(randyAmp.nextInt(b)+randyAmp.nextInt(b)+randyAmp.nextInt(b))/3;
1360 //						}
1361 					}
1362 				}else{
1363 
1364 					int a1=AMP;
1365 					if(randyAmp.nextInt(30)==0){a1*=7;}
1366 
1367 					if(randyAmp.nextInt(3)>0){
1368 						ampLevel=Tools.min(randyAmp.nextInt(a1), randyAmp.nextInt(a1));
1369 					}else{
1370 						double log=Math.log10(a1*7);
1371 						ampLevel=(int)Math.round(Math.pow(10, randyAmp.nextDouble()*log));
1372 					}
1373 					ampLength=500+randyAmp.nextInt(3001);
1374 //					ampLevel=randyAmp.nextInt(AMP);
1375 				}
1376 			}
1377 
1378 			Read r1=makeRead(null, minlen, maxlen, midlen, minChrom, maxChrom,
1379 					maxSnps, maxInss, maxDels, maxSubs, maxNs,
1380 					snpRate, insRate, delRate, subRate, nRate,
1381 					minInsLen, minDelLen, minSubLen, minNLen,
1382 					maxInsLen, maxDelLen, maxSubLen, maxNLen,
1383 					mateMiddleMin, mateMiddleMax, mateSameStrand,
1384 					minQual, midQual, maxQual, baseQuality, slant,
1385 					perfect, nextReadID, locs, bits, forceChrom, forceLoc);
1386 
1387 //			assert(false) : r1;
1388 			if(paired && r1!=null){
1389 
1390 				Read r2=null;
1391 				for(int tries=0; r2==null && tries<100; tries++){
1392 					r2=makeRead(r1, minlen, maxlen, midlen, minChrom, maxChrom,
1393 							maxSnps, maxInss, maxDels, maxSubs, maxNs,
1394 							snpRate, insRate, delRate, subRate, nRate,
1395 							minInsLen, minDelLen, minSubLen, minNLen,
1396 							maxInsLen, maxDelLen, maxSubLen, maxNLen,
1397 							mateMiddleMin, mateMiddleMax, mateSameStrand,
1398 							minQual, midQual, maxQual, baseQuality, slant,
1399 							perfect, nextReadID, locs, bits, -1, -1);
1400 				}
1401 
1402 				if(r2!=null){
1403 					if(FORCE_SINGLE_SCAFFOLD){
1404 						int scaf1=Data.scaffoldIndex(r1.chrom, (r1.start+r1.stop)/2);
1405 						int scaf2=Data.scaffoldIndex(r2.chrom, (r2.start+r2.stop)/2);
1406 						if(scaf1!=scaf2){
1407 							r1=r2=null;
1408 						}
1409 					}
1410 				}
1411 
1412 				if(r2!=null){
1413 					r1.mate=r2;
1414 					r2.mate=r1;
1415 					if(fragadapter1!=null){
1416 						r1.setMapped(true);
1417 						r2.setMapped(true);
1418 						int x=Read.insertSizeMapped(r1, r2, false);
1419 						if(x>0 && x<r1.length()){
1420 							addFragAdapter(r1, x, fragadapter1, randyAdapter);
1421 						}
1422 						if(x<r2.length()){
1423 							addFragAdapter(r2, x, fragadapter2, randyAdapterMate);
1424 						}
1425 					}
1426 				}else{
1427 					r1=null;
1428 				}
1429 
1430 //				outstream.println(r.strand()+"\t"+r.insertSize());
1431 			}
1432 			if(r1!=null){
1433 				processSpecialNames(r1);
1434 
1435 				if(r1.mate!=null){
1436 					r1.mate.setPairnum(1);
1437 				}
1438 				list.add(r1);
1439 				nextReadID++;
1440 			}else{
1441 				i--;
1442 			}
1443 			ampLevel=Tools.max(0, ampLevel-1);
1444 			if(ampLevel==0){lastRead=null;}
1445 
1446 			if(lastRead==null){lastRead=r1;}
1447 //			outstream.println("Made "+r1.start+" ~ "+r1.stop+" = "+(r1.stop-r1.start));
1448 		}
1449 		return list;
1450 	}
1451 
processSpecialNames(Read r1)1452 	private void processSpecialNames(Read r1){
1453 		if(r1==null){return;}
1454 		Read r2=r1.mate;
1455 
1456 		if(prefix!=null){
1457 			r1.id=prefix+"_"+r1.numericID+slash1;
1458 			if(r2!=null){
1459 				r2.id=prefix+"_"+r1.numericID+slash2;
1460 			}
1461 		}else if(ILLUMINA_NAMES){
1462 			r1.id=r1.numericID+slash1;
1463 			if(r2!=null){
1464 				r2.id=r1.numericID+slash2;
1465 			}
1466 		}else if(INSERT_NAMES){
1467 			r1.setMapped(true);
1468 			if(r2!=null){
1469 				r2.setMapped(true);
1470 				r1.setPaired(true);
1471 				r2.setPaired(true);
1472 			}
1473 			int insert=Read.insertSizeMapped(r1, r2, false);
1474 //			assert(false) : r1.strand()+", "+r1.start+", "+r2.strand()+", "+r2.start+", "+insert;
1475 			String s="insert="+insert;
1476 			r1.id=s+" 1:"+r1.numericID;
1477 			if(r2!=null){
1478 				r2.id=s+" 2:"+r1.numericID;
1479 			}
1480 		}
1481 	}
1482 
genReadLen(int minLen, int maxLen, int midLen, Random randy, boolean linear, boolean bell)1483 	public int genReadLen(int minLen, int maxLen, int midLen, Random randy, boolean linear, boolean bell){
1484 		if(minLen==maxLen){return minLen;}
1485 
1486 		final int range=maxLen-minLen+1;
1487 		assert(range>0) : minLen+", "+maxLen+", "+midLen+", "+linear;
1488 		if(linear){
1489 			return minLen+randy.nextInt(range);
1490 		}
1491 		assert(midLen>=minLen && midLen<=maxLen) : "minLen="+minLen+", midLen="+midLen+", maxLen="+maxLen;
1492 
1493 		float choice=randy.nextFloat();
1494 		int len;
1495 		if(choice<0.01){
1496 			len=minLen+randy.nextInt(range);
1497 //			System.err.println("A: "+len);
1498 		}else if(choice<0.05){
1499 			len=minLen+Tools.min(randy.nextInt(range), randy.nextInt(range));
1500 //			System.err.println("B: "+len);
1501 		}else if(choice<0.2){
1502 			double g=randy.nextGaussian();
1503 			len=(int)Math.round((g*readLengthDev)+midLen);
1504 			while(len<minLen || len>maxLen){
1505 				g=randy.nextGaussian();
1506 				len=(int)Math.round((g*readLengthDev)+midLen);
1507 			}
1508 		}else if(choice<0.4){
1509 			double g=randy.nextGaussian();
1510 			len=(int)Math.round((g*readLengthDev/2)+midLen);
1511 			while(len<minLen || len>maxLen){
1512 				g=randy.nextGaussian();
1513 				len=(int)Math.round((g*readLengthDev/2)+midLen);
1514 			}
1515 		}else{
1516 			double g=randy.nextGaussian();
1517 			len=(int)Math.round((g*readLengthDev/8)+midLen);
1518 			while(len<minLen || len>maxLen){
1519 				g=randy.nextGaussian();
1520 				len=(int)Math.round((g*readLengthDev/8)+midLen);
1521 			}
1522 		}
1523 
1524 		return len;
1525 	}
1526 
makeRead(Read r0, int minlen, int maxlen, int midlen, int minChrom, int maxChrom, int maxSnps, int maxInss, int maxDels, int maxSubs, int maxNs, float snpRate, float insRate, float delRate, float subRate, float nRate, int minInsLen, int minDelLen, int minSubLen, int minNLen, int maxInsLen, int maxDelLen, int maxSubLen, int maxNLen, int minMiddle, int maxMiddle, boolean sameStrand, int minQual, int midQual, int maxQual, byte baseQuality, byte slant, boolean perfect, long rid, int[] locs, BitSet bits, int FORCE_CHROM, int FORCE_LOC)1527 	public Read makeRead(Read r0, int minlen, int maxlen, int midlen, int minChrom, int maxChrom,
1528 			int maxSnps, int maxInss, int maxDels, int maxSubs, int maxNs,
1529 			float snpRate, float insRate, float delRate, float subRate, float nRate,
1530 			int minInsLen, int minDelLen, int minSubLen, int minNLen,
1531 			int maxInsLen, int maxDelLen, int maxSubLen, int maxNLen,
1532 			int minMiddle, int maxMiddle, boolean sameStrand,
1533 			int minQual, int midQual, int maxQual, byte baseQuality, byte slant,
1534 			boolean perfect, long rid, int[] locs, BitSet bits,
1535 			int FORCE_CHROM, int FORCE_LOC){
1536 
1537 //		verbose=(rid==3860);
1538 
1539 		int SNPs=0;
1540 		int INSs=0;
1541 		int DELs=0;
1542 		int SUBs=0;
1543 		int Ns=0;
1544 		int adapters=0;
1545 
1546 		while(SNPs<maxSnps && randyMutationType.nextFloat()<snpRate){SNPs++;}
1547 		while(INSs<maxInss && randyMutationType.nextFloat()<insRate){INSs++;}
1548 		while(DELs<maxDels && randyMutationType.nextFloat()<delRate){DELs++;}
1549 		while(SUBs<maxSubs && randyMutationType.nextFloat()<subRate){SUBs++;}
1550 		while(Ns<maxNs && randyMutationType.nextFloat()<nRate){Ns++;}
1551 
1552 //		final boolean perfect=randyPerfectRead.nextFloat()<PERFECT_READ_RATIO;
1553 		if(perfect){SNPs=INSs=DELs=SUBs=Ns=0;}
1554 
1555 		if(verbose){
1556 			outstream.println("\nMaking read with snps="+SNPs+", inss="+INSs+", dels="+DELs+", subs="+SUBs+", Ns="+Ns);
1557 			outstream.println("perfect="+perfect);
1558 		}
1559 
1560 		int[] delsa=makeDelsa(DELs, minDelLen, maxDelLen, randy2);
1561 
1562 		int readlen=genReadLen(minlen, maxlen, midlen, randyLength, LINEAR_LENGTH, BELL_LENGTH);
1563 		int inititallen0=readlen+(delsa==null ? 0 : (int)Tools.sum(delsa));
1564 
1565 		if(verbose){
1566 			outstream.println("delsa="+Arrays.toString(delsa));
1567 			outstream.println("readlen="+readlen+", inititallen0="+inititallen0);
1568 		}
1569 
1570 		int chrom=(FORCE_CHROM>=0 ? FORCE_CHROM : randomChrom(r0, minChrom, maxChrom));
1571 		if(chrom<0){return null;}
1572 		final int strand=randomStrand(r0, minChrom, maxChrom, sameStrand);
1573 
1574 		int loc;
1575 		if(FORCE_LOC>=0){
1576 			loc=FORCE_LOC;
1577 		}else if(RANDOM_SCAFFOLD){
1578 			int[] x=randomScaffoldLoc(chrom, inititallen0);
1579 			if(x==null){return null;}
1580 			loc=x[0];
1581 			inititallen0=x[1];
1582 			readlen=inititallen0-(delsa==null ? 0 : (int)Tools.sum(delsa));
1583 		}else if(METAGENOME){
1584 			int[] x=randomScaffoldLocMetagenome(inititallen0);
1585 			if(x==null){return null;}
1586 			chrom=x[0];
1587 			loc=x[1];
1588 			inititallen0=x[2];
1589 			readlen=inititallen0-(delsa==null ? 0 : (int)Tools.sum(delsa));
1590 		}else{
1591 			loc=randomLoc(r0, chrom, inititallen0, minMiddle, maxMiddle, strand);
1592 		}
1593 
1594 		if(verbose){
1595 			outstream.println("chrom="+chrom+", loc="+loc+"~"+(loc+inititallen0-1)+", strand="+strand+", chalen="+Data.getChromosome(chrom).maxIndex);
1596 		}
1597 
1598 		if(r0!=null){
1599 			int y=loc+inititallen0-1;
1600 			if(loc<0){loc=0; y=readlen-1; maxDels=0; delsa=null; inititallen0=readlen;}
1601 			ChromosomeArray cha=Data.getChromosome(chrom);
1602 			if(y>cha.maxIndex){y=cha.maxIndex; loc=y-readlen+1; maxDels=0; delsa=null; inititallen0=readlen;}
1603 			if(verbose){
1604 				outstream.println("After pair compensation:");
1605 				outstream.println("delsa="+Arrays.toString(delsa));
1606 				outstream.println("readlen="+readlen+", inititallen0="+inititallen0);
1607 				outstream.println("chrom="+chrom+", loc="+loc+", strand="+strand);
1608 			}
1609 			assert(y<=cha.maxIndex) : y+", "+cha.maxIndex;
1610 			assert(cha.get(y)>0) : cha.get(y);
1611 		}
1612 
1613 		if(loc<0){
1614 			if(verbose){
1615 				outstream.println("Bad values; returning null.");
1616 			}
1617 			return null;
1618 		}
1619 
1620 		final ChromosomeArray cha=Data.getChromosome(chrom);
1621 		if(readlen>=(cha.maxIndex-cha.minIndex)){
1622 			if(verbose){
1623 				outstream.println("Too long; returning null.");
1624 			}
1625 			return null;
1626 		}
1627 		if(loc>=cha.maxIndex || loc<0){return null;}
1628 		byte[] bases=cha.getBytes(loc, loc+inititallen0-1);
1629 		assert(bases[0]>0 && bases[bases.length-1]>0) : Arrays.toString(bases);
1630 		assert(strand==Shared.MINUS || strand==Shared.PLUS);
1631 
1632 		for(int i=0; i<bases.length; i++){
1633 			locs[i]=i+loc;
1634 		}
1635 		if(verbose){
1636 			outstream.println(new String(bases));
1637 			outstream.println(Arrays.toString(Arrays.copyOf(locs, bases.length)));
1638 		}
1639 
1640 		if(BAN_NS){
1641 			for(byte b : bases){
1642 				if(!AminoAcid.isFullyDefined(b)){return null;}
1643 			}
1644 		}
1645 
1646 		if(pbadapter1!=null && (rid&3)==0){
1647 			bases=addPBAdapter(bases, locs, readlen, randyAdapter, pbadapter1);
1648 			adapters++;
1649 		}else if(pbadapter2!=null && (rid&3)==1){
1650 			bases=addPBAdapter(bases, locs, readlen, randyAdapter, pbadapter2);
1651 			adapters++;
1652 		}
1653 
1654 		int[] dif=new int[] {0, 0};
1655 
1656 		for(int j=0; delsa!=null && j<delsa.length; j++){
1657 			bases=addDeletion(bases, locs, delsa[j], readlen, dif, randy2);
1658 			if(verbose){
1659 				outstream.println("After adding del "+delsa[j]+": ");
1660 				outstream.println(new String(bases));
1661 				outstream.println(Arrays.toString(Arrays.copyOf(locs, bases.length)));
1662 			}
1663 		}
1664 		if(bases.length>readlen){bases=Arrays.copyOf(bases, readlen);}
1665 		assert(bases.length==readlen);
1666 
1667 		for(int j=0; j<INSs; j++){
1668 			bases=addInsertion(bases, locs, minInsLen, maxInsLen, readlen, dif, randy2);
1669 			if(verbose){
1670 				outstream.println("After adding ins: ");
1671 				outstream.println("'"+new String(bases)+"'");
1672 				outstream.println(Arrays.toString(Arrays.copyOf(locs, Tools.min(locs.length, bases.length))));
1673 			}
1674 		}
1675 		if(bases.length!=readlen){bases=Arrays.copyOf(bases, readlen);}
1676 		assert(bases.length==readlen);
1677 
1678 		if(USE_UNIQUE_SNPS){
1679 			bits.clear();
1680 			for(int j=0; j<SNPs; j++){bases=addSNP(bases, locs, readlen, randy2, bits);}
1681 		}else{
1682 			for(int j=0; j<SNPs; j++){bases=addSNP(bases, locs, readlen, randy2);}
1683 		}
1684 
1685 		for(int j=0; j<SUBs; j++){bases=addSUB(bases, locs, minSubLen, maxSubLen, readlen, randy2);}
1686 
1687 		if(USE_UNIQUE_SNPS){
1688 			bits.clear();
1689 			for(int j=0; j<Ns; j++){bases=addN(bases, locs, minNLen, maxNLen, readlen, randy2, bits);}
1690 		}else{
1691 			for(int j=0; j<Ns; j++){bases=addN(bases, locs, minNLen, maxNLen, readlen, randy2, null);}
1692 		}
1693 
1694 		//Fill insertions in loc array
1695 		for(int i=1; i<bases.length; i++){
1696 			if(locs[i]<0){locs[i]=locs[i-1];}
1697 		}
1698 		for(int i=bases.length-2; i>=0; i--){
1699 			if(locs[i]<0){locs[i]=locs[i+1];}
1700 		}
1701 		final int x=locs[0], y=locs[bases.length-1];
1702 		if(verbose){
1703 			outstream.println("After adding SNPs, SUBs, Ns, and fixing locs: ");
1704 			outstream.println("'"+new String(bases)+"'");
1705 			outstream.println(Arrays.toString(Arrays.copyOf(locs, Tools.min(locs.length, bases.length))));
1706 		}
1707 
1708 //		if(FORCE_LOC>=0 || FORCE_CHROM>=0){
1709 //			if(y<0 || y+readlen>)
1710 //		}
1711 		assert(FORCE_LOC>=0 || FORCE_CHROM>=0 || y<=cha.maxIndex) : y+", "+r0;
1712 		assert(FORCE_LOC>=0 || FORCE_CHROM>=0 || cha.get(y)>0) : cha.get(y);
1713 
1714 		if(strand==Shared.MINUS){
1715 			AminoAcid.reverseComplementBasesInPlace(bases);
1716 			//Reverse loc array; not really necessary
1717 			for(int i=0, lim=bases.length/2; i<lim; i++){
1718 				int tmp=locs[i];
1719 				locs[i]=locs[bases.length-i-1];
1720 				locs[bases.length-i-1]=tmp;
1721 			}
1722 			if(verbose){
1723 				outstream.println("After reverse-complement: ");
1724 				outstream.println(new String(bases));
1725 				outstream.println(Arrays.toString(Arrays.copyOf(locs, bases.length)));
1726 			}
1727 		}
1728 
1729 		if(verbose){
1730 			outstream.println("Final lineup: ");
1731 			outstream.println(new String(bases));
1732 			for(int i=0; i<bases.length; i++){
1733 				byte c=cha.get(locs[i]);
1734 				if(strand==1){c=AminoAcid.baseToComplementExtended[c];}
1735 				outstream.print((char)c);
1736 			}
1737 			outstream.println();
1738 		}
1739 
1740 		byte[] quals=null;
1741 		if(USE_FIXED_QUALITY){
1742 			quals=getFixedQualityRead(bases.length);
1743 		}else{
1744 //			if(perfect){
1745 //				quals=QualityTools.makeQualityArray(bases.length, randyQual, 30, 40, baseQuality, slant, qVariance);
1746 //			}else{
1747 				quals=QualityTools.makeQualityArray(bases.length, randyQual, minQual, maxQual, baseQuality, slant, qVariance);
1748 //			}
1749 		}
1750 		for(int j=0; j<quals.length; j++){
1751 			if(!AminoAcid.isFullyDefined(bases[j])){quals[j]=0;}
1752 		}
1753 
1754 
1755 //		Read r=new Read(bases, chrom, (byte)strand, loc, loc+bases.length-1, rid, quals, false);
1756 		Read r=new Read(bases, quals, rid, chrom, x, y, (byte)strand);
1757 		r.setSynthetic(true);
1758 		assert(r.length()==readlen);
1759 
1760 		if(ADD_ERRORS_FROM_QUALITY && !perfect){addErrorsFromQuality(r, randyQual);}
1761 		if(ADD_PACBIO_ERRORS && !perfect){
1762 			addPacBioErrors(r, randyQual.nextFloat()*(pbMaxErrorRate-pbMinErrorRate)+pbMinErrorRate, (1+randyQual.nextFloat())*(pbMaxErrorRate-pbMinErrorRate)*0.25f);
1763 		}else{
1764 			assert(r.length()==readlen);
1765 		}
1766 
1767 //		r.stop=r.start+readlen+dif[0]-1;
1768 
1769 		assert(r.stop>r.start) : r;
1770 
1771 		if(adapters>0){r.setHasAdapter(true);}
1772 		if(FORCE_SINGLE_SCAFFOLD && !Data.isSingleScaffold(r.chrom, r.start, r.stop)){return null;}
1773 		if(MIN_SCAFFOLD_OVERLAP>0 && Data.scaffoldOverlapLength(r.chrom, r.start, r.stop)<MIN_SCAFFOLD_OVERLAP){return null;}
1774 		return r;
1775 	}
1776 
addPacBioErrors(final Read r, final float errorRate, final float deviation)1777 	public void addPacBioErrors(final Read r, final float errorRate, final float deviation){
1778 
1779 		byte[] bases=r.bases;
1780 		ByteBuilder bb=new ByteBuilder((int)(bases.length*1.1f));
1781 		ByteBuilder qq=new ByteBuilder((int)(bases.length*1.1f));
1782 
1783 		for(int i=0; i<bases.length; i++){
1784 			float dev2=2*deviation*randy.nextFloat()-deviation;
1785 			float rate=errorRate+dev2;
1786 			float p=randy.nextFloat();
1787 			byte q=QualityTools.probCorrectToPhred(1-rate);
1788 			if(p>rate || !AminoAcid.isFullyDefined(bases[i])){
1789 				bb.append(bases[i]);
1790 				qq.append(q);
1791 			}else{
1792 				float p2=randyMutationType.nextFloat();
1793 				if(p2<0.4){//Ins
1794 					byte b=AminoAcid.numberToBase[randy2.nextInt(4)];
1795 					bb.append(b);
1796 					qq.append(q);
1797 					i--;
1798 				}else if(p2<0.75){//Del
1799 					//do nothing
1800 				}else{//Sub
1801 					int x=AminoAcid.baseToNumber[bases[i]]+randy2.nextInt(3)+1;
1802 					byte b=AminoAcid.numberToBase[x&3];
1803 					bb.append(b);
1804 					qq.append(q);
1805 				}
1806 			}
1807 		}
1808 
1809 		r.bases=bb.toBytes();
1810 		if(r.quality!=null){
1811 			r.quality=qq.toBytes();
1812 //			byte q=QualityTools.probCorrectToPhred(1-errorRate);
1813 //			byte[] qual=new byte[r.length()];
1814 //			Arrays.fill(qual, q);
1815 //			r.quality=qual;
1816 		}
1817 	}
1818 
fillRandomChrom()1819 	private static int[] fillRandomChrom(){
1820 
1821 		int[] in=Arrays.copyOf(Data.chromLengths, Data.chromLengths.length);
1822 		long total=Tools.sum(in);
1823 		int div=(int)(total/8192);
1824 		for(int i=0; i<in.length; i++){in[i]=((in[i]+div-1)/div);}
1825 
1826 
1827 		int sum=0;
1828 		for(int i=0; i<in.length; i++){sum+=in[i];}
1829 		int[] out=new int[sum];
1830 		sum=0;
1831 		for(int chrom=0; chrom<in.length; chrom++){
1832 			int size=in[chrom];
1833 			for(int j=0; j<size; j++){
1834 				out[sum+j]=chrom;
1835 			}
1836 			sum+=size;
1837 		}
1838 		return out;
1839 	}
1840 
getFixedQualityRead(int bases)1841 	public static final byte[] getFixedQualityRead(int bases){
1842 		if(fixedQuality[bases]==null){
1843 			fixedQuality[bases]=new byte[bases];
1844 			Arrays.fill(fixedQuality[bases], FIXED_QUALITY_VALUE);
1845 		}
1846 		return fixedQuality[bases];
1847 	}
1848 
getSeed()1849 	private static synchronized long getSeed(){
1850 		long r=seed*1000;
1851 		seed++;
1852 		return r;
1853 	}
1854 
1855 
1856 	/*--------------------------------------------------------------*/
1857 	/*----------------            Fields            ----------------*/
1858 	/*--------------------------------------------------------------*/
1859 
1860 	private final Random randy;
1861 	private final Random randy2;
1862 	private final Random randyMutationType;
1863 	private final Random randyQual;
1864 	private final Random randyAdapter;
1865 
1866 	private final Random randyMate;
1867 	private final Random randy2Mate;
1868 	private final Random randyMutationTypeMate;
1869 	private final Random randyQualMate;
1870 	private final Random randyAdapterMate;
1871 
1872 	private final Random randyPerfectRead;
1873 	private final Random randyNoref;
1874 
1875 	private final Random randyLength;
1876 
1877 	private final Random randyAmp;
1878 
1879 	public final boolean paired;
1880 
1881 	private long nextReadID=0;
1882 	private byte[] pbadapter1=null;
1883 	private byte[] pbadapter2=null;
1884 
1885 	private byte[][] fragadapter1=null;
1886 	private byte[][] fragadapter2=null;
1887 
1888 	private BitSet bits_cached;
1889 	private int[] locs_cached;
1890 
1891 	private String prefix;
1892 
1893 	/*--------------------------------------------------------------*/
1894 	/*----------------        Static Fields         ----------------*/
1895 	/*--------------------------------------------------------------*/
1896 
1897 //	private static String slash1="/1";
1898 //	private static String slash2="/2";
1899 	private static String slash1=" 1:";
1900 	private static String slash2=" 2:";
1901 
1902 	private static double[] chromProbs;
1903 	private static double[][] scafProbs;
1904 
1905 	private static int[] randomChrom;
1906 
1907 	private static long seed=0;
1908 
1909 	private static final byte[][] fixedQuality=new byte[301][];
1910 
1911 	public static final boolean USE_FIXED_QUALITY=false;
1912 	public static final byte FIXED_QUALITY_VALUE=24;
1913 	public static boolean ADD_ERRORS_FROM_QUALITY=true;
1914 	public static boolean ADD_PACBIO_ERRORS=false;
1915 	public static float pbMinErrorRate=0.13f;
1916 	public static float pbMaxErrorRate=0.17f;
1917 	public static boolean REPLACE_NOREF=false;
1918 	public static boolean OUTPUT_INTERLEAVED=false;
1919 	/** Rather than choosing a random location in the concatenated genome, choose a random scaffold, without respect to length */
1920 	public static boolean RANDOM_SCAFFOLD=false;
1921 	public static boolean METAGENOME=false;
1922 	public static String fileExt=".fq.gz";
1923 	public static boolean verbose=false;
1924 
1925 	public static boolean mateSameStrand=false;
1926 	public static int mateMiddleMin=-200;
1927 	public static int mateMiddleMax=150;
1928 	public static int mateMiddleDev=-1;
1929 	public static int readLengthDev=-1;
1930 	public static boolean SUPERFLAT_DIST=false;
1931 	public static boolean FLAT_DIST=false;
1932 	public static boolean BELL_DIST=true;
1933 	public static boolean EXP_DIST=false;
1934 	public static boolean LINEAR_LENGTH=true;
1935 	public static boolean BELL_LENGTH=false;
1936 	public static double EXP_LAMDA=0.8d;
1937 	public static boolean BIASED_SNPS=false;
1938 	public static boolean ILLUMINA_NAMES=false;
1939 	public static boolean INSERT_NAMES=false;
1940 	public static int midPad=500;
1941 	public static boolean addslash=false;
1942 	public static boolean spaceslash=false;
1943 
1944 	public static boolean NODISK=false;
1945 
1946 	public static int AMP=1;
1947 	public static int qVariance=4;
1948 
1949 	public static float PERFECT_READ_RATIO=0f;
1950 
1951 	/** Ban generation of reads over unspecified reference bases */
1952 	static boolean BAN_NS=false;
1953 
1954 	public static boolean USE_UNIQUE_SNPS=true;
1955 	public static boolean FORCE_SINGLE_SCAFFOLD=true;
1956 	public static int MIN_SCAFFOLD_OVERLAP=1;
1957 	public static boolean overwrite=true;
1958 	public static boolean append=false;
1959 	public static boolean errorState;
1960 
1961 	//Input file, for use as quality source
1962 	public static String in1;
1963 
1964 	static PrintStream outstream=System.err;
1965 
1966 }
1967