1 package var;
2 
3 import java.io.File;
4 import java.util.ArrayList;
5 import java.util.HashMap;
6 
7 import dna.Data;
8 import fileIO.TextStreamWriter;
9 import shared.Parse;
10 import shared.PreParser;
11 import shared.Shared;
12 import shared.Timer;
13 import shared.Tools;
14 
15 public class StackVariations2 {
16 
main(String[] args)17 	public static void main(String[] args){
18 		{//Preparse block for help, config files, and outstream
19 			PreParser pp=new PreParser(args, new Object() { }.getClass().getEnclosingClass(), false);
20 			args=pp.args;
21 			//outstream=pp.outstream;
22 		}
23 
24 		Timer t=new Timer();
25 
26 		String inPattern=(args[0].equalsIgnoreCase("null") ? null : args[0]);
27 		String outPattern=args[1];
28 
29 		assert(!inPattern.equalsIgnoreCase(outPattern));
30 
31 		int minChrom=-1;
32 		int maxChrom=-1;
33 
34 		boolean filter=false;
35 		Data.GENOME_BUILD=-1;
36 
37 		for(int i=2; i<args.length; i++){
38 			final String arg=args[i];
39 			final String[] split=arg.split("=");
40 			String a=split[0].toLowerCase();
41 			String b=split.length>1 ? split[1] : null;
42 
43 			if(a.equalsIgnoreCase("filter")){
44 				filter=true;
45 			}else if(a.startsWith("filter")){
46 				if(b.equals("1") || b.startsWith("t")){filter=true;}
47 				else if(b.equals("0") || b.startsWith("f")){filter=false;}
48 				else{throw new RuntimeException("Unknown parameter "+args[i]);}
49 			}else if(a.equalsIgnoreCase("strict")){
50 				if(b==null){STRICT=true;}
51 				else if(b.equals("1") || b.startsWith("t")){STRICT=true;}
52 				else if(b.equals("0") || b.startsWith("f")){STRICT=false;}
53 				else{throw new RuntimeException("Unknown parameter "+args[i]);}
54 			}else if(a.equals("genome") || a.equals("build")){
55 				Data.setGenome(Integer.parseInt(b));
56 				if(minChrom==-1){minChrom=1;}
57 				if(maxChrom==-1){maxChrom=Data.numChroms;}
58 			}else if(a.equals("minchrom")){
59 				minChrom=Integer.parseInt(b);
60 			}else if(a.equals("maxchrom")){
61 				maxChrom=Integer.parseInt(b);
62 			}else if(a.equals("threads") || a.equals("t")){
63 				THREADS=Integer.parseInt(b);
64 			}else if(a.equals("minreads")){
65 				MIN_READS_TO_KEEP=Integer.parseInt(b);
66 			}else if(a.equals("blocksize")){
67 				GenerateVarlets2.BLOCKSIZE=(Integer.parseInt(b));
68 			}else if(a.equals("deletefiles") || a.startsWith("deletetemp") || a.startsWith("deleteinput") || a.equals("delete")){
69 				DELETE_INPUT=(Parse.parseBoolean(b));
70 			}else{
71 				throw new RuntimeException("Unknown parameter "+args[i]);
72 			}
73 		}
74 
75 		assert(minChrom>=0 && maxChrom>=minChrom) : "Please set minchrom and maxchrom.";
76 		if(Data.GENOME_BUILD<0){throw new RuntimeException("Please set genome number.");}
77 		THREADS=Tools.max(1, THREADS);
78 
79 //		for(byte i=minChrom; i<=maxChrom; i++){
80 //			String fname1=inPattern.replace("#", i+"");
81 //			String fname2=outPattern.replace("#", i+"");
82 //			assert(new File(fname1).exists());
83 //			assert(!new File(fname2).exists());
84 //			processFile(fname1, fname2, filter);
85 //		}
86 
87 		runThreaded(inPattern, outPattern, minChrom, maxChrom, filter);
88 
89 		t.stop();
90 		System.out.println("Input Vars:        \t"+(totalIn_global-totalInNR_global));
91 		System.out.println("Input No-ref:      \t"+totalInNR_global);
92 		System.out.println("Input Delta Length:\t"+deltaLenIn_global);
93 		System.out.println();
94 		System.out.println("Kept Vars:         \t"+(totalKept_global-totalKeptNR_global));
95 		System.out.println("Kept No-ref:       \t"+totalKeptNR_global);
96 		System.out.println("Kept Snp:          \t"+snpKept_global);
97 		System.out.println("Kept Del:          \t"+delKept_global+"\t\tLength: \t"+delLenKept_global);
98 		System.out.println("Kept Ins:          \t"+insKept_global+"\t\tLength: \t"+insLenKept_global);
99 		System.out.println("Kept Sub:          \t"+subKept_global+"\t\tLength: \t"+subLenKept_global);
100 		System.out.println("Kept Delta Length: \t"+deltaLenKept_global);
101 		System.out.println("Kept Avg Score:    \t"+(scoreKept_global/(Tools.max(1, totalKept_global))));
102 		System.out.println();
103 		System.out.println("Dropped Vars:      \t"+(totalDropped_global-totalDroppedNR_global));
104 		System.out.println("Dropped No-ref:    \t"+totalDroppedNR_global);
105 		System.out.println("Dropped Avg Score: \t"+(scoreDropped_global/Tools.max(1, totalDropped_global)));
106 		System.out.println();
107 		System.out.println("Time: \t"+t);
108 	}
109 
runThreaded(String inPattern, String outPattern, int minChrom, int maxChrom, boolean filter)110 	public static final void runThreaded(String inPattern, String outPattern, int minChrom, int maxChrom, boolean filter){
111 		ArrayList<SVThread> svts=new ArrayList<SVThread>();
112 		for(int i=minChrom; i<=maxChrom; i++){
113 			assert(inPattern==null || !inPattern.equalsIgnoreCase(outPattern));
114 			String fname1=inPattern;
115 			String fname2=outPattern.replace("#", i+"");
116 			addThread(1);
117 			SVThread svt=new SVThread(fname1, fname2, i, filter);
118 			svts.add(svt);
119 			new Thread(svt).start();
120 		}
121 		while(addThread(0)>0){}
122 		for(SVThread svt : svts){
123 
124 			snpKept_global+=svt.snpKept;
125 			delKept_global+=svt.delKept;
126 			insKept_global+=svt.insKept;
127 			subKept_global+=svt.subKept;
128 			delLenKept_global+=svt.delLenKept;
129 			insLenKept_global+=svt.insLenKept;
130 			subLenKept_global+=svt.subLenKept;
131 			deltaLenKept_global+=svt.deltaLenKept;
132 
133 			deltaLenIn_global+=svt.deltaLenIn;
134 			totalIn_global+=svt.totalIn;
135 			totalInNR_global+=svt.totalInNR;
136 			totalKept_global+=svt.totalKept;
137 			totalDropped_global+=svt.totalDropped;
138 			totalKeptNR_global+=svt.totalKeptNR;
139 			totalDroppedNR_global+=svt.totalDroppedNR;
140 			scoreKept_global+=svt.scoreKept;
141 			scoreDropped_global+=svt.scoreDropped;
142 		}
143 	}
144 
145 
passesFilterSNP(Varlet v)146 	public static boolean passesFilterSNP(Varlet v){
147 
148 
149 			//Best so far:
150 
151 			if(STRICT){
152 
153 				if(v.endDist<3){return false;}
154 				if(v.tailDist<10){return false;}
155 
156 				//NOTE!  Last thing I did was make this more strict by adding 1 to all the num reads/unique reads required.
157 				if(v.minStrandReads()>=2){
158 
159 					if(v.errors>2){return false;}
160 					if(v.expectedErrors>1.5f){return false;}
161 //					if(v.expectedErrors-v.errors>3f){return false;}
162 					if(v.maxReadQuality()<18){return false;}
163 					if(v.avgReadQuality()<13){return false;}
164 					if(v.maxVarQuality()<26){return false;}
165 					if(v.avgVarQuality()<18){return false;}
166 					if(v.numReads<4){return false;}
167 					if(v.numSemiUniqueReads<4){return false;}
168 					if(v.numUniqueReads<2){return false;}
169 					if(v.paired<3){return false;}
170 
171 				}else if(v.minStrandReads()>=1){
172 
173 					if(v.errors>2){return false;}
174 					if(v.expectedErrors>1.2f){return false;}
175 //					if(v.expectedErrors-v.errors>3f){return false;}
176 					if(v.maxReadQuality()<19){return false;}
177 					if(v.avgReadQuality()<14){return false;}
178 					if(v.maxVarQuality()<28){return false;}
179 					if(v.avgVarQuality()<19){return false;}
180 					if(v.numReads<3){return false;}
181 					if(v.numSemiUniqueReads<3){return false;}
182 					if(v.numUniqueReads<2){return false;}
183 					if(v.paired<3){return false;}
184 
185 				}else{
186 					if(v.endDist<8){return false;}
187 					if(v.tailDist<14){return false;}
188 
189 					if(v.errors>0){return false;}
190 					if(v.expectedErrors>0.5f){return false;}
191 //					if(v.expectedErrors-v.errors>2f){return false;}
192 					if(v.maxReadQuality()<21){return false;}
193 					if(v.avgReadQuality()<17){return false;}
194 					if(v.maxVarQuality()<30){return false;}
195 					if(v.avgVarQuality()<21){return false;}
196 					if(v.numReads<6){return false;}
197 					if(v.numSemiUniqueReads<5){return false;}
198 					if(v.numUniqueReads<3){return false;}
199 					if(v.paired<5){return false;}
200 					if(v.score()<8100){return false;}
201 				}
202 
203 //				else{
204 //					if(v.endDist<8){return false;}
205 //					if(v.tailDist<14){return false;}
206 //
207 //					if(v.errors>0){return false;}
208 //					if(v.expectedErrors>0.5f){return false;}
209 ////					if(v.expectedErrors-v.errors>2f){return false;}
210 //					if(v.maxReadQuality()<21){return false;}
211 //					if(v.avgReadQuality()<17){return false;}
212 //					if(v.maxVarQuality()<30){return false;}
213 //					if(v.avgVarQuality()<21){return false;}
214 //					if(v.numReads<5){return false;}
215 //					if(v.numSemiUniqueReads<4){return false;}
216 //					if(v.numUniqueReads<2){return false;}
217 //					if(v.paired<4){return false;}
218 //					if(v.score()<8100){return false;}
219 //				}
220 
221 			}else{
222 
223 				assert(false) : "disabled";
224 
225 			}
226 
227 
228 
229 		return true;
230 	}
231 
passesFilterOther(Varlet v)232 	public static boolean passesFilterOther(Varlet v){
233 
234 
235 
236 				if(v.endDist<3){return false;}
237 				if(v.tailDist<10){return false;}
238 
239 				//NOTE!  Last thing I did was make this more strict by adding 1 to all the num reads/unique reads required.
240 				if(v.minStrandReads()>=2){
241 
242 					if(v.errors>2){return false;}
243 					if(v.expectedErrors>1.5f){return false;}
244 //					if(v.expectedErrors-v.errors>3f){return false;}
245 					if(v.maxReadQuality()<16){return false;}
246 					if(v.avgReadQuality()<12){return false;}
247 					if(v.maxVarQuality()<26){return false;}
248 					if(v.avgVarQuality()<16){return false;}
249 					if(v.numReads<4){return false;}
250 					if(v.numSemiUniqueReads<4){return false;}
251 					if(v.numUniqueReads<2){return false;}
252 					if(v.paired<3){return false;}
253 
254 				}else if(v.minStrandReads()>=1){
255 
256 					if(v.errors>2){return false;}
257 					if(v.expectedErrors>1.2f){return false;}
258 //					if(v.expectedErrors-v.errors>3f){return false;}
259 					if(v.maxReadQuality()<17){return false;}
260 					if(v.avgReadQuality()<13){return false;}
261 					if(v.maxVarQuality()<28){return false;}
262 					if(v.avgVarQuality()<17){return false;}
263 					if(v.numReads<4){return false;}
264 					if(v.numSemiUniqueReads<4){return false;}
265 					if(v.numUniqueReads<2){return false;}
266 					if(v.paired<3){return false;}
267 
268 				}else{
269 					if(v.endDist<8){return false;}
270 					if(v.tailDist<14){return false;}
271 
272 					if(v.errors>0){return false;}
273 					if(v.expectedErrors>0.5f){return false;}
274 //					if(v.expectedErrors-v.errors>2f){return false;}
275 					if(v.maxReadQuality()<20){return false;}
276 					if(v.avgReadQuality()<16){return false;}
277 					if(v.maxVarQuality()<30){return false;}
278 					if(v.avgVarQuality()<20){return false;}
279 					if(v.numReads<6){return false;}
280 					if(v.numSemiUniqueReads<5){return false;}
281 					if(v.numUniqueReads<3){return false;}
282 					if(v.paired<5){return false;}
283 					if(v.score()<6500){return false;}
284 				}
285 
286 
287 
288 
289 
290 		return true;
291 	}
292 
293 
mergeAll(ArrayList<Varlet> vars)294 	public static ArrayList<Varlet> mergeAll(ArrayList<Varlet> vars){
295 		if(vars==null || vars.size()==0){return null;}
296 		ArrayList<Varlet> out=new ArrayList<Varlet>(8+vars.size()/16);
297 		Shared.sort(vars);
298 
299 		ArrayList<Varlet> temp=new ArrayList<Varlet>(64);
300 		for(int i=0; i<vars.size(); i++){
301 //			while(vars.get(i).beginLoc<3746582){i++;}
302 			Varlet v=vars.get(i);
303 //			System.err.println("Grabbed "+v.beginLoc+" ~ "+v.call);
304 			if(temp.isEmpty()){
305 //				System.err.println("Adding "+v.beginLoc+" ~ "+v.call);
306 				temp.add(v);
307 			}else{
308 				if(v.equals(temp.get(0))){
309 					temp.add(v);
310 //					System.err.println("Adding "+v.beginLoc+" ~ "+v.call);
311 				}else{
312 //					System.err.println("Merging "+temp.size()+" x "+v.beginLoc+" ~ "+v.call);
313 					Varlet result=mergeEqualVarlets(temp);
314 					if(result.numReads>MIN_READS_TO_KEEP){
315 						out.add(result);
316 					}else if(result.numReads==MIN_READS_TO_KEEP){
317 						if(result.maxVarQuality()>=MIN_QUALITY_AT_MIN_READS &&
318 								result.errors<=MAX_ERRORS_AT_MIN_READS &&
319 								result.expectedErrors<=MAX_EXPECTED_ERRORS_AT_MIN_READS &&
320 								(result.paired>0 || !REQUIRE_PAIRED_AT_MIN_READS)){
321 							out.add(result);
322 						}
323 					}
324 					temp.clear();
325 					temp.add(v);
326 				}
327 			}
328 
329 
330 		}
331 
332 		if(!temp.isEmpty()){
333 			if(temp.size()>=MIN_READS_TO_KEEP){
334 				Varlet result=mergeEqualVarlets(temp);
335 				out.add(result);
336 			}
337 			temp.clear();
338 		}
339 
340 		{//For testing
341 			Shared.sort(out); //Should already be sorted...
342 			for(int i=1; i<out.size(); i++){
343 				assert(!out.get(i).equals(out.get(i-1)));
344 			}
345 		}
346 
347 
348 		if(verbose){System.err.println("out.size="+out.size());}
349 
350 		return out;
351 	}
352 
353 
mergeEqualVarlets(ArrayList<Varlet> vars)354 	public static Varlet mergeEqualVarlets(ArrayList<Varlet> vars){
355 
356 //		System.err.println("Merging "+vars.size()+" vars.");
357 
358 		if(vars.size()==1){return vars.get(0);}
359 
360 		HashMap<Integer, ArrayList<Varlet>> plus=new HashMap<Integer, ArrayList<Varlet>>(Tools.min(8, vars.size()));
361 		HashMap<Integer, ArrayList<Varlet>> minus=new HashMap<Integer, ArrayList<Varlet>>(Tools.min(8, vars.size()));
362 
363 		int numReads=0;
364 		int numSemiUniqueReads=0;
365 		int numUniqueReads=0;
366 		int pairedReads=0;
367 		int plusReads1=0;
368 		int minusReads1=0;
369 		int plusReads2=0;
370 		int minusReads2=0;
371 
372 		int totalQuality=0;
373 		int totalVarQuality=0;
374 
375 		int maxReadQuality=0;
376 		int maxVarQuality=0;
377 
378 		int maxMapScore=0;
379 		int bestLen=0;
380 		int minReadStart=Integer.MAX_VALUE;
381 		int maxReadStop=-999999;
382 
383 		int maxHeadDist=-1;
384 		int maxTailDist=-1;
385 		int maxEndDist=-1;
386 
387 		Varlet bestVar=null;
388 
389 		int minErrors=999;
390 		float minExpectedErrors=999f;
391 
392 		for(Varlet v : vars){
393 
394 			numReads+=v.numReads;
395 			numSemiUniqueReads+=v.numSemiUniqueReads;
396 			plusReads1+=v.numPlusReads1;
397 			minusReads1+=v.numMinusReads1;
398 			plusReads2+=v.numPlusReads2;
399 			minusReads2+=v.numMinusReads2;
400 
401 			if(v.errors<minErrors || (v.errors<=minErrors && v.maxReadQuality()>maxReadQuality)){
402 				bestVar=v;
403 			}
404 
405 			totalQuality+=v.avgReadQuality()*v.numReads;
406 			maxReadQuality=Tools.max(maxReadQuality, v.maxReadQuality());
407 
408 			totalVarQuality+=v.avgVarQuality()*v.numReads;
409 			maxVarQuality=Tools.max(maxVarQuality, v.maxVarQuality());
410 
411 			if(bestLen==0 || (v.mapScore>=maxMapScore && v.readLen>=bestLen)){
412 				bestLen=v.readLen;
413 			}
414 
415 			maxHeadDist=Tools.max(maxHeadDist, v.headDist);
416 			maxTailDist=Tools.max(maxTailDist, v.tailDist);
417 			maxEndDist=Tools.max(maxEndDist, v.endDist);
418 
419 			minErrors=Tools.min(minErrors, v.errors);
420 			minExpectedErrors=Tools.min(minExpectedErrors, v.expectedErrors);
421 			maxMapScore=Tools.max(maxMapScore, v.mapScore);
422 			minReadStart=Tools.min(minReadStart, v.readStart);
423 			maxReadStop=Tools.max(maxReadStop, v.readStop);
424 			assert(minReadStart<maxReadStop) : "\n"+minReadStart+"\n"+maxReadStop+"\n"+v.toText();
425 
426 			pairedReads+=v.paired;
427 
428 			if(v.strand==Shared.PLUS){
429 				ArrayList<Varlet> value=plus.get(v.readStart);
430 				if(value==null){
431 					numUniqueReads++;
432 					value=new ArrayList<Varlet>(2);
433 					plus.put(v.readStart, value);
434 				}
435 				value.add(v);
436 			}else{
437 				ArrayList<Varlet> value=minus.get(v.readStop);
438 				if(value==null){
439 					numUniqueReads++;
440 					value=new ArrayList<Varlet>(2);
441 					minus.put(v.readStop, value);
442 				}
443 				value.add(v);
444 			}
445 		}
446 
447 //		byte plusReads=(byte) ((plus.isEmpty() ? 0 : 1)+(minus.isEmpty() ? 0 : 1));
448 
449 		float avgVarQuality=totalVarQuality/(float)numReads;
450 		float avgReadQuality=totalQuality/(float)numReads;
451 
452 		int netQuality=(int)Math.ceil((avgVarQuality+maxVarQuality)/2);
453 		int netReadQuality=(int)Math.ceil((avgReadQuality+maxReadQuality)/2);
454 
455 		Varlet v=new Varlet(bestVar.chromosome, ((plusReads1+plusReads2>0) && (minusReads1+minusReads2>0) ? Shared.PLUS : bestVar.strand),
456 				bestVar.beginLoc, bestVar.endLoc, bestVar.matchStart, bestVar.matchStop, bestVar.varType, bestVar.ref, bestVar.call,
457 				netQuality, netReadQuality, maxMapScore, minErrors, minExpectedErrors, pairedReads, bestVar.readID, bestLen,
458 				minReadStart, maxReadStop, numReads, maxHeadDist, maxTailDist, maxEndDist, bestVar.pairNum());
459 
460 
461 		v.setMaxReadQuality(maxReadQuality);
462 		v.setMaxVarQuality(maxVarQuality);
463 		v.setAvgReadQuality((int)Math.ceil(avgReadQuality));
464 		v.setAvgVarQuality((int)Math.ceil(avgVarQuality));
465 
466 		v.numSemiUniqueReads=numSemiUniqueReads;
467 		v.numUniqueReads=numUniqueReads;
468 		v.numPlusReads1=plusReads1;
469 		v.numMinusReads1=minusReads1;
470 		v.numPlusReads2=plusReads2;
471 		v.numMinusReads2=minusReads2;
472 		assert(plusReads1+minusReads1+plusReads2+minusReads2==numSemiUniqueReads);
473 
474 		assert(v.numReads>=v.numSemiUniqueReads);
475 		assert(v.numSemiUniqueReads>=v.numUniqueReads);
476 
477 		//This assertion is only correct if stacking is done from raw, uncombined varlets.
478 		assert(v.numSemiUniqueReads==vars.size()) : "\n"+vars.size()+", "+v.numReads+", "+v.numSemiUniqueReads+", "+v.numUniqueReads
479 			+"\n"+v.toText();
480 
481 		assert(v.numUniqueReads<=v.numReads && v.numUniqueReads>0);
482 		assert(v.numUniqueReads==plus.size()+minus.size()) : "numUniqueReads="+numUniqueReads+
483 		", v.numUniqueReads="+v.numUniqueReads+", v.numReads="+v.numReads
484 		+", plus.size()="+plus.size()+", minus.size()="+minus.size()+"\n"+vars+"\n";
485 
486 		return v;
487 	}
488 
489 
490 	private static class SVThread implements Runnable {
491 
SVThread(String fname1_, String fname2_, final int chrom_, boolean filter_)492 		public SVThread(String fname1_, String fname2_, final int chrom_, boolean filter_){
493 			fname1=fname1_;
494 			fname2=fname2_;
495 			filter=filter_;
496 			chrom=chrom_;
497 		}
498 
499 		@Override
run()500 		public void run() {
501 //			addThread(1);
502 			assert(activeThreads>0);
503 			processFile(fname1, fname2);
504 			addThread(-1);
505 		}
506 
processFile(final String inName, final String outName)507 		private final void processFile(final String inName, final String outName){
508 
509 			final long[] keys=GenerateVarlets2.keys(chrom);
510 			final TextStreamWriter tsw=(inName==null ? null : new TextStreamWriter(outName, true, false, false));
511 			if(tsw!=null){
512 				tsw.start();
513 				tsw.println(Varlet.textHeader());
514 			}
515 
516 			for(final long key : keys){
517 				String blockname=GenerateVarlets2.fname(key, inName);
518 
519 				ArrayList<Varlet> initial=Varlet.fromTextFile(blockname);
520 
521 				for(Varlet v : initial){
522 					if(v.varType==Variation.NOREF){totalInNR++;}
523 					totalIn++;
524 
525 					int dif=v.lengthDif();
526 					deltaLenIn+=dif;
527 				}
528 
529 				if(verbose){System.err.println("Initial:  \t"+initial.size());}
530 
531 				int merged=mergeAll2(initial, tsw);
532 
533 				initial=null;
534 				if(verbose){System.err.println("Merged:   \t"+merged);}
535 
536 			}
537 
538 			if(tsw!=null){
539 				tsw.poison();
540 				if(DELETE_INPUT){
541 					for(int i=0; i<10 && tsw.isAlive(); i++){
542 						try {
543 							tsw.join(10000);
544 						} catch (InterruptedException e) {
545 							// TODO Auto-generated catch block
546 							e.printStackTrace();
547 						}
548 					}
549 					if(tsw.isAlive()){
550 						System.err.println(tsw.getClass().getName()+" for "+outName+" refused to die.");
551 						assert(false);
552 					}
553 				}
554 			}
555 
556 			if(DELETE_INPUT){
557 				for(final long key : keys){
558 					String blockname=GenerateVarlets2.fname(key, inName);
559 //					System.out.println("Deleting "+blockname);
560 					new File(blockname).delete();
561 				}
562 			}
563 		}
564 
565 
566 
567 
568 
mergeAll2(ArrayList<Varlet> vars, TextStreamWriter tsw)569 		private final int mergeAll2(ArrayList<Varlet> vars, TextStreamWriter tsw){
570 			if(vars==null || vars.size()==0){return 0;}
571 
572 			Shared.sort(vars);
573 			int out=0;
574 
575 			ArrayList<Varlet> temp=new ArrayList<Varlet>(64);
576 			for(int i=0; i<vars.size(); i++){
577 //				while(vars.get(i).beginLoc<3746582){i++;}
578 //				Varlet v=vars.get(i);
579 				final Varlet v=vars.set(i, null);
580 //				System.err.println("Grabbed "+v.beginLoc+" ~ "+v.call);
581 				if(temp.isEmpty()){
582 //					System.err.println("Adding "+v.beginLoc+" ~ "+v.call);
583 					temp.add(v);
584 				}else{
585 					if(v.equals(temp.get(0))){
586 						temp.add(v);
587 //						System.err.println("Adding "+v.beginLoc+" ~ "+v.call);
588 					}else{
589 //						System.err.println("Merging "+temp.size()+" x "+v.beginLoc+" ~ "+v.call);
590 						Varlet result=mergeEqualVarlets(temp);
591 
592 						processMergedVar(result, tsw);
593 						out++;
594 
595 						temp.clear();
596 						temp.add(v);
597 					}
598 				}
599 			}
600 
601 			if(!temp.isEmpty()){
602 				if(temp.size()>=MIN_READS_TO_KEEP){
603 					Varlet result=mergeEqualVarlets(temp);
604 					out++;
605 					processMergedVar(result, tsw);
606 				}
607 				temp.clear();
608 			}
609 
610 			return out;
611 		}
612 
613 
processMergedVar(Varlet v, TextStreamWriter tsw)614 		private final boolean processMergedVar(Varlet v, TextStreamWriter tsw){
615 
616 			if(v==null){return false;}
617 			if(v.numReads<MIN_READS_TO_KEEP){return false;}
618 			if(v.numReads==MIN_READS_TO_KEEP){
619 				if(v.maxVarQuality()<MIN_QUALITY_AT_MIN_READS ||
620 						v.errors<=MAX_ERRORS_AT_MIN_READS ||
621 						v.expectedErrors<=MAX_EXPECTED_ERRORS_AT_MIN_READS ||
622 						(v.paired<1 && REQUIRE_PAIRED_AT_MIN_READS)){
623 					return false;
624 				}
625 			}
626 
627 			boolean keep;
628 
629 			if(filter){
630 				keep=filterLight(v);
631 			}else{
632 				keep=true;
633 				totalKept++;
634 				scoreKept+=v.score();
635 			}
636 
637 			if(keep){
638 				StringBuilder sb=v.toText();
639 				sb.append('\n');
640 				tsw.print(sb);
641 			}
642 			return keep;
643 		}
644 
645 
filterLight(Varlet v)646 		private final boolean filterLight(Varlet v){
647 			int dropped=0;
648 
649 			int dif=v.lengthDif();
650 //			deltaLenIn+=dif;
651 
652 			boolean passes=true;
653 			if(v.varType==Variation.NOCALL){
654 				passes=false;
655 			}else if(v.numSemiUniqueReads<2){
656 				passes=false;
657 			}else if(v.endDist<6 || v.tailDist<10){
658 				passes=false;
659 			}else if(v.maxVarQuality()<24){
660 				passes=false;
661 			}else if(v.expectedErrors>2){
662 				passes=false;
663 			}
664 
665 			if(passes && STRICT){
666 				passes=passesFilterLight(v);
667 			}
668 
669 			if(passes){
670 				if(v.varType==Variation.NOREF){totalKeptNR++;}
671 				else if(v.varType==Variation.SNP){snpKept++;}
672 				else if(v.varType==Variation.DEL){
673 					delKept++;
674 					//						delLenKept-=v.lengthRef();
675 					delLenKept+=dif;
676 				}
677 				else if(v.varType==Variation.INS){
678 					insKept++;
679 					//						insLenKept+=v.lengthVar();
680 					insLenKept+=dif;
681 				}
682 				else if(v.varType==Variation.DELINS){
683 					subKept++;
684 					//						subLenKept+=(v.lengthRef()-v.lengthVar());
685 					subLenKept+=dif;
686 				}
687 				totalKept++;
688 				scoreKept+=v.score();
689 				deltaLenKept+=dif;
690 			}else{
691 				if(v.varType==Variation.NOREF){totalDroppedNR++;}
692 				dropped++;
693 				scoreDropped+=v.score();
694 			}
695 
696 			totalDropped+=dropped;
697 			return passes;
698 		}
699 
passesFilterLight(Varlet v)700 		private static boolean passesFilterLight(Varlet v){
701 			if(v.endDist<4){return false;}
702 			if(v.tailDist<10){return false;}
703 
704 			//NOTE!  Last thing I did was make this more strict by adding 1 to all the num reads/unique reads required.
705 			if(v.minStrandReads()>=2){
706 
707 				if(v.errors>2){return false;}
708 				if(v.expectedErrors>1.4f){return false;}
709 				//			if(v.expectedErrors-v.errors>3f){return false;}
710 				if(v.maxReadQuality()<17){return false;}
711 				if(v.avgReadQuality()<13){return false;}
712 				if(v.maxVarQuality()<26){return false;}
713 				if(v.avgVarQuality()<17){return false;}
714 				if(v.numReads<3){return false;}
715 				if(v.numSemiUniqueReads<3){return false;}
716 				if(v.numUniqueReads<2){return false;}
717 //				if(v.paired<3){return false;}
718 				if(v.score()<8200){return false;}
719 
720 			}else if(v.minStrandReads()>=1){
721 				if(v.endDist<7){return false;}
722 				if(v.tailDist<12){return false;}
723 
724 				if(v.errors>2){return false;}
725 				if(v.expectedErrors>1.1f){return false;}
726 				//			if(v.expectedErrors-v.errors>3f){return false;}
727 				if(v.maxReadQuality()<18){return false;}
728 				if(v.avgReadQuality()<14){return false;}
729 				if(v.maxVarQuality()<28){return false;}
730 				if(v.avgVarQuality()<18){return false;}
731 				if(v.numReads<4){return false;}
732 				if(v.numSemiUniqueReads<3){return false;}
733 				if(v.numUniqueReads<2){return false;}
734 //				if(v.paired<3){return false;}
735 				if(v.score()<8020){return false;}
736 			}else{
737 				if(v.endDist<8){return false;}
738 				if(v.tailDist<14){return false;}
739 
740 				if(v.errors>0){return false;}
741 				if(v.expectedErrors>0.5f){return false;}
742 				//			if(v.expectedErrors-v.errors>2f){return false;}
743 				if(v.maxReadQuality()<21){return false;}
744 				if(v.avgReadQuality()<17){return false;}
745 				if(v.maxVarQuality()<30){return false;}
746 				if(v.avgVarQuality()<21){return false;}
747 				if(v.numReads<6){return false;}
748 				if(v.numSemiUniqueReads<5){return false;}
749 				if(v.numUniqueReads<3){return false;}
750 //				if(v.paired<5){return false;}
751 				if(v.score()<7670){return false;}
752 			}
753 			return true;
754 		}
755 
756 		private long deltaLenKept=0;
757 		private long snpKept=0;
758 		private long delKept=0;
759 		private long insKept=0;
760 		private long subKept=0;
761 		private long delLenKept=0;
762 		private long insLenKept=0;
763 		private long subLenKept=0;
764 
765 		private long deltaLenIn=0;
766 		private long totalIn=0;
767 		private long totalInNR=0;
768 
769 		private long totalKept=0;
770 		private long totalKeptNR=0;
771 		private long totalDropped=0;
772 		private long totalDroppedNR=0;
773 		private long scoreKept=0;
774 		private long scoreDropped=0;
775 
776 		private final String fname1;
777 		private final String fname2;
778 		private final boolean filter;
779 		private final int chrom;
780 	}
781 
addThread(int x)782 	private static int addThread(int x){
783 		synchronized(THREADLOCK){
784 			while(x>0 && activeThreads>=THREADS){
785 				try {
786 					THREADLOCK.wait(200);
787 				} catch (InterruptedException e) {
788 					// TODO Auto-generated catch block
789 					e.printStackTrace();
790 				}
791 			}
792 			activeThreads+=x;
793 			return activeThreads;
794 		}
795 	}
796 
797 
798 	public static long deltaLenKept_global=0;
799 	public static long deltaLenIn_global=0;
800 
801 	public static long snpKept_global=0;
802 	public static long delKept_global=0;
803 	public static long insKept_global=0;
804 	public static long subKept_global=0;
805 	public static long delLenKept_global=0;
806 	public static long insLenKept_global=0;
807 	public static long subLenKept_global=0;
808 
809 	public static long totalIn_global=0;
810 	public static long totalInNR_global=0;
811 	public static long totalKept_global=0;
812 	public static long totalDropped_global=0;
813 	public static long totalKeptNR_global=0;
814 	public static long totalDroppedNR_global=0;
815 	public static long scoreKept_global=0;
816 	public static long scoreDropped_global=0;
817 
818 	private static int activeThreads=0;
819 
820 	private static final String THREADLOCK=new String("THREADLOCK");
821 	private static int THREADS=7;
822 	private static boolean DELETE_INPUT=false;
823 	public static int MIN_READS_TO_KEEP=1;
824 	public static final int MIN_QUALITY_AT_MIN_READS=14;
825 	public static final int MAX_ERRORS_AT_MIN_READS=2;
826 	public static final int MAX_EXPECTED_ERRORS_AT_MIN_READS=4;
827 	public static final boolean REQUIRE_PAIRED_AT_MIN_READS=false;
828 	public static boolean STRICT=false;
829 	public static boolean VSTRICT=false;
830 	public static boolean USTRICT=false;
831 
832 	public static final boolean verbose=false;
833 }
834