1 package jgi;
2 
3 import java.io.File;
4 import java.io.PrintStream;
5 import java.io.Serializable;
6 import java.util.ArrayDeque;
7 import java.util.ArrayList;
8 import java.util.Arrays;
9 import java.util.Comparator;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.LinkedHashMap;
13 import java.util.Locale;
14 import java.util.PriorityQueue;
15 import java.util.Random;
16 import java.util.concurrent.atomic.AtomicIntegerArray;
17 
18 import align2.BandedAligner;
19 import dna.AminoAcid;
20 import fileIO.ByteFile1;
21 import fileIO.ByteFile2;
22 import fileIO.ByteStreamWriter;
23 import fileIO.FileFormat;
24 import fileIO.ReadWrite;
25 import fileIO.TextStreamWriter;
26 import shared.Parse;
27 import shared.Parser;
28 import shared.PreParser;
29 import shared.ReadStats;
30 import shared.Shared;
31 import shared.Timer;
32 import shared.Tools;
33 import shared.TrimRead;
34 import sort.ReadComparator;
35 import sort.ReadComparatorID;
36 import sort.ReadComparatorName;
37 import sort.ReadLengthComparator;
38 import sort.ReadQualityComparator;
39 import stream.ConcurrentCollectionReadInputStream;
40 import stream.ConcurrentGenericReadInputStream;
41 import stream.ConcurrentReadInputStream;
42 import stream.ConcurrentReadOutputStream;
43 import stream.FASTQ;
44 import stream.FastaReadInputStream;
45 import stream.FastqReadInputStream;
46 import stream.Read;
47 import structures.ListNum;
48 import structures.LongM;
49 
50 /**
51  * @author Brian Bushnell
52  * @date Jul 18, 2013
53  *
54  */
55 public final class Dedupe2 {
56 
main(String[] args)57 	public static void main(String[] args){
58 		int rbl0=Shared.bufferLen();
59 		Shared.setBufferLen(16);
60 
61 		Dedupe2 dd=new Dedupe2(args);
62 		dd.process();
63 
64 		Shared.setBufferLen(rbl0);
65 
66 		//Close the print stream if it was redirected
67 		Shared.closeStream(outstream);
68 	}
69 
Dedupe2(String[] args)70 	public Dedupe2(String[] args){
71 
72 		{//Preparse block for help, config files, and outstream
73 			PreParser pp=new PreParser(args, getClass(), true);
74 			args=pp.args;
75 			outstream=pp.outstream;
76 		}
77 
78 		ReadWrite.ZIPLEVEL=2;
79 		ReadWrite.USE_UNPIGZ=ReadWrite.USE_PIGZ=true;
80 		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
81 		Read.TO_UPPER_CASE=true;
82 
83 		boolean setMcsfs=false;
84 		int bandwidth_=-1;
85 		int k_=31;
86 		int subset_=0, subsetCount_=1;
87 		boolean ascending=false;
88 		Parser parser=new Parser();
89 
90 		for(int i=0; i<args.length; i++){
91 
92 			final String arg=args[i];
93 			String[] split=arg.split("=");
94 			String a=split[0].toLowerCase();
95 			String b=split.length>1 ? split[1] : null;
96 
97 			if(Parser.parseZip(arg, a, b)){
98 				//do nothing
99 			}else if(Parser.parseCommonStatic(arg, a, b)){
100 				//do nothing
101 			}else if(Parser.parseQuality(arg, a, b)){
102 				//do nothing
103 			}else if(Parser.parseFasta(arg, a, b)){
104 				//do nothing
105 			}else if(parser.parseQTrim(arg, a, b)){
106 				//do nothing
107 			}else if(parser.parseInterleaved(arg, a, b)){
108 				//do nothing
109 			}else if(a.equals("in") || a.equals("in1")){
110 				assert(b!=null) : "Bad parameter: "+arg;
111 				if(b.indexOf(',')>=0 && !new File(b).exists()){
112 					in1=b.split(",");
113 				}else{
114 					in1=new String[] {b};
115 				}
116 			}else if(a.equals("in2")){
117 				assert(b!=null) : "Bad parameter: "+arg;
118 				if(b.indexOf(',')>=0 && !new File(b).exists()){
119 					in2=b.split(",");
120 				}else{
121 					in2=new String[] {b};
122 				}
123 			}else if(a.equals("out")){
124 				out=b;
125 			}else if(a.equals("clusterfilepattern") || a.equals("pattern")){
126 				clusterFilePattern=b;
127 				assert(clusterFilePattern==null || clusterFilePattern.contains("%")) : "pattern must contain the % symbol.";
128 			}else if(a.equals("outbest")){
129 				outbest=b;
130 			}else if(a.equals("outd") || a.equals("outduplicate")){
131 				outdupe=b;
132 			}else if(a.equals("csf") || a.equals("clusterstatsfile")){
133 				outcsf=b;
134 			}else if(a.equals("dot") || a.equals("graph") || a.equals("outdot") || a.equals("outgraph")){
135 				outgraph=b;
136 			}else if(a.equals("mcsfs") || a.equals("minclustersizeforstats")){
137 				minClusterSizeForStats=Integer.parseInt(b);
138 			}else if(a.equals("mcs") || a.equals("minclustersize")){
139 				minClusterSize=Integer.parseInt(b);
140 				if(!setMcsfs){
141 					minClusterSizeForStats=minClusterSize;
142 				}
143 			}else if(a.equals("append") || a.equals("app")){
144 				append=ReadStats.append=Parse.parseBoolean(b);
145 			}else if(a.equals("overwrite") || a.equals("ow")){
146 				overwrite=Parse.parseBoolean(b);
147 			}else if(a.equals("sort")){
148 				if(b==null){sort=true;}
149 				else if(b.equalsIgnoreCase("a") || b.equalsIgnoreCase("ascending")){
150 					sort=true;
151 					ascending=true;
152 				}else if(b.equalsIgnoreCase("d") || b.equalsIgnoreCase("descending")){
153 					sort=true;
154 					ascending=false;
155 				}else if(b.equalsIgnoreCase("length")){
156 					sort=true;
157 					comparator=ReadLengthComparator.comparator;
158 				}else if(b.equalsIgnoreCase("quality")){
159 					sort=true;
160 					comparator=ReadQualityComparator.comparator;
161 				}else if(b.equalsIgnoreCase("name")){
162 					sort=true;
163 					comparator=ReadComparatorName.comparator;
164 				}else if(b.equalsIgnoreCase("id")){
165 					sort=true;
166 					comparator=ReadComparatorID.comparator;
167 				}else{
168 					sort=Parse.parseBoolean(b);
169 				}
170 			}else if(a.equals("ascending")){
171 				ascending=Parse.parseBoolean(b);
172 			}else if(a.equals("ordered")){
173 				boolean x=Parse.parseBoolean(b);
174 				if(x){
175 					ascending=true;
176 					sort=true;
177 					comparator=ReadComparatorID.comparator;
178 				}else{
179 					//do nothing
180 				}
181 			}else if(a.equals("arc") || a.equals("absorbrc") || a.equals("trc") || a.equals("testrc")){
182 				ignoreReverseComplement=!Parse.parseBoolean(b);
183 			}else if(a.equals("ac") || a.equals("absorbcontainment") || a.equals("absorbcontainments") || a.equals("tc") || a.equals("testcontainment") || a.equals("containment")){
184 				absorbContainment=Parse.parseBoolean(b);
185 			}else if(a.equals("am") || a.equals("absorbmatch") || a.equals("absorbmatches") || a.equals("tm") || a.equals("testmatch")){
186 				absorbMatch=Parse.parseBoolean(b);
187 			}else if(a.equals("ao") || a.equals("absorboverlap") || a.equals("absorboverlaps") || a.equals("to") || a.equals("testoverlap")){
188 				absorbOverlap=Parse.parseBoolean(b);
189 			}else if(a.equals("fo") || a.equals("findoverlap") || a.equals("findoverlaps")){
190 				findOverlaps=Parse.parseBoolean(b);
191 			}else if(a.equals("c") || a.equals("cluster") || a.equals("clusters")){
192 				makeClusters=Parse.parseBoolean(b);
193 			}else if(a.equals("fmj") || a.equals("fixmultijoin") || a.equals("fixmultijoins")){
194 				fixMultiJoins=Parse.parseBoolean(b);
195 			}else if(a.equals("fcc") || a.equals("fixcanoncontradiction") || a.equals("fixcanoncontradictions")){
196 				fixCanonContradictions=Parse.parseBoolean(b);
197 			}else if(a.equals("foc") || a.equals("fixoffsetcontradiction") || a.equals("fixoffsetcontradictions")){
198 				fixOffsetContradictions=Parse.parseBoolean(b);
199 			}else if(a.equals("pto") || a.equals("preventtransitiveoverlap") || a.equals("preventtransitiveoverlaps")){
200 				preventTransitiveOverlaps=Parse.parseBoolean(b);
201 			}else if(a.equals("pbr") || a.equals("pickbestrepresentative")){
202 				pickBestRepresentativePerCluster=Parse.parseBoolean(b);
203 			}else if(a.equals("mst") || a.equals("maxspanningtree")){
204 				maxSpanningTree=Parse.parseBoolean(b);
205 			}else if(a.equals("cc") || a.equals("canonicizecluster") || a.equals("canonicizeclusters")){
206 				canonicizeClusters=Parse.parseBoolean(b);
207 			}else if(a.equals("pc") || a.equals("processcluster") || a.equals("processclusters")){
208 				processClusters=Parse.parseBoolean(b);
209 			}else if(a.equals("rnc") || a.equals("renamecluster") || a.equals("renameclusters")){
210 				renameClusters=Parse.parseBoolean(b);
211 				if(renameClusters){storeName=false;}
212 			}else if(a.equals("rc") || a.equals("removecycles") || a.equals("removecycle")){
213 				removeCycles=Parse.parseBoolean(b);
214 			}else if(a.equals("uo") || a.equals("uniqueonly")){
215 				UNIQUE_ONLY=Parse.parseBoolean(b);
216 			}else if(a.equals("rmn") || a.equals("requirematchingnames")){
217 				REQUIRE_MATCHING_NAMES=Parse.parseBoolean(b);
218 			}else if(a.equals("ngn") || a.equals("numbergraphnodes")){
219 				NUMBER_GRAPH_NODES=Parse.parseBoolean(b);
220 			}else if(a.equals("addpairnum")){
221 				ADD_PAIRNUM_TO_NAME=Parse.parseBoolean(b);
222 			}else if(a.equals("hashns")){
223 				HASH_NS=Parse.parseBoolean(b);
224 			}else if(a.equals("pn") || a.equals("prefixname")){
225 //				PREFIX_NAME=Parse.parseBoolean(b);
226 			}else if(a.equals("k")){
227 				k_=Integer.parseInt(b);
228 				assert(k_>0 && k_<32) : "k must be between 1 and 31; default is 31, and lower values are slower.";
229 			}else if(a.equals("minscaf") || a.equals("ms")){
230 				MINSCAF=FastaReadInputStream.MIN_READ_LEN=Integer.parseInt(b);
231 			}else if(a.equals("mlp") || a.equals("minlengthpercent")){
232 				minLengthPercent=Float.parseFloat(b);
233 			}else if(a.equals("mop") || a.equals("minoverlappercent")){
234 				minOverlapPercentCluster=minOverlapPercentMerge=Float.parseFloat(b);
235 			}else if(a.equals("mopc") || a.equals("minoverlappercentcluster")){
236 				minOverlapPercentCluster=Float.parseFloat(b);
237 			}else if(a.equals("mopm") || a.equals("minoverlappercentmerge")){
238 				minOverlapPercentMerge=Float.parseFloat(b);
239 			}else if(a.equals("mo") || a.equals("minoverlap")){
240 				minOverlapCluster=minOverlapMerge=Integer.parseInt(b);
241 			}else if(a.equals("moc") || a.equals("minoverlapcluster")){
242 				minOverlapCluster=Integer.parseInt(b);
243 			}else if(a.equals("mom") || a.equals("minoverlapmerge")){
244 				minOverlapMerge=Integer.parseInt(b);
245 			}else if(a.equals("rt") || a.equals("rigoroustransitive")){
246 				rigorousTransitive=Parse.parseBoolean(b);
247 			}else if(a.equals("e") || a.equals("maxedits") || a.equals("edits") || a.equals("edist")){
248 				maxEdits=Integer.parseInt(b);
249 			}else if(a.equals("s") || a.equals("maxsubs") || a.equals("maxsubstitutions") || a.equals("hdist")){
250 				maxSubs=Integer.parseInt(b);
251 			}else if(a.equals("bw") || a.equals("bandwidth")){
252 				bandwidth_=Integer.parseInt(b);
253 			}else if(a.equals("mid") || a.equals("minidentity")){
254 				minIdentity=Float.parseFloat(b);
255 				minIdentityMult=(minIdentity==100f ? 0 : (100f-minIdentity)/100f);
256 			}else if(a.equals("threads") || a.equals("t")){
257 				THREADS=(b==null || b.equalsIgnoreCase("auto") ? Shared.threads() : Integer.parseInt(b));
258 			}else if(a.equals("showspeed") || a.equals("ss")){
259 				showSpeed=Parse.parseBoolean(b);
260 			}else if(a.equals("verbose")){
261 				verbose=Parse.parseBoolean(b);
262 //				BandedAligner.verbose=verbose;
263 			}else if(a.equals("contigbreak") || (arg.contains("=") && (a.equals("n") || a.equals("-n")))){
264 				maxNs=Integer.parseInt(b);
265 			}else if(a.equals("reads") || a.startsWith("maxreads")){
266 				maxReads=Parse.parseKMG(b);
267 			}else if(a.equals("sn") || a.equals("storename") || a.equals("storenames") || a.equals("keepnames")){
268 				storeName=Parse.parseBoolean(b);
269 			}else if(a.equals("ssx") || a.equals("storesuffix") || a.equals("storesuffixes")){
270 				storeSuffix=Parse.parseBoolean(b);
271 			}else if(a.equals("numaffixmaps") || a.equals("nam")){
272 				numAffixMaps=Integer.parseInt(b);
273 			}else if(a.equals("mac") || a.equals("maxaffixcopies")){
274 				maxAffixCopies=Integer.parseInt(b);
275 			}else if(a.equals("me") || a.equals("maxedges")){
276 				maxEdges=Integer.parseInt(b);
277 				maxEdges2=maxEdges*2;
278 				if(maxEdges2<1){maxEdges2=Integer.MAX_VALUE-1;}
279 			}else if(a.equals("ignoreaffix1") || a.equals("ia1")){
280 				ignoreAffix1=Parse.parseBoolean(b);
281 			}else if(a.equals("parsedepth") || a.equals("pd")){
282 				parseDepth=Parse.parseBoolean(b);
283 			}else if(a.equals("printlengthinedges") || a.equals("ple")){
284 				printLengthInEdges=Parse.parseBoolean(b);
285 			}else if(a.equals("depthmult") || a.equals("depthratio") || a.equals("dr")){
286 				depthRatio=Float.parseFloat(b);
287 				if(depthRatio<=0){
288 					parseDepth=false;
289 				}else{
290 					parseDepth=true;
291 					assert(depthRatio>0);
292 					if(depthRatio<1){depthRatio=1/depthRatio;}
293 				}
294 			}else if(a.equals("storequality") || a.equals("sq")){
295 				storeQuality=Parse.parseBoolean(b);
296 			}else if(a.equals("exact") || a.equals("ex")){
297 				exact=Parse.parseBoolean(b);
298 			}else if(a.equals("uniquenames") || a.equals("un")){
299 				uniqueNames=Parse.parseBoolean(b);
300 			}else if(a.equals("ftl") || a.equals("forcetrimleft")){
301 				forceTrimLeft=Integer.parseInt(b);
302 			}else if(a.equals("ftr") || a.equals("forcetrimright")){
303 				forceTrimRight=Integer.parseInt(b);
304 			}else if(a.equals("subset") || a.equals("sst")){
305 				subset_=Integer.parseInt(b);
306 			}else if(a.equals("subsets") || a.equals("subsetcount") || a.equals("sstc")){
307 				subsetCount_=Integer.parseInt(b);
308 			}else if(i==0 && in1==null && arg.indexOf('=')<0 && arg.lastIndexOf('.')>0){
309 				String c=args[i];
310 				if(c.indexOf(',')>=0 && !new File(c).exists()){
311 					in1=c.split(",");
312 				}else{
313 					in1=new String[] {c};
314 				}
315 			}else if(i==1 && out==null && arg.indexOf('=')<0 && arg.lastIndexOf('.')>0){
316 				out=args[i];
317 			}else{
318 				throw new RuntimeException("Unknown parameter "+args[i]);
319 			}
320 		}
321 
322 		comparator.setAscending(ascending);
323 
324 		{//Process parser fields
325 			Parser.processQuality();
326 			qTrimLeft=parser.qtrimLeft;
327 			qTrimRight=parser.qtrimRight;
328 			trimQ=parser.trimq;
329 			trimE=parser.trimE();
330 		}
331 
332 		if(verbose){
333 			ReadWrite.verbose=ConcurrentGenericReadInputStream.verbose=ConcurrentReadOutputStream.verbose=ByteFile1.verbose=ByteFile2.verbose=FastqReadInputStream.verbose=true;
334 		}
335 //		verbose=false;
336 
337 		k=k_;
338 		k2=k-1;
339 		subset=subset_;
340 		subsetCount=subsetCount_;
341 		subsetMode=subsetCount>1;
342 		assert(subset>=0 && subset<subsetCount) : "subset="+subset+", subsetCount="+subsetCount;
343 
344 		BandedAligner.penalizeOffCenter=true;
345 
346 		if(maxSpanningTree){removeCycles=fixMultiJoins=false;}
347 		if(absorbOverlap){processClusters=true;}
348 		if(processClusters || renameClusters || maxSpanningTree){makeClusters=true;}
349 		if(makeClusters){findOverlaps=true;}
350 		if(renameClusters){uniqueNames=/*storeName=*/false;}
351 
352 		if(bandwidth_>-1){
353 			bandwidth=Tools.min(bandwidth_, 2*maxEdits+1);
354 			customBandwidth=(bandwidth<2*maxEdits+1);
355 		}else{
356 			bandwidth=2*maxEdits+1;
357 			customBandwidth=false;
358 		}
359 		maxSubs=Tools.max(maxSubs, maxEdits);
360 		if(maxSubs>0 || minIdentity<100 || findOverlaps){storeSuffix=true;}
361 
362 		assert(FastaReadInputStream.settingsOK());
363 
364 		if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
365 
366 		for(int i=0; i<in1.length; i++){
367 			if(in1[i].equalsIgnoreCase("stdin") && !new File(in1[i]).exists()){in1[i]="stdin.fa";}
368 		}
369 
370 //		assert(false) : Arrays.toString(in);
371 
372 //		if(!setOut && clusterFilePattern==null){out="stdout.fa";}
373 //		else
374 //			if("stdout".equalsIgnoreCase(out) || "standarddout".equalsIgnoreCase(out)){
375 //			out="stdout.fa";
376 //			outstream=System.err;
377 //		}
378 		if(!Tools.canWrite(out, overwrite)){throw new RuntimeException("Output file "+out+" already exists, and overwrite="+overwrite);}
379 
380 		for(int i=0; i<in1.length; i++){
381 			assert(!in1[i].equalsIgnoreCase(out));
382 		}
383 //		assert(false) : "\nabsorbContainment="+absorbContainment+", findOverlaps="+findOverlaps+", absorbOverlap="+absorbOverlap+"\n"+
384 //			"processClusters="+processClusters+", renameClusters="+renameClusters+", makeClusters="+makeClusters+", uniqueNames="+uniqueNames+", storeName="+storeName;
385 		if(absorbContainment || findOverlaps){
386 //			assert(false);
387 			affixMaps=new HashMap[numAffixMaps];
388 			for(int i=0; i<numAffixMaps; i++){
389 				affixMaps[i]=new HashMap<LongM, ArrayList<Unit>>(4000000);
390 			}
391 		}
392 //		assert(false) : absorbContainment+", "+(affixMap==null);
393 
394 		if(outdupe==null){
395 			dupeWriter=null;
396 		}else{
397 			FileFormat ff=FileFormat.testOutput(outdupe, FileFormat.FASTA, null, true, overwrite, append, false);
398 			dupeWriter=new ByteStreamWriter(ff);
399 		}
400 	}
401 
process()402 	public void process(){
403 
404 		Timer t=new Timer();
405 
406 		boolean dq0=FASTQ.DETECT_QUALITY;
407 		boolean ti0=FASTQ.TEST_INTERLEAVED;
408 
409 		process2();
410 
411 		FASTQ.DETECT_QUALITY=dq0;
412 		FASTQ.TEST_INTERLEAVED=ti0;
413 
414 		t.stop();
415 
416 		if(showSpeed){
417 			outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
418 		}
419 
420 		if(errorState){
421 			throw new RuntimeException("Dedupe2 terminated in an error state; the output may be corrupt.");
422 		}
423 	}
424 
process2()425 	public void process2(){
426 		if(dupeWriter!=null){dupeWriter.start();}
427 //		assert(false) : out;
428 		Timer t=new Timer();
429 
430 		if(DISPLAY_PROGRESS){
431 			outstream.println("Initial:");
432 			Shared.printMemory();
433 			outstream.println();
434 		}
435 
436 		processMatches(t);
437 
438 		forceTrimLeft=forceTrimRight=-1;
439 		qTrimLeft=qTrimRight=false;
440 
441 		if(absorbContainment){
442 			processContainments(t);
443 		}
444 
445 		if(dupeWriter!=null){dupeWriter.poisonAndWait();}
446 
447 		if(findOverlaps){
448 			findOverlaps(t);
449 
450 			killAffixMaps();
451 
452 			if(processClusters || renameClusters || maxSpanningTree){codeMap=null;}
453 
454 			if(maxSpanningTree){
455 				processMst(t);
456 			}
457 
458 			if(processClusters){
459 				processClusters(t, absorbOverlap);
460 			}
461 //			if(renameClusters){
462 //				renameClusters(t);
463 //			}
464 //			assert(false) : (codeMap==null)+", "+(affixMap1==null)+", "+processedClusters;
465 		}
466 
467 		outstream.println("Input:                  \t"+readsProcessed+" reads \t\t"+basesProcessed+" bases.");
468 
469 		if(absorbMatch){
470 			outstream.println("Duplicates:             \t"+matches+" reads ("+String.format(Locale.ROOT, "%.2f",matches*100.0/readsProcessed)+"%) \t"+
471 					baseMatches+" bases ("+String.format(Locale.ROOT, "%.2f",baseMatches*100.0/basesProcessed)+"%)     \t"+collisions+" collisions.");
472 		}
473 		if(absorbContainment){
474 			outstream.println("Containments:           \t"+containments+" reads ("+String.format(Locale.ROOT, "%.2f",containments*100.0/readsProcessed)+"%) \t"+
475 					baseContainments+" bases ("+String.format(Locale.ROOT, "%.2f",baseContainments*100.0/basesProcessed)+"%)    \t"+containmentCollisions+" collisions.");
476 		}
477 		if(findOverlaps){
478 			outstream.println("Overlaps:               \t"+overlaps+" reads ("+String.format(Locale.ROOT, "%.2f",overlaps*100.0/readsProcessed)+"%) \t"+
479 					baseOverlaps+" bases ("+String.format(Locale.ROOT, "%.2f",baseOverlaps*100.0/basesProcessed)+"%)    \t"+overlapCollisions+" collisions.");
480 		}
481 //		outstream.println("Result:                 \t"+(addedToMain-containments)+" reads \t\t"+(basesProcessed-baseMatches-baseContainments)+" bases.");
482 
483 		long outReads=(addedToMain-containments);
484 		if(UNIQUE_ONLY){outReads=readsProcessed-matches-containments;}
485 		long outBases=(basesProcessed-baseMatches-baseContainments);
486 		outstream.println("Result:                 \t"+outReads+" reads ("+String.format(Locale.ROOT, "%.2f",outReads*100.0/readsProcessed)+"%) \t"+
487 				outBases+" bases ("+String.format(Locale.ROOT, "%.2f",outBases*100.0/basesProcessed)+"%)");
488 
489 		outstream.println("");
490 
491 		if(out!=null || clusterFilePattern!=null || outbest!=null || outgraph!=null || outcsf!=null){
492 			writeOutput(outcsf, t);
493 		}
494 
495 	}
496 
killAffixMaps()497 	private void killAffixMaps(){
498 		if(affixMaps==null){return;}
499 		for(int i=0; i<numAffixMaps; i++){
500 			if(affixMaps[i]!=null){affixMaps[i].clear();}
501 			affixMaps[i]=null;
502 		}
503 		affixMaps=null;
504 	}
505 
makeCrisArray(ArrayList<Read> list)506 	private ConcurrentReadInputStream[] makeCrisArray(ArrayList<Read> list){
507 		final ConcurrentReadInputStream[] array;
508 
509 		if(list!=null){
510 			array=new ConcurrentReadInputStream[] {new ConcurrentCollectionReadInputStream(list, null, -1)};
511 			array[0].start(); //This deadlocks if ConcurrentReadInputStream extends Thread rather than spawning a new thread.
512 		}else{
513 			array=new ConcurrentReadInputStream[in1.length];
514 			multipleInputFiles=array.length>1;
515 			for(int i=0; i<in1.length; i++){
516 				if(verbose){System.err.println("Creating cris for "+in1[i]);}
517 
518 				final ConcurrentReadInputStream cris;
519 				{
520 					FileFormat ff1=FileFormat.testInput(in1[i], FileFormat.FASTA, null, !multipleInputFiles || ReadWrite.USE_UNPIGZ, true);
521 					FileFormat ff2=(in2==null || in2.length<=i ? null : FileFormat.testInput(in2[i], FileFormat.FASTA, null, !multipleInputFiles || ReadWrite.USE_UNPIGZ, true));
522 					cris=ConcurrentReadInputStream.getReadInputStream(maxReads, ff1.samOrBam(), ff1, ff2);
523 					cris.start();
524 					if(cris.paired()){
525 						THREADS=1;//Temp fix for losing reads when multithreaded and paired
526 						if(absorbContainment){
527 							System.err.println("Set absorbContainment to false because it is not currently supported for paired reads.");
528 							absorbContainment=false;
529 						}
530 					}
531 				}
532 				array[i]=cris;
533 			}
534 		}
535 		return array;
536 	}
537 
processMatches(Timer t)538 	private void processMatches(Timer t){
539 		crisa=makeCrisArray(null);
540 
541 		ArrayList<HashThread> alht=new ArrayList<HashThread>(THREADS);
542 		for(int i=0; i<THREADS; i++){alht.add(new HashThread(true, (absorbContainment|findOverlaps), absorbMatch, false, false));}
543 		for(HashThread ht : alht){ht.start();}
544 		for(HashThread ht : alht){
545 			while(ht.getState()!=Thread.State.TERMINATED){
546 				try {
547 					ht.join();
548 				} catch (InterruptedException e) {
549 					// TODO Auto-generated catch block
550 					e.printStackTrace();
551 				}
552 			}
553 			matches+=ht.matchesT;
554 			collisions+=ht.collisionsT;
555 			containments+=ht.containmentsT;
556 			containmentCollisions+=ht.containmentCollisionsT;
557 			baseContainments+=ht.baseContainmentsT;
558 			baseMatches+=ht.baseMatchesT;
559 			addedToMain+=ht.addedToMainT;
560 			readsProcessed+=ht.readsProcessedT;
561 			basesProcessed+=ht.basesProcessedT;
562 		}
563 		alht.clear();
564 
565 		if(verbose){System.err.println("Attempting to close input streams (1).");}
566 		for(ConcurrentReadInputStream cris : crisa){
567 			errorState|=ReadWrite.closeStream(cris);
568 		}
569 		crisa=null;
570 
571 		if(DISPLAY_PROGRESS){
572 			t.stop();
573 			outstream.println("Found "+matches+" duplicates.");
574 			outstream.println("Finished exact matches.    Time: "+t);
575 			Shared.printMemory();
576 			if(verbose){outstream.println(affixMaps[0]);}
577 			outstream.println();
578 			t.start();
579 		}
580 	}
581 
processContainments(Timer t)582 	private void processContainments(Timer t){
583 		ArrayList<Read> list=new ArrayList<Read>((int)addedToMain);
584 		for(ArrayList<Unit> alu : codeMap.values()){
585 			for(Unit u : alu){
586 				assert(u.r.mate==null) : "Containments are not currently supported with paired reads.";
587 				if(u.valid() && u.r.pairnum()==0){list.add(u.r);}
588 			}
589 		}
590 
591 		//	if(minLengthPercent>0){
592 		//		if(verbose){System.err.println("Sorting.");}
593 		//		Shared.sort(list, ReadLengthComparator.comparator);
594 		//		Collections.reverse(list);
595 		//		assert(list.isEmpty() || list.get(0).length()<=list.get(list.size()-1).length()) :
596 		//			list.get(0).length()+", "+list.get(list.size()-1).length();
597 		//	}
598 
599 		crisa=makeCrisArray(subsetMode ? null : list);
600 
601 		ArrayList<HashThread> alht=new ArrayList<HashThread>(THREADS);
602 		for(int i=0; i<THREADS; i++){alht.add(new HashThread(false, false, false, true, false));}
603 
604 		for(HashThread ht : alht){ht.start();}
605 		for(HashThread ht : alht){
606 			while(ht.getState()!=Thread.State.TERMINATED){
607 				try {
608 					ht.join();
609 				} catch (InterruptedException e) {
610 					// TODO Auto-generated catch block
611 					e.printStackTrace();
612 				}
613 			}
614 			assert(ht.matchesT==0);
615 			assert(ht.collisionsT==0);
616 			assert(ht.baseMatchesT==0);
617 			assert(ht.addedToMainT==0);
618 //			assert(ht.readsProcessedT==0);
619 //			assert(ht.basesProcessedT==0);
620 			//		matches+=ht.matchesT;
621 			//		collisions+=ht.collisionsT;
622 			containments+=ht.containmentsT;
623 			containmentCollisions+=ht.containmentCollisionsT;
624 			baseContainments+=ht.baseContainmentsT;
625 			//		baseMatches+=ht.baseMatchesT;
626 			//		addedToMain+=ht.addedToMainT;
627 			//		readsProcessed+=ht.readsProcessedT;
628 			//		basesProcessed+=ht.basesProcessedT;
629 		}
630 		alht.clear();
631 		if(verbose){System.err.println("Attempting to close input streams (2).");}
632 		for(ConcurrentReadInputStream cris : crisa){
633 			errorState|=ReadWrite.closeStream(cris);
634 		}
635 
636 		if(DISPLAY_PROGRESS){
637 			t.stop();
638 			outstream.println("Found "+containments+" contained sequences.");
639 			outstream.println("Finished containment.      Time: "+t);
640 			Shared.printMemory();
641 			outstream.println();
642 			t.start();
643 		}
644 		crisa=null;
645 		if(!findOverlaps){
646 			killAffixMaps();
647 		}
648 
649 		long x=removeInvalid(list);
650 		list.clear();
651 
652 		if(DISPLAY_PROGRESS){
653 			t.stop();
654 			outstream.println("Removed "+x+" invalid entries.");
655 			outstream.println("Finished invalid removal.  Time: "+t);
656 			Shared.printMemory();
657 			outstream.println();
658 			t.start();
659 		}
660 	}
661 
findOverlaps(Timer t)662 	private void findOverlaps(Timer t){
663 
664 		ArrayList<Read> list=new ArrayList<Read>((int)addedToMain);
665 		for(ArrayList<Unit> alu : codeMap.values()){
666 			for(Unit u : alu){
667 				if(u.valid() && u.r.pairnum()==0){
668 					u.unitID=list.size();
669 					list.add(u.r);
670 					if(u.r.mate!=null){
671 						Unit u2=(Unit)u.r.mate.obj;
672 						u2.unitID=u.unitID;
673 					}
674 				}else{
675 					u.unitID=Integer.MAX_VALUE;
676 				}
677 			}
678 		}
679 
680 		if(preventTransitiveOverlaps){
681 			clusterNumbers=new AtomicIntegerArray(list.size());
682 			for(int i=0; i<clusterNumbers.length(); i++){
683 				clusterNumbers.set(i, i);
684 			}
685 		}
686 
687 		crisa=makeCrisArray(subsetMode ? null : list);
688 
689 		ArrayList<HashThread> alht=new ArrayList<HashThread>(THREADS);
690 		for(int i=0; i<THREADS; i++){alht.add(new HashThread(false, false, false, false, true));}
691 
692 		for(HashThread ht : alht){ht.start();}
693 		for(HashThread ht : alht){
694 			while(ht.getState()!=Thread.State.TERMINATED){
695 				try {
696 					ht.join();
697 				} catch (InterruptedException e) {
698 					// TODO Auto-generated catch block
699 					e.printStackTrace();
700 				}
701 			}
702 			assert(ht.matchesT==0);
703 			assert(ht.collisionsT==0);
704 			assert(ht.baseMatchesT==0);
705 			assert(ht.addedToMainT==0);
706 
707 			overlaps+=ht.overlapsT;
708 			baseOverlaps+=ht.baseOverlapsT;
709 			overlapCollisions+=ht.overlapCollisionsT;
710 		}
711 		alht.clear();
712 		if(verbose){System.err.println("Attempting to close input streams (3).");}
713 		for(ConcurrentReadInputStream cris : crisa){
714 			errorState|=ReadWrite.closeStream(cris);
715 		}
716 
717 		if(DISPLAY_PROGRESS){
718 			t.stop();
719 			outstream.println("Found "+overlaps+" overlaps.");
720 			outstream.println("Finished finding overlaps. Time: "+t);
721 			Shared.printMemory();
722 			outstream.println();
723 			t.start();
724 		}
725 
726 		crisa=null;
727 
728 		if(makeClusters){
729 			int intransitive=0, redundant=0;
730 			assert((intransitive=countIntransitive(t, list, rigorousTransitive))==0);
731 			assert((redundant=countRedundant(t, list))==0);
732 			long overlaps=countOverlaps(t, list);
733 			assert(intransitive==0);
734 			assert(redundant==0);
735 //			makeTransitive(t, list, rigorousTransitive);
736 			if(clusterQueue==null){
737 				clusterQueue=new ArrayDeque<ArrayList<Unit>>(list.size()/4+1);
738 				processedClusters=new ArrayList<ArrayList<Unit>>();
739 			}else{
740 				assert(clusterQueue.isEmpty());
741 			}
742 			makeClusters(t, list);
743 		}
744 
745 		list.clear();
746 	}
747 
makeTransitive(Timer t, ArrayList<Read> list, boolean rigorous)748 	private long makeTransitive(Timer t, ArrayList<Read> list, boolean rigorous){
749 		assert(false) : "No longer needed.";
750 		long added=0;
751 		for(Read r : list){
752 			assert(r!=null);
753 			Unit u=(Unit) r.obj;
754 			assert(u!=null);
755 			assert(u.valid());
756 //			outstream.println("Considering "+r.id+"; valid="+u.valid()+", overlaps="+(u.overlapList==null ? "null" : u.overlapList.size()));
757 			if(u.valid()){
758 
759 				if(u.overlapList!=null){
760 					for(Overlap o : u.overlapList){
761 						Unit u2=(o.u1==u ? o.u2 : o.u1);
762 						assert(u2!=u);
763 						if(u2.overlapList==null){
764 							u2.overlapList=new ArrayList<Overlap>(2);
765 							u2.overlapList.add(o);
766 						}else{
767 							boolean found=false;
768 							if(rigorous){
769 								found=u2.overlapList.contains(o);
770 							}else{
771 								for(Overlap o2 : u2.overlapList){
772 									if(o2.u1==u || o2.u2==u){found=true; break;}
773 								}
774 							}
775 							if(!found){
776 								added++;
777 								u2.overlapList.add(o);
778 							}
779 						}
780 					}
781 				}
782 			}
783 		}
784 
785 		for(Read r : list){
786 			Unit u=(Unit) r.obj;
787 			if(u.valid()){
788 				assert(u.isTransitive());
789 			}
790 		}
791 
792 		if(DISPLAY_PROGRESS){
793 			t.stop();
794 			outstream.println("Added overlaps: "+added);
795 			outstream.println("Made overlaps transitive.  Time: "+t);
796 			Shared.printMemory();
797 			outstream.println();
798 			t.start();
799 		}
800 		return added;
801 	}
802 
countIntransitive(Timer t, ArrayList<Read> list, boolean rigorous)803 	private int countIntransitive(Timer t, ArrayList<Read> list, boolean rigorous){
804 		if(!countTransitive){return 0;}
805 		int transitive=0, intransitive=0;
806 		for(Read r : list){
807 			assert(r!=null);
808 			Unit u=(Unit) r.obj;
809 			assert(u!=null);
810 			assert(u.valid());
811 //			outstream.println("Considering "+r.id+"; valid="+u.valid()+", overlaps="+(u.overlapList==null ? "null" : u.overlapList.size()));
812 			if(u.valid()){
813 				if(rigorous ? u.isPerfectlyTransitive() : u.isTransitive()){
814 					transitive++;
815 				}else{
816 					intransitive++;
817 				}
818 			}
819 		}
820 
821 		if(DISPLAY_PROGRESS){
822 			t.stop();
823 			outstream.println("Intransitive:   "+intransitive+", \ttransitive: "+transitive);
824 			outstream.println("Checked transitivity.      Time: "+t);
825 			Shared.printMemory();
826 			outstream.println();
827 			t.start();
828 		}
829 
830 		return intransitive;
831 	}
832 
countRedundant(Timer t, ArrayList<Read> list)833 	private int countRedundant(Timer t, ArrayList<Read> list){
834 		if(!countRedundant){return 0;}
835 		int redundant=0, nonredundant=0;
836 		for(Read r : list){
837 			assert(r!=null);
838 			Unit u=(Unit) r.obj;
839 			assert(u!=null);
840 			assert(u.valid());
841 //			outstream.println("Considering "+r.id+"; valid="+u.valid()+", overlaps="+(u.overlapList==null ? "null" : u.overlapList.size()));
842 			if(u.valid()){
843 				if(u.isNonRedundant()){
844 					nonredundant++;
845 				}else{
846 					redundant++;
847 				}
848 			}
849 		}
850 
851 		if(DISPLAY_PROGRESS){
852 			t.stop();
853 			outstream.println("Redundant:      "+redundant+",  \tnonredundant: "+nonredundant);
854 			outstream.println("Checked redundancy.        Time: "+t);
855 			Shared.printMemory();
856 			outstream.println();
857 			t.start();
858 		}
859 		return redundant;
860 	}
861 
countOverlaps(Timer t, ArrayList<Read> list)862 	private long countOverlaps(Timer t, ArrayList<Read> list){
863 
864 		long overlaps=0, length=0;
865 		for(Read r : list){
866 			assert(r!=null);
867 			Unit u=(Unit) r.obj;
868 			assert(u!=null);
869 			assert(u.valid());
870 //			outstream.println("Considering "+r.id+"; valid="+u.valid()+", overlaps="+(u.overlapList==null ? "null" : u.overlapList.size()));
871 			if(u.valid() && u.overlapList!=null){
872 				for(Overlap o : u.overlapList){
873 					overlaps++;
874 					length+=o.overlapLen;
875 				}
876 			}
877 		}
878 
879 		if(DISPLAY_PROGRESS){
880 			t.stop();
881 			outstream.println("Overlaps:       "+overlaps+",  \tlength: "+length);
882 			outstream.println("Counted overlaps.          Time: "+t);
883 			Shared.printMemory();
884 			outstream.println();
885 			t.start();
886 		}
887 		return overlaps;
888 	}
889 
fillClusterSizeMatrix(ArrayList<ArrayList<Unit>> clusters, long[][] clusterSize)890 	private long fillClusterSizeMatrix(ArrayList<ArrayList<Unit>> clusters, long[][] clusterSize){
891 		int max=0;
892 		for(ArrayList<Unit> cluster : clusters){
893 			final int cs=Tools.min(clusterSize.length-1, cluster.size());
894 			{
895 				long reads=0, bases=0;
896 				for(Unit u2 : cluster){
897 					reads++;
898 					bases+=u2.length();
899 				}
900 				clusterSize[0][cs]++;
901 				clusterSize[1][cs]+=reads;
902 				clusterSize[2][cs]+=bases;
903 			}
904 			max=Tools.max(max, cluster.size());
905 		}
906 		return max;
907 	}
908 
makeClusters(Timer t, ArrayList<Read> list)909 	private void makeClusters(Timer t, ArrayList<Read> list){
910 
911 		final int clusterlen=70000;
912 		long[][] clusterSize=new long[3][clusterlen];
913 		int max=0;
914 		for(Read r : list){
915 			Unit u=(Unit) r.obj;
916 
917 			if(!u.clustered()){
918 				ArrayList<Unit> cluster=u.makeCluster();
919 				if(cluster.size()>2){cluster.trimToSize();}
920 				if(cluster.size()==1 || (!processClusters && !maxSpanningTree)){processedClusters.add(cluster);}
921 				else{clusterQueue.add(cluster);}
922 				final int cs=Tools.min(clusterlen-1, cluster.size());
923 				{
924 					long reads=0, bases=0;
925 					for(Unit u2 : cluster){
926 						reads++;
927 						bases+=u2.length();
928 					}
929 					clusterSize[0][cs]++;
930 					clusterSize[1][cs]+=reads;
931 					clusterSize[2][cs]+=bases;
932 				}
933 				max=Tools.max(max, cluster.size());
934 			}
935 		}
936 
937 		if(DISPLAY_PROGRESS){
938 			t.stop();
939 			outstream.println(toClusterSizeString(clusterSize));
940 			outstream.println("\nLargest:          "+max);
941 			outstream.println("Finished making clusters.  Time: "+t);
942 			Shared.printMemory();
943 			outstream.println();
944 			t.start();
945 		}
946 
947 
948 		long x=removeInvalid(list);
949 
950 		if(DISPLAY_PROGRESS){
951 			t.stop();
952 			outstream.println("Removed "+x+" invalid entries.");
953 			outstream.println("Finished invalid removal.  Time: "+t);
954 			Shared.printMemory();
955 			outstream.println();
956 			t.start();
957 		}
958 	}
959 
toClusterSizeString(long[][] clusterSizeMatrix)960 	private String toClusterSizeString(long[][] clusterSizeMatrix){
961 
962 		long[] clusterSize=clusterSizeMatrix[0];
963 		long[] clusterReads=clusterSizeMatrix[1];
964 		long[] clusterBases=clusterSizeMatrix[2];
965 
966 		long totalClusters=Tools.sum(clusterSize);
967 
968 		long bigClusters=0;
969 		for(int i=minClusterSize; i<clusterSize.length; i++){
970 			bigClusters+=clusterSize[i];
971 		}
972 
973 		final int spaces=18;
974 		final int spaces2=spaces*2, spaces3=spaces*3;
975 
976 		final StringBuilder sb=new StringBuilder(100), sb2=new StringBuilder(1000);
977 		sb2.append("Clusters:");
978 		while(sb2.length()<spaces){sb2.append(' ');}
979 		sb2.append(totalClusters+(minClusterSize<2 ? "" : " ("+bigClusters+" of at least size "+minClusterSize+")")+"\n");
980 
981 		sb.append("Size Range");
982 		while(sb.length()<spaces){sb.append(' ');}
983 		sb.append("Clusters");
984 		while(sb.length()<spaces2){sb.append(' ');}
985 		sb.append("Reads");
986 		while(sb.length()<spaces3){sb.append(' ');}
987 		sb.append("Bases");
988 
989 		sb2.append('\n');
990 		sb2.append(sb);
991 		sb.setLength(0);
992 
993 		for(int i=0; i<clusterSize.length-1; i=Tools.max(i+1, i*2)){
994 			int a=i+1, b=i*2;
995 			if(i<2){
996 				sb.append(a);
997 				while(sb.length()<spaces){sb.append(' ');}
998 				sb.append(clusterSize[a]);
999 				while(sb.length()<spaces2){sb.append(' ');}
1000 				sb.append(clusterReads[a]);
1001 				while(sb.length()<spaces3){sb.append(' ');}
1002 				sb.append(clusterBases[a]);
1003 			}else if(b>=clusterSize.length){
1004 				long x=Tools.sum(clusterSize, a, clusterSize.length-1);
1005 				long y=Tools.sum(clusterReads, a, clusterSize.length-1);
1006 				long z=Tools.sum(clusterBases, a, clusterSize.length-1);
1007 				if(x>0){
1008 					sb.append(a+"+");
1009 					while(sb.length()<spaces){sb.append(' ');}
1010 					sb.append(x);
1011 					while(sb.length()<spaces2){sb.append(' ');}
1012 					sb.append(y);
1013 					while(sb.length()<spaces3){sb.append(' ');}
1014 					sb.append(z);
1015 				}
1016 			}else{
1017 				long x=Tools.sum(clusterSize, a, b);
1018 				long y=Tools.sum(clusterReads, a, b);
1019 				long z=Tools.sum(clusterBases, a, b);
1020 				if(x>0){
1021 					sb.append(a+"-"+b);
1022 					while(sb.length()<spaces){sb.append(' ');}
1023 					sb.append(x);
1024 					while(sb.length()<spaces2){sb.append(' ');}
1025 					sb.append(y);
1026 					while(sb.length()<spaces3){sb.append(' ');}
1027 					sb.append(z);
1028 				}
1029 			}
1030 			if(sb.length()>0){
1031 				sb2.append('\n');
1032 				sb2.append(sb);
1033 				sb.setLength(0);
1034 			}
1035 		}
1036 		return sb2.toString();
1037 	}
1038 
renameClusters(Timer t)1039 	private void renameClusters(Timer t){
1040 		assert(false) : "This is now unused; renaming is done at output time.";
1041 		int cnum=0;
1042 		final StringBuilder sb=new StringBuilder(64);
1043 		for(ArrayList<Unit> alu : processedClusters){
1044 			for(int i=0; i<alu.size(); i++){
1045 				Unit u=alu.get(i);
1046 				Read r=u.r;
1047 				sb.append("Cluster ");
1048 				sb.append(cnum);
1049 				sb.append(",contig ");
1050 				sb.append(i);
1051 				if(u.offsetValid()){
1052 					sb.append(",pos ");
1053 					sb.append(u.offset());
1054 				}
1055 				r.id=sb.toString();
1056 				sb.setLength(0);
1057 			}
1058 		}
1059 
1060 		if(DISPLAY_PROGRESS){
1061 			t.stop();
1062 			outstream.println("Finished cluster renaming. Time: "+t);
1063 			Shared.printMemory();
1064 			outstream.println();
1065 			t.start();
1066 		}
1067 	}
1068 
processMst(Timer t)1069 	private void processMst(Timer t){
1070 
1071 		if(DISPLAY_PROGRESS){outstream.println("Converting to Maximum Spanning Tree.");}
1072 
1073 		ArrayList<MstThread> alct=new ArrayList<MstThread>(THREADS);
1074 		for(int i=0; i<THREADS; i++){
1075 			alct.add(new MstThread());
1076 		}
1077 
1078 		long overlapsRemoved=0;
1079 		long overlapBasesRemoved=0;
1080 		long overlapsRetained=0;
1081 		long overlapBasesRetained=0;
1082 
1083 		for(MstThread ct : alct){ct.start();}
1084 		for(MstThread ct : alct){
1085 			while(ct.getState()!=Thread.State.TERMINATED){
1086 				try {
1087 					ct.join();
1088 				} catch (InterruptedException e) {
1089 					e.printStackTrace();
1090 				}
1091 			}
1092 
1093 			overlapsRemoved+=ct.overlapsRemovedT;
1094 			overlapBasesRemoved+=ct.overlapBasesRemovedT;
1095 			overlapsRetained+=ct.overlapsRetainedT;
1096 			overlapBasesRetained+=ct.overlapBasesRetainedT;
1097 		}
1098 		assert(clusterQueue.isEmpty());
1099 		if(processClusters){
1100 			for(MstThread ct : alct){
1101 				clusterQueue.addAll(ct.processedT);
1102 				ct.processedT.clear();
1103 				ct.processedT=null;
1104 			}
1105 		}else{
1106 			for(MstThread ct : alct){
1107 				processedClusters.addAll(ct.processedT);
1108 				ct.processedT.clear();
1109 				ct.processedT=null;
1110 			}
1111 			clusterQueue=null;
1112 		}
1113 		alct.clear();
1114 
1115 		assert(affixMaps==null);
1116 		killAffixMaps();
1117 
1118 
1119 		if(DISPLAY_PROGRESS){
1120 			t.stop();
1121 			outstream.println("Removed "+(overlapsRemoved)+" edges ("+overlapBasesRemoved+" bases).");
1122 			outstream.println("Retained "+(overlapsRetained)+" edges ("+overlapBasesRetained+" bases).");
1123 
1124 //			outstream.println("\nAfter conversion to Maximum Spanning Tree:");
1125 //			final int[] clusterSize=new int[8200];
1126 //			int max=0;
1127 //			for(ArrayList<Unit> cluster : processedClusters){
1128 //				clusterSize[Tools.min(clusterSize.length-1, cluster.size())]++;
1129 //				max=Tools.max(max, cluster.size());
1130 //			}
1131 //			outstream.println(toClusterSizeString(clusterSize));
1132 //			outstream.println("Largest:          "+max);
1133 
1134 			outstream.println("Finished MST conversion.   Time: "+t);
1135 			Shared.printMemory();
1136 			outstream.println();
1137 			t.start();
1138 		}
1139 	}
1140 
processClusters(Timer t, boolean mergeClusters)1141 	private void processClusters(Timer t, boolean mergeClusters){
1142 
1143 		ArrayList<ClusterThread> alct=new ArrayList<ClusterThread>(THREADS);
1144 		for(int i=0; i<THREADS; i++){
1145 			alct.add(new ClusterThread(fixMultiJoins, canonicizeClusters, removeCycles, fixCanonContradictions, fixOffsetContradictions,
1146 					mergeClusters, mergeClusters, mergeClusters));
1147 		}
1148 
1149 		long leafMerges=0;
1150 		long innerMerges=0;
1151 		long leafBaseMerges=0;
1152 		long innerBaseMerges=0;
1153 
1154 		long multiJoinFailures=0;
1155 		long multiJoinsFound=0;
1156 		long multiJoinBasesFound=0;
1157 		long unitsFlipped=0;
1158 		long overlapsFlipped=0;
1159 		long canonContradictoryOverlaps=0;
1160 		long canonContradictoryClusters=0;
1161 		long offsetContradictoryOverlaps=0;
1162 		long offsetContradictoryClusters=0;
1163 		long cycleOverlaps=0;
1164 		long cycleClusters=0;
1165 
1166 		for(ClusterThread ct : alct){ct.start();}
1167 		for(ClusterThread ct : alct){
1168 			while(ct.getState()!=Thread.State.TERMINATED){
1169 				try {
1170 					ct.join();
1171 				} catch (InterruptedException e) {
1172 					e.printStackTrace();
1173 				}
1174 			}
1175 
1176 			leafMerges+=ct.leafMergesT;
1177 			innerMerges+=ct.innerMergesT;
1178 			leafBaseMerges+=ct.leafBaseMergesT;
1179 			innerBaseMerges+=ct.innerBaseMergesT;
1180 
1181 			multiJoinFailures+=ct.multiJoinFailuresT;
1182 			multiJoinsFound+=ct.multiJoinsFoundT;
1183 			multiJoinBasesFound+=ct.multiJoinBasesFoundT;
1184 			unitsFlipped+=ct.unitsFlippedT;
1185 			overlapsFlipped+=ct.overlapsFlippedT;
1186 			canonContradictoryOverlaps+=ct.canonContradictoryOverlapsT;
1187 			canonContradictoryClusters+=ct.canonContradictoryClustersT;
1188 			offsetContradictoryOverlaps+=ct.offsetContradictoryOverlapsT;
1189 			offsetContradictoryClusters+=ct.offsetContradictoryClustersT;
1190 			cycleOverlaps+=ct.cycleOverlapsT;
1191 			cycleClusters+=ct.cycleClustersT;
1192 		}
1193 		alct.clear();
1194 
1195 		assert(affixMaps==null);
1196 		killAffixMaps();
1197 
1198 		assert(clusterQueue.isEmpty());
1199 		clusterQueue=null;
1200 
1201 		if(DISPLAY_PROGRESS){
1202 			t.stop();
1203 			if(fixMultiJoins){
1204 				outstream.println("Found "+(multiJoinsFound)+" multijoins ("+multiJoinBasesFound+" bases).");
1205 				outstream.println("Experienced "+(multiJoinFailures)+" multijoin removal failures.");
1206 			}
1207 			if(canonicizeClusters){
1208 				outstream.println("Flipped "+(unitsFlipped)+" reads and "+overlapsFlipped+" overlaps.");
1209 				outstream.println("Found "+(canonContradictoryClusters)+" clusters ("+canonContradictoryOverlaps+" overlaps) with contradictory orientation cycles.");
1210 			}
1211 			if(fixOffsetContradictions){
1212 				outstream.println("Found "+(offsetContradictoryClusters)+" clusters ("+offsetContradictoryOverlaps+" overlaps) with contradictory offset cycles.");
1213 			}
1214 			outstream.println("Found "+(cycleClusters)+" clusters ("+cycleOverlaps+" overlaps) with remaining cycles.");
1215 			if(absorbOverlap){
1216 				outstream.println("Merged "+(leafMerges)+" leaves ("+leafBaseMerges+" bases).");
1217 				outstream.println("Merged "+(innerMerges)+" nonleaves ("+innerBaseMerges+" bases).");
1218 			}
1219 
1220 			outstream.println("\nAfter processing clusters:");
1221 			final long[][] clusterSize=new long[3][70000];
1222 			final long max=fillClusterSizeMatrix(processedClusters, clusterSize);
1223 			outstream.println(toClusterSizeString(clusterSize));
1224 			outstream.println("\nLargest:          "+max);
1225 
1226 			outstream.println("Finished processing.       Time: "+t);
1227 			Shared.printMemory();
1228 			outstream.println();
1229 			t.start();
1230 		}
1231 	}
1232 
removeInvalid(ArrayList<Read> list)1233 	private long removeInvalid(ArrayList<Read> list){
1234 		final LongM keym=new LongM();
1235 		long removedC=0, removedP=0, removedS=0, invalid=0;
1236 
1237 		for(int j=0, lim=list.size(); j<lim; j++){
1238 			final Read r=list.get(j);
1239 			final Unit u=(Unit)r.obj;
1240 
1241 			if(!u.valid()){
1242 
1243 				invalid++;
1244 
1245 				if(codeMap!=null && !codeMap.isEmpty()){
1246 					Long key=u.code1;
1247 					ArrayList<Unit> alu=codeMap.get(key);
1248 					if(alu!=null){
1249 						int valid=0;
1250 						for(int i=alu.size()-1; i>=0; i--){
1251 							Unit u2=alu.get(i);
1252 							if(u2==null || !u2.valid()){
1253 								alu.remove(i);
1254 								removedC++;
1255 							}
1256 							else{valid++;}
1257 						}
1258 						if(valid==0){codeMap.remove(key);}
1259 					}
1260 				}
1261 
1262 				for(int num=0; num<numAffixMaps && affixMaps!=null; num++){
1263 					HashMap<LongM, ArrayList<Unit>> map=affixMaps[num];
1264 					if(map!=null && !map.isEmpty()){
1265 						if(u.prefixes[num]!=-1){
1266 							keym.set(u.prefixes[num]);
1267 							ArrayList<Unit> alu=map.get(keym);
1268 							if(alu!=null){
1269 								int valid=0;
1270 								for(int i=alu.size()-1; i>=0; i--){
1271 									Unit u2=alu.get(i);
1272 									if(u2==null || !u2.valid()){
1273 										alu.remove(i);
1274 										removedP++;
1275 									}
1276 									else{valid++;}
1277 								}
1278 								if(valid==0){map.remove(keym);}
1279 							}
1280 						}
1281 						if(storeSuffix && u.suffixes[num]!=-1){
1282 							keym.set(u.suffixes[num]);
1283 							ArrayList<Unit> alu=map.get(keym);
1284 							if(alu!=null){
1285 								int valid=0;
1286 								for(int i=alu.size()-1; i>=0; i--){
1287 									Unit u2=alu.get(i);
1288 									if(u2==null || !u2.valid()){
1289 										alu.remove(i);
1290 										removedS++;
1291 									}
1292 									else{valid++;}
1293 								}
1294 								if(valid==0){map.remove(keym);}
1295 							}
1296 						}
1297 					}
1298 				}
1299 				list.set(j, null);
1300 			}
1301 		}
1302 
1303 		if(invalid>0){
1304 			Tools.condenseStrict(list);
1305 		}
1306 		if(verbose){
1307 			outstream.println("Removed invalids: "+removedC+", "+removedP+", "+removedS);
1308 		}
1309 		return invalid;
1310 	}
1311 
1312 
addToArray(HashMap<Long, ArrayList<Unit>> codeMap, boolean sort, boolean clear, long outNum)1313 	private static ArrayList<Read> addToArray(HashMap<Long, ArrayList<Unit>> codeMap, boolean sort, boolean clear, long outNum){
1314 		assert(outNum<=Integer.MAX_VALUE);
1315 		if(verbose){System.err.println("Making list.");}
1316 		ArrayList<Read> list=new ArrayList<Read>((int)outNum);
1317 		if(verbose){System.err.println("Adding.");}
1318 		for(ArrayList<Unit> alu : codeMap.values()){
1319 			for(Unit u : alu){
1320 				if(u.valid() && u.r.pairnum()==0){list.add(u.r);}
1321 			}
1322 			if(clear){alu.clear();}
1323 		}
1324 		if(clear){codeMap.clear();}
1325 
1326 		if(sort){
1327 			if(verbose){System.err.println("Sorting.");}
1328 			Shared.sort(list, comparator);
1329 //			if(ascending){
1330 //				Collections.reverse(list);
1331 //				assert(list.isEmpty() || list.get(0).length()<=list.get(list.size()-1).length()) :
1332 //					list.get(0).length()+", "+list.get(list.size()-1).length();
1333 //			}else{
1334 //				assert(list.isEmpty() || list.get(0).length()>=list.get(list.size()-1).length()) :
1335 //					list.get(0).length()+", "+list.get(list.size()-1).length();
1336 //			}
1337 		}
1338 		assert(list.size()==outNum || list.size()*2L==outNum || UNIQUE_ONLY) : list.size()+", "+outNum;
1339 		return list;
1340 	}
1341 
writeOutput(String clusterStatsFile, Timer t)1342 	private void writeOutput(String clusterStatsFile, Timer t){
1343 //		verbose=true;
1344 //		assert(false) : (processedClusters==null)+", "+(processedClusters.isEmpty())+", "+outgraph+", "+out+", "+clusterFilePattern;
1345 		if(processedClusters==null || processedClusters.isEmpty()){
1346 
1347 			if(out!=null || clusterFilePattern!=null){
1348 
1349 				ArrayList<Read> list=addToArray(codeMap, sort, true, addedToMain-containments);
1350 				codeMap=null;
1351 
1352 				if(sort){
1353 					if(DISPLAY_PROGRESS){
1354 						t.stop();
1355 						outstream.println("Sorted output.             Time: "+t);
1356 						Shared.printMemory();
1357 						outstream.println();
1358 						t.start();
1359 					}
1360 				}
1361 
1362 				writeOutput(list);
1363 			}
1364 		}else{
1365 			if(outgraph!=null){
1366 				writeGraph(outgraph, processedClusters);
1367 			}
1368 			if(out!=null || clusterFilePattern!=null || clusterStatsFile!=null || outbest!=null){
1369 				writeOutputClusters(clusterStatsFile, processedClusters);
1370 			}
1371 		}
1372 
1373 		if(DISPLAY_PROGRESS){
1374 			t.stop();
1375 			outstream.println("Printed output.            Time: "+t);
1376 			Shared.printMemory();
1377 			outstream.println();
1378 			t.start();
1379 		}
1380 	}
1381 
1382 
1383 
writeOutput(ArrayList<Read> list)1384 	private void writeOutput(ArrayList<Read> list){
1385 
1386 		final ByteStreamWriter tsw=(out==null ? null : new ByteStreamWriter(out, overwrite, append, true));
1387 
1388 		if(verbose){System.err.println("Writing from array.");}
1389 		tsw.start();
1390 
1391 		HashSet<String> names=((uniqueNames && storeName) ?
1392 				new HashSet<String>(Tools.min(Integer.MAX_VALUE, Tools.max((int)addedToMain, (int)(addedToMain*1.35)))) : null);
1393 		long rid=0;
1394 		for(int x=0; x<list.size(); x++){
1395 			Read r=list.get(x);
1396 			list.set(x, null);
1397 
1398 			if(r.mate!=null && r.pairnum()!=0){r=r.mate;}
1399 
1400 			if(!r.discarded()){
1401 				rid++;
1402 
1403 				for(int i=0; r!=null && i<2; i++){
1404 					if(multipleInputFiles){r.numericID=rid;}
1405 					if(names!=null){
1406 						String name=(r.id==null ? ""+r.numericID : r.id);
1407 						if(names.contains(name)){
1408 							for(long j=0; j<Integer.MAX_VALUE; j++){
1409 								String name2=name+"_dd"+j;
1410 								if(r.mate!=null){name2+=(" /"+(i+1));}
1411 								if(!names.contains(name2)){
1412 									r.id=name2;
1413 									names.add(name2);
1414 									break;
1415 								}
1416 							}
1417 						}else{
1418 							names.add(name);
1419 						}
1420 					}
1421 					tsw.println(r);
1422 					r.setDiscarded(true);
1423 					r=r.mate;
1424 				}
1425 			}
1426 		}
1427 		if(verbose){System.err.println("Shutting down tsw "+tsw.fname);}
1428 		tsw.poisonAndWait();
1429 	}
1430 
1431 
writeOutputClusters(String clusterStatsFile, ArrayList<ArrayList<Unit>> clist)1432 	private void writeOutputClusters(String clusterStatsFile, ArrayList<ArrayList<Unit>> clist){
1433 
1434 //		Shared.sort(clist, CLUSTER_LENGTH_COMPARATOR);
1435 		clist.sort(CLUSTER_LENGTH_COMPARATOR);
1436 
1437 		if(verbose){System.err.println("Writing clusters.");}
1438 
1439 		final ByteStreamWriter tswAll=(out==null ? null : new ByteStreamWriter(out, overwrite, append, true));
1440 		if(tswAll!=null){tswAll.start();}
1441 		ByteStreamWriter tswCluster=null;
1442 		ByteStreamWriter tswBest=null;
1443 
1444 		if(outbest!=null){
1445 			tswBest=new ByteStreamWriter(outbest, overwrite, append, true);
1446 			tswBest.start();
1447 		}
1448 
1449 		TextStreamWriter csf=null;
1450 		if(clusterStatsFile!=null){
1451 			csf=new TextStreamWriter(clusterStatsFile, overwrite, false, false);
1452 			csf.start();
1453 			csf.print("#Name\tsize\t"+nmerLength+"-mer frequencies\n");
1454 		}
1455 
1456 		HashSet<String> names=((uniqueNames && storeName) ?
1457 				new HashSet<String>(Tools.min(Integer.MAX_VALUE, Tools.max((int)addedToMain, (int)(addedToMain*1.35)))) : null);
1458 		long rid=0;
1459 		final long[] nmerCounts=new long[maxNmer+1];
1460 
1461 		final StringBuilder sb=new StringBuilder(64);
1462 
1463 		for(int cnum=0; cnum<clist.size(); cnum++){
1464 			final ArrayList<Unit> alu=clist.get(cnum);
1465 //			clist.set(cnum, null); //This breaks subsequent output processing
1466 
1467 			if(alu.size()<minClusterSize){
1468 				if(verbose){System.err.println("Ignoring small cluster "+cnum+", size "+alu.size());}
1469 
1470 				if(csf!=null && alu.size()>=minClusterSizeForStats){
1471 					float[] profile=makeNmerProfile(alu, nmerCounts);
1472 					sb.append("Cluster_");
1473 					sb.append(cnum);
1474 					sb.append('\t');
1475 					sb.append(alu.size());
1476 					sb.append('\t');
1477 					for(float f : profile){
1478 						sb.append(String.format(Locale.ROOT, "%.5f ", f));
1479 					}
1480 					sb.setCharAt(sb.length()-1, '\n');
1481 					csf.print(sb.toString());
1482 					sb.setLength(0);
1483 				}
1484 			}else{
1485 				if(verbose){System.err.println("Writing cluster "+cnum+", size "+alu.size());}
1486 
1487 				if(clusterFilePattern!=null){
1488 					if(tswCluster!=null){
1489 						if(verbose){System.err.println("Shutting down tswCluster "+tswCluster.fname);}
1490 						tswCluster.poisonAndWait();
1491 						tswCluster=null;
1492 					}
1493 					tswCluster=new ByteStreamWriter(clusterFilePattern.replaceFirst("%", ""+cnum), overwrite, append, true);
1494 					if(verbose){System.err.println("Starting tswCluster "+tswCluster.fname);}
1495 					tswCluster.start();
1496 				}
1497 
1498 				if(csf!=null && alu.size()>=minClusterSizeForStats){
1499 					float[] profile=makeNmerProfile(alu, nmerCounts);
1500 					sb.append("Cluster_");
1501 					sb.append(cnum);
1502 					sb.append('\t');
1503 					sb.append(alu.size());
1504 					sb.append('\t');
1505 					for(float f : profile){
1506 						sb.append(String.format(Locale.ROOT, "%.5f ", f));
1507 					}
1508 					sb.setCharAt(sb.length()-1, '\n');
1509 					csf.print(sb.toString());
1510 					sb.setLength(0);
1511 				}
1512 
1513 				if(pickBestRepresentativePerCluster){
1514 					pickBestRepresenative(alu, true);
1515 				}
1516 
1517 				if(outbest!=null){
1518 					Unit u=pickBestRepresenative((ArrayList<Unit>)alu.clone(), false);
1519 					tswBest.println(u.r);
1520 					if(u.r.mate!=null){tswBest.println(u.r.mate);}
1521 				}
1522 
1523 				for(int contig=0; contig<alu.size(); contig++){
1524 					final Unit u0=alu.get(contig);
1525 					alu.set(contig, null);
1526 					Read r=u0.r;
1527 					if(r.mate!=null && r.pairnum()!=0){r=r.mate;}
1528 
1529 					if(!r.discarded()){
1530 						rid++;
1531 
1532 						for(int i=0; r!=null && i<2; i++){
1533 							assert(r.pairnum()==i) : i+", "+r.pairnum()+", "+(r.mate==null ? 9 : r.mate.pairnum());
1534 							Unit u=(Unit)r.obj;
1535 							if(verbose){System.err.println("Writing read "+r.id);}
1536 							r.numericID=rid;
1537 							if(renameClusters){
1538 								sb.append("Cluster_");
1539 								sb.append(cnum);
1540 								sb.append(",contig_");
1541 								sb.append(contig);
1542 								if(u.offsetValid()){
1543 									sb.append(",pos_");
1544 									sb.append(u.offset());
1545 								}
1546 								if(r.mate!=null){sb.append(" /"+(i+1));}
1547 								r.id=(r.id==null ? sb.toString() : r.id+"\t"+sb);
1548 								sb.setLength(0);
1549 							}else if(names!=null){
1550 								String name=(r.id==null ? ""+r.numericID : r.id);
1551 								if(names.contains(name)){
1552 									for(long j=0; j<Integer.MAX_VALUE; j++){
1553 										String name2=name+"_dd"+j;
1554 										if(!names.contains(name2)){
1555 											r.id=name2;
1556 											names.add(name2);
1557 											break;
1558 										}
1559 									}
1560 								}else{
1561 									names.add(name);
1562 								}
1563 							}
1564 							if(tswAll!=null){tswAll.println(r);}
1565 							if(tswCluster!=null){tswCluster.println(r);}
1566 							r.setDiscarded(true);
1567 							r=r.mate;
1568 						}
1569 					}
1570 				}
1571 			}
1572 		}
1573 		if(csf!=null){
1574 			if(verbose){System.err.println("Shutting down csf "+csf.fname);}
1575 			csf.poisonAndWait();
1576 		}
1577 		if(tswBest!=null){
1578 			if(verbose){System.err.println("Shutting down tswBest "+tswBest.fname);}
1579 			tswBest.poisonAndWait();
1580 		}
1581 		if(tswAll!=null){
1582 			if(verbose){System.err.println("Shutting down tswAll "+tswAll.fname);}
1583 			tswAll.poisonAndWait();
1584 		}
1585 		if(tswCluster!=null){
1586 			if(verbose){System.err.println("Shutting down tswCluster "+tswCluster.fname);}
1587 			tswCluster.poisonAndWait();
1588 		}
1589 	}
1590 
1591 
writeGraph(String graphFile, ArrayList<ArrayList<Unit>> clist)1592 	private void writeGraph(String graphFile, ArrayList<ArrayList<Unit>> clist){
1593 //		Shared.sort(clist, CLUSTER_LENGTH_COMPARATOR);
1594 		clist.sort(CLUSTER_LENGTH_COMPARATOR);
1595 
1596 		if(verbose){System.err.println("Writing overlap graph.");}
1597 
1598 		final TextStreamWriter tsw=(graphFile==null ? null : new TextStreamWriter(graphFile, overwrite, append, true));
1599 		if(tsw!=null){
1600 			tsw.start();
1601 			tsw.print("digraph G {\n");
1602 		}
1603 
1604 		for(int cnum=0; cnum<clist.size(); cnum++){
1605 			final ArrayList<Unit> alu=clist.get(cnum);
1606 //			clist.set(cnum, null); //This breaks subsequent output processing
1607 //			Shared.sort(alu); //TODO: Remove
1608 
1609 			if(alu.size()<minClusterSize){
1610 				if(verbose){System.err.println("Ignoring small cluster "+cnum+", size "+alu.size());}
1611 			}else{
1612 				if(verbose){System.err.println("Processing cluster "+cnum+", size "+alu.size());}
1613 
1614 				for(int contig=0; contig<alu.size(); contig++){
1615 					final Unit u0=alu.get(contig);
1616 //					alu.set(contig, null); //This breaks subsequent output processing
1617 					Read r=u0.r;
1618 					if(r.mate!=null && r.pairnum()!=0){r=r.mate;}
1619 
1620 					{
1621 						for(int i=0; r!=null && i<2; i++){
1622 							assert(r.pairnum()==i) : i+", "+r.pairnum()+", "+(r.mate==null ? 9 : r.mate.pairnum());
1623 							Unit u=(Unit)r.obj;
1624 							if(verbose){System.err.println("Writing read "+r.id);}
1625 
1626 							if(tsw!=null){
1627 								tsw.print("\t"+toGraphName(r)+"\n");
1628 								if(r.mate!=null && r.pairnum()==0){
1629 									Read r2=r.mate;
1630 									tsw.print(toGraphName(r)+" -> "+toGraphName(r2)+" [label=mate]");
1631 								}
1632 								if(u.overlapList!=null){
1633 									for(Overlap o : u.overlapList){
1634 										if(u==o.u1){
1635 											Read r2=o.u2.r;
1636 											tsw.print("\t"+toGraphName(r)+" -> "+toGraphName(r2)+" [label=\""+o.toLabel()+"\"]\n");
1637 										}
1638 									}
1639 								}
1640 							}
1641 							r=r.mate;
1642 						}
1643 					}
1644 				}
1645 			}
1646 		}
1647 
1648 		if(tsw!=null){
1649 			tsw.print("}\n");
1650 			if(verbose){System.err.println("Shutting down tswAll "+tsw.fname);}
1651 			tsw.poisonAndWait();
1652 		}
1653 	}
1654 
toGraphName(Read r)1655 	private static String toGraphName(Read r){
1656 		if(NUMBER_GRAPH_NODES || r.id==null){
1657 			return r.numericID+((ADD_PAIRNUM_TO_NAME || r.mate!=null) ? "."+(r.pairnum()+1) : "");
1658 		}else{
1659 			return r.id.replace(' ','_').replace('\t','_');
1660 		}
1661 	}
1662 
pickBestRepresenative(ArrayList<Unit> alu, boolean clearList)1663 	private Unit pickBestRepresenative(ArrayList<Unit> alu, boolean clearList){
1664 		if(alu==null || alu.isEmpty()){return null;}
1665 		float[] quality=new float[alu.size()];
1666 		int[] lengths=new int[alu.size()];
1667 		for(int i=0; i<alu.size(); i++){
1668 			Unit u=alu.get(i);
1669 			int len=u.r.length();
1670 			quality[i]=u.r.expectedErrors(true, 0)/len;
1671 			lengths[i]=len;
1672 		}
1673 		Arrays.sort(quality);
1674 		Arrays.sort(lengths);
1675 		int medianLength=lengths[lengths.length/2];
1676 		float bestQuality=quality[0];
1677 
1678 		float currentBestQuality=9999999;
1679 		Unit best=null;
1680 		for(int i=0; i<alu.size(); i++){
1681 			Unit u=alu.get(i);
1682 			int len=u.r.length();
1683 			float deviation=Tools.absdif(len, medianLength)*1f/(medianLength+1);
1684 			if(deviation<0.05){
1685 				float qual=u.r.expectedErrors(true, 0)/len;
1686 				qual=(qual+.001f)*(1+10*deviation);
1687 				if(qual<currentBestQuality || best==null){
1688 					currentBestQuality=qual;
1689 					best=u;
1690 				}
1691 			}
1692 		}
1693 		if(clearList){
1694 			alu.clear();
1695 			alu.add(best);
1696 		}
1697 		return best;
1698 	}
1699 
hash(byte[] bases)1700 	public static long hash(byte[] bases){
1701 		long code=bases.length;
1702 		for(int i=0; i<bases.length; i++){
1703 			byte b=bases[i];
1704 			int mode=(int)(code&31);
1705 			assert(hashcodes[b]!=null) : "Invalid sequence character: '"+(char)b+"'";
1706 			code=code^hashcodes[b][mode];
1707 			code=Long.rotateLeft(code, 1);
1708 		}
1709 		return code;
1710 	}
1711 
1712 
hashReversed(byte[] bases)1713 	public static long hashReversed(byte[] bases){
1714 		long code=bases.length;
1715 		for(int i=bases.length-1; i>=0; i--){
1716 			byte b=bases[i];
1717 			assert(hashcodes[b]!=null) : "Invalid sequence character: '"+(char)b+"'";
1718 			b=baseToComplementExtended[b];
1719 			int mode=(int)(code&31);
1720 			code=code^hashcodes[b][mode];
1721 			code=Long.rotateLeft(code, 1);
1722 		}
1723 		return code;
1724 	}
1725 
1726 
isCanonical(byte[] bases)1727 	public static boolean isCanonical(byte[] bases){
1728 		if(ignoreReverseComplement || bases==null || bases.length==0){return true;}
1729 		final int lim=(bases.length+1)/2;
1730 		for(int i=0, j=bases.length-1; i<lim; i++, j--){
1731 			byte a=bases[i], b=baseToComplementExtended[bases[j]];
1732 			if(a<b){return true;}
1733 			if(b<a){return false;}
1734 		}
1735 		assert((bases.length&1)==0 || bases[lim-1]==baseToComplementExtended[bases[lim-1]]) :
1736 			bases.length+", "+lim+", "+bases[lim-1]+", "+(char)bases[lim-1]+(bases.length<1000 ? "\n'"+new String(bases)+"'\n" : ""); //palindrome absorb
1737 		return true; //palindrome
1738 	}
1739 
1740 
1741 	private static synchronized long[][] makeCodes(int symbols, int modes){
1742 		Random randy=new Random(1);
1743 		long[][] r=new long[symbols][modes];
1744 		for(int i=0; i<symbols; i++){
1745 			for(int j=0; j<modes; j++){
1746 				r[i][j]=randy.nextLong();
1747 			}
1748 		}
1749 		return r;
1750 	}
1751 
1752 //	/** Handles IUPAC codes */
1753 //	private static synchronized long[][] makeCodes2(int modes){
1754 //		long[][] r0=makeCodes(26, modes);
1755 //		long[][] r=new long[Tools.max('Z','z')+1][];
1756 //		for(int i=0; i<26; i++){
1757 //			char c=(char)('A'+i);
1758 //			r[c]=r[Tools.toLowerCase(c)]=r0[i];
1759 //		}
1760 //		return r;
1761 //	}
1762 
1763 	/** Handles IUPAC codes and invalid symbols */
1764 	private static synchronized long[][] makeCodes2(int modes){
1765 		long[][] r=makeCodes(128, modes);
1766 
1767 		for(int i=0; i<26; i++){
1768 			char c=(char)('A'+i);
1769 			r[Tools.toLowerCase(c)]=r[c];
1770 		}
1771 		return r;
1772 	}
1773 
1774 	private void addDupe(Read r){
1775 		if(dupeWriter==null){return;}
1776 		if(r.mate==null || r.pairnum()==0){
1777 			synchronized(dupeWriter){
1778 				dupeWriter.println(r);
1779 				if(r.mate!=null){
1780 					dupeWriter.println(r.mate);
1781 				}
1782 			}
1783 		}
1784 	}
1785 
1786 
1787 	private final class MstThread extends Thread{
1788 
1789 		public MstThread(){}
1790 
1791 		@Override
1792 		public void run(){
1793 
1794 			ArrayList<Unit> cluster=null;
1795 			while((cluster=nextCluster())!=null){
1796 				makeMst(cluster);
1797 				processedT.add(cluster);
1798 			}
1799 
1800 		}
1801 
1802 		public void makeMst(ArrayList<Unit> cluster){
1803 			assert(heap.isEmpty());
1804 			unvisit(cluster);
1805 			for(Unit u : cluster){
1806 				u.flags&=~Unit.VISIT_MASK;
1807 				Shared.sort(u.overlapList);
1808 			}
1809 			{
1810 				Unit u=cluster.get(0);
1811 				u.setVisited(true);
1812 				heap.addAll(u.overlapList);
1813 			}
1814 //			assert(false) : cluster.size();
1815 			while(!heap.isEmpty()){
1816 				Overlap o=heap.poll();
1817 				assert(!o.mst());
1818 				if(!o.invalid()){
1819 //					assert(o.u1.overlapList.contains(o)); //slow
1820 //					assert(o.u2.overlapList.contains(o)); //slow
1821 					assert(o.u1.visited() || o.u2.visited());
1822 					final Unit u=(!o.u1.visited() ? o.u1 : !o.u2.visited()? o.u2 : null);
1823 					if(u!=null){
1824 						o.setMst(true);
1825 						u.setVisited(true);
1826 						overlapsRetainedT++;
1827 						overlapBasesRetainedT+=o.overlapLen;
1828 						for(Overlap o2 : u.overlapList){
1829 							if(o2.mst()){
1830 								//do nothing
1831 							}else if(!o2.u1.visited() || !o2.u2.visited()){
1832 								if(heap.size()>=Integer.MAX_VALUE){
1833 									removeInvalid(heap);
1834 								}
1835 								heap.add(o2);
1836 							}else if(!o2.invalid()){
1837 								o2.setInvalid(true);
1838 								overlapsRemovedT++;
1839 								overlapBasesRemovedT+=o2.overlapLen;
1840 							}
1841 						}
1842 					}
1843 				}
1844 			}
1845 			for(Unit u : cluster){
1846 				ArrayList<Overlap> alo=u.overlapList;
1847 				int removed=0;
1848 				for(int i=0; i<alo.size(); i++){
1849 					Overlap o=alo.get(i);
1850 					if(o.invalid()){
1851 						assert(!o.mst());
1852 						alo.set(i, null);
1853 						removed++;
1854 					}else{
1855 						assert(o.mst());
1856 					}
1857 				}
1858 				if(removed>0){
1859 					Tools.condenseStrict(alo);
1860 					alo.trimToSize();
1861 				}
1862 			}
1863 		}
1864 
1865 		private void removeInvalid(PriorityQueue<Overlap> heap){
1866 			ArrayList<Overlap> valid=new ArrayList<Overlap>(heap.size());
1867 			for(Overlap o : heap){
1868 				if(!o.invalid()){
1869 					assert(!o.u1.visited() || !o.u2.visited());
1870 					valid.add(o);
1871 				}
1872 			}
1873 			heap.clear();
1874 			heap.addAll(valid);
1875 		}
1876 
1877 
1878 		public long overlapsRemovedT=0;
1879 		public long overlapBasesRemovedT=0;
1880 		public long overlapsRetainedT=0;
1881 		public long overlapBasesRetainedT=0;
1882 
1883 		private final PriorityQueue<Overlap> heap=new PriorityQueue<Overlap>((1<<16)-1);
1884 		private ArrayList<ArrayList<Unit>> processedT=new ArrayList<ArrayList<Unit>>();
1885 	}
1886 
1887 
1888 	/**
1889 	 * Processes clustered sets of reads.
1890 	 * @author Brian Bushnell
1891 	 * @date Aug 9, 2013
1892 	 *
1893 	 */
1894 	private final class ClusterThread extends Thread{
1895 
1896 		public ClusterThread(boolean fixMultiJoins_, boolean canonicize_, boolean removeCycles_,
1897 				boolean fixCanonContradictions_, boolean fixOffsetContradictions_, boolean mergeClusters_, boolean mergeLeaves_, boolean mergeInner_){
1898 			fixMultiJoinsT=fixMultiJoins_;
1899 			canonicizeT=canonicize_;
1900 			fixCanonContradictionsT=fixCanonContradictions_;
1901 			fixOffsetContradictionsT=fixOffsetContradictions_;
1902 			mergeClustersT=mergeClusters_;
1903 			mergeLeavesT=mergeLeaves_;
1904 			mergeInnerT=mergeInner_;
1905 
1906 //			assert(false) : fixMultiJoinsT+", "+canonicizeT+", "+fixCanonContradictionsT+", "+mergeLeavesT+", "+mergeInnerT;
1907 			bandy=(maxEdits>0 ? BandedAligner.makeBandedAligner(bandwidth) : null);
1908 //			assert(false) : fixMultiJoinsT+", "+canonicizeT+", "+fixCanonContradictionsT+", "+fixOffsetContradictionsT+", "+mergeClustersT+", "+removeCycles_;
1909 		}
1910 
1911 		@Override
1912 		public void run(){
1913 
1914 			final ArrayList<Unit> temp=new ArrayList<Unit>(1000);
1915 
1916 			ArrayList<Unit> cluster=null;
1917 			while((cluster=nextCluster())!=null){
1918 
1919 				if(EA){
1920 					for(Unit u : cluster){assert(u.r.mate==null) : "Cluster processing/merging is not supported for paired reads, only cluster generation.";}
1921 				}
1922 
1923 //				for(Unit u : cluster){assert(!u.visited());}
1924 				unvisit(cluster);
1925 
1926 				reorderClusterBreadthFirst(cluster);
1927 				int multiJoinCount=findMultiJoinsInCluster(cluster, fixMultiJoinsT);
1928 
1929 				if(EA){
1930 					for(Unit u : cluster){assert(!u.visited());}
1931 				}
1932 
1933 				boolean ok=true;
1934 				if(multiJoinCount!=0){
1935 					assert(multiJoinCount>0);
1936 					multiJoinsFoundT+=multiJoinCount;
1937 					if(!fixMultiJoinsT){
1938 						multiJoinFailuresT++;
1939 						ok=false;
1940 					}
1941 				}
1942 
1943 				int canonContradictions=0;
1944 				if(ok && canonicizeT){
1945 					if(EA){
1946 						for(Unit u : cluster){
1947 							assert(!u.visited());
1948 							assert(!u.canonContradiction());
1949 							assert(!u.canonicized());
1950 							if(u.overlapList!=null){
1951 								for(Overlap o : u.overlapList){
1952 									assert(!o.invalid());
1953 									assert(!o.canonContradiction()) :
1954 										o.u1.canonContradiction()+", "+o.u2.canonContradiction()+", "+cluster.contains(o.u1)+", "+cluster.contains(o.u2);
1955 								}
1956 							}
1957 						}
1958 					}
1959 					canonContradictions=canonicizeClusterBreadthFirst(cluster, temp);
1960 //					System.err.println("Canonicized cluster of size "+cluster.size()+"; contradictions = "+canonContradictions+"; canonicized = "+temp.size());
1961 					temp.clear();
1962 					for(Unit u : cluster){assert(!u.visited());}
1963 					if(canonContradictions>0){
1964 						canonContradictoryOverlapsT+=canonContradictions;
1965 						canonContradictoryClustersT++;
1966 						if(fixCanonContradictionsT){
1967 							if(verbose){System.err.println("Pruning cluster to remove canonization contradictions.");}
1968 							fullyPruneCluster(cluster, temp);
1969 							if(verbose){System.err.println("Resulting size: "+cluster.size());}
1970 							if(EA){
1971 								for(Unit u : cluster){
1972 									assert(!u.visited());
1973 									assert(!u.canonContradiction());
1974 									assert(u.canonicized());
1975 									if(u.overlapList!=null){
1976 										for(Overlap o : u.overlapList){
1977 											assert(!o.invalid());
1978 											assert(!o.canonContradiction());
1979 											assert(o.type==FORWARD) : "\n"+o+"\n"+
1980 											o.u1.canonContradiction()+", "+o.u2.canonContradiction()+", "+o.u1.canonicized()+", "+o.u2.canonicized()+
1981 											"\n"+cluster.contains(o.u1)+", "+cluster.contains(o.u2)+", "+cluster.size();
1982 										}
1983 									}
1984 								}
1985 							}
1986 						}else{
1987 							ok=false;
1988 						}
1989 					}
1990 				}
1991 
1992 				int cycleOverlaps=0;
1993 				if(ok){
1994 					cycleOverlaps=findCycles(cluster, removeCycles);
1995 					for(Unit u : cluster){assert(!u.visited());}
1996 					if(cycleOverlaps>0){
1997 						cycleOverlapsT+=cycleOverlaps;
1998 						cycleClustersT++;
1999 					}
2000 				}
2001 
2002 				int offsetContradictions=0;
2003 				if(ok && fixOffsetContradictionsT){
2004 					if(EA){
2005 						for(Unit u : cluster){
2006 							assert(!u.visited());
2007 							assert(!u.offsetContradiction());
2008 							assert(!u.offsetValid());
2009 							assert(u.canonicized());
2010 							if(u.overlapList!=null){
2011 								for(Overlap o : u.overlapList){
2012 									assert(!o.invalid());
2013 									assert(!o.offsetContradiction());
2014 									assert(o.type==FORWARD) : o;
2015 								}
2016 							}
2017 						}
2018 					}
2019 					offsetContradictions=generateOffsetsBreadthFirst(cluster, temp);
2020 //					System.err.println("Made offsets for cluster of size "+cluster.size()+"; contradictions = "+offsetContradictions+"; set = "+temp.size());
2021 					temp.clear();
2022 					for(Unit u : cluster){assert(!u.visited());}
2023 					if(offsetContradictions>0){
2024 						offsetContradictoryOverlapsT+=offsetContradictions;
2025 						offsetContradictoryClustersT++;
2026 						if(fixOffsetContradictionsT){
2027 							if(verbose){System.err.println("Pruning cluster to remove offset contradictions.");}
2028 							fullyPruneCluster(cluster, temp);
2029 							if(verbose){System.err.println("Resulting size: "+cluster.size());}
2030 							if(EA){
2031 								for(Unit u : cluster){
2032 									assert(!u.visited());
2033 									assert(!u.offsetContradiction());
2034 									assert(u.offsetValid());
2035 									if(u.overlapList!=null){
2036 										for(Overlap o : u.overlapList){
2037 											assert(!o.invalid());
2038 											assert(!o.offsetContradiction());
2039 											assert(o.type==FORWARD) : o;
2040 										}
2041 									}
2042 								}
2043 							}
2044 						}else{
2045 							ok=false;
2046 						}
2047 					}
2048 					if(ok){Shared.sort(cluster, UNIT_OFFSET_COMPARATOR);}
2049 				}
2050 
2051 				if(ok && absorbOverlap){
2052 					mergeCluster(cluster);
2053 				}
2054 
processedClustersT.add(cluster)2055 				processedClustersT.add(cluster);
2056 				if(processedClustersT.size()>=threadMaxReadsToBuffer){
2057 					synchronized(processedClusters){
2058 						processedClusters.addAll(processedClustersT);
processedClustersT.clear()2059 						processedClustersT.clear();
2060 					}
2061 				}
2062 			}
2063 			synchronized(processedClusters){
2064 				processedClusters.addAll(processedClustersT);
processedClustersT.clear()2065 				processedClustersT.clear();
2066 			}
2067 		}
2068 
2069 		private void fullyPruneCluster(ArrayList<Unit> cluster, ArrayList<Unit> temp){
2070 			assert(cluster.size()>1) : cluster.size();
2071 			ArrayList<Unit> pruned=pruneCluster(cluster, true, true, temp);
2072 			assert(temp.isEmpty());
2073 			assert(pruned==null || pruned.size()>0);
2074 			while(pruned!=null){
2075 				ArrayList<Unit> subcluster=pruned;
2076 				for(Unit u : subcluster){
2077 					u.clearVolatileFlags();
2078 					if(u.overlapList!=null){
2079 						for(Overlap o : u.overlapList){
2080 							o.clearVolatileFlags();
2081 						}
2082 					}
2083 				}
2084 				assert(subcluster.size()>0);
2085 				pruned=pruneCluster(subcluster, false, false, temp);
2086 				assert(temp.isEmpty());
2087 				assert(pruned==null || pruned.size()>0);
2088 				assert(subcluster.size()>0);
2089 				if(subcluster.size()==1){
2090 					processedClustersT.add(subcluster);
2091 				}else{
2092 					assert(subcluster.size()>1);
2093 					synchronized(clusterQueue){
2094 						clusterQueue.add(subcluster);
2095 					}
2096 				}
2097 			}
2098 		}
2099 
2100 		/**
2101 		 * @param cluster
2102 		 */
2103 		private void mergeCluster(ArrayList<Unit> cluster) {
2104 			if(cluster.size()==1){return;}
2105 			if(mergeLeavesT){
2106 				mergeLeaves(cluster);
2107 			}
2108 			if(mergeInnerT){
2109 				mergeInner(cluster);
2110 			}
2111 		}
2112 
2113 		/**
2114 		 * Finds places in the cluster where two Units are joined by multiple different Overlaps.
2115 		 * Returns number of multijoins found.
2116 		 * @param cluster
2117 		 */
2118 		private int findMultiJoinsInCluster(ArrayList<Unit> cluster, boolean resolveProblems) {
2119 			if(cluster.size()<2){return 0;}
2120 			int totalMultiJoins=0;
2121 			for(Unit ua : cluster){
2122 				ArrayList<Overlap> list=ua.overlapList;
2123 				assert(list!=null);
2124 				if(list.size()>1){
2125 					Shared.sort(list);
2126 
2127 					int multiJoins=0;
2128 					for(int i=0; i<list.size(); i++){
2129 						Overlap o=list.get(i);
2130 						Unit ub=(o.u1==ua ? o.u2 : o.u1);
2131 						assert(ua!=ub);
2132 						assert(ua==o.u1 || ua==o.u2);
2133 						if(ub.visited()){
2134 							multiJoins++;
2135 							multiJoinBasesFoundT+=o.overlapLen;
2136 							if(!o.multiJoin()){o.setMultiJoin(true);}
2137 							if(resolveProblems){list.set(i, null);}
2138 						}else{
2139 							ub.setVisited(true);
2140 						}
2141 					}
2142 
2143 					if(multiJoins>0){
2144 						totalMultiJoins+=multiJoins;
2145 						if(resolveProblems){Tools.condenseStrict(list);}
2146 					}
2147 
2148 					for(int i=0; i<list.size(); i++){
2149 						Overlap o=list.get(i);
2150 						Unit ub=(o.u1==ua ? o.u2 : o.u1);
2151 						assert(ua!=ub);
2152 						assert(ua==o.u1 || ua==o.u2);
2153 						assert(ub.visited());
2154 						ub.setVisited(false);
2155 					}
2156 				}
2157 
2158 			}
2159 
2160 			return totalMultiJoins;
2161 		}
2162 
2163 		private ArrayList<Unit> pruneCluster(ArrayList<Unit> cluster, boolean pruneContradictoryNodes, boolean pruneContradictoryOverlaps, ArrayList<Unit> visited){
2164 			if(verbose){System.err.println("pruneCluster(size="+cluster.size()+", "+pruneContradictoryNodes+", "+pruneContradictoryOverlaps+")");}
2165 
2166 			//pruneContradictoryOverlaps is less strict than pruneContradictoryNodes
2167 			assert(pruneContradictoryOverlaps || !pruneContradictoryNodes);
2168 
2169 			for(Unit ua : cluster){
2170 				assert(!ua.visited());
2171 				assert(ua.isPerfectlyTransitive()) : ua;
2172 				if(ua.visited()){ua.setVisited(false);}
2173 			}
2174 
2175 			int prunedOverlaps=0;
2176 			int visits=1;
2177 
2178 			{
2179 				final Unit root=cluster.get(0);
2180 				assert(!root.contradiction());
2181 				root.setVisited(true);
2182 				visited.add(root);
2183 			}
2184 
2185 			for(int i=0; i<visited.size(); i++){
2186 				Unit ua=visited.get(i);
2187 
2188 				if(ua.visited() && (!ua.contradiction() || !pruneContradictoryNodes)){
2189 					ArrayList<Overlap> list=ua.overlapList;
2190 					if(list!=null){
2191 						int removed=0;
2192 						for(int j=0; j<list.size(); j++){
2193 							Overlap o=list.get(j);
2194 							Unit ub=(o.u1==ua ? o.u2 : o.u1);
2195 							assert(o.u1==ua || o.u2==ua);
2196 							assert(ua!=ub);
2197 							assert(ub.valid());
2198 
2199 							assert(!o.canonContradiction() || (ua.canonContradiction() || ub.canonContradiction())) :
2200 								"\n"+o.canonContradiction()+", "+ua.canonContradiction()+", "+ub.canonContradiction();
2201 
2202 							assert(!o.offsetContradiction() || (ua.offsetContradiction() || ub.offsetContradiction())) :
2203 								"\n"+o.offsetContradiction()+", "+ua.offsetContradiction()+", "+ub.offsetContradiction();
2204 
2205 //							assert(o.contradiction()==(ua.contradiction() && ub.contradiction())) :
2206 //								"\n"+o.canonContradiction()+", "+o.offsetContradiction()+
2207 //								"\n"+ua.canonContradiction()+", "+ua.offsetContradiction()+
2208 //								"\n"+ub.canonContradiction()+", "+ub.offsetContradiction();
2209 
2210 							final boolean remove=(pruneContradictoryNodes && ub.contradiction() || (pruneContradictoryOverlaps && o.contradiction()));
2211 							if(!remove && !ub.visited()){
2212 								ub.setVisited(true);
2213 								visited.add(ub);
2214 								visits++;
2215 							}
2216 
2217 							if(remove){
2218 								if(!o.invalid()){o.setInvalid(true);}
2219 								list.set(j, null);
2220 								removed++;
2221 								prunedOverlaps++;
2222 							}else{
2223 								assert(!o.invalid());
2224 							}
2225 						}
2226 						if(removed>0){Tools.condenseStrict(list);}
2227 					}
2228 				}
2229 			}
2230 
2231 			if(verbose){System.err.println("cluster.size()="+cluster.size()+", visits="+visits+", visited.size()="+visited.size());}
2232 
2233 //			if(visited.size()==11486){ //TODO: For testing. Remove.
2234 //				for(int i=0; i<visited.size(); i++){
2235 //					Unit u=visited.get(i);
2236 //					assert(u.visited());
2237 //					assert(!u.canonContradiction());
2238 //					assert(u.canonicized());
2239 //					for(Overlap o : u.overlapList){
2240 //						assert(!o.canonContradiction());
2241 //						assert(o.type==FORWARD) : "\n\no="+o+"\ni="+i+", u.overlapList.size="+u.overlapList.size()+"\n"+
2242 //						o.u1.canonContradiction()+", "+o.u2.canonContradiction()+", "+o.u1.canonicized()+", "+o.u2.canonicized()+
2243 //						"\n"+visited.contains(o.u1)+", "+visited.contains(o.u2)+", "+visited.size()+
2244 //						"\n"+u.overlapList;
2245 //					}
2246 //				}
2247 //			}
2248 
2249 			final int numUnvisited=cluster.size()-visits;
2250 			ArrayList<Unit> pruned=(numUnvisited==0 ? null : new ArrayList<Unit>(numUnvisited));
2251 			assert(visits==visited.size());
2252 			assert(visits>=1 && visits<=cluster.size());
2253 
2254 			if(visits<cluster.size()){
2255 				pruned=new ArrayList<Unit>(cluster.size()-visits);
2256 				for(Unit ua : cluster){
2257 					if(!ua.visited()){
2258 						pruned.add(ua);
2259 						ArrayList<Overlap> list=ua.overlapList;
2260 						if(list!=null){
2261 							int removed=0;
2262 							for(int j=0; j<list.size(); j++){
2263 								Overlap o=list.get(j);
2264 								Unit ub=(o.u1==ua ? o.u2 : o.u1);
2265 								assert(o.u1==ua || o.u2==ua);
2266 								assert(ua!=ub);
2267 								assert(ub.valid());
2268 
2269 								if(ub.visited() || o.invalid()){
2270 									assert(ub.visited()==o.invalid()) : "\n"+o+"\n"+ub;
2271 									list.set(j, null);
2272 									removed++;
2273 								}
2274 							}
2275 							if(removed>0){Tools.condenseStrict(list);}
2276 						}
2277 					}
2278 				}
2279 				assert(pruned.size()==numUnvisited);
2280 			}else{
2281 				assert(prunedOverlaps==0) : "If this fails then I may need to mark overlaps to remove.";
2282 			}
2283 			for(Unit u : cluster){
2284 				assert(u.isPerfectlyTransitive()) : u;
2285 				if(EA){
2286 					if(u.overlapList!=null){
2287 						for(Overlap o : u.overlapList){assert(!o.invalid());}
2288 					}
2289 				}
2290 				if(u.visited()){u.setVisited(false);}
2291 			}
2292 			cluster.clear();
2293 			cluster.addAll(visited);
2294 			cluster.trimToSize();
2295 
2296 //			for(Unit u : cluster){
2297 ////				assert(u.canonicized());
2298 //				for(Overlap o : u.overlapList){
2299 //					assert(pruned==null || !pruned.contains(o.u1));
2300 //					assert(pruned==null || !pruned.contains(o.u2));
2301 //					assert(cluster.contains(o.u1));
2302 //					assert(cluster.contains(o.u2));
2303 //				}
2304 //			}
2305 //			if(pruned!=null){
2306 //				for(Unit u : pruned){
2307 //					for(Overlap o : u.overlapList){
2308 //						assert(pruned.contains(o.u1));
2309 //						assert(pruned.contains(o.u2));
2310 //						assert(!cluster.contains(o.u1));
2311 //						assert(!cluster.contains(o.u2));
2312 //					}
2313 //				}
2314 //			}
2315 
2316 			visited.clear();
2317 			return pruned;
2318 		}
2319 
2320 		/**
2321 		 * Cluster should already be ordered breadth-first
2322 		 * This may fail because removing cycles could change breadth-first traversal, but if it fails, an assertion will be thrown
2323 		 * @param cluster
2324 		 */
2325 		private int findCycles(ArrayList<Unit> cluster, boolean remove){
2326 
2327 			{
2328 				final Unit root=cluster.get(0);
2329 				assert(root.length()>=cluster.get(cluster.size()-1).length());
2330 				root.setVisited(true);
2331 			}
2332 			int cycles=0;
2333 
2334 			for(Unit ua : cluster){
2335 				assert(ua.visited());
2336 				ArrayList<Overlap> list=ua.overlapList;
2337 				if(list!=null){
2338 					int removed=0;
2339 					for(int i=0; i<list.size(); i++){
2340 						Overlap o=list.get(i);
2341 						Unit ub=(o.u1==ua ? o.u2 : o.u1);
2342 						assert(o.u1==ua || o.u2==ua);
2343 						assert(ua!=ub);
2344 						assert(ub.valid());
2345 
2346 						if(!o.visited()){
2347 							o.setVisited(true);
2348 							if(ub.visited()){
2349 								if(!o.cyclic()){
2350 									o.setCyclic(true);
2351 									cycles++;
2352 								}
2353 							}else{
2354 								ub.setVisited(true);
2355 							}
2356 						}
2357 						if(remove && o.cyclic()){
2358 							list.set(i, null);
2359 							removed++;
2360 						}
2361 					}
2362 					if(removed>0){Tools.condenseStrict(list);}
2363 				}
2364 			}
2365 
2366 			for(Unit u : cluster){
2367 				if(u.visited()){u.setVisited(false);}
2368 				if(u.overlapList!=null){
2369 					for(Overlap o : u.overlapList){
2370 						if(o.visited()){o.setVisited(false);}
2371 					}
2372 				}
2373 			}
2374 
2375 			return cycles;
2376 		}
2377 
2378 		/**
2379 		 * Cluster should already be ordered breadth-first
2380 		 * @param cluster
2381 		 */
2382 		private int generateOffsetsBreadthFirst(ArrayList<Unit> cluster, ArrayList<Unit> temp){
2383 
2384 
2385 			assert(temp!=null);
2386 			assert(temp.isEmpty());
2387 			{
2388 				final Unit root=cluster.get(0);
2389 				assert(root.length()>=cluster.get(cluster.size()-1).length());
2390 				root.setOffset(0);
2391 				temp.add(root);
2392 			}
2393 
2394 			int contradictions=0;
2395 			for(int i=0; i<temp.size(); i++){
2396 				Unit u=temp.get(i);
2397 				assert(!u.visited()) : i;
2398 				assert(u.offsetValid() || contradictions>0) : i+", "+temp.size()+", "+contradictions+"\n"+toString(temp);
2399 				if(u.offsetValid() && !u.offsetContradiction()){
2400 					contradictions+=setOffsetsNeighbors(u, temp);
2401 					assert(contradictions==0 || (i>0 && temp.size()>2));
2402 				}
2403 			}
2404 
2405 			int min=0;
2406 			for(Unit u : temp){
2407 				if(u.visited()){u.setVisited(false);}
2408 				if(u.overlapList!=null){
2409 					for(Overlap o : u.overlapList){
2410 						if(o.visited()){o.setVisited(false);}
2411 					}
2412 				}
2413 				if(u.offsetValid() && !u.offsetContradiction()){
2414 					min=Tools.min(min, u.offset());
2415 				}
2416 			}
2417 
2418 			if(verbose){
2419 				System.err.println("min offset = "+min);
2420 			}
2421 
2422 			for(Unit u : temp){
2423 				if(u.offsetValid()){
2424 					if(verbose){System.err.println("Set "+u.name()+" offset from "+u.offset+" to "+(u.offset-min));}
2425 					u.offset=u.offset-min;
2426 				}
2427 			}
2428 
2429 
2430 			return contradictions;
2431 		}
2432 
2433 		/**
2434 		 * @param root
2435 		 */
2436 		private int setOffsetsNeighbors(final Unit root, final ArrayList<Unit> temp) {
2437 			if(verbose){System.err.println("\nsetOffsetsNeighbors("+root.name()+")\nroot.code1="+root.code1+"\n");}
2438 			assert(root.valid());
2439 			assert(!root.visited());
2440 			assert(root.offsetValid());
2441 			assert(!root.offsetContradiction());
2442 			root.setVisited(true);
2443 			if(root.overlapList==null){return 0;}
2444 			final int contradictions=countOffsetContradictions(root, false);
2445 			if(verbose){System.err.println("\ncontradictions="+contradictions);}
2446 			for(Overlap o : root.overlapList){
2447 				Unit u=(o.u1==root ? o.u2 : o.u1);
2448 				assert(o.u1==root || o.u2==root);
2449 				assert(root!=u);
2450 				assert(u.valid());
2451 
2452 				if(verbose){System.err.println("\nProcessing Overlap "+o);}
2453 				if(!o.visited() && !o.offsetContradiction()){
2454 					o.setVisited(true);
2455 					if(!u.offsetContradiction()){
2456 						if(verbose){System.err.println("Calling setOffset:  "+o);}
2457 						if(!u.offsetValid()){temp.add(u);}
2458 						boolean b=setOffset(root, u, o);
2459 						if(verbose){System.err.println("Finished setOffset: "+o);}
2460 
2461 //						if(x>0){
2462 //							if(verbose){System.err.println("\n*********************************************");}
2463 //							if(verbose){System.err.println("Problem detected with contig "+u.name());}
2464 //							if(verbose){System.err.println("*********************************************\n");}
2465 //							verbose=true;
2466 //							int y2=countOffsetContradictions(root, false);
2467 //							assert(contradictions==y2);
2468 //						}
2469 
2470 						assert(b) : "\n"+contradictions+", "+o.offsetContradiction()+", "+root.offsetContradiction()+", "+u.offsetContradiction()+"\n"
2471 							+root.offsetValid()+", "+u.offsetValid()+", "+OVERLAP_TYPE_NAMES[o.type]+"\n"+b
2472 							+fixMultiJoins; //This assertion can fail if a multijoin is present
2473 						assert(u.offsetValid());
2474 					}
2475 				}
2476 			}
2477 			return contradictions;
2478 		}
2479 
2480 		private int countOffsetContradictions(Unit root, boolean includeKnown){
2481 			if(verbose){System.err.println("\ncountContradictions("+root.name()+", "+includeKnown+")\nroot.code1="+root.code1+"\n");}
2482 			assert(root.valid());
2483 			assert(root.visited());
2484 			assert(root.offsetValid());
2485 //			assert(!root.offsetContradiction());
2486 			if(root.overlapList==null){return 0;}
2487 			int contradictions=0;
2488 			for(Overlap o : root.overlapList){
2489 				Unit u=(o.u1==root ? o.u2 : o.u1);
2490 				assert(o.u1==root || o.u2==root);
2491 				assert(root!=u);
2492 				assert(u.valid());
2493 
2494 				if(verbose){System.err.println("\nOverlap "+o+"\nu="+u.name()+", offsetValid="+u.offsetValid());}
2495 
2496 				boolean contradictory=(u.offsetValid() && u.offset()!=calcOffset(root, u, o));
2497 				if(verbose){System.err.println("contradictory=            \t"+contradictory);}
2498 				if(contradictory){
2499 					if(includeKnown || !u.offsetContradiction()){
2500 						contradictions++;
2501 						if(!root.offsetContradiction()){root.setOffsetContradiction(true);}
2502 					}
2503 					if(!o.offsetContradiction()){o.setOffsetContradiction(true);}
2504 					if(!u.offsetContradiction()){u.setOffsetContradiction(true);}
2505 				}
2506 				assert(contradictory==o.offsetContradiction()) : contradictory+", "+o.offsetContradiction();
2507 				if(verbose){
2508 					System.err.println("root.offsetContradiction()=\t"+root.offsetContradiction());
2509 					System.err.println("u.offsetContradiction()=   \t"+u.offsetContradiction());
2510 					System.err.println("o.offsetContradiction()=   \t"+o.offsetContradiction());
2511 					System.err.println("contradictions=           \t"+contradictions);
2512 				}
2513 			}
2514 			if(verbose){System.err.println("Final contradictions="+contradictions+"\n");}
2515 			return contradictions;
2516 		}
2517 
2518 		/**
2519 		 * Cluster should already be ordered breadth-first
2520 		 * @param cluster
2521 		 */
2522 		private int canonicizeClusterBreadthFirst(ArrayList<Unit> cluster, ArrayList<Unit> temp) {
2523 
2524 			assert(temp!=null);
2525 			assert(temp.isEmpty());
2526 			{
2527 				final Unit root=cluster.get(0);
2528 				assert(root.length()>=cluster.get(cluster.size()-1).length());
2529 				root.setCanonicized(true);
2530 				temp.add(root);
2531 			}
2532 
2533 			int contradictions=0;
2534 			for(int i=0; i<temp.size(); i++){
2535 				final Unit u=temp.get(i);
2536 				assert(!u.visited()) : i;
2537 				assert(u.canonicized() || contradictions>0) : i+", "+temp.size()+", "+contradictions+"\n"+toString(temp);
2538 				if(u.canonicized() && !u.canonContradiction()){
2539 					contradictions+=canonicizeNeighbors(u, temp);
2540 					assert(contradictions==0 || (i>0 && temp.size()>2));
2541 
2542 					if(u.overlapList!=null){
2543 						for(Overlap o : u.overlapList){
2544 							assert(o.type==FORWARD || o.canonContradiction() || o.u1.canonContradiction() || o.u2.canonContradiction()) :
2545 								o+"\n"+contradictions+", "+o.canonContradiction()+", "+o.u1.canonContradiction()+", "+o.u2.canonContradiction()+
2546 								"\n"+o.u1.canonicized()+", "+o.u2.canonicized()+", "+o.u1.visited()+", "+o.u2.visited();
2547 						}
2548 					}
2549 				}
2550 
2551 //				if(u.r.numericID==59462 || u.r.numericID==56439){ //TODO: remove
2552 //					System.err.println("\nid="+u.r.numericID+", canonicized="+u.canonicized()+", contradiction="+u.canonContradiction()+", visited="+u.visited());
2553 //					for(Overlap o : u.overlapList){
2554 //						Unit u2=(o.u1==u ? o.u2 : o.u1);
2555 //						assert(o.u1==u || o.u2==u);
2556 //						assert(u2!=u);
2557 //						assert(u2.valid());
2558 //						System.err.println("o = "+o);
2559 //						System.err.println("o.contradiction="+o.canonContradiction());
2560 //						System.err.println("u2.id="+u2.r.numericID+", canonicized="+u2.canonicized()+", contradiction="+u2.canonContradiction()+", visited="+u.visited());
2561 //					}
2562 //				}
2563 			}
2564 
2565 			for(Unit u : temp){
2566 				if(u.visited()){u.setVisited(false);}
2567 				if(EA){
2568 					if(u.overlapList!=null){
2569 						for(Overlap o : u.overlapList){assert(!o.visited());}
2570 					}
2571 				}
2572 			}
2573 
2574 			return contradictions;
2575 		}
2576 
2577 		/**
2578 		 * @param root
2579 		 */
2580 		private int canonicizeNeighbors(Unit root, ArrayList<Unit> canonicized) {
2581 			if(verbose){System.err.println("\ncanonicizeNeighbors("+root.name()+")\nroot.code1="+root.code1+"\n");}
2582 			assert(root.valid());
2583 			assert(!root.visited());
2584 			assert(root.canonicized());
2585 			assert(!root.canonContradiction());
2586 			root.setVisited(true);
2587 			if(root.overlapList==null){return 0;}
2588 			final int contradictions=countCanonContradictions(root, false);
2589 			if(verbose){System.err.println("\ncontradictions="+contradictions);}
2590 			for(Overlap o : root.overlapList){
2591 				Unit u=(o.u1==root ? o.u2 : o.u1);
2592 				assert(o.u1==root || o.u2==root);
2593 				assert(root!=u);
2594 				assert(u.valid());
2595 
2596 				if(verbose){System.err.println("\nProcessing Overlap "+o);}
2597 				if(!o.canonContradiction()){
2598 					if(!u.canonContradiction()){
2599 						boolean b=u.canonicized();
2600 						int dir=o.type;
2601 						if(verbose){System.err.println("Calling canonicize:  "+o);}
2602 						int x=canonicize(root, u, o);
2603 						if(verbose){System.err.println("Finished canonicize: "+o);}
2604 
2605 //						if(x>0){
2606 //							if(verbose){System.err.println("\n*********************************************");}
2607 //							if(verbose){System.err.println("Problem detected with contig "+u.name());}
2608 //							if(verbose){System.err.println("*********************************************\n");}
2609 //							verbose=true;
2610 //							int y2=countCanonContradictions(root, false);
2611 //							assert(contradictions==y2);
2612 //						}
2613 
2614 						assert(x==0 || (u.canonicized() && (o.type==FORWARDRC || o.type==REVERSERC)));
2615 						assert(x==0) : "\n"+x+", "+contradictions+", "+o.canonContradiction()+", "+root.canonContradiction()+", "+u.canonContradiction()+"\n"
2616 							+root.canonicized()+", "+u.canonicized()+", "+OVERLAP_TYPE_NAMES[o.type]+"\n"+b+", "+dir
2617 							+fixMultiJoins; //This assertion can fail if a multijoin is present
2618 						if(!u.canonicized()){
2619 							u.setCanonicized(true);
2620 							canonicized.add(u);
2621 						}
2622 						assert(u.canonicized());
2623 					}
2624 				}
2625 			}
2626 			if(EA){
2627 				for(Overlap o : root.overlapList){
2628 					assert(o.type==FORWARD || o.canonContradiction() || o.u1.canonContradiction() || o.u2.canonContradiction()) :
2629 						o+"\n"+contradictions+", "+o.canonContradiction()+", "+o.u1.canonContradiction()+", "+o.u2.canonContradiction()+", "+root.canonContradiction()+
2630 						"\n"+o.u1.canonicized()+", "+o.u2.canonicized()+", "+o.u1.visited()+", "+o.u2.visited();
2631 				}
2632 			}
2633 			return contradictions;
2634 		}
2635 
2636 		private int countCanonContradictions(Unit root, boolean includeKnown){
2637 			if(verbose){System.err.println("\ncountContradictions("+root.name()+", "+includeKnown+")\nroot.code1="+root.code1+"\n");}
2638 			assert(root.valid());
2639 			assert(root.visited());
2640 			assert(root.canonicized());
2641 //			assert(!root.canonContradiction());
2642 			if(root.overlapList==null){return 0;}
2643 			int contradictions=0;
2644 			for(Overlap o : root.overlapList){
2645 				Unit ub=(o.u1==root ? o.u2 : o.u1);
2646 				assert(o.u1==root || o.u2==root);
2647 				assert(root!=ub);
2648 				assert(ub.valid());
2649 
2650 				if(verbose){System.err.println("\nOverlap "+o+"\nu="+ub.name()+", canonicized="+ub.canonicized());}
2651 
2652 				boolean contradictory=(ub.canonicized() && (o.type==FORWARDRC || o.type==REVERSERC));
2653 				if(verbose){System.err.println("contradictory=            \t"+contradictory);}
2654 				if(contradictory){
2655 					if(!o.canonContradiction()){o.setCanonContradiction(true);}
2656 					if(includeKnown || !ub.canonContradiction()){
2657 						contradictions++;
2658 						if(!root.canonContradiction()){root.setCanonContradiction(true);}
2659 						if(!ub.canonContradiction()){ub.setCanonContradiction(true);}
2660 					}
2661 				}
2662 
2663 				assert(!o.canonContradiction() || (root.canonContradiction() || ub.canonContradiction())) :
2664 					"\n"+contradictory+", "+o.canonContradiction()+", "+root.canonContradiction()+", "+ub.canonContradiction();
2665 
2666 				assert(contradictory==o.canonContradiction()) : contradictory+", "+o.canonContradiction();
2667 				if(verbose){
2668 					System.err.println("root.canonContradiction()=\t"+root.canonContradiction());
2669 					System.err.println("u.canonContradiction()=   \t"+ub.canonContradiction());
2670 					System.err.println("o.canonContradiction()=   \t"+o.canonContradiction());
2671 					System.err.println("contradictions=           \t"+contradictions);
2672 				}
2673 			}
2674 			if(verbose){System.err.println("Final contradictions="+contradictions+"\n");}
2675 			return contradictions;
2676 		}
2677 
2678 		private String toString(ArrayList<Unit> cluster){
2679 			for(int i=0; i<cluster.size(); i++){
2680 				Unit u=cluster.get(i);
2681 				u.r.id=""+i;
2682 			}
2683 			StringBuilder sb=new StringBuilder(1000);
2684 			for(Unit u : cluster){
2685 				sb.append(">"+u.name()+"\n");
2686 				sb.append(new String(u.bases()));
2687 				sb.append("\n");
2688 			}
2689 			sb.append("\n*****\n");
2690 			for(Unit u : cluster){
2691 				sb.append("\n"+u.name()+":");
2692 				if(u.overlapList!=null){
2693 					for(Overlap o : u.overlapList){
2694 						Unit ub=(o.u1==u ? o.u2 : o.u1);
2695 						sb.append(" "+ub.name());
2696 					}
2697 				}
2698 			}
2699 			sb.append("\n");
2700 			return sb.toString();
2701 		}
2702 
2703 		private String toShortString(ArrayList<Unit> cluster){
2704 			for(int i=0; i<cluster.size(); i++){
2705 				Unit u=cluster.get(i);
2706 				u.r.id=""+i;
2707 			}
2708 			StringBuilder sb=new StringBuilder(1000);
2709 			for(Unit u : cluster){
2710 				sb.append("\n"+u.name()+":");
2711 				if(u.overlapList!=null){
2712 					for(Overlap o : u.overlapList){
2713 						Unit ub=(o.u1==u ? o.u2 : o.u1);
2714 						sb.append(" "+ub.name());
2715 					}
2716 				}
2717 			}
2718 			sb.append("\n");
2719 			return sb.toString();
2720 		}
2721 
2722 
2723 		/**
2724 		 * @param root
2725 		 * @param u2
2726 		 * @param o
2727 		 * @return Number of contradictions
2728 		 */
2729 		private int canonicize(final Unit root, final Unit u2, final Overlap o){
2730 			if(o.type==FORWARD){return 0;}
2731 			if(o.type==FORWARDRC || o.type==REVERSERC){
2732 				if(u2.canonicized()){return 1;}
2733 				u2.reverseComplement();
2734 				unitsFlippedT++;
2735 				for(Overlap o2 : u2.overlapList){
2736 					overlapsFlippedT++;
2737 					o2.flip(u2, bandy);
2738 				}
2739 				assert(o.type==FORWARD || o.type==REVERSE) : OVERLAP_TYPE_NAMES[o.type];
2740 			}
2741 			if(o.type==REVERSE){o.reverseDirection();}
2742 			assert(o.type==FORWARD);
2743 			assert(o.test(bandy, o.edits+maxEdits));
2744 			return 0;
2745 		}
2746 
2747 
2748 		/**
2749 		 * @param root
2750 		 * @param u2
2751 		 * @param o
2752 		 * @return true if no contradictions
2753 		 */
2754 		private boolean setOffset(final Unit root, final Unit u2, final Overlap o){
2755 			assert(root.offsetValid());
2756 			assert(!root.offsetContradiction());
2757 			int offset=calcOffset(root, u2, o);
2758 
2759 			if(u2.offsetValid()){return u2.offset()==offset;}
2760 			u2.setOffset(offset);
2761 
2762 			if(verbose){
2763 				System.err.println("\nroot = "+(root.name()==null ? root.r.numericID+"" : root.name())+", u2 = "+(u2.name()==null ? u2.r.numericID+"" : u2.name())
2764 						+"\no = "+o
2765 						+"\nroot.offset = "+root.offset()
2766 						+"\nu2.offset = "+u2.offset());
2767 			}
2768 
2769 			return true;
2770 		}
2771 
2772 
2773 		private int calcOffset(final Unit root, final Unit ub, final Overlap o){
2774 			assert(root.offsetValid());
2775 			if(o.type==FORWARD){
2776 				if(root==o.u1){
2777 					int dif=o.start1-o.start2;
2778 					if(verbose){System.err.println("root==o.u1=="+root.name()+", start1="+o.start1+"; u2==o.u2=="+ub.name()+", start2="+o.start2+", dif="+dif);}
2779 					return root.offset+dif;
2780 				}else{
2781 					int dif=o.start2-o.start1;
2782 					if(verbose){System.err.println("root==o.u2=="+root.name()+", start2="+o.start2+"; u2==o.u1=="+ub.name()+", start1="+o.start1+", dif="+dif);}
2783 					return root.offset+dif;
2784 				}
2785 			}else{
2786 				assert(false) : o;
2787 				throw new RuntimeException("TODO");
2788 			}
2789 		}
2790 
2791 
2792 		/**
2793 		 * @param cluster
2794 		 */
2795 		private void mergeLeaves(ArrayList<Unit> cluster) {
2796 			assert(false) : "TODO";
2797 			for(Unit u : cluster){
2798 
2799 			}
2800 		}
2801 
2802 		/**
2803 		 * @param cluster
2804 		 */
2805 		private void mergeInner(ArrayList<Unit> cluster) {
2806 			assert(false) : "TODO";
2807 			for(Unit u : cluster){
2808 
2809 			}
2810 		}
2811 
2812 		private ArrayList<ArrayList<Unit>> processedClustersT=new ArrayList<ArrayList<Unit>>(threadMaxReadsToBuffer);
2813 
2814 		long leafMergesT=0;
2815 		long innerMergesT=0;
2816 		long leafBaseMergesT=0;
2817 		long innerBaseMergesT=0;
2818 
2819 		long multiJoinFailuresT=0;
2820 		long multiJoinsFoundT=0;
2821 		long multiJoinBasesFoundT=0;
2822 		long unitsFlippedT=0;
2823 		long overlapsFlippedT=0;
2824 		long canonContradictoryOverlapsT=0;
2825 		long canonContradictoryClustersT=0;
2826 		long offsetContradictoryOverlapsT=0;
2827 		long offsetContradictoryClustersT=0;
2828 		long cycleOverlapsT=0;
2829 		long cycleClustersT=0;
2830 
2831 		private final boolean fixMultiJoinsT;
2832 		private final boolean canonicizeT;
2833 		private final boolean fixCanonContradictionsT;
2834 		private final boolean fixOffsetContradictionsT;
2835 		private final boolean mergeClustersT;
2836 		private final boolean mergeLeavesT;
2837 		private final boolean mergeInnerT;
2838 		private final BandedAligner bandy;
2839 	}
2840 
2841 
2842 	/**
2843 	 * @param cluster
2844 	 */
2845 	private void unvisit(ArrayList<Unit> cluster) {
2846 		for(Unit u : cluster){
2847 			if(u.visited()){u.setVisited(false);}
2848 		}
2849 	}
2850 
2851 	/**
2852 	 * @param cluster
2853 	 */
2854 	private void reorderClusterBreadthFirst(ArrayList<Unit> cluster) {
2855 		if(verbose){System.err.println("reorderClusterBreadthFirst");}
2856 
2857 		final int size=cluster.size();
2858 		Shared.sort(cluster); //Now it is in descending length
2859 		final Unit root=cluster.get(0);
2860 		assert(root.length()>=cluster.get(size-1).length()) : root.length()+", "+cluster.get(size-1).length()+", "+root.compareTo(cluster.get(size-1));
2861 
2862 		ArrayList<Unit> breadthFirst=new ArrayList<Unit>(cluster.size());
2863 		root.setVisited(true);
2864 //		System.err.println("root="+root.name());
2865 		breadthFirst.add(root);
2866 		for(int i=0; i<breadthFirst.size(); i++){
2867 			Unit u=breadthFirst.get(i);
2868 			Shared.sort(u.overlapList); //Sorted in descending overlap length
2869 			if(u.overlapList!=null){
2870 				for(Overlap o : u.overlapList){
2871 					if(!o.u1.visited()){
2872 						//					System.err.println("Visiting "+o.u1.name());
2873 						o.u1.setVisited(true);
2874 						breadthFirst.add(o.u1);
2875 					}
2876 					if(!o.u2.visited()){
2877 						//					System.err.println("Visiting "+o.u2.name());
2878 						o.u2.setVisited(true);
2879 						breadthFirst.add(o.u2);
2880 					}
2881 					//				System.err.println("***");
2882 					//				System.err.println(toShortString(breadthFirst));
2883 				}
2884 			}
2885 		}
2886 		for(Unit u : cluster){
2887 			assert(u.visited());
2888 			if(u.visited()){u.setVisited(false);}
2889 			if(EA){
2890 				if(u.overlapList!=null){
2891 					for(Overlap o : u.overlapList){assert(!o.visited());}
2892 				}
2893 			}
2894 		}
2895 //		System.err.println("***");
2896 //		System.err.println("Final:");
2897 //		System.err.println(toShortString(breadthFirst));
2898 		assert(cluster.size()==breadthFirst.size());
2899 		cluster.clear();
2900 		cluster.addAll(breadthFirst);
2901 	}
2902 
2903 
2904 
2905 	/** Returns next cluster larger than 1 element.
2906 	 * Singleton clusters are added directly to 'processed'. */
2907 	private ArrayList<Unit> nextCluster(){
2908 		synchronized(clusterQueue){
2909 			ArrayList<Unit> cluster=clusterQueue.poll();
2910 			assert(cluster==null || cluster.size()>1);
2911 //			while(cluster!=null && cluster.size()<2){
2912 ////				unmark(cluster);
2913 //				processedClustersT.add(cluster);
2914 //				cluster=clusterQueue.poll();
2915 //			}
2916 			return cluster;
2917 		}
2918 	}
2919 
2920 
2921 	/**
2922 	 * Creates Unit objects or uses ones already attached to reads.
2923 	 * Places them in local storage and percolates them to shared storage (codeMap), removing exact duplicates.
2924 	 * Also hashes tips and places these in shared affixMap.
2925 	 * Looks for containments in the affix map.
2926 	 * @author Brian Bushnell
2927 	 * @date Jul 24, 2013
2928 	 *
2929 	 */
2930 	private final class HashThread extends Thread{
2931 
2932 		public HashThread(boolean addToCodeMap_, boolean addToAffixMap_, boolean findMatches_, boolean findContainments_, boolean findOverlaps_){
2933 			addToCodeMapT=addToCodeMap_;
2934 			addToAffixMapT=addToAffixMap_;
2935 			findContainmentsT=findContainments_;
2936 			findOverlapsT=findOverlaps_;
2937 			findMatchesT=findMatches_;
2938 			tid=getTid();
2939 			crisq=new ArrayDeque<ConcurrentReadInputStream>(crisa.length);
2940 			for(int i=0; i<crisa.length; i++){
2941 //				if(verbose){System.err.println("Adding to crisq.");}
2942 				crisq.add(crisa[(i+tid)%crisa.length]);
2943 			}
2944 			bandy=(maxEdits>0 && (findOverlapsT || findContainmentsT) ? BandedAligner.makeBandedAligner(bandwidth) : null);
2945 
2946 //			assert(addToCodeMapT) : "addToCodeMapT="+addToCodeMapT+", addToAffixMapT="+addToAffixMapT+", findContainmentsT="+findContainmentsT+
2947 //			", findOverlapsT="+findOverlapsT+", findMatchesT="+findMatchesT+", convertToUpperCaseT="+convertToUpperCaseT+", numAffixMaps="+numAffixMaps;
2948 		}
2949 
2950 		@Override
2951 		public void run(){
2952 
2953 			ConcurrentReadInputStream cris=crisq.poll();
2954 
2955 			while(cris!=null){
2956 				ListNum<Read> ln=cris.nextList();
2957 				ArrayList<Read> reads=(ln!=null ? ln.list : null);
2958 				//			long xx=0;
2959 				while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
2960 
2961 					for(Read r : reads){
2962 						processReadOuter(r);
2963 					}
2964 
2965 					if(codeMapT!=null && (codeMapT.size()>threadMaxReadsToBuffer || basesStoredT>threadMaxBasesToBuffer)){
2966 						assert(addToCodeMapT);
2967 						long added=mergeMaps();
2968 						addedToMainT+=added;
2969 					}
2970 
2971 					cris.returnList(ln);
2972 					ln=cris.nextList();
2973 					reads=(ln!=null ? ln.list : null);
2974 				}
2975 				cris.returnList(ln);
2976 				if(codeMapT!=null && !codeMapT.isEmpty()){
2977 					long added=mergeMaps();
2978 					addedToMainT+=added;
2979 				}
2980 				cris=crisq.poll();
2981 			}
2982 
2983 			codeMapT=null;
2984 			localConflictList=null;
2985 			sharedConflictList=null;
2986 		}
2987 
2988 		/** Return true if this read was a member of this subset. */
2989 		private boolean processReadOuter(Read r1){
2990 			if(r1.length()<MINSCAF){return false;}
2991 			Read r2=r1.mate;
2992 
2993 			assert(r1.pairnum()==0);
2994 			assert(r2==null || r2.pairnum()==1);
2995 
2996 			if(!addToCodeMapT && r1.obj==null){
2997 				if(r1.bases!=null && r1.length()>=MINSCAF){
2998 					final Unit u=(r1.obj!=null ? (Unit)r1.obj : new Unit(r1));
2999 					assert(u.r==r1 && (r1.obj==u || r1.obj==null));
3000 					final long code=u.code1;
3001 					r1.obj=u;
3002 					assert(u.r==r1 && r1.obj==u);
3003 					if(r2!=null && r2.obj==null){r2.obj=new Unit(r2);}
3004 
3005 					//Check for subset membership
3006 					final boolean inSet=u.inSet();
3007 					if(inSet){
3008 						final Long codeL=code;
3009 						ArrayList<Unit> list=codeMap.get(codeL);
3010 						boolean found=false;
3011 						for(Unit u0 : list){
3012 							//Replace with existing read
3013 							if(u0.equals(u) && u0.r.numericID==r1.numericID){
3014 								r1=u0.r;
3015 								r2=r1.mate;
3016 								found=true;
3017 								break;
3018 							}
3019 						}
3020 						assert(list!=null);
3021 						if(!found){
3022 							return false;
3023 						}
3024 					}
3025 				}
3026 			}
3027 			boolean b=processRead(r1);
3028 			if(r2!=null){processRead(r2);}
3029 			return b;
3030 		}
3031 
3032 		/** Return true if this read was a member of this subset. */
3033 		private boolean processRead(Read r){
3034 			if(r.length()<MINSCAF){return false;}
3035 
3036 			final boolean inSet;
3037 			if(!storeName){r.id=null;}
3038 			if(!storeQuality){r.quality=null;}
3039 
3040 			if(forceTrimLeft>0 || forceTrimRight>0){//Added at request of RQC team
3041 				if(r!=null && r.length()>0){
3042 					TrimRead.trimToPosition(r, forceTrimLeft>0 ? forceTrimLeft : 0, forceTrimRight>0 ? forceTrimRight : r.length(), 1);
3043 				}
3044 			}
3045 			if(qTrimLeft || qTrimRight){
3046 				TrimRead.trimFast(r, qTrimLeft, qTrimRight, trimQ, trimE, 0);
3047 			}
3048 			if(r.length()<MINSCAF){return false;}
3049 
3050 			readsProcessedT++;
3051 			basesProcessedT+=r.length();
3052 
3053 			final Unit u=(r.obj!=null ? (Unit)r.obj : new Unit(r));
3054 			assert(u.r==r && (r.obj==u || r.obj==null));
3055 			final long code=u.code1;
3056 
3057 			//Check for subset membership
3058 			inSet=u.inSet();
3059 
3060 			r.obj=u;
3061 			assert(u.r==r && r.obj==u);
3062 			if(r.mate!=null && r.mate.obj==null){r.mate.obj=new Unit(r.mate);}
3063 
3064 			if(verbose){System.err.println("Generated "+code+" for sequence "+u.name()+"\t"+new String(r.bases, 0, Tools.min(40, r.length())));}
3065 
3066 			if(addToCodeMapT && inSet){
3067 				final Long codeL=code;
3068 				ArrayList<Unit> list=codeMapT.get(codeL);
3069 				if(list==null){
3070 					if(verbose){System.err.println("Unique.");}
3071 					list=new ArrayList<Unit>(1);
3072 					list.add(u);
3073 					basesStoredT+=r.length();
3074 					codeMapT.put(codeL, list);
3075 				}else{
3076 					if(verbose){System.err.println("Exists.");}
3077 					boolean match=false;
3078 					if(findMatchesT){
3079 						for(Unit u2 : list){
3080 							if(pairedEqualsRC(u, u2)){
3081 //								if(u.r.mate!=null){
3082 //									verbose=true;
3083 //
3084 //									Unit um=(Unit)u.r.mate.obj;
3085 //									Unit u2m=(Unit)u2.r.mate.obj;
3086 //
3087 //									if(verbose){
3088 //										System.err.println("********");
3089 //										System.err.println(u.r.toFastq());
3090 //										System.err.println(u.r.mate.toFastq());
3091 //										System.err.println("********");
3092 //										System.err.println(u2.r.toFastq());
3093 //										System.err.println(u2.r.mate.toFastq());
3094 //										System.err.println("********");
3095 //										System.err.println(u);
3096 //										System.err.println(u2);
3097 //										System.err.println(um);
3098 //										System.err.println(u2m);
3099 //										System.err.println("********");
3100 //										System.err.println(u.equals(u2));
3101 //										System.err.println(u.compareTo(u2));
3102 //										System.err.println("********");
3103 //										System.err.println(um.equals(u2m));
3104 //										System.err.println(um.compareTo(u2m));
3105 //										System.err.println("********");
3106 //									}
3107 //
3108 //									verbose=false;
3109 //								}
3110 								assert(u.r.mate==null || pairedEqualsRC((Unit)u.r.mate.obj, (Unit)u2.r.mate.obj)) :
3111 									u.r.toFastq()+"\n"+u2.r.toFastq()+"\n"+u.r.mate.toFastq()+"\n"+u2.r.mate.toFastq()+
3112 									"\n"+u+"\n"+u2+"\n"+u.r.mate.obj+"\n"+u2.r.mate.obj;
3113 //								if(verbose){System.err.println("Matches "+new String(r2.bases, 0, Tools.min(40, r2.length())));}
3114 								match=true;
3115 								u2.absorbMatch(u);
3116 								if(UNIQUE_ONLY){
3117 									synchronized(u2){
3118 										if(u2.valid()){
3119 											matchesT++;
3120 											baseMatchesT+=u2.length();
3121 											u2.setValid(false);
3122 											addDupe(u2.r);
3123 										}
3124 									}
3125 								}
3126 								break;
3127 							}
3128 						}
3129 					}
3130 					if(match){
3131 						addDupe(r);
3132 						matchesT++;
3133 						baseMatchesT+=r.length();
3134 						//							if(verbose){System.err.println("matchesT="+matchesT+", baseMatchesT="+baseMatchesT);}
3135 					}else{
3136 						collisionsT++;
3137 						if(verbose){System.err.println("False collision; count = "+collisionsT);}
3138 						list.add(u);
3139 						basesStoredT+=r.length();
3140 					}
3141 				}
3142 			}
3143 
3144 			if(findContainmentsT){
3145 				int x=findContainments(u);
3146 			}
3147 
3148 			if(findOverlapsT){
3149 				int x=findOverlaps(u);
3150 			}
3151 
3152 			return inSet;
3153 		}
3154 
3155 		private int findContainments(final Unit u){
3156 			if(minLengthPercent<=0 && maxSubs<=0 && minIdentity>=100 && !u.valid()){return 0;}
3157 			final byte[] bases=u.bases();
3158 			final int shift=2*k;
3159 			final int shift2=shift-2;
3160 			final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
3161 			long kmer=0;
3162 			long rkmer=0;
3163 			int hits=0;
3164 			int currentContainments=0;
3165 			int len=0;
3166 
3167 			if(bases==null || bases.length<k){return -1;}
3168 			final LongM key=new LongM();
3169 
3170 			for(int i=0; i<bases.length; i++){
3171 				byte b=bases[i];
3172 				long x=baseToNumber[b];
3173 				long x2=baseToComplementNumber[b];
3174 				kmer=((kmer<<2)|x)&mask;
3175 				rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
3176 				if(HASH_NS || AminoAcid.isFullyDefined(b)){len++;}
3177 				else{len=0; rkmer=0;}
3178 //				if(verbose){System.err.println("Scanning i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
3179 				if(len>=k){
3180 					key.set(Tools.max(kmer, rkmer)); //Canonical
3181 					for(int am=0; am<affixMaps.length; am++){
3182 						ArrayList<Unit> list=affixMaps[am].get(key);
3183 						if(list!=null){
3184 							for(Unit u2 : list){
3185 								if(u!=u2 && !u.equals(u2)){
3186 									if(u2.valid()){
3187 										hits++;
3188 										if(verbose){
3189 											System.err.println("\nFound potential containment at am="+am+", i="+i+", key="+key.value()+
3190 													", pre="+Arrays.toString(u2.prefixes)+", suf="+Arrays.toString(u2.suffixes)+
3191 													", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i, k)));
3192 										}
3193 										if(u.contains(u2, i, key, bandy, am)){
3194 											synchronized(u2){
3195 												if(u2.valid()){
3196 													currentContainments++;
3197 													baseContainmentsT+=u2.length();
3198 													u2.setValid(false);
3199 													addDupe(u2.r);
3200 												}
3201 											}
3202 											if(UNIQUE_ONLY){
3203 												synchronized(u){
3204 													if(u.valid()){
3205 														currentContainments++;
3206 														baseContainmentsT+=u.length();
3207 														u.setValid(false);
3208 														addDupe(u.r);
3209 													}
3210 												}
3211 											}
3212 
3213 											if(verbose){System.err.println("Added containment "+u2);}
3214 										}
3215 									}
3216 								}
3217 							}
3218 						}
3219 					}
3220 				}
3221 			}
3222 //			assert(false) : hits+", "+currentContainments+", "+baseContainments+"\n"+containmentMapT+"\n";
3223 
3224 			containmentCollisionsT+=(hits-currentContainments);
3225 //			outstream.println("hits="+hits+", currentContainments="+currentContainments);
3226 			containmentsT+=currentContainments;
3227 			return hits;
3228 		}
3229 
3230 		private int findOverlaps(final Unit u){
3231 //			if(minLengthPercent<=0 && maxSubs<=0 && minIdentity>=100 && !u.valid()){return 0;}
3232 //			if(u.overlapList!=null){u.overlapList.clear();}
3233 			final byte[] bases=u.bases();
3234 			final int shift=2*k;
3235 			final int shift2=shift-2;
3236 			final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
3237 			long kmer=0;
3238 			long rkmer=0;
3239 			int hits=0;
3240 			int currentOverlaps=0;
3241 			int len=0;
3242 
3243 			if(bases==null || bases.length<k){return -1;}
3244 			final LongM key=new LongM();
3245 
3246 			boolean quit=false;
3247 
3248 			for(int i=0; i<bases.length && !quit; i++){
3249 				byte b=bases[i];
3250 				long x=baseToNumber[b];
3251 				long x2=baseToComplementNumber[b];
3252 				kmer=((kmer<<2)|x)&mask;
3253 				rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
3254 				if(HASH_NS || AminoAcid.isFullyDefined(b)){len++;}
3255 				else{len=0; rkmer=0;}
3256 //				if(verbose){System.err.println("Scanning i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
3257 				if(len>=k){//valid key
3258 					key.set(Tools.max(kmer, rkmer)); //Canonical key
3259 					for(int am=0; am<affixMaps.length; am++){
3260 						ArrayList<Unit> list=affixMaps[am].get(key);
3261 						if(list!=null){//found a key collision
3262 							for(Unit u2 : list){
3263 								if(quit){break;}//too many edges
3264 								int u1cluster=-1, u2cluster=-2;
3265 								if(preventTransitiveOverlaps && u!=u2){
3266 									u1cluster=u.determineCluster();
3267 									u2cluster=u2.determineCluster();
3268 								}
3269 								if(u1cluster!=u2cluster && u!=u2 && !u.equals(u2) && u2.r!=u.r.mate){//TODO:  Not sure why identical things are banned...  possibly relates to avoiding inter-pair edges?
3270 									if(u2.valid()){
3271 										hits++;
3272 
3273 //										boolean flag=(u.code1==-3676200394282040623L && u2.code1==-7034423913727372751L) ||
3274 //												(u2.code1==-3676200394282040623L && u.code1==-7034423913727372751L);
3275 										final boolean flag=false;
3276 										if(verbose || flag){
3277 											System.err.println("\nFound potential overlap at am="+am+", i="+i+", key="+key.value()+
3278 													", pre="+Arrays.toString(u2.prefixes)+", suf="+Arrays.toString(u2.suffixes)+
3279 													", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i, k)));
3280 										}
3281 
3282 										final Overlap o;
3283 										if(maxEdges>1000000000 || u.overlapList==null || u2.overlapList==null ||
3284 												(u.overlapList.size()<maxEdges && u2.overlapList.size()<maxEdges2)){
3285 											 o=u.makeOverlap(u2, i, key, bandy, am);
3286 
3287 										}else{
3288 											o=null;
3289 											if(u.overlapList.size()>maxEdges){quit=true;}
3290 										}
3291 										if(o!=null){
3292 
3293 											if(preventTransitiveOverlaps){
3294 												 mergeClusterIds(u1cluster, u2cluster);
3295 											}
3296 
3297 											assert(o.test(bandy, o.edits+maxEdits)) : o;
3298 											if(verbose || flag){System.err.println("Created overlap "+o);}
3299 
3300 											long comp=u.length()-u2.length();
3301 											if(comp==0){comp=u.code1-u2.code1;}
3302 											if(comp==0){comp=u.code2-u2.code2;}
3303 											if(comp==0){comp=u.prefixes[0]-u2.prefixes[0];}
3304 											if(comp==0){comp=u.suffixes[0]-u2.suffixes[0];}
3305 											if(comp==0){comp=(u.r.numericID-u2.r.numericID);}
3306 											assert(comp!=0) : u+", "+u2;
3307 											Unit ua=(comp<0 ? u : u2);
3308 											Unit ub=(comp<0 ? u2 : u);
3309 											assert(ua!=ub);
3310 											if(verbose || flag){
3311 												System.err.println("ua="+ua.code1);
3312 												System.err.println("ub="+ub.code1);
3313 												System.err.println("u ="+u.code1);
3314 												System.err.println("u2="+u2.code1);
3315 												System.err.println("u.r ="+u.r.numericID);
3316 												System.err.println("u2.r="+u2.r.numericID);
3317 												System.err.println("ua contains o? "+ua.alreadyHas(o));
3318 												System.err.println("ub contains o? "+ub.alreadyHas(o));
3319 												System.err.println("ua.list="+ua.overlapList);
3320 												System.err.println("ub.list="+ub.overlapList);
3321 											}
3322 
3323 //											assert(ua.alreadyHas(o)==ub.alreadyHas(o));
3324 
3325 											final boolean uaContainedOverlap;
3326 
3327 											synchronized(ua){
3328 												if(ua.overlapList==null){ua.overlapList=new ArrayList<Overlap>(2);}
3329 												if(!ua.overlapList.contains(o)){
3330 													if(EA){
3331 														synchronized(ub){
3332 															assert(ub.overlapList==null || !ub.overlapList.contains(o)) :
3333 																ua.alreadyHas(o)+", "+ub.alreadyHas(o)+"\n"+o+"\n"+ub.overlapList.get(ub.overlapList.indexOf(o))+
3334 																"\nua.list="+ua.overlapList+"\nub.list="+ub.overlapList+"\nu.code1="+u.code1+"\nu2.code1="+u2.code1;
3335 														}
3336 													}
3337 													currentOverlaps++;
3338 													baseOverlapsT+=o.overlapLen;
3339 													ua.overlapList.add(o);
3340 													if(verbose || flag){System.err.println("Added overlap "+o);}
3341 													uaContainedOverlap=false;
3342 												}else{
3343 													if(verbose || flag){System.err.println("Already contained overlap "+o);}
3344 													hits--;
3345 													uaContainedOverlap=true;
3346 												}
3347 											}
3348 
3349 											if(!uaContainedOverlap){
3350 												synchronized(ub){
3351 													if(ub.overlapList==null){ub.overlapList=new ArrayList<Overlap>(2);}
3352 													assert(!ub.overlapList.contains(o));
3353 													ub.overlapList.add(o);
3354 													if(verbose || flag){System.err.println("Added overlap "+o);}
3355 												}
3356 											}else{
3357 												if(verbose || flag){System.err.println("Already contained overlap "+o);}
3358 											}
3359 
3360 
3361 //											assert(ua.alreadyHas(o));
3362 //											assert(ub.alreadyHas(o));
3363 //											assert(ua.overlapList.contains(o));
3364 //											assert(ub.overlapList.contains(o));
3365 											if(verbose || flag){
3366 												System.err.println("ua contains o? "+ua.alreadyHas(o));
3367 												System.err.println("ub contains o? "+ub.alreadyHas(o));
3368 												System.err.println("ua.list="+ua.overlapList);
3369 												System.err.println("ub.list="+ub.overlapList);
3370 											}
3371 										}
3372 									}
3373 								}
3374 							}
3375 						}
3376 					}
3377 				}
3378 			}
3379 			if(EA){
3380 				synchronized(u){
3381 					if(u.overlapList!=null && u.overlapList.isEmpty()){
3382 						assert(false) : "Why would this happen?";
3383 						u.overlapList=null;
3384 					}
3385 				}
3386 			}
3387 //			assert(false) : hits+", "+currentOverlaps+", "+baseOverlaps+"\n"+overlapMapT+"\n";
3388 
3389 //			assert(hits==currentOverlaps) : hits+", "+currentOverlaps;
3390 
3391 			overlapCollisionsT+=(hits-currentOverlaps);
3392 //			outstream.println("hits="+hits+", currentOverlaps="+currentOverlaps);
3393 			overlapsT+=currentOverlaps;
3394 			return hits;
3395 		}
3396 
3397 		/** Insert reads processed by a thread into the shared code and affix maps.
3398 		 * If operating in subset mode, only store reads with code equal to subset mod subsetCount. */
3399 		private long mergeMaps(){
3400 			if(verbose){System.err.println("Merging maps.");}
3401 			long novelReads=0, novelKeys=0;
3402 			long collisionReads=0;
3403 			long mergedReads=0;
3404 
3405 			assert(localConflictList.isEmpty());
3406 			assert(sharedConflictList.isEmpty());
3407 
3408 			synchronized(codeMap){
3409 				for(Long key : codeMapT.keySet()){
3410 					if(codeMap.containsKey(key)){
3411 						localConflictList.add(codeMapT.get(key));
3412 						sharedConflictList.add(codeMap.get(key));
3413 					}else{
3414 						ArrayList<Unit> list=codeMapT.get(key);
3415 						codeMap.put(key, list);
3416 						addedList.addAll(list);
3417 						novelReads+=list.size();
3418 						novelKeys++;
3419 					}
3420 				}
3421 			}
3422 
3423 			if(verbose){System.err.println("Novel reads = "+novelReads+", conflicts = "+localConflictList.size());}
3424 
3425 			for(int i=0; i<localConflictList.size(); i++){
3426 				ArrayList<Unit> listT=localConflictList.get(i);
3427 				ArrayList<Unit> list=sharedConflictList.get(i);
3428 				synchronized(list){
3429 					for(Unit u : listT){
3430 						if(verbose){System.err.println("Processing novel unit "+u.name());}
3431 						boolean match=false;
3432 						if(findMatchesT){
3433 							for(Unit u2 : list){
3434 								if(pairedEqualsRC(u, u2)){
3435 									//								if(verbose){System.err.println("Matches "+new String(r2.bases, 0, Tools.min(40, r2.length())));}
3436 									u2.absorbMatch(u);
3437 									if(UNIQUE_ONLY){
3438 										synchronized(u2){
3439 											if(u2.valid()){
3440 												mergedReads++;
3441 												baseMatchesT+=u2.length();
3442 												u2.setValid(false);
3443 												addDupe(u2.r);
3444 											}
3445 										}
3446 									}
3447 									match=true;
3448 									break;
3449 								}
3450 							}
3451 						}
3452 						if(match){
3453 							addDupe(u.r);
3454 							mergedReads++;
3455 							baseMatchesT+=u.length();
3456 							if(verbose){System.err.println("matchesT="+matchesT+", baseMatchesT="+baseMatchesT);}
3457 						}else{
3458 							collisionReads++;
3459 							if(verbose){System.err.println("False collision; count = "+collisionReads);}
3460 							list.add(u);
3461 							addedList.add(u);
3462 						}
3463 					}
3464 				}
3465 			}
3466 			matchesT+=mergedReads;
3467 			collisionsT+=collisionReads;
3468 			if(verbose){System.err.println("Done Merging.");}
3469 			if(verbose){System.err.println("mapT.size="+codeMapT.size()+", basesStoredT="+basesStoredT);}
3470 
3471 			codeMapT.clear();
3472 			localConflictList.clear();
3473 			sharedConflictList.clear();
3474 
3475 			if(!addedList.isEmpty()){
3476 				if(addToAffixMapT){
3477 					final LongM p=new LongM(-1, true);
3478 					assert(affixMaps!=null);
3479 					assert(affixMaps[0]!=null || (affixMaps.length>1 && affixMaps[1]!=null));
3480 
3481 					for(int i=0; i<numAffixMaps; i++){
3482 						HashMap<LongM, ArrayList<Unit>> map=affixMaps[i];
3483 						if(map!=null && (i>0 || !ignoreAffix1)){
3484 							synchronized(map){
3485 								for(Unit u : addedList){
3486 									final long prefix=u.prefixes[i], suffix=u.suffixes[i];
3487 									if(verbose){System.err.println("Processing affixes for "+u.name());}
3488 									if(prefix!=-1 || prefix!=suffix){
3489 										if(verbose){System.err.println("Using prefix "+prefix);}
3490 										p.set(prefix);
3491 										ArrayList<Unit> alu=map.get(p);
3492 										if(alu==null){
3493 											if(verbose){System.err.println("Made new alu for "+p);}
3494 											alu=new ArrayList<Unit>(2);
3495 											map.put(p.iCopy(), alu);
3496 										}
3497 										if(alu.size()<maxAffixCopies){
3498 											if(verbose){System.err.println("Added "+u.name());}
3499 											alu.add(u);
3500 										}
3501 										if(verbose){System.err.println(map.get(p));}
3502 									}
3503 									if(storeSuffix && prefix!=suffix){
3504 										if(verbose){System.err.println("Using suffix "+suffix);}
3505 										p.set(suffix);
3506 										ArrayList<Unit> alu=map.get(p);
3507 										if(alu==null){
3508 											if(verbose){System.err.println("Made new alu for "+p);}
3509 											alu=new ArrayList<Unit>(2);
3510 											map.put(p.iCopy(), alu);
3511 										}
3512 										if(alu.size()<maxAffixCopies){
3513 											if(verbose){System.err.println("Added "+u.name());}
3514 											alu.add(u);
3515 										}
3516 										if(verbose){System.err.println(map.get(p));}
3517 									}
3518 								}
3519 							}
3520 						}
3521 					}
3522 				}
3523 			}
3524 
3525 			addedList.clear();
3526 			basesStoredT=0;
3527 			return collisionReads+novelReads;
3528 		}
3529 
3530 		private int getTid(){
3531 			synchronized(HashThread.class){
3532 				int x=tcount;
3533 				tcount++;
3534 				return x;
3535 			}
3536 		}
3537 
3538 		private LinkedHashMap<Long, ArrayList<Unit>> codeMapT=new LinkedHashMap<Long, ArrayList<Unit>>(threadMaxReadsToBuffer*8);
3539 		private ArrayList<Unit> addedList=new ArrayList<Unit>(threadMaxReadsToBuffer);
3540 		private ArrayList<ArrayList<Unit>> localConflictList=new ArrayList<ArrayList<Unit>>(threadMaxReadsToBuffer);
3541 		private ArrayList<ArrayList<Unit>> sharedConflictList=new ArrayList<ArrayList<Unit>>(threadMaxReadsToBuffer);
3542 
3543 		long matchesT=0;
3544 		long baseMatchesT=0;
3545 		long baseContainmentsT=0;
3546 		long collisionsT=0;
3547 		long containmentsT=0;
3548 		long containmentCollisionsT=0;
3549 		long basesStoredT=0;
3550 		long addedToMainT=0;
3551 		long readsProcessedT=0;
3552 		long basesProcessedT=0;
3553 		long overlapsT=0;
3554 		long baseOverlapsT=0;
3555 		long overlapCollisionsT=0;
3556 
3557 		private final boolean addToCodeMapT;
3558 		private final boolean addToAffixMapT;
3559 		private final boolean findContainmentsT;
3560 		private final boolean findOverlapsT;
3561 		private final boolean findMatchesT;
3562 //		private final boolean convertToUpperCaseT;
3563 		private final int tid;
3564 		private final ArrayDeque<ConcurrentReadInputStream> crisq;
3565 		private final BandedAligner bandy;
3566 	}
3567 
3568 	public static boolean equalsRC(byte[] a, byte[] b){
3569 		if(a==b){return true;}
3570 		if(a==null || b==null){return false;}
3571 		if(a.length!=b.length){return false;}
3572 
3573 		boolean ca=isCanonical(a);
3574 		boolean cb=isCanonical(b);
3575 
3576 		if(ca==cb){
3577 			for(int i=0; i<a.length; i++){
3578 				final byte aa=a[i], bb=b[i];
3579 				if(aa!=bb){return false;}
3580 			}
3581 		}else{
3582 			for(int i=0, j=b.length-1; i<a.length; i++, j--){
3583 				final byte aa=a[i], bb=baseToComplementExtended[b[j]];
3584 				if(aa!=bb){return false;}
3585 			}
3586 		}
3587 		return true;
3588 	}
3589 
3590 	public static boolean pairedEqualsRC(Unit ua, Unit ub){
3591 		if(verbose){System.err.println("pairedEqualsRC("+ua.name()+", "+ub.name()+")");}
3592 		if(verbose){System.err.println("ea");}
3593 		boolean b=equalsRC(ua, ub);
3594 		if(verbose){System.err.println("eb");}
3595 		if(!b){return false;}
3596 		if(verbose){System.err.println("ec");}
3597 
3598 		if(ua.r!=null && ub.r!=null){
3599 			if(verbose){System.err.println("ed");}
3600 			assert((ua.r.mate==null)==(ub.r.mate==null));
3601 			if(verbose){System.err.println("ee");}
3602 			if(ua.r.mate!=null && ub.r.mate!=null){
3603 				if(verbose){System.err.println("ef");}
3604 				return ua.canonical()==ub.canonical() && ua.r.pairnum()==ub.r.pairnum() && Tools.compare(ua.r.mate.bases, ub.r.mate.bases)==0;
3605 			}
3606 			if(verbose){System.err.println("eg");}
3607 		}
3608 		if(verbose){System.err.println("eh");}
3609 		return true;
3610 	}
3611 
3612 	private static boolean equalsRC(Unit ua, Unit ub){
3613 		if(verbose){System.err.println("equalsRC("+ua.name()+", "+ub.name()+")");}
3614 		return ua.code1==ub.code1 && ua.code2==ub.code2 && (ua.canonical()==ub.canonical() ? (ua.prefixes[0]==ub.prefixes[0] && ua.suffixes[0]==ub.suffixes[0]) :
3615 			 (ua.prefixes[0]==ub.suffixes[0] && ua.suffixes[0]==ub.prefixes[0])) && compareRC(ua, ub)==0;
3616 	}
3617 
3618 	public static int comparePairedRC(Unit ua, Unit ub){
3619 		if(verbose){System.err.println("comparePairedRC("+ua.name()+", "+ub.name()+")");}
3620 		int x=compareRC(ua, ub);
3621 		if(x!=0){return x;}
3622 
3623 		if(ua.r!=null && ub.r!=null && ua.r.mate!=null && ub.r.mate!=null){
3624 			if(ua.r.pairnum()!=ub.r.pairnum()){return ua.r.pairnum()-ub.r.pairnum();}
3625 			return compareRC((Unit)ua.r.mate.obj, (Unit)ub.r.mate.obj);
3626 		}
3627 		return 0;
3628 	}
3629 
3630 	//TODO
3631 	//This is really for sorting by length.
3632 	private static int compareRC(Unit ua, Unit ub){
3633 		if(verbose){System.err.println("compareRC("+ua.name()+", "+ub.name()+")");}
3634 		if(ua==ub){return 0;}
3635 		if(verbose){System.err.println("a");}
3636 		if(verbose){System.err.println("a1");}
3637 		if(ua.length()!=ub.length()){return ub.length()-ua.length();}
3638 		if(verbose){System.err.println("a2");}
3639 
3640 		if(REQUIRE_MATCHING_NAMES){
3641 			if(ua.name()!=null && ub.name()!=null){
3642 				int x=ua.name().compareTo(ub.name());
3643 				if(x!=0){return x;}
3644 			}
3645 		}
3646 		if(verbose){System.err.println("a3");}
3647 
3648 		if(ua.r==null || ub.r==null){
3649 			if(verbose){System.err.println("b");}
3650 			if(verbose){System.err.println("b1");}
3651 			if(ua.canonical()){
3652 				if(verbose){System.err.println("c");}
3653 				if(ub.canonical()){
3654 					if(ua.prefixes[0]!=ub.prefixes[0]){return ua.prefixes[0]>ub.prefixes[0] ? 1 : -1;}
3655 					if(ua.suffixes[0]!=ub.suffixes[0]){return ua.suffixes[0]>ub.suffixes[0] ? 1 : -1;}
3656 				}else{
3657 					if(ua.prefixes[0]!=ub.suffixes[0]){return ua.prefixes[0]>ub.suffixes[0] ? 1 : -1;}
3658 					if(ua.suffixes[0]!=ub.prefixes[0]){return ua.suffixes[0]>ub.prefixes[0] ? 1 : -1;}
3659 				}
3660 			}else{
3661 				if(verbose){System.err.println("d");}
3662 				if(ub.canonical()){
3663 					if(ua.suffixes[0]!=ub.prefixes[0]){return ua.suffixes[0]>ub.prefixes[0] ? 1 : -1;}
3664 					if(ua.prefixes[0]!=ub.suffixes[0]){return ua.prefixes[0]>ub.suffixes[0] ? 1 : -1;}
3665 				}else{
3666 					if(ua.suffixes[0]!=ub.suffixes[0]){return ua.suffixes[0]>ub.suffixes[0] ? 1 : -1;}
3667 					if(ua.prefixes[0]!=ub.prefixes[0]){return ua.prefixes[0]>ub.prefixes[0] ? 1 : -1;}
3668 				}
3669 			}
3670 			if(verbose){System.err.println("e");}
3671 			if(ua.code1!=ub.code1){return ua.code1>ub.code1 ? 1 : -1;}
3672 			if(ua.code2!=ub.code2){return ua.code2>ub.code2 ? 1 : -1;}
3673 
3674 			return ua.pairnum()-ub.pairnum();
3675 		}
3676 		if(verbose){System.err.println("f");}
3677 		final byte[] a=ua.r.bases, b=ub.r.bases;
3678 		if(a==b){return 0;}
3679 		if(a==null || b==null){return a==null ? -1 : 1;}
3680 		if(verbose){System.err.println("g");}
3681 
3682 		if(ua.canonical()==ub.canonical()){
3683 			if(verbose){System.err.println("h");}
3684 			if(ua.canonical() && ub.canonical()){
3685 				for(int i=0; i<a.length; i++){
3686 					final byte aa=a[i], bb=b[i];
3687 					if(aa!=bb){return aa-bb;}
3688 				}
3689 			}else{
3690 				for(int i=a.length-1; i>=0; i--){
3691 					final byte aa=baseToComplementExtended[a[i]], bb=baseToComplementExtended[b[i]];
3692 					if(aa!=bb){return aa-bb;}
3693 				}
3694 			}
3695 		}else{
3696 			if(verbose){System.err.println("i");}
3697 			if(ua.canonical()){
3698 				for(int i=0, j=b.length-1; i<a.length; i++, j--){
3699 					final byte aa=a[i], bb=baseToComplementExtended[b[j]];
3700 					if(aa!=bb){return aa-bb;}
3701 				}
3702 			}else{
3703 				for(int i=a.length-1, j=0; i>=0; i--, j++){
3704 					final byte aa=baseToComplementExtended[a[i]], bb=b[j];
3705 					if(aa!=bb){return aa-bb;}
3706 				}
3707 			}
3708 		}
3709 
3710 		if(verbose){System.err.println("j");}
3711 		return ua.pairnum()-ub.pairnum();
3712 	}
3713 
3714 	private static long hashTip(byte[] bases, boolean prefix, int k, int skipInitialBases){
3715 		if(bases==null || bases.length<k){return -1;}
3716 
3717 		final int shift=2*k;
3718 		final int shift2=shift-2;
3719 		final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
3720 		long kmer=0;
3721 		long rkmer=0;
3722 		int len=0;
3723 
3724 		final int start=(prefix ? 0+skipInitialBases : bases.length-k-skipInitialBases);
3725 		final int stop=start+k;
3726 
3727 //		if(verbose){
3728 //			System.err.println("\n"+new String(bases));
3729 //			System.err.println("prefix="+prefix+", start="+start+", stop="+stop);
3730 ////			System.err.print(new String(bases));
3731 //		}
3732 		for(int i=start; i<stop; i++){
3733 			byte b=bases[i];
3734 //			if(verbose){System.err.print((char)b);}
3735 			long x=baseToNumber[b];
3736 			long x2=baseToComplementNumber[b];
3737 			kmer=((kmer<<2)|x)&mask;
3738 			rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
3739 			len++;
3740 		}
3741 		if(verbose){System.err.println(new String(bases, start, k)+" = "+Tools.max(kmer, rkmer));}
3742 		assert(len==k) : len+","+k;
3743 		return Tools.max(kmer, rkmer);
3744 	}
3745 
3746 	private static final int calcMaxEdits(int maxEdits, float minIdentityMult, int len){
3747 		return minIdentityMult==0 ? maxEdits : Tools.max(maxEdits, (int)Math.round(len*minIdentityMult));
3748 	}
3749 
3750 
3751 	private class Overlap implements Comparable<Overlap>{
3752 
3753 		public Overlap(Unit u1_, Unit u2_, int type_, int start1_, int start2_, int stop1_, int stop2_, int len_, int mismatches_, int edits_, BandedAligner bandy){
3754 			assert(u1_!=u2_);
3755 			if(verbose){System.err.println("\nCreating an overlap.");}
3756 			u1=u1_;
3757 			u2=u2_;
3758 			type=type_;
3759 			start1=start1_;
3760 			start2=start2_;
3761 			stop1=stop1_;
3762 			stop2=stop2_;
3763 			overlapLen=len_;
3764 			mismatches=mismatches_;
3765 			edits=edits_;
3766 
3767 			assert(Tools.absdif(Tools.absdif(start1, stop1), Tools.absdif(start2, stop2))<=maxEdits) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)
3768 				+"\n>1a\n"+new String(u1.r.bases, Tools.min(start1, stop1), Tools.max(start1, stop1)-Tools.min(start1, stop1)+1)
3769 				+"\n>2a\n"+new String(u2.r.bases, Tools.min(start2, stop2), Tools.max(start2, stop2)-Tools.min(start2, stop2)+1);
3770 
3771 			assert(start1>=0 && start1<=u1.length()) : "type "+type+": "+start1+", "+stop1+", "+u1.length()+", "+start2+", "+stop2+", "+u2.length();
3772 			assert(stop1>=0 && stop1<=u1.length()) :   "type "+type+": "+start1+", "+stop1+", "+u1.length()+", "+start2+", "+stop2+", "+u2.length();
3773 			assert(start2>=0 && start2<=u2.length()) : "type "+type+": "+start1+", "+stop1+", "+u1.length()+", "+start2+", "+stop2+", "+u2.length();
3774 			assert(stop2>=0 && stop2<=u2.length()) :   "type "+type+": "+start1+", "+stop1+", "+u1.length()+", "+start2+", "+stop2+", "+u2.length();
3775 
3776 			assert(type==FORWARD || type==FORWARDRC || type==REVERSE || type==REVERSERC);
3777 
3778 			if(verbose){System.err.println(this);}
3779 
3780 			assert(Tools.absdif(Tools.absdif(start1, stop1), Tools.absdif(start1, stop1))<=maxEdits);
3781 
3782 			assert(test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)
3783 				+"\n>1a\n"+new String(u1.r.bases, Tools.min(start1, stop1), Tools.max(start1, stop1)-Tools.min(start1, stop1)+1)
3784 				+"\n>2a\n"+new String(u2.r.bases, Tools.min(start2, stop2), Tools.max(start2, stop2)-Tools.min(start2, stop2)+1);
3785 			if(verbose){System.err.println("Passed test 1.");}
3786 
3787 //			bandy.verbose=true;
3788 //			test(bandy);
3789 //			assert(false);
3790 
3791 			assert(u1!=u2);
3792 			u1.firstInOverlap(u2);
3793 			u2.firstInOverlap(u1);
3794 			assert(u1.length()!=u2.length() || u1.code1!=u2.code1 || u1.code2!=u2.code2 || (u1.r!=null && u1.r.mate!=null)) : "Collision? \n"+this+"\n"+u1+"\n"+u2;
3795 			assert(u1.firstInOverlap(u2)!=u2.firstInOverlap(u1)) :
3796 				"\nu1.firstInOverlap(u2)="+u1.firstInOverlap(u2)+"\nu2.firstInOverlap(u1)="+u2.firstInOverlap(u1)+"\nu1="+u1+"\nu2="+u2;
3797 
3798 			if(!u1.firstInOverlap(u2)){
3799 				if(verbose){System.err.println("\nSwapping.");}
3800 				swap();
3801 				if(verbose){System.err.println(this);}
3802 
3803 				if(EA && !customBandwidth && !test(bandy, edits+maxEdits)){
3804 					System.err.println("\n"+this);
3805 					swap();
3806 					System.err.println("\n"+this);
3807 					assert(test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)+"\n";
3808 					System.err.println("Passed test 2a, "+bandy.lastEdits+" edits.\n");
3809 					swap();
3810 					System.err.println("\n"+this);
3811 					assert(test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)
3812 						+"\n>1a\n"+new String(u1.r.bases, Tools.min(start1, stop1), Tools.max(start1, stop1)-Tools.min(start1, stop1)+1)
3813 						+"\n>2a\n"+new String(u2.r.bases, Tools.min(start2, stop2), Tools.max(start2, stop2)-Tools.min(start2, stop2)+1);
3814 					System.err.println("Passed test 2b, "+bandy.lastEdits+" edits.\n");
3815 				}
3816 
3817 				assert(customBandwidth || test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)
3818 					+"\n>1a\n"+new String(u1.r.bases, Tools.min(start1, stop1), Tools.max(start1, stop1)-Tools.min(start1, stop1)+1)
3819 					+"\n>2a\n"+new String(u2.r.bases, Tools.min(start2, stop2), Tools.max(start2, stop2)-Tools.min(start2, stop2)+1);
3820 				if(verbose){System.err.println("Passed test 2.");}
3821 			}
3822 
3823 			if(type==REVERSE || type==REVERSERC){
3824 				if(verbose){System.err.println("\nReversing.");}
3825 				reverseDirection();
3826 				if(verbose){System.err.println(this);}
3827 
3828 				if(EA && !Shared.anomaly && !customBandwidth && bandy!=null && !test(bandy, edits+maxEdits)){
3829 					Shared.anomaly=true;
3830 					BandedAligner.verbose=true;
3831 					System.err.println("\n********** Failed test 3, "+bandy.lastEdits+" edits. ***************\n");
3832 					reverseDirection();
3833 					System.err.println(this);
3834 					assert(test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)+"\n";
3835 					System.err.println("Passed test 3a, "+bandy.lastEdits+" edits.\n");
3836 					reverseDirection();
3837 					System.err.println(this);
3838 					assert(test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)
3839 						+"\n>1a\n"+new String(u1.r.bases, Tools.min(start1, stop1), Tools.max(start1, stop1)-Tools.min(start1, stop1)+1)
3840 						+"\n>2a\n"+new String(u2.r.bases, Tools.min(start2, stop2), Tools.max(start2, stop2)-Tools.min(start2, stop2)+1);
3841 					System.err.println("Passed test 3b, "+bandy.lastEdits+" edits.\n");
3842 					BandedAligner.verbose=false;
3843 				}
3844 
3845 				assert(customBandwidth || test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)+"\n";
3846 				if(verbose){System.err.println("Passed test 3.");}
3847 			}
3848 			//Now all overlaps should be FORWARD or FORWARDRC and u1 should be at least as big as u2
3849 			assert(type==FORWARD || type==FORWARDRC);
3850 			assert(u1.length()>=u2.length());
3851 			assert(u1.firstInOverlap(u2));
3852 			assert(!u2.firstInOverlap(u1));
3853 			if(verbose){System.err.println("Finished overlap initialization.");}
3854 		}
3855 
3856 		public boolean test(BandedAligner bandy, int editLimit){
3857 			final int last1=u1.length()-1, last2=u2.length()-1;
3858 			if(verbose){System.err.println("Testing "+OVERLAP_TYPE_NAMES[type]+", "+start1+", "+start2);}
3859 			if(type==FORWARD){
3860 				assert(start1==0 || start2==0) : "start1="+start1+", stop1="+stop1+", last1="+last1+", start2="+start2+", stop2="+stop2+", last2="+last2;
3861 				if(start2==0){
3862 					if(verbose){System.err.println("A");}
3863 					return u1.overlapsForward(u2, start1, start2, bandy, false, editLimit);}
3864 				else{
3865 					if(verbose){System.err.println("B");}
3866 					return u2.overlapsForward(u1, start2, start1, bandy, false, editLimit);}
3867 			}
3868 			if(type==FORWARDRC){
3869 				assert(start1==0 || start2==last2) : "start1="+start1+", stop1="+stop1+", last1="+last1+", start2="+start2+", stop2="+stop2+", last2="+last2;
3870 				if(start2==last2){return u1.overlapsForwardRC(u2, start1, start2, bandy, false, editLimit);}
3871 				else{return u2.overlapsReverseRC(u1, start2, start1, bandy, false, editLimit);}
3872 			}
3873 			if(type==REVERSE){
3874 				assert(start1==last1 || start2==last2) : "start1="+start1+", stop1="+stop1+", last1="+last1+", start2="+start2+", stop2="+stop2+", last2="+last2;
3875 				if(start2==last2){return u1.overlapsReverse(u2, start1, start2, bandy, false, editLimit);}
3876 				else{return u2.overlapsReverse(u1, start2, start1, bandy, false, editLimit);}
3877 			}
3878 			if(type==REVERSERC){
3879 				assert(start1==last1 || start2==0) : "start1="+start1+", stop1="+stop1+", last1="+last1+", start2="+start2+", stop2="+stop2+", last2="+last2;
3880 				if(start2==0){return u1.overlapsReverseRC(u2, start1, start2, bandy, false, editLimit);}
3881 				else{return u2.overlapsForwardRC(u1, start2, start1, bandy, false, editLimit);}
3882 			}
3883 			throw new RuntimeException();
3884 		}
3885 
3886 		@Override
3887 		public boolean equals(Object o){
3888 			return equals((Overlap)o);
3889 		}
3890 
3891 		public boolean equals(Overlap o){
3892 			if(this==o){return true;}
3893 			assert(o!=null) : "*A*\n"+this+"\n"+o+"\n"+u1+"\n"+u2;
3894 			assert(u1!=null && u2!=null) : "*B*\n"+this+"\n"+o+"\n"+u1+"\n"+u2;
3895 			assert(u1!=o.u2 || u2!=o.u1) : "*C*\n"+this+"\n"+o+"\n"+u1.firstInOverlap(u2)+"\n"+o.u1.firstInOverlap(o.u2)+"\n"+u1+"\n"+u2;
3896 			return (u1==o.u1 && u2==o.u2 && type==o.type && start1==o.start1 && start2==o.start2 && stop1==o.stop1 && stop2==o.stop2)
3897 					;//|| (u1==o.u2 && u2==o.u1 && type==reverseType(o.type) && start1==o.start2 && start2==o.start1);
3898 		}
3899 
3900 //		public int compareTo(Overlap o){
3901 //			int a=compareTo2(o);
3902 //			int b=o.compareTo2(this);
3903 //			assert(a==-b) : "\n"+this+"\n"+o+"\na="+a+", b="+b+", equals="+this.equals(o)
3904 //				+"\nu1.compareTo(o.u1)="+u1.compareTo(o.u1)+"\no.u1.compareTo(u1)="+o.u1.compareTo(u1)
3905 //				+"\nu2.compareTo(o.u2)="+u2.compareTo(o.u2)+"\no.u2.compareTo(u2)="+o.u2.compareTo(u2);
3906 //			return a;
3907 //		}
3908 
3909 		@Override
3910 		public int compareTo(Overlap o){
3911 			int score1=overlapLen-50*(mismatches+edits);
3912 			int score2=o.overlapLen-50*(o.mismatches+o.edits);
3913 			if(score1!=score2){return score2-score1;}
3914 			if(overlapLen!=o.overlapLen){return o.overlapLen-overlapLen;}
3915 			int x=u1.compareTo(o.u1);
3916 			if(x!=0){return -x;}
3917 			x=u2.compareTo(o.u2);
3918 			if(x!=0){return -x;}
3919 			if(type!=o.type){return type-o.type;}
3920 			if((u1!=o.u1 || u2!=o.u2) && absorbMatch && !subsetMode){
3921 				boolean oldv=verbose;
3922 				verbose=true;
3923 				System.err.println(this);
3924 				System.err.println(o);
3925 				System.err.println("********");
3926 				System.err.println(u1);
3927 				System.err.println(u2);
3928 				System.err.println(o.u1);
3929 				System.err.println(o.u2);
3930 				System.err.println("********");
3931 				System.err.println(u1.equals(o.u1));
3932 				System.err.println("********");
3933 				System.err.println(u2.equals(o.u2));
3934 				System.err.println("********");
3935 				System.err.println(u1.compareTo(o.u1));
3936 				System.err.println("********");
3937 				System.err.println(u2.compareTo(o.u2));
3938 				System.err.println("********");
3939 				verbose=oldv;
3940 			}
3941 			assert(!absorbMatch || (u1==o.u1 && u2==o.u2) || subsetMode) : "\n"+u1.r+"\n"+u2.r+"\n"+o.u1.r+"\n"+o.u2.r
3942 				+"\n\n"+u1.r.mate+"\n"+u2.r.mate+"\n"+o.u1.r.mate+"\n"+o.u2.r.mate;
3943 //			assert(false) : "\n"+this+"\n"+o+"\n>"+u1.name()+"\n"+new String(u1.bases())+"\n>"+u2.name()+"\n"+new String(u2.bases())+"\n";
3944 			if(start1!=o.start1){return start1-o.start1;}
3945 			if(stop1!=o.stop1){return stop1-o.stop1;}
3946 			if(start2!=o.start2){return start2-o.start2;}
3947 			if(stop2!=o.stop2){return stop2-o.stop2;}
3948 			if(this.equals(o)){
3949 				return 0;
3950 			}else{
3951 				//TODO: ensure this assumption is valid.
3952 				assert(!absorbContainment || !absorbMatch || subsetMode) : "\n"+this+"\n"+o+"\n>"+u1.name()+"\n"+new String(u1.bases())+"\n>"+u2.name()+"\n"+new String(u2.bases())+"\n";
3953 
3954 				if(u1.unitID!=o.u1.unitID){return u1.unitID-o.u1.unitID;}
3955 				if(u2.unitID!=o.u2.unitID){return u2.unitID-o.u2.unitID;}
3956 			}
3957 			return 0;
3958 		}
3959 
3960 		@Override
3961 		public int hashCode(){
3962 			return u1.hashCode()^u2.hashCode()^overlapLen;
3963 		}
3964 
3965 		public void flip(Unit changed, BandedAligner bandy){
3966 
3967 			if(changed==u2){
3968 				if(type==FORWARD){type=FORWARDRC;}
3969 				else if(type==FORWARDRC){type=FORWARD;}
3970 				else if(type==REVERSE){type=REVERSERC;}
3971 				else if(type==REVERSERC){type=REVERSE;}
3972 				else{throw new RuntimeException("Unknown overlap type "+type);}
3973 				start2=u2.length()-start2-1;
3974 				stop2=u2.length()-stop2-1;
3975 			}else if(changed==u1){
3976 				if(type==FORWARD){type=REVERSERC;}
3977 				else if(type==FORWARDRC){type=REVERSE;}
3978 				else if(type==REVERSE){type=FORWARDRC;}
3979 				else if(type==REVERSERC){type=FORWARD;}
3980 				else{throw new RuntimeException("Unknown overlap type "+type);}
3981 				start1=u1.length()-start1-1;
3982 				stop1=u1.length()-stop1-1;
3983 			}else{throw new RuntimeException("'changed' was not in the Overlap.");}
3984 
3985 			assert(test(bandy, edits+maxEdits));
3986 		}
3987 
3988 		public void swap(){
3989 			Unit tempu=u1;
3990 			u1=u2;
3991 			u2=tempu;
3992 			int temp=start1;
3993 			start1=start2;
3994 			start2=temp;
3995 			temp=stop1;
3996 			stop1=stop2;
3997 			stop2=temp;
3998 			if(type==FORWARDRC){type=REVERSERC;}
3999 			else if(type==REVERSERC){type=FORWARDRC;}
4000 		}
4001 
4002 		public void reverseDirection(){
4003 			type=reverseType(type);
4004 			int temp=start1;
4005 			start1=stop1;
4006 			stop1=temp;
4007 			temp=start2;
4008 			start2=stop2;
4009 			stop2=temp;
4010 		}
4011 
4012 		@Override
4013 		public String toString(){
4014 			StringBuilder sb=new StringBuilder(80);
4015 			sb.append("type=");
4016 			sb.append(OVERLAP_TYPE_NAMES[type]);
4017 			sb.append(", len=");
4018 			sb.append(overlapLen);
4019 			sb.append(", subs=");
4020 			sb.append(mismatches);
4021 			sb.append(", edits=");
4022 			sb.append(edits);
4023 
4024 			sb.append(" (");
4025 			sb.append(u1.name()==null ? u1.r.numericID+"" : u1.name());
4026 			if(printLengthInEdges){
4027 				sb.append(", length=");
4028 				sb.append(u1.length());
4029 			}
4030 			sb.append(", start1=");
4031 			sb.append(start1);
4032 			sb.append(", stop1=");
4033 			sb.append(stop1);
4034 
4035 			sb.append(") (");
4036 			sb.append(u2.name()==null ? u2.r.numericID+"" : u2.name());
4037 			if(printLengthInEdges){
4038 				sb.append(", length=");
4039 				sb.append(u2.length());
4040 			}
4041 			sb.append(", start2=");
4042 			sb.append(start2);
4043 			sb.append(", stop2=");
4044 			sb.append(stop2);
4045 			sb.append(")");
4046 			return sb.toString();
4047 		}
4048 
4049 		public String toLabel(){
4050 			StringBuilder sb=new StringBuilder(80);
4051 			sb.append(OVERLAP_TYPE_ABBREVIATIONS[type]);
4052 			sb.append(',');
4053 			sb.append(overlapLen);
4054 			sb.append(',');
4055 			sb.append(mismatches);
4056 			sb.append(',');
4057 			sb.append(edits);
4058 
4059 			if(printLengthInEdges){
4060 				sb.append(',');
4061 				sb.append(u1.length());
4062 			}
4063 			sb.append(',');
4064 			sb.append(start1);
4065 			sb.append(',');
4066 			sb.append(stop1);
4067 
4068 			if(printLengthInEdges){
4069 				sb.append(',');
4070 				sb.append(u2.length());
4071 			}
4072 			sb.append(',');
4073 			sb.append(start2);
4074 			sb.append(',');
4075 			sb.append(stop2);
4076 
4077 			return sb.toString();
4078 		}
4079 
4080 
4081 		private void setCanonContradiction(boolean b){
4082 			assert(b!=canonContradiction()) : b+", "+canonContradiction();
4083 			if(b){flags|=CANON_CONTRADICTION_MASK;}
4084 			else{flags&=~CANON_CONTRADICTION_MASK;}
4085 			assert(b==canonContradiction()) : b+", "+canonContradiction();
4086 		}
4087 
4088 		private void setOffsetContradiction(boolean b){
4089 			assert(b!=offsetContradiction()) : b+", "+offsetContradiction();
4090 			if(b){flags|=OFFSET_CONTRADICTION_MASK;}
4091 			else{flags&=~OFFSET_CONTRADICTION_MASK;}
4092 			assert(b==offsetContradiction()) : b+", "+offsetContradiction();
4093 		}
4094 
4095 		private void setMultiJoin(boolean b){
4096 			assert(b!=multiJoin()) : b+", "+multiJoin();
4097 			if(b){flags|=MULTIJOIN_MASK;}
4098 			else{flags&=~MULTIJOIN_MASK;}
4099 			assert(b==multiJoin()) : b+", "+multiJoin();
4100 		}
4101 
4102 		private void setVisited(boolean b){
4103 			assert(b!=visited()) : b+", "+visited();
4104 			if(b){flags|=VISITED_MASK;}
4105 			else{flags&=~VISITED_MASK;}
4106 			assert(b==visited()) : b+", "+visited();
4107 		}
4108 
4109 		private void setCyclic(boolean b){
4110 			assert(b!=cyclic()) : b+", "+cyclic();
4111 			if(b){flags|=CYCLIC_MASK;}
4112 			else{flags&=~CYCLIC_MASK;}
4113 			assert(b==cyclic()) : b+", "+cyclic();
4114 		}
4115 
4116 		private void setInvalid(boolean b){
4117 			assert(b!=invalid()) : b+", "+invalid();
4118 			assert(b!=mst()) : b+", "+mst()+", "+invalid();
4119 			if(b){flags|=INVALID_MASK;}
4120 			else{flags&=~INVALID_MASK;}
4121 			assert(b==invalid()) : b+", "+invalid();
4122 		}
4123 
4124 		private void setMst(boolean b){
4125 			assert(b!=mst()) : b+", "+mst();
4126 			assert(b!=invalid()) : b+", "+mst()+", "+invalid();
4127 			if(b){flags|=MST_MASK;}
4128 			else{flags&=~MST_MASK;}
4129 			assert(b==mst()) : b+", "+mst();
4130 		}
4131 
4132 		public void clearVolatileFlags(){
4133 			flags=0;
4134 //			flags=flags&~(MULTIJOIN_MASK|VISITED_MASK|CANON_CONTRADICTION_MASK|CYCLIC_MASK|OFFSET_CONTRADICTION_MASK|INVALID_MASK);
4135 //			assert(!canonContradiction());
4136 //			assert(!offsetContradiction());
4137 //			assert(!multiJoin());
4138 //			assert(!visited());
4139 //			assert(!cyclic());
4140 //			assert(!invalid());
4141 		}
4142 
4143 		public boolean canonContradiction(){return (CANON_CONTRADICTION_MASK&flags)==CANON_CONTRADICTION_MASK;}
4144 		public boolean offsetContradiction(){return (OFFSET_CONTRADICTION_MASK&flags)==OFFSET_CONTRADICTION_MASK;}
4145 		public boolean multiJoin(){return (MULTIJOIN_MASK&flags)==MULTIJOIN_MASK;}
4146 		public boolean visited(){return (VISITED_MASK&flags)==VISITED_MASK;}
4147 		public boolean cyclic(){return (CYCLIC_MASK&flags)==CYCLIC_MASK;}
4148 		public boolean invalid(){return (INVALID_MASK&flags)==INVALID_MASK;}
4149 		public boolean mst(){return (MST_MASK&flags)==MST_MASK;}
4150 		public boolean contradiction(){return canonContradiction() || offsetContradiction();}
4151 
4152 		private static final long VISITED_MASK=(1L<<0);
4153 		private static final long MULTIJOIN_MASK=(1L<<1);
4154 		private static final long CYCLIC_MASK=(1L<<2);
4155 		private static final long CANON_CONTRADICTION_MASK=(1L<<3);
4156 		private static final long OFFSET_CONTRADICTION_MASK=(1L<<4);
4157 		private static final long INVALID_MASK=(1L<<5);
4158 		private static final long MST_MASK=(1L<<6);
4159 
4160 		Unit u1;
4161 		Unit u2;
4162 		int type;
4163 		int start1;
4164 		int start2;
4165 		int stop1;
4166 		int stop2;
4167 
4168 		long flags=0;
4169 
4170 		final int overlapLen;
4171 		final int mismatches;
4172 		final int edits;
4173 	}
4174 
4175 	/**
4176 	 * @return
4177 	 */
4178 	private int determineCluster2(final int uid) {
4179 		assert(clusterNumbers!=null);
4180 		boolean stable=false;
4181 		int cluster=uid;
4182 		while(!stable){
4183 			cluster=clusterNumbers.get(uid);
4184 			if(cluster==0 || cluster==uid){return cluster;}
4185 			assert(cluster<=uid);
4186 			final int next=determineCluster2(cluster);
4187 			if(next>=cluster){return cluster;}
4188 			stable=clusterNumbers.compareAndSet(uid, cluster, next);
4189 		}
4190 		return cluster;
4191 	}
4192 
4193 
4194 	private int mergeClusterIds(int cluster1, int cluster2) {
4195 		assert(clusterNumbers!=null);
4196 
4197 //		System.err.println("Merging clusters "+cluster1+" and "+cluster2);
4198 
4199 		while(cluster1!=cluster2){
4200 			int min=Tools.min(cluster1, cluster2);
4201 			if(cluster1!=min){
4202 				assert(cluster1>min);
4203 				boolean b=clusterNumbers.compareAndSet(cluster1, cluster1, min);
4204 				if(!b){
4205 					cluster1=determineCluster2(cluster1);
4206 					min=Tools.min(cluster1, cluster2);
4207 				}
4208 			}
4209 			if(cluster2!=min){
4210 				assert(cluster2>min);
4211 				boolean b=clusterNumbers.compareAndSet(cluster2, cluster2, min);
4212 				if(!b){
4213 					cluster2=determineCluster2(cluster2);
4214 					min=Tools.min(cluster1, cluster2);
4215 				}
4216 			}
4217 		}
4218 //		System.err.println("Returning "+cluster1);
4219 		return cluster1;
4220 	}
4221 
4222 	private class Unit implements Comparable<Unit>, Serializable {
4223 
4224 		/**
4225 		 *
4226 		 */
4227 		private static final long serialVersionUID = -992343322822460643L;
4228 
4229 		public Unit(Read r_){
4230 			this(r_, isCanonical(r_.bases));
4231 		}
4232 
4233 		public Unit(Read r_, boolean canonical_){
4234 //			this(r_, canonical_, canonical_ ? hash(r_.bases) : hashReversed(r_.bases));
4235 			this(r_, canonical_, hash(r_.bases), hashReversed(r_.bases));
4236 		}
4237 
4238 		public Unit(Read r_, boolean canonical_, long codeF_, long codeR_){
4239 			r=r_;
4240 			code1=Tools.min(codeF_, codeR_);
4241 			code2=Tools.max(codeF_, codeR_);
4242 			final int len=r.length();
4243 			for(int i=0; i<prefixes.length; i++){
4244 				if(len>(i+1)*k){
4245 					prefixes[i]=hashTip(r.bases, true, k, i*k);
4246 					suffixes[i]=hashTip(r.bases, false, k, i*k);
4247 				}else{
4248 					prefixes[i]=-1;
4249 					suffixes[i]=-1;
4250 				}
4251 			}
4252 			long f=r.length();
4253 			if(canonical_){f|=CANON_MASK;}
4254 			if(r.pairnum()==1){f|=PAIRNUM_MASK;}
4255 			flags=f;
4256 			assert(canonical()==canonical_);
4257 			assert(length()==r.length());
4258 			assert(pairnum()==r.pairnum());
4259 			if(parseDepth){
4260 				int[] quad=KmerNormalize.parseDepth(r.id, null);
4261 				if(quad!=null){depth=quad[r.pairnum()];}
4262 			}
4263 		}
4264 
4265 		int determineCluster() {
4266 			return determineCluster2(unitID);
4267 		}
4268 
4269 		public void absorbMatch(Unit u){
4270 
4271 			assert(code1==u.code1 && code2==u.code2 && length()==u.length());
4272 			if(r==null || u.r==null){return;}
4273 			u.r.setDiscarded(true);
4274 			final byte[] bases1=r.bases, bases2=u.r.bases;
4275 			final byte[] quals1=r.quality, quals2=u.r.quality;
4276 
4277 			assert((r.mate==null) == (u.r.mate==null));
4278 
4279 			if(r.mate!=null && !u.r.mate.discarded()){
4280 				((Unit)r.mate.obj).absorbMatch((Unit)u.r.mate.obj);
4281 			}
4282 			if(quals1==null || quals2==null){return;}
4283 
4284 			if(canonical()==u.canonical()){
4285 				for(int i=0; i<bases1.length; i++){
4286 					byte b1=bases1[i], b2=bases2[i];
4287 					if(!AminoAcid.isFullyDefined(b1) && AminoAcid.isFullyDefined(b2)){bases1[i]=b2;}
4288 					else{assert(b1==b2);}
4289 					if(quals1!=null && quals2!=null){
4290 						quals1[i]=Tools.max(quals1[i], quals2[i]);
4291 					}
4292 				}
4293 			}else{
4294 				for(int i=0, j=bases2.length-1; i<bases1.length; i++, j--){
4295 					byte b1=bases1[i], b2=baseToComplementExtended[bases2[j]];
4296 					if(!AminoAcid.isFullyDefined(b1) && AminoAcid.isFullyDefined(b2)){bases1[i]=b2;}
4297 					else{assert(b1==b2);}
4298 					if(quals1!=null && quals2!=null){
4299 						quals1[i]=Tools.max(quals1[i], quals2[j]);
4300 					}
4301 				}
4302 			}
4303 		}
4304 
4305 		public boolean alreadyHas(Overlap o){
4306 			if(overlapList==null){return false;}
4307 			for(int i=0; i<overlapList.size(); i++){
4308 				Overlap o2=overlapList.get(i);
4309 				if(o.equals(o2)){
4310 					assert(overlapList.contains(o));
4311 					assert(o2.equals(o));
4312 					return true;
4313 				}
4314 			}
4315 			assert(!overlapList.contains(o));
4316 			return false;
4317 		}
4318 
4319 		public ArrayList<Unit> makeCluster() {
4320 			assert(!visited());
4321 			assert(!clustered());
4322 			assert(valid());
4323 //			assert(set.isEmpty());
4324 			ArrayList<Unit> cluster=new ArrayList<Unit>(overlapList==null ? 1 : overlapList.size()+1);
4325 			cluster.add(this);
4326 			setClustered(true);
4327 
4328 			int added=1;
4329 			for(int i=0; i<cluster.size(); i++){
4330 				Unit u=cluster.get(i);
4331 				added+=u.visit(cluster);
4332 			}
4333 
4334 			assert(added==cluster.size());
4335 			return cluster;
4336 		}
4337 
4338 		public int visit(ArrayList<Unit> cluster) {
4339 			assert(!visited());
4340 			assert(clustered());
4341 			assert(valid());
4342 //			assert(cluster.contains(this));
4343 			setVisited(true);
4344 			int added=0;
4345 
4346 			if(r!=null && r.mate!=null){
4347 				Unit u2=(Unit)r.mate.obj;
4348 				assert(u2!=this);
4349 				assert(u2.valid());
4350 				if(!u2.clustered()){
4351 					u2.setClustered(true);
4352 					cluster.add(u2);
4353 					added++;
4354 				}
4355 			}
4356 
4357 			if(overlapList!=null){
4358 				for(Overlap o : overlapList){
4359 					Unit u2=(o.u1==this ? o.u2 : o.u1);
4360 					assert(o.u1==this || o.u2==this);
4361 					assert(u2!=this);
4362 					assert(u2.valid());
4363 					if(!u2.clustered()){
4364 						u2.setClustered(true);
4365 						cluster.add(u2);
4366 						added++;
4367 					}
4368 				}
4369 			}
4370 			return added;
4371 		}
4372 
4373 		public boolean isTransitive(){
4374 			assert(valid());
4375 			if(overlapList==null || overlapList.size()==0){return true;}
4376 			for(Overlap o : overlapList){
4377 				assert(o.u1==this || o.u2==this);
4378 				Unit u2=(o.u1==this ? o.u2 : o.u1);
4379 				assert(u2!=this);
4380 				if(u2.overlapList==null){
4381 					return false;
4382 				}else{
4383 					boolean found=false;
4384 					for(Overlap o2 : u2.overlapList){
4385 						if(o2.u1==this || o2.u2==this){
4386 							found=true; break;
4387 						}
4388 					}
4389 					if(!found){return false;}
4390 				}
4391 			}
4392 			return true;
4393 		}
4394 
4395 		public boolean isPerfectlyTransitive(){
4396 			assert(valid());
4397 			if(overlapList==null || overlapList.size()==0){return true;}
4398 			for(Overlap o : overlapList){
4399 				assert(o.u1==this || o.u2==this);
4400 				Unit u2=(o.u1==this ? o.u2 : o.u1);
4401 				assert(u2!=this);
4402 				if(u2.overlapList==null){
4403 					return false;
4404 				}else{
4405 					boolean found=false;
4406 					for(Overlap o2 : u2.overlapList){
4407 						if(o2==o){
4408 							found=true; break;
4409 						}
4410 					}
4411 					if(!found){return false;}
4412 				}
4413 			}
4414 			return true;
4415 		}
4416 
4417 		public boolean isNonRedundant(){
4418 			assert(valid());
4419 			if(overlapList==null || overlapList.size()==0){return true;}
4420 			for(int i=0; i<overlapList.size(); i++){
4421 				Overlap a=overlapList.get(i);
4422 				for(int j=0; j<overlapList.size(); j++){
4423 					Overlap b=overlapList.get(j);
4424 					if((i==j)!=(a.equals(b))){
4425 						return false;
4426 					}
4427 				}
4428 			}
4429 			return true;
4430 		}
4431 
4432 		public boolean contains(Unit u2, int loc, LongM key, BandedAligner bandy, int tableNum) {
4433 			if(verbose){System.err.println("contains: Considering key "+key+", unit "+u2);}
4434 			if(u2.length()>this.length()){return false;} //Smaller things cannot contain larger things
4435 			if(u2.length()==this.length()){//For equal size, only one can contain the other the other
4436 				boolean pass=false;
4437 				if(!pass && u2.unitID>this.unitID){return false;}else if(u2.unitID<this.unitID){pass=true;}
4438 				if(!pass && u2.r.numericID>this.r.numericID){return false;}else if(u2.r.numericID<this.r.numericID){pass=true;}
4439 				if(!pass && u2.code1>this.code1){return false;}else if(u2.code1<this.code1){pass=true;}
4440 				if(!pass && u2.r.bases.hashCode()>this.r.bases.hashCode()){return false;}else if(u2.r.bases.hashCode()<this.r.bases.hashCode()){pass=true;}
4441 				if(!pass){return false;}
4442 			}
4443 			if(minLengthPercent>0 && (u2.length()*100f/length())<minLengthPercent){return false;}
4444 			assert(u2.code1!=code1 || u2.code2!=code2 || u2.length()!=length() || (r!=null && r.mate!=null) || //REQUIRE_MATCHING_NAMES ||
4445 					(canonical()==u2.canonical() ? (u2.prefixes[0]!=prefixes[0] && u2.suffixes[0]!=suffixes[0]) : (u2.prefixes[0]!=suffixes[0] && u2.suffixes[0]!=prefixes[0]))) :
4446 						"Collision? \n"+this+"\n"+u2+"\n"+r+"\n"+u2.r;
4447 
4448 			final boolean earlyExit=(tableNum==0);
4449 			final int x=(tableNum+1);
4450 			final int ktn=k*tableNum;
4451 
4452 			if(key.value()==u2.prefixes[tableNum]){
4453 				if(verbose){System.err.println("Containment A"+x);}
4454 				if(containsForward(u2, loc-k2-ktn, bandy, earlyExit) || containsReverseRC(u2, loc+ktn, bandy, earlyExit)){return true;}
4455 			}
4456 			if(key.value()==u2.suffixes[tableNum]){
4457 				if(verbose){System.err.println("Containment B"+x);}
4458 				if(containsReverse(u2, loc+ktn, bandy, earlyExit) || containsForwardRC(u2, loc-k2-ktn, bandy, earlyExit)){return true;}
4459 			}
4460 			return false;
4461 		}
4462 
4463 		private boolean containsForward(Unit u2, int start, BandedAligner bandy, boolean earlyExit) {
4464 			if(start+u2.length()>length() || start<0 || start>=length()){return false;}
4465 //			if(true){return false;}
4466 			if(u2.r!=null){
4467 				final byte[] a=bases(), b=u2.bases();
4468 				int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4469 
4470 				for(int i=start, j=0; j<b.length; i++, j++){
4471 					byte aa=a[i];
4472 					byte bb=b[j];
4473 					if(aa!=bb){
4474 						if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4475 							if(earlyExit && j<k2){return false;}
4476 							if((mismatches=mismatches+1)>maxMismatches){
4477 								if(bandy==null || maxEdits<1){return false;}
4478 								int edits=bandy.alignForward(b, a, 0, start, maxEdits, exact);
4479 								assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4480 								return edits<=maxEdits && bandy.score()>4*edits;
4481 							}
4482 						}
4483 					}
4484 				}
4485 				return true;
4486 			}else{
4487 				assert(false) : "TODO: Verify by hashing and checking both tips";
4488 				return false;
4489 			}
4490 		}
4491 
4492 		private boolean containsForwardRC(Unit u2, int start, BandedAligner bandy, boolean earlyExit) {
4493 			if(ignoreReverseComplement){return false;}
4494 			if(start+u2.length()>length() || start<0 || start>=length()){return false;}
4495 //			if(true){return false;}
4496 			if(u2.r!=null){
4497 				final byte[] a=bases(), b=u2.bases();
4498 				int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4499 
4500 				for(int i=start, j=b.length-1, iprefix=start+k2; j>=0; i++, j--){
4501 					byte aa=a[i];
4502 					byte bb=baseToComplementExtended[b[j]];
4503 					if(aa!=bb){
4504 						if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4505 							if(earlyExit && i<iprefix){return false;}
4506 							if((mismatches=mismatches+1)>maxMismatches){
4507 								if(bandy==null || maxEdits<1){return false;}
4508 								int edits=bandy.alignForwardRC(b, a, b.length-1, start, maxEdits, exact);
4509 								assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4510 								return edits<=maxEdits && bandy.score()>4*edits;
4511 							}
4512 						}
4513 					}
4514 				}
4515 				return true;
4516 			}else{
4517 				assert(false) : "TODO: Verify by hashing and checking both tips";
4518 				return false;
4519 			}
4520 		}
4521 
4522 		private boolean containsReverse(Unit u2, int start, BandedAligner bandy, boolean earlyExit) {
4523 			if(start+1<u2.length() || start<0 || start>=length()){return false;}
4524 //			if(true){return false;}
4525 			if(u2.r!=null){
4526 				final byte[] a=bases(), b=u2.bases();
4527 				int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4528 
4529 				for(int i=start, j=b.length-1, iprefix=start-k2; j>=0; i--, j--){
4530 					byte aa=a[i];
4531 					byte bb=b[j];
4532 					if(aa!=bb){
4533 						if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4534 							if(earlyExit && i>iprefix){return false;}
4535 							if((mismatches=mismatches+1)>maxMismatches){
4536 								if(bandy==null || maxEdits<1){return false;}
4537 								int edits=bandy.alignReverse(b, a, b.length-1, start, maxEdits, exact);
4538 								assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4539 								return edits<=maxEdits && bandy.score()>4*edits;
4540 							}
4541 						}
4542 					}
4543 				}
4544 				return true;
4545 			}else{
4546 				assert(false) : "TODO: Verify by hashing and checking both tips";
4547 				return false;
4548 			}
4549 		}
4550 
4551 		private boolean containsReverseRC(Unit u2, int start, BandedAligner bandy, boolean earlyExit) {
4552 			if(ignoreReverseComplement){return false;}
4553 			if(start+1<u2.length() || start<0 || start>=length()){return false;}
4554 //			if(true){return false;}
4555 			if(u2.r!=null){
4556 				final byte[] a=bases(), b=u2.bases();
4557 				int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4558 
4559 				for(int i=start, j=0; j<b.length; i--, j++){
4560 					byte aa=a[i];
4561 					byte bb=baseToComplementExtended[b[j]];
4562 					if(aa!=bb){
4563 						if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4564 							if(earlyExit && j<k2){return false;}
4565 							if((mismatches=mismatches+1)>maxMismatches){
4566 								if(bandy==null || maxEdits<1){return false;}
4567 								int edits=bandy.alignReverseRC(b, a, 0, start, maxEdits, exact);
4568 								assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4569 								return edits<=maxEdits && bandy.score()>4*edits;
4570 							}
4571 						}
4572 					}
4573 				}
4574 				return true;
4575 			}else{
4576 				assert(false) : "TODO: Verify by hashing and checking both tips";
4577 				return false;
4578 			}
4579 		}
4580 
4581 
4582 		public boolean depthCongruent(int aa, int bb){
4583 			if(aa<5 && bb<5){return true;}
4584 			final int a=Tools.max(1, Tools.min(aa, bb));
4585 			final int b=Tools.max(aa, bb);
4586 			return a*depthRatio>=b;
4587 		}
4588 
4589 		public boolean overlaps(Unit u2, int loc, LongM key, BandedAligner bandy, int tableNum, int editLimit) {
4590 //			return makeOverlap(u2, loc, key, bandy, earlyExit)!=null;
4591 
4592 //			assert(false) : "TODO";
4593 			if(verbose){System.err.println("overlaps: Considering key "+key+", unit "+u2);}
4594 			if(parseDepth && !depthCongruent(depth, u2.depth)){return false;}
4595 			if(minLengthPercent>0){
4596 				final int len1=length(), len2=u2.length();
4597 				if(Tools.min(len1, len2)*100f/Tools.max(len1, len2)<minLengthPercent){return false;}
4598 			}
4599 			assert(u2.code1!=code1 || u2.code2!=code2 || u2.length()!=length() ||
4600 					(canonical()==u2.canonical() ? (u2.prefixes[0]!=prefixes[0] && u2.suffixes[0]!=suffixes[0]) : (u2.prefixes[0]!=suffixes[0] && u2.suffixes[0]!=prefixes[0]))) :
4601 						"Collision? \n"+this+"\n"+u2+"\n"+r+"\n"+u2.r;
4602 
4603 			final boolean earlyExit=(tableNum==0);
4604 			final int x=(tableNum+1);
4605 			final int ktn=k*tableNum;
4606 
4607 			if(key.value()==u2.prefixes[tableNum]){
4608 				if(verbose){System.err.println("Testing overlaps A"+x);}
4609 				if(overlapsForward(u2, loc-k2-ktn, 0, bandy, earlyExit, editLimit)){
4610 					if(verbose){System.err.println("Found Overlap A"+x+"F");}
4611 					return true;
4612 				}
4613 				if(overlapsReverseRC(u2, loc+ktn, 0, bandy, earlyExit, editLimit)){
4614 					if(verbose){System.err.println("Found Overlap A"+x+"R");}
4615 					return true;
4616 				}
4617 				if(verbose){System.err.println("No Overlap.");}
4618 			}
4619 
4620 			if(key.value()==u2.suffixes[tableNum]){
4621 				if(verbose){System.err.println("Testing overlaps B"+x);}
4622 				if(overlapsForwardRC(u2, loc-k2-ktn, u2.length()-1, bandy, earlyExit, editLimit)){
4623 					if(verbose){System.err.println("Found Overlap B"+x+"F");}
4624 					return true;
4625 				}
4626 				if(overlapsReverse(u2, loc+ktn, u2.length()-1, bandy, earlyExit, editLimit)){
4627 					if(verbose){System.err.println("Found Overlap B"+x+"R");}
4628 					return true;
4629 				}
4630 				if(verbose){System.err.println("No Overlap.");}
4631 			}
4632 
4633 			return false;
4634 		}
4635 
4636 		/**
4637 		 * @param u2
4638 		 * @param loc
4639 		 * @param key
4640 		 * @return
4641 		 */
4642 		protected Overlap makeOverlap(Unit u2, int loc, LongM key, BandedAligner bandy, int tableNum) {
4643 			if(verbose){System.err.println("makeOverlap: Considering key "+key+", unit "+u2);}
4644 			if(parseDepth && !depthCongruent(depth, u2.depth)){return null;}
4645 			if(minLengthPercent>0){
4646 				final int len1=length(), len2=u2.length();
4647 				if(Tools.min(len1, len2)*100f/Tools.max(len1, len2)<minLengthPercent){return null;}
4648 			}
4649 			assert(u2.code1!=code1 || u2.code2!=code2 || u2.length()!=length() || (r!=null && r.mate!=null) ||
4650 					(canonical()==u2.canonical() ? (u2.prefixes[0]!=prefixes[0] && u2.suffixes[0]!=suffixes[0]) : (u2.prefixes[0]!=suffixes[0] && u2.suffixes[0]!=prefixes[0]))) :
4651 						"Collision? \n"+this+"\n"+u2+"\n"+r+"\n"+u2.r;
4652 
4653 			final boolean earlyExit=(tableNum==0);
4654 			final int x=(tableNum+1);
4655 			final int ktn=k*tableNum;
4656 
4657 			Overlap o=null;
4658 			if(key.value()==u2.prefixes[tableNum]){
4659 				if(verbose){System.err.println("\nTesting makeOverlap A"+x+"F");}
4660 				if((o=makeOverlapForward(u2, loc-k2-ktn, bandy, earlyExit))!=null){
4661 					if(verbose){System.err.println("Made Overlap A"+x+"F");}
4662 					return o;
4663 				}
4664 				if(verbose){System.err.println("\nTesting makeOverlap A"+x+"R");}
4665 				if((o=makeOverlapReverseRC(u2, loc+ktn, bandy, earlyExit))!=null){
4666 					if(verbose){System.err.println("Made Overlap A"+x+"R");}
4667 					return o;
4668 				}
4669 				if(verbose){System.err.println("No Overlap.");}
4670 			}
4671 			if(key.value()==u2.suffixes[tableNum]){
4672 				if(verbose){System.err.println("\nTesting makeOverlap B"+x+"F");}
4673 				if((o=makeOverlapForwardRC(u2, loc-k2-ktn, bandy, earlyExit))!=null){
4674 					if(verbose){System.err.println("Made Overlap B"+x+"F");}
4675 					return o;
4676 				}
4677 				if(verbose){System.err.println("\nTesting makeOverlap B"+x+"R");}
4678 				if((o=makeOverlapReverse(u2, loc+ktn, bandy, earlyExit))!=null){
4679 					if(verbose){System.err.println("Made Overlap B"+x+"R");}
4680 					return o;
4681 				}
4682 				if(verbose){System.err.println("No Overlap.");}
4683 			}
4684 			return o;
4685 		}
4686 
4687 		private boolean overlapsForward(Unit u2, int start1, int start2, BandedAligner bandy, boolean earlyExit, int maxEdits) {
4688 			if(verbose){System.err.println("overlapsForward(u1="+this.name()+", u2="+u2.name()+", start1="+start1+", start2="+start2+", earlyExit="+earlyExit+")");}
4689 
4690 			final int len1=length(), len2=u2.length();
4691 			if(start1<0){
4692 				start2-=start1;
4693 				start1=0;
4694 				if(verbose){System.err.println("Modified: start1="+start1+", start2="+start2);}
4695 			}
4696 			int overlapLength=Tools.min(len1-start1, len2-start2);
4697 			int overlapLength2=Tools.max(len1-start1, len2-start2);
4698 			int stop1=start1+overlapLength-1, stop2=start2+overlapLength-1;
4699 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
4700 
4701 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
4702 				if(verbose){
4703 					System.err.println("Side block. allowAllContainedOverlaps="+allowAllContainedOverlaps+", minOverlapCluster="+minOverlapCluster);
4704 					System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
4705 							", overlapLen="+overlapLength+", maxEdits="+maxEdits);
4706 				}
4707 				if(overlapLength2<minOverlapCluster){return false;}
4708 				if(minOverlapPercentCluster>0f && (overlapLength2*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return false;}
4709 			}
4710 
4711 			final byte[] a=bases(), b=u2.bases();
4712 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
4713 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, overlapLength);
4714 
4715 			if(verbose){
4716 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
4717 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
4718 			}
4719 
4720 			for(int i=start1, j=start2; j<=stop2; i++, j++){
4721 				byte aa=a[i];
4722 				byte bb=b[j];
4723 				if(aa!=bb){
4724 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4725 						if(earlyExit && j<k2){return false;}
4726 						mismatches++;
4727 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
4728 						if(mismatches>maxMismatches){
4729 							if(bandy==null || maxEdits<1){return false;}
4730 							if(verbose){System.err.println("Mismatches exceeded maximum, attempting banded alignment.");}
4731 							int edits=bandy.alignForward(b, a, 0, start1, maxEdits, exact);
4732 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4733 							stop2=bandy.lastQueryLoc;
4734 							stop1=bandy.lastRefLoc;
4735 							return edits<=maxEdits && bandy.score()>2*edits; //Set at 2*edits instead of 4*edits to prevent assertion errors when reversing alignment
4736 						}
4737 					}
4738 				}
4739 			}
4740 			return true;
4741 		}
4742 
4743 		private boolean overlapsForwardRC(Unit u2, int start1, int start2, BandedAligner bandy, boolean earlyExit, int maxEdits) {
4744 			if(verbose){System.err.println("overlapsForwardRC(u1="+this.name()+", u2="+u2.name()+", start1="+start1+", start2="+start2+", earlyExit="+earlyExit+")");}
4745 
4746 			if(ignoreReverseComplement){return false;}
4747 			final int len1=length(), len2=u2.length();
4748 			if(start1<0){
4749 				start2+=start1;
4750 				start1=0;
4751 				if(verbose){System.err.println("Modified: start1="+start1+", start2="+start2);}
4752 			}
4753 			final int overlapLength=Tools.min(len1-start1, start2+1);
4754 			final int overlapLength2=Tools.max(len1-start1, start2+1);
4755 			int stop1=start1+overlapLength-1, stop2=start2-overlapLength+1;
4756 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
4757 
4758 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
4759 				if(overlapLength2<minOverlapCluster){return false;}
4760 				if(minOverlapPercentCluster>0f && (overlapLength2*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return false;}
4761 			}
4762 
4763 			final byte[] a=bases(), b=u2.bases();
4764 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
4765 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4766 
4767 			if(verbose){
4768 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
4769 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
4770 			}
4771 
4772 			for(int i=start1, j=start2, iprefix=start1+k2; i<=stop1; i++, j--){
4773 				byte aa=a[i];
4774 				byte bb=baseToComplementExtended[b[j]];
4775 				if(aa!=bb){
4776 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4777 						if(earlyExit && i<iprefix){return false;}
4778 						mismatches++;
4779 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
4780 						if(mismatches>maxMismatches){
4781 							if(bandy==null || maxEdits<1){return false;}
4782 							if(verbose){System.err.println("Mismatches exceeded maximum, attempting banded alignment.");}
4783 							int edits=bandy.alignForwardRC(b, a, b.length-1, start1, maxEdits, exact);
4784 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4785 							stop2=bandy.lastQueryLoc;
4786 							stop1=bandy.lastRefLoc;
4787 							return edits<=maxEdits && bandy.score()>2*edits; //Set at 2*edits instead of 4*edits to prevent assertion errors when reversing alignment
4788 						}
4789 					}
4790 				}
4791 			}
4792 			return true;
4793 		}
4794 
4795 		private boolean overlapsReverse(Unit u2, int start1, int start2, BandedAligner bandy, boolean earlyExit, int maxEdits) {
4796 			if(verbose){System.err.println("overlapsReverse(u1="+this.name()+", u2="+u2.name()+", start1="+start1+", start2="+start2+", earlyExit="+earlyExit+")");}
4797 
4798 			final int len1=length(), len2=u2.length();
4799 			if(start1>=len1){
4800 				start2-=(start1-len1+1);
4801 				start1=len1-1;
4802 				if(verbose){System.err.println("Modified: start1="+start1+", start2="+start2);}
4803 			}
4804 			final int overlapLength=Tools.min(start1+1, start2+1);
4805 			final int overlapLength2=Tools.max(start1+1, start2+1);
4806 			int stop1=start1-overlapLength+1, stop2=start2-overlapLength+1;
4807 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
4808 
4809 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
4810 				if(overlapLength2<minOverlapCluster){return false;}
4811 				if(minOverlapPercentCluster>0f && (overlapLength2*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return false;}
4812 			}
4813 
4814 			final byte[] a=bases(), b=u2.bases();
4815 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
4816 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4817 
4818 			if(verbose){
4819 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
4820 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
4821 			}
4822 
4823 			for(int i=start1, j=start2, iprefix=start1-k2; i>=stop1; i--, j--){
4824 				byte aa=a[i];
4825 				byte bb=b[j];
4826 				if(aa!=bb){
4827 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4828 						if(earlyExit && i>iprefix){return false;}
4829 						mismatches++;
4830 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
4831 						if(mismatches>maxMismatches){
4832 							if(bandy==null || maxEdits<1){return false;}
4833 							if(verbose){System.err.println("Mismatches exceeded maximum, attempting banded alignment.");}
4834 							int edits=bandy.alignReverse(b, a, b.length-1, start1, maxEdits, exact);
4835 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4836 							stop2=bandy.lastQueryLoc;
4837 							stop1=bandy.lastRefLoc;
4838 							return edits<=maxEdits && bandy.score()>2*edits; //Set at 2*edits instead of 4*edits to prevent assertion errors when reversing alignment
4839 						}
4840 					}
4841 				}
4842 			}
4843 			return true;
4844 		}
4845 
4846 		private boolean overlapsReverseRC(Unit u2, int start1, int start2, BandedAligner bandy, boolean earlyExit, int maxEdits) {
4847 			if(verbose){System.err.println("overlapsReverseRC(u1="+this.name()+", u2="+u2.name()+", start1="+start1+", start2="+start2+", earlyExit="+earlyExit+")");}
4848 
4849 			if(ignoreReverseComplement){return false;}
4850 			final int len1=length(), len2=u2.length();
4851 			if(start1>=len1){
4852 				start2+=(start1-len1+1);
4853 				start1=len1-1;
4854 				if(verbose){System.err.println("Modified: start1="+start1+", start2="+start2);}
4855 			}
4856 			final int overlapLength=Tools.min(start1+1, len2-start2);
4857 			final int overlapLength2=Tools.max(start1+1, len2-start2);
4858 			int stop1=start1-overlapLength+1, stop2=start2+overlapLength-1;
4859 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
4860 
4861 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
4862 				if(overlapLength2<minOverlapCluster){return false;}
4863 				if(minOverlapPercentCluster>0f && (overlapLength2*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return false;}
4864 			}
4865 
4866 			final byte[] a=bases(), b=u2.bases();
4867 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
4868 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4869 
4870 			if(verbose){
4871 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
4872 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
4873 			}
4874 
4875 			for(int i=start1, j=start2; j<=stop2; i--, j++){
4876 				byte aa=a[i];
4877 				byte bb=baseToComplementExtended[b[j]];
4878 				if(aa!=bb){
4879 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4880 						if(earlyExit && j<k2){return false;}
4881 						mismatches++;
4882 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
4883 						if(mismatches>maxMismatches){
4884 							if(bandy==null || maxEdits<1){return false;}
4885 							if(verbose){System.err.println("Mismatches exceeded maximum, attempting banded alignment.");}
4886 							int edits=bandy.alignReverseRC(b, a, 0, start1, maxEdits, exact);
4887 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4888 							stop2=bandy.lastQueryLoc;
4889 							stop1=bandy.lastRefLoc;
4890 							return edits<=maxEdits && bandy.score()>2*edits; //Set at 2*edits instead of 4*edits to prevent assertion errors when reversing alignment
4891 						}
4892 					}
4893 				}
4894 			}
4895 			return true;
4896 		}
4897 
4898 
4899 
4900 		private Overlap makeOverlapForward(Unit u2, int start1, BandedAligner bandy, boolean earlyExit) {
4901 			if(verbose){System.err.println("makeOverlapForward(u1="+this.name()+", u2="+u2.name()+", start="+start1+", earlyExit="+earlyExit+")");}
4902 			final int len1=length(), len2=u2.length();
4903 			int start2=0;
4904 			if(start1<0){
4905 				start2-=start1;
4906 				start1=0;
4907 			}
4908 			final int overlapLength=Tools.min(len1-start1, len2-start2);
4909 			final int overlapLength2=Tools.max(len1-start1, len2-start2);
4910 			int stop1=start1+overlapLength-1, stop2=start2+overlapLength-1;
4911 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
4912 
4913 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
4914 				if(overlapLength<minOverlapCluster){return null;}
4915 				if(minOverlapPercentCluster>0f && (overlapLength*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return null;}
4916 			}
4917 
4918 			final byte[] a=bases(), b=u2.bases();
4919 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
4920 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, overlapLength);
4921 
4922 			if(verbose){
4923 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
4924 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
4925 			}
4926 
4927 			for(int i=start1, j=start2; j<=stop2; i++, j++){
4928 				byte aa=a[i];
4929 				byte bb=b[j];
4930 				if(aa!=bb){
4931 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4932 						if(earlyExit && j<k2){return null;}
4933 						mismatches++;
4934 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
4935 						if(mismatches>1 && bandy!=null){
4936 							if(maxEdits<1){return null;}
4937 							if(verbose){System.err.println("mismatches exceeded 1, attempting banded alignment.");}
4938 							int edits=bandy.alignForward(b, a, start2, start1, maxEdits, exact);
4939 							if(edits>maxEdits || bandy.score()<=4*edits){
4940 								if(verbose){System.err.println((edits>maxEdits ? "Too many edits" : "Alignment score too low")+"; returning null.");}
4941 								return null;
4942 							}
4943 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4944 							stop2=bandy.lastQueryLoc;
4945 							stop1=bandy.lastRefLoc;
4946 //							if(bandy.lastOffset>0){//Ref longer than query
4947 //								for(int k=0; k<bandy.lastOffset; k++){
4948 //									if(stop1+1<=len1){stop1++;}
4949 //									else{stop2--;}//I don't think this can happen
4950 //								}
4951 //							}else if(bandy.lastOffset<0){//Query longer than ref
4952 //								for(int k=0; k>bandy.lastOffset; k--){
4953 //									if(stop2+1<=len2){stop2++;}
4954 //									else{stop1--;}
4955 //								}
4956 //							}
4957 							return new Overlap(this, u2, FORWARD, start1, start2, stop1, stop2, overlapLength, edits, edits, bandy);
4958 						}else if(mismatches>maxMismatches){return null;}
4959 					}
4960 				}
4961 			}
4962 			return new Overlap(this, u2, FORWARD, start1, start2, stop1, stop2, overlapLength, mismatches, 0, bandy);
4963 		}
4964 
4965 		private Overlap makeOverlapForwardRC(Unit u2, int start1, BandedAligner bandy, boolean earlyExit) {
4966 			if(verbose){System.err.println("makeOverlapForwardRC(u1="+this.name()+", u2="+u2.name()+", start="+start1+", earlyExit="+earlyExit+")");}
4967 			if(ignoreReverseComplement){return null;}
4968 			final int len1=length(), len2=u2.length();
4969 			int start2=len2-1;
4970 			if(start1<0){
4971 				start2+=start1;
4972 				start1=0;
4973 			}
4974 			final int overlapLength=Tools.min(len1-start1, start2+1);
4975 			final int overlapLength2=Tools.max(len1-start1, start2+1);
4976 			int stop1=start1+overlapLength-1, stop2=start2-overlapLength+1;
4977 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
4978 
4979 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
4980 				if(overlapLength<minOverlapCluster){return null;}
4981 				if(minOverlapPercentCluster>0f && (overlapLength*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return null;}
4982 			}
4983 
4984 			final byte[] a=bases(), b=u2.bases();
4985 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
4986 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4987 
4988 			for(int i=start1, j=start2, iprefix=start1+k2; i<=stop1; i++, j--){
4989 				byte aa=a[i];
4990 				byte bb=baseToComplementExtended[b[j]];
4991 				if(aa!=bb){
4992 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4993 						if(earlyExit && i<iprefix){return null;}
4994 						mismatches++;
4995 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
4996 						if(mismatches>1 && bandy!=null){
4997 							if(maxEdits<1){return null;}
4998 							if(verbose){System.err.println("mismatches exceeded 1, attempting banded alignment.");}
4999 							int edits=bandy.alignForwardRC(b, a, start2, start1, maxEdits, exact);
5000 							if(edits>maxEdits || bandy.score()<=4*edits){
5001 								if(verbose){System.err.println((edits>maxEdits ? "Too many edits" : "Alignment score too low")+"; returning null.");}
5002 								return null;
5003 							}
5004 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
5005 							stop2=bandy.lastQueryLoc;
5006 							stop1=bandy.lastRefLoc;
5007 //							if(bandy.lastOffset>0){//Ref longer than query
5008 //								for(int k=0; k<bandy.lastOffset; k++){
5009 //									if(stop1+1<=len1){stop1++;}
5010 //									else{stop2++;}//I don't think this can happen
5011 //								}
5012 //							}else if(bandy.lastOffset<0){//Query longer than ref
5013 //								for(int k=0; k>bandy.lastOffset; k--){
5014 //									if(stop2>0){stop2--;}
5015 //									else{stop1--;}
5016 //								}
5017 //							}
5018 							return new Overlap(this, u2, FORWARDRC, start1, start2, stop1, stop2, overlapLength, edits, edits, bandy);
5019 						}else if(mismatches>maxMismatches){return null;}
5020 					}
5021 				}
5022 			}
5023 			return new Overlap(this, u2, FORWARDRC, start1, start2, stop1, stop2, overlapLength, mismatches, 0, bandy);
5024 		}
5025 
5026 		private Overlap makeOverlapReverse(Unit u2, int start1, BandedAligner bandy, boolean earlyExit) {
5027 			if(verbose){System.err.println("makeOverlapReverse(u1="+this.name()+", u2="+u2.name()+", start="+start1+", earlyExit="+earlyExit+")");}
5028 
5029 			final int len1=length(), len2=u2.length();
5030 			int start2=len2-1;
5031 			if(start1>=len1){
5032 				start2-=(start1-len1+1);
5033 				start1=len1-1;
5034 			}
5035 			final int overlapLength=Tools.min(start1+1, start2+1);
5036 			final int overlapLength2=Tools.max(start1+1, start2+1);
5037 			int stop1=start1-overlapLength+1, stop2=start2-overlapLength+1;
5038 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
5039 
5040 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
5041 				if(overlapLength<minOverlapCluster){return null;}
5042 				if(minOverlapPercentCluster>0f && (overlapLength*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return null;}
5043 			}
5044 
5045 			final byte[] a=bases(), b=u2.bases();
5046 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
5047 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
5048 
5049 			if(verbose){
5050 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
5051 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
5052 			}
5053 
5054 			for(int i=start1, j=start2, iprefix=start1-k2; i>=stop1; i--, j--){
5055 				byte aa=a[i];
5056 				byte bb=b[j];
5057 				if(aa!=bb){
5058 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
5059 						if(earlyExit && i>iprefix){return null;}
5060 						mismatches++;
5061 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
5062 						if(mismatches>1 && bandy!=null){
5063 							if(maxEdits<1){return null;}
5064 							if(verbose){System.err.println("mismatches exceeded 1, attempting banded alignment.");}
5065 							int edits=bandy.alignReverse(b, a, start2, start1, maxEdits, exact);
5066 							if(edits>maxEdits || bandy.score()<=4*edits){
5067 								if(verbose){System.err.println((edits>maxEdits ? "Too many edits" : "Alignment score too low")+"; returning null.");}
5068 								return null;
5069 							}
5070 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
5071 							stop2=bandy.lastQueryLoc;
5072 							stop1=bandy.lastRefLoc;
5073 //							if(bandy.lastOffset>0){//Ref longer than query
5074 //								for(int k=0; k<bandy.lastOffset; k++){
5075 //									if(stop1>0){stop1--;}
5076 //									else{stop2++;}//I don't think this can happen
5077 //								}
5078 //							}else if(bandy.lastOffset<0){//Query longer than ref
5079 //								for(int k=0; k>bandy.lastOffset; k--){
5080 //									if(stop2>0){stop2--;}
5081 //									else{stop1++;}
5082 //								}
5083 //							}
5084 							return new Overlap(this, u2, REVERSE, start1, start2, stop1, stop2, overlapLength, edits, edits, bandy);
5085 						}else if(mismatches>maxMismatches){return null;}
5086 					}
5087 				}
5088 			}
5089 			return new Overlap(this, u2, REVERSE, start1, start2, stop1, stop2, overlapLength, mismatches, 0, bandy);
5090 		}
5091 
5092 		private Overlap makeOverlapReverseRC(Unit u2, int start1, BandedAligner bandy, boolean earlyExit) {
5093 			if(verbose){System.err.println("makeOverlapReverseRC(u1="+this.name()+", u2="+u2.name()+", start="+start1+", earlyExit="+earlyExit+")");}
5094 			if(ignoreReverseComplement){return null;}
5095 			final int len1=length(), len2=u2.length();
5096 			int start2=0;
5097 			if(start1>=len1){
5098 				start2+=(start1-len1+1);
5099 				start1=len1-1;
5100 			}
5101 			final int overlapLength=Tools.min(start1+1, len2-start2);
5102 			final int overlapLength2=Tools.max(start1+1, len2-start2);
5103 			int stop1=start1-overlapLength+1, stop2=start2+overlapLength-1;
5104 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
5105 
5106 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
5107 				if(overlapLength<minOverlapCluster){return null;}
5108 				if(minOverlapPercentCluster>0f && (overlapLength*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return null;}
5109 			}
5110 
5111 			final byte[] a=bases(), b=u2.bases();
5112 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
5113 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
5114 
5115 			if(verbose){
5116 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
5117 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
5118 			}
5119 
5120 			for(int i=start1, j=start2; j<=stop2; i--, j++){
5121 				byte aa=a[i];
5122 				byte bb=baseToComplementExtended[b[j]];
5123 				if(aa!=bb){
5124 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
5125 						if(earlyExit && j<k2){return null;}
5126 						mismatches++;
5127 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
5128 						if(mismatches>1 && bandy!=null){
5129 							if(maxEdits<1){return null;}
5130 							if(verbose){System.err.println("mismatches exceeded 1, attempting banded alignment.");}
5131 							int edits=bandy.alignReverseRC(b, a, start2, start1, maxEdits, exact);
5132 							if(edits>maxEdits || bandy.score()<=4*edits){
5133 								if(verbose){System.err.println((edits>maxEdits ? "Too many edits" : "Alignment score too low")+"; returning null.");}
5134 								return null;
5135 							}
5136 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
5137 							stop2=bandy.lastQueryLoc;
5138 							stop1=bandy.lastRefLoc;
5139 //							if(bandy.lastOffset>0){//Ref longer than query
5140 //								for(int k=0; k<bandy.lastOffset; k++){
5141 //									if(stop1>0){stop1--;}
5142 //									else{stop2--;}//I don't think this can happen
5143 //								}
5144 //							}else if(bandy.lastOffset<0){//Query longer than ref
5145 //								for(int k=0; k>bandy.lastOffset; k--){
5146 //									if(stop2+1<=len2){stop2++;}
5147 //									else{stop1++;}
5148 //								}
5149 //							}
5150 							return new Overlap(this, u2, REVERSERC, start1, start2, stop1, stop2, overlapLength, edits, edits, bandy);
5151 						}else if(mismatches>maxMismatches){return null;}
5152 					}
5153 				}
5154 			}
5155 			return new Overlap(this, u2, REVERSERC, start1, start2, stop1, stop2, overlapLength, mismatches, 0, bandy);
5156 		}
5157 
5158 		@Override
5159 		public int compareTo(Unit b) {
5160 			int x=comparePairedRC(this, b);
5161 //			int y=comparePairedRC(b, this);
5162 //			boolean eq1=this.equals(b);
5163 //			boolean eq2=b.equals(this);
5164 //
5165 //			assert((x==y)==(x==0)) : x+", "+y+"\n"+this+"\n"+b;
5166 //			assert((x>0 == y<0) || (x==0 && y==0)) : x+", "+y+"\n"+this+"\n"+b;
5167 //
5168 //			assert(eq1==eq2): x+", "+y+"\n"+this+"\n"+b;
5169 //			assert(eq1==(x==0)): x+", "+y+"\n"+this+"\n"+b;
5170 //
5171 //			assert(eq1 || this!=b);
5172 //
5173 //			if(verbose){ //TODO: Remove
5174 //				System.err.println(this+"\n"+b+"\n"+this.r.toFastq()+"\n"+this.r.mate.toFastq()+"\n"+b.r.toFastq()+"\n"+b.r.mate.toFastq()+"\n");
5175 //				System.err.println("\n"+x+", "+y+", "+eq1+", "+eq2);
5176 //				verbose=false;
5177 //			}
5178 
5179 			return x;
5180 		}
5181 
5182 		@Override
5183 		public boolean equals(Object b){return equals((Unit)b);}
5184 		public boolean equals(Unit b){
5185 			boolean x=pairedEqualsRC(this, b);
5186 //			assert(x==pairedEqualsRC(b, this));
5187 //			assert(x==(comparePairedRC(this, b)==0));
5188 //			assert(x==(comparePairedRC(b, this)==0));
5189 //			assert(x || this!=b);
5190 //			System.err.println("\n****EQUALS?****:\n"+this+"\n"+b+"\n**** ****"); //TODO: Remove
5191 			return x;
5192 		}
5193 
5194 		@Override
5195 		public int hashCode(){
5196 			return (int)((code1^(code1>>>32))&0xFFFFFFFFL);
5197 		}
5198 
5199 		private synchronized void setValid(boolean b){
5200 			assert(b!=valid());
5201 //			if(!b){System.err.println("Setting invalid "+name());}
5202 			if(b){flags&=~INVALID_MASK;}
5203 			else{flags|=INVALID_MASK;}
5204 			assert(b==valid());
5205 		}
5206 
5207 		private synchronized void setClustered(boolean b){
5208 			assert(b!=clustered());
5209 			if(b){flags|=CLUSTER_MASK;}
5210 			else{flags&=~CLUSTER_MASK;}
5211 			assert(b==clustered());
5212 		}
5213 
5214 		private void setVisited(boolean b){
5215 			assert(b!=visited());
5216 			if(b){flags|=VISIT_MASK;}
5217 			else{flags&=~VISIT_MASK;}
5218 			assert(b==visited());
5219 		}
5220 
5221 		private synchronized void setCanonical(boolean b){
5222 			assert(b!=canonical());
5223 			if(b){flags|=CANON_MASK;}
5224 			else{flags&=~CANON_MASK;}
5225 			assert(b==canonical());
5226 			assert(r==null || b==isCanonical(r.bases));
5227 		}
5228 
5229 		private void setCanonicized(boolean b){
5230 			assert(b!=canonicized());
5231 			if(b){flags|=CANONICIZED_MASK;}
5232 			else{flags&=~CANONICIZED_MASK;}
5233 			assert(b==canonicized());
5234 		}
5235 
5236 		private synchronized void setCanonContradiction(boolean b){
5237 //			assert(b!=canonContradiction());
5238 			if(b){flags|=CANON_CONTRADICTION_MASK;}
5239 			else{flags&=~CANON_CONTRADICTION_MASK;}
5240 			assert(b==canonContradiction());
5241 		}
5242 
5243 		private synchronized void setOffset(int x){
5244 			offset=x;
5245 			setOffsetValid(true);
5246 		}
5247 
5248 		private synchronized void setOffsetValid(boolean b){
5249 			assert(!offsetValid());
5250 			if(b){flags|=OFFSET_VALID_MASK;}
5251 			else{flags&=~OFFSET_VALID_MASK;}
5252 			assert(b==offsetValid());
5253 		}
5254 
5255 		private synchronized void setOffsetContradiction(boolean b){
5256 //			assert(b!=offsetContradiction());
5257 			assert(offsetValid());
5258 			if(b){flags|=OFFSET_CONTRADICTION_MASK;}
5259 			else{flags&=~OFFSET_CONTRADICTION_MASK;}
5260 			assert(b==offsetContradiction());
5261 		}
5262 
5263 		private void reverseComplement(){
5264 			assert(r!=null);
5265 			r.reverseComplement();
5266 
5267 			if(prefixes!=null){
5268 				assert(suffixes!=null) : "Can't rcomp with null suffix array.";
5269 				for(int i=0; i<prefixes.length; i++){
5270 					long temp=prefixes[i];
5271 					prefixes[i]=suffixes[i];
5272 					suffixes[i]=temp;
5273 				}
5274 			}
5275 			setCanonical(!canonical());
5276 		}
5277 
5278 		/** Return true if 'this' should be the first Unit in the overlap object */
5279 		public boolean firstInOverlap(Unit u2){
5280 			assert(this!=u2) : "\n"+this.r+"\n"+u2.r;
5281 			if(u2.length()!=length()){return u2.length()<length();}
5282 			if(u2.code1!=code1){return u2.code1<code1;}
5283 			if(u2.code2!=code2){return u2.code2<code2;}
5284 			int x=compareTo(u2);
5285 			assert(x!=0 || (r!=null && r.mate!=null));
5286 			if(x!=0){return x>=0;}
5287 			return r.numericID>=u2.r.numericID;
5288 		}
5289 
5290 		public final boolean inSet(){
5291 			if(subsetCount<2){return true;}
5292 			if(r.pairnum()>0){return ((Unit)r.mate.obj).inSet();}
5293 			return ((code1&Long.MAX_VALUE)%subsetCount)==subset;
5294 		}
5295 
5296 		public byte[] bases(){return r==null ? null : r.bases;}
5297 
5298 		public String name(){return r!=null ? r.id : null /*code+""*/;}
5299 		@Override
5300 		public String toString(){return "("+name()+","+code1+","+code2+","+length()+","+prefixes[0]+","+suffixes[0]+","+(canonical()?"c":"nc")+",d="+depth+")";}
5301 
5302 
5303 		public final Read r;
5304 		public final long code1;
5305 		public final long code2;
5306 		public final long[] prefixes=(numAffixMaps>0 ? new long[numAffixMaps] : null);
5307 		public final long[] suffixes=(storeSuffix && numAffixMaps>0 ? new long[numAffixMaps] : null);
5308 
5309 		/** Distance of leftmost side of this read relative to leftmost side of root.
5310 		 * Assumes everything is in 'forward' orientation. */
5311 		public int offset=-999999999;
5312 		public int depth=1;
5313 //		private boolean valid=true;
5314 
5315 		public int unitID;
5316 
5317 		public ArrayList<Overlap> overlapList;
5318 
5319 		private long flags;
5320 		/** True if the original read orientation was canonical */
5321 		public final boolean canonical(){return (CANON_MASK&flags)!=0;}
5322 		/** True if this contig should be output, false if not */
5323 		public final boolean valid(){return (INVALID_MASK&flags)==0;}
5324 		/** Length of this contig */
5325 		public final int length(){return (int)(LEN_MASK&flags);}
5326 		/** Position of this contig relative to root */
5327 		public final int offset(){
5328 			assert(offsetValid());
5329 			return offset;
5330 		}
5331 		public int pairnum(){return (PAIRNUM_MASK&flags)==PAIRNUM_MASK ? 1 : 0;}
5332 
5333 		public void clearVolatileFlags(){
5334 			flags=flags&~(CANONICIZED_MASK|VISIT_MASK|CANON_CONTRADICTION_MASK|OFFSET_VALID_MASK|OFFSET_CONTRADICTION_MASK);
5335 			assert(!visited());
5336 			assert(!canonicized());
5337 			assert(!canonContradiction());
5338 			assert(!offsetValid());
5339 			assert(!offsetContradiction());
5340 		}
5341 
5342 		public boolean visited(){return (VISIT_MASK&flags)==VISIT_MASK;}
5343 		public boolean clustered(){return (CLUSTER_MASK&flags)==CLUSTER_MASK;}
5344 		public boolean canonicized(){return (CANONICIZED_MASK&flags)==CANONICIZED_MASK;}
5345 		public boolean canonContradiction(){return (CANON_CONTRADICTION_MASK&flags)==CANON_CONTRADICTION_MASK;}
5346 		public boolean offsetValid(){return (OFFSET_VALID_MASK&flags)==OFFSET_VALID_MASK;}
5347 		public boolean offsetContradiction(){return (OFFSET_CONTRADICTION_MASK&flags)==OFFSET_CONTRADICTION_MASK;}
5348 		public boolean contradiction(){return offsetContradiction() || canonContradiction();}
5349 
5350 		private static final long LEN_MASK=0x7FFFFFFFL;
5351 		private static final long CANON_MASK=(1L<<33);
5352 		private static final long INVALID_MASK=(1L<<34);
5353 		private static final long VISIT_MASK=(1L<<35);
5354 		private static final long CLUSTER_MASK=(1L<<36);
5355 		private static final long CANONICIZED_MASK=(1L<<37);
5356 		private static final long CANON_CONTRADICTION_MASK=(1L<<38);
5357 		private static final long OFFSET_VALID_MASK=(1L<<39);
5358 		private static final long OFFSET_CONTRADICTION_MASK=(1L<<40);
5359 		private static final long PAIRNUM_MASK=(1L<<41);
5360 	}
5361 
5362 	private static final class UnitOffsetComparator implements Comparator<Unit> {
5363 
5364 		UnitOffsetComparator(){}
5365 
5366 		/* (non-Javadoc)
5367 		 * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
5368 		 */
5369 		@Override
5370 		public int compare(Unit a, Unit b) {
5371 			if(a.offsetValid() && b.offsetValid()){
5372 				int x=a.offset()-b.offset();
5373 				if(x!=0){return x;}
5374 			}else{
5375 				if(a.offsetValid()){return -1;}
5376 				if(b.offsetValid()){return 1;}
5377 			}
5378 			return a.compareTo(b);
5379 		}
5380 
5381 	}
5382 
5383 	private static final class ClusterLengthComparator implements Comparator<ArrayList<Unit>> {
5384 
5385 		ClusterLengthComparator(){}
5386 
5387 		/* (non-Javadoc)
5388 		 * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
5389 		 */
5390 		@Override
5391 		public int compare(ArrayList<Unit> a, ArrayList<Unit> b) {
5392 			if(a.size()!=b.size()){return b.size()-a.size();}
5393 			if(a.isEmpty() && b.isEmpty()){return 0;}
5394 			return a.get(0).compareTo(b.get(0));
5395 		}
5396 
5397 	}
5398 
5399 	private static final int[] makeNmerIndex(int n){
5400 		final int max=(1<<(2*n))-1;
5401 		int[] array=new int[max+1];
5402 
5403 		int count=0;
5404 		for(int i=0; i<=max; i++){
5405 			final int a=i, b=AminoAcid.reverseComplementBinaryFast(i, n);
5406 			int min=Tools.min(a, b);
5407 			if(min==a){
5408 				array[a]=array[b]=count;
5409 				count++;
5410 			}
5411 		}
5412 		return array;
5413 	}
5414 
5415 	/** Makes a nmer (e.g., tetramer) profile of a cluster */
5416 	private static final float[] makeNmerProfile(ArrayList<Unit> alu, long[] array_){
5417 		final int nbits=2*nmerLength;
5418 		final long[] array=(array_==null ? new long[maxNmer+1] : array_);
5419 		final int mask=~((-1)<<(nbits));
5420 
5421 		long keysCounted=0;
5422 
5423 		for(Unit u : alu){
5424 			byte[] bases=u.r.bases;
5425 			int len=0;
5426 			int kmer=0;
5427 			for(byte b : bases){
5428 				int x=AminoAcid.baseToNumber[b];
5429 				if(x<0){
5430 					len=0;
5431 					kmer=0;
5432 				}else{
5433 					kmer=((kmer<<2)|x)&mask;
5434 					len++;
5435 					if(len>=nmerLength){
5436 						int rkmer=AminoAcid.reverseComplementBinaryFast(kmer, nmerLength);
5437 						keysCounted++;
5438 						array[nmerIndex[Tools.min(kmer, rkmer)]]++;
5439 					}
5440 				}
5441 			}
5442 		}
5443 
5444 		if(keysCounted==0){keysCounted=1;}
5445 		final float mult=1f/keysCounted;
5446 
5447 		float[] r=new float[array.length];
5448 		for(int i=0; i<array.length; i++){
5449 			r[i]=array[i]*mult;
5450 			array[i]=0;
5451 		}
5452 		return r;
5453 	}
5454 
5455 	private ConcurrentReadInputStream crisa[];
5456 
5457 	private final ByteStreamWriter dupeWriter;
5458 
5459 	private String[] in1=null;
5460 	private String[] in2=null;
5461 	private String out=null;
5462 	private String clusterFilePattern=null;
5463 	private String outbest=null;
5464 	private String outdupe=null;
5465 	private String outcsf=null;
5466 	private String outgraph=null;
5467 	private int maxNs=-1;
5468 	private long maxReads=-1;
5469 	public boolean errorState=false;
5470 	boolean sort=false;
5471 //	boolean ascending=true;
5472 	boolean absorbContainment=true;
5473 	boolean absorbMatch=true;
5474 	boolean findOverlaps=false;
5475 	boolean makeClusters=false;
5476 	boolean processClusters=false;
5477 	boolean renameClusters=false;
5478 	boolean absorbOverlap=false;
5479 	boolean storeSuffix=true;
5480 	boolean storeName=true;
5481 	boolean storeQuality=true;
5482 	boolean exact=true;
5483 	boolean uniqueNames=true;
5484 	boolean maxSpanningTree=false;
5485 
5486 	boolean canonicizeClusters=true;
5487 	boolean removeCycles=true;
5488 	boolean fixMultiJoins=true;
5489 	boolean fixCanonContradictions=false;
5490 	boolean fixOffsetContradictions=false;
5491 	boolean countTransitive=false;
5492 	boolean countRedundant=false;
5493 
5494 	private boolean multipleInputFiles=false;
5495 	private boolean rigorousTransitive=false;
5496 	private int numAffixMaps=1;
5497 	private int maxAffixCopies=2000000000;
5498 	private int maxEdges=2000000000;
5499 	private int maxEdges2=2000000000;
5500 	private boolean allowAllContainedOverlaps=false;
5501 //	private boolean toUpperCase=false;
5502 
5503 	/** Trim left bases of the read to this position (exclusive, 0-based) */
5504 	private int forceTrimLeft=-1;
5505 	/** Trim right bases of the read after this position (exclusive, 0-based) */
5506 	private int forceTrimRight=-1;
5507 
5508 	private boolean qTrimLeft=false;
5509 	private boolean qTrimRight=false;
5510 	private float trimQ=6;
5511 	/** Error rate for trimming (derived from trimq) */
5512 	private final float trimE;
5513 
5514 	int maxEdits=0;
5515 	int maxSubs=0;
5516 	int bandwidth=9;
5517 	final boolean customBandwidth;
5518 	float minIdentity=100;
5519 	float minIdentityMult=0;
5520 	float minLengthPercent=0;
5521 	int minOverlapCluster=100;
5522 	int minOverlapMerge=1;
5523 	float minOverlapPercentCluster=0;
5524 	float minOverlapPercentMerge=0;
5525 
5526 	private int minClusterSize=1;
5527 	private int minClusterSizeForStats=1;
5528 	private boolean pickBestRepresentativePerCluster=false;
5529 
5530 	long readsProcessed=0;
5531 	long basesProcessed=0;
5532 	long collisions=0;
5533 	long containments=0;
5534 	long baseContainments=0;
5535 	long containmentCollisions=0;
5536 	long matches=0;
5537 	long baseMatches=0;
5538 	long overlaps=0;
5539 	long baseOverlaps=0;
5540 	long overlapCollisions=0;
5541 	long addedToMain=0;
5542 
5543 	private final int subset;
5544 	private final int subsetCount;
5545 	private final boolean subsetMode;
5546 
5547 	private final int k;
5548 	private final int k2;
5549 	private final boolean EA=Shared.EA();
5550 
5551 	/*--------------------------------------------------------------*/
5552 	/*----------------         Static Fields        ----------------*/
5553 	/*--------------------------------------------------------------*/
5554 
5555 	private static ReadComparator comparator=ReadLengthComparator.comparator;
5556 
5557 	private static int tcount=0;
5558 
5559 	private LinkedHashMap<Long, ArrayList<Unit>> codeMap=new LinkedHashMap<Long, ArrayList<Unit>>(4000000);
5560 //	private HashMap<LongM, ArrayList<Unit>> affixMap1=null;
5561 //	private HashMap<LongM, ArrayList<Unit>> affixMap2=null;
5562 	private HashMap<LongM, ArrayList<Unit>>[] affixMaps=null;
5563 	private ArrayDeque<ArrayList<Unit>> clusterQueue=null;
5564 	private ArrayList<ArrayList<Unit>> processedClusters=null;
5565 	private AtomicIntegerArray clusterNumbers=null;
5566 
5567 	private static final UnitOffsetComparator UNIT_OFFSET_COMPARATOR=new UnitOffsetComparator();
5568 	private static final ClusterLengthComparator CLUSTER_LENGTH_COMPARATOR=new ClusterLengthComparator();
5569 	private static final long[][] hashcodes=makeCodes2(32);
5570 	public static final byte[] baseToNumber=new byte[128];
5571 	public static final byte[] baseToComplementNumber=new byte[128];
5572 	public static final byte[] baseToComplementExtended=new byte[128];
5573 	public static final int nmerLength=4;
5574 	public static final int[] nmerIndex=makeNmerIndex(nmerLength);
5575 	public static final int maxNmer=Tools.max(nmerIndex);
5576 	private static PrintStream outstream=System.err;
5577 	/** Permission to overwrite existing files */
5578 	public static boolean overwrite=false;
5579 	/** Permission to append to existing files */
5580 	public static boolean append=false;
5581 	public static boolean showSpeed=true;
5582 	public static boolean verbose=false;
5583 	public static boolean ignoreReverseComplement=false;
5584 	public static boolean preventTransitiveOverlaps=false;
5585 	public static boolean ignoreAffix1=false;
5586 	public static boolean parseDepth=false;
5587 	public static boolean printLengthInEdges=false;
5588 	public static float depthRatio=2;
5589 	public static int MINSCAF=0;
5590 	public static int THREADS=Shared.threads();
5591 	public static int threadMaxReadsToBuffer=4000;
5592 	public static int threadMaxBasesToBuffer=32000000;
5593 	public static boolean DISPLAY_PROGRESS=true;
5594 	public static boolean UNIQUE_ONLY=false;
5595 	public static boolean REQUIRE_MATCHING_NAMES=false;
5596 	public static boolean NUMBER_GRAPH_NODES=true;
5597 	public static boolean ADD_PAIRNUM_TO_NAME=true;
5598 	public static boolean HASH_NS=false;
5599 
5600 	private static int reverseType(int type){return (type+2)%4;}
5601 	public static final int FORWARD=0;
5602 	public static final int FORWARDRC=1;
5603 	public static final int REVERSE=2;
5604 	public static final int REVERSERC=3;
5605 	public static final String[] OVERLAP_TYPE_NAMES=new String[] {"FORWARD", "FORWARDRC", "REVERSE", "REVERSERC"};
5606 	public static final String[] OVERLAP_TYPE_ABBREVIATIONS=new String[] {"F", "FRC", "R", "RRC"};
5607 
5608 	static{//All others are 0
5609 		baseToNumber['A']=baseToNumber['a']=0;
5610 		baseToNumber['C']=baseToNumber['c']=1;
5611 		baseToNumber['G']=baseToNumber['g']=2;
5612 		baseToNumber['T']=baseToNumber['t']=3;
5613 		baseToNumber['U']=baseToNumber['u']=3;
5614 
5615 		baseToComplementNumber['A']=baseToComplementNumber['a']=3;
5616 		baseToComplementNumber['C']=baseToComplementNumber['c']=2;
5617 		baseToComplementNumber['G']=baseToComplementNumber['g']=1;
5618 		baseToComplementNumber['T']=baseToComplementNumber['t']=0;
5619 		baseToComplementNumber['U']=baseToComplementNumber['u']=0;
5620 
5621 		for(int i=0; i<AminoAcid.baseToComplementExtended.length; i++){
5622 			byte b=AminoAcid.baseToComplementExtended[i];
5623 			baseToComplementExtended[i]=(b<0 ? (byte)i : b);
5624 		}
5625 	}
5626 
5627 }
5628