1 package jgi;
2 
3 import java.io.File;
4 import java.io.PrintStream;
5 import java.io.Serializable;
6 import java.util.ArrayDeque;
7 import java.util.ArrayList;
8 import java.util.Arrays;
9 import java.util.Comparator;
10 import java.util.HashMap;
11 import java.util.HashSet;
12 import java.util.LinkedHashMap;
13 import java.util.Locale;
14 import java.util.PriorityQueue;
15 import java.util.Random;
16 import java.util.concurrent.atomic.AtomicIntegerArray;
17 
18 import align2.BandedAligner;
19 import dna.AminoAcid;
20 import fileIO.ByteFile1;
21 import fileIO.ByteFile2;
22 import fileIO.ByteStreamWriter;
23 import fileIO.FileFormat;
24 import fileIO.ReadWrite;
25 import fileIO.TextStreamWriter;
26 import shared.Parse;
27 import shared.Parser;
28 import shared.PreParser;
29 import shared.ReadStats;
30 import shared.Shared;
31 import shared.Timer;
32 import shared.Tools;
33 import shared.TrimRead;
34 import sort.ReadComparator;
35 import sort.ReadComparatorID;
36 import sort.ReadComparatorName;
37 import sort.ReadLengthComparator;
38 import sort.ReadQualityComparator;
39 import stream.ConcurrentCollectionReadInputStream;
40 import stream.ConcurrentGenericReadInputStream;
41 import stream.ConcurrentReadInputStream;
42 import stream.ConcurrentReadOutputStream;
43 import stream.FASTQ;
44 import stream.FastaReadInputStream;
45 import stream.FastqReadInputStream;
46 import stream.Read;
47 import structures.ListNum;
48 import structures.LongM;
49 
50 /**
51  * @author Brian Bushnell
52  * @date Jul 18, 2013
53  *
54  */
55 public final class Dedupe {
56 
main(String[] args)57 	public static void main(String[] args){
58 		{//Preparse to see which Dedupe version should be used
59 			int nam=1;
60 			boolean amino=false;
61 			for(int i=0; i<args.length; i++){
62 				final String arg=args[i];
63 				String[] split=arg.split("=");
64 				String a=split[0].toLowerCase();
65 				String b=split.length>1 ? split[1] : null;
66 				while(a.charAt(0)=='-'){a=a.substring(1);}
67 				if(a.equals("nam") || a.equals("numaffixmaps")){
68 					if(b!=null){nam=Integer.parseInt(b);}
69 				}else if(a.equals("amino") || a.equals("protein")){
70 					amino=Parse.parseBoolean(b);
71 				}
72 			}
73 			if(amino){
74 				DedupeProtein.main(args);
75 				return;
76 			}
77 			if(nam>2){
78 				Dedupe2.main(args);
79 				return;
80 			}
81 		}
82 
83 		int rbl0=Shared.bufferLen();;
84 		Shared.setBufferLen(16);
85 
86 		Dedupe dd=new Dedupe(args);
87 		dd.process();
88 
89 		Shared.setBufferLen(rbl0);
90 
91 		//Close the print stream if it was redirected
92 		Shared.closeStream(outstream);
93 	}
94 
Dedupe(String[] args)95 	public Dedupe(String[] args){
96 
97 		{//Preparse block for help, config files, and outstream
98 			PreParser pp=new PreParser(args, getClass(), true);
99 			args=pp.args;
100 			outstream=pp.outstream;
101 		}
102 
103 		ReadWrite.ZIPLEVEL=2;
104 		ReadWrite.USE_UNPIGZ=ReadWrite.USE_PIGZ=true;
105 		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
106 		Read.TO_UPPER_CASE=true;
107 
108 		boolean setMcsfs=false;
109 		int bandwidth_=-1;
110 		int k_=31;
111 		int subset_=0, subsetCount_=1;
112 		boolean ascending=false;
113 		Parser parser=new Parser();
114 
115 		for(int i=0; i<args.length; i++){
116 
117 			final String arg=args[i];
118 			String[] split=arg.split("=");
119 			String a=split[0].toLowerCase();
120 			String b=split.length>1 ? split[1] : null;
121 
122 			if(Parser.parseZip(arg, a, b)){
123 				//do nothing
124 			}else if(Parser.parseCommonStatic(arg, a, b)){
125 				//do nothing
126 			}else if(Parser.parseQuality(arg, a, b)){
127 				//do nothing
128 			}else if(Parser.parseFasta(arg, a, b)){
129 				//do nothing
130 			}else if(parser.parseQTrim(arg, a, b)){
131 				//do nothing
132 			}else if(parser.parseInterleaved(arg, a, b)){
133 				//do nothing
134 			}else if(a.equals("in") || a.equals("in1")){
135 				if(b!=null && b.indexOf(',')>=0 && !new File(b).exists()){
136 					in1=b.split(",");
137 				}else{
138 					in1=new String[] {b};
139 				}
140 			}else if(a.equals("in2")){
141 				if(b!=null && b.indexOf(',')>=0 && !new File(b).exists()){
142 					in2=b.split(",");
143 				}else{
144 					in2=new String[] {b};
145 				}
146 			}else if(a.equals("out") || a.equals("out")){
147 				out=b;
148 			}else if(a.equals("out2")){
149 				throw new RuntimeException("Dedupe does not allow 'out2'; for paired reads, output is interleaved.");
150 			}else if(a.equals("clusterfilepattern") || a.equals("pattern")){
151 				clusterFilePattern=b;
152 				assert(clusterFilePattern==null || clusterFilePattern.contains("%")) : "pattern must contain the % symbol.";
153 			}else if(a.equals("outbest")){
154 				outbest=b;
155 			}else if(a.equals("outd") || a.equals("outduplicate")){
156 				outdupe=b;
157 			}else if(a.equals("csf") || a.equals("clusterstatsfile")){
158 				outcsf=b;
159 			}else if(a.equals("dot") || a.equals("graph") || a.equals("outdot") || a.equals("outgraph")){
160 				outgraph=b;
161 			}else if(a.equals("mcsfs") || a.equals("minclustersizeforstats")){
162 				minClusterSizeForStats=Integer.parseInt(b);
163 			}else if(a.equals("mcs") || a.equals("minclustersize")){
164 				minClusterSize=Integer.parseInt(b);
165 				if(!setMcsfs){
166 					minClusterSizeForStats=minClusterSize;
167 				}
168 			}else if(a.equals("append") || a.equals("app")){
169 				append=ReadStats.append=Parse.parseBoolean(b);
170 			}else if(a.equals("overwrite") || a.equals("ow")){
171 				overwrite=Parse.parseBoolean(b);
172 			}else if(a.equals("sort")){
173 				if(b==null){sort=true;}
174 				else if(b.equalsIgnoreCase("a") || b.equalsIgnoreCase("ascending")){
175 					sort=true;
176 					ascending=true;
177 				}else if(b.equalsIgnoreCase("d") || b.equalsIgnoreCase("descending")){
178 					sort=true;
179 					ascending=false;
180 				}else if(b.equalsIgnoreCase("length")){
181 					sort=true;
182 					comparator=ReadLengthComparator.comparator;
183 				}else if(b.equalsIgnoreCase("quality")){
184 					sort=true;
185 					comparator=ReadQualityComparator.comparator;
186 				}else if(b.equalsIgnoreCase("name")){
187 					sort=true;
188 					comparator=ReadComparatorName.comparator;
189 				}else if(b.equalsIgnoreCase("id")){
190 					sort=true;
191 					comparator=ReadComparatorID.comparator;
192 				}else{
193 					sort=Parse.parseBoolean(b);
194 				}
195 			}else if(a.equals("ascending")){
196 				ascending=Parse.parseBoolean(b);
197 			}else if(a.equals("ordered")){
198 				boolean x=Parse.parseBoolean(b);
199 				if(x){
200 					ascending=true;
201 					sort=true;
202 					comparator=ReadComparatorID.comparator;
203 				}else{
204 					//do nothing
205 				}
206 			}else if(a.equals("arc") || a.equals("absorbrc") || a.equals("trc") || a.equals("testrc")){
207 				ignoreReverseComplement=!Parse.parseBoolean(b);
208 			}else if(a.equals("ac") || a.equals("absorbcontainment") || a.equals("absorbcontainments") || a.equals("tc") || a.equals("testcontainment") || a.equals("containment")){
209 				absorbContainment=Parse.parseBoolean(b);
210 			}else if(a.equals("am") || a.equals("absorbmatch") || a.equals("absorbmatches") || a.equals("tm") || a.equals("testmatch")){
211 				absorbMatch=Parse.parseBoolean(b);
212 			}else if(a.equals("ao") || a.equals("absorboverlap") || a.equals("absorboverlaps") || a.equals("to") || a.equals("testoverlap")){
213 				absorbOverlap=Parse.parseBoolean(b);
214 			}else if(a.equals("fo") || a.equals("findoverlap") || a.equals("findoverlaps")){
215 				findOverlaps=Parse.parseBoolean(b);
216 			}else if(a.equals("c") || a.equals("cluster") || a.equals("clusters")){
217 				makeClusters=Parse.parseBoolean(b);
218 			}else if(a.equals("fmj") || a.equals("fixmultijoin") || a.equals("fixmultijoins")){
219 				fixMultiJoins=Parse.parseBoolean(b);
220 			}else if(a.equals("fcc") || a.equals("fixcanoncontradiction") || a.equals("fixcanoncontradictions")){
221 				fixCanonContradictions=Parse.parseBoolean(b);
222 			}else if(a.equals("foc") || a.equals("fixoffsetcontradiction") || a.equals("fixoffsetcontradictions")){
223 				fixOffsetContradictions=Parse.parseBoolean(b);
224 			}else if(a.equals("pto") || a.equals("preventtransitiveoverlap") || a.equals("preventtransitiveoverlaps")){
225 				preventTransitiveOverlaps=Parse.parseBoolean(b);
226 			}else if(a.equals("pbr") || a.equals("pickbestrepresentative")){
227 				pickBestRepresentativePerCluster=Parse.parseBoolean(b);
228 			}else if(a.equals("mst") || a.equals("maxspanningtree")){
229 				maxSpanningTree=Parse.parseBoolean(b);
230 			}else if(a.equals("cc") || a.equals("canonicizecluster") || a.equals("canonicizeclusters")){
231 				canonicizeClusters=Parse.parseBoolean(b);
232 			}else if(a.equals("pc") || a.equals("processcluster") || a.equals("processclusters")){
233 				processClusters=Parse.parseBoolean(b);
234 			}else if(a.equals("rnc") || a.equals("renamecluster") || a.equals("renameclusters")){
235 				renameClusters=Parse.parseBoolean(b);
236 				if(renameClusters){storeName=false;}
237 			}else if(a.equals("rc") || a.equals("removecycles") || a.equals("removecycle")){
238 				removeCycles=Parse.parseBoolean(b);
239 			}else if(a.equals("uo") || a.equals("uniqueonly")){
240 				UNIQUE_ONLY=Parse.parseBoolean(b);
241 			}else if(a.equals("rmn") || a.equals("requirematchingnames")){
242 				REQUIRE_MATCHING_NAMES=Parse.parseBoolean(b);
243 			}else if(a.equals("ngn") || a.equals("numbergraphnodes")){
244 				NUMBER_GRAPH_NODES=Parse.parseBoolean(b);
245 			}else if(a.equals("addpairnum")){
246 				ADD_PAIRNUM_TO_NAME=Parse.parseBoolean(b);
247 			}else if(a.equals("hashns")){
248 				HASH_NS=Parse.parseBoolean(b);
249 			}else if(a.equals("pn") || a.equals("prefixname")){
250 //				PREFIX_NAME=Parse.parseBoolean(b);
251 			}else if(a.equals("k")){
252 				k_=Integer.parseInt(b);
253 				assert(k_>0 && k_<32) : "k must be between 1 and 31; default is 31, and lower values are slower.";
254 			}else if(a.equals("minscaf") || a.equals("ms")){
255 				MINSCAF=FastaReadInputStream.MIN_READ_LEN=Integer.parseInt(b);
256 			}else if(a.equals("mlp") || a.equals("minlengthpercent")){
257 				minLengthPercent=Float.parseFloat(b);
258 			}else if(a.equals("mop") || a.equals("minoverlappercent")){
259 				minOverlapPercentCluster=minOverlapPercentMerge=Float.parseFloat(b);
260 			}else if(a.equals("mopc") || a.equals("minoverlappercentcluster")){
261 				minOverlapPercentCluster=Float.parseFloat(b);
262 			}else if(a.equals("mopm") || a.equals("minoverlappercentmerge")){
263 				minOverlapPercentMerge=Float.parseFloat(b);
264 			}else if(a.equals("mo") || a.equals("minoverlap")){
265 				minOverlapCluster=minOverlapMerge=Integer.parseInt(b);
266 			}else if(a.equals("moc") || a.equals("minoverlapcluster")){
267 				minOverlapCluster=Integer.parseInt(b);
268 			}else if(a.equals("mom") || a.equals("minoverlapmerge")){
269 				minOverlapMerge=Integer.parseInt(b);
270 			}else if(a.equals("rt") || a.equals("rigoroustransitive")){
271 				rigorousTransitive=Parse.parseBoolean(b);
272 			}else if(a.equals("e") || a.equals("maxedits") || a.equals("edits") || a.equals("edist")){
273 				maxEdits=Integer.parseInt(b);
274 			}else if(a.equals("s") || a.equals("maxsubs") || a.equals("maxsubstitutions") || a.equals("hdist")){
275 				maxSubs=Integer.parseInt(b);
276 			}else if(a.equals("bw") || a.equals("bandwidth")){
277 				bandwidth_=Integer.parseInt(b);
278 			}else if(a.equals("mid") || a.equals("minidentity")){
279 				minIdentity=Float.parseFloat(b);
280 				minIdentityMult=(minIdentity==100f ? 0 : (100f-minIdentity)/100f);
281 			}else if(a.equals("threads") || a.equals("t")){
282 				THREADS=(b==null || b.equalsIgnoreCase("auto") ? Shared.threads() : Integer.parseInt(b));
283 			}else if(a.equals("showspeed") || a.equals("ss")){
284 				showSpeed=Parse.parseBoolean(b);
285 			}else if(a.equals("verbose")){
286 				verbose=Parse.parseBoolean(b);
287 //				BandedAligner.verbose=verbose;
288 			}else if(a.equals("contigbreak") || (arg.contains("=") && (a.equals("n") || a.equals("-n")))){
289 				maxNs=Integer.parseInt(b);
290 			}else if(a.equals("reads") || a.startsWith("maxreads")){
291 				maxReads=Parse.parseKMG(b);
292 			}else if(a.equals("sn") || a.equals("storename") || a.equals("storenames") || a.equals("keepnames")){
293 				storeName=Parse.parseBoolean(b);
294 			}else if(a.equals("ssx") || a.equals("storesuffix") || a.equals("storesuffixes")){
295 				storeSuffix=Parse.parseBoolean(b);
296 			}else if(a.equals("numaffixmaps") || a.equals("nam")){
297 				numAffixMaps=Integer.parseInt(b);
298 			}else if(a.equals("amino") || a.equals("protein")){
299 				assert(!Parse.parseBoolean(b)) : "This program does not support amino acids; use DedupeProtein.";
300 			}else if(a.equals("mac") || a.equals("maxaffixcopies")){
301 				maxAffixCopies=Integer.parseInt(b);
302 			}else if(a.equals("me") || a.equals("maxedges")){
303 				maxEdges=Integer.parseInt(b);
304 				maxEdges2=maxEdges*2;
305 				if(maxEdges2<1){maxEdges2=Integer.MAX_VALUE-1;}
306 			}else if(a.equals("ignoreaffix1") || a.equals("ia1")){
307 				ignoreAffix1=Parse.parseBoolean(b);
308 			}else if(a.equals("parsedepth") || a.equals("pd")){
309 				parseDepth=Parse.parseBoolean(b);
310 			}else if(a.equals("printlengthinedges") || a.equals("ple")){
311 				printLengthInEdges=Parse.parseBoolean(b);
312 			}else if(a.equals("depthmult") || a.equals("depthratio") || a.equals("dr")){
313 				depthRatio=Float.parseFloat(b);
314 				if(depthRatio<=0){
315 					parseDepth=false;
316 				}else{
317 					parseDepth=true;
318 					assert(depthRatio>0);
319 					if(depthRatio<1){depthRatio=1/depthRatio;}
320 				}
321 			}else if(a.equals("storequality") || a.equals("sq")){
322 				storeQuality=Parse.parseBoolean(b);
323 			}else if(a.equals("exact") || a.equals("ex")){
324 				exact=Parse.parseBoolean(b);
325 			}else if(a.equals("uniquenames") || a.equals("un")){
326 				uniqueNames=Parse.parseBoolean(b);
327 			}else if(a.equals("mergeheaders") || a.equals("mergenames")){
328 				mergeHeaders=Parse.parseBoolean(b);
329 			}else if(a.equals("mergedelimiter") || a.equals("mergesymbol")){
330 				mergeDelimiter=Parse.parseSymbol(b);
331 			}else if(a.equals("ftl") || a.equals("forcetrimleft")){
332 				forceTrimLeft=Integer.parseInt(b);
333 			}else if(a.equals("ftr") || a.equals("forcetrimright")){
334 				forceTrimRight=Integer.parseInt(b);
335 			}else if(a.equals("subset") || a.equals("sst")){
336 				subset_=Integer.parseInt(b);
337 			}else if(a.equals("subsets") || a.equals("subsetcount") || a.equals("sstc")){
338 				subsetCount_=Integer.parseInt(b);
339 			}else if(i==0 && in1==null && arg.indexOf('=')<0 && arg.lastIndexOf('.')>0){
340 				String c=args[i];
341 				if(c.indexOf(',')>=0 && !new File(c).exists()){
342 					in1=c.split(",");
343 				}else{
344 					in1=new String[] {c};
345 				}
346 			}else if(i==1 && out==null && arg.indexOf('=')<0 && arg.lastIndexOf('.')>0){
347 				out=args[i];
348 			}else{
349 				throw new RuntimeException("Unknown parameter "+args[i]);
350 			}
351 		}
352 
353 		comparator.setAscending(ascending);
354 
355 		{//Process parser fields
356 			Parser.processQuality();
357 			qTrimLeft=parser.qtrimLeft;
358 			qTrimRight=parser.qtrimRight;
359 			trimQ=parser.trimq;
360 			trimE=parser.trimE();
361 		}
362 
363 		if(verbose){
364 			ReadWrite.verbose=ConcurrentGenericReadInputStream.verbose=ConcurrentReadOutputStream.verbose=ByteFile1.verbose=ByteFile2.verbose=FastqReadInputStream.verbose=true;
365 		}
366 
367 		k=k_;
368 		k2=k-1;
369 		subset=subset_;
370 		subsetCount=subsetCount_;
371 		subsetMode=subsetCount>1;
372 		assert(subset>=0 && subset<subsetCount) : "subset="+subset+", subsetCount="+subsetCount;
373 
374 		BandedAligner.penalizeOffCenter=true;
375 
376 		if(maxSpanningTree){removeCycles=fixMultiJoins=false;}
377 		if(absorbOverlap){processClusters=true;}
378 		if(processClusters || renameClusters || maxSpanningTree){makeClusters=true;}
379 		if(makeClusters){findOverlaps=true;}
380 		if(renameClusters){uniqueNames=/*storeName=*/false;}
381 
382 		if(bandwidth_>-1){
383 			bandwidth=Tools.min(bandwidth_, 2*maxEdits+1);
384 			customBandwidth=(bandwidth<2*maxEdits+1);
385 		}else{
386 			bandwidth=2*maxEdits+1;
387 			customBandwidth=false;
388 		}
389 		maxSubs=Tools.max(maxSubs, maxEdits);
390 		if(maxSubs>0 || minIdentity<100 || findOverlaps){storeSuffix=true;}
391 
392 		assert(FastaReadInputStream.settingsOK());
393 
394 		if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
395 
396 		for(int i=0; i<in1.length; i++){
397 			if(in1[i].equalsIgnoreCase("stdin") && !new File(in1[i]).exists()){in1[i]="stdin.fa";}
398 		}
399 
400 //		assert(false) : Arrays.toString(in);
401 
402 //		if(!setOut && clusterFilePattern==null){out="stdout.fa";}
403 //		else
404 //			if("stdout".equalsIgnoreCase(out) || "standarddout".equalsIgnoreCase(out)){
405 //			out="stdout.fa";
406 //			outstream=System.err;
407 //		}
408 		if(!Tools.canWrite(out, overwrite)){throw new RuntimeException("Output file "+out+" already exists, and overwrite="+overwrite);}
409 
410 		for(int i=0; i<in1.length; i++){
411 			assert(!in1[i].equalsIgnoreCase(out));
412 		}
413 //		assert(false) : "\nabsorbContainment="+absorbContainment+", findOverlaps="+findOverlaps+", absorbOverlap="+absorbOverlap+"\n"+
414 //			"processClusters="+processClusters+", renameClusters="+renameClusters+", makeClusters="+makeClusters+", uniqueNames="+uniqueNames+"\n"+
415 //			"storeName="+storeName+", DISPLAY_PROGRESS="+DISPLAY_PROGRESS+", removeCycles="+removeCycles;
416 		if(absorbContainment || findOverlaps){
417 //			assert(false);
418 			affixMaps=new HashMap[numAffixMaps];
419 			for(int i=0; i<numAffixMaps; i++){
420 				affixMaps[i]=new HashMap<LongM, ArrayList<Unit>>(4000000);
421 			}
422 			if(affixMaps.length>0){affixMap1=affixMaps[0];}
423 			if(affixMaps.length>1){affixMap2=affixMaps[1];}
424 		}
425 //		assert(false) : absorbContainment+", "+(affixMap==null);
426 
427 		if(outdupe==null){
428 			dupeWriter=null;
429 		}else{
430 			FileFormat ff=FileFormat.testOutput(outdupe, FileFormat.FASTA, null, true, overwrite, append, false);
431 			dupeWriter=new ByteStreamWriter(ff);
432 		}
433 	}
434 
process()435 	public void process(){
436 
437 		Timer t=new Timer();
438 
439 		boolean dq0=FASTQ.DETECT_QUALITY;
440 		boolean ti0=FASTQ.TEST_INTERLEAVED;
441 
442 		process2();
443 
444 		FASTQ.DETECT_QUALITY=dq0;
445 		FASTQ.TEST_INTERLEAVED=ti0;
446 
447 		t.stop();
448 
449 		if(showSpeed){
450 			outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
451 		}
452 
453 		if(errorState){
454 			throw new RuntimeException("Dedupe terminated in an error state; the output may be corrupt.");
455 		}
456 	}
457 
process2()458 	public void process2(){
459 		if(dupeWriter!=null){dupeWriter.start();}
460 //		assert(false) : out;
461 		Timer t=new Timer();
462 
463 		if(DISPLAY_PROGRESS){
464 			outstream.println("Initial:");
465 			Shared.printMemory();
466 			outstream.println();
467 		}
468 
469 		processMatches(t);
470 
471 		forceTrimLeft=forceTrimRight=-1;
472 		qTrimLeft=qTrimRight=false;
473 
474 
475 		if(absorbContainment){
476 			processContainments(t);
477 		}
478 
479 		if(dupeWriter!=null){dupeWriter.poisonAndWait();}
480 
481 		if(findOverlaps){
482 			findOverlaps(t);
483 
484 			killAffixMaps();
485 
486 			if(processClusters || renameClusters || maxSpanningTree){codeMap=null;}
487 
488 			if(maxSpanningTree){
489 				processMst(t);
490 			}
491 
492 			if(processClusters){
493 				processClusters(t, absorbOverlap);
494 			}
495 //			if(renameClusters){
496 //				renameClusters(t);
497 //			}
498 //			assert(false) : (codeMap==null)+", "+(affixMap1==null)+", "+processedClusters;
499 		}
500 
501 		outstream.println("Input:                  \t"+readsProcessed+" reads \t\t"+basesProcessed+" bases.");
502 
503 		if(absorbMatch){
504 			outstream.println("Duplicates:             \t"+matches+" reads ("+String.format(Locale.ROOT, "%.2f",matches*100.0/readsProcessed)+"%) \t"+
505 					baseMatches+" bases ("+String.format(Locale.ROOT, "%.2f",baseMatches*100.0/basesProcessed)+"%)     \t"+collisions+" collisions.");
506 		}
507 		if(absorbContainment){
508 			outstream.println("Containments:           \t"+containments+" reads ("+String.format(Locale.ROOT, "%.2f",containments*100.0/readsProcessed)+"%) \t"+
509 					baseContainments+" bases ("+String.format(Locale.ROOT, "%.2f",baseContainments*100.0/basesProcessed)+"%)    \t"+containmentCollisions+" collisions.");
510 		}
511 		if(findOverlaps){
512 			outstream.println("Overlaps:               \t"+overlaps+" reads ("+String.format(Locale.ROOT, "%.2f",overlaps*100.0/readsProcessed)+"%) \t"+
513 					baseOverlaps+" bases ("+String.format(Locale.ROOT, "%.2f",baseOverlaps*100.0/basesProcessed)+"%)    \t"+overlapCollisions+" collisions.");
514 		}
515 //		outstream.println("Result:                 \t"+(addedToMain-containments)+" reads \t\t"+(basesProcessed-baseMatches-baseContainments)+" bases.");
516 
517 		long outReads=(addedToMain-containments);
518 		if(UNIQUE_ONLY){outReads=readsProcessed-matches-containments;}
519 		long outBases=(basesProcessed-baseMatches-baseContainments);
520 		outstream.println("Result:                 \t"+outReads+" reads ("+String.format(Locale.ROOT, "%.2f",outReads*100.0/readsProcessed)+"%) \t"+
521 				outBases+" bases ("+String.format(Locale.ROOT, "%.2f",outBases*100.0/basesProcessed)+"%)");
522 
523 		outstream.println("");
524 
525 		if(out!=null || clusterFilePattern!=null || outbest!=null || outgraph!=null || outcsf!=null){
526 			writeOutput(outcsf, t);
527 		}
528 
529 	}
530 
killAffixMaps()531 	private void killAffixMaps(){
532 		if(affixMaps==null){return;}
533 		for(int i=0; i<numAffixMaps; i++){
534 			if(affixMaps[i]!=null){affixMaps[i].clear();}
535 			affixMaps[i]=null;
536 		}
537 		affixMap1=null;
538 		affixMap2=null;
539 		affixMaps=null;
540 	}
541 
makeCrisArray(ArrayList<Read> list)542 	private ConcurrentReadInputStream[] makeCrisArray(ArrayList<Read> list){
543 		final ConcurrentReadInputStream[] array;
544 
545 		if(list!=null){
546 			array=new ConcurrentReadInputStream[] {new ConcurrentCollectionReadInputStream(list, null, -1)};
547 			array[0].start(); //This deadlocks if ConcurrentReadInputStream extends Thread rather than spawning a new thread.
548 		}else{
549 			array=new ConcurrentReadInputStream[in1.length];
550 			multipleInputFiles=array.length>1;
551 			for(int i=0; i<in1.length; i++){
552 				if(verbose){System.err.println("Creating cris for "+in1[i]);}
553 
554 				final ConcurrentReadInputStream cris;
555 				{
556 					FileFormat ff1=FileFormat.testInput(in1[i], FileFormat.FASTA, null, !multipleInputFiles || ReadWrite.USE_UNPIGZ, true);
557 					FileFormat ff2=(in2==null || in2.length<=i ? null : FileFormat.testInput(in2[i], FileFormat.FASTA, null, !multipleInputFiles || ReadWrite.USE_UNPIGZ, true));
558 					cris=ConcurrentReadInputStream.getReadInputStream(maxReads, ff1.samOrBam(), ff1, ff2);
559 					cris.start();
560 					if(cris.paired()){
561 						THREADS=1;//Temp fix for losing reads when multithreaded and paired
562 						if(absorbContainment){
563 							System.err.println("Set absorbContainment to false because it is not currently supported for paired reads.");
564 							absorbContainment=false;
565 						}
566 					}
567 				}
568 				array[i]=cris;
569 			}
570 		}
571 		return array;
572 	}
573 
processMatches(Timer t)574 	private void processMatches(Timer t){
575 		crisa=makeCrisArray(null);
576 
577 		ArrayList<HashThread> alht=new ArrayList<HashThread>(THREADS);
578 		for(int i=0; i<THREADS; i++){alht.add(new HashThread(true, (absorbContainment|findOverlaps), absorbMatch, false, false));}
579 		for(HashThread ht : alht){ht.start();}
580 		for(HashThread ht : alht){
581 			while(ht.getState()!=Thread.State.TERMINATED){
582 				try {
583 					ht.join();
584 				} catch (InterruptedException e) {
585 					// TODO Auto-generated catch block
586 					e.printStackTrace();
587 				}
588 			}
589 			matches+=ht.matchesT;
590 			collisions+=ht.collisionsT;
591 			containments+=ht.containmentsT;
592 			containmentCollisions+=ht.containmentCollisionsT;
593 			baseContainments+=ht.baseContainmentsT;
594 			baseMatches+=ht.baseMatchesT;
595 			addedToMain+=ht.addedToMainT;
596 			readsProcessed+=ht.readsProcessedT;
597 			basesProcessed+=ht.basesProcessedT;
598 		}
599 		alht.clear();
600 
601 		if(verbose){System.err.println("Attempting to close input streams (1).");}
602 		for(ConcurrentReadInputStream cris : crisa){
603 			errorState|=ReadWrite.closeStream(cris);
604 		}
605 		crisa=null;
606 
607 		if(DISPLAY_PROGRESS){
608 			t.stop();
609 			outstream.println("Found "+matches+" duplicates.");
610 			outstream.println("Finished exact matches.    Time: "+t);
611 			Shared.printMemory();
612 			if(verbose){outstream.println(affixMap1);}
613 			outstream.println();
614 			t.start();
615 		}
616 	}
617 
processContainments(Timer t)618 	private void processContainments(Timer t){
619 		ArrayList<Read> list=new ArrayList<Read>((int)addedToMain);
620 		for(ArrayList<Unit> alu : codeMap.values()){
621 			for(Unit u : alu){
622 				assert(u.r.mate==null) : "Containments are not currently supported with paired reads.";
623 				if(u.valid() && u.r.pairnum()==0){list.add(u.r);}
624 			}
625 		}
626 
627 		//	if(minLengthPercent>0){
628 		//		if(verbose){System.err.println("Sorting.");}
629 		//		Shared.sort(list, comparator);
630 		//		Collections.reverse(list);
631 		//		assert(list.isEmpty() || list.get(0).length()<=list.get(list.size()-1).length()) :
632 		//			list.get(0).length()+", "+list.get(list.size()-1).length();
633 		//	}
634 
635 		crisa=makeCrisArray(subsetMode ? null : list);
636 
637 		ArrayList<HashThread> alht=new ArrayList<HashThread>(THREADS);
638 		for(int i=0; i<THREADS; i++){alht.add(new HashThread(false, false, false, true, false));}
639 
640 		for(HashThread ht : alht){ht.start();}
641 		for(HashThread ht : alht){
642 			while(ht.getState()!=Thread.State.TERMINATED){
643 				try {
644 					ht.join();
645 				} catch (InterruptedException e) {
646 					// TODO Auto-generated catch block
647 					e.printStackTrace();
648 				}
649 			}
650 			assert(ht.matchesT==0);
651 			assert(ht.collisionsT==0);
652 			assert(ht.baseMatchesT==0);
653 			assert(ht.addedToMainT==0);
654 //			assert(ht.readsProcessedT==0);
655 //			assert(ht.basesProcessedT==0);
656 			//		matches+=ht.matchesT;
657 			//		collisions+=ht.collisionsT;
658 			containments+=ht.containmentsT;
659 			containmentCollisions+=ht.containmentCollisionsT;
660 			baseContainments+=ht.baseContainmentsT;
661 			//		baseMatches+=ht.baseMatchesT;
662 			//		addedToMain+=ht.addedToMainT;
663 			//		readsProcessed+=ht.readsProcessedT;
664 			//		basesProcessed+=ht.basesProcessedT;
665 		}
666 		alht.clear();
667 		if(verbose){System.err.println("Attempting to close input streams (2).");}
668 		for(ConcurrentReadInputStream cris : crisa){
669 			errorState|=ReadWrite.closeStream(cris);
670 		}
671 
672 		if(DISPLAY_PROGRESS){
673 			t.stop();
674 			outstream.println("Found "+containments+" contained sequences.");
675 			outstream.println("Finished containment.      Time: "+t);
676 			Shared.printMemory();
677 			outstream.println();
678 			t.start();
679 		}
680 		crisa=null;
681 		if(!findOverlaps){
682 			killAffixMaps();
683 		}
684 
685 		long x=removeInvalid(list);
686 		list.clear();
687 
688 		if(DISPLAY_PROGRESS){
689 			t.stop();
690 			outstream.println("Removed "+x+" invalid entries.");
691 			outstream.println("Finished invalid removal.  Time: "+t);
692 			Shared.printMemory();
693 			outstream.println();
694 			t.start();
695 		}
696 	}
697 
findOverlaps(Timer t)698 	private void findOverlaps(Timer t){
699 
700 		ArrayList<Read> list=new ArrayList<Read>((int)addedToMain);
701 		for(ArrayList<Unit> alu : codeMap.values()){
702 			for(Unit u : alu){
703 				if(u.valid() && u.r.pairnum()==0){
704 					u.unitID=list.size();
705 					list.add(u.r);
706 					if(u.r.mate!=null){
707 						Unit u2=(Unit)u.r.mate.obj;
708 						u2.unitID=u.unitID;
709 					}
710 				}else{
711 					u.unitID=Integer.MAX_VALUE;
712 				}
713 			}
714 		}
715 
716 		if(preventTransitiveOverlaps){
717 			clusterNumbers=new AtomicIntegerArray(list.size());
718 			for(int i=0; i<clusterNumbers.length(); i++){
719 				clusterNumbers.set(i, i);
720 			}
721 		}
722 
723 		crisa=makeCrisArray(subsetMode ? null : list);
724 
725 		ArrayList<HashThread> alht=new ArrayList<HashThread>(THREADS);
726 		for(int i=0; i<THREADS; i++){alht.add(new HashThread(false, false, false, false, true));}
727 
728 		for(HashThread ht : alht){ht.start();}
729 		for(HashThread ht : alht){
730 			while(ht.getState()!=Thread.State.TERMINATED){
731 				try {
732 					ht.join();
733 				} catch (InterruptedException e) {
734 					// TODO Auto-generated catch block
735 					e.printStackTrace();
736 				}
737 			}
738 			assert(ht.matchesT==0);
739 			assert(ht.collisionsT==0);
740 			assert(ht.baseMatchesT==0);
741 			assert(ht.addedToMainT==0);
742 
743 			overlaps+=ht.overlapsT;
744 			baseOverlaps+=ht.baseOverlapsT;
745 			overlapCollisions+=ht.overlapCollisionsT;
746 		}
747 		alht.clear();
748 		if(verbose){System.err.println("Attempting to close input streams (3).");}
749 		for(ConcurrentReadInputStream cris : crisa){
750 			errorState|=ReadWrite.closeStream(cris);
751 		}
752 
753 		if(DISPLAY_PROGRESS){
754 			t.stop();
755 			outstream.println("Found "+overlaps+" overlaps.");
756 			outstream.println("Finished finding overlaps. Time: "+t);
757 			Shared.printMemory();
758 			outstream.println();
759 			t.start();
760 		}
761 
762 		crisa=null;
763 
764 		if(makeClusters){
765 			int intransitive=0, redundant=0;
766 			assert((intransitive=countIntransitive(t, list, rigorousTransitive))==0);
767 			assert((redundant=countRedundant(t, list))==0);
768 			long overlaps=countOverlaps(t, list);
769 			assert(intransitive==0);
770 			assert(redundant==0);
771 //			makeTransitive(t, list, rigorousTransitive);
772 			if(clusterQueue==null){
773 				clusterQueue=new ArrayDeque<ArrayList<Unit>>(list.size()/4+1);
774 				processedClusters=new ArrayList<ArrayList<Unit>>();
775 			}else{
776 				assert(clusterQueue.isEmpty());
777 			}
778 			makeClusters(t, list);
779 		}
780 
781 		list.clear();
782 	}
783 
makeTransitive(Timer t, ArrayList<Read> list, boolean rigorous)784 	private long makeTransitive(Timer t, ArrayList<Read> list, boolean rigorous){
785 		assert(false) : "No longer needed.";
786 		long added=0;
787 		for(Read r : list){
788 			assert(r!=null);
789 			Unit u=(Unit) r.obj;
790 			assert(u!=null);
791 			assert(u.valid());
792 //			outstream.println("Considering "+r.id+"; valid="+u.valid()+", overlaps="+(u.overlapList==null ? "null" : u.overlapList.size()));
793 			if(u.valid()){
794 
795 				if(u.overlapList!=null){
796 					for(Overlap o : u.overlapList){
797 						Unit u2=(o.u1==u ? o.u2 : o.u1);
798 						assert(u2!=u);
799 						if(u2.overlapList==null){
800 							u2.overlapList=new ArrayList<Overlap>(2);
801 							u2.overlapList.add(o);
802 						}else{
803 							boolean found=false;
804 							if(rigorous){
805 								found=u2.overlapList.contains(o);
806 							}else{
807 								for(Overlap o2 : u2.overlapList){
808 									if(o2.u1==u || o2.u2==u){found=true; break;}
809 								}
810 							}
811 							if(!found){
812 								added++;
813 								u2.overlapList.add(o);
814 							}
815 						}
816 					}
817 				}
818 			}
819 		}
820 
821 		for(Read r : list){
822 			Unit u=(Unit) r.obj;
823 			if(u.valid()){
824 				assert(u.isTransitive());
825 			}
826 		}
827 
828 		if(DISPLAY_PROGRESS){
829 			t.stop();
830 			outstream.println("Added overlaps: "+added);
831 			outstream.println("Made overlaps transitive.  Time: "+t);
832 			Shared.printMemory();
833 			outstream.println();
834 			t.start();
835 		}
836 		return added;
837 	}
838 
countIntransitive(Timer t, ArrayList<Read> list, boolean rigorous)839 	private int countIntransitive(Timer t, ArrayList<Read> list, boolean rigorous){
840 		if(!countTransitive){return 0;}
841 		int transitive=0, intransitive=0;
842 		for(Read r : list){
843 			assert(r!=null);
844 			Unit u=(Unit) r.obj;
845 			assert(u!=null);
846 			assert(u.valid());
847 //			outstream.println("Considering "+r.id+"; valid="+u.valid()+", overlaps="+(u.overlapList==null ? "null" : u.overlapList.size()));
848 			if(u.valid()){
849 				if(rigorous ? u.isPerfectlyTransitive() : u.isTransitive()){
850 					transitive++;
851 				}else{
852 					intransitive++;
853 				}
854 			}
855 		}
856 
857 		if(DISPLAY_PROGRESS){
858 			t.stop();
859 			outstream.println("Intransitive:   "+intransitive+", \ttransitive: "+transitive);
860 			outstream.println("Checked transitivity.      Time: "+t);
861 			Shared.printMemory();
862 			outstream.println();
863 			t.start();
864 		}
865 
866 		return intransitive;
867 	}
868 
countRedundant(Timer t, ArrayList<Read> list)869 	private int countRedundant(Timer t, ArrayList<Read> list){
870 		if(!countRedundant){return 0;}
871 		int redundant=0, nonredundant=0;
872 		for(Read r : list){
873 			assert(r!=null);
874 			Unit u=(Unit) r.obj;
875 			assert(u!=null);
876 			assert(u.valid());
877 //			outstream.println("Considering "+r.id+"; valid="+u.valid()+", overlaps="+(u.overlapList==null ? "null" : u.overlapList.size()));
878 			if(u.valid()){
879 				if(u.isNonRedundant()){
880 					nonredundant++;
881 				}else{
882 					redundant++;
883 				}
884 			}
885 		}
886 
887 		if(DISPLAY_PROGRESS){
888 			t.stop();
889 			outstream.println("Redundant:      "+redundant+",  \tnonredundant: "+nonredundant);
890 			outstream.println("Checked redundancy.        Time: "+t);
891 			Shared.printMemory();
892 			outstream.println();
893 			t.start();
894 		}
895 		return redundant;
896 	}
897 
countOverlaps(Timer t, ArrayList<Read> list)898 	private long countOverlaps(Timer t, ArrayList<Read> list){
899 
900 		long overlaps=0, length=0;
901 		for(Read r : list){
902 			assert(r!=null);
903 			Unit u=(Unit) r.obj;
904 			assert(u!=null);
905 			assert(u.valid());
906 //			outstream.println("Considering "+r.id+"; valid="+u.valid()+", overlaps="+(u.overlapList==null ? "null" : u.overlapList.size()));
907 			if(u.valid() && u.overlapList!=null){
908 				for(Overlap o : u.overlapList){
909 					overlaps++;
910 					length+=o.overlapLen;
911 				}
912 			}
913 		}
914 
915 		if(DISPLAY_PROGRESS){
916 			t.stop();
917 			outstream.println("Overlaps:       "+overlaps+",  \tlength: "+length);
918 			outstream.println("Counted overlaps.          Time: "+t);
919 			Shared.printMemory();
920 			outstream.println();
921 			t.start();
922 		}
923 		return overlaps;
924 	}
925 
fillClusterSizeMatrix(ArrayList<ArrayList<Unit>> clusters, long[][] clusterSize)926 	private long fillClusterSizeMatrix(ArrayList<ArrayList<Unit>> clusters, long[][] clusterSize){
927 		int max=0;
928 		for(ArrayList<Unit> cluster : clusters){
929 			final int cs=Tools.min(clusterSize.length-1, cluster.size());
930 			{
931 				long reads=0, bases=0;
932 				for(Unit u2 : cluster){
933 					reads++;
934 					bases+=u2.length();
935 				}
936 				clusterSize[0][cs]++;
937 				clusterSize[1][cs]+=reads;
938 				clusterSize[2][cs]+=bases;
939 			}
940 			max=Tools.max(max, cluster.size());
941 		}
942 		return max;
943 	}
944 
makeClusters(Timer t, ArrayList<Read> list)945 	private void makeClusters(Timer t, ArrayList<Read> list){
946 
947 		final int clusterlen=70000;
948 		long[][] clusterSize=new long[3][clusterlen];
949 		int max=0;
950 		for(Read r : list){
951 			Unit u=(Unit) r.obj;
952 
953 			if(!u.clustered()){
954 				ArrayList<Unit> cluster=u.makeCluster();
955 				if(cluster.size()>2){cluster.trimToSize();}
956 				if(cluster.size()==1 || (!processClusters && !maxSpanningTree)){processedClusters.add(cluster);}
957 				else{clusterQueue.add(cluster);}
958 				final int cs=Tools.min(clusterlen-1, cluster.size());
959 				{
960 					long reads=0, bases=0;
961 					for(Unit u2 : cluster){
962 						reads++;
963 						bases+=u2.length();
964 					}
965 					clusterSize[0][cs]++;
966 					clusterSize[1][cs]+=reads;
967 					clusterSize[2][cs]+=bases;
968 				}
969 				max=Tools.max(max, cluster.size());
970 			}
971 		}
972 
973 		if(DISPLAY_PROGRESS){
974 			t.stop();
975 			outstream.println(toClusterSizeString(clusterSize));
976 			outstream.println("\nLargest:          "+max);
977 			outstream.println("Finished making clusters.  Time: "+t);
978 			Shared.printMemory();
979 			outstream.println();
980 			t.start();
981 		}
982 
983 
984 		long x=removeInvalid(list);
985 
986 		if(DISPLAY_PROGRESS){
987 			t.stop();
988 			outstream.println("Removed "+x+" invalid entries.");
989 			outstream.println("Finished invalid removal.  Time: "+t);
990 			Shared.printMemory();
991 			outstream.println();
992 			t.start();
993 		}
994 	}
995 
toClusterSizeString(long[][] clusterSizeMatrix)996 	private String toClusterSizeString(long[][] clusterSizeMatrix){
997 
998 		long[] clusterSize=clusterSizeMatrix[0];
999 		long[] clusterReads=clusterSizeMatrix[1];
1000 		long[] clusterBases=clusterSizeMatrix[2];
1001 
1002 		long totalClusters=Tools.sum(clusterSize);
1003 
1004 		long bigClusters=0;
1005 		for(int i=minClusterSize; i<clusterSize.length; i++){
1006 			bigClusters+=clusterSize[i];
1007 		}
1008 
1009 		final int spaces=18;
1010 		final int spaces2=spaces*2, spaces3=spaces*3;
1011 
1012 		final StringBuilder sb=new StringBuilder(100), sb2=new StringBuilder(1000);
1013 		sb2.append("Clusters:");
1014 		while(sb2.length()<spaces){sb2.append(' ');}
1015 		sb2.append(totalClusters+(minClusterSize<2 ? "" : " ("+bigClusters+" of at least size "+minClusterSize+")")+"\n");
1016 
1017 		sb.append("Size Range");
1018 		while(sb.length()<spaces){sb.append(' ');}
1019 		sb.append("Clusters");
1020 		while(sb.length()<spaces2){sb.append(' ');}
1021 		sb.append("Reads");
1022 		while(sb.length()<spaces3){sb.append(' ');}
1023 		sb.append("Bases");
1024 
1025 		sb2.append('\n');
1026 		sb2.append(sb);
1027 		sb.setLength(0);
1028 
1029 		for(int i=0; i<clusterSize.length-1; i=Tools.max(i+1, i*2)){
1030 			int a=i+1, b=i*2;
1031 			if(i<2){
1032 				sb.append(a);
1033 				while(sb.length()<spaces){sb.append(' ');}
1034 				sb.append(clusterSize[a]);
1035 				while(sb.length()<spaces2){sb.append(' ');}
1036 				sb.append(clusterReads[a]);
1037 				while(sb.length()<spaces3){sb.append(' ');}
1038 				sb.append(clusterBases[a]);
1039 			}else if(b>=clusterSize.length){
1040 				long x=Tools.sum(clusterSize, a, clusterSize.length-1);
1041 				long y=Tools.sum(clusterReads, a, clusterSize.length-1);
1042 				long z=Tools.sum(clusterBases, a, clusterSize.length-1);
1043 				if(x>0){
1044 					sb.append(a+"+");
1045 					while(sb.length()<spaces){sb.append(' ');}
1046 					sb.append(x);
1047 					while(sb.length()<spaces2){sb.append(' ');}
1048 					sb.append(y);
1049 					while(sb.length()<spaces3){sb.append(' ');}
1050 					sb.append(z);
1051 				}
1052 			}else{
1053 				long x=Tools.sum(clusterSize, a, b);
1054 				long y=Tools.sum(clusterReads, a, b);
1055 				long z=Tools.sum(clusterBases, a, b);
1056 				if(x>0){
1057 					sb.append(a+"-"+b);
1058 					while(sb.length()<spaces){sb.append(' ');}
1059 					sb.append(x);
1060 					while(sb.length()<spaces2){sb.append(' ');}
1061 					sb.append(y);
1062 					while(sb.length()<spaces3){sb.append(' ');}
1063 					sb.append(z);
1064 				}
1065 			}
1066 			if(sb.length()>0){
1067 				sb2.append('\n');
1068 				sb2.append(sb);
1069 				sb.setLength(0);
1070 			}
1071 		}
1072 		return sb2.toString();
1073 	}
1074 
renameClusters(Timer t)1075 	private void renameClusters(Timer t){
1076 		assert(false) : "This is now unused; renaming is done at output time.";
1077 		int cnum=0;
1078 		final StringBuilder sb=new StringBuilder(64);
1079 		for(ArrayList<Unit> alu : processedClusters){
1080 			for(int i=0; i<alu.size(); i++){
1081 				Unit u=alu.get(i);
1082 				Read r=u.r;
1083 				sb.append("Cluster ");
1084 				sb.append(cnum);
1085 				sb.append(",contig ");
1086 				sb.append(i);
1087 				if(u.offsetValid()){
1088 					sb.append(",pos ");
1089 					sb.append(u.offset());
1090 				}
1091 				r.id=sb.toString();
1092 				sb.setLength(0);
1093 			}
1094 		}
1095 
1096 		if(DISPLAY_PROGRESS){
1097 			t.stop();
1098 			outstream.println("Finished cluster renaming. Time: "+t);
1099 			Shared.printMemory();
1100 			outstream.println();
1101 			t.start();
1102 		}
1103 	}
1104 
processMst(Timer t)1105 	private void processMst(Timer t){
1106 
1107 		if(DISPLAY_PROGRESS){outstream.println("Converting to Maximum Spanning Tree.");}
1108 
1109 		ArrayList<MstThread> alct=new ArrayList<MstThread>(THREADS);
1110 		for(int i=0; i<THREADS; i++){
1111 			alct.add(new MstThread());
1112 		}
1113 
1114 		long overlapsRemoved=0;
1115 		long overlapBasesRemoved=0;
1116 		long overlapsRetained=0;
1117 		long overlapBasesRetained=0;
1118 
1119 		for(MstThread ct : alct){ct.start();}
1120 		for(MstThread ct : alct){
1121 			while(ct.getState()!=Thread.State.TERMINATED){
1122 				try {
1123 					ct.join();
1124 				} catch (InterruptedException e) {
1125 					e.printStackTrace();
1126 				}
1127 			}
1128 
1129 			overlapsRemoved+=ct.overlapsRemovedT;
1130 			overlapBasesRemoved+=ct.overlapBasesRemovedT;
1131 			overlapsRetained+=ct.overlapsRetainedT;
1132 			overlapBasesRetained+=ct.overlapBasesRetainedT;
1133 		}
1134 		assert(clusterQueue.isEmpty());
1135 		if(processClusters){
1136 			for(MstThread ct : alct){
1137 				clusterQueue.addAll(ct.processedT);
1138 				ct.processedT.clear();
1139 				ct.processedT=null;
1140 			}
1141 		}else{
1142 			for(MstThread ct : alct){
1143 				processedClusters.addAll(ct.processedT);
1144 				ct.processedT.clear();
1145 				ct.processedT=null;
1146 			}
1147 			clusterQueue=null;
1148 		}
1149 		alct.clear();
1150 
1151 		assert(affixMaps==null);
1152 		killAffixMaps();
1153 
1154 
1155 		if(DISPLAY_PROGRESS){
1156 			t.stop();
1157 			outstream.println("Removed "+(overlapsRemoved)+" edges ("+overlapBasesRemoved+" bases).");
1158 			outstream.println("Retained "+(overlapsRetained)+" edges ("+overlapBasesRetained+" bases).");
1159 
1160 //			outstream.println("\nAfter conversion to Maximum Spanning Tree:");
1161 //			final int[] clusterSize=new int[8200];
1162 //			int max=0;
1163 //			for(ArrayList<Unit> cluster : processedClusters){
1164 //				clusterSize[Tools.min(clusterSize.length-1, cluster.size())]++;
1165 //				max=Tools.max(max, cluster.size());
1166 //			}
1167 //			outstream.println(toClusterSizeString(clusterSize));
1168 //			outstream.println("Largest:          "+max);
1169 
1170 			outstream.println("Finished MST conversion.   Time: "+t);
1171 			Shared.printMemory();
1172 			outstream.println();
1173 			t.start();
1174 		}
1175 	}
1176 
processClusters(Timer t, boolean mergeClusters)1177 	private void processClusters(Timer t, boolean mergeClusters){
1178 
1179 		ArrayList<ClusterThread> alct=new ArrayList<ClusterThread>(THREADS);
1180 		for(int i=0; i<THREADS; i++){
1181 			alct.add(new ClusterThread(fixMultiJoins, canonicizeClusters, removeCycles, fixCanonContradictions, fixOffsetContradictions,
1182 					mergeClusters, mergeClusters, mergeClusters));
1183 		}
1184 
1185 		long leafMerges=0;
1186 		long innerMerges=0;
1187 		long leafBaseMerges=0;
1188 		long innerBaseMerges=0;
1189 
1190 		long multiJoinFailures=0;
1191 		long multiJoinsFound=0;
1192 		long multiJoinBasesFound=0;
1193 		long unitsFlipped=0;
1194 		long overlapsFlipped=0;
1195 		long canonContradictoryOverlaps=0;
1196 		long canonContradictoryClusters=0;
1197 		long offsetContradictoryOverlaps=0;
1198 		long offsetContradictoryClusters=0;
1199 		long cycleOverlaps=0;
1200 		long cycleClusters=0;
1201 
1202 		for(ClusterThread ct : alct){ct.start();}
1203 		for(ClusterThread ct : alct){
1204 			while(ct.getState()!=Thread.State.TERMINATED){
1205 				try {
1206 					ct.join();
1207 				} catch (InterruptedException e) {
1208 					e.printStackTrace();
1209 				}
1210 			}
1211 
1212 			leafMerges+=ct.leafMergesT;
1213 			innerMerges+=ct.innerMergesT;
1214 			leafBaseMerges+=ct.leafBaseMergesT;
1215 			innerBaseMerges+=ct.innerBaseMergesT;
1216 
1217 			multiJoinFailures+=ct.multiJoinFailuresT;
1218 			multiJoinsFound+=ct.multiJoinsFoundT;
1219 			multiJoinBasesFound+=ct.multiJoinBasesFoundT;
1220 			unitsFlipped+=ct.unitsFlippedT;
1221 			overlapsFlipped+=ct.overlapsFlippedT;
1222 			canonContradictoryOverlaps+=ct.canonContradictoryOverlapsT;
1223 			canonContradictoryClusters+=ct.canonContradictoryClustersT;
1224 			offsetContradictoryOverlaps+=ct.offsetContradictoryOverlapsT;
1225 			offsetContradictoryClusters+=ct.offsetContradictoryClustersT;
1226 			cycleOverlaps+=ct.cycleOverlapsT;
1227 			cycleClusters+=ct.cycleClustersT;
1228 		}
1229 		alct.clear();
1230 
1231 		assert(affixMaps==null || affixMap1==null);
1232 		killAffixMaps();
1233 
1234 		assert(clusterQueue.isEmpty());
1235 		clusterQueue=null;
1236 
1237 		if(DISPLAY_PROGRESS){
1238 			t.stop();
1239 			if(fixMultiJoins){
1240 				outstream.println("Found "+(multiJoinsFound)+" multijoins ("+multiJoinBasesFound+" bases).");
1241 				outstream.println("Experienced "+(multiJoinFailures)+" multijoin removal failures.");
1242 			}
1243 			if(canonicizeClusters){
1244 				outstream.println("Flipped "+(unitsFlipped)+" reads and "+overlapsFlipped+" overlaps.");
1245 				outstream.println("Found "+(canonContradictoryClusters)+" clusters ("+canonContradictoryOverlaps+" overlaps) with contradictory orientation cycles.");
1246 			}
1247 			if(fixOffsetContradictions){
1248 				outstream.println("Found "+(offsetContradictoryClusters)+" clusters ("+offsetContradictoryOverlaps+" overlaps) with contradictory offset cycles.");
1249 			}
1250 			outstream.println("Found "+(cycleClusters)+" clusters ("+cycleOverlaps+" overlaps) with remaining cycles.");
1251 			if(absorbOverlap){
1252 				outstream.println("Merged "+(leafMerges)+" leaves ("+leafBaseMerges+" bases).");
1253 				outstream.println("Merged "+(innerMerges)+" nonleaves ("+innerBaseMerges+" bases).");
1254 			}
1255 
1256 			outstream.println("\nAfter processing clusters:");
1257 			final long[][] clusterSize=new long[3][70000];
1258 			final long max=fillClusterSizeMatrix(processedClusters, clusterSize);
1259 			outstream.println(toClusterSizeString(clusterSize));
1260 			outstream.println("\nLargest:          "+max);
1261 
1262 			outstream.println("Finished processing.       Time: "+t);
1263 			Shared.printMemory();
1264 			outstream.println();
1265 			t.start();
1266 		}
1267 	}
1268 
removeInvalid(ArrayList<Read> list)1269 	private long removeInvalid(ArrayList<Read> list){
1270 		final LongM keym=new LongM();
1271 		long removedC=0, removedP=0, removedS=0, invalid=0;
1272 
1273 		for(int j=0, lim=list.size(); j<lim; j++){
1274 			final Read r=list.get(j);
1275 			final Unit u=(Unit)r.obj;
1276 
1277 			if(!u.valid()){
1278 
1279 				invalid++;
1280 
1281 				if(codeMap!=null && !codeMap.isEmpty()){
1282 					Long key=u.code1;
1283 					ArrayList<Unit> alu=codeMap.get(key);
1284 					if(alu!=null){
1285 						int valid=0;
1286 						for(int i=alu.size()-1; i>=0; i--){
1287 							Unit u2=alu.get(i);
1288 							if(u2==null || !u2.valid()){
1289 								alu.remove(i);
1290 								removedC++;
1291 							}
1292 							else{valid++;}
1293 						}
1294 						if(valid==0){codeMap.remove(key);}
1295 					}
1296 				}
1297 
1298 				if(affixMap1!=null && !affixMap1.isEmpty()){
1299 					{
1300 						keym.set(u.prefix1);
1301 						ArrayList<Unit> alu=affixMap1.get(keym);
1302 						if(alu!=null){
1303 							int valid=0;
1304 							for(int i=alu.size()-1; i>=0; i--){
1305 								Unit u2=alu.get(i);
1306 								if(u2==null || !u2.valid()){
1307 									alu.remove(i);
1308 									removedP++;
1309 								}
1310 								else{valid++;}
1311 							}
1312 							if(valid==0){affixMap1.remove(keym);}
1313 						}
1314 					}
1315 					if(storeSuffix){
1316 						keym.set(u.suffix1);
1317 						ArrayList<Unit> alu=affixMap1.get(keym);
1318 						if(alu!=null){
1319 							int valid=0;
1320 							for(int i=alu.size()-1; i>=0; i--){
1321 								Unit u2=alu.get(i);
1322 								if(u2==null || !u2.valid()){
1323 									alu.remove(i);
1324 									removedS++;
1325 								}
1326 								else{valid++;}
1327 							}
1328 							if(valid==0){affixMap1.remove(keym);}
1329 						}
1330 					}
1331 				}
1332 				if(affixMap2!=null && !affixMap2.isEmpty()){
1333 					if(u.prefix2!=-1){
1334 						keym.set(u.prefix2);
1335 						ArrayList<Unit> alu=affixMap2.get(keym);
1336 						if(alu!=null){
1337 							int valid=0;
1338 							for(int i=alu.size()-1; i>=0; i--){
1339 								Unit u2=alu.get(i);
1340 								if(u2==null || !u2.valid()){
1341 									alu.remove(i);
1342 									removedP++;
1343 								}
1344 								else{valid++;}
1345 							}
1346 							if(valid==0){affixMap2.remove(keym);}
1347 						}
1348 					}
1349 					if(storeSuffix && u.suffix2!=-1){
1350 						keym.set(u.suffix2);
1351 						ArrayList<Unit> alu=affixMap2.get(keym);
1352 						if(alu!=null){
1353 							int valid=0;
1354 							for(int i=alu.size()-1; i>=0; i--){
1355 								Unit u2=alu.get(i);
1356 								if(u2==null || !u2.valid()){
1357 									alu.remove(i);
1358 									removedS++;
1359 								}
1360 								else{valid++;}
1361 							}
1362 							if(valid==0){affixMap2.remove(keym);}
1363 						}
1364 					}
1365 
1366 				}
1367 
1368 				list.set(j, null);
1369 			}
1370 		}
1371 
1372 		if(invalid>0){
1373 			Tools.condenseStrict(list);
1374 		}
1375 		if(verbose){
1376 			outstream.println("Removed invalids: "+removedC+", "+removedP+", "+removedS);
1377 		}
1378 		return invalid;
1379 	}
1380 
1381 
addToArray(HashMap<Long, ArrayList<Unit>> codeMap, boolean sort, boolean clear, long outNum)1382 	private static ArrayList<Read> addToArray(HashMap<Long, ArrayList<Unit>> codeMap, boolean sort, boolean clear, long outNum){
1383 		assert(outNum<=Integer.MAX_VALUE);
1384 		if(verbose){System.err.println("Making list.");}
1385 		ArrayList<Read> list=new ArrayList<Read>((int)outNum);
1386 		if(verbose){System.err.println("Adding.");}
1387 		for(ArrayList<Unit> alu : codeMap.values()){
1388 			for(Unit u : alu){
1389 				if(u.valid() && u.r.pairnum()==0){list.add(u.r);}
1390 			}
1391 			if(clear){alu.clear();}
1392 		}
1393 		if(clear){codeMap.clear();}
1394 
1395 		if(sort){
1396 			if(verbose){System.err.println("Sorting.");}
1397 			Shared.sort(list, comparator);
1398 //			if(ascending){
1399 //				Collections.reverse(list);
1400 //				assert(list.isEmpty() || list.get(0).length()<=list.get(list.size()-1).length()) :
1401 //					list.get(0).length()+", "+list.get(list.size()-1).length();
1402 //			}else{
1403 //				assert(list.isEmpty() || list.get(0).length()>=list.get(list.size()-1).length()) :
1404 //					list.get(0).length()+", "+list.get(list.size()-1).length();
1405 //			}
1406 		}
1407 		assert(list.size()==outNum || list.size()*2L==outNum || UNIQUE_ONLY) : list.size()+", "+outNum;
1408 		return list;
1409 	}
1410 
writeOutput(String clusterStatsFile, Timer t)1411 	private void writeOutput(String clusterStatsFile, Timer t){
1412 //		verbose=true;
1413 //		assert(false) : (processedClusters==null)+", "+(processedClusters.isEmpty())+", "+outgraph+", "+out+", "+clusterFilePattern;
1414 		if(processedClusters==null || processedClusters.isEmpty()){
1415 
1416 			if(out!=null || clusterFilePattern!=null){
1417 
1418 				ArrayList<Read> list=addToArray(codeMap, sort, true, addedToMain-containments);
1419 				codeMap=null;
1420 
1421 				if(sort){
1422 					if(DISPLAY_PROGRESS){
1423 						t.stop();
1424 						outstream.println("Sorted output.             Time: "+t);
1425 						Shared.printMemory();
1426 						outstream.println();
1427 						t.start();
1428 					}
1429 				}
1430 
1431 				writeOutput(list);
1432 			}
1433 		}else{
1434 			if(outgraph!=null){
1435 				writeGraph(outgraph, processedClusters);
1436 			}
1437 			if(out!=null || clusterFilePattern!=null || clusterStatsFile!=null || outbest!=null){
1438 				writeOutputClusters(clusterStatsFile, processedClusters);
1439 			}
1440 		}
1441 
1442 		if(DISPLAY_PROGRESS){
1443 			t.stop();
1444 			outstream.println("Printed output.            Time: "+t);
1445 			Shared.printMemory();
1446 			outstream.println();
1447 			t.start();
1448 		}
1449 	}
1450 
1451 
1452 
writeOutput(ArrayList<Read> list)1453 	private void writeOutput(ArrayList<Read> list){
1454 
1455 		final ByteStreamWriter tsw=(out==null ? null : new ByteStreamWriter(out, overwrite, append, true));
1456 
1457 		if(verbose){System.err.println("Writing from array.");}
1458 		tsw.start();
1459 
1460 		HashSet<String> names=((uniqueNames && storeName) ?
1461 				new HashSet<String>(Tools.min(Integer.MAX_VALUE, Tools.max((int)addedToMain, (int)(addedToMain*1.35)))) : null);
1462 		long rid=0;
1463 		for(int x=0; x<list.size(); x++){
1464 			Read r=list.get(x);
1465 			list.set(x, null);
1466 
1467 			if(r.mate!=null && r.pairnum()!=0){r=r.mate;}
1468 
1469 			assert(r.mate==null || r.mate.discarded()==r.discarded());
1470 
1471 			if(!r.discarded()){
1472 				rid++;
1473 
1474 				for(int i=0; r!=null && i<2; i++){
1475 					if(multipleInputFiles){r.numericID=rid;}
1476 					if(names!=null){
1477 						String name=(r.id==null ? ""+r.numericID : r.id);
1478 						if(names.contains(name)){
1479 							for(long j=0; j<Integer.MAX_VALUE; j++){
1480 								String name2=name+"_dd"+j;
1481 								if(r.mate!=null){name2+=(" /"+(i+1));}
1482 								if(!names.contains(name2)){
1483 									r.id=name2;
1484 									names.add(name2);
1485 									break;
1486 								}
1487 							}
1488 						}else{
1489 							names.add(name);
1490 						}
1491 					}
1492 					tsw.println(r);
1493 					r.setDiscarded(true);
1494 					r=r.mate;
1495 				}
1496 			}
1497 		}
1498 		if(verbose){System.err.println("Shutting down tsw "+tsw.fname);}
1499 		tsw.poisonAndWait();
1500 	}
1501 
1502 
writeOutputClusters(String clusterStatsFile, ArrayList<ArrayList<Unit>> clist)1503 	private void writeOutputClusters(String clusterStatsFile, ArrayList<ArrayList<Unit>> clist){
1504 
1505 //		Shared.sort(clist, CLUSTER_LENGTH_COMPARATOR); //Causes a crash: java.lang.System.arraycopy(Native Method)
1506 		clist.sort(CLUSTER_LENGTH_COMPARATOR);
1507 
1508 		if(verbose){System.err.println("Writing clusters.");}
1509 
1510 		final ByteStreamWriter tswAll=(out==null ? null : new ByteStreamWriter(out, overwrite, append, true));
1511 		if(tswAll!=null){tswAll.start();}
1512 		ByteStreamWriter tswCluster=null;
1513 		ByteStreamWriter tswBest=null;
1514 
1515 		if(outbest!=null){
1516 			tswBest=new ByteStreamWriter(outbest, overwrite, append, true);
1517 			tswBest.start();
1518 		}
1519 
1520 		TextStreamWriter csf=null;
1521 		if(clusterStatsFile!=null){
1522 			csf=new TextStreamWriter(clusterStatsFile, overwrite, false, false);
1523 			csf.start();
1524 			csf.print("#Name\tsize\t"+nmerLength+"-mer frequencies\n");
1525 		}
1526 
1527 		HashSet<String> names=((uniqueNames && storeName) ?
1528 				new HashSet<String>(Tools.min(Integer.MAX_VALUE, Tools.max((int)addedToMain, (int)(addedToMain*1.35)))) : null);
1529 		long rid=0;
1530 		final long[] nmerCounts=new long[maxNmer+1];
1531 
1532 		final StringBuilder sb=new StringBuilder(64);
1533 
1534 		for(int cnum=0; cnum<clist.size(); cnum++){
1535 			final ArrayList<Unit> alu=clist.get(cnum);
1536 //			clist.set(cnum, null); //This breaks subsequent output processing
1537 
1538 			if(alu.size()<minClusterSize){
1539 				if(verbose){System.err.println("Ignoring small cluster "+cnum+", size "+alu.size());}
1540 
1541 				if(csf!=null && alu.size()>=minClusterSizeForStats){
1542 					float[] profile=makeNmerProfile(alu, nmerCounts);
1543 					sb.append("Cluster_");
1544 					sb.append(cnum);
1545 					sb.append('\t');
1546 					sb.append(alu.size());
1547 					sb.append('\t');
1548 					for(float f : profile){
1549 						sb.append(String.format(Locale.ROOT, "%.5f ", f));
1550 					}
1551 					sb.setCharAt(sb.length()-1, '\n');
1552 					csf.print(sb.toString());
1553 					sb.setLength(0);
1554 				}
1555 			}else{
1556 				if(verbose){System.err.println("Writing cluster "+cnum+", size "+alu.size());}
1557 
1558 				if(clusterFilePattern!=null){
1559 					if(tswCluster!=null){
1560 						if(verbose){System.err.println("Shutting down tswCluster "+tswCluster.fname);}
1561 						tswCluster.poisonAndWait();
1562 						tswCluster=null;
1563 					}
1564 					tswCluster=new ByteStreamWriter(clusterFilePattern.replaceFirst("%", ""+cnum), overwrite, append, true);
1565 					if(verbose){System.err.println("Starting tswCluster "+tswCluster.fname);}
1566 					tswCluster.start();
1567 				}
1568 
1569 				if(csf!=null && alu.size()>=minClusterSizeForStats){
1570 					float[] profile=makeNmerProfile(alu, nmerCounts);
1571 					sb.append("Cluster_");
1572 					sb.append(cnum);
1573 					sb.append('\t');
1574 					sb.append(alu.size());
1575 					sb.append('\t');
1576 					for(float f : profile){
1577 						sb.append(String.format(Locale.ROOT, "%.5f ", f));
1578 					}
1579 					sb.setCharAt(sb.length()-1, '\n');
1580 					csf.print(sb.toString());
1581 					sb.setLength(0);
1582 				}
1583 
1584 				if(pickBestRepresentativePerCluster){
1585 					pickBestRepresenative(alu, true);
1586 				}
1587 
1588 				if(outbest!=null){
1589 					Unit u=pickBestRepresenative((ArrayList<Unit>)alu.clone(), false);
1590 					tswBest.println(u.r);
1591 					if(u.r.mate!=null){tswBest.println(u.r.mate);}
1592 				}
1593 
1594 				for(int contig=0; contig<alu.size(); contig++){
1595 					final Unit u0=alu.get(contig);
1596 					alu.set(contig, null);
1597 					Read r=u0.r;
1598 					if(r.mate!=null && r.pairnum()!=0){r=r.mate;}
1599 
1600 					if(!r.discarded()){
1601 						rid++;
1602 
1603 						for(int i=0; r!=null && i<2; i++){
1604 							assert(r.pairnum()==i) : i+", "+r.pairnum()+", "+(r.mate==null ? 9 : r.mate.pairnum());
1605 							Unit u=(Unit)r.obj;
1606 							if(verbose){System.err.println("Writing read "+r.id);}
1607 							r.numericID=rid;
1608 							if(renameClusters){
1609 								sb.append("Cluster_");
1610 								sb.append(cnum);
1611 								sb.append(",contig_");
1612 								sb.append(contig);
1613 								if(u.offsetValid()){
1614 									sb.append(",pos_");
1615 									sb.append(u.offset());
1616 								}
1617 								if(r.mate!=null){sb.append(" /"+(i+1));}
1618 								r.id=(r.id==null ? sb.toString() : r.id+"\t"+sb);
1619 								sb.setLength(0);
1620 							}else if(names!=null){
1621 								String name=(r.id==null ? ""+r.numericID : r.id);
1622 								if(names.contains(name)){
1623 									for(long j=0; j<Integer.MAX_VALUE; j++){
1624 										String name2=name+"_dd"+j;
1625 										if(!names.contains(name2)){
1626 											r.id=name2;
1627 											names.add(name2);
1628 											break;
1629 										}
1630 									}
1631 								}else{
1632 									names.add(name);
1633 								}
1634 							}
1635 							if(tswAll!=null){tswAll.println(r);}
1636 							if(tswCluster!=null){tswCluster.println(r);}
1637 							r.setDiscarded(true);
1638 							r=r.mate;
1639 						}
1640 					}
1641 				}
1642 			}
1643 		}
1644 		if(csf!=null){
1645 			if(verbose){System.err.println("Shutting down csf "+csf.fname);}
1646 			csf.poisonAndWait();
1647 		}
1648 		if(tswBest!=null){
1649 			if(verbose){System.err.println("Shutting down tswBest "+tswBest.fname);}
1650 			tswBest.poisonAndWait();
1651 		}
1652 		if(tswAll!=null){
1653 			if(verbose){System.err.println("Shutting down tswAll "+tswAll.fname);}
1654 			tswAll.poisonAndWait();
1655 		}
1656 		if(tswCluster!=null){
1657 			if(verbose){System.err.println("Shutting down tswCluster "+tswCluster.fname);}
1658 			tswCluster.poisonAndWait();
1659 		}
1660 	}
1661 
1662 
writeGraph(String graphFile, ArrayList<ArrayList<Unit>> clist)1663 	private void writeGraph(String graphFile, ArrayList<ArrayList<Unit>> clist){
1664 //		Shared.sort(clist, CLUSTER_LENGTH_COMPARATOR);
1665 		clist.sort(CLUSTER_LENGTH_COMPARATOR);
1666 
1667 		if(verbose){System.err.println("Writing overlap graph.");}
1668 
1669 		final TextStreamWriter tsw=(graphFile==null ? null : new TextStreamWriter(graphFile, overwrite, append, true));
1670 		if(tsw!=null){
1671 			tsw.start();
1672 			tsw.print("digraph G {\n");
1673 		}
1674 
1675 		for(int cnum=0; cnum<clist.size(); cnum++){
1676 			final ArrayList<Unit> alu=clist.get(cnum);
1677 //			clist.set(cnum, null); //This breaks subsequent output processing
1678 //			Shared.sort(alu); //TODO: Remove
1679 
1680 			if(alu.size()<minClusterSize){
1681 				if(verbose){System.err.println("Ignoring small cluster "+cnum+", size "+alu.size());}
1682 			}else{
1683 				if(verbose){System.err.println("Processing cluster "+cnum+", size "+alu.size());}
1684 
1685 				for(int contig=0; contig<alu.size(); contig++){
1686 					final Unit u0=alu.get(contig);
1687 //					alu.set(contig, null); //This breaks subsequent output processing
1688 					Read r=u0.r;
1689 					if(r.mate!=null && r.pairnum()!=0){r=r.mate;}
1690 
1691 					{
1692 						for(int i=0; r!=null && i<2; i++){
1693 							assert(r.pairnum()==i) : i+", "+r.pairnum()+", "+(r.mate==null ? 9 : r.mate.pairnum());
1694 							Unit u=(Unit)r.obj;
1695 							if(verbose){System.err.println("Writing read "+r.id);}
1696 
1697 							if(tsw!=null){
1698 								tsw.print("\t"+toGraphName(r)+"\n");
1699 								if(r.mate!=null && r.pairnum()==0){
1700 									Read r2=r.mate;
1701 									tsw.print(toGraphName(r)+" -> "+toGraphName(r2)+" [label=mate]");
1702 								}
1703 								if(u.overlapList!=null){
1704 									for(Overlap o : u.overlapList){
1705 										if(u==o.u1){
1706 											Read r2=o.u2.r;
1707 											tsw.print("\t"+toGraphName(r)+" -> "+toGraphName(r2)+" [label=\""+o.toLabel()+"\"]\n");
1708 										}
1709 									}
1710 								}
1711 							}
1712 							r=r.mate;
1713 						}
1714 					}
1715 				}
1716 			}
1717 		}
1718 
1719 		if(tsw!=null){
1720 			tsw.print("}\n");
1721 			if(verbose){System.err.println("Shutting down tswAll "+tsw.fname);}
1722 			tsw.poisonAndWait();
1723 		}
1724 	}
1725 
toGraphName(Read r)1726 	private static String toGraphName(Read r){
1727 		if(NUMBER_GRAPH_NODES || r.id==null){
1728 			return r.numericID+((ADD_PAIRNUM_TO_NAME || r.mate!=null) ? "."+(r.pairnum()+1) : "");
1729 		}else{
1730 			return r.id.replace(' ','_').replace('\t','_');
1731 		}
1732 	}
1733 
pickBestRepresenative(ArrayList<Unit> alu, boolean clearList)1734 	private Unit pickBestRepresenative(ArrayList<Unit> alu, boolean clearList){
1735 		if(alu==null || alu.isEmpty()){return null;}
1736 		float[] quality=new float[alu.size()];
1737 		int[] lengths=new int[alu.size()];
1738 		for(int i=0; i<alu.size(); i++){
1739 			Unit u=alu.get(i);
1740 			int len=u.r.length();
1741 			quality[i]=u.r.expectedErrors(true, 0)/len;
1742 			lengths[i]=len;
1743 		}
1744 		Arrays.sort(quality);
1745 		Arrays.sort(lengths);
1746 		int medianLength=lengths[lengths.length/2];
1747 		float bestQuality=quality[0];
1748 
1749 		float currentBestQuality=9999999;
1750 		Unit best=null;
1751 		for(int i=0; i<alu.size(); i++){
1752 			Unit u=alu.get(i);
1753 			int len=u.r.length();
1754 			float deviation=Tools.absdif(len, medianLength)*1f/(medianLength+1);
1755 			if(deviation<0.05){
1756 				float qual=u.r.expectedErrors(true, 0)/len;
1757 				qual=(qual+.001f)*(1+10*deviation);
1758 				if(qual<currentBestQuality || best==null){
1759 					currentBestQuality=qual;
1760 					best=u;
1761 				}
1762 			}
1763 		}
1764 		if(clearList){
1765 			alu.clear();
1766 			alu.add(best);
1767 		}
1768 		return best;
1769 	}
1770 
hash(byte[] bases)1771 	public static long hash(byte[] bases){
1772 		long code=bases.length;
1773 		for(int i=0; i<bases.length; i++){
1774 			byte b=bases[i];
1775 			int mode=(int)(code&31);
1776 			assert(hashcodes[b]!=null) : "Invalid sequence character: '"+(char)b+"'";
1777 			code=code^hashcodes[b][mode];
1778 			code=Long.rotateLeft(code, 1);
1779 		}
1780 		return code;
1781 	}
1782 
1783 
hashReversed(byte[] bases)1784 	public static long hashReversed(byte[] bases){
1785 		long code=bases.length;
1786 		for(int i=bases.length-1; i>=0; i--){
1787 			byte b=bases[i];
1788 			assert(hashcodes[b]!=null) : "Invalid sequence character: '"+(char)b+"'";
1789 			b=baseToComplementExtended[b];
1790 			int mode=(int)(code&31);
1791 			code=code^hashcodes[b][mode];
1792 			code=Long.rotateLeft(code, 1);
1793 		}
1794 		return code;
1795 	}
1796 
1797 
isCanonical(byte[] bases)1798 	public static boolean isCanonical(byte[] bases){
1799 		if(ignoreReverseComplement || bases==null || bases.length==0){return true;}
1800 		final int lim=(bases.length+1)/2;
1801 		for(int i=0, j=bases.length-1; i<lim; i++, j--){
1802 			byte a=bases[i], b=baseToComplementExtended[bases[j]];
1803 			if(a<b){return true;}
1804 			if(b<a){return false;}
1805 		}
1806 		assert((bases.length&1)==0 || bases[lim-1]==baseToComplementExtended[bases[lim-1]]) :
1807 			bases.length+", "+lim+", "+bases[lim-1]+", "+(char)bases[lim-1]+(bases.length<1000 ? "\n'"+new String(bases)+"'\n" : ""); //palindrome absorb
1808 		return true; //palindrome
1809 	}
1810 
1811 
1812 	private static synchronized long[][] makeCodes(int symbols, int modes){
1813 		Random randy=new Random(1);
1814 		long[][] r=new long[symbols][modes];
1815 		for(int i=0; i<symbols; i++){
1816 			for(int j=0; j<modes; j++){
1817 				r[i][j]=randy.nextLong();
1818 			}
1819 		}
1820 		return r;
1821 	}
1822 
1823 //	/** Handles IUPAC codes */
1824 //	private static synchronized long[][] makeCodes2(int modes){
1825 //		long[][] r0=makeCodes(26, modes);
1826 //		long[][] r=new long[Tools.max('Z','z')+1][];
1827 //		for(int i=0; i<26; i++){
1828 //			char c=(char)('A'+i);
1829 //			r[c]=r[Tools.toLowerCase(c)]=r0[i];
1830 //		}
1831 //		return r;
1832 //	}
1833 
1834 	/** Handles IUPAC codes and invalid symbols */
1835 	private static synchronized long[][] makeCodes2(int modes){
1836 		long[][] r=makeCodes(128, modes);
1837 
1838 		for(int i=0; i<26; i++){
1839 			char c=(char)('A'+i);
1840 			r[Tools.toLowerCase(c)]=r[c];
1841 		}
1842 		return r;
1843 	}
1844 
1845 	private void addDupe(Read r, Read primary){
1846 		if(mergeHeaders && primary!=null && r!=null && r.id!=null){
1847 			primary.id+=mergeDelimiter+r.id;
1848 		}
1849 		if(dupeWriter==null){return;}
1850 		if(r.mate==null || r.pairnum()==0){
1851 			synchronized(dupeWriter){
1852 				dupeWriter.println(r);
1853 				if(r.mate!=null){
1854 					dupeWriter.println(r.mate);
1855 				}
1856 			}
1857 		}
1858 	}
1859 
1860 
1861 	private final class MstThread extends Thread{
1862 
1863 		public MstThread(){}
1864 
1865 		@Override
1866 		public void run(){
1867 
1868 			ArrayList<Unit> cluster=null;
1869 			while((cluster=nextCluster())!=null){
1870 				makeMst(cluster);
1871 				processedT.add(cluster);
1872 			}
1873 
1874 		}
1875 
1876 		public void makeMst(ArrayList<Unit> cluster){
1877 			assert(heap.isEmpty());
1878 			unvisit(cluster);
1879 			for(Unit u : cluster){
1880 				u.flags&=~Unit.VISIT_MASK;
1881 				Shared.sort(u.overlapList);
1882 			}
1883 			{
1884 				Unit u=cluster.get(0);
1885 				u.setVisited(true);
1886 				heap.addAll(u.overlapList);
1887 			}
1888 //			assert(false) : cluster.size();
1889 			while(!heap.isEmpty()){
1890 				Overlap o=heap.poll();
1891 				assert(!o.mst());
1892 				if(!o.invalid()){
1893 //					assert(o.u1.overlapList.contains(o)); //slow
1894 //					assert(o.u2.overlapList.contains(o)); //slow
1895 					assert(o.u1.visited() || o.u2.visited());
1896 					final Unit u=(!o.u1.visited() ? o.u1 : !o.u2.visited()? o.u2 : null);
1897 					if(u!=null){
1898 						o.setMst(true);
1899 						u.setVisited(true);
1900 						overlapsRetainedT++;
1901 						overlapBasesRetainedT+=o.overlapLen;
1902 						for(Overlap o2 : u.overlapList){
1903 							if(o2.mst()){
1904 								//do nothing
1905 							}else if(!o2.u1.visited() || !o2.u2.visited()){
1906 								if(heap.size()>=Integer.MAX_VALUE){
1907 									removeInvalid(heap);
1908 								}
1909 								heap.add(o2);
1910 							}else if(!o2.invalid()){
1911 								o2.setInvalid(true);
1912 								overlapsRemovedT++;
1913 								overlapBasesRemovedT+=o2.overlapLen;
1914 							}
1915 						}
1916 					}
1917 				}
1918 			}
1919 			for(Unit u : cluster){
1920 				ArrayList<Overlap> alo=u.overlapList;
1921 				int removed=0;
1922 				for(int i=0; i<alo.size(); i++){
1923 					Overlap o=alo.get(i);
1924 					if(o.invalid()){
1925 						assert(!o.mst());
1926 						alo.set(i, null);
1927 						removed++;
1928 					}else{
1929 						assert(o.mst());
1930 					}
1931 				}
1932 				if(removed>0){
1933 					Tools.condenseStrict(alo);
1934 					alo.trimToSize();
1935 				}
1936 			}
1937 		}
1938 
1939 		private void removeInvalid(PriorityQueue<Overlap> heap){
1940 			ArrayList<Overlap> valid=new ArrayList<Overlap>(heap.size());
1941 			for(Overlap o : heap){
1942 				if(!o.invalid()){
1943 					assert(!o.u1.visited() || !o.u2.visited());
1944 					valid.add(o);
1945 				}
1946 			}
1947 			heap.clear();
1948 			heap.addAll(valid);
1949 		}
1950 
1951 
1952 		public long overlapsRemovedT=0;
1953 		public long overlapBasesRemovedT=0;
1954 		public long overlapsRetainedT=0;
1955 		public long overlapBasesRetainedT=0;
1956 
1957 		private final PriorityQueue<Overlap> heap=new PriorityQueue<Overlap>((1<<16)-1);
1958 		private ArrayList<ArrayList<Unit>> processedT=new ArrayList<ArrayList<Unit>>();
1959 	}
1960 
1961 
1962 	/**
1963 	 * Processes clustered sets of reads.
1964 	 * @author Brian Bushnell
1965 	 * @date Aug 9, 2013
1966 	 *
1967 	 */
1968 	private final class ClusterThread extends Thread{
1969 
1970 		public ClusterThread(boolean fixMultiJoins_, boolean canonicize_, boolean removeCycles_,
1971 				boolean fixCanonContradictions_, boolean fixOffsetContradictions_, boolean mergeClusters_, boolean mergeLeaves_, boolean mergeInner_){
1972 			fixMultiJoinsT=fixMultiJoins_;
1973 			canonicizeT=canonicize_;
1974 			fixCanonContradictionsT=fixCanonContradictions_;
1975 			fixOffsetContradictionsT=fixOffsetContradictions_;
1976 			mergeClustersT=mergeClusters_;
1977 			mergeLeavesT=mergeLeaves_;
1978 			mergeInnerT=mergeInner_;
1979 
1980 //			assert(false) : fixMultiJoinsT+", "+canonicizeT+", "+fixCanonContradictionsT+", "+mergeLeavesT+", "+mergeInnerT;
1981 			bandy=(maxEdits>0 ? BandedAligner.makeBandedAligner(bandwidth) : null);
1982 //			assert(false) : fixMultiJoinsT+", "+canonicizeT+", "+fixCanonContradictionsT+", "+fixOffsetContradictionsT+", "+mergeClustersT+", "+removeCycles_;
1983 		}
1984 
1985 		@Override
1986 		public void run(){
1987 
1988 			final ArrayList<Unit> temp=new ArrayList<Unit>(1000);
1989 
1990 			ArrayList<Unit> cluster=null;
1991 			while((cluster=nextCluster())!=null){
1992 
1993 				if(EA){
1994 					for(Unit u : cluster){assert(u.r.mate==null) : "Cluster processing/merging is not supported for paired reads, only cluster generation.";}
1995 				}
1996 
1997 //				for(Unit u : cluster){assert(!u.visited());}
1998 				unvisit(cluster);
1999 
2000 				reorderClusterBreadthFirst(cluster);
2001 				int multiJoinCount=findMultiJoinsInCluster(cluster, fixMultiJoinsT);
2002 
2003 				if(EA){
2004 					for(Unit u : cluster){assert(!u.visited());}
2005 				}
2006 
2007 				boolean ok=true;
2008 				if(multiJoinCount!=0){
2009 					assert(multiJoinCount>0);
2010 					multiJoinsFoundT+=multiJoinCount;
2011 					if(!fixMultiJoinsT){
2012 						multiJoinFailuresT++;
2013 						ok=false;
2014 					}
2015 				}
2016 
2017 				int canonContradictions=0;
2018 				if(ok && canonicizeT){
2019 					if(EA){
2020 						for(Unit u : cluster){
2021 							assert(!u.visited());
2022 							assert(!u.canonContradiction());
2023 							assert(!u.canonicized());
2024 							if(u.overlapList!=null){
2025 								for(Overlap o : u.overlapList){
2026 									assert(!o.invalid());
2027 									assert(!o.canonContradiction()) :
2028 										o.u1.canonContradiction()+", "+o.u2.canonContradiction()+", "+cluster.contains(o.u1)+", "+cluster.contains(o.u2);
2029 								}
2030 							}
2031 						}
2032 					}
2033 					canonContradictions=canonicizeClusterBreadthFirst(cluster, temp);
2034 //					System.err.println("Canonicized cluster of size "+cluster.size()+"; contradictions = "+canonContradictions+"; canonicized = "+temp.size());
2035 					temp.clear();
2036 					for(Unit u : cluster){assert(!u.visited());}
2037 					if(canonContradictions>0){
2038 						canonContradictoryOverlapsT+=canonContradictions;
2039 						canonContradictoryClustersT++;
2040 						if(fixCanonContradictionsT){
2041 							if(verbose){System.err.println("Pruning cluster to remove canonization contradictions.");}
2042 							fullyPruneCluster(cluster, temp);
2043 							if(verbose){System.err.println("Resulting size: "+cluster.size());}
2044 							if(EA){
2045 								for(Unit u : cluster){
2046 									assert(!u.visited());
2047 									assert(!u.canonContradiction());
2048 									assert(u.canonicized());
2049 									if(u.overlapList!=null){
2050 										for(Overlap o : u.overlapList){
2051 											assert(!o.invalid());
2052 											assert(!o.canonContradiction());
2053 											assert(o.type==FORWARD) : "\n"+o+"\n"+
2054 											o.u1.canonContradiction()+", "+o.u2.canonContradiction()+", "+o.u1.canonicized()+", "+o.u2.canonicized()+
2055 											"\n"+cluster.contains(o.u1)+", "+cluster.contains(o.u2)+", "+cluster.size();
2056 										}
2057 									}
2058 								}
2059 							}
2060 						}else{
2061 							ok=false;
2062 						}
2063 					}
2064 				}
2065 
2066 				int cycleOverlaps=0;
2067 				if(ok){
2068 					cycleOverlaps=findCycles(cluster, removeCycles);
2069 					for(Unit u : cluster){assert(!u.visited());}
2070 					if(cycleOverlaps>0){
2071 						cycleOverlapsT+=cycleOverlaps;
2072 						cycleClustersT++;
2073 					}
2074 				}
2075 
2076 				int offsetContradictions=0;
2077 				if(ok && fixOffsetContradictionsT){
2078 					if(EA){
2079 						for(Unit u : cluster){
2080 							assert(!u.visited());
2081 							assert(!u.offsetContradiction());
2082 							assert(!u.offsetValid());
2083 							assert(u.canonicized());
2084 							if(u.overlapList!=null){
2085 								for(Overlap o : u.overlapList){
2086 									assert(!o.invalid());
2087 									assert(!o.offsetContradiction());
2088 									assert(o.type==FORWARD) : o;
2089 								}
2090 							}
2091 						}
2092 					}
2093 					offsetContradictions=generateOffsetsBreadthFirst(cluster, temp);
2094 //					System.err.println("Made offsets for cluster of size "+cluster.size()+"; contradictions = "+offsetContradictions+"; set = "+temp.size());
2095 					temp.clear();
2096 					for(Unit u : cluster){assert(!u.visited());}
2097 					if(offsetContradictions>0){
2098 						offsetContradictoryOverlapsT+=offsetContradictions;
2099 						offsetContradictoryClustersT++;
2100 						if(fixOffsetContradictionsT){
2101 							if(verbose){System.err.println("Pruning cluster to remove offset contradictions.");}
2102 							fullyPruneCluster(cluster, temp);
2103 							if(verbose){System.err.println("Resulting size: "+cluster.size());}
2104 							if(EA){
2105 								for(Unit u : cluster){
2106 									assert(!u.visited());
2107 									assert(!u.offsetContradiction());
2108 									assert(u.offsetValid());
2109 									if(u.overlapList!=null){
2110 										for(Overlap o : u.overlapList){
2111 											assert(!o.invalid());
2112 											assert(!o.offsetContradiction());
2113 											assert(o.type==FORWARD) : o;
2114 										}
2115 									}
2116 								}
2117 							}
2118 						}else{
2119 							ok=false;
2120 						}
2121 					}
2122 					if(ok){Shared.sort(cluster, UNIT_OFFSET_COMPARATOR);}
2123 				}
2124 
2125 				if(ok && absorbOverlap){
2126 					mergeCluster(cluster);
2127 				}
2128 
processedClustersT.add(cluster)2129 				processedClustersT.add(cluster);
2130 				if(processedClustersT.size()>=threadMaxReadsToBuffer){
2131 					synchronized(processedClusters){
2132 						processedClusters.addAll(processedClustersT);
processedClustersT.clear()2133 						processedClustersT.clear();
2134 					}
2135 				}
2136 			}
2137 			synchronized(processedClusters){
2138 				processedClusters.addAll(processedClustersT);
processedClustersT.clear()2139 				processedClustersT.clear();
2140 			}
2141 		}
2142 
2143 		private void fullyPruneCluster(ArrayList<Unit> cluster, ArrayList<Unit> temp){
2144 			assert(cluster.size()>1) : cluster.size();
2145 			ArrayList<Unit> pruned=pruneCluster(cluster, true, true, temp);
2146 			assert(temp.isEmpty());
2147 			assert(pruned==null || pruned.size()>0);
2148 			while(pruned!=null){
2149 				ArrayList<Unit> subcluster=pruned;
2150 				for(Unit u : subcluster){
2151 					u.clearVolatileFlags();
2152 					if(u.overlapList!=null){
2153 						for(Overlap o : u.overlapList){
2154 							o.clearVolatileFlags();
2155 						}
2156 					}
2157 				}
2158 				assert(subcluster.size()>0);
2159 				pruned=pruneCluster(subcluster, false, false, temp);
2160 				assert(temp.isEmpty());
2161 				assert(pruned==null || pruned.size()>0);
2162 				assert(subcluster.size()>0);
2163 				if(subcluster.size()==1){
2164 					processedClustersT.add(subcluster);
2165 				}else{
2166 					assert(subcluster.size()>1);
2167 					synchronized(clusterQueue){
2168 						clusterQueue.add(subcluster);
2169 					}
2170 				}
2171 			}
2172 		}
2173 
2174 		/**
2175 		 * @param cluster
2176 		 */
2177 		private void mergeCluster(ArrayList<Unit> cluster) {
2178 			if(cluster.size()==1){return;}
2179 			if(mergeLeavesT){
2180 				mergeLeaves(cluster);
2181 			}
2182 			if(mergeInnerT){
2183 				mergeInner(cluster);
2184 			}
2185 		}
2186 
2187 		/**
2188 		 * Finds places in the cluster where two Units are joined by multiple different Overlaps.
2189 		 * Returns number of multijoins found.
2190 		 * @param cluster
2191 		 */
2192 		private int findMultiJoinsInCluster(ArrayList<Unit> cluster, boolean resolveProblems) {
2193 			if(cluster.size()<2){return 0;}
2194 			int totalMultiJoins=0;
2195 			for(Unit ua : cluster){
2196 				ArrayList<Overlap> list=ua.overlapList;
2197 				assert(list!=null);
2198 				if(list.size()>1){
2199 					Shared.sort(list);
2200 
2201 					int multiJoins=0;
2202 					for(int i=0; i<list.size(); i++){
2203 						Overlap o=list.get(i);
2204 						Unit ub=(o.u1==ua ? o.u2 : o.u1);
2205 						assert(ua!=ub);
2206 						assert(ua==o.u1 || ua==o.u2);
2207 						if(ub.visited()){
2208 							multiJoins++;
2209 							multiJoinBasesFoundT+=o.overlapLen;
2210 							if(!o.multiJoin()){o.setMultiJoin(true);}
2211 							if(resolveProblems){list.set(i, null);}
2212 						}else{
2213 							ub.setVisited(true);
2214 						}
2215 					}
2216 
2217 					if(multiJoins>0){
2218 						totalMultiJoins+=multiJoins;
2219 						if(resolveProblems){Tools.condenseStrict(list);}
2220 					}
2221 
2222 					for(int i=0; i<list.size(); i++){
2223 						Overlap o=list.get(i);
2224 						Unit ub=(o.u1==ua ? o.u2 : o.u1);
2225 						assert(ua!=ub);
2226 						assert(ua==o.u1 || ua==o.u2);
2227 						assert(ub.visited());
2228 						ub.setVisited(false);
2229 					}
2230 				}
2231 
2232 			}
2233 
2234 			return totalMultiJoins;
2235 		}
2236 
2237 		private ArrayList<Unit> pruneCluster(ArrayList<Unit> cluster, boolean pruneContradictoryNodes, boolean pruneContradictoryOverlaps, ArrayList<Unit> visited){
2238 			if(verbose){System.err.println("pruneCluster(size="+cluster.size()+", "+pruneContradictoryNodes+", "+pruneContradictoryOverlaps+")");}
2239 
2240 			//pruneContradictoryOverlaps is less strict than pruneContradictoryNodes
2241 			assert(pruneContradictoryOverlaps || !pruneContradictoryNodes);
2242 
2243 			for(Unit ua : cluster){
2244 				assert(!ua.visited());
2245 				assert(ua.isPerfectlyTransitive()) : ua;
2246 				if(ua.visited()){ua.setVisited(false);}
2247 			}
2248 
2249 			int prunedOverlaps=0;
2250 			int visits=1;
2251 
2252 			{
2253 				final Unit root=cluster.get(0);
2254 				assert(!root.contradiction());
2255 				root.setVisited(true);
2256 				visited.add(root);
2257 			}
2258 
2259 			for(int i=0; i<visited.size(); i++){
2260 				Unit ua=visited.get(i);
2261 
2262 				if(ua.visited() && (!ua.contradiction() || !pruneContradictoryNodes)){
2263 					ArrayList<Overlap> list=ua.overlapList;
2264 					if(list!=null){
2265 						int removed=0;
2266 						for(int j=0; j<list.size(); j++){
2267 							Overlap o=list.get(j);
2268 							Unit ub=(o.u1==ua ? o.u2 : o.u1);
2269 							assert(o.u1==ua || o.u2==ua);
2270 							assert(ua!=ub);
2271 							assert(ub.valid());
2272 
2273 							assert(!o.canonContradiction() || (ua.canonContradiction() || ub.canonContradiction())) :
2274 								"\n"+o.canonContradiction()+", "+ua.canonContradiction()+", "+ub.canonContradiction();
2275 
2276 							assert(!o.offsetContradiction() || (ua.offsetContradiction() || ub.offsetContradiction())) :
2277 								"\n"+o.offsetContradiction()+", "+ua.offsetContradiction()+", "+ub.offsetContradiction();
2278 
2279 //							assert(o.contradiction()==(ua.contradiction() && ub.contradiction())) :
2280 //								"\n"+o.canonContradiction()+", "+o.offsetContradiction()+
2281 //								"\n"+ua.canonContradiction()+", "+ua.offsetContradiction()+
2282 //								"\n"+ub.canonContradiction()+", "+ub.offsetContradiction();
2283 
2284 							final boolean remove=(pruneContradictoryNodes && ub.contradiction() || (pruneContradictoryOverlaps && o.contradiction()));
2285 							if(!remove && !ub.visited()){
2286 								ub.setVisited(true);
2287 								visited.add(ub);
2288 								visits++;
2289 							}
2290 
2291 							if(remove){
2292 								if(!o.invalid()){o.setInvalid(true);}
2293 								list.set(j, null);
2294 								removed++;
2295 								prunedOverlaps++;
2296 							}else{
2297 								assert(!o.invalid());
2298 							}
2299 						}
2300 						if(removed>0){Tools.condenseStrict(list);}
2301 					}
2302 				}
2303 			}
2304 
2305 			if(verbose){System.err.println("cluster.size()="+cluster.size()+", visits="+visits+", visited.size()="+visited.size());}
2306 
2307 //			if(visited.size()==11486){ //TODO: For testing. Remove.
2308 //				for(int i=0; i<visited.size(); i++){
2309 //					Unit u=visited.get(i);
2310 //					assert(u.visited());
2311 //					assert(!u.canonContradiction());
2312 //					assert(u.canonicized());
2313 //					for(Overlap o : u.overlapList){
2314 //						assert(!o.canonContradiction());
2315 //						assert(o.type==FORWARD) : "\n\no="+o+"\ni="+i+", u.overlapList.size="+u.overlapList.size()+"\n"+
2316 //						o.u1.canonContradiction()+", "+o.u2.canonContradiction()+", "+o.u1.canonicized()+", "+o.u2.canonicized()+
2317 //						"\n"+visited.contains(o.u1)+", "+visited.contains(o.u2)+", "+visited.size()+
2318 //						"\n"+u.overlapList;
2319 //					}
2320 //				}
2321 //			}
2322 
2323 			final int numUnvisited=cluster.size()-visits;
2324 			ArrayList<Unit> pruned=(numUnvisited==0 ? null : new ArrayList<Unit>(numUnvisited));
2325 			assert(visits==visited.size());
2326 			assert(visits>=1 && visits<=cluster.size());
2327 
2328 			if(visits<cluster.size()){
2329 				pruned=new ArrayList<Unit>(cluster.size()-visits);
2330 				for(Unit ua : cluster){
2331 					if(!ua.visited()){
2332 						pruned.add(ua);
2333 						ArrayList<Overlap> list=ua.overlapList;
2334 						if(list!=null){
2335 							int removed=0;
2336 							for(int j=0; j<list.size(); j++){
2337 								Overlap o=list.get(j);
2338 								Unit ub=(o.u1==ua ? o.u2 : o.u1);
2339 								assert(o.u1==ua || o.u2==ua);
2340 								assert(ua!=ub);
2341 								assert(ub.valid());
2342 
2343 								if(ub.visited() || o.invalid()){
2344 									assert(ub.visited()==o.invalid()) : "\n"+o+"\n"+ub;
2345 									list.set(j, null);
2346 									removed++;
2347 								}
2348 							}
2349 							if(removed>0){Tools.condenseStrict(list);}
2350 						}
2351 					}
2352 				}
2353 				assert(pruned.size()==numUnvisited);
2354 			}else{
2355 				assert(prunedOverlaps==0) : "If this fails then I may need to mark overlaps to remove.";
2356 			}
2357 			for(Unit u : cluster){
2358 				assert(u.isPerfectlyTransitive()) : u;
2359 				if(EA){
2360 					if(u.overlapList!=null){
2361 						for(Overlap o : u.overlapList){assert(!o.invalid());}
2362 					}
2363 				}
2364 				if(u.visited()){u.setVisited(false);}
2365 			}
2366 			cluster.clear();
2367 			cluster.addAll(visited);
2368 			cluster.trimToSize();
2369 
2370 //			for(Unit u : cluster){
2371 ////				assert(u.canonicized());
2372 //				for(Overlap o : u.overlapList){
2373 //					assert(pruned==null || !pruned.contains(o.u1));
2374 //					assert(pruned==null || !pruned.contains(o.u2));
2375 //					assert(cluster.contains(o.u1));
2376 //					assert(cluster.contains(o.u2));
2377 //				}
2378 //			}
2379 //			if(pruned!=null){
2380 //				for(Unit u : pruned){
2381 //					for(Overlap o : u.overlapList){
2382 //						assert(pruned.contains(o.u1));
2383 //						assert(pruned.contains(o.u2));
2384 //						assert(!cluster.contains(o.u1));
2385 //						assert(!cluster.contains(o.u2));
2386 //					}
2387 //				}
2388 //			}
2389 
2390 			visited.clear();
2391 			return pruned;
2392 		}
2393 
2394 		/**
2395 		 * Cluster should already be ordered breadth-first
2396 		 * This may fail because removing cycles could change breadth-first traversal, but if it fails, an assertion will be thrown
2397 		 * @param cluster
2398 		 */
2399 		private int findCycles(ArrayList<Unit> cluster, boolean remove){
2400 
2401 			{
2402 				final Unit root=cluster.get(0);
2403 				assert(root.length()>=cluster.get(cluster.size()-1).length());
2404 				root.setVisited(true);
2405 			}
2406 			int cycles=0;
2407 
2408 			for(Unit ua : cluster){
2409 				assert(ua.visited());
2410 				ArrayList<Overlap> list=ua.overlapList;
2411 				if(list!=null){
2412 					int removed=0;
2413 					for(int i=0; i<list.size(); i++){
2414 						Overlap o=list.get(i);
2415 						Unit ub=(o.u1==ua ? o.u2 : o.u1);
2416 						assert(o.u1==ua || o.u2==ua);
2417 						assert(ua!=ub);
2418 						assert(ub.valid());
2419 
2420 						if(!o.visited()){
2421 							o.setVisited(true);
2422 							if(ub.visited()){
2423 								if(!o.cyclic()){
2424 									o.setCyclic(true);
2425 									cycles++;
2426 								}
2427 							}else{
2428 								ub.setVisited(true);
2429 							}
2430 						}
2431 						if(remove && o.cyclic()){
2432 							list.set(i, null);
2433 							removed++;
2434 						}
2435 					}
2436 					if(removed>0){Tools.condenseStrict(list);}
2437 				}
2438 			}
2439 
2440 			for(Unit u : cluster){
2441 				if(u.visited()){u.setVisited(false);}
2442 				if(u.overlapList!=null){
2443 					for(Overlap o : u.overlapList){
2444 						if(o.visited()){o.setVisited(false);}
2445 					}
2446 				}
2447 			}
2448 
2449 			return cycles;
2450 		}
2451 
2452 		/**
2453 		 * Cluster should already be ordered breadth-first
2454 		 * @param cluster
2455 		 */
2456 		private int generateOffsetsBreadthFirst(ArrayList<Unit> cluster, ArrayList<Unit> temp){
2457 
2458 
2459 			assert(temp!=null);
2460 			assert(temp.isEmpty());
2461 			{
2462 				final Unit root=cluster.get(0);
2463 				assert(root.length()>=cluster.get(cluster.size()-1).length());
2464 				root.setOffset(0);
2465 				temp.add(root);
2466 			}
2467 
2468 			int contradictions=0;
2469 			for(int i=0; i<temp.size(); i++){
2470 				Unit u=temp.get(i);
2471 				assert(!u.visited()) : i;
2472 				assert(u.offsetValid() || contradictions>0) : i+", "+temp.size()+", "+contradictions+"\n"+toString(temp);
2473 				if(u.offsetValid() && !u.offsetContradiction()){
2474 					contradictions+=setOffsetsNeighbors(u, temp);
2475 					assert(contradictions==0 || (i>0 && temp.size()>2));
2476 				}
2477 			}
2478 
2479 			int min=0;
2480 			for(Unit u : temp){
2481 				if(u.visited()){u.setVisited(false);}
2482 				if(u.overlapList!=null){
2483 					for(Overlap o : u.overlapList){
2484 						if(o.visited()){o.setVisited(false);}
2485 					}
2486 				}
2487 				if(u.offsetValid() && !u.offsetContradiction()){
2488 					min=Tools.min(min, u.offset());
2489 				}
2490 			}
2491 
2492 			if(verbose){
2493 				System.err.println("min offset = "+min);
2494 			}
2495 
2496 			for(Unit u : temp){
2497 				if(u.offsetValid()){
2498 					if(verbose){System.err.println("Set "+u.name()+" offset from "+u.offset+" to "+(u.offset-min));}
2499 					u.offset=u.offset-min;
2500 				}
2501 			}
2502 
2503 
2504 			return contradictions;
2505 		}
2506 
2507 		/**
2508 		 * @param root
2509 		 */
2510 		private int setOffsetsNeighbors(final Unit root, final ArrayList<Unit> temp) {
2511 			if(verbose){System.err.println("\nsetOffsetsNeighbors("+root.name()+")\nroot.code1="+root.code1+"\n");}
2512 			assert(root.valid());
2513 			assert(!root.visited());
2514 			assert(root.offsetValid());
2515 			assert(!root.offsetContradiction());
2516 			root.setVisited(true);
2517 			if(root.overlapList==null){return 0;}
2518 			final int contradictions=countOffsetContradictions(root, false);
2519 			if(verbose){System.err.println("\ncontradictions="+contradictions);}
2520 			for(Overlap o : root.overlapList){
2521 				Unit u=(o.u1==root ? o.u2 : o.u1);
2522 				assert(o.u1==root || o.u2==root);
2523 				assert(root!=u);
2524 				assert(u.valid());
2525 
2526 				if(verbose){System.err.println("\nProcessing Overlap "+o);}
2527 				if(!o.visited() && !o.offsetContradiction()){
2528 					o.setVisited(true);
2529 					if(!u.offsetContradiction()){
2530 						if(verbose){System.err.println("Calling setOffset:  "+o);}
2531 						if(!u.offsetValid()){temp.add(u);}
2532 						boolean b=setOffset(root, u, o);
2533 						if(verbose){System.err.println("Finished setOffset: "+o);}
2534 
2535 //						if(x>0){
2536 //							if(verbose){System.err.println("\n*********************************************");}
2537 //							if(verbose){System.err.println("Problem detected with contig "+u.name());}
2538 //							if(verbose){System.err.println("*********************************************\n");}
2539 //							verbose=true;
2540 //							int y2=countOffsetContradictions(root, false);
2541 //							assert(contradictions==y2);
2542 //						}
2543 
2544 						assert(b) : "\n"+contradictions+", "+o.offsetContradiction()+", "+root.offsetContradiction()+", "+u.offsetContradiction()+"\n"
2545 							+root.offsetValid()+", "+u.offsetValid()+", "+OVERLAP_TYPE_NAMES[o.type]+"\n"+b
2546 							+fixMultiJoins; //This assertion can fail if a multijoin is present
2547 						assert(u.offsetValid());
2548 					}
2549 				}
2550 			}
2551 			return contradictions;
2552 		}
2553 
2554 		private int countOffsetContradictions(Unit root, boolean includeKnown){
2555 			if(verbose){System.err.println("\ncountContradictions("+root.name()+", "+includeKnown+")\nroot.code1="+root.code1+"\n");}
2556 			assert(root.valid());
2557 			assert(root.visited());
2558 			assert(root.offsetValid());
2559 //			assert(!root.offsetContradiction());
2560 			if(root.overlapList==null){return 0;}
2561 			int contradictions=0;
2562 			for(Overlap o : root.overlapList){
2563 				Unit u=(o.u1==root ? o.u2 : o.u1);
2564 				assert(o.u1==root || o.u2==root);
2565 				assert(root!=u);
2566 				assert(u.valid());
2567 
2568 				if(verbose){System.err.println("\nOverlap "+o+"\nu="+u.name()+", offsetValid="+u.offsetValid());}
2569 
2570 				boolean contradictory=(u.offsetValid() && u.offset()!=calcOffset(root, u, o));
2571 				if(verbose){System.err.println("contradictory=            \t"+contradictory);}
2572 				if(contradictory){
2573 					if(includeKnown || !u.offsetContradiction()){
2574 						contradictions++;
2575 						if(!root.offsetContradiction()){root.setOffsetContradiction(true);}
2576 					}
2577 					if(!o.offsetContradiction()){o.setOffsetContradiction(true);}
2578 					if(!u.offsetContradiction()){u.setOffsetContradiction(true);}
2579 				}
2580 				assert(contradictory==o.offsetContradiction()) : contradictory+", "+o.offsetContradiction();
2581 				if(verbose){
2582 					System.err.println("root.offsetContradiction()=\t"+root.offsetContradiction());
2583 					System.err.println("u.offsetContradiction()=   \t"+u.offsetContradiction());
2584 					System.err.println("o.offsetContradiction()=   \t"+o.offsetContradiction());
2585 					System.err.println("contradictions=           \t"+contradictions);
2586 				}
2587 			}
2588 			if(verbose){System.err.println("Final contradictions="+contradictions+"\n");}
2589 			return contradictions;
2590 		}
2591 
2592 		/**
2593 		 * Cluster should already be ordered breadth-first
2594 		 * @param cluster
2595 		 */
2596 		private int canonicizeClusterBreadthFirst(ArrayList<Unit> cluster, ArrayList<Unit> temp) {
2597 
2598 			assert(temp!=null);
2599 			assert(temp.isEmpty());
2600 			{
2601 				final Unit root=cluster.get(0);
2602 				assert(root.length()>=cluster.get(cluster.size()-1).length());
2603 				root.setCanonicized(true);
2604 				temp.add(root);
2605 			}
2606 
2607 			int contradictions=0;
2608 			for(int i=0; i<temp.size(); i++){
2609 				final Unit u=temp.get(i);
2610 				assert(!u.visited()) : i;
2611 				assert(u.canonicized() || contradictions>0) : i+", "+temp.size()+", "+contradictions+"\n"+toString(temp);
2612 				if(u.canonicized() && !u.canonContradiction()){
2613 					contradictions+=canonicizeNeighbors(u, temp);
2614 					assert(contradictions==0 || (i>0 && temp.size()>2));
2615 
2616 					if(u.overlapList!=null){
2617 						for(Overlap o : u.overlapList){
2618 							assert(o.type==FORWARD || o.canonContradiction() || o.u1.canonContradiction() || o.u2.canonContradiction()) :
2619 								o+"\n"+contradictions+", "+o.canonContradiction()+", "+o.u1.canonContradiction()+", "+o.u2.canonContradiction()+
2620 								"\n"+o.u1.canonicized()+", "+o.u2.canonicized()+", "+o.u1.visited()+", "+o.u2.visited();
2621 						}
2622 					}
2623 				}
2624 
2625 //				if(u.r.numericID==59462 || u.r.numericID==56439){ //TODO: remove
2626 //					System.err.println("\nid="+u.r.numericID+", canonicized="+u.canonicized()+", contradiction="+u.canonContradiction()+", visited="+u.visited());
2627 //					for(Overlap o : u.overlapList){
2628 //						Unit u2=(o.u1==u ? o.u2 : o.u1);
2629 //						assert(o.u1==u || o.u2==u);
2630 //						assert(u2!=u);
2631 //						assert(u2.valid());
2632 //						System.err.println("o = "+o);
2633 //						System.err.println("o.contradiction="+o.canonContradiction());
2634 //						System.err.println("u2.id="+u2.r.numericID+", canonicized="+u2.canonicized()+", contradiction="+u2.canonContradiction()+", visited="+u.visited());
2635 //					}
2636 //				}
2637 			}
2638 
2639 			for(Unit u : temp){
2640 				if(u.visited()){u.setVisited(false);}
2641 				if(EA){
2642 					if(u.overlapList!=null){
2643 						for(Overlap o : u.overlapList){assert(!o.visited());}
2644 					}
2645 				}
2646 			}
2647 
2648 			return contradictions;
2649 		}
2650 
2651 		/**
2652 		 * @param root
2653 		 */
2654 		private int canonicizeNeighbors(Unit root, ArrayList<Unit> canonicized) {
2655 			if(verbose){System.err.println("\ncanonicizeNeighbors("+root.name()+")\nroot.code1="+root.code1+"\n");}
2656 			assert(root.valid());
2657 			assert(!root.visited());
2658 			assert(root.canonicized());
2659 			assert(!root.canonContradiction());
2660 			root.setVisited(true);
2661 			if(root.overlapList==null){return 0;}
2662 			final int contradictions=countCanonContradictions(root, false);
2663 			if(verbose){System.err.println("\ncontradictions="+contradictions);}
2664 			for(Overlap o : root.overlapList){
2665 				Unit u=(o.u1==root ? o.u2 : o.u1);
2666 				assert(o.u1==root || o.u2==root);
2667 				assert(root!=u);
2668 				assert(u.valid());
2669 
2670 				if(verbose){System.err.println("\nProcessing Overlap "+o);}
2671 				if(!o.canonContradiction()){
2672 					if(!u.canonContradiction()){
2673 						boolean b=u.canonicized();
2674 						int dir=o.type;
2675 						if(verbose){System.err.println("Calling canonicize:  "+o);}
2676 						int x=canonicize(root, u, o);
2677 						if(verbose){System.err.println("Finished canonicize: "+o);}
2678 
2679 //						if(x>0){
2680 //							if(verbose){System.err.println("\n*********************************************");}
2681 //							if(verbose){System.err.println("Problem detected with contig "+u.name());}
2682 //							if(verbose){System.err.println("*********************************************\n");}
2683 //							verbose=true;
2684 //							int y2=countCanonContradictions(root, false);
2685 //							assert(contradictions==y2);
2686 //						}
2687 
2688 						assert(x==0 || (u.canonicized() && (o.type==FORWARDRC || o.type==REVERSERC)));
2689 						assert(x==0) : "\n"+x+", "+contradictions+", "+o.canonContradiction()+", "+root.canonContradiction()+", "+u.canonContradiction()+"\n"
2690 							+root.canonicized()+", "+u.canonicized()+", "+OVERLAP_TYPE_NAMES[o.type]+"\n"+b+", "+dir
2691 							+fixMultiJoins; //This assertion can fail if a multijoin is present
2692 						if(!u.canonicized()){
2693 							u.setCanonicized(true);
2694 							canonicized.add(u);
2695 						}
2696 						assert(u.canonicized());
2697 					}
2698 				}
2699 			}
2700 			if(EA){
2701 				for(Overlap o : root.overlapList){
2702 					assert(o.type==FORWARD || o.canonContradiction() || o.u1.canonContradiction() || o.u2.canonContradiction()) :
2703 						o+"\n"+contradictions+", "+o.canonContradiction()+", "+o.u1.canonContradiction()+", "+o.u2.canonContradiction()+", "+root.canonContradiction()+
2704 						"\n"+o.u1.canonicized()+", "+o.u2.canonicized()+", "+o.u1.visited()+", "+o.u2.visited();
2705 				}
2706 			}
2707 			return contradictions;
2708 		}
2709 
2710 		private int countCanonContradictions(Unit root, boolean includeKnown){
2711 			if(verbose){System.err.println("\ncountContradictions("+root.name()+", "+includeKnown+")\nroot.code1="+root.code1+"\n");}
2712 			assert(root.valid());
2713 			assert(root.visited());
2714 			assert(root.canonicized());
2715 //			assert(!root.canonContradiction());
2716 			if(root.overlapList==null){return 0;}
2717 			int contradictions=0;
2718 			for(Overlap o : root.overlapList){
2719 				Unit ub=(o.u1==root ? o.u2 : o.u1);
2720 				assert(o.u1==root || o.u2==root);
2721 				assert(root!=ub);
2722 				assert(ub.valid());
2723 
2724 				if(verbose){System.err.println("\nOverlap "+o+"\nu="+ub.name()+", canonicized="+ub.canonicized());}
2725 
2726 				boolean contradictory=(ub.canonicized() && (o.type==FORWARDRC || o.type==REVERSERC));
2727 				if(verbose){System.err.println("contradictory=            \t"+contradictory);}
2728 				if(contradictory){
2729 					if(!o.canonContradiction()){o.setCanonContradiction(true);}
2730 					if(includeKnown || !ub.canonContradiction()){
2731 						contradictions++;
2732 						if(!root.canonContradiction()){root.setCanonContradiction(true);}
2733 						if(!ub.canonContradiction()){ub.setCanonContradiction(true);}
2734 					}
2735 				}
2736 
2737 				assert(!o.canonContradiction() || (root.canonContradiction() || ub.canonContradiction())) :
2738 					"\n"+contradictory+", "+o.canonContradiction()+", "+root.canonContradiction()+", "+ub.canonContradiction();
2739 
2740 				assert(contradictory==o.canonContradiction()) : contradictory+", "+o.canonContradiction();
2741 				if(verbose){
2742 					System.err.println("root.canonContradiction()=\t"+root.canonContradiction());
2743 					System.err.println("u.canonContradiction()=   \t"+ub.canonContradiction());
2744 					System.err.println("o.canonContradiction()=   \t"+o.canonContradiction());
2745 					System.err.println("contradictions=           \t"+contradictions);
2746 				}
2747 			}
2748 			if(verbose){System.err.println("Final contradictions="+contradictions+"\n");}
2749 			return contradictions;
2750 		}
2751 
2752 		private String toString(ArrayList<Unit> cluster){
2753 			for(int i=0; i<cluster.size(); i++){
2754 				Unit u=cluster.get(i);
2755 				u.r.id=""+i;
2756 			}
2757 			StringBuilder sb=new StringBuilder(1000);
2758 			for(Unit u : cluster){
2759 				sb.append(">"+u.name()+"\n");
2760 				sb.append(new String(u.bases()));
2761 				sb.append("\n");
2762 			}
2763 			sb.append("\n*****\n");
2764 			for(Unit u : cluster){
2765 				sb.append("\n"+u.name()+":");
2766 				if(u.overlapList!=null){
2767 					for(Overlap o : u.overlapList){
2768 						Unit ub=(o.u1==u ? o.u2 : o.u1);
2769 						sb.append(" "+ub.name());
2770 					}
2771 				}
2772 			}
2773 			sb.append("\n");
2774 			return sb.toString();
2775 		}
2776 
2777 		private String toShortString(ArrayList<Unit> cluster){
2778 			for(int i=0; i<cluster.size(); i++){
2779 				Unit u=cluster.get(i);
2780 				u.r.id=""+i;
2781 			}
2782 			StringBuilder sb=new StringBuilder(1000);
2783 			for(Unit u : cluster){
2784 				sb.append("\n"+u.name()+":");
2785 				if(u.overlapList!=null){
2786 					for(Overlap o : u.overlapList){
2787 						Unit ub=(o.u1==u ? o.u2 : o.u1);
2788 						sb.append(" "+ub.name());
2789 					}
2790 				}
2791 			}
2792 			sb.append("\n");
2793 			return sb.toString();
2794 		}
2795 
2796 
2797 		/**
2798 		 * @param root
2799 		 * @param u2
2800 		 * @param o
2801 		 * @return Number of contradictions
2802 		 */
2803 		private int canonicize(final Unit root, final Unit u2, final Overlap o){
2804 			if(o.type==FORWARD){return 0;}
2805 			if(o.type==FORWARDRC || o.type==REVERSERC){
2806 				if(u2.canonicized()){return 1;}
2807 				u2.reverseComplement();
2808 				unitsFlippedT++;
2809 				for(Overlap o2 : u2.overlapList){
2810 					overlapsFlippedT++;
2811 					o2.flip(u2, bandy);
2812 				}
2813 				assert(o.type==FORWARD || o.type==REVERSE) : OVERLAP_TYPE_NAMES[o.type];
2814 			}
2815 			if(o.type==REVERSE){o.reverseDirection();}
2816 			assert(o.type==FORWARD);
2817 			assert(o.test(bandy, o.edits+maxEdits));
2818 			return 0;
2819 		}
2820 
2821 
2822 		/**
2823 		 * @param root
2824 		 * @param u2
2825 		 * @param o
2826 		 * @return true if no contradictions
2827 		 */
2828 		private boolean setOffset(final Unit root, final Unit u2, final Overlap o){
2829 			assert(root.offsetValid());
2830 			assert(!root.offsetContradiction());
2831 			int offset=calcOffset(root, u2, o);
2832 
2833 			if(u2.offsetValid()){return u2.offset()==offset;}
2834 			u2.setOffset(offset);
2835 
2836 			if(verbose){
2837 				System.err.println("\nroot = "+(root.name()==null ? root.r.numericID+"" : root.name())+", u2 = "+(u2.name()==null ? u2.r.numericID+"" : u2.name())
2838 						+"\no = "+o
2839 						+"\nroot.offset = "+root.offset()
2840 						+"\nu2.offset = "+u2.offset());
2841 			}
2842 
2843 			return true;
2844 		}
2845 
2846 
2847 		private int calcOffset(final Unit root, final Unit ub, final Overlap o){
2848 			assert(root.offsetValid());
2849 			if(o.type==FORWARD){
2850 				if(root==o.u1){
2851 					int dif=o.start1-o.start2;
2852 					if(verbose){System.err.println("root==o.u1=="+root.name()+", start1="+o.start1+"; u2==o.u2=="+ub.name()+", start2="+o.start2+", dif="+dif);}
2853 					return root.offset+dif;
2854 				}else{
2855 					int dif=o.start2-o.start1;
2856 					if(verbose){System.err.println("root==o.u2=="+root.name()+", start2="+o.start2+"; u2==o.u1=="+ub.name()+", start1="+o.start1+", dif="+dif);}
2857 					return root.offset+dif;
2858 				}
2859 			}else{
2860 				assert(false) : o;
2861 				throw new RuntimeException("TODO");
2862 			}
2863 		}
2864 
2865 
2866 		/**
2867 		 * @param cluster
2868 		 */
2869 		private void mergeLeaves(ArrayList<Unit> cluster) {
2870 			assert(false) : "TODO";
2871 			for(Unit u : cluster){
2872 
2873 			}
2874 		}
2875 
2876 		/**
2877 		 * @param cluster
2878 		 */
2879 		private void mergeInner(ArrayList<Unit> cluster) {
2880 			assert(false) : "TODO";
2881 			for(Unit u : cluster){
2882 
2883 			}
2884 		}
2885 
2886 		private ArrayList<ArrayList<Unit>> processedClustersT=new ArrayList<ArrayList<Unit>>(threadMaxReadsToBuffer);
2887 
2888 		long leafMergesT=0;
2889 		long innerMergesT=0;
2890 		long leafBaseMergesT=0;
2891 		long innerBaseMergesT=0;
2892 
2893 		long multiJoinFailuresT=0;
2894 		long multiJoinsFoundT=0;
2895 		long multiJoinBasesFoundT=0;
2896 		long unitsFlippedT=0;
2897 		long overlapsFlippedT=0;
2898 		long canonContradictoryOverlapsT=0;
2899 		long canonContradictoryClustersT=0;
2900 		long offsetContradictoryOverlapsT=0;
2901 		long offsetContradictoryClustersT=0;
2902 		long cycleOverlapsT=0;
2903 		long cycleClustersT=0;
2904 
2905 		private final boolean fixMultiJoinsT;
2906 		private final boolean canonicizeT;
2907 		private final boolean fixCanonContradictionsT;
2908 		private final boolean fixOffsetContradictionsT;
2909 		private final boolean mergeClustersT;
2910 		private final boolean mergeLeavesT;
2911 		private final boolean mergeInnerT;
2912 		private final BandedAligner bandy;
2913 	}
2914 
2915 
2916 	/**
2917 	 * @param cluster
2918 	 */
2919 	private void unvisit(ArrayList<Unit> cluster) {
2920 		for(Unit u : cluster){
2921 			if(u.visited()){u.setVisited(false);}
2922 		}
2923 	}
2924 
2925 	/**
2926 	 * @param cluster
2927 	 */
2928 	private void reorderClusterBreadthFirst(ArrayList<Unit> cluster) {
2929 		if(verbose){System.err.println("reorderClusterBreadthFirst");}
2930 
2931 		final int size=cluster.size();
2932 		Shared.sort(cluster); //Now it is in descending length
2933 		final Unit root=cluster.get(0);
2934 		assert(root.length()>=cluster.get(size-1).length()) : root.length()+", "+cluster.get(size-1).length()+", "+root.compareTo(cluster.get(size-1));
2935 
2936 		ArrayList<Unit> breadthFirst=new ArrayList<Unit>(cluster.size());
2937 		root.setVisited(true);
2938 //		System.err.println("root="+root.name());
2939 		breadthFirst.add(root);
2940 		for(int i=0; i<breadthFirst.size(); i++){
2941 			Unit u=breadthFirst.get(i);
2942 			Shared.sort(u.overlapList); //Sorted in descending overlap length
2943 			if(u.overlapList!=null){
2944 				for(Overlap o : u.overlapList){
2945 					if(!o.u1.visited()){
2946 						//					System.err.println("Visiting "+o.u1.name());
2947 						o.u1.setVisited(true);
2948 						breadthFirst.add(o.u1);
2949 					}
2950 					if(!o.u2.visited()){
2951 						//					System.err.println("Visiting "+o.u2.name());
2952 						o.u2.setVisited(true);
2953 						breadthFirst.add(o.u2);
2954 					}
2955 					//				System.err.println("***");
2956 					//				System.err.println(toShortString(breadthFirst));
2957 				}
2958 			}
2959 		}
2960 		for(Unit u : cluster){
2961 			assert(u.visited());
2962 			if(u.visited()){u.setVisited(false);}
2963 			if(EA){
2964 				if(u.overlapList!=null){
2965 					for(Overlap o : u.overlapList){assert(!o.visited());}
2966 				}
2967 			}
2968 		}
2969 //		System.err.println("***");
2970 //		System.err.println("Final:");
2971 //		System.err.println(toShortString(breadthFirst));
2972 		assert(cluster.size()==breadthFirst.size());
2973 		cluster.clear();
2974 		cluster.addAll(breadthFirst);
2975 	}
2976 
2977 
2978 
2979 	/** Returns next cluster larger than 1 element.
2980 	 * Singleton clusters are added directly to 'processed'. */
2981 	private ArrayList<Unit> nextCluster(){
2982 		synchronized(clusterQueue){
2983 			ArrayList<Unit> cluster=clusterQueue.poll();
2984 			assert(cluster==null || cluster.size()>1);
2985 //			while(cluster!=null && cluster.size()<2){
2986 ////				unmark(cluster);
2987 //				processedClustersT.add(cluster);
2988 //				cluster=clusterQueue.poll();
2989 //			}
2990 			return cluster;
2991 		}
2992 	}
2993 
2994 
2995 	/**
2996 	 * Creates Unit objects or uses ones already attached to reads.
2997 	 * Places them in local storage and percolates them to shared storage (codeMap), removing exact duplicates.
2998 	 * Also hashes tips and places these in shared affixMap.
2999 	 * Looks for containments in the affix map.
3000 	 * @author Brian Bushnell
3001 	 * @date Jul 24, 2013
3002 	 *
3003 	 */
3004 	private final class HashThread extends Thread{
3005 
3006 		public HashThread(boolean addToCodeMap_, boolean addToAffixMap_, boolean findMatches_, boolean findContainments_, boolean findOverlaps_){
3007 			addToCodeMapT=addToCodeMap_;
3008 			addToAffixMapT=addToAffixMap_;
3009 			findContainmentsT=findContainments_;
3010 			findOverlapsT=findOverlaps_;
3011 			findMatchesT=findMatches_;
3012 			tid=getTid();
3013 			crisq=new ArrayDeque<ConcurrentReadInputStream>(crisa.length);
3014 			for(int i=0; i<crisa.length; i++){
3015 //				if(verbose){System.err.println("Adding to crisq.");}
3016 				crisq.add(crisa[(i+tid)%crisa.length]);
3017 			}
3018 			bandy=(maxEdits>0 && (findOverlapsT || findContainmentsT) ? BandedAligner.makeBandedAligner(bandwidth) : null);
3019 
3020 //			assert(addToCodeMapT) : "addToCodeMapT="+addToCodeMapT+", addToAffixMapT="+addToAffixMapT+", findContainmentsT="+findContainmentsT+
3021 //			", findOverlapsT="+findOverlapsT+", findMatchesT="+findMatchesT+", convertToUpperCaseT="+convertToUpperCaseT+", numAffixMaps="+numAffixMaps;
3022 		}
3023 
3024 		@Override
3025 		public void run(){
3026 
3027 			ConcurrentReadInputStream cris=crisq.poll();
3028 
3029 //			String lastName=null;
3030 
3031 			while(cris!=null){
3032 				ListNum<Read> ln=cris.nextList();
3033 				ArrayList<Read> reads=(ln!=null ? ln.list : null);
3034 
3035 				while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
3036 
3037 					for(Read r : reads){
3038 //						lastName=r.id;
3039 						processReadOuter(r);
3040 					}
3041 
3042 					if(codeMapT!=null && (codeMapT.size()>threadMaxReadsToBuffer || basesStoredT>threadMaxBasesToBuffer)){
3043 						assert(addToCodeMapT);
3044 						long added=mergeMaps();
3045 						addedToMainT+=added;
3046 					}
3047 
3048 					cris.returnList(ln);
3049 					ln=cris.nextList();
3050 					reads=(ln!=null ? ln.list : null);
3051 				}
3052 				cris.returnList(ln);
3053 				if(codeMapT!=null && !codeMapT.isEmpty()){
3054 					long added=mergeMaps();
3055 					addedToMainT+=added;
3056 				}
3057 				cris=crisq.poll();
3058 			}
3059 
3060 //			System.err.println("Thread finished; last name = "+lastName);
3061 
3062 			codeMapT=null;
3063 			localConflictList=null;
3064 			sharedConflictList=null;
3065 		}
3066 
3067 		/** Return true if this read was a member of this subset. */
3068 		private boolean processReadOuter(Read r1){
3069 			if(r1.length()<MINSCAF){return false;}
3070 			Read r2=r1.mate;
3071 
3072 			assert(r1.pairnum()==0);
3073 			assert(r2==null || r2.pairnum()==1);
3074 
3075 			if(!addToCodeMapT && r1.obj==null){
3076 				if(r1.bases!=null && r1.length()>=MINSCAF){
3077 					final Unit u=(r1.obj!=null ? (Unit)r1.obj : new Unit(r1));
3078 					assert(u.r==r1 && (r1.obj==u || r1.obj==null));
3079 					final long code=u.code1;
3080 					r1.obj=u;
3081 					assert(u.r==r1 && r1.obj==u);
3082 					if(r2!=null && r2.obj==null){r2.obj=new Unit(r2);}
3083 
3084 					//Check for subset membership
3085 					final boolean inSet=u.inSet();
3086 					if(inSet){
3087 						final Long codeL=code;
3088 						ArrayList<Unit> list=codeMap.get(codeL);
3089 						boolean found=false;
3090 						for(Unit u0 : list){
3091 							//Replace with existing read
3092 							if(u0.equals(u) && u0.r.numericID==r1.numericID){
3093 								r1=u0.r;
3094 								r2=r1.mate;
3095 								found=true;
3096 								break;
3097 							}
3098 						}
3099 						assert(list!=null);
3100 						if(!found){
3101 							return false;
3102 						}
3103 					}
3104 				}
3105 			}
3106 			boolean b=processRead(r1);
3107 			if(r2!=null){processRead(r2);}
3108 			return b;
3109 		}
3110 
3111 		/** Return true if this read was a member of this subset. */
3112 		private boolean processRead(Read r){
3113 			if(r.length()<MINSCAF){return false;}
3114 
3115 			final boolean inSet;
3116 			if(!storeName){r.id=null;}
3117 			if(!storeQuality){r.quality=null;}
3118 
3119 			if(forceTrimLeft>0 || forceTrimRight>0){//Added at request of RQC team
3120 				if(r!=null && r.length()>0){
3121 					TrimRead.trimToPosition(r, forceTrimLeft>0 ? forceTrimLeft : 0, forceTrimRight>0 ? forceTrimRight : r.length(), 1);
3122 				}
3123 			}
3124 			if(qTrimLeft || qTrimRight){
3125 				TrimRead.trimFast(r, qTrimLeft, qTrimRight, trimQ, trimE, 0);
3126 			}
3127 			if(r.length()<MINSCAF){return false;}
3128 
3129 			readsProcessedT++;
3130 			basesProcessedT+=r.length();
3131 
3132 			final Unit u=(r.obj!=null ? (Unit)r.obj : new Unit(r));
3133 			assert(u.r==r && (r.obj==u || r.obj==null));
3134 			final long code=u.code1;
3135 
3136 			//Check for subset membership
3137 			inSet=u.inSet();
3138 
3139 			r.obj=u;
3140 			assert(u.r==r && r.obj==u);
3141 			if(r.mate!=null && r.mate.obj==null){r.mate.obj=new Unit(r.mate);}
3142 
3143 			if(verbose){System.err.println("Generated "+code+" for sequence "+u.name()+"\t"+new String(r.bases, 0, Tools.min(40, r.length())));}
3144 			Read primary=null;
3145 
3146 			if(addToCodeMapT && inSet){
3147 				final Long codeL=code;
3148 				ArrayList<Unit> list=codeMapT.get(codeL);
3149 				if(list==null){
3150 					if(verbose){System.err.println("Unique.");}
3151 					list=new ArrayList<Unit>(1);
3152 					list.add(u);
3153 					basesStoredT+=r.length();
3154 					codeMapT.put(codeL, list);
3155 				}else{
3156 					if(verbose){System.err.println("Exists.");}
3157 					boolean match=false;
3158 					if(findMatchesT){
3159 						for(Unit u2 : list){
3160 							if(pairedEqualsRC(u, u2)){
3161 //								if(u.r.mate!=null){
3162 //									verbose=true;
3163 //
3164 //									Unit um=(Unit)u.r.mate.obj;
3165 //									Unit u2m=(Unit)u2.r.mate.obj;
3166 //
3167 //									if(verbose){
3168 //										System.err.println("********");
3169 //										System.err.println(u.r.toFastq());
3170 //										System.err.println(u.r.mate.toFastq());
3171 //										System.err.println("********");
3172 //										System.err.println(u2.r.toFastq());
3173 //										System.err.println(u2.r.mate.toFastq());
3174 //										System.err.println("********");
3175 //										System.err.println(u);
3176 //										System.err.println(u2);
3177 //										System.err.println(um);
3178 //										System.err.println(u2m);
3179 //										System.err.println("********");
3180 //										System.err.println(u.equals(u2));
3181 //										System.err.println(u.compareTo(u2));
3182 //										System.err.println("********");
3183 //										System.err.println(um.equals(u2m));
3184 //										System.err.println(um.compareTo(u2m));
3185 //										System.err.println("********");
3186 //									}
3187 //
3188 //									verbose=false;
3189 //								}
3190 								assert(u.r.mate==null || pairedEqualsRC((Unit)u.r.mate.obj, (Unit)u2.r.mate.obj)) :
3191 									u.r.toFastq()+"\n"+u2.r.toFastq()+"\n"+u.r.mate.toFastq()+"\n"+u2.r.mate.toFastq()+
3192 									"\n"+u+"\n"+u2+"\n"+u.r.mate.obj+"\n"+u2.r.mate.obj;
3193 //								if(verbose){System.err.println("Matches "+new String(r2.bases, 0, Tools.min(40, r2.length())));}
3194 								match=true;
3195 								primary=u2.r;
3196 								u2.absorbMatch(u);
3197 								if(UNIQUE_ONLY){
3198 									synchronized(u2){
3199 										if(u2.valid()){
3200 											matchesT++;
3201 											baseMatchesT+=u2.length();
3202 											u2.setValid(false);
3203 											addDupe(u2.r, null);
3204 										}
3205 									}
3206 								}
3207 								break;
3208 							}
3209 						}
3210 					}
3211 					if(match){
3212 						addDupe(r, primary);
3213 						matchesT++;
3214 						baseMatchesT+=r.length();
3215 						//							if(verbose){System.err.println("matchesT="+matchesT+", baseMatchesT="+baseMatchesT);}
3216 					}else{
3217 						collisionsT++;
3218 						if(verbose){System.err.println("False collision; count = "+collisionsT);}
3219 						list.add(u);
3220 						basesStoredT+=r.length();
3221 					}
3222 				}
3223 			}
3224 
3225 			if(findContainmentsT){
3226 				int x=findContainments(u);
3227 			}
3228 
3229 			if(findOverlapsT){
3230 				int x=findOverlaps(u);
3231 			}
3232 
3233 			return inSet;
3234 		}
3235 
3236 		private int findContainments(final Unit u){
3237 			if(minLengthPercent<=0 && maxSubs<=0 && minIdentity>=100 && !u.valid()){return 0;}
3238 			final byte[] bases=u.bases();
3239 			final int shift=2*k;
3240 			final int shift2=shift-2;
3241 			final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
3242 			long kmer=0;
3243 			long rkmer=0;
3244 			int hits=0;
3245 			int currentContainments=0;
3246 			int len=0;
3247 
3248 			if(bases==null || bases.length<k){return -1;}
3249 			final LongM key=new LongM();
3250 
3251 			for(int i=0; i<bases.length; i++){
3252 				byte b=bases[i];
3253 				long x=baseToNumber[b];
3254 				long x2=baseToComplementNumber[b];
3255 				kmer=((kmer<<2)|x)&mask;
3256 				rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
3257 				if(HASH_NS || AminoAcid.isFullyDefined(b)){len++;}
3258 				else{len=0; rkmer=0;}
3259 //				if(verbose){System.err.println("Scanning i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
3260 				if(len>=k){
3261 					key.set(Tools.max(kmer, rkmer)); //Canonical
3262 					for(int am=0; am<affixMaps.length; am++){
3263 						ArrayList<Unit> list=affixMaps[am].get(key);
3264 						if(list!=null){
3265 							for(Unit u2 : list){
3266 								if(u!=u2 && !u.equals(u2)){
3267 									if(u2.valid()){
3268 										hits++;
3269 										if(verbose){
3270 											System.err.println("\nFound potential containment at am="+am+", i="+i+", key="+key.value()+
3271 													", pre1="+u2.prefix1+", pre2="+u2.prefix2+
3272 													", suf1="+u2.suffix1+", suf2="+u2.suffix2+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i, k)));
3273 										}
3274 										if(u.contains(u2, i, key, bandy, am)){
3275 											synchronized(u2){
3276 												if(u2.valid()){
3277 													currentContainments++;
3278 													baseContainmentsT+=u2.length();
3279 													u2.setValid(false);
3280 													addDupe(u2.r, u.r);
3281 //													u2.absorbContainment(u);
3282 												}
3283 											}
3284 											if(UNIQUE_ONLY){
3285 												synchronized(u){
3286 													if(u.valid()){
3287 														currentContainments++;
3288 														baseContainmentsT+=u.length();
3289 														u.setValid(false);
3290 														addDupe(u.r, null);
3291 													}
3292 												}
3293 											}
3294 
3295 											if(verbose){System.err.println("Added containment "+u2);}
3296 										}
3297 									}
3298 								}
3299 							}
3300 						}
3301 					}
3302 				}
3303 			}
3304 //			assert(false) : hits+", "+currentContainments+", "+baseContainments+"\n"+containmentMapT+"\n";
3305 
3306 			containmentCollisionsT+=(hits-currentContainments);
3307 //			outstream.println("hits="+hits+", currentContainments="+currentContainments);
3308 			containmentsT+=currentContainments;
3309 			return hits;
3310 		}
3311 
3312 		private int findOverlaps(final Unit u){
3313 //			if(minLengthPercent<=0 && maxSubs<=0 && minIdentity>=100 && !u.valid()){return 0;}
3314 //			if(u.overlapList!=null){u.overlapList.clear();}
3315 			final byte[] bases=u.bases();
3316 			final int shift=2*k;
3317 			final int shift2=shift-2;
3318 			final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
3319 			long kmer=0;
3320 			long rkmer=0;
3321 			int hits=0;
3322 			int currentOverlaps=0;
3323 			int len=0;
3324 
3325 			if(bases==null || bases.length<k){return -1;}
3326 			final LongM key=new LongM();
3327 
3328 			boolean quit=false;
3329 
3330 			for(int i=0; i<bases.length && !quit; i++){
3331 				byte b=bases[i];
3332 				long x=baseToNumber[b];
3333 				long x2=baseToComplementNumber[b];
3334 				kmer=((kmer<<2)|x)&mask;
3335 				rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
3336 				if(HASH_NS || AminoAcid.isFullyDefined(b)){len++;}
3337 				else{len=0; rkmer=0;}
3338 //				if(verbose){System.err.println("Scanning i="+i+", kmer="+kmer+", rkmer="+rkmer+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i+1, k)));}
3339 				if(len>=k){
3340 					key.set(Tools.max(kmer, rkmer)); //Canonical key
3341 					for(int am=0; am<affixMaps.length; am++){
3342 						ArrayList<Unit> list=affixMaps[am].get(key);
3343 						if(list!=null){//found a key collision
3344 							for(Unit u2 : list){
3345 								if(quit){break;}//too many edges
3346 								int u1cluster=-1, u2cluster=-2;
3347 								if(preventTransitiveOverlaps && u!=u2){
3348 									u1cluster=u.determineCluster();
3349 									u2cluster=u2.determineCluster();
3350 								}
3351 								if(u1cluster!=u2cluster && u!=u2 && !u.equals(u2) && u2.r!=u.r.mate){//TODO:  Not sure why identical things are banned...  possibly relates to avoiding inter-pair edges?
3352 									if(u2.valid()){
3353 										hits++;
3354 
3355 //										boolean flag=(u.code1==-3676200394282040623L && u2.code1==-7034423913727372751L) ||
3356 //												(u2.code1==-3676200394282040623L && u.code1==-7034423913727372751L);
3357 										final boolean flag=false;
3358 										if(verbose || flag){
3359 											System.err.println("\nFound potential overlap at am="+am+", i="+i+", key="+key.value()+
3360 													", pre1="+u2.prefix1+", pre2="+u2.prefix2+
3361 													", suf1="+u2.suffix1+", suf2="+u2.suffix2+", bases="+new String(bases, Tools.max(0, i-k2), Tools.min(i, k)));
3362 										}
3363 
3364 										final Overlap o;
3365 										if(maxEdges>1000000000 || u.overlapList==null || u2.overlapList==null ||
3366 												(u.overlapList.size()<maxEdges && u2.overlapList.size()<maxEdges2)){
3367 											 o=u.makeOverlap(u2, i, key, bandy, am);
3368 
3369 										}else{
3370 											o=null;
3371 											if(u.overlapList.size()>maxEdges){quit=true;}
3372 										}
3373 										if(o!=null){
3374 
3375 											if(preventTransitiveOverlaps){
3376 												 mergeClusterIds(u1cluster, u2cluster);
3377 											}
3378 
3379 											assert(o.test(bandy, o.edits+maxEdits)) : o;
3380 											if(verbose || flag){System.err.println("Created overlap "+o);}
3381 
3382 											long comp=u.length()-u2.length();
3383 											if(comp==0){comp=u.code1-u2.code1;}
3384 											if(comp==0){comp=u.code2-u2.code2;}
3385 											if(comp==0){comp=u.prefix1-u2.prefix1;}
3386 											if(comp==0){comp=u.suffix1-u2.suffix1;}
3387 											if(comp==0){comp=(u.r.numericID-u2.r.numericID);}
3388 											assert(comp!=0) : u+", "+u2;
3389 											Unit ua=(comp<0 ? u : u2);
3390 											Unit ub=(comp<0 ? u2 : u);
3391 											assert(ua!=ub);
3392 											if(verbose || flag){
3393 												System.err.println("ua="+ua.code1);
3394 												System.err.println("ub="+ub.code1);
3395 												System.err.println("u ="+u.code1);
3396 												System.err.println("u2="+u2.code1);
3397 												System.err.println("u.r ="+u.r.numericID);
3398 												System.err.println("u2.r="+u2.r.numericID);
3399 												System.err.println("ua contains o? "+ua.alreadyHas(o));
3400 												System.err.println("ub contains o? "+ub.alreadyHas(o));
3401 												System.err.println("ua.list="+ua.overlapList);
3402 												System.err.println("ub.list="+ub.overlapList);
3403 											}
3404 
3405 //											assert(ua.alreadyHas(o)==ub.alreadyHas(o));
3406 
3407 											final boolean uaContainedOverlap;
3408 
3409 											synchronized(ua){
3410 												if(ua.overlapList==null){ua.overlapList=new ArrayList<Overlap>(2);}
3411 												if(!ua.overlapList.contains(o)){
3412 													if(EA){
3413 														synchronized(ub){
3414 															assert(ub.overlapList==null || !ub.overlapList.contains(o)) :
3415 																ua.alreadyHas(o)+", "+ub.alreadyHas(o)+"\n"+o+"\n"+ub.overlapList.get(ub.overlapList.indexOf(o))+
3416 																"\nua.list="+ua.overlapList+"\nub.list="+ub.overlapList+"\nu.code1="+u.code1+"\nu2.code1="+u2.code1;
3417 														}
3418 													}
3419 													currentOverlaps++;
3420 													baseOverlapsT+=o.overlapLen;
3421 													ua.overlapList.add(o);
3422 													if(verbose || flag){System.err.println("Added overlap "+o);}
3423 													uaContainedOverlap=false;
3424 												}else{
3425 													if(verbose || flag){System.err.println("Already contained overlap "+o);}
3426 													hits--;
3427 													uaContainedOverlap=true;
3428 												}
3429 											}
3430 
3431 											if(!uaContainedOverlap){
3432 												synchronized(ub){
3433 													if(ub.overlapList==null){ub.overlapList=new ArrayList<Overlap>(2);}
3434 													assert(!ub.overlapList.contains(o));
3435 													ub.overlapList.add(o);
3436 													if(verbose || flag){System.err.println("Added overlap "+o);}
3437 												}
3438 											}else{
3439 												if(verbose || flag){System.err.println("Already contained overlap "+o);}
3440 											}
3441 
3442 
3443 //											assert(ua.alreadyHas(o));
3444 //											assert(ub.alreadyHas(o));
3445 //											assert(ua.overlapList.contains(o));
3446 //											assert(ub.overlapList.contains(o));
3447 											if(verbose || flag){
3448 												System.err.println("ua contains o? "+ua.alreadyHas(o));
3449 												System.err.println("ub contains o? "+ub.alreadyHas(o));
3450 												System.err.println("ua.list="+ua.overlapList);
3451 												System.err.println("ub.list="+ub.overlapList);
3452 											}
3453 										}
3454 									}
3455 								}
3456 							}
3457 						}
3458 					}
3459 				}
3460 			}
3461 			if(EA){
3462 				synchronized(u){
3463 					if(u.overlapList!=null && u.overlapList.isEmpty()){
3464 						assert(false) : "Why would this happen?";
3465 						u.overlapList=null;
3466 					}
3467 				}
3468 			}
3469 //			assert(false) : hits+", "+currentOverlaps+", "+baseOverlaps+"\n"+overlapMapT+"\n";
3470 
3471 //			assert(hits==currentOverlaps) : hits+", "+currentOverlaps;
3472 
3473 			overlapCollisionsT+=(hits-currentOverlaps);
3474 //			outstream.println("hits="+hits+", currentOverlaps="+currentOverlaps);
3475 			overlapsT+=currentOverlaps;
3476 			return hits;
3477 		}
3478 
3479 		/** Insert reads processed by a thread into the shared code and affix maps.
3480 		 * If operating in subset mode, only store reads with code equal to subset mod subsetCount. */
3481 		private long mergeMaps(){
3482 			if(verbose){System.err.println("Merging maps.");}
3483 			long novelReads=0, novelKeys=0;
3484 			long collisionReads=0;
3485 			long mergedReads=0;
3486 
3487 			assert(localConflictList.isEmpty());
3488 			assert(sharedConflictList.isEmpty());
3489 
3490 			synchronized(codeMap){
3491 				for(Long key : codeMapT.keySet()){
3492 					if(codeMap.containsKey(key)){
3493 						localConflictList.add(codeMapT.get(key));
3494 						sharedConflictList.add(codeMap.get(key));
3495 					}else{
3496 						ArrayList<Unit> list=codeMapT.get(key);
3497 						codeMap.put(key, list);
3498 						addedList.addAll(list);
3499 						novelReads+=list.size();
3500 						novelKeys++;
3501 					}
3502 				}
3503 			}
3504 
3505 			if(verbose){System.err.println("Novel reads = "+novelReads+", conflicts = "+localConflictList.size());}
3506 
3507 			for(int i=0; i<localConflictList.size(); i++){
3508 				ArrayList<Unit> listT=localConflictList.get(i);
3509 				ArrayList<Unit> list=sharedConflictList.get(i);
3510 				synchronized(list){
3511 					for(Unit u : listT){
3512 						if(verbose){System.err.println("Processing novel unit "+u.name());}
3513 						boolean match=false;
3514 						Read primary=null;
3515 						if(findMatchesT){
3516 							for(Unit u2 : list){
3517 								if(pairedEqualsRC(u, u2)){
3518 									//								if(verbose){System.err.println("Matches "+new String(r2.bases, 0, Tools.min(40, r2.length())));}
3519 									u2.absorbMatch(u);
3520 									if(UNIQUE_ONLY){
3521 										synchronized(u2){
3522 											if(u2.valid()){
3523 												mergedReads++;
3524 												baseMatchesT+=u2.length();
3525 												u2.setValid(false);
3526 												addDupe(u2.r, null);
3527 											}
3528 										}
3529 									}
3530 									match=true;
3531 									primary=u2.r;
3532 									break;
3533 								}
3534 							}
3535 						}
3536 						if(match){
3537 							addDupe(u.r, primary);
3538 							mergedReads++;
3539 							baseMatchesT+=u.length();
3540 							if(verbose){System.err.println("matchesT="+matchesT+", baseMatchesT="+baseMatchesT);}
3541 						}else{
3542 							collisionReads++;
3543 							if(verbose){System.err.println("False collision; count = "+collisionReads);}
3544 							list.add(u);
3545 							addedList.add(u);
3546 						}
3547 					}
3548 				}
3549 			}
3550 			matchesT+=mergedReads;
3551 			collisionsT+=collisionReads;
3552 			if(verbose){System.err.println("Done Merging.");}
3553 			if(verbose){System.err.println("mapT.size="+codeMapT.size()+", basesStoredT="+basesStoredT);}
3554 
3555 			codeMapT.clear();
3556 			localConflictList.clear();
3557 			sharedConflictList.clear();
3558 
3559 			if(!addedList.isEmpty()){
3560 				if(addToAffixMapT){
3561 					final LongM p=new LongM(-1, true);
3562 					assert(affixMap1!=null || affixMap2!=null);
3563 					if(affixMap1!=null && !ignoreAffix1){//Allows you to not use am1
3564 						synchronized(affixMap1){
3565 							for(Unit u : addedList){
3566 								if(verbose){System.err.println("Processing affixes for "+u.name());}
3567 								if(u.prefix1!=-1 || u.prefix1!=u.suffix1){
3568 									if(verbose){System.err.println("Using prefix "+u.prefix1);}
3569 									p.set(u.prefix1);
3570 									ArrayList<Unit> alu=affixMap1.get(p);
3571 									if(alu==null){
3572 										if(verbose){System.err.println("Made new alu for "+p);}
3573 										alu=new ArrayList<Unit>(2);
3574 										affixMap1.put(p.iCopy(), alu);
3575 									}
3576 									if(alu.size()<maxAffixCopies){
3577 										if(verbose){System.err.println("Added "+u.name());}
3578 										alu.add(u);
3579 									}
3580 									if(verbose){System.err.println(affixMap1.get(p));}
3581 								}
3582 								if(storeSuffix && u.prefix1!=u.suffix1){
3583 									if(verbose){System.err.println("Using suffix "+u.suffix1);}
3584 									p.set(u.suffix1);
3585 									ArrayList<Unit> alu=affixMap1.get(p);
3586 									if(alu==null){
3587 										if(verbose){System.err.println("Made new alu for "+p);}
3588 										alu=new ArrayList<Unit>(2);
3589 										affixMap1.put(p.iCopy(), alu);
3590 									}
3591 									if(alu.size()<maxAffixCopies){
3592 										if(verbose){System.err.println("Added "+u.name());}
3593 										alu.add(u);
3594 									}
3595 									if(verbose){System.err.println(affixMap1.get(p));}
3596 								}
3597 							}
3598 						}
3599 					}
3600 					if(affixMap2!=null){
3601 						synchronized(affixMap2){
3602 							for(Unit u : addedList){
3603 								if(u.prefix2!=-1 || u.prefix2!=u.suffix2){
3604 									p.set(u.prefix2);
3605 									ArrayList<Unit> alu=affixMap2.get(p);
3606 									if(alu==null){
3607 										alu=new ArrayList<Unit>(2);
3608 										affixMap2.put(p.iCopy(), alu);
3609 									}
3610 									if(alu.size()<maxAffixCopies){alu.add(u);}
3611 								}
3612 								if(storeSuffix && u.prefix2!=u.suffix2){
3613 									p.set(u.suffix2);
3614 									ArrayList<Unit> alu=affixMap2.get(p);
3615 									if(alu==null){
3616 										alu=new ArrayList<Unit>(2);
3617 										affixMap2.put(p.iCopy(), alu);
3618 									}
3619 									if(alu.size()<maxAffixCopies){alu.add(u);}
3620 								}
3621 							}
3622 						}
3623 					}
3624 				}
3625 			}
3626 
3627 			addedList.clear();
3628 			basesStoredT=0;
3629 			return collisionReads+novelReads;
3630 		}
3631 
3632 		private int getTid(){
3633 			synchronized(HashThread.class){
3634 				int x=tcount;
3635 				tcount++;
3636 				return x;
3637 			}
3638 		}
3639 
3640 		private LinkedHashMap<Long, ArrayList<Unit>> codeMapT=new LinkedHashMap<Long, ArrayList<Unit>>(threadMaxReadsToBuffer*8);
3641 		private ArrayList<Unit> addedList=new ArrayList<Unit>(threadMaxReadsToBuffer);
3642 		private ArrayList<ArrayList<Unit>> localConflictList=new ArrayList<ArrayList<Unit>>(threadMaxReadsToBuffer);
3643 		private ArrayList<ArrayList<Unit>> sharedConflictList=new ArrayList<ArrayList<Unit>>(threadMaxReadsToBuffer);
3644 
3645 		long matchesT=0;
3646 		long baseMatchesT=0;
3647 		long baseContainmentsT=0;
3648 		long collisionsT=0;
3649 		long containmentsT=0;
3650 		long containmentCollisionsT=0;
3651 		long basesStoredT=0;
3652 		long addedToMainT=0;
3653 		long readsProcessedT=0;
3654 		long basesProcessedT=0;
3655 		long overlapsT=0;
3656 		long baseOverlapsT=0;
3657 		long overlapCollisionsT=0;
3658 
3659 		private final boolean addToCodeMapT;
3660 		private final boolean addToAffixMapT;
3661 		private final boolean findContainmentsT;
3662 		private final boolean findOverlapsT;
3663 		private final boolean findMatchesT;
3664 //		private final boolean convertToUpperCaseT;
3665 		private final int tid;
3666 		private final ArrayDeque<ConcurrentReadInputStream> crisq;
3667 		private final BandedAligner bandy;
3668 	}
3669 
3670 	public static boolean equalsRC(byte[] a, byte[] b){
3671 		if(a==b){return true;}
3672 		if(a==null || b==null){return false;}
3673 		if(a.length!=b.length){return false;}
3674 
3675 		boolean ca=isCanonical(a);
3676 		boolean cb=isCanonical(b);
3677 
3678 		if(ca==cb){
3679 			for(int i=0; i<a.length; i++){
3680 				final byte aa=a[i], bb=b[i];
3681 				if(aa!=bb){return false;}
3682 			}
3683 		}else{
3684 			for(int i=0, j=b.length-1; i<a.length; i++, j--){
3685 				final byte aa=a[i], bb=baseToComplementExtended[b[j]];
3686 				if(aa!=bb){return false;}
3687 			}
3688 		}
3689 		return true;
3690 	}
3691 
3692 	public static boolean pairedEqualsRC(Unit ua, Unit ub){
3693 		if(verbose){System.err.println("pairedEqualsRC("+ua.name()+", "+ub.name()+")");}
3694 		if(verbose){System.err.println("ea");}
3695 		boolean b=equalsRC(ua, ub);
3696 		if(verbose){System.err.println("eb");}
3697 		if(!b){return false;}
3698 		if(verbose){System.err.println("ec");}
3699 
3700 		if(ua.r!=null && ub.r!=null){
3701 			if(verbose){System.err.println("ed");}
3702 			assert((ua.r.mate==null)==(ub.r.mate==null));
3703 			if(verbose){System.err.println("ee");}
3704 			if(ua.r.mate!=null && ub.r.mate!=null){
3705 				if(verbose){System.err.println("ef");}
3706 				return ua.canonical()==ub.canonical() && ua.r.pairnum()==ub.r.pairnum() && Tools.compare(ua.r.mate.bases, ub.r.mate.bases)==0;
3707 			}
3708 			if(verbose){System.err.println("eg");}
3709 		}
3710 		if(verbose){System.err.println("eh");}
3711 		return true;
3712 	}
3713 
3714 	private static boolean equalsRC(Unit ua, Unit ub){
3715 		if(verbose){System.err.println("equalsRC("+ua.name()+", "+ub.name()+")");}
3716 		return ua.code1==ub.code1 && ua.code2==ub.code2 && (ua.canonical()==ub.canonical() ? (ua.prefix1==ub.prefix1 && ua.suffix1==ub.suffix1) :
3717 			 (ua.prefix1==ub.suffix1 && ua.suffix1==ub.prefix1)) && compareRC(ua, ub)==0;
3718 	}
3719 
3720 	public static int comparePairedRC(Unit ua, Unit ub){
3721 		if(verbose){System.err.println("comparePairedRC("+ua.name()+", "+ub.name()+")");}
3722 		int x=compareRC(ua, ub);
3723 		if(x!=0){return x;}
3724 
3725 		if(ua.r!=null && ub.r!=null && ua.r.mate!=null && ub.r.mate!=null){
3726 			if(ua.r.pairnum()!=ub.r.pairnum()){return ua.r.pairnum()-ub.r.pairnum();}
3727 			return compareRC((Unit)ua.r.mate.obj, (Unit)ub.r.mate.obj);
3728 		}
3729 		return 0;
3730 	}
3731 
3732 	//TODO
3733 	//This is really for sorting by length.
3734 	private static int compareRC(Unit ua, Unit ub){
3735 		if(verbose){System.err.println("compareRC("+ua.name()+", "+ub.name()+")");}
3736 		if(ua==ub){return 0;}
3737 		if(verbose){System.err.println("a");}
3738 		if(verbose){System.err.println("a1");}
3739 		if(ua.length()!=ub.length()){return ub.length()-ua.length();}
3740 		if(verbose){System.err.println("a2");}
3741 
3742 		if(REQUIRE_MATCHING_NAMES){
3743 			if(ua.name()!=null && ub.name()!=null){
3744 				int x=ua.name().compareTo(ub.name());
3745 				if(x!=0){return x;}
3746 			}
3747 		}
3748 		if(verbose){System.err.println("a3");}
3749 
3750 		if(ua.r==null || ub.r==null){
3751 			if(verbose){System.err.println("b");}
3752 			if(verbose){System.err.println("b1");}
3753 			if(ua.canonical()){
3754 				if(verbose){System.err.println("c");}
3755 				if(ub.canonical()){
3756 					if(ua.prefix1!=ub.prefix1){return ua.prefix1>ub.prefix1 ? 1 : -1;}
3757 					if(ua.suffix1!=ub.suffix1){return ua.suffix1>ub.suffix1 ? 1 : -1;}
3758 				}else{
3759 					if(ua.prefix1!=ub.suffix1){return ua.prefix1>ub.suffix1 ? 1 : -1;}
3760 					if(ua.suffix1!=ub.prefix1){return ua.suffix1>ub.prefix1 ? 1 : -1;}
3761 				}
3762 			}else{
3763 				if(verbose){System.err.println("d");}
3764 				if(ub.canonical()){
3765 					if(ua.suffix1!=ub.prefix1){return ua.suffix1>ub.prefix1 ? 1 : -1;}
3766 					if(ua.prefix1!=ub.suffix1){return ua.prefix1>ub.suffix1 ? 1 : -1;}
3767 				}else{
3768 					if(ua.suffix1!=ub.suffix1){return ua.suffix1>ub.suffix1 ? 1 : -1;}
3769 					if(ua.prefix1!=ub.prefix1){return ua.prefix1>ub.prefix1 ? 1 : -1;}
3770 				}
3771 			}
3772 			if(verbose){System.err.println("e");}
3773 			if(ua.code1!=ub.code1){return ua.code1>ub.code1 ? 1 : -1;}
3774 			if(ua.code2!=ub.code2){return ua.code2>ub.code2 ? 1 : -1;}
3775 
3776 			return ua.pairnum()-ub.pairnum();
3777 		}
3778 		if(verbose){System.err.println("f");}
3779 		final byte[] a=ua.r.bases, b=ub.r.bases;
3780 		if(a==b){return 0;}
3781 		if(a==null || b==null){return a==null ? -1 : 1;}
3782 		if(verbose){System.err.println("g");}
3783 
3784 		if(ua.canonical()==ub.canonical()){
3785 			if(verbose){System.err.println("h");}
3786 			if(ua.canonical() && ub.canonical()){
3787 				for(int i=0; i<a.length; i++){
3788 					final byte aa=a[i], bb=b[i];
3789 					if(aa!=bb){return aa-bb;}
3790 				}
3791 			}else{
3792 				for(int i=a.length-1; i>=0; i--){
3793 					final byte aa=baseToComplementExtended[a[i]], bb=baseToComplementExtended[b[i]];
3794 					if(aa!=bb){return aa-bb;}
3795 				}
3796 			}
3797 		}else{
3798 			if(verbose){System.err.println("i");}
3799 			if(ua.canonical()){
3800 				for(int i=0, j=b.length-1; i<a.length; i++, j--){
3801 					final byte aa=a[i], bb=baseToComplementExtended[b[j]];
3802 					if(aa!=bb){return aa-bb;}
3803 				}
3804 			}else{
3805 				for(int i=a.length-1, j=0; i>=0; i--, j++){
3806 					final byte aa=baseToComplementExtended[a[i]], bb=b[j];
3807 					if(aa!=bb){return aa-bb;}
3808 				}
3809 			}
3810 		}
3811 
3812 		if(verbose){System.err.println("j");}
3813 		return ua.pairnum()-ub.pairnum();
3814 	}
3815 
3816 	private static long hashTip(byte[] bases, boolean prefix, int k, int skipInitialBases){
3817 		if(bases==null || bases.length<k){return -1;}
3818 
3819 		final int shift=2*k;
3820 		final int shift2=shift-2;
3821 		final long mask=(shift>63 ? -1L : ~((-1L)<<shift));
3822 		long kmer=0;
3823 		long rkmer=0;
3824 		int len=0;
3825 
3826 		final int start=(prefix ? 0+skipInitialBases : bases.length-k-skipInitialBases);
3827 		final int stop=start+k;
3828 
3829 //		if(verbose){
3830 //			System.err.println("\n"+new String(bases));
3831 //			System.err.println("prefix="+prefix+", start="+start+", stop="+stop);
3832 ////			System.err.print(new String(bases));
3833 //		}
3834 		for(int i=start; i<stop; i++){
3835 			byte b=bases[i];
3836 //			if(verbose){System.err.print((char)b);}
3837 			long x=baseToNumber[b];
3838 			long x2=baseToComplementNumber[b];
3839 			kmer=((kmer<<2)|x)&mask;
3840 			rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
3841 			len++;
3842 		}
3843 		if(verbose){System.err.println(new String(bases, start, k)+" = "+Tools.max(kmer, rkmer));}
3844 		assert(len==k) : len+","+k;
3845 		return Tools.max(kmer, rkmer);
3846 	}
3847 
3848 	private static final int calcMaxEdits(int maxEdits, float minIdentityMult, int len){
3849 		return minIdentityMult==0 ? maxEdits : Tools.max(maxEdits, (int)Math.round(len*minIdentityMult));
3850 	}
3851 
3852 
3853 	private class Overlap implements Comparable<Overlap>{
3854 
3855 		public Overlap(Unit u1_, Unit u2_, int type_, int start1_, int start2_, int stop1_, int stop2_, int len_, int mismatches_, int edits_, BandedAligner bandy){
3856 			assert(u1_!=u2_);
3857 			if(verbose){System.err.println("\nCreating an overlap.");}
3858 			u1=u1_;
3859 			u2=u2_;
3860 			type=type_;
3861 			start1=start1_;
3862 			start2=start2_;
3863 			stop1=stop1_;
3864 			stop2=stop2_;
3865 			overlapLen=len_;
3866 			mismatches=mismatches_;
3867 			edits=edits_;
3868 
3869 			assert(Tools.absdif(Tools.absdif(start1, stop1), Tools.absdif(start2, stop2))<=maxEdits) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)
3870 				+"\n>1a\n"+new String(u1.r.bases, Tools.min(start1, stop1), Tools.max(start1, stop1)-Tools.min(start1, stop1)+1)
3871 				+"\n>2a\n"+new String(u2.r.bases, Tools.min(start2, stop2), Tools.max(start2, stop2)-Tools.min(start2, stop2)+1);
3872 
3873 			assert(start1>=0 && start1<=u1.length()) : "type "+type+": "+start1+", "+stop1+", "+u1.length()+", "+start2+", "+stop2+", "+u2.length();
3874 			assert(stop1>=0 && stop1<=u1.length()) :   "type "+type+": "+start1+", "+stop1+", "+u1.length()+", "+start2+", "+stop2+", "+u2.length();
3875 			assert(start2>=0 && start2<=u2.length()) : "type "+type+": "+start1+", "+stop1+", "+u1.length()+", "+start2+", "+stop2+", "+u2.length();
3876 			assert(stop2>=0 && stop2<=u2.length()) :   "type "+type+": "+start1+", "+stop1+", "+u1.length()+", "+start2+", "+stop2+", "+u2.length();
3877 
3878 			assert(type==FORWARD || type==FORWARDRC || type==REVERSE || type==REVERSERC);
3879 
3880 			if(verbose){System.err.println(this);}
3881 
3882 			assert(Tools.absdif(Tools.absdif(start1, stop1), Tools.absdif(start1, stop1))<=maxEdits);
3883 
3884 			assert(test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)
3885 				+"\n>1a\n"+new String(u1.r.bases, Tools.min(start1, stop1), Tools.max(start1, stop1)-Tools.min(start1, stop1)+1)
3886 				+"\n>2a\n"+new String(u2.r.bases, Tools.min(start2, stop2), Tools.max(start2, stop2)-Tools.min(start2, stop2)+1);
3887 			if(verbose){System.err.println("Passed test 1.");}
3888 
3889 //			bandy.verbose=true;
3890 //			test(bandy);
3891 //			assert(false);
3892 
3893 			assert(u1!=u2);
3894 			u1.firstInOverlap(u2);
3895 			u2.firstInOverlap(u1);
3896 			assert(u1.length()!=u2.length() || u1.code1!=u2.code1 || u1.code2!=u2.code2 || (u1.r!=null && u1.r.mate!=null)) : "Collision? \n"+this+"\n"+u1+"\n"+u2;
3897 			assert(u1.firstInOverlap(u2)!=u2.firstInOverlap(u1)) :
3898 				"\nu1.firstInOverlap(u2)="+u1.firstInOverlap(u2)+"\nu2.firstInOverlap(u1)="+u2.firstInOverlap(u1)+"\nu1="+u1+"\nu2="+u2;
3899 
3900 			if(!u1.firstInOverlap(u2)){
3901 				if(verbose){System.err.println("\nSwapping.");}
3902 				swap();
3903 				if(verbose){System.err.println(this);}
3904 
3905 				if(EA && !customBandwidth && !test(bandy, edits+maxEdits)){
3906 					System.err.println("\n"+this);
3907 					swap();
3908 					System.err.println("\n"+this);
3909 					assert(test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)+"\n";
3910 					System.err.println("Passed test 2a, "+bandy.lastEdits+" edits.\n");
3911 					swap();
3912 					System.err.println("\n"+this);
3913 					assert(test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)
3914 						+"\n>1a\n"+new String(u1.r.bases, Tools.min(start1, stop1), Tools.max(start1, stop1)-Tools.min(start1, stop1)+1)
3915 						+"\n>2a\n"+new String(u2.r.bases, Tools.min(start2, stop2), Tools.max(start2, stop2)-Tools.min(start2, stop2)+1);
3916 					System.err.println("Passed test 2b, "+bandy.lastEdits+" edits.\n");
3917 				}
3918 
3919 				assert(customBandwidth || test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)
3920 					+"\n>1a\n"+new String(u1.r.bases, Tools.min(start1, stop1), Tools.max(start1, stop1)-Tools.min(start1, stop1)+1)
3921 					+"\n>2a\n"+new String(u2.r.bases, Tools.min(start2, stop2), Tools.max(start2, stop2)-Tools.min(start2, stop2)+1);
3922 				if(verbose){System.err.println("Passed test 2.");}
3923 			}
3924 
3925 			if(type==REVERSE || type==REVERSERC){
3926 				if(verbose){System.err.println("\nReversing.");}
3927 				reverseDirection();
3928 				if(verbose){System.err.println(this);}
3929 
3930 				if(EA && !Shared.anomaly && !customBandwidth && bandy!=null && !test(bandy, edits+maxEdits)){
3931 					Shared.anomaly=true;
3932 					BandedAligner.verbose=true;
3933 					System.err.println("\n********** Failed test 3, "+bandy.lastEdits+" edits. **********\n");
3934 					reverseDirection();
3935 					System.err.println(this);
3936 					assert(test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)+"\n";
3937 					System.err.println("Passed test 3a, "+bandy.lastEdits+" edits.\n");
3938 					reverseDirection();
3939 					System.err.println(this);
3940 					assert(test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)
3941 						+"\n>1a\n"+new String(u1.r.bases, Tools.min(start1, stop1), Tools.max(start1, stop1)-Tools.min(start1, stop1)+1)
3942 						+"\n>2a\n"+new String(u2.r.bases, Tools.min(start2, stop2), Tools.max(start2, stop2)-Tools.min(start2, stop2)+1);
3943 					System.err.println("Passed test 3b, "+bandy.lastEdits+" edits.\n");
3944 					BandedAligner.verbose=false;
3945 					assert(false);
3946 				}
3947 
3948 				assert(customBandwidth || test(bandy, edits+maxEdits)) : "\n"+this+"\n>1\n"+new String(u1.r.bases)+"\n>2\n"+new String(u2.r.bases)+"\n";
3949 				if(verbose){System.err.println("Passed test 3.");}
3950 			}
3951 			//Now all overlaps should be FORWARD or FORWARDRC and u1 should be at least as big as u2
3952 			assert(type==FORWARD || type==FORWARDRC);
3953 			assert(u1.length()>=u2.length());
3954 			assert(u1.firstInOverlap(u2));
3955 			assert(!u2.firstInOverlap(u1));
3956 			if(verbose){System.err.println("Finished overlap initialization.");}
3957 		}
3958 
3959 		public boolean test(BandedAligner bandy, int editLimit){
3960 			final int last1=u1.length()-1, last2=u2.length()-1;
3961 			if(verbose){System.err.println("Testing "+OVERLAP_TYPE_NAMES[type]+", "+start1+", "+start2);}
3962 			if(type==FORWARD){
3963 				assert(start1==0 || start2==0) : "start1="+start1+", stop1="+stop1+", last1="+last1+", start2="+start2+", stop2="+stop2+", last2="+last2;
3964 				if(start2==0){
3965 					if(verbose){System.err.println("A");}
3966 					return u1.overlapsForward(u2, start1, start2, bandy, false, editLimit);}
3967 				else{
3968 					if(verbose){System.err.println("B");}
3969 					return u2.overlapsForward(u1, start2, start1, bandy, false, editLimit);}
3970 			}
3971 			if(type==FORWARDRC){
3972 				assert(start1==0 || start2==last2) : "start1="+start1+", stop1="+stop1+", last1="+last1+", start2="+start2+", stop2="+stop2+", last2="+last2;
3973 				if(start2==last2){return u1.overlapsForwardRC(u2, start1, start2, bandy, false, editLimit);}
3974 				else{return u2.overlapsReverseRC(u1, start2, start1, bandy, false, editLimit);}
3975 			}
3976 			if(type==REVERSE){
3977 				assert(start1==last1 || start2==last2) : "start1="+start1+", stop1="+stop1+", last1="+last1+", start2="+start2+", stop2="+stop2+", last2="+last2;
3978 				if(start2==last2){return u1.overlapsReverse(u2, start1, start2, bandy, false, editLimit);}
3979 				else{return u2.overlapsReverse(u1, start2, start1, bandy, false, editLimit);}
3980 			}
3981 			if(type==REVERSERC){
3982 				assert(start1==last1 || start2==0) : "start1="+start1+", stop1="+stop1+", last1="+last1+", start2="+start2+", stop2="+stop2+", last2="+last2;
3983 				if(start2==0){return u1.overlapsReverseRC(u2, start1, start2, bandy, false, editLimit);}
3984 				else{return u2.overlapsForwardRC(u1, start2, start1, bandy, false, editLimit);}
3985 			}
3986 			throw new RuntimeException();
3987 		}
3988 
3989 		@Override
3990 		public boolean equals(Object o){
3991 			return equals((Overlap)o);
3992 		}
3993 
3994 		public boolean equals(Overlap o){
3995 			if(this==o){return true;}
3996 			assert(o!=null) : "*A*\n"+this+"\n"+o+"\n"+u1+"\n"+u2;
3997 			assert(u1!=null && u2!=null) : "*B*\n"+this+"\n"+o+"\n"+u1+"\n"+u2;
3998 			assert(u1!=o.u2 || u2!=o.u1) : "*C*\n"+this+"\n"+o+"\n"+u1.firstInOverlap(u2)+"\n"+o.u1.firstInOverlap(o.u2)+"\n"+u1+"\n"+u2;
3999 			return (u1==o.u1 && u2==o.u2 && type==o.type && start1==o.start1 && start2==o.start2 && stop1==o.stop1 && stop2==o.stop2)
4000 					;//|| (u1==o.u2 && u2==o.u1 && type==reverseType(o.type) && start1==o.start2 && start2==o.start1);
4001 		}
4002 
4003 //		public int compareTo(Overlap o){
4004 //			int a=compareTo2(o);
4005 //			int b=o.compareTo2(this);
4006 //			assert(a==-b) : "\n"+this+"\n"+o+"\na="+a+", b="+b+", equals="+this.equals(o)
4007 //				+"\nu1.compareTo(o.u1)="+u1.compareTo(o.u1)+"\no.u1.compareTo(u1)="+o.u1.compareTo(u1)
4008 //				+"\nu2.compareTo(o.u2)="+u2.compareTo(o.u2)+"\no.u2.compareTo(u2)="+o.u2.compareTo(u2);
4009 //			return a;
4010 //		}
4011 
4012 		@Override
4013 		public int compareTo(Overlap o){
4014 			int score1=overlapLen-50*(mismatches+edits);
4015 			int score2=o.overlapLen-50*(o.mismatches+o.edits);
4016 			if(score1!=score2){return score2-score1;}
4017 			if(overlapLen!=o.overlapLen){return o.overlapLen-overlapLen;}
4018 			int x=u1.compareTo(o.u1);
4019 			if(x!=0){return -x;}
4020 			x=u2.compareTo(o.u2);
4021 			if(x!=0){return -x;}
4022 			if(type!=o.type){return type-o.type;}
4023 			if((u1!=o.u1 || u2!=o.u2) && absorbMatch && !subsetMode){
4024 				boolean oldv=verbose;
4025 				verbose=true;
4026 				System.err.println(this);
4027 				System.err.println(o);
4028 				System.err.println("********");
4029 				System.err.println(u1);
4030 				System.err.println(u2);
4031 				System.err.println(o.u1);
4032 				System.err.println(o.u2);
4033 				System.err.println("********");
4034 				System.err.println(u1.equals(o.u1));
4035 				System.err.println("********");
4036 				System.err.println(u2.equals(o.u2));
4037 				System.err.println("********");
4038 				System.err.println(u1.compareTo(o.u1));
4039 				System.err.println("********");
4040 				System.err.println(u2.compareTo(o.u2));
4041 				System.err.println("********");
4042 				verbose=oldv;
4043 			}
4044 			assert(!absorbMatch || (u1==o.u1 && u2==o.u2) || subsetMode) : "\n"+u1.r+"\n"+u2.r+"\n"+o.u1.r+"\n"+o.u2.r
4045 				+"\n\n"+u1.r.mate+"\n"+u2.r.mate+"\n"+o.u1.r.mate+"\n"+o.u2.r.mate;
4046 //			assert(false) : "\n"+this+"\n"+o+"\n>"+u1.name()+"\n"+new String(u1.bases())+"\n>"+u2.name()+"\n"+new String(u2.bases())+"\n";
4047 			if(start1!=o.start1){return start1-o.start1;}
4048 			if(stop1!=o.stop1){return stop1-o.stop1;}
4049 			if(start2!=o.start2){return start2-o.start2;}
4050 			if(stop2!=o.stop2){return stop2-o.stop2;}
4051 			if(this.equals(o)){
4052 				return 0;
4053 			}else{
4054 				//TODO: ensure this assumption is valid.
4055 				assert(!absorbContainment || !absorbMatch || subsetMode) : "\n"+this+"\n"+o+"\n>"+u1.name()+"\n"+new String(u1.bases())+"\n>"+u2.name()+"\n"+new String(u2.bases())+"\n";
4056 
4057 				if(u1.unitID!=o.u1.unitID){return u1.unitID-o.u1.unitID;}
4058 				if(u2.unitID!=o.u2.unitID){return u2.unitID-o.u2.unitID;}
4059 			}
4060 			return 0;
4061 		}
4062 
4063 		@Override
4064 		public int hashCode(){
4065 			return u1.hashCode()^u2.hashCode()^overlapLen;
4066 		}
4067 
4068 		public void flip(Unit changed, BandedAligner bandy){
4069 
4070 			if(changed==u2){
4071 				if(type==FORWARD){type=FORWARDRC;}
4072 				else if(type==FORWARDRC){type=FORWARD;}
4073 				else if(type==REVERSE){type=REVERSERC;}
4074 				else if(type==REVERSERC){type=REVERSE;}
4075 				else{throw new RuntimeException("Unknown overlap type "+type);}
4076 				start2=u2.length()-start2-1;
4077 				stop2=u2.length()-stop2-1;
4078 			}else if(changed==u1){
4079 				if(type==FORWARD){type=REVERSERC;}
4080 				else if(type==FORWARDRC){type=REVERSE;}
4081 				else if(type==REVERSE){type=FORWARDRC;}
4082 				else if(type==REVERSERC){type=FORWARD;}
4083 				else{throw new RuntimeException("Unknown overlap type "+type);}
4084 				start1=u1.length()-start1-1;
4085 				stop1=u1.length()-stop1-1;
4086 			}else{throw new RuntimeException("'changed' was not in the Overlap.");}
4087 
4088 			assert(test(bandy, edits+maxEdits));
4089 		}
4090 
4091 		public void swap(){
4092 			Unit tempu=u1;
4093 			u1=u2;
4094 			u2=tempu;
4095 			int temp=start1;
4096 			start1=start2;
4097 			start2=temp;
4098 			temp=stop1;
4099 			stop1=stop2;
4100 			stop2=temp;
4101 			if(type==FORWARDRC){type=REVERSERC;}
4102 			else if(type==REVERSERC){type=FORWARDRC;}
4103 		}
4104 
4105 		public void reverseDirection(){
4106 			type=reverseType(type);
4107 			int temp=start1;
4108 			start1=stop1;
4109 			stop1=temp;
4110 			temp=start2;
4111 			start2=stop2;
4112 			stop2=temp;
4113 		}
4114 
4115 		@Override
4116 		public String toString(){
4117 			StringBuilder sb=new StringBuilder(80);
4118 			sb.append("type=");
4119 			sb.append(OVERLAP_TYPE_NAMES[type]);
4120 			sb.append(", len=");
4121 			sb.append(overlapLen);
4122 			sb.append(", subs=");
4123 			sb.append(mismatches);
4124 			sb.append(", edits=");
4125 			sb.append(edits);
4126 
4127 			sb.append(" (");
4128 			sb.append(u1.name()==null ? u1.r.numericID+"" : u1.name());
4129 			if(printLengthInEdges){
4130 				sb.append(", length=");
4131 				sb.append(u1.length());
4132 			}
4133 			sb.append(", start1=");
4134 			sb.append(start1);
4135 			sb.append(", stop1=");
4136 			sb.append(stop1);
4137 
4138 			sb.append(") (");
4139 			sb.append(u2.name()==null ? u2.r.numericID+"" : u2.name());
4140 			if(printLengthInEdges){
4141 				sb.append(", length=");
4142 				sb.append(u2.length());
4143 			}
4144 			sb.append(", start2=");
4145 			sb.append(start2);
4146 			sb.append(", stop2=");
4147 			sb.append(stop2);
4148 			sb.append(")");
4149 			return sb.toString();
4150 		}
4151 
4152 		public String toLabel(){
4153 			StringBuilder sb=new StringBuilder(80);
4154 			sb.append(OVERLAP_TYPE_ABBREVIATIONS[type]);
4155 			sb.append(',');
4156 			sb.append(overlapLen);
4157 			sb.append(',');
4158 			sb.append(mismatches);
4159 			sb.append(',');
4160 			sb.append(edits);
4161 
4162 			if(printLengthInEdges){
4163 				sb.append(',');
4164 				sb.append(u1.length());
4165 			}
4166 			sb.append(',');
4167 			sb.append(start1);
4168 			sb.append(',');
4169 			sb.append(stop1);
4170 
4171 			if(printLengthInEdges){
4172 				sb.append(',');
4173 				sb.append(u2.length());
4174 			}
4175 			sb.append(',');
4176 			sb.append(start2);
4177 			sb.append(',');
4178 			sb.append(stop2);
4179 
4180 			return sb.toString();
4181 		}
4182 
4183 
4184 		private void setCanonContradiction(boolean b){
4185 			assert(b!=canonContradiction()) : b+", "+canonContradiction();
4186 			if(b){flags|=CANON_CONTRADICTION_MASK;}
4187 			else{flags&=~CANON_CONTRADICTION_MASK;}
4188 			assert(b==canonContradiction()) : b+", "+canonContradiction();
4189 		}
4190 
4191 		private void setOffsetContradiction(boolean b){
4192 			assert(b!=offsetContradiction()) : b+", "+offsetContradiction();
4193 			if(b){flags|=OFFSET_CONTRADICTION_MASK;}
4194 			else{flags&=~OFFSET_CONTRADICTION_MASK;}
4195 			assert(b==offsetContradiction()) : b+", "+offsetContradiction();
4196 		}
4197 
4198 		private void setMultiJoin(boolean b){
4199 			assert(b!=multiJoin()) : b+", "+multiJoin();
4200 			if(b){flags|=MULTIJOIN_MASK;}
4201 			else{flags&=~MULTIJOIN_MASK;}
4202 			assert(b==multiJoin()) : b+", "+multiJoin();
4203 		}
4204 
4205 		private void setVisited(boolean b){
4206 			assert(b!=visited()) : b+", "+visited();
4207 			if(b){flags|=VISITED_MASK;}
4208 			else{flags&=~VISITED_MASK;}
4209 			assert(b==visited()) : b+", "+visited();
4210 		}
4211 
4212 		private void setCyclic(boolean b){
4213 			assert(b!=cyclic()) : b+", "+cyclic();
4214 			if(b){flags|=CYCLIC_MASK;}
4215 			else{flags&=~CYCLIC_MASK;}
4216 			assert(b==cyclic()) : b+", "+cyclic();
4217 		}
4218 
4219 		private void setInvalid(boolean b){
4220 			assert(b!=invalid()) : b+", "+invalid();
4221 			assert(b!=mst()) : b+", "+mst()+", "+invalid();
4222 			if(b){flags|=INVALID_MASK;}
4223 			else{flags&=~INVALID_MASK;}
4224 			assert(b==invalid()) : b+", "+invalid();
4225 		}
4226 
4227 		private void setMst(boolean b){
4228 			assert(b!=mst()) : b+", "+mst();
4229 			assert(b!=invalid()) : b+", "+mst()+", "+invalid();
4230 			if(b){flags|=MST_MASK;}
4231 			else{flags&=~MST_MASK;}
4232 			assert(b==mst()) : b+", "+mst();
4233 		}
4234 
4235 		public void clearVolatileFlags(){
4236 			flags=0;
4237 //			flags=flags&~(MULTIJOIN_MASK|VISITED_MASK|CANON_CONTRADICTION_MASK|CYCLIC_MASK|OFFSET_CONTRADICTION_MASK|INVALID_MASK);
4238 //			assert(!canonContradiction());
4239 //			assert(!offsetContradiction());
4240 //			assert(!multiJoin());
4241 //			assert(!visited());
4242 //			assert(!cyclic());
4243 //			assert(!invalid());
4244 		}
4245 
4246 		public boolean canonContradiction(){return (CANON_CONTRADICTION_MASK&flags)==CANON_CONTRADICTION_MASK;}
4247 		public boolean offsetContradiction(){return (OFFSET_CONTRADICTION_MASK&flags)==OFFSET_CONTRADICTION_MASK;}
4248 		public boolean multiJoin(){return (MULTIJOIN_MASK&flags)==MULTIJOIN_MASK;}
4249 		public boolean visited(){return (VISITED_MASK&flags)==VISITED_MASK;}
4250 		public boolean cyclic(){return (CYCLIC_MASK&flags)==CYCLIC_MASK;}
4251 		public boolean invalid(){return (INVALID_MASK&flags)==INVALID_MASK;}
4252 		public boolean mst(){return (MST_MASK&flags)==MST_MASK;}
4253 		public boolean contradiction(){return canonContradiction() || offsetContradiction();}
4254 
4255 		private static final long VISITED_MASK=(1L<<0);
4256 		private static final long MULTIJOIN_MASK=(1L<<1);
4257 		private static final long CYCLIC_MASK=(1L<<2);
4258 		private static final long CANON_CONTRADICTION_MASK=(1L<<3);
4259 		private static final long OFFSET_CONTRADICTION_MASK=(1L<<4);
4260 		private static final long INVALID_MASK=(1L<<5);
4261 		private static final long MST_MASK=(1L<<6);
4262 
4263 		Unit u1;
4264 		Unit u2;
4265 		int type;
4266 		int start1;
4267 		int start2;
4268 		int stop1;
4269 		int stop2;
4270 
4271 		long flags=0;
4272 
4273 		final int overlapLen;
4274 		final int mismatches;
4275 		final int edits;
4276 	}
4277 
4278 	/**
4279 	 * @return
4280 	 */
4281 	private int determineCluster2(final int uid) {
4282 		assert(clusterNumbers!=null);
4283 		boolean stable=false;
4284 		int cluster=uid;
4285 		while(!stable){
4286 			cluster=clusterNumbers.get(uid);
4287 			if(cluster==0 || cluster==uid){return cluster;}
4288 			assert(cluster<=uid);
4289 			final int next=determineCluster2(cluster);
4290 			if(next>=cluster){return cluster;}
4291 			stable=clusterNumbers.compareAndSet(uid, cluster, next);
4292 		}
4293 		return cluster;
4294 	}
4295 
4296 
4297 	private int mergeClusterIds(int cluster1, int cluster2) {
4298 		assert(clusterNumbers!=null);
4299 
4300 //		System.err.println("Merging clusters "+cluster1+" and "+cluster2);
4301 
4302 		while(cluster1!=cluster2){
4303 			int min=Tools.min(cluster1, cluster2);
4304 			if(cluster1!=min){
4305 				assert(cluster1>min);
4306 				boolean b=clusterNumbers.compareAndSet(cluster1, cluster1, min);
4307 				if(!b){
4308 					cluster1=determineCluster2(cluster1);
4309 					min=Tools.min(cluster1, cluster2);
4310 				}
4311 			}
4312 			if(cluster2!=min){
4313 				assert(cluster2>min);
4314 				boolean b=clusterNumbers.compareAndSet(cluster2, cluster2, min);
4315 				if(!b){
4316 					cluster2=determineCluster2(cluster2);
4317 					min=Tools.min(cluster1, cluster2);
4318 				}
4319 			}
4320 		}
4321 //		System.err.println("Returning "+cluster1);
4322 		return cluster1;
4323 	}
4324 
4325 	private class Unit implements Comparable<Unit>, Serializable {
4326 
4327 		/**
4328 		 *
4329 		 */
4330 		private static final long serialVersionUID = 5232407003873807738L;
4331 
4332 		public Unit(Read r_){
4333 			this(r_, isCanonical(r_.bases));
4334 		}
4335 
4336 		public Unit(Read r_, boolean canonical_){
4337 //			this(r_, canonical_, canonical_ ? hash(r_.bases) : hashReversed(r_.bases));
4338 			this(r_, canonical_, hash(r_.bases), hashReversed(r_.bases));
4339 		}
4340 
4341 		public Unit(Read r_, boolean canonical_, long codeF_, long codeR_){
4342 			r=r_;
4343 			code1=Tools.min(codeF_, codeR_);
4344 			code2=Tools.max(codeF_, codeR_);
4345 			long f=r.length();
4346 			prefix1=hashTip(r.bases, true, k, 0);
4347 			suffix1=hashTip(r.bases, false, k, 0);
4348 			if(r.length()>2*k){
4349 				prefix2=hashTip(r.bases, true, k, k);
4350 				suffix2=hashTip(r.bases, false, k, k);
4351 			}
4352 			if(canonical_){f|=CANON_MASK;}
4353 			if(r.pairnum()==1){f|=PAIRNUM_MASK;}
4354 			flags=f;
4355 			assert(canonical()==canonical_);
4356 			assert(length()==r.length());
4357 			assert(pairnum()==r.pairnum());
4358 			if(parseDepth){
4359 				int[] quad=KmerNormalize.parseDepth(r.id, null);
4360 				if(quad!=null){depth=quad[r.pairnum()];}
4361 			}
4362 		}
4363 
4364 		int determineCluster() {
4365 			return determineCluster2(unitID);
4366 		}
4367 
4368 		public void absorbMatch(Unit u){
4369 
4370 			assert(code1==u.code1 && code2==u.code2 && length()==u.length());
4371 			if(r==null || u.r==null){return;}
4372 			u.r.setDiscarded(true);
4373 			final byte[] bases1=r.bases, bases2=u.r.bases;
4374 			final byte[] quals1=r.quality, quals2=u.r.quality;
4375 
4376 			assert((r.mate==null) == (u.r.mate==null));
4377 
4378 			if(r.mate!=null && !u.r.mate.discarded()){
4379 				((Unit)r.mate.obj).absorbMatch((Unit)u.r.mate.obj);
4380 			}
4381 			if(quals1==null || quals2==null){return;}
4382 
4383 			if(canonical()==u.canonical()){
4384 				for(int i=0; i<bases1.length; i++){
4385 					byte b1=bases1[i], b2=bases2[i];
4386 					if(!AminoAcid.isFullyDefined(b1) && AminoAcid.isFullyDefined(b2)){bases1[i]=b2;}
4387 					else{assert(b1==b2);}
4388 					if(quals1!=null && quals2!=null){
4389 						quals1[i]=Tools.max(quals1[i], quals2[i]);
4390 					}
4391 				}
4392 			}else{
4393 				for(int i=0, j=bases2.length-1; i<bases1.length; i++, j--){
4394 					byte b1=bases1[i], b2=baseToComplementExtended[bases2[j]];
4395 					if(!AminoAcid.isFullyDefined(b1) && AminoAcid.isFullyDefined(b2)){bases1[i]=b2;}
4396 					else{assert(b1==b2);}
4397 					if(quals1!=null && quals2!=null){
4398 						quals1[i]=Tools.max(quals1[i], quals2[j]);
4399 					}
4400 				}
4401 			}
4402 
4403 //			if(mergeHeaders){
4404 //				r.id+=mergeDelimiter+u.r.id;
4405 //				assert(false) : r.id;
4406 //			}
4407 //			assert(false) : r.id;
4408 		}
4409 
4410 //		public void absorbContainment(Unit u){
4411 //			if(mergeHeaders){
4412 //				r.id+=mergeDelimiter+u.r.id;
4413 //				assert(false) : r.id;
4414 //			}
4415 //			assert(false) : r.id;
4416 //		}
4417 
4418 		public boolean alreadyHas(Overlap o){
4419 			if(overlapList==null){return false;}
4420 			for(int i=0; i<overlapList.size(); i++){
4421 				Overlap o2=overlapList.get(i);
4422 				if(o.equals(o2)){
4423 					assert(overlapList.contains(o));
4424 					assert(o2.equals(o));
4425 					return true;
4426 				}
4427 			}
4428 			assert(!overlapList.contains(o));
4429 			return false;
4430 		}
4431 
4432 		/**
4433 		 * @return A cluster
4434 		 */
4435 		public ArrayList<Unit> makeCluster() {
4436 			assert(!visited());
4437 			assert(!clustered());
4438 			assert(valid());
4439 //			assert(set.isEmpty());
4440 			ArrayList<Unit> cluster=new ArrayList<Unit>(overlapList==null ? 1 : overlapList.size()+1);
4441 			cluster.add(this);
4442 			setClustered(true);
4443 
4444 			int added=1;
4445 			for(int i=0; i<cluster.size(); i++){
4446 				Unit u=cluster.get(i);
4447 				added+=u.visit(cluster);
4448 			}
4449 
4450 			assert(added==cluster.size());
4451 			return cluster;
4452 		}
4453 
4454 		/**
4455 		 * @param cluster
4456 		 * @return Number of units added to the cluster
4457 		 */
4458 		public int visit(ArrayList<Unit> cluster) {
4459 			assert(!visited());
4460 			assert(clustered());
4461 			assert(valid());
4462 //			assert(cluster.contains(this));
4463 			setVisited(true);
4464 			int added=0;
4465 
4466 			if(r!=null && r.mate!=null){
4467 				Unit u2=(Unit)r.mate.obj;
4468 				assert(u2!=this);
4469 				assert(u2.valid());
4470 				if(!u2.clustered()){
4471 					u2.setClustered(true);
4472 					cluster.add(u2);
4473 					added++;
4474 				}
4475 			}
4476 
4477 			if(overlapList!=null){
4478 				for(Overlap o : overlapList){
4479 					Unit u2=(o.u1==this ? o.u2 : o.u1);
4480 					assert(o.u1==this || o.u2==this);
4481 					assert(u2!=this);
4482 					assert(u2.valid());
4483 					if(!u2.clustered()){
4484 						u2.setClustered(true);
4485 						cluster.add(u2);
4486 						added++;
4487 					}
4488 				}
4489 			}
4490 			return added;
4491 		}
4492 
4493 		public boolean isTransitive(){
4494 			assert(valid());
4495 			if(overlapList==null || overlapList.size()==0){return true;}
4496 			for(Overlap o : overlapList){
4497 				assert(o.u1==this || o.u2==this);
4498 				Unit u2=(o.u1==this ? o.u2 : o.u1);
4499 				assert(u2!=this);
4500 				if(u2.overlapList==null){
4501 					return false;
4502 				}else{
4503 					boolean found=false;
4504 					for(Overlap o2 : u2.overlapList){
4505 						if(o2.u1==this || o2.u2==this){
4506 							found=true; break;
4507 						}
4508 					}
4509 					if(!found){return false;}
4510 				}
4511 			}
4512 			return true;
4513 		}
4514 
4515 		public boolean isPerfectlyTransitive(){
4516 			assert(valid());
4517 			if(overlapList==null || overlapList.size()==0){return true;}
4518 			for(Overlap o : overlapList){
4519 				assert(o.u1==this || o.u2==this);
4520 				Unit u2=(o.u1==this ? o.u2 : o.u1);
4521 				assert(u2!=this);
4522 				if(u2.overlapList==null){
4523 					return false;
4524 				}else{
4525 					boolean found=false;
4526 					for(Overlap o2 : u2.overlapList){
4527 						if(o2==o){
4528 							found=true; break;
4529 						}
4530 					}
4531 					if(!found){return false;}
4532 				}
4533 			}
4534 			return true;
4535 		}
4536 
4537 		public boolean isNonRedundant(){
4538 			assert(valid());
4539 			if(overlapList==null || overlapList.size()==0){return true;}
4540 			for(int i=0; i<overlapList.size(); i++){
4541 				Overlap a=overlapList.get(i);
4542 				for(int j=0; j<overlapList.size(); j++){
4543 					Overlap b=overlapList.get(j);
4544 					if((i==j)!=(a.equals(b))){
4545 						return false;
4546 					}
4547 				}
4548 			}
4549 			return true;
4550 		}
4551 
4552 		public boolean contains(Unit u2, int loc, LongM key, BandedAligner bandy, int tableNum) {
4553 			if(verbose){System.err.println("contains: Considering key "+key+", unit "+u2);}
4554 			if(u2.length()>this.length()){return false;} //Smaller things cannot contain larger things
4555 			if(u2.length()==this.length()){//For equal size, only one can contain the other the other
4556 				boolean pass=false;
4557 				if(!pass && u2.unitID>this.unitID){return false;}else if(u2.unitID<this.unitID){pass=true;}
4558 				if(!pass && u2.r.numericID>this.r.numericID){return false;}else if(u2.r.numericID<this.r.numericID){pass=true;}
4559 				if(!pass && u2.code1>this.code1){return false;}else if(u2.code1<this.code1){pass=true;}
4560 				if(!pass && u2.r.bases.hashCode()>this.r.bases.hashCode()){return false;}else if(u2.r.bases.hashCode()<this.r.bases.hashCode()){pass=true;}
4561 				if(!pass){return false;}
4562 			}
4563 			if(minLengthPercent>0 && (u2.length()*100f/length())<minLengthPercent){return false;}
4564 			assert(u2.code1!=code1 || u2.code2!=code2 || u2.length()!=length() || (r!=null && r.mate!=null) || //REQUIRE_MATCHING_NAMES ||
4565 					(canonical()==u2.canonical() ? (u2.prefix1!=prefix1 && u2.suffix1!=suffix1) : (u2.prefix1!=suffix1 && u2.suffix1!=prefix1))) :
4566 						"Collision? \n"+this+"\n"+u2+"\n"+r+"\n"+u2.r;
4567 
4568 			if(tableNum==0){
4569 				if(key.value()==u2.prefix1){
4570 					if(verbose){System.err.println("Containment A1");}
4571 					if(containsForward(u2, loc-k2, bandy, tableNum==0) || containsReverseRC(u2, loc, bandy, tableNum==0)){return true;}
4572 				}
4573 				if(key.value()==u2.suffix1){
4574 					if(verbose){System.err.println("Containment B1");}
4575 					if(containsReverse(u2, loc, bandy, tableNum==0) || containsForwardRC(u2, loc-k2, bandy, tableNum==0)){return true;}
4576 				}
4577 			}else{
4578 				if(key.value()==u2.prefix2){
4579 					if(verbose){System.err.println("Containment A2");}
4580 					if(containsForward(u2, loc-k2-k, bandy, tableNum==0) || containsReverseRC(u2, loc+k, bandy, tableNum==0)){return true;}
4581 				}
4582 				if(key.value()==u2.suffix2){
4583 					if(verbose){System.err.println("Containment B2");}
4584 					if(containsReverse(u2, loc+k, bandy, tableNum==0) || containsForwardRC(u2, loc-k2-k, bandy, tableNum==0)){return true;}
4585 				}
4586 			}
4587 			return false;
4588 		}
4589 
4590 		private boolean containsForward(Unit u2, int start, BandedAligner bandy, boolean earlyExit) {
4591 			if(start+u2.length()>length() || start<0 || start>=length()){return false;}
4592 //			if(true){return false;}
4593 			if(u2.r!=null){
4594 				final byte[] a=bases(), b=u2.bases();
4595 				int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4596 
4597 				for(int i=start, j=0; j<b.length; i++, j++){
4598 					byte aa=a[i];
4599 					byte bb=b[j];
4600 					if(aa!=bb){
4601 						if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4602 							if(earlyExit && j<k2){return false;}
4603 							if((mismatches=mismatches+1)>maxMismatches){
4604 								if(bandy==null || maxEdits<1){return false;}
4605 								int edits=bandy.alignForward(b, a, 0, start, maxEdits, exact);
4606 								assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4607 								return edits<=maxEdits && bandy.score()>4*edits;
4608 							}
4609 						}
4610 					}
4611 				}
4612 				return true;
4613 			}else{
4614 				assert(false) : "TODO: Verify by hashing and checking both tips";
4615 				return false;
4616 			}
4617 		}
4618 
4619 		private boolean containsForwardRC(Unit u2, int start, BandedAligner bandy, boolean earlyExit) {
4620 			if(ignoreReverseComplement){return false;}
4621 			if(start+u2.length()>length() || start<0 || start>=length()){return false;}
4622 //			if(true){return false;}
4623 			if(u2.r!=null){
4624 				final byte[] a=bases(), b=u2.bases();
4625 				int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4626 
4627 				for(int i=start, j=b.length-1, iprefix=start+k2; j>=0; i++, j--){
4628 					byte aa=a[i];
4629 					byte bb=baseToComplementExtended[b[j]];
4630 					if(aa!=bb){
4631 						if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4632 							if(earlyExit && i<iprefix){return false;}
4633 							if((mismatches=mismatches+1)>maxMismatches){
4634 								if(bandy==null || maxEdits<1){return false;}
4635 								int edits=bandy.alignForwardRC(b, a, b.length-1, start, maxEdits, exact);
4636 								assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4637 								return edits<=maxEdits && bandy.score()>4*edits;
4638 							}
4639 						}
4640 					}
4641 				}
4642 				return true;
4643 			}else{
4644 				assert(false) : "TODO: Verify by hashing and checking both tips";
4645 				return false;
4646 			}
4647 		}
4648 
4649 		private boolean containsReverse(Unit u2, int start, BandedAligner bandy, boolean earlyExit) {
4650 			if(start+1<u2.length() || start<0 || start>=length()){return false;}
4651 //			if(true){return false;}
4652 			if(u2.r!=null){
4653 				final byte[] a=bases(), b=u2.bases();
4654 				int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4655 
4656 				for(int i=start, j=b.length-1, iprefix=start-k2; j>=0; i--, j--){
4657 					byte aa=a[i];
4658 					byte bb=b[j];
4659 					if(aa!=bb){
4660 						if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4661 							if(earlyExit && i>iprefix){return false;}
4662 							if((mismatches=mismatches+1)>maxMismatches){
4663 								if(bandy==null || maxEdits<1){return false;}
4664 								int edits=bandy.alignReverse(b, a, b.length-1, start, maxEdits, exact);
4665 								assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4666 								return edits<=maxEdits && bandy.score()>4*edits;
4667 							}
4668 						}
4669 					}
4670 				}
4671 				return true;
4672 			}else{
4673 				assert(false) : "TODO: Verify by hashing and checking both tips";
4674 				return false;
4675 			}
4676 		}
4677 
4678 		private boolean containsReverseRC(Unit u2, int start, BandedAligner bandy, boolean earlyExit) {
4679 			if(ignoreReverseComplement){return false;}
4680 			if(start+1<u2.length() || start<0 || start>=length()){return false;}
4681 //			if(true){return false;}
4682 			if(u2.r!=null){
4683 				final byte[] a=bases(), b=u2.bases();
4684 				int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4685 
4686 				for(int i=start, j=0; j<b.length; i--, j++){
4687 					byte aa=a[i];
4688 					byte bb=baseToComplementExtended[b[j]];
4689 					if(aa!=bb){
4690 						if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4691 							if(earlyExit && j<k2){return false;}
4692 							if((mismatches=mismatches+1)>maxMismatches){
4693 								if(bandy==null || maxEdits<1){return false;}
4694 								int edits=bandy.alignReverseRC(b, a, 0, start, maxEdits, exact);
4695 								assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4696 								return edits<=maxEdits && bandy.score()>4*edits;
4697 							}
4698 						}
4699 					}
4700 				}
4701 				return true;
4702 			}else{
4703 				assert(false) : "TODO: Verify by hashing and checking both tips";
4704 				return false;
4705 			}
4706 		}
4707 
4708 
4709 		public boolean depthCongruent(int aa, int bb){
4710 			if(aa<5 && bb<5){return true;}
4711 			final int a=Tools.max(1, Tools.min(aa, bb));
4712 			final int b=Tools.max(aa, bb);
4713 			return a*depthRatio>=b;
4714 		}
4715 
4716 		public boolean overlaps(Unit u2, int loc, LongM key, BandedAligner bandy, int tableNum, int editLimit) {
4717 //			return makeOverlap(u2, loc, key, bandy, earlyExit)!=null;
4718 
4719 //			assert(false) : "TODO";
4720 			if(verbose){System.err.println("overlaps: Considering key "+key+", unit "+u2);}
4721 			if(parseDepth && !depthCongruent(depth, u2.depth)){return false;}
4722 			if(minLengthPercent>0){
4723 				final int len1=length(), len2=u2.length();
4724 				if(Tools.min(len1, len2)*100f/Tools.max(len1, len2)<minLengthPercent){return false;}
4725 			}
4726 			assert(u2.code1!=code1 || u2.code2!=code2 || u2.length()!=length() ||
4727 					(canonical()==u2.canonical() ? (u2.prefix1!=prefix1 && u2.suffix1!=suffix1) : (u2.prefix1!=suffix1 && u2.suffix1!=prefix1))) :
4728 						"Collision? \n"+this+"\n"+u2+"\n"+r+"\n"+u2.r;
4729 
4730 
4731 			if(tableNum==0){
4732 				if(key.value()==u2.prefix1){
4733 					if(verbose){System.err.println("Testing overlaps A1");}
4734 					if(overlapsForward(u2, loc-k2, 0, bandy, tableNum==0, editLimit)){
4735 						if(verbose){System.err.println("Found Overlap A1F");}
4736 						return true;
4737 					}
4738 					if(overlapsReverseRC(u2, loc, 0, bandy, tableNum==0, editLimit)){
4739 						if(verbose){System.err.println("Found Overlap A1R");}
4740 						return true;
4741 					}
4742 					if(verbose){System.err.println("No Overlap.");}
4743 				}
4744 				if(key.value()==u2.suffix1){
4745 					if(verbose){System.err.println("Testing overlaps B1");}
4746 					if(verbose){System.err.println("Testing overlaps B1F");}
4747 					if(overlapsForwardRC(u2, loc-k2, u2.length()-1, bandy, tableNum==0, editLimit)){
4748 						if(verbose){System.err.println("Found Overlap B1F");}
4749 						return true;
4750 					}
4751 					if(verbose){System.err.println("Testing overlaps B1R");}
4752 					if(overlapsReverse(u2, loc, u2.length()-1, bandy, tableNum==0, editLimit)){
4753 						if(verbose){System.err.println("Found Overlap B1R");}
4754 						return true;
4755 					}
4756 					if(verbose){System.err.println("No Overlap.");}
4757 				}
4758 			}else{
4759 				if(key.value()==u2.prefix2){
4760 					if(verbose){System.err.println("Testing overlaps A2");}
4761 					if(overlapsForward(u2, loc-k2-k, 0, bandy, tableNum==0, editLimit)){
4762 						if(verbose){System.err.println("Found Overlap A2F");}
4763 						return true;
4764 					}
4765 					if(overlapsReverseRC(u2, loc+k, 0, bandy, tableNum==0, editLimit)){
4766 						if(verbose){System.err.println("Found Overlap A2R");}
4767 						return true;
4768 					}
4769 					if(verbose){System.err.println("No Overlap.");}
4770 				}
4771 				if(key.value()==u2.suffix2){
4772 					if(verbose){System.err.println("Testing overlaps B2");}
4773 					if(overlapsForwardRC(u2, loc-k2-k, u2.length()-1, bandy, tableNum==0, editLimit)){
4774 						if(verbose){System.err.println("Found Overlap B2F");}
4775 						return true;
4776 					}
4777 					if(overlapsReverse(u2, loc+k, u2.length()-1, bandy, tableNum==0, editLimit)){
4778 						if(verbose){System.err.println("Found Overlap B2R");}
4779 						return true;
4780 					}
4781 					if(verbose){System.err.println("No Overlap.");}
4782 				}
4783 			}
4784 
4785 			return false;
4786 		}
4787 
4788 		/**
4789 		 * @param u2
4790 		 * @param loc
4791 		 * @param key
4792 		 * @return
4793 		 */
4794 		protected Overlap makeOverlap(Unit u2, int loc, LongM key, BandedAligner bandy, int tableNum) {
4795 			if(verbose){System.err.println("makeOverlap: Considering key "+key+", unit "+u2);}
4796 			if(parseDepth && !depthCongruent(depth, u2.depth)){return null;}
4797 			if(minLengthPercent>0){
4798 				final int len1=length(), len2=u2.length();
4799 				if(Tools.min(len1, len2)*100f/Tools.max(len1, len2)<minLengthPercent){return null;}
4800 			}
4801 			assert(u2.code1!=code1 || u2.code2!=code2 || u2.length()!=length() || (r!=null && r.mate!=null) ||
4802 					(canonical()==u2.canonical() ? (u2.prefix1!=prefix1 && u2.suffix1!=suffix1) : (u2.prefix1!=suffix1 && u2.suffix1!=prefix1))) :
4803 						"Collision? \n"+this+"\n"+u2+"\n"+r+"\n"+u2.r;
4804 
4805 
4806 			Overlap o=null;
4807 			if(tableNum==0){
4808 				if(key.value()==u2.prefix1){
4809 					if(verbose){System.err.println("\nTesting makeOverlap A1F");}
4810 					if((o=makeOverlapForward(u2, loc-k2, bandy, tableNum==0))!=null){
4811 						if(verbose){System.err.println("Made Overlap A1F");}
4812 						return o;
4813 					}
4814 					if(verbose){System.err.println("\nTesting makeOverlap A1R");}
4815 					if((o=makeOverlapReverseRC(u2, loc, bandy, tableNum==0))!=null){
4816 						if(verbose){System.err.println("Made Overlap A1R");}
4817 						return o;
4818 					}
4819 					if(verbose){System.err.println("No Overlap.");}
4820 				}
4821 				if(key.value()==u2.suffix1){
4822 					if(verbose){System.err.println("\nTesting makeOverlap B1F");}
4823 					if((o=makeOverlapForwardRC(u2, loc-k2, bandy, tableNum==0))!=null){
4824 						if(verbose){System.err.println("Made Overlap B1F");}
4825 						return o;
4826 					}
4827 					if(verbose){System.err.println("\nTesting makeOverlap B1R");}
4828 					if((o=makeOverlapReverse(u2, loc, bandy, tableNum==0))!=null){
4829 						if(verbose){System.err.println("Made Overlap B1R");}
4830 						return o;
4831 					}
4832 					if(verbose){System.err.println("No Overlap.");}
4833 				}
4834 			}else{
4835 				if(key.value()==u2.prefix2){
4836 					if(verbose){System.err.println("\nTesting makeOverlap A2F");}
4837 					if((o=makeOverlapForward(u2, loc-k2-k, bandy, tableNum==0))!=null){
4838 						if(verbose){System.err.println("Made Overlap A2F");}
4839 						return o;
4840 					}
4841 					if(verbose){System.err.println("\nTesting makeOverlap A2R");}
4842 					if((o=makeOverlapReverseRC(u2, loc+k, bandy, tableNum==0))!=null){
4843 						if(verbose){System.err.println("Made Overlap A2R");}
4844 						return o;
4845 					}
4846 					if(verbose){System.err.println("No Overlap.");}
4847 				}
4848 				if(key.value()==u2.suffix2){
4849 					if(verbose){System.err.println("\nTesting makeOverlap B2F");}
4850 					if((o=makeOverlapForwardRC(u2, loc-k2-k, bandy, tableNum==0))!=null){
4851 						if(verbose){System.err.println("Made Overlap B2F");}
4852 						return o;
4853 					}
4854 					if(verbose){System.err.println("\nTesting makeOverlap B2R");}
4855 					if((o=makeOverlapReverse(u2, loc+k, bandy, tableNum==0))!=null){
4856 						if(verbose){System.err.println("Made Overlap B2R");}
4857 						return o;
4858 					}
4859 					if(verbose){System.err.println("No Overlap.");}
4860 				}
4861 			}
4862 			return o;
4863 		}
4864 
4865 		private boolean overlapsForward(Unit u2, int start1, int start2, BandedAligner bandy, boolean earlyExit, int maxEdits) {
4866 			if(verbose){System.err.println("overlapsForward(u1="+this.name()+", u2="+u2.name()+", start1="+start1+", start2="+start2+", earlyExit="+earlyExit+")");}
4867 
4868 			final int len1=length(), len2=u2.length();
4869 			if(start1<0){
4870 				start2-=start1;
4871 				start1=0;
4872 				if(verbose){System.err.println("Modified: start1="+start1+", start2="+start2);}
4873 			}
4874 			int overlapLength=Tools.min(len1-start1, len2-start2);
4875 			int overlapLength2=Tools.max(len1-start1, len2-start2);
4876 			int stop1=start1+overlapLength-1, stop2=start2+overlapLength-1;
4877 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
4878 
4879 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
4880 				if(verbose){
4881 					System.err.println("Side block. allowAllContainedOverlaps="+allowAllContainedOverlaps+", minOverlapCluster="+minOverlapCluster);
4882 					System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
4883 							", overlapLen="+overlapLength+", maxEdits="+maxEdits);
4884 				}
4885 				if(overlapLength2<minOverlapCluster){return false;}
4886 				if(minOverlapPercentCluster>0f && (overlapLength2*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return false;}
4887 			}
4888 
4889 			final byte[] a=bases(), b=u2.bases();
4890 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
4891 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, overlapLength);
4892 
4893 			if(verbose){
4894 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
4895 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
4896 			}
4897 
4898 			for(int i=start1, j=start2; j<=stop2; i++, j++){
4899 				byte aa=a[i];
4900 				byte bb=b[j];
4901 				if(aa!=bb){
4902 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4903 						if(earlyExit && j<k2){return false;}
4904 						mismatches++;
4905 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
4906 						if(mismatches>maxMismatches){
4907 							if(bandy==null || maxEdits<1){return false;}
4908 							if(verbose){System.err.println("Mismatches exceeded maximum, attempting banded alignment.");}
4909 							int edits=bandy.alignForward(b, a, 0, start1, maxEdits, exact);
4910 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4911 							stop2=bandy.lastQueryLoc;
4912 							stop1=bandy.lastRefLoc;
4913 							return edits<=maxEdits && bandy.score()>2*edits; //Set at 2*edits instead of 4*edits to prevent assertion errors when reversing alignment
4914 						}
4915 					}
4916 				}
4917 			}
4918 			return true;
4919 		}
4920 
4921 		private boolean overlapsForwardRC(Unit u2, int start1, int start2, BandedAligner bandy, boolean earlyExit, int maxEdits) {
4922 			if(verbose){System.err.println("overlapsForwardRC(u1="+this.name()+", u2="+u2.name()+", start1="+start1+", start2="+start2+", earlyExit="+earlyExit+")");}
4923 
4924 			if(ignoreReverseComplement){return false;}
4925 			final int len1=length(), len2=u2.length();
4926 			if(start1<0){
4927 				start2+=start1;
4928 				start1=0;
4929 				if(verbose){System.err.println("Modified: start1="+start1+", start2="+start2);}
4930 			}
4931 			final int overlapLength=Tools.min(len1-start1, start2+1);
4932 			final int overlapLength2=Tools.max(len1-start1, start2+1);
4933 			int stop1=start1+overlapLength-1, stop2=start2-overlapLength+1;
4934 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
4935 
4936 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
4937 				if(overlapLength2<minOverlapCluster){return false;}
4938 				if(minOverlapPercentCluster>0f && (overlapLength2*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return false;}
4939 			}
4940 
4941 			final byte[] a=bases(), b=u2.bases();
4942 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
4943 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4944 
4945 			if(verbose){
4946 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
4947 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
4948 			}
4949 
4950 			for(int i=start1, j=start2, iprefix=start1+k2; i<=stop1; i++, j--){
4951 				byte aa=a[i];
4952 //				assert(b[j]>=0) : new String(b); //TODO: remove //123
4953 				byte bb=baseToComplementExtended[b[j]];
4954 				if(aa!=bb){
4955 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
4956 						if(earlyExit && i<iprefix){return false;}
4957 						mismatches++;
4958 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
4959 						if(mismatches>maxMismatches){
4960 							if(bandy==null || maxEdits<1){return false;}
4961 							if(verbose){System.err.println("Mismatches exceeded maximum, attempting banded alignment.");}
4962 							int edits=bandy.alignForwardRC(b, a, b.length-1, start1, maxEdits, exact);
4963 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
4964 							stop2=bandy.lastQueryLoc;
4965 							stop1=bandy.lastRefLoc;
4966 							return edits<=maxEdits && bandy.score()>2*edits; //Set at 2*edits instead of 4*edits to prevent assertion errors when reversing alignment
4967 						}
4968 					}
4969 				}
4970 			}
4971 			return true;
4972 		}
4973 
4974 		private boolean overlapsReverse(Unit u2, int start1, int start2, BandedAligner bandy, boolean earlyExit, int maxEdits) {
4975 			if(verbose){System.err.println("overlapsReverse(u1="+this.name()+", u2="+u2.name()+", start1="+start1+", start2="+start2+", earlyExit="+earlyExit+")");}
4976 
4977 			final int len1=length(), len2=u2.length();
4978 			if(start1>=len1){
4979 				start2-=(start1-len1+1);
4980 				start1=len1-1;
4981 				if(verbose){System.err.println("Modified: start1="+start1+", start2="+start2);}
4982 			}
4983 			final int overlapLength=Tools.min(start1+1, start2+1);
4984 			final int overlapLength2=Tools.max(start1+1, start2+1);
4985 			int stop1=start1-overlapLength+1, stop2=start2-overlapLength+1;
4986 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
4987 
4988 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
4989 				if(overlapLength2<minOverlapCluster){return false;}
4990 				if(minOverlapPercentCluster>0f && (overlapLength2*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return false;}
4991 			}
4992 
4993 			final byte[] a=bases(), b=u2.bases();
4994 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
4995 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
4996 
4997 			if(verbose){
4998 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
4999 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
5000 			}
5001 
5002 			for(int i=start1, j=start2, iprefix=start1-k2; i>=stop1; i--, j--){
5003 				byte aa=a[i];
5004 				byte bb=b[j];
5005 				if(aa!=bb){
5006 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
5007 						if(earlyExit && i>iprefix){return false;}
5008 						mismatches++;
5009 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
5010 						if(mismatches>maxMismatches){
5011 							if(bandy==null || maxEdits<1){return false;}
5012 							if(verbose){System.err.println("Mismatches exceeded maximum, attempting banded alignment.");}
5013 							int edits=bandy.alignReverse(b, a, b.length-1, start1, maxEdits, exact);
5014 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
5015 							stop2=bandy.lastQueryLoc;
5016 							stop1=bandy.lastRefLoc;
5017 							return edits<=maxEdits && bandy.score()>2*edits; //Set at 2*edits instead of 4*edits to prevent assertion errors when reversing alignment
5018 						}
5019 					}
5020 				}
5021 			}
5022 			return true;
5023 		}
5024 
5025 		private boolean overlapsReverseRC(Unit u2, int start1, int start2, BandedAligner bandy, boolean earlyExit, int maxEdits) {
5026 			if(verbose){System.err.println("overlapsReverseRC(u1="+this.name()+", u2="+u2.name()+", start1="+start1+", start2="+start2+", earlyExit="+earlyExit+")");}
5027 
5028 			if(ignoreReverseComplement){return false;}
5029 			final int len1=length(), len2=u2.length();
5030 			if(start1>=len1){
5031 				start2+=(start1-len1+1);
5032 				start1=len1-1;
5033 				if(verbose){System.err.println("Modified: start1="+start1+", start2="+start2);}
5034 			}
5035 			final int overlapLength=Tools.min(start1+1, len2-start2);
5036 			final int overlapLength2=Tools.max(start1+1, len2-start2);
5037 			int stop1=start1-overlapLength+1, stop2=start2+overlapLength-1;
5038 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
5039 
5040 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
5041 				if(overlapLength2<minOverlapCluster){return false;}
5042 				if(minOverlapPercentCluster>0f && (overlapLength2*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return false;}
5043 			}
5044 
5045 			final byte[] a=bases(), b=u2.bases();
5046 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
5047 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
5048 
5049 			if(verbose){
5050 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
5051 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
5052 			}
5053 
5054 			for(int i=start1, j=start2; j<=stop2; i--, j++){
5055 				byte aa=a[i];
5056 				byte bb=baseToComplementExtended[b[j]];
5057 				if(aa!=bb){
5058 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
5059 						if(earlyExit && j<k2){return false;}
5060 						mismatches++;
5061 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
5062 						if(mismatches>maxMismatches){
5063 							if(bandy==null || maxEdits<1){return false;}
5064 							if(verbose){System.err.println("Mismatches exceeded maximum, attempting banded alignment.");}
5065 							int edits=bandy.alignReverseRC(b, a, 0, start1, maxEdits, exact);
5066 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
5067 							stop2=bandy.lastQueryLoc;
5068 							stop1=bandy.lastRefLoc;
5069 							return edits<=maxEdits && bandy.score()>2*edits; //Set at 2*edits instead of 4*edits to prevent assertion errors when reversing alignment
5070 						}
5071 					}
5072 				}
5073 			}
5074 			return true;
5075 		}
5076 
5077 
5078 
5079 		private Overlap makeOverlapForward(Unit u2, int start1, BandedAligner bandy, boolean earlyExit) {
5080 			if(verbose){System.err.println("makeOverlapForward(u1="+this.name()+", u2="+u2.name()+", start="+start1+", earlyExit="+earlyExit+")");}
5081 			final int len1=length(), len2=u2.length();
5082 			int start2=0;
5083 			if(start1<0){
5084 				start2-=start1;
5085 				start1=0;
5086 			}
5087 			final int overlapLength=Tools.min(len1-start1, len2-start2);
5088 			final int overlapLength2=Tools.max(len1-start1, len2-start2);
5089 			int stop1=start1+overlapLength-1, stop2=start2+overlapLength-1;
5090 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
5091 
5092 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
5093 				if(overlapLength<minOverlapCluster){return null;}
5094 				if(minOverlapPercentCluster>0f && (overlapLength*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return null;}
5095 			}
5096 
5097 			final byte[] a=bases(), b=u2.bases();
5098 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
5099 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, overlapLength);
5100 
5101 			if(verbose){
5102 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
5103 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
5104 			}
5105 
5106 			for(int i=start1, j=start2; j<=stop2; i++, j++){
5107 				byte aa=a[i];
5108 				byte bb=b[j];
5109 				if(aa!=bb){
5110 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
5111 						if(earlyExit && j<k2){return null;}
5112 						mismatches++;
5113 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
5114 						if(mismatches>1 && bandy!=null){
5115 							if(maxEdits<1){return null;}
5116 							if(verbose){System.err.println("mismatches exceeded 1, attempting banded alignment.");}
5117 							int edits=bandy.alignForward(b, a, start2, start1, maxEdits, exact);
5118 							if(edits>maxEdits || bandy.score()<=4*edits){
5119 								if(verbose){System.err.println((edits>maxEdits ? "Too many edits" : "Alignment score too low")+"; returning null.");}
5120 								return null;
5121 							}
5122 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
5123 							stop2=bandy.lastQueryLoc;
5124 							stop1=bandy.lastRefLoc;
5125 //							if(bandy.lastOffset>0){//Ref longer than query
5126 //								for(int k=0; k<bandy.lastOffset; k++){
5127 //									if(stop1+1<=len1){stop1++;}
5128 //									else{stop2--;}//I don't think this can happen
5129 //								}
5130 //							}else if(bandy.lastOffset<0){//Query longer than ref
5131 //								for(int k=0; k>bandy.lastOffset; k--){
5132 //									if(stop2+1<=len2){stop2++;}
5133 //									else{stop1--;}
5134 //								}
5135 //							}
5136 							return new Overlap(this, u2, FORWARD, start1, start2, stop1, stop2, overlapLength, edits, edits, bandy);
5137 						}else if(mismatches>maxMismatches){return null;}
5138 					}
5139 				}
5140 			}
5141 			return new Overlap(this, u2, FORWARD, start1, start2, stop1, stop2, overlapLength, mismatches, 0, bandy);
5142 		}
5143 
5144 		private Overlap makeOverlapForwardRC(Unit u2, int start1, BandedAligner bandy, boolean earlyExit) {
5145 			if(verbose){System.err.println("makeOverlapForwardRC(u1="+this.name()+", u2="+u2.name()+", start="+start1+", earlyExit="+earlyExit+")");}
5146 			if(ignoreReverseComplement){return null;}
5147 			final int len1=length(), len2=u2.length();
5148 			int start2=len2-1;
5149 			if(start1<0){
5150 				start2+=start1;
5151 				start1=0;
5152 			}
5153 			final int overlapLength=Tools.min(len1-start1, start2+1);
5154 			final int overlapLength2=Tools.max(len1-start1, start2+1);
5155 			int stop1=start1+overlapLength-1, stop2=start2-overlapLength+1;
5156 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
5157 
5158 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
5159 				if(overlapLength<minOverlapCluster){return null;}
5160 				if(minOverlapPercentCluster>0f && (overlapLength*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return null;}
5161 			}
5162 
5163 			final byte[] a=bases(), b=u2.bases();
5164 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
5165 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
5166 
5167 			for(int i=start1, j=start2, iprefix=start1+k2; i<=stop1; i++, j--){
5168 				byte aa=a[i];
5169 				byte bb=baseToComplementExtended[b[j]];
5170 				if(aa!=bb){
5171 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
5172 						if(earlyExit && i<iprefix){return null;}
5173 						mismatches++;
5174 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
5175 						if(mismatches>1 && bandy!=null){
5176 							if(maxEdits<1){return null;}
5177 							if(verbose){System.err.println("mismatches exceeded 1, attempting banded alignment.");}
5178 							int edits=bandy.alignForwardRC(b, a, start2, start1, maxEdits, exact);
5179 							if(edits>maxEdits || bandy.score()<=4*edits){
5180 								if(verbose){System.err.println((edits>maxEdits ? "Too many edits" : "Alignment score too low")+"; returning null.");}
5181 								return null;
5182 							}
5183 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
5184 							stop2=bandy.lastQueryLoc;
5185 							stop1=bandy.lastRefLoc;
5186 //							if(bandy.lastOffset>0){//Ref longer than query
5187 //								for(int k=0; k<bandy.lastOffset; k++){
5188 //									if(stop1+1<=len1){stop1++;}
5189 //									else{stop2++;}//I don't think this can happen
5190 //								}
5191 //							}else if(bandy.lastOffset<0){//Query longer than ref
5192 //								for(int k=0; k>bandy.lastOffset; k--){
5193 //									if(stop2>0){stop2--;}
5194 //									else{stop1--;}
5195 //								}
5196 //							}
5197 							return new Overlap(this, u2, FORWARDRC, start1, start2, stop1, stop2, overlapLength, edits, edits, bandy);
5198 						}else if(mismatches>maxMismatches){return null;}
5199 					}
5200 				}
5201 			}
5202 			return new Overlap(this, u2, FORWARDRC, start1, start2, stop1, stop2, overlapLength, mismatches, 0, bandy);
5203 		}
5204 
5205 		private Overlap makeOverlapReverse(Unit u2, int start1, BandedAligner bandy, boolean earlyExit) {
5206 			if(verbose){System.err.println("makeOverlapReverse(u1="+this.name()+", u2="+u2.name()+", start="+start1+", earlyExit="+earlyExit+")");}
5207 
5208 			final int len1=length(), len2=u2.length();
5209 			int start2=len2-1;
5210 			if(start1>=len1){
5211 				start2-=(start1-len1+1);
5212 				start1=len1-1;
5213 			}
5214 			final int overlapLength=Tools.min(start1+1, start2+1);
5215 			final int overlapLength2=Tools.max(start1+1, start2+1);
5216 			int stop1=start1-overlapLength+1, stop2=start2-overlapLength+1;
5217 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
5218 
5219 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
5220 				if(overlapLength<minOverlapCluster){return null;}
5221 				if(minOverlapPercentCluster>0f && (overlapLength*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return null;}
5222 			}
5223 
5224 			final byte[] a=bases(), b=u2.bases();
5225 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
5226 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
5227 
5228 			if(verbose){
5229 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
5230 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
5231 			}
5232 
5233 			for(int i=start1, j=start2, iprefix=start1-k2; i>=stop1; i--, j--){
5234 				byte aa=a[i];
5235 				byte bb=b[j];
5236 				if(aa!=bb){
5237 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
5238 						if(earlyExit && i>iprefix){return null;}
5239 						mismatches++;
5240 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
5241 						if(mismatches>1 && bandy!=null){
5242 							if(maxEdits<1){return null;}
5243 							if(verbose){System.err.println("mismatches exceeded 1, attempting banded alignment.");}
5244 							int edits=bandy.alignReverse(b, a, start2, start1, maxEdits, exact);
5245 							if(edits>maxEdits || bandy.score()<=4*edits){
5246 								if(verbose){System.err.println((edits>maxEdits ? "Too many edits" : "Alignment score too low")+"; returning null.");}
5247 								return null;
5248 							}
5249 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
5250 							stop2=bandy.lastQueryLoc;
5251 							stop1=bandy.lastRefLoc;
5252 //							if(bandy.lastOffset>0){//Ref longer than query
5253 //								for(int k=0; k<bandy.lastOffset; k++){
5254 //									if(stop1>0){stop1--;}
5255 //									else{stop2++;}//I don't think this can happen
5256 //								}
5257 //							}else if(bandy.lastOffset<0){//Query longer than ref
5258 //								for(int k=0; k>bandy.lastOffset; k--){
5259 //									if(stop2>0){stop2--;}
5260 //									else{stop1++;}
5261 //								}
5262 //							}
5263 							return new Overlap(this, u2, REVERSE, start1, start2, stop1, stop2, overlapLength, edits, edits, bandy);
5264 						}else if(mismatches>maxMismatches){return null;}
5265 					}
5266 				}
5267 			}
5268 			return new Overlap(this, u2, REVERSE, start1, start2, stop1, stop2, overlapLength, mismatches, 0, bandy);
5269 		}
5270 
5271 		private Overlap makeOverlapReverseRC(Unit u2, int start1, BandedAligner bandy, boolean earlyExit) {
5272 			if(verbose){System.err.println("makeOverlapReverseRC(u1="+this.name()+", u2="+u2.name()+", start="+start1+", earlyExit="+earlyExit+")");}
5273 			if(ignoreReverseComplement){return null;}
5274 			final int len1=length(), len2=u2.length();
5275 			int start2=0;
5276 			if(start1>=len1){
5277 				start2+=(start1-len1+1);
5278 				start1=len1-1;
5279 			}
5280 			final int overlapLength=Tools.min(start1+1, len2-start2);
5281 			final int overlapLength2=Tools.max(start1+1, len2-start2);
5282 			int stop1=start1-overlapLength+1, stop2=start2+overlapLength-1;
5283 			if(verbose){System.err.println("Calculated stop1="+stop1+", stop2="+stop2+", overlapLength="+overlapLength);}
5284 
5285 			if(!allowAllContainedOverlaps || overlapLength>Tools.min(len1, len2)){
5286 				if(overlapLength<minOverlapCluster){return null;}
5287 				if(minOverlapPercentCluster>0f && (overlapLength*100f/Tools.min(len1, len2))<minOverlapPercentCluster){return null;}
5288 			}
5289 
5290 			final byte[] a=bases(), b=u2.bases();
5291 			assert(a!=null && b!=null) : "Null bases for "+code1+" or "+u2.code1;
5292 			int mismatches=0, maxMismatches=calcMaxEdits(maxSubs, minIdentityMult, b.length);
5293 
5294 			if(verbose){
5295 				System.err.println("start1="+start1+", stop1="+stop1+", len1="+len1+", start2="+start2+", stop2="+stop2+", len2="+len2+
5296 						", overlapLen="+overlapLength+", maxMismatches="+maxMismatches+", maxEdits="+maxEdits);
5297 			}
5298 
5299 			for(int i=start1, j=start2; j<=stop2; i--, j++){
5300 				byte aa=a[i];
5301 				byte bb=baseToComplementExtended[b[j]];
5302 				if(aa!=bb){
5303 					if(exact || (AminoAcid.isFullyDefined(aa) && AminoAcid.isFullyDefined(bb))){
5304 						if(earlyExit && j<k2){return null;}
5305 						mismatches++;
5306 						if(verbose){System.err.println("i="+i+", j="+j+", "+(char)aa+"!="+(char)bb+", mismatches="+mismatches+"/"+maxMismatches);}
5307 						if(mismatches>1 && bandy!=null){
5308 							if(maxEdits<1){return null;}
5309 							if(verbose){System.err.println("mismatches exceeded 1, attempting banded alignment.");}
5310 							int edits=bandy.alignReverseRC(b, a, start2, start1, maxEdits, exact);
5311 							if(edits>maxEdits || bandy.score()<=4*edits){
5312 								if(verbose){System.err.println((edits>maxEdits ? "Too many edits" : "Alignment score too low")+"; returning null.");}
5313 								return null;
5314 							}
5315 							assert(b.length<k || a.length<k || bandy.lastRow>=k2 || edits>maxEdits) : b.length+", "+k+", "+bandy.lastRow+", "+edits;
5316 							stop2=bandy.lastQueryLoc;
5317 							stop1=bandy.lastRefLoc;
5318 //							if(bandy.lastOffset>0){//Ref longer than query
5319 //								for(int k=0; k<bandy.lastOffset; k++){
5320 //									if(stop1>0){stop1--;}
5321 //									else{stop2--;}//I don't think this can happen
5322 //								}
5323 //							}else if(bandy.lastOffset<0){//Query longer than ref
5324 //								for(int k=0; k>bandy.lastOffset; k--){
5325 //									if(stop2+1<=len2){stop2++;}
5326 //									else{stop1++;}
5327 //								}
5328 //							}
5329 							return new Overlap(this, u2, REVERSERC, start1, start2, stop1, stop2, overlapLength, edits, edits, bandy);
5330 						}else if(mismatches>maxMismatches){return null;}
5331 					}
5332 				}
5333 			}
5334 			return new Overlap(this, u2, REVERSERC, start1, start2, stop1, stop2, overlapLength, mismatches, 0, bandy);
5335 		}
5336 
5337 		@Override
5338 		public int compareTo(Unit b) {
5339 			int x=comparePairedRC(this, b);
5340 //			int y=comparePairedRC(b, this);
5341 //			boolean eq1=this.equals(b);
5342 //			boolean eq2=b.equals(this);
5343 //
5344 //			assert((x==y)==(x==0)) : x+", "+y+"\n"+this+"\n"+b;
5345 //			assert((x>0 == y<0) || (x==0 && y==0)) : x+", "+y+"\n"+this+"\n"+b;
5346 //
5347 //			assert(eq1==eq2): x+", "+y+"\n"+this+"\n"+b;
5348 //			assert(eq1==(x==0)): x+", "+y+"\n"+this+"\n"+b;
5349 //
5350 //			assert(eq1 || this!=b);
5351 //
5352 //			if(verbose){ //TODO: Remove
5353 //				System.err.println(this+"\n"+b+"\n"+this.r.toFastq()+"\n"+this.r.mate.toFastq()+"\n"+b.r.toFastq()+"\n"+b.r.mate.toFastq()+"\n");
5354 //				System.err.println("\n"+x+", "+y+", "+eq1+", "+eq2);
5355 //				verbose=false;
5356 //			}
5357 
5358 			return x;
5359 		}
5360 
5361 		@Override
5362 		public boolean equals(Object b){return equals((Unit)b);}
5363 		public boolean equals(Unit b){
5364 			boolean x=pairedEqualsRC(this, b);
5365 //			assert(x==pairedEqualsRC(b, this));
5366 //			assert(x==(comparePairedRC(this, b)==0));
5367 //			assert(x==(comparePairedRC(b, this)==0));
5368 //			assert(x || this!=b);
5369 //			System.err.println("\n****EQUALS?****:\n"+this+"\n"+b+"\n**** ****"); //TODO: Remove
5370 			return x;
5371 		}
5372 
5373 		@Override
5374 		public int hashCode(){
5375 			return (int)((code1^(code1>>>32))&0xFFFFFFFFL);
5376 		}
5377 
5378 		private synchronized void setValid(boolean b){
5379 			assert(b!=valid());
5380 //			if(!b){System.err.println("Setting invalid "+name());}
5381 			if(b){flags&=~INVALID_MASK;}
5382 			else{flags|=INVALID_MASK;}
5383 			assert(b==valid());
5384 		}
5385 
5386 		private synchronized void setClustered(boolean b){
5387 			assert(b!=clustered());
5388 			if(b){flags|=CLUSTER_MASK;}
5389 			else{flags&=~CLUSTER_MASK;}
5390 			assert(b==clustered());
5391 		}
5392 
5393 		private void setVisited(boolean b){
5394 			assert(b!=visited());
5395 			if(b){flags|=VISIT_MASK;}
5396 			else{flags&=~VISIT_MASK;}
5397 			assert(b==visited());
5398 		}
5399 
5400 		private synchronized void setCanonical(boolean b){
5401 			assert(b!=canonical());
5402 			if(b){flags|=CANON_MASK;}
5403 			else{flags&=~CANON_MASK;}
5404 			assert(b==canonical());
5405 			assert(r==null || b==isCanonical(r.bases));
5406 		}
5407 
5408 		private void setCanonicized(boolean b){
5409 			assert(b!=canonicized());
5410 			if(b){flags|=CANONICIZED_MASK;}
5411 			else{flags&=~CANONICIZED_MASK;}
5412 			assert(b==canonicized());
5413 		}
5414 
5415 		private synchronized void setCanonContradiction(boolean b){
5416 //			assert(b!=canonContradiction());
5417 			if(b){flags|=CANON_CONTRADICTION_MASK;}
5418 			else{flags&=~CANON_CONTRADICTION_MASK;}
5419 			assert(b==canonContradiction());
5420 		}
5421 
5422 		private synchronized void setOffset(int x){
5423 			offset=x;
5424 			setOffsetValid(true);
5425 		}
5426 
5427 		private synchronized void setOffsetValid(boolean b){
5428 			assert(!offsetValid());
5429 			if(b){flags|=OFFSET_VALID_MASK;}
5430 			else{flags&=~OFFSET_VALID_MASK;}
5431 			assert(b==offsetValid());
5432 		}
5433 
5434 		private synchronized void setOffsetContradiction(boolean b){
5435 //			assert(b!=offsetContradiction());
5436 			assert(offsetValid());
5437 			if(b){flags|=OFFSET_CONTRADICTION_MASK;}
5438 			else{flags&=~OFFSET_CONTRADICTION_MASK;}
5439 			assert(b==offsetContradiction());
5440 		}
5441 
5442 		private void reverseComplement(){
5443 			assert(r!=null);
5444 			r.reverseComplement();
5445 			long temp=prefix1;
5446 			prefix1=suffix1;
5447 			suffix1=temp;
5448 			temp=prefix2;
5449 			prefix2=suffix2;
5450 			suffix2=temp;
5451 			setCanonical(!canonical());
5452 		}
5453 
5454 		/** Return true if 'this' should be the first Unit in the overlap object */
5455 		public boolean firstInOverlap(Unit u2){
5456 			assert(this!=u2) : "\n"+this.r+"\n"+u2.r;
5457 			if(u2.length()!=length()){return u2.length()<length();}
5458 			if(u2.code1!=code1){return u2.code1<code1;}
5459 			if(u2.code2!=code2){return u2.code2<code2;}
5460 			int x=compareTo(u2);
5461 			assert(x!=0 || (r!=null && r.mate!=null));
5462 			if(x!=0){return x>=0;}
5463 			return r.numericID>=u2.r.numericID;
5464 		}
5465 
5466 		public final boolean inSet(){
5467 			if(subsetCount<2){return true;}
5468 			if(r.pairnum()>0){return ((Unit)r.mate.obj).inSet();}
5469 			return ((code1&Long.MAX_VALUE)%subsetCount)==subset;
5470 		}
5471 
5472 		public byte[] bases(){return r==null ? null : r.bases;}
5473 
5474 		public String name(){return r!=null ? r.id : null /*code+""*/;}
5475 		@Override
5476 		public String toString(){return "("+name()+","+code1+","+code2+","+length()+","+prefix1+","+suffix1+","+(canonical()?"c":"nc")+",d="+depth+")";}
5477 
5478 
5479 		public final Read r;
5480 		public final long code1;
5481 		public final long code2;
5482 		public long prefix1=-1;
5483 		public long suffix1=-1;
5484 		public long prefix2=-1;
5485 		public long suffix2=-1;
5486 		/** Distance of leftmost side of this read relative to leftmost side of root.
5487 		 * Assumes everything is in 'forward' orientation. */
5488 		public int offset=-999999999;
5489 		public int depth=1;
5490 //		private boolean valid=true;
5491 
5492 		public int unitID;
5493 
5494 		public ArrayList<Overlap> overlapList;
5495 
5496 		private long flags;
5497 		/** True if the original read orientation was canonical */
5498 		public final boolean canonical(){return (CANON_MASK&flags)!=0;}
5499 		/** True if this contig should be output, false if not */
5500 		public final boolean valid(){return (INVALID_MASK&flags)==0;}
5501 		/** Length of this contig */
5502 		public final int length(){return (int)(LEN_MASK&flags);}
5503 		/** Position of this contig relative to root */
5504 		public final int offset(){
5505 			assert(offsetValid());
5506 			return offset;
5507 		}
5508 		public int pairnum(){return (PAIRNUM_MASK&flags)==PAIRNUM_MASK ? 1 : 0;}
5509 
5510 		public void clearVolatileFlags(){
5511 			flags=flags&~(CANONICIZED_MASK|VISIT_MASK|CANON_CONTRADICTION_MASK|OFFSET_VALID_MASK|OFFSET_CONTRADICTION_MASK);
5512 			assert(!visited());
5513 			assert(!canonicized());
5514 			assert(!canonContradiction());
5515 			assert(!offsetValid());
5516 			assert(!offsetContradiction());
5517 		}
5518 
5519 		public boolean visited(){return (VISIT_MASK&flags)==VISIT_MASK;}
5520 		public boolean clustered(){return (CLUSTER_MASK&flags)==CLUSTER_MASK;}
5521 		public boolean canonicized(){return (CANONICIZED_MASK&flags)==CANONICIZED_MASK;}
5522 		public boolean canonContradiction(){return (CANON_CONTRADICTION_MASK&flags)==CANON_CONTRADICTION_MASK;}
5523 		public boolean offsetValid(){return (OFFSET_VALID_MASK&flags)==OFFSET_VALID_MASK;}
5524 		public boolean offsetContradiction(){return (OFFSET_CONTRADICTION_MASK&flags)==OFFSET_CONTRADICTION_MASK;}
5525 		public boolean contradiction(){return offsetContradiction() || canonContradiction();}
5526 
5527 		private static final long LEN_MASK=0x7FFFFFFFL;
5528 		private static final long CANON_MASK=(1L<<33);
5529 		private static final long INVALID_MASK=(1L<<34);
5530 		private static final long VISIT_MASK=(1L<<35);
5531 		private static final long CLUSTER_MASK=(1L<<36);
5532 		private static final long CANONICIZED_MASK=(1L<<37);
5533 		private static final long CANON_CONTRADICTION_MASK=(1L<<38);
5534 		private static final long OFFSET_VALID_MASK=(1L<<39);
5535 		private static final long OFFSET_CONTRADICTION_MASK=(1L<<40);
5536 		private static final long PAIRNUM_MASK=(1L<<41);
5537 	}
5538 
5539 	private static final class UnitOffsetComparator implements Comparator<Unit> {
5540 
5541 		UnitOffsetComparator(){}
5542 
5543 		/* (non-Javadoc)
5544 		 * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
5545 		 */
5546 		@Override
5547 		public int compare(Unit a, Unit b) {
5548 			if(a.offsetValid() && b.offsetValid()){
5549 				int x=a.offset()-b.offset();
5550 				if(x!=0){return x;}
5551 			}else{
5552 				if(a.offsetValid()){return -1;}
5553 				if(b.offsetValid()){return 1;}
5554 			}
5555 			return a.compareTo(b);
5556 		}
5557 
5558 	}
5559 
5560 	private static final class ClusterLengthComparator implements Comparator<ArrayList<Unit>> {
5561 
5562 		ClusterLengthComparator(){}
5563 
5564 		/* (non-Javadoc)
5565 		 * @see java.util.Comparator#compare(java.lang.Object, java.lang.Object)
5566 		 */
5567 		@Override
5568 		public int compare(ArrayList<Unit> a, ArrayList<Unit> b) {
5569 			if(a.size()!=b.size()){return b.size()-a.size();}
5570 			if(a.isEmpty() && b.isEmpty()){return 0;}
5571 			return a.get(0).compareTo(b.get(0));
5572 		}
5573 
5574 	}
5575 
5576 	public static final int[] makeNmerIndex(final int n){
5577 		final int max=(1<<(2*n))-1;
5578 		int[] array=new int[max+1];
5579 
5580 		int count=0;
5581 		for(int i=0; i<=max; i++){
5582 			final int a=i, b=AminoAcid.reverseComplementBinaryFast(i, n);
5583 			int min=Tools.min(a, b);
5584 			if(min==a){
5585 				array[a]=array[b]=count;
5586 				count++;
5587 			}
5588 		}
5589 		return array;
5590 	}
5591 
5592 	/** Makes a nmer (e.g., tetramer) profile of a cluster */
5593 	private static final float[] makeNmerProfile(ArrayList<Unit> alu, long[] array_){
5594 		final int nbits=2*nmerLength;
5595 		final long[] array=(array_==null ? new long[maxNmer+1] : array_);
5596 		final int mask=~((-1)<<(nbits));
5597 
5598 		long keysCounted=0;
5599 
5600 		for(Unit u : alu){
5601 			byte[] bases=u.r.bases;
5602 			int len=0;
5603 			int kmer=0;
5604 			for(byte b : bases){
5605 				int x=AminoAcid.baseToNumber[b];
5606 				if(x<0){
5607 					len=0;
5608 					kmer=0;
5609 				}else{
5610 					kmer=((kmer<<2)|x)&mask;
5611 					len++;
5612 					if(len>=nmerLength){
5613 						int rkmer=AminoAcid.reverseComplementBinaryFast(kmer, nmerLength);
5614 						keysCounted++;
5615 						array[nmerIndex[Tools.min(kmer, rkmer)]]++;
5616 					}
5617 				}
5618 			}
5619 		}
5620 
5621 		if(keysCounted==0){keysCounted=1;}
5622 		final float mult=1f/keysCounted;
5623 
5624 		float[] r=new float[array.length];
5625 		for(int i=0; i<array.length; i++){
5626 			r[i]=array[i]*mult;
5627 			array[i]=0;
5628 		}
5629 		return r;
5630 	}
5631 
5632 	private ConcurrentReadInputStream crisa[];
5633 
5634 	private final ByteStreamWriter dupeWriter;
5635 
5636 	private String[] in1=null;
5637 	private String[] in2=null;
5638 	private String out=null;
5639 	private String clusterFilePattern=null;
5640 	private String outbest=null;
5641 	private String outdupe=null;
5642 	private String outcsf=null;
5643 	private String outgraph=null;
5644 	private int maxNs=-1;
5645 	private long maxReads=-1;
5646 	public boolean errorState=false;
5647 	boolean sort=false;
5648 //	boolean ascending=true;
5649 	boolean absorbContainment=true;
5650 	boolean absorbMatch=true;
5651 	boolean findOverlaps=false;
5652 	boolean makeClusters=false;
5653 	boolean processClusters=false;
5654 	boolean renameClusters=false;
5655 	boolean absorbOverlap=false;
5656 	boolean storeSuffix=false;
5657 	boolean storeName=true;
5658 	boolean storeQuality=true;
5659 	boolean exact=true;
5660 	boolean uniqueNames=true;
5661 	boolean mergeHeaders=false;
5662 	String mergeDelimiter=">";
5663 	boolean maxSpanningTree=false;
5664 
5665 	boolean canonicizeClusters=true;
5666 	boolean removeCycles=true;
5667 	boolean fixMultiJoins=true;
5668 	boolean fixCanonContradictions=false;
5669 	boolean fixOffsetContradictions=false;
5670 	boolean countTransitive=false;
5671 	boolean countRedundant=false;
5672 
5673 	private boolean multipleInputFiles=false;
5674 	private boolean rigorousTransitive=false;
5675 	private int numAffixMaps=1;
5676 	private int maxAffixCopies=2000000000;
5677 	private int maxEdges=2000000000;
5678 	private int maxEdges2=2000000000;
5679 	private boolean allowAllContainedOverlaps=false;
5680 //	private boolean toUpperCase=false;
5681 
5682 	/** Trim left bases of the read to this position (exclusive, 0-based) */
5683 	private int forceTrimLeft=-1;
5684 	/** Trim right bases of the read after this position (exclusive, 0-based) */
5685 	private int forceTrimRight=-1;
5686 
5687 	private boolean qTrimLeft=false;
5688 	private boolean qTrimRight=false;
5689 	private float trimQ=6;
5690 	/** Error rate for trimming (derived from trimq) */
5691 	private final float trimE;
5692 
5693 	int maxEdits=0;
5694 	int maxSubs=0;
5695 	int bandwidth=9;
5696 	final boolean customBandwidth;
5697 	float minIdentity=100;
5698 	float minIdentityMult=0;
5699 	float minLengthPercent=0;
5700 	int minOverlapCluster=100;
5701 	int minOverlapMerge=1;
5702 	float minOverlapPercentCluster=0;
5703 	float minOverlapPercentMerge=0;
5704 
5705 	private int minClusterSize=1;
5706 	private int minClusterSizeForStats=1;
5707 	private boolean pickBestRepresentativePerCluster=false;
5708 
5709 	long readsProcessed=0;
5710 	long basesProcessed=0;
5711 	long collisions=0;
5712 	long containments=0;
5713 	long baseContainments=0;
5714 	long containmentCollisions=0;
5715 	long matches=0;
5716 	long baseMatches=0;
5717 	long overlaps=0;
5718 	long baseOverlaps=0;
5719 	long overlapCollisions=0;
5720 	long addedToMain=0;
5721 
5722 	private final int subset;
5723 	private final int subsetCount;
5724 	private final boolean subsetMode;
5725 
5726 	private final int k;
5727 	private final int k2;
5728 	private final boolean EA=Shared.EA();
5729 
5730 	/*--------------------------------------------------------------*/
5731 	/*----------------         Static Fields        ----------------*/
5732 	/*--------------------------------------------------------------*/
5733 
5734 	private static ReadComparator comparator=ReadLengthComparator.comparator;
5735 
5736 	private static int tcount=0;
5737 
5738 	private LinkedHashMap<Long, ArrayList<Unit>> codeMap=new LinkedHashMap<Long, ArrayList<Unit>>(4000000);
5739 	private HashMap<LongM, ArrayList<Unit>> affixMap1=null;
5740 	private HashMap<LongM, ArrayList<Unit>> affixMap2=null;
5741 	private HashMap<LongM, ArrayList<Unit>>[] affixMaps=null;
5742 	private ArrayDeque<ArrayList<Unit>> clusterQueue=null;
5743 	private ArrayList<ArrayList<Unit>> processedClusters=null;
5744 	private AtomicIntegerArray clusterNumbers=null;
5745 
5746 	private static final UnitOffsetComparator UNIT_OFFSET_COMPARATOR=new UnitOffsetComparator();
5747 	private static final ClusterLengthComparator CLUSTER_LENGTH_COMPARATOR=new ClusterLengthComparator();
5748 	private static final long[][] hashcodes=makeCodes2(32);
5749 	public static final byte[] baseToNumber=new byte[128];
5750 	public static final byte[] baseToComplementNumber=new byte[128];
5751 	public static final byte[] baseToComplementExtended=new byte[128];
5752 	public static final int nmerLength=4;
5753 	public static final int[] nmerIndex=makeNmerIndex(nmerLength);
5754 	public static final int maxNmer=Tools.max(nmerIndex);
5755 	private static PrintStream outstream=System.err;
5756 	/** Permission to overwrite existing files */
5757 	public static boolean overwrite=false;
5758 	/** Permission to append to existing files */
5759 	public static boolean append=false;
5760 	public static boolean showSpeed=true;
5761 	public static boolean verbose=false;
5762 	public static boolean ignoreReverseComplement=false;
5763 	public static boolean preventTransitiveOverlaps=false;
5764 	public static boolean ignoreAffix1=false;
5765 	public static boolean parseDepth=false;
5766 	public static boolean printLengthInEdges=false;
5767 	public static float depthRatio=2;
5768 	public static int MINSCAF=0;
5769 	public static int THREADS=Shared.threads();
5770 	public static int threadMaxReadsToBuffer=4000;
5771 	public static int threadMaxBasesToBuffer=32000000;
5772 	public static boolean DISPLAY_PROGRESS=true;
5773 	public static boolean UNIQUE_ONLY=false;
5774 	public static boolean REQUIRE_MATCHING_NAMES=false;
5775 	public static boolean NUMBER_GRAPH_NODES=true;
5776 	public static boolean ADD_PAIRNUM_TO_NAME=true;
5777 	public static boolean HASH_NS=false;
5778 
5779 	private static int reverseType(int type){return (type+2)%4;}
5780 	public static final int FORWARD=0;
5781 	public static final int FORWARDRC=1;
5782 	public static final int REVERSE=2;
5783 	public static final int REVERSERC=3;
5784 	public static final String[] OVERLAP_TYPE_NAMES=new String[] {"FORWARD", "FORWARDRC", "REVERSE", "REVERSERC"};
5785 	public static final String[] OVERLAP_TYPE_ABBREVIATIONS=new String[] {"F", "FRC", "R", "RRC"};
5786 
5787 	static{//All others are 0
5788 		baseToNumber['A']=baseToNumber['a']=0;
5789 		baseToNumber['C']=baseToNumber['c']=1;
5790 		baseToNumber['G']=baseToNumber['g']=2;
5791 		baseToNumber['T']=baseToNumber['t']=3;
5792 		baseToNumber['U']=baseToNumber['u']=3;
5793 
5794 		baseToComplementNumber['A']=baseToComplementNumber['a']=3;
5795 		baseToComplementNumber['C']=baseToComplementNumber['c']=2;
5796 		baseToComplementNumber['G']=baseToComplementNumber['g']=1;
5797 		baseToComplementNumber['T']=baseToComplementNumber['t']=0;
5798 		baseToComplementNumber['U']=baseToComplementNumber['u']=0;
5799 
5800 		for(int i=0; i<AminoAcid.baseToComplementExtended.length; i++){
5801 			byte b=AminoAcid.baseToComplementExtended[i];
5802 			baseToComplementExtended[i]=(b<0 ? (byte)i : b);
5803 		}
5804 	}
5805 
5806 }
5807