1 package jgi; 2 3 import java.util.ArrayList; 4 import java.util.Locale; 5 6 import align2.BandedAligner; 7 import fileIO.FileFormat; 8 import fileIO.ReadWrite; 9 import fileIO.TextStreamWriter; 10 import shared.Parse; 11 import shared.Parser; 12 import shared.PreParser; 13 import shared.Shared; 14 import shared.Timer; 15 import shared.Tools; 16 import stream.ConcurrentCollectionReadInputStream; 17 import stream.ConcurrentReadInputStream; 18 import stream.FASTQ; 19 import stream.Read; 20 import structures.ListNum; 21 22 /** 23 * Calculates an all-to-all identity matrix. 24 * @author Brian Bushnell 25 * @date Nov 23, 2014 26 * 27 */ 28 public class IdentityMatrix { 29 main(String[] args)30 public static void main(String[] args){ 31 Timer t=new Timer(); 32 IdentityMatrix x=new IdentityMatrix(args); 33 x.process(t); 34 35 //Close the print stream if it was redirected 36 Shared.closeStream(x.outstream); 37 } 38 IdentityMatrix(String[] args)39 public IdentityMatrix(String[] args){ 40 41 {//Preparse block for help, config files, and outstream 42 PreParser pp=new PreParser(args, getClass(), false); 43 args=pp.args; 44 outstream=pp.outstream; 45 } 46 47 FileFormat.PRINT_WARNING=false; 48 int maxEdits_=-1; 49 int maxWidth_=-1; 50 51 Parser parser=new Parser(); 52 for(int i=0; i<args.length; i++){ 53 String arg=args[i]; 54 String[] split=arg.split("="); 55 String a=split[0].toLowerCase(); 56 String b=split.length>1 ? split[1] : null; 57 58 if(parser.parse(arg, a, b)){ 59 //do nothing 60 }else if(a.equals("parse_flag_goes_here")){ 61 //Set a variable here 62 }else if(a.equals("edits") || a.equals("maxedits")){ 63 maxEdits_=Integer.parseInt(b); 64 }else if(a.equals("width") || a.equals("maxwidth")){ 65 maxWidth_=Integer.parseInt(b); 66 }else if(a.equals("percent")){ 67 percent=Parse.parseBoolean(b); 68 }else{ 69 outstream.println("Unknown parameter "+args[i]); 70 assert(false) : "Unknown parameter "+args[i]; 71 // throw new RuntimeException("Unknown parameter "+args[i]); 72 } 73 } 74 75 {//Process parser fields 76 Parser.processQuality(); 77 78 maxReads=parser.maxReads; 79 in1=parser.in1; 80 out1=parser.out1; 81 } 82 FASTQ.FORCE_INTERLEAVED=false; 83 FASTQ.TEST_INTERLEAVED=false; 84 85 maxEdits=maxEdits_==-1 ? BandedAligner.big : maxEdits_; 86 maxWidth=maxWidth_==-1 ? (int)(Tools.min(maxEdits, BandedAligner.big)*2L+1) : maxWidth_; 87 88 ffout1=FileFormat.testOutput(out1, FileFormat.TXT, null, true, true, false, false); 89 ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, null, true, true); 90 } 91 process(Timer t)92 void process(Timer t){ 93 94 allReads=load(); 95 Shared.setBufferLen(4); 96 ConcurrentCollectionReadInputStream cris=new ConcurrentCollectionReadInputStream(allReads, null, -1); 97 cris.start(); //4567 98 99 100 ArrayList<ProcessThread> threads=new ArrayList<ProcessThread>(); 101 final int tmax=Tools.max(Shared.threads(), 1); 102 for(int i=0; i<tmax; i++){ 103 threads.add(new ProcessThread(cris)); 104 } 105 for(ProcessThread pt : threads){pt.start();} 106 for(ProcessThread pt : threads){ 107 while(pt.getState()!=Thread.State.TERMINATED){ 108 try { 109 pt.join(); 110 } catch (InterruptedException e) { 111 // TODO Auto-generated catch block 112 e.printStackTrace(); 113 } 114 } 115 } 116 ReadWrite.closeStreams(cris); 117 118 final int numReads=allReads.size(); 119 for(int i=1; i<numReads; i++){ 120 Read r1=allReads.get(i); 121 assert(r1.numericID==i); 122 for(int j=0; j<i; j++){ 123 Read r2=allReads.get(j); 124 assert(r2.numericID==j); 125 ((float[])r2.obj)[i]=((float[])r1.obj)[j]; 126 } 127 } 128 129 if(ffout1!=null){ 130 TextStreamWriter tsw=new TextStreamWriter(ffout1); 131 tsw.start(); 132 for(Read r : allReads){ 133 float[] obj=(float[])r.obj; 134 tsw.print(r.id); 135 if(percent){ 136 for(float f : obj){ 137 tsw.print(String.format(Locale.ROOT, "\t%.2f", f)); 138 } 139 }else{ 140 for(float f : obj){ 141 tsw.print(String.format(Locale.ROOT, "\t%.4f", f)); 142 } 143 } 144 tsw.print("\n"); 145 r.obj=null; 146 } 147 tsw.poisonAndWait(); 148 } 149 150 t.stop(); 151 outstream.println("Total Time: \t"+t); 152 outstream.println("Reads Processed: "+allReads.size()+" \t"+String.format(Locale.ROOT, "%.2fk alignments/sec", (allReads.size()*(long)(allReads.size())/(double)(t.elapsed))*1000000)); 153 outstream.println("Min Similarity: "+String.format(Locale.ROOT, "%.5f", minID)); 154 outstream.println("Max Similarity: "+String.format(Locale.ROOT, "%.5f", maxID)); 155 outstream.println("Avg Similarity: "+String.format(Locale.ROOT, "%.5f", avgID)); 156 } 157 load()158 private ArrayList<Read> load(){ 159 Timer t=new Timer(); 160 final ConcurrentReadInputStream cris; 161 { 162 cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, null); 163 if(verbose){outstream.println("Started cris");} 164 cris.start(); //4567 165 } 166 boolean paired=cris.paired(); 167 assert(!paired) : "This program is not designed for paired reads."; 168 169 long readsProcessed=0; 170 int maxLen=0; 171 ArrayList<Read> bigList=new ArrayList<Read>(); 172 { 173 174 ListNum<Read> ln=cris.nextList(); 175 ArrayList<Read> reads=(ln!=null ? ln.list : null); 176 177 if(reads!=null && !reads.isEmpty()){ 178 Read r=reads.get(0); 179 assert((ffin1==null || ffin1.samOrBam()) || (r.mate!=null)==cris.paired()); 180 } 181 182 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning 183 if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} 184 185 for(int idx=0; idx<reads.size(); idx++){ 186 final Read r1=reads.get(idx); 187 188 bigList.add(r1); 189 maxLen=Tools.max(maxLen, r1.length()); 190 191 readsProcessed++; 192 } 193 194 cris.returnList(ln); 195 if(verbose){outstream.println("Returned a list.");} 196 ln=cris.nextList(); 197 reads=(ln!=null ? ln.list : null); 198 } 199 if(ln!=null){ 200 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty()); 201 } 202 } 203 ReadWrite.closeStreams(cris); 204 if(verbose){outstream.println("Finished loading "+readsProcessed+" sequences.");} 205 206 longestSequence=maxLen; 207 208 t.stop(); 209 outstream.println("Load Time: \t"+t); 210 211 return bigList; 212 } 213 214 /*--------------------------------------------------------------*/ 215 216 private class ProcessThread extends Thread { 217 ProcessThread(ConcurrentReadInputStream cris_)218 ProcessThread(ConcurrentReadInputStream cris_){ 219 cris=cris_; 220 maxEdits2=Tools.min(maxEdits, longestSequence); 221 int width=Tools.min(maxEdits2*2+1, maxWidth); 222 bandy=BandedAligner.makeBandedAligner(width); 223 } 224 225 @Override run()226 public void run(){ 227 final int numReads=allReads.size(); 228 ListNum<Read> ln=cris.nextList(); 229 ArrayList<Read> reads=(ln!=null ? ln.list : null); 230 231 if(reads!=null && !reads.isEmpty()){ 232 Read r=reads.get(0); 233 assert((ffin1==null || ffin1.samOrBam()) || (r.mate!=null)==cris.paired()); 234 } 235 236 double sum=0; 237 long compares=0; 238 239 while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning 240 if(verbose){outstream.println("Fetched "+reads.size()+" reads.");} 241 242 for(int idx=0; idx<reads.size(); idx++){ 243 final Read r1=reads.get(idx); 244 float[] obj=new float[numReads]; 245 r1.obj=obj; 246 for(Read r2 : allReads){ 247 if(r2.numericID>r1.numericID){break;} 248 // int edits=bandy.alignQuadruple(r1.bases, r2.bases, maxEdits2, false); 249 int edits=bandy.alignQuadrupleProgressive(r1.bases, r2.bases, 10, maxEdits2, false); 250 System.err.println(r1.id+"->"+r2.id+": Edits="+edits); 251 float editRate=edits/(float)Tools.max(r1.length(), r2.length()); 252 float similarity=1-editRate; 253 if(r1!=r2){ 254 compares++; 255 sum+=similarity; 256 minID=Tools.min(minID, similarity); 257 maxID=Tools.max(maxID, similarity); 258 } 259 if(percent){ 260 float id=100*similarity; 261 obj[(int)r2.numericID]=id; 262 }else{ 263 obj[(int)r2.numericID]=similarity; 264 } 265 } 266 } 267 268 cris.returnList(ln); 269 if(verbose){outstream.println("Returned a list.");} 270 ln=cris.nextList(); 271 reads=(ln!=null ? ln.list : null); 272 } 273 if(ln!=null){ 274 cris.returnList(ln.id, ln.list==null || ln.list.isEmpty()); 275 } 276 277 avgID=sum/compares; 278 } 279 280 private final ConcurrentReadInputStream cris; 281 private final BandedAligner bandy; 282 private final int maxEdits2; 283 } 284 285 /*--------------------------------------------------------------*/ 286 287 /*--------------------------------------------------------------*/ 288 289 private String in1=null; 290 private String out1=null; 291 292 private final FileFormat ffin1; 293 private final FileFormat ffout1; 294 private boolean percent=false; 295 296 private ArrayList<Read> allReads; 297 298 /*--------------------------------------------------------------*/ 299 300 private long maxReads=-1; 301 private final int maxEdits; 302 private final int maxWidth; 303 private int longestSequence; 304 305 private double minID=1, maxID=0, avgID=0; 306 307 /*--------------------------------------------------------------*/ 308 309 private java.io.PrintStream outstream=System.err; 310 public static boolean verbose=false; 311 312 } 313