1 package jgi;
2 
3 import java.util.ArrayList;
4 import java.util.Locale;
5 
6 import align2.BandedAligner;
7 import fileIO.FileFormat;
8 import fileIO.ReadWrite;
9 import fileIO.TextStreamWriter;
10 import shared.Parse;
11 import shared.Parser;
12 import shared.PreParser;
13 import shared.Shared;
14 import shared.Timer;
15 import shared.Tools;
16 import stream.ConcurrentCollectionReadInputStream;
17 import stream.ConcurrentReadInputStream;
18 import stream.FASTQ;
19 import stream.Read;
20 import structures.ListNum;
21 
22 /**
23  * Calculates an all-to-all identity matrix.
24  * @author Brian Bushnell
25  * @date Nov 23, 2014
26  *
27  */
28 public class IdentityMatrix {
29 
main(String[] args)30 	public static void main(String[] args){
31 		Timer t=new Timer();
32 		IdentityMatrix x=new IdentityMatrix(args);
33 		x.process(t);
34 
35 		//Close the print stream if it was redirected
36 		Shared.closeStream(x.outstream);
37 	}
38 
IdentityMatrix(String[] args)39 	public IdentityMatrix(String[] args){
40 
41 		{//Preparse block for help, config files, and outstream
42 			PreParser pp=new PreParser(args, getClass(), false);
43 			args=pp.args;
44 			outstream=pp.outstream;
45 		}
46 
47 		FileFormat.PRINT_WARNING=false;
48 		int maxEdits_=-1;
49 		int maxWidth_=-1;
50 
51 		Parser parser=new Parser();
52 		for(int i=0; i<args.length; i++){
53 			String arg=args[i];
54 			String[] split=arg.split("=");
55 			String a=split[0].toLowerCase();
56 			String b=split.length>1 ? split[1] : null;
57 
58 			if(parser.parse(arg, a, b)){
59 				//do nothing
60 			}else if(a.equals("parse_flag_goes_here")){
61 				//Set a variable here
62 			}else if(a.equals("edits") || a.equals("maxedits")){
63 				maxEdits_=Integer.parseInt(b);
64 			}else if(a.equals("width") || a.equals("maxwidth")){
65 				maxWidth_=Integer.parseInt(b);
66 			}else if(a.equals("percent")){
67 				percent=Parse.parseBoolean(b);
68 			}else{
69 				outstream.println("Unknown parameter "+args[i]);
70 				assert(false) : "Unknown parameter "+args[i];
71 				//				throw new RuntimeException("Unknown parameter "+args[i]);
72 			}
73 		}
74 
75 		{//Process parser fields
76 			Parser.processQuality();
77 
78 			maxReads=parser.maxReads;
79 			in1=parser.in1;
80 			out1=parser.out1;
81 		}
82 		FASTQ.FORCE_INTERLEAVED=false;
83 		FASTQ.TEST_INTERLEAVED=false;
84 
85 		maxEdits=maxEdits_==-1 ? BandedAligner.big : maxEdits_;
86 		maxWidth=maxWidth_==-1 ? (int)(Tools.min(maxEdits, BandedAligner.big)*2L+1) : maxWidth_;
87 
88 		ffout1=FileFormat.testOutput(out1, FileFormat.TXT, null, true, true, false, false);
89 		ffin1=FileFormat.testInput(in1, FileFormat.FASTQ, null, true, true);
90 	}
91 
process(Timer t)92 	void process(Timer t){
93 
94 		allReads=load();
95 		Shared.setBufferLen(4);
96 		ConcurrentCollectionReadInputStream cris=new ConcurrentCollectionReadInputStream(allReads, null, -1);
97 		cris.start(); //4567
98 
99 
100 		ArrayList<ProcessThread> threads=new ArrayList<ProcessThread>();
101 		final int tmax=Tools.max(Shared.threads(), 1);
102 		for(int i=0; i<tmax; i++){
103 			threads.add(new ProcessThread(cris));
104 		}
105 		for(ProcessThread pt : threads){pt.start();}
106 		for(ProcessThread pt : threads){
107 			while(pt.getState()!=Thread.State.TERMINATED){
108 				try {
109 					pt.join();
110 				} catch (InterruptedException e) {
111 					// TODO Auto-generated catch block
112 					e.printStackTrace();
113 				}
114 			}
115 		}
116 		ReadWrite.closeStreams(cris);
117 
118 		final int numReads=allReads.size();
119 		for(int i=1; i<numReads; i++){
120 			Read r1=allReads.get(i);
121 			assert(r1.numericID==i);
122 			for(int j=0; j<i; j++){
123 				Read r2=allReads.get(j);
124 				assert(r2.numericID==j);
125 				((float[])r2.obj)[i]=((float[])r1.obj)[j];
126 			}
127 		}
128 
129 		if(ffout1!=null){
130 			TextStreamWriter tsw=new TextStreamWriter(ffout1);
131 			tsw.start();
132 			for(Read r : allReads){
133 				float[] obj=(float[])r.obj;
134 				tsw.print(r.id);
135 				if(percent){
136 					for(float f : obj){
137 						tsw.print(String.format(Locale.ROOT, "\t%.2f", f));
138 					}
139 				}else{
140 					for(float f : obj){
141 						tsw.print(String.format(Locale.ROOT, "\t%.4f", f));
142 					}
143 				}
144 				tsw.print("\n");
145 				r.obj=null;
146 			}
147 			tsw.poisonAndWait();
148 		}
149 
150 		t.stop();
151 		outstream.println("Total Time:                   \t"+t);
152 		outstream.println("Reads Processed:    "+allReads.size()+" \t"+String.format(Locale.ROOT, "%.2fk alignments/sec", (allReads.size()*(long)(allReads.size())/(double)(t.elapsed))*1000000));
153 		outstream.println("Min Similarity:     "+String.format(Locale.ROOT, "%.5f", minID));
154 		outstream.println("Max Similarity:     "+String.format(Locale.ROOT, "%.5f", maxID));
155 		outstream.println("Avg Similarity:     "+String.format(Locale.ROOT, "%.5f", avgID));
156 	}
157 
load()158 	private ArrayList<Read> load(){
159 		Timer t=new Timer();
160 		final ConcurrentReadInputStream cris;
161 		{
162 			cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, null);
163 			if(verbose){outstream.println("Started cris");}
164 			cris.start(); //4567
165 		}
166 		boolean paired=cris.paired();
167 		assert(!paired) : "This program is not designed for paired reads.";
168 
169 		long readsProcessed=0;
170 		int maxLen=0;
171 		ArrayList<Read> bigList=new ArrayList<Read>();
172 		{
173 
174 			ListNum<Read> ln=cris.nextList();
175 			ArrayList<Read> reads=(ln!=null ? ln.list : null);
176 
177 			if(reads!=null && !reads.isEmpty()){
178 				Read r=reads.get(0);
179 				assert((ffin1==null || ffin1.samOrBam()) || (r.mate!=null)==cris.paired());
180 			}
181 
182 			while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
183 				if(verbose){outstream.println("Fetched "+reads.size()+" reads.");}
184 
185 				for(int idx=0; idx<reads.size(); idx++){
186 					final Read r1=reads.get(idx);
187 
188 					bigList.add(r1);
189 					maxLen=Tools.max(maxLen, r1.length());
190 
191 					readsProcessed++;
192 				}
193 
194 				cris.returnList(ln);
195 				if(verbose){outstream.println("Returned a list.");}
196 				ln=cris.nextList();
197 				reads=(ln!=null ? ln.list : null);
198 			}
199 			if(ln!=null){
200 				cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
201 			}
202 		}
203 		ReadWrite.closeStreams(cris);
204 		if(verbose){outstream.println("Finished loading "+readsProcessed+" sequences.");}
205 
206 		longestSequence=maxLen;
207 
208 		t.stop();
209 		outstream.println("Load Time:                    \t"+t);
210 
211 		return bigList;
212 	}
213 
214 	/*--------------------------------------------------------------*/
215 
216 	private class ProcessThread extends Thread {
217 
ProcessThread(ConcurrentReadInputStream cris_)218 		ProcessThread(ConcurrentReadInputStream cris_){
219 			cris=cris_;
220 			maxEdits2=Tools.min(maxEdits, longestSequence);
221 			int width=Tools.min(maxEdits2*2+1, maxWidth);
222 			bandy=BandedAligner.makeBandedAligner(width);
223 		}
224 
225 		@Override
run()226 		public void run(){
227 			final int numReads=allReads.size();
228 			ListNum<Read> ln=cris.nextList();
229 			ArrayList<Read> reads=(ln!=null ? ln.list : null);
230 
231 			if(reads!=null && !reads.isEmpty()){
232 				Read r=reads.get(0);
233 				assert((ffin1==null || ffin1.samOrBam()) || (r.mate!=null)==cris.paired());
234 			}
235 
236 			double sum=0;
237 			long compares=0;
238 
239 			while(ln!=null && reads!=null && reads.size()>0){//ln!=null prevents a compiler potential null access warning
240 				if(verbose){outstream.println("Fetched "+reads.size()+" reads.");}
241 
242 				for(int idx=0; idx<reads.size(); idx++){
243 					final Read r1=reads.get(idx);
244 					float[] obj=new float[numReads];
245 					r1.obj=obj;
246 					for(Read r2 : allReads){
247 						if(r2.numericID>r1.numericID){break;}
248 //						int edits=bandy.alignQuadruple(r1.bases, r2.bases, maxEdits2, false);
249 						int edits=bandy.alignQuadrupleProgressive(r1.bases, r2.bases, 10, maxEdits2, false);
250 						System.err.println(r1.id+"->"+r2.id+": Edits="+edits);
251 						float editRate=edits/(float)Tools.max(r1.length(), r2.length());
252 						float similarity=1-editRate;
253 						if(r1!=r2){
254 							compares++;
255 							sum+=similarity;
256 							minID=Tools.min(minID, similarity);
257 							maxID=Tools.max(maxID, similarity);
258 						}
259 						if(percent){
260 							float id=100*similarity;
261 							obj[(int)r2.numericID]=id;
262 						}else{
263 							obj[(int)r2.numericID]=similarity;
264 						}
265 					}
266 				}
267 
268 				cris.returnList(ln);
269 				if(verbose){outstream.println("Returned a list.");}
270 				ln=cris.nextList();
271 				reads=(ln!=null ? ln.list : null);
272 			}
273 			if(ln!=null){
274 				cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
275 			}
276 
277 			avgID=sum/compares;
278 		}
279 
280 		private final ConcurrentReadInputStream cris;
281 		private final BandedAligner bandy;
282 		private final int maxEdits2;
283 	}
284 
285 	/*--------------------------------------------------------------*/
286 
287 	/*--------------------------------------------------------------*/
288 
289 	private String in1=null;
290 	private String out1=null;
291 
292 	private final FileFormat ffin1;
293 	private final FileFormat ffout1;
294 	private boolean percent=false;
295 
296 	private ArrayList<Read> allReads;
297 
298 	/*--------------------------------------------------------------*/
299 
300 	private long maxReads=-1;
301 	private final int maxEdits;
302 	private final int maxWidth;
303 	private int longestSequence;
304 
305 	private double minID=1, maxID=0, avgID=0;
306 
307 	/*--------------------------------------------------------------*/
308 
309 	private java.io.PrintStream outstream=System.err;
310 	public static boolean verbose=false;
311 
312 }
313