package sketch;

import java.io.File;
import java.io.PrintStream;
import java.util.ArrayList;

import dna.AminoAcid;
import fileIO.ByteFile;
import fileIO.ByteFile1;
import fileIO.ByteFile2;
import fileIO.ByteStreamWriter;
import fileIO.FileFormat;
import fileIO.ReadWrite;
import shared.Parse;
import shared.Parser;
import shared.PreParser;
import shared.ReadStats;
import shared.Shared;
import shared.Timer;
import shared.Tools;
import stream.ConcurrentGenericReadInputStream;
import stream.ConcurrentReadInputStream;
import stream.FASTQ;
import stream.FastaReadInputStream;
import stream.Read;
import structures.ListNum;
import structures.LongHashSet;

/**
 * Scans input sequences for k-mers whose hash code matches one or more
 * target keys, and prints the matching k-mers.
 * A key is given in A48 text form and is converted to a hash code as
 * Long.MAX_VALUE-Sketch.parseA48(key); keys may be supplied as a single
 * value, a comma-delimited list, or a ".sketch" file.
 * 
 * @author Brian Bushnell
 * @date Oct 17, 2014
 *
 */
public class InvertKey extends SketchObject {
	
	/**
	 * Program entry point: constructs an InvertKey from the arguments,
	 * runs it, and closes the (possibly redirected) print stream.
	 * @param args Command-line arguments
	 */
	public static void main(String[] args){
		Timer t=new Timer();
		InvertKey x=new InvertKey(args);
		x.process(t);
		
		//Close the print stream if it was redirected
		Shared.closeStream(x.outstream);
	}
	
	/**
	 * Parses arguments, configures static settings, validates the input
	 * and output files, and loads the target key or key set.
	 * @param args Command-line arguments
	 * @throws RuntimeException If no input file or no key was specified,
	 * or if the output file cannot be written.
	 */
	public InvertKey(String[] args){
		
		{//Preparse block for help, config files, and outstream
			PreParser pp=new PreParser(args, getClass(), false);
			args=pp.args;
			outstream=pp.outstream;
		}
		
		Shared.capBuffers(4);
		ReadWrite.USE_PIGZ=ReadWrite.USE_UNPIGZ=true;
		ReadWrite.MAX_ZIP_THREADS=Shared.threads();
		
		FASTQ.TEST_INTERLEAVED=FASTQ.FORCE_INTERLEAVED=false;
		int k_=32, k2_=0;
		
		Parser parser=new Parser();
		for(int i=0; i<args.length; i++){
			String arg=args[i];
			String[] split=arg.split("=");
			String a=split[0].toLowerCase();
			String b=split.length>1 ? split[1] : null;
			
			if(a.equals("verbose")){
				verbose=Parse.parseBoolean(b);
				ByteFile1.verbose=verbose;
				ByteFile2.verbose=verbose;
				stream.FastaReadInputStream.verbose=verbose;
				ConcurrentGenericReadInputStream.verbose=verbose;
				stream.FastqReadInputStream.verbose=verbose;
				ReadWrite.verbose=verbose;
			}else if(a.equals("key")){
				keyString=b;
			}else if(a.equals("out")){
				out1=b;
			}else if(a.equalsIgnoreCase("k")){
				assert(b!=null) : "Bad parameter: "+arg;
				if(b.indexOf(',')>=0){
					//Two comma-delimited lengths: the larger becomes k, the smaller k2
					String[] bsplit=b.split(",");
					assert(bsplit.length==2) : "Bad argument "+arg;
					int x=Integer.parseInt(bsplit[0]);
					int y=Integer.parseInt(bsplit[1]);
					k_=Tools.max(x, y);
					k2_=Tools.min(x, y);
					if(k_==k2_){k2_=0;}
				}else{
					k_=Integer.parseInt(b);
					k2_=0;
				}
			}else if(a.equalsIgnoreCase("printonce")){
				printOnce=Parse.parseBoolean(b);
			}else if(parser.in1==null && i==0 && !arg.contains("=") && (arg.toLowerCase().startsWith("stdin") || new File(arg).exists())){
				//A bare first argument naming stdin or an existing file is the input
				parser.in1=arg;
			}else if(parser.out1==null && i==1 && !arg.contains("=")){
				//A bare second argument is the output
				out1=arg;
			}else if(parser.parse(arg, a, b)){
				//do nothing
			}else{
				outstream.println("Unknown parameter "+args[i]);
				assert(false) : "Unknown parameter "+args[i];
			}
		}
		
		k=k_;
		k2=k2_;
		shift=2*k;
		shift2=shift-2;
		mask=(shift>63 ? -1L : ~((-1L)<<shift)); //Conditional allows K=32
		
		{//Process parser fields
			Parser.processQuality();
			
			maxReads=parser.maxReads;
			
			overwrite=ReadStats.overwrite=parser.overwrite;
			append=ReadStats.append=parser.append;
			
			in1=parser.in1;
		}
		
		assert(FastaReadInputStream.settingsOK());
		
		if(in1==null){throw new RuntimeException("Error - at least one input file is required.");}
		//Fail with a clear message instead of a NullPointerException when no key was given
		if(keyString==null){throw new RuntimeException("Error - a key is required (key=<value>, a comma-delimited list, or a .sketch file).");}
		if(!ByteFile.FORCE_MODE_BF1 && !ByteFile.FORCE_MODE_BF2){
			ByteFile.FORCE_MODE_BF2=false;
			ByteFile.FORCE_MODE_BF1=true;
		}
		
		if(out1!=null && out1.equalsIgnoreCase("null")){out1=null;}
		
		if(!Tools.testOutputFiles(overwrite, append, false, out1)){
			outstream.println((out1==null)+", "+out1);
			throw new RuntimeException("\n\noverwrite="+overwrite+"; Can't write to output files "+out1+"\n");
		}
		
		ffout1=FileFormat.testOutput(out1, FileFormat.FASTA, null, true, overwrite, append, false);
		
		ffin1=FileFormat.testInput(in1, FileFormat.FASTA, null, true, true);
		
		SketchObject.postParse();
		
		if(keyString.indexOf(',')>0){
			//Comma-delimited list of A48-encoded keys
			String[] split=keyString.split(",");
			set=new LongHashSet(split.length*2);
			for(String s : split){
				long x=Long.MAX_VALUE-Sketch.parseA48(s);
				set.add(x);
			}
			key0=-1;
			assert(!set.isEmpty());
		}else if(keyString.endsWith(".sketch")){
			//Use every key of the first sketch in the file
			SketchTool tool=new SketchTool(10000, 0, false, false);
			Sketch sk=tool.loadSketchesFromFile(keyString, null, 0, 1000000, SketchObject.ONE_SKETCH, 1f, 0f, 0f, (byte)0, false).get(0);
			set=new LongHashSet(sk.length()*2);
			for(long x : sk.keys){set.add(Long.MAX_VALUE-x);}
			key0=-1;
			assert(!set.isEmpty());
		}else{
			//Single A48-encoded key
			key0=Long.MAX_VALUE-Sketch.parseA48(keyString);
			set=null;
		}
	}
	
	/**
	 * Reads the input sequences and calls {@link #invert} on each one until
	 * it reports completion or the input is exhausted, then closes the
	 * streams and prints timing statistics.
	 * @param t Timer started by the caller; stopped here before reporting.
	 * @throws RuntimeException If an error state was encountered, processing
	 * did not finish early, and maxReads was not set (is less than 1).
	 */
	void process(Timer t){
		
		final ConcurrentReadInputStream cris;
		{
			cris=ConcurrentReadInputStream.getReadInputStream(maxReads, true, ffin1, null, null, null);
			cris.start();
			if(verbose){outstream.println("Started cris");}
		}
		boolean paired=cris.paired();
		if(!ffin1.samOrBam()){outstream.println("Input is being processed as "+(paired ? "paired" : "unpaired"));}
		
		final ByteStreamWriter bsw;
		if(out1!=null){
			//Headers are only written for fasta output that is not named *.txt
			fasta=ffout1.fasta() && !out1.endsWith(".txt");
			bsw=new ByteStreamWriter(ffout1);
			bsw.start();
		}else{bsw=null;}
		
		long readsProcessed=0;
		long basesProcessed=0;
		boolean finished=false;
		
		{
			
			ListNum<Read> ln=cris.nextList();
			ArrayList<Read> reads=(ln!=null ? ln.list : null);
			
			if(reads!=null && !reads.isEmpty()){
				Read r=reads.get(0);
				assert((ffin1==null || ffin1.samOrBam()) || (r.mate!=null)==cris.paired());
			}
			
			while(reads!=null && reads.size()>0 && !finished){
				
				for(int idx=0; idx<reads.size() && !finished; idx++){
					final Read r1=reads.get(idx);
					
					finished=invert(key0, r1, bsw);
					
					final int initialLength1=r1.length();
					
					readsProcessed++;
					basesProcessed+=initialLength1;
				}
				
				cris.returnList(ln);
				ln=cris.nextList();
				reads=(ln!=null ? ln.list : null);
			}
			if(ln!=null){
				cris.returnList(ln.id, ln.list==null || ln.list.isEmpty());
			}
		}
		
		errorState|=(ReadWrite.closeStream(cris));
		if(bsw!=null){errorState|=bsw.poisonAndWait();}
		
		t.stop();
		outstream.println(Tools.timeReadsBasesProcessed(t, readsProcessed, basesProcessed, 8));
		
		if(errorState && !finished && maxReads<1){
			throw new RuntimeException(getClass().getName()+" terminated in an error state; the output may be corrupt.");
		}
	}
	
	/**
	 * Scans one sequence for k-mers whose hash code equals key0 (single-key
	 * mode, key0>=0) or is contained in the key set (multi-key mode).
	 * Each match is written to bsw, when bsw is non-null, as the k-mer string
	 * of the smaller of the forward and reverse k-mer; in fasta mode it is
	 * preceded by a header holding the A48 key, the 0-based start position
	 * of the k-mer, and the sequence id.
	 * With printOnce, a matched key is removed from the set and scanning of
	 * this sequence continues while keys remain.
	 * 
	 * @param key2 Unused; matching is done against the key0 field or the set.
	 * @param r Sequence to scan.
	 * @param bsw Destination for matches; may be null, in which case nothing is printed.
	 * @return true if printOnce is set and the single key was found or the
	 * key set has been emptied, meaning no further input needs scanning.
	 */
	private boolean invert(long key2, Read r, ByteStreamWriter bsw) {
		final byte[] bases=r.bases;
		if(bases==null){return false;}
		
		long kmer=0;
		long rkmer=0;
		int len=0;
		
		for(int i=0; i<bases.length; i++){
			byte b=bases[i];
			long x=AminoAcid.baseToNumber[b];
			long x2=AminoAcid.baseToComplementNumber[b];
			kmer=((kmer<<2)|x)&mask;
			rkmer=((rkmer>>>2)|(x2<<shift2))&mask;
			//A negative code restarts the k-mer; k consecutive non-negative codes are needed before hashing
			if(x<0){len=0; rkmer=0;}else{len++;}
			if(len>=k){
				kmersProcessed++;
				final long hashcode=hash(kmer, rkmer);
				boolean found=(key0>=0 ? hashcode==key0 : set.contains(hashcode));
				if(found){
					if(bsw!=null){//bsw is null when output was set to null
						if(fasta){bsw.println(">"+Sketch.toA48(Long.MAX_VALUE-hashcode)+" "+(i-k+1)+" "+r.id);}
						bsw.println(AminoAcid.kmerToString(Tools.min(kmer, rkmer), k));
					}
					if(printOnce){
						if(key0>=0){return true;}
						set.remove(hashcode);
						//Keep scanning this sequence for the remaining keys;
						//only stop once every key has been found.
						if(set.isEmpty()){return true;}
					}
				}
			}
		}
		return false;
	}
	
	/*--------------------------------------------------------------*/
	
	/** Target hash code in single-key mode; -1 when a key set is used */
	final long key0;
	/** Target hash codes in multi-key mode; null in single-key mode */
	final LongHashSet set;
	
	/** 2*k: number of bits in a k-mer */
	final int shift;
	/** shift-2: bit position at which a new base enters the reverse k-mer */
	final int shift2;
	/** Mask retaining the low 2*k bits of a k-mer (all bits when k=32) */
	final long mask;
	
	/** If true, stop looking for a key after its first match */
	boolean printOnce=true;
	/** Number of k-mers hashed so far */
	long kmersProcessed=0;
	
	private String in1=null;
	/** True if matches are written with fasta headers */
	boolean fasta;
	boolean sketch;
	/** Raw key argument: a single key, a comma-delimited list, or a .sketch file */
	private String keyString=null;
	
	private String out1="stdout.fa";
	
	/*--------------------------------------------------------------*/
	
	private long maxReads=-1;
	
	/*--------------------------------------------------------------*/
	
	private final FileFormat ffin1;
	
	private final FileFormat ffout1;
	
	
	/*--------------------------------------------------------------*/
	
	private PrintStream outstream=System.err;
	public static boolean verbose=false;
	public boolean errorState=false;
	private boolean overwrite=false;
	private boolean append=false;
	
}