1 package sketch; 2 3 import java.util.ArrayList; 4 import java.util.Collections; 5 import java.util.Comparator; 6 import java.util.Locale; 7 import java.util.Map.Entry; 8 9 import json.JsonObject; 10 import shared.Colors; 11 import shared.Parse; 12 import shared.Tools; 13 import structures.ByteBuilder; 14 import tax.PrintTaxonomy; 15 import tax.TaxFilter; 16 import tax.TaxNode; 17 import tax.TaxTree; 18 19 public class DisplayParams implements Cloneable { 20 21 @Override clone()22 public DisplayParams clone(){ 23 try { 24 DisplayParams copy=(DisplayParams)super.clone(); 25 if(taxFilterWhite!=null){ 26 copy.taxFilterWhite=taxFilterWhite.deepCopy(); 27 } 28 if(taxFilterBlack!=null){ 29 copy.taxFilterBlack=taxFilterBlack.deepCopy(); 30 } 31 copy.postParsed=false; 32 return copy; 33 } catch (CloneNotSupportedException e) { 34 // TODO Auto-generated catch block 35 e.printStackTrace(); 36 throw new RuntimeException(); 37 } 38 } 39 parseDoubleHeader(String s)40 public DisplayParams parseDoubleHeader(String s){ 41 if(!s.startsWith("##")){return this;} 42 // if(!s.startsWith("##")){return this.clone();} 43 StringBuilder sb=new StringBuilder(); 44 for(int i=2; i<s.length(); i++){ 45 char c=s.charAt(i); 46 if(c=='\n'){break;} 47 sb.append(c); 48 } 49 return parseDoubleHeaderLine(sb.toString()); 50 } 51 parseDoubleHeaderLine(String line)52 public DisplayParams parseDoubleHeaderLine(String line) { 53 if(line.startsWith("##")){line=line.substring(2);} 54 else{assert(!line.startsWith("#")) : line;} 55 if(line.length()<1){return this;} 56 57 DisplayParams params=this.clone(); 58 59 String[] args=line.split(" "); 60 for(String arg : args){ 61 String[] split=arg.split("="); 62 String a=split[0].toLowerCase(); 63 String b=split.length>1 ? split[1] : null; 64 if(b==null || b.equalsIgnoreCase("null")){b=null;} //Normally handled by PreParser, but not in this case. 65 while(a.startsWith("-")){a=a.substring(1);} //Strip leading hyphens 66 67 boolean x=params.parse(arg, a, b); 68 // assert(x) : "Unknown parameter "+arg+"\n"+line; 69 if(!x){System.err.println("Warning: Unknown parameter "+arg);} 70 } 71 if(SketchObject.verbose2){System.err.println("Made it to post-parse. taxFilterWhite="+params.taxFilterWhite);} 72 params.postParse(true, true); 73 if(SketchObject.verbose2){System.err.println("Passed post-parse. taxFilterWhite="+params.taxFilterWhite);} 74 75 return params; 76 } 77 parse(String arg, String a, String b)78 public boolean parse(String arg, String a, String b){ 79 80 if(a.equals("chunk")){ 81 chunkNum=Integer.parseInt(b); 82 }else if(a.equals("minhits") || a.equals("hits")){ 83 minHits=Integer.parseInt(b); 84 }else if(a.equalsIgnoreCase("minwkid") || a.equalsIgnoreCase("wkid")){ 85 minWKID=Float.parseFloat(b); 86 if(minWKID>1){minWKID/=100;} 87 assert(minWKID<=1) : "minWKID should between 0 and 1"; 88 }else if(a.equalsIgnoreCase("minid") || a.equalsIgnoreCase("id") || a.equalsIgnoreCase("minani") || a.equalsIgnoreCase("ani")){ 89 minANI=Float.parseFloat(b); 90 if(minANI>1){minANI/=100;} 91 assert(minANI<=1) : "minANI should between 0 and 1"; 92 if(minANI>0){ 93 minWKID=(float)Tools.max(minWKID, Comparison.aniToWkid(minANI, 32));//Lowest possible minWKID for this ANI 94 } 95 }else if(a.equals("minbases")){ 96 minBases=Integer.parseInt(b); 97 }else if(a.equals("minsizeratio")){ 98 minSizeRatio=Float.parseFloat(b); 99 // assert(minSizeRatio>=0f && minSizeRatio<=1.0f) : "\nminSizeRatio must be between 0 and 1, inclusive.\n"; 100 if(minSizeRatio>1){minSizeRatio=1f/minSizeRatio;} 101 }else if(a.equals("records") || a.equals("maxrecords") || a.equals("results")){ 102 maxRecords=Integer.parseInt(b); 103 assert(maxRecords>=1) : "Max records must be at least 1."; 104 }else if(a.equals("recordsperlevel")){ 105 recordsPerLevel=Integer.parseInt(b); 106 }else if(a.equals("format")){ 107 assert(b!=null) : "Invalid format: "+arg; 108 if(b.equalsIgnoreCase("json")){ 109 format=FORMAT_JSON; 110 }else if(b.equalsIgnoreCase("jsonarray")){ 111 format=FORMAT_JSON; 112 jsonArray=true; 113 }else if(b.equalsIgnoreCase("d3")){ 114 format=FORMAT_JSON; 115 printD3=true; 116 }else if(b.equalsIgnoreCase("constellation")){ 117 format=FORMAT_CONSTELLATION; 118 }else if(b.equalsIgnoreCase("3column") || b.equalsIgnoreCase("queryrefani")){ 119 format=FORMAT_QUERY_REF_ANI; 120 }else if(Tools.isDigit(b.charAt(0))){ 121 format=Integer.parseInt(b); 122 }else{ 123 assert(false) : "Invalid format: "+arg; 124 } 125 }else if(a.equalsIgnoreCase("json")){ 126 if(Parse.parseBoolean(b)){ 127 format=FORMAT_JSON; 128 }else{ 129 if(format==FORMAT_JSON){format=default_format;} 130 } 131 }else if(a.equalsIgnoreCase("jsonarray") || a.equalsIgnoreCase("jsonarrays")){ 132 if(Parse.parseBoolean(b)){ 133 format=FORMAT_JSON; 134 jsonArray=true; 135 }else{ 136 jsonArray=false; 137 } 138 }else if(a.equalsIgnoreCase("d3") || a.equalsIgnoreCase("printd3")){ 139 if(Parse.parseBoolean(b)){ 140 format=FORMAT_JSON; 141 printD3=true; 142 }else{ 143 printD3=false; 144 } 145 }else if(a.equalsIgnoreCase("jsonarray") || a.equalsIgnoreCase("jsonarrays")){ 146 if(Parse.parseBoolean(b)){ 147 jsonArray=true; 148 }else{ 149 jsonArray=false; 150 } 151 }else if(a.equalsIgnoreCase("d3levelnodes")){ 152 D3LevelNodes=Parse.parseBoolean(b); 153 }else if(a.equalsIgnoreCase("d3hitsize")){ 154 if(Parse.parseBoolean(b)){D3sizeMode=D3_HIT_SIZE;} 155 }else if(a.equalsIgnoreCase("d3anisize")){ 156 if(Parse.parseBoolean(b)){D3sizeMode=D3_ANI_SIZE;} 157 }else if(a.equalsIgnoreCase("d3wkidsize")){ 158 if(Parse.parseBoolean(b)){D3sizeMode=D3_WKID_SIZE;} 159 }else if(a.equalsIgnoreCase("d3depthsize")){ 160 if(Parse.parseBoolean(b)){ 161 D3sizeMode=D3_DEPTH_SIZE; 162 printDepth=true; 163 } 164 }else if(a.equalsIgnoreCase("d3kidsize")){ 165 if(Parse.parseBoolean(b)){D3sizeMode=D3_KID_SIZE;} 166 }else if(a.equalsIgnoreCase("D3sizeMode")){ 167 D3sizeMode=Integer.parseInt(b); 168 }else if(a.equals("level") || a.equals("lv") || a.equals("taxlevel") || a.equals("tl") || a.equals("minlevel")){ 169 taxLevel=TaxTree.parseLevel(b);//TODO: Change to extended 170 } 171 172 else if(a.equalsIgnoreCase("requireSSU")){ 173 requireSSU=Parse.parseBoolean(b); 174 } 175 176 else if(a.equalsIgnoreCase("minRefSizeEstimate") || a.equalsIgnoreCase("minRefSize")){ 177 minRefSizeEstimate=Long.parseLong(b); 178 }else if(a.equalsIgnoreCase("minRefSizeBases")){ 179 minRefSizeBases=Long.parseLong(b); 180 } 181 182 else if(a.equalsIgnoreCase("printtax") || a.equalsIgnoreCase("printtaxa")){ 183 printTax=Parse.parseBoolean(b); 184 }else if(a.equalsIgnoreCase("printssu") || a.equalsIgnoreCase("print16s") || a.equalsIgnoreCase("ssu")){ 185 printSSU=Parse.parseBoolean(b); 186 }else if(a.equalsIgnoreCase("printSSULen") || a.equalsIgnoreCase("print16slen") || a.equalsIgnoreCase("ssulen")){ 187 printSSULen=Parse.parseBoolean(b); 188 }else if(a.equalsIgnoreCase("printssusequence") || a.equalsIgnoreCase("print16ssequence")){ 189 printSSUSequence=Parse.parseBoolean(b); 190 }else if(a.equalsIgnoreCase("printqueryfilename") || a.equalsIgnoreCase("printqfname") || a.equalsIgnoreCase("printqfile") || a.equalsIgnoreCase("qfname")){ 191 printQueryFileName=Parse.parseBoolean(b); 192 }else if(a.equalsIgnoreCase("printreffilename") || a.equalsIgnoreCase("printrfname") || a.equalsIgnoreCase("printrfile") || a.equalsIgnoreCase("rfname")){ 193 printRefFileName=Parse.parseBoolean(b); 194 }else if(a.equalsIgnoreCase("printfilename") || a.equalsIgnoreCase("printfname") || a.equalsIgnoreCase("printfile")){ 195 printQueryFileName=printRefFileName=Parse.parseBoolean(b); 196 }else if(a.equalsIgnoreCase("printoriginalname") || a.equalsIgnoreCase("printseqname") || a.equalsIgnoreCase("printname0") || a.equals("pn0")){ 197 printOriginalName=Parse.parseBoolean(b); 198 }else if(a.equalsIgnoreCase("printimg")){ 199 printImg=Parse.parseBoolean(b); 200 }else if(a.equalsIgnoreCase("printcompleteness") || a.equalsIgnoreCase("completeness") || a.equalsIgnoreCase("printcomplt")){ 201 printCompleteness=Parse.parseBoolean(b); 202 }else if(a.equalsIgnoreCase("printani") || a.equalsIgnoreCase("ani")){ 203 printAni=Parse.parseBoolean(b); 204 }else if(a.equalsIgnoreCase("printkid") || a.equalsIgnoreCase("kid")){ 205 printKID=Parse.parseBoolean(b); 206 }else if(a.equalsIgnoreCase("printwkid") || a.equalsIgnoreCase("wkid")){ 207 printWKID=Parse.parseBoolean(b); 208 }else if(a.equalsIgnoreCase("printscore") || a.equalsIgnoreCase("score")){ 209 printScore=Parse.parseBoolean(b); 210 }else if(a.equalsIgnoreCase("printevalue") || a.equalsIgnoreCase("evalue")){ 211 printEValue=Parse.parseBoolean(b); 212 } 213 214 else if(a.equalsIgnoreCase("trackcounts")){ 215 trackCounts=Parse.parseBoolean(b); 216 }else if(a.equalsIgnoreCase("printdepth") || a.equalsIgnoreCase("depth")){ 217 printDepth=Parse.parseBoolean(b); 218 }else if(a.equalsIgnoreCase("printdepth2") || a.equalsIgnoreCase("depth2")){ 219 printDepth2=Parse.parseBoolean(b); 220 }else if(a.equalsIgnoreCase("actualdepth") || a.equalsIgnoreCase("printactualdepth")){ 221 printActualDepth=Parse.parseBoolean(b); 222 }else if(a.equalsIgnoreCase("printvolume") || a.equalsIgnoreCase("volume")){ 223 printVolume=Parse.parseBoolean(b); 224 }else if(a.equalsIgnoreCase("printavgrefhits") || a.equalsIgnoreCase("printrefhits") || a.equalsIgnoreCase("avgrefhits") || a.equalsIgnoreCase("refhits")){ 225 printRefHits=Parse.parseBoolean(b); 226 } 227 228 else if(a.equalsIgnoreCase("sortByDepth")){ 229 boolean x=Parse.parseBoolean(b); 230 if(x){comparator=Comparison.depthComparator;} 231 }else if(a.equalsIgnoreCase("sortByDepth2")){ 232 boolean x=Parse.parseBoolean(b); 233 if(x){comparator=Comparison.depth2Comparator;} 234 }else if(a.equalsIgnoreCase("sortByVolume")){ 235 boolean x=Parse.parseBoolean(b); 236 if(x){comparator=Comparison.volumeComparator;} 237 }else if(a.equalsIgnoreCase("sortByScore")){ 238 boolean x=Parse.parseBoolean(b); 239 if(x){comparator=Comparison.scoreComparator;} 240 } 241 else if(a.equalsIgnoreCase("sortByKID")){ 242 boolean x=Parse.parseBoolean(b); 243 if(x){comparator=Comparison.KIDComparator;} 244 }else if(a.equalsIgnoreCase("sortByWKID") || a.equalsIgnoreCase("sortByANI")){ 245 boolean x=Parse.parseBoolean(b); 246 if(x){comparator=Comparison.WKIDComparator;} 247 }else if(a.equalsIgnoreCase("sortBySSU") || a.equalsIgnoreCase("sortBy16S")){ 248 boolean x=Parse.parseBoolean(b); 249 if(x){comparator=Comparison.SSUComparator;} 250 }else if(a.equalsIgnoreCase("sortByHits") || a.equalsIgnoreCase("sortByMatches")){ 251 boolean x=Parse.parseBoolean(b); 252 if(x){comparator=Comparison.HitsComparator;} 253 } 254 255 else if(a.equalsIgnoreCase("printUMatches") || a.equalsIgnoreCase("printUHits") || a.equalsIgnoreCase("printUnique")){ 256 printUnique=Parse.parseBoolean(b); 257 }else if(a.equalsIgnoreCase("printUMatches2") || a.equalsIgnoreCase("printUnique2") || a.equalsIgnoreCase("unique2")){ 258 printUnique2=Parse.parseBoolean(b); 259 }else if(a.equalsIgnoreCase("printUMatches3") || a.equalsIgnoreCase("printUnique3") || a.equalsIgnoreCase("unique3")){ 260 printUnique3=Parse.parseBoolean(b); 261 }else if(a.equalsIgnoreCase("printUContam")){ 262 printUContam=Parse.parseBoolean(b); 263 }else if(a.equalsIgnoreCase("printNoHit")){ 264 printNoHit=Parse.parseBoolean(b); 265 }else if(a.equalsIgnoreCase("contamhits") || a.equalsIgnoreCase("contam") || a.equalsIgnoreCase("printcontam")){ 266 printContam=Parse.parseBoolean(b); 267 }else if(a.equalsIgnoreCase("contamhits2") || a.equalsIgnoreCase("contam2") || a.equalsIgnoreCase("printcontam2")){ 268 if(b==null || b.length()<1){ 269 printContam2=true; 270 }else if(Tools.isDigit(b.charAt(0)) || b.charAt(0)=='-'){ 271 contamLevel=Tools.max(0, TaxTree.levelToExtended(Integer.parseInt(b))); 272 printContam2=true; 273 }else if(TaxTree.levelMapExtendedContains(b)){ 274 contamLevel=TaxTree.stringToLevelExtended(b); 275 printContam2=true; 276 }else{ 277 printContam2=Parse.parseBoolean(b); 278 } 279 }else if(a.equalsIgnoreCase("contamLevel")){ 280 if(Tools.isDigit(b.charAt(0)) || b.charAt(0)=='-'){ 281 contamLevel=Tools.max(0, TaxTree.levelToExtended(Integer.parseInt(b))); 282 printContam2=true; 283 }else if(TaxTree.levelMapExtendedContains(b)){ 284 contamLevel=TaxTree.stringToLevelExtended(b); 285 printContam2=true; 286 } 287 } 288 289 else if(a.equalsIgnoreCase("reportAniOnly") || a.equalsIgnoreCase("AniOnly")){ 290 reportAniOnly=Parse.parseBoolean(b); 291 } 292 293 else if(a.equalsIgnoreCase("printMatches")){ 294 printMatches=Parse.parseBoolean(b); 295 }else if(a.equalsIgnoreCase("printLength")){ 296 printLength=Parse.parseBoolean(b); 297 }else if(a.equalsIgnoreCase("printTaxID")){ 298 printTaxID=Parse.parseBoolean(b); 299 }else if(a.equalsIgnoreCase("printGSize")){ 300 printGSize=Parse.parseBoolean(b); 301 }else if(a.equalsIgnoreCase("gSizeKMG")){ 302 gSizeKMG=Parse.parseBoolean(b); 303 }else if(a.equalsIgnoreCase("printGC")){ 304 printGC=Parse.parseBoolean(b); 305 }else if(a.equalsIgnoreCase("printGKmers")){ 306 printGKmers=Parse.parseBoolean(b); 307 }else if(a.equalsIgnoreCase("printCommonAncestor") || a.equalsIgnoreCase("printCA")){ 308 printCommonAncestor=Parse.parseBoolean(b); 309 }else if(a.equalsIgnoreCase("printCommonAncestorLevel") || a.equalsIgnoreCase("printCAL")){ 310 printCommonAncestorLevel=Parse.parseBoolean(b); 311 }else if(a.equalsIgnoreCase("printTaxName")){ 312 printTaxName=Parse.parseBoolean(b); 313 }else if(a.equalsIgnoreCase("printGSeqs")){ 314 printGSeqs=Parse.parseBoolean(b); 315 }else if(a.equalsIgnoreCase("printGBases")){ 316 printGBases=Parse.parseBoolean(b); 317 } 318 319 else if(a.equalsIgnoreCase("minEntropy") || a.equalsIgnoreCase("entropy") || a.equalsIgnoreCase("efilter")){ 320 minEntropy=Float.parseFloat(b); 321 }else if(a.equalsIgnoreCase("minprob") || a.equalsIgnoreCase("pfilter")){ 322 minProb=(float)Double.parseDouble(b); 323 }else if(a.equalsIgnoreCase("minQual") || a.equalsIgnoreCase("minq")){ 324 minQual=Byte.parseByte(b); 325 } 326 327 else if(a.equalsIgnoreCase("printColors") || a.equalsIgnoreCase("colors") || a.equalsIgnoreCase("color")){ 328 // System.err.println("Parsing '"+arg+"'"); //123 329 if(b==null || b.length()<1){ 330 printColors=true; 331 }else if(b.equalsIgnoreCase("t") || b.equalsIgnoreCase("true")){ 332 printColors=true; 333 }else if(b.equalsIgnoreCase("f") || b.equalsIgnoreCase("false")){ 334 printColors=false; 335 }else{ 336 printColors=true; 337 if(Tools.isDigit(b.charAt(0)) || b.charAt(0)=='-'){ 338 colorLevel=Tools.max(0, TaxTree.levelToExtended(Integer.parseInt(b))); 339 }else{ 340 colorLevel=TaxTree.stringToLevelExtended(b); 341 } 342 } 343 setColors=true; 344 // System.err.println("Parsed "+arg); //123 345 }else if(a.equalsIgnoreCase("colorLevel")){ 346 // System.err.println("Parsing '"+arg+"'"); //123 347 if(Tools.isDigit(b.charAt(0)) || b.charAt(0)=='-'){ 348 colorLevel=Tools.max(0, TaxTree.levelToExtended(Integer.parseInt(b))); 349 }else{ 350 colorLevel=TaxTree.stringToLevelExtended(b); 351 } 352 // System.err.println("Parsed "+arg); //123 353 } 354 355 else if(a.equalsIgnoreCase("printRefDivisor") || a.equalsIgnoreCase("printRDiv")){ 356 printRefDivisor=Parse.parseBoolean(b); 357 }else if(a.equalsIgnoreCase("printQueryDivisor") || a.equalsIgnoreCase("printQDiv")){ 358 printQueryDivisor=Parse.parseBoolean(b); 359 }else if(a.equalsIgnoreCase("printRefSize") || a.equalsIgnoreCase("printRSize")){ 360 printRefSize=Parse.parseBoolean(b); 361 }else if(a.equalsIgnoreCase("printQuerySize") || a.equalsIgnoreCase("printQSize")){ 362 printQuerySize=Parse.parseBoolean(b); 363 }else if(a.equalsIgnoreCase("printContamHits") || a.equalsIgnoreCase("printCHits")){ 364 printContamHits=Parse.parseBoolean(b); 365 } 366 367 else if(a.equalsIgnoreCase("printIntersection") || a.equalsIgnoreCase("intersection") || a.equalsIgnoreCase("intersect")){ 368 printIntersection=Parse.parseBoolean(b); 369 }else if(a.equalsIgnoreCase("mergePairs") || a.equalsIgnoreCase("merge")){ 370 mergePairs=Parse.parseBoolean(b); 371 } 372 373 else if(a.equalsIgnoreCase("printAll")){ 374 if(Parse.parseBoolean(b)){ 375 setPrintAll(); 376 } 377 } 378 379 else if(a.equals("samplerate")){ 380 samplerate=Float.parseFloat(b); 381 }else if(a.equals("reads")){ 382 maxReads=Parse.parseKMG(b); 383 }else if(a.equals("mode") || a.equalsIgnoreCase("single") || a.equalsIgnoreCase("singlesketch") || a.equalsIgnoreCase("onesketch") 384 || a.equalsIgnoreCase("persequence") || a.equalsIgnoreCase("sequence") || a.equalsIgnoreCase("pertaxa") 385 || a.equalsIgnoreCase("perheader") || a.equalsIgnoreCase("perfile")){ 386 mode=SketchObject.parseMode(arg, a, b); 387 } 388 389 //For format 3 390 else if(a.equalsIgnoreCase("useTaxidName") || a.equalsIgnoreCase("useTaxidAsName")){ 391 useTaxidName=Parse.parseBoolean(b); 392 }else if(a.equalsIgnoreCase("useImgName") || a.equalsIgnoreCase("useImgAsName")){ 393 useImgName=Parse.parseBoolean(b); 394 }else if(a.equalsIgnoreCase("useTaxName") || a.equalsIgnoreCase("useTaxAsName")){ 395 useTaxName=Parse.parseBoolean(b); 396 }else if(a.equalsIgnoreCase("useFilePrefixName") || a.equalsIgnoreCase("useFilePrefixAsName")){ 397 useFilePrefixName=Parse.parseBoolean(b); 398 } 399 400 else if(a.equalsIgnoreCase("taxfilterincludelevel") || a.equalsIgnoreCase("includelevel") 401 || a.equalsIgnoreCase("taxlevelwhite") || a.equalsIgnoreCase("ilevel") || a.equalsIgnoreCase("whitelevel")){ 402 taxLevelWhite=TaxTree.parseLevel(b);//TODO: Change to extended 403 }else if(a.equalsIgnoreCase("taxfilterinclude") || a.equalsIgnoreCase("include") || a.equalsIgnoreCase("taxfilterwhitelist")){ 404 taxFilterWhiteList=b; 405 }else if(a.equalsIgnoreCase("taxfilterincludestring") || a.equalsIgnoreCase("includestring") 406 || a.equalsIgnoreCase("taxfilterwhitestring") || a.equalsIgnoreCase("istring")){ 407 taxFilterWhiteString=b; 408 }else if(a.equalsIgnoreCase("banUnclassified") || a.equalsIgnoreCase("noUnclassified")){ 409 banUnclassified=Parse.parseBoolean(b); 410 }else if(a.equalsIgnoreCase("banVirus") || a.equalsIgnoreCase("noVirus") || a.equalsIgnoreCase("banViruses") || a.equalsIgnoreCase("noViruses")){ 411 banVirus=Parse.parseBoolean(b); 412 } 413 414 else if(a.equalsIgnoreCase("taxfilterexcludelevel") || a.equalsIgnoreCase("excludelevel") 415 || a.equalsIgnoreCase("taxlevelblack") || a.equalsIgnoreCase("elevel") || a.equalsIgnoreCase("blacklevel")){ 416 taxLevelBlack=TaxTree.parseLevel(b);//TODO: Change to extended 417 }else if(a.equalsIgnoreCase("taxfilterexclude") || a.equalsIgnoreCase("exclude") || a.equalsIgnoreCase("taxfilterblacklist")){ 418 taxFilterBlackList=b; 419 }else if(a.equalsIgnoreCase("taxfilterexcludestring") || a.equalsIgnoreCase("excludestring") 420 || a.equalsIgnoreCase("taxfilterblackstring") || a.equalsIgnoreCase("estring")){ 421 taxFilterBlackString=b; 422 } 423 424 else if(a.equalsIgnoreCase("minkmercount") || a.equalsIgnoreCase("minkeycount") || a.equalsIgnoreCase("mincount") || a.equalsIgnoreCase("minKeyOccuranceCount")){ 425 minKeyOccuranceCount=Tools.max(1, Integer.parseInt(b)); 426 } 427 428 //TODO: Eventually remove support for "amino" and "k" and just support "hamino" and "hk" 429 //This stands for "header amino" and "header k". 430 431 //Parameters for compatibility verification 432 else if(a.equalsIgnoreCase("k") || a.equalsIgnoreCase("hk")){ 433 // System.err.println("A: k="+k+", k2="+k2+", arg="+arg); 434 if(b.indexOf(',')>=0){ 435 String[] split=b.split(","); 436 assert(split.length==2) : "\nBad argument "+arg+"\n"+b+"\n"; 437 int x=Integer.parseInt(split[0]); 438 int y=Integer.parseInt(split[1]); 439 k=Tools.max(x, y); 440 k2=Tools.min(x, y); 441 if(k==k2){k2=0;} 442 // System.err.println("B: k="+k+", k2="+k2+", split="+Arrays.toString(split)); 443 }else{ 444 k=Integer.parseInt(b); 445 // System.err.println("C: k="+k+", k2="+k2); 446 } 447 }else if(a.equalsIgnoreCase("hashversion") || a.equalsIgnoreCase("hv")){ 448 hashVersion=Integer.parseInt(b); 449 }else if(a.equalsIgnoreCase("amino") || a.equalsIgnoreCase("hamino")){ 450 amino=Parse.parseBoolean(b); 451 if(amino){translate=false;} 452 }else if(a.equalsIgnoreCase("translate")){ 453 translate=Parse.parseBoolean(b); 454 if(translate){amino=false;} 455 }else if(a.equalsIgnoreCase("sixframes")){ 456 sixframes=Parse.parseBoolean(b); 457 if(sixframes){amino=false; translate=true;} 458 } 459 460 else if(a.equalsIgnoreCase("requiredmeta") || a.equalsIgnoreCase("rmeta")){ 461 if(b==null){requiredMeta=null;} 462 else{ 463 String[] split2=b.split(","); 464 requiredMeta=new ArrayList<String>(split2.length); 465 for(String mt : split2){ 466 assert(mt.indexOf(':')>=0) : "Metadata tags must contain ':' symbol: "+mt; 467 requiredMeta.add(mt); 468 } 469 } 470 }else if(a.equalsIgnoreCase("bannedmeta") || a.equalsIgnoreCase("bmeta")){ 471 if(b==null){bannedMeta=null;} 472 else{ 473 String[] split2=b.split(","); 474 bannedMeta=new ArrayList<String>(split2.length); 475 for(String mt : split2){ 476 assert(mt.indexOf(':')>=0) : "Metadata tags must contain ':' symbol: "+mt; 477 bannedMeta.add(mt); 478 } 479 } 480 } 481 482 // else if(a.equalsIgnoreCase("requiredtaxid") || a.equalsIgnoreCase("rtaxid")){ 483 // if(b==null){requiredTaxid=null;} 484 // else{ 485 // String[] split2=b.split(","); 486 // requiredTaxid=new IntList(split2.length); 487 // for(String mt : split2){ 488 // requiredTaxid.add(Integer.parseInt(mt)); 489 // } 490 // if(requiredTaxid.isEmpty()){requiredTaxid=null;} 491 // } 492 // }else if(a.equalsIgnoreCase("bannedtaxid") || a.equalsIgnoreCase("btaxid")){ 493 // if(b==null){bannedTaxid=null;} 494 // else{ 495 // String[] split2=b.split(","); 496 // bannedTaxid=new IntList(split2.length); 497 // for(String mt : split2){ 498 // bannedTaxid.add(Integer.parseInt(mt)); 499 // } 500 // if(bannedTaxid.isEmpty()){bannedTaxid=null;} 501 // } 502 // } 503 504 else if(a.equalsIgnoreCase("requiredmetaand") || a.equalsIgnoreCase("rmetaand")){ 505 requiredMetaAnd=Parse.parseBoolean(b); 506 }else if(a.equalsIgnoreCase("requiredmetaor") || a.equalsIgnoreCase("rmetaor")){ 507 requiredMetaAnd=!Parse.parseBoolean(b); 508 } 509 510 else if(a.equalsIgnoreCase("bbversion")){ 511 inputVersion=b; 512 } 513 514 else{ 515 return false; 516 } 517 return true; 518 } 519 postParse(boolean requireTree, boolean makeTaxFilters)520 public void postParse(boolean requireTree, boolean makeTaxFilters){ 521 assert(!postParsed); 522 synchronized(this){ 523 if(postParsed){return;} 524 525 if(makeTaxFilters){ 526 if(taxFilterWhiteList!=null || taxFilterWhiteString!=null){ 527 taxFilterWhite=new TaxFilter(SketchObject.taxtree, true); 528 taxFilterWhite.setLevel(taxLevelWhite, false); 529 taxFilterWhite.makeSet(); 530 taxFilterWhite.addNamesOrNumbers(taxFilterWhiteList, false); 531 taxFilterWhite.setContainsString(taxFilterWhiteString); 532 if(requireTree){ 533 assert(SketchObject.taxtree!=null) : "No taxtree loaded."; 534 taxFilterWhite.setTree(SketchObject.taxtree); 535 taxFilterWhite.promote(); 536 } 537 } 538 539 if(taxFilterBlackList!=null || taxFilterBlackString!=null){ 540 taxFilterBlack=new TaxFilter(SketchObject.taxtree, false); 541 taxFilterBlack.setLevel(taxLevelBlack, false); 542 taxFilterBlack.makeSet(); 543 taxFilterBlack.addNamesOrNumbers(taxFilterBlackList, false); 544 taxFilterBlack.setContainsString(taxFilterBlackString); 545 if(requireTree){ 546 assert(SketchObject.taxtree!=null) : "No taxtree loaded."; 547 taxFilterBlack.setTree(SketchObject.taxtree); 548 taxFilterBlack.promote(); 549 } 550 } 551 } 552 553 noFilters=(!hasMetaFilters() && !hasTaxFilters() && !requireSSU && minRefSizeEstimate<1 && minRefSizeBases<1); 554 postParsed=true; 555 } 556 } 557 postParsed()558 public boolean postParsed(){return postParsed;} 559 560 @Override toString()561 public String toString(){ 562 return toString(-1); 563 } 564 toString(int chunkNum)565 public String toString(int chunkNum){ 566 StringBuilder sb=new StringBuilder(); 567 sb.append("##"); 568 sb.append("hits=").append(minHits); 569 if(chunkNum>=0){sb.append(" chunk=").append(chunkNum);} 570 sb.append(" wkid=").append(String.format(Locale.ROOT, "%.5f",minWKID)); 571 if(minANI>0){sb.append(" id=").append(String.format(Locale.ROOT, "%.5f",minANI));} 572 if(minBases>0){sb.append(" minbases=").append(minBases);} 573 if(minSizeRatio>0){sb.append(" minsizeratio=").append(String.format(Locale.ROOT, "%.5f",minSizeRatio));} 574 sb.append(" records=").append(maxRecords); 575 if(recordsPerLevel>0){sb.append(" recordsperlevel=").append(recordsPerLevel);} 576 sb.append(" format=").append(format); 577 sb.append(" level=").append(taxLevel); 578 if(inputVersion!=null){sb.append(" bbversion=").append(inputVersion);} 579 580 if(k!=SketchObject.defaultK || k2!=0 || k!=SketchObject.k || k2!=SketchObject.k2){ 581 assert(k>0 && k2>=0 && k2<k) : "Bad values for k: "+k+", "+k2+", "+SketchObject.k+", "+SketchObject.k2; 582 assert(SketchObject.k>0 && SketchObject.k2>=0 && SketchObject.k2<SketchObject.k) : "Bad values for k: "+k+", "+k2+", "+SketchObject.k+", "+SketchObject.k2; 583 sb.append(" hk=").append(SketchObject.k).append(',').append(SketchObject.k2); 584 } 585 if(SketchObject.amino){sb.append(" hamino=").append(SketchObject.amino);} //TODO: This conflicts with Parser flag 586 if(SketchObject.translate){sb.append(" translate=").append(SketchObject.translate);} 587 if(SketchObject.sixframes){sb.append(" sixframes=").append(SketchObject.sixframes);} 588 if(SketchObject.HASH_VERSION>1){sb.append(" hashversion=").append(SketchObject.HASH_VERSION);} 589 590 if(true){sb.append(" printSSU=").append(printSSU());} 591 if(requireSSU){sb.append(" requireSSU=").append(requireSSU);} 592 if(minRefSizeEstimate>0){sb.append(" minRefSizeEstimate=").append(minRefSizeEstimate);} 593 if(minRefSizeBases>0){sb.append(" minRefSizeBases=").append(minRefSizeBases);} 594 595 if(json()){sb.append(" printSSUSequence=").append(printSSUSequence);} 596 if(printSSULen){sb.append(" printSSULen=").append(printSSULen);} 597 if(true || printTax!=default_printTax){sb.append(" printTax=").append(printTax);} 598 // if(true || printFileName!=default_printFileName){sb.append(" printfname=").append(printFileName);} 599 if(true || printQueryFileName!=default_printQueryFileName){sb.append(" printqfname=").append(printQueryFileName);} 600 if(true || printRefFileName!=default_printRefFileName){sb.append(" printrfname=").append(printRefFileName);} 601 if(true || printOriginalName!=default_printOriginalName){sb.append(" pn0=").append(printOriginalName);} 602 if(true || printImg!=default_printImg){sb.append(" printImg=").append(printImg);} 603 if(true || printAni!=default_printAni){sb.append(" printAni=").append(printAni);} 604 if(!printKID){sb.append(" printKID=").append(printKID);} 605 if(!printWKID){sb.append(" printWKID=").append(printWKID);} 606 if(true || printCompleteness!=default_printCompleteness){sb.append(" printCompleteness=").append(printCompleteness);} 607 608 if(true || printUnique!=default_printUnique){sb.append(" printUMatches=").append(printUnique);} 609 if(true || printUnique2!=default_printUnique2){sb.append(" printUnique2=").append(printUnique2);} 610 if(true || printUnique3!=default_printUnique3){sb.append(" printUnique3=").append(printUnique3);} 611 if(true || printUContam!=default_printUContam){sb.append(" printUContam=").append(printUContam);} 612 if(true || printNoHit!=default_printNoHit){sb.append(" printNoHit=").append(printNoHit);} 613 if(true || printContam!=default_printContam){sb.append(" contam=").append(printContam);} 614 if(true){sb.append(" contam2=").append(printContam2 ? TaxTree.extendedToLevel(contamLevel)+"" : "f");} 615 616 if(true || printScore!=default_printScore){sb.append(" printScore=").append(printScore);} 617 if(true || printEValue!=default_printEValue){sb.append(" printEValue=").append(printEValue);} 618 619 if(true || printDepth!=default_printDepth){sb.append(" printDepth=").append(printDepth);} 620 if(true || printDepth2!=default_printDepth2){sb.append(" printDepth2=").append(printDepth2);} 621 if(true || printActualDepth!=default_printActualDepth){sb.append(" printActualDepth=").append(printActualDepth);} 622 if(true || printVolume!=default_printVolume){sb.append(" printVolume=").append(printVolume);} 623 if(true || printRefHits!=default_printRefHits){sb.append(" printRefHits=").append(printRefHits);} 624 625 if(true || printMatches!=default_printMatches){sb.append(" printMatches=").append(printMatches);} 626 if(true || printLength!=default_printLength){sb.append(" printLength=").append(printLength);} 627 if(true || printTaxID!=default_printTaxID){sb.append(" printTaxID=").append(printTaxID);} 628 if(true || printGSize!=default_printGSize){sb.append(" printGSize=").append(printGSize);} 629 if(true || gSizeKMG!=default_gSizeKMG){sb.append(" gSizeKMG=").append(gSizeKMG);} 630 if(true || printGC!=default_printGC){sb.append(" printGC=").append(printGC);} 631 if(true || printGKmers!=default_printGKmers){sb.append(" printGKmers=").append(printGKmers);} 632 633 if(printCommonAncestor){sb.append(" printCommonAncestor=").append(printCommonAncestor);} 634 if(printCommonAncestorLevel){sb.append(" printCommonAncestorLevel=").append(printCommonAncestorLevel);} 635 636 if(true || printTaxName!=default_printTaxName){sb.append(" printTaxName=").append(printTaxName);} 637 if(true || printGSeqs!=default_printGSeqs){sb.append(" printGSeqs=").append(printGSeqs);} 638 if(true || printGBases!=default_printGBases){sb.append(" printGBases=").append(printGBases);} 639 if(true || minEntropy!=default_minEntropy){sb.append(" minEntropy=").append(String.format(Locale.ROOT, "%.4f", minEntropy));} 640 if(true || minProb!=default_minProb){sb.append(" minProb=").append(String.format(Locale.ROOT, "%.4f", minProb));} 641 if(true || minQual!=default_minQual){sb.append(" minQual=").append((int)minQual);} 642 if(jsonArray!=default_jsonArray){sb.append(" jsonArray=").append(jsonArray);} 643 if(printD3!=default_printD3){sb.append(" d3=").append(printD3);} 644 if(printD3){ 645 sb.append(" D3sizeMode=").append(D3sizeMode); 646 sb.append(" D3LevelNodes=").append(D3LevelNodes); 647 } 648 if(comparator!=Comparison.scoreComparator){sb.append(" ").append(comparator.toString());} 649 650 if(taxFilterWhiteList!=null || taxFilterWhiteString!=null){ 651 if(taxFilterWhiteList!=null){sb.append(" taxfilterwhitelist=").append(taxFilterWhiteList);} 652 if(taxFilterWhiteString!=null){sb.append(" taxfilterwhitestring=").append(taxFilterWhiteString);} 653 sb.append(" taxlevelwhite=").append(taxLevelWhite); 654 } 655 if(taxFilterBlackList!=null || taxFilterBlackString!=null){ 656 if(taxFilterBlackList!=null){sb.append(" taxfilterblacklist=").append(taxFilterBlackList);} 657 if(taxFilterBlackString!=null){sb.append(" taxfilterblackstring=").append(taxFilterBlackString);} 658 sb.append(" taxlevelblack=").append(taxLevelBlack); 659 } 660 if(banUnclassified){sb.append(" banunclassified");} 661 if(banVirus){sb.append(" banvirus");} 662 663 if(useTaxidName){sb.append(" useTaxidName=").append(useTaxidName);} 664 if(useImgName){sb.append(" useImgName=").append(useImgName);} 665 if(useTaxName){sb.append(" useTaxName=").append(useTaxName);} 666 667 if(true){sb.append(" colors=").append(printColors ? TaxTree.extendedToLevel(colorLevel)+"" : "f");} 668 669 if(minKeyOccuranceCount!=default_minKeyOccuranceCount){sb.append(" minKeyOccuranceCount=").append(minKeyOccuranceCount);} 670 671 // if(printColors && colorLevel!=default_colorLevel){sb.append(" colorLevel=").append(TaxTree.extendedToLevel(colorLevel));} 672 673 674 if(printRefDivisor){sb.append(" printRefDivisor=").append(printRefDivisor);} 675 if(printQueryDivisor){sb.append(" printQueryDivisor=").append(printQueryDivisor);} 676 if(printRefSize){sb.append(" printRefSize=").append(printRefSize);} 677 if(printQuerySize){sb.append(" printQuerySize=").append(printQuerySize);} 678 if(printContamHits){sb.append(" printContamHits=").append(printContamHits);} 679 if(printIntersection){sb.append(" printIntersection=").append(printIntersection);} 680 if(mergePairs){sb.append(" mergePairs=").append(mergePairs);} 681 682 if(maxReads>-1){sb.append(" reads=").append(maxReads);} 683 if(mode!=default_mode){sb.append(" mode=").append(mode);} 684 if(samplerate!=default_samplerate){sb.append(" samplerate=").append(String.format(Locale.ROOT, "%.4f",samplerate));} 685 686 if(!requiredMetaAnd){sb.append(" requiredmetaand="+requiredMetaAnd);} 687 if(requiredMeta!=null && !requiredMeta.isEmpty()){ 688 sb.append(" rmeta="); 689 for(String s : requiredMeta){ 690 sb.append(s); 691 sb.append(','); 692 } 693 sb.setLength(sb.length()-1); 694 } 695 if(bannedMeta!=null && !bannedMeta.isEmpty()){ 696 sb.append(" bmeta="); 697 for(String s : bannedMeta){ 698 sb.append(s); 699 sb.append(','); 700 } 701 sb.setLength(sb.length()-1); 702 } 703 // if(requiredTaxid!=null && !requiredTaxid.isEmpty()){ 704 // sb.append(" rtaxid="); 705 // for(int i=0; i<requiredTaxid.size; i++){ 706 // sb.append(requiredTaxid.get(i)); 707 // sb.append(','); 708 // } 709 // sb.setLength(sb.length()-1); 710 // } 711 // if(bannedTaxid!=null && !bannedTaxid.isEmpty()){ 712 // sb.append(" btaxid="); 713 // for(int i=0; i<bannedTaxid.size; i++){ 714 // sb.append(bannedTaxid.get(i)); 715 // sb.append(','); 716 // } 717 // sb.setLength(sb.length()-1); 718 // } 719 720 sb.append('\n'); 721 return sb.toString(); 722 } 723 compatible()724 public boolean compatible(){ 725 return SketchObject.k==k && SketchObject.k2==k2 && SketchObject.aminoOrTranslate()==aminoOrTranslate() && hashVersion==SketchObject.HASH_VERSION; 726 } 727 setPrintAll()728 public void setPrintAll(){ 729 printSSU=true; 730 printSSULen=true; 731 printSSUSequence=true; 732 printTax=true; 733 printQueryFileName=true; 734 printRefFileName=true; 735 printOriginalName=true; 736 printImg=true; 737 printAni=true; 738 printKID=true; 739 printWKID=true; 740 printCompleteness=true; 741 printScore=true; 742 printEValue=true; 743 printDepth=true; 744 printDepth2=true; 745 printVolume=true; 746 printRefHits=true; 747 748 printMatches=true; 749 printLength=true; 750 printTaxID=true; 751 printGSize=true; 752 printGC=true; 753 printGKmers=true; 754 printTaxName=true; 755 printGSeqs=true; 756 printGBases=true; 757 758 // printColors=true; 759 760 printUnique=true; 761 printUnique2=true; 762 printUnique3=true; 763 printUContam=true; 764 printNoHit=true; 765 printContam=true; 766 printContam2=true; 767 768 printRefDivisor=true; 769 printQueryDivisor=true; 770 printRefSize=true; 771 printQuerySize=true; 772 printContamHits=true; 773 } 774 775 /*--------------------------------------------------------------*/ 776 /*---------------- JSON ----------------*/ 777 /*--------------------------------------------------------------*/ 778 toJson(SketchResults sr)779 public JsonObject toJson(SketchResults sr){ 780 JsonObject j=toJson(sr.sketch); 781 if(sr.list!=null){ 782 int i=0; 783 for(Comparison c : sr.list){ 784 JsonObject jc=toJson(c); 785 j.add(c.name(), jc); 786 i++; 787 if(i>=maxRecords){break;} 788 } 789 } 790 791 if(jsonArray){ 792 toJsonArrayForm(j); 793 } 794 795 if(printD3){ 796 j.add("D3", toD3(sr)); 797 } 798 799 return j; 800 } 801 toJsonArrayForm(JsonObject j0)802 public void toJsonArrayForm(JsonObject j0){ 803 if(j0.jmapSize()<1){return;} 804 ArrayList<Object> list1=new ArrayList<Object>(j0.jmapSize()); 805 Object[] keys=null; 806 for(Entry<String, JsonObject> e1 : j0.jmap.entrySet()){ 807 JsonObject j1=e1.getValue(); 808 ArrayList<Object> list2=new ArrayList<Object>(j1.omapSize()); 809 for(Entry<String, Object> e2 : j1.omap.entrySet()){ 810 Object o2=e2.getValue(); 811 list2.add(o2); 812 } 813 list1.add(list2.toArray()); 814 if(keys==null){ 815 ArrayList<Object> keyList=new ArrayList<Object>(j1.omapSize()); 816 for(Entry<String, Object> e2 : j1.omap.entrySet()){ 817 Object o2=e2.getKey(); 818 keyList.add(o2); 819 } 820 keys=keyList.toArray(); 821 } 822 } 823 824 JsonObject title=new JsonObject(); 825 for(Entry<String, Object> e : j0.omap.entrySet()){ 826 title.add(e.getKey(), e.getValue()); 827 } 828 829 j0.clearJson(); 830 j0.clearOmap(); 831 832 j0.add("title", title); 833 j0.add("header", keys); 834 j0.add("rows", list1.toArray()); 835 } 836 toJson(Sketch sk)837 public JsonObject toJson(Sketch sk){ 838 assert(format==FORMAT_JSON); 839 840 JsonObject j=new JsonObject(); 841 j.add("Name", sk.name()); 842 if(dbName!=null){j.add("DB", dbName);} 843 j.add("SketchLen", sk.length()); 844 845 j.add("Seqs", sk.genomeSequences); 846 j.add("Bases", sk.genomeSizeBases); 847 j.add("gSize", sk.genomeSizeEstimate()); 848 if(sk.baseCounts!=null){j.addLiteral("GC", sk.gc(), 3);} 849 if(sk.probCorrect<1 && sk.probCorrect>0){j.add("Quality", sk.probCorrect);} 850 if(sk.keyCounts!=null){ 851 double d=Tools.averageDouble(sk.keyCounts); 852 j.add("AvgCount", d); 853 j.add("Depth", Tools.observedToActualCoverage(d)); 854 } 855 856 if(sk.imgID>0){j.add("IMG", sk.imgID);} 857 if(sk.spid>0){j.add("spid", sk.spid);} 858 if(sk.taxID>0 && sk.taxID<SketchObject.minFakeID){j.add("TaxID", sk.taxID);} 859 860 if((printRefFileName) && sk.fname()!=null){j.add("file", sk.fname());} 861 if(printOriginalName && sk.name0()!=null){j.add("SeqName", sk.name0());} 862 863 if(sk.meta!=null){ 864 for(String st : sk.meta){ 865 int colon=st.indexOf(':'); 866 j.add(st.substring(0, colon), st.substring(colon+1)); 867 } 868 } 869 870 if(printSSULen){ 871 if(sk.r16SLen()>0){j.add("16SLen", sk.r16SLen());} 872 if(sk.r18SLen()>0){j.add("18SLen", sk.r18SLen());} 873 } 874 if(printSSUSequence){ 875 if(sk.r16S()!=null){j.add("16SSequence", new String(sk.r16S()));} 876 if(sk.r18S()!=null){j.add("18SSequence", new String(sk.r18S()));} 877 } 878 879 return j; 880 } 881 toJson(Comparison c)882 public JsonObject toJson(Comparison c){ 883 final int tid=c.taxID; 884 885 JsonObject j=new JsonObject(); 886 887 //Text fields 888 if(printTaxName){j.add("taxName", c.taxName()==null ? "." : c.taxName());} 889 890 if(printCommonAncestor){j.add("commonAncestor", c.commonAncestor());} 891 if(printCommonAncestorLevel){j.add("commonAncestorLevel", c.commonAncestorLevel());} 892 893 if(printRefFileName){j.add("file", c.fname()==null ? "." : c.fname());} 894 if(printOriginalName){j.add("seqName", c.name0()==null ? "." : c.name0());} 895 if(printTax && SketchObject.taxtree!=null){ 896 TaxNode tn=null; 897 if(tid>0 && tid<SketchObject.minFakeID){ 898 tn=SketchObject.taxtree.getNode(tid); 899 } 900 901 if(tn!=null){ 902 j.add("taxonomy", SketchObject.taxtree.toSemicolon(tn, SketchObject.skipNonCanonical, false)); 903 }else{ 904 j.add("taxonomy", (Object)null); 905 } 906 } 907 908 if(printWKID){j.addLiteral("WKID", 100*c.wkid(), 4);} 909 if(printKID){j.addLiteral("KID", 100*c.kid(), 4);} 910 // if(printSSU() && c.ssuIdentity()>0){j.addLiteral("SSU", 100*c.ssuIdentity(), 3);} //Old 911 if(printSSU() && c.ssuIdentity()>0){ 912 j.addLiteral(c.ssuType()==18 ? "18S" : "16S", 100*c.ssuIdentity(), 3); 913 } 914 915 //Primary fields 916 if(printAni){j.addLiteral((aminoOrTranslate() ? "AAI" : "ANI"), 100*c.ani(), 3);} 917 if(printCompleteness){j.addLiteral("Complt", 100*c.completeness(), 3);} 918 if(printContam){j.addLiteral("Contam", 100*c.contamFraction(), 3);} 919 if(printContam2){j.addLiteral("Contam2", 100*c.contam2Fraction(), 3);} 920 if(printUContam){j.addLiteral("uContam", 100*c.uContamFraction(), 3);} 921 if(printScore){j.add("Score", c.score());} 922 if(printEValue){j.add("E-Val", String.format(Locale.ROOT, "%5.2e", c.eValue()));} 923 924 if(printDepth){j.add("Depth", c.depth(printActualDepth));} 925 if(printDepth2){j.add("Depth2", c.depth2(printActualDepth));} 926 if(printVolume){j.add("Volume", c.volume()+0.001);} 927 if(printRefHits){j.add("RefHits", c.avgRefHits());} 928 929 if(printMatches){j.add("Matches", c.hits());} 930 if(printUnique){j.add("Unique", c.uHits());} 931 if(printUnique2){j.add("Unique2", c.unique2());} 932 if(printUnique3){j.add("Unique3", c.unique3());} 933 if(printNoHit){j.add("noHit", c.noHits());} 934 if(printLength){j.add("Length", c.maxDivisor());} 935 if(printTaxID){j.add("TaxID", tid>=SketchObject.minFakeID ? -1 : tid);} 936 if(printImg){j.add("ImgID", c.imgID());} 937 if(printGBases){j.add("gBases", c.genomeSizeBases());} 938 if(printGKmers){j.add("gKmers", c.genomeSizeKmers());} 939 if(printGSize){j.add("gSize", c.genomeSizeEstimate());} 940 if(printGSeqs){j.add("gSeqs", c.genomeSequences());} 941 if(c.hasGC()){j.addLiteral("GC", c.gc(), 3);} 942 943 //Raw fields 944 if(printRefDivisor){j.add("rDiv", c.refDivisor());} 945 if(printQueryDivisor){j.add("qDiv", c.queryDivisor());} 946 if(printRefSize){j.add("rSize", c.refSize());} 947 if(printQuerySize){j.add("qSize", c.querySize());} 948 if(printContamHits){j.add("cHits", c.contamHits());} 949 950 951 952 if(printSSULen){ 953 if(c.has18S()){j.add("18SLen", c.b.r18SLen());} 954 /*else*/ if(c.has16S()){j.add("16SLen", c.b.r16SLen());} 955 } 956 if(printSSUSequence){ 957 if(c.has18S()){j.add("18SSequence", new String(c.b.r18S()));} 958 /*else*/ if(c.has16S()){j.add("16SSequence", new String(c.b.r16S()));} 959 } 960 961 if(printIntersection){ 962 Sketch intersection=Sketch.intersection(c.a, c.b); 963 j.add("intersection", intersection.toString()); 964 } 965 966 return j; 967 } 968 json()969 public boolean json(){return format==FORMAT_JSON;} 970 971 /*--------------------------------------------------------------*/ 972 /*---------------- D3 ----------------*/ 973 /*--------------------------------------------------------------*/ 974 toD3(SketchResults sr)975 public JsonObject toD3(SketchResults sr){ 976 if(sr==null || sr.isEmpty()){return new JsonObject("name", "no hits");} 977 JsonObject root=new JsonObject("name", "life"); 978 root.add("level", TaxTree.LIFE_E); 979 if(sr.list!=null){ 980 int i=0; 981 for(Comparison c : sr.list){ 982 ArrayList<JsonObject> tax=toD3List(c); 983 addToLevel(root, tax, 0); 984 i++; 985 if(i>=maxRecords){break;} 986 } 987 } 988 if(D3LevelNodes){ 989 root=converToD3ArrayFormat_LevelNode(root); 990 }else{ 991 root=converToD3ArrayFormat_SingleNodeRoot(root); 992 } 993 return root; 994 } 995 converToD3ArrayFormat_SingleNodeRoot(JsonObject root)996 private JsonObject converToD3ArrayFormat_SingleNodeRoot(JsonObject root){ 997 JsonObject children=root.removeJson("children"); 998 if(children==null){return root;} 999 Object[] array=children.toJmapArray(); 1000 root=(JsonObject)array[0];//Life node 1001 1002 assert(root.getString("name").equalsIgnoreCase("Life")) : root; 1003 return converToD3ArrayFormat_SingleNode(root); 1004 } 1005 converToD3ArrayFormat_SingleNode(JsonObject nameNode)1006 private JsonObject converToD3ArrayFormat_SingleNode(JsonObject nameNode){ 1007 Object[] levelNodes=nameNode.toJmapArray(); 1008 if(levelNodes==null){return nameNode;} 1009 nameNode.clearJson(); 1010 1011 ArrayList<JsonObject> fixed=new ArrayList<JsonObject>(); 1012 for(Object o : levelNodes){ 1013 JsonObject levelNode=(JsonObject)o; 1014 String level=levelNode.getString("name"); 1015 JsonObject children=levelNode.removeJson("children"); 1016 if(children!=null){ 1017 Object[] childArray=children.toJmapArray(); 1018 for(Object o2 : childArray){ 1019 JsonObject child=(JsonObject)o2;//Now a name node 1020 String name=(String)child.removeObject("name"); 1021 child.add("name", level+": "+name); 1022 converToD3ArrayFormat_SingleNode(child); 1023 fixed.add(child); 1024 } 1025 } 1026 } 1027 Object[] children=fixed.toArray(); 1028 nameNode.add("children", children); 1029 return nameNode; 1030 } 1031 converToD3ArrayFormat_LevelNode(JsonObject levelNode)1032 private JsonObject converToD3ArrayFormat_LevelNode(JsonObject levelNode){ 1033 JsonObject children=levelNode.removeJson("children"); 1034 if(children==null){return levelNode;} 1035 1036 Object[] array=children.toJmapArray(); 1037 levelNode.add("children", array); 1038 for(Object o : array){ 1039 converToD3ArrayFormat_NameNode((JsonObject)o); 1040 } 1041 return levelNode; 1042 } 1043 converToD3ArrayFormat_NameNode(JsonObject nameNode)1044 private JsonObject converToD3ArrayFormat_NameNode(JsonObject nameNode){ 1045 Object[] array=nameNode.toJmapArray(); 1046 if(array==null){return nameNode;} 1047 1048 nameNode.clearJson(); 1049 nameNode.add("children", array); 1050 for(Object o : array){ 1051 converToD3ArrayFormat_LevelNode((JsonObject)o); 1052 } 1053 return nameNode; 1054 } 1055 addToLevel(JsonObject levelNode, ArrayList<JsonObject> list, int pos)1056 void addToLevel(JsonObject levelNode, ArrayList<JsonObject> list, int pos){ 1057 JsonObject jo=list.get(pos); 1058 int rootLevel=levelNode.getInt("level"); 1059 int joLevel=jo.getInt("level"); 1060 if(rootLevel==joLevel){ 1061 assert(levelNode.getString("name").equalsIgnoreCase(jo.getString("levelname"))) : levelNode+"\n"+jo; 1062 addAsChild(levelNode, list, pos); 1063 }else{ 1064 assert(joLevel<rootLevel) : levelNode+"\n"+jo; 1065 assert(false) : levelNode+"\n"+jo; 1066 } 1067 } 1068 1069 void addAsChild(JsonObject levelNode, ArrayList<JsonObject> list, int pos){ 1070 JsonObject children=levelNode.getJson("children"); 1071 if(children==null){ 1072 children=new JsonObject(); 1073 levelNode.add("children", children); 1074 } 1075 JsonObject jo=list.get(pos); 1076 String taxName=jo.getString("name"); 1077 JsonObject nameNode=children.getJson(taxName); 1078 if(nameNode==null){ 1079 nameNode=new JsonObject("name", taxName); 1080 children.add(taxName, nameNode); 1081 } 1082 Number size=jo.getNumber("size"); 1083 Number oldSize=nameNode.getNumber("size"); 1084 if(size!=null && (oldSize==null || oldSize.doubleValue()<size.doubleValue())){ 1085 nameNode.add("size", jo.getNumber("size")); 1086 nameNode.add("kid", jo.getNumber("kid")); 1087 nameNode.add("wkid", jo.getNumber("wkid")); 1088 nameNode.add("ani", jo.getNumber("ani")); 1089 nameNode.add("hits", jo.getNumber("hits")); 1090 nameNode.add("depth", jo.getNumber("depth")); 1091 } 1092 1093 if(pos<list.size()-1){//recur 1094 jo=list.get(pos+1); 1095 String levelName=jo.getString("levelname"); 1096 int level=jo.getInt("level"); 1097 JsonObject nextLevelNode=nameNode.getJson(levelName); 1098 if(nextLevelNode==null){ 1099 nextLevelNode=new JsonObject("name", levelName); 1100 nextLevelNode.add("level", level); 1101 nameNode.add(levelName, nextLevelNode); 1102 } 1103 addAsChild(nextLevelNode, list, pos+1); 1104 } 1105 } 1106 1107 int promote(int levelE) { 1108 if(levelE<0){return levelE;} 1109 while(!TaxTree.isSimple2(levelE) && levelE<TaxTree.LIFE){ 1110 levelE++; 1111 } 1112 return levelE; 1113 } 1114 1115 public ArrayList<JsonObject> toD3List(Comparison c){ 1116 final ArrayList<TaxNode> nodes=toTNList(c.taxID); 1117 ArrayList<JsonObject> list=new ArrayList<JsonObject>(nodes.size()); 1118 for(TaxNode tn : nodes){ 1119 JsonObject jo=new JsonObject("name", tn.name); 1120 int levelE=promote(tn.levelExtended); 1121 jo.add("level", levelE); 1122 jo.add("levelname", TaxTree.levelToStringExtended(levelE)); 1123 list.add(jo); 1124 } 1125 if(list.size()>0){ 1126 JsonObject tail=list.get(list.size()-1); 1127 tail.add("size", toD3Size(c)); 1128 tail.add("kid", c.kid()); 1129 tail.add("wkid", c.wkid()); 1130 tail.add("ani", c.ani()); 1131 tail.add("hits", c.hits()); 1132 tail.add("depth", c.depth(printActualDepth)); 1133 } 1134 return list; 1135 } 1136 1137 private Number toD3Size(Comparison c){ 1138 if(D3sizeMode==D3_ANI_SIZE){ 1139 return c.ani(); 1140 }else if(D3sizeMode==D3_KID_SIZE){ 1141 return c.kid(); 1142 }else if(D3sizeMode==D3_WKID_SIZE){ 1143 return c.wkid(); 1144 }else if(D3sizeMode==D3_HIT_SIZE){ 1145 return c.hits(); 1146 }else if(D3sizeMode==D3_DEPTH_SIZE){ 1147 return c.depth(printActualDepth); 1148 } 1149 assert(false) : "Invalid D3sizeMode "+D3sizeMode; 1150 return c.hits(); 1151 } 1152 1153 public ArrayList<TaxNode> toTNList(final int tid){ 1154 final TaxTree tree=TaxTree.getTree(); 1155 1156 final ArrayList<TaxNode> list=new ArrayList<TaxNode>(); 1157 int nulls=0; 1158 { 1159 TaxNode tn=tree.getNode(tid); 1160 if(tn.isRanked() && !tn.cellularOrganisms()){list.add(tn);} 1161 while(tn.pid!=tn.id){ 1162 tn=tree.getNode(tn.pid); 1163 if(tn.isRanked() && !tn.cellularOrganisms()){list.add(tn);} 1164 } 1165 } 1166 Collections.reverse(list); 1167 int prevLevelE=TaxTree.LIFE; 1168 for(int i=0; i<list.size(); i++){ 1169 TaxNode tn=list.get(i); 1170 int levelE=promote(tn.levelExtended); 1171 1172 if(!TaxTree.isSimple2(levelE) || (i>0 && levelE>=prevLevelE)){ 1173 list.set(i, null); 1174 nulls++; 1175 }else{prevLevelE=levelE;} 1176 } 1177 if(nulls>0){Tools.condenseStrict(list);} 1178 return list; 1179 } 1180 1181 /*--------------------------------------------------------------*/ 1182 /*---------------- Formatting ----------------*/ 1183 /*--------------------------------------------------------------*/ 1184 1185 ByteBuilder queryHeader(Sketch sk){ 1186 ByteBuilder bb=new ByteBuilder(); 1187 if(format>2){return bb;} 1188 1189 String color=toColor(sk.taxID); 1190 if(color!=null){bb.append(color);} 1191 1192 bb.append("\nQuery: ").append(sk.name()==null ? "." : sk.name()); 1193 if(dbName!=null){bb.append("\tDB: ").append(dbName);} 1194 bb.append("\tSketchLen: ").append(sk.length()); 1195 bb.append("\tSeqs: ").append(sk.genomeSequences).append(' '); 1196 bb.append("\t"+(aminoOrTranslate() ? "SeqLen" : "Bases")+": ").append(sk.genomeSizeBases); 1197 bb.append("\tgSize: ").append(sk.genomeSizeEstimate()); 1198 if(sk.baseCounts!=null){bb.append("\tGC: ").append(sk.gc(), 3);} 1199 if(sk.probCorrect<1 && sk.probCorrect>0){bb.append("\tQuality: ").append(sk.probCorrect, 4);} 1200 if(sk.keyCounts!=null){ 1201 double d=Tools.averageDouble(sk.keyCounts); 1202 bb.append("\tAvgCount: ").append(d, 3); 1203 bb.append("\tDepth: ").append(Tools.observedToActualCoverage(d), 3); 1204 } 1205 1206 if(sk.imgID>0){bb.append("\tIMG: ").append(sk.imgID);} 1207 if(sk.spid>0){bb.append("\tspid: ").append(sk.spid);} 1208 if(sk.taxID>0 && sk.taxID<SketchObject.minFakeID){bb.append("\tTaxID: ").append(sk.taxID);} 1209 1210 if(printQueryFileName && sk.fname()!=null){bb.append("\tFile: "+sk.fname());} 1211 if(printOriginalName && sk.name0()!=null && !sk.name0().equals(sk.name())){bb.append("\tSeqName: "+sk.name0());} 1212 1213 if(sk.meta!=null){ 1214 for(String st : sk.meta){ 1215 bb.append("\t").append(st.replaceFirst(":", ": ")); 1216 } 1217 } 1218 1219 if(color!=null){bb.append(Colors.RESET);} 1220 1221 return bb; 1222 } 1223 toColorTid(final int taxID)1224 int toColorTid(final int taxID){ 1225 if(!printColors || SketchObject.taxtree==null || taxID<=0 || taxID>=SketchObject.minFakeID){return 0;} 1226 TaxNode tn=SketchObject.taxtree.getNode(taxID); 1227 while(tn!=null && tn.id!=tn.pid && tn.levelExtended<colorLevel){ 1228 tn=SketchObject.taxtree.getNode(tn.pid); 1229 // System.err.println(tn); 1230 } 1231 return tn==null || tn.levelExtended>=TaxTree.LIFE_E || (tn.levelExtended>colorLevel && tn.levelExtended>TaxTree.PHYLUM_E) ? 0 : tn.id; 1232 } 1233 toColor(final int taxID)1234 String toColor(final int taxID){ 1235 if(!printColors || SketchObject.taxtree==null || taxID<=0 || taxID>=SketchObject.minFakeID){return null;} 1236 TaxNode tn=SketchObject.taxtree.getNode(taxID); 1237 while(tn!=null && tn.id!=tn.pid && tn.levelExtended<colorLevel){ 1238 tn=SketchObject.taxtree.getNode(tn.pid); 1239 // System.err.println(tn); 1240 } 1241 if(tn==null){ 1242 return null; 1243 }else{ 1244 if(tn.levelExtended>=TaxTree.LIFE_E || (tn.levelExtended>colorLevel && tn.levelExtended>TaxTree.PHYLUM_E)){return Colors.WHITE;} 1245 else{ 1246 // System.err.println("*"+tn.id+", "+tn.id%Colors.colorArray.length); 1247 return Colors.colorArray[tn.id%Colors.colorArray.length]; 1248 } 1249 } 1250 } 1251 header()1252 String header(){ 1253 if(format==FORMAT_JSON){return null;} 1254 final String ani=(aminoOrTranslate() ? "AAI" : "ANI"); 1255 if(format==FORMAT_QUERY_REF_ANI || format==FORMAT_CONSTELLATION){ 1256 if(reportAniOnly){return "#Query\tRef\t"+ani;} 1257 if(format==FORMAT_QUERY_REF_ANI){ 1258 return "#Query\tRef\t"+ani+ 1259 "\tQSize\tRefSize\tQBases\tRBases"+ 1260 (printTaxID ? "\tQTaxID\tRTaxID" : "")+(printKID ? "\tKID" : "")+(printWKID ? "\tWKID" : "")+ 1261 (printSSU() ? "\tSSU" : "")+(printCommonAncestorLevel ? "\tCALevel" : ""); 1262 } 1263 if(format==FORMAT_CONSTELLATION){return "#Query\tRef\tKID\tWKID\t"+ani+"\tCmplt\tQSize\tRefSize\tQBases\tRefBases";} 1264 } 1265 return columnwiseHeader(); 1266 } 1267 columnwiseHeader()1268 String columnwiseHeader(){ 1269 final String ani=(aminoOrTranslate() ? "AAI" : "ANI"); 1270 1271 StringBuilder sb=new StringBuilder(); 1272 1273 //Numeric fields 1274 if(printKID){sb.append("WKID\t");} 1275 if(printWKID){sb.append("KID\t");} 1276 if(printAni){sb.append(ani+"\t");} 1277 if(printSSU()){sb.append("SSU\t");} 1278 if(printSSULen){sb.append("SSULen\t");} 1279 if(printCompleteness){sb.append("Complt\t");} 1280 if(printContam){sb.append("Contam\t");} 1281 if(printContam2){sb.append("Contam2\t");} 1282 if(printUContam){sb.append("uContam\t");} 1283 if(printScore){sb.append("Score\t");} 1284 if(printEValue){sb.append("E-Val\t");} 1285 1286 if(printDepth){sb.append("Depth\t");} 1287 if(printDepth2){sb.append("Depth2\t");} 1288 if(printVolume){sb.append("Volume\t");} 1289 if(printRefHits){sb.append("RefHits\t");} 1290 if(printMatches){sb.append("Matches\t");} 1291 if(printUnique){sb.append("Unique\t");} 1292 if(printUnique2){sb.append("Unique2\t");} 1293 if(printUnique3){sb.append("Unique3\t");} 1294 if(printNoHit){sb.append("noHit\t");} 1295 if(printLength){sb.append("Length\t");} 1296 if(printTaxID){sb.append("TaxID\t");} 1297 if(printImg){sb.append("ImgID \t");} 1298 if(printGBases){sb.append("gBases\t");} 1299 if(printGKmers){sb.append("gKmers\t");} 1300 if(printGSize){sb.append("gSize\t");} 1301 if(printGSeqs){sb.append("gSeqs\t");} 1302 if(printGC){sb.append("GC\t");} 1303 1304 1305 //Raw fields 1306 if(printRefDivisor){sb.append("rDiv\t");} 1307 if(printQueryDivisor){sb.append("qDiv\t");} 1308 if(printRefSize){sb.append("rSize\t");} 1309 if(printQuerySize){sb.append("qSize\t");} 1310 if(printContamHits){sb.append("cHits\t");} 1311 1312 //Text fields 1313 if(printCommonAncestor){sb.append("CA\t");} 1314 if(printCommonAncestorLevel){sb.append("CALevel\t");} 1315 if(printTaxName){sb.append("taxName\t");} 1316 if(printRefFileName){sb.append("file\t");} 1317 if(printOriginalName){sb.append("seqName\t");} 1318 if(printTax && SketchObject.taxtree!=null){sb.append("taxonomy\t");} 1319 1320 if(sb.length()>1){sb.setLength(sb.length()-1);}//trim trailing tab 1321 1322 return sb.toString(); 1323 } 1324 formatComparisonColumnwise(Comparison c, ByteBuilder bb, int prevTid)1325 void formatComparisonColumnwise(Comparison c, ByteBuilder bb, int prevTid){ 1326 final int tid=c.taxID; 1327 boolean reset=false; 1328 1329 if(printColors){ 1330 final int ctid=toColorTid(tid); 1331 final int prevCtid=toColorTid(prevTid); 1332 1333 final int cnum=ctid%Colors.colorArray.length; 1334 final int prevCnum=prevCtid%Colors.colorArray.length; 1335 1336 String color=toColor(tid); 1337 String underline=(printColors && cnum==prevCnum && ctid!=prevCtid && (ctid>1 && prevCtid>1) ? Colors.UNDERLINE : null); 1338 1339 if(color!=null){bb.append(color);} 1340 if(underline!=null){bb.append(underline);} 1341 reset=(color!=null || underline!=null); 1342 1343 // System.err.println((color==null ? "" : color)+(underline==null ? "" : underline)+ 1344 // tid+", "+prevTid+"; \t"+ctid+", "+prevCtid+"; \t"+cnum+", "+prevCnum+"; \t"+((underline!=null)+"")+Colors.RESET); 1345 // System.err.println(color==null ? "null" : color.substring(1)); 1346 } 1347 1348 // sb.append(String.format(Locale.ROOT, "%.2f%%\t%.2f%%", 100*c.idMinDivisor(), 100*c.idMaxDivisor())); 1349 if(printWKID){bb.append(100*c.wkid(), 2).append('%').tab();} 1350 if(printKID){bb.append(100*c.kid(), 2).append('%');} 1351 1352 // if(printAni){sb.append(String.format(Locale.ROOT, "\t%.2f%%", 100*c.ani()));} 1353 // if(printCompleteness){sb.append(String.format(Locale.ROOT, "\t%.2f%%", 100*c.completeness()));} 1354 // if(printContam){sb.append(String.format(Locale.ROOT, "\t%.2f%%", 100*c.contamFraction()));} 1355 // if(printContam2){sb.append(String.format(Locale.ROOT, "\t%.2f%%", 100*c.contam2Fraction()));} 1356 // if(printUContam){sb.append(String.format(Locale.ROOT, "\t%.2f%%", 100*c.uContamFraction()));} 1357 1358 if(printAni){bb.tab().append(100*c.ani(), 2).append('%');} 1359 if(printSSU()){ 1360 float id=100*c.ssuIdentity(); 1361 if(id>0){ 1362 bb.tab().append(id, 2).append(c.ssuType()==16 ? '%' : '*'); //This is where 16S and 18S are differentiated 1363 }else{ 1364 bb.tab().append('.'); 1365 } 1366 } 1367 if(printSSULen){ 1368 bb.tab().append(c.ssuLen()); 1369 } 1370 if(printCompleteness){bb.tab().append(100*c.completeness(), 2).append('%');} 1371 if(printContam){bb.tab().append(100*c.contamFraction(), 2).append('%');} 1372 if(printContam2){bb.tab().append(100*c.contam2Fraction(), 2).append('%');} 1373 if(printUContam){bb.tab().append(100*c.uContamFraction(), 2).append('%');} 1374 if(printScore){bb.tab().append(c.scoreS());} 1375 if(printEValue){bb.tab().append(String.format(Locale.ROOT, "%5.2e", c.eValue()));} 1376 1377 if(printDepth){bb.tab().append(c.depthS(printActualDepth));} 1378 if(printDepth2){bb.tab().append(c.depth2S(printActualDepth));} 1379 if(printVolume){bb.tab().append(c.volumeS());} 1380 if(printRefHits){bb.tab().append(c.avgRefHitsS());} 1381 1382 if(printMatches){bb.tab().append(c.hits());} 1383 if(printUnique){bb.tab().append(c.uHits());} 1384 if(printUnique2){bb.tab().append(c.unique2());} 1385 if(printUnique3){bb.tab().append(c.unique3());} 1386 if(printNoHit){bb.tab().append(c.noHits());} 1387 if(printLength){bb.tab().append( c.maxDivisor());} 1388 if(printTaxID){bb.tab().append(tid>=SketchObject.minFakeID ? -1 : tid);} 1389 if(printImg){bb.tab().append(c.imgID());} 1390 if(printGBases){appendKMG(c.genomeSizeBases(), bb);} 1391 if(printGKmers){appendKMG(c.genomeSizeKmers(), bb);} 1392 if(printGSize){appendKMG(c.genomeSizeEstimate(), bb);} 1393 if(printGSeqs){appendKMG(c.genomeSequences(), bb);} 1394 if(printGC){bb.tab().append(c.gc(),3);} 1395 1396 //Raw fields 1397 if(printRefDivisor){bb.tab().append(c.refDivisor());} 1398 if(printQueryDivisor){bb.tab().append(c.queryDivisor());} 1399 if(printRefSize){bb.tab().append(c.refSize());} 1400 if(printQuerySize){bb.tab().append(c.querySize());} 1401 if(printContamHits){bb.tab().append(c.contamHits());} 1402 1403 //Text fields 1404 if(printCommonAncestor){bb.tab().append(c.commonAncestor());} 1405 if(printCommonAncestorLevel){bb.tab().append(c.commonAncestorLevel());} 1406 if(printTaxName){bb.tab().append(c.taxName()==null ? "." : c.taxName());} 1407 if(printRefFileName){bb.tab().append(c.fname()==null ? "." : c.fname());} 1408 if(printOriginalName){bb.tab().append(c.name0()==null ? "." : c.name0());} 1409 if(printTax && SketchObject.taxtree!=null){ 1410 bb.tab(); 1411 TaxNode tn=null; 1412 if(tid>0 && tid<SketchObject.minFakeID){ 1413 tn=SketchObject.taxtree.getNode(tid); 1414 } 1415 1416 if(tn!=null){ 1417 bb.append(SketchObject.taxtree.toSemicolon(tn, SketchObject.skipNonCanonical, false)); 1418 }else{ 1419 bb.append('.'); 1420 } 1421 } 1422 if(printTaxName && !printOriginalName && !printRefFileName && c.taxName()==null && c.name0()!=null){bb.tab().append(c.name0());} //Extra column 1423 1424 if(reset){bb.append(Colors.RESET);} 1425 1426 bb.append('\n'); 1427 1428 if(printIntersection){ 1429 Sketch intersection=Sketch.intersection(c.a, c.b); 1430 bb.append(intersection.toString()); 1431 bb.append('\n'); 1432 } 1433 1434 } 1435 appendKMG(long value, ByteBuilder bb)1436 void appendKMG(long value, ByteBuilder bb){ 1437 if(gSizeKMG){ 1438 bb.tab().append(toKMG(value)); 1439 }else{ 1440 bb.tab().append(value); 1441 } 1442 } 1443 toKMG(long value)1444 String toKMG(long value){ 1445 if(value<10000000L){return Long.toString(value);} 1446 value+=5; 1447 if(value<1000000000L){return value/1000L+"K";} 1448 if(value<1000000000000L){return value/1000000L+"M";} 1449 if(value<1000000000000000L){return value/1000000000L+"G";} 1450 return value/1000000000000L+"T"; 1451 } 1452 formatComparison3Column(Comparison c, ByteBuilder sb, int prevTid)1453 void formatComparison3Column(Comparison c, ByteBuilder sb, int prevTid){ 1454 Sketch query=c.a; 1455 final long sea=Tools.max(1, c.a.genomeSizeEstimate()); 1456 final long seb=Tools.max(1, c.b.genomeSizeEstimate()); 1457 final long ba=Tools.max(1, c.a.genomeSizeBases); 1458 final long bb=Tools.max(1, c.b.genomeSizeBases); 1459 final String qName=format==FORMAT_CONSTELLATION ? (useFilePrefixName ? query.filePrefix() : ""+query.sketchID) : useTaxidName ? ""+query.taxID : 1460 useImgName ? ""+query.imgID : useTaxName ? query.taxName() : query.name(); 1461 final String rName=format==FORMAT_CONSTELLATION ? (useFilePrefixName ? c.b.filePrefix() : ""+c.b.sketchID) : useTaxidName ? ""+c.taxID() : 1462 useImgName ? ""+c.imgID() : useTaxName ? c.taxName() : c.name(); 1463 final int tid=c.taxID; 1464 boolean reset=false; 1465 1466 sb.append(qName).append('\t'); 1467 if(printColors){ 1468 final int ctid=toColorTid(tid); 1469 final int prevCtid=toColorTid(prevTid); 1470 1471 final int cnum=ctid%Colors.colorArray.length; 1472 final int prevCnum=prevCtid%Colors.colorArray.length; 1473 1474 String color=toColor(tid); 1475 String underline=(printColors && cnum==prevCnum && ctid!=prevCtid && (ctid>1 && prevCtid>1) ? Colors.UNDERLINE : null); 1476 1477 if(color!=null){sb.append(color);} 1478 if(underline!=null){sb.append(underline);} 1479 reset=(color!=null || underline!=null); 1480 1481 // System.err.println((color==null ? "" : color)+(underline==null ? "" : underline)+ 1482 // tid+", "+prevTid+"; \t"+ctid+", "+prevCtid+"; \t"+cnum+", "+prevCnum+"; \t"+((underline!=null)+"")+Colors.RESET); 1483 // System.err.println(color==null ? "null" : color.substring(1)); 1484 } 1485 1486 // sb.append(rName).append(String.format(Locale.ROOT, "\t%.2f\t%.3f", 100*c.ani(), sea/(float)seb)); 1487 // sb.append(rName).append(String.format(Locale.ROOT, "\t%.2f\t%d\t%d\t%d", 100*c.ani(), sea, seb, ba)); 1488 1489 //"#Query\tRef\tKID\tWKID\tANI\tCmplt\tQSize\tRefSize\tQBases\tRefBases"; 1490 1491 float kid=100*c.kid(); 1492 float wkid=100*c.wkid(); 1493 float ani=100*c.ani(); 1494 float complt=100*c.completeness(); 1495 float ssu=printSSU() ? 100*c.ssuIdentity() : 0; 1496 1497 sb.append(rName).append('\t'); 1498 if(reportAniOnly){ 1499 sb.append(ani, 3).append('\t'); 1500 }else if(format==FORMAT_CONSTELLATION){ 1501 sb.append(kid, 3).append('\t'); 1502 sb.append(wkid, 3).append('\t'); 1503 sb.append(ani, 3).append('\t'); 1504 sb.append(complt, 3).append('\t'); 1505 sb.append(sea).append('\t'); 1506 sb.append(seb).append('\t'); 1507 // sb.append(ba).append('\t'); 1508 // sb.append(bb).append('\t'); 1509 }else{ 1510 sb.append(ani, 3).append('\t'); 1511 sb.append(sea).append('\t'); 1512 sb.append(seb).append('\t'); 1513 sb.append(ba).append('\t'); 1514 sb.append(bb).append('\t'); 1515 if(printTaxID){sb.append(c.a.taxID).append('\t');} 1516 if(printTaxID){sb.append(c.b.taxID).append('\t');} 1517 if(printKID){sb.append(kid, 3).append('\t');} 1518 if(printWKID){sb.append(wkid, 3).append('\t');} 1519 if(printSSU()){ 1520 if(ssu>0){ 1521 sb.append(ssu, 3).append('\t'); 1522 }else{ 1523 sb.append('.').append('\t'); 1524 } 1525 } 1526 if(printCommonAncestorLevel){sb.append(c.commonAncestorLevel()).append('\t');} 1527 } 1528 sb.setLength(sb.length()-1); 1529 if(reset){sb.append(Colors.RESET);} 1530 1531 sb.append('\n'); 1532 1533 // System.err.println(sb); 1534 } 1535 formatComparison(Comparison c, ByteBuilder sb, int prevTaxID)1536 void formatComparison(Comparison c, ByteBuilder sb, int prevTaxID){ 1537 if(format==FORMAT_MULTICOLUMN){ 1538 formatComparisonColumnwise(c, sb, prevTaxID); 1539 return; 1540 }else if(format==FORMAT_QUERY_REF_ANI || format==FORMAT_CONSTELLATION){ 1541 formatComparison3Column(c, sb, prevTaxID); 1542 return; 1543 } 1544 String complt=(printCompleteness ? String.format(Locale.ROOT, "\tcomplt %.2f%%%%", 100*c.completeness()) : ""); 1545 String contam=(printContam ? String.format(Locale.ROOT, "\tcontam %.2f%%%%", 100*c.contamFraction()) : ""); 1546 // String score=(printScore ? String.format(Locale.ROOT, "\tscore %.2f", c.score2()) : ""); 1547 String score=(printScore ? "\tscore "+c.scoreS() : ""); 1548 String depth=(printDepth ? "\tdepth "+c.depthS(printActualDepth) : ""); 1549 String depth2=(printDepth2 ? "\tdepth2 "+c.depth2S(printActualDepth) : ""); 1550 String volume=(printVolume ? "\tvolume "+c.volumeS() : ""); 1551 String ccs=complt+contam+score; 1552 1553 if(format==FORMAT_OLD){ 1554 sb.append(String.format(Locale.ROOT, "WKID %.2f%%\tKID %.2f%%"+ccs+"\tmatches %d\tcompared %d", 1555 100*c.wkid(), 100*c.kid(), c.hits(), c.minDivisor())+"\ttaxID "+c.taxID()+ 1556 (printImg ? "\timgID "+c.imgID() : "")+"\tgKmers "+c.genomeSizeKmers()+"\t"+ 1557 (c.taxName()==null ? "." : c.taxName())+ 1558 ((printOriginalName || (c.taxName()==null && c.name0()!=null)) ? "\t"+(c.name0()==null ? "." : c.name0()) : "")+"\n"); 1559 if(printTax && SketchObject.taxtree!=null){ 1560 if(c.taxID()>=0 && c.taxID()<SketchObject.minFakeID){ 1561 TaxNode tn=SketchObject.taxtree.getNode(c.taxID()); 1562 if(tn!=null){ 1563 PrintTaxonomy.printTaxonomy(tn, sb, SketchObject.taxtree, TaxTree.DOMAIN, SketchObject.skipNonCanonical); 1564 } 1565 } 1566 sb.append('\n'); 1567 } 1568 }else{ 1569 ArrayList<TaxNode> tnl=new ArrayList<TaxNode>(); 1570 if(SketchObject.taxtree!=null && c.taxID()>=0 && c.taxID()<SketchObject.minFakeID){ 1571 TaxNode tn=SketchObject.taxtree.getNode(c.taxID()); 1572 while(tn!=null && tn.pid!=tn.id && tn.level<=TaxTree.DOMAIN){ 1573 tnl.add(tn); 1574 tn=SketchObject.taxtree.getNode(tn.pid); 1575 } 1576 } 1577 1578 sb.append(String.format(Locale.ROOT, "WKID %.2f%%\tKID %.2f%%"+ccs+"\tmatches %d\tcompared %d\t", 1579 100*c.wkid(), 100*c.kid(), c.hits(), c.minDivisor())); 1580 sb.append("\ttaxID ").append(c.taxID()).append('\t'); 1581 if(printImg){sb.append("\timgID ").append(c.imgID()).append('\t');} 1582 sb.append(c.taxName()).append('\t'); 1583 if(printRefFileName){sb.append(c.fname()).append('\t');} 1584 if(printOriginalName || (c.taxName()==null && c.name0()!=null && !printRefFileName)){sb.append(c.name0()).append('\t');} 1585 1586 if(printTax){ 1587 for(int i=tnl.size()-1; i>=0; i--){ 1588 TaxNode tn=tnl.get(i); 1589 sb.append(tn.name); 1590 if(i>0){sb.append(';');} 1591 } 1592 } 1593 sb.append('\n'); 1594 1595 tnl.clear(); 1596 } 1597 } 1598 1599 /*--------------------------------------------------------------*/ 1600 /*---------------- Filtering ----------------*/ 1601 /*--------------------------------------------------------------*/ 1602 passesFilter(Sketch sk)1603 public boolean passesFilter(Sketch sk){ 1604 assert(postParsed); 1605 if(noFilters){return true;} 1606 return passesSSUFilter(sk) && passesSizeFilter(sk) && passesTaxFilter(sk) && passesMetaFilter(sk); 1607 } 1608 passesTaxFilter(Sketch sk)1609 private boolean passesTaxFilter(Sketch sk){ 1610 if(taxFilterWhite==null && taxFilterBlack==null){return true;} 1611 int id=sk.taxID; 1612 if(id>0){ 1613 if(banUnclassified && SketchObject.taxtree.isUnclassified(id)){return false;} 1614 if(banVirus && SketchObject.taxtree.isVirus(id)){return false;} 1615 } 1616 String s=sk.name(); 1617 return passesTaxFilter(taxFilterWhite, id, s) && passesTaxFilter(taxFilterBlack, id, s); 1618 } 1619 passesTaxFilter(TaxFilter filter, int id, String s)1620 private boolean passesTaxFilter(TaxFilter filter, int id, String s){ 1621 if(filter==null){return true;} 1622 if(id>0 && !filter.passesFilter(id)){return false;} 1623 // if(id>0 && !filter.passesFilterFast(id)){return false;} 1624 if(s!=null && !filter.passesFilterByNameOnly(s)){return false;} 1625 return true; 1626 } 1627 passesMetaFilter(Sketch sk)1628 private boolean passesMetaFilter(Sketch sk){ 1629 if(requiredMeta==null && bannedMeta==null){return true;} 1630 return sk.passesMeta(requiredMeta, bannedMeta, requiredMetaAnd); 1631 } 1632 passesSSUFilter(Sketch sk)1633 private boolean passesSSUFilter(Sketch sk){ 1634 return !requireSSU || sk.hasSSU(); 1635 } 1636 passesSizeFilter(Sketch sk)1637 private boolean passesSizeFilter(Sketch sk){ 1638 if(minRefSizeEstimate>0 && sk.genomeSizeEstimate()<minRefSizeEstimate){return false;} 1639 return sk.genomeSizeBases>=minRefSizeBases; 1640 } 1641 1642 /*--------------------------------------------------------------*/ 1643 /*---------------- Fields ----------------*/ 1644 /*--------------------------------------------------------------*/ 1645 1646 //These are shared with SketchObject 1647 //They do not affect anything and are just for the server to validate remote settings. 1648 private int hashVersion=SketchObject.HASH_VERSION; 1649 private int k=SketchObject.k; 1650 private int k2=SketchObject.k2; 1651 boolean amino=SketchObject.amino; 1652 boolean translate=SketchObject.translate; 1653 boolean sixframes=SketchObject.sixframes; aminoOrTranslate()1654 private boolean aminoOrTranslate(){return amino | translate;} 1655 1656 boolean noFilters=false; 1657 boolean postParsed=false; 1658 amino()1659 boolean amino(){return amino;} 1660 1661 //These are unique 1662 public int maxRecords=default_maxRecords; 1663 public int recordsPerLevel=0; 1664 public float minANI=0; 1665 public int minBases=0; 1666 public float minSizeRatio=0; 1667 public float minWKID=default_minWKID; 1668 public int format=default_format; 1669 1670 /** For tracking unique SendSketch queries */ 1671 public int chunkNum=-1; 1672 public int minHits=default_minHits; 1673 public int taxLevel=default_taxLevel; 1674 public int mode=default_mode; 1675 public float samplerate=default_samplerate; 1676 public long maxReads=default_maxReads; 1677 public int minKeyOccuranceCount=default_minKeyOccuranceCount; 1678 public String inputVersion=null; 1679 1680 public String dbName=null; 1681 hasMetaFilters()1682 boolean hasMetaFilters(){return requiredMeta!=null || bannedMeta!=null/* || requiredTaxid!=null || bannedTaxid!=null*/;} hasTaxFilters()1683 boolean hasTaxFilters(){return taxFilterWhite!=null || taxFilterBlack!=null || banUnclassified || banVirus;} 1684 boolean requireSSU=false; 1685 long minRefSizeEstimate=-1; 1686 long minRefSizeBases=-1; 1687 1688 boolean requiredMetaAnd=true; 1689 ArrayList<String> requiredMeta=null; 1690 ArrayList<String> bannedMeta=null; 1691 1692 /*--------------------------------------------------------------*/ 1693 /*---------------- Print Columns ----------------*/ 1694 /*--------------------------------------------------------------*/ 1695 1696 public boolean printKID=true; 1697 public boolean printWKID=true; 1698 public boolean printSSU=true; 1699 public boolean printSSULen=false; printSSU()1700 public boolean printSSU(){return SketchObject.processSSU && printSSU;} 1701 public boolean printSSUSequence=default_printSSUSequence; 1702 1703 //For format 2 1704 public boolean printTax=default_printTax; 1705 public boolean printOriginalName=default_printOriginalName; 1706 public boolean printQueryFileName=default_printQueryFileName; 1707 public boolean printRefFileName=default_printRefFileName; 1708 public boolean printImg=default_printImg; 1709 public boolean printAni=default_printAni; 1710 public boolean printCompleteness=default_printCompleteness; 1711 public boolean printScore=default_printScore; 1712 public boolean printEValue=default_printEValue; 1713 1714 private boolean trackCounts=default_trackCounts; 1715 public boolean printDepth=default_printDepth; 1716 public boolean printDepth2=default_printDepth2; 1717 public boolean printActualDepth=default_printActualDepth; 1718 public boolean printVolume=default_printVolume; 1719 public boolean printRefHits=default_printRefHits; 1720 1721 public boolean printLength=default_printLength; 1722 public boolean printTaxID=default_printTaxID; 1723 public boolean printGSize=default_printGSize; 1724 public boolean printGC=default_printGC; 1725 public boolean gSizeKMG=default_gSizeKMG; 1726 public boolean printGKmers=default_printGKmers; 1727 public boolean printCommonAncestor=default_printCommonAncestor; 1728 public boolean printCommonAncestorLevel=default_printCommonAncestorLevel; 1729 public boolean printTaxName=default_printTaxName; 1730 public boolean printGSeqs=default_printGSeqs; 1731 public boolean printGBases=default_printGBases; 1732 1733 public boolean jsonArray=default_jsonArray; 1734 public boolean printD3=default_printD3; 1735 public boolean D3LevelNodes=false; 1736 public int D3sizeMode=D3_HIT_SIZE; 1737 public static final int D3_HIT_SIZE=0, D3_ANI_SIZE=1, D3_KID_SIZE=2, D3_WKID_SIZE=3, D3_DEPTH_SIZE=4; 1738 1739 public float minEntropy=default_minEntropy; 1740 1741 //For k=32: 1742 //0.000095f is >=Q6 (75%); 0.0008 is >=Q7 (80%); 0.0039 is >=Q8 (84%). 1743 //0.002f is >=Q7.53 (82.3%) 1744 //0.0017f is >=Q7.44 (82.0%) 1745 //0.6f works better for Illumina reads but this is more robust for PacBio. 1746 public float minProb=0.0008f; 1747 public byte minQual=0; 1748 1749 public boolean printUnique=default_printUnique; 1750 public boolean printUnique2=default_printUnique2; 1751 public boolean printUnique3=default_printUnique3; 1752 public boolean printUContam=default_printUContam; 1753 public boolean printNoHit=default_printNoHit; 1754 1755 public boolean printColors=default_printColors; 1756 public boolean setColors=false; 1757 public int colorLevel=default_colorLevel; 1758 1759 /** TODO: Note this is conflated between printing %contam and calculating things based on contam hits. */ 1760 public boolean printContam=default_printContam; 1761 public boolean printContam2=default_printContam2; 1762 private int contamLevel=default_contamLevel; 1763 1764 /** Raw fields */ 1765 public boolean printMatches=default_printMatches; 1766 1767 public boolean printRefDivisor=false; 1768 public boolean printQueryDivisor=false; 1769 public boolean printRefSize=false; 1770 public boolean printQuerySize=false; 1771 public boolean printContamHits=false; 1772 1773 public boolean mergePairs=false; 1774 public boolean printIntersection=false; 1775 1776 //For format 3 or 5 1777 public boolean useTaxidName=false; 1778 public boolean useImgName=false; 1779 public boolean useTaxName=false; 1780 public boolean useFilePrefixName=false; 1781 public boolean reportAniOnly=false; 1782 1783 public int taxLevelWhite=0; 1784 public int taxLevelBlack=0; 1785 1786 public String taxFilterWhiteList=null; 1787 public String taxFilterBlackList=null; 1788 1789 public String taxFilterWhiteString=null; 1790 public String taxFilterBlackString=null; 1791 1792 public TaxFilter taxFilterWhite=null; 1793 public TaxFilter taxFilterBlack=null; 1794 1795 public boolean banUnclassified=false; 1796 public boolean banVirus=false; 1797 1798 /** Make sure the settings are consistent, for CompareSketch. 1799 * This is not yet complete. */ checkValid()1800 public boolean checkValid(){ 1801 if(printUnique2 || printUnique3){ 1802 assert(contamLevel()>=TaxTree.SUBSPECIES_E); 1803 assert(needContamCounts()); 1804 assert(SketchObject.makeIndex); 1805 assert(SketchObject.taxtree!=null); 1806 } 1807 if(printContam2){ 1808 assert(contamLevel()>=TaxTree.SUBSPECIES_E); 1809 assert(needContamCounts()); 1810 assert(SketchObject.makeIndex); 1811 assert(SketchObject.taxtree!=null); 1812 } 1813 return true; 1814 } 1815 trackCounts()1816 public boolean trackCounts() { 1817 return trackCounts || printDepth || printDepth2 || printVolume 1818 || comparator!=Comparison.scoreComparator || printD3; //|| minKeyOccuranceCount>1; 1819 } 1820 needContamCounts()1821 public boolean needContamCounts() { 1822 return printContam || printContam2 || printContamHits || printUnique || printUnique2 || printUnique3 || printUContam || printNoHit; // || true 1823 } 1824 needIndex()1825 public boolean needIndex(){ 1826 return printContam2 || printUnique2 || printUnique3; 1827 } 1828 contamLevel()1829 public int contamLevel() { 1830 return needIndex() ? contamLevel : -1; 1831 } 1832 compare(Comparison a, Comparison b)1833 public int compare(Comparison a, Comparison b){ 1834 return comparator.compare(a, b); 1835 } 1836 1837 public Comparator<Comparison> comparator=Comparison.scoreComparator; 1838 1839 /*--------------------------------------------------------------*/ 1840 /*---------------- Constants ----------------*/ 1841 /*--------------------------------------------------------------*/ 1842 1843 public static final int FORMAT_OLD=0, FORMAT_MULTICOLUMN=2, FORMAT_QUERY_REF_ANI=3, FORMAT_JSON=4, FORMAT_CONSTELLATION=5; 1844 public static final boolean default_printD3=false; 1845 public static final boolean default_jsonArray=false; 1846 1847 public static final int default_maxRecords=20; 1848 public static final float default_minWKID=0.0001f; 1849 public static final int default_format=FORMAT_MULTICOLUMN; 1850 public static final boolean default_printSSUSequence=false; 1851 public static final boolean default_printTax=false; 1852 public static final boolean default_printOriginalName=false; 1853 public static final boolean default_printQueryFileName=true; 1854 public static final boolean default_printRefFileName=false; 1855 public static final boolean default_printImg=false; 1856 public static final boolean default_printAni=true; 1857 public static final boolean default_printCompleteness=true; 1858 public static final boolean default_printScore=false; 1859 public static final boolean default_printEValue=false; 1860 1861 public static final boolean default_trackCounts=false; 1862 public static final boolean default_printDepth=false; 1863 public static final boolean default_printDepth2=false; 1864 public static final boolean default_printActualDepth=true; 1865 public static final boolean default_printVolume=false; 1866 public static final boolean default_printRefHits=false; 1867 1868 public static final boolean default_printContam=true; 1869 public static final boolean default_printContam2=false; 1870 1871 public static final boolean default_printMatches=true; 1872 public static final boolean default_printLength=false; 1873 public static final boolean default_printTaxID=true; 1874 public static final boolean default_printGSize=true; 1875 public static final boolean default_printGC=false; 1876 public static final boolean default_gSizeKMG=true; 1877 public static final boolean default_printGKmers=false; 1878 public static final boolean default_printCommonAncestor=false; 1879 public static final boolean default_printCommonAncestorLevel=false; 1880 public static final boolean default_printTaxName=true; 1881 public static final boolean default_printGSeqs=true; 1882 public static final boolean default_printGBases=false; 1883 1884 public static final float default_minEntropy=0.66f; 1885 public static final float default_minEntropy_amino=0.70f; 1886 public static final float default_minProb=0.0008f; 1887 public static final byte default_minQual=0; 1888 1889 public static final boolean default_printUnique=true; 1890 public static final boolean default_printUnique2=false; 1891 public static final boolean default_printUnique3=false; 1892 public static final boolean default_printUContam=false; 1893 public static final boolean default_printNoHit=false; 1894 1895 public static final boolean default_printColors=true; 1896 public static final int default_colorLevel=TaxTree.FAMILY_E; 1897 1898 public static final int default_taxLevel=TaxTree.SPECIES; 1899 public static final int default_contamLevel=TaxTree.GENUS_E; 1900 1901 public static final int default_mode=SketchObject.ONE_SKETCH; 1902 1903 public static final int default_minHits=3; 1904 public static final float default_samplerate=1; 1905 public static final long default_maxReads=-1; 1906 public static final int default_minKeyOccuranceCount=1; 1907 1908 } 1909