1 /* IOUtils 2 * 3 * created: 2010 4 * 5 * This file is part of Artemis 6 * 7 * Copyright(C) 2010 Genome Research Limited 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; either version 2 12 * of the License, or(at your option) any later version. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 22 * 23 */ 24 package uk.ac.sanger.artemis.components.variant; 25 26 import java.io.BufferedReader; 27 import java.io.File; 28 import java.io.FileInputStream; 29 import java.io.FileOutputStream; 30 import java.io.FileReader; 31 import java.io.FileWriter; 32 import java.io.IOException; 33 import java.io.InputStream; 34 import java.io.PrintWriter; 35 import java.io.Writer; 36 import java.net.URL; 37 import java.util.HashMap; 38 import java.util.List; 39 import java.util.Map; 40 import java.util.Vector; 41 42 import javax.swing.Box; 43 import javax.swing.JCheckBox; 44 import javax.swing.JComponent; 45 import javax.swing.JFileChooser; 46 import javax.swing.JOptionPane; 47 48 import uk.ac.sanger.artemis.Entry; 49 import uk.ac.sanger.artemis.EntryGroup; 50 import uk.ac.sanger.artemis.Feature; 51 import uk.ac.sanger.artemis.FeatureEnumeration; 52 import uk.ac.sanger.artemis.FeatureKeyQualifierPredicate; 53 import uk.ac.sanger.artemis.FeaturePredicate; 54 import uk.ac.sanger.artemis.FeaturePredicateConjunction; 55 import uk.ac.sanger.artemis.FeatureSegment; 56 import uk.ac.sanger.artemis.FeatureSegmentVector; 57 import uk.ac.sanger.artemis.FeatureVector; 58 import uk.ac.sanger.artemis.Selection; 59 import uk.ac.sanger.artemis.components.FileViewer; 60 import uk.ac.sanger.artemis.components.MessageDialog; 61 import uk.ac.sanger.artemis.components.SequenceViewer; 62 import uk.ac.sanger.artemis.components.StickyFileChooser; 63 import uk.ac.sanger.artemis.io.DocumentEntry; 64 import uk.ac.sanger.artemis.io.EntryInformationException; 65 import uk.ac.sanger.artemis.io.Key; 66 import uk.ac.sanger.artemis.io.Location; 67 import uk.ac.sanger.artemis.io.Qualifier; 68 import uk.ac.sanger.artemis.io.QualifierVector; 69 import uk.ac.sanger.artemis.io.Range; 70 import uk.ac.sanger.artemis.sequence.Bases; 71 import uk.ac.sanger.artemis.sequence.MarkerRange; 72 import uk.ac.sanger.artemis.util.DatabaseDocument; 73 import uk.ac.sanger.artemis.util.FileDocument; 74 import uk.ac.sanger.artemis.util.OutOfRangeException; 75 import uk.ac.sanger.artemis.util.ReadOnlyException; 76 import uk.ac.sanger.artemis.util.RemoteFileDocument; 77 78 import htsjdk.samtools.util.BlockCompressedInputStream; 79 80 class IOUtils 81 { 82 83 private static int MAXIMUM_SELECTED_FEATURES = 25; 84 private static int SEQUENCE_LINE_BASE_COUNT = 60; 85 86 /** 87 * Write filtered uncompressed VCF. Uses the filter in VCFview to 88 * determine if variants are written. 89 * @param manualHash 90 * @param vcfFileName 91 * @param vcfView 92 * @param features 93 * @param nfiles 94 * @return 95 */ writeVCF(final Map<String, Boolean> manualHash, final String vcfFileName, final int vcfIndex, final VCFview vcfView, final FeatureVector features, final int nfiles)96 protected static File writeVCF(final Map<String, Boolean> manualHash, 97 final String vcfFileName, 98 final int vcfIndex, 99 final VCFview vcfView, 100 final FeatureVector features, 101 final int nfiles) 102 { 103 try 104 { 105 File filterFile = getFile(vcfFileName, nfiles, ".filter", null); 106 if(filterFile == null) 107 return null; 108 FileWriter writer = new FileWriter(filterFile); 109 AbstractVCFReader.write(manualHash, vcfFileName, vcfIndex, writer, vcfView, features); 110 111 return filterFile; 112 } 113 catch (IOException e) 114 { 115 e.printStackTrace(); 116 return null; 117 } 118 } 119 getFile(final String vcfFileName, final int nfiles, final String suffix, final JComponent comp)120 private static File getFile(final String vcfFileName, final int nfiles, 121 final String suffix, final JComponent comp) throws IOException 122 { 123 if(nfiles > 1) 124 { 125 if(vcfFileName.startsWith("http")) 126 { 127 int ind = vcfFileName.lastIndexOf('/')+1; 128 return new File(vcfFileName.substring(ind)+suffix); 129 } 130 else 131 return new File(vcfFileName+suffix); 132 } 133 134 final StickyFileChooser file_dialog = new StickyFileChooser(); 135 file_dialog.setSelectedFile(new File(vcfFileName+suffix)); 136 file_dialog.setDialogTitle("Choose save file ..."); 137 file_dialog.setDialogType(JFileChooser.SAVE_DIALOG); 138 if(comp != null) 139 file_dialog.setAccessory(comp); 140 141 final int status = file_dialog.showSaveDialog(null); 142 143 if(status != JFileChooser.APPROVE_OPTION || 144 file_dialog.getSelectedFile() == null) 145 return null; 146 147 return file_dialog.getSelectedFile(); 148 } 149 150 /** 151 * Export as a VCF based on the filtering applied in the VCFview. 152 * @param manualHash 153 * @param vcfFiles 154 * @param vcfView 155 */ export(final Map<String, Boolean> manualHash, final List<String> vcfFiles, final VCFview vcfView)156 protected static void export(final Map<String, Boolean> manualHash, 157 final List<String> vcfFiles, 158 final VCFview vcfView) 159 { 160 // get all CDS features that do not have the /pseudo or /pseudogene qualifier 161 final FeatureVector features = getFeatures( 162 new FeaturePredicateConjunction( 163 new FeatureKeyQualifierPredicate(Key.CDS, "pseudo", false), 164 new FeatureKeyQualifierPredicate(Key.CDS, "pseudogene", false), 165 FeaturePredicateConjunction.AND), vcfView.getEntryGroup()); 166 167 String filterFiles = ""; 168 for(int i=0; i<vcfFiles.size(); i++) 169 { 170 File filterFile = IOUtils.writeVCF(manualHash, vcfFiles.get(i), i, vcfView, features, vcfFiles.size()); 171 if(filterFile == null) 172 return; 173 filterFiles += filterFile.getAbsolutePath()+"\n"; 174 } 175 176 new MessageDialog (null, "Saved Files", filterFiles, false); 177 } 178 179 /** 180 * Export all variant sites to a multiple fasta file. 181 * @param vcfView 182 */ exportVariantFasta(final VCFview vcfView)183 protected static void exportVariantFasta(final VCFview vcfView) 184 { 185 final EntryGroup entryGroup = vcfView.getEntryGroup(); 186 final String name = entryGroup.getActiveEntries().elementAt(0).getName(); 187 final File newfile = new File( 188 getBaseDirectoryFromEntry(entryGroup.getActiveEntries().elementAt(0)), 189 name); 190 191 try 192 { 193 final File f = getFile(newfile.getAbsolutePath(), 1, ".fasta", null); 194 if(f == null) 195 return; 196 197 final FileOutputStream fos = new FileOutputStream(f); 198 exportVariantFasta(vcfView, 199 new PrintWriter(fos), 200 entryGroup.getSequenceLength(), 201 entryGroup.getAllFeatures(), 202 entryGroup.getBases()); 203 fos.close(); 204 } 205 catch(IOException ioe) 206 { 207 ioe.printStackTrace(); 208 } 209 } 210 211 /** 212 * Export all variant sites to a multiple fasta file. 213 * @param vcfView 214 * @param pw 215 * @param length 216 * @param features 217 * @param bases 218 */ exportVariantFasta(final VCFview vcfView, final PrintWriter pw, final int length, final FeatureVector features, final Bases bases)219 protected static void exportVariantFasta(final VCFview vcfView, 220 final PrintWriter pw, 221 final int length, 222 final FeatureVector features, 223 final Bases bases) 224 { 225 try 226 { 227 int ntotalSamples = 0; 228 for (int i = 0; i < vcfView.getVcfReaders().length; i++) 229 ntotalSamples += vcfView.getVcfReaders()[i].getNumberOfSamples(); 230 231 final Writer[] writer = new Writer[ntotalSamples+1]; 232 final File[] tmpFiles = new File[ntotalSamples+1]; 233 234 for (int i = 0; i < vcfView.getVcfReaders().length; i++) 235 { 236 final String names[] = vcfView.getVcfReaders()[i].sampleNames; 237 for(int j=0; j<names.length; j++) 238 { 239 final String fn = (names[j].equals("") ? (j+1)+"_sample" : names[j].replaceAll("[/\\:]", "_")); 240 tmpFiles[i+j] = File.createTempFile(fn, "art"); 241 writer[i+j] = new FileWriter( tmpFiles[i+j] ); 242 writer[i+j].write(">"+fn); 243 } 244 } 245 246 // include reference bases 247 final String refName = vcfView.getEntryGroup().getActiveEntries().elementAt(0).getName(); 248 tmpFiles[ntotalSamples] = File.createTempFile("ref", "art"); 249 writer[ntotalSamples] = new FileWriter( tmpFiles[ntotalSamples] ); 250 writer[ntotalSamples].write(">"+refName); 251 252 253 final int MAX_BASE_CHUNK = (10000/ntotalSamples)*SEQUENCE_LINE_BASE_COUNT; 254 final HashMap<Integer, VCFRecord> records[] = new HashMap[MAX_BASE_CHUNK]; 255 int baseCount = 0; 256 257 // write variant sites to tmp files 258 for(int i=0; i<length; i+=MAX_BASE_CHUNK) 259 { 260 int start = i+1; 261 int end = i+MAX_BASE_CHUNK; 262 if(end > length) 263 end = length; 264 265 storeVCFRecords(vcfView, records, start, end); 266 baseCount = writeVariants(vcfView, records, writer, features, 267 ntotalSamples, start, end, bases, baseCount); 268 } 269 270 for(int i=0; i<writer.length; i++) 271 writer[i].close(); 272 273 // concatenate the single fasta files into a multiple fasta file 274 for (int i = 0; i < tmpFiles.length; i++) 275 { 276 final BufferedReader br = new BufferedReader(new FileReader(tmpFiles[i].getPath())); 277 String line; 278 while ( (line = br.readLine()) != null) 279 pw.println(line); 280 br.close(); 281 tmpFiles[i].delete(); 282 } 283 pw.close(); 284 } 285 catch(IOException e) 286 { 287 e.printStackTrace(); 288 } 289 catch (OutOfRangeException e) 290 { 291 e.printStackTrace(); 292 } 293 } 294 295 /** 296 * For a given range store the VFFRecord in a Hashtable. 297 * @param vcfView 298 * @param records 299 * @param start 300 * @param end 301 * @throws IOException 302 */ storeVCFRecords(final VCFview vcfView, final HashMap<Integer, VCFRecord> records[], final int start, final int end)303 private static void storeVCFRecords(final VCFview vcfView, 304 final HashMap<Integer, VCFRecord> records[], 305 final int start, 306 final int end) throws IOException 307 { 308 309 for (int i = 0; i < vcfView.getVcfReaders().length; i++) 310 records[i] = new HashMap<Integer, VCFRecord> (); 311 312 for (int i = 0; i < vcfView.getVcfReaders().length; i++) 313 { 314 AbstractVCFReader reader = vcfView.getVcfReaders()[i]; 315 if(vcfView.isConcatenate()) 316 { 317 String[] contigs = reader.getSeqNames(); 318 for(int j=0; j<contigs.length; j++) 319 { 320 int offset = vcfView.getSequenceOffset(contigs[j]); 321 int nextOffset; 322 if(j<contigs.length-1) 323 nextOffset = vcfView.getSequenceOffset(contigs[j+1]); 324 else 325 nextOffset = vcfView.seqLength; 326 327 if( (start >= offset && start <= nextOffset) || 328 (end >= offset && end <= nextOffset) ) 329 { 330 int thisStart = start - offset; 331 if(thisStart < 1) 332 thisStart = 1; 333 loadRecords(records[i], reader, contigs[j], thisStart, end - offset, offset); 334 } 335 } 336 } 337 else 338 loadRecords(records[i], reader, vcfView.getChr(), start, end, 0); 339 } 340 } 341 loadRecords(HashMap<Integer, VCFRecord> records, final AbstractVCFReader reader, final String contig, final int start, final int end, final int offset)342 private static void loadRecords(HashMap<Integer, VCFRecord> records, 343 final AbstractVCFReader reader, 344 final String contig, 345 final int start, 346 final int end, 347 final int offset) throws IOException 348 { 349 VCFRecord record; 350 while((record = reader.getNextRecord(contig, start, end)) != null) 351 { 352 if(records == null) 353 records = new HashMap<Integer, VCFRecord> (); 354 records.put(record.getPos()+offset, record); 355 } 356 } 357 358 /** 359 * Write variant positions. 360 * @param vcfView 361 * @param records 362 * @param writer 363 * @param features 364 * @param ntotalSamples 365 * @param start 366 * @param end 367 * @param bases 368 * @param bc 369 * @return 370 * @throws IOException 371 * @throws OutOfRangeException 372 */ writeVariants(final VCFview vcfView, final HashMap<Integer, VCFRecord> records[], final Writer writer[], final FeatureVector features, final int ntotalSamples, final int start, final int end, final Bases bases, int bc)373 private static int writeVariants(final VCFview vcfView, 374 final HashMap<Integer, VCFRecord> records[], 375 final Writer writer[], 376 final FeatureVector features, 377 final int ntotalSamples, 378 final int start, 379 final int end, 380 final Bases bases, 381 int bc) throws IOException, OutOfRangeException 382 { 383 final char basesC[] = bases.getSubSequenceC(new Range(start, end), Bases.FORWARD); 384 for (int i = start; i < end; i++) 385 { 386 int thisSample = 0; 387 final String[] thisBase = new String[ntotalSamples+1]; 388 389 boolean seenSNP = false; 390 int insertionLength = 0; 391 392 // loop over each VCF file 393 for (int j = 0; j < vcfView.getVcfReaders().length; j++) 394 { 395 AbstractVCFReader reader = vcfView.getVcfReaders()[j]; 396 VCFRecord record = records[j].get(i); 397 398 if(record == null) 399 continue; 400 401 boolean vcf_v4 = reader.isVcf_v4(); 402 int nsamples = reader.getNumberOfSamples(); 403 // loop over each sample 404 for(int k=0; k<nsamples; k++) 405 { 406 // look at each type of variant 407 if(vcfView.showVariant(record, features, i, reader, k, j) ) 408 { 409 if(record.getAlt().isDeletion(vcf_v4)) 410 { 411 // note: do not write out if just deletion 412 thisBase[thisSample] = "-"; 413 414 /*if( thisBase[ntotalSamples] == null || 415 thisBase[ntotalSamples].length() < record.getRef().length() ) 416 { 417 thisBase[ntotalSamples] = record.getRef(); 418 if(!record.getAlt().toString().equals(".")) 419 thisBase[thisSample] = record.getAlt().toString(); 420 else 421 thisBase[thisSample] = ""; 422 423 int padLength = thisBase[ntotalSamples].length() - thisBase[thisSample].length(); 424 for(int ipad=0; ipad<padLength; ipad++) 425 thisBase[thisSample] += "-"; 426 }*/ 427 } 428 else if(record.getAlt().isInsertion(vcf_v4)) 429 { 430 String in = record.getAlt().toString(); 431 if(in.startsWith("I")) 432 in = in.substring(1); 433 thisBase[thisSample] = in; 434 if(in.length() > insertionLength) 435 insertionLength = in.length(); 436 seenSNP = true; 437 438 if( (thisBase[ntotalSamples] == null || 439 thisBase[ntotalSamples].length() < record.getRef().length()) && 440 in.toLowerCase().startsWith(record.getRef().toLowerCase())) 441 thisBase[ntotalSamples] = record.getRef(); 442 } 443 else if(record.getAlt().isMultiAllele(k)) 444 { 445 String base = MultipleAlleleVariant.getIUBCode(record); 446 if(base != null) 447 { 448 thisBase[thisSample] = base; 449 seenSNP = true; 450 } 451 } 452 else if(record.getAlt().isNonVariant()) 453 { 454 thisBase[thisSample] = "."; 455 } 456 else 457 { 458 if(record.getAlt().toString().length() == record.getRef().length() ) 459 { 460 thisBase[thisSample] = record.getAlt().toString(); 461 seenSNP = true; 462 463 if(thisBase[ntotalSamples] == null || 464 thisBase[ntotalSamples].length() < record.getRef().length()) 465 thisBase[ntotalSamples] = record.getRef(); 466 } 467 } 468 } 469 else 470 thisBase[thisSample] = "N"; // filtered out 471 472 thisSample++; 473 } 474 } 475 476 if(seenSNP) 477 { 478 // look-up reference base 479 if(thisBase[ntotalSamples] == null) 480 thisBase[ntotalSamples] = String.valueOf( basesC[i-start] ); 481 482 int remainder = 0; 483 for(int j=0; j<thisBase.length; j++) 484 { 485 if(thisBase[j] != null) 486 { 487 for(int k=0; k<thisBase[j].length(); k++) 488 { 489 remainder = (bc+k)%SEQUENCE_LINE_BASE_COUNT; 490 if(remainder == 0) 491 writer[j].write(System.getProperty("line.separator")); 492 writer[j].write(thisBase[j].charAt(k)); 493 } 494 } 495 else 496 { 497 remainder = bc%SEQUENCE_LINE_BASE_COUNT; 498 if(remainder == 0) 499 writer[j].write(System.getProperty("line.separator")); 500 writer[j].write("N"); 501 } 502 503 if(insertionLength > 0) 504 { 505 int ins; 506 if(thisBase[j] != null) 507 ins = insertionLength-thisBase[j].length(); 508 else 509 ins = insertionLength-1; 510 511 int rem = remainder+1; 512 for(int k=0; k<ins; k++) 513 { 514 remainder = (rem+k)%SEQUENCE_LINE_BASE_COUNT; 515 if(remainder == 0) 516 writer[j].write(System.getProperty("line.separator")); 517 writer[j].write("-"); 518 } 519 } 520 } 521 522 if(insertionLength > 0) 523 bc+=insertionLength; 524 else 525 bc++; 526 } 527 } 528 529 return bc; 530 } 531 532 533 /** 534 * Write out FASTA for a selected base range 535 * @param vcfView 536 * @param selection 537 * @param view 538 */ exportFastaByRange( final VCFview vcfView, final Selection selection, final boolean view, Writer writer)539 protected static void exportFastaByRange( 540 final VCFview vcfView, 541 final Selection selection, 542 final boolean view, 543 Writer writer) 544 { 545 if(selection.getMarkerRange() == null) 546 { 547 JOptionPane.showMessageDialog(null, 548 "No base range selected.", 549 "Warning", JOptionPane.WARNING_MESSAGE); 550 return; 551 } 552 553 AbstractVCFReader vcfReaders[] = vcfView.getVcfReaders(); 554 MarkerRange marker = selection.getMarkerRange(); 555 Range range = marker.getRawRange(); 556 String fastaFiles = ""; 557 558 EntryGroup entryGroup = vcfView.getEntryGroup(); 559 String name = entryGroup.getActiveEntries().elementAt(0).getName(); 560 int sbeg = range.getStart(); 561 int send = range.getEnd(); 562 563 StringBuffer buffSeq = null; 564 try 565 { 566 final JCheckBox useNs = new JCheckBox("Use N for filtered out sites", true); 567 useNs.setToolTipText("Mask filtered sites."); 568 final JCheckBox useMask = new JCheckBox("Use N for sites without non-variant", true); 569 useMask.setToolTipText("Mask sites that are not confirmed by a non-variant record."); 570 Box yBox = Box.createVerticalBox(); 571 yBox.add(useNs); 572 yBox.add(useMask); 573 if(writer == null) 574 JOptionPane.showMessageDialog(null, yBox, "Options", JOptionPane.INFORMATION_MESSAGE); 575 else 576 useMask.setSelected(false); 577 578 if(!view && writer == null) 579 { 580 File newfile = new File( 581 getBaseDirectoryFromEntry(entryGroup.getActiveEntries().elementAt(0)), 582 name); 583 584 File f = getFile(newfile.getAbsolutePath(), 1, ".fasta", null); 585 if(f == null) 586 return; 587 writer = new FileWriter(f); 588 fastaFiles += f.getAbsolutePath()+"\n"; 589 } 590 else 591 buffSeq = new StringBuffer(); 592 593 Bases bases = entryGroup.getSequenceEntry().getBases(); 594 // reference 595 writeOrViewRange(null, -1, sbeg, send, writer, buffSeq, 596 marker, bases, name, vcfView, entryGroup, useNs.isSelected(), useMask.isSelected()); 597 598 // vcf sequences 599 for (int i = 0; i < vcfReaders.length; i++) 600 writeOrViewRange(vcfReaders[i], i, sbeg, send, writer, buffSeq, 601 marker, bases, name, vcfView, entryGroup, useNs.isSelected(), useMask.isSelected()); 602 603 if(writer != null) 604 writer.close(); 605 } 606 catch(IOException e) 607 { 608 e.printStackTrace(); 609 } 610 catch (OutOfRangeException e) 611 { 612 e.printStackTrace(); 613 } 614 615 if(!view) 616 { 617 if(writer instanceof FileWriter) 618 new MessageDialog (null, "Saved Files", fastaFiles, false); 619 } 620 else 621 { 622 FileViewer viewer = new FileViewer ("Feature base viewer for selected range: " + 623 sbeg+":"+send+(marker.isForwardMarker() ? "" : " reverse"), true, false, true); 624 viewer.getTextPane().setText(buffSeq.toString()); 625 } 626 } 627 628 /** 629 * Write the FASTA sequence out for the given features for each of the 630 * VCF/BCF files. 631 * @param vcfView 632 * @param features 633 * @param view 634 */ exportFasta(final VCFview vcfView, final FeatureVector features, final boolean view, Writer writer)635 protected static void exportFasta(final VCFview vcfView, 636 final FeatureVector features, 637 final boolean view, 638 Writer writer) 639 { 640 if(features.size () < 1) 641 { 642 JOptionPane.showMessageDialog(null, 643 "No features selected.", 644 "Warning", JOptionPane.WARNING_MESSAGE); 645 return; 646 } 647 648 if(view && features.size () > MAXIMUM_SELECTED_FEATURES) 649 new MessageDialog (null, 650 "warning: only viewing the sequences for " + 651 "the first " + MAXIMUM_SELECTED_FEATURES + 652 " selected features"); 653 654 String suffix = ".fasta"; 655 if(features.size() == 1) 656 suffix = "."+features.elementAt(0).getIDString()+suffix; 657 658 String fastaFiles = ""; 659 final AbstractVCFReader vcfReaders[] = vcfView.getVcfReaders(); 660 661 final JCheckBox single = new JCheckBox("Single FASTA", true); 662 final JCheckBox combineFeats = new JCheckBox("Combine feature sequences", true); 663 final JCheckBox useNs = new JCheckBox("Use N for filtered out sites", true); 664 useNs.setToolTipText("Mask filtered sites."); 665 final JCheckBox useMask = new JCheckBox("Use N for sites without non-variant", true); 666 useMask.setToolTipText("Mask sites that are not confirmed by a non-variant record."); 667 668 if(writer != null) 669 useMask.setSelected(false); 670 671 Box yBox = Box.createVerticalBox(); 672 if(!view && vcfReaders.length > 1) 673 yBox.add(single); 674 yBox.add(combineFeats); 675 yBox.add(useNs); 676 yBox.add(useMask); 677 678 final String name = vcfView.getEntryGroup().getActiveEntries().elementAt(0).getName(); 679 try 680 { 681 if(!view && writer == null) 682 { 683 File newfile = new File( 684 getBaseDirectoryFromEntry(vcfView.getEntryGroup().getActiveEntries().elementAt(0)), 685 name); 686 File f = getFile(newfile.getAbsolutePath(), 1, suffix, yBox); 687 if(f == null) 688 return; 689 writer = new FileWriter(f); 690 fastaFiles += f.getAbsolutePath()+"\n"; 691 } 692 else if(writer == null) 693 JOptionPane.showMessageDialog(null, yBox, "View Option(s)", JOptionPane.INFORMATION_MESSAGE); 694 695 // reference sequence 696 StringBuffer buff = new StringBuffer(); 697 for (int j = 0; j < features.size() && (!view || j < MAXIMUM_SELECTED_FEATURES); j++) 698 { 699 Feature f = features.elementAt(j); 700 buff.append( f.getBases() ); 701 if(!combineFeats.isSelected()) 702 { 703 writeOrView(null, f, writer, buff, ""); 704 buff = new StringBuffer(); 705 } 706 } 707 if(combineFeats.isSelected()) 708 writeOrView(null, null, writer, buff, name); 709 if(writer != null && !single.isSelected()) 710 writer.close(); 711 712 // 713 for (int i = 0; i < vcfReaders.length; i++) 714 { 715 if(!view && !single.isSelected()) 716 { 717 File f = getFile(vcfReaders[i].getFileName(), vcfReaders.length, suffix, null); 718 writer = new FileWriter(f); 719 fastaFiles += f.getAbsolutePath()+"\n"; 720 } 721 buff = new StringBuffer(); 722 723 for (int j = 0; j < features.size() && (!view || j < MAXIMUM_SELECTED_FEATURES); j++) 724 { 725 Feature f = features.elementAt(j); 726 FeatureSegmentVector segs = f.getSegments(); 727 728 for(int k=0; k<segs.size(); k++) 729 { 730 FeatureSegment seg = segs.elementAt(k); 731 int sbeg = seg.getRawRange().getStart(); 732 int send = seg.getRawRange().getEnd(); 733 buff.append( getAllBasesInRegion(vcfReaders[i], i, sbeg, send, seg.getBases(), 734 features, vcfView, f.isForwardFeature(), useNs.isSelected(), useMask.isSelected()) ); 735 } 736 737 if(!combineFeats.isSelected()) 738 { 739 writeOrView(vcfReaders[i], f, writer, buff, ""); 740 buff = new StringBuffer(); 741 } 742 } 743 744 if(combineFeats.isSelected()) 745 writeOrView(vcfReaders[i], null, writer, buff, ""); 746 747 if(writer != null && !single.isSelected()) 748 writer.close(); 749 } 750 751 if(writer != null && single.isSelected()) 752 writer.close(); 753 } 754 catch(IOException e) 755 { 756 e.printStackTrace(); 757 } 758 759 if(!view && writer instanceof FileWriter) 760 new MessageDialog (null, "Saved Files", fastaFiles, false); 761 } 762 getHeader(AbstractVCFReader reader, MarkerRange marker, String seqName, int sbeg, int send)763 private static StringBuffer getHeader(AbstractVCFReader reader, 764 MarkerRange marker, String seqName, 765 int sbeg, int send) 766 { 767 StringBuffer header = new StringBuffer(); 768 if(reader != null) 769 header.append(reader.getName()).append(" "); 770 header.append(seqName).append(" "); 771 header.append(sbeg).append(":").append(send); 772 if(marker != null) 773 header.append((marker.isForwardMarker() ? "" : " reverse")); 774 return header; 775 } 776 writeOrViewRange(AbstractVCFReader reader, final int vcfIndex, int sbeg, int send, Writer writer, StringBuffer buffSeq, MarkerRange marker, Bases bases, String name, VCFview vcfView, final EntryGroup entryGroup, final boolean useNs, final boolean useMask)777 private static void writeOrViewRange(AbstractVCFReader reader, 778 final int vcfIndex, 779 int sbeg, int send, 780 Writer writer, StringBuffer buffSeq, 781 MarkerRange marker, Bases bases, 782 String name, 783 VCFview vcfView, 784 final EntryGroup entryGroup, 785 final boolean useNs, 786 final boolean useMask) throws IOException, OutOfRangeException 787 { 788 int direction = ( marker.isForwardMarker() ? Bases.FORWARD : Bases.REVERSE); 789 int length = send-sbeg+1; 790 int MAX_BASE_CHUNK = 2000*SEQUENCE_LINE_BASE_COUNT; 791 String basesStr; 792 StringBuffer header = getHeader(reader, marker, name, sbeg, send); 793 int linePos = 0; 794 795 for(int i=0; i<length; i+=MAX_BASE_CHUNK) 796 { 797 int sbegc = sbeg+i; 798 int sendc = sbeg+i+MAX_BASE_CHUNK-1; 799 if(i+MAX_BASE_CHUNK-1 > length) 800 sendc = send; 801 802 int sbegc_raw = sbegc; 803 int sendc_raw = sendc; 804 if(direction == Bases.REVERSE) 805 { 806 sendc = bases.getLength () - sbegc_raw + 1; 807 sbegc = bases.getLength () - sendc_raw + 1; 808 } 809 810 MarkerRange m = new MarkerRange(marker.getStrand(), sbegc, sendc); 811 basesStr = bases.getSubSequence(m.getRange(), direction); 812 FeatureVector features = entryGroup.getFeaturesInRange(m.getRange()); 813 //System.out.println((reader == null ? "" : reader.getName())+" "+sbegc+".."+sendc); 814 if(reader != null) 815 basesStr = getAllBasesInRegion(reader, vcfIndex, sbegc_raw, sendc_raw, basesStr, 816 features, vcfView, marker.isForwardMarker(), useNs, useMask); 817 else 818 basesStr = basesStr.toUpperCase(); 819 820 linePos = writeOrView(writer, header, basesStr, buffSeq, linePos); 821 header = null; 822 } 823 } 824 writeOrView(Writer writer, StringBuffer header, String basesStr, StringBuffer buff, int linePos)825 private static int writeOrView(Writer writer, 826 StringBuffer header, 827 String basesStr, 828 StringBuffer buff, 829 int linePos) throws IOException 830 { 831 if(writer == null) // sequence viewer 832 { 833 if(header != null) 834 buff.append(">").append(header.toString()).append("\n"); 835 wrapString(basesStr, buff); 836 } 837 else // write to file 838 return writeSequence(writer, header, basesStr, linePos); 839 840 return 0; 841 } 842 843 /** 844 * Construct a header and write or view the sequence. 845 * @param reader 846 * @param f 847 * @param writer 848 * @param buff 849 * @throws IOException 850 */ writeOrView(AbstractVCFReader reader, Feature f, Writer writer, StringBuffer buff, String hdr)851 private static void writeOrView(AbstractVCFReader reader, Feature f, 852 Writer writer, StringBuffer buff, String hdr) 853 throws IOException 854 { 855 StringBuffer header = new StringBuffer(hdr); 856 final String basesStr; 857 858 if(reader != null) 859 { 860 header.append(reader.getName()).append(" "); 861 basesStr = buff.toString(); 862 } 863 else 864 basesStr = buff.toString().toUpperCase(); 865 866 if(f != null) 867 { 868 header.append(f.getSystematicName()).append(" "); 869 header.append(f.getIDString()).append(" "); 870 final String product = f.getProductString(); 871 header.append( (product == null ? "undefined product" : product) ); 872 header.append(" ").append(f.getWriteRange()); 873 } 874 875 if(writer == null) // sequence viewer 876 { 877 SequenceViewer viewer = 878 new SequenceViewer ("Feature base viewer for feature(s)", false); 879 viewer.setSequence(">"+header.toString(), basesStr); 880 } 881 else // write to file 882 writeSequence(writer, header, basesStr, 0); 883 } 884 885 /** 886 * For a given VCF file change the sequence in a range and return the 887 * base sequence as a string. 888 * @param reader 889 * @param vcfIndex 890 * @param sbeg 891 * @param send 892 * @param basesStr 893 * @param features 894 * @param vcfView 895 * @param isFwd 896 * @param useNs 897 * @param useMask 898 * @return 899 * @throws IOException 900 */ getAllBasesInRegion(final AbstractVCFReader reader, final int vcfIndex, final int sbeg, final int send, String basesStr, final FeatureVector features, final VCFview vcfView, final boolean isFwd, final boolean useNs, final boolean useMask)901 private static String getAllBasesInRegion(final AbstractVCFReader reader, 902 final int vcfIndex, 903 final int sbeg, 904 final int send, 905 String basesStr, 906 final FeatureVector features, 907 final VCFview vcfView, 908 final boolean isFwd, 909 final boolean useNs, 910 final boolean useMask) throws IOException 911 { 912 if(vcfView.isConcatenate()) 913 { 914 String[] contigs = reader.getSeqNames(); 915 for(int j=0; j<contigs.length; j++) 916 { 917 int offset = vcfView.getSequenceOffset(contigs[j]); 918 int nextOffset; 919 if(j<contigs.length-1) 920 nextOffset = vcfView.getSequenceOffset(contigs[j+1]); 921 else 922 nextOffset = vcfView.seqLength; 923 924 if( (offset >= sbeg && offset < send) || 925 (offset < sbeg && sbeg < nextOffset) ) 926 { 927 int thisStart = sbeg - offset; 928 if(thisStart < 1) 929 thisStart = 1; 930 int thisEnd = send - offset; 931 basesStr = getBasesInRegion(reader, vcfIndex, contigs[j], thisStart, thisEnd, 932 basesStr, features, vcfView, isFwd, useNs, useMask); 933 } 934 } 935 } 936 else 937 basesStr = getBasesInRegion(reader, vcfIndex, vcfView.getChr(), sbeg, send, 938 basesStr, features, vcfView, isFwd, useNs, useMask); 939 940 return basesStr; 941 } 942 943 /** 944 * For a given VCF file change the sequence in a range and return the 945 * base sequence as a string. 946 * @param reader 947 * @param vcfIndex 948 * @param chr 949 * @param sbeg 950 * @param send 951 * @param basesStr 952 * @param features 953 * @param vcfView 954 * @param isFwd 955 * @param useNs 956 * @param useMask 957 * @return 958 * @throws IOException 959 */ getBasesInRegion(final AbstractVCFReader reader, final int vcfIndex, final String chr, int sbeg, final int send, String basesStr, final FeatureVector features, final VCFview vcfView, final boolean isFwd, final boolean useNs, final boolean useMask)960 private static String getBasesInRegion(final AbstractVCFReader reader, 961 final int vcfIndex, 962 final String chr, 963 int sbeg, 964 final int send, 965 String basesStr, 966 final FeatureVector features, 967 final VCFview vcfView, 968 final boolean isFwd, 969 final boolean useNs, 970 final boolean useMask) throws IOException 971 { 972 boolean vcf_v4 = reader.isVcf_v4(); 973 int len = basesStr.length(); 974 int baseNum = sbeg; 975 try 976 { 977 VCFRecord record; 978 while ((record = reader.getNextRecord(chr, sbeg, send)) != null) 979 { 980 // 981 // mask regions with N where there are no records 982 if(useMask && baseNum < record.getPos()) 983 basesStr = maskSites(sbeg, record.getPos(), baseNum, isFwd, basesStr); 984 baseNum = record.getPos()+1; 985 986 int basePosition = record.getPos() + vcfView.getSequenceOffset(record.getChrom()); 987 if(vcfView.showVariant(record, features, basePosition, reader, -1, vcfIndex) ) 988 basesStr = getSeqsVariation(record, basesStr, sbeg, isFwd, vcf_v4); 989 else if(useNs && isSNPorNonVariant(record)) 990 { 991 int position = record.getPos()-sbeg; 992 if(!isFwd) 993 position = basesStr.length()-position-1; 994 basesStr = basesStr.substring(0, position) + 'n' + 995 basesStr.substring(position+1); 996 } 997 998 // adjust for insertions 999 if(basesStr.length() > len) 1000 { 1001 sbeg -= (basesStr.length()-len); 1002 len = basesStr.length(); 1003 } 1004 } 1005 } 1006 catch(NullPointerException e) 1007 { 1008 System.err.println(chr+":"+sbeg+"-"+send+"\n"+e.getMessage()); 1009 } 1010 1011 if(useMask && baseNum-sbeg < len) 1012 basesStr = maskSites(sbeg, len+sbeg, baseNum, isFwd, basesStr); 1013 1014 return basesStr; 1015 } 1016 1017 /** 1018 * Mask sequence sites 1019 * @param sbeg 1020 * @param endMask 1021 * @param baseNum 1022 * @param isFwd 1023 * @param basesStr 1024 * @return 1025 */ maskSites(final int sbeg, final int endMask, final int baseNum, final boolean isFwd, String basesStr)1026 private static String maskSites(final int sbeg, final int endMask, 1027 final int baseNum, final boolean isFwd, String basesStr) 1028 { 1029 for(int i=baseNum; i<endMask; i++) 1030 { 1031 int position = i-sbeg; 1032 if(!isFwd) 1033 position = basesStr.length()-position-1; 1034 basesStr = basesStr.substring(0, position) + 'n' + 1035 basesStr.substring(position+1); 1036 } 1037 return basesStr; 1038 } 1039 1040 countVariants(final VCFview vcfView, final FeatureVector features)1041 protected static void countVariants(final VCFview vcfView, 1042 final FeatureVector features) throws IOException 1043 { 1044 if(features.size () < 1) 1045 { 1046 JOptionPane.showMessageDialog(null, 1047 "No features selected.", 1048 "Warning", JOptionPane.WARNING_MESSAGE); 1049 return; 1050 } 1051 1052 String[] columnNames = { 1053 "VCF", "Name", "Variant", "Non-variant", "Deletion", "Insertion", "Synonymous", "Non-synonymous"}; 1054 Vector<String> columnData = new Vector<String>(); 1055 for(String col: columnNames) 1056 columnData.add(col); 1057 Vector<Vector<Object>> rowData = new Vector<Vector<Object>>(); 1058 1059 AbstractVCFReader vcfReaders[] = vcfView.getVcfReaders(); 1060 for(int vcfIndex=0; vcfIndex<vcfReaders.length; vcfIndex++) 1061 { 1062 AbstractVCFReader reader = vcfReaders[vcfIndex]; 1063 for (int j = 0; j < features.size(); j++) 1064 { 1065 int count[] = new int[6]; 1066 for(int c: count) 1067 c = 0; 1068 1069 Feature f = features.elementAt(j); 1070 FeatureSegmentVector segs = f.getSegments(); 1071 1072 for(int k=0; k<segs.size(); k++) 1073 { 1074 FeatureSegment seg = segs.elementAt(k); 1075 int sbeg = seg.getRawRange().getStart(); 1076 int send = seg.getRawRange().getEnd(); 1077 1078 if(vcfView.isConcatenate()) 1079 { 1080 String[] contigs = reader.getSeqNames(); 1081 for(int i=0; i<contigs.length; i++) 1082 { 1083 int offset = vcfView.getSequenceOffset(contigs[i]); 1084 int nextOffset; 1085 if(i<contigs.length-1) 1086 nextOffset = vcfView.getSequenceOffset(contigs[i+1]); 1087 else 1088 nextOffset = vcfView.seqLength; 1089 1090 if( (offset >= sbeg && offset < send) || 1091 (offset < sbeg && sbeg < nextOffset) ) 1092 { 1093 int thisStart = sbeg - offset; 1094 if(thisStart < 1) 1095 thisStart = 1; 1096 int thisEnd = send - offset; 1097 1098 VCFRecord record; 1099 while ((record = reader.getNextRecord(vcfView.getChr(), thisStart, thisEnd)) != null) 1100 count(record, count, features, reader, vcfIndex, vcfView); 1101 } 1102 } 1103 } 1104 else 1105 { 1106 VCFRecord record; 1107 while ((record = reader.getNextRecord(vcfView.getChr(), sbeg, send)) != null) 1108 count(record, count, features, reader, vcfIndex, vcfView); 1109 } 1110 } 1111 1112 Object row[] = { 1113 reader.getName(), f.getSystematicName(), count[0], count[1], count[2], count[3], count[4], count[5] }; 1114 1115 Vector<Object> thisRow = new Vector<Object>(); 1116 for(Object obj: row) 1117 thisRow.add(obj); 1118 rowData.add(thisRow); 1119 } 1120 } 1121 1122 TableViewer tab = new TableViewer(rowData, columnData, "Variant Overview"); 1123 for(int i=2; i< columnData.size(); i++) 1124 tab.setIntegerRowSorter(i); 1125 } 1126 count(VCFRecord record, int count[], FeatureVector features, AbstractVCFReader reader, int vcfIndex, VCFview vcfView)1127 private static void count(VCFRecord record, int count[], FeatureVector features, AbstractVCFReader reader, int vcfIndex, VCFview vcfView) 1128 { 1129 int basePosition = record.getPos() + vcfView.getSequenceOffset(record.getChrom()); 1130 if(!vcfView.showVariant(record, features, basePosition, reader, -1, vcfIndex) ) 1131 return; 1132 1133 if(record.getAlt().isNonVariant()) 1134 { 1135 count[1]++; 1136 return; 1137 } 1138 else 1139 count[0]++; 1140 1141 if(record.getAlt().isDeletion(reader.isVcf_v4())) 1142 count[2]++; 1143 else if(record.getAlt().isInsertion(reader.isVcf_v4())) 1144 count[3]++; 1145 1146 if(record.getAlt().length() == 1 && record.getRef().length() == 1) 1147 { 1148 short synFlag = record.getSynFlag(features, record.getPos()); 1149 switch(synFlag) 1150 { 1151 case 1: count[4]++; break; // synonymous 1152 default: count[5]++; break; // non-synonymous 1153 } 1154 } 1155 } 1156 isSNPorNonVariant(VCFRecord record)1157 private static boolean isSNPorNonVariant(VCFRecord record) 1158 { 1159 return (record.getRef().length() == 1 && record.getAlt().length() == 1) || record.getAlt().isNonVariant(); 1160 } 1161 wrapString(String bases, StringBuffer buff)1162 protected static void wrapString(String bases, StringBuffer buff) 1163 { 1164 final int SEQUENCE_LINE_BASE_COUNT = 60; 1165 for(int k=0; k<bases.length(); k+=SEQUENCE_LINE_BASE_COUNT) 1166 { 1167 int end = k + SEQUENCE_LINE_BASE_COUNT; 1168 if(end > bases.length()) 1169 end = bases.length(); 1170 buff.append ( bases.substring(k,end) ).append("\n"); 1171 } 1172 } 1173 writeSequence(Writer writer, StringBuffer header, String bases, int startPos)1174 private static int writeSequence(Writer writer, 1175 StringBuffer header, 1176 String bases, 1177 int startPos) throws IOException 1178 { 1179 if(header != null) 1180 writer.write (">" + header.toString() + "\n"); 1181 int k = 0; 1182 for(k=0; k<bases.length(); k+=SEQUENCE_LINE_BASE_COUNT) 1183 { 1184 int end = k + SEQUENCE_LINE_BASE_COUNT - startPos; 1185 if(end > bases.length()) 1186 end = bases.length(); 1187 writer.write ( bases.substring(k,end) ); 1188 1189 if(k < bases.length() -1) 1190 writer.write("\n"); 1191 1192 startPos = 0; 1193 } 1194 1195 return k % SEQUENCE_LINE_BASE_COUNT; 1196 } 1197 1198 /** 1199 * Change the bases to reflect a variation record. 1200 * @param vcfRecord 1201 * @param bases 1202 * @param sbeg 1203 * @param isFwd 1204 * @param vcf_v4 1205 * @return 1206 */ getSeqsVariation(VCFRecord vcfRecord, String bases, int sbeg, boolean isFwd, boolean vcf_v4)1207 private static String getSeqsVariation(VCFRecord vcfRecord, 1208 String bases, int sbeg, boolean isFwd, boolean vcf_v4) 1209 { 1210 int position = vcfRecord.getPos()-sbeg; 1211 if(!isFwd) 1212 position = bases.length()-position-1; 1213 1214 if(position > bases.length()) 1215 return bases; 1216 else if(position < 0) 1217 return bases; 1218 1219 if(position < bases.length()-1 && bases.charAt(position) == '-') 1220 return bases; 1221 1222 StringBuffer buff = new StringBuffer(); 1223 buff.append(bases.substring(0,position)); 1224 1225 if(vcfRecord.getAlt().isDeletion(vcf_v4)) 1226 { 1227 int ndel = vcfRecord.getAlt().getNumberOfIndels(vcf_v4); 1228 if(isFwd && 1229 !vcfRecord.getAlt().toString().equals(".") && 1230 !vcfRecord.getAlt().toString().startsWith("D")) 1231 { 1232 buff.append(getBase(vcfRecord.getAlt().toString(), isFwd)); 1233 position+=vcfRecord.getAlt().toString().length(); 1234 } 1235 1236 if(isFwd) 1237 position+=ndel-1; 1238 else 1239 { 1240 if(position-ndel+1 < 0) 1241 buff.delete(0, position); 1242 else 1243 buff.delete(position-ndel+1, position); 1244 } 1245 1246 for(int i=0; i<ndel; i++) 1247 buff.append("-"); 1248 } 1249 else if(vcfRecord.getAlt().isInsertion(vcf_v4)) 1250 { 1251 if(!isFwd) 1252 buff.delete(position-vcfRecord.getRef().length()+1, position); 1253 1254 String in = vcfRecord.getAlt().toString(); 1255 if(in.startsWith("I")) 1256 in = in.substring(1); 1257 buff.append(getBase(in, isFwd)); 1258 1259 if(isFwd) 1260 position+=(vcfRecord.getRef().toString().length()-1); 1261 } 1262 else if(vcfRecord.getAlt().isMultiAllele(-1)) 1263 { 1264 String base = MultipleAlleleVariant.getIUBCode(vcfRecord); 1265 if(base != null) 1266 buff.append(base); 1267 else 1268 buff.append(bases.charAt(position)); 1269 } 1270 else if(vcfRecord.getAlt().isNonVariant()) // non-variant 1271 buff.append(getBase(vcfRecord.getRef(), isFwd).toUpperCase()); 1272 else 1273 buff.append(getBase(vcfRecord.getAlt().toString().toLowerCase(), isFwd)); 1274 1275 if(isFwd && position < bases.length()) 1276 buff.append(bases.substring(position+1)); 1277 else if(!isFwd && position < bases.length()) 1278 buff.append(bases.substring(position+1)); 1279 1280 return buff.toString(); 1281 } 1282 1283 /** 1284 * Get the actual bases by reverse complementing if on the 1285 * reverse strand. 1286 * @param baseStr 1287 * @param isFwd 1288 * @return 1289 */ getBase(String baseStr, boolean isFwd)1290 private static String getBase(String baseStr, boolean isFwd) 1291 { 1292 if(isFwd) 1293 return baseStr; 1294 return Bases.reverseComplement(baseStr); 1295 } 1296 1297 /** 1298 * Get all features in an entry group. 1299 * @param predicate 1300 * @param entryGroup 1301 * @return 1302 */ getFeatures(FeaturePredicate predicate, EntryGroup entryGroup)1303 private static FeatureVector getFeatures(FeaturePredicate predicate, EntryGroup entryGroup) 1304 { 1305 final FeatureVector features = new FeatureVector (); 1306 final FeatureEnumeration feature_enum = entryGroup.features (); 1307 while (feature_enum.hasMoreFeatures ()) 1308 { 1309 final Feature current_feature = feature_enum.nextFeature (); 1310 if (predicate.testPredicate (current_feature)) 1311 features.add (current_feature); 1312 } 1313 return features; 1314 } 1315 1316 /** 1317 * Create features for each variant that has not been filtered out. 1318 * @param vcfView 1319 * @param entryGroup 1320 */ createFeatures(final VCFview vcfView, final EntryGroup entryGroup)1321 protected static void createFeatures(final VCFview vcfView, 1322 final EntryGroup entryGroup) 1323 { 1324 final Entry newEntry = entryGroup.createEntry("VCF"); 1325 1326 int sbeg = 1; 1327 int send = entryGroup.getSequenceLength(); 1328 int MAX_BASE_CHUNK = 1000 * SEQUENCE_LINE_BASE_COUNT; 1329 1330 Bases bases = entryGroup.getSequenceEntry().getBases(); 1331 1332 for (int i = 0; i < send; i += MAX_BASE_CHUNK) 1333 { 1334 int sbegc = sbeg + i; 1335 int sendc = sbeg + i + MAX_BASE_CHUNK - 1; 1336 if (i + MAX_BASE_CHUNK - 1 > send) 1337 sendc = send; 1338 1339 try 1340 { 1341 Range range = new Range(sbegc, sendc); 1342 FeatureVector features = entryGroup.getFeaturesInRange(range); 1343 String chr = vcfView.getChr(); 1344 AbstractVCFReader vcfReaders[] = vcfView.getVcfReaders(); 1345 for(int vcfIndex=0; vcfIndex<vcfReaders.length; vcfIndex++) 1346 { 1347 AbstractVCFReader reader = vcfReaders[vcfIndex]; 1348 if(vcfView.isConcatenate()) 1349 { 1350 for(String contig: reader.getSeqNames()) 1351 makeFeatures(reader, vcfIndex, contig, sbegc, sendc, features, vcfView, bases, newEntry); 1352 } 1353 else 1354 makeFeatures(reader, vcfIndex, chr, sbegc, sendc, features, vcfView, bases, newEntry); 1355 } 1356 } 1357 catch (IOException ioe) 1358 { 1359 ioe.printStackTrace(); 1360 } 1361 catch (OutOfRangeException e) 1362 { 1363 e.printStackTrace(); 1364 } 1365 } 1366 } 1367 makeFeatures( final AbstractVCFReader reader, final int vcfIndex, final String chr, final int sbegc, final int sendc, final FeatureVector features, final VCFview vcfView, final Bases bases, final Entry entry)1368 private static void makeFeatures( 1369 final AbstractVCFReader reader, 1370 final int vcfIndex, 1371 final String chr, 1372 final int sbegc, 1373 final int sendc, 1374 final FeatureVector features, 1375 final VCFview vcfView, 1376 final Bases bases, 1377 final Entry entry) throws IOException, OutOfRangeException 1378 { 1379 Key variantKey = new Key("misc_difference"); 1380 try 1381 { 1382 VCFRecord record; 1383 while( (record = reader.getNextRecord(chr, sbegc, sendc)) != null) 1384 { 1385 makeFeature(record, reader.getName(), vcfView, features, bases, entry, variantKey, reader, vcfIndex); 1386 } 1387 } 1388 catch (NullPointerException e) 1389 { 1390 System.err.println(chr + ":" + sbegc + "-" + sendc + "\n" 1391 + e.getMessage()); 1392 } 1393 } 1394 makeFeature( final VCFRecord record, final String vcfFileName, final VCFview vcfView, final FeatureVector features, final Bases bases, final Entry entry, final Key variantKey, final AbstractVCFReader vcfReader, final int vcfIndex)1395 private static void makeFeature( 1396 final VCFRecord record, 1397 final String vcfFileName, 1398 final VCFview vcfView, 1399 final FeatureVector features, 1400 final Bases bases, 1401 final Entry entry, 1402 final Key variantKey, 1403 final AbstractVCFReader vcfReader, 1404 final int vcfIndex) throws OutOfRangeException, ReadOnlyException 1405 { 1406 int basePosition = record.getPos() + vcfView.getSequenceOffset(record.getChrom()); 1407 if (vcfView.showVariant(record, features, basePosition, vcfReader, -1, vcfIndex)) 1408 { 1409 MarkerRange marker = new MarkerRange(bases.getForwardStrand(), 1410 basePosition, basePosition); 1411 Location location = marker.createLocation(); 1412 QualifierVector qualifiers = new QualifierVector(); 1413 String qualifierStr = record.getRef()+"->"+record.getAlt().toString()+ 1414 "; "+vcfFileName+"; score="+record.getQuality(); 1415 if(record.getAlt().isMultiAllele(-1)) 1416 qualifierStr += "; MULTI-ALLELE"; 1417 else if(record.getAlt().isDeletion(vcfReader.isVcf_v4())) 1418 qualifierStr += "; DELETION"; 1419 else if(record.getAlt().isInsertion(vcfReader.isVcf_v4())) 1420 qualifierStr += "; INSERTION"; 1421 else if(record.getAlt().isNonVariant()) 1422 return; 1423 1424 try 1425 { 1426 FeatureVector fs = entry.getFeaturesInRange(marker.getRange()); 1427 if(fs.size() > 0) 1428 { 1429 for(int i=0; i<fs.size(); i++) 1430 { 1431 Feature f = fs.elementAt(i); 1432 if(f.getKey().compareTo(variantKey) == 0) 1433 { 1434 f.getQualifiers().addQualifierValues( 1435 new Qualifier("note", qualifierStr)); 1436 return; 1437 } 1438 } 1439 } 1440 1441 qualifiers.addQualifierValues(new Qualifier("note", qualifierStr)); 1442 entry.createFeature(variantKey, location, qualifiers); 1443 } 1444 catch (EntryInformationException e) 1445 { 1446 e.printStackTrace(); 1447 } 1448 } 1449 } 1450 1451 /** 1452 * Test if this is a BCF file. 1453 * @param fileName 1454 * @return 1455 * @throws IOException 1456 */ isBCF(String fileName)1457 protected static boolean isBCF(String fileName) throws IOException 1458 { 1459 InputStream ins; 1460 if(fileName.startsWith("http:") || fileName.startsWith("ftp:")) 1461 { 1462 final URL urlBamIndexFile = new URL(fileName); 1463 ins = urlBamIndexFile.openStream(); 1464 } 1465 else 1466 ins = new FileInputStream(fileName); 1467 BlockCompressedInputStream is = new BlockCompressedInputStream(ins); 1468 byte[] magic = new byte[4]; 1469 is.read(magic); 1470 ins.close(); 1471 is.close(); 1472 String line = new String(magic); 1473 if(line.equals("BCF\4")) 1474 return true; 1475 return false; 1476 } 1477 1478 /** 1479 * Return the dirtectory that the given entry was read from. 1480 **/ getBaseDirectoryFromEntry(final Entry entry)1481 private static File getBaseDirectoryFromEntry(final Entry entry) 1482 { 1483 final uk.ac.sanger.artemis.io.Entry embl_entry = entry.getEMBLEntry(); 1484 1485 if(embl_entry instanceof DocumentEntry) 1486 { 1487 final DocumentEntry document_entry =(DocumentEntry) embl_entry; 1488 1489 if(document_entry.getDocument() instanceof FileDocument) 1490 { 1491 final FileDocument file_document = 1492 (FileDocument) document_entry.getDocument(); 1493 1494 if(file_document.getFile().getParent() != null) 1495 return new File(file_document.getFile().getParent()); 1496 } 1497 } 1498 if(((DocumentEntry)entry.getEMBLEntry()).getDocument() 1499 instanceof RemoteFileDocument || 1500 ((DocumentEntry)entry.getEMBLEntry()).getDocument() 1501 instanceof DatabaseDocument) 1502 return new File(System.getProperty("user.dir")); 1503 1504 return null; 1505 } 1506 1507 }