1 package org.broadinstitute.hellbender.tools.walkers.mutect; 2 3 import htsjdk.samtools.SAMFileHeader; 4 import htsjdk.variant.variantcontext.*; 5 import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils; 6 import org.broadinstitute.hellbender.tools.walkers.annotator.UniqueAltReadCount; 7 import org.broadinstitute.hellbender.utils.genotyper.*; 8 import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils; 9 import org.broadinstitute.hellbender.utils.read.GATKRead; 10 import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils; 11 import org.testng.Assert; 12 import org.testng.annotations.Test; 13 14 import java.io.IOException; 15 import java.util.*; 16 import java.util.stream.Collectors; 17 18 public class UniqueAltReadCountUnitTest { 19 final String sampleName = "Mark"; 20 final int chromosomeIndex = 1; 21 final long variantSite = 105; 22 final int numAltReads = 12; 23 24 final SampleList sampleList = new IndexedSampleList(sampleName); 25 26 final List<Allele> alleles = Arrays.asList(Allele.create((byte) 'C', true), Allele.create((byte) 'A', false)); 27 final AlleleList<Allele> alleleList = new IndexedAlleleList<>(alleles); 28 final VariantContext vc = new VariantContextBuilder("source", Integer.toString(chromosomeIndex), variantSite, variantSite, alleles).make(); 29 30 @Test testSingleDuplicate()31 public void testSingleDuplicate() throws IOException { 32 final AlleleLikelihoods<GATKRead, Allele> likelihoods = createTestLikelihoods(Optional.empty()); 33 final UniqueAltReadCount uniqueAltReadCountAnnotation = new UniqueAltReadCount(); 34 final Map<String, Object> annotations = uniqueAltReadCountAnnotation.annotate(null, vc, likelihoods); 35 36 37 final List<Integer> uniqueReadSetCount = AnnotationUtils.decodeAnyASListWithRawDelim((String)annotations.get(UniqueAltReadCount.KEY)).stream().map(Integer::valueOf).collect(Collectors.toList()); 38 Assert.assertEquals(uniqueReadSetCount.get(0).intValue(), 1); 39 } 40 41 @Test testMultipleDuplicateSets()42 public void testMultipleDuplicateSets() throws IOException { 43 final UniqueAltReadCount duplicateReadCountsAnnotation = new UniqueAltReadCount(); 44 45 // should get three unique sets of ALT reads 46 final int numUniqueStarts1 = 3; 47 final AlleleLikelihoods<GATKRead, Allele> likelihoods1 = createTestLikelihoods(Optional.of(numUniqueStarts1)); 48 final Map<String, Object> annotations1 = duplicateReadCountsAnnotation.annotate(null, vc, likelihoods1); 49 50 51 final List<Integer> uniqueReadSetCount1 = AnnotationUtils.decodeAnyASListWithRawDelim((String) annotations1.get(UniqueAltReadCount.KEY)).stream().map(Integer::valueOf).collect(Collectors.toList()); 52 Assert.assertEquals(uniqueReadSetCount1.get(0).intValue(), numUniqueStarts1); 53 54 // here ALT reads are all distinct 55 final int numUniqueStarts2 = numAltReads; 56 final AlleleLikelihoods<GATKRead, Allele> likelihoods2 = createTestLikelihoods(Optional.of(numUniqueStarts2)); 57 final Map<String, Object> annotations2 = duplicateReadCountsAnnotation.annotate(null, vc, likelihoods2); 58 59 60 final List<Integer> uniqueReadSetCount2 = AnnotationUtils.decodeAnyASListWithRawDelim((String) annotations2.get(UniqueAltReadCount.KEY)).stream().map(Integer::valueOf).collect(Collectors.toList()); 61 Assert.assertEquals(uniqueReadSetCount2.get(0).intValue(), numUniqueStarts2); 62 } 63 createTestLikelihoods(final Optional<Integer> shiftModulus)64 private AlleleLikelihoods<GATKRead, Allele> createTestLikelihoods(final Optional<Integer> shiftModulus) { 65 final int numChromosomes = 2; 66 final int startingChromosome = 1; 67 final int chromosomeSize = 1000; 68 final SAMFileHeader SAM_HEADER = ArtificialReadUtils.createArtificialSamHeader(numChromosomes, startingChromosome, chromosomeSize); 69 70 final int alignmentStart = 100; 71 final int readLength = 11; 72 final byte baseq = 30; 73 74 final int numRefReads = 10; 75 final int numReads = numAltReads + numRefReads; 76 77 final Map<String, List<GATKRead>> readMap = new LinkedHashMap<>(); 78 final List<GATKRead> reads = new ArrayList<>(numReads); 79 final byte[] quals = new byte[readLength]; 80 Arrays.fill(quals, baseq); 81 82 // add alt reads, with the start position shifted by i mod shiftModulus 83 for (int i = 0; i < numAltReads; i++) { 84 final int startPosition = shiftModulus.isPresent() ? alignmentStart + (i % shiftModulus.get()) : alignmentStart; 85 final GATKRead read = ArtificialReadUtils.createArtificialRead(SAM_HEADER, "Read" + i, chromosomeIndex, startPosition, 86 "CCCCCACCCCC".getBytes(), quals, "11M"); 87 reads.add(read); 88 } 89 90 // add ref reads 91 for (int j = numAltReads; j < numReads; j++) { 92 final GATKRead read = ArtificialReadUtils.createArtificialRead(SAM_HEADER, "Read" + j, chromosomeIndex, alignmentStart, 93 "CCCCCCCCCCC".getBytes(), quals, "11M"); 94 reads.add(read); 95 } 96 97 readMap.put(sampleName, reads); 98 99 final AlleleLikelihoods<GATKRead, Allele> likelihoods = new AlleleLikelihoods<>(sampleList, alleleList, readMap); 100 final int sampleIndex = 0; 101 final LikelihoodMatrix<GATKRead, Allele> matrix = likelihoods.sampleMatrix(sampleIndex); 102 103 final double logLikelihoodOfBestAllele = 10.0; 104 final int refAlleleIndex = 0; 105 final int altAlleleIndex = 1; 106 107 // Log likelihoods are initialized to 0 for all alleles. For each read_i, set its log likelihood for ALT allele to a positive value. 108 // This makes read_i an ALT read 109 for (int i = 0; i < numAltReads; i++) { 110 matrix.set(altAlleleIndex, i, logLikelihoodOfBestAllele); 111 } 112 113 // Analogously, make read_j a REF read 114 for (int j = numAltReads; j < numReads; j++) { 115 matrix.set(refAlleleIndex, j, logLikelihoodOfBestAllele); 116 } 117 118 return likelihoods; 119 } 120 }