1 package org.broadinstitute.hellbender.tools.walkers.mutect;
2 
3 import htsjdk.samtools.SAMFileHeader;
4 import htsjdk.variant.variantcontext.*;
5 import org.broadinstitute.hellbender.tools.walkers.annotator.AnnotationUtils;
6 import org.broadinstitute.hellbender.tools.walkers.annotator.UniqueAltReadCount;
7 import org.broadinstitute.hellbender.utils.genotyper.*;
8 import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
9 import org.broadinstitute.hellbender.utils.read.GATKRead;
10 import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;
11 import org.testng.Assert;
12 import org.testng.annotations.Test;
13 
14 import java.io.IOException;
15 import java.util.*;
16 import java.util.stream.Collectors;
17 
18 public class UniqueAltReadCountUnitTest {
19     final String sampleName = "Mark";
20     final int chromosomeIndex = 1;
21     final long variantSite = 105;
22     final int numAltReads = 12;
23 
24     final SampleList sampleList = new IndexedSampleList(sampleName);
25 
26     final List<Allele> alleles = Arrays.asList(Allele.create((byte) 'C', true), Allele.create((byte) 'A', false));
27     final AlleleList<Allele> alleleList = new IndexedAlleleList<>(alleles);
28     final VariantContext vc = new VariantContextBuilder("source", Integer.toString(chromosomeIndex), variantSite, variantSite, alleles).make();
29 
30     @Test
testSingleDuplicate()31     public void testSingleDuplicate() throws IOException {
32         final AlleleLikelihoods<GATKRead, Allele> likelihoods = createTestLikelihoods(Optional.empty());
33         final UniqueAltReadCount uniqueAltReadCountAnnotation = new UniqueAltReadCount();
34         final Map<String, Object> annotations = uniqueAltReadCountAnnotation.annotate(null, vc, likelihoods);
35 
36 
37         final List<Integer> uniqueReadSetCount = AnnotationUtils.decodeAnyASListWithRawDelim((String)annotations.get(UniqueAltReadCount.KEY)).stream().map(Integer::valueOf).collect(Collectors.toList());
38         Assert.assertEquals(uniqueReadSetCount.get(0).intValue(), 1);
39     }
40 
41     @Test
testMultipleDuplicateSets()42     public void testMultipleDuplicateSets() throws IOException {
43         final UniqueAltReadCount duplicateReadCountsAnnotation = new UniqueAltReadCount();
44 
45         // should get three unique sets of ALT reads
46         final int numUniqueStarts1 = 3;
47         final AlleleLikelihoods<GATKRead, Allele> likelihoods1 = createTestLikelihoods(Optional.of(numUniqueStarts1));
48         final Map<String, Object> annotations1 = duplicateReadCountsAnnotation.annotate(null, vc, likelihoods1);
49 
50 
51         final List<Integer> uniqueReadSetCount1 = AnnotationUtils.decodeAnyASListWithRawDelim((String) annotations1.get(UniqueAltReadCount.KEY)).stream().map(Integer::valueOf).collect(Collectors.toList());
52         Assert.assertEquals(uniqueReadSetCount1.get(0).intValue(), numUniqueStarts1);
53 
54         // here ALT reads are all distinct
55         final int numUniqueStarts2 = numAltReads;
56         final AlleleLikelihoods<GATKRead, Allele> likelihoods2 = createTestLikelihoods(Optional.of(numUniqueStarts2));
57         final Map<String, Object> annotations2 = duplicateReadCountsAnnotation.annotate(null, vc, likelihoods2);
58 
59 
60         final List<Integer> uniqueReadSetCount2 = AnnotationUtils.decodeAnyASListWithRawDelim((String) annotations2.get(UniqueAltReadCount.KEY)).stream().map(Integer::valueOf).collect(Collectors.toList());
61         Assert.assertEquals(uniqueReadSetCount2.get(0).intValue(), numUniqueStarts2);
62     }
63 
createTestLikelihoods(final Optional<Integer> shiftModulus)64     private AlleleLikelihoods<GATKRead, Allele> createTestLikelihoods(final Optional<Integer> shiftModulus) {
65         final int numChromosomes = 2;
66         final int startingChromosome = 1;
67         final int chromosomeSize = 1000;
68         final SAMFileHeader SAM_HEADER = ArtificialReadUtils.createArtificialSamHeader(numChromosomes, startingChromosome, chromosomeSize);
69 
70         final int alignmentStart = 100;
71         final int readLength = 11;
72         final byte baseq = 30;
73 
74         final int numRefReads = 10;
75         final int numReads = numAltReads + numRefReads;
76 
77         final Map<String, List<GATKRead>> readMap = new LinkedHashMap<>();
78         final List<GATKRead> reads = new ArrayList<>(numReads);
79         final byte[] quals = new byte[readLength];
80         Arrays.fill(quals, baseq);
81 
82         // add alt reads, with the start position shifted by i mod shiftModulus
83         for (int i = 0; i < numAltReads; i++) {
84             final int startPosition = shiftModulus.isPresent() ? alignmentStart + (i % shiftModulus.get()) : alignmentStart;
85             final GATKRead read = ArtificialReadUtils.createArtificialRead(SAM_HEADER, "Read" + i, chromosomeIndex, startPosition,
86                     "CCCCCACCCCC".getBytes(), quals, "11M");
87             reads.add(read);
88         }
89 
90         // add ref reads
91         for (int j = numAltReads; j < numReads; j++) {
92             final GATKRead read = ArtificialReadUtils.createArtificialRead(SAM_HEADER, "Read" + j, chromosomeIndex, alignmentStart,
93                     "CCCCCCCCCCC".getBytes(), quals, "11M");
94             reads.add(read);
95         }
96 
97         readMap.put(sampleName, reads);
98 
99         final AlleleLikelihoods<GATKRead, Allele> likelihoods = new AlleleLikelihoods<>(sampleList, alleleList, readMap);
100         final int sampleIndex = 0;
101         final LikelihoodMatrix<GATKRead, Allele> matrix = likelihoods.sampleMatrix(sampleIndex);
102 
103         final double logLikelihoodOfBestAllele = 10.0;
104         final int refAlleleIndex = 0;
105         final int altAlleleIndex = 1;
106 
107         // Log likelihoods are initialized to 0 for all alleles. For each read_i, set its log likelihood for ALT allele to a positive value.
108         // This makes read_i an ALT read
109         for (int i = 0; i < numAltReads; i++) {
110             matrix.set(altAlleleIndex, i, logLikelihoodOfBestAllele);
111         }
112 
113         // Analogously, make read_j a REF read
114         for (int j = numAltReads; j < numReads; j++) {
115             matrix.set(refAlleleIndex, j, logLikelihoodOfBestAllele);
116         }
117 
118         return likelihoods;
119     }
120 }