1 package org.broadinstitute.hellbender.tools.walkers.annotator;
2 
3 import htsjdk.samtools.Cigar;
4 import htsjdk.samtools.TextCigarCodec;
5 import htsjdk.variant.variantcontext.*;
6 import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_FisherStrand;
7 import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_StrandBiasTest;
8 import org.broadinstitute.hellbender.utils.QualityUtils;
9 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
10 import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
11 import org.broadinstitute.hellbender.utils.read.GATKRead;
12 import org.broadinstitute.hellbender.testutils.ArtificialAnnotationUtils;
13 import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
14 import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
15 import org.testng.Assert;
16 import org.testng.annotations.DataProvider;
17 import org.testng.annotations.Test;
18 
19 import java.util.*;
20 
21 import static org.mockito.Mockito.mock;
22 import static org.mockito.Mockito.when;
23 
24 
25 public final class FisherStrandUnitTest {
26     private static final double DELTA_PRECISION = 10e-8;
27     private static final double DELTA_PRECISION_FOR_PHRED = 0.001;
28 
29     private static final Allele REF = Allele.create("T", true);
30     private static final Allele ALT = Allele.create("A", false);
31 
32     @DataProvider(name = "UsingTable")
makeUsingTable()33     public Object[][] makeUsingTable() {
34 
35         /* NOTE: the expected P values were computed in R after `normalizing` the table, as follows
36              fisher = function(v) {
37                 return(fisher.test(matrix(v, nrow=2, ncol=2))$p.value)
38              }
39              normalize = function(v) {
40                 s=sum(as.numeric(v))
41                 if (s < 2 * 200){ return(v) }
42                 return(v/(s/200))
43             }
44         */
45         //> fisher(floor(normalize(c(2068, 6796, 1133, 0))))
46         //[1] 5.919091e-13
47 
48         final List<Object[]> tests = new ArrayList<>();
49         tests.add(new Object[]{9, 11, 12, 10, 0.7578618});
50         tests.add(new Object[]{12, 10, 9, 11, 0.7578618});
51         tests.add(new Object[]{9, 10, 12, 10, 0.7578618});
52         tests.add(new Object[]{9, 9, 12, 10, 1.0});
53         tests.add(new Object[]{9, 13, 12, 10, 0.5466948});
54         tests.add(new Object[]{12, 10, 9, 13, 0.5466948});
55         tests.add(new Object[]{9, 12, 11, 9, 0.5377362});
56 
57         tests.add(new Object[]{0, 0, 0, 3,                     1.0});
58         tests.add(new Object[]{9, 0, 0, 0,                     1.0});
59         tests.add(new Object[]{0, 200, 200, 0,                 1.942643e-119});
60 
61         tests.add(new Object[]{0, 0, 0, 0, 1.0});
62         tests.add(new Object[]{100000, 100000, 100000, 100000, 1.0});
63         tests.add(new Object[]{0, 0, 100000, 100000, 1.0});
64         tests.add(new Object[]{100000, 100000, 100000, 0, 1.312515e-15});
65 
66         tests.add(new Object[]{100, 100, 100, 0, 3.730187e-23});
67         tests.add(new Object[]{13736, 9047, 41, 1433, 6.162592e-05});
68         tests.add(new Object[]{66, 14, 64, 4, 4.243330e-02});
69         tests.add(new Object[]{351169, 306836, 153739, 2379, 2.193607e-09});
70         tests.add(new Object[]{116449, 131216, 289, 16957, 1.340052e-03});
71         tests.add(new Object[]{137, 159, 9, 23, 6.088506e-02});
72         tests.add(new Object[]{129, 90, 21, 20, 3.919603e-01});
73         tests.add(new Object[]{14054, 9160, 16, 7827, 7.466277e-17});
74         tests.add(new Object[]{32803, 9184, 32117, 3283, 1.795855e-02});
75         tests.add(new Object[]{2068, 6796, 1133, 0, 5.919091e-13});
76 
77         return tests.toArray(new Object[][]{});
78     }
79 
80     @Test(dataProvider = "UsingTable")
testUsingTableData(final int refpos, final int refneg, final int altpos, final int altneg, double expectedPvalue)81     public void testUsingTableData(final int refpos, final int refneg, final int altpos, final int altneg, double expectedPvalue) {
82         final int[][] contingencyTable = new int[2][2];
83         contingencyTable[0][0] = refpos;
84         contingencyTable[0][1] = refneg;
85         contingencyTable[1][0] = altpos;
86         contingencyTable[1][1] = altneg;
87         final double pvalue = FisherStrand.pValueForContingencyTable(contingencyTable);
88         Assert.assertEquals(pvalue, expectedPvalue, DELTA_PRECISION, "Pvalues");
89 
90         final double actualPhredPval = Double.parseDouble((String) new FisherStrand().annotationForOneTable(pvalue).get(GATKVCFConstants.FISHER_STRAND_KEY));
91         final double expectedPhredPvalue = QualityUtils.phredScaleErrorRate(Math.max(expectedPvalue, FisherStrand.MIN_PVALUE));
92         Assert.assertEquals(actualPhredPval, expectedPhredPvalue, DELTA_PRECISION_FOR_PHRED, "phred pvalues");
93     }
94 
95     @Test
testLabels()96     public void testLabels() throws Exception {
97         Assert.assertEquals(new FisherStrand().getKeyNames(), Collections.singletonList(GATKVCFConstants.FISHER_STRAND_KEY));
98         Assert.assertEquals(new FisherStrand().getDescriptions(), Collections.singletonList(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.FISHER_STRAND_KEY)));
99     }
100 
101     @DataProvider(name = "UsingOverflowTable")
makeUsingOverflowTable()102     public Object[][] makeUsingOverflowTable() {
103         final List<Object[]> tests = new ArrayList<>();
104         tests.add(new Object[]{Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE});
105         tests.add(new Object[]{0, 0, Integer.MAX_VALUE, Integer.MAX_VALUE});
106         tests.add(new Object[]{Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, 0});
107         return tests.toArray(new Object[][]{});
108     }
109 
110     @Test(dataProvider = "UsingOverflowTable", expectedExceptions = ArithmeticException.class)
testUsingOverflowTable(final int refpos, final int refneg, final int altpos, final int altneg)111     public void testUsingOverflowTable(final int refpos, final int refneg, final int altpos, final int altneg) {
112         final int[][] contingencyTable = new int[2][2];
113         contingencyTable[0][0] = refpos;
114         contingencyTable[0][1] = refneg;
115         contingencyTable[1][0] = altpos;
116         contingencyTable[1][1] = altneg;
117         final Double p = FisherStrand.pValueForContingencyTable(contingencyTable);//this call will overflow
118     }
119 
120     @Test
testUsingGT()121     public void testUsingGT() {
122         final Allele A = Allele.create("A", true);
123         final Allele C = Allele.create("C");
124 
125         final List<Allele> AC = Arrays.asList(A, C);
126         final int depth = 1 + 2 + 3 + 4;
127 
128         final String sbbs = "1,2,3,4";
129         final int[][] table = {{1, 2}, {3, 4}};
130 
131         final Genotype gAC = new GenotypeBuilder("1", AC).DP(depth).attribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY, sbbs).make();
132 
133         final double log10PError = -5;
134 
135         final VariantContext vc = new VariantContextBuilder("test", "20", 10, 10, AC).log10PError(log10PError)
136                 .genotypes(Arrays.asList(gAC))
137                 .make();
138 
139         final Map<String, Object> annotatedMap = new FisherStrand().annotate(null, vc, null);
140         Assert.assertNotNull(annotatedMap, vc.toString());
141         final String pPhredStr = (String) annotatedMap.get(GATKVCFConstants.FISHER_STRAND_KEY);
142 
143         final double actualPhredPval = Double.parseDouble(pPhredStr);
144 
145         final double expectedPvalue = FisherStrand.pValueForContingencyTable(table);
146         final double expectedPhredPvalue = QualityUtils.phredScaleErrorRate(Math.max(expectedPvalue, FisherStrand.MIN_PVALUE));
147         Assert.assertEquals(actualPhredPval, expectedPhredPvalue, DELTA_PRECISION_FOR_PHRED, "phred pvalues");
148 
149         Assert.assertEquals(Math.pow(10, actualPhredPval / -10.0), expectedPvalue, DELTA_PRECISION);
150     }
151 
makeRead(final boolean forward)152     private GATKRead makeRead(final boolean forward) {
153         Cigar cigar = TextCigarCodec.decode("10M");
154         final GATKRead read = ArtificialReadUtils.createUniqueArtificialRead(cigar);
155         read.setIsReverseStrand(!forward);
156         return read;
157     }
158 
159     private final String sample1 = "NA1";
160 
makeVC(final Allele refAllele, final Allele altAllele)161     private VariantContext makeVC(final Allele refAllele, final Allele altAllele) {
162         final double[] genotypeLikelihoods1 = {30, 0, 190};
163         final GenotypesContext testGC = GenotypesContext.create(2);
164         // sample1 -> A/T with GQ 30
165         testGC.add(new GenotypeBuilder(sample1).alleles(Arrays.asList(refAllele, altAllele)).PL(genotypeLikelihoods1).GQ(30).make());
166 
167         return (new VariantContextBuilder())
168                 .alleles(Arrays.asList(refAllele, altAllele)).chr("1").start(15L).stop(15L).genotypes(testGC).make();
169     }
170 
171     @Test
testUsingLikelihoods()172     public void testUsingLikelihoods() {
173         final InfoFieldAnnotation ann = new FisherStrand();
174         final String key = GATKVCFConstants.FISHER_STRAND_KEY;
175 
176         final int[][] table = {{1, 1},  // alt: one read in each direction,
177                 {2, 0}}; //ref: 2 reads fwd, 0 reads back
178 
179         final List<GATKRead> refReads = Arrays.asList(makeRead(true), makeRead(true));
180         final List<GATKRead> altReads = Arrays.asList(makeRead(false), makeRead(true));
181         final AlleleLikelihoods<GATKRead, Allele> likelihoods =
182                 ArtificialAnnotationUtils.makeLikelihoods("SAMPLE", refReads, altReads, -100.0, -100.0, REF, ALT);
183 
184         final VariantContext vc = makeVC(REF, ALT);
185 
186         final Map<String, Object> annotatedMap = ann.annotate(null, vc, likelihoods);
187         Assert.assertNotNull(annotatedMap, vc.toString());
188         final String pPhredStr = (String) annotatedMap.get(key);
189 
190         final double actualPhredPval = Double.parseDouble(pPhredStr);
191 
192         final double expectedPvalue = FisherStrand.pValueForContingencyTable(table);
193         final double expectedPhredPvalue = QualityUtils.phredScaleErrorRate(Math.max(expectedPvalue, FisherStrand.MIN_PVALUE));
194         Assert.assertEquals(actualPhredPval, expectedPhredPvalue, DELTA_PRECISION_FOR_PHRED, "phred pvalues");
195 
196         Assert.assertEquals(Math.pow(10, actualPhredPval / -10.0), expectedPvalue, DELTA_PRECISION);
197     }
198 
199     @Test
testUsingLikelihoods_Raw()200     public void testUsingLikelihoods_Raw() {
201         final AS_FisherStrand ann = new AS_FisherStrand();
202         final int[][] table = {{4, 0},  // ref: 4 reads fwd, 0 reads back
203                 {2, 2}}; // alt: two reads in each direction
204 
205         final List<GATKRead> refReads = Arrays.asList(makeRead(true), makeRead(true), makeRead(true), makeRead(true));
206         final List<GATKRead> altReads = Arrays.asList(makeRead(false), makeRead(true), makeRead(false), makeRead(true));
207         final AlleleLikelihoods<GATKRead, Allele> likelihoods =
208                 ArtificialAnnotationUtils.makeLikelihoods("SAMPLE", refReads, altReads, -100.0, -100.0, REF, ALT);
209 
210         final VariantContext vc = makeVC(REF, ALT);
211 
212         final Map<String, Object> annotatedMapRaw = ann.annotateRawData(null, vc, likelihoods);
213         final Map<String, Object> annotatedMapNonRaw = ann.annotate(null, vc, likelihoods);
214         Assert.assertNotNull(annotatedMapRaw, vc.toString());
215         final String actualStringRaw = (String) annotatedMapRaw.get(GATKVCFConstants.AS_SB_TABLE_KEY);
216         Assert.assertNotNull(annotatedMapNonRaw, vc.toString());
217         final String actualStringNonRaw = (String) annotatedMapNonRaw.get(ann.getKeyNames().get(0));
218 
219         final String expectedRawString = AS_StrandBiasTest.rawValueAsString(table);
220         final String expectedFullStringString = "3.680";
221         Assert.assertEquals(actualStringRaw, expectedRawString);
222         Assert.assertEquals(actualStringNonRaw, expectedFullStringString);
223     }
224 
225     @Test
testEmptyIfNonVariant()226     public void testEmptyIfNonVariant() {
227         FisherStrand fs = new FisherStrand();
228         Map<String, Object> ann = fs.annotate(null, when(mock(VariantContext.class).isVariant()).thenReturn(false).getMock(), null);
229         Assert.assertTrue(ann.isEmpty());
230     }
231 }
232