1 package org.broadinstitute.hellbender.tools.walkers.annotator; 2 3 import htsjdk.samtools.Cigar; 4 import htsjdk.samtools.TextCigarCodec; 5 import htsjdk.variant.variantcontext.*; 6 import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_FisherStrand; 7 import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.AS_StrandBiasTest; 8 import org.broadinstitute.hellbender.utils.QualityUtils; 9 import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods; 10 import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils; 11 import org.broadinstitute.hellbender.utils.read.GATKRead; 12 import org.broadinstitute.hellbender.testutils.ArtificialAnnotationUtils; 13 import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; 14 import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines; 15 import org.testng.Assert; 16 import org.testng.annotations.DataProvider; 17 import org.testng.annotations.Test; 18 19 import java.util.*; 20 21 import static org.mockito.Mockito.mock; 22 import static org.mockito.Mockito.when; 23 24 25 public final class FisherStrandUnitTest { 26 private static final double DELTA_PRECISION = 10e-8; 27 private static final double DELTA_PRECISION_FOR_PHRED = 0.001; 28 29 private static final Allele REF = Allele.create("T", true); 30 private static final Allele ALT = Allele.create("A", false); 31 32 @DataProvider(name = "UsingTable") makeUsingTable()33 public Object[][] makeUsingTable() { 34 35 /* NOTE: the expected P values were computed in R after `normalizing` the table, as follows 36 fisher = function(v) { 37 return(fisher.test(matrix(v, nrow=2, ncol=2))$p.value) 38 } 39 normalize = function(v) { 40 s=sum(as.numeric(v)) 41 if (s < 2 * 200){ return(v) } 42 return(v/(s/200)) 43 } 44 */ 45 //> fisher(floor(normalize(c(2068, 6796, 1133, 0)))) 46 //[1] 5.919091e-13 47 48 final List<Object[]> tests = new ArrayList<>(); 49 tests.add(new Object[]{9, 11, 12, 10, 0.7578618}); 50 tests.add(new Object[]{12, 10, 9, 11, 0.7578618}); 51 tests.add(new Object[]{9, 10, 12, 10, 0.7578618}); 52 tests.add(new Object[]{9, 9, 12, 10, 1.0}); 53 tests.add(new Object[]{9, 13, 12, 10, 0.5466948}); 54 tests.add(new Object[]{12, 10, 9, 13, 0.5466948}); 55 tests.add(new Object[]{9, 12, 11, 9, 0.5377362}); 56 57 tests.add(new Object[]{0, 0, 0, 3, 1.0}); 58 tests.add(new Object[]{9, 0, 0, 0, 1.0}); 59 tests.add(new Object[]{0, 200, 200, 0, 1.942643e-119}); 60 61 tests.add(new Object[]{0, 0, 0, 0, 1.0}); 62 tests.add(new Object[]{100000, 100000, 100000, 100000, 1.0}); 63 tests.add(new Object[]{0, 0, 100000, 100000, 1.0}); 64 tests.add(new Object[]{100000, 100000, 100000, 0, 1.312515e-15}); 65 66 tests.add(new Object[]{100, 100, 100, 0, 3.730187e-23}); 67 tests.add(new Object[]{13736, 9047, 41, 1433, 6.162592e-05}); 68 tests.add(new Object[]{66, 14, 64, 4, 4.243330e-02}); 69 tests.add(new Object[]{351169, 306836, 153739, 2379, 2.193607e-09}); 70 tests.add(new Object[]{116449, 131216, 289, 16957, 1.340052e-03}); 71 tests.add(new Object[]{137, 159, 9, 23, 6.088506e-02}); 72 tests.add(new Object[]{129, 90, 21, 20, 3.919603e-01}); 73 tests.add(new Object[]{14054, 9160, 16, 7827, 7.466277e-17}); 74 tests.add(new Object[]{32803, 9184, 32117, 3283, 1.795855e-02}); 75 tests.add(new Object[]{2068, 6796, 1133, 0, 5.919091e-13}); 76 77 return tests.toArray(new Object[][]{}); 78 } 79 80 @Test(dataProvider = "UsingTable") testUsingTableData(final int refpos, final int refneg, final int altpos, final int altneg, double expectedPvalue)81 public void testUsingTableData(final int refpos, final int refneg, final int altpos, final int altneg, double expectedPvalue) { 82 final int[][] contingencyTable = new int[2][2]; 83 contingencyTable[0][0] = refpos; 84 contingencyTable[0][1] = refneg; 85 contingencyTable[1][0] = altpos; 86 contingencyTable[1][1] = altneg; 87 final double pvalue = FisherStrand.pValueForContingencyTable(contingencyTable); 88 Assert.assertEquals(pvalue, expectedPvalue, DELTA_PRECISION, "Pvalues"); 89 90 final double actualPhredPval = Double.parseDouble((String) new FisherStrand().annotationForOneTable(pvalue).get(GATKVCFConstants.FISHER_STRAND_KEY)); 91 final double expectedPhredPvalue = QualityUtils.phredScaleErrorRate(Math.max(expectedPvalue, FisherStrand.MIN_PVALUE)); 92 Assert.assertEquals(actualPhredPval, expectedPhredPvalue, DELTA_PRECISION_FOR_PHRED, "phred pvalues"); 93 } 94 95 @Test testLabels()96 public void testLabels() throws Exception { 97 Assert.assertEquals(new FisherStrand().getKeyNames(), Collections.singletonList(GATKVCFConstants.FISHER_STRAND_KEY)); 98 Assert.assertEquals(new FisherStrand().getDescriptions(), Collections.singletonList(GATKVCFHeaderLines.getInfoLine(GATKVCFConstants.FISHER_STRAND_KEY))); 99 } 100 101 @DataProvider(name = "UsingOverflowTable") makeUsingOverflowTable()102 public Object[][] makeUsingOverflowTable() { 103 final List<Object[]> tests = new ArrayList<>(); 104 tests.add(new Object[]{Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE}); 105 tests.add(new Object[]{0, 0, Integer.MAX_VALUE, Integer.MAX_VALUE}); 106 tests.add(new Object[]{Integer.MAX_VALUE, Integer.MAX_VALUE, Integer.MAX_VALUE, 0}); 107 return tests.toArray(new Object[][]{}); 108 } 109 110 @Test(dataProvider = "UsingOverflowTable", expectedExceptions = ArithmeticException.class) testUsingOverflowTable(final int refpos, final int refneg, final int altpos, final int altneg)111 public void testUsingOverflowTable(final int refpos, final int refneg, final int altpos, final int altneg) { 112 final int[][] contingencyTable = new int[2][2]; 113 contingencyTable[0][0] = refpos; 114 contingencyTable[0][1] = refneg; 115 contingencyTable[1][0] = altpos; 116 contingencyTable[1][1] = altneg; 117 final Double p = FisherStrand.pValueForContingencyTable(contingencyTable);//this call will overflow 118 } 119 120 @Test testUsingGT()121 public void testUsingGT() { 122 final Allele A = Allele.create("A", true); 123 final Allele C = Allele.create("C"); 124 125 final List<Allele> AC = Arrays.asList(A, C); 126 final int depth = 1 + 2 + 3 + 4; 127 128 final String sbbs = "1,2,3,4"; 129 final int[][] table = {{1, 2}, {3, 4}}; 130 131 final Genotype gAC = new GenotypeBuilder("1", AC).DP(depth).attribute(GATKVCFConstants.STRAND_BIAS_BY_SAMPLE_KEY, sbbs).make(); 132 133 final double log10PError = -5; 134 135 final VariantContext vc = new VariantContextBuilder("test", "20", 10, 10, AC).log10PError(log10PError) 136 .genotypes(Arrays.asList(gAC)) 137 .make(); 138 139 final Map<String, Object> annotatedMap = new FisherStrand().annotate(null, vc, null); 140 Assert.assertNotNull(annotatedMap, vc.toString()); 141 final String pPhredStr = (String) annotatedMap.get(GATKVCFConstants.FISHER_STRAND_KEY); 142 143 final double actualPhredPval = Double.parseDouble(pPhredStr); 144 145 final double expectedPvalue = FisherStrand.pValueForContingencyTable(table); 146 final double expectedPhredPvalue = QualityUtils.phredScaleErrorRate(Math.max(expectedPvalue, FisherStrand.MIN_PVALUE)); 147 Assert.assertEquals(actualPhredPval, expectedPhredPvalue, DELTA_PRECISION_FOR_PHRED, "phred pvalues"); 148 149 Assert.assertEquals(Math.pow(10, actualPhredPval / -10.0), expectedPvalue, DELTA_PRECISION); 150 } 151 makeRead(final boolean forward)152 private GATKRead makeRead(final boolean forward) { 153 Cigar cigar = TextCigarCodec.decode("10M"); 154 final GATKRead read = ArtificialReadUtils.createUniqueArtificialRead(cigar); 155 read.setIsReverseStrand(!forward); 156 return read; 157 } 158 159 private final String sample1 = "NA1"; 160 makeVC(final Allele refAllele, final Allele altAllele)161 private VariantContext makeVC(final Allele refAllele, final Allele altAllele) { 162 final double[] genotypeLikelihoods1 = {30, 0, 190}; 163 final GenotypesContext testGC = GenotypesContext.create(2); 164 // sample1 -> A/T with GQ 30 165 testGC.add(new GenotypeBuilder(sample1).alleles(Arrays.asList(refAllele, altAllele)).PL(genotypeLikelihoods1).GQ(30).make()); 166 167 return (new VariantContextBuilder()) 168 .alleles(Arrays.asList(refAllele, altAllele)).chr("1").start(15L).stop(15L).genotypes(testGC).make(); 169 } 170 171 @Test testUsingLikelihoods()172 public void testUsingLikelihoods() { 173 final InfoFieldAnnotation ann = new FisherStrand(); 174 final String key = GATKVCFConstants.FISHER_STRAND_KEY; 175 176 final int[][] table = {{1, 1}, // alt: one read in each direction, 177 {2, 0}}; //ref: 2 reads fwd, 0 reads back 178 179 final List<GATKRead> refReads = Arrays.asList(makeRead(true), makeRead(true)); 180 final List<GATKRead> altReads = Arrays.asList(makeRead(false), makeRead(true)); 181 final AlleleLikelihoods<GATKRead, Allele> likelihoods = 182 ArtificialAnnotationUtils.makeLikelihoods("SAMPLE", refReads, altReads, -100.0, -100.0, REF, ALT); 183 184 final VariantContext vc = makeVC(REF, ALT); 185 186 final Map<String, Object> annotatedMap = ann.annotate(null, vc, likelihoods); 187 Assert.assertNotNull(annotatedMap, vc.toString()); 188 final String pPhredStr = (String) annotatedMap.get(key); 189 190 final double actualPhredPval = Double.parseDouble(pPhredStr); 191 192 final double expectedPvalue = FisherStrand.pValueForContingencyTable(table); 193 final double expectedPhredPvalue = QualityUtils.phredScaleErrorRate(Math.max(expectedPvalue, FisherStrand.MIN_PVALUE)); 194 Assert.assertEquals(actualPhredPval, expectedPhredPvalue, DELTA_PRECISION_FOR_PHRED, "phred pvalues"); 195 196 Assert.assertEquals(Math.pow(10, actualPhredPval / -10.0), expectedPvalue, DELTA_PRECISION); 197 } 198 199 @Test testUsingLikelihoods_Raw()200 public void testUsingLikelihoods_Raw() { 201 final AS_FisherStrand ann = new AS_FisherStrand(); 202 final int[][] table = {{4, 0}, // ref: 4 reads fwd, 0 reads back 203 {2, 2}}; // alt: two reads in each direction 204 205 final List<GATKRead> refReads = Arrays.asList(makeRead(true), makeRead(true), makeRead(true), makeRead(true)); 206 final List<GATKRead> altReads = Arrays.asList(makeRead(false), makeRead(true), makeRead(false), makeRead(true)); 207 final AlleleLikelihoods<GATKRead, Allele> likelihoods = 208 ArtificialAnnotationUtils.makeLikelihoods("SAMPLE", refReads, altReads, -100.0, -100.0, REF, ALT); 209 210 final VariantContext vc = makeVC(REF, ALT); 211 212 final Map<String, Object> annotatedMapRaw = ann.annotateRawData(null, vc, likelihoods); 213 final Map<String, Object> annotatedMapNonRaw = ann.annotate(null, vc, likelihoods); 214 Assert.assertNotNull(annotatedMapRaw, vc.toString()); 215 final String actualStringRaw = (String) annotatedMapRaw.get(GATKVCFConstants.AS_SB_TABLE_KEY); 216 Assert.assertNotNull(annotatedMapNonRaw, vc.toString()); 217 final String actualStringNonRaw = (String) annotatedMapNonRaw.get(ann.getKeyNames().get(0)); 218 219 final String expectedRawString = AS_StrandBiasTest.rawValueAsString(table); 220 final String expectedFullStringString = "3.680"; 221 Assert.assertEquals(actualStringRaw, expectedRawString); 222 Assert.assertEquals(actualStringNonRaw, expectedFullStringString); 223 } 224 225 @Test testEmptyIfNonVariant()226 public void testEmptyIfNonVariant() { 227 FisherStrand fs = new FisherStrand(); 228 Map<String, Object> ann = fs.annotate(null, when(mock(VariantContext.class).isVariant()).thenReturn(false).getMock(), null); 229 Assert.assertTrue(ann.isEmpty()); 230 } 231 } 232