1 package org.broadinstitute.hellbender.tools.walkers.annotator;
2 
3 import htsjdk.samtools.Cigar;
4 import htsjdk.samtools.CigarElement;
5 import htsjdk.samtools.CigarOperator;
6 import htsjdk.samtools.TextCigarCodec;
7 import htsjdk.variant.variantcontext.*;
8 import org.broadinstitute.hellbender.engine.ReferenceContext;
9 import org.broadinstitute.hellbender.utils.MannWhitneyU;
10 import org.broadinstitute.hellbender.utils.QualityUtils;
11 import org.broadinstitute.hellbender.utils.genotyper.*;
12 import org.broadinstitute.hellbender.utils.read.ArtificialReadUtils;
13 import org.broadinstitute.hellbender.utils.read.GATKRead;
14 import org.broadinstitute.hellbender.testutils.ArtificialAnnotationUtils;
15 import org.broadinstitute.hellbender.GATKBaseTest;
16 import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
17 import org.testng.Assert;
18 import org.testng.annotations.BeforeClass;
19 import org.testng.annotations.DataProvider;
20 import org.testng.annotations.Test;
21 
22 import java.util.*;
23 
24 public final class ReadPosRankSumTestUnitTest extends GATKBaseTest {
25     private final String sample1 = "NA1";
26     private final String sample2 = "NA2";
27     private static final String CONTIG = "1";
28 
29     private static final Allele REF = Allele.create("T", true);
30     private static final Allele ALT = Allele.create("A", false);
31     private static final Allele ALT_INSERTION = Allele.create("TTTTT", false);
32 
makeVC(final long position)33     private VariantContext makeVC(final long position) {
34         final double[] genotypeLikelihoods1 = {30,0,190};
35         final GenotypesContext testGC = GenotypesContext.create(2);
36         // sample1 -> A/T with GQ 30
37         testGC.add(new GenotypeBuilder(sample1).alleles(Arrays.asList(REF, ALT)).PL(genotypeLikelihoods1).GQ(30).make());
38         // sample2 -> A/T with GQ 40
39         testGC.add(new GenotypeBuilder(sample2).alleles(Arrays.asList(REF, ALT)).PL(genotypeLikelihoods1).GQ(40).make());
40 
41         return (new VariantContextBuilder())
42                 .alleles(Arrays.asList(REF, ALT)).chr(CONTIG).start(position).stop(position).genotypes(testGC).make();
43     }
44 
makeRead(final int start, final int mq)45     private GATKRead makeRead(final int start, final int mq) {
46         Cigar cigar = TextCigarCodec.decode("10M");
47         final GATKRead read = ArtificialReadUtils.createArtificialRead(cigar);
48         read.setMappingQuality(mq);
49         read.setPosition(CONTIG, start);
50         return read;
51     }
52 
53     @Test
testReadPos()54     public void testReadPos(){
55         final InfoFieldAnnotation ann = new ReadPosRankSumTest();
56         final String key =  GATKVCFConstants.READ_POS_RANK_SUM_KEY;
57         final MannWhitneyU mannWhitneyU = new MannWhitneyU();
58 
59         final int[] startAlts = {3, 4};
60         final int[] startRefs = {1, 2};
61         final List<GATKRead> refReads = Arrays.asList(makeRead(startRefs[0], 30), makeRead(startRefs[1], 30));
62         final List<GATKRead> altReads = Arrays.asList(makeRead(startAlts[0], 30), makeRead(startAlts[1], 30));
63         final AlleleLikelihoods<GATKRead, Allele> likelihoods =
64                 ArtificialAnnotationUtils.makeLikelihoods(sample1, refReads, altReads, -100.0, -100.0, REF, ALT);
65 
66         Assert.assertEquals(ann.getDescriptions().size(), 1);
67         Assert.assertEquals(ann.getDescriptions().get(0).getID(), key);
68         Assert.assertEquals(ann.getKeyNames().size(), 1);
69         Assert.assertEquals(ann.getKeyNames().get(0), key);
70 
71         final ReferenceContext ref= null;
72 
73         final long position = 5L;  //middle of the read
74         final VariantContext vc= makeVC(position);
75 
76         final Map<String, Object> annotate = ann.annotate(ref, vc, likelihoods);
77         final double zScore = mannWhitneyU.test(new double[]{position - startAlts[0], position - startAlts[1]}, new double[]{position - startRefs[0], position - startRefs[1]}, MannWhitneyU.TestType.FIRST_DOMINATES).getZ();
78         final String zScoreStr = String.format("%.3f", zScore);
79         Assert.assertEquals(annotate.get(key), zScoreStr);
80 
81         final long positionEnd = 9L;  //past middle
82         final VariantContext vcEnd= makeVC(positionEnd);
83 
84         //Note: past the middle of the read we compute the position from the end.
85         final Map<String, Object> annotateEnd = ann.annotate(ref, vcEnd, likelihoods);
86         final double zScoreEnd = mannWhitneyU.test(new double[]{startAlts[0], startAlts[1]}, new double[]{startRefs[0], startRefs[1]}, MannWhitneyU.TestType.FIRST_DOMINATES).getZ();
87         final String zScoreEndStr = String.format("%.3f", zScoreEnd);
88         Assert.assertEquals(annotateEnd.get(key), zScoreEndStr);
89 
90         final long positionPastEnd = 20L;  //past middle
91         final VariantContext vcPastEnd= makeVC(positionPastEnd);
92 
93         //Note: past the end of the read, there's nothing
94         final Map<String, Object> annotatePastEnd = ann.annotate(ref, vcPastEnd, likelihoods);
95         Assert.assertTrue(annotatePastEnd.isEmpty());
96     }
97 
98     @DataProvider(name = "dataIsUsableRead")
dataIsUsableRead()99     private Object[][] dataIsUsableRead(){
100         return new Object[][]{
101                 {"20M6D2M", 10, 1, 0, true},
102                 {"20M6D2M", 10, 1, 27, true},
103                 {"20M6D2M", 10, 1, 29, false},
104                 {"1I20M1S", 10, 1, 0, true},
105                 {"1I20M1S", 0, 1, 0, false},
106                 {"1I20M1S", QualityUtils.MAPPING_QUALITY_UNAVAILABLE, 1, 0, false},
107                 {"1I20M1S", 10, 1, 22, false},
108                 {"1I20M1S", 10, 21, 42, false},
109                 {"1I20M1H", 10, 1, 21, false},
110         };
111     }
112 
113     @Test(dataProvider = "dataIsUsableRead")
testIsUsableRead(final String cigarString, final int mappingQuality, final int start, final int refLoc, final boolean isUsable )114     public void testIsUsableRead(final String cigarString, final int mappingQuality, final int start, final int refLoc, final boolean isUsable ) {
115         final ReadPosRankSumTest readPosRankSumTest = new ReadPosRankSumTest();
116         final Cigar cigar = TextCigarCodec.decode(cigarString);
117         final GATKRead read = ArtificialReadUtils.createArtificialRead(cigar);
118         read.setMappingQuality(mappingQuality);
119         read.setPosition(CONTIG, start);
120         Assert.assertEquals(readPosRankSumTest.isUsableRead(read, makeVC(refLoc)), isUsable);
121     }
122 
123     //Basic aligned read
124     private GATKRead allMatch;
125 
126     //Read with insertion and deletion
127     private GATKRead twoIndels;
128 
129     //Read with soft clips at start
130     private GATKRead softClipStart;
131 
132     //Read with hard clips at start
133     private GATKRead hardClipStart;
134 
135     //Read with low quality tail
136     private GATKRead lowQualTail;
137 
138     //Read with low quality tail, partially soft clipped
139     private GATKRead lowQualClippedTail;
140 
141     //Read with low quality bases at start
142     private GATKRead lowQualStart;
143 
144     //Read with low quality bases, partially soft clipped at both ends
145     private GATKRead lowQualBothEnds;
146 
147     @BeforeClass
init()148     public void init() {
149         List<CigarElement> cigarElements_allMatch = new LinkedList<>();
150         cigarElements_allMatch.add(new CigarElement(151, CigarOperator.M));
151         allMatch = ArtificialReadUtils.createArtificialRead(new Cigar(cigarElements_allMatch));
152 
153         List<CigarElement> cigarElements_2indels = new LinkedList<>();
154         cigarElements_2indels.add(new CigarElement(66, CigarOperator.M));
155         cigarElements_2indels.add(new CigarElement(10, CigarOperator.I));
156         cigarElements_2indels.add(new CigarElement(7, CigarOperator.M));
157         cigarElements_2indels.add(new CigarElement(10, CigarOperator.D));
158         cigarElements_2indels.add(new CigarElement(68, CigarOperator.M));
159         twoIndels = ArtificialReadUtils.createArtificialRead(new Cigar(cigarElements_2indels));
160 
161         List<CigarElement> cigarElements_softClipStart = new LinkedList<>();
162         cigarElements_softClipStart.add(new CigarElement(17, CigarOperator.S));
163         cigarElements_softClipStart.add(new CigarElement(134, CigarOperator.M));
164         softClipStart = ArtificialReadUtils.createArtificialRead(new Cigar(cigarElements_softClipStart));
165 
166         List<CigarElement> cigarElements_hardClipStart = new LinkedList<>();
167         cigarElements_hardClipStart.add(new CigarElement(17, CigarOperator.H));
168         cigarElements_hardClipStart.add(new CigarElement(134, CigarOperator.M));
169         hardClipStart = ArtificialReadUtils.createArtificialRead(new Cigar(cigarElements_hardClipStart));
170 
171 
172         final byte [] bases_lowQualTail = {'A', 'C', 'T', 'G', 'A', 'A', 'A', 'A', 'A', 'A'};
173         final byte [] quals_lowQualTail = {30, 15, 25, 30, 2, 2, 2, 2, 2, 2};
174         lowQualTail = ArtificialReadUtils.createArtificialRead(bases_lowQualTail, quals_lowQualTail, "10M");
175 
176         final byte [] bases_lowQualClippedTail = {'A', 'C', 'T', 'G', 'A', 'A', 'A', 'A', 'A', 'A'};
177         final byte [] quals_lowQualClippedTail = {30, 15, 25, 30, 2, 2, 2, 2, 2, 2};
178         lowQualClippedTail = ArtificialReadUtils.createArtificialRead(bases_lowQualClippedTail, quals_lowQualClippedTail, "8M2S");
179 
180         final byte [] bases_lowQualStart = {'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', 'T', 'G'};
181         final byte [] quals_lowQualStart = {2, 2, 2, 2, 2, 2, 30, 15, 25, 30};
182         lowQualStart = ArtificialReadUtils.createArtificialRead(bases_lowQualStart, quals_lowQualStart, "10M");
183 
184         final byte [] bases_lowQualBothEnds = {'A', 'A', 'A', 'A', 'A', 'A', 'G', 'C', 'T', 'G', 'A', 'A', 'A', 'A', 'A', 'A'};
185         final byte [] quals_lowQualBothEnds = { 2, 2, 2, 2, 2, 2, 30, 15, 25, 30, 2, 2, 2, 2, 2, 2};
186         lowQualBothEnds = ArtificialReadUtils.createArtificialRead(bases_lowQualBothEnds, quals_lowQualBothEnds, "2S12M2S");
187     }
188 
189     @Test
testLeadingInsertion()190     public void testLeadingInsertion(){
191         final int start = 100;
192         final VariantContext vc = new VariantContextBuilder().alleles(Arrays.asList(REF, ALT_INSERTION)).chr(CONTIG).start(start-1).stop(start-1).make();
193 
194         final Cigar cigar = TextCigarCodec.decode("10I10M");
195         final GATKRead read = ArtificialReadUtils.createArtificialRead(cigar);
196 
197         read.setPosition("CONTIG", start);
198 
199         Assert.assertEquals(ReadPosRankSumTest.getReadPosition(read, vc).getAsDouble(), 0.0);
200     }
201 
202     @Test
testSNPBetweenTwoDeletions()203     public void testSNPBetweenTwoDeletions(){
204         final Cigar cigar = TextCigarCodec.decode("10M10D1M10D10M");
205         final GATKRead read = ArtificialReadUtils.createArtificialRead(cigar);
206         final int start = 100;
207         read.setPosition("CONTIG", start);
208 
209         Assert.assertEquals(ReadPosRankSumTest.getReadPosition(read, makeVC(start + 20)).getAsDouble(), 10.0);
210         Assert.assertEquals(ReadPosRankSumTest.getReadPosition(read, makeVC(start + 19)).getAsDouble(), 10.0);
211     }
212 }
213