1 package org.broadinstitute.hellbender.engine;
2 
3 import org.broadinstitute.hellbender.GATKBaseTest;
4 import org.broadinstitute.hellbender.exceptions.GATKException;
5 import org.broadinstitute.hellbender.utils.SimpleInterval;
6 import org.broadinstitute.hellbender.utils.io.IOUtils;
7 import org.testng.Assert;
8 import org.testng.annotations.DataProvider;
9 import org.testng.annotations.Test;
10 import org.testng.internal.junit.ArrayAsserts;
11 
12 import java.nio.file.Path;
13 import java.util.ArrayList;
14 import java.util.Iterator;
15 import java.util.List;
16 
17 public final class ReferenceContextUnitTest extends GATKBaseTest {
18 
19     private static final Path TEST_REFERENCE = IOUtils.getPath(hg19MiniReference);
20 
21     @DataProvider(name = "EmptyReferenceContextDataProvider")
getEmptyReferenceContextData()22     public Object[][] getEmptyReferenceContextData() {
23         // Default-constructed ReferenceContexts and ReferenceContexts constructed from null ReferenceDataSources
24         // and/or null intervals should behave as empty context objects.
25         return new Object[][] {
26                 { new ReferenceContext() },
27                 { new ReferenceContext(null, null, 0, 0) },
28                 { new ReferenceContext(null, new SimpleInterval("1", 1, 1), 0, 0 ) },
29                 { new ReferenceContext(new ReferenceFileSource(TEST_REFERENCE), null) }
30         };
31     }
32 
33     @Test(dataProvider = "EmptyReferenceContextDataProvider")
testEmptyReferenceContext( final ReferenceContext refContext)34     public void testEmptyReferenceContext( final ReferenceContext refContext) {
35         Assert.assertFalse(refContext.hasBackingDataSource() && refContext.getInterval() != null,
36                            "Empty ReferenceContext reports having both a backing data source and an interval");
37         Assert.assertEquals(refContext.getBases().length, 0, "Empty ReferenceContext should have returned an empty bases array from getBases()");
38         Assert.assertFalse(refContext.iterator().hasNext(), "Empty ReferenceContext should have returned an empty bases iterator from iterator()");
39     }
40 
41     @DataProvider(name = "WindowlessReferenceIntervalDataProvider")
getWindowlessReferenceIntervals()42     public Object[][] getWindowlessReferenceIntervals() {
43         return new Object[][] {
44                 { new SimpleInterval("1", 1, 3), "NNN" },
45                 { new SimpleInterval("1", 11041, 11045), "GCAAA" },
46                 { new SimpleInterval("1", 11210, 11220), "CGGTGCTGTGC" },
47                 { new SimpleInterval("2", 9995, 10005), "NNNNNNCGTAT" },
48                 { new SimpleInterval("2", 10001, 10080), "CGTATCCCACACACCACACCCACACACCACACCCACACACACCCACACCCACACCCACACACACCACACCCACACACCAC" },
49                 { new SimpleInterval("2", 10005, 10084), "TCCCACACACCACACCCACACACCACACCCACACACACCCACACCCACACCCACACACACCACACCCACACACCACACCC" },
50                 { new SimpleInterval("2", 15995, 16000), "TGTCAG" }
51         };
52     }
53 
54     @Test(dataProvider = "WindowlessReferenceIntervalDataProvider")
testWindowlessReferenceContext( final SimpleInterval interval, final String expectedBases )55     public void testWindowlessReferenceContext( final SimpleInterval interval, final String expectedBases ) {
56         try (ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
57             ReferenceContext refContext = new ReferenceContext(reference, interval);
58 
59             checkReferenceContextBases(refContext, expectedBases);
60             Assert.assertEquals(refContext.getInterval(), interval, "Wrong interval in reference context");
61             Assert.assertEquals(refContext.getWindow(), interval, "Window in windowless reference context not equal to original interval");
62             Assert.assertEquals(refContext.numWindowLeadingBases(), 0, "Non-zero leading window size in windowless reference context");
63             Assert.assertEquals(refContext.numWindowTrailingBases(), 0, "Non-zero trailing window size in windowless reference context");
64         }
65     }
66 
67     @DataProvider(name = "WindowedReferenceIntervalDataProvider")
getWindowedReferenceIntervals()68     public Object[][] getWindowedReferenceIntervals() {
69         return new Object[][] {
70                 // Window off the start of the contig:
71                 { new SimpleInterval("1", 1, 3), 5, 5, new SimpleInterval("1", 1, 8), "NNNNNNNN" },
72                 // Window in middle of contig with equal, non-zero start and stop offsets
73                 { new SimpleInterval("1", 11041, 11045), 5, 5, new SimpleInterval("1", 11036, 11050), "CAGGAGCAAAGTCGC" },
74                 // Window in middle of contig with start offset only
75                 { new SimpleInterval("1", 11210, 11220), 3, 0, new SimpleInterval("1", 11207, 11220), "TCACGGTGCTGTGC" },
76                 // Window in middle of contig with stop offset only
77                 { new SimpleInterval("2", 9995, 10005), 0, 3, new SimpleInterval("2", 9995, 10008), "NNNNNNCGTATCCC" },
78                 // Window in middle of contig with unequal, non-zero start and stop offsets
79                 { new SimpleInterval("2", 10005, 10084), 3, 8, new SimpleInterval("2", 10002, 10092), "GTATCCCACACACCACACCCACACACCACACCCACACACACCCACACCCACACCCACACACACCACACCCACACACCACACCCACACCCAC" },
80                 // Window off the end of the contig
81                 { new SimpleInterval("2", 15995, 16000), 2, 5, new SimpleInterval("2", 15993, 16000), "TGTGTCAG" }
82         };
83     }
84 
85     @Test(dataProvider = "WindowedReferenceIntervalDataProvider")
testWindowedContext( final SimpleInterval interval, final int windowStartOffset, final int windowStopOffset, final SimpleInterval expectedWindow, final String expectedBases )86     public void testWindowedContext( final SimpleInterval interval, final int windowStartOffset, final int windowStopOffset, final SimpleInterval expectedWindow, final String expectedBases ) {
87         try (ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
88             ReferenceContext refContext = new ReferenceContext(reference, interval, windowStartOffset, windowStopOffset);
89 
90             checkReferenceContextBases(refContext, expectedBases);
91             Assert.assertEquals(refContext.getInterval(), interval, "Wrong interval in reference context");
92             Assert.assertEquals(refContext.getWindow(), expectedWindow, "Window in windowed reference context not equal to expected window");
93             Assert.assertEquals(refContext.numWindowLeadingBases(), interval.getStart() - expectedWindow.getStart(),
94                     "Leading window size in windowed reference context not equal to expected value");
95             Assert.assertEquals(refContext.numWindowTrailingBases(), 0, expectedWindow.getEnd() - interval.getEnd(),
96                     "Trailing window size in windowed reference context not equal to expected value");
97         }
98     }
99 
100     @Test(dataProvider = "WindowedReferenceIntervalDataProvider")
testWindowedContextUsingIntervalObjects( final SimpleInterval interval, final int windowStartOffset, final int windowStopOffset, final SimpleInterval expectedWindow, final String expectedBases )101     public void testWindowedContextUsingIntervalObjects( final SimpleInterval interval, final int windowStartOffset, final int windowStopOffset, final SimpleInterval expectedWindow, final String expectedBases ) {
102         try (ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
103             ReferenceContext refContext = new ReferenceContext(reference, interval, expectedWindow);
104 
105             checkReferenceContextBases(refContext, expectedBases);
106             Assert.assertEquals(refContext.getInterval(), interval, "Wrong interval in reference context");
107             Assert.assertEquals(refContext.getWindow(), expectedWindow, "Window in windowed reference context not equal to expected window");
108             Assert.assertEquals(refContext.numWindowLeadingBases(), interval.getStart() - expectedWindow.getStart(),
109                     "Leading window size in windowed reference context not equal to expected value");
110             Assert.assertEquals(refContext.numWindowTrailingBases(), 0, expectedWindow.getEnd() - interval.getEnd(),
111                     "Trailing window size in windowed reference context not equal to expected value");
112         }
113     }
114 
115     @Test(expectedExceptions = IllegalArgumentException.class)
testNullIntervalAndNonNullWindow()116     public void testNullIntervalAndNonNullWindow() {
117         try (ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
118             new ReferenceContext(reference, null, new SimpleInterval("1", 1, 3));
119         }
120     }
121 
122     @Test(expectedExceptions = IllegalArgumentException.class)
testIntervalNotInWindow()123     public void testIntervalNotInWindow() {
124         try (ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
125             new ReferenceContext(reference, new SimpleInterval("1", 1, 3), new SimpleInterval("1", 10, 30));
126         }
127     }
128 
129     @Test
testWindowedContextUsingIntervalObjects_nullWindow()130     public void testWindowedContextUsingIntervalObjects_nullWindow() {
131         try (ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
132             final SimpleInterval ival = new SimpleInterval("1", 1, 3);
133             final ReferenceContext refContext = new ReferenceContext(reference, ival, null);
134             Assert.assertEquals(refContext.getWindow(), ival);
135             Assert.assertEquals(refContext.getInterval(), ival);
136         }
137     }
138 
139     @Test
testDynamicallyChangingWindow()140     public void testDynamicallyChangingWindow() {
141         try (final ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
142             final SimpleInterval interval = new SimpleInterval("1", 11210, 11220);
143             final ReferenceContext refContext = new ReferenceContext(reference, interval);
144             final String intervalBases = "CGGTGCTGTGC";
145 
146             Assert.assertEquals(interval, refContext.getWindow());
147             Assert.assertEquals(refContext.numWindowLeadingBases(), 0);
148             Assert.assertEquals(refContext.numWindowTrailingBases(), 0);
149             checkReferenceContextBases(refContext, intervalBases);
150             Assert.assertEquals(refContext.getBase(), intervalBases.getBytes()[0]);
151             Assert.assertEquals(refContext.getForwardBases(), intervalBases.getBytes());
152 
153             refContext.setWindow(5, 5);
154             Assert.assertEquals(refContext.getWindow(), new SimpleInterval(interval.getContig(), interval.getStart() - 5, interval.getEnd() + 5));
155             Assert.assertEquals(refContext.numWindowLeadingBases(), 5);
156             Assert.assertEquals(refContext.numWindowTrailingBases(), 5);
157             checkReferenceContextBases(refContext, "GCTCA" + intervalBases + "CAGGG");
158             Assert.assertEquals(refContext.getBase(), intervalBases.getBytes()[0]);
159             Assert.assertEquals(refContext.getForwardBases(), (intervalBases+"CAGGG").getBytes());
160 
161             refContext.setWindow(0, 10);
162             Assert.assertEquals(refContext.getWindow(), new SimpleInterval(interval.getContig(), interval.getStart(), interval.getEnd() + 10));
163             Assert.assertEquals(refContext.numWindowLeadingBases(), 0);
164             Assert.assertEquals(refContext.numWindowTrailingBases(), 10);
165             checkReferenceContextBases(refContext, intervalBases + "CAGGGCGCCC");
166             Assert.assertEquals(refContext.getBase(), intervalBases.getBytes()[0]);
167             Assert.assertEquals(refContext.getForwardBases(), (intervalBases+"CAGGGCGCCC").getBytes());
168 
169             refContext.setWindow(20, 3);
170             Assert.assertEquals(refContext.getWindow(), new SimpleInterval(interval.getContig(), interval.getStart() - 20, interval.getEnd() + 3));
171             Assert.assertEquals(refContext.numWindowLeadingBases(), 20);
172             Assert.assertEquals(refContext.numWindowTrailingBases(), 3);
173             checkReferenceContextBases(refContext, "CTACAGGACCCGCTTGCTCA" + intervalBases + "CAG");
174             Assert.assertEquals(refContext.getBase(), intervalBases.getBytes()[0]);
175             Assert.assertEquals(refContext.getForwardBases(), (intervalBases+"CAG").getBytes());
176 
177             refContext.setWindow(0, 0);
178             Assert.assertEquals(interval, refContext.getWindow());
179             Assert.assertEquals(refContext.numWindowLeadingBases(), 0);
180             Assert.assertEquals(refContext.numWindowTrailingBases(), 0);
181             checkReferenceContextBases(refContext, intervalBases);
182             Assert.assertEquals(refContext.getBase(), intervalBases.getBytes()[0]);
183             Assert.assertEquals(refContext.getForwardBases(), intervalBases.getBytes());
184         }
185     }
186 
187     @Test
testGetBasesStaticWindow()188     public void testGetBasesStaticWindow() {
189         try (final ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
190             final SimpleInterval interval = new SimpleInterval("1", 11210, 11220);
191             final ReferenceContext refContext = new ReferenceContext(reference, interval);
192             final String intervalBases = "CGGTGCTGTGC";
193 
194             checkReferenceContextBasesFromInterval(refContext, intervalBases, interval);
195             Assert.assertEquals(refContext.getWindow(), interval);
196 
197             checkReferenceContextBasesFromInterval(refContext, "GCTCA" + intervalBases + "CAGGG",
198                     new SimpleInterval(interval.getContig(), interval.getStart() - 5, interval.getEnd() + 5)
199             );
200             Assert.assertEquals(refContext.getWindow(), interval);
201 
202             checkReferenceContextBasesFromInterval(refContext, intervalBases + "CAGGGCGCCC",
203                     new SimpleInterval(interval.getContig(), interval.getStart() - 0, interval.getEnd() + 10)
204             );
205             Assert.assertEquals(refContext.getWindow(), interval);
206 
207             checkReferenceContextBasesFromInterval(refContext, "CTACAGGACCCGCTTGCTCA" + intervalBases + "CAG",
208                     new SimpleInterval(interval.getContig(), interval.getStart() - 20, interval.getEnd() + 3)
209             );
210             Assert.assertEquals(refContext.getWindow(), interval);
211         }
212     }
213 
214     @DataProvider
provideForTestCopyConstructor()215     private Object[][] provideForTestCopyConstructor() {
216         return new Object[][] {
217                 {
218                         new SimpleInterval("1", 11210, 11220),
219                         new SimpleInterval("1", 2650, 2650),
220                         0,
221                         0
222                 },
223                 {
224                         new SimpleInterval("1", 11210, 11220),
225                         new SimpleInterval("1", 2650, 2650),
226                         3,
227                         5
228                 },
229                 {
230                         new SimpleInterval("1", 11210, 11220),
231                         new SimpleInterval("1", 2640, 2650),
232                         0,
233                         0
234                 },
235                 {
236                         new SimpleInterval("1", 11210, 11220),
237                         new SimpleInterval("1", 2640, 2650),
238                         3,
239                         5
240                 },
241                 {
242                         new SimpleInterval("1", 11210, 11220),
243                         new SimpleInterval("2", 2650, 2650),
244                         3,
245                         5
246                 },
247                 {
248                         new SimpleInterval("1", 11210, 11220),
249                         new SimpleInterval("2", 2650, 2650),
250                         0,
251                         0
252                 },
253                 {
254                         new SimpleInterval("1", 11210, 11220),
255                         new SimpleInterval("2", 2650, 2660),
256                         3,
257                         5
258                 },
259                 {
260                         new SimpleInterval("1", 11210, 11220),
261                         new SimpleInterval("2", 2650, 2660),
262                         0,
263                         0
264                 },
265         };
266     }
267 
268     @Test(dataProvider = "provideForTestCopyConstructor")
testCopyConstructor(final SimpleInterval originalInterval, final SimpleInterval newInterval, final int leadingBases, final int trailingBases)269     public void testCopyConstructor(final SimpleInterval originalInterval, final SimpleInterval newInterval, final int leadingBases, final int trailingBases) {
270         try (final ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
271 
272             final ReferenceContext refContext = new ReferenceContext(reference, originalInterval, leadingBases, trailingBases);
273             Assert.assertEquals(refContext.getInterval(), originalInterval, "Set interval is different from expected interval!");
274 
275             final ReferenceContext newRefContext = new ReferenceContext(refContext, newInterval);
276             Assert.assertEquals(newRefContext.getInterval(), newInterval, "Set interval is different from expected interval!");
277 
278             final SimpleInterval newWindow = newRefContext.getWindow();
279 
280             final int newLeadingBases = newInterval.getStart() - newWindow.getStart();
281             final int newTrailingBases = newWindow.getEnd() - newInterval.getEnd();
282 
283             Assert.assertEquals(newLeadingBases, leadingBases, "New window leading bases are not the same as old window leading bases!");
284             Assert.assertEquals(newTrailingBases, trailingBases, "New window trailing bases are not the same as old window trailing bases!");
285         }
286     }
287 
checkReferenceContextBases( final ReferenceContext refContext, final String expectedBases )288     private void checkReferenceContextBases( final ReferenceContext refContext, final String expectedBases ) {
289 
290         final byte[] contextBases = refContext.getBases();
291 
292         final List<Byte> contextBasesFromIterator = new ArrayList<>();
293         final Iterator<Byte> baseIterator = refContext.iterator();
294         while ( baseIterator.hasNext() ) {
295             contextBasesFromIterator.add(baseIterator.next());
296         }
297 
298         Assert.assertEquals(contextBases.length, expectedBases.length(), "Wrong number of bases from refContext.getBases()");
299 
300         final byte[] expectedBasesByteArray = expectedBases.getBytes();
301         for ( int baseIndex = 0; baseIndex < expectedBases.length(); ++baseIndex ) {
302             Assert.assertEquals(contextBases[baseIndex], expectedBasesByteArray[baseIndex], "Base #" + (baseIndex + 1) + " incorrect from refContext.getBases()");
303             Assert.assertEquals(contextBasesFromIterator.get(baseIndex).byteValue(), expectedBasesByteArray[baseIndex], "Base #" + (baseIndex + 1) + " incorrect from refContext.iterator()");
304         }
305     }
306 
checkReferenceContextBasesFromInterval( final ReferenceContext refContext, final String expectedBases, final SimpleInterval interval )307     private void checkReferenceContextBasesFromInterval( final ReferenceContext refContext, final String expectedBases, final SimpleInterval interval ) {
308 
309         // Do this once for the interval-based call:
310         final byte[] contextBases = refContext.getBases(interval);
311         checkReferenceContextBasesFromIntervalHelper(expectedBases, contextBases);
312 
313         // Do this again for the leading/trailing bounds-based call:
314         final byte[] contextBases2 = refContext.getBases(interval);
315 
316         // First check that the two context bases are the same:
317         Assert.assertEquals(contextBases2, contextBases);
318 
319         // Now check vs the expected values:
320         checkReferenceContextBasesFromIntervalHelper(expectedBases, contextBases2);
321     }
322 
checkReferenceContextBasesFromIntervalHelper(final String expectedBases, final byte[] contextBases)323     private void checkReferenceContextBasesFromIntervalHelper(final String expectedBases, final byte[] contextBases) {
324         Assert.assertEquals(contextBases.length, expectedBases.length(), "Wrong number of bases from refContext.getBases()");
325 
326         final byte[] expectedBasesByteArray = expectedBases.getBytes();
327         for ( int baseIndex = 0; baseIndex < expectedBases.length(); ++baseIndex ) {
328             Assert.assertEquals(contextBases[baseIndex], expectedBasesByteArray[baseIndex], "Base #" + (baseIndex + 1) + " incorrect from refContext.getBases()");
329         }
330     }
331 
332     @DataProvider(name = "InvalidWindowDataProvider")
getInvalidWindows()333     public Object[][] getInvalidWindows() {
334         return new Object[][] {
335                 // window start offset < 0
336                 {-1, 1},
337                 // window stop offset < 0
338                 {1, -1},
339                 // window start offset < 0 && window stop offset < 0
340                 {-1, -1}
341         };
342     }
343 
344     @Test(dataProvider = "InvalidWindowDataProvider", expectedExceptions = GATKException.class)
testInvalidWindowHandlingAtConstruction( final int windowStartOffset, final int windowStopOffset )345     public void testInvalidWindowHandlingAtConstruction( final int windowStartOffset, final int windowStopOffset ) {
346         try ( ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE) ) {
347             SimpleInterval interval = new SimpleInterval("1", 5, 10);
348             ReferenceContext refContext = new ReferenceContext(reference, interval, windowStartOffset, windowStopOffset);
349         }
350     }
351 
352     @Test(dataProvider = "InvalidWindowDataProvider", expectedExceptions = GATKException.class)
testInvalidWindowHandlingPostConstruction( final int windowStartOffset, final int windowStopOffset )353     public void testInvalidWindowHandlingPostConstruction( final int windowStartOffset, final int windowStopOffset ) {
354         try ( ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE) ) {
355             SimpleInterval interval = new SimpleInterval("1", 5, 10);
356             ReferenceContext refContext = new ReferenceContext(reference, interval);
357             refContext.setWindow(windowStartOffset, windowStopOffset);
358         }
359     }
360 
361     @DataProvider(name = "SubintervalDataProvider")
getSubintervals()362     public Object[][] getSubintervals() {
363         return new Object[][] {
364                 // start (1120x):   01234567890
365                 // reference bases: CGGTGCTGTGC
366                 {"1", 11211, 1, "CGG"},
367                 {"1", 11219, 1, "TGC"},
368                 {"1", 11217, 2, "CTGTG"}
369         };
370     }
371 
372     @Test(dataProvider = "SubintervalDataProvider")
testGetKmerAround(final String contig, final int start, final int padding, String expectedSubsequence)373     public void testGetKmerAround(final String contig, final int start, final int padding, String expectedSubsequence){
374         // the interval of a ReferenceContext object is *in*clusive on both ends
375         try (final ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
376             final SimpleInterval interval = new SimpleInterval(contig, start, start);
377             final ReferenceContext refContext = new ReferenceContext(reference, interval);
378             final String kmer = refContext.getKmerAround(start, padding);
379             Assert.assertEquals(kmer, expectedSubsequence);
380         }
381     }
382 }
383