package org.broadinstitute.hellbender.engine;

import org.broadinstitute.hellbender.GATKBaseTest;
import org.broadinstitute.hellbender.exceptions.GATKException;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.io.IOUtils;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.internal.junit.ArrayAsserts;

import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Unit tests for {@link ReferenceContext}: empty contexts, windowless and windowed contexts,
 * changing the window after construction, interval/offset based base queries, the copy
 * constructor, invalid window handling and k-mer extraction.
 *
 * All tests that need reference bases read them from {@link #TEST_REFERENCE}.
 */
public final class ReferenceContextUnitTest extends GATKBaseTest {

    /** Reference file backing every {@link ReferenceFileSource} opened by these tests. */
    private static final Path TEST_REFERENCE = IOUtils.getPath(hg19MiniReference);

    /**
     * Supplies ReferenceContexts that are expected to behave as empty contexts.
     *
     * @return one ReferenceContext per row
     */
    @DataProvider(name = "EmptyReferenceContextDataProvider")
    public Object[][] getEmptyReferenceContextData() {
        // Default-constructed ReferenceContexts and ReferenceContexts constructed from null ReferenceDataSources
        // and/or null intervals should behave as empty context objects.
        return new Object[][] {
                { new ReferenceContext() },
                { new ReferenceContext(null, null, 0, 0) },
                { new ReferenceContext(null, new SimpleInterval("1", 1, 1), 0, 0) },
                { new ReferenceContext(new ReferenceFileSource(TEST_REFERENCE), null) }
        };
    }

    /**
     * Checks that an empty context never reports both a data source and an interval, and that it
     * yields no bases from either {@code getBases()} or {@code iterator()}.
     *
     * @param refContext the empty context under test
     */
    @Test(dataProvider = "EmptyReferenceContextDataProvider")
    public void testEmptyReferenceContext( final ReferenceContext refContext ) {
        Assert.assertFalse(refContext.hasBackingDataSource() && refContext.getInterval() != null,
                "Empty ReferenceContext reports having both a backing data source and an interval");
        Assert.assertEquals(refContext.getBases().length, 0, "Empty ReferenceContext should have returned an empty bases array from getBases()");
        Assert.assertFalse(refContext.iterator().hasNext(), "Empty ReferenceContext should have returned an empty bases iterator from iterator()");
    }

    /**
     * Supplies intervals together with the bases expected when no window is applied.
     *
     * @return rows of { interval, expected bases }
     */
    @DataProvider(name = "WindowlessReferenceIntervalDataProvider")
    public Object[][] getWindowlessReferenceIntervals() {
        return new Object[][] {
                { new SimpleInterval("1", 1, 3), "NNN" },
                { new SimpleInterval("1", 11041, 11045), "GCAAA" },
                { new SimpleInterval("1", 11210, 11220), "CGGTGCTGTGC" },
                { new SimpleInterval("2", 9995, 10005), "NNNNNNCGTAT" },
                { new SimpleInterval("2", 10001, 10080), "CGTATCCCACACACCACACCCACACACCACACCCACACACACCCACACCCACACCCACACACACCACACCCACACACCAC" },
                { new SimpleInterval("2", 10005, 10084), "TCCCACACACCACACCCACACACCACACCCACACACACCCACACCCACACCCACACACACCACACCCACACACCACACCC" },
                { new SimpleInterval("2", 15995, 16000), "TGTCAG" }
        };
    }

    /**
     * Checks that a context built from a data source and an interval only has a window equal to
     * that interval, zero leading/trailing bases, and the expected bases.
     *
     * @param interval      interval the context is built on
     * @param expectedBases bases expected for that interval
     */
    @Test(dataProvider = "WindowlessReferenceIntervalDataProvider")
    public void testWindowlessReferenceContext( final SimpleInterval interval, final String expectedBases ) {
        try (ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
            final ReferenceContext refContext = new ReferenceContext(reference, interval);

            checkReferenceContextBases(refContext, expectedBases);
            Assert.assertEquals(refContext.getInterval(), interval, "Wrong interval in reference context");
            Assert.assertEquals(refContext.getWindow(), interval, "Window in windowless reference context not equal to original interval");
            Assert.assertEquals(refContext.numWindowLeadingBases(), 0, "Non-zero leading window size in windowless reference context");
            Assert.assertEquals(refContext.numWindowTrailingBases(), 0, "Non-zero trailing window size in windowless reference context");
        }
    }

    /**
     * Supplies intervals with window offsets, the window expected to result, and the bases
     * expected for that window.
     *
     * @return rows of { interval, start offset, stop offset, expected window, expected bases }
     */
    @DataProvider(name = "WindowedReferenceIntervalDataProvider")
    public Object[][] getWindowedReferenceIntervals() {
        return new Object[][] {
                // Window off the start of the contig:
                { new SimpleInterval("1", 1, 3), 5, 5, new SimpleInterval("1", 1, 8), "NNNNNNNN" },
                // Window in middle of contig with equal, non-zero start and stop offsets
                { new SimpleInterval("1", 11041, 11045), 5, 5, new SimpleInterval("1", 11036, 11050), "CAGGAGCAAAGTCGC" },
                // Window in middle of contig with start offset only
                { new SimpleInterval("1", 11210, 11220), 3, 0, new SimpleInterval("1", 11207, 11220), "TCACGGTGCTGTGC" },
                // Window in middle of contig with stop offset only
                { new SimpleInterval("2", 9995, 10005), 0, 3, new SimpleInterval("2", 9995, 10008), "NNNNNNCGTATCCC" },
                // Window in middle of contig with unequal, non-zero start and stop offsets
                { new SimpleInterval("2", 10005, 10084), 3, 8, new SimpleInterval("2", 10002, 10092), "GTATCCCACACACCACACCCACACACCACACCCACACACACCCACACCCACACCCACACACACCACACCCACACACCACACCCACACCCAC" },
                // Window off the end of the contig
                { new SimpleInterval("2", 15995, 16000), 2, 5, new SimpleInterval("2", 15993, 16000), "TGTGTCAG" }
        };
    }

    /**
     * Checks a context constructed with explicit start/stop window offsets.
     *
     * @param interval          interval the context is built on
     * @param windowStartOffset number of leading bases requested
     * @param windowStopOffset  number of trailing bases requested
     * @param expectedWindow    window the context is expected to report
     * @param expectedBases     bases expected over {@code expectedWindow}
     */
    @Test(dataProvider = "WindowedReferenceIntervalDataProvider")
    public void testWindowedContext( final SimpleInterval interval, final int windowStartOffset, final int windowStopOffset, final SimpleInterval expectedWindow, final String expectedBases ) {
        try (ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
            final ReferenceContext refContext = new ReferenceContext(reference, interval, windowStartOffset, windowStopOffset);

            checkReferenceContextBases(refContext, expectedBases);
            assertWindowMatches(refContext, interval, expectedWindow);
        }
    }

    /**
     * Checks a context constructed with an explicit window interval instead of offsets.
     * The offset parameters are supplied by the shared data provider but are not used here.
     *
     * @param interval          interval the context is built on
     * @param windowStartOffset unused
     * @param windowStopOffset  unused
     * @param expectedWindow    window passed to the constructor and expected back
     * @param expectedBases     bases expected over {@code expectedWindow}
     */
    @Test(dataProvider = "WindowedReferenceIntervalDataProvider")
    public void testWindowedContextUsingIntervalObjects( final SimpleInterval interval, final int windowStartOffset, final int windowStopOffset, final SimpleInterval expectedWindow, final String expectedBases ) {
        try (ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
            final ReferenceContext refContext = new ReferenceContext(reference, interval, expectedWindow);

            checkReferenceContextBases(refContext, expectedBases);
            assertWindowMatches(refContext, interval, expectedWindow);
        }
    }

    /**
     * Asserts that the context reports the given interval and window, and that its leading and
     * trailing base counts equal the distances between the interval and window boundaries.
     *
     * @param refContext     context under test
     * @param interval       interval the context was built on
     * @param expectedWindow window the context is expected to report
     */
    private static void assertWindowMatches( final ReferenceContext refContext, final SimpleInterval interval, final SimpleInterval expectedWindow ) {
        Assert.assertEquals(refContext.getInterval(), interval, "Wrong interval in reference context");
        Assert.assertEquals(refContext.getWindow(), expectedWindow, "Window in windowed reference context not equal to expected window");
        Assert.assertEquals(refContext.numWindowLeadingBases(), interval.getStart() - expectedWindow.getStart(),
                "Leading window size in windowed reference context not equal to expected value");
        // Exact comparison: an extra numeric argument here would select the (actual, expected, delta, message)
        // overload and turn this into a tolerance check around 0.
        Assert.assertEquals(refContext.numWindowTrailingBases(), expectedWindow.getEnd() - interval.getEnd(),
                "Trailing window size in windowed reference context not equal to expected value");
    }

    /** A null interval combined with a non-null window is expected to be rejected. */
    @Test(expectedExceptions = IllegalArgumentException.class)
    public void testNullIntervalAndNonNullWindow() {
        try (ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
            new ReferenceContext(reference, null, new SimpleInterval("1", 1, 3));
        }
    }

    /** A window that does not contain the interval is expected to be rejected. */
    @Test(expectedExceptions = IllegalArgumentException.class)
    public void testIntervalNotInWindow() {
        try (ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
            new ReferenceContext(reference, new SimpleInterval("1", 1, 3), new SimpleInterval("1", 10, 30));
        }
    }

    /** A null window passed to the interval-object constructor should yield a window equal to the interval. */
    @Test
    public void testWindowedContextUsingIntervalObjects_nullWindow() {
        try (ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
            final SimpleInterval ival = new SimpleInterval("1", 1, 3);
            final ReferenceContext refContext = new ReferenceContext(reference, ival, null);
            Assert.assertEquals(refContext.getWindow(), ival);
            Assert.assertEquals(refContext.getInterval(), ival);
        }
    }

    /**
     * Repeatedly calls {@code setWindow} on one context and checks, after each change, the reported
     * window, the leading/trailing counts, all bases, the single base from {@code getBase()} and
     * the bases from {@code getForwardBases()}.
     */
    @Test
    public void testDynamicallyChangingWindow() {
        try (final ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
            final SimpleInterval interval = new SimpleInterval("1", 11210, 11220);
            final ReferenceContext refContext = new ReferenceContext(reference, interval);
            final String intervalBases = "CGGTGCTGTGC";

            Assert.assertEquals(refContext.getWindow(), interval);
            Assert.assertEquals(refContext.numWindowLeadingBases(), 0);
            Assert.assertEquals(refContext.numWindowTrailingBases(), 0);
            checkReferenceContextBases(refContext, intervalBases);
            Assert.assertEquals(refContext.getBase(), intervalBases.getBytes()[0]);
            Assert.assertEquals(refContext.getForwardBases(), intervalBases.getBytes());

            refContext.setWindow(5, 5);
            Assert.assertEquals(refContext.getWindow(), new SimpleInterval(interval.getContig(), interval.getStart() - 5, interval.getEnd() + 5));
            Assert.assertEquals(refContext.numWindowLeadingBases(), 5);
            Assert.assertEquals(refContext.numWindowTrailingBases(), 5);
            checkReferenceContextBases(refContext, "GCTCA" + intervalBases + "CAGGG");
            Assert.assertEquals(refContext.getBase(), intervalBases.getBytes()[0]);
            Assert.assertEquals(refContext.getForwardBases(), (intervalBases+"CAGGG").getBytes());

            refContext.setWindow(0, 10);
            Assert.assertEquals(refContext.getWindow(), new SimpleInterval(interval.getContig(), interval.getStart(), interval.getEnd() + 10));
            Assert.assertEquals(refContext.numWindowLeadingBases(), 0);
            Assert.assertEquals(refContext.numWindowTrailingBases(), 10);
            checkReferenceContextBases(refContext, intervalBases + "CAGGGCGCCC");
            Assert.assertEquals(refContext.getBase(), intervalBases.getBytes()[0]);
            Assert.assertEquals(refContext.getForwardBases(), (intervalBases+"CAGGGCGCCC").getBytes());

            refContext.setWindow(20, 3);
            Assert.assertEquals(refContext.getWindow(), new SimpleInterval(interval.getContig(), interval.getStart() - 20, interval.getEnd() + 3));
            Assert.assertEquals(refContext.numWindowLeadingBases(), 20);
            Assert.assertEquals(refContext.numWindowTrailingBases(), 3);
            checkReferenceContextBases(refContext, "CTACAGGACCCGCTTGCTCA" + intervalBases + "CAG");
            Assert.assertEquals(refContext.getBase(), intervalBases.getBytes()[0]);
            Assert.assertEquals(refContext.getForwardBases(), (intervalBases+"CAG").getBytes());

            refContext.setWindow(0, 0);
            Assert.assertEquals(refContext.getWindow(), interval);
            Assert.assertEquals(refContext.numWindowLeadingBases(), 0);
            Assert.assertEquals(refContext.numWindowTrailingBases(), 0);
            checkReferenceContextBases(refContext, intervalBases);
            Assert.assertEquals(refContext.getBase(), intervalBases.getBytes()[0]);
            Assert.assertEquals(refContext.getForwardBases(), intervalBases.getBytes());
        }
    }

    /**
     * Queries bases for several explicit windows and checks, after every query, that the
     * context's own window is still the original interval.
     */
    @Test
    public void testGetBasesStaticWindow() {
        try (final ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
            final SimpleInterval interval = new SimpleInterval("1", 11210, 11220);
            final ReferenceContext refContext = new ReferenceContext(reference, interval);
            final String intervalBases = "CGGTGCTGTGC";

            checkReferenceContextBasesFromInterval(refContext, intervalBases, interval);
            Assert.assertEquals(refContext.getWindow(), interval);

            checkReferenceContextBasesFromInterval(refContext, "GCTCA" + intervalBases + "CAGGG",
                    new SimpleInterval(interval.getContig(), interval.getStart() - 5, interval.getEnd() + 5)
            );
            Assert.assertEquals(refContext.getWindow(), interval);

            checkReferenceContextBasesFromInterval(refContext, intervalBases + "CAGGGCGCCC",
                    new SimpleInterval(interval.getContig(), interval.getStart() - 0, interval.getEnd() + 10)
            );
            Assert.assertEquals(refContext.getWindow(), interval);

            checkReferenceContextBasesFromInterval(refContext, "CTACAGGACCCGCTTGCTCA" + intervalBases + "CAG",
                    new SimpleInterval(interval.getContig(), interval.getStart() - 20, interval.getEnd() + 3)
            );
            Assert.assertEquals(refContext.getWindow(), interval);
        }
    }

    /**
     * Supplies cases for the copy-constructor test.
     *
     * @return rows of { original interval, new interval, leading bases, trailing bases }
     */
    @DataProvider
    private Object[][] provideForTestCopyConstructor() {
        return new Object[][] {
                {
                        new SimpleInterval("1", 11210, 11220),
                        new SimpleInterval("1", 2650, 2650),
                        0,
                        0
                },
                {
                        new SimpleInterval("1", 11210, 11220),
                        new SimpleInterval("1", 2650, 2650),
                        3,
                        5
                },
                {
                        new SimpleInterval("1", 11210, 11220),
                        new SimpleInterval("1", 2640, 2650),
                        0,
                        0
                },
                {
                        new SimpleInterval("1", 11210, 11220),
                        new SimpleInterval("1", 2640, 2650),
                        3,
                        5
                },
                {
                        new SimpleInterval("1", 11210, 11220),
                        new SimpleInterval("2", 2650, 2650),
                        3,
                        5
                },
                {
                        new SimpleInterval("1", 11210, 11220),
                        new SimpleInterval("2", 2650, 2650),
                        0,
                        0
                },
                {
                        new SimpleInterval("1", 11210, 11220),
                        new SimpleInterval("2", 2650, 2660),
                        3,
                        5
                },
                {
                        new SimpleInterval("1", 11210, 11220),
                        new SimpleInterval("2", 2650, 2660),
                        0,
                        0
                },
        };
    }

    /**
     * Builds a context with the given offsets, copies it onto a new interval, and checks that the
     * copy reports the new interval with the same leading/trailing window sizes as the original.
     *
     * @param originalInterval interval of the source context
     * @param newInterval      interval passed to the copy constructor
     * @param leadingBases     leading window size of the source context
     * @param trailingBases    trailing window size of the source context
     */
    @Test(dataProvider = "provideForTestCopyConstructor")
    public void testCopyConstructor(final SimpleInterval originalInterval, final SimpleInterval newInterval, final int leadingBases, final int trailingBases) {
        try (final ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {

            final ReferenceContext refContext = new ReferenceContext(reference, originalInterval, leadingBases, trailingBases);
            Assert.assertEquals(refContext.getInterval(), originalInterval, "Set interval is different from expected interval!");

            final ReferenceContext newRefContext = new ReferenceContext(refContext, newInterval);
            Assert.assertEquals(newRefContext.getInterval(), newInterval, "Set interval is different from expected interval!");

            final SimpleInterval newWindow = newRefContext.getWindow();

            final int newLeadingBases = newInterval.getStart() - newWindow.getStart();
            final int newTrailingBases = newWindow.getEnd() - newInterval.getEnd();

            Assert.assertEquals(newLeadingBases, leadingBases, "New window leading bases are not the same as old window leading bases!");
            Assert.assertEquals(newTrailingBases, trailingBases, "New window trailing bases are not the same as old window trailing bases!");
        }
    }

    /**
     * Asserts that both {@code getBases()} and {@code iterator()} of the context yield exactly
     * the expected bases, position by position.
     *
     * @param refContext    context under test
     * @param expectedBases expected bases, one character per base
     */
    private void checkReferenceContextBases( final ReferenceContext refContext, final String expectedBases ) {

        final byte[] contextBases = refContext.getBases();

        final List<Byte> contextBasesFromIterator = new ArrayList<>();
        final Iterator<Byte> baseIterator = refContext.iterator();
        while ( baseIterator.hasNext() ) {
            contextBasesFromIterator.add(baseIterator.next());
        }

        Assert.assertEquals(contextBases.length, expectedBases.length(), "Wrong number of bases from refContext.getBases()");

        final byte[] expectedBasesByteArray = expectedBases.getBytes();
        for ( int baseIndex = 0; baseIndex < expectedBases.length(); ++baseIndex ) {
            Assert.assertEquals(contextBases[baseIndex], expectedBasesByteArray[baseIndex], "Base #" + (baseIndex + 1) + " incorrect from refContext.getBases()");
            Assert.assertEquals(contextBasesFromIterator.get(baseIndex).byteValue(), expectedBasesByteArray[baseIndex], "Base #" + (baseIndex + 1) + " incorrect from refContext.iterator()");
        }
    }

    /**
     * Fetches bases for {@code interval} twice, once through the interval-based query and once
     * through the leading/trailing offsets query, and asserts that both agree with each other and
     * with the expected bases.
     *
     * @param refContext    context under test
     * @param expectedBases bases expected over {@code interval}
     * @param interval      window to query; callers in this file pass windows that contain the
     *                      context's own interval, so both derived offsets are non-negative
     */
    private void checkReferenceContextBasesFromInterval( final ReferenceContext refContext, final String expectedBases, final SimpleInterval interval ) {

        // Do this once for the interval-based call:
        final byte[] contextBases = refContext.getBases(interval);
        checkReferenceContextBasesFromIntervalHelper(expectedBases, contextBases);

        // Do this again for the leading/trailing bounds-based call:
        // NOTE(review): relies on the ReferenceContext.getBases(int, int) overload taking
        // (leading, trailing) base counts relative to the context's interval - confirm.
        final SimpleInterval contextInterval = refContext.getInterval();
        final int windowLeadingBases = contextInterval.getStart() - interval.getStart();
        final int windowTrailingBases = interval.getEnd() - contextInterval.getEnd();
        final byte[] contextBases2 = refContext.getBases(windowLeadingBases, windowTrailingBases);

        // First check that the two context bases are the same:
        Assert.assertEquals(contextBases2, contextBases);

        // Now check vs the expected values:
        checkReferenceContextBasesFromIntervalHelper(expectedBases, contextBases2);
    }

    /**
     * Asserts that {@code contextBases} has the expected length and content.
     *
     * @param expectedBases expected bases, one character per base
     * @param contextBases  bases actually returned by the context
     */
    private void checkReferenceContextBasesFromIntervalHelper(final String expectedBases, final byte[] contextBases) {
        Assert.assertEquals(contextBases.length, expectedBases.length(), "Wrong number of bases from refContext.getBases()");

        final byte[] expectedBasesByteArray = expectedBases.getBytes();
        for ( int baseIndex = 0; baseIndex < expectedBases.length(); ++baseIndex ) {
            Assert.assertEquals(contextBases[baseIndex], expectedBasesByteArray[baseIndex], "Base #" + (baseIndex + 1) + " incorrect from refContext.getBases()");
        }
    }

    /**
     * Supplies window offset pairs that contain at least one negative value.
     *
     * @return rows of { window start offset, window stop offset }
     */
    @DataProvider(name = "InvalidWindowDataProvider")
    public Object[][] getInvalidWindows() {
        return new Object[][] {
                // window start offset < 0
                {-1, 1},
                // window stop offset < 0
                {1, -1},
                // window start offset < 0 && window stop offset < 0
                {-1, -1}
        };
    }

    /**
     * Constructing a context with a negative window offset is expected to throw a GATKException.
     *
     * @param windowStartOffset leading offset passed to the constructor
     * @param windowStopOffset  trailing offset passed to the constructor
     */
    @Test(dataProvider = "InvalidWindowDataProvider", expectedExceptions = GATKException.class)
    public void testInvalidWindowHandlingAtConstruction( final int windowStartOffset, final int windowStopOffset ) {
        try ( ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE) ) {
            final SimpleInterval interval = new SimpleInterval("1", 5, 10);
            // Only the constructor's exception matters; the instance itself is not needed.
            new ReferenceContext(reference, interval, windowStartOffset, windowStopOffset);
        }
    }

    /**
     * Calling {@code setWindow} with a negative offset on an existing context is expected to
     * throw a GATKException.
     *
     * @param windowStartOffset leading offset passed to {@code setWindow}
     * @param windowStopOffset  trailing offset passed to {@code setWindow}
     */
    @Test(dataProvider = "InvalidWindowDataProvider", expectedExceptions = GATKException.class)
    public void testInvalidWindowHandlingPostConstruction( final int windowStartOffset, final int windowStopOffset ) {
        try ( ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE) ) {
            final SimpleInterval interval = new SimpleInterval("1", 5, 10);
            final ReferenceContext refContext = new ReferenceContext(reference, interval);
            refContext.setWindow(windowStartOffset, windowStopOffset);
        }
    }

    /**
     * Supplies positions with a padding and the subsequence expected around each position.
     *
     * @return rows of { contig, position, padding, expected subsequence }
     */
    @DataProvider(name = "SubintervalDataProvider")
    public Object[][] getSubintervals() {
        return new Object[][] {
                // start (1120x):   01234567890
                // reference bases: CGGTGCTGTGC
                {"1", 11211, 1, "CGG"},
                {"1", 11219, 1, "TGC"},
                {"1", 11217, 2, "CTGTG"}
        };
    }

    /**
     * Checks {@code getKmerAround} on a single-base context located at {@code start}.
     *
     * @param contig              contig of the position
     * @param start               position used both as interval start and end
     * @param padding             padding passed to {@code getKmerAround}
     * @param expectedSubsequence subsequence expected back
     */
    @Test(dataProvider = "SubintervalDataProvider")
    public void testGetKmerAround(final String contig, final int start, final int padding, String expectedSubsequence){
        // the interval of a ReferenceContext object is *in*clusive on both ends
        try (final ReferenceDataSource reference = new ReferenceFileSource(TEST_REFERENCE)) {
            final SimpleInterval interval = new SimpleInterval(contig, start, start);
            final ReferenceContext refContext = new ReferenceContext(reference, interval);
            final String kmer = refContext.getKmerAround(start, padding);
            Assert.assertEquals(kmer, expectedSubsequence);
        }
    }
}