1 package org.broadinstitute.hellbender.engine;
2 
3 import htsjdk.samtools.SAMSequenceDictionary;
4 import htsjdk.samtools.util.Locatable;
5 import org.broadinstitute.hellbender.utils.IntervalUtils;
6 import org.broadinstitute.hellbender.utils.SimpleInterval;
7 import org.broadinstitute.hellbender.utils.Utils;
8 
9 import java.util.ArrayList;
10 import java.util.List;
11 
12 
13 /**
14  * A Shard of records of type T covering a specific genomic interval, optionally expanded by a configurable
15  * amount of padded data, that provides the ability to iterate over its records.
16  */
17 public interface Shard<T> extends Iterable<T>, Locatable {
18 
19     /**
20      * @return the interval this shard spans
21      */
getInterval()22     SimpleInterval getInterval();
23 
24     /**
25      * @return the interval this shard spans, potentially with additional padding on each side
26      * it must be the case that for a given Shard getPaddedSpan().contains(getInterval())
27      */
getPaddedInterval()28     SimpleInterval getPaddedInterval();
29 
30     /**
31      * @return the start of the non-padded interval this shard covers
32      */
33     @Override
getStart()34     default int getStart() {
35         return getInterval().getStart();
36     }
37 
38     /**
39      * @return the end of the non-padded interval this shard covers
40      */
41     @Override
getEnd()42     default int getEnd() {
43         return getInterval().getEnd();
44     }
45 
46     /**
47      * @return contig this shard belongs to
48      */
49     @Override
getContig()50     default String getContig() {
51         return getInterval().getContig();
52     }
53 
54     /**
55      * Divide an interval into ShardBoundaries. Each shard will cover up to shardSize bases, include shardPadding
56      * bases of extra padding on either side, and begin shardSize bases after the previous shard (ie., shards will
57      * not overlap except potentially in the padded regions).
58      *
59      * @param interval interval to shard; must be on the contig according to the provided dictionary
60      * @param shardSize desired shard size; intervals larger than this will be divided into shards of up to this size
61      * @param shardPadding desired shard padding; each shard's interval will be padded on both sides by this number of bases (may be 0)
62      * @param dictionary sequence dictionary for reads
63      * @return List of {@link ShardBoundary} objects spanning the interval
64      */
divideIntervalIntoShards(final SimpleInterval interval, final int shardSize, final int shardPadding, final SAMSequenceDictionary dictionary)65     static List<ShardBoundary> divideIntervalIntoShards(final SimpleInterval interval, final int shardSize, final int shardPadding, final SAMSequenceDictionary dictionary) {
66         return  divideIntervalIntoShards(interval, shardSize, shardSize, shardPadding, dictionary);
67     }
68 
69     /**
70      * Divide an interval into ShardBoundaries. Each shard will cover up to shardSize bases, include shardPadding
71      * bases of extra padding on either side, and begin shardStep bases after the previous shard.
72      *
73      * @param interval interval to shard; must be on the contig according to the provided dictionary
74      * @param shardSize desired shard size; intervals larger than this will be divided into shards of up to this size
75      * @param shardStep each shard will begin this many bases away from the previous shard
76      * @param shardPadding desired shard padding; each shard's interval will be padded on both sides by this number of bases (may be 0)
77      * @param dictionary sequence dictionary for reads
78      * @return List of {@link ShardBoundary} objects spanning the interval
79      */
divideIntervalIntoShards(final SimpleInterval interval, final int shardSize, final int shardStep, final int shardPadding, final SAMSequenceDictionary dictionary)80     static List<ShardBoundary> divideIntervalIntoShards(final SimpleInterval interval, final int shardSize, final int shardStep, final int shardPadding, final SAMSequenceDictionary dictionary) {
81         Utils.nonNull(interval);
82         Utils.nonNull(dictionary);
83         Utils.validateArg(shardSize >= 1, "shardSize must be >= 1");
84         Utils.validateArg(shardStep >= 1, "shardStep must be >= 1");
85         Utils.validateArg(shardPadding >= 0, "shardPadding must be >= 0");
86 
87         Utils.validateArg(IntervalUtils.intervalIsOnDictionaryContig(interval, dictionary), () ->
88                 "Interval " + interval + " not within the bounds of a contig in the provided dictionary");
89 
90         final List<ShardBoundary> shards = new ArrayList<>();
91         int start = interval.getStart();
92 
93         while ( start <= interval.getEnd() ) {
94             final int end = Math.min(start + shardSize - 1, interval.getEnd());
95             final SimpleInterval nextShardInterval = new SimpleInterval(interval.getContig(), start, end);
96             final SimpleInterval nextShardIntervalPadded = nextShardInterval.expandWithinContig(shardPadding, dictionary);
97             shards.add(new ShardBoundary(nextShardInterval, nextShardIntervalPadded));
98             start += shardStep;
99         }
100 
101         return shards;
102     }
103 }
104