1 package org.broadinstitute.hellbender.engine; 2 3 import htsjdk.samtools.SAMSequenceDictionary; 4 import htsjdk.samtools.util.Locatable; 5 import org.broadinstitute.hellbender.utils.IntervalUtils; 6 import org.broadinstitute.hellbender.utils.SimpleInterval; 7 import org.broadinstitute.hellbender.utils.Utils; 8 9 import java.util.ArrayList; 10 import java.util.List; 11 12 13 /** 14 * A Shard of records of type T covering a specific genomic interval, optionally expanded by a configurable 15 * amount of padded data, that provides the ability to iterate over its records. 16 */ 17 public interface Shard<T> extends Iterable<T>, Locatable { 18 19 /** 20 * @return the interval this shard spans 21 */ getInterval()22 SimpleInterval getInterval(); 23 24 /** 25 * @return the interval this shard spans, potentially with additional padding on each side 26 * it must be the case that for a given Shard getPaddedSpan().contains(getInterval()) 27 */ getPaddedInterval()28 SimpleInterval getPaddedInterval(); 29 30 /** 31 * @return the start of the non-padded interval this shard covers 32 */ 33 @Override getStart()34 default int getStart() { 35 return getInterval().getStart(); 36 } 37 38 /** 39 * @return the end of the non-padded interval this shard covers 40 */ 41 @Override getEnd()42 default int getEnd() { 43 return getInterval().getEnd(); 44 } 45 46 /** 47 * @return contig this shard belongs to 48 */ 49 @Override getContig()50 default String getContig() { 51 return getInterval().getContig(); 52 } 53 54 /** 55 * Divide an interval into ShardBoundaries. Each shard will cover up to shardSize bases, include shardPadding 56 * bases of extra padding on either side, and begin shardSize bases after the previous shard (ie., shards will 57 * not overlap except potentially in the padded regions). 58 * 59 * @param interval interval to shard; must be on the contig according to the provided dictionary 60 * @param shardSize desired shard size; intervals larger than this will be divided into shards of up to this size 61 * @param shardPadding desired shard padding; each shard's interval will be padded on both sides by this number of bases (may be 0) 62 * @param dictionary sequence dictionary for reads 63 * @return List of {@link ShardBoundary} objects spanning the interval 64 */ divideIntervalIntoShards(final SimpleInterval interval, final int shardSize, final int shardPadding, final SAMSequenceDictionary dictionary)65 static List<ShardBoundary> divideIntervalIntoShards(final SimpleInterval interval, final int shardSize, final int shardPadding, final SAMSequenceDictionary dictionary) { 66 return divideIntervalIntoShards(interval, shardSize, shardSize, shardPadding, dictionary); 67 } 68 69 /** 70 * Divide an interval into ShardBoundaries. Each shard will cover up to shardSize bases, include shardPadding 71 * bases of extra padding on either side, and begin shardStep bases after the previous shard. 72 * 73 * @param interval interval to shard; must be on the contig according to the provided dictionary 74 * @param shardSize desired shard size; intervals larger than this will be divided into shards of up to this size 75 * @param shardStep each shard will begin this many bases away from the previous shard 76 * @param shardPadding desired shard padding; each shard's interval will be padded on both sides by this number of bases (may be 0) 77 * @param dictionary sequence dictionary for reads 78 * @return List of {@link ShardBoundary} objects spanning the interval 79 */ divideIntervalIntoShards(final SimpleInterval interval, final int shardSize, final int shardStep, final int shardPadding, final SAMSequenceDictionary dictionary)80 static List<ShardBoundary> divideIntervalIntoShards(final SimpleInterval interval, final int shardSize, final int shardStep, final int shardPadding, final SAMSequenceDictionary dictionary) { 81 Utils.nonNull(interval); 82 Utils.nonNull(dictionary); 83 Utils.validateArg(shardSize >= 1, "shardSize must be >= 1"); 84 Utils.validateArg(shardStep >= 1, "shardStep must be >= 1"); 85 Utils.validateArg(shardPadding >= 0, "shardPadding must be >= 0"); 86 87 Utils.validateArg(IntervalUtils.intervalIsOnDictionaryContig(interval, dictionary), () -> 88 "Interval " + interval + " not within the bounds of a contig in the provided dictionary"); 89 90 final List<ShardBoundary> shards = new ArrayList<>(); 91 int start = interval.getStart(); 92 93 while ( start <= interval.getEnd() ) { 94 final int end = Math.min(start + shardSize - 1, interval.getEnd()); 95 final SimpleInterval nextShardInterval = new SimpleInterval(interval.getContig(), start, end); 96 final SimpleInterval nextShardIntervalPadded = nextShardInterval.expandWithinContig(shardPadding, dictionary); 97 shards.add(new ShardBoundary(nextShardInterval, nextShardIntervalPadded)); 98 start += shardStep; 99 } 100 101 return shards; 102 } 103 } 104