1 package org.broadinstitute.hellbender.tools.spark.sv.discovery;
2 
3 import com.google.common.annotations.VisibleForTesting;
4 import htsjdk.variant.variantcontext.Allele;
5 import org.broadinstitute.hellbender.engine.BasicReference;
6 import org.broadinstitute.hellbender.engine.spark.datasources.ReferenceMultiSparkSource;
7 import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.StrandSwitch;
8 import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.BreakpointComplications;
9 import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.NovelAdjacencyAndAltHaplotype;
10 import org.broadinstitute.hellbender.tools.spark.sv.evidence.EvidenceTargetLink;
11 import org.broadinstitute.hellbender.tools.spark.sv.evidence.ReadMetadata;
12 import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants;
13 import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval;
14 import org.broadinstitute.hellbender.utils.SimpleInterval;
15 
16 import java.util.Collections;
17 import java.util.Map;
18 
19 public abstract class SimpleSVType extends SvType {
createBracketedSymbAlleleString(final String vcfHeaderDefinedSymbAltAllele)20     public static String createBracketedSymbAlleleString(final String vcfHeaderDefinedSymbAltAllele) {
21         return "<" + vcfHeaderDefinedSymbAltAllele + ">";
22     }
23 
SimpleSVType(final String variantCHR, final int variantPOS, final int variantEND, final String variantId, final Allele refAllele, final Allele altAllele, final int svLen, final Map<String, Object> extraAttributes)24     protected SimpleSVType(final String variantCHR, final int variantPOS, final int variantEND, final String variantId,
25                            final Allele refAllele, final Allele altAllele, final int svLen, final Map<String, Object> extraAttributes) {
26         super(variantCHR, variantPOS, variantEND, variantId, refAllele, altAllele, svLen, extraAttributes);
27     }
28 
29     @Override
hasApplicableEnd()30     public final boolean hasApplicableEnd() {
31         return true;
32     }
33     @Override
hasApplicableLength()34     public final boolean hasApplicableLength() {
35         return true;
36     }
37 
38     public enum SupportedType {
39         INV, DEL, INS, DUP, DUP_INV
40     }
41 
42     public static final class Inversion extends SimpleSVType {
43 
44         @Override
toString()45         public String toString() {
46             return SupportedType.INV.name();
47         }
48 
49         @VisibleForTesting
Inversion(final String variantCHR, final int variantPOS, final int variantEND, final String variantId, final Allele refAllele, final Allele altAllele, final int svLen, final Map<String, Object> extraAttributes)50         public Inversion(final String variantCHR, final int variantPOS, final int variantEND, final String variantId,
51                          final Allele refAllele, final Allele altAllele, final int svLen, final Map<String, Object> extraAttributes) {
52             super(variantCHR, variantPOS, variantEND, variantId, refAllele, altAllele, svLen, extraAttributes);
53         }
54 
55         @VisibleForTesting
56 
57         // TODO: 6/12/18 note the following implementation sets POS and REF at the anchor base, which is not requested by the VCF spec
58         // TODO: 6/12/18 also, this interface lets one call inversion with SVLEN !=0, which is not the same as VCF spec examples
Inversion(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype, final int svLength, final BasicReference reference)59         public Inversion(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype, final int svLength,
60                          final BasicReference reference) {
61             super(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getContig(),
62                     novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
63                     novelAdjacencyAndAltHaplotype.getLeftJustifiedRightRefLoc().getEnd(),
64                     getIDString(novelAdjacencyAndAltHaplotype),
65                     Allele.create(extractRefBases(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc(), reference), true),
66                     Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_STRING_INV)),
67                     svLength,
68                     Collections.singletonMap((novelAdjacencyAndAltHaplotype.getStrandSwitch() == StrandSwitch.FORWARD_TO_REVERSE) ? GATKSVVCFConstants.INV55 : GATKSVVCFConstants.INV33, true));
69         }
70 
getIDString(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype)71         private static String getIDString(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype) {
72             final StrandSwitch strandSwitch = novelAdjacencyAndAltHaplotype.getStrandSwitch();
73 
74             return (strandSwitch.equals(StrandSwitch.FORWARD_TO_REVERSE) ? GATKSVVCFConstants.INV55 : GATKSVVCFConstants.INV33) + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR +
75                     makeLocationString(novelAdjacencyAndAltHaplotype);
76         }
77     }
78 
79     public static final class Deletion extends SimpleSVType {
80 
81         @Override
toString()82         public String toString() {
83             return SupportedType.DEL.name();
84         }
85 
86         @VisibleForTesting
Deletion(final String variantCHR, final int variantPOS, final int variantEND, final String variantId, final Allele refAllele, final Allele altAllele, final int svLen, final Map<String, Object> extraAttributes)87         public Deletion(final String variantCHR, final int variantPOS, final int variantEND, final String variantId,
88                         final Allele refAllele, final Allele altAllele, final int svLen, final Map<String, Object> extraAttributes) {
89             super(variantCHR, variantPOS, variantEND, variantId, refAllele, altAllele, svLen, extraAttributes);
90         }
91 
Deletion(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype, final BasicReference reference)92         public Deletion(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype,
93                         final BasicReference reference) {
94             super(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getContig(),
95                     novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
96                     novelAdjacencyAndAltHaplotype.getLeftJustifiedRightRefLoc().getEnd(),
97                     getIDString(novelAdjacencyAndAltHaplotype),
98                     Allele.create(extractRefBases(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc(), reference), true),
99                     Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_STRING_DEL)),
100                     - novelAdjacencyAndAltHaplotype.getDistanceBetweenNovelAdjacencies(),
101                     novelAdjacencyAndAltHaplotype.hasDuplicationAnnotation() ? Collections.singletonMap(GATKSVVCFConstants.DUP_TAN_CONTRACTION_STRING, true) : noExtraAttributes);
102         }
103 
getIDString(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype)104         private static String getIDString(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype) {
105 
106             return  ((novelAdjacencyAndAltHaplotype.hasDuplicationAnnotation()) ? GATKSVVCFConstants.DUP_TAN_CONTRACTION_INTERNAL_ID_START_STRING : SupportedType.DEL.name())
107                     + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
108                     + makeLocationString(novelAdjacencyAndAltHaplotype);
109         }
110     }
111 
112     public static final class Insertion extends SimpleSVType {
113 
114         @Override
toString()115         public String toString() {
116             return SupportedType.INS.name();
117         }
118 
119         @VisibleForTesting
Insertion(final String variantCHR, final int variantPOS, final int variantEND, final String variantId, final Allele refAllele, final Allele altAllele, final int svLen, final Map<String, Object> extraAttributes)120         public Insertion(final String variantCHR, final int variantPOS, final int variantEND, final String variantId,
121                          final Allele refAllele, final Allele altAllele, final int svLen, final Map<String, Object> extraAttributes) {
122             super(variantCHR, variantPOS, variantEND, variantId, refAllele, altAllele, svLen, extraAttributes);
123         }
124 
Insertion(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype, final BasicReference reference)125         public Insertion(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype,
126                          final BasicReference reference) {
127             super(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getContig(),
128                     novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
129                     getEnd(novelAdjacencyAndAltHaplotype),
130                     getIDString(novelAdjacencyAndAltHaplotype),
131                     Allele.create(getRefBases(novelAdjacencyAndAltHaplotype, reference), true),
132                     Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_STRING_INS)),
133                     getLength(novelAdjacencyAndAltHaplotype),
134                     noExtraAttributes);
135         }
136 
137         // these methods exist to distinguish fat insertion and linked del+ins in an RPL event, as well as duplication events whose duplicated unit is not large enough
getEnd(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype)138         private static int getEnd(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype) {
139             return novelAdjacencyAndAltHaplotype.isCandidateForFatInsertion()
140                     ? novelAdjacencyAndAltHaplotype.getLeftJustifiedRightRefLoc().getEnd()
141                     : novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart();
142         }
143 
getRefBases(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype, final BasicReference reference)144         private static byte[] getRefBases(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype,
145                                           final BasicReference reference) {
146             return extractRefBases(novelAdjacencyAndAltHaplotype.isCandidateForFatInsertion()
147                     ? novelAdjacencyAndAltHaplotype.getIntervalForFatInsertion()
148                     : novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc(), reference);
149         }
150 
getLength(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype)151         private static int getLength(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype) {
152             return novelAdjacencyAndAltHaplotype.getComplication().hasDuplicationAnnotation()
153                     ? novelAdjacencyAndAltHaplotype.getLengthForDupTandemExpansion()
154                     : novelAdjacencyAndAltHaplotype.getComplication().getInsertedSequenceForwardStrandRep().length();
155         }
156 
getIDString(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype)157         private static String getIDString(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype) {
158             if (novelAdjacencyAndAltHaplotype.isCandidateForFatInsertion())
159                 return SupportedType.INS.name() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
160                         + makeLocationString(novelAdjacencyAndAltHaplotype);
161             else {
162                 return SupportedType.INS.name() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
163                         + makeLocationString(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getContig(),
164                                                 novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
165                                                 novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getContig(),
166                                                 novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart());
167             }
168         }
169     }
170 
171     public static final class DuplicationTandem extends SimpleSVType {
172 
173         @Override
toString()174         public String toString() {
175             return SupportedType.DUP.name();
176         }
177 
178         @VisibleForTesting
DuplicationTandem(final String variantCHR, final int variantPOS, final int variantEND, final String variantId, final Allele refAllele, final Allele altAllele, final int svLen, final Map<String, Object> extraAttributes)179         public DuplicationTandem(final String variantCHR, final int variantPOS, final int variantEND, final String variantId,
180                                  final Allele refAllele, final Allele altAllele, final int svLen, final Map<String, Object> extraAttributes) {
181             super(variantCHR, variantPOS, variantEND, variantId, refAllele, altAllele, svLen, extraAttributes);
182         }
183 
184         // TODO: 6/12/18 the following implementation treats DuplicationTandem as simple insertions, and duplication annotations will be saved in INFO columns
DuplicationTandem(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype, final BasicReference reference)185         public DuplicationTandem(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype,
186                                  final BasicReference reference) {
187             super(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getContig(),
188                     novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
189                     novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
190                     getIDString(novelAdjacencyAndAltHaplotype),
191                     Allele.create(extractRefBases(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc(), reference), true),
192                     Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_STRING_DUP)),
193                     novelAdjacencyAndAltHaplotype.getLengthForDupTandemExpansion(),
194                     Collections.singletonMap(GATKSVVCFConstants.DUP_TAN_EXPANSION_STRING, true));
195         }
196 
getIDString(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype)197         private static String getIDString(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype) {
198 
199             final SimpleInterval dupSeqRepeatUnitRefSpan = ((BreakpointComplications.SmallDuplicationBreakpointComplications)
200                     novelAdjacencyAndAltHaplotype.getComplication()).getDupSeqRepeatUnitRefSpan();
201 
202             return GATKSVVCFConstants.DUP_TAN_EXPANSION_INTERNAL_ID_START_STRING + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
203                     + makeLocationString(dupSeqRepeatUnitRefSpan.getContig(), dupSeqRepeatUnitRefSpan.getStart(),
204                     dupSeqRepeatUnitRefSpan.getContig(), dupSeqRepeatUnitRefSpan.getEnd());
205         }
206     }
207 
208     public static final class ImpreciseDeletion extends SimpleSVType {
209 
210         @Override
toString()211         public String toString() {
212             return SupportedType.DEL.name();
213         }
214 
ImpreciseDeletion(final EvidenceTargetLink evidenceTargetLink, final int svLength, final ReadMetadata metadata, final ReferenceMultiSparkSource reference)215         public ImpreciseDeletion(final EvidenceTargetLink evidenceTargetLink, final int svLength, final ReadMetadata metadata,
216                                  final ReferenceMultiSparkSource reference) {
217 
218             super(metadata.getContigName(evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval().getContig()),
219                     evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval().midpoint(),
220                     evidenceTargetLink.getPairedStrandedIntervals().getRight().getInterval().midpoint(),
221                     getIDString(evidenceTargetLink, metadata),
222                     Allele.create(getRefBases(evidenceTargetLink, metadata, reference), true),
223                     Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_STRING_DEL)),
224                     svLength,
225                     Collections.singletonMap(GATKSVVCFConstants.IMPRECISE, true));
226         }
227 
getRefBases(final EvidenceTargetLink evidenceTargetLink, final ReadMetadata metadata, final ReferenceMultiSparkSource reference)228         private static byte[] getRefBases(final EvidenceTargetLink evidenceTargetLink, final ReadMetadata metadata,
229                                           final ReferenceMultiSparkSource reference) {
230             final SVInterval leftInterval = evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval();
231             return extractRefBases(
232                     new SimpleInterval(metadata.getContigName(leftInterval.getContig()), leftInterval.midpoint(), leftInterval.midpoint()),
233                     reference
234             );
235         }
236 
getIDString(final EvidenceTargetLink evidenceTargetLink, final ReadMetadata metadata)237         private static String getIDString(final EvidenceTargetLink evidenceTargetLink, final ReadMetadata metadata) {
238             final SVInterval leftInterval = evidenceTargetLink.getPairedStrandedIntervals().getLeft().getInterval();
239             final SVInterval rightInterval = evidenceTargetLink.getPairedStrandedIntervals().getRight().getInterval();
240 
241             return SupportedType.DEL.name()
242                     + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
243                     + GATKSVVCFConstants.IMPRECISE + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
244                     + metadata.getContigName(leftInterval.getContig())
245                     + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
246                     + leftInterval.getStart() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
247                     + leftInterval.getEnd() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
248                     + rightInterval.getStart() + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
249                     + rightInterval.getEnd();
250         }
251     }
252 
253     public static final class DuplicationInverted extends SimpleSVType {
254 
255         @Override
toString()256         public String toString() {
257             return "DUP:INV";
258         }
259 
DuplicationInverted(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype, final BasicReference reference)260         public DuplicationInverted(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype,
261                                    final BasicReference reference) {
262             super(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getContig(),
263                     novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
264                     novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
265                     getIDString(novelAdjacencyAndAltHaplotype),
266                     Allele.create(extractRefBases(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc(), reference), true),
267                     Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_STRING_INVDUP)),
268                     getSVLen(novelAdjacencyAndAltHaplotype),
269                     noExtraAttributes);
270         }
271 
getSVLen(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype)272         private static int getSVLen(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype) {
273             return ((BreakpointComplications.InvertedDuplicationBreakpointComplications) novelAdjacencyAndAltHaplotype.getComplication())
274                     .getDupSeqRepeatUnitRefSpan().size();
275         }
276 
getIDString(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype)277         private static String getIDString(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype) {
278             return GATKSVVCFConstants.DUP_INV_INTERNAL_ID_START_STRING + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
279                     + makeLocationString(novelAdjacencyAndAltHaplotype);
280         }
281 
282     }
283 
284 }
285