1 package org.broadinstitute.hellbender.tools.spark.sv.discovery;
2 
3 import com.google.common.annotations.VisibleForTesting;
4 import htsjdk.samtools.util.SequenceUtil;
5 import htsjdk.variant.variantcontext.Allele;
6 import org.broadinstitute.hellbender.engine.BasicReference;
7 import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.StrandSwitch;
8 import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.NovelAdjacencyAndAltHaplotype;
9 import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.TypeInferredFromSimpleChimera;
10 import org.broadinstitute.hellbender.utils.SimpleInterval;
11 import scala.Tuple2;
12 
13 import java.util.Collections;
14 import java.util.Map;
15 
16 import static org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants.*;
17 
18 public abstract class BreakEndVariantType extends SvType {
19 
    /**
     * Whether this record is the upstream mate of its breakend pair.
     * <p>
     * Technically, a BND-formatted variant should have two VCF records, one per mate, hence this field.
     * The upstream mate is defined as the location in a mate pair that has the lower coordinate according to
     * the reference sequence dictionary.
     */
    private final boolean isTheUpstreamMate;
26 
    /**
     * Creates one mate record of a BND-formatted variant.
     * The parent constructor is handed {@code NO_APPLICABLE_END} and {@code NO_APPLICABLE_LEN} in place of
     * an end coordinate and a length; all other arguments are forwarded unchanged.
     *
     * @param variantCHR        contig of this record
     * @param variantPOS        position of this record
     * @param variantId         ID of this record
     * @param refAllele         reference allele of this record
     * @param altAllele         alternate allele of this record
     * @param extraAttributes   extra attributes forwarded to the parent type
     * @param isTheUpstreamMate whether this record is the upstream mate of the pair
     */
    protected BreakEndVariantType(final String variantCHR, final int variantPOS, final String variantId,
                                  final Allele refAllele, final Allele altAllele, final Map<String, Object> extraAttributes,
                                  final boolean isTheUpstreamMate) {
        super(variantCHR, variantPOS, NO_APPLICABLE_END, variantId, refAllele, altAllele, NO_APPLICABLE_LEN, extraAttributes);
        this.isTheUpstreamMate = isTheUpstreamMate;
    }
33 
isTheUpstreamMate()34     public final boolean isTheUpstreamMate() {
35         return isTheUpstreamMate;
36     }
37 
    /**
     * @return always {@code false}; every breakend record is constructed with {@code NO_APPLICABLE_END}
     */
    @Override
    public final boolean hasApplicableEnd() {
        return false;
    }
    /**
     * @return always {@code false}; every breakend record is constructed with {@code NO_APPLICABLE_LEN}
     */
    @Override
    public final boolean hasApplicableLength() {
        return false;
    }
46 
    /**
     * @return the {@code BREAKEND_STR} constant, regardless of which concrete breakend subtype this is
     */
    @Override
    public final String toString() {
        return BREAKEND_STR;
    }
51 
52     @Override
equals(final Object o)53     public boolean equals(final Object o) {
54         if (this == o) return true;
55         if (o == null || getClass() != o.getClass()) return false;
56         if (!super.equals(o)) return false;
57 
58         final BreakEndVariantType that = (BreakEndVariantType) o;
59 
60         return isTheUpstreamMate == that.isTheUpstreamMate;
61     }
62 
63     @Override
hashCode()64     public int hashCode() {
65         int result = super.hashCode();
66         result = 31 * result + (isTheUpstreamMate ? 1 : 0);
67         return result;
68     }
69 
70     //==================================================================================================================
71 
getIDString(final NovelAdjacencyAndAltHaplotype narl, final boolean forUpstreamLoc)72     private static String getIDString(final NovelAdjacencyAndAltHaplotype narl, final boolean forUpstreamLoc) {
73         // if no strand switch or different contig, "", otherwise append INV55/33
74         final String bndtype = narl.getStrandSwitch().equals(StrandSwitch.NO_SWITCH) || !narl.getLeftJustifiedLeftRefLoc().getContig().equals(narl.getLeftJustifiedRightRefLoc().getContig())? ""
75                 : (narl.getStrandSwitch().equals(StrandSwitch.FORWARD_TO_REVERSE) ? INV55 : INV33);
76         String locationPartOfString = makeLocationString(narl.getLeftJustifiedLeftRefLoc().getContig(),
77                 narl.getLeftJustifiedLeftRefLoc().getStart(), narl.getLeftJustifiedRightRefLoc().getContig(),
78                 narl.getLeftJustifiedRightRefLoc().getEnd());
79         return BREAKEND_STR + INTERVAL_VARIANT_ID_FIELD_SEPARATOR +
80                 (bndtype.isEmpty() ? "" : bndtype + INTERVAL_VARIANT_ID_FIELD_SEPARATOR) +
81                locationPartOfString + INTERVAL_VARIANT_ID_FIELD_SEPARATOR + (forUpstreamLoc ? "1" : "2");
82     }
83 
getRefBaseString(final NovelAdjacencyAndAltHaplotype narl, final boolean forUpstreamLoc, final BasicReference reference)84     private static String getRefBaseString(final NovelAdjacencyAndAltHaplotype narl, final boolean forUpstreamLoc,
85                                            final BasicReference reference) {
86         byte[] refBases = reference.getBases(forUpstreamLoc ? narl.getLeftJustifiedLeftRefLoc() :
87                 narl.getLeftJustifiedRightRefLoc());
88         return new String(refBases);
89     }
90 
    /**
     * Kinds of novel adjacency that are represented as breakend (BND) records.
     * NOTE(review): nothing in the visible part of this file references this enum; at least one constant name
     * matches a {@code TypeInferredFromSimpleChimera} constant used further down - confirm whether both are needed.
     */
    public enum SupportedType {
        INTRA_CHR_STRAND_SWITCH_55,// intra-chromosome strand-switch novel adjacency, alignments left-flanking the novel adjacency
        INTRA_CHR_STRAND_SWITCH_33,// intra-chromosome strand-switch novel adjacency, alignments right-flanking the novel adjacency

        INTRA_CHR_REF_ORDER_SWAP,// intra-chromosome reference-order swap novel adjacency, with NO strand switch

        INTER_CHR_STRAND_SWITCH_55,// pair WY in Fig.1 in Section 5.4 of VCF spec ver.4.2
        INTER_CHR_STRAND_SWITCH_33,// pair XZ in Fig.1 in Section 5.4 of VCF spec ver.4.2
        INTER_CHR_NO_SS_WITH_LEFT_MATE_FIRST_IN_PARTNER, // the green pair in Fig. 7 in Section 5.4 of VCF spec ver.4.2
        INTER_CHR_NO_SS_WITH_LEFT_MATE_SECOND_IN_PARTNER // the red pair in Fig. 7 in Section 5.4 of VCF spec ver.4.2
    }
102 
103     /**
104      * Breakend variant type for inversion suspects: those with novel adjacency between two reference locations
105      * on the same chromosome but the novel adjacency brings them together in a strand-switch fashion.
106      * This is to be distinguished from the more general "translocation" breakends, which are novel adjacency between
107      * reference locations without strand switch if the reference bases are from the same chromosome.
108      *
109      * Note that dispersed duplication with some copies inverted could also lead to breakpoints with strand switch.
110      */
111     abstract private static class IntraChromosomalStrandSwitchBreakEnd extends BreakEndVariantType {
112         static final Map<String, Object> INV55_FLAG = Collections.singletonMap(INV55, true);
113         static final Map<String, Object> INV33_FLAG = Collections.singletonMap(INV33, true);
114 
IntraChromosomalStrandSwitchBreakEnd(final String variantCHR, final int variantPOS, final String variantId, final Allele refAllele, final Allele altAllele, final Map<String, Object> extraAttributes, final boolean isTheUpstreamMate)115         private IntraChromosomalStrandSwitchBreakEnd(final String variantCHR, final int variantPOS, final String variantId,
116                                                      final Allele refAllele, final Allele altAllele,
117                                                      final Map<String, Object> extraAttributes,
118                                                      final boolean isTheUpstreamMate) {
119             super(variantCHR, variantPOS, variantId, refAllele, altAllele, extraAttributes, isTheUpstreamMate);
120         }
121 
122         @VisibleForTesting
extractInsertedSequence(final NovelAdjacencyAndAltHaplotype narl, final boolean forUpstreamLoc)123         static String extractInsertedSequence(final NovelAdjacencyAndAltHaplotype narl, final boolean forUpstreamLoc) {
124             final String ins = narl.getComplication().getInsertedSequenceForwardStrandRep();
125             return forUpstreamLoc ? ins : SequenceUtil.reverseComplement(ins);
126         }
127     }
128 
129     public static final class IntraChromosomalStrandSwitch55BreakEnd extends IntraChromosomalStrandSwitchBreakEnd {
130 
131         @VisibleForTesting
IntraChromosomalStrandSwitch55BreakEnd(final String variantCHR, final int variantPOS, final String variantId, final Allele refAllele, final Allele altAllele, final Map<String, Object> extraAttributes, final boolean isTheUpstreamMate)132         public IntraChromosomalStrandSwitch55BreakEnd(final String variantCHR, final int variantPOS, final String variantId,
133                                                       final Allele refAllele, final Allele altAllele, final Map<String, Object> extraAttributes,
134                                                       final boolean isTheUpstreamMate) {
135             super(variantCHR, variantPOS, variantId, refAllele, altAllele, extraAttributes, isTheUpstreamMate);
136         }
137 
IntraChromosomalStrandSwitch55BreakEnd(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference, final boolean isTheUpstreamMate)138         private IntraChromosomalStrandSwitch55BreakEnd(final NovelAdjacencyAndAltHaplotype narl,
139                                                        final BasicReference reference,
140                                                        final boolean isTheUpstreamMate) {
141             super(isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getContig() : narl.getLeftJustifiedRightRefLoc().getContig(),
142                     isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getStart() : narl.getLeftJustifiedRightRefLoc().getEnd(),
143                     BreakEndVariantType.getIDString(narl, isTheUpstreamMate),
144                     Allele.create(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference), true),
145                     constructAltAllele(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference),
146                             extractInsertedSequence(narl, isTheUpstreamMate),
147                             isTheUpstreamMate ? narl.getLeftJustifiedRightRefLoc(): narl.getLeftJustifiedLeftRefLoc()),
148                     INV55_FLAG, isTheUpstreamMate);
149         }
150 
getOrderedMates(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference)151         public static Tuple2<BreakEndVariantType, BreakEndVariantType> getOrderedMates(final NovelAdjacencyAndAltHaplotype narl,
152                                                                                        final BasicReference reference) {
153             return new Tuple2<>(new IntraChromosomalStrandSwitch55BreakEnd(narl, reference, true),
154                                 new IntraChromosomalStrandSwitch55BreakEnd(narl, reference, false));
155         }
156 
constructAltAllele(final String refBase, final String insertedSequence, final SimpleInterval novelAdjRefLoc)157         private static Allele constructAltAllele(final String refBase, final String insertedSequence, final SimpleInterval novelAdjRefLoc) {
158             return Allele.create(refBase + insertedSequence + "]" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "]");
159         }
160     }
161 
162     public static final class IntraChromosomalStrandSwitch33BreakEnd extends IntraChromosomalStrandSwitchBreakEnd {
163 
164         @VisibleForTesting
IntraChromosomalStrandSwitch33BreakEnd(final String variantCHR, final int variantPOS, final String variantId, final Allele refAllele, final Allele altAllele, final Map<String, Object> extraAttributes, final boolean isTheUpstreamMate)165         public IntraChromosomalStrandSwitch33BreakEnd(final String variantCHR, final int variantPOS, final String variantId,
166                                                       final Allele refAllele, final Allele altAllele, final Map<String, Object> extraAttributes,
167                                                       final boolean isTheUpstreamMate) {
168             super(variantCHR, variantPOS, variantId, refAllele, altAllele, extraAttributes, isTheUpstreamMate);
169         }
170 
IntraChromosomalStrandSwitch33BreakEnd(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference, final boolean isTheUpstreamMate)171         private IntraChromosomalStrandSwitch33BreakEnd(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference,
172                                                        final boolean isTheUpstreamMate) {
173             super(isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getContig() : narl.getLeftJustifiedRightRefLoc().getContig(),
174                     isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getStart() : narl.getLeftJustifiedRightRefLoc().getEnd(),
175                     BreakEndVariantType.getIDString(narl, isTheUpstreamMate),
176                     Allele.create(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference), true),
177                     constructAltAllele(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference),
178                             extractInsertedSequence(narl, isTheUpstreamMate),
179                             isTheUpstreamMate ? narl.getLeftJustifiedRightRefLoc(): narl.getLeftJustifiedLeftRefLoc()),
180                     INV33_FLAG, isTheUpstreamMate);
181         }
182 
getOrderedMates(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference)183         public static Tuple2<BreakEndVariantType, BreakEndVariantType> getOrderedMates(final NovelAdjacencyAndAltHaplotype narl,
184                                                                                        final BasicReference reference) {
185             return new Tuple2<>(new IntraChromosomalStrandSwitch33BreakEnd(narl, reference, true),
186                                 new IntraChromosomalStrandSwitch33BreakEnd(narl, reference, false));
187         }
188 
constructAltAllele(final String refBase, final String insertedSequence, final SimpleInterval novelAdjRefLoc)189         private static Allele constructAltAllele(final String refBase, final String insertedSequence, final SimpleInterval novelAdjRefLoc) {
190             return Allele.create("[" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "[" + insertedSequence + refBase);
191         }
192     }
193 
194     public static final class IntraChromosomeRefOrderSwap extends BreakEndVariantType {
195 
196         @VisibleForTesting
IntraChromosomeRefOrderSwap(final String variantCHR, final int variantPOS, final String variantId, final Allele refAllele, final Allele altAllele, final Map<String, Object> extraAttributes, final boolean isTheUpstreamMate)197         public IntraChromosomeRefOrderSwap(final String variantCHR, final int variantPOS, final String variantId,
198                                            final Allele refAllele, final Allele altAllele, final Map<String, Object> extraAttributes,
199                                            final boolean isTheUpstreamMate) {
200             super(variantCHR, variantPOS, variantId, refAllele, altAllele, extraAttributes, isTheUpstreamMate);
201         }
202 
IntraChromosomeRefOrderSwap(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference, final boolean isTheUpstreamMate)203         private IntraChromosomeRefOrderSwap(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference,
204                                             final boolean isTheUpstreamMate) {
205             super(isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getContig() : narl.getLeftJustifiedRightRefLoc().getContig(),
206                     isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getStart() : narl.getLeftJustifiedRightRefLoc().getEnd(),
207                     BreakEndVariantType.getIDString(narl, isTheUpstreamMate),
208                     Allele.create(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference), true),
209                     constructAltAllele(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference),
210                             narl.getComplication().getInsertedSequenceForwardStrandRep(),
211                             isTheUpstreamMate ? narl.getLeftJustifiedRightRefLoc(): narl.getLeftJustifiedLeftRefLoc(),
212                             isTheUpstreamMate),
213                     noExtraAttributes, isTheUpstreamMate);
214         }
215 
getOrderedMates(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference)216         public static Tuple2<BreakEndVariantType, BreakEndVariantType> getOrderedMates(final NovelAdjacencyAndAltHaplotype narl,
217                                                                                        final BasicReference reference) {
218             return new Tuple2<>(new IntraChromosomeRefOrderSwap(narl, reference, true),
219                                 new IntraChromosomeRefOrderSwap(narl, reference, false));
220         }
221 
constructAltAllele(final String refBase, final String insertedSequence, final SimpleInterval novelAdjRefLoc, final boolean forUpstreamLoc)222         private static Allele constructAltAllele(final String refBase, final String insertedSequence, final SimpleInterval novelAdjRefLoc,
223                                                  final boolean forUpstreamLoc) {
224             if (forUpstreamLoc) {
225                 return Allele.create("]" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "]" + insertedSequence + refBase);
226             } else {
227                 return Allele.create(refBase + insertedSequence + "[" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "[");
228             }
229         }
230     }
231 
232     public static final class InterChromosomeBreakend extends BreakEndVariantType {
233 
234         @VisibleForTesting
InterChromosomeBreakend(final String variantCHR, final int variantPOS, final String variantId, final Allele refAllele, final Allele altAllele, final Map<String, Object> extraAttributes, final boolean isTheUpstreamMate)235         public InterChromosomeBreakend(final String variantCHR, final int variantPOS, final String variantId,
236                                        final Allele refAllele, final Allele altAllele, final Map<String, Object> extraAttributes,
237                                        final boolean isTheUpstreamMate) {
238             super(variantCHR, variantPOS, variantId, refAllele, altAllele, extraAttributes, isTheUpstreamMate);
239         }
240 
InterChromosomeBreakend(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference, final boolean isTheUpstreamMate)241         private InterChromosomeBreakend(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference,
242                                         final boolean isTheUpstreamMate) {
243             super(isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getContig() : narl.getLeftJustifiedRightRefLoc().getContig(),
244                     isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getStart() : narl.getLeftJustifiedRightRefLoc().getEnd(),
245                     BreakEndVariantType.getIDString(narl, isTheUpstreamMate),
246                     Allele.create(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference), true),
247                     constructAltAllele(narl, reference, isTheUpstreamMate),
248                     noExtraAttributes, isTheUpstreamMate);
249         }
250 
getOrderedMates(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference)251         public static Tuple2<BreakEndVariantType, BreakEndVariantType> getOrderedMates(final NovelAdjacencyAndAltHaplotype narl,
252                                                                                        final BasicReference reference) {
253 
254             return new Tuple2<>(new InterChromosomeBreakend(narl, reference, true),
255                                 new InterChromosomeBreakend(narl, reference, false));
256         }
257 
258         // see VCF spec 4.2 for BND format ALT allele field for SV, in particular the examples shown in Fig.1, Fig.2 and Fig.5 of Section 5.4
constructAltAllele(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference, final boolean forUpstreamLoc)259         private static Allele constructAltAllele(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference,
260                                                  final boolean forUpstreamLoc) {
261             final String refBase = BreakEndVariantType.getRefBaseString(narl, forUpstreamLoc, reference);
262             final String insertedSequence = extractInsertedSequence(narl, forUpstreamLoc);
263             final SimpleInterval novelAdjRefLoc = forUpstreamLoc ? narl.getLeftJustifiedRightRefLoc() : narl.getLeftJustifiedLeftRefLoc();
264 
265             // see Fig.5 of Section 5.4 of spec Version 4.2 (the green pairs)
266             final boolean upstreamLocIsFirstInPartner =
267                     narl.getTypeInferredFromSimpleChimera().equals(TypeInferredFromSimpleChimera.INTER_CHR_NO_SS_WITH_LEFT_MATE_FIRST_IN_PARTNER);
268             if (narl.getStrandSwitch().equals(StrandSwitch.NO_SWITCH)) {
269                 if (forUpstreamLoc == upstreamLocIsFirstInPartner) {
270                     return Allele.create(refBase + insertedSequence + "[" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "[");
271                 } else {
272                     return Allele.create("]" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getStart() + "]" + insertedSequence + refBase);
273                 }
274             } else if (narl.getStrandSwitch().equals(StrandSwitch.FORWARD_TO_REVERSE)){
275                 return Allele.create(refBase + insertedSequence + "]" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "]");
276             } else {
277                 return Allele.create("[" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "[" + insertedSequence + refBase);
278             }
279         }
280 
extractInsertedSequence(final NovelAdjacencyAndAltHaplotype narl, final boolean forUpstreamLoc)281         private static String extractInsertedSequence(final NovelAdjacencyAndAltHaplotype narl, final boolean forUpstreamLoc) {
282             final String ins = narl.getComplication().getInsertedSequenceForwardStrandRep();
283             if (ins.isEmpty() || narl.getStrandSwitch() == StrandSwitch.NO_SWITCH) {
284                 return ins;
285             } else {
286                 return forUpstreamLoc == (narl.getStrandSwitch().equals(StrandSwitch.FORWARD_TO_REVERSE) ) ? ins: SequenceUtil.reverseComplement(ins);
287             }
288         }
289     }
290 }
291