package org.broadinstitute.hellbender.tools.spark.sv.discovery;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.hellbender.engine.BasicReference;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.StrandSwitch;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.NovelAdjacencyAndAltHaplotype;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.TypeInferredFromSimpleChimera;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import scala.Tuple2;

import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.Map;

import static org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants.*;

/**
 * Base class for structural variants that are reported as BND ("breakend") records.
 *
 * <p>Every instance is constructed with {@code NO_APPLICABLE_END} and {@code NO_APPLICABLE_LEN},
 * and reports that neither an end nor a length is applicable. The concrete nested classes differ
 * only in how the ALT allele string and the extra attributes are built.
 */
public abstract class BreakEndVariantType extends SvType {

    /**
     * Technically, a BND-formatted variant should have two VCF records, for mates, hence we also have this field.
     * Upstream mate is defined as the location in a mate pair that has a lower coordinate according to
     * the reference sequence dictionary.
     */
    private final boolean isTheUpstreamMate;

    /**
     * Creates a breakend record from already-computed field values.
     *
     * @param variantCHR        contig of this breakend
     * @param variantPOS        position of this breakend
     * @param variantId         variant ID
     * @param refAllele         reference allele
     * @param altAllele         BND-formatted alternate allele
     * @param extraAttributes   additional attributes passed through to {@link SvType}
     * @param isTheUpstreamMate whether this record is the upstream mate of the pair
     */
    protected BreakEndVariantType(final String variantCHR, final int variantPOS, final String variantId,
                                  final Allele refAllele, final Allele altAllele,
                                  final Map<String, Object> extraAttributes,
                                  final boolean isTheUpstreamMate) {
        super(variantCHR, variantPOS, NO_APPLICABLE_END, variantId, refAllele, altAllele, NO_APPLICABLE_LEN, extraAttributes);
        this.isTheUpstreamMate = isTheUpstreamMate;
    }

    /**
     * @return {@code true} if this record is the upstream mate of its BND pair
     */
    public final boolean isTheUpstreamMate() {
        return isTheUpstreamMate;
    }

    /**
     * @return always {@code false}: breakend records are created without an end coordinate
     */
    @Override
    public final boolean hasApplicableEnd() {
        return false;
    }

    /**
     * @return always {@code false}: breakend records are created without a length
     */
    @Override
    public final boolean hasApplicableLength() {
        return false;
    }

    /**
     * @return the constant {@code BREAKEND_STR}, regardless of the concrete subtype
     */
    @Override
    public final String toString() {
        return BREAKEND_STR;
    }

    /**
     * Two breakends are equal when they have the same runtime class, are equal according to
     * the superclass, and agree on {@link #isTheUpstreamMate()}.
     */
    @Override
    public boolean equals(final Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        if (!super.equals(o)) {
            return false;
        }

        final BreakEndVariantType that = (BreakEndVariantType) o;

        return isTheUpstreamMate == that.isTheUpstreamMate;
    }

    /**
     * Combines the superclass hash with the upstream-mate flag, consistently with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        int result = super.hashCode();
        result = 31 * result + (isTheUpstreamMate ? 1 : 0);
        return result;
    }

    //==================================================================================================================

    /**
     * Builds the variant ID of one mate: {@code BREAKEND_STR}, an optional INV55/INV33 tag,
     * the location string of the two breakpoints, and a mate suffix ("1" upstream, "2" downstream),
     * all joined with {@code INTERVAL_VARIANT_ID_FIELD_SEPARATOR}.
     *
     * @param narl           the novel adjacency being described
     * @param forUpstreamLoc whether the ID is for the upstream mate
     * @return the ID string
     */
    private static String getIDString(final NovelAdjacencyAndAltHaplotype narl, final boolean forUpstreamLoc) {
        final SimpleInterval leftLoc = narl.getLeftJustifiedLeftRefLoc();
        final SimpleInterval rightLoc = narl.getLeftJustifiedRightRefLoc();

        // if no strand switch or different contig, "", otherwise append INV55/33
        final String bndtype;
        if (narl.getStrandSwitch().equals(StrandSwitch.NO_SWITCH)
                || !leftLoc.getContig().equals(rightLoc.getContig())) {
            bndtype = "";
        } else {
            bndtype = narl.getStrandSwitch().equals(StrandSwitch.FORWARD_TO_REVERSE) ? INV55 : INV33;
        }

        final String locationPartOfString = makeLocationString(leftLoc.getContig(), leftLoc.getStart(),
                rightLoc.getContig(), rightLoc.getEnd());

        return BREAKEND_STR + INTERVAL_VARIANT_ID_FIELD_SEPARATOR +
                (bndtype.isEmpty() ? "" : bndtype + INTERVAL_VARIANT_ID_FIELD_SEPARATOR) +
                locationPartOfString + INTERVAL_VARIANT_ID_FIELD_SEPARATOR + (forUpstreamLoc ? "1" : "2");
    }

    /**
     * Fetches the reference bases at one of the two breakpoint locations and returns them as a string.
     *
     * @param narl           the novel adjacency being described
     * @param forUpstreamLoc {@code true} to read at the left-justified left location,
     *                       {@code false} to read at the left-justified right location
     * @param reference      source of reference bases
     * @return the bases at the selected location
     */
    private static String getRefBaseString(final NovelAdjacencyAndAltHaplotype narl, final boolean forUpstreamLoc,
                                           final BasicReference reference) {
        final byte[] refBases = reference.getBases(forUpstreamLoc ? narl.getLeftJustifiedLeftRefLoc() :
                narl.getLeftJustifiedRightRefLoc());
        // Decode with an explicit charset so the result does not depend on the platform default.
        // NOTE(review): assumes reference bases are ASCII-encoded nucleotide codes -- confirm against BasicReference.
        return new String(refBases, StandardCharsets.US_ASCII);
    }

    public enum SupportedType {
        INTRA_CHR_STRAND_SWITCH_55,// intra-chromosome strand-switch novel adjacency, alignments left-flanking the novel adjacency
        INTRA_CHR_STRAND_SWITCH_33,// intra-chromosome strand-switch novel adjacency, alignments right-flanking the novel adjacency

        INTRA_CHR_REF_ORDER_SWAP,// intra-chromosome reference-order swap, but NO strand-switch, novel adjacency

        INTER_CHR_STRAND_SWITCH_55,// pair WY in Fig.1 in Section 5.4 of VCF spec ver.4.2
        INTER_CHR_STRAND_SWITCH_33,// pair XZ in Fig.1 in Section 5.4 of VCF spec ver.4.2
        INTER_CHR_NO_SS_WITH_LEFT_MATE_FIRST_IN_PARTNER, // the green pair in Fig. 7 in Section 5.4 of VCF spec ver.4.2
        INTER_CHR_NO_SS_WITH_LEFT_MATE_SECOND_IN_PARTNER // the red pair in Fig. 7 in Section 5.4 of VCF spec ver.4.2
    }

    /**
     * Breakend variant type for inversion suspects: those with novel adjacency between two reference locations
     * on the same chromosome but the novel adjacency brings them together in a strand-switch fashion.
     * This is to be distinguished from the more general "translocation" breakends, which are novel adjacency between
     * reference locations without strand switch if the reference bases are from the same chromosome.
     *
     * Note that dispersed duplication with some copies inverted could also lead to breakpoints with strand switch.
     */
    private abstract static class IntraChromosomalStrandSwitchBreakEnd extends BreakEndVariantType {
        static final Map<String, Object> INV55_FLAG = Collections.singletonMap(INV55, true);
        static final Map<String, Object> INV33_FLAG = Collections.singletonMap(INV33, true);

        private IntraChromosomalStrandSwitchBreakEnd(final String variantCHR, final int variantPOS, final String variantId,
                                                     final Allele refAllele, final Allele altAllele,
                                                     final Map<String, Object> extraAttributes,
                                                     final boolean isTheUpstreamMate) {
            super(variantCHR, variantPOS, variantId, refAllele, altAllele, extraAttributes, isTheUpstreamMate);
        }

        /**
         * Returns the inserted sequence as it should appear in the ALT allele of the requested mate:
         * the forward-strand representation for the upstream mate, its reverse complement for the downstream mate.
         *
         * @param narl           the novel adjacency being described
         * @param forUpstreamLoc whether the sequence is for the upstream mate
         * @return the inserted sequence oriented for the requested mate
         */
        @VisibleForTesting
        static String extractInsertedSequence(final NovelAdjacencyAndAltHaplotype narl, final boolean forUpstreamLoc) {
            final String ins = narl.getComplication().getInsertedSequenceForwardStrandRep();
            return forUpstreamLoc ? ins : SequenceUtil.reverseComplement(ins);
        }
    }

    /**
     * Intra-chromosomal strand-switch breakend flagged with {@code INV55}; its ALT allele has the
     * form {@code <ref><inserted>]contig:pos]}.
     */
    public static final class IntraChromosomalStrandSwitch55BreakEnd extends IntraChromosomalStrandSwitchBreakEnd {

        @VisibleForTesting
        public IntraChromosomalStrandSwitch55BreakEnd(final String variantCHR, final int variantPOS, final String variantId,
                                                      final Allele refAllele, final Allele altAllele,
                                                      final Map<String, Object> extraAttributes,
                                                      final boolean isTheUpstreamMate) {
            super(variantCHR, variantPOS, variantId, refAllele, altAllele, extraAttributes, isTheUpstreamMate);
        }

        // The upstream mate sits at the start of the left location, the downstream mate at the end of the right one;
        // each mate's ALT allele points at the other mate's location.
        private IntraChromosomalStrandSwitch55BreakEnd(final NovelAdjacencyAndAltHaplotype narl,
                                                       final BasicReference reference,
                                                       final boolean isTheUpstreamMate) {
            super(isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getContig() : narl.getLeftJustifiedRightRefLoc().getContig(),
                    isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getStart() : narl.getLeftJustifiedRightRefLoc().getEnd(),
                    BreakEndVariantType.getIDString(narl, isTheUpstreamMate),
                    Allele.create(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference), true),
                    constructAltAllele(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference),
                            extractInsertedSequence(narl, isTheUpstreamMate),
                            isTheUpstreamMate ? narl.getLeftJustifiedRightRefLoc() : narl.getLeftJustifiedLeftRefLoc()),
                    INV55_FLAG, isTheUpstreamMate);
        }

        /**
         * Creates both mates for the given novel adjacency.
         *
         * @return a pair whose first element is the upstream mate and whose second is the downstream mate
         */
        public static Tuple2<BreakEndVariantType, BreakEndVariantType> getOrderedMates(final NovelAdjacencyAndAltHaplotype narl,
                                                                                       final BasicReference reference) {
            return new Tuple2<>(new IntraChromosomalStrandSwitch55BreakEnd(narl, reference, true),
                    new IntraChromosomalStrandSwitch55BreakEnd(narl, reference, false));
        }

        private static Allele constructAltAllele(final String refBase, final String insertedSequence,
                                                 final SimpleInterval novelAdjRefLoc) {
            return Allele.create(refBase + insertedSequence + "]" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "]");
        }
    }

    /**
     * Intra-chromosomal strand-switch breakend flagged with {@code INV33}; its ALT allele has the
     * form {@code [contig:pos[<inserted><ref>}.
     */
    public static final class IntraChromosomalStrandSwitch33BreakEnd extends IntraChromosomalStrandSwitchBreakEnd {

        @VisibleForTesting
        public IntraChromosomalStrandSwitch33BreakEnd(final String variantCHR, final int variantPOS, final String variantId,
                                                      final Allele refAllele, final Allele altAllele,
                                                      final Map<String, Object> extraAttributes,
                                                      final boolean isTheUpstreamMate) {
            super(variantCHR, variantPOS, variantId, refAllele, altAllele, extraAttributes, isTheUpstreamMate);
        }

        // Same coordinate selection as the INV55 variant; only the ALT bracket orientation and the flag differ.
        private IntraChromosomalStrandSwitch33BreakEnd(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference,
                                                       final boolean isTheUpstreamMate) {
            super(isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getContig() : narl.getLeftJustifiedRightRefLoc().getContig(),
                    isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getStart() : narl.getLeftJustifiedRightRefLoc().getEnd(),
                    BreakEndVariantType.getIDString(narl, isTheUpstreamMate),
                    Allele.create(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference), true),
                    constructAltAllele(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference),
                            extractInsertedSequence(narl, isTheUpstreamMate),
                            isTheUpstreamMate ? narl.getLeftJustifiedRightRefLoc() : narl.getLeftJustifiedLeftRefLoc()),
                    INV33_FLAG, isTheUpstreamMate);
        }

        /**
         * Creates both mates for the given novel adjacency.
         *
         * @return a pair whose first element is the upstream mate and whose second is the downstream mate
         */
        public static Tuple2<BreakEndVariantType, BreakEndVariantType> getOrderedMates(final NovelAdjacencyAndAltHaplotype narl,
                                                                                       final BasicReference reference) {
            return new Tuple2<>(new IntraChromosomalStrandSwitch33BreakEnd(narl, reference, true),
                    new IntraChromosomalStrandSwitch33BreakEnd(narl, reference, false));
        }

        private static Allele constructAltAllele(final String refBase, final String insertedSequence,
                                                 final SimpleInterval novelAdjRefLoc) {
            return Allele.create("[" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "[" + insertedSequence + refBase);
        }
    }

    /**
     * Breakend for an intra-chromosome reference-order swap without strand switch. The inserted sequence
     * is used in its forward-strand representation for both mates and no extra attributes are attached.
     */
    public static final class IntraChromosomeRefOrderSwap extends BreakEndVariantType {

        @VisibleForTesting
        public IntraChromosomeRefOrderSwap(final String variantCHR, final int variantPOS, final String variantId,
                                           final Allele refAllele, final Allele altAllele,
                                           final Map<String, Object> extraAttributes,
                                           final boolean isTheUpstreamMate) {
            super(variantCHR, variantPOS, variantId, refAllele, altAllele, extraAttributes, isTheUpstreamMate);
        }

        private IntraChromosomeRefOrderSwap(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference,
                                            final boolean isTheUpstreamMate) {
            super(isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getContig() : narl.getLeftJustifiedRightRefLoc().getContig(),
                    isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getStart() : narl.getLeftJustifiedRightRefLoc().getEnd(),
                    BreakEndVariantType.getIDString(narl, isTheUpstreamMate),
                    Allele.create(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference), true),
                    constructAltAllele(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference),
                            narl.getComplication().getInsertedSequenceForwardStrandRep(),
                            isTheUpstreamMate ? narl.getLeftJustifiedRightRefLoc() : narl.getLeftJustifiedLeftRefLoc(),
                            isTheUpstreamMate),
                    noExtraAttributes, isTheUpstreamMate);
        }

        /**
         * Creates both mates for the given novel adjacency.
         *
         * @return a pair whose first element is the upstream mate and whose second is the downstream mate
         */
        public static Tuple2<BreakEndVariantType, BreakEndVariantType> getOrderedMates(final NovelAdjacencyAndAltHaplotype narl,
                                                                                       final BasicReference reference) {
            return new Tuple2<>(new IntraChromosomeRefOrderSwap(narl, reference, true),
                    new IntraChromosomeRefOrderSwap(narl, reference, false));
        }

        // Upstream mate: "]contig:pos]<inserted><ref>"; downstream mate: "<ref><inserted>[contig:pos[".
        private static Allele constructAltAllele(final String refBase, final String insertedSequence,
                                                 final SimpleInterval novelAdjRefLoc,
                                                 final boolean forUpstreamLoc) {
            if (forUpstreamLoc) {
                return Allele.create("]" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "]" + insertedSequence + refBase);
            } else {
                return Allele.create(refBase + insertedSequence + "[" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "[");
            }
        }
    }

    /**
     * Breakend for a novel adjacency joining two different chromosomes, with or without strand switch.
     * No extra attributes are attached.
     */
    public static final class InterChromosomeBreakend extends BreakEndVariantType {

        @VisibleForTesting
        public InterChromosomeBreakend(final String variantCHR, final int variantPOS, final String variantId,
                                       final Allele refAllele, final Allele altAllele,
                                       final Map<String, Object> extraAttributes,
                                       final boolean isTheUpstreamMate) {
            super(variantCHR, variantPOS, variantId, refAllele, altAllele, extraAttributes, isTheUpstreamMate);
        }

        private InterChromosomeBreakend(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference,
                                        final boolean isTheUpstreamMate) {
            super(isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getContig() : narl.getLeftJustifiedRightRefLoc().getContig(),
                    isTheUpstreamMate ? narl.getLeftJustifiedLeftRefLoc().getStart() : narl.getLeftJustifiedRightRefLoc().getEnd(),
                    BreakEndVariantType.getIDString(narl, isTheUpstreamMate),
                    Allele.create(BreakEndVariantType.getRefBaseString(narl, isTheUpstreamMate, reference), true),
                    constructAltAllele(narl, reference, isTheUpstreamMate),
                    noExtraAttributes, isTheUpstreamMate);
        }

        /**
         * Creates both mates for the given novel adjacency.
         *
         * @return a pair whose first element is the upstream mate and whose second is the downstream mate
         */
        public static Tuple2<BreakEndVariantType, BreakEndVariantType> getOrderedMates(final NovelAdjacencyAndAltHaplotype narl,
                                                                                       final BasicReference reference) {

            return new Tuple2<>(new InterChromosomeBreakend(narl, reference, true),
                    new InterChromosomeBreakend(narl, reference, false));
        }

        // see VCF spec 4.2 for BND format ALT allele field for SV, in particular the examples shown in Fig.1, Fig.2 and Fig.5 of Section 5.4
        private static Allele constructAltAllele(final NovelAdjacencyAndAltHaplotype narl, final BasicReference reference,
                                                 final boolean forUpstreamLoc) {
            final String refBase = BreakEndVariantType.getRefBaseString(narl, forUpstreamLoc, reference);
            final String insertedSequence = extractInsertedSequence(narl, forUpstreamLoc);
            // the ALT allele of one mate points at the location of the other mate
            final SimpleInterval novelAdjRefLoc = forUpstreamLoc ? narl.getLeftJustifiedRightRefLoc() : narl.getLeftJustifiedLeftRefLoc();

            // see Fig.5 of Section 5.4 of spec Version 4.2 (the green pairs)
            final boolean upstreamLocIsFirstInPartner =
                    narl.getTypeInferredFromSimpleChimera().equals(TypeInferredFromSimpleChimera.INTER_CHR_NO_SS_WITH_LEFT_MATE_FIRST_IN_PARTNER);
            if (narl.getStrandSwitch().equals(StrandSwitch.NO_SWITCH)) {
                if (forUpstreamLoc == upstreamLocIsFirstInPartner) {
                    return Allele.create(refBase + insertedSequence + "[" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "[");
                } else {
                    // NOTE(review): this branch alone uses getStart() rather than getEnd(); kept as-is,
                    // presumably equivalent when the location is a single base -- confirm.
                    return Allele.create("]" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getStart() + "]" + insertedSequence + refBase);
                }
            } else if (narl.getStrandSwitch().equals(StrandSwitch.FORWARD_TO_REVERSE)) {
                return Allele.create(refBase + insertedSequence + "]" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "]");
            } else {
                return Allele.create("[" + novelAdjRefLoc.getContig() + ":" + novelAdjRefLoc.getEnd() + "[" + insertedSequence + refBase);
            }
        }

        /**
         * Returns the inserted sequence oriented for the requested mate. An empty sequence, or any sequence
         * when there is no strand switch, is returned unchanged. With a strand switch, the forward-strand
         * representation is kept when {@code forUpstreamLoc} matches "switch is FORWARD_TO_REVERSE",
         * and reverse-complemented otherwise.
         */
        private static String extractInsertedSequence(final NovelAdjacencyAndAltHaplotype narl, final boolean forUpstreamLoc) {
            final String ins = narl.getComplication().getInsertedSequenceForwardStrandRep();
            if (ins.isEmpty() || narl.getStrandSwitch() == StrandSwitch.NO_SWITCH) {
                return ins;
            } else {
                return forUpstreamLoc == (narl.getStrandSwitch().equals(StrandSwitch.FORWARD_TO_REVERSE)) ? ins : SequenceUtil.reverseComplement(ins);
            }
        }
    }
}