package org.broadinstitute.hellbender.tools.spark.sv.discovery;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.variant.variantcontext.Allele;
import org.broadinstitute.hellbender.engine.BasicReference;
import org.broadinstitute.hellbender.engine.spark.datasources.ReferenceMultiSparkSource;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.alignment.StrandSwitch;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.BreakpointComplications;
import org.broadinstitute.hellbender.tools.spark.sv.discovery.inference.NovelAdjacencyAndAltHaplotype;
import org.broadinstitute.hellbender.tools.spark.sv.evidence.EvidenceTargetLink;
import org.broadinstitute.hellbender.tools.spark.sv.evidence.ReadMetadata;
import org.broadinstitute.hellbender.tools.spark.sv.utils.GATKSVVCFConstants;
import org.broadinstitute.hellbender.tools.spark.sv.utils.SVInterval;
import org.broadinstitute.hellbender.utils.SimpleInterval;

import java.util.Collections;
import java.util.Map;

/**
 * Common base of the "simple" structural variant types declared below (inversion, deletion,
 * insertion, tandem duplication, imprecise deletion and inverted duplication).
 *
 * Every subtype reports that it has both an applicable END and an applicable length.
 * Each nested class knows how to derive its own coordinates, alleles, length, ID string and
 * extra attributes from the evidence object it is constructed from.
 */
public abstract class SimpleSVType extends SvType {

    /**
     * Surrounds a symbolic ALT allele name with angle brackets.
     *
     * @param vcfHeaderDefinedSymbAltAllele the bare symbolic allele name
     * @return the name wrapped as {@code "<name>"}
     */
    public static String createBracketedSymbAlleleString(final String vcfHeaderDefinedSymbAltAllele) {
        return '<' + vcfHeaderDefinedSymbAltAllele + '>';
    }

    /**
     * Forwards all fields, unchanged, to the {@link SvType} constructor.
     */
    protected SimpleSVType(final String variantCHR, final int variantPOS, final int variantEND, final String variantId,
                           final Allele refAllele, final Allele altAllele, final int svLen,
                           final Map<String, Object> extraAttributes) {
        super(variantCHR, variantPOS, variantEND, variantId, refAllele, altAllele, svLen, extraAttributes);
    }

    /** @return always {@code true} for simple types */
    @Override
    public final boolean hasApplicableEnd() {
        return true;
    }

    /** @return always {@code true} for simple types */
    @Override
    public final boolean hasApplicableLength() {
        return true;
    }

    /** Names of the simple variant types; the nested classes use {@code name()} of these in IDs and {@code toString()}. */
    public enum SupportedType {
        INV, DEL, INS, DUP, DUP_INV
    }

    /** Inversion; its string form is {@code SupportedType.INV.name()}. */
    public static final class Inversion extends SimpleSVType {

        /** Field-by-field constructor; passes everything straight through to the parent. */
        @VisibleForTesting
        public Inversion(final String variantCHR, final int variantPOS, final int variantEND, final String variantId,
                         final Allele refAllele, final Allele altAllele, final int svLen,
                         final Map<String, Object> extraAttributes) {
            super(variantCHR, variantPOS, variantEND, variantId, refAllele, altAllele, svLen, extraAttributes);
        }

        // TODO: 6/12/18 note the following implementation sets POS and REF at the anchor base, which is not requested by the VCF spec
        // TODO: 6/12/18 also, this interface lets one call inversion with SVLEN !=0, which is not the same as VCF spec examples
        /**
         * Builds an inversion record from a novel adjacency.
         *
         * CHR and POS come from the left-justified left reference location, END from the end of the
         * left-justified right reference location, and the REF allele from the reference bases at the
         * left location. A single flag attribute (INV55 or INV33, chosen by strand switch) is attached.
         *
         * @param novelAdjacencyAndAltHaplotype source of locations and strand switch
         * @param svLength                      value stored as the SV length
         * @param reference                     reference used to fetch the REF bases
         */
        @VisibleForTesting
        public Inversion(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype, final int svLength,
                         final BasicReference reference) {
            super(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getContig(),
                    novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
                    novelAdjacencyAndAltHaplotype.getLeftJustifiedRightRefLoc().getEnd(),
                    getIDString(novelAdjacencyAndAltHaplotype),
                    Allele.create(extractRefBases(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc(), reference), true),
                    Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_STRING_INV)),
                    svLength,
                    strandSwitchAttribute(novelAdjacencyAndAltHaplotype));
        }

        @Override
        public String toString() {
            return SupportedType.INV.name();
        }

        /** Single-entry map flagging INV55 for a forward-to-reverse strand switch, INV33 otherwise. */
        private static Map<String, Object> strandSwitchAttribute(final NovelAdjacencyAndAltHaplotype adjacency) {
            final String flag;
            if (adjacency.getStrandSwitch() == StrandSwitch.FORWARD_TO_REVERSE) {
                flag = GATKSVVCFConstants.INV55;
            } else {
                flag = GATKSVVCFConstants.INV33;
            }
            return Collections.singletonMap(flag, true);
        }

        /** ID = (INV55 | INV33) + separator + location string of the adjacency. */
        private static String getIDString(final NovelAdjacencyAndAltHaplotype adjacency) {
            final StrandSwitch observedSwitch = adjacency.getStrandSwitch();
            final String orientation;
            if (observedSwitch.equals(StrandSwitch.FORWARD_TO_REVERSE)) {
                orientation = GATKSVVCFConstants.INV55;
            } else {
                orientation = GATKSVVCFConstants.INV33;
            }
            return orientation + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
                    + makeLocationString(adjacency);
        }
    }

    /** Deletion; its string form is {@code SupportedType.DEL.name()}. */
    public static final class Deletion extends SimpleSVType {

        /** Field-by-field constructor; passes everything straight through to the parent. */
        @VisibleForTesting
        public Deletion(final String variantCHR, final int variantPOS, final int variantEND, final String variantId,
                        final Allele refAllele, final Allele altAllele, final int svLen,
                        final Map<String, Object> extraAttributes) {
            super(variantCHR, variantPOS, variantEND, variantId, refAllele, altAllele, svLen, extraAttributes);
        }

        /**
         * Builds a deletion record from a novel adjacency.
         *
         * The stored length is the negated distance between the novel adjacencies. When the adjacency
         * carries a duplication annotation, the tandem-contraction flag attribute is attached and the
         * ID uses the tandem-contraction prefix instead of {@code DEL}.
         *
         * @param novelAdjacencyAndAltHaplotype source of locations, distance and annotations
         * @param reference                     reference used to fetch the REF bases
         */
        public Deletion(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype,
                        final BasicReference reference) {
            super(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getContig(),
                    novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
                    novelAdjacencyAndAltHaplotype.getLeftJustifiedRightRefLoc().getEnd(),
                    getIDString(novelAdjacencyAndAltHaplotype),
                    Allele.create(extractRefBases(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc(), reference), true),
                    Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_STRING_DEL)),
                    -novelAdjacencyAndAltHaplotype.getDistanceBetweenNovelAdjacencies(),
                    contractionAttributes(novelAdjacencyAndAltHaplotype));
        }

        @Override
        public String toString() {
            return SupportedType.DEL.name();
        }

        /** Tandem-contraction flag when a duplication annotation is present, otherwise no extra attributes. */
        private static Map<String, Object> contractionAttributes(final NovelAdjacencyAndAltHaplotype adjacency) {
            if (adjacency.hasDuplicationAnnotation()) {
                return Collections.singletonMap(GATKSVVCFConstants.DUP_TAN_CONTRACTION_STRING, true);
            }
            return noExtraAttributes;
        }

        /** ID = (tandem-contraction prefix | "DEL") + separator + location string of the adjacency. */
        private static String getIDString(final NovelAdjacencyAndAltHaplotype adjacency) {
            final String typePrefix;
            if (adjacency.hasDuplicationAnnotation()) {
                typePrefix = GATKSVVCFConstants.DUP_TAN_CONTRACTION_INTERNAL_ID_START_STRING;
            } else {
                typePrefix = SupportedType.DEL.name();
            }
            return typePrefix
                    + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
                    + makeLocationString(adjacency);
        }
    }

    /** Insertion; its string form is {@code SupportedType.INS.name()}. */
    public static final class Insertion extends SimpleSVType {

        /** Field-by-field constructor; passes everything straight through to the parent. */
        @VisibleForTesting
        public Insertion(final String variantCHR, final int variantPOS, final int variantEND, final String variantId,
                         final Allele refAllele, final Allele altAllele, final int svLen,
                         final Map<String, Object> extraAttributes) {
            super(variantCHR, variantPOS, variantEND, variantId, refAllele, altAllele, svLen, extraAttributes);
        }

        /**
         * Builds an insertion record from a novel adjacency; END, REF bases, length and ID are each
         * chosen by the private helpers below. No extra attributes are attached.
         *
         * @param novelAdjacencyAndAltHaplotype source of locations and complications
         * @param reference                     reference used to fetch the REF bases
         */
        public Insertion(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype,
                         final BasicReference reference) {
            super(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getContig(),
                    novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
                    getEnd(novelAdjacencyAndAltHaplotype),
                    getIDString(novelAdjacencyAndAltHaplotype),
                    Allele.create(getRefBases(novelAdjacencyAndAltHaplotype, reference), true),
                    Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_STRING_INS)),
                    getLength(novelAdjacencyAndAltHaplotype),
                    noExtraAttributes);
        }

        @Override
        public String toString() {
            return SupportedType.INS.name();
        }

        // The helpers below exist to tell apart a fat insertion from a linked del+ins in an RPL event,
        // as well as duplication events whose duplicated unit is not large enough.

        /** END: end of the right location for a fat-insertion candidate, otherwise the start of the left location. */
        private static int getEnd(final NovelAdjacencyAndAltHaplotype adjacency) {
            if (adjacency.isCandidateForFatInsertion()) {
                return adjacency.getLeftJustifiedRightRefLoc().getEnd();
            }
            return adjacency.getLeftJustifiedLeftRefLoc().getStart();
        }

        /** REF bases: over the fat-insertion interval for a candidate, otherwise over the left location. */
        private static byte[] getRefBases(final NovelAdjacencyAndAltHaplotype adjacency,
                                          final BasicReference ref) {
            if (adjacency.isCandidateForFatInsertion()) {
                return extractRefBases(adjacency.getIntervalForFatInsertion(), ref);
            }
            return extractRefBases(adjacency.getLeftJustifiedLeftRefLoc(), ref);
        }

        /** Length: tandem-expansion length when duplication-annotated, otherwise the inserted sequence's length. */
        private static int getLength(final NovelAdjacencyAndAltHaplotype adjacency) {
            if (adjacency.getComplication().hasDuplicationAnnotation()) {
                return adjacency.getLengthForDupTandemExpansion();
            }
            return adjacency.getComplication().getInsertedSequenceForwardStrandRep().length();
        }

        /**
         * ID = "INS" + separator + location. A fat-insertion candidate uses the adjacency's own location
         * string; otherwise the location is the left location's contig and start, used for both ends.
         */
        private static String getIDString(final NovelAdjacencyAndAltHaplotype adjacency) {
            final StringBuilder id = new StringBuilder(SupportedType.INS.name())
                    .append(GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR);
            if (adjacency.isCandidateForFatInsertion()) {
                id.append(makeLocationString(adjacency));
            } else {
                id.append(makeLocationString(
                        adjacency.getLeftJustifiedLeftRefLoc().getContig(),
                        adjacency.getLeftJustifiedLeftRefLoc().getStart(),
                        adjacency.getLeftJustifiedLeftRefLoc().getContig(),
                        adjacency.getLeftJustifiedLeftRefLoc().getStart()));
            }
            return id.toString();
        }
    }

    /** Tandem duplication; its string form is {@code SupportedType.DUP.name()}. */
    public static final class DuplicationTandem extends SimpleSVType {

        /** Field-by-field constructor; passes everything straight through to the parent. */
        @VisibleForTesting
        public DuplicationTandem(final String variantCHR, final int variantPOS, final int variantEND, final String variantId,
                                 final Allele refAllele, final Allele altAllele, final int svLen,
                                 final Map<String, Object> extraAttributes) {
            super(variantCHR, variantPOS, variantEND, variantId, refAllele, altAllele, svLen, extraAttributes);
        }

        // TODO: 6/12/18 the following implementation treats DuplicationTandem as simple insertions, and duplication annotations will be saved in INFO columns
        /**
         * Builds a tandem-duplication record from a novel adjacency.
         *
         * POS and END are both the start of the left-justified left reference location; the length is
         * the tandem-expansion length and the tandem-expansion flag attribute is attached.
         *
         * @param novelAdjacencyAndAltHaplotype source of locations and complications
         * @param reference                     reference used to fetch the REF bases
         */
        public DuplicationTandem(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype,
                                 final BasicReference reference) {
            super(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getContig(),
                    novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
                    novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
                    getIDString(novelAdjacencyAndAltHaplotype),
                    Allele.create(extractRefBases(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc(), reference), true),
                    Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_STRING_DUP)),
                    novelAdjacencyAndAltHaplotype.getLengthForDupTandemExpansion(),
                    Collections.singletonMap(GATKSVVCFConstants.DUP_TAN_EXPANSION_STRING, true));
        }

        @Override
        public String toString() {
            return SupportedType.DUP.name();
        }

        /**
         * ID = tandem-expansion prefix + separator + location of the duplicated repeat unit's reference span.
         * The complication is cast to the small-duplication subtype, so any other subtype fails the cast.
         */
        private static String getIDString(final NovelAdjacencyAndAltHaplotype adjacency) {
            final BreakpointComplications.SmallDuplicationBreakpointComplications smallDup =
                    (BreakpointComplications.SmallDuplicationBreakpointComplications) adjacency.getComplication();
            final SimpleInterval repeatUnit = smallDup.getDupSeqRepeatUnitRefSpan();

            return GATKSVVCFConstants.DUP_TAN_EXPANSION_INTERNAL_ID_START_STRING
                    + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR
                    + makeLocationString(repeatUnit.getContig(), repeatUnit.getStart(),
                                         repeatUnit.getContig(), repeatUnit.getEnd());
        }
    }

    /** Deletion built from an evidence target link and flagged IMPRECISE; its string form is {@code SupportedType.DEL.name()}. */
    public static final class ImpreciseDeletion extends SimpleSVType {

        /**
         * Builds an imprecise deletion from an evidence target link.
         *
         * POS is the midpoint of the link's left interval, END the midpoint of its right interval,
         * and the REF allele is the single reference base at the left midpoint. The IMPRECISE flag
         * attribute is attached.
         *
         * @param evidenceTargetLink link providing the left and right intervals
         * @param svLength           value stored as the SV length
         * @param metadata           used to translate the interval's contig into a contig name
         * @param reference          reference used to fetch the REF base
         */
        public ImpreciseDeletion(final EvidenceTargetLink evidenceTargetLink, final int svLength, final ReadMetadata metadata,
                                 final ReferenceMultiSparkSource reference) {
            super(metadata.getContigName(leftIntervalOf(evidenceTargetLink).getContig()),
                    leftIntervalOf(evidenceTargetLink).midpoint(),
                    rightIntervalOf(evidenceTargetLink).midpoint(),
                    getIDString(evidenceTargetLink, metadata),
                    Allele.create(getRefBases(evidenceTargetLink, metadata, reference), true),
                    Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_STRING_DEL)),
                    svLength,
                    Collections.singletonMap(GATKSVVCFConstants.IMPRECISE, true));
        }

        @Override
        public String toString() {
            return SupportedType.DEL.name();
        }

        /** Interval of the left member of the link's paired stranded intervals. */
        private static SVInterval leftIntervalOf(final EvidenceTargetLink link) {
            return link.getPairedStrandedIntervals().getLeft().getInterval();
        }

        /** Interval of the right member of the link's paired stranded intervals. */
        private static SVInterval rightIntervalOf(final EvidenceTargetLink link) {
            return link.getPairedStrandedIntervals().getRight().getInterval();
        }

        /** Reference bases over the one-position interval at the left interval's midpoint. */
        private static byte[] getRefBases(final EvidenceTargetLink link, final ReadMetadata readMetadata,
                                          final ReferenceMultiSparkSource ref) {
            final SVInterval left = leftIntervalOf(link);
            final String contigName = readMetadata.getContigName(left.getContig());
            final SimpleInterval anchor = new SimpleInterval(contigName, left.midpoint(), left.midpoint());
            return extractRefBases(anchor, ref);
        }

        /**
         * ID = "DEL", IMPRECISE, contig name, left start, left end, right start, right end,
         * joined by the variant-ID field separator.
         */
        private static String getIDString(final EvidenceTargetLink link, final ReadMetadata readMetadata) {
            final SVInterval left = leftIntervalOf(link);
            final SVInterval right = rightIntervalOf(link);

            return new StringBuilder(SupportedType.DEL.name())
                    .append(GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR)
                    .append(GATKSVVCFConstants.IMPRECISE)
                    .append(GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR)
                    .append(readMetadata.getContigName(left.getContig()))
                    .append(GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR)
                    .append(left.getStart())
                    .append(GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR)
                    .append(left.getEnd())
                    .append(GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR)
                    .append(right.getStart())
                    .append(GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR)
                    .append(right.getEnd())
                    .toString();
        }
    }

    /** Inverted duplication; its string form is the literal {@code "DUP:INV"}. */
    public static final class DuplicationInverted extends SimpleSVType {

        /**
         * Builds an inverted-duplication record from a novel adjacency.
         *
         * POS and END are both the start of the left-justified left reference location; the length is
         * the size of the duplicated repeat unit's reference span. No extra attributes are attached.
         *
         * @param novelAdjacencyAndAltHaplotype source of locations and complications
         * @param reference                     reference used to fetch the REF bases
         */
        public DuplicationInverted(final NovelAdjacencyAndAltHaplotype novelAdjacencyAndAltHaplotype,
                                   final BasicReference reference) {
            super(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getContig(),
                    novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
                    novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc().getStart(),
                    getIDString(novelAdjacencyAndAltHaplotype),
                    Allele.create(extractRefBases(novelAdjacencyAndAltHaplotype.getLeftJustifiedLeftRefLoc(), reference), true),
                    Allele.create(createBracketedSymbAlleleString(GATKSVVCFConstants.SYMB_ALT_STRING_INVDUP)),
                    getSVLen(novelAdjacencyAndAltHaplotype),
                    noExtraAttributes);
        }

        @Override
        public String toString() {
            return "DUP:INV";
        }

        /**
         * Size of the duplicated repeat unit's reference span. The complication is cast to the
         * inverted-duplication subtype, so any other subtype fails the cast.
         */
        private static int getSVLen(final NovelAdjacencyAndAltHaplotype adjacency) {
            final BreakpointComplications.InvertedDuplicationBreakpointComplications invDup =
                    (BreakpointComplications.InvertedDuplicationBreakpointComplications) adjacency.getComplication();
            return invDup.getDupSeqRepeatUnitRefSpan().size();
        }

        /** ID = inverted-duplication prefix + separator + location string of the adjacency. */
        private static String getIDString(final NovelAdjacencyAndAltHaplotype adjacency) {
            final String prefixWithSeparator = GATKSVVCFConstants.DUP_INV_INTERNAL_ID_START_STRING
                    + GATKSVVCFConstants.INTERVAL_VARIANT_ID_FIELD_SEPARATOR;
            return prefixWithSeparator + makeLocationString(adjacency);
        }

    }

}