1 // ==========================================================================
2 // Gustaf
3 // ==========================================================================
4 // Copyright (c) 2011-2018, Kathrin Trappe, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Kathrin Trappe <kathrin.trappe@fu-berlin.de>
33 // ==========================================================================
34
35 #ifndef SEQAN_APPS_GUSTAF_MSPLAZER_H_
36 #define SEQAN_APPS_GUSTAF_MSPLAZER_H_
37
38 #define BREAKPOINT_DEBUG
39
40 #include <seqan/basic.h>
41 #include <seqan/sequence.h>
42 #include <seqan/arg_parse.h>
43
44 #include "create_stellarmatches_from_file.h"
45
46 using namespace seqan;
47
48 // ============================================================================
49 // Forwards
50 // ============================================================================
51
52 // ============================================================================
53 // Tags, Classes, Enums
54 // ============================================================================
55
56
57 // ----------------------------------------------------------------------------
58 // Class MSplazerOptions
59 // ----------------------------------------------------------------------------
60
61 struct MSplazerOptions
62 {
63 // I/O options
64 CharString databaseFile; // name of database (db) file
65 StringSet<CharString> queryFile; // name of query file(s) (two in case of paired-end)
66 // CharString queryFile2; // name of 2nd query file (mate pairs)
67 CharString emptyReadsOutFile; // file with reads that have broken or empty chains
68 CharString disabledQueriesFile; // name of result file containing disabled queries
69 CharString vcfOutFile; // breakpoint output file name
70 CharString gffOutFile; // breakpoint output file name
71 CharString jobName; // job name, used for dot output
72 CharString stellarInputFile; // optional input file with stellar matches
73 bool dotOut;
74
75 // Transposition penalties and thresholds (criteria for inserting edges in read graphs)
76 unsigned diffDBPen; // Penalty for matches on different databases
77 unsigned diffStrandPen; // Pen. for matches on diff. strand of the same database
78 unsigned diffOrderPen; // Pen. for matches with diff. order wrt the order within the query
79 unsigned noMateMatchesPen; // Pen. for matches with no confirming mate matches
80 double simThresh; // Allowed similarity between overlapping sequences, i.e. percentage of overlap
81 int gapThresh; // Allowed gap or distance between matches
82 int initGapThresh; // Maximal allowed start or end gap length
83 int breakendThresh; // Maximal allowed length for a breakend
84 int tandemThresh; // Minimal length of insertion (double overlap) to be called as tandem repeat
85 unsigned breakpointPosRange; // Allowed range of breakpoint positions
86 unsigned support;
87 unsigned mateSupport;
88 unsigned libSize; // Library size (mate pairs)
89 unsigned libError; // Library size (mate pairs)
90 bool pairedEndMode; // Whether or not to run Gustaf in paired-end mode
91 bool revCompl; // Whether or not to rev-compl the second input file
92 bool inferComplexBP; // Whether or not to inferr complex breakpoints
93 unsigned numThreads; // Number of threads for parallelization
94
MSplazerOptionsMSplazerOptions95 MSplazerOptions() :
96 databaseFile("reference.fa"),
97 emptyReadsOutFile("emptyReads.fa"),
98 disabledQueriesFile("msplazer.disabled.fasta"),
99 vcfOutFile("breakpoints.vcf"),
100 gffOutFile("breakpoints.gff"),
101 dotOut(false),
102 diffDBPen(5),
103 diffStrandPen(5),
104 diffOrderPen(0),
105 noMateMatchesPen(5),
106 simThresh(0.5),
107 gapThresh(5), // microindel size around breakpoints
108 initGapThresh(15),
109 breakendThresh(30),
110 tandemThresh(50),
111 breakpointPosRange(5),
112 support(2),
113 mateSupport(2),
114 libSize(0),
115 libError(0),
116 pairedEndMode(false),
117 revCompl(true),
118 inferComplexBP(true),
119 numThreads(1)
120 {}
121 };
122
123 // ----------------------------------------------------------------------------
124 // Class Breakpoint
125 // ----------------------------------------------------------------------------
126
127 // BreakPoint class: container for structural variant or RNA-seq breakpoint information
128 template <typename TSequence_, typename TId_>
129 struct Breakpoint
130 {
131 enum SVType
132 {
133 INVALID, // 0
134 INSERTION, // 1
135 DELETION, // 2
136 INVERSION, // 3
137 SEQAN_TANDEM, // 4 TANDEM is an IOCTL numeric constant in Hurd
138 DISPDUPLICATION, // 5
139 INTERTRANSLOCATION, // 6
140 TRANSLOCATION, // 7
141 BREAKEND // 8
142 };
143
144 typedef TSequence_ TSequence;
145 typedef TId_ TId;
146 typedef typename Position<TSequence>::Type TPos;
147
148 // Ids of the two sequences
149 TId startSeqId;
150 TId endSeqId;
151 TId midPosId;
152 // Sequence orientation
153 bool startSeqStrand;
154 bool endSeqStrand;
155 bool midPosStrand;
156 // Last position in start sequence and first position in end sequence
157 TPos startSeqPos; // End position of split alignment, i.e. first position of variant
158 TPos endSeqPos; // Begin position of split alignment, i.e. position after split in reference, i.e. after the variant
159 TPos dupTargetPos; // Pos. before inserted, duplicated seq (should be equal to either startSeqPos or endSeqPos)
160 TPos dupMiddlePos; // TPos dupMidPos; Middle position of duplication or translocation
161 // per default position before event, has to be adjustet depending on dupTargetPos
162 TPos readStartPos;
163 TPos readEndPos;
164 TPos cipos; // Confidence after startSeqPos for imprecise breakpoint
165 TPos ciend; // Confidence after endSeqPos for imprecise breakpoint
166 TPos cimiddle; // Confidence after dupMiddlePos for imprecise breakpoint
167 // Counter of occurrences (read support)
168 unsigned support;
169 unsigned similar;
170 // Query Sequence Ids (queries/reads that support the breakpoint)
171 StringSet<TId> supportIds;
172 // SV type
173 SVType svtype;
174 TSequence insertionSeq;
175 bool revStrandDel;
176 bool inferredBP;
177 // If both of these flags are true, then we have seen two (pseudo)deletions supporting both start and end position
178 // of a translocation.
179 bool translSuppStartPos;
180 bool translSuppEndPos;
181 // bool imprecise = false;
182 // Storing on which site the breakend is:
183 // 0: left breakend, i.e. sequence continues right of position
184 // 1: right breakend, i.e. sequence continues left of position
185 bool breakend;
186
BreakpointBreakpoint187 Breakpoint() :
188 startSeqId("####"),
189 endSeqId("####"),
190 midPosId("####"),
191 startSeqStrand(true),
192 endSeqStrand(true),
193 midPosStrand(false),
194 startSeqPos(0),
195 endSeqPos(0),
196 dupTargetPos(std::numeric_limits<unsigned>::max()),
197 dupMiddlePos(std::numeric_limits<unsigned>::max()),
198 readStartPos(0),
199 readEndPos(0),
200 cipos(std::numeric_limits<unsigned>::max()),
201 ciend(std::numeric_limits<unsigned>::max()),
202 cimiddle(std::numeric_limits<unsigned>::max()),
203 support(1),
204 similar(std::numeric_limits<unsigned>::max()),
205 svtype(INVALID),
206 insertionSeq("NNNN"),
207 revStrandDel(false),
208 inferredBP(false),
209 translSuppStartPos(false),
210 translSuppEndPos(false),
211 breakend(false)
212 {}
213
BreakpointBreakpoint214 Breakpoint(TId const & sId,
215 TId const & eId,
216 bool const & sStrand,
217 bool const & eStrand,
218 TPos const & sPos,
219 TPos const & ePos,
220 TPos const & rsPos,
221 TPos const & rePos) :
222 startSeqId(sId),
223 endSeqId(eId),
224 midPosId("####"),
225 startSeqStrand(sStrand),
226 endSeqStrand(eStrand),
227 midPosStrand(false),
228 startSeqPos(sPos),
229 endSeqPos(ePos),
230 dupTargetPos(std::numeric_limits<unsigned>::max()),
231 dupMiddlePos(std::numeric_limits<unsigned>::max()),
232 readStartPos(rsPos),
233 readEndPos(rePos),
234 cipos(std::numeric_limits<unsigned>::max()),
235 ciend(std::numeric_limits<unsigned>::max()),
236 cimiddle(std::numeric_limits<unsigned>::max()),
237 support(1),
238 similar(std::numeric_limits<unsigned>::max()),
239 svtype(INVALID),
240 insertionSeq("NNNN"),
241 revStrandDel(false),
242 inferredBP(false),
243 translSuppStartPos(false),
244 translSuppEndPos(false),
245 breakend(false)
246 {}
247
BreakpointBreakpoint248 Breakpoint(TId const & sId,
249 TId const & eId,
250 bool const & sStrand,
251 bool const & eStrand,
252 TPos const & sPos,
253 TPos const & ePos,
254 TPos const & rsPos,
255 TPos const & rePos,
256 TId const & spId) :
257 startSeqId(sId),
258 endSeqId(eId),
259 midPosId("####"),
260 startSeqStrand(sStrand),
261 endSeqStrand(eStrand),
262 midPosStrand(false),
263 startSeqPos(sPos),
264 endSeqPos(ePos),
265 dupTargetPos(std::numeric_limits<unsigned>::max()),
266 dupMiddlePos(std::numeric_limits<unsigned>::max()),
267 readStartPos(rsPos),
268 readEndPos(rePos),
269 cipos(std::numeric_limits<unsigned>::max()),
270 ciend(std::numeric_limits<unsigned>::max()),
271 cimiddle(std::numeric_limits<unsigned>::max()),
272 support(1),
273 similar(std::numeric_limits<unsigned>::max()),
274 svtype(INVALID),
275 insertionSeq("NNNN"),
276 revStrandDel(false),
277 inferredBP(false),
278 translSuppStartPos(false),
279 translSuppEndPos(false),
280 breakend(false)
281 {appendValue(supportIds, spId); }
282 };
283
284 // ----------------------------------------------------------------------------
285 // Class SparsePropertyMap
286 // ----------------------------------------------------------------------------
287
288 // Sparse property map class: A property map that has only a few objects or where most of the object would be empty
289 template <typename TValue, typename TPos>
290 struct SparsePropertyMap
291 {
292 typedef String<TValue> TValueTable;
293 typedef String<TPos> TSlotLookupTable;
294
295 TValueTable valueTable;
296 // slotLookupTable: Stores at each position (descriptor value) the position of the
297 // corresponding object in valueTable, or -1 if there is no object
298 // for this descriptor
299 TSlotLookupTable slotLookupTable;
300
SparsePropertyMapSparsePropertyMap301 SparsePropertyMap(){}
SparsePropertyMapSparsePropertyMap302 SparsePropertyMap(TValueTable vt, TSlotLookupTable slt) :
303 valueTable(vt), slotLookupTable(slt) {}
304 };
305 /*
306 * Only works within seqan namespace
307 template <typename TValue, typename TPos>
308 struct Value<SparsePropertyMap<TValue, TPos> >
309 {
310 typedef TValue Type;
311 };
312 template <typename TValue, typename TPos>
313 struct Value<SparsePropertyMap<TValue, TPos> const>
314 {
315 typedef TValue Type;
316 };
317 */
318
319 // ----------------------------------------------------------------------------
320 // Class MSplazerChain
321 // ----------------------------------------------------------------------------
322
323 // Container for storing chaining graph, matchDistanceScores, start and end vertex for one read
324 // Vertex descriptor value of each vertex in the graph corresponds to the position of the stellar match within
325 // container for Stellar matches (Query Matches)
326 template <typename TGraph_, typename TVertexDescriptor_, typename TScoreAlloc_, typename TSparsePropertyMap_,
327 typename TMatchAlloc_>
328 struct MSplazerChain
329 {
330 typedef TGraph_ TGraph;
331 typedef TVertexDescriptor_ TVertexDescriptor;
332 typedef TScoreAlloc_ TScoreAlloc;
333 typedef TSparsePropertyMap_ TSparsePropertyMap;
334 typedef typename Size<TGraph>::Type TGraphSize;
335 typedef TMatchAlloc_ TMatchAlloc;
336 typedef IntervalAndCargo<unsigned, unsigned> TInterval;
337 typedef IntervalTree<unsigned, unsigned> TIntervalTree;
338
339 TGraph graph; // Contains (Stellar)matches as vertices and edges between compatible matches
340 TVertexDescriptor startVertex; // Artificial start and end vertex (represents start/end of the read sequence)
341 TVertexDescriptor endVertex;
342 String<TVertexDescriptor> predMap; // Predecessor and distance map for dagShortestPath
343 String<TGraphSize> distMap; // Distance map for shortest path
344 TScoreAlloc matchDistanceScores; // Distance scores of matches (edit distance of reference mapping)
345 TSparsePropertyMap breakpoints;
346 String<TMatchAlloc> bestChains;
347 TIntervalTree rightMateTree;
348 TIntervalTree leftMateTree;
349 unsigned mateJoinPosition;
350 bool isEmpty;
351 bool isPartial;
352 // bool transl/dupl;
353
MSplazerChainMSplazerChain354 MSplazerChain(TScoreAlloc & _scores) :
355 startVertex(), endVertex(),
356 matchDistanceScores(_scores), mateJoinPosition(0),
357 isEmpty(false), isPartial(false)
358 {}
359 };
360
361 struct Options
362 {
363 bool showHelp;
364 bool showVersion;
365 int i;
366 String<CharString> texts;
367
OptionsOptions368 Options(bool const & h, int & it) :
369 showHelp(h), i(it)
370 {}
371
372 };
373
374 // ============================================================================
375 // Metafunctions
376 // ============================================================================
377
378 // ============================================================================
379 // Functions
380 // ============================================================================
381
382 // ----------------------------------------------------------------------------
383 // Function assignProperty()
384 // ----------------------------------------------------------------------------
385
386 template <typename TObject, typename TPos, typename TDescriptor>
assignProperty(SparsePropertyMap<TObject,TPos> & spm,TDescriptor const & descr)387 inline void assignProperty(SparsePropertyMap<TObject, TPos> & spm, TDescriptor const & descr)
388 {
389 assignProperty(spm.slotLookupTable, descr, -1);
390 }
391
392 template <typename TObject, typename TPos, typename TDescriptor, typename TValue>
assignProperty(SparsePropertyMap<TObject,TPos> & spm,TDescriptor const & descr,TValue const & val)393 inline void assignProperty(SparsePropertyMap<TObject, TPos> & spm,
394 TDescriptor const & descr,
395 TValue const & val)
396 {
397 assignProperty(spm.slotLookupTable, descr, length(spm.valueTable));
398 appendValue(spm.valueTable, val);
399 }
400
401 // ----------------------------------------------------------------------------
402 // Function property()
403 // ----------------------------------------------------------------------------
404
405 template <typename TObject, typename TPos, typename TDescriptor>
property(SparsePropertyMap<TObject,TPos> & spm,TDescriptor const & descr)406 inline typename Reference<TObject>::Type property(SparsePropertyMap<TObject, TPos> & spm,
407 TDescriptor const & descr)
408 {
409
410 TPos index = getProperty(spm.slotLookupTable, descr);
411 // obj = getValue(spm.valueTable, index);
412 return value(spm.valueTable, index);
413 }
414
415 template <typename TObject, typename TPos, typename TDescriptor>
property(SparsePropertyMap<TObject,TPos> const & spm,TDescriptor const & descr)416 inline typename Reference<TObject>::Type property(SparsePropertyMap<TObject, TPos> const & spm,
417 TDescriptor const & descr)
418 {
419
420 TPos index = getProperty(spm.slotLookupTable, descr);
421 // obj = getValue(spm.valueTable, index);
422 return value(spm.valueTable, index);
423 }
424
425 // ----------------------------------------------------------------------------
426 // Function getProperty()
427 // ----------------------------------------------------------------------------
428
429 template <typename TObject, typename TPos, typename TDescriptor>
getProperty(SparsePropertyMap<TObject,TPos> & spm,TDescriptor const & descr,TObject & obj)430 inline bool getProperty(SparsePropertyMap<TObject, TPos> & spm, TDescriptor const & descr, TObject & obj)
431 {
432 TPos index = getProperty(spm.slotLookupTable, descr);
433 if (index == static_cast<TPos>(-1))
434 return 0;
435
436 obj = value(spm.valueTable, index);
437 return 1;
438 }
439
440 template <typename TObject, typename TPos, typename TDescriptor>
getProperty(SparsePropertyMap<TObject,TPos> const & spm,TDescriptor const & descr,TObject & obj)441 inline bool getProperty(SparsePropertyMap<TObject, TPos> const & spm, TDescriptor const & descr, TObject & obj)
442 {
443 TPos index = getProperty(spm.slotLookupTable, descr);
444 if (index == static_cast<TPos>(-1))
445 return 0;
446
447 obj = value(spm.valueTable, index);
448 return 1;
449 }
450
451 // ----------------------------------------------------------------------------
452 // Function appendSupportId()
453 // ----------------------------------------------------------------------------
454
455 template <typename TBreakpoint, typename TId>
appendSupportId(TBreakpoint & bp,TId const & id)456 inline void appendSupportId(TBreakpoint & bp, TId const & id)
457 {
458 for (unsigned i = 0; i < length(bp.supportIds); ++i)
459 {
460 if (bp.supportIds[i] == id)
461 {
462 ++bp.support;
463 return;
464 }
465 }
466 appendValue(bp.supportIds, id);
467 ++bp.support;
468 }
469
470 template <typename TBreakpoint, typename TId>
appendSupportId(TBreakpoint & bp,StringSet<TId> const & ids)471 inline void appendSupportId(TBreakpoint & bp, StringSet<TId> const & ids)
472 {
473 /*
474 typedef typename Iterator<StringSet<TId> >::Type TIterator;
475 TIterator it = begin(ids);
476 for(;!atEnd(it);goNext(it))
477 appendSupportId(bp, *it);
478 */
479 for (unsigned i = 0; i < length(ids); ++i)
480 appendSupportId(bp, ids[i]);
481 }
482
483 // ----------------------------------------------------------------------------
484 // Function setSupport()
485 // ----------------------------------------------------------------------------
486 template <typename TBreakpoint>
setSupport(TBreakpoint & bp,unsigned const & value)487 inline void setSupport(TBreakpoint & bp, unsigned const & value)
488 {
489 assignValue(bp.support, value);
490 }
491
492 // ----------------------------------------------------------------------------
493 // Function getSVType()
494 // ----------------------------------------------------------------------------
495
496 template <typename TSequence, typename TId, typename TSVType>
getSVType(Breakpoint<TSequence,TId> & bp)497 inline TSVType getSVType(Breakpoint<TSequence, TId> & bp)
498 {
499 return bp.svtype;
500 }
501
502 // ----------------------------------------------------------------------------
503 // Function setSVType()
504 // ----------------------------------------------------------------------------
505
506 template <typename TBreakpoint, typename TSVType>
setSVType(TBreakpoint & bp,TSVType type)507 inline void setSVType(TBreakpoint & bp, TSVType type)
508 {
509 bp.svtype = type;
510 }
511
512 template <typename TBreakpoint>
setSVType(TBreakpoint & bp,bool refOrder)513 inline bool setSVType(TBreakpoint & bp, bool refOrder)
514 {
515 // if insertion return 1; else return 0;
516 if (bp.startSeqId != bp.endSeqId)
517 {
518 bp.svtype = TBreakpoint::INTERTRANSLOCATION;
519 return false;
520 }
521 if (bp.startSeqStrand != bp.endSeqStrand)
522 {
523 if (bp.startSeqPos > bp.endSeqPos)
524 {
525 std::swap(bp.startSeqPos, bp.endSeqPos);
526 }
527 setSVType(bp, TBreakpoint::INVERSION);
528 return false;
529 }
530 if (bp.startSeqPos < bp.endSeqPos)
531 {
532 // Normal order on reverse strand(!) indicates duplication, on forward a deletion
533 if (!bp.startSeqStrand)
534 {
535 setSVType(bp, TBreakpoint::DISPDUPLICATION);
536 return false;
537 }
538 setSVType(bp, TBreakpoint::DELETION);
539 return false;
540 }
541 if (bp.startSeqPos > bp.endSeqPos)
542 {
543 // Different order on forward strand(!) indicates duplication, on reverse deletion
544 std::swap(bp.startSeqPos, bp.endSeqPos);
545 if (!refOrder && !bp.startSeqStrand)
546 {
547 setSVType(bp, TBreakpoint::DELETION);
548 bp.revStrandDel = true;
549 return false;
550 }
551 setSVType(bp, TBreakpoint::DISPDUPLICATION);
552 return false;
553 }
554 setSVType(bp, TBreakpoint::INSERTION);
555 return true;
556 }
557
558 template <typename TBreakpoint, typename TSequence>
setInsertionSeq(TBreakpoint & bp,TSequence & inSeq)559 inline void setInsertionSeq(TBreakpoint & bp, TSequence & inSeq)
560 {
561 bp.insertionSeq = inSeq;
562 // assignValue(bp.insertionSeq, inSeq);
563 }
564
565 template <typename TPos, typename TPosR>
_posInSameRange(TPos const & pos1,TPos const & pos2,TPosR const & range)566 inline bool _posInSameRange(TPos const & pos1, TPos const & pos2, TPosR const & range)
567 {
568 if (pos1 < pos2)
569 return pos2 <= pos1 + range;
570 else
571 return pos1 <= pos2 + range;
572 }
573
574 // Breakends are distinguishable by there one reference Id (startId=endId) and position
575 // (start and end position are the same), strand doesn't matter
576 template <typename TId, typename TPos, typename TPosR>
_similarBreakends(Breakpoint<TId,TPos> & be1,Breakpoint<TId,TPos> & be2,TPosR const & range)577 inline bool _similarBreakends(Breakpoint<TId, TPos> & be1, Breakpoint<TId, TPos> & be2, TPosR const & range)
578 {
579 if (be1.startSeqId != be2.startSeqId)
580 return false;
581 return _posInSameRange(be1.startSeqPos, be2.startSeqPos, range);
582 }
583
584 template <typename TId, typename TPos, typename TPosR>
_breakendSupport(Breakpoint<TId,TPos> & be,Breakpoint<TId,TPos> & bp,TPosR const & range)585 inline bool _breakendSupport(Breakpoint<TId, TPos> & be, Breakpoint<TId, TPos> & bp, TPosR const & range)
586 {
587 typedef Breakpoint<TId, TPos> TBreakpoint;
588 if (be.startSeqId != bp.startSeqId && be.startSeqId != bp.endSeqId)
589 return false;
590 // If bp is duplication or translocation, also check targetpos
591 if ((bp.svtype == TBreakpoint::DISPDUPLICATION || bp.svtype == TBreakpoint::TRANSLOCATION || bp.svtype == TBreakpoint::INTERTRANSLOCATION)
592 && bp.dupMiddlePos != std::numeric_limits<unsigned>::max())
593 return (_posInSameRange(be.startSeqPos, bp.startSeqPos, range) ||
594 _posInSameRange(be.startSeqPos, bp.endSeqPos, range) ||
595 _posInSameRange(be.startSeqPos, bp.dupMiddlePos, range) );
596
597 return (_posInSameRange(be.startSeqPos, bp.startSeqPos, range) ||
598 _posInSameRange(be.startSeqPos, bp.endSeqPos, range) );
599 }
600 template <typename TId, typename TPos>
_similarBreakpoints(Breakpoint<TId,TPos> & bp1,Breakpoint<TId,TPos> & bp2,unsigned const & range)601 inline bool _similarBreakpoints(Breakpoint<TId, TPos> & bp1, Breakpoint<TId, TPos> & bp2, unsigned const & range)
602 {
603 typedef Breakpoint<TId, TPos> TBreakpoint;
604 if (bp1.svtype != bp2.svtype && bp1.svtype != TBreakpoint::TRANSLOCATION && bp1.svtype != TBreakpoint::DISPDUPLICATION
605 && bp2.svtype != TBreakpoint::TRANSLOCATION && bp2.svtype != TBreakpoint::DISPDUPLICATION)
606 return false;
607 if (bp1.startSeqId != bp2.startSeqId)
608 return false;
609 if (bp1.endSeqId != bp2.endSeqId)
610 return false;
611
612 if (bp1.svtype == TBreakpoint::DELETION || bp1.svtype == TBreakpoint::INVERSION)
613 return (_posInSameRange(bp1.startSeqPos, bp2.startSeqPos, range) && _posInSameRange(bp1.endSeqPos, bp2.endSeqPos, range));
614 if (bp1.svtype == TBreakpoint::INSERTION)
615 return (_posInSameRange(bp1.startSeqPos, bp2.startSeqPos, range)
616 && _posInSameRange(length(bp1.insertionSeq), length(bp2.insertionSeq), range));
617 if (bp1.svtype == TBreakpoint::DISPDUPLICATION || bp1.svtype == TBreakpoint::TRANSLOCATION)
618 {
619 if (bp1.dupMiddlePos != std::numeric_limits<unsigned>::max() && bp2.dupMiddlePos != std::numeric_limits<unsigned>::max())
620 return (_posInSameRange(bp1.dupMiddlePos, bp2.dupMiddlePos, range)
621 && _posInSameRange(bp1.startSeqPos, bp2.startSeqPos, range)
622 && _posInSameRange(bp1.endSeqPos, bp2.endSeqPos, range));
623 else
624 return (_posInSameRange(bp1.startSeqPos, bp2.startSeqPos, range) && _posInSameRange(bp1.endSeqPos, bp2.endSeqPos, range));
625 }
626 return false;
627 }
628
629 template <typename TId, typename TPos>
630 inline bool operator==(Breakpoint<TId, TPos> const & bp1, Breakpoint<TId, TPos> const & bp2)
631 {
632 if (bp1.svtype != bp2.svtype)
633 return false;
634 if (bp1.startSeqId != bp2.startSeqId)
635 return false;
636 if (bp1.endSeqId != bp2.endSeqId)
637 return false;
638 /*
639 if(bp1.startSeqStrand != bp2.startSeqStrand)
640 return false;
641 if(bp1.endSeqStrand != bp2.endSeqStrand)
642 return false;
643 */
644 if (bp1.startSeqPos != bp2.startSeqPos)
645 return false;
646 if (bp1.endSeqPos != bp2.endSeqPos)
647 return false;
648 if (bp1.svtype == 1 && bp2.svtype == 1)
649 return length(bp1.insertionSeq) == length(bp2.insertionSeq);
650
651 return true;
652 }
653
654 // ----------------------------------------------------------------------------
655 // Function operator<(Breakpoint)
656 // ----------------------------------------------------------------------------
657
658 template <typename TId, typename TPos>
659 inline bool operator<(Breakpoint<TId, TPos> const & bp1, Breakpoint<TId, TPos> const & bp2)
660 {
661 if (bp1.startSeqId != bp2.startSeqId)
662 return bp1.startSeqId < bp2.startSeqId;
663
664 if (bp1.endSeqId != bp2.endSeqId)
665 return bp1.endSeqId < bp2.endSeqId;
666
667 if (bp1.startSeqPos != bp2.startSeqPos)
668 return bp1.startSeqPos < bp2.startSeqPos;
669
670 return bp1.endSeqPos < bp2.endSeqPos;
671 }
672
673 template <typename TSequence, typename TId, typename TStream>
674 // std::ostream & operator<<(std::ostream & out, Breakpoint<TSequence, TId> const & value)
675 TStream & operator<<(TStream & out, Breakpoint<TSequence, TId> const & value)
676 {
677 typedef Breakpoint<TSequence, TId> TBreakpoint;
678 out << "Breakpoint: seq1 --> seq2; posInSeq1 --> posInSeq2; readPos1 --> readPos2 :" << std::endl;
679 out << value.startSeqId << " ( " << value.startSeqStrand << " ) " << " --> " << value.endSeqId << " ( " <<
680 value.endSeqStrand << " ) " << std::endl;
681 out << " ( " << value.startSeqPos + 1 << " ) --> ( " << value.endSeqPos + 1 << " ) " << std::endl;
682 if (value.dupMiddlePos != std::numeric_limits<unsigned>::max())
683 out << "dup middle pos " << value.dupMiddlePos + 1 << std::endl;
684 out << " ( " << value.readStartPos + 1 << " ) --> ( " << value.readEndPos + 1 << " ) " << std::endl;
685 switch (value.svtype)
686 {
687 case TBreakpoint::INVALID:
688 out << "SVType: invalid";
689 break;
690 case TBreakpoint::INSERTION:
691 out << "SVType: insertion";
692 break;
693 case TBreakpoint::DELETION:
694 out << "SVType: deletion";
695 break;
696 case TBreakpoint::INVERSION:
697 out << "SVType: inversion";
698 break;
699 case TBreakpoint::SEQAN_TANDEM:
700 out << "SVType: tandem";
701 break;
702 case TBreakpoint::DISPDUPLICATION:
703 out << "SVType: duplication";
704 break;
705 case TBreakpoint::INTERTRANSLOCATION:
706 out << "SVType: inter-transl";
707 break;
708 case TBreakpoint::TRANSLOCATION:
709 out << "SVType: intra-transl";
710 break;
711 case TBreakpoint::BREAKEND:
712 out << "SVType: breakend";
713 }
714 out << " insertionSeq: " << value.insertionSeq << std::endl;
715 out << "Support: " << value.support << " Ids: ";
716 for (unsigned i = 0; i < length(value.supportIds); ++i)
717 out << value.supportIds[i] << ", ";
718 out << std::endl;
719 return out;
720 }
721
722 template <typename TSequence, typename TId, typename TStream>
723 TStream & operator<<(TStream & out, StellarMatch<TSequence, TId> & match)
724 {
725 out << "DB Id: " << match.id;
726 if (match.orientation)
727 out << " + " << std::endl;
728 else
729 out << " - " << std::endl;
730 out << "DB pos: " << match.begin1 << " ... " << match.end1 << std::endl;
731 out << "Query pos: " << match.begin2 << " ... " << match.end2 << std::endl;
732
733 if (!match.orientation)
734 reverseComplement(infix(source(match.row1), match.begin1, match.end1));
735
736 typedef typename StellarMatch<TSequence, TId>::TAlign TAlign;
737 TAlign align;
738 // assignSource makes a local copy of the source sequence (only the part of the row)
739 appendValue(align.data_rows, match.row1);
740 appendValue(align.data_rows, match.row2);
741 out << align;
742 out << std::endl;
743 if (!match.orientation)
744 reverseComplement(infix(source(match.row1), match.begin1, match.end1));
745 return out;
746 }
747 // ----------------------------------------------------------------------------
748 // Function insertBestChain
749 // ----------------------------------------------------------------------------
750
751 template <typename TMSplazerChain, typename TMatchAlloc>
insertBestChain(TMSplazerChain & mspChain,TMatchAlloc const & chain)752 void insertBestChain(TMSplazerChain & mspChain, TMatchAlloc const & chain)
753 {
754 appendValue(mspChain.bestChains, chain);
755 }
756
757 template <typename TMSplazerChain, typename TMatchAlloc>
insertBestChain(TMSplazerChain & mspChain,TMatchAlloc & chain)758 void insertBestChain(TMSplazerChain & mspChain, TMatchAlloc & chain)
759 {
760 appendValue(mspChain.bestChains, chain);
761 }
762
763 template <typename T>
toString(const T & t)764 inline std::string toString(const T & t)
765 {
766 std::stringstream ss;
767 ss << t;
768 return ss.str();
769 }
770
mainWithOptions(Options & options)771 int mainWithOptions(Options & options)
772 {
773 typedef Iterator<String<CharString> >::Type TIterator;
774 std::cout << "Non-option Arguments:" << std::endl;
775 for (TIterator it = begin(options.texts); it != end(options.texts); ++it)
776 {
777 std::cout << " " << *it << std::endl;
778 }
779
780 return 0;
781 }
782
783 #endif // #ifndef SANDBOX_MY_SANDBOX_APPS_MSPLAZER_MSPLAZER_H_
784