1 // ==========================================================================
2 //                                   Gustaf
3 // ==========================================================================
4 // Copyright (c) 2011-2018, Kathrin Trappe, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Kathrin Trappe <kathrin.trappe@fu-berlin.de>
33 // ==========================================================================
34 
35 #ifndef SEQAN_APPS_GUSTAF_MSPLAZER_H_
36 #define SEQAN_APPS_GUSTAF_MSPLAZER_H_
37 
38 #define BREAKPOINT_DEBUG
39 
40 #include <seqan/basic.h>
41 #include <seqan/sequence.h>
42 #include <seqan/arg_parse.h>
43 
44 #include "create_stellarmatches_from_file.h"
45 
46 using namespace seqan;
47 
48 // ============================================================================
49 // Forwards
50 // ============================================================================
51 
52 // ============================================================================
53 // Tags, Classes, Enums
54 // ============================================================================
55 
56 
57 // ----------------------------------------------------------------------------
58 // Class MSplazerOptions
59 // ----------------------------------------------------------------------------
60 
61 struct MSplazerOptions
62 {
63     // I/O options
64     CharString databaseFile;        // name of database (db) file
65     StringSet<CharString> queryFile;           // name of query file(s) (two in case of paired-end)
66     // CharString queryFile2;        // name of 2nd query file (mate pairs)
67     CharString emptyReadsOutFile;   // file with reads that have broken or empty chains
68     CharString disabledQueriesFile; // name of result file containing disabled queries
69     CharString vcfOutFile;   // breakpoint output file name
70     CharString gffOutFile;   // breakpoint output file name
71     CharString jobName;             // job name, used for dot output
72     CharString stellarInputFile;    // optional input file with stellar matches
73     bool dotOut;
74 
75     // Transposition penalties and thresholds (criteria for inserting edges in read graphs)
76     unsigned diffDBPen;             // Penalty for matches on different databases
77     unsigned diffStrandPen;         // Pen. for matches on diff. strand of the same database
78     unsigned diffOrderPen;          // Pen. for matches with diff. order wrt the order within the query
79     unsigned noMateMatchesPen;      // Pen. for matches with no confirming mate matches
80     double simThresh;               // Allowed similarity between overlapping sequences, i.e. percentage of overlap
81     int gapThresh;                  // Allowed gap or distance between matches
82     int initGapThresh;              // Maximal allowed start or end gap length
83     int breakendThresh;             // Maximal allowed length for a breakend
84     int tandemThresh;               // Minimal length of insertion (double overlap) to be called as tandem repeat
85     unsigned breakpointPosRange;    // Allowed range of breakpoint positions
86     unsigned support;
87     unsigned mateSupport;
88     unsigned libSize;               // Library size (mate pairs)
89     unsigned libError;              // Library size (mate pairs)
90     bool pairedEndMode;             // Whether or not to run Gustaf in paired-end mode
91     bool revCompl;                  // Whether or not to rev-compl the second input file
92     bool inferComplexBP;           // Whether or not to inferr complex breakpoints
93     unsigned numThreads;            // Number of threads for parallelization
94 
MSplazerOptionsMSplazerOptions95     MSplazerOptions() :
96         databaseFile("reference.fa"),
97         emptyReadsOutFile("emptyReads.fa"),
98         disabledQueriesFile("msplazer.disabled.fasta"),
99         vcfOutFile("breakpoints.vcf"),
100         gffOutFile("breakpoints.gff"),
101         dotOut(false),
102         diffDBPen(5),
103         diffStrandPen(5),
104         diffOrderPen(0),
105         noMateMatchesPen(5),
106         simThresh(0.5),
107         gapThresh(5), // microindel size around breakpoints
108         initGapThresh(15),
109         breakendThresh(30),
110         tandemThresh(50),
111         breakpointPosRange(5),
112         support(2),
113         mateSupport(2),
114         libSize(0),
115         libError(0),
116         pairedEndMode(false),
117         revCompl(true),
118         inferComplexBP(true),
119         numThreads(1)
120         {}
121 };
122 
123 // ----------------------------------------------------------------------------
124 // Class Breakpoint
125 // ----------------------------------------------------------------------------
126 
127 // BreakPoint class: container for structural variant or RNA-seq breakpoint information
128 template <typename TSequence_, typename TId_>
129 struct Breakpoint
130 {
131     enum SVType
132     {
133         INVALID,            // 0
134         INSERTION,          // 1
135         DELETION,           // 2
136         INVERSION,          // 3
137         SEQAN_TANDEM,       // 4  TANDEM is an IOCTL numeric constant in Hurd
138         DISPDUPLICATION,    // 5
139         INTERTRANSLOCATION, // 6
140         TRANSLOCATION,      // 7
141         BREAKEND            // 8
142     };
143 
144     typedef TSequence_                          TSequence;
145     typedef TId_                                TId;
146     typedef typename Position<TSequence>::Type  TPos;
147 
148     // Ids of the two sequences
149     TId startSeqId;
150     TId endSeqId;
151     TId midPosId;
152     // Sequence orientation
153     bool startSeqStrand;
154     bool endSeqStrand;
155     bool midPosStrand;
156     // Last position in start sequence and first position in end sequence
157     TPos startSeqPos;  // End position of split alignment, i.e. first position of variant
158     TPos endSeqPos;    // Begin position of split alignment, i.e. position after split in reference, i.e. after the variant
159     TPos dupTargetPos; // Pos. before inserted, duplicated seq (should be equal to either startSeqPos or endSeqPos)
160     TPos dupMiddlePos; // TPos dupMidPos; Middle position of duplication or translocation
161                        // per default position before event, has to be adjustet depending on dupTargetPos
162     TPos readStartPos;
163     TPos readEndPos;
164     TPos cipos;       // Confidence after startSeqPos for imprecise breakpoint
165     TPos ciend;       // Confidence after endSeqPos for imprecise breakpoint
166     TPos cimiddle;    // Confidence after dupMiddlePos for imprecise breakpoint
167     // Counter of occurrences (read support)
168     unsigned support;
169     unsigned similar;
170     // Query Sequence Ids (queries/reads that support the breakpoint)
171     StringSet<TId> supportIds;
172     // SV type
173     SVType svtype;
174     TSequence insertionSeq;
175     bool revStrandDel;
176     bool inferredBP;
177     // If both of these flags are true, then we have seen two (pseudo)deletions supporting both start and end position
178     // of a translocation.
179     bool translSuppStartPos;
180     bool translSuppEndPos;
181     // bool imprecise = false;
182     // Storing on which site the breakend is:
183     // 0: left breakend, i.e. sequence continues right of position
184     // 1: right breakend, i.e. sequence continues left of position
185     bool breakend;
186 
BreakpointBreakpoint187     Breakpoint() :
188         startSeqId("####"),
189         endSeqId("####"),
190         midPosId("####"),
191         startSeqStrand(true),
192         endSeqStrand(true),
193         midPosStrand(false),
194         startSeqPos(0),
195         endSeqPos(0),
196         dupTargetPos(std::numeric_limits<unsigned>::max()),
197         dupMiddlePos(std::numeric_limits<unsigned>::max()),
198         readStartPos(0),
199         readEndPos(0),
200         cipos(std::numeric_limits<unsigned>::max()),
201         ciend(std::numeric_limits<unsigned>::max()),
202         cimiddle(std::numeric_limits<unsigned>::max()),
203         support(1),
204         similar(std::numeric_limits<unsigned>::max()),
205         svtype(INVALID),
206         insertionSeq("NNNN"),
207         revStrandDel(false),
208         inferredBP(false),
209         translSuppStartPos(false),
210         translSuppEndPos(false),
211         breakend(false)
212     {}
213 
BreakpointBreakpoint214     Breakpoint(TId const & sId,
215                TId const & eId,
216                bool const & sStrand,
217                bool const & eStrand,
218                TPos const & sPos,
219                TPos const & ePos,
220                TPos const & rsPos,
221                TPos const & rePos) :
222         startSeqId(sId),
223         endSeqId(eId),
224         midPosId("####"),
225         startSeqStrand(sStrand),
226         endSeqStrand(eStrand),
227         midPosStrand(false),
228         startSeqPos(sPos),
229         endSeqPos(ePos),
230         dupTargetPos(std::numeric_limits<unsigned>::max()),
231         dupMiddlePos(std::numeric_limits<unsigned>::max()),
232         readStartPos(rsPos),
233         readEndPos(rePos),
234         cipos(std::numeric_limits<unsigned>::max()),
235         ciend(std::numeric_limits<unsigned>::max()),
236         cimiddle(std::numeric_limits<unsigned>::max()),
237         support(1),
238         similar(std::numeric_limits<unsigned>::max()),
239         svtype(INVALID),
240         insertionSeq("NNNN"),
241         revStrandDel(false),
242         inferredBP(false),
243         translSuppStartPos(false),
244         translSuppEndPos(false),
245         breakend(false)
246     {}
247 
BreakpointBreakpoint248     Breakpoint(TId const & sId,
249                TId const & eId,
250                bool const & sStrand,
251                bool const & eStrand,
252                TPos const & sPos,
253                TPos const & ePos,
254                TPos const & rsPos,
255                TPos const & rePos,
256                TId const & spId) :
257         startSeqId(sId),
258         endSeqId(eId),
259         midPosId("####"),
260         startSeqStrand(sStrand),
261         endSeqStrand(eStrand),
262         midPosStrand(false),
263         startSeqPos(sPos),
264         endSeqPos(ePos),
265         dupTargetPos(std::numeric_limits<unsigned>::max()),
266         dupMiddlePos(std::numeric_limits<unsigned>::max()),
267         readStartPos(rsPos),
268         readEndPos(rePos),
269         cipos(std::numeric_limits<unsigned>::max()),
270         ciend(std::numeric_limits<unsigned>::max()),
271         cimiddle(std::numeric_limits<unsigned>::max()),
272         support(1),
273         similar(std::numeric_limits<unsigned>::max()),
274         svtype(INVALID),
275         insertionSeq("NNNN"),
276         revStrandDel(false),
277         inferredBP(false),
278         translSuppStartPos(false),
279         translSuppEndPos(false),
280         breakend(false)
281     {appendValue(supportIds, spId); }
282 };
283 
284 // ----------------------------------------------------------------------------
285 // Class SparsePropertyMap
286 // ----------------------------------------------------------------------------
287 
288 // Sparse property map class: A property map that has only a few objects or where most of the object would be empty
289 template <typename TValue, typename TPos>
290 struct SparsePropertyMap
291 {
292     typedef String<TValue>       TValueTable;
293     typedef String<TPos>         TSlotLookupTable;
294 
295     TValueTable valueTable;
296     // slotLookupTable: Stores at each position (descriptor value) the position of the
297     // corresponding object in valueTable, or -1 if there is no object
298     // for this descriptor
299     TSlotLookupTable    slotLookupTable;
300 
SparsePropertyMapSparsePropertyMap301     SparsePropertyMap(){}
SparsePropertyMapSparsePropertyMap302     SparsePropertyMap(TValueTable vt, TSlotLookupTable slt) :
303         valueTable(vt), slotLookupTable(slt) {}
304 };
305 /*
306  * Only works within seqan namespace
307 template <typename TValue, typename TPos>
308 struct Value<SparsePropertyMap<TValue, TPos> >
309 {
310     typedef TValue Type;
311 };
312 template <typename TValue, typename TPos>
313 struct Value<SparsePropertyMap<TValue, TPos> const>
314 {
315     typedef TValue Type;
316 };
317 */
318 
319 // ----------------------------------------------------------------------------
320 // Class MSplazerChain
321 // ----------------------------------------------------------------------------
322 
323 // Container for storing chaining graph, matchDistanceScores, start and end vertex for one read
324 // Vertex descriptor value of each vertex in the graph corresponds to the position of the stellar match within
325 // container for Stellar matches (Query Matches)
326 template <typename TGraph_, typename TVertexDescriptor_, typename TScoreAlloc_, typename TSparsePropertyMap_,
327           typename TMatchAlloc_>
328 struct MSplazerChain
329 {
330     typedef TGraph_                 TGraph;
331     typedef TVertexDescriptor_      TVertexDescriptor;
332     typedef TScoreAlloc_            TScoreAlloc;
333     typedef TSparsePropertyMap_     TSparsePropertyMap;
334     typedef typename Size<TGraph>::Type TGraphSize;
335     typedef TMatchAlloc_        TMatchAlloc;
336     typedef IntervalAndCargo<unsigned, unsigned> TInterval;
337     typedef IntervalTree<unsigned, unsigned> TIntervalTree;
338 
339     TGraph graph;                       // Contains (Stellar)matches as vertices and edges between compatible matches
340     TVertexDescriptor startVertex;      // Artificial start and end vertex (represents start/end of the read sequence)
341     TVertexDescriptor endVertex;
342     String<TVertexDescriptor> predMap;  // Predecessor and distance map for dagShortestPath
343     String<TGraphSize> distMap;         // Distance map for shortest path
344     TScoreAlloc matchDistanceScores;    // Distance scores of matches (edit distance of reference mapping)
345     TSparsePropertyMap breakpoints;
346     String<TMatchAlloc> bestChains;
347     TIntervalTree rightMateTree;
348     TIntervalTree leftMateTree;
349     unsigned mateJoinPosition;
350     bool isEmpty;
351     bool isPartial;
352     // bool transl/dupl;
353 
MSplazerChainMSplazerChain354     MSplazerChain(TScoreAlloc & _scores) :
355         startVertex(), endVertex(),
356         matchDistanceScores(_scores), mateJoinPosition(0),
357         isEmpty(false), isPartial(false)
358     {}
359 };
360 
361 struct Options
362 {
363     bool showHelp;
364     bool showVersion;
365     int i;
366     String<CharString> texts;
367 
OptionsOptions368     Options(bool const & h, int & it) :
369         showHelp(h), i(it)
370     {}
371 
372 };
373 
374 // ============================================================================
375 // Metafunctions
376 // ============================================================================
377 
378 // ============================================================================
379 // Functions
380 // ============================================================================
381 
382 // ----------------------------------------------------------------------------
383 // Function assignProperty()
384 // ----------------------------------------------------------------------------
385 
386 template <typename TObject, typename TPos, typename TDescriptor>
assignProperty(SparsePropertyMap<TObject,TPos> & spm,TDescriptor const & descr)387 inline void assignProperty(SparsePropertyMap<TObject, TPos> & spm, TDescriptor const & descr)
388 {
389     assignProperty(spm.slotLookupTable, descr, -1);
390 }
391 
392 template <typename TObject, typename TPos, typename TDescriptor, typename TValue>
assignProperty(SparsePropertyMap<TObject,TPos> & spm,TDescriptor const & descr,TValue const & val)393 inline void assignProperty(SparsePropertyMap<TObject, TPos> & spm,
394                            TDescriptor const & descr,
395                            TValue const & val)
396 {
397     assignProperty(spm.slotLookupTable, descr, length(spm.valueTable));
398     appendValue(spm.valueTable, val);
399 }
400 
401 // ----------------------------------------------------------------------------
402 // Function property()
403 // ----------------------------------------------------------------------------
404 
405 template <typename TObject, typename TPos, typename TDescriptor>
property(SparsePropertyMap<TObject,TPos> & spm,TDescriptor const & descr)406 inline typename Reference<TObject>::Type property(SparsePropertyMap<TObject, TPos> & spm,
407                                                   TDescriptor const & descr)
408 {
409 
410     TPos index = getProperty(spm.slotLookupTable, descr);
411     // obj = getValue(spm.valueTable, index);
412     return value(spm.valueTable, index);
413 }
414 
415 template <typename TObject, typename TPos, typename TDescriptor>
property(SparsePropertyMap<TObject,TPos> const & spm,TDescriptor const & descr)416 inline typename Reference<TObject>::Type property(SparsePropertyMap<TObject, TPos> const & spm,
417                                                   TDescriptor const & descr)
418 {
419 
420     TPos index = getProperty(spm.slotLookupTable, descr);
421     // obj = getValue(spm.valueTable, index);
422     return value(spm.valueTable, index);
423 }
424 
425 // ----------------------------------------------------------------------------
426 // Function getProperty()
427 // ----------------------------------------------------------------------------
428 
429 template <typename TObject, typename TPos, typename TDescriptor>
getProperty(SparsePropertyMap<TObject,TPos> & spm,TDescriptor const & descr,TObject & obj)430 inline bool getProperty(SparsePropertyMap<TObject, TPos> & spm, TDescriptor const & descr, TObject & obj)
431 {
432     TPos index = getProperty(spm.slotLookupTable, descr);
433     if (index == static_cast<TPos>(-1))
434         return 0;
435 
436     obj = value(spm.valueTable, index);
437     return 1;
438 }
439 
440 template <typename TObject, typename TPos, typename TDescriptor>
getProperty(SparsePropertyMap<TObject,TPos> const & spm,TDescriptor const & descr,TObject & obj)441 inline bool getProperty(SparsePropertyMap<TObject, TPos> const & spm, TDescriptor const & descr, TObject & obj)
442 {
443     TPos index = getProperty(spm.slotLookupTable, descr);
444     if (index == static_cast<TPos>(-1))
445         return 0;
446 
447     obj = value(spm.valueTable, index);
448     return 1;
449 }
450 
451 // ----------------------------------------------------------------------------
452 // Function appendSupportId()
453 // ----------------------------------------------------------------------------
454 
455 template <typename TBreakpoint, typename TId>
appendSupportId(TBreakpoint & bp,TId const & id)456 inline void appendSupportId(TBreakpoint & bp, TId const & id)
457 {
458     for (unsigned i = 0; i < length(bp.supportIds); ++i)
459     {
460         if (bp.supportIds[i] == id)
461         {
462             ++bp.support;
463             return;
464         }
465     }
466     appendValue(bp.supportIds, id);
467     ++bp.support;
468 }
469 
470 template <typename TBreakpoint, typename TId>
appendSupportId(TBreakpoint & bp,StringSet<TId> const & ids)471 inline void appendSupportId(TBreakpoint & bp, StringSet<TId> const & ids)
472 {
473 /*
474     typedef typename Iterator<StringSet<TId> >::Type TIterator;
475     TIterator it = begin(ids);
476     for(;!atEnd(it);goNext(it))
477         appendSupportId(bp, *it);
478 */
479     for (unsigned i = 0; i < length(ids); ++i)
480         appendSupportId(bp, ids[i]);
481 }
482 
483 // ----------------------------------------------------------------------------
484 // Function setSupport()
485 // ----------------------------------------------------------------------------
486 template <typename TBreakpoint>
setSupport(TBreakpoint & bp,unsigned const & value)487 inline void setSupport(TBreakpoint & bp, unsigned const & value)
488 {
489     assignValue(bp.support, value);
490 }
491 
492 // ----------------------------------------------------------------------------
493 // Function getSVType()
494 // ----------------------------------------------------------------------------
495 
496 template <typename TSequence, typename TId, typename TSVType>
getSVType(Breakpoint<TSequence,TId> & bp)497 inline TSVType getSVType(Breakpoint<TSequence, TId> & bp)
498 {
499     return bp.svtype;
500 }
501 
502 // ----------------------------------------------------------------------------
503 // Function setSVType()
504 // ----------------------------------------------------------------------------
505 
506 template <typename TBreakpoint, typename TSVType>
setSVType(TBreakpoint & bp,TSVType type)507 inline void setSVType(TBreakpoint & bp, TSVType type)
508 {
509     bp.svtype = type;
510 }
511 
512 template <typename TBreakpoint>
setSVType(TBreakpoint & bp,bool refOrder)513 inline bool setSVType(TBreakpoint & bp, bool refOrder)
514 {
515     // if insertion return 1; else return 0;
516     if (bp.startSeqId != bp.endSeqId)
517     {
518         bp.svtype = TBreakpoint::INTERTRANSLOCATION;
519         return false;
520     }
521     if (bp.startSeqStrand != bp.endSeqStrand)
522     {
523         if (bp.startSeqPos > bp.endSeqPos)
524         {
525             std::swap(bp.startSeqPos, bp.endSeqPos);
526         }
527         setSVType(bp, TBreakpoint::INVERSION);
528         return false;
529     }
530     if (bp.startSeqPos < bp.endSeqPos)
531     {
532         // Normal order on reverse strand(!) indicates duplication, on forward a deletion
533         if (!bp.startSeqStrand)
534         {
535             setSVType(bp, TBreakpoint::DISPDUPLICATION);
536             return false;
537         }
538         setSVType(bp, TBreakpoint::DELETION);
539         return false;
540     }
541     if (bp.startSeqPos > bp.endSeqPos)
542     {
543         // Different order on forward strand(!) indicates duplication, on reverse deletion
544         std::swap(bp.startSeqPos, bp.endSeqPos);
545         if (!refOrder && !bp.startSeqStrand)
546         {
547             setSVType(bp, TBreakpoint::DELETION);
548             bp.revStrandDel = true;
549             return false;
550         }
551         setSVType(bp, TBreakpoint::DISPDUPLICATION);
552         return false;
553     }
554     setSVType(bp, TBreakpoint::INSERTION);
555     return true;
556 }
557 
558 template <typename TBreakpoint, typename TSequence>
setInsertionSeq(TBreakpoint & bp,TSequence & inSeq)559 inline void setInsertionSeq(TBreakpoint & bp, TSequence & inSeq)
560 {
561     bp.insertionSeq = inSeq;
562     // assignValue(bp.insertionSeq, inSeq);
563 }
564 
565 template <typename TPos, typename TPosR>
_posInSameRange(TPos const & pos1,TPos const & pos2,TPosR const & range)566 inline bool _posInSameRange(TPos const & pos1, TPos const & pos2, TPosR const & range)
567 {
568     if (pos1 < pos2)
569         return pos2 <= pos1 + range;
570     else
571         return pos1 <= pos2 + range;
572 }
573 
574 // Breakends are distinguishable by there one reference Id (startId=endId) and position
575 // (start and end position are the same), strand doesn't matter
576 template <typename TId, typename TPos, typename TPosR>
_similarBreakends(Breakpoint<TId,TPos> & be1,Breakpoint<TId,TPos> & be2,TPosR const & range)577 inline bool _similarBreakends(Breakpoint<TId, TPos> & be1, Breakpoint<TId, TPos> & be2, TPosR const & range)
578 {
579     if (be1.startSeqId != be2.startSeqId)
580         return false;
581     return _posInSameRange(be1.startSeqPos, be2.startSeqPos, range);
582 }
583 
584 template <typename TId, typename TPos, typename TPosR>
_breakendSupport(Breakpoint<TId,TPos> & be,Breakpoint<TId,TPos> & bp,TPosR const & range)585 inline bool _breakendSupport(Breakpoint<TId, TPos> & be, Breakpoint<TId, TPos> & bp, TPosR const & range)
586 {
587     typedef Breakpoint<TId, TPos> TBreakpoint;
588     if (be.startSeqId != bp.startSeqId && be.startSeqId != bp.endSeqId)
589         return false;
590     // If bp is duplication or translocation, also check targetpos
591     if ((bp.svtype == TBreakpoint::DISPDUPLICATION || bp.svtype == TBreakpoint::TRANSLOCATION || bp.svtype == TBreakpoint::INTERTRANSLOCATION)
592             && bp.dupMiddlePos != std::numeric_limits<unsigned>::max())
593         return (_posInSameRange(be.startSeqPos, bp.startSeqPos, range) ||
594                 _posInSameRange(be.startSeqPos, bp.endSeqPos, range)   ||
595                 _posInSameRange(be.startSeqPos, bp.dupMiddlePos, range) );
596 
597     return (_posInSameRange(be.startSeqPos, bp.startSeqPos, range) ||
598             _posInSameRange(be.startSeqPos, bp.endSeqPos, range) );
599 }
600 template <typename TId, typename TPos>
_similarBreakpoints(Breakpoint<TId,TPos> & bp1,Breakpoint<TId,TPos> & bp2,unsigned const & range)601 inline bool _similarBreakpoints(Breakpoint<TId, TPos> & bp1, Breakpoint<TId, TPos> & bp2, unsigned const & range)
602 {
603     typedef Breakpoint<TId, TPos> TBreakpoint;
604     if (bp1.svtype != bp2.svtype && bp1.svtype != TBreakpoint::TRANSLOCATION && bp1.svtype != TBreakpoint::DISPDUPLICATION
605                                  && bp2.svtype != TBreakpoint::TRANSLOCATION && bp2.svtype != TBreakpoint::DISPDUPLICATION)
606         return false;
607     if (bp1.startSeqId != bp2.startSeqId)
608         return false;
609     if (bp1.endSeqId != bp2.endSeqId)
610         return false;
611 
612     if (bp1.svtype == TBreakpoint::DELETION || bp1.svtype == TBreakpoint::INVERSION)
613         return (_posInSameRange(bp1.startSeqPos, bp2.startSeqPos, range) && _posInSameRange(bp1.endSeqPos, bp2.endSeqPos, range));
614     if (bp1.svtype == TBreakpoint::INSERTION)
615         return (_posInSameRange(bp1.startSeqPos, bp2.startSeqPos, range)
616                 && _posInSameRange(length(bp1.insertionSeq), length(bp2.insertionSeq), range));
617     if (bp1.svtype == TBreakpoint::DISPDUPLICATION || bp1.svtype == TBreakpoint::TRANSLOCATION)
618     {
619         if (bp1.dupMiddlePos != std::numeric_limits<unsigned>::max() && bp2.dupMiddlePos != std::numeric_limits<unsigned>::max())
620             return (_posInSameRange(bp1.dupMiddlePos, bp2.dupMiddlePos, range)
621                     && _posInSameRange(bp1.startSeqPos, bp2.startSeqPos, range)
622                     && _posInSameRange(bp1.endSeqPos, bp2.endSeqPos, range));
623         else
624             return (_posInSameRange(bp1.startSeqPos, bp2.startSeqPos, range) && _posInSameRange(bp1.endSeqPos, bp2.endSeqPos, range));
625     }
626     return false;
627 }
628 
629 template <typename TId, typename TPos>
630 inline bool operator==(Breakpoint<TId, TPos> const & bp1, Breakpoint<TId, TPos> const & bp2)
631 {
632     if (bp1.svtype != bp2.svtype)
633         return false;
634     if (bp1.startSeqId != bp2.startSeqId)
635         return false;
636     if (bp1.endSeqId != bp2.endSeqId)
637         return false;
638     /*
639     if(bp1.startSeqStrand != bp2.startSeqStrand)
640         return false;
641     if(bp1.endSeqStrand != bp2.endSeqStrand)
642         return false;
643     */
644     if (bp1.startSeqPos != bp2.startSeqPos)
645         return false;
646     if (bp1.endSeqPos != bp2.endSeqPos)
647         return false;
648     if (bp1.svtype == 1 && bp2.svtype == 1)
649         return length(bp1.insertionSeq) == length(bp2.insertionSeq);
650 
651     return true;
652 }
653 
654 // ----------------------------------------------------------------------------
655 // Function operator<(Breakpoint)
656 // ----------------------------------------------------------------------------
657 
658 template <typename TId, typename TPos>
659 inline bool operator<(Breakpoint<TId, TPos> const & bp1, Breakpoint<TId, TPos> const & bp2)
660 {
661     if (bp1.startSeqId != bp2.startSeqId)
662         return bp1.startSeqId < bp2.startSeqId;
663 
664     if (bp1.endSeqId != bp2.endSeqId)
665         return bp1.endSeqId < bp2.endSeqId;
666 
667     if (bp1.startSeqPos != bp2.startSeqPos)
668         return bp1.startSeqPos < bp2.startSeqPos;
669 
670     return bp1.endSeqPos < bp2.endSeqPos;
671 }
672 
673 template <typename TSequence, typename TId, typename TStream>
674 // std::ostream & operator<<(std::ostream & out, Breakpoint<TSequence, TId> const & value)
675 TStream & operator<<(TStream & out, Breakpoint<TSequence, TId> const & value)
676 {
677     typedef Breakpoint<TSequence, TId> TBreakpoint;
678     out << "Breakpoint: seq1 --> seq2; posInSeq1 --> posInSeq2; readPos1 --> readPos2 :" << std::endl;
679     out << value.startSeqId << " ( " << value.startSeqStrand << " ) " << " --> " << value.endSeqId << " ( " <<
680     value.endSeqStrand << " ) " << std::endl;
681     out << " ( " << value.startSeqPos + 1 << " ) --> ( " << value.endSeqPos + 1 << " ) " << std::endl;
682     if (value.dupMiddlePos != std::numeric_limits<unsigned>::max())
683         out << "dup middle pos " << value.dupMiddlePos + 1 << std::endl;
684     out << " ( " << value.readStartPos + 1 << " ) --> ( " << value.readEndPos + 1 << " ) " << std::endl;
685     switch (value.svtype)
686     {
687         case TBreakpoint::INVALID:
688         out << "SVType: invalid";
689         break;
690         case TBreakpoint::INSERTION:
691         out << "SVType: insertion";
692         break;
693         case TBreakpoint::DELETION:
694         out << "SVType: deletion";
695         break;
696         case TBreakpoint::INVERSION:
697         out << "SVType: inversion";
698         break;
699         case TBreakpoint::SEQAN_TANDEM:
700         out << "SVType: tandem";
701         break;
702         case TBreakpoint::DISPDUPLICATION:
703         out << "SVType: duplication";
704         break;
705         case TBreakpoint::INTERTRANSLOCATION:
706         out << "SVType: inter-transl";
707         break;
708         case TBreakpoint::TRANSLOCATION:
709         out << "SVType: intra-transl";
710         break;
711         case TBreakpoint::BREAKEND:
712         out << "SVType: breakend";
713     }
714     out << " insertionSeq: " << value.insertionSeq << std::endl;
715     out << "Support: " << value.support << " Ids: ";
716     for (unsigned i = 0; i < length(value.supportIds); ++i)
717         out << value.supportIds[i] << ", ";
718     out << std::endl;
719     return out;
720 }
721 
722 template <typename TSequence, typename TId, typename TStream>
723 TStream & operator<<(TStream & out, StellarMatch<TSequence, TId> & match)
724 {
725     out << "DB Id: " << match.id;
726     if (match.orientation)
727         out << " + " << std::endl;
728     else
729         out << " - " << std::endl;
730     out << "DB pos: " << match.begin1 << " ... " << match.end1 << std::endl;
731     out << "Query pos: " << match.begin2 << " ... " << match.end2 << std::endl;
732 
733     if (!match.orientation)
734         reverseComplement(infix(source(match.row1), match.begin1, match.end1));
735 
736     typedef typename StellarMatch<TSequence, TId>::TAlign TAlign;
737     TAlign align;
738     // assignSource makes a local copy of the source sequence (only the part of the row)
739     appendValue(align.data_rows, match.row1);
740     appendValue(align.data_rows, match.row2);
741     out << align;
742     out << std::endl;
743     if (!match.orientation)
744         reverseComplement(infix(source(match.row1), match.begin1, match.end1));
745     return out;
746 }
747 // ----------------------------------------------------------------------------
748 // Function insertBestChain
749 // ----------------------------------------------------------------------------
750 
751 template <typename TMSplazerChain, typename TMatchAlloc>
insertBestChain(TMSplazerChain & mspChain,TMatchAlloc const & chain)752 void insertBestChain(TMSplazerChain & mspChain, TMatchAlloc const & chain)
753 {
754     appendValue(mspChain.bestChains, chain);
755 }
756 
757 template <typename TMSplazerChain, typename TMatchAlloc>
insertBestChain(TMSplazerChain & mspChain,TMatchAlloc & chain)758 void insertBestChain(TMSplazerChain & mspChain, TMatchAlloc & chain)
759 {
760     appendValue(mspChain.bestChains, chain);
761 }
762 
763 template <typename T>
toString(const T & t)764 inline std::string toString(const T & t)
765 {
766     std::stringstream ss;
767     ss << t;
768     return ss.str();
769 }
770 
mainWithOptions(Options & options)771 int mainWithOptions(Options & options)
772 {
773     typedef Iterator<String<CharString> >::Type TIterator;
774     std::cout << "Non-option Arguments:" << std::endl;
775     for (TIterator it = begin(options.texts); it != end(options.texts); ++it)
776     {
777         std::cout << "  " << *it << std::endl;
778     }
779 
780     return 0;
781 }
782 
783 #endif  // #ifndef SANDBOX_MY_SANDBOX_APPS_MSPLAZER_MSPLAZER_H_
784