1 /*===========================================================================
2 *
3 *                            PUBLIC DOMAIN NOTICE
4 *               National Center for Biotechnology Information
5 *
6 *  This software/database is a "United States Government Work" under the
7 *  terms of the United States Copyright Act.  It was written as part of
8 *  the author's official duties as a United States Government employee and
9 *  thus cannot be copyrighted.  This software/database is freely available
10 *  to the public for use. The National Library of Medicine and the U.S.
11 *  Government have not placed any restriction on its use or reproduction.
12 *
13 *  Although all reasonable efforts have been taken to ensure the accuracy
14 *  and reliability of the software and data, the NLM and the U.S.
15 *  Government do not and cannot warrant the performance or results that
16 *  may be obtained by using this software or data. The NLM and the U.S.
17 *  Government disclaim all warranties, express or implied, including
18 *  warranties of performance, merchantability or fitness for any particular
19 *  purpose.
20 *
21 *  Please cite the author in any work or product based on this material.
22 *
23 * ===========================================================================
24 *
25 */
26 
27 #ifndef _hpp_ngs_pileup_event_
28 #define _hpp_ngs_pileup_event_
29 
30 #ifndef _hpp_ngs_error_msg_
31 #include <ngs/ErrorMsg.hpp>
32 #endif
33 
34 #ifndef _hpp_ngs_stringref_
35 #include <ngs/StringRef.hpp>
36 #endif
37 
38 #include <stdint.h>
39 
40 namespace ngs
41 {
42 
    /*----------------------------------------------------------------------
     * forwards and typedefs
     *
     *  PileupEventRef
     *   pointer to a PileupEventItf.
     *   the elaborated "class PileupEventItf" inside the typedef names
     *   the class without requiring its definition at this point, so
     *   the pointer type can be declared here on its own.
     *   PileupEvent keeps one such pointer in its "self" member.
     */
    typedef class PileupEventItf * PileupEventRef;
47 
48 
49     /*======================================================================
50      * PileupEvent
51      *  represents a single cell of a sparse 2D matrix
52      *  with Reference coordinates on one axis
53      *  and stacked Alignments on the other axis
54      */
55     class PileupEvent
56     {
57     public:
58 
59         /*------------------------------------------------------------------
60          * Reference
61          */
62 
63         /* getMappingQuality
64          */
65         int getMappingQuality () const
66             NGS_THROWS ( ErrorMsg );
67 
68 
69         /*------------------------------------------------------------------
70          * Alignment
71          */
72 
73         /* getAlignmentId
74          *  unique within ReadCollection
75          */
76         StringRef getAlignmentId () const
77             NGS_THROWS ( ErrorMsg );
78 
79         /* getAlignmentPosition
80          *  gives position of event on sequence
81          */
82         int64_t getAlignmentPosition () const
83             NGS_THROWS ( ErrorMsg );
84 
85         /* getFirstAlignmentPosition
86          *  returns the position of this Alignment's first event
87          *  in Reference coordinates
88          */
89         int64_t getFirstAlignmentPosition () const
90             NGS_THROWS ( ErrorMsg );
91 
92         /* getLastAlignmentPosition
93          *  returns the position of this Alignment's last event
94          *  in INCLUSIVE Reference coordinates
95          */
96         int64_t getLastAlignmentPosition () const
97             NGS_THROWS ( ErrorMsg );
98 
99 
100         /*------------------------------------------------------------------
101          * event details
102          */
103 
104         /* EventType
105          */
106         enum PileupEventType
107         {
108             // event types representable in reference coordinate space
109             match                     = 0,
110             mismatch                  = 1,
111             deletion                  = 2,
112 
113             // an insertion cannot be represented in reference coordinate
114             // space ( so no insertion event can be directly represented ),
115             // but it can occur before a match or mismatch event.
116             // insertion is represented as a bit
117             insertion                 = 0x08,
118 
119             // insertions into the reference
120             insertion_before_match    = insertion | match,
121             insertion_before_mismatch = insertion | mismatch,
122 
123             // simultaneous insertion and deletion,
124             // a.k.a. a replacement
125             insertion_before_deletion = insertion | deletion,
126             replacement               = insertion_before_deletion,
127 
128             // additional modifier bits - may be added to any event above
129             alignment_start           = 0x80,
130             alignment_stop            = 0x40,
131             alignment_minus_strand    = 0x20
132         };
133 
134         /* getEventType
135          *  the type of event being represented
136          *
137          *  a match event indicates that the aligned sequence base
138          *  exactly matches the corresponding base in the reference.
139          *
140          *  a mismatch event indicates that the sequence and
141          *  references bases do not match even though they are
142          *  considered aligned. The actual sequence base and its
143          *  quality value may be retrieved with
144          *    "getAlignmentBase()" and "getAlignmentQuality()"
145          *
146          *  a deletion event indicates a base that is present in
147          *  the reference but missing in the sequence.
148          *
149          *  an insertion cannot be represented in reference coordinate
150          *  space ( so no insertion event can be directly represented ),
151          *  but it can occur before a match, mismatch or deletion event.
152          *  insertion is represented as a modifier bit. If this bit
153          *  is set, then the event was preceded by an insertion.
154          *  The inserted bases and qualities can be retrieved by
155          *    "getInsertionBases()" and "getInsertionQualities()"
156          *
157          */
158         PileupEventType getEventType () const
159             NGS_THROWS ( ErrorMsg );
160 
161         /* getAlignmentBase
162          *  retrieves base aligned at current Reference position
163          *  returns '-' for deletion events
164          */
165         char getAlignmentBase () const
166             NGS_THROWS ( ErrorMsg );
167 
168         /* getAlignmentQuality
169          *  retrieves quality aligned at current Reference position
170          *  returns '!' for deletion events
171          *  quality is ascii-encoded phred score
172          */
173         char getAlignmentQuality () const
174             NGS_THROWS ( ErrorMsg );
175 
176         /* getInsertionBases
177          *  returns bases corresponding to insertion event
178          *  returns empty string for all non-insertion events
179          */
180         StringRef getInsertionBases () const
181             NGS_THROWS ( ErrorMsg );
182 
183         /* getInsertionQualities
184          *  returns qualities corresponding to insertion event
185          */
186         StringRef getInsertionQualities () const
187             NGS_THROWS ( ErrorMsg );
188 
189         /* getEventRepeatCount
190          *  returns the number of times this event repeats
191          *  i.e. the distance to the first reference position
192          *  yielding a different event for this alignment.
193          */
194         uint32_t getEventRepeatCount () const
195             NGS_THROWS ( ErrorMsg );
196 
197         /* EventIndelType
198          */
199         enum EventIndelType
200         {
201             normal_indel              = 0,
202 
203             // introns behave like deletions
204             // (i.e. can retrieve deletion count),
205             // "_plus" and "_minus" signify direction
206             // of transcription if known
207             intron_plus               = 1,
208             intron_minus              = 2,
209             intron_unknown            = 3,
210 
211             // overlap is reported as an insertion,
212             // but is actually an overlap in the read
213             // inherent in technology like Complete Genomics
214             read_overlap              = 4,
215 
216             // gap is reported as a deletion,
217             // but is actually a gap in the read
218             // inherent in technology like Complete Genomics
219             read_gap                  = 5
220         };
221 
222         /* getEventIndelType
223          *  returns detail about the type of indel
224          *  when event type is an insertion or deletion
225          */
226         EventIndelType getEventIndelType () const
227             NGS_THROWS ( ErrorMsg );
228 
229     public:
230 
231         // C++ support
232 
233         PileupEvent & operator = ( PileupEventRef ref )
234             NGS_NOTHROW ();
235         PileupEvent ( PileupEventRef ref )
236             NGS_NOTHROW ();
237 
238         PileupEvent & operator = ( const PileupEvent & obj )
239             NGS_THROWS ( ErrorMsg );
240         PileupEvent ( const PileupEvent & obj )
241             NGS_THROWS ( ErrorMsg );
242 
243         ~ PileupEvent ()
244             NGS_NOTHROW ();
245 
246     protected:
247 
248         PileupEventRef self;
249     };
250 
251 } // namespace ngs
252 
253 
254 #ifndef _inl_ngs_pileup_event_
255 #include <ngs/inl/PileupEvent.hpp>
256 #endif
257 
258 #endif // _hpp_ngs_pileup_event_
259