1 #ifndef SNP_ANNOT_INFO__HPP
2 #define SNP_ANNOT_INFO__HPP
3 
4 /*  $Id: snp_annot_info.hpp 500967 2016-05-10 14:39:30Z vasilche $
5 * ===========================================================================
6 *
7 *                            PUBLIC DOMAIN NOTICE
8 *               National Center for Biotechnology Information
9 *
10 *  This software/database is a "United States Government Work" under the
11 *  terms of the United States Copyright Act.  It was written as part of
12 *  the author's official duties as a United States Government employee and
13 *  thus cannot be copyrighted.  This software/database is freely available
14 *  to the public for use. The National Library of Medicine and the U.S.
15 *  Government have not placed any restriction on its use or reproduction.
16 *
17 *  Although all reasonable efforts have been taken to ensure the accuracy
18 *  and reliability of the software and data, the NLM and the U.S.
19 *  Government do not and cannot warrant the performance or results that
20 *  may be obtained by using this software or data. The NLM and the U.S.
21 *  Government disclaim all warranties, express or implied, including
22 *  warranties of performance, merchantability or fitness for any particular
23 *  purpose.
24 *
25 *  Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Eugene Vasilchenko
30 *
31 * File Description:
32 *   SNP Seq-annot object information
33 *
34 */
35 
36 #include <corelib/ncbiobj.hpp>
37 #include <corelib/ncbi_limits.hpp>
38 
39 #include <util/range.hpp>
40 
41 #include <vector>
42 #include <map>
43 #include <algorithm>
44 #include <memory>
45 
46 #include <objects/seqloc/Seq_id.hpp>
47 
48 #include <objmgr/impl/tse_info_object.hpp>
49 #include <objmgr/impl/snp_info.hpp>
50 
51 BEGIN_NCBI_SCOPE
52 
// Forward declarations (ncbi scope).
class CObjectIStream;
class IReader;
class IWriter;
56 
57 BEGIN_SCOPE(objects)
58 
// Forward declarations (objects scope): Seq-annot / Seq-entry / Seq-feat
// classes first, then the location classes.
class CSeq_annot;
class CSeq_annot_Info;
class CSeq_annot_SNP_Info;
class CSeq_entry;
class CSeq_feat;
class CSeq_interval;
class CSeq_point;
66 
67 class NCBI_XOBJMGR_EXPORT CIndexedStrings
68 {
69 public:
70     CIndexedStrings(void);
71     CIndexedStrings(const CIndexedStrings& ss);
72 
73     void ClearIndices(void);
74     void Clear(void);
75 
IsEmpty(void) const76     bool IsEmpty(void) const
77         {
78             return m_Strings.empty();
79         }
GetSize(void) const80     size_t GetSize(void) const
81         {
82             return m_Strings.size();
83         }
84 
85     size_t GetIndex(const string& s, size_t max_index);
86 
GetString(size_t index) const87     const string& GetString(size_t index) const
88         {
89             return m_Strings[index];
90         }
91 
92     void Resize(size_t new_size);
SetString(size_t index)93     string& SetString(size_t index)
94         {
95             return m_Strings[index];
96         }
97 
98 private:
99     typedef vector<string> TStrings;
100     typedef map<string, size_t> TIndices;
101 
102     TStrings m_Strings;
103     auto_ptr<TIndices> m_Indices;
104 };
105 
106 
107 class NCBI_XOBJMGR_EXPORT CIndexedOctetStrings
108 {
109 public:
110     typedef vector<char> TOctetString;
111 
112     CIndexedOctetStrings(const CIndexedOctetStrings& ss);
113     CIndexedOctetStrings(void);
114 
115     void ClearIndices(void);
116     void Clear(void);
117 
IsEmpty(void) const118     bool IsEmpty(void) const
119         {
120             return m_Strings.empty();
121         }
GetElementSize(void) const122     size_t GetElementSize(void) const
123         {
124             return m_ElementSize;
125         }
GetTotalSize(void) const126     size_t GetTotalSize(void) const
127         {
128             return m_Strings.size();
129         }
GetSize(void) const130     size_t GetSize(void) const
131         {
132             size_t size = GetTotalSize();
133             if ( size ) {
134                 size /= GetElementSize();
135             }
136             return size;
137         }
GetTotalString(void) const138     const TOctetString& GetTotalString(void) const
139         {
140             return m_Strings;
141         }
142     void SetTotalString(size_t element_size, TOctetString& s);
143 
144     size_t GetIndex(const TOctetString& s, size_t max_index);
145 
146     void GetString(size_t index, TOctetString&) const;
147 
148 private:
149     typedef vector<char> TStrings;
150     typedef map<CTempString, size_t> TIndices;
151 
152     size_t m_ElementSize;
153     TStrings m_Strings;
154     auto_ptr<TIndices> m_Indices;
155 };
156 
157 
158 class NCBI_XOBJMGR_EXPORT CSeq_annot_SNP_Info : public CTSE_Info_Object
159 {
160     typedef CTSE_Info_Object TParent;
161 public:
162     CSeq_annot_SNP_Info(void);
163     CSeq_annot_SNP_Info(CSeq_annot& annot);
164     CSeq_annot_SNP_Info(const CSeq_annot_SNP_Info& info);
165     ~CSeq_annot_SNP_Info(void);
166 
167     const CSeq_annot_Info& GetParentSeq_annot_Info(void) const;
168     CSeq_annot_Info& GetParentSeq_annot_Info(void);
169 
170     const CSeq_entry_Info& GetParentSeq_entry_Info(void) const;
171     CSeq_entry_Info& GetParentSeq_entry_Info(void);
172 
173     // tree initialization
174     void x_ParentAttach(CSeq_annot_Info& parent);
175     void x_ParentDetach(CSeq_annot_Info& parent);
176 
177     void x_UpdateAnnotIndexContents(CTSE_Info& tse);
178     void x_UnmapAnnotObjects(CTSE_Info& tse);
179     void x_DropAnnotObjects(CTSE_Info& tse);
180 
181     typedef vector<SSNP_Info> TSNP_Set;
182     typedef TSNP_Set::const_iterator const_iterator;
183     typedef CRange<TSeqPos> TRange;
184 
185     bool empty(void) const;
186     size_t size(void) const;
187     const_iterator begin(void) const;
188     const_iterator end(void) const;
189 
190     const_iterator FirstIn(const TRange& range) const;
191 
192     const CSeq_id& GetSeq_id(void) const;
193     void SetSeq_id(const CSeq_id& id);
194     NCBI_DEPRECATED
195     void SetGi(TGi gi);
196     void OffsetGi(TIntId gi_offset);
197 
198     size_t GetSize(void) const;
199     const SSNP_Info& GetInfo(size_t index) const;
200     size_t GetIndex(const SSNP_Info& info) const;
201 
202     CSeq_annot& GetRemainingSeq_annot(void);
203     void Reset(void);
204 
205     bool HasLabel(size_t index) const;
206     string GetLabel(size_t index) const;
207 
208     // filling SNP table from parser
209     void x_AddSNP(const SSNP_Info& snp_info);
210     void x_FinishParsing(void);
211 
212     SSNP_Info::TCommentIndex x_GetCommentIndex(const string& comment);
213     const string& x_GetComment(SSNP_Info::TCommentIndex index) const;
214     SSNP_Info::TAlleleIndex x_GetAlleleIndex(const string& allele);
215     const string& x_GetAllele(SSNP_Info::TAlleleIndex index) const;
216     SSNP_Info::TQualityCodesIndex x_GetQualityCodesIndex(const string& str);
217     typedef vector<char> TOctetString;
218     SSNP_Info::TQualityCodesIndex x_GetQualityCodesIndex(const TOctetString& os);
219     const string& x_GetQualityCodesStr(SSNP_Info::TQualityCodesIndex index) const;
220     void x_GetQualityCodesOs(SSNP_Info::TQualityCodesIndex index, TOctetString& os) const;
221     SSNP_Info::TExtraIndex x_GetExtraIndex(const string& str);
222     const string& x_GetExtra(SSNP_Info::TExtraIndex index) const;
223 
x_GetComments(void) const224     const CIndexedStrings& x_GetComments(void) const {
225         return m_Comments;
226     }
x_GetAlleles(void) const227     const CIndexedStrings& x_GetAlleles(void) const {
228         return m_Alleles;
229     }
x_GetQualityCodesStr(void) const230     const CIndexedStrings& x_GetQualityCodesStr(void) const {
231         return m_QualityCodesStr;
232     }
x_GetQualityCodesOs(void) const233     const CIndexedOctetStrings& x_GetQualityCodesOs(void) const {
234         return m_QualityCodesOs;
235     }
236 
237 protected:
238     bool x_CheckId(const CSeq_id& id);
239 
240     void x_DoUpdate(TNeedUpdateFlags flags);
241 
242 private:
243     CSeq_annot_SNP_Info& operator=(const CSeq_annot_SNP_Info&);
244 
245     friend class CSeq_annot_Info;
246     friend class CSeq_annot_SNP_Info_Reader;
247     friend struct SSNP_Info;
248     friend class CSeq_feat_Handle;
249 
250     CRef<CSeq_id>               m_Seq_id;
251     TSNP_Set                    m_SNP_Set;
252     CIndexedStrings             m_Comments;
253     CIndexedStrings             m_Alleles;
254     CIndexedStrings             m_QualityCodesStr;
255     CIndexedOctetStrings        m_QualityCodesOs;
256     CIndexedStrings             m_Extra;
257     CRef<CSeq_annot>            m_Seq_annot;
258 };
259 
260 
261 /////////////////////////////////////////////////////////////////////////////
262 // CSeq_annot_SNP_Info
263 /////////////////////////////////////////////////////////////////////////////
264 
265 inline
empty(void) const266 bool CSeq_annot_SNP_Info::empty(void) const
267 {
268     return m_SNP_Set.empty();
269 }
270 
271 
272 inline
size(void) const273 size_t CSeq_annot_SNP_Info::size(void) const
274 {
275     return m_SNP_Set.size();
276 }
277 
278 
279 inline
280 CSeq_annot_SNP_Info::const_iterator
begin(void) const281 CSeq_annot_SNP_Info::begin(void) const
282 {
283     return m_SNP_Set.begin();
284 }
285 
286 
287 inline
288 CSeq_annot_SNP_Info::const_iterator
end(void) const289 CSeq_annot_SNP_Info::end(void) const
290 {
291     return m_SNP_Set.end();
292 }
293 
294 
295 inline
296 CSeq_annot_SNP_Info::const_iterator
FirstIn(const CRange<TSeqPos> & range) const297 CSeq_annot_SNP_Info::FirstIn(const CRange<TSeqPos>& range) const
298 {
299     return lower_bound(m_SNP_Set.begin(), m_SNP_Set.end(), range.GetFrom());
300 }
301 
302 
303 inline
GetSeq_id(void) const304 const CSeq_id& CSeq_annot_SNP_Info::GetSeq_id(void) const
305 {
306     return *m_Seq_id;
307 }
308 
309 
310 inline
GetRemainingSeq_annot(void)311 CSeq_annot& CSeq_annot_SNP_Info::GetRemainingSeq_annot(void)
312 {
313     return *m_Seq_annot;
314 }
315 
316 
317 inline
318 SSNP_Info::TCommentIndex
x_GetCommentIndex(const string & comment)319 CSeq_annot_SNP_Info::x_GetCommentIndex(const string& comment)
320 {
321     size_t index =
322         m_Comments.GetIndex(comment, SSNP_Info::kMax_CommentIndex);
323     // index cannot be bigger than SSNP_Info::kMax_CommentIndex
324     return SSNP_Info::TCommentIndex(index);
325 }
326 
327 
328 inline
329 SSNP_Info::TExtraIndex
x_GetExtraIndex(const string & str)330 CSeq_annot_SNP_Info::x_GetExtraIndex(const string& str)
331 {
332     size_t index =
333         m_Extra.GetIndex(str, SSNP_Info::kMax_ExtraIndex);
334     // index cannot be bigger than SSNP_Info::kMax_ExtraIndex
335     return SSNP_Info::TExtraIndex(index);
336 }
337 
338 
339 inline
340 SSNP_Info::TQualityCodesIndex
x_GetQualityCodesIndex(const string & str)341 CSeq_annot_SNP_Info::x_GetQualityCodesIndex(const string& str)
342 {
343     size_t index =
344         m_QualityCodesStr.GetIndex(str, SSNP_Info::kMax_QualityCodesIndex);
345     // index cannot be bigger than SSNP_Info::kMax_QualityCodesIndex
346     return SSNP_Info::TQualityCodesIndex(index);
347 }
348 
349 
350 inline
351 SSNP_Info::TQualityCodesIndex
x_GetQualityCodesIndex(const TOctetString & os)352 CSeq_annot_SNP_Info::x_GetQualityCodesIndex(const TOctetString& os)
353 {
354     size_t index =
355         m_QualityCodesOs.GetIndex(os, SSNP_Info::kMax_QualityCodesIndex);
356     // index cannot be bigger than SSNP_Info::kMax_QualityCodesIndex
357     return SSNP_Info::TQualityCodesIndex(index);
358 }
359 
360 
361 inline
362 const string&
x_GetComment(SSNP_Info::TCommentIndex index) const363 CSeq_annot_SNP_Info::x_GetComment(SSNP_Info::TCommentIndex index) const
364 {
365     return m_Comments.GetString(index);
366 }
367 
368 
369 inline
370 const string&
x_GetAllele(SSNP_Info::TAlleleIndex index) const371 CSeq_annot_SNP_Info::x_GetAllele(SSNP_Info::TAlleleIndex index) const
372 {
373     return m_Alleles.GetString(index);
374 }
375 
376 
377 inline
378 const string&
x_GetQualityCodesStr(SSNP_Info::TQualityCodesIndex index) const379 CSeq_annot_SNP_Info::x_GetQualityCodesStr(SSNP_Info::TQualityCodesIndex index) const
380 {
381     return m_QualityCodesStr.GetString(index);
382 }
383 
384 
385 inline
x_GetQualityCodesOs(SSNP_Info::TQualityCodesIndex index,TOctetString & os) const386 void CSeq_annot_SNP_Info::x_GetQualityCodesOs(SSNP_Info::TQualityCodesIndex index,
387                                               TOctetString& os) const
388 {
389     m_QualityCodesOs.GetString(index, os);
390 }
391 
392 
393 inline
394 const string&
x_GetExtra(SSNP_Info::TExtraIndex index) const395 CSeq_annot_SNP_Info::x_GetExtra(SSNP_Info::TExtraIndex index) const
396 {
397     return m_Extra.GetString(index);
398 }
399 
400 
401 inline
x_AddSNP(const SSNP_Info & snp_info)402 void CSeq_annot_SNP_Info::x_AddSNP(const SSNP_Info& snp_info)
403 {
404     m_SNP_Set.push_back(snp_info);
405 }
406 
407 
408 inline
GetSize(void) const409 size_t CSeq_annot_SNP_Info::GetSize(void) const
410 {
411     return m_SNP_Set.size();
412 }
413 
414 
415 inline
GetInfo(size_t index) const416 const SSNP_Info& CSeq_annot_SNP_Info::GetInfo(size_t index) const
417 {
418     _ASSERT(index < m_SNP_Set.size());
419     return m_SNP_Set[index];
420 }
421 
422 
423 inline
GetIndex(const SSNP_Info & info) const424 size_t CSeq_annot_SNP_Info::GetIndex(const SSNP_Info& info) const
425 {
426     _ASSERT(&info >= &m_SNP_Set.front() && &info <= &m_SNP_Set.back());
427     return &info - &m_SNP_Set.front();
428 }
429 
430 
431 inline
HasLabel(size_t index) const432 bool CSeq_annot_SNP_Info::HasLabel(size_t index) const
433 {
434     return GetInfo(index).HasLabel(*this);
435 }
436 
437 
438 inline
GetLabel(size_t index) const439 string CSeq_annot_SNP_Info::GetLabel(size_t index) const
440 {
441     return GetInfo(index).GetLabel(*this);
442 }
443 
444 
445 END_SCOPE(objects)
446 END_NCBI_SCOPE
447 
448 #endif  // SNP_ANNOT_INFO__HPP
449