1 #ifndef SNP_ANNOT_INFO__HPP
2 #define SNP_ANNOT_INFO__HPP
3
4 /* $Id: snp_annot_info.hpp 500967 2016-05-10 14:39:30Z vasilche $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Eugene Vasilchenko
30 *
31 * File Description:
32 * SNP Seq-annot object information
33 *
34 */
35
36 #include <corelib/ncbiobj.hpp>
37 #include <corelib/ncbi_limits.hpp>
38
39 #include <util/range.hpp>
40
41 #include <vector>
42 #include <map>
43 #include <algorithm>
44 #include <memory>
45
46 #include <objects/seqloc/Seq_id.hpp>
47
48 #include <objmgr/impl/tse_info_object.hpp>
49 #include <objmgr/impl/snp_info.hpp>
50
51 BEGIN_NCBI_SCOPE
52
53 class CObjectIStream;
54 class IWriter;
55 class IReader;
56
57 BEGIN_SCOPE(objects)
58
59 class CSeq_entry;
60 class CSeq_feat;
61 class CSeq_annot;
62 class CSeq_annot_Info;
63 class CSeq_annot_SNP_Info;
64 class CSeq_point;
65 class CSeq_interval;
66
67 class NCBI_XOBJMGR_EXPORT CIndexedStrings
68 {
69 public:
70 CIndexedStrings(void);
71 CIndexedStrings(const CIndexedStrings& ss);
72
73 void ClearIndices(void);
74 void Clear(void);
75
IsEmpty(void) const76 bool IsEmpty(void) const
77 {
78 return m_Strings.empty();
79 }
GetSize(void) const80 size_t GetSize(void) const
81 {
82 return m_Strings.size();
83 }
84
85 size_t GetIndex(const string& s, size_t max_index);
86
GetString(size_t index) const87 const string& GetString(size_t index) const
88 {
89 return m_Strings[index];
90 }
91
92 void Resize(size_t new_size);
SetString(size_t index)93 string& SetString(size_t index)
94 {
95 return m_Strings[index];
96 }
97
98 private:
99 typedef vector<string> TStrings;
100 typedef map<string, size_t> TIndices;
101
102 TStrings m_Strings;
103 auto_ptr<TIndices> m_Indices;
104 };
105
106
107 class NCBI_XOBJMGR_EXPORT CIndexedOctetStrings
108 {
109 public:
110 typedef vector<char> TOctetString;
111
112 CIndexedOctetStrings(const CIndexedOctetStrings& ss);
113 CIndexedOctetStrings(void);
114
115 void ClearIndices(void);
116 void Clear(void);
117
IsEmpty(void) const118 bool IsEmpty(void) const
119 {
120 return m_Strings.empty();
121 }
GetElementSize(void) const122 size_t GetElementSize(void) const
123 {
124 return m_ElementSize;
125 }
GetTotalSize(void) const126 size_t GetTotalSize(void) const
127 {
128 return m_Strings.size();
129 }
GetSize(void) const130 size_t GetSize(void) const
131 {
132 size_t size = GetTotalSize();
133 if ( size ) {
134 size /= GetElementSize();
135 }
136 return size;
137 }
GetTotalString(void) const138 const TOctetString& GetTotalString(void) const
139 {
140 return m_Strings;
141 }
142 void SetTotalString(size_t element_size, TOctetString& s);
143
144 size_t GetIndex(const TOctetString& s, size_t max_index);
145
146 void GetString(size_t index, TOctetString&) const;
147
148 private:
149 typedef vector<char> TStrings;
150 typedef map<CTempString, size_t> TIndices;
151
152 size_t m_ElementSize;
153 TStrings m_Strings;
154 auto_ptr<TIndices> m_Indices;
155 };
156
157
158 class NCBI_XOBJMGR_EXPORT CSeq_annot_SNP_Info : public CTSE_Info_Object
159 {
160 typedef CTSE_Info_Object TParent;
161 public:
162 CSeq_annot_SNP_Info(void);
163 CSeq_annot_SNP_Info(CSeq_annot& annot);
164 CSeq_annot_SNP_Info(const CSeq_annot_SNP_Info& info);
165 ~CSeq_annot_SNP_Info(void);
166
167 const CSeq_annot_Info& GetParentSeq_annot_Info(void) const;
168 CSeq_annot_Info& GetParentSeq_annot_Info(void);
169
170 const CSeq_entry_Info& GetParentSeq_entry_Info(void) const;
171 CSeq_entry_Info& GetParentSeq_entry_Info(void);
172
173 // tree initialization
174 void x_ParentAttach(CSeq_annot_Info& parent);
175 void x_ParentDetach(CSeq_annot_Info& parent);
176
177 void x_UpdateAnnotIndexContents(CTSE_Info& tse);
178 void x_UnmapAnnotObjects(CTSE_Info& tse);
179 void x_DropAnnotObjects(CTSE_Info& tse);
180
181 typedef vector<SSNP_Info> TSNP_Set;
182 typedef TSNP_Set::const_iterator const_iterator;
183 typedef CRange<TSeqPos> TRange;
184
185 bool empty(void) const;
186 size_t size(void) const;
187 const_iterator begin(void) const;
188 const_iterator end(void) const;
189
190 const_iterator FirstIn(const TRange& range) const;
191
192 const CSeq_id& GetSeq_id(void) const;
193 void SetSeq_id(const CSeq_id& id);
194 NCBI_DEPRECATED
195 void SetGi(TGi gi);
196 void OffsetGi(TIntId gi_offset);
197
198 size_t GetSize(void) const;
199 const SSNP_Info& GetInfo(size_t index) const;
200 size_t GetIndex(const SSNP_Info& info) const;
201
202 CSeq_annot& GetRemainingSeq_annot(void);
203 void Reset(void);
204
205 bool HasLabel(size_t index) const;
206 string GetLabel(size_t index) const;
207
208 // filling SNP table from parser
209 void x_AddSNP(const SSNP_Info& snp_info);
210 void x_FinishParsing(void);
211
212 SSNP_Info::TCommentIndex x_GetCommentIndex(const string& comment);
213 const string& x_GetComment(SSNP_Info::TCommentIndex index) const;
214 SSNP_Info::TAlleleIndex x_GetAlleleIndex(const string& allele);
215 const string& x_GetAllele(SSNP_Info::TAlleleIndex index) const;
216 SSNP_Info::TQualityCodesIndex x_GetQualityCodesIndex(const string& str);
217 typedef vector<char> TOctetString;
218 SSNP_Info::TQualityCodesIndex x_GetQualityCodesIndex(const TOctetString& os);
219 const string& x_GetQualityCodesStr(SSNP_Info::TQualityCodesIndex index) const;
220 void x_GetQualityCodesOs(SSNP_Info::TQualityCodesIndex index, TOctetString& os) const;
221 SSNP_Info::TExtraIndex x_GetExtraIndex(const string& str);
222 const string& x_GetExtra(SSNP_Info::TExtraIndex index) const;
223
x_GetComments(void) const224 const CIndexedStrings& x_GetComments(void) const {
225 return m_Comments;
226 }
x_GetAlleles(void) const227 const CIndexedStrings& x_GetAlleles(void) const {
228 return m_Alleles;
229 }
x_GetQualityCodesStr(void) const230 const CIndexedStrings& x_GetQualityCodesStr(void) const {
231 return m_QualityCodesStr;
232 }
x_GetQualityCodesOs(void) const233 const CIndexedOctetStrings& x_GetQualityCodesOs(void) const {
234 return m_QualityCodesOs;
235 }
236
237 protected:
238 bool x_CheckId(const CSeq_id& id);
239
240 void x_DoUpdate(TNeedUpdateFlags flags);
241
242 private:
243 CSeq_annot_SNP_Info& operator=(const CSeq_annot_SNP_Info&);
244
245 friend class CSeq_annot_Info;
246 friend class CSeq_annot_SNP_Info_Reader;
247 friend struct SSNP_Info;
248 friend class CSeq_feat_Handle;
249
250 CRef<CSeq_id> m_Seq_id;
251 TSNP_Set m_SNP_Set;
252 CIndexedStrings m_Comments;
253 CIndexedStrings m_Alleles;
254 CIndexedStrings m_QualityCodesStr;
255 CIndexedOctetStrings m_QualityCodesOs;
256 CIndexedStrings m_Extra;
257 CRef<CSeq_annot> m_Seq_annot;
258 };
259
260
261 /////////////////////////////////////////////////////////////////////////////
262 // CSeq_annot_SNP_Info
263 /////////////////////////////////////////////////////////////////////////////
264
265 inline
empty(void) const266 bool CSeq_annot_SNP_Info::empty(void) const
267 {
268 return m_SNP_Set.empty();
269 }
270
271
272 inline
size(void) const273 size_t CSeq_annot_SNP_Info::size(void) const
274 {
275 return m_SNP_Set.size();
276 }
277
278
279 inline
280 CSeq_annot_SNP_Info::const_iterator
begin(void) const281 CSeq_annot_SNP_Info::begin(void) const
282 {
283 return m_SNP_Set.begin();
284 }
285
286
287 inline
288 CSeq_annot_SNP_Info::const_iterator
end(void) const289 CSeq_annot_SNP_Info::end(void) const
290 {
291 return m_SNP_Set.end();
292 }
293
294
295 inline
296 CSeq_annot_SNP_Info::const_iterator
FirstIn(const CRange<TSeqPos> & range) const297 CSeq_annot_SNP_Info::FirstIn(const CRange<TSeqPos>& range) const
298 {
299 return lower_bound(m_SNP_Set.begin(), m_SNP_Set.end(), range.GetFrom());
300 }
301
302
303 inline
GetSeq_id(void) const304 const CSeq_id& CSeq_annot_SNP_Info::GetSeq_id(void) const
305 {
306 return *m_Seq_id;
307 }
308
309
310 inline
GetRemainingSeq_annot(void)311 CSeq_annot& CSeq_annot_SNP_Info::GetRemainingSeq_annot(void)
312 {
313 return *m_Seq_annot;
314 }
315
316
317 inline
318 SSNP_Info::TCommentIndex
x_GetCommentIndex(const string & comment)319 CSeq_annot_SNP_Info::x_GetCommentIndex(const string& comment)
320 {
321 size_t index =
322 m_Comments.GetIndex(comment, SSNP_Info::kMax_CommentIndex);
323 // index cannot be bigger than SSNP_Info::kMax_CommentIndex
324 return SSNP_Info::TCommentIndex(index);
325 }
326
327
328 inline
329 SSNP_Info::TExtraIndex
x_GetExtraIndex(const string & str)330 CSeq_annot_SNP_Info::x_GetExtraIndex(const string& str)
331 {
332 size_t index =
333 m_Extra.GetIndex(str, SSNP_Info::kMax_ExtraIndex);
334 // index cannot be bigger than SSNP_Info::kMax_ExtraIndex
335 return SSNP_Info::TExtraIndex(index);
336 }
337
338
339 inline
340 SSNP_Info::TQualityCodesIndex
x_GetQualityCodesIndex(const string & str)341 CSeq_annot_SNP_Info::x_GetQualityCodesIndex(const string& str)
342 {
343 size_t index =
344 m_QualityCodesStr.GetIndex(str, SSNP_Info::kMax_QualityCodesIndex);
345 // index cannot be bigger than SSNP_Info::kMax_QualityCodesIndex
346 return SSNP_Info::TQualityCodesIndex(index);
347 }
348
349
350 inline
351 SSNP_Info::TQualityCodesIndex
x_GetQualityCodesIndex(const TOctetString & os)352 CSeq_annot_SNP_Info::x_GetQualityCodesIndex(const TOctetString& os)
353 {
354 size_t index =
355 m_QualityCodesOs.GetIndex(os, SSNP_Info::kMax_QualityCodesIndex);
356 // index cannot be bigger than SSNP_Info::kMax_QualityCodesIndex
357 return SSNP_Info::TQualityCodesIndex(index);
358 }
359
360
361 inline
362 const string&
x_GetComment(SSNP_Info::TCommentIndex index) const363 CSeq_annot_SNP_Info::x_GetComment(SSNP_Info::TCommentIndex index) const
364 {
365 return m_Comments.GetString(index);
366 }
367
368
369 inline
370 const string&
x_GetAllele(SSNP_Info::TAlleleIndex index) const371 CSeq_annot_SNP_Info::x_GetAllele(SSNP_Info::TAlleleIndex index) const
372 {
373 return m_Alleles.GetString(index);
374 }
375
376
377 inline
378 const string&
x_GetQualityCodesStr(SSNP_Info::TQualityCodesIndex index) const379 CSeq_annot_SNP_Info::x_GetQualityCodesStr(SSNP_Info::TQualityCodesIndex index) const
380 {
381 return m_QualityCodesStr.GetString(index);
382 }
383
384
385 inline
x_GetQualityCodesOs(SSNP_Info::TQualityCodesIndex index,TOctetString & os) const386 void CSeq_annot_SNP_Info::x_GetQualityCodesOs(SSNP_Info::TQualityCodesIndex index,
387 TOctetString& os) const
388 {
389 m_QualityCodesOs.GetString(index, os);
390 }
391
392
393 inline
394 const string&
x_GetExtra(SSNP_Info::TExtraIndex index) const395 CSeq_annot_SNP_Info::x_GetExtra(SSNP_Info::TExtraIndex index) const
396 {
397 return m_Extra.GetString(index);
398 }
399
400
401 inline
x_AddSNP(const SSNP_Info & snp_info)402 void CSeq_annot_SNP_Info::x_AddSNP(const SSNP_Info& snp_info)
403 {
404 m_SNP_Set.push_back(snp_info);
405 }
406
407
408 inline
GetSize(void) const409 size_t CSeq_annot_SNP_Info::GetSize(void) const
410 {
411 return m_SNP_Set.size();
412 }
413
414
415 inline
GetInfo(size_t index) const416 const SSNP_Info& CSeq_annot_SNP_Info::GetInfo(size_t index) const
417 {
418 _ASSERT(index < m_SNP_Set.size());
419 return m_SNP_Set[index];
420 }
421
422
423 inline
GetIndex(const SSNP_Info & info) const424 size_t CSeq_annot_SNP_Info::GetIndex(const SSNP_Info& info) const
425 {
426 _ASSERT(&info >= &m_SNP_Set.front() && &info <= &m_SNP_Set.back());
427 return &info - &m_SNP_Set.front();
428 }
429
430
431 inline
HasLabel(size_t index) const432 bool CSeq_annot_SNP_Info::HasLabel(size_t index) const
433 {
434 return GetInfo(index).HasLabel(*this);
435 }
436
437
438 inline
GetLabel(size_t index) const439 string CSeq_annot_SNP_Info::GetLabel(size_t index) const
440 {
441 return GetInfo(index).GetLabel(*this);
442 }
443
444
445 END_SCOPE(objects)
446 END_NCBI_SCOPE
447
448 #endif // SNP_ANNOT_INFO__HPP
449