1 /* $Id: getfeature.cpp 632526 2021-06-02 17:25:01Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Jian Ye
27 *
28 * File Description:
29 * Fetching features
30 *
31 */
32
33 #include <ncbi_pch.hpp>
34 #include <util/range.hpp>
35 #include <objtools/readers/getfeature.hpp>
36
37 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)38 BEGIN_SCOPE (objects)
39
40 CGetFeature::CGetFeature(string feat_file, string index_file){
41 m_FeatFile = new CNcbiIfstream(feat_file.c_str(), IOS_BASE::binary);
42 m_FeatFileIndex = new CNcbiIfstream(index_file.c_str(), IOS_BASE::binary);
43 m_5FeatInfo = NULL;
44 m_3FeatInfo = NULL;
45 }
46
47
GetFeatInfo(const string & id_str,const CRange<TSeqPos> & seq_range,SFeatInfo * & feat5,SFeatInfo * & feat3,int max_feature)48 vector<SFeatInfo*>& CGetFeature::GetFeatInfo(const string& id_str,
49 const CRange<TSeqPos>& seq_range,
50 SFeatInfo*& feat5,
51 SFeatInfo*& feat3,
52 int max_feature ){
53 x_Clear();
54 m_5FeatInfo = NULL;
55 m_3FeatInfo = NULL;
56 if(m_FeatFileIndex && m_FeatFile && *m_FeatFileIndex && *m_FeatFile){
57 unsigned int offset = 0;
58 map<string, unsigned int>::const_iterator iter = m_OffsetMap.find(id_str);
59 if ( iter != m_OffsetMap.end() ){
60 offset = iter->second;
61 } else{
62 m_FeatFileIndex->seekg(0);
63 while(!m_FeatFileIndex->eof()){
64 SOffsetInfo offset_info;
65 m_FeatFileIndex->read((char*)(&offset_info), sizeof(SOffsetInfo));
66 if(!(*m_FeatFileIndex)){
67 m_FeatFileIndex->clear();
68 break;
69 }
70
71 if(offset_info.id == id_str){
72 offset = offset_info.offset;
73 m_OffsetMap.insert(map<string, unsigned int>::value_type(offset_info.id, offset_info.offset));
74 m_FeatFileIndex->clear();
75 break;
76 }
77 }
78 m_FeatFileIndex->clear();
79 }
80
81 m_FeatFile->seekg(offset);
82 int count = 0;
83 //now look to retrieve feature info
84 while(!m_FeatFile->eof() && count < max_feature){
85 SFeatInfo* feat_info = new SFeatInfo;
86 m_FeatFile->read((char*)feat_info, sizeof(SFeatInfo));
87 if(*m_FeatFile){
88
89 if(id_str != feat_info->id) { //next id already
90 delete feat_info;
91 m_FeatFile->clear();
92 break;
93 }
94
95 if(seq_range.IntersectingWith(feat_info->range)){
96 m_FeatInfoList.push_back(feat_info);
97 count ++;
98 } else {
99 //track the flank features that are 5' and 3' of the range
100 if(feat_info->range < seq_range ){
101 if(m_5FeatInfo){
102 delete m_5FeatInfo;
103 m_5FeatInfo = feat_info;
104 } else { //first one
105 m_5FeatInfo = feat_info;
106 }
107 } else {
108 m_3FeatInfo = feat_info;
109 break; //already past the range as range was sorted
110 //from low to high
111 }
112 }
113 } else {
114 delete feat_info;
115 m_FeatFile->clear();
116 break;
117 }
118 }
119 m_FeatFile->clear(); //reset
120 }
121
122 if(m_5FeatInfo){
123 feat5 = m_5FeatInfo;
124 }
125
126 if(m_3FeatInfo){
127 feat3 = m_3FeatInfo;
128 }
129
130 return m_FeatInfoList;
131 }
132
~CGetFeature()133 CGetFeature::~CGetFeature(){
134 x_Clear();
135 if(m_FeatFile) {
136 delete m_FeatFile;
137 }
138 if(m_FeatFileIndex){
139 delete m_FeatFileIndex;
140 }
141 }
142
x_Clear()143 void CGetFeature::x_Clear(){
144 ITERATE(vector<SFeatInfo*>, iter, m_FeatInfoList){
145 delete *iter;
146 }
147 m_FeatInfoList.clear();
148
149 if(m_5FeatInfo){
150 delete m_5FeatInfo;
151 }
152 if(m_3FeatInfo){
153 delete m_3FeatInfo;
154 }
155 }
156
157 END_SCOPE(objects)
158 END_NCBI_SCOPE
159