1 /*  $Id: getfeature.cpp 632526 2021-06-02 17:25:01Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Jian Ye
27  *
28  * File Description:
29  *  Fetching features
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <util/range.hpp>
35 #include <objtools/readers/getfeature.hpp>
36 
37 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)38 BEGIN_SCOPE (objects)
39 
40 CGetFeature::CGetFeature(string feat_file, string index_file){
41     m_FeatFile = new CNcbiIfstream(feat_file.c_str(), IOS_BASE::binary);
42     m_FeatFileIndex  = new CNcbiIfstream(index_file.c_str(), IOS_BASE::binary);
43     m_5FeatInfo = NULL;
44     m_3FeatInfo = NULL;
45 }
46 
47 
GetFeatInfo(const string & id_str,const CRange<TSeqPos> & seq_range,SFeatInfo * & feat5,SFeatInfo * & feat3,int max_feature)48 vector<SFeatInfo*>& CGetFeature::GetFeatInfo(const string& id_str,
49                                                  const CRange<TSeqPos>& seq_range,
50                                                  SFeatInfo*& feat5,
51                                                  SFeatInfo*& feat3,
52                                                  int max_feature ){
53     x_Clear();
54     m_5FeatInfo = NULL;
55     m_3FeatInfo = NULL;
56     if(m_FeatFileIndex && m_FeatFile && *m_FeatFileIndex && *m_FeatFile){
57         unsigned int offset = 0;
58         map<string, unsigned int>::const_iterator iter = m_OffsetMap.find(id_str);
59         if ( iter != m_OffsetMap.end() ){
60             offset  = iter->second;
61         } else{
62             m_FeatFileIndex->seekg(0);
63             while(!m_FeatFileIndex->eof()){
64                 SOffsetInfo offset_info;
65                 m_FeatFileIndex->read((char*)(&offset_info), sizeof(SOffsetInfo));
66                 if(!(*m_FeatFileIndex)){
67                     m_FeatFileIndex->clear();
68                     break;
69                 }
70 
71                 if(offset_info.id == id_str){
72                     offset = offset_info.offset;
73                     m_OffsetMap.insert(map<string, unsigned int>::value_type(offset_info.id, offset_info.offset));
74                     m_FeatFileIndex->clear();
75                     break;
76                 }
77             }
78             m_FeatFileIndex->clear();
79         }
80 
81         m_FeatFile->seekg(offset);
82         int count = 0;
83         //now look to retrieve feature info
84         while(!m_FeatFile->eof() && count < max_feature){
85             SFeatInfo* feat_info = new SFeatInfo;
86             m_FeatFile->read((char*)feat_info, sizeof(SFeatInfo));
87             if(*m_FeatFile){
88 
89                 if(id_str != feat_info->id) { //next id already
90                     delete feat_info;
91                     m_FeatFile->clear();
92                     break;
93                 }
94 
95                 if(seq_range.IntersectingWith(feat_info->range)){
96                     m_FeatInfoList.push_back(feat_info);
97                     count ++;
98                 } else {
99                     //track the flank features that are 5' and 3' of the range
100                     if(feat_info->range < seq_range ){
101                         if(m_5FeatInfo){
102                             delete m_5FeatInfo;
103                             m_5FeatInfo = feat_info;
104                         } else { //first one
105                             m_5FeatInfo = feat_info;
106                         }
107                     } else {
108                         m_3FeatInfo = feat_info;
109                         break; //already past the range as range was sorted
110                         //from low to high
111                     }
112                 }
113             } else {
114                 delete feat_info;
115                 m_FeatFile->clear();
116                 break;
117             }
118         }
119         m_FeatFile->clear(); //reset
120     }
121 
122     if(m_5FeatInfo){
123         feat5 = m_5FeatInfo;
124     }
125 
126     if(m_3FeatInfo){
127         feat3 = m_3FeatInfo;
128     }
129 
130     return m_FeatInfoList;
131 }
132 
~CGetFeature()133 CGetFeature::~CGetFeature(){
134     x_Clear();
135     if(m_FeatFile) {
136         delete m_FeatFile;
137     }
138     if(m_FeatFileIndex){
139         delete m_FeatFileIndex;
140     }
141 }
142 
x_Clear()143 void CGetFeature::x_Clear(){
144     ITERATE(vector<SFeatInfo*>, iter, m_FeatInfoList){
145         delete *iter;
146     }
147     m_FeatInfoList.clear();
148 
149     if(m_5FeatInfo){
150         delete m_5FeatInfo;
151     }
152     if(m_3FeatInfo){
153         delete m_3FeatInfo;
154     }
155 }
156 
157 END_SCOPE(objects)
158 END_NCBI_SCOPE
159