1 /* $Id: field_handler.cpp 632623 2021-06-03 17:38:11Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin, NCBI
27 *
28 * File Description:
29 * CFieldHandler parent class
30 */
31 #include <ncbi_pch.hpp>
32 #include <corelib/ncbistd.hpp>
33 #include <corelib/ncbiobj.hpp>
34 #include <objects/seqfeat/Seq_feat.hpp>
35 #include <objects/seq/Seq_descr.hpp>
36 #include <objects/general/User_object.hpp>
37 #include <objects/general/Object_id.hpp>
38 #include <objmgr/seq_entry_ci.hpp>
39 #include <objmgr/seq_feat_handle.hpp>
40 #include <objmgr/feat_ci.hpp>
41 #include <objmgr/seq_annot_ci.hpp>
42 #include <objmgr/seq_annot_handle.hpp>
43 #include <objmgr/util/sequence.hpp>
44 #include <objtools/edit/field_handler.hpp>
45 #include <objtools/edit/dblink_field.hpp>
46 #include <objtools/edit/gb_block_field.hpp>
47
48 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)49 BEGIN_SCOPE(objects)
50 BEGIN_SCOPE(edit)
51
52 bool CFieldHandler::QualifierNamesAreEquivalent (string name1, string name2)
53 {
54 // ignore protein at beginning
55 const string protein("protein");
56 if (NStr::StartsWith(name1, protein)) {
57 name1 = name1.substr(protein.length());
58 }
59 if (NStr::StartsWith(name2, protein)) {
60 name2 = name2.substr(protein.length());
61 }
62
63 // spaces, dashes, and underscores do not count
64 NStr::ReplaceInPlace (name1, " ", "");
65 NStr::ReplaceInPlace (name1, "_", "");
66 NStr::ReplaceInPlace (name1, "-", "");
67 NStr::ReplaceInPlace (name2, " ", "");
68 NStr::ReplaceInPlace (name2, "_", "");
69 NStr::ReplaceInPlace (name2, "-", "");
70
71 return NStr::EqualNocase(name1, name2);
72 }
73
74
GetRelatedApplyObjects(const CObject & object,CRef<CScope> scope)75 vector<CRef<CApplyObject> > CFieldHandler::GetRelatedApplyObjects(const CObject& object, CRef<CScope> scope)
76 {
77 vector<CRef<CApplyObject> > related = GetApplyObjectsFromRelatedObjects(GetRelatedObjects(object, scope), scope);
78
79 return related;
80 }
81
82
GetApplyObjectsFromRelatedObjects(vector<CConstRef<CObject>> related,CRef<CScope> scope)83 vector<CRef<CApplyObject> > CFieldHandler::GetApplyObjectsFromRelatedObjects(vector<CConstRef<CObject> > related, CRef<CScope> scope)
84 {
85 vector<CRef<CApplyObject> > rval;
86
87 ITERATE(vector<CConstRef<CObject> >, it, related) {
88 const CSeqdesc * obj_desc = dynamic_cast<const CSeqdesc *>((*it).GetPointer());
89 const CSeq_feat * obj_feat = dynamic_cast<const CSeq_feat *>((*it).GetPointer());
90 if (obj_desc) {
91 CSeq_entry_Handle seh = GetSeqEntryForSeqdesc(scope, *obj_desc);
92 CRef<CSeqdesc> new_desc(new CSeqdesc());
93 new_desc->Assign(*obj_desc);
94 CRef<CObject> editable(new_desc.GetPointer());
95 CRef<CApplyObject> apply(new CApplyObject(seh, *it, editable));
96 rval.push_back(apply);
97 } else {
98 CBioseq_Handle bsh = scope->GetBioseqHandle(obj_feat->GetLocation());
99 CRef<CApplyObject> apply(new CApplyObject(bsh, *obj_feat));
100 rval.push_back(apply);
101 }
102 }
103
104 return rval;
105 }
106
107
108 CRef<CFieldHandler>
Create(const string & field_name)109 CFieldHandlerFactory::Create(const string& field_name)
110 {
111 CDBLinkField::EDBLinkFieldType dblink_field = CDBLinkField::GetTypeForLabel(field_name);
112 if (dblink_field != CDBLinkField::eDBLinkFieldType_Unknown) {
113 return CRef<CFieldHandler>(new CDBLinkField(dblink_field));
114 }
115 #if 0
116 if (s_IsSequenceIDField(field_name)) {
117 return CRef<CFieldHandler>(new CSeqIdField());
118 }
119 CPubField::EPubFieldType pub_field = CPubField::GetTypeForLabel(field_name);
120 if (pub_field != CPubField::ePubFieldType_Unknown) {
121 return CRef<CFieldHandler>(new CPubField(pub_field));
122 }
123 CMolInfoField::EMolInfoFieldType molinfo_field = CMolInfoField::GetFieldType(field_name);
124 if (molinfo_field != CMolInfoField::e_Unknown) {
125 return CRef<CFieldHandler>(new CMolInfoField(molinfo_field));
126 }
127
128 if (NStr::EqualNocase(field_name, kGenomeProjectID)) {
129 return CRef<CFieldHandler>(new CGenomeProjectField());
130 }
131 #endif
132 if (CFieldHandler::QualifierNamesAreEquivalent(field_name, kCommentDescriptorLabel)) {
133 return CRef<CFieldHandler>(new CCommentDescField());
134 }
135 if (CFieldHandler::QualifierNamesAreEquivalent(field_name, kDefinitionLineLabel)) {
136 return CRef<CFieldHandler>( new CDefinitionLineField());
137 }
138 CGBBlockField::EGBBlockFieldType gbblock_field = CGBBlockField::GetTypeForLabel(field_name);
139 if (gbblock_field != CGBBlockField::eGBBlockFieldType_Unknown) {
140 return CRef<CFieldHandler>(new CGBBlockField(gbblock_field));
141 }
142
143 // empty
144 CRef<CFieldHandler> empty;
145 return empty;
146 }
147
148
s_IsSequenceIDField(const string & field)149 bool CFieldHandlerFactory::s_IsSequenceIDField(const string& field)
150 {
151 return CFieldHandler::QualifierNamesAreEquivalent(field, kFieldTypeSeqId);
152 }
153
154
DoesObjectMatchFieldConstraint(const CObject & object,const string & field_name,CRef<CStringConstraint> string_constraint,CRef<CScope> scope)155 bool DoesObjectMatchFieldConstraint (const CObject& object, const string& field_name, CRef<CStringConstraint> string_constraint, CRef<CScope> scope)
156 {
157 if (NStr::IsBlank(field_name) || !string_constraint) {
158 return true;
159 }
160
161 CRef<CFieldHandler> fh = CFieldHandlerFactory::Create(field_name);
162 if (!fh) {
163 return false;
164 }
165
166 vector<string> val_list;
167 vector<CConstRef<CObject> > objs = fh->GetRelatedObjects (object, scope);
168 ITERATE(vector<CConstRef<CObject> >, it, objs) {
169 vector<string> add = fh->GetVals(**it);
170 val_list.insert(val_list.end(), add.begin(), add.end());
171 }
172
173 return string_constraint->DoesListMatch(val_list);
174 }
175
176
DoesApplyObjectMatchFieldConstraint(const CApplyObject & object,const string & field_name,CRef<CStringConstraint> string_constraint)177 bool DoesApplyObjectMatchFieldConstraint (const CApplyObject& object, const string& field_name, CRef<CStringConstraint> string_constraint)
178 {
179 if (NStr::IsBlank(field_name) || !string_constraint) {
180 return true;
181 }
182
183 CRef<CFieldHandler> fh = CFieldHandlerFactory::Create(field_name);
184 if (!fh) {
185 return false;
186 }
187
188 vector<string> val_list;
189 vector<CConstRef<CObject> > objs = fh->GetRelatedObjects (object);
190 ITERATE(vector<CConstRef<CObject> >, it, objs) {
191 vector<string> add = fh->GetVals(**it);
192 val_list.insert(val_list.end(), add.begin(), add.end());
193 }
194
195 return string_constraint->DoesListMatch(val_list);
196 }
197
s_GetProtFeatures(CBioseq_Handle p_bsh,CSeqFeatData::ESubtype constraint_type)198 static vector<CConstRef<CSeq_feat> > s_GetProtFeatures(CBioseq_Handle p_bsh, CSeqFeatData::ESubtype constraint_type)
199 {
200 vector<CConstRef<CSeq_feat> > feat_list;
201 if (p_bsh) {
202 CFeat_CI f(p_bsh, constraint_type);
203 while (f) {
204 CConstRef<CSeq_feat> object;
205 object.Reset(f->GetOriginalSeq_feat());
206 feat_list.push_back(object);
207 ++f;
208 }
209 }
210 return feat_list;
211 }
212
213
GetRelatedFeatures(const CSeq_feat & obj_feat,CSeqFeatData::ESubtype constraint_type,CRef<CScope> scope)214 vector<CConstRef<CSeq_feat> > GetRelatedFeatures (const CSeq_feat& obj_feat, CSeqFeatData::ESubtype constraint_type, CRef<CScope> scope)
215 {
216 vector<CConstRef<CSeq_feat> > feat_list;
217
218 CSeqFeatData::ESubtype obj_type = obj_feat.GetData().GetSubtype();
219
220 // is one feature type a protein and the other not?
221 bool obj_is_prot = (CSeqFeatData::GetTypeFromSubtype(obj_type) == CSeqFeatData::e_Prot);
222 bool constraint_is_prot = (CSeqFeatData::GetTypeFromSubtype(constraint_type) == CSeqFeatData::e_Prot);
223 if (obj_is_prot && constraint_is_prot) {
224 // find feature anywhere on protein sequence
225 CBioseq_Handle p_bsh = scope->GetBioseqHandle(obj_feat.GetLocation());
226 feat_list = s_GetProtFeatures(p_bsh, constraint_type);
227 } else if (obj_is_prot && !constraint_is_prot) {
228 // use coding region for starting point of overlap comparison
229 CBioseq_Handle p_bsh = scope->GetBioseqHandle(obj_feat.GetLocation());
230 const CSeq_feat* cds = sequence::GetCDSForProduct(p_bsh);
231 if (cds) {
232 if (CSeqFeatData::GetTypeFromSubtype(constraint_type) == CSeqFeatData::e_Cdregion) {
233 feat_list.push_back(CConstRef<CSeq_feat>(cds));
234 } else {
235 feat_list = GetRelatedFeatures(*cds, constraint_type, scope);
236 }
237 }
238 } else if (!obj_is_prot && constraint_is_prot) {
239 // examine objects on protein sequence
240 // need to find coding region for obj_feat
241 if (obj_type == CSeqFeatData::eSubtype_cdregion) {
242 if (obj_feat.IsSetProduct()) {
243 CBioseq_Handle p_bsh = scope->GetBioseqHandle(obj_feat.GetProduct());
244 feat_list = s_GetProtFeatures(p_bsh, constraint_type);
245 }
246 } else if (obj_type == CSeqFeatData::eSubtype_mRNA) {
247 const CSeq_feat* cds = sequence::GetBestCdsForMrna(obj_feat, *scope);
248 if (cds) {
249 feat_list = GetRelatedFeatures(*cds, constraint_type, scope);
250 }
251 } else if (obj_type == CSeqFeatData::eSubtype_gene) {
252 list<CMappedFeat> cds_feats;
253 feature::GetCdssForGene(scope->GetSeq_featHandle(obj_feat), cds_feats);
254 for (auto& cds_it : cds_feats) {
255 vector<CConstRef<CSeq_feat> > this_list = GetRelatedFeatures(cds_it.GetOriginalFeature(), constraint_type, scope);
256 feat_list.insert(feat_list.end(), this_list.begin(), this_list.end());
257 }
258 }
259 } else {
260 // neither is a protein
261 if (constraint_type == CSeqFeatData::eSubtype_gene) {
262 CConstRef<CSeq_feat> f = sequence::GetOverlappingGene(obj_feat.GetLocation(), *scope);
263 if (f) {
264 feat_list.push_back(f);
265 }
266 } else if (obj_type == CSeqFeatData::eSubtype_gene) {
267 sequence::TFeatScores scores;
268 sequence::GetOverlappingFeatures (obj_feat.GetLocation(),
269 CSeqFeatData::GetTypeFromSubtype(constraint_type),
270 constraint_type,
271 sequence::eOverlap_Contains,
272 scores, *scope);
273 ITERATE (sequence::TFeatScores, it, scores) {
274 feat_list.push_back(it->second);
275 }
276 } else if (obj_type == CSeqFeatData::eSubtype_cdregion
277 && constraint_type == CSeqFeatData::eSubtype_mRNA) {
278 CConstRef<CSeq_feat> f = sequence::GetBestMrnaForCds(obj_feat, *scope);
279 if (f) {
280 feat_list.push_back(f);
281 }
282 } else if (constraint_type == CSeqFeatData::eSubtype_any || constraint_type == obj_type) {
283 CConstRef<CSeq_feat> f(&obj_feat);
284 feat_list.push_back(f);
285 }
286 }
287 return feat_list;
288 }
289
290 END_SCOPE(edit)
291 END_SCOPE(objects)
292 END_NCBI_SCOPE
293
294