1 /*  $Id: field_handler.cpp 632623 2021-06-03 17:38:11Z ivanov $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin, NCBI
27 *
28 * File Description:
29 *   CFieldHandler parent class
30 */
31 #include <ncbi_pch.hpp>
32 #include <corelib/ncbistd.hpp>
33 #include <corelib/ncbiobj.hpp>
34 #include <objects/seqfeat/Seq_feat.hpp>
35 #include <objects/seq/Seq_descr.hpp>
36 #include <objects/general/User_object.hpp>
37 #include <objects/general/Object_id.hpp>
38 #include <objmgr/seq_entry_ci.hpp>
39 #include <objmgr/seq_feat_handle.hpp>
40 #include <objmgr/feat_ci.hpp>
41 #include <objmgr/seq_annot_ci.hpp>
42 #include <objmgr/seq_annot_handle.hpp>
43 #include <objmgr/util/sequence.hpp>
44 #include <objtools/edit/field_handler.hpp>
45 #include <objtools/edit/dblink_field.hpp>
46 #include <objtools/edit/gb_block_field.hpp>
47 
48 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)49 BEGIN_SCOPE(objects)
50 BEGIN_SCOPE(edit)
51 
52 bool CFieldHandler::QualifierNamesAreEquivalent (string name1, string name2)
53 {
54     // ignore protein at beginning
55     const string protein("protein");
56     if (NStr::StartsWith(name1, protein)) {
57         name1 = name1.substr(protein.length());
58     }
59     if (NStr::StartsWith(name2, protein)) {
60         name2 = name2.substr(protein.length());
61     }
62 
63     // spaces, dashes, and underscores do not count
64     NStr::ReplaceInPlace (name1, " ", "");
65     NStr::ReplaceInPlace (name1, "_", "");
66     NStr::ReplaceInPlace (name1, "-", "");
67     NStr::ReplaceInPlace (name2, " ", "");
68     NStr::ReplaceInPlace (name2, "_", "");
69     NStr::ReplaceInPlace (name2, "-", "");
70 
71     return NStr::EqualNocase(name1, name2);
72 }
73 
74 
GetRelatedApplyObjects(const CObject & object,CRef<CScope> scope)75 vector<CRef<CApplyObject> > CFieldHandler::GetRelatedApplyObjects(const CObject& object, CRef<CScope> scope)
76 {
77     vector<CRef<CApplyObject> > related = GetApplyObjectsFromRelatedObjects(GetRelatedObjects(object, scope), scope);
78 
79     return related;
80 }
81 
82 
GetApplyObjectsFromRelatedObjects(vector<CConstRef<CObject>> related,CRef<CScope> scope)83 vector<CRef<CApplyObject> > CFieldHandler::GetApplyObjectsFromRelatedObjects(vector<CConstRef<CObject> > related, CRef<CScope> scope)
84 {
85     vector<CRef<CApplyObject> > rval;
86 
87     ITERATE(vector<CConstRef<CObject> >, it, related) {
88         const CSeqdesc * obj_desc = dynamic_cast<const CSeqdesc *>((*it).GetPointer());
89         const CSeq_feat * obj_feat = dynamic_cast<const CSeq_feat *>((*it).GetPointer());
90         if (obj_desc) {
91             CSeq_entry_Handle seh = GetSeqEntryForSeqdesc(scope, *obj_desc);
92             CRef<CSeqdesc> new_desc(new CSeqdesc());
93             new_desc->Assign(*obj_desc);
94             CRef<CObject> editable(new_desc.GetPointer());
95             CRef<CApplyObject> apply(new CApplyObject(seh, *it, editable));
96             rval.push_back(apply);
97         } else {
98             CBioseq_Handle bsh = scope->GetBioseqHandle(obj_feat->GetLocation());
99             CRef<CApplyObject> apply(new CApplyObject(bsh, *obj_feat));
100             rval.push_back(apply);
101         }
102     }
103 
104     return rval;
105 }
106 
107 
108 CRef<CFieldHandler>
Create(const string & field_name)109 CFieldHandlerFactory::Create(const string& field_name)
110 {
111     CDBLinkField::EDBLinkFieldType dblink_field = CDBLinkField::GetTypeForLabel(field_name);
112     if (dblink_field != CDBLinkField::eDBLinkFieldType_Unknown) {
113         return CRef<CFieldHandler>(new CDBLinkField(dblink_field));
114     }
115 #if 0
116     if (s_IsSequenceIDField(field_name)) {
117         return CRef<CFieldHandler>(new CSeqIdField());
118     }
119     CPubField::EPubFieldType pub_field = CPubField::GetTypeForLabel(field_name);
120     if (pub_field != CPubField::ePubFieldType_Unknown) {
121         return CRef<CFieldHandler>(new CPubField(pub_field));
122     }
123     CMolInfoField::EMolInfoFieldType molinfo_field = CMolInfoField::GetFieldType(field_name);
124     if (molinfo_field != CMolInfoField::e_Unknown) {
125         return CRef<CFieldHandler>(new CMolInfoField(molinfo_field));
126     }
127 
128     if (NStr::EqualNocase(field_name, kGenomeProjectID)) {
129         return CRef<CFieldHandler>(new CGenomeProjectField());
130     }
131 #endif
132     if (CFieldHandler::QualifierNamesAreEquivalent(field_name, kCommentDescriptorLabel)) {
133         return CRef<CFieldHandler>(new CCommentDescField());
134     }
135     if (CFieldHandler::QualifierNamesAreEquivalent(field_name, kDefinitionLineLabel)) {
136         return CRef<CFieldHandler>( new CDefinitionLineField());
137     }
138     CGBBlockField::EGBBlockFieldType gbblock_field = CGBBlockField::GetTypeForLabel(field_name);
139     if (gbblock_field != CGBBlockField::eGBBlockFieldType_Unknown) {
140         return CRef<CFieldHandler>(new CGBBlockField(gbblock_field));
141     }
142 
143     // empty
144     CRef<CFieldHandler> empty;
145     return empty;
146 }
147 
148 
s_IsSequenceIDField(const string & field)149 bool CFieldHandlerFactory::s_IsSequenceIDField(const string& field)
150 {
151     return CFieldHandler::QualifierNamesAreEquivalent(field, kFieldTypeSeqId);
152 }
153 
154 
DoesObjectMatchFieldConstraint(const CObject & object,const string & field_name,CRef<CStringConstraint> string_constraint,CRef<CScope> scope)155 bool DoesObjectMatchFieldConstraint (const CObject& object, const string& field_name, CRef<CStringConstraint> string_constraint, CRef<CScope> scope)
156 {
157     if (NStr::IsBlank(field_name) || !string_constraint) {
158         return true;
159     }
160 
161     CRef<CFieldHandler> fh = CFieldHandlerFactory::Create(field_name);
162     if (!fh) {
163         return false;
164     }
165 
166     vector<string> val_list;
167     vector<CConstRef<CObject> > objs = fh->GetRelatedObjects (object, scope);
168     ITERATE(vector<CConstRef<CObject> >, it, objs) {
169         vector<string> add = fh->GetVals(**it);
170         val_list.insert(val_list.end(), add.begin(), add.end());
171     }
172 
173     return string_constraint->DoesListMatch(val_list);
174 }
175 
176 
DoesApplyObjectMatchFieldConstraint(const CApplyObject & object,const string & field_name,CRef<CStringConstraint> string_constraint)177 bool DoesApplyObjectMatchFieldConstraint (const CApplyObject& object, const string& field_name, CRef<CStringConstraint> string_constraint)
178 {
179     if (NStr::IsBlank(field_name) || !string_constraint) {
180         return true;
181     }
182 
183     CRef<CFieldHandler> fh = CFieldHandlerFactory::Create(field_name);
184     if (!fh) {
185         return false;
186     }
187 
188     vector<string> val_list;
189     vector<CConstRef<CObject> > objs = fh->GetRelatedObjects (object);
190     ITERATE(vector<CConstRef<CObject> >, it, objs) {
191         vector<string> add = fh->GetVals(**it);
192         val_list.insert(val_list.end(), add.begin(), add.end());
193     }
194 
195     return string_constraint->DoesListMatch(val_list);
196 }
197 
s_GetProtFeatures(CBioseq_Handle p_bsh,CSeqFeatData::ESubtype constraint_type)198 static vector<CConstRef<CSeq_feat> > s_GetProtFeatures(CBioseq_Handle p_bsh, CSeqFeatData::ESubtype constraint_type)
199 {
200     vector<CConstRef<CSeq_feat> > feat_list;
201     if (p_bsh) {
202         CFeat_CI f(p_bsh, constraint_type);
203         while (f) {
204             CConstRef<CSeq_feat> object;
205             object.Reset(f->GetOriginalSeq_feat());
206             feat_list.push_back(object);
207             ++f;
208         }
209     }
210     return feat_list;
211 }
212 
213 
GetRelatedFeatures(const CSeq_feat & obj_feat,CSeqFeatData::ESubtype constraint_type,CRef<CScope> scope)214 vector<CConstRef<CSeq_feat> > GetRelatedFeatures (const CSeq_feat& obj_feat, CSeqFeatData::ESubtype constraint_type, CRef<CScope> scope)
215 {
216     vector<CConstRef<CSeq_feat> > feat_list;
217 
218     CSeqFeatData::ESubtype obj_type = obj_feat.GetData().GetSubtype();
219 
220     // is one feature type a protein and the other not?
221     bool obj_is_prot = (CSeqFeatData::GetTypeFromSubtype(obj_type) == CSeqFeatData::e_Prot);
222     bool constraint_is_prot = (CSeqFeatData::GetTypeFromSubtype(constraint_type) == CSeqFeatData::e_Prot);
223     if (obj_is_prot && constraint_is_prot) {
224         // find feature anywhere on protein sequence
225         CBioseq_Handle p_bsh = scope->GetBioseqHandle(obj_feat.GetLocation());
226         feat_list = s_GetProtFeatures(p_bsh, constraint_type);
227     } else if (obj_is_prot && !constraint_is_prot) {
228         // use coding region for starting point of overlap comparison
229         CBioseq_Handle p_bsh = scope->GetBioseqHandle(obj_feat.GetLocation());
230         const CSeq_feat* cds = sequence::GetCDSForProduct(p_bsh);
231         if (cds) {
232             if (CSeqFeatData::GetTypeFromSubtype(constraint_type) == CSeqFeatData::e_Cdregion) {
233                 feat_list.push_back(CConstRef<CSeq_feat>(cds));
234             } else {
235                 feat_list = GetRelatedFeatures(*cds, constraint_type, scope);
236             }
237         }
238     } else if (!obj_is_prot && constraint_is_prot) {
239         // examine objects on protein sequence
240         // need to find coding region for obj_feat
241         if (obj_type == CSeqFeatData::eSubtype_cdregion) {
242             if (obj_feat.IsSetProduct()) {
243                 CBioseq_Handle p_bsh = scope->GetBioseqHandle(obj_feat.GetProduct());
244                 feat_list = s_GetProtFeatures(p_bsh, constraint_type);
245             }
246         } else if (obj_type == CSeqFeatData::eSubtype_mRNA) {
247             const CSeq_feat* cds = sequence::GetBestCdsForMrna(obj_feat, *scope);
248             if (cds) {
249                 feat_list = GetRelatedFeatures(*cds, constraint_type, scope);
250             }
251         } else if (obj_type == CSeqFeatData::eSubtype_gene) {
252             list<CMappedFeat> cds_feats;
253             feature::GetCdssForGene(scope->GetSeq_featHandle(obj_feat), cds_feats);
254             for (auto& cds_it : cds_feats) {
255                 vector<CConstRef<CSeq_feat> > this_list = GetRelatedFeatures(cds_it.GetOriginalFeature(), constraint_type, scope);
256                 feat_list.insert(feat_list.end(), this_list.begin(), this_list.end());
257             }
258         }
259     } else {
260         // neither is a protein
261         if (constraint_type == CSeqFeatData::eSubtype_gene) {
262             CConstRef<CSeq_feat> f = sequence::GetOverlappingGene(obj_feat.GetLocation(), *scope);
263             if (f) {
264                 feat_list.push_back(f);
265             }
266         } else if (obj_type == CSeqFeatData::eSubtype_gene) {
267             sequence::TFeatScores scores;
268             sequence::GetOverlappingFeatures (obj_feat.GetLocation(),
269                                               CSeqFeatData::GetTypeFromSubtype(constraint_type),
270                                               constraint_type,
271                                               sequence::eOverlap_Contains,
272                                               scores, *scope);
273             ITERATE (sequence::TFeatScores, it, scores) {
274                 feat_list.push_back(it->second);
275             }
276         } else if (obj_type == CSeqFeatData::eSubtype_cdregion
277             && constraint_type == CSeqFeatData::eSubtype_mRNA) {
278             CConstRef<CSeq_feat> f = sequence::GetBestMrnaForCds(obj_feat, *scope);
279             if (f) {
280                 feat_list.push_back(f);
281             }
282         } else if (constraint_type == CSeqFeatData::eSubtype_any || constraint_type == obj_type) {
283             CConstRef<CSeq_feat> f(&obj_feat);
284             feat_list.push_back(f);
285         }
286     }
287     return feat_list;
288 }
289 
290 END_SCOPE(edit)
291 END_SCOPE(objects)
292 END_NCBI_SCOPE
293 
294