1 /*  $Id: source_edit.cpp 632623 2021-06-03 17:38:11Z ivanov $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin, NCBI
27 *
28 * File Description:
29 *   functions for editing and working with biosources
30 */
31 #include <ncbi_pch.hpp>
32 #include <corelib/ncbistd.hpp>
33 #include <corelib/ncbiobj.hpp>
34 #include <objtools/edit/source_edit.hpp>
35 #include <objects/seqfeat/Org_ref.hpp>
36 #include <objects/seqfeat/OrgName.hpp>
37 #include <objects/seqfeat/OrgMod.hpp>
38 #include <objects/general/Dbtag.hpp>
39 #include <objects/taxon3/Taxon3_request.hpp>
40 #include <objects/taxon3/T3Request.hpp>
41 #include <objects/taxon3/SequenceOfInt.hpp>
42 #include <objects/taxon3/Taxon3_reply.hpp>
43 #include <objects/taxon3/T3Reply.hpp>
44 #include <objects/taxon3/taxon3.hpp>
45 
46 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)47 BEGIN_SCOPE(objects)
48 BEGIN_SCOPE(edit)
49 
50 
51 bool CleanupForTaxnameChange( objects::CBioSource& src )
52 {
53     bool rval = RemoveOldName(src);
54     rval |= RemoveMod(src, COrgMod::eSubtype_type_material);
55     rval |= RemoveTaxId(src);
56     if (src.IsSetOrg() && src.GetOrg().IsSetCommon()) {
57         src.SetOrg().ResetCommon();
58         rval = true;
59     }
60     if (src.IsSetOrg() && src.GetOrg().IsSetSyn()) {
61         src.SetOrg().ResetSyn();
62         rval = true;
63     }
64     return rval;
65 }
66 
RemoveOldName(objects::CBioSource & src)67 bool RemoveOldName( objects::CBioSource& src )
68 {
69     return RemoveMod(src, COrgMod::eSubtype_old_name);
70 }
71 
RemoveMod(objects::CBioSource & src,objects::COrgMod::ESubtype subtype)72 bool RemoveMod( objects::CBioSource& src, objects::COrgMod::ESubtype subtype )
73 {
74     bool erased = false;
75     if (src.IsSetOrg() && src.GetOrg().IsSetOrgname()
76         && src.GetOrg().GetOrgname().IsSetMod()) {
77         COrgName::TMod::iterator it = src.SetOrg().SetOrgname().SetMod().begin();
78         while (it != src.SetOrg().SetOrgname().SetMod().end()) {
79             if ((*it)->GetSubtype() && (*it)->GetSubtype() == subtype) {
80                 it = src.SetOrg().SetOrgname().SetMod().erase(it);
81                 erased = true;
82             } else {
83                 it++;
84             }
85         }
86         if (src.GetOrg().GetOrgname().GetMod().empty()) {
87             src.SetOrg().SetOrgname().ResetMod();
88         }
89     }
90     return erased;
91 }
92 
RemoveTaxId(objects::CBioSource & src)93 bool RemoveTaxId( objects::CBioSource& src )
94 {
95     bool erased = false;
96     if (src.IsSetOrg() && src.GetOrg().IsSetDb()) {
97         COrg_ref::TDb::iterator it = src.SetOrg().SetDb().begin();
98         while (it != src.SetOrg().SetDb().end()) {
99             if ((*it)->IsSetDb() && NStr::EqualNocase((*it)->GetDb(), "taxon")) {
100                 it = src.SetOrg().SetDb().erase(it);
101                 erased = true;
102             } else {
103                 it++;
104             }
105         }
106         if (src.GetOrg().GetDb().empty()) {
107             src.SetOrg().ResetDb();
108         }
109     }
110     return erased;
111 }
112 
113 
s_ProcessReply(const CT3Reply & reply,CRef<COrg_ref> org)114 bool s_ProcessReply(const CT3Reply& reply, CRef<COrg_ref> org)
115 {
116     if (reply.IsData()) {
117         org->Assign(reply.GetData().GetOrg());
118         return true;
119     } else if (reply.IsError() && reply.GetError().IsSetMessage()) {
120         ERR_POST(reply.GetError().GetMessage());
121         return false;
122     } else {
123         ERR_POST("Taxonomy service failure");
124         return false;
125     }
126 }
127 
128 
AddMissingCommonOrgMods(const COrg_ref & o1,const COrg_ref & o2,COrg_ref & common)129 void AddMissingCommonOrgMods(const COrg_ref& o1, const COrg_ref& o2, COrg_ref& common)
130 {
131     if (!o1.IsSetOrgMod() || !o2.IsSetOrgMod()) {
132         return;
133     }
134     ITERATE(COrgName::TMod, it1, o1.GetOrgname().GetMod()) {
135         bool found_in_both = false;
136         ITERATE(COrgName::TMod, it2, o2.GetOrgname().GetMod()) {
137             if ((*it1)->Equals(**it2)) {
138                 found_in_both = true;
139                 break;
140             }
141         }
142         if (found_in_both) {
143             bool already_in_common = false;
144             if (common.IsSetOrgMod()) {
145                 ITERATE(COrgName::TMod, it3, common.GetOrgname().GetMod()) {
146                     if ((*it3)->Equals(**it1)) {
147                         already_in_common = true;
148                         break;
149                     }
150                 }
151             }
152             if (!already_in_common) {
153                 CRef<COrgMod> add(new COrgMod());
154                 add->Assign(**it1);
155                 common.SetOrgname().SetMod().push_back(add);
156             }
157         }
158     }
159 }
160 
161 
MakeCommonBioSource(const objects::CBioSource & src1,const objects::CBioSource & src2)162 CRef<CBioSource> MakeCommonBioSource(const objects::CBioSource& src1, const objects::CBioSource& src2)
163 {
164     CRef<CBioSource> common(NULL);
165 
166     if (!src1.IsSetOrg() || !src2.IsSetOrg()) {
167         return common;
168     }
169 
170     CTaxon3 taxon3;
171     taxon3.Init();
172 
173     // do lookup before attempting to merge
174     vector<CRef<COrg_ref> > rq_list;
175     CRef<COrg_ref> o1(new COrg_ref());
176     o1->Assign(src1.GetOrg());
177     rq_list.push_back(o1);
178     CRef<COrg_ref> o2(new COrg_ref());
179     o2->Assign(src2.GetOrg());
180     rq_list.push_back(o2);
181     CRef<CTaxon3_reply> reply = taxon3.SendOrgRefList(rq_list);
182     if (!reply || reply->GetReply().size() != 2) {
183         ERR_POST("Taxonomy service failure");
184         return CRef<CBioSource>(NULL);
185     }
186     if (!s_ProcessReply(*(reply->GetReply().front()), o1) ||
187         !s_ProcessReply(*(reply->GetReply().back()), o2)) {
188         return common;
189     }
190 
191     TTaxId taxid1 = o1->GetTaxId();
192     TTaxId taxid2 = o2->GetTaxId();
193     if (taxid1 == ZERO_TAX_ID) {
194         ERR_POST("No taxonomy ID for " + o1->GetTaxname());
195         return common;
196     } else if (taxid2 == ZERO_TAX_ID) {
197         ERR_POST("No taxonomy ID for " + o2->GetTaxname());
198         return common;
199     } else if (taxid1 == taxid2) {
200         CRef<CBioSource> tmp1(new CBioSource());
201         tmp1->Assign(src1);
202         tmp1->SetOrg().Assign(*o1);
203         CRef<CBioSource> tmp2(new CBioSource());
204         tmp2->Assign(src2);
205         tmp2->SetOrg().Assign(*o2);
206         common = tmp1->MakeCommon(*tmp2);
207     } else {
208         CRef<CT3Request> rq(new CT3Request());
209         rq->SetJoin().Set().push_back(TAX_ID_TO(int, taxid1));
210         rq->SetJoin().Set().push_back(TAX_ID_TO(int, taxid2));
211         string err_nums = "(" + NStr::NumericToString(taxid1) + "," + NStr::NumericToString(taxid2) + ")";
212         CTaxon3_request request;
213         request.SetRequest().push_back(rq);
214         CRef<CTaxon3_reply> reply = taxon3.SendRequest(request);
215         if (!reply || reply->GetReply().size() != 1) {
216             ERR_POST("Taxonomy service failure" + err_nums);
217             return CRef<CBioSource>(NULL);
218         }
219         const CT3Reply& join_reply = *(reply->GetReply().front());
220         if (join_reply.IsData()) {
221             if (join_reply.GetData().IsSetOrg()) {
222                 if (join_reply.GetData().GetOrg().IsSetTaxname()) {
223                     bool is_species_level = false, force_consult = false, has_nucleomorphs = false;
224                     join_reply.GetData().GetTaxFlags(is_species_level, force_consult, has_nucleomorphs);
225                     if (is_species_level) {
226                         common = src1.MakeCommonExceptOrg(src2);
227                         common->SetOrg().Assign(join_reply.GetData().GetOrg());
228                     } else {
229                         ERR_POST("Taxonomy join reply is not species level" + err_nums);
230                     }
231                 } else {
232                     ERR_POST("Taxonomy join reply Org-ref does not contain taxname" + err_nums);
233                 }
234             } else {
235                 ERR_POST("Taxonomy join reply does not contain Org-ref" + err_nums);
236             }
237         }
238     }
239     return common;
240 }
241 
242 
243 END_SCOPE(edit)
244 END_SCOPE(objects)
245 END_NCBI_SCOPE
246 
247