1 /* $Id: source_edit.cpp 632623 2021-06-03 17:38:11Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin, NCBI
27 *
28 * File Description:
29 * functions for editing and working with biosources
30 */
31 #include <ncbi_pch.hpp>
32 #include <corelib/ncbistd.hpp>
33 #include <corelib/ncbiobj.hpp>
34 #include <objtools/edit/source_edit.hpp>
35 #include <objects/seqfeat/Org_ref.hpp>
36 #include <objects/seqfeat/OrgName.hpp>
37 #include <objects/seqfeat/OrgMod.hpp>
38 #include <objects/general/Dbtag.hpp>
39 #include <objects/taxon3/Taxon3_request.hpp>
40 #include <objects/taxon3/T3Request.hpp>
41 #include <objects/taxon3/SequenceOfInt.hpp>
42 #include <objects/taxon3/Taxon3_reply.hpp>
43 #include <objects/taxon3/T3Reply.hpp>
44 #include <objects/taxon3/taxon3.hpp>
45
46 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)47 BEGIN_SCOPE(objects)
48 BEGIN_SCOPE(edit)
49
50
51 bool CleanupForTaxnameChange( objects::CBioSource& src )
52 {
53 bool rval = RemoveOldName(src);
54 rval |= RemoveMod(src, COrgMod::eSubtype_type_material);
55 rval |= RemoveTaxId(src);
56 if (src.IsSetOrg() && src.GetOrg().IsSetCommon()) {
57 src.SetOrg().ResetCommon();
58 rval = true;
59 }
60 if (src.IsSetOrg() && src.GetOrg().IsSetSyn()) {
61 src.SetOrg().ResetSyn();
62 rval = true;
63 }
64 return rval;
65 }
66
RemoveOldName(objects::CBioSource & src)67 bool RemoveOldName( objects::CBioSource& src )
68 {
69 return RemoveMod(src, COrgMod::eSubtype_old_name);
70 }
71
RemoveMod(objects::CBioSource & src,objects::COrgMod::ESubtype subtype)72 bool RemoveMod( objects::CBioSource& src, objects::COrgMod::ESubtype subtype )
73 {
74 bool erased = false;
75 if (src.IsSetOrg() && src.GetOrg().IsSetOrgname()
76 && src.GetOrg().GetOrgname().IsSetMod()) {
77 COrgName::TMod::iterator it = src.SetOrg().SetOrgname().SetMod().begin();
78 while (it != src.SetOrg().SetOrgname().SetMod().end()) {
79 if ((*it)->GetSubtype() && (*it)->GetSubtype() == subtype) {
80 it = src.SetOrg().SetOrgname().SetMod().erase(it);
81 erased = true;
82 } else {
83 it++;
84 }
85 }
86 if (src.GetOrg().GetOrgname().GetMod().empty()) {
87 src.SetOrg().SetOrgname().ResetMod();
88 }
89 }
90 return erased;
91 }
92
RemoveTaxId(objects::CBioSource & src)93 bool RemoveTaxId( objects::CBioSource& src )
94 {
95 bool erased = false;
96 if (src.IsSetOrg() && src.GetOrg().IsSetDb()) {
97 COrg_ref::TDb::iterator it = src.SetOrg().SetDb().begin();
98 while (it != src.SetOrg().SetDb().end()) {
99 if ((*it)->IsSetDb() && NStr::EqualNocase((*it)->GetDb(), "taxon")) {
100 it = src.SetOrg().SetDb().erase(it);
101 erased = true;
102 } else {
103 it++;
104 }
105 }
106 if (src.GetOrg().GetDb().empty()) {
107 src.SetOrg().ResetDb();
108 }
109 }
110 return erased;
111 }
112
113
s_ProcessReply(const CT3Reply & reply,CRef<COrg_ref> org)114 bool s_ProcessReply(const CT3Reply& reply, CRef<COrg_ref> org)
115 {
116 if (reply.IsData()) {
117 org->Assign(reply.GetData().GetOrg());
118 return true;
119 } else if (reply.IsError() && reply.GetError().IsSetMessage()) {
120 ERR_POST(reply.GetError().GetMessage());
121 return false;
122 } else {
123 ERR_POST("Taxonomy service failure");
124 return false;
125 }
126 }
127
128
AddMissingCommonOrgMods(const COrg_ref & o1,const COrg_ref & o2,COrg_ref & common)129 void AddMissingCommonOrgMods(const COrg_ref& o1, const COrg_ref& o2, COrg_ref& common)
130 {
131 if (!o1.IsSetOrgMod() || !o2.IsSetOrgMod()) {
132 return;
133 }
134 ITERATE(COrgName::TMod, it1, o1.GetOrgname().GetMod()) {
135 bool found_in_both = false;
136 ITERATE(COrgName::TMod, it2, o2.GetOrgname().GetMod()) {
137 if ((*it1)->Equals(**it2)) {
138 found_in_both = true;
139 break;
140 }
141 }
142 if (found_in_both) {
143 bool already_in_common = false;
144 if (common.IsSetOrgMod()) {
145 ITERATE(COrgName::TMod, it3, common.GetOrgname().GetMod()) {
146 if ((*it3)->Equals(**it1)) {
147 already_in_common = true;
148 break;
149 }
150 }
151 }
152 if (!already_in_common) {
153 CRef<COrgMod> add(new COrgMod());
154 add->Assign(**it1);
155 common.SetOrgname().SetMod().push_back(add);
156 }
157 }
158 }
159 }
160
161
MakeCommonBioSource(const objects::CBioSource & src1,const objects::CBioSource & src2)162 CRef<CBioSource> MakeCommonBioSource(const objects::CBioSource& src1, const objects::CBioSource& src2)
163 {
164 CRef<CBioSource> common(NULL);
165
166 if (!src1.IsSetOrg() || !src2.IsSetOrg()) {
167 return common;
168 }
169
170 CTaxon3 taxon3;
171 taxon3.Init();
172
173 // do lookup before attempting to merge
174 vector<CRef<COrg_ref> > rq_list;
175 CRef<COrg_ref> o1(new COrg_ref());
176 o1->Assign(src1.GetOrg());
177 rq_list.push_back(o1);
178 CRef<COrg_ref> o2(new COrg_ref());
179 o2->Assign(src2.GetOrg());
180 rq_list.push_back(o2);
181 CRef<CTaxon3_reply> reply = taxon3.SendOrgRefList(rq_list);
182 if (!reply || reply->GetReply().size() != 2) {
183 ERR_POST("Taxonomy service failure");
184 return CRef<CBioSource>(NULL);
185 }
186 if (!s_ProcessReply(*(reply->GetReply().front()), o1) ||
187 !s_ProcessReply(*(reply->GetReply().back()), o2)) {
188 return common;
189 }
190
191 TTaxId taxid1 = o1->GetTaxId();
192 TTaxId taxid2 = o2->GetTaxId();
193 if (taxid1 == ZERO_TAX_ID) {
194 ERR_POST("No taxonomy ID for " + o1->GetTaxname());
195 return common;
196 } else if (taxid2 == ZERO_TAX_ID) {
197 ERR_POST("No taxonomy ID for " + o2->GetTaxname());
198 return common;
199 } else if (taxid1 == taxid2) {
200 CRef<CBioSource> tmp1(new CBioSource());
201 tmp1->Assign(src1);
202 tmp1->SetOrg().Assign(*o1);
203 CRef<CBioSource> tmp2(new CBioSource());
204 tmp2->Assign(src2);
205 tmp2->SetOrg().Assign(*o2);
206 common = tmp1->MakeCommon(*tmp2);
207 } else {
208 CRef<CT3Request> rq(new CT3Request());
209 rq->SetJoin().Set().push_back(TAX_ID_TO(int, taxid1));
210 rq->SetJoin().Set().push_back(TAX_ID_TO(int, taxid2));
211 string err_nums = "(" + NStr::NumericToString(taxid1) + "," + NStr::NumericToString(taxid2) + ")";
212 CTaxon3_request request;
213 request.SetRequest().push_back(rq);
214 CRef<CTaxon3_reply> reply = taxon3.SendRequest(request);
215 if (!reply || reply->GetReply().size() != 1) {
216 ERR_POST("Taxonomy service failure" + err_nums);
217 return CRef<CBioSource>(NULL);
218 }
219 const CT3Reply& join_reply = *(reply->GetReply().front());
220 if (join_reply.IsData()) {
221 if (join_reply.GetData().IsSetOrg()) {
222 if (join_reply.GetData().GetOrg().IsSetTaxname()) {
223 bool is_species_level = false, force_consult = false, has_nucleomorphs = false;
224 join_reply.GetData().GetTaxFlags(is_species_level, force_consult, has_nucleomorphs);
225 if (is_species_level) {
226 common = src1.MakeCommonExceptOrg(src2);
227 common->SetOrg().Assign(join_reply.GetData().GetOrg());
228 } else {
229 ERR_POST("Taxonomy join reply is not species level" + err_nums);
230 }
231 } else {
232 ERR_POST("Taxonomy join reply Org-ref does not contain taxname" + err_nums);
233 }
234 } else {
235 ERR_POST("Taxonomy join reply does not contain Org-ref" + err_nums);
236 }
237 }
238 }
239 return common;
240 }
241
242
243 END_SCOPE(edit)
244 END_SCOPE(objects)
245 END_NCBI_SCOPE
246
247