1 /* $Id: Org_ref.cpp 609820 2020-06-08 15:51:58Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: NCBI Staff
27 *
28 * File Description:
29 * Convenience methods for COrg_ref object
30 *
31 * Remark:
32 * This code was originally generated by application DATATOOL
33 * using specifications from the ASN data definition file
34 * 'seqfeat.asn'.
35 */
36
37 // standard includes
38
39 // generated includes
40 #include <ncbi_pch.hpp>
41 #include <objects/seqfeat/Org_ref.hpp>
42 #include <objects/general/Object_id.hpp>
43 #include <objects/general/Dbtag.hpp>
44
45 // generated classes
46
47 BEGIN_NCBI_SCOPE
48
49 BEGIN_objects_SCOPE // namespace ncbi::objects::
50
51 // destructor
~COrg_ref(void)52 COrg_ref::~COrg_ref(void)
53 {
54 }
55
56 // Appends a label to "label" based on content
GetLabel(string * label) const57 void COrg_ref::GetLabel(string* label) const
58 {
59 if (IsSetTaxname()) {
60 *label += GetTaxname();
61 } else if (IsSetCommon()) {
62 *label += GetCommon();
63 } else if (IsSetDb()) {
64 GetDb().front()->GetLabel(label);
65 }
66 }
67
68 static const char* const s_taxonName = "taxon" ;
69 static const string s_nomenclature = "nomenclature=";
70
71 TTaxId
GetTaxId() const72 COrg_ref::GetTaxId() const
73 {
74 if( ! IsSetDb() ) {
75 return ZERO_TAX_ID;
76 }
77 const TDb& lDbTags = GetDb();
78
79 for(TDb::const_iterator i = lDbTags.begin();
80 i != lDbTags.end();
81 ++i) {
82 if( i->GetPointer()
83 && i->GetObject().GetDb().compare(s_taxonName) == 0 ) {
84 const CObject_id& id = i->GetObject().GetTag();
85 if( id.IsId() )
86 return TAX_ID_FROM(CObject_id::TId, id.GetId());
87 }
88 }
89 return ZERO_TAX_ID;
90 }
91
92 TTaxId
SetTaxId(TTaxId tax_id)93 COrg_ref::SetTaxId( TTaxId tax_id )
94 {
95 TTaxId old_id = ZERO_TAX_ID;
96
97 TDb& lDbTags = SetDb();
98 // Try to update existing tax id first
99 for(TDb::iterator i = lDbTags.begin();
100 i != lDbTags.end();
101 ++i) {
102 if( *i && i->GetObject().GetDb() == s_taxonName ) {
103 CObject_id& id = i->GetObject().SetTag();
104 if( id.IsId() )
105 old_id = TAX_ID_FROM(CObject_id::TId, id.GetId());
106 id.SetId() = TAX_ID_TO(CObject_id::TId, tax_id);
107 return old_id;
108 }
109 }
110 // Add new tag
111 CRef< CDbtag > ref( new CDbtag() );
112 ref->SetDb( s_taxonName );
113 ref->SetTag().SetId(TAX_ID_TO(CObject_id::TId, tax_id) );
114 SetDb().push_back( ref );
115
116 return old_id;
117 }
118
IsSetLineage(void) const119 bool COrg_ref::IsSetLineage(void) const
120 {
121 return IsSetOrgname () && GetOrgname ().IsSetLineage ();
122 }
123
GetLineage(void) const124 const string& COrg_ref::GetLineage(void) const
125 {
126 return GetOrgname ().GetLineage ();
127 }
128
IsSetGcode(void) const129 bool COrg_ref::IsSetGcode(void) const
130 {
131 return IsSetOrgname () && GetOrgname ().IsSetGcode ();
132 }
133
GetGcode(void) const134 int COrg_ref::GetGcode(void) const
135 {
136 return GetOrgname ().GetGcode ();
137 }
138
IsSetMgcode(void) const139 bool COrg_ref::IsSetMgcode(void) const
140 {
141 return IsSetOrgname () && GetOrgname ().IsSetMgcode ();
142 }
143
GetMgcode(void) const144 int COrg_ref::GetMgcode(void) const
145 {
146 return GetOrgname ().GetMgcode ();
147 }
148
IsSetPgcode(void) const149 bool COrg_ref::IsSetPgcode(void) const
150 {
151 return IsSetOrgname () && GetOrgname ().IsSetPgcode ();
152 }
153
GetPgcode(void) const154 int COrg_ref::GetPgcode(void) const
155 {
156 return GetOrgname ().GetPgcode ();
157 }
158
IsSetDivision(void) const159 bool COrg_ref::IsSetDivision(void) const
160 {
161 return IsSetOrgname () && GetOrgname ().IsSetDiv ();
162 }
163
GetDivision(void) const164 const string& COrg_ref::GetDivision(void) const
165 {
166 return GetOrgname ().GetDiv ();
167 }
168
IsSetOrgMod(void) const169 bool COrg_ref::IsSetOrgMod(void) const
170 {
171 return IsSetOrgname () && GetOrgname ().IsSetMod ();
172 }
173
174
x_GetTaxnameAfterFirstTwoWords() const175 string COrg_ref::x_GetTaxnameAfterFirstTwoWords() const
176 {
177 string taxname = "";
178 if (IsSetTaxname()) {
179 taxname = GetTaxname();
180 }
181 // Look for modifiers in taxname after first two words
182 size_t pos = NStr::Find (taxname, " ");
183 if (pos == string::npos) {
184 taxname = "";
185 } else {
186 taxname = taxname.substr(pos + 1);
187 NStr::TruncateSpacesInPlace(taxname);
188 pos = NStr::Find (taxname, " ");
189 if (pos == string::npos) {
190 taxname = "";
191 } else {
192 taxname = taxname.substr(pos + 1);
193 NStr::TruncateSpacesInPlace(taxname);
194 }
195 }
196 return taxname;
197 }
198
199
// Reports whether value occurs in taxname as a whole word, i.e. at a
// position where neither the character before it nor the character after
// it is a letter.  A blank (empty or whitespace-only) taxname or value
// never matches.
bool s_FindWholeWord (string taxname, string value)
{
    // The characters isspace() accepts in the "C" locale.
    static const char* const kWhitespace = " \t\n\v\f\r";
    if (taxname.find_first_not_of(kWhitespace) == string::npos
        ||  value.find_first_not_of(kWhitespace) == string::npos) {
        return false;
    }

    const size_t value_len = value.length();
    // Resume one character after a rejected hit rather than value_len
    // characters after it, so that an overlapping occurrence is not skipped
    // (e.g. "a a" inside "xa a a").
    for (size_t pos = taxname.find(value);
         pos != string::npos;
         pos = taxname.find(value, pos + 1)) {
        const size_t after = pos + value_len;
        // isalpha() requires a value representable as unsigned char;
        // a plain char may be negative.
        const bool letter_before =
            pos != 0
            &&  isalpha(static_cast<unsigned char>(taxname[pos - 1])) != 0;
        const bool letter_after =
            after < taxname.length()
            &&  isalpha(static_cast<unsigned char>(taxname[after])) != 0;
        if (!letter_before  &&  !letter_after) {
            return true;
        }
    }
    return false;
}
218
219
IsVarietyValid(const string & variety) const220 bool COrg_ref::IsVarietyValid(const string& variety) const
221 {
222 if (NStr::IsBlank(variety)) {
223 return true;
224 }
225 string taxname = x_GetTaxnameAfterFirstTwoWords();
226 return s_FindWholeWord(taxname, variety);
227 }
228
229
HasValidVariety() const230 bool COrg_ref::HasValidVariety() const
231 {
232 if (!IsSetOrgname() || !GetOrgname().IsSetMod()) {
233 return false;
234 }
235 ITERATE(COrgName::TMod, it, GetOrgname().GetMod()) {
236 if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == COrgMod::eSubtype_variety
237 && (*it)->IsSetSubname() && !NStr::IsBlank((*it)->GetSubname())
238 && IsVarietyValid((*it)->GetSubname())) {
239 return true;
240 }
241 }
242 return false;
243 }
244
245
IsSubspeciesValid(const string & subspecies) const246 bool COrg_ref::IsSubspeciesValid(const string& subspecies) const
247 {
248 if (NStr::IsBlank(subspecies)) {
249 return true;
250 }
251 string taxname = x_GetTaxnameAfterFirstTwoWords();
252 if (s_FindWholeWord(taxname, subspecies)) {
253 return true;
254 } else {
255 return HasValidVariety();
256 }
257
258 }
259
260
// Copies Field from o1 into o3 when both o1 and o2 have Field set and the
// two values compare equal with NStr::Equal().
// The body is wrapped in do { } while (0) and carries no trailing
// semicolon, so "MAKE_COMMON(...);" is exactly one statement and can be
// used safely in an unbraced if/else.  Arguments are parenthesized so that
// any expression may be passed.
#define MAKE_COMMON(o1,o2,o3,Field) \
    do { \
        if ((o1).IsSet##Field()  &&  (o2).IsSet##Field() \
            &&  NStr::Equal((o1).Get##Field(), (o2).Get##Field())) { \
            (o3).Set##Field((o1).Get##Field()); \
        } \
    } while (0)
262
s_MakeCommonStringList(const list<string> & list1,const list<string> & list2,list<string> & list3)263 void s_MakeCommonStringList(const list< string >& list1, const list< string >& list2, list< string >& list3)
264 {
265 ITERATE(list< string >, it1, list1) {
266 bool found = false;
267 ITERATE(list< string >, it2, list2) {
268 if (NStr::Equal(*it1, *it2)) {
269 found = true;
270 break;
271 }
272 }
273 if (found) {
274 list3.push_back(*it1);
275 }
276 }
277 }
278
279
MakeCommon(const COrg_ref & other) const280 CRef<COrg_ref> COrg_ref::MakeCommon(const COrg_ref& other) const
281 {
282 TTaxId taxid1 = GetTaxId();
283 TTaxId taxid2 = other.GetTaxId();
284 if (taxid1 != taxid2) {
285 return CRef<COrg_ref>(NULL);
286 }
287
288 CRef<COrg_ref> common(new COrg_ref());
289 if (Equals(other)) {
290 common->Assign(*this);
291 } else {
292 MAKE_COMMON((*this), other, (*common), Taxname);
293 MAKE_COMMON((*this), other, (*common), Common);
294
295 // common mods
296 if (IsSetMod() && other.IsSetMod()) {
297 s_MakeCommonStringList(GetMod(), other.GetMod(), common->SetMod());
298 if (common->GetMod().empty()) {
299 common->ResetMod();
300 }
301 }
302
303 // common synonyms
304 if (IsSetSyn() && other.IsSetSyn()) {
305 s_MakeCommonStringList(GetSyn(), other.GetSyn(), common->SetSyn());
306 if (common->GetSyn().empty()) {
307 common->ResetSyn();
308 }
309 }
310
311 // common dbtags
312 if (IsSetDb() && other.IsSetDb()) {
313 ITERATE(TDb, it1, GetDb()) {
314 bool found = false;
315 ITERATE(TDb, it2, GetDb()) {
316 if ((*it1)->Equals(**it2)) {
317 found = true;
318 break;
319 }
320 }
321 if (found) {
322 CRef<CDbtag> add(new CDbtag());
323 add->Assign(**it1);
324 common->SetDb().push_back(add);
325 }
326 }
327 }
328
329 // common orgname
330 if (IsSetOrgname() && other.IsSetOrgname()) {
331 CRef<COrgName> orgname = GetOrgname().MakeCommon(other.GetOrgname());
332 if (orgname) {
333 common->SetOrgname().Assign(*orgname);
334 }
335 }
336 }
337
338 return common;
339 }
340
341
342 typedef map<string, CRef<COrg_ref>, PNocase> TOrgrefMap;
343 static TOrgrefMap s_OrgRefMap;
344 static vector<string> s_CommonTaxnameList;
345 static bool s_OrgRefMapInitialized = false;
346 DEFINE_STATIC_FAST_MUTEX(s_OrgRefMapMutex);
347
348 // automatically generated include file
349 #include "common_tax.inc"
350
s_ProcessOrgRefMapLine(const CTempString & line)351 static void s_ProcessOrgRefMapLine(const CTempString& line)
352 {
353 vector<string> tokens;
354 NStr::Split(line, "\t", tokens);
355 if (tokens.size() != 8) {
356 // ERR_POST_X(1, Warning << "Bad format in common_tax.txt entry " << line
357 // << "; disregarding");
358 } else {
359 NON_CONST_ITERATE(vector<string>, t, tokens) {
360 NStr::TruncateSpacesInPlace(*t);
361 if (NStr::Equal(*t, "-")) {
362 *t = kEmptyStr;
363 }
364 }
365
366 s_CommonTaxnameList.push_back(tokens[0]);
367 CRef<COrg_ref> org(new COrg_ref());
368 org->SetTaxname(tokens[0]);
369 if (!NStr::IsBlank(tokens[1])) {
370 org->SetCommon(tokens[1]);
371 }
372
373 if (!NStr::IsBlank(tokens[2])) {
374 try {
375 org->SetOrgname().SetGcode(NStr::StringToNonNegativeInt(tokens[2]));
376 } catch (CException& ex) {
377 }
378 }
379 if (!NStr::IsBlank(tokens[3])) {
380 try {
381 org->SetOrgname().SetMgcode(NStr::StringToNonNegativeInt(tokens[3]));
382 } catch (CException& ex) {
383 }
384 }
385 if (!NStr::IsBlank(tokens[4])) {
386 try {
387 org->SetOrgname().SetPgcode(NStr::StringToNonNegativeInt(tokens[4]));
388 } catch (CException& ex) {
389 }
390 }
391
392 if (!NStr::IsBlank(tokens[5])) {
393 try {
394 CRef<CDbtag>taxon(new CDbtag());
395 taxon->SetDb("taxon");
396 taxon->SetTag().SetId(NStr::StringToNonNegativeInt(tokens[5]));
397 org->SetDb().push_back(taxon);
398 } catch (CException& ex) {
399 }
400 }
401
402 if (!NStr::IsBlank(tokens[6])) {
403 org->SetOrgname().SetDiv(tokens[6]);
404 }
405
406 if (!NStr::IsBlank(tokens[7])) {
407 org->SetOrgname().SetLineage(tokens[7]);
408 }
409
410 s_OrgRefMap[tokens[0]] = org;
411 }
412 }
413
414
s_InitializeOrgRefMap(void)415 static void s_InitializeOrgRefMap(void)
416 {
417 CFastMutexGuard GUARD(s_OrgRefMapMutex);
418 if (s_OrgRefMapInitialized) {
419 return;
420 }
421 string file = g_FindDataFile("common_tax.txt");
422 CRef<ILineReader> lr;
423 if (!file.empty()) {
424 LOG_POST("Reading from " + file + " for popular organisms.");
425 try {
426 lr = ILineReader::New(file);
427 } NCBI_CATCH("s_InitializeOrgRefMap")
428 } else {
429 LOG_POST("Falling back on built-in data for popular organisms.");
430 }
431
432 if (lr.Empty()) {
433 size_t num_orgrefs = sizeof(kOrgRefList) / sizeof(char *);
434 for (size_t i = 0; i < num_orgrefs; i++) {
435 const char *p = kOrgRefList[i];
436 s_ProcessOrgRefMapLine(p);
437 }
438 } else {
439 do {
440 s_ProcessOrgRefMapLine(*++*lr);
441 } while (!lr->AtEOF());
442 }
443
444 s_OrgRefMapInitialized = true;
445 }
446
447
TableLookup(const string & taxname)448 CConstRef<COrg_ref> COrg_ref::TableLookup(const string& taxname)
449 {
450 s_InitializeOrgRefMap();
451 TOrgrefMap::iterator it = s_OrgRefMap.find(taxname);
452 if (it != s_OrgRefMap.end()) {
453 return CConstRef<COrg_ref>(it->second.GetPointer());
454 }
455 return CConstRef<COrg_ref>(NULL);
456 }
457
458
UpdateFromTable()459 bool COrg_ref::UpdateFromTable()
460 {
461 if (!IsSetTaxname() || NStr::IsBlank(GetTaxname())) {
462 return false;
463 }
464 CConstRef<COrg_ref> lookup = TableLookup(GetTaxname());
465 if (lookup) {
466 if (lookup->IsSetCommon() && !NStr::IsBlank(lookup->GetCommon())) {
467 SetCommon(lookup->GetCommon());
468 }
469 if (lookup->IsSetGcode()) {
470 SetOrgname().SetGcode(lookup->GetGcode());
471 }
472 if (lookup->IsSetMgcode()) {
473 SetOrgname().SetMgcode(lookup->GetMgcode());
474 }
475 if (lookup->IsSetDivision()) {
476 SetOrgname().SetDiv(lookup->GetDivision());
477 }
478 if (lookup->IsSetDb()) {
479 CObject_id::TId taxid = 0;
480 ITERATE(TDb, it, lookup->GetDb()) {
481 if ((*it)->IsSetDb() &&
482 (*it)->IsSetTag() &&
483 (*it)->GetTag().IsId() &&
484 NStr::Equal((*it)->GetDb(), "taxon")) {
485 taxid = (*it)->GetTag().GetId();
486 break;
487 }
488 }
489 if (taxid > 0) {
490 SetTaxId(TAX_ID_FROM(CObject_id::TId, taxid));
491 }
492 }
493 if (lookup->IsSetLineage()) {
494 SetOrgname().SetLineage(lookup->GetOrgname().GetLineage());
495 }
496 return true;
497 } else {
498 return false;
499 }
500 }
501
502
GetTaxnameList()503 const vector<string>& COrg_ref::GetTaxnameList()
504 {
505 s_InitializeOrgRefMap();
506 return s_CommonTaxnameList;
507 }
508
509
CleanForGenBank()510 void COrg_ref::CleanForGenBank()
511 {
512 ResetSyn();
513 }
514
// True when none of the bits in f is set in a.  Both arguments are
// parenthesized so that expressions such as (A | B) can be passed for f
// without operator precedence changing the result.
#define NO_FLAG(a,f) ((( a ) & ( f )) == 0)
516
FilterOutParts(fOrgref_parts to_remain)517 void COrg_ref::FilterOutParts( fOrgref_parts to_remain )
518 {
519 if( to_remain == eOrgref_all ) {
520 return;
521 } else if( to_remain == eOrgref_nothing ) {
522 Reset();
523 } else {
524 if( NO_FLAG( to_remain, eOrgref_taxname ) && IsSetTaxname() ) {
525 ResetTaxname();
526 }
527 if( NO_FLAG( to_remain, eOrgref_common ) && IsSetCommon() ) {
528 ResetCommon();
529 }
530 if( NO_FLAG( to_remain, eOrgref_mod ) && IsSetMod() ) {
531 ResetMod();
532 }
533 if( IsSetDb() ) {
534 if( NO_FLAG( to_remain, eOrgref_db_all ) ) {
535 ResetDb();
536 } else {
537 if( NO_FLAG( to_remain, eOrgref_db_taxid ) ) {
538 TDb& lDbTags = SetDb();
539 for(TDb::iterator i = lDbTags.begin(); i != lDbTags.end(); ) {
540 if( *i && i->GetObject().GetDb() == s_taxonName ) {
541 i = lDbTags.erase( i );
542 } else {
543 ++i;
544 }
545 }
546 }
547 }
548 }
549 if( NO_FLAG( to_remain, eOrgref_syn ) && IsSetSyn() ) {
550 ResetSyn();
551 }
552 if( IsSetOrgname() ) {
553 if( NO_FLAG( to_remain, eOrgref_on_all ) ) {
554 ResetOrgname();
555 } else {
556 COrgName& on = SetOrgname();
557 if( NO_FLAG( to_remain, eOrgref_on_name ) && on.IsSetName() ) {
558 on.ResetName();
559 }
560 if( on.IsSetMod() ) {
561 if( NO_FLAG( to_remain, eOrgref_on_mod ) && on.IsSetMod() ) {
562 on.ResetMod();
563 } else { // Filter out the rest mods
564 if( NO_FLAG( to_remain, eOrgref_on_mod_nom ) ) {
565 on.ResetNomenclature();
566 }
567 if( NO_FLAG( to_remain, eOrgref_on_mod_oldname ) ) {
568 on.RemoveModBySubtype( COrgMod::eSubtype_old_name );
569 }
570 if( NO_FLAG( to_remain, eOrgref_on_mod_tm ) ) {
571 on.RemoveModBySubtype( COrgMod::eSubtype_type_material );
572 }
573 }
574 }
575 if( on.IsSetAttrib() ) {
576 if( NO_FLAG( to_remain, eOrgref_on_attr_all ) ) {
577 on.ResetAttrib();
578 } else {
579 if( NO_FLAG( to_remain, eOrgref_on_attr_nofwd ) && on.IsModifierForwardingDisabled() ) {
580 on.EnableModifierForwarding();
581 }
582 }
583 }
584 if( NO_FLAG( to_remain, eOrgref_on_lin ) && on.IsSetLineage() ) {
585 on.ResetLineage();
586 }
587 if( NO_FLAG( to_remain, eOrgref_on_gc ) && on.IsSetGcode() ) {
588 on.ResetGcode();
589 }
590 if( NO_FLAG( to_remain, eOrgref_on_mgc ) && on.IsSetMgcode() ) {
591 on.ResetMgcode();
592 }
593 if( NO_FLAG( to_remain, eOrgref_on_pgc ) && on.IsSetPgcode() ) {
594 on.ResetPgcode();
595 }
596 if( NO_FLAG( to_remain, eOrgref_on_div ) && on.IsSetDiv() ) {
597 on.ResetDiv();
598 }
599
600 }
601 }
602 }
603 }
604
605
606 END_objects_SCOPE // namespace ncbi::objects::
607
608 END_NCBI_SCOPE
609
610 /* Original file checksum: lines: 61, chars: 1882, CRC32: c3300cc2 */
611