1 /* $Id: Org_ref.cpp 609820 2020-06-08 15:51:58Z grichenk $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  NCBI Staff
27  *
28  * File Description:
29  *   Convenience methods for COrg_ref object
30  *
31  * Remark:
32  *   This code was originally generated by application DATATOOL
33  *   using specifications from the ASN data definition file
34  *   'seqfeat.asn'.
35  */
36 
37 // standard includes
38 
39 // generated includes
40 #include <ncbi_pch.hpp>
41 #include <objects/seqfeat/Org_ref.hpp>
42 #include <objects/general/Object_id.hpp>
43 #include <objects/general/Dbtag.hpp>
44 
45 // generated classes
46 
47 BEGIN_NCBI_SCOPE
48 
49 BEGIN_objects_SCOPE // namespace ncbi::objects::
50 
51 // destructor
~COrg_ref(void)52 COrg_ref::~COrg_ref(void)
53 {
54 }
55 
56 // Appends a label to "label" based on content
GetLabel(string * label) const57 void COrg_ref::GetLabel(string* label) const
58 {
59     if (IsSetTaxname()) {
60         *label += GetTaxname();
61     } else if (IsSetCommon()) {
62         *label += GetCommon();
63     } else if (IsSetDb()) {
64         GetDb().front()->GetLabel(label);
65     }
66 }
67 
68 static const char* const s_taxonName = "taxon" ;
69 static const string s_nomenclature = "nomenclature=";
70 
71 TTaxId
GetTaxId() const72 COrg_ref::GetTaxId() const
73 {
74     if( ! IsSetDb() ) {
75         return ZERO_TAX_ID;
76     }
77     const TDb& lDbTags = GetDb();
78 
79     for(TDb::const_iterator i = lDbTags.begin();
80 	i != lDbTags.end();
81 	++i) {
82 	if( i->GetPointer()
83 	    && i->GetObject().GetDb().compare(s_taxonName) == 0 ) {
84 	    const CObject_id& id = i->GetObject().GetTag();
85 	    if( id.IsId() )
86 		return TAX_ID_FROM(CObject_id::TId, id.GetId());
87 	}
88     }
89     return ZERO_TAX_ID;
90 }
91 
92 TTaxId
SetTaxId(TTaxId tax_id)93 COrg_ref::SetTaxId( TTaxId tax_id )
94 {
95     TTaxId old_id = ZERO_TAX_ID;
96 
97     TDb& lDbTags = SetDb();
98     // Try to update existing tax id first
99     for(TDb::iterator i = lDbTags.begin();
100 	i != lDbTags.end();
101 	++i) {
102 	if( *i && i->GetObject().GetDb() == s_taxonName ) {
103 	    CObject_id& id = i->GetObject().SetTag();
104 	    if( id.IsId() )
105 		old_id = TAX_ID_FROM(CObject_id::TId, id.GetId());
106 	    id.SetId() = TAX_ID_TO(CObject_id::TId, tax_id);
107 	    return old_id;
108 	}
109     }
110     // Add new tag
111     CRef< CDbtag > ref( new CDbtag() );
112     ref->SetDb( s_taxonName );
113     ref->SetTag().SetId(TAX_ID_TO(CObject_id::TId, tax_id) );
114     SetDb().push_back( ref );
115 
116     return old_id;
117 }
118 
IsSetLineage(void) const119 bool COrg_ref::IsSetLineage(void) const
120 {
121     return IsSetOrgname () && GetOrgname ().IsSetLineage ();
122 }
123 
GetLineage(void) const124 const string& COrg_ref::GetLineage(void) const
125 {
126     return GetOrgname ().GetLineage ();
127 }
128 
IsSetGcode(void) const129 bool COrg_ref::IsSetGcode(void) const
130 {
131     return IsSetOrgname () && GetOrgname ().IsSetGcode ();
132 }
133 
GetGcode(void) const134 int COrg_ref::GetGcode(void) const
135 {
136     return GetOrgname ().GetGcode ();
137 }
138 
IsSetMgcode(void) const139 bool COrg_ref::IsSetMgcode(void) const
140 {
141     return IsSetOrgname () && GetOrgname ().IsSetMgcode ();
142 }
143 
GetMgcode(void) const144 int COrg_ref::GetMgcode(void) const
145 {
146     return GetOrgname ().GetMgcode ();
147 }
148 
IsSetPgcode(void) const149 bool COrg_ref::IsSetPgcode(void) const
150 {
151     return IsSetOrgname () && GetOrgname ().IsSetPgcode ();
152 }
153 
GetPgcode(void) const154 int COrg_ref::GetPgcode(void) const
155 {
156     return GetOrgname ().GetPgcode ();
157 }
158 
IsSetDivision(void) const159 bool COrg_ref::IsSetDivision(void) const
160 {
161     return IsSetOrgname () && GetOrgname ().IsSetDiv ();
162 }
163 
GetDivision(void) const164 const string& COrg_ref::GetDivision(void) const
165 {
166     return GetOrgname ().GetDiv ();
167 }
168 
IsSetOrgMod(void) const169 bool COrg_ref::IsSetOrgMod(void) const
170 {
171     return IsSetOrgname () && GetOrgname ().IsSetMod ();
172 }
173 
174 
x_GetTaxnameAfterFirstTwoWords() const175 string COrg_ref::x_GetTaxnameAfterFirstTwoWords() const
176 {
177     string taxname = "";
178     if (IsSetTaxname()) {
179         taxname = GetTaxname();
180     }
181     // Look for modifiers in taxname after first two words
182     size_t pos = NStr::Find (taxname, " ");
183     if (pos == string::npos) {
184         taxname = "";
185     } else {
186         taxname = taxname.substr(pos + 1);
187         NStr::TruncateSpacesInPlace(taxname);
188         pos = NStr::Find (taxname, " ");
189         if (pos == string::npos) {
190             taxname = "";
191         } else {
192             taxname = taxname.substr(pos + 1);
193             NStr::TruncateSpacesInPlace(taxname);
194         }
195     }
196     return taxname;
197 }
198 
199 
s_FindWholeWord(string taxname,string value)200 bool s_FindWholeWord (string taxname, string value)
201 {
202     if (NStr::IsBlank(taxname) || NStr::IsBlank(value)) {
203         return false;
204     }
205     size_t pos = NStr::Find (taxname, value);
206     size_t value_len = value.length();
207     while (pos != string::npos
208            && ( ( (pos != 0 && isalpha (taxname.c_str()[pos - 1]))
209                 || isalpha (taxname.c_str()[pos + value_len])))) {
210         pos = NStr::Find(taxname, value, pos + value_len);
211     }
212     if (pos == string::npos) {
213         return false;
214     } else {
215         return true;
216     }
217 }
218 
219 
IsVarietyValid(const string & variety) const220 bool COrg_ref::IsVarietyValid(const string& variety) const
221 {
222     if (NStr::IsBlank(variety)) {
223         return true;
224     }
225     string taxname = x_GetTaxnameAfterFirstTwoWords();
226     return s_FindWholeWord(taxname, variety);
227 }
228 
229 
HasValidVariety() const230 bool COrg_ref::HasValidVariety() const
231 {
232     if (!IsSetOrgname() || !GetOrgname().IsSetMod()) {
233         return false;
234     }
235     ITERATE(COrgName::TMod, it, GetOrgname().GetMod()) {
236         if ((*it)->IsSetSubtype() && (*it)->GetSubtype() == COrgMod::eSubtype_variety
237             && (*it)->IsSetSubname() && !NStr::IsBlank((*it)->GetSubname())
238             && IsVarietyValid((*it)->GetSubname())) {
239             return true;
240         }
241     }
242     return false;
243 }
244 
245 
IsSubspeciesValid(const string & subspecies) const246 bool COrg_ref::IsSubspeciesValid(const string& subspecies) const
247 {
248     if (NStr::IsBlank(subspecies)) {
249         return true;
250     }
251     string taxname = x_GetTaxnameAfterFirstTwoWords();
252     if (s_FindWholeWord(taxname, subspecies)) {
253         return true;
254     } else {
255         return HasValidVariety();
256     }
257 
258 }
259 
260 
// Copies field "Field" from o1 into o3 when o1 and o2 both have it set and
// the two values compare equal with NStr::Equal().
// The body is wrapped in do/while(0) and the object arguments are
// parenthesized so that the macro behaves as one statement (safe in an
// unbraced if/else) and accepts arbitrary object expressions.  Invoke it
// with a trailing semicolon.
#define MAKE_COMMON(o1,o2,o3,Field) \
    do { \
        if ((o1).IsSet##Field() && (o2).IsSet##Field() \
            && NStr::Equal((o1).Get##Field(), (o2).Get##Field())) { \
            (o3).Set##Field((o1).Get##Field()); \
        } \
    } while (0)
262 
s_MakeCommonStringList(const list<string> & list1,const list<string> & list2,list<string> & list3)263 void s_MakeCommonStringList(const list< string >& list1, const list< string >& list2, list< string >& list3)
264 {
265     ITERATE(list< string >, it1, list1) {
266         bool found = false;
267         ITERATE(list< string >, it2, list2) {
268             if (NStr::Equal(*it1, *it2)) {
269                 found = true;
270                 break;
271             }
272         }
273         if (found) {
274             list3.push_back(*it1);
275         }
276     }
277 }
278 
279 
MakeCommon(const COrg_ref & other) const280 CRef<COrg_ref> COrg_ref::MakeCommon(const COrg_ref& other) const
281 {
282     TTaxId taxid1 = GetTaxId();
283     TTaxId taxid2 = other.GetTaxId();
284     if (taxid1 != taxid2) {
285         return CRef<COrg_ref>(NULL);
286     }
287 
288     CRef<COrg_ref> common(new COrg_ref());
289     if (Equals(other)) {
290         common->Assign(*this);
291     } else {
292         MAKE_COMMON((*this), other, (*common), Taxname);
293         MAKE_COMMON((*this), other, (*common), Common);
294 
295         // common mods
296         if (IsSetMod() && other.IsSetMod()) {
297             s_MakeCommonStringList(GetMod(), other.GetMod(), common->SetMod());
298             if (common->GetMod().empty()) {
299                 common->ResetMod();
300             }
301         }
302 
303         // common synonyms
304         if (IsSetSyn() && other.IsSetSyn()) {
305             s_MakeCommonStringList(GetSyn(), other.GetSyn(), common->SetSyn());
306             if (common->GetSyn().empty()) {
307                 common->ResetSyn();
308             }
309         }
310 
311         // common dbtags
312         if (IsSetDb() && other.IsSetDb()) {
313             ITERATE(TDb, it1, GetDb()) {
314                 bool found = false;
315                 ITERATE(TDb, it2, GetDb()) {
316                     if ((*it1)->Equals(**it2)) {
317                         found = true;
318                         break;
319                     }
320                 }
321                 if (found) {
322                     CRef<CDbtag> add(new CDbtag());
323                     add->Assign(**it1);
324                     common->SetDb().push_back(add);
325                 }
326             }
327         }
328 
329         // common orgname
330         if (IsSetOrgname() && other.IsSetOrgname()) {
331             CRef<COrgName> orgname = GetOrgname().MakeCommon(other.GetOrgname());
332             if (orgname) {
333                 common->SetOrgname().Assign(*orgname);
334             }
335         }
336     }
337 
338     return common;
339 }
340 
341 
342 typedef map<string, CRef<COrg_ref>, PNocase> TOrgrefMap;
343 static TOrgrefMap s_OrgRefMap;
344 static vector<string> s_CommonTaxnameList;
345 static bool                s_OrgRefMapInitialized = false;
346 DEFINE_STATIC_FAST_MUTEX(s_OrgRefMapMutex);
347 
348 // automatically generated include file
349 #include "common_tax.inc"
350 
s_ProcessOrgRefMapLine(const CTempString & line)351 static void s_ProcessOrgRefMapLine(const CTempString& line)
352 {
353     vector<string> tokens;
354     NStr::Split(line, "\t", tokens);
355     if (tokens.size() != 8) {
356         //        ERR_POST_X(1, Warning << "Bad format in common_tax.txt entry " << line
357         //                   << "; disregarding");
358     } else {
359         NON_CONST_ITERATE(vector<string>, t, tokens) {
360             NStr::TruncateSpacesInPlace(*t);
361             if (NStr::Equal(*t, "-")) {
362                 *t = kEmptyStr;
363             }
364         }
365 
366         s_CommonTaxnameList.push_back(tokens[0]);
367         CRef<COrg_ref> org(new COrg_ref());
368         org->SetTaxname(tokens[0]);
369         if (!NStr::IsBlank(tokens[1])) {
370             org->SetCommon(tokens[1]);
371         }
372 
373         if (!NStr::IsBlank(tokens[2])) {
374             try {
375                 org->SetOrgname().SetGcode(NStr::StringToNonNegativeInt(tokens[2]));
376             } catch (CException& ex) {
377             }
378         }
379         if (!NStr::IsBlank(tokens[3])) {
380             try {
381                 org->SetOrgname().SetMgcode(NStr::StringToNonNegativeInt(tokens[3]));
382             } catch (CException& ex) {
383             }
384         }
385         if (!NStr::IsBlank(tokens[4])) {
386             try {
387                 org->SetOrgname().SetPgcode(NStr::StringToNonNegativeInt(tokens[4]));
388             } catch (CException& ex) {
389             }
390         }
391 
392         if (!NStr::IsBlank(tokens[5])) {
393             try {
394                 CRef<CDbtag>taxon(new CDbtag());
395                 taxon->SetDb("taxon");
396                 taxon->SetTag().SetId(NStr::StringToNonNegativeInt(tokens[5]));
397                 org->SetDb().push_back(taxon);
398             } catch (CException& ex) {
399             }
400         }
401 
402         if (!NStr::IsBlank(tokens[6])) {
403             org->SetOrgname().SetDiv(tokens[6]);
404         }
405 
406         if (!NStr::IsBlank(tokens[7])) {
407             org->SetOrgname().SetLineage(tokens[7]);
408         }
409 
410         s_OrgRefMap[tokens[0]] = org;
411     }
412 }
413 
414 
s_InitializeOrgRefMap(void)415 static void s_InitializeOrgRefMap(void)
416 {
417     CFastMutexGuard GUARD(s_OrgRefMapMutex);
418     if (s_OrgRefMapInitialized) {
419         return;
420     }
421     string file = g_FindDataFile("common_tax.txt");
422     CRef<ILineReader> lr;
423     if (!file.empty()) {
424         LOG_POST("Reading from " + file + " for popular organisms.");
425         try {
426             lr = ILineReader::New(file);
427         } NCBI_CATCH("s_InitializeOrgRefMap")
428     } else {
429         LOG_POST("Falling back on built-in data for popular organisms.");
430     }
431 
432     if (lr.Empty()) {
433         size_t num_orgrefs = sizeof(kOrgRefList) / sizeof(char *);
434         for (size_t i = 0; i < num_orgrefs; i++) {
435             const char *p = kOrgRefList[i];
436             s_ProcessOrgRefMapLine(p);
437         }
438     } else {
439         do {
440             s_ProcessOrgRefMapLine(*++*lr);
441         } while (!lr->AtEOF());
442     }
443 
444     s_OrgRefMapInitialized = true;
445 }
446 
447 
TableLookup(const string & taxname)448 CConstRef<COrg_ref> COrg_ref::TableLookup(const string& taxname)
449 {
450     s_InitializeOrgRefMap();
451     TOrgrefMap::iterator it = s_OrgRefMap.find(taxname);
452     if (it != s_OrgRefMap.end()) {
453         return CConstRef<COrg_ref>(it->second.GetPointer());
454     }
455     return CConstRef<COrg_ref>(NULL);
456 }
457 
458 
UpdateFromTable()459 bool COrg_ref::UpdateFromTable()
460 {
461     if (!IsSetTaxname() || NStr::IsBlank(GetTaxname())) {
462         return false;
463     }
464     CConstRef<COrg_ref> lookup = TableLookup(GetTaxname());
465     if (lookup) {
466         if (lookup->IsSetCommon() && !NStr::IsBlank(lookup->GetCommon())) {
467             SetCommon(lookup->GetCommon());
468         }
469         if (lookup->IsSetGcode()) {
470             SetOrgname().SetGcode(lookup->GetGcode());
471         }
472         if (lookup->IsSetMgcode()) {
473             SetOrgname().SetMgcode(lookup->GetMgcode());
474         }
475         if (lookup->IsSetDivision()) {
476             SetOrgname().SetDiv(lookup->GetDivision());
477         }
478         if (lookup->IsSetDb()) {
479             CObject_id::TId taxid = 0;
480             ITERATE(TDb, it, lookup->GetDb()) {
481                 if ((*it)->IsSetDb() &&
482                     (*it)->IsSetTag() &&
483                     (*it)->GetTag().IsId() &&
484                     NStr::Equal((*it)->GetDb(), "taxon")) {
485                     taxid = (*it)->GetTag().GetId();
486                     break;
487                 }
488             }
489             if (taxid > 0) {
490                 SetTaxId(TAX_ID_FROM(CObject_id::TId, taxid));
491             }
492         }
493         if (lookup->IsSetLineage()) {
494             SetOrgname().SetLineage(lookup->GetOrgname().GetLineage());
495         }
496         return true;
497     } else {
498         return false;
499     }
500 }
501 
502 
GetTaxnameList()503 const vector<string>& COrg_ref::GetTaxnameList()
504 {
505     s_InitializeOrgRefMap();
506     return s_CommonTaxnameList;
507 }
508 
509 
CleanForGenBank()510 void COrg_ref::CleanForGenBank()
511 {
512     ResetSyn();
513 }
514 
// True when none of the bits of mask "f" are set in "a".  Both arguments
// are parenthesized so that compound expressions (e.g. "x | y") are
// evaluated as a unit.
#define NO_FLAG(a,f) (( (a) & (f) ) == 0)
516 
FilterOutParts(fOrgref_parts to_remain)517 void COrg_ref::FilterOutParts( fOrgref_parts to_remain )
518 {
519     if( to_remain == eOrgref_all ) {
520         return;
521     } else if( to_remain == eOrgref_nothing ) {
522         Reset();
523     } else {
524         if( NO_FLAG( to_remain, eOrgref_taxname ) && IsSetTaxname() ) {
525             ResetTaxname();
526         }
527         if( NO_FLAG( to_remain, eOrgref_common ) && IsSetCommon() ) {
528             ResetCommon();
529         }
530         if( NO_FLAG( to_remain, eOrgref_mod ) && IsSetMod() ) {
531             ResetMod();
532         }
533 	if( IsSetDb() ) {
534 	    if( NO_FLAG( to_remain, eOrgref_db_all ) ) {
535 		ResetDb();
536 	    } else {
537 		if( NO_FLAG( to_remain, eOrgref_db_taxid ) ) {
538 		    TDb& lDbTags = SetDb();
539 		    for(TDb::iterator i = lDbTags.begin(); i != lDbTags.end(); ) {
540 			if( *i && i->GetObject().GetDb() == s_taxonName ) {
541 			    i = lDbTags.erase( i );
542 			} else {
543 			    ++i;
544 			}
545 		    }
546 		}
547 	    }
548 	}
549         if( NO_FLAG( to_remain, eOrgref_syn ) && IsSetSyn() ) {
550             ResetSyn();
551         }
552 	if( IsSetOrgname() ) {
553 	    if( NO_FLAG( to_remain, eOrgref_on_all ) ) {
554 		ResetOrgname();
555 	    } else {
556 		COrgName& on = SetOrgname();
557 		if( NO_FLAG( to_remain, eOrgref_on_name ) && on.IsSetName() ) {
558 		    on.ResetName();
559 		}
560 		if( on.IsSetMod() ) {
561 		    if( NO_FLAG( to_remain, eOrgref_on_mod ) && on.IsSetMod() ) {
562 			on.ResetMod();
563 		    } else { // Filter out the rest mods
564 			if( NO_FLAG( to_remain, eOrgref_on_mod_nom ) ) {
565 			    on.ResetNomenclature();
566 			}
567 			if( NO_FLAG( to_remain, eOrgref_on_mod_oldname ) ) {
568 			    on.RemoveModBySubtype( COrgMod::eSubtype_old_name );
569 			}
570 			if( NO_FLAG( to_remain, eOrgref_on_mod_tm ) ) {
571 			    on.RemoveModBySubtype( COrgMod::eSubtype_type_material );
572 			}
573 		    }
574 		}
575 		if( on.IsSetAttrib() ) {
576 		    if( NO_FLAG( to_remain, eOrgref_on_attr_all ) ) {
577 			on.ResetAttrib();
578 		    } else {
579 			if( NO_FLAG( to_remain, eOrgref_on_attr_nofwd ) && on.IsModifierForwardingDisabled() ) {
580 			    on.EnableModifierForwarding();
581 			}
582 		    }
583 		}
584 		if( NO_FLAG( to_remain, eOrgref_on_lin ) && on.IsSetLineage() ) {
585 		    on.ResetLineage();
586 		}
587 		if( NO_FLAG( to_remain, eOrgref_on_gc ) && on.IsSetGcode() ) {
588 		    on.ResetGcode();
589 		}
590 		if( NO_FLAG( to_remain, eOrgref_on_mgc ) && on.IsSetMgcode() ) {
591 		    on.ResetMgcode();
592 		}
593 		if( NO_FLAG( to_remain, eOrgref_on_pgc ) && on.IsSetPgcode() ) {
594 		    on.ResetPgcode();
595 		}
596 		if( NO_FLAG( to_remain, eOrgref_on_div ) && on.IsSetDiv() ) {
597 		    on.ResetDiv();
598 		}
599 
600 	    }
601 	}
602     }
603 }
604 
605 
606 END_objects_SCOPE // namespace ncbi::objects::
607 
608 END_NCBI_SCOPE
609 
610 /* Original file checksum: lines: 61, chars: 1882, CRC32: c3300cc2 */
611