1 /*  $Id: discrepancy_core.cpp 629257 2021-04-13 13:28:26Z ivanov $
2  * =========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * =========================================================================
25  *
26  * Authors: Sema Kachalo
27  *
28  */
29 
30 #include <ncbi_pch.hpp>
31 #include "discrepancy_core.hpp"
32 #include "utils.hpp"
33 #include <algorithm>
34 #include <sstream>
35 #include <objmgr/object_manager.hpp>
36 #include <objmgr/seqdesc_ci.hpp>
37 #include <objmgr/util/sequence.hpp>
38 #include <serial/objcopy.hpp>
39 #include <util/compress/stream_util.hpp>
40 #include <util/format_guess.hpp>
41 
42 BEGIN_NCBI_SCOPE
43 BEGIN_SCOPE(NDiscrepancy)
44 USING_SCOPE(objects);
45 
// Definitions of the static registries declared in CDiscrepancyConstructor.
// sm_Table maps a test name to its CDiscrepancyCaseProps; presumably it is the
// storage behind GetTable() (accessor is defined elsewhere - TODO confirm).
CSafeStatic<map<string, CDiscrepancyCaseProps>> CDiscrepancyConstructor::sm_Table;
// sm_AliasTable maps string to string; presumably the storage behind
// GetAliasTable(), i.e. alias -> registered test name (TODO confirm).
CSafeStatic<map<string, string>> CDiscrepancyConstructor::sm_AliasTable;
48 
49 
/// Resolve a user-supplied test name to the name it is registered under.
///
/// Lookup order: exact match in the test table, then the alias table, and
/// finally - for names that start with "DISC_" - the same lookup repeated on
/// the name with that prefix removed.
///
/// @param name  Test name, alias, or "DISC_"-prefixed form of either.
/// @return      The resolved name, or an empty string when nothing matches.
string CDiscrepancyConstructor::GetDiscrepancyCaseName(const string& name)
{
    const map<string, CDiscrepancyCaseProps>& registered = GetTable();
    if (registered.find(name) != registered.end()) {
        return name;
    }

    const map<string, string>& aliases = GetAliasTable();
    const auto alias = aliases.find(name);
    if (alias != aliases.end()) {
        return alias->second;
    }

    // Legacy spelling: retry without the leading "DISC_".
    if (name.compare(0, 5, "DISC_") == 0) {
        return GetDiscrepancyCaseName(name.substr(5));
    }
    return "";
}
65 
66 
GetDiscrepancyConstructor(const string & name)67 const CDiscrepancyConstructor* CDiscrepancyConstructor::GetDiscrepancyConstructor(const string& name)
68 {
69     string str = GetDiscrepancyCaseName(name);
70     return str.empty() ? nullptr : GetTable()[str].Constructor;
71 }
72 
73 
/// Namespace-level convenience wrapper: forwards to
/// CDiscrepancyConstructor::GetDiscrepancyCaseName() and returns its result
/// unchanged (the resolved test name, or an empty string if unknown).
string GetDiscrepancyCaseName(const string& name)
{
    return CDiscrepancyConstructor::GetDiscrepancyCaseName(name);
}
78 
79 
/// Return the description (Descr) registered for a test.
///
/// @param name  Test name or alias.
/// @return      The Descr field of the table entry for the resolved name, or
///              an empty string when the name cannot be resolved.
string GetDiscrepancyDescr(const string& name)
{
    const string resolved = GetDiscrepancyCaseName(name);
    if (resolved.empty()) {
        return "";
    }
    return CDiscrepancyConstructor::GetTable()[resolved].Descr;
}
85 
86 
GetDiscrepancyGroup(const string & name)87 TGroup GetDiscrepancyGroup(const string& name)
88 {
89     string str = GetDiscrepancyCaseName(name);
90     return str.empty() ? 0 : CDiscrepancyConstructor::GetTable()[str].Group;
91 }
92 
93 
GetDiscrepancyNames(TGroup group)94 vector<string> GetDiscrepancyNames(TGroup group)
95 {
96     map<string, CDiscrepancyCaseProps>& Table = CDiscrepancyConstructor::GetTable();
97     vector<string> V;
98     for (const auto& J : Table) {
99         if (J.first[0] != '_' && (J.second.Group & group) == group) {
100             V.push_back(J.first);
101         }
102     }
103     return V;
104 }
105 
106 
GetDiscrepancyAliases(const string & name)107 vector<string> GetDiscrepancyAliases(const string& name)
108 {
109     map<string, CDiscrepancyCaseProps>& Table = CDiscrepancyConstructor::GetTable();
110     return Table.find(name) != Table.end() ? Table[name].AliasList : vector<string>();
111 }
112 
113 
operator [](const string & name)114 CReportNode& CReportNode::operator[](const string& name)
115 {
116     if (m_Map.find(name) == m_Map.end()) {
117         m_Map[name] = CRef<CReportNode>(new CReportNode(name));
118     }
119     return *m_Map[name];
120 }
121 
122 
Add(TReportObjectList & list,TReportObjectSet & hash,CReportObj & obj,bool unique)123 void CReportNode::Add(TReportObjectList& list, TReportObjectSet& hash, CReportObj& obj, bool unique)
124 {
125     // BIG FILE
126     if (unique && hash.find(&obj) != hash.end()) {
127         return;
128     }
129     list.push_back(CRef<CReportObj>(&obj));
130     hash.insert(&obj);
131 }
132 
133 
Add(TReportObjectList & list,TReportObjectSet & hash,TReportObjectList & objs,bool unique)134 void CReportNode::Add(TReportObjectList& list, TReportObjectSet& hash, TReportObjectList& objs, bool unique)
135 {
136     for (auto& it : objs) {
137         Add(list, hash, *it, unique);
138     }
139 }
140 
141 
Copy(CRef<CReportNode> other)142 void CReportNode::Copy(CRef<CReportNode> other)
143 {
144     m_Map = other->m_Map;
145     m_Objs = other->m_Objs;
146     m_Hash = other->m_Hash;
147     m_Severity = other->m_Severity;
148     m_Autofix = other->m_Autofix;
149     m_Ext = other->m_Ext;
150     m_Summ = other->m_Summ;
151     m_NoRec = other->m_NoRec;
152 }
153 
154 
Promote()155 bool CReportNode::Promote()
156 {
157     if (m_Map.size() == 1) {
158         Copy(m_Map.begin()->second);
159         return true;
160     }
161     return false;
162 }
163 
164 
Export(CDiscrepancyCase & test,bool unique) const165 CRef<CReportItem> CReportNode::Export(CDiscrepancyCase& test, bool unique) const
166 {
167     TReportObjectList objs = m_Objs;
168     TReportObjectSet hash = m_Hash;
169     TReportItemList subs;
170     bool autofix = false;
171     CReportItem::ESeverity severity = m_Severity;
172     string unit;
173     for (const auto& it : m_Map) {
174         CRef<CReportItem> sub = it.second->Export(test, unique);
175         if (severity < it.second->m_Severity) {
176             severity = it.second->m_Severity;
177         }
178         if (severity < sub->GetSeverity()) {
179             severity = sub->GetSeverity();
180         }
181         autofix = autofix || sub->CanAutofix();
182         if (unit.empty()) {
183             unit = sub->GetUnit();
184         }
185         subs.push_back(sub);
186         if (!m_NoRec) {
187             TReportObjectList details = sub->GetDetails();
188             for (auto& ob : details) {
189                 Add(objs, hash, *ob, unique);
190             }
191         }
192     }
193     for (auto& ob : objs) {
194         if (ob->CanAutofix()) {
195             static_cast<CDiscrepancyObject&>(*ob).m_Case.Reset(&test);
196             autofix = true;
197         }
198     }
199     string str = m_Name;
200     NStr::TruncateSpacesInPlace(str);
201     for (size_t n = NStr::Find(str, "[*"); n != NPOS; n = NStr::Find(str, "[*")) {
202         size_t k = NStr::Find(str, "*]");
203         if (k != NPOS) {
204             str.erase(n, k - n + 2);
205         }
206         else {
207             str.erase(n);
208         }
209     }
210     string msg = str;
211     string xml = str;
212     size_t count = m_Count > 0 ? m_Count : objs.size();
213 
214     NStr::ReplaceInPlace(msg, "[n]", NStr::Int8ToString(count));
215     NStr::ReplaceInPlace(msg, "[n/2]", NStr::Int8ToString(count / 2));
216     NStr::ReplaceInPlace(msg, "[s]", count == 1 ? "" : "s");  // nouns
217     NStr::ReplaceInPlace(msg, "[S]", count == 1 ? "s" : "");  // verbs
218     NStr::ReplaceInPlace(msg, "[is]", count == 1 ? "is" : "are");
219     NStr::ReplaceInPlace(msg, "[does]", count == 1 ? "does" : "do");
220     NStr::ReplaceInPlace(msg, "[has]", count == 1 ? "has" : "have");
221     NStr::ReplaceInPlace(msg, "[(]", "");
222     NStr::ReplaceInPlace(msg, "[)]", "");
223 
224     NStr::ReplaceInPlace(xml, "[n]", "##");
225     NStr::ReplaceInPlace(xml, "[n/2]", "##");
226     NStr::ReplaceInPlace(xml, "[s]", "s");
227     NStr::ReplaceInPlace(xml, "[S]", "");
228     NStr::ReplaceInPlace(xml, "[is]", "are");
229     NStr::ReplaceInPlace(xml, "[does]", "do");
230     NStr::ReplaceInPlace(xml, "[has]", "have");
231     NStr::ReplaceInPlace(xml, "[(]", "");
232     NStr::ReplaceInPlace(xml, "[)]", "");
233 
234     size_t n = str.find("[n]");
235     if (n != string::npos) {
236         str = str.substr(n + 4);
237     }
238     else if ((n = str.find("[n/2]")) != string::npos) {
239         str = str.substr(n + 6);
240         count /= 2;
241     }
242     if (n != string::npos) {
243         if ((n = str.find("[s]")) != string::npos) {
244             unit = str.substr(0, n);
245         }
246         else if (0 == str.find("CDS ")) {
247             unit = "CDS";
248         }
249         else if ((n = str.find("s ")) != string::npos) {
250             unit = str.substr(0, n);
251         }
252     }
253     CRef<CDiscrepancyItem> item(new CDiscrepancyItem(test, m_Name, msg, xml, unit, count));
254     item->m_Autofix = autofix;
255     item->m_Severity = severity;
256     item->m_Ext = m_Ext;
257     item->m_Summ = m_Summ;
258     item->m_Subs = subs;
259     item->m_Objs = objs;
260     return CRef<CReportItem>(item);
261 }
262 
263 
GetObjects() const264 TReportObjectList CDiscrepancyCore::GetObjects() const
265 {
266     TReportObjectList ret;
267     TReportObjectSet hash;
268     TReportItemList items = GetReport();
269     for (const auto& rep : items) {
270         TReportObjectList objs = rep->GetDetails();
271         for (auto& obj : objs) {
272             CReportNode::Add(ret, hash, *obj);
273         }
274     }
275     return ret;
276 }
277 
278 
CreateReportItem(const string & test,const CReportObj & obj,const string & msg,bool autofix)279 CRef<CReportItem> CReportItem::CreateReportItem(const string& test, const CReportObj& obj, const string& msg, bool autofix)
280 {
281     CRef<CDiscrepancyCase> t = CDiscrepancyConstructor::GetDiscrepancyConstructor(test)->Create();
282     string s = msg;
283     NStr::ReplaceInPlace(s, "[(]", "");
284     NStr::ReplaceInPlace(s, "[)]", "");
285     CRef<CDiscrepancyItem> item(new CDiscrepancyItem(*t, msg, s, s, kEmptyCStr, 0));
286     item->m_Autofix = autofix;
287     auto dobj = static_cast<const CDiscrepancyObject&>(obj);
288     auto x = CRef<CDiscrepancyObject>(new CDiscrepancyObject(dobj.m_Ref));
289     x->m_Case = t;
290     if (autofix) {
291         x->m_Fix = dobj.m_Ref;
292     }
293     item->m_Objs.push_back(CRef<CReportObj>(x));
294     return CRef<CReportItem>(item);
295 }
296 
297 
298 // need to rewrite as a DiscrepancyContext method
Clone(bool fix,CConstRef<CObject> data) const299 CReportObj* CDiscrepancyObject::Clone(bool fix, CConstRef<CObject> data) const
300 {
301     CDiscrepancyObject* obj = new CDiscrepancyObject(*this);
302     if (fix) {
303         obj->m_Fix.Reset(obj->m_Ref);
304     }
305     obj->m_More = data;
306     return obj;
307 }
308 
309 
Call(const T & obj,CDiscrepancyContext & context)310 template<typename T> void CDiscrepancyVisitor<T>::Call(const T& obj, CDiscrepancyContext& context)
311 {
312     try {
313         Visit(obj, context);
314     }
315     catch (CException& e) { // LCOV_EXCL_START
316         string ss = "EXCEPTION caught: "; ss += e.what();
317         m_Objs[ss];
318     } // LCOV_EXCL_STOP
319 }
320 
321 
New(CScope & scope)322 CRef<CDiscrepancySet> CDiscrepancySet::New(CScope& scope) { return CRef<CDiscrepancySet>(new CDiscrepancyContext(scope)); }
323 
324 
/// Expand the placeholders of a message template for a given count.
///
/// After trimming surrounding spaces the following replacements are applied,
/// in this order: "[n]" -> count, "[n/2]" -> count / 2, "[s]", "[S]", "[is]",
/// "[does]", "[has]" -> singular form when count == 1, plural form otherwise,
/// "[(]" and "[)]" -> removed. Finally every "[*...*]" span is erased; an
/// opener without a closer erases the rest of the string. The closing marker
/// is searched from the start of the string.
///
/// @param s      Message template.
/// @param count  Number substituted for the count placeholders.
/// @return       The formatted message.
string CDiscrepancySet::Format(const string& s, unsigned int count)
{
    const bool singular = count == 1;
    const pair<string, string> substitutions[] = {
        { "[n]",    NStr::Int8ToString(count) },
        { "[n/2]",  NStr::Int8ToString(count / 2) },
        { "[s]",    singular ? "" : "s" },          // nouns
        { "[S]",    singular ? "s" : "" },          // verbs
        { "[is]",   singular ? "is" : "are" },
        { "[does]", singular ? "does" : "do" },
        { "[has]",  singular ? "has" : "have" },
        { "[(]",    "" },
        { "[)]",    "" },
    };

    string text = s;
    NStr::TruncateSpacesInPlace(text);
    for (const auto& sub : substitutions) {
        NStr::ReplaceInPlace(text, sub.first, sub.second);
    }

    size_t open = NStr::Find(text, "[*");
    while (open != NPOS) {
        const size_t close = NStr::Find(text, "*]");
        if (close == NPOS) {
            text.erase(open);
        }
        else {
            text.erase(open, close - open + 2);
        }
        open = NStr::Find(text, "[*");
    }
    return text;
}
349 
350 
AddTest(const string & name)351 bool CDiscrepancyContext::AddTest(const string& name)
352 {
353     string str = GetDiscrepancyCaseName(name);
354     if (str.empty()) {
355         return false; // no such test
356     }
357     if (m_Tests.find(str) != m_Tests.end()) {
358         return false;  // already there
359     }
360     CRef<CDiscrepancyCase> test = CDiscrepancyConstructor::GetDiscrepancyConstructor(str)->Create();
361     m_Tests[str] = test;
362 
363 #define REGISTER_DISCREPANCY_TYPE(type) \
364     if (auto* p = dynamic_cast<CDiscrepancyVisitor<type>*>(test.GetPointer())) {                \
365         m_All_##type.push_back(p);                                                              \
366         m_Enable_##type = true;                                                                 \
367         return true;                                                                            \
368     }
369     //REGISTER_DISCREPANCY_TYPE(CSeq_inst)
370     //REGISTER_DISCREPANCY_TYPE(CSeqdesc)
371     //REGISTER_DISCREPANCY_TYPE(CSeq_feat)
372     //REGISTER_DISCREPANCY_TYPE(CSubmit_block)
373     //REGISTER_DISCREPANCY_TYPE(CSeqFeatData)
374     //REGISTER_DISCREPANCY_TYPE(CSeq_feat_BY_BIOSEQ)
375     //REGISTER_DISCREPANCY_TYPE(COverlappingFeatures)
376     //REGISTER_DISCREPANCY_TYPE(CBioSource)
377     //REGISTER_DISCREPANCY_TYPE(COrgName)
378     //REGISTER_DISCREPANCY_TYPE(CSeq_annot)
379     //REGISTER_DISCREPANCY_TYPE(CPubdesc)
380     //REGISTER_DISCREPANCY_TYPE(CAuth_list)
381     //REGISTER_DISCREPANCY_TYPE(CPerson_id)
382     REGISTER_DISCREPANCY_TYPE(string)
383 
384 /// BIG FILE
385 REGISTER_DISCREPANCY_TYPE(SEQUENCE)
386 REGISTER_DISCREPANCY_TYPE(SEQ_SET)
387 REGISTER_DISCREPANCY_TYPE(FEAT)
388 REGISTER_DISCREPANCY_TYPE(DESC)
389 REGISTER_DISCREPANCY_TYPE(BIOSRC)
390 REGISTER_DISCREPANCY_TYPE(PUBDESC)
391 REGISTER_DISCREPANCY_TYPE(AUTHORS)
392 REGISTER_DISCREPANCY_TYPE(SUBMIT)
393 REGISTER_DISCREPANCY_TYPE(STRING)
394 
395     return false;
396 }
397 
398 
Push(const CSerialObject & root,const string & fname)399 void CDiscrepancyContext::Push(const CSerialObject& root, const string& fname)
400 {
401     if (!fname.empty()) {
402         m_RootNode.Reset(new CParseNode(eFile, 0));
403         m_RootNode->m_Ref->m_Text = fname;
404     }
405     else if (!m_RootNode) {
406         m_RootNode.Reset(new CParseNode(eNone, 0));
407     }
408     m_NodeMap[m_RootNode->m_Ref] = &*m_RootNode;
409     m_CurrentNode.Reset(m_RootNode);
410 
411     if (const CBioseq* bs = dynamic_cast<const CBioseq*>(&root)) {
412         ParseObject(*bs);
413     }
414     else if (const CBioseq_set* st = dynamic_cast<const CBioseq_set*>(&root)) {
415         ParseObject(*st);
416     }
417     else if (const CSeq_entry* se = dynamic_cast<const CSeq_entry*>(&root)) {
418         ParseObject(*se);
419     }
420     else if (const CSeq_submit* ss = dynamic_cast<const CSeq_submit*>(&root)) {
421         ParseObject(*ss);
422     }
423 }
424 
425 
Summarize()426 unsigned CDiscrepancyContext::Summarize()
427 {
428     unsigned severity = 0;
429     for (auto& tt : m_Tests) {
430         CDiscrepancyCore& test = static_cast<CDiscrepancyCore&>(*tt.second);
431         test.Summarize(*this);
432         for (const auto& rep : test.GetReport()) {
433             unsigned sev = rep->GetSeverity();
434             severity = sev > severity ? sev : severity;
435         }
436     }
437     return severity;
438 }
439 
440 
TestString(const string & str)441 void CDiscrepancyContext::TestString(const string& str)
442 {
443     for (auto* it : m_All_string) {
444         Call(*it, str);
445     }
446 }
447 
448 
Collect(TDiscrepancyCaseMap & tests,bool all) const449 TReportItemList CDiscrepancyGroup::Collect(TDiscrepancyCaseMap& tests, bool all) const
450 {
451     TReportItemList out;
452     for (const auto& it : m_List) {
453         TReportItemList tmp = it->Collect(tests, false);
454         for (const auto& tt : tmp) {
455             out.push_back(tt);
456         }
457     }
458     if (!m_Test.empty() && tests.find(m_Test) != tests.end()) {
459         TReportItemList tmp = tests[m_Test]->GetReport();
460         for (const auto& tt : tmp) {
461             out.push_back(tt);
462         }
463         tests.erase(m_Test);
464     }
465     if (!m_Name.empty()) {
466         TReportObjectList objs;
467         TReportObjectSet hash;
468         CRef<CDiscrepancyItem> di(new CDiscrepancyItem(m_Name));
469         di->m_Subs = out;
470         bool empty = true;
471         for (const auto& tt : out) {
472             TReportObjectList details = tt->GetDetails();
473             if (!details.empty() || tt->GetCount() > 0) {
474                 empty = false;
475             }
476             for (auto& ob : details) {
477                 CReportNode::Add(objs, hash, *ob);
478             }
479             if (tt->CanAutofix()) {
480                 di->m_Autofix = true;
481             }
482             if (tt->IsInfo()) {
483                 di->m_Severity = CDiscrepancyItem::eSeverity_info;
484             }
485             else if (tt->IsFatal()) {
486                 di->m_Severity = CDiscrepancyItem::eSeverity_error;
487             }
488         }
489         di->m_Objs = objs;
490         out.clear();
491         if (!empty) {
492             out.push_back(CRef<CReportItem>(di));
493         }
494     }
495     if (all) {
496         for (const auto& it : tests) {
497             TReportItemList list = it.second->GetReport();
498             for (const auto& it : list) {
499                 out.push_back(it);
500             }
501         }
502     }
503     return out;
504 }
505 
506 
RunTests()507 void CDiscrepancyContext::RunTests()
508 {
509     SEQUENCE dummy_seq;
510     SEQ_SET dummy_set;
511     FEAT dummy_feat;
512     DESC dummy_desc;
513     BIOSRC dummy_biosrc;
514     PUBDESC dummy_pubdesc;
515     AUTHORS dummy_authors;
516     SUBMIT dummy_submit;
517     STRING dummy_string;
518     if (m_CurrentNode->m_Type == eBioseq) {
519         ClearFeatureList();
520         for (const auto& feat : GetAllFeat()) {
521             CollectFeature(feat);
522         }
523         for (auto* test : m_All_SEQUENCE) {
524             Call(*test, dummy_seq);
525         }
526         for (auto* test : m_All_FEAT) {
527             Call(*test, dummy_feat);
528         }
529         for (auto* test : m_All_DESC) {
530             Call(*test, dummy_desc);
531         }
532         if (!m_CurrentNode->m_Pubdescs.empty()) {
533             for (auto* test : m_All_PUBDESC) {
534                 Call(*test, dummy_pubdesc);
535             }
536             for (auto* test : m_All_AUTHORS) {
537                 Call(*test, dummy_authors);
538             }
539         }
540         if (m_CurrentNode->m_Biosource) {
541             for (auto* test : m_All_BIOSRC) {
542                 Call(*test, dummy_biosrc);
543             }
544         }
545     }
546     else if (IsSeqSet(m_CurrentNode->m_Type)) {
547         for (auto* test : m_All_SEQ_SET) {
548             Call(*test, dummy_set);
549         }
550         for (auto* test : m_All_FEAT) {
551             Call(*test, dummy_feat);
552         }
553         for (auto* test : m_All_DESC) {
554             Call(*test, dummy_desc);
555         }
556         if (!m_CurrentNode->m_Pubdescs.empty()) {
557             for (auto* test : m_All_PUBDESC) {
558                 Call(*test, dummy_pubdesc);
559             }
560             for (auto* test : m_All_AUTHORS) {
561                 Call(*test, dummy_authors);
562             }
563         }
564         if (m_CurrentNode->m_Biosource) {
565             for (auto* test : m_All_BIOSRC) {
566                 Call(*test, dummy_biosrc);
567             }
568         }
569     }
570     else if (m_CurrentNode->m_Type == eSubmit) {
571         for (auto* test : m_All_SUBMIT) {
572             Call(*test, dummy_submit);
573         }
574         if (!m_CurrentNode->m_Authors.empty()) {
575             for (auto* test : m_All_AUTHORS) {
576                 Call(*test, dummy_authors);
577             }
578         }
579     }
580     else if (m_CurrentNode->m_Type == eString) {
581         for (auto* test : m_All_STRING) {
582             Call(*test, dummy_string);
583         }
584     }
585     else if (m_CurrentNode->m_Type == eFile) {
586         return;
587     }
588     else {
589         ERR_POST(Info << "Tests for "
590                  << TypeName(m_CurrentNode->m_Type)
591                  << " are not yet implemented...");
592     }
593 }
594 
595 
GetText() const596 string CDiscrepancyContext::CRefNode::GetText() const
597 {
598     if (m_Type == eBioseq) {
599         size_t brk = m_Text.find('\n');
600         return brk == string::npos ? m_Text : m_Text.substr(0, brk) + " " + m_Text.substr(brk + 1);
601     }
602     else if (IsSeqSet(m_Type)) {
603         switch (m_Type) {
604             case eSeqSet_NucProt:
605                 return "np|" + (m_Text.empty() ? "(EMPTY BIOSEQ-SET)" : m_Text);
606             case eSeqSet_SegSet:
607                 return "ss|" + (m_Text.empty() ? "(EMPTY BIOSEQ-SET)" : m_Text);
608             default:
609                 return m_Text.empty() ? "BioseqSet" : "Set containing " + m_Text;
610         }
611     }
612     else if (m_Type == eSubmit) {
613         return m_Text.empty() ? "Cit-sub" : "Cit-sub for " + m_Text;
614     }
615     else if (m_Type == eSeqDesc) {
616         string label = GetBioseqLabel();
617         return label.empty() ? m_Text : label + ":" + m_Text;
618     }
619     else if (m_Type == eSeqFeat) {
620         return m_Text;
621     }
622     else if (m_Type == eString) {
623         return m_Text;
624     }
625     return CDiscrepancyContext::TypeName(m_Type) + " - coming soon...";
626 }
627 
628 
GetBioseqLabel() const629 string CDiscrepancyContext::CRefNode::GetBioseqLabel() const
630 {
631     for (const CRefNode* node = this; node; node = node->m_Parent) {
632         if (node->m_Type == eBioseq) {
633             size_t brk = node->m_Text.find('\n');
634             return brk == string::npos ? kEmptyStr : node->m_Text.substr(0, brk);
635         }
636         if (IsSeqSet(node->m_Type) || node->m_Type == eSubmit) {
637             return node->m_Text;
638         }
639     }
640     return kEmptyStr;
641 }
642 
643 
CompareRefs(CRef<CReportObj> a,CRef<CReportObj> b)644 bool CDiscrepancyContext::CompareRefs(CRef<CReportObj> a, CRef<CReportObj> b) {
645     vector<const CRefNode*> A, B;
646     for (const CRefNode* node = static_cast<CDiscrepancyObject&>(*a).m_Ref; node; node = node->m_Parent) {
647         A.push_back(node);
648     }
649     reverse(A.begin(), A.end());
650     for (const CRefNode* node = static_cast<CDiscrepancyObject&>(*b).m_Ref; node; node = node->m_Parent) {
651         B.push_back(node);
652     }
653     reverse(B.begin(), B.end());
654     size_t n = min(A.size(), B.size());
655     for (size_t i = 0; i < n; i++) {
656         if (A[i] != B[i]) {
657             if (A[i]->m_Type == eSeqFeat && B[i]->m_Type != eSeqFeat) {
658                 return true;
659             }
660             if (B[i]->m_Type == eSeqFeat && A[i]->m_Type != eSeqFeat) {
661                 return false;
662             }
663             if (A[i]->m_Type == eSeqDesc && B[i]->m_Type != eSeqDesc) {
664                 return true;
665             }
666             if (B[i]->m_Type == eSeqDesc && A[i]->m_Type != eSeqDesc) {
667                 return false;
668             }
669             if (A[i]->m_Index != B[i]->m_Index) {
670                 return A[i]->m_Index < B[i]->m_Index;
671             }
672         }
673     }
674     return A.size() == B.size() ? &*a < &*b : A.size() < B.size();
675 }
676 
677 
// One DISCREPANCY_LINK_MODULE entry per discrepancy test module. The macro is
// defined elsewhere; presumably it references each module so that its tests
// get linked in and registered - TODO confirm against the macro definition.
DISCREPANCY_LINK_MODULE(discrepancy_case);
DISCREPANCY_LINK_MODULE(suspect_product_names);
DISCREPANCY_LINK_MODULE(division_code_conflicts);
DISCREPANCY_LINK_MODULE(feature_per_bioseq);
DISCREPANCY_LINK_MODULE(seqdesc_per_bioseq);
DISCREPANCY_LINK_MODULE(gene_names);
DISCREPANCY_LINK_MODULE(rna_names);
DISCREPANCY_LINK_MODULE(transl_too_long);
DISCREPANCY_LINK_MODULE(overlapping_features);
DISCREPANCY_LINK_MODULE(sesame_street);
DISCREPANCY_LINK_MODULE(transl_note);
DISCREPANCY_LINK_MODULE(feature_tests);
DISCREPANCY_LINK_MODULE(sequence_tests);
DISCREPANCY_LINK_MODULE(pub_tests);
DISCREPANCY_LINK_MODULE(biosource_tests);
693 
694 END_SCOPE(NDiscrepancy)
695 END_NCBI_SCOPE
696