1 /* $Id: discrepancy_core.cpp 629257 2021-04-13 13:28:26Z ivanov $
2 * =========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * =========================================================================
25 *
26 * Authors: Sema Kachalo
27 *
28 */
29
30 #include <ncbi_pch.hpp>
31 #include "discrepancy_core.hpp"
32 #include "utils.hpp"
33 #include <algorithm>
34 #include <sstream>
35 #include <objmgr/object_manager.hpp>
36 #include <objmgr/seqdesc_ci.hpp>
37 #include <objmgr/util/sequence.hpp>
38 #include <serial/objcopy.hpp>
39 #include <util/compress/stream_util.hpp>
40 #include <util/format_guess.hpp>
41
42 BEGIN_NCBI_SCOPE
43 BEGIN_SCOPE(NDiscrepancy)
44 USING_SCOPE(objects);
45
// Definitions of the two static CSafeStatic members of CDiscrepancyConstructor:
// a string -> CDiscrepancyCaseProps map and a string -> string alias map.
// NOTE(review): presumably these are the storage behind GetTable() and
// GetAliasTable() used below -- confirm in discrepancy_core.hpp.
46 CSafeStatic<map<string, CDiscrepancyCaseProps>> CDiscrepancyConstructor::sm_Table;
47 CSafeStatic<map<string, string>> CDiscrepancyConstructor::sm_AliasTable;
48
49
// Resolve a user-supplied test name to the key under which the case is
// registered.
//
// Lookup order: exact match in the case table, then the alias table, then a
// retry with a leading "DISC_" prefix stripped.
//
// Returns the registered name, or an empty string when nothing matches.
string CDiscrepancyConstructor::GetDiscrepancyCaseName(const string& name)
{
    const map<string, CDiscrepancyCaseProps>& cases = GetTable();
    if (cases.count(name) != 0) {
        return name;
    }

    const map<string, string>& aliases = GetAliasTable();
    const auto alias = aliases.find(name);
    if (alias != aliases.end()) {
        return alias->second;
    }

    // Names may carry a "DISC_" prefix; retry the whole lookup without it.
    const bool hasPrefix = name.compare(0, 5, "DISC_") == 0;
    return hasPrefix ? GetDiscrepancyCaseName(name.substr(5)) : "";
}
65
66
GetDiscrepancyConstructor(const string & name)67 const CDiscrepancyConstructor* CDiscrepancyConstructor::GetDiscrepancyConstructor(const string& name)
68 {
69 string str = GetDiscrepancyCaseName(name);
70 return str.empty() ? nullptr : GetTable()[str].Constructor;
71 }
72
73
// Namespace-level convenience wrapper: forwards to
// CDiscrepancyConstructor::GetDiscrepancyCaseName() and returns its result
// (empty string when the name is unknown).
string GetDiscrepancyCaseName(const string& name)
{
    string resolved = CDiscrepancyConstructor::GetDiscrepancyCaseName(name);
    return resolved;
}
78
79
// Return the description (Descr field) of the named test, or an empty string
// when the name does not resolve to a registered case.
string GetDiscrepancyDescr(const string& name)
{
    const string key = GetDiscrepancyCaseName(name);
    if (key.empty()) {
        return "";
    }
    return CDiscrepancyConstructor::GetTable()[key].Descr;
}
85
86
GetDiscrepancyGroup(const string & name)87 TGroup GetDiscrepancyGroup(const string& name)
88 {
89 string str = GetDiscrepancyCaseName(name);
90 return str.empty() ? 0 : CDiscrepancyConstructor::GetTable()[str].Group;
91 }
92
93
GetDiscrepancyNames(TGroup group)94 vector<string> GetDiscrepancyNames(TGroup group)
95 {
96 map<string, CDiscrepancyCaseProps>& Table = CDiscrepancyConstructor::GetTable();
97 vector<string> V;
98 for (const auto& J : Table) {
99 if (J.first[0] != '_' && (J.second.Group & group) == group) {
100 V.push_back(J.first);
101 }
102 }
103 return V;
104 }
105
106
GetDiscrepancyAliases(const string & name)107 vector<string> GetDiscrepancyAliases(const string& name)
108 {
109 map<string, CDiscrepancyCaseProps>& Table = CDiscrepancyConstructor::GetTable();
110 return Table.find(name) != Table.end() ? Table[name].AliasList : vector<string>();
111 }
112
113
operator [](const string & name)114 CReportNode& CReportNode::operator[](const string& name)
115 {
116 if (m_Map.find(name) == m_Map.end()) {
117 m_Map[name] = CRef<CReportNode>(new CReportNode(name));
118 }
119 return *m_Map[name];
120 }
121
122
Add(TReportObjectList & list,TReportObjectSet & hash,CReportObj & obj,bool unique)123 void CReportNode::Add(TReportObjectList& list, TReportObjectSet& hash, CReportObj& obj, bool unique)
124 {
125 // BIG FILE
126 if (unique && hash.find(&obj) != hash.end()) {
127 return;
128 }
129 list.push_back(CRef<CReportObj>(&obj));
130 hash.insert(&obj);
131 }
132
133
Add(TReportObjectList & list,TReportObjectSet & hash,TReportObjectList & objs,bool unique)134 void CReportNode::Add(TReportObjectList& list, TReportObjectSet& hash, TReportObjectList& objs, bool unique)
135 {
136 for (auto& it : objs) {
137 Add(list, hash, *it, unique);
138 }
139 }
140
141
Copy(CRef<CReportNode> other)142 void CReportNode::Copy(CRef<CReportNode> other)
143 {
144 m_Map = other->m_Map;
145 m_Objs = other->m_Objs;
146 m_Hash = other->m_Hash;
147 m_Severity = other->m_Severity;
148 m_Autofix = other->m_Autofix;
149 m_Ext = other->m_Ext;
150 m_Summ = other->m_Summ;
151 m_NoRec = other->m_NoRec;
152 }
153
154
Promote()155 bool CReportNode::Promote()
156 {
157 if (m_Map.size() == 1) {
158 Copy(m_Map.begin()->second);
159 return true;
160 }
161 return false;
162 }
163
164
Export(CDiscrepancyCase & test,bool unique) const165 CRef<CReportItem> CReportNode::Export(CDiscrepancyCase& test, bool unique) const
166 {
167 TReportObjectList objs = m_Objs;
168 TReportObjectSet hash = m_Hash;
169 TReportItemList subs;
170 bool autofix = false;
171 CReportItem::ESeverity severity = m_Severity;
172 string unit;
173 for (const auto& it : m_Map) {
174 CRef<CReportItem> sub = it.second->Export(test, unique);
175 if (severity < it.second->m_Severity) {
176 severity = it.second->m_Severity;
177 }
178 if (severity < sub->GetSeverity()) {
179 severity = sub->GetSeverity();
180 }
181 autofix = autofix || sub->CanAutofix();
182 if (unit.empty()) {
183 unit = sub->GetUnit();
184 }
185 subs.push_back(sub);
186 if (!m_NoRec) {
187 TReportObjectList details = sub->GetDetails();
188 for (auto& ob : details) {
189 Add(objs, hash, *ob, unique);
190 }
191 }
192 }
193 for (auto& ob : objs) {
194 if (ob->CanAutofix()) {
195 static_cast<CDiscrepancyObject&>(*ob).m_Case.Reset(&test);
196 autofix = true;
197 }
198 }
199 string str = m_Name;
200 NStr::TruncateSpacesInPlace(str);
201 for (size_t n = NStr::Find(str, "[*"); n != NPOS; n = NStr::Find(str, "[*")) {
202 size_t k = NStr::Find(str, "*]");
203 if (k != NPOS) {
204 str.erase(n, k - n + 2);
205 }
206 else {
207 str.erase(n);
208 }
209 }
210 string msg = str;
211 string xml = str;
212 size_t count = m_Count > 0 ? m_Count : objs.size();
213
214 NStr::ReplaceInPlace(msg, "[n]", NStr::Int8ToString(count));
215 NStr::ReplaceInPlace(msg, "[n/2]", NStr::Int8ToString(count / 2));
216 NStr::ReplaceInPlace(msg, "[s]", count == 1 ? "" : "s"); // nouns
217 NStr::ReplaceInPlace(msg, "[S]", count == 1 ? "s" : ""); // verbs
218 NStr::ReplaceInPlace(msg, "[is]", count == 1 ? "is" : "are");
219 NStr::ReplaceInPlace(msg, "[does]", count == 1 ? "does" : "do");
220 NStr::ReplaceInPlace(msg, "[has]", count == 1 ? "has" : "have");
221 NStr::ReplaceInPlace(msg, "[(]", "");
222 NStr::ReplaceInPlace(msg, "[)]", "");
223
224 NStr::ReplaceInPlace(xml, "[n]", "##");
225 NStr::ReplaceInPlace(xml, "[n/2]", "##");
226 NStr::ReplaceInPlace(xml, "[s]", "s");
227 NStr::ReplaceInPlace(xml, "[S]", "");
228 NStr::ReplaceInPlace(xml, "[is]", "are");
229 NStr::ReplaceInPlace(xml, "[does]", "do");
230 NStr::ReplaceInPlace(xml, "[has]", "have");
231 NStr::ReplaceInPlace(xml, "[(]", "");
232 NStr::ReplaceInPlace(xml, "[)]", "");
233
234 size_t n = str.find("[n]");
235 if (n != string::npos) {
236 str = str.substr(n + 4);
237 }
238 else if ((n = str.find("[n/2]")) != string::npos) {
239 str = str.substr(n + 6);
240 count /= 2;
241 }
242 if (n != string::npos) {
243 if ((n = str.find("[s]")) != string::npos) {
244 unit = str.substr(0, n);
245 }
246 else if (0 == str.find("CDS ")) {
247 unit = "CDS";
248 }
249 else if ((n = str.find("s ")) != string::npos) {
250 unit = str.substr(0, n);
251 }
252 }
253 CRef<CDiscrepancyItem> item(new CDiscrepancyItem(test, m_Name, msg, xml, unit, count));
254 item->m_Autofix = autofix;
255 item->m_Severity = severity;
256 item->m_Ext = m_Ext;
257 item->m_Summ = m_Summ;
258 item->m_Subs = subs;
259 item->m_Objs = objs;
260 return CRef<CReportItem>(item);
261 }
262
263
GetObjects() const264 TReportObjectList CDiscrepancyCore::GetObjects() const
265 {
266 TReportObjectList ret;
267 TReportObjectSet hash;
268 TReportItemList items = GetReport();
269 for (const auto& rep : items) {
270 TReportObjectList objs = rep->GetDetails();
271 for (auto& obj : objs) {
272 CReportNode::Add(ret, hash, *obj);
273 }
274 }
275 return ret;
276 }
277
278
CreateReportItem(const string & test,const CReportObj & obj,const string & msg,bool autofix)279 CRef<CReportItem> CReportItem::CreateReportItem(const string& test, const CReportObj& obj, const string& msg, bool autofix)
280 {
281 CRef<CDiscrepancyCase> t = CDiscrepancyConstructor::GetDiscrepancyConstructor(test)->Create();
282 string s = msg;
283 NStr::ReplaceInPlace(s, "[(]", "");
284 NStr::ReplaceInPlace(s, "[)]", "");
285 CRef<CDiscrepancyItem> item(new CDiscrepancyItem(*t, msg, s, s, kEmptyCStr, 0));
286 item->m_Autofix = autofix;
287 auto dobj = static_cast<const CDiscrepancyObject&>(obj);
288 auto x = CRef<CDiscrepancyObject>(new CDiscrepancyObject(dobj.m_Ref));
289 x->m_Case = t;
290 if (autofix) {
291 x->m_Fix = dobj.m_Ref;
292 }
293 item->m_Objs.push_back(CRef<CReportObj>(x));
294 return CRef<CReportItem>(item);
295 }
296
297
298 // need to rewrite as a DiscrepancyContext method
Clone(bool fix,CConstRef<CObject> data) const299 CReportObj* CDiscrepancyObject::Clone(bool fix, CConstRef<CObject> data) const
300 {
301 CDiscrepancyObject* obj = new CDiscrepancyObject(*this);
302 if (fix) {
303 obj->m_Fix.Reset(obj->m_Ref);
304 }
305 obj->m_More = data;
306 return obj;
307 }
308
309
Call(const T & obj,CDiscrepancyContext & context)310 template<typename T> void CDiscrepancyVisitor<T>::Call(const T& obj, CDiscrepancyContext& context)
311 {
312 try {
313 Visit(obj, context);
314 }
315 catch (CException& e) { // LCOV_EXCL_START
316 string ss = "EXCEPTION caught: "; ss += e.what();
317 m_Objs[ss];
318 } // LCOV_EXCL_STOP
319 }
320
321
New(CScope & scope)322 CRef<CDiscrepancySet> CDiscrepancySet::New(CScope& scope) { return CRef<CDiscrepancySet>(new CDiscrepancyContext(scope)); }
323
324
// Expand a message template for a given object count.
//
// Surrounding whitespace is trimmed, the placeholders "[n]", "[n/2]", "[s]",
// "[S]", "[is]", "[does]", "[has]" are replaced with count-dependent text,
// "[(]" / "[)]" markers are dropped, and "[*...*]" spans are removed.
//
// s     - template text.
// count - number substituted for "[n]"; singular forms are used when it is 1.
// Returns the formatted message.
string CDiscrepancySet::Format(const string& s, unsigned int count)
{
    string str = s;
    NStr::TruncateSpacesInPlace(str);
    NStr::ReplaceInPlace(str, "[n]", NStr::Int8ToString(count));
    NStr::ReplaceInPlace(str, "[n/2]", NStr::Int8ToString(count / 2));
    NStr::ReplaceInPlace(str, "[s]", count == 1 ? "" : "s"); // nouns
    NStr::ReplaceInPlace(str, "[S]", count == 1 ? "s" : ""); // verbs
    NStr::ReplaceInPlace(str, "[is]", count == 1 ? "is" : "are");
    NStr::ReplaceInPlace(str, "[does]", count == 1 ? "does" : "do");
    NStr::ReplaceInPlace(str, "[has]", count == 1 ? "has" : "have");
    NStr::ReplaceInPlace(str, "[(]", "");
    NStr::ReplaceInPlace(str, "[)]", "");
    // Strip "[*...*]" spans. The closing marker is searched for from just
    // behind the opening "[" (so "[*]" still counts as a complete span).
    // Searching from the start of the string could pick up a "*]" located
    // before the opener, which made the erase length wrap around or, for
    // input like "*][*", become zero and loop forever.
    for (size_t open = str.find("[*"); open != string::npos; open = str.find("[*")) {
        const size_t close = str.find("*]", open + 1);
        if (close != string::npos) {
            str.erase(open, close - open + 2);
        }
        else {
            str.erase(open); // unterminated span: drop the rest of the text
        }
    }
    return str;
}
349
350
AddTest(const string & name)351 bool CDiscrepancyContext::AddTest(const string& name)
352 {
353 string str = GetDiscrepancyCaseName(name);
354 if (str.empty()) {
355 return false; // no such test
356 }
357 if (m_Tests.find(str) != m_Tests.end()) {
358 return false; // already there
359 }
360 CRef<CDiscrepancyCase> test = CDiscrepancyConstructor::GetDiscrepancyConstructor(str)->Create();
361 m_Tests[str] = test;
362
363 #define REGISTER_DISCREPANCY_TYPE(type) \
364 if (auto* p = dynamic_cast<CDiscrepancyVisitor<type>*>(test.GetPointer())) { \
365 m_All_##type.push_back(p); \
366 m_Enable_##type = true; \
367 return true; \
368 }
369 //REGISTER_DISCREPANCY_TYPE(CSeq_inst)
370 //REGISTER_DISCREPANCY_TYPE(CSeqdesc)
371 //REGISTER_DISCREPANCY_TYPE(CSeq_feat)
372 //REGISTER_DISCREPANCY_TYPE(CSubmit_block)
373 //REGISTER_DISCREPANCY_TYPE(CSeqFeatData)
374 //REGISTER_DISCREPANCY_TYPE(CSeq_feat_BY_BIOSEQ)
375 //REGISTER_DISCREPANCY_TYPE(COverlappingFeatures)
376 //REGISTER_DISCREPANCY_TYPE(CBioSource)
377 //REGISTER_DISCREPANCY_TYPE(COrgName)
378 //REGISTER_DISCREPANCY_TYPE(CSeq_annot)
379 //REGISTER_DISCREPANCY_TYPE(CPubdesc)
380 //REGISTER_DISCREPANCY_TYPE(CAuth_list)
381 //REGISTER_DISCREPANCY_TYPE(CPerson_id)
382 REGISTER_DISCREPANCY_TYPE(string)
383
384 /// BIG FILE
385 REGISTER_DISCREPANCY_TYPE(SEQUENCE)
386 REGISTER_DISCREPANCY_TYPE(SEQ_SET)
387 REGISTER_DISCREPANCY_TYPE(FEAT)
388 REGISTER_DISCREPANCY_TYPE(DESC)
389 REGISTER_DISCREPANCY_TYPE(BIOSRC)
390 REGISTER_DISCREPANCY_TYPE(PUBDESC)
391 REGISTER_DISCREPANCY_TYPE(AUTHORS)
392 REGISTER_DISCREPANCY_TYPE(SUBMIT)
393 REGISTER_DISCREPANCY_TYPE(STRING)
394
395 return false;
396 }
397
398
Push(const CSerialObject & root,const string & fname)399 void CDiscrepancyContext::Push(const CSerialObject& root, const string& fname)
400 {
401 if (!fname.empty()) {
402 m_RootNode.Reset(new CParseNode(eFile, 0));
403 m_RootNode->m_Ref->m_Text = fname;
404 }
405 else if (!m_RootNode) {
406 m_RootNode.Reset(new CParseNode(eNone, 0));
407 }
408 m_NodeMap[m_RootNode->m_Ref] = &*m_RootNode;
409 m_CurrentNode.Reset(m_RootNode);
410
411 if (const CBioseq* bs = dynamic_cast<const CBioseq*>(&root)) {
412 ParseObject(*bs);
413 }
414 else if (const CBioseq_set* st = dynamic_cast<const CBioseq_set*>(&root)) {
415 ParseObject(*st);
416 }
417 else if (const CSeq_entry* se = dynamic_cast<const CSeq_entry*>(&root)) {
418 ParseObject(*se);
419 }
420 else if (const CSeq_submit* ss = dynamic_cast<const CSeq_submit*>(&root)) {
421 ParseObject(*ss);
422 }
423 }
424
425
Summarize()426 unsigned CDiscrepancyContext::Summarize()
427 {
428 unsigned severity = 0;
429 for (auto& tt : m_Tests) {
430 CDiscrepancyCore& test = static_cast<CDiscrepancyCore&>(*tt.second);
431 test.Summarize(*this);
432 for (const auto& rep : test.GetReport()) {
433 unsigned sev = rep->GetSeverity();
434 severity = sev > severity ? sev : severity;
435 }
436 }
437 return severity;
438 }
439
440
TestString(const string & str)441 void CDiscrepancyContext::TestString(const string& str)
442 {
443 for (auto* it : m_All_string) {
444 Call(*it, str);
445 }
446 }
447
448
Collect(TDiscrepancyCaseMap & tests,bool all) const449 TReportItemList CDiscrepancyGroup::Collect(TDiscrepancyCaseMap& tests, bool all) const
450 {
451 TReportItemList out;
452 for (const auto& it : m_List) {
453 TReportItemList tmp = it->Collect(tests, false);
454 for (const auto& tt : tmp) {
455 out.push_back(tt);
456 }
457 }
458 if (!m_Test.empty() && tests.find(m_Test) != tests.end()) {
459 TReportItemList tmp = tests[m_Test]->GetReport();
460 for (const auto& tt : tmp) {
461 out.push_back(tt);
462 }
463 tests.erase(m_Test);
464 }
465 if (!m_Name.empty()) {
466 TReportObjectList objs;
467 TReportObjectSet hash;
468 CRef<CDiscrepancyItem> di(new CDiscrepancyItem(m_Name));
469 di->m_Subs = out;
470 bool empty = true;
471 for (const auto& tt : out) {
472 TReportObjectList details = tt->GetDetails();
473 if (!details.empty() || tt->GetCount() > 0) {
474 empty = false;
475 }
476 for (auto& ob : details) {
477 CReportNode::Add(objs, hash, *ob);
478 }
479 if (tt->CanAutofix()) {
480 di->m_Autofix = true;
481 }
482 if (tt->IsInfo()) {
483 di->m_Severity = CDiscrepancyItem::eSeverity_info;
484 }
485 else if (tt->IsFatal()) {
486 di->m_Severity = CDiscrepancyItem::eSeverity_error;
487 }
488 }
489 di->m_Objs = objs;
490 out.clear();
491 if (!empty) {
492 out.push_back(CRef<CReportItem>(di));
493 }
494 }
495 if (all) {
496 for (const auto& it : tests) {
497 TReportItemList list = it.second->GetReport();
498 for (const auto& it : list) {
499 out.push_back(it);
500 }
501 }
502 }
503 return out;
504 }
505
506
RunTests()507 void CDiscrepancyContext::RunTests()
508 {
509 SEQUENCE dummy_seq;
510 SEQ_SET dummy_set;
511 FEAT dummy_feat;
512 DESC dummy_desc;
513 BIOSRC dummy_biosrc;
514 PUBDESC dummy_pubdesc;
515 AUTHORS dummy_authors;
516 SUBMIT dummy_submit;
517 STRING dummy_string;
518 if (m_CurrentNode->m_Type == eBioseq) {
519 ClearFeatureList();
520 for (const auto& feat : GetAllFeat()) {
521 CollectFeature(feat);
522 }
523 for (auto* test : m_All_SEQUENCE) {
524 Call(*test, dummy_seq);
525 }
526 for (auto* test : m_All_FEAT) {
527 Call(*test, dummy_feat);
528 }
529 for (auto* test : m_All_DESC) {
530 Call(*test, dummy_desc);
531 }
532 if (!m_CurrentNode->m_Pubdescs.empty()) {
533 for (auto* test : m_All_PUBDESC) {
534 Call(*test, dummy_pubdesc);
535 }
536 for (auto* test : m_All_AUTHORS) {
537 Call(*test, dummy_authors);
538 }
539 }
540 if (m_CurrentNode->m_Biosource) {
541 for (auto* test : m_All_BIOSRC) {
542 Call(*test, dummy_biosrc);
543 }
544 }
545 }
546 else if (IsSeqSet(m_CurrentNode->m_Type)) {
547 for (auto* test : m_All_SEQ_SET) {
548 Call(*test, dummy_set);
549 }
550 for (auto* test : m_All_FEAT) {
551 Call(*test, dummy_feat);
552 }
553 for (auto* test : m_All_DESC) {
554 Call(*test, dummy_desc);
555 }
556 if (!m_CurrentNode->m_Pubdescs.empty()) {
557 for (auto* test : m_All_PUBDESC) {
558 Call(*test, dummy_pubdesc);
559 }
560 for (auto* test : m_All_AUTHORS) {
561 Call(*test, dummy_authors);
562 }
563 }
564 if (m_CurrentNode->m_Biosource) {
565 for (auto* test : m_All_BIOSRC) {
566 Call(*test, dummy_biosrc);
567 }
568 }
569 }
570 else if (m_CurrentNode->m_Type == eSubmit) {
571 for (auto* test : m_All_SUBMIT) {
572 Call(*test, dummy_submit);
573 }
574 if (!m_CurrentNode->m_Authors.empty()) {
575 for (auto* test : m_All_AUTHORS) {
576 Call(*test, dummy_authors);
577 }
578 }
579 }
580 else if (m_CurrentNode->m_Type == eString) {
581 for (auto* test : m_All_STRING) {
582 Call(*test, dummy_string);
583 }
584 }
585 else if (m_CurrentNode->m_Type == eFile) {
586 return;
587 }
588 else {
589 ERR_POST(Info << "Tests for "
590 << TypeName(m_CurrentNode->m_Type)
591 << " are not yet implemented...");
592 }
593 }
594
595
GetText() const596 string CDiscrepancyContext::CRefNode::GetText() const
597 {
598 if (m_Type == eBioseq) {
599 size_t brk = m_Text.find('\n');
600 return brk == string::npos ? m_Text : m_Text.substr(0, brk) + " " + m_Text.substr(brk + 1);
601 }
602 else if (IsSeqSet(m_Type)) {
603 switch (m_Type) {
604 case eSeqSet_NucProt:
605 return "np|" + (m_Text.empty() ? "(EMPTY BIOSEQ-SET)" : m_Text);
606 case eSeqSet_SegSet:
607 return "ss|" + (m_Text.empty() ? "(EMPTY BIOSEQ-SET)" : m_Text);
608 default:
609 return m_Text.empty() ? "BioseqSet" : "Set containing " + m_Text;
610 }
611 }
612 else if (m_Type == eSubmit) {
613 return m_Text.empty() ? "Cit-sub" : "Cit-sub for " + m_Text;
614 }
615 else if (m_Type == eSeqDesc) {
616 string label = GetBioseqLabel();
617 return label.empty() ? m_Text : label + ":" + m_Text;
618 }
619 else if (m_Type == eSeqFeat) {
620 return m_Text;
621 }
622 else if (m_Type == eString) {
623 return m_Text;
624 }
625 return CDiscrepancyContext::TypeName(m_Type) + " - coming soon...";
626 }
627
628
GetBioseqLabel() const629 string CDiscrepancyContext::CRefNode::GetBioseqLabel() const
630 {
631 for (const CRefNode* node = this; node; node = node->m_Parent) {
632 if (node->m_Type == eBioseq) {
633 size_t brk = node->m_Text.find('\n');
634 return brk == string::npos ? kEmptyStr : node->m_Text.substr(0, brk);
635 }
636 if (IsSeqSet(node->m_Type) || node->m_Type == eSubmit) {
637 return node->m_Text;
638 }
639 }
640 return kEmptyStr;
641 }
642
643
CompareRefs(CRef<CReportObj> a,CRef<CReportObj> b)644 bool CDiscrepancyContext::CompareRefs(CRef<CReportObj> a, CRef<CReportObj> b) {
645 vector<const CRefNode*> A, B;
646 for (const CRefNode* node = static_cast<CDiscrepancyObject&>(*a).m_Ref; node; node = node->m_Parent) {
647 A.push_back(node);
648 }
649 reverse(A.begin(), A.end());
650 for (const CRefNode* node = static_cast<CDiscrepancyObject&>(*b).m_Ref; node; node = node->m_Parent) {
651 B.push_back(node);
652 }
653 reverse(B.begin(), B.end());
654 size_t n = min(A.size(), B.size());
655 for (size_t i = 0; i < n; i++) {
656 if (A[i] != B[i]) {
657 if (A[i]->m_Type == eSeqFeat && B[i]->m_Type != eSeqFeat) {
658 return true;
659 }
660 if (B[i]->m_Type == eSeqFeat && A[i]->m_Type != eSeqFeat) {
661 return false;
662 }
663 if (A[i]->m_Type == eSeqDesc && B[i]->m_Type != eSeqDesc) {
664 return true;
665 }
666 if (B[i]->m_Type == eSeqDesc && A[i]->m_Type != eSeqDesc) {
667 return false;
668 }
669 if (A[i]->m_Index != B[i]->m_Index) {
670 return A[i]->m_Index < B[i]->m_Index;
671 }
672 }
673 }
674 return A.size() == B.size() ? &*a < &*b : A.size() < B.size();
675 }
676
677
// One DISCREPANCY_LINK_MODULE entry per test module.
// NOTE(review): the macro is defined outside this file; presumably it
// references each module so that its self-registering test cases are linked
// into the library -- confirm against the macro definition.
678 DISCREPANCY_LINK_MODULE(discrepancy_case);
679 DISCREPANCY_LINK_MODULE(suspect_product_names);
680 DISCREPANCY_LINK_MODULE(division_code_conflicts);
681 DISCREPANCY_LINK_MODULE(feature_per_bioseq);
682 DISCREPANCY_LINK_MODULE(seqdesc_per_bioseq);
683 DISCREPANCY_LINK_MODULE(gene_names);
684 DISCREPANCY_LINK_MODULE(rna_names);
685 DISCREPANCY_LINK_MODULE(transl_too_long);
686 DISCREPANCY_LINK_MODULE(overlapping_features);
687 DISCREPANCY_LINK_MODULE(sesame_street);
688 DISCREPANCY_LINK_MODULE(transl_note);
689 DISCREPANCY_LINK_MODULE(feature_tests);
690 DISCREPANCY_LINK_MODULE(sequence_tests);
691 DISCREPANCY_LINK_MODULE(pub_tests);
692 DISCREPANCY_LINK_MODULE(biosource_tests);
693
694 END_SCOPE(NDiscrepancy)
695 END_NCBI_SCOPE
696