1 /*  $Id: text_output.cpp 634281 2021-07-07 19:11:51Z ivanov $
2  * =========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * =========================================================================
25  *
26  * Authors: please dont mention my name here
27  *
28  */
29 
30 #include <ncbi_pch.hpp>
31 #include "discrepancy_core.hpp"
32 
33 BEGIN_NCBI_SCOPE
34 BEGIN_SCOPE(NDiscrepancy)
35 USING_SCOPE(objects);
36 
37 // THIS WHOLE THING IS EVIL!
38 // DATA LAYER IS MIXED WITH PRESENTATION LAYER :(
39 
40 
ShowFatal(const CReportItem & item)41 static bool ShowFatal(const CReportItem& item)
42 {
43     if (!item.IsFatal()) {
44         return false;
45     }
46     TReportItemList subs = item.GetSubitems();
47     for (const auto& it : subs) {
48         if (it->IsSummary() && it->IsFatal()) {
49             return false;
50         }
51     }
52     return true;
53 }
54 
55 
/// Strip a single leading underscore from a name, if present.
///
/// @param s  Name to normalize; may be empty.
/// @return   `s` without its first character when that character is '_',
///           otherwise an unchanged copy of `s`.
static inline string deunderscore(const string& s)
{
    // Taken by const reference to avoid copying the argument on every call;
    // the emptiness check makes the empty-string case explicit instead of
    // relying on s[0] yielding the terminating '\0'.
    if (!s.empty() && s[0] == '_') {
        return s.substr(1);
    }
    return s;
}
60 
61 
RecursiveText(ostream & out,const TReportItemList & list,unsigned short flags)62 static void RecursiveText(ostream& out, const TReportItemList& list, unsigned short flags)
63 {
64     bool ext = (flags & CDiscrepancySet::eOutput_Ext) != 0;
65     bool fatal = (flags & CDiscrepancySet::eOutput_Fatal) != 0;
66     for (const auto& it : list) {
67         if (it->IsExtended() && !ext) {
68             continue;
69         }
70         if (fatal && ShowFatal(*it)) {
71             out << "FATAL: ";
72         }
73         out << deunderscore(it->GetTitle()) << ": " << it->GetMsg() << '\n';
74         TReportItemList subs = it->GetSubitems();
75         if (!subs.empty() && (ext || !subs[0]->IsExtended())) {
76             RecursiveText(out, subs, flags);
77         }
78         else {
79             TReportObjectList det = it->GetDetails();
80             for (const auto& obj : det) {
81                 if (flags & CDiscrepancySet::eOutput_Files) {
82                     out << obj->GetPath() << ":";
83                 }
84                 if (obj->IsFixed()) {
85                     out << "[FIXED] ";
86                 }
87                 out << obj->GetText() << '\n';
88             }
89         }
90     }
91 }
92 
93 
RecursiveSummary(ostream & out,const TReportItemList & list,unsigned short flags,size_t level=0)94 static void RecursiveSummary(ostream& out, const TReportItemList& list, unsigned short flags, size_t level = 0)
95 {
96     bool fatal = (flags & CDiscrepancySet::eOutput_Fatal) != 0;
97     for (const auto& it : list) {
98         if (level == 0) {
99             if (fatal && ShowFatal(*it)) {
100                 out << "FATAL: ";
101             }
102             out << deunderscore(it->GetTitle()) << ": " << it->GetMsg() << '\n';
103         }
104         else if (it->IsSummary()) {
105             out << string(level, '\t');
106             if (fatal && ShowFatal(*it)) {
107                 out << "FATAL: ";
108             }
109             out << it->GetMsg() << '\n';
110         }
111         else {
112             continue;
113         }
114         RecursiveSummary(out, it->GetSubitems(), flags, level + 1);
115     }
116 }
117 
118 
RecursiveFatalSummary(ostream & out,const TReportItemList & list,size_t level=0)119 static bool RecursiveFatalSummary(ostream& out, const TReportItemList& list, size_t level = 0)
120 {
121     bool found = false;
122     for (const auto& it : list) {
123         if (it->IsFatal() && it->GetTitle() != "SOURCE_QUALS" && it->GetTitle() != "SUSPECT_PRODUCT_NAMES") {
124             found = true;
125             if (level == 0) {
126                 out << "FATAL: ";
127                 out << deunderscore(it->GetTitle()) << ": " << it->GetMsg() << '\n';
128             }
129             else if (it->IsSummary()) {
130                 out << string(level, '\t');
131                 out << "FATAL: ";
132                 out << it->GetMsg() << '\n';
133             }
134             else {
135                 continue;
136             }
137             RecursiveFatalSummary(out, it->GetSubitems(), level + 1);
138         }
139     }
140     return found;
141 }
142 
143 
OutputText(ostream & out,unsigned short flags,char group)144 void CDiscrepancyContext::OutputText(ostream& out, unsigned short flags, char group)
145 {
146     switch (group) {
147         case 'b':
148             out << "Discrepancy Report Results (due to the large size of the file some checks may not have run)\n\n";
149             break;
150         case 'q':
151             out << "Discrepancy Report Results (SMART set of checks)\n\n";
152             break;
153         case 'u':
154             out << "Discrepancy Report Results (submitter set of checks)\n\n";
155             break;
156         default:
157             out << "Discrepancy Report Results\n\n";
158     }
159 
160     out << "Summary\n";
161     if (m_Group0.empty() && m_Group1.empty()) {
162         const CDiscrepancyGroup& order = x_OutputOrder();
163         m_Group0 = order[0].Collect(m_Tests, false);
164         m_Group1 = order[1].Collect(m_Tests, true);
165     }
166     RecursiveSummary(out, m_Group0, flags);
167     if (flags & eOutput_Fatal) {
168         RecursiveFatalSummary(out, m_Group1, flags);
169     }
170     RecursiveSummary(out, m_Group1, flags);
171 
172     if (flags & eOutput_Summary) return;
173 
174     out << "\nDetailed Report\n\n";
175     RecursiveText(out, m_Group0, flags);
176     RecursiveText(out, m_Group1, flags);
177 }
178 
179 
/// Write the leading whitespace for one line of XML output.
///
/// @param out     Destination stream.
/// @param indent  Nesting depth; each level is rendered as two spaces.
static void Indent(ostream& out, size_t indent)
{
    static const size_t kSpacesPerLevel = 2;
    const string padding(kSpacesPerLevel * indent, ' ');
    out << padding;
}
185 
186 static string SevLevel[CReportItem::eSeverity_error + 1] = { "INFO", "WARNING", "FATAL" };
187 
RecursiveXML(ostream & out,const TReportItemList & list,unsigned short flags,size_t indent)188 static void RecursiveXML(ostream& out, const TReportItemList& list, unsigned short flags, size_t indent)
189 {
190     bool ext = (flags & CDiscrepancySet::eOutput_Ext) != 0;
191     for (const auto& it : list) {
192         if (it->IsExtended() && !ext) {
193             continue;
194         }
195         Indent(out, indent);
196         out << "<details message=\"" << NStr::XmlEncode(it->GetXml()) << "\"";
197         out << " severity=\"" << SevLevel[it->GetSeverity()] << "\"";
198         if (it->GetCount() > 0) {
199             out << " cardinality=\"" << NStr::Int8ToString(it->GetCount()) << "\"";
200         }
201         if (!it->GetUnit().empty()) {
202             out << " unit=\"" << NStr::XmlEncode(it->GetUnit()) << "\"";
203         }
204         if (it->CanAutofix()) {
205             out << " autofix=\"true\"";
206         }
207         out << ">\n";
208 
209         ++indent;
210         TReportItemList subs = it->GetSubitems();
211         if (!subs.empty() && (ext || !subs[0]->IsExtended())) {
212             RecursiveXML(out, subs, flags, indent);
213         }
214         else {
215             for (const auto& obj : it->GetDetails()) {
216                 Indent(out, indent);
217                 out << "<object type=";
218                 switch (obj->GetType()) {
219                     case CReportObj::eType_feature:
220                         out << "\"feature\"";
221                         break;
222                     case CReportObj::eType_descriptor:
223                         out << "\"descriptor\"";
224                         break;
225                     case CReportObj::eType_sequence:
226                         out << "\"sequence\"";
227                         break;
228                     case CReportObj::eType_seq_set:
229                         out << "\"set\"";
230                         break;
231                     case CReportObj::eType_submit_block:
232                         out << "\"submit_block\"";
233                         break;
234                     case CReportObj::eType_string:
235                         out << "\"string\"";
236                         break;
237                     default:
238                         out << "\"\"";
239                         break;
240                 }
241                 if (flags & CDiscrepancySet::eOutput_Files) {
242                     out << " file=\"" << NStr::XmlEncode(obj->GetPath()) << "\"";
243                 }
244                 const string sFeatureType = obj->GetFeatureType();
245                 if (!sFeatureType.empty()) {
246                     out << " feature_type=\"" << NStr::XmlEncode(sFeatureType) << "\"";
247                 }
248                 const string sProductName = obj->GetProductName();
249                 if (!sProductName.empty()) {
250                     out << (sFeatureType == "Gene" ? " symbol=\"" : " product=\"") << NStr::XmlEncode(sProductName) << "\"";
251                 }
252                 const string sLocation = obj->GetLocation();
253                 if (!sLocation.empty()) {
254                     out << " location=\"" << NStr::XmlEncode(sLocation) << "\"";
255                 }
256                 const string sLocusTag = obj->GetLocusTag();
257                 if (!sLocusTag.empty()) {
258                     out << " locus_tag=\"" << NStr::XmlEncode(sLocusTag) << "\"";
259                 }
260                 const string text = obj->GetText();
261                 out << " label=\"" << NStr::XmlEncode(text) << "\" />\n";
262             }
263         }
264         --indent;
265         Indent(out, indent);
266         out << "</details>\n";
267     }
268 }
269 
270 
OutputXML(ostream & out,unsigned short flags)271 void CDiscrepancyContext::OutputXML(ostream& out, unsigned short flags)
272 {
273     const TDiscrepancyCaseMap& tests = GetTests();
274     out << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
275     out << "<discrepancy_report>\n";
276 
277     for (const auto& tst : tests) {
278         TReportItemList rep = tst.second->GetReport();
279         if (rep.empty()) {
280             continue;
281         }
282         CReportItem::ESeverity max_sev = CReportItem::eSeverity_info;
283         for (const auto& it : rep) {
284             CReportItem::ESeverity s = it->GetSeverity();
285             if (max_sev < s) {
286                 max_sev = s;
287             }
288         }
289         Indent(out, 1);
290         out << "<test name=\"" << deunderscore(tst.first)
291             << "\" description=\"" << NStr::XmlEncode(GetDiscrepancyDescr(tst.first))
292             << "\" severity=\"" << SevLevel[max_sev]
293             << "\" cardinality=\"" << rep.size() << "\">\n";
294         RecursiveXML(out, rep, flags, 2);
295         Indent(out, 1);
296         out << "</test>\n";
297     }
298     out << "</discrepancy_report>\n";
299 }
300 
301 
x_OutputOrder()302 const CDiscrepancyGroup& CDiscrepancyContext::x_OutputOrder()
303 {
304     if (!m_Order) {
305         CRef<CDiscrepancyGroup> G, H;
306         m_Order.Reset(new CDiscrepancyGroup);
307         G.Reset(new CDiscrepancyGroup("", "")); m_Order->Add(G);
308 #define LIST_TEST(name) H.Reset(new CDiscrepancyGroup("", #name)); G->Add(H);
309         LIST_TEST(COUNT_NUCLEOTIDES)
310         LIST_TEST(LONG_NO_ANNOTATION)
311         LIST_TEST(NO_ANNOTATION)
312 
313         G.Reset(new CDiscrepancyGroup("", "")); m_Order->Add(G);
314         LIST_TEST(SOURCE_QUALS)
315         LIST_TEST(DUP_SRC_QUAL)
316         LIST_TEST(MAP_CHROMOSOME_CONFLICT)
317         LIST_TEST(BIOMATERIAL_TAXNAME_MISMATCH)
318         LIST_TEST(SPECVOUCHER_TAXNAME_MISMATCH)
319         LIST_TEST(STRAIN_CULTURE_COLLECTION_MISMATCH)
320         LIST_TEST(TRINOMIAL_SHOULD_HAVE_QUALIFIER)
321         LIST_TEST(REQUIRED_STRAIN)
322         LIST_TEST(BACTERIA_SHOULD_NOT_HAVE_ISOLATE)
323         LIST_TEST(METAGENOMIC)
324         LIST_TEST(METAGENOME_SOURCE)
325         LIST_TEST(MAG_SHOULD_NOT_HAVE_STRAIN)
326         LIST_TEST(MAG_MISSING_ISOLATE)
327 
328         LIST_TEST(TITLE_ENDS_WITH_SEQUENCE)
329         LIST_TEST(GAPS)
330         LIST_TEST(N_RUNS)
331         LIST_TEST(PERCENT_N)
332         LIST_TEST(10_PERCENTN)
333         LIST_TEST(TERMINAL_NS)
334         LIST_TEST(ZERO_BASECOUNT)
335         LIST_TEST(LOW_QUALITY_REGION)
336         LIST_TEST(UNUSUAL_NT)
337         LIST_TEST(SHORT_CONTIG)
338         LIST_TEST(SHORT_SEQUENCES)
339         LIST_TEST(SEQUENCES_ARE_SHORT)
340         LIST_TEST(GENOMIC_MRNA)
341 
342         LIST_TEST(CHECK_AUTH_CAPS)
343         LIST_TEST(CHECK_AUTH_NAME)
344         LIST_TEST(TITLE_AUTHOR_CONFLICT)
345         LIST_TEST(CITSUBAFFIL_CONFLICT)
346         LIST_TEST(SUBMITBLOCK_CONFLICT)
347         LIST_TEST(UNPUB_PUB_WITHOUT_TITLE)
348         LIST_TEST(USA_STATE)
349 
350         LIST_TEST(FEATURE_COUNT)
351         LIST_TEST(PROTEIN_NAMES)
352         LIST_TEST(SUSPECT_PRODUCT_NAMES)
353         LIST_TEST(SUSPECT_PHRASES)
354         LIST_TEST(INCONSISTENT_PROTEIN_ID)
355         LIST_TEST(MISSING_PROTEIN_ID)
356         LIST_TEST(MRNA_SHOULD_HAVE_PROTEIN_TRANSCRIPT_IDS)
357         LIST_TEST(BAD_LOCUS_TAG_FORMAT)
358         LIST_TEST(INCONSISTENT_LOCUS_TAG_PREFIX)
359         LIST_TEST(DUPLICATE_LOCUS_TAGS)
360         LIST_TEST(MISSING_LOCUS_TAGS)
361         LIST_TEST(NON_GENE_LOCUS_TAG)
362         LIST_TEST(MISSING_GENES)
363         LIST_TEST(EXTRA_GENES)
364         LIST_TEST(BAD_BACTERIAL_GENE_NAME)
365         LIST_TEST(BAD_GENE_NAME)
366         LIST_TEST(BAD_GENE_STRAND)
367         LIST_TEST(DUP_GENES_OPPOSITE_STRANDS)
368         LIST_TEST(GENE_PARTIAL_CONFLICT)
369         LIST_TEST(GENE_PRODUCT_CONFLICT)
370         LIST_TEST(SHOW_HYPOTHETICAL_CDS_HAVING_GENE_NAME)
371         LIST_TEST(EC_NUMBER_ON_UNKNOWN_PROTEIN)
372         LIST_TEST(MISC_FEATURE_WITH_PRODUCT_QUAL)
373         LIST_TEST(PARTIAL_CDS_COMPLETE_SEQUENCE)
374         LIST_TEST(CONTAINED_CDS)
375         LIST_TEST(RNA_CDS_OVERLAP)
376         LIST_TEST(CDS_TRNA_OVERLAP)
377         LIST_TEST(OVERLAPPING_RRNAS)
378         LIST_TEST(FIND_OVERLAPPED_GENES)
379         LIST_TEST(ORDERED_LOCATION)
380         LIST_TEST(PARTIAL_PROBLEMS)
381         LIST_TEST(FEATURE_LOCATION_CONFLICT)
382         LIST_TEST(PSEUDO_MISMATCH)
383         LIST_TEST(EUKARYOTE_SHOULD_HAVE_MRNA)
384         LIST_TEST(MULTIPLE_CDS_ON_MRNA)
385         LIST_TEST(CDS_WITHOUT_MRNA)
386         LIST_TEST(BACTERIA_SHOULD_NOT_HAVE_MRNA)
387         LIST_TEST(BACTERIAL_PARTIAL_NONEXTENDABLE_EXCEPTION)
388         LIST_TEST(BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS)
389         LIST_TEST(BACTERIAL_JOINED_FEATURES_NO_EXCEPTION)
390         LIST_TEST(JOINED_FEATURES)
391         LIST_TEST(RIBOSOMAL_SLIPPAGE)
392         LIST_TEST(BAD_BGPIPE_QUALS)
393         LIST_TEST(CDS_HAS_NEW_EXCEPTION)
394         LIST_TEST(SHOW_TRANSL_EXCEPT)
395         LIST_TEST(RNA_NO_PRODUCT)
396         LIST_TEST(RRNA_NAME_CONFLICTS)
397         LIST_TEST(SUSPECT_RRNA_PRODUCTS)
398         LIST_TEST(SHORT_RRNA)
399         LIST_TEST(FIND_BADLEN_TRNAS)
400         LIST_TEST(UNUSUAL_MISC_RNA)
401         LIST_TEST(SHORT_LNCRNA)
402         LIST_TEST(SHORT_INTRON)
403         LIST_TEST(EXON_INTRON_CONFLICT)
404         LIST_TEST(EXON_ON_MRNA)
405         LIST_TEST(SHORT_PROT_SEQUENCES)
406 
407         LIST_TEST(INCONSISTENT_DBLINK)
408         LIST_TEST(INCONSISTENT_MOLINFO_TECH)
409         LIST_TEST(INCONSISTENT_MOLTYPES)
410         LIST_TEST(INCONSISTENT_STRUCTURED_COMMENTS)
411         LIST_TEST(QUALITY_SCORES)
412         LIST_TEST(SEGSETS_PRESENT)
413     }
414     return *m_Order;
415 }
416 
417 
418 END_SCOPE(NDiscrepancy)
419 END_NCBI_SCOPE
420