1 /* $Id: text_output.cpp 634281 2021-07-07 19:11:51Z ivanov $
2 * =========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * =========================================================================
25 *
26 * Authors: please don't mention my name here
27 *
28 */
29
30 #include <ncbi_pch.hpp>
31 #include "discrepancy_core.hpp"
32
33 BEGIN_NCBI_SCOPE
34 BEGIN_SCOPE(NDiscrepancy)
35 USING_SCOPE(objects);
36
37 // THIS WHOLE THING IS EVIL!
38 // DATA LAYER IS MIXED WITH PRESENTATION LAYER :(
39
40
ShowFatal(const CReportItem & item)41 static bool ShowFatal(const CReportItem& item)
42 {
43 if (!item.IsFatal()) {
44 return false;
45 }
46 TReportItemList subs = item.GetSubitems();
47 for (const auto& it : subs) {
48 if (it->IsSummary() && it->IsFatal()) {
49 return false;
50 }
51 }
52 return true;
53 }
54
55
// Returns a copy of `s` with one leading underscore removed, if there is one.
// Strings that are empty or do not start with '_' are returned unchanged.
// The argument is taken by const reference so that calling the helper does
// not copy the string before it is even inspected.
static inline std::string deunderscore(const std::string& s)
{
    const bool hasLeadingUnderscore = !s.empty() && s.front() == '_';
    return hasLeadingUnderscore ? s.substr(1) : s;
}
60
61
RecursiveText(ostream & out,const TReportItemList & list,unsigned short flags)62 static void RecursiveText(ostream& out, const TReportItemList& list, unsigned short flags)
63 {
64 bool ext = (flags & CDiscrepancySet::eOutput_Ext) != 0;
65 bool fatal = (flags & CDiscrepancySet::eOutput_Fatal) != 0;
66 for (const auto& it : list) {
67 if (it->IsExtended() && !ext) {
68 continue;
69 }
70 if (fatal && ShowFatal(*it)) {
71 out << "FATAL: ";
72 }
73 out << deunderscore(it->GetTitle()) << ": " << it->GetMsg() << '\n';
74 TReportItemList subs = it->GetSubitems();
75 if (!subs.empty() && (ext || !subs[0]->IsExtended())) {
76 RecursiveText(out, subs, flags);
77 }
78 else {
79 TReportObjectList det = it->GetDetails();
80 for (const auto& obj : det) {
81 if (flags & CDiscrepancySet::eOutput_Files) {
82 out << obj->GetPath() << ":";
83 }
84 if (obj->IsFixed()) {
85 out << "[FIXED] ";
86 }
87 out << obj->GetText() << '\n';
88 }
89 }
90 }
91 }
92
93
RecursiveSummary(ostream & out,const TReportItemList & list,unsigned short flags,size_t level=0)94 static void RecursiveSummary(ostream& out, const TReportItemList& list, unsigned short flags, size_t level = 0)
95 {
96 bool fatal = (flags & CDiscrepancySet::eOutput_Fatal) != 0;
97 for (const auto& it : list) {
98 if (level == 0) {
99 if (fatal && ShowFatal(*it)) {
100 out << "FATAL: ";
101 }
102 out << deunderscore(it->GetTitle()) << ": " << it->GetMsg() << '\n';
103 }
104 else if (it->IsSummary()) {
105 out << string(level, '\t');
106 if (fatal && ShowFatal(*it)) {
107 out << "FATAL: ";
108 }
109 out << it->GetMsg() << '\n';
110 }
111 else {
112 continue;
113 }
114 RecursiveSummary(out, it->GetSubitems(), flags, level + 1);
115 }
116 }
117
118
RecursiveFatalSummary(ostream & out,const TReportItemList & list,size_t level=0)119 static bool RecursiveFatalSummary(ostream& out, const TReportItemList& list, size_t level = 0)
120 {
121 bool found = false;
122 for (const auto& it : list) {
123 if (it->IsFatal() && it->GetTitle() != "SOURCE_QUALS" && it->GetTitle() != "SUSPECT_PRODUCT_NAMES") {
124 found = true;
125 if (level == 0) {
126 out << "FATAL: ";
127 out << deunderscore(it->GetTitle()) << ": " << it->GetMsg() << '\n';
128 }
129 else if (it->IsSummary()) {
130 out << string(level, '\t');
131 out << "FATAL: ";
132 out << it->GetMsg() << '\n';
133 }
134 else {
135 continue;
136 }
137 RecursiveFatalSummary(out, it->GetSubitems(), level + 1);
138 }
139 }
140 return found;
141 }
142
143
OutputText(ostream & out,unsigned short flags,char group)144 void CDiscrepancyContext::OutputText(ostream& out, unsigned short flags, char group)
145 {
146 switch (group) {
147 case 'b':
148 out << "Discrepancy Report Results (due to the large size of the file some checks may not have run)\n\n";
149 break;
150 case 'q':
151 out << "Discrepancy Report Results (SMART set of checks)\n\n";
152 break;
153 case 'u':
154 out << "Discrepancy Report Results (submitter set of checks)\n\n";
155 break;
156 default:
157 out << "Discrepancy Report Results\n\n";
158 }
159
160 out << "Summary\n";
161 if (m_Group0.empty() && m_Group1.empty()) {
162 const CDiscrepancyGroup& order = x_OutputOrder();
163 m_Group0 = order[0].Collect(m_Tests, false);
164 m_Group1 = order[1].Collect(m_Tests, true);
165 }
166 RecursiveSummary(out, m_Group0, flags);
167 if (flags & eOutput_Fatal) {
168 RecursiveFatalSummary(out, m_Group1, flags);
169 }
170 RecursiveSummary(out, m_Group1, flags);
171
172 if (flags & eOutput_Summary) return;
173
174 out << "\nDetailed Report\n\n";
175 RecursiveText(out, m_Group0, flags);
176 RecursiveText(out, m_Group1, flags);
177 }
178
179
// Writes the leading whitespace of an XML line: two spaces per nesting level.
static void Indent(std::ostream& out, size_t indent)
{
    static const size_t kSpacesPerLevel = 2;
    const std::string padding(indent * kSpacesPerLevel, ' ');
    out << padding;
}
185
186 static string SevLevel[CReportItem::eSeverity_error + 1] = { "INFO", "WARNING", "FATAL" };
187
RecursiveXML(ostream & out,const TReportItemList & list,unsigned short flags,size_t indent)188 static void RecursiveXML(ostream& out, const TReportItemList& list, unsigned short flags, size_t indent)
189 {
190 bool ext = (flags & CDiscrepancySet::eOutput_Ext) != 0;
191 for (const auto& it : list) {
192 if (it->IsExtended() && !ext) {
193 continue;
194 }
195 Indent(out, indent);
196 out << "<details message=\"" << NStr::XmlEncode(it->GetXml()) << "\"";
197 out << " severity=\"" << SevLevel[it->GetSeverity()] << "\"";
198 if (it->GetCount() > 0) {
199 out << " cardinality=\"" << NStr::Int8ToString(it->GetCount()) << "\"";
200 }
201 if (!it->GetUnit().empty()) {
202 out << " unit=\"" << NStr::XmlEncode(it->GetUnit()) << "\"";
203 }
204 if (it->CanAutofix()) {
205 out << " autofix=\"true\"";
206 }
207 out << ">\n";
208
209 ++indent;
210 TReportItemList subs = it->GetSubitems();
211 if (!subs.empty() && (ext || !subs[0]->IsExtended())) {
212 RecursiveXML(out, subs, flags, indent);
213 }
214 else {
215 for (const auto& obj : it->GetDetails()) {
216 Indent(out, indent);
217 out << "<object type=";
218 switch (obj->GetType()) {
219 case CReportObj::eType_feature:
220 out << "\"feature\"";
221 break;
222 case CReportObj::eType_descriptor:
223 out << "\"descriptor\"";
224 break;
225 case CReportObj::eType_sequence:
226 out << "\"sequence\"";
227 break;
228 case CReportObj::eType_seq_set:
229 out << "\"set\"";
230 break;
231 case CReportObj::eType_submit_block:
232 out << "\"submit_block\"";
233 break;
234 case CReportObj::eType_string:
235 out << "\"string\"";
236 break;
237 default:
238 out << "\"\"";
239 break;
240 }
241 if (flags & CDiscrepancySet::eOutput_Files) {
242 out << " file=\"" << NStr::XmlEncode(obj->GetPath()) << "\"";
243 }
244 const string sFeatureType = obj->GetFeatureType();
245 if (!sFeatureType.empty()) {
246 out << " feature_type=\"" << NStr::XmlEncode(sFeatureType) << "\"";
247 }
248 const string sProductName = obj->GetProductName();
249 if (!sProductName.empty()) {
250 out << (sFeatureType == "Gene" ? " symbol=\"" : " product=\"") << NStr::XmlEncode(sProductName) << "\"";
251 }
252 const string sLocation = obj->GetLocation();
253 if (!sLocation.empty()) {
254 out << " location=\"" << NStr::XmlEncode(sLocation) << "\"";
255 }
256 const string sLocusTag = obj->GetLocusTag();
257 if (!sLocusTag.empty()) {
258 out << " locus_tag=\"" << NStr::XmlEncode(sLocusTag) << "\"";
259 }
260 const string text = obj->GetText();
261 out << " label=\"" << NStr::XmlEncode(text) << "\" />\n";
262 }
263 }
264 --indent;
265 Indent(out, indent);
266 out << "</details>\n";
267 }
268 }
269
270
OutputXML(ostream & out,unsigned short flags)271 void CDiscrepancyContext::OutputXML(ostream& out, unsigned short flags)
272 {
273 const TDiscrepancyCaseMap& tests = GetTests();
274 out << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
275 out << "<discrepancy_report>\n";
276
277 for (const auto& tst : tests) {
278 TReportItemList rep = tst.second->GetReport();
279 if (rep.empty()) {
280 continue;
281 }
282 CReportItem::ESeverity max_sev = CReportItem::eSeverity_info;
283 for (const auto& it : rep) {
284 CReportItem::ESeverity s = it->GetSeverity();
285 if (max_sev < s) {
286 max_sev = s;
287 }
288 }
289 Indent(out, 1);
290 out << "<test name=\"" << deunderscore(tst.first)
291 << "\" description=\"" << NStr::XmlEncode(GetDiscrepancyDescr(tst.first))
292 << "\" severity=\"" << SevLevel[max_sev]
293 << "\" cardinality=\"" << rep.size() << "\">\n";
294 RecursiveXML(out, rep, flags, 2);
295 Indent(out, 1);
296 out << "</test>\n";
297 }
298 out << "</discrepancy_report>\n";
299 }
300
301
x_OutputOrder()302 const CDiscrepancyGroup& CDiscrepancyContext::x_OutputOrder()
303 {
304 if (!m_Order) {
305 CRef<CDiscrepancyGroup> G, H;
306 m_Order.Reset(new CDiscrepancyGroup);
307 G.Reset(new CDiscrepancyGroup("", "")); m_Order->Add(G);
308 #define LIST_TEST(name) H.Reset(new CDiscrepancyGroup("", #name)); G->Add(H);
309 LIST_TEST(COUNT_NUCLEOTIDES)
310 LIST_TEST(LONG_NO_ANNOTATION)
311 LIST_TEST(NO_ANNOTATION)
312
313 G.Reset(new CDiscrepancyGroup("", "")); m_Order->Add(G);
314 LIST_TEST(SOURCE_QUALS)
315 LIST_TEST(DUP_SRC_QUAL)
316 LIST_TEST(MAP_CHROMOSOME_CONFLICT)
317 LIST_TEST(BIOMATERIAL_TAXNAME_MISMATCH)
318 LIST_TEST(SPECVOUCHER_TAXNAME_MISMATCH)
319 LIST_TEST(STRAIN_CULTURE_COLLECTION_MISMATCH)
320 LIST_TEST(TRINOMIAL_SHOULD_HAVE_QUALIFIER)
321 LIST_TEST(REQUIRED_STRAIN)
322 LIST_TEST(BACTERIA_SHOULD_NOT_HAVE_ISOLATE)
323 LIST_TEST(METAGENOMIC)
324 LIST_TEST(METAGENOME_SOURCE)
325 LIST_TEST(MAG_SHOULD_NOT_HAVE_STRAIN)
326 LIST_TEST(MAG_MISSING_ISOLATE)
327
328 LIST_TEST(TITLE_ENDS_WITH_SEQUENCE)
329 LIST_TEST(GAPS)
330 LIST_TEST(N_RUNS)
331 LIST_TEST(PERCENT_N)
332 LIST_TEST(10_PERCENTN)
333 LIST_TEST(TERMINAL_NS)
334 LIST_TEST(ZERO_BASECOUNT)
335 LIST_TEST(LOW_QUALITY_REGION)
336 LIST_TEST(UNUSUAL_NT)
337 LIST_TEST(SHORT_CONTIG)
338 LIST_TEST(SHORT_SEQUENCES)
339 LIST_TEST(SEQUENCES_ARE_SHORT)
340 LIST_TEST(GENOMIC_MRNA)
341
342 LIST_TEST(CHECK_AUTH_CAPS)
343 LIST_TEST(CHECK_AUTH_NAME)
344 LIST_TEST(TITLE_AUTHOR_CONFLICT)
345 LIST_TEST(CITSUBAFFIL_CONFLICT)
346 LIST_TEST(SUBMITBLOCK_CONFLICT)
347 LIST_TEST(UNPUB_PUB_WITHOUT_TITLE)
348 LIST_TEST(USA_STATE)
349
350 LIST_TEST(FEATURE_COUNT)
351 LIST_TEST(PROTEIN_NAMES)
352 LIST_TEST(SUSPECT_PRODUCT_NAMES)
353 LIST_TEST(SUSPECT_PHRASES)
354 LIST_TEST(INCONSISTENT_PROTEIN_ID)
355 LIST_TEST(MISSING_PROTEIN_ID)
356 LIST_TEST(MRNA_SHOULD_HAVE_PROTEIN_TRANSCRIPT_IDS)
357 LIST_TEST(BAD_LOCUS_TAG_FORMAT)
358 LIST_TEST(INCONSISTENT_LOCUS_TAG_PREFIX)
359 LIST_TEST(DUPLICATE_LOCUS_TAGS)
360 LIST_TEST(MISSING_LOCUS_TAGS)
361 LIST_TEST(NON_GENE_LOCUS_TAG)
362 LIST_TEST(MISSING_GENES)
363 LIST_TEST(EXTRA_GENES)
364 LIST_TEST(BAD_BACTERIAL_GENE_NAME)
365 LIST_TEST(BAD_GENE_NAME)
366 LIST_TEST(BAD_GENE_STRAND)
367 LIST_TEST(DUP_GENES_OPPOSITE_STRANDS)
368 LIST_TEST(GENE_PARTIAL_CONFLICT)
369 LIST_TEST(GENE_PRODUCT_CONFLICT)
370 LIST_TEST(SHOW_HYPOTHETICAL_CDS_HAVING_GENE_NAME)
371 LIST_TEST(EC_NUMBER_ON_UNKNOWN_PROTEIN)
372 LIST_TEST(MISC_FEATURE_WITH_PRODUCT_QUAL)
373 LIST_TEST(PARTIAL_CDS_COMPLETE_SEQUENCE)
374 LIST_TEST(CONTAINED_CDS)
375 LIST_TEST(RNA_CDS_OVERLAP)
376 LIST_TEST(CDS_TRNA_OVERLAP)
377 LIST_TEST(OVERLAPPING_RRNAS)
378 LIST_TEST(FIND_OVERLAPPED_GENES)
379 LIST_TEST(ORDERED_LOCATION)
380 LIST_TEST(PARTIAL_PROBLEMS)
381 LIST_TEST(FEATURE_LOCATION_CONFLICT)
382 LIST_TEST(PSEUDO_MISMATCH)
383 LIST_TEST(EUKARYOTE_SHOULD_HAVE_MRNA)
384 LIST_TEST(MULTIPLE_CDS_ON_MRNA)
385 LIST_TEST(CDS_WITHOUT_MRNA)
386 LIST_TEST(BACTERIA_SHOULD_NOT_HAVE_MRNA)
387 LIST_TEST(BACTERIAL_PARTIAL_NONEXTENDABLE_EXCEPTION)
388 LIST_TEST(BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS)
389 LIST_TEST(BACTERIAL_JOINED_FEATURES_NO_EXCEPTION)
390 LIST_TEST(JOINED_FEATURES)
391 LIST_TEST(RIBOSOMAL_SLIPPAGE)
392 LIST_TEST(BAD_BGPIPE_QUALS)
393 LIST_TEST(CDS_HAS_NEW_EXCEPTION)
394 LIST_TEST(SHOW_TRANSL_EXCEPT)
395 LIST_TEST(RNA_NO_PRODUCT)
396 LIST_TEST(RRNA_NAME_CONFLICTS)
397 LIST_TEST(SUSPECT_RRNA_PRODUCTS)
398 LIST_TEST(SHORT_RRNA)
399 LIST_TEST(FIND_BADLEN_TRNAS)
400 LIST_TEST(UNUSUAL_MISC_RNA)
401 LIST_TEST(SHORT_LNCRNA)
402 LIST_TEST(SHORT_INTRON)
403 LIST_TEST(EXON_INTRON_CONFLICT)
404 LIST_TEST(EXON_ON_MRNA)
405 LIST_TEST(SHORT_PROT_SEQUENCES)
406
407 LIST_TEST(INCONSISTENT_DBLINK)
408 LIST_TEST(INCONSISTENT_MOLINFO_TECH)
409 LIST_TEST(INCONSISTENT_MOLTYPES)
410 LIST_TEST(INCONSISTENT_STRUCTURED_COMMENTS)
411 LIST_TEST(QUALITY_SCORES)
412 LIST_TEST(SEGSETS_PRESENT)
413 }
414 return *m_Order;
415 }
416
417
418 END_SCOPE(NDiscrepancy)
419 END_NCBI_SCOPE
420