1 /* $Id: src_writer.hpp 632624 2021-06-03 17:38:23Z ivanov $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Authors: Frank Ludwig 27 * 28 * File Description: Write source quailiers 29 * 30 */ 31 32 #ifndef OBJTOOLS_WRITERS___SRC_WRITER__HPP 33 #define OBJTOOLS_WRITERS___SRC_WRITER__HPP 34 35 #include <corelib/ncbistd.hpp> 36 #include <objmgr/bioseq_handle.hpp> 37 #include <objects/seqfeat/BioSource.hpp> 38 #include <objects/seqfeat/Org_ref.hpp> 39 #include <objects/seqfeat/OrgName.hpp> 40 #include <objects/seqfeat/SubSource.hpp> 41 #include <objects/seqfeat/OrgMod.hpp> 42 #include <objects/seqtable/Seq_table.hpp> 43 #include <objects/seqfeat/PCRPrimerSet.hpp> 44 45 BEGIN_NCBI_SCOPE 46 BEGIN_objects_SCOPE 47 48 // ============================================================================ 49 class NCBI_XOBJWRITE_EXPORT CSrcError: 50 public CLineError 51 // ============================================================================ 52 { 53 protected: CSrcError(const CLineError & other)54 CSrcError(const CLineError& other):CLineError(other){}; 55 CSrcError( 56 ncbi::EDiagSev severity, 57 const std::string&); 58 59 public: 60 static CSrcError* Create( 61 ncbi::EDiagSev severity, 62 const std::string&); 63 }; 64 65 /** 66 * Used to generate tables showing qualifier-field entries occuring in the 67 * BioSources of instances of Bioseq and Seq-entry. 68 */ 69 // ============================================================================ 70 class NCBI_XOBJWRITE_EXPORT CSrcWriter: 71 public CObject 72 // ============================================================================ 73 { 74 public: 75 typedef map<string, size_t> COLUMNMAP; 76 typedef map<string, string> NAMEMAP; 77 typedef list<string> NAMELIST; 78 typedef vector<string> FIELDS; 79 typedef bool (CSrcWriter::*HANDLER)(const CBioSource&, const string&, ILineErrorListener*); 80 typedef map<string, CSrcWriter::HANDLER> HANDLERMAP; 81 82 public: CSrcWriter(unsigned int flags=0)83 CSrcWriter( 84 unsigned int flags=0) : 85 mFlags(flags), 86 mDelimiter("\t") { 87 xInit(); 88 }; 89 ~CSrcWriter()90 virtual ~CSrcWriter() 91 {}; 92 93 /** Write a table of the specified qualifier-field entries 94 * found in the BioSource of a given Bioseq. 95 */ 96 virtual bool WriteBioseqHandle( 97 CBioseq_Handle, 98 const FIELDS&, 99 CNcbiOstream&); 100 101 /** Write a table of the specified qualifier-field entries 102 * found in the BioSources of a vector of Bioseqs. 103 */ 104 virtual bool WriteBioseqHandles( 105 const vector<pair<string,CBioseq_Handle> >&, 106 const FIELDS&, 107 CNcbiOstream&, 108 ILineErrorListener* = 0); 109 110 /// Set the column delimiter for the output table. SetDelimiter(const string & delimiter)111 void SetDelimiter( 112 const string& delimiter) { 113 mDelimiter = delimiter; 114 }; 115 116 /// Verify that each string in fields is a valid qualifier name. 117 static bool ValidateFields( 118 const FIELDS& fields, 119 ILineErrorListener* = 0); 120 121 /** Write a table of all qualifier-field entries occurring 122 * in the BioSources for a given Seq-entry, 123 * with columns appearing in a canonical order. 124 */ 125 virtual bool WriteSeqEntry( 126 const CSeq_entry&, 127 CScope&, 128 CNcbiOstream&, 129 bool = false); 130 131 protected: 132 void xInit(); 133 134 virtual bool xGather(CBioseq_Handle, string id, const FIELDS&, ILineErrorListener* =0); 135 virtual bool xGatherId(CBioseq_Handle, ILineErrorListener* =0); 136 virtual bool xGatherGi(CBioseq_Handle, ILineErrorListener* =0); 137 virtual bool xGatherLocalId(CBioseq_Handle, ILineErrorListener* = 0); 138 virtual bool xGatherBankitId(CBioseq_Handle, ILineErrorListener* = 0); 139 virtual bool xGatherDefline(CBioseq_Handle, ILineErrorListener* =0); 140 virtual bool xTryDefaultId(const string& id, ILineErrorListener* =0); 141 virtual bool xHandleSourceField(const CBioSource&, const string&, ILineErrorListener* =0); 142 143 144 virtual bool xGatherTaxname(const CBioSource&, const string&, ILineErrorListener* =0); 145 virtual bool xGatherDivision(const CBioSource&, const string&, ILineErrorListener* =0); 146 virtual bool xGatherGenome(const CBioSource&, const string&, ILineErrorListener* =0); 147 virtual bool xGatherOrigin(const CBioSource&, const string&, ILineErrorListener* =0); 148 virtual bool xGatherSubtypeFeat(const CBioSource&, const string&, ILineErrorListener* =0); 149 virtual bool xGatherOrgModFeat(const CBioSource&, const string&, ILineErrorListener* =0); 150 virtual bool xGatherOrgCommon(const CBioSource&, const string&, ILineErrorListener* =0); 151 virtual bool xGatherOrgnameLineage(const CBioSource&, const string&, ILineErrorListener* =0); 152 virtual bool xGatherPcrPrimers(const CBioSource&, const string&, ILineErrorListener* =0); 153 virtual bool xGatherDb(const CBioSource&, const string&, ILineErrorListener* =0); 154 virtual bool xGatherTaxonId(const CBioSource&, const string&, ILineErrorListener* =0); 155 156 virtual bool xFormatTabDelimited(const FIELDS&, CNcbiOstream&); 157 158 static FIELDS xGetOrderedFieldNames(const FIELDS&); 159 static HANDLER xGetHandler(const string&); 160 static string xPrimerSetNames(const CPCRPrimerSet&); 161 static string xPrimerSetSequences(const CPCRPrimerSet&); 162 static bool xIsSubsourceTypeSuppressed(CSubSource::TSubtype); 163 static bool xIsOrgmodTypeSuppressed(COrgMod::TSubtype); 164 static NAMELIST xGetOrgModSubtypeNames(); 165 static NAMELIST xGetSubSourceSubtypeNames(); 166 static string xCompressFieldName(const string&); 167 static FIELDS xProcessFieldNames(const FIELDS&); 168 169 void xPrepareTableColumn(const string&, const string&, const string& =""); 170 void xAppendColumnValue(const string&, const string&); 171 bool xValueNeedsQuoting(const string&); 172 string xDequotedValue(const string&); 173 string xGetColStub(const string&); 174 string xGetOriginalId(const CBioseq_Handle&) const; 175 176 177 public: 178 static const FIELDS sDefaultSrcCheckFields; ///< Default fields processed by srcchk application, in their canonical order 179 static const FIELDS sAllSrcCheckFields; ///< All possible fields processed by srchck application, in their canonical order 180 181 protected: 182 static const FIELDS sDefaultSeqEntryFields; 183 static const FIELDS sAllSeqEntryFields; 184 static HANDLERMAP sHandlerMap; 185 static NAMEMAP sFieldnameToColname; 186 CRef<CSeq_table> mSrcTable; 187 COLUMNMAP mColnameToIndex; 188 unsigned int mFlags; 189 string mDelimiter; 190 }; 191 192 END_objects_SCOPE 193 END_NCBI_SCOPE 194 195 #endif // OBJTOOLS_WRITERS___SRC_WRITER__HPP 196