1 /*  $Id: src_writer.hpp 632624 2021-06-03 17:38:23Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Frank Ludwig
27  *
28  * File Description:  Write source qualifiers
29  *
30  */
31 
32 #ifndef OBJTOOLS_WRITERS___SRC_WRITER__HPP
33 #define OBJTOOLS_WRITERS___SRC_WRITER__HPP
34 
35 #include <corelib/ncbistd.hpp>
36 #include <objmgr/bioseq_handle.hpp>
37 #include <objects/seqfeat/BioSource.hpp>
38 #include <objects/seqfeat/Org_ref.hpp>
39 #include <objects/seqfeat/OrgName.hpp>
40 #include <objects/seqfeat/SubSource.hpp>
41 #include <objects/seqfeat/OrgMod.hpp>
42 #include <objects/seqtable/Seq_table.hpp>
43 #include <objects/seqfeat/PCRPrimerSet.hpp>
44 
45 BEGIN_NCBI_SCOPE
46 BEGIN_objects_SCOPE
47 
48 //  ============================================================================
49 class NCBI_XOBJWRITE_EXPORT CSrcError:
50     public CLineError
51 //  ============================================================================
52 {
53 protected:
CSrcError(const CLineError & other)54     CSrcError(const CLineError& other):CLineError(other){};
55     CSrcError(
56         ncbi::EDiagSev severity,
57         const std::string&);
58 
59 public:
60     static CSrcError* Create(
61         ncbi::EDiagSev severity,
62         const std::string&);
63 };
64 
65 /**
66   * Used to generate tables showing qualifier-field entries occurring in the
67   * BioSources of instances of Bioseq and Seq-entry.
68   */
69 //  ============================================================================
70 class NCBI_XOBJWRITE_EXPORT CSrcWriter:
71     public CObject
72 //  ============================================================================
73 {
74 public:
75     typedef map<string, size_t> COLUMNMAP;
76     typedef map<string, string> NAMEMAP;
77     typedef list<string> NAMELIST;
78     typedef vector<string> FIELDS;
79     typedef bool (CSrcWriter::*HANDLER)(const CBioSource&, const string&, ILineErrorListener*);
80     typedef map<string, CSrcWriter::HANDLER> HANDLERMAP;
81 
82 public:
CSrcWriter(unsigned int flags=0)83     CSrcWriter(
84             unsigned int flags=0) :
85         mFlags(flags),
86         mDelimiter("\t") {
87         xInit();
88     };
89 
~CSrcWriter()90     virtual ~CSrcWriter()
91     {};
92 
93     /** Write a table of the specified qualifier-field entries
94       * found in the BioSource of a given Bioseq.
95       */
96     virtual bool WriteBioseqHandle(
97         CBioseq_Handle,
98         const FIELDS&,
99         CNcbiOstream&);
100 
101     /** Write a table of the specified qualifier-field entries
102       * found in the BioSources of a vector of Bioseqs.
103       */
104     virtual bool WriteBioseqHandles(
105         const vector<pair<string,CBioseq_Handle> >&,
106         const FIELDS&,
107         CNcbiOstream&,
108         ILineErrorListener* = 0);
109 
110     /// Set the column delimiter for the output table.
SetDelimiter(const string & delimiter)111     void SetDelimiter(
112         const string& delimiter) {
113         mDelimiter = delimiter;
114     };
115 
116     /// Verify that each string in fields is a valid qualifier name.
117     static bool ValidateFields(
118         const FIELDS& fields,
119         ILineErrorListener* = 0);
120 
121     /** Write a table of all qualifier-field entries occurring
122       * in the BioSources for a given Seq-entry,
123       * with columns appearing in a canonical order.
124       */
125     virtual bool WriteSeqEntry(
126         const CSeq_entry&,
127         CScope&,
128         CNcbiOstream&,
129         bool = false);
130 
131 protected:
132     void xInit();
133 
134     virtual bool xGather(CBioseq_Handle, string id, const FIELDS&, ILineErrorListener* =0);
135     virtual bool xGatherId(CBioseq_Handle, ILineErrorListener* =0);
136     virtual bool xGatherGi(CBioseq_Handle, ILineErrorListener* =0);
137     virtual bool xGatherLocalId(CBioseq_Handle, ILineErrorListener* = 0);
138     virtual bool xGatherBankitId(CBioseq_Handle, ILineErrorListener* = 0);
139     virtual bool xGatherDefline(CBioseq_Handle, ILineErrorListener* =0);
140     virtual bool xTryDefaultId(const string& id, ILineErrorListener* =0);
141     virtual bool xHandleSourceField(const CBioSource&, const string&, ILineErrorListener* =0);
142 
143 
144     virtual bool xGatherTaxname(const CBioSource&, const string&, ILineErrorListener* =0);
145     virtual bool xGatherDivision(const CBioSource&, const string&, ILineErrorListener* =0);
146     virtual bool xGatherGenome(const CBioSource&, const string&,  ILineErrorListener* =0);
147     virtual bool xGatherOrigin(const CBioSource&, const string&, ILineErrorListener* =0);
148     virtual bool xGatherSubtypeFeat(const CBioSource&, const string&, ILineErrorListener* =0);
149     virtual bool xGatherOrgModFeat(const CBioSource&, const string&, ILineErrorListener* =0);
150     virtual bool xGatherOrgCommon(const CBioSource&, const string&, ILineErrorListener* =0);
151     virtual bool xGatherOrgnameLineage(const CBioSource&, const string&, ILineErrorListener* =0);
152     virtual bool xGatherPcrPrimers(const CBioSource&, const string&, ILineErrorListener* =0);
153     virtual bool xGatherDb(const CBioSource&, const string&, ILineErrorListener* =0);
154     virtual bool xGatherTaxonId(const CBioSource&, const string&, ILineErrorListener* =0);
155 
156     virtual bool xFormatTabDelimited(const FIELDS&, CNcbiOstream&);
157 
158     static FIELDS xGetOrderedFieldNames(const FIELDS&);
159     static HANDLER xGetHandler(const string&);
160     static string xPrimerSetNames(const CPCRPrimerSet&);
161     static string xPrimerSetSequences(const CPCRPrimerSet&);
162     static bool xIsSubsourceTypeSuppressed(CSubSource::TSubtype);
163     static bool xIsOrgmodTypeSuppressed(COrgMod::TSubtype);
164     static NAMELIST xGetOrgModSubtypeNames();
165     static NAMELIST xGetSubSourceSubtypeNames();
166     static string xCompressFieldName(const string&);
167     static FIELDS xProcessFieldNames(const FIELDS&);
168 
169     void xPrepareTableColumn(const string&, const string&, const string& ="");
170     void xAppendColumnValue(const string&, const string&);
171     bool xValueNeedsQuoting(const string&);
172     string xDequotedValue(const string&);
173     string xGetColStub(const string&);
174     string xGetOriginalId(const CBioseq_Handle&) const;
175 
176 
177 public:
178     static const FIELDS sDefaultSrcCheckFields; ///< Default fields processed by srcchk application, in their canonical order
179     static const FIELDS sAllSrcCheckFields; ///< All possible fields processed by srchck application, in their canonical order
180 
181 protected:
182     static const FIELDS sDefaultSeqEntryFields;
183     static const FIELDS sAllSeqEntryFields;
184     static HANDLERMAP sHandlerMap;
185     static NAMEMAP sFieldnameToColname;
186     CRef<CSeq_table> mSrcTable;
187     COLUMNMAP mColnameToIndex;
188     unsigned int mFlags;
189     string mDelimiter;
190 };
191 
192 END_objects_SCOPE
193 END_NCBI_SCOPE
194 
195 #endif  // OBJTOOLS_WRITERS___SRC_WRITER__HPP
196