1 /* $Id: tabular.hpp 616915 2020-09-22 19:19:18Z jianye $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's offical duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:  Ilya Dondoshansky
27 *
28 * ===========================================================================
29 */
30 
/// @file tabular.hpp
32 /// Formatting of pairwise sequence alignments in tabular form.
33 
34 #ifndef OBJTOOLS_ALIGN_FORMAT___TABULAR_HPP
35 #define OBJTOOLS_ALIGN_FORMAT___TABULAR_HPP
36 
37 #include <corelib/ncbistre.hpp>
38 #include <objects/seqalign/Seq_align.hpp>
39 #include <objects/seqloc/Seq_id.hpp>
40 #include <objmgr/scope.hpp>
41 #include <objtools/align_format/align_format_util.hpp>
42 #include <objtools/blast/seqdb_reader/seqdb.hpp>
43 #include <algo/blast/igblast/igblast.hpp>
44 #include <objects/blastdb/Blast_def_line_set.hpp>
45 
46 #include <algorithm>
47 
48 BEGIN_NCBI_SCOPE
49 BEGIN_SCOPE(align_format)
50 
51 
52 /// Class containing information needed for tabular formatting of BLAST
53 /// results.
54 class NCBI_ALIGN_FORMAT_EXPORT CBlastTabularInfo : public CObject
55 {
56 public:
57     /// In what form should the sequence identifiers be shown?
58     enum ESeqIdType {
59         eFullId = 0, ///< Show full seq-id, with multiple ids concatenated.
60         eAccession,  ///< Show only best accession
61         eAccVersion, ///< Show only best accession.version
62         eGi          ///< Show only gi
63     };
64 
65     /// What delimiter to use between fields in each row of the tabular output.
66     enum EFieldDelimiter {
67         eTab = 0, ///< Tab
68         eSpace,   ///< Space
69         eComma,   ///< Comma
70         eCustom   ///<Custom
71     };
72 
73     /// Constructor
74     /// @param ostr Stream to write output to [in]
75     /// @param format Output format - what fields to include in the output [in]
76     /// @param delim Delimiter to use between tabular fields [in]
77     /// @note fields that are not recognized will be ignored, if no fields are
78     /// specified (or left after purging those that are not recognized), the
79     /// default format is assumed
80     CBlastTabularInfo(CNcbiOstream& ostr,
81                       const string& format = kDfltArgTabularOutputFmt,
82                       EFieldDelimiter delim = eTab,
83                       bool parse_local_ids = false);
84 
85     /// Destructor
86     ~CBlastTabularInfo();
87     /// Set query id from a objects::CSeq_id
88     /// @param id List of Seq-ids to use [in]
89     void SetQueryId(list<CRef<objects::CSeq_id> >& id);
90     /// Set query id from a Bioseq handle
91     /// @param bh Bioseq handle to get Seq-ids from
92     void SetQueryId(const objects::CBioseq_Handle& bh);
93     ///Get query seqid list
GetQueryId() const94     const list<CRef<CSeq_id> >& GetQueryId() const {
95         return m_QueryId;
96     };
97     /// Set subject id from a objects::CSeq_id
98     /// @param id List of Seq-ids to use [in]
99     void SetSubjectId(list<CRef<objects::CSeq_id> >& id);
100     /// Set subject id from a Bioseq handle
101     /// @param bh Bioseq handle to get Seq-ids from
102     void SetSubjectId(const objects::CBioseq_Handle& bh);
103     /// Set the HSP scores
104     /// @param score Raw score [in]
105     /// @param bit_score Bit score [in]
106     /// @param evalue Expect value [in]
107     void SetScores(int score, double bit_score, double evalue);
108     /// Set the HSP endpoints. Note that if alignment is on opposite strands,
109     /// the subject offsets must be reversed.
110     /// @param q_start Starting offset in query [in]
111     /// @param q_end Ending offset in query [in]
112     /// @param s_start Starting offset in subject [in]
113     /// @param s_end Ending offset in subject [in]
114     void SetEndpoints(int q_start, int q_end, int s_start, int s_end);
115     /// Set various counts/lengths
116     /// @param num_ident Number of identities [in]
117     /// @param length Alignment length [in]
118     /// @param gaps Total number of gaps [in]
119     /// @param gap_opens Number of gap openings [in]
120     /// @param positives Number of positives [in]
121     void SetCounts(int num_ident, int length, int gaps, int gap_opens,
122                    int positives =0, int query_frame = 1,
123                    int subject_frame = 1);
124     /// Sets the Blast-traceback-operations string.
125     /// @param btop_string string for blast traceback operations [in]
126     void SetBTOP(string btop_string);
127     /// Set all member fields, given a Seq-align
128     /// @param sal Seq-align to get data from [in]
129     /// @param scope Scope for Bioseq retrieval [in]
130     /// @param matrix Matrix to calculate positives; NULL if not applicable. [in]
131     /// @return 0 on success, 1 if query or subject Bioseq is not found.
132     int SetFields(const objects::CSeq_align& sal,
133                           objects::CScope& scope,
134                           CNcbiMatrix<int>* matrix=0);
SetCustomDelim(string customDelim)135     void SetCustomDelim(string customDelim) {
136         x_SetFieldDelimiter(eCustom,customDelim);
137     }
138     /// Print one line of tabular output
139     virtual void Print(void);
140     /// Print the tabular output header
141     /// @param program Program name to show in the header [in]
142     /// @param bioseq Query Bioseq [in]
143     /// @param dbname Search database name [in]
144     /// @param rid the search RID (if not applicable, it should be empty
145     /// the string) [in]
146     /// @param iteration Iteration number (for PSI-BLAST), use default
147     /// parameter value when not applicable [in]
148     /// @param align_set All alignments for this query [in]
149     void PrintHeader(const string& program,
150                              const objects::CBioseq& bioseq,
151                              const string& dbname,
152                              const string& rid = kEmptyStr,
153                              unsigned int iteration =
154                                 numeric_limits<unsigned int>::max(),
155                              const objects::CSeq_align_set* align_set=0,
156                              CConstRef<objects::CBioseq> subj_bioseq
157                                 = CConstRef<objects::CBioseq>());
158 
159      /// Prints number of queries processed.
160      /// @param num_queries number of queries processed [in]
161      void PrintNumProcessed(int num_queries);
162 
163     /// Return all field names supported in the format string.
164     list<string> GetAllFieldNames(void);
165 
166     /// Should local IDs be parsed or not?
167     /// @param val value to set [in]
168     /// Returns true if the field was requested in the format specification
169     /// @param field Which field to test [in]
SetParseLocalIds(bool val)170     void SetParseLocalIds(bool val) { m_ParseLocalIds = val; }
171 
172     /// Should subject deflien be parsed for id or not?
173     /// @param val value to set [in]
SetParseSubjectDefline(bool val)174     void SetParseSubjectDefline(bool val) { m_ParseSubjectDefline = val; }
175 
176     /// Avoid fetching sequence (if possible)
177     /// If the sequence is needed (e.g., will be formatted, it will be fetched)
178     /// @param nofetch Do not fetch if true [in]
179     void SetNoFetch(bool nofetch);
180     /// Avoid fetch of sequence if true returned
181     bool GetNoFetch();
182 
183     // Set Genetic code for translating seqs
SetQueryGeneticCode(int q_gc)184     void SetQueryGeneticCode(int q_gc) {m_QueryGeneticCode = q_gc;}
SetDbGeneticCode(int db_gc)185     void SetDbGeneticCode(int db_gc) {m_DbGeneticCode = db_gc;}
186 
187     /// Set query range
188     /// @param query range [in]
SetQueryRange(TSeqRange & q_range)189     void SetQueryRange(TSeqRange & q_range) { m_QueryRange = q_range;}
190 
191 protected:
192     bool x_IsFieldRequested(ETabularField field);
193     /// Add a field to the list of fields to show, if it is not yet present in
194     /// the list of fields.
195     /// @param field Which field to add? [in]
196     void x_AddFieldToShow(ETabularField field);
197     /// Delete a field from the list of fields to show
198     /// @param field Which field to delete? [in]
199     void x_DeleteFieldToShow(ETabularField field);
200     /// Add a default set of fields to show.
201     void x_AddDefaultFieldsToShow(void);
202     /// Set fields to show, given an output format string
203     /// @param format Output format [in]
204     void x_SetFieldsToShow(const string& format);
205     /// Reset values of all fields.
206     void x_ResetFields(void);
207     /// Set the tabular fields delimiter.
208     /// @param delim Which delimiter to use
209     void x_SetFieldDelimiter(EFieldDelimiter delim, string customDelim = "");
210     /// Print the names of all supported fields
211     void x_PrintFieldNames(void);
212     /// Print the value of a given field
213     /// @param field Which field to show? [in]
214     void x_PrintField(ETabularField field);
215     /// Print query Seq-id
216     void x_PrintQuerySeqId(void) const;
217     /// Print query gi
218     void x_PrintQueryGi(void);
219     /// Print query accession
220     void x_PrintQueryAccession(void);
221     /// Print query accession.version
222     void x_PrintQueryAccessionVersion(void);
223     /// Print query and database names
224     void x_PrintQueryAndDbNames(const string& program,
225                        const objects::CBioseq& bioseq,
226                        const string& dbname,
227                        const string& rid,
228                        unsigned int iteration,
229                        CConstRef<objects::CBioseq> subj_bioseq);
230     /// Print subject Seq-id
231     void x_PrintSubjectSeqId(void);
232     /// Print all Seq-ids associated with this subject, separated by ';'
233     void x_PrintSubjectAllSeqIds(void);
234     /// Print subject gi
235     void x_PrintSubjectGi(void);
236     /// Print all gis associated with this subject, separated by ';'
237     void x_PrintSubjectAllGis(void);
238     /// Print subject accession
239     void x_PrintSubjectAccession(void);
240     /// Print subject accession.version
241     void x_PrintSubjectAccessionVersion(void);
242     /// Print all accessions associated with this subject, separated by ';'
243     void x_PrintSubjectAllAccessions(void);
244     /// Print aligned part of query sequence
245     void x_PrintQuerySeq(void);
246     /// Print aligned part of subject sequence
247     void x_PrintSubjectSeq(void);
248     /// Print query start
249     void x_PrintQueryStart(void);
250     /// Print query end
251     void x_PrintQueryEnd(void);
252     /// Print subject start
253     void x_PrintSubjectStart(void);
254     /// Print subject end
255     void x_PrintSubjectEnd(void);
256     /// Print e-value
257     void x_PrintEvalue(void);
258     /// Print bit score
259     void x_PrintBitScore(void);
260     /// Print raw score
261     void x_PrintScore(void);
262     /// Print alignment length
263     void x_PrintAlignmentLength(void);
264     /// Print percent of identical matches
265     void x_PrintPercentIdentical(void);
266     /// Print number of identical matches
267     void x_PrintNumIdentical(void);
268     /// Print number of mismatches
269     void x_PrintMismatches(void);
270     /// Print number of positive matches
271     void x_PrintNumPositives(void);
272     /// Print number of gap openings
273     void x_PrintGapOpenings(void);
274     /// Print total number of gaps
275     void x_PrintGaps(void);
276     /// Print percent positives
277     void x_PrintPercentPositives();
278     /// Print frames
279     void x_PrintFrames();
280     void x_PrintQueryFrame();
281     void x_PrintSubjectFrame();
282     void x_PrintBTOP();
283     /// Print the query sequence length
284     void x_PrintQueryLength();
285     /// Print the subject sequence length
286     void x_PrintSubjectLength();
287     /// Print subject tax info
288     void x_PrintSubjectTaxIds();
289     void x_PrintSubjectSciNames();
290     void x_PrintSubjectCommonNames();
291     void x_PrintSubjectBlastNames();
292     void x_PrintSubjectSuperKingdoms();
293     void x_PrintSubjectTaxId();
294     void x_PrintSubjectSciName();
295     void x_PrintSubjectCommonName();
296     void x_PrintSubjectBlastName();
297     void x_PrintSubjectSuperKingdom();
298     void x_PrintSubjectTitle();
299     void x_PrintSubjectAllTitles();
300     void x_PrintSubjectStrand();
301     void x_PrintSeqalignCoverage();
302     void x_PrintSubjectCoverage();
303     void x_PrintUniqSubjectCoverage();
304     void x_SetTaxInfo(const objects::CBioseq_Handle & handle, const CRef<objects::CBlast_def_line_set> & bdlRef);
305     void x_SetTaxInfoAll(const objects::CBioseq_Handle & handle, const CRef<objects::CBlast_def_line_set> & bdlRef);
306     void x_SetSubjectIds(const objects::CBioseq_Handle& bh, const CRef<objects::CBlast_def_line_set> & bdlRef);
307     void x_SetQueryCovSubject(const objects::CSeq_align & align);
308     void x_SetQueryCovUniqSubject(const objects::CSeq_align & align);
309     void x_SetQueryCovSeqalign(const CSeq_align & align, int query_len);
310     void x_CheckTaxDB();
311 
312     CNcbiOstream& m_Ostream; ///< Stream to write output to
313     string m_FieldDelimiter;   ///< Delimiter character for fields to print.
314     string m_QuerySeq;       ///< Aligned part of the query sequence
315     string m_SubjectSeq;     ///< Aligned part of the subject sequence
316     int m_QueryStart;        ///< Starting offset in query
317     int m_QueryEnd;          ///< Ending offset in query
318     int m_QueryFrame;        ///< query frame
319     int m_SubjectStart;      ///< Starting offset in subject
320     int m_SubjectEnd;        ///< Ending offset in subject
321     int m_SubjectFrame;      ///< subject frame
322     bool m_NoFetch;	     ///< program as a string
323 
324 private:
325 
326     list<CRef<objects::CSeq_id> > m_QueryId;  ///< List of query ids for this HSP
327     list<CRef<objects::CSeq_id> > m_SubjectId;
328     /// All subject sequence ids for this HSP
329     vector<list<CRef<objects::CSeq_id> > > m_SubjectIds;
330     TSeqPos m_QueryLength;   ///< Length of query sequence
331     TSeqPos m_SubjectLength; ///< Length of subject sequence
332     int m_Score;             ///< Raw score of this HSP
333     string m_BitScore;       ///< Bit score of this HSP, in appropriate format
334     string m_Evalue;         ///< E-value of this HSP, in appropriate format
335     int m_AlignLength;       ///< Alignment length of this HSP
336     int m_NumGaps;           ///< Total number of gaps in this HSP
337     int m_NumGapOpens;       ///< Number of gap openings in this HSP
338     int m_NumIdent;          ///< Number of identities in this HSP
339     int m_NumPositives;      ///< Number of positives in this HSP
340     /// Map of field enum values to field names.
341     map<string, ETabularField> m_FieldMap;
342     list<ETabularField> m_FieldsToShow; ///< Which fields to show?
343     /// Should the query deflines be parsed for local IDs?
344     bool m_ParseLocalIds;
345     /// Parse subejct defline?
346     bool m_ParseSubjectDefline;
347     string m_BTOP;            /// Blast-traceback-operations.
348 
349     //TaxInfo
350     set<TTaxId> 		m_SubjectTaxIds;
351     vector<string>		m_SubjectSciNames;
352     vector<string>		m_SubjectCommonNames;
353     set<string>		m_SubjectBlastNames;
354     set<string>		m_SubjectSuperKingdoms;
355     TTaxId			m_SubjectTaxId;
356     string			m_SubjectSciName;
357     string			m_SubjectCommonName;
358     string			m_SubjectBlastName;
359     string			m_SubjectSuperKingdom;
360     CRef<CBlast_def_line_set> m_SubjectDefline;
361 
362     string m_SubjectStrand;
363     pair<string, int>  m_QueryCovSubject;
364     pair<string, int>  m_QueryCovUniqSubject;
365     int m_QueryCovSeqalign;
366 
367     int m_QueryGeneticCode;
368     int m_DbGeneticCode;
369 
370     TSeqRange m_QueryRange;
371     string m_CustomDelim;
372 };
373 
374 
x_PrintQuerySeq(void)375 inline void CBlastTabularInfo::x_PrintQuerySeq(void)
376 {
377     m_Ostream << m_QuerySeq;
378 }
379 
x_PrintSubjectSeq(void)380 inline void CBlastTabularInfo::x_PrintSubjectSeq(void)
381 {
382     m_Ostream << m_SubjectSeq;
383 }
384 
x_PrintQueryStart(void)385 inline void CBlastTabularInfo::x_PrintQueryStart(void)
386 {
387     m_Ostream << m_QueryStart;
388 }
389 
x_PrintQueryEnd(void)390 inline void CBlastTabularInfo::x_PrintQueryEnd(void)
391 {
392     m_Ostream << m_QueryEnd;
393 }
394 
x_PrintSubjectStart(void)395 inline void CBlastTabularInfo::x_PrintSubjectStart(void)
396 {
397     m_Ostream << m_SubjectStart;
398 }
399 
x_PrintSubjectEnd(void)400 inline void CBlastTabularInfo::x_PrintSubjectEnd(void)
401 {
402     m_Ostream << m_SubjectEnd;
403 }
404 
x_PrintEvalue(void)405 inline void CBlastTabularInfo::x_PrintEvalue(void)
406 {
407     m_Ostream << m_Evalue;
408 }
409 
x_PrintBitScore(void)410 inline void CBlastTabularInfo::x_PrintBitScore(void)
411 {
412     m_Ostream << m_BitScore;
413 }
414 
x_PrintScore(void)415 inline void CBlastTabularInfo::x_PrintScore(void)
416 {
417     m_Ostream << m_Score;
418 }
419 
x_PrintAlignmentLength(void)420 inline void CBlastTabularInfo::x_PrintAlignmentLength(void)
421 {
422     m_Ostream << m_AlignLength;
423 }
424 
x_PrintPercentIdentical(void)425 inline void CBlastTabularInfo::x_PrintPercentIdentical(void)
426 {
427     double perc_ident =
428         (m_AlignLength > 0 ? ((double)m_NumIdent)/m_AlignLength * 100 : 0);
429     m_Ostream << NStr::DoubleToString(perc_ident, 3);
430 }
431 
x_PrintPercentPositives(void)432 inline void CBlastTabularInfo::x_PrintPercentPositives(void)
433 {
434     double perc_positives =
435         (m_AlignLength > 0 ? ((double)m_NumPositives)/m_AlignLength * 100 : 0);
436     m_Ostream << NStr::DoubleToString(perc_positives, 2);
437 }
438 
x_PrintFrames(void)439 inline void CBlastTabularInfo::x_PrintFrames(void)
440 {
441     m_Ostream << m_QueryFrame << "/" << m_SubjectFrame;
442 }
443 
x_PrintQueryFrame(void)444 inline void CBlastTabularInfo::x_PrintQueryFrame(void)
445 {
446     m_Ostream << m_QueryFrame;
447 }
448 
x_PrintSubjectFrame(void)449 inline void CBlastTabularInfo::x_PrintSubjectFrame(void)
450 {
451     m_Ostream << m_SubjectFrame;
452 }
453 
x_PrintBTOP(void)454 inline void CBlastTabularInfo::x_PrintBTOP(void)
455 {
456     m_Ostream << m_BTOP;
457 }
458 
x_PrintNumIdentical(void)459 inline void CBlastTabularInfo::x_PrintNumIdentical(void)
460 {
461     m_Ostream << m_NumIdent;
462 }
463 
x_PrintMismatches(void)464 inline void CBlastTabularInfo::x_PrintMismatches(void)
465 {
466     int num_mismatches = m_AlignLength - m_NumIdent - m_NumGaps;
467     m_Ostream << num_mismatches;
468 }
469 
x_PrintNumPositives(void)470 inline void CBlastTabularInfo::x_PrintNumPositives(void)
471 {
472     m_Ostream << m_NumPositives;
473 }
474 
475 // FIXME; do this via a bit field
x_IsFieldRequested(ETabularField field)476 inline bool CBlastTabularInfo::x_IsFieldRequested(ETabularField field)
477 {
478     return find(m_FieldsToShow.begin(),
479                 m_FieldsToShow.end(),
480                 field) != m_FieldsToShow.end();
481 }
482 
x_PrintGapOpenings(void)483 inline void CBlastTabularInfo::x_PrintGapOpenings(void)
484 {
485     m_Ostream << m_NumGapOpens;
486 }
487 
x_PrintGaps(void)488 inline void CBlastTabularInfo::x_PrintGaps(void)
489 {
490     m_Ostream << m_NumGaps;
491 }
x_PrintQueryLength(void)492 inline void CBlastTabularInfo::x_PrintQueryLength(void)
493 {
494     m_Ostream << m_QueryLength;
495 }
496 
x_PrintSubjectLength(void)497 inline void CBlastTabularInfo::x_PrintSubjectLength(void)
498 {
499     m_Ostream << m_SubjectLength;
500 }
501 
SetNoFetch(bool nofetch)502 inline void CBlastTabularInfo::SetNoFetch(bool nofetch)
503 {
504     m_NoFetch = nofetch;
505 }
506 
GetNoFetch(void)507 inline bool CBlastTabularInfo::GetNoFetch(void)
508 {
509     return m_NoFetch;
510 }
511 
512 /// Class containing information needed for tabular formatting of BLAST
513 /// results.
514 class NCBI_ALIGN_FORMAT_EXPORT CIgBlastTabularInfo : public CBlastTabularInfo
515 {
516 public:
517 
518     /// struct containing annotated domain information
519     struct SIgDomain {
SIgDomainCIgBlastTabularInfo::SIgDomain520         SIgDomain(const string& n, int s, int e, int ss, int se):
521             name(n), start(s), end(e),
522             s_start(ss), s_end(se), length(0),
523             num_match(0), num_mismatch(0), num_gap(0) {};
524         const string name;
525         int start;
526         int end;  // actual end + 1
527         int s_start;
528         int s_end;  // actual end + 1
529         int length;
530         int num_match;
531         int num_mismatch;
532         int num_gap;
533     };
534 
535     /// struct containing annotated gene information
536     struct SIgGene {
SetCIgBlastTabularInfo::SIgGene537         void Set(const string id, int s, int e) {
538             if (id.substr(0,4) == "lcl|") {
539                 sid = id.substr(4, id.size());
540             } else {
541                 sid = id;
542             }
543             start = s;
544             end = e;
545         }
ResetCIgBlastTabularInfo::SIgGene546         void Reset() {
547             sid = "";
548             start = -1;
549             end = -1;
550         };
551         string sid;
552         int start;
553         int end;
554     };
555 
556     /// What delimiter to use between fields in each row of the tabular output.
557     /// Constructor
558     /// @param ostr Stream to write output to [in]
559     /// @param format Output format - what fields to include in the output [in]
CIgBlastTabularInfo(CNcbiOstream & ostr,const string & format=kDfltArgTabularOutputFmt,EFieldDelimiter delim=eTab)560     CIgBlastTabularInfo(CNcbiOstream& ostr,
561                         const string& format = kDfltArgTabularOutputFmt,
562                         EFieldDelimiter delim = eTab)
563         : CBlastTabularInfo(ostr, format, delim) { };
564 
565     /// Destructor
~CIgBlastTabularInfo()566     ~CIgBlastTabularInfo() {
567         x_ResetIgFields();
568     };
569 
570     void PrintHeader(const string& program,
571                      const objects::CBioseq& bioseq,
572                      const string& dbname,
573                      const string& domain_sys,
574                      const string& rid = kEmptyStr,
575                      unsigned int iteration =
576                                 numeric_limits<unsigned int>::max(),
577                      const objects::CSeq_align_set* align_set=0,
578                      CConstRef<objects::CBioseq> subj_bioseq
579                                 = CConstRef<objects::CBioseq>());
580 
581     /// Set fields for master alignment
582     int SetMasterFields(const objects::CSeq_align& align,
583                         objects::CScope&           scope,
584                         const string&              chain_type,
585                         const string& master_chain_type_to_show,
586                         CNcbiMatrix<int>*          matrix=0);
587 
588     /// Set fields for all other alignments
589     int SetFields(const objects::CSeq_align& align,
590                   objects::CScope&           scope,
591                   const string&              chain_type,
592                   const string& master_chain_type_to_show,
593                   CNcbiMatrix<int>*          matrix=0);
594 
595     /// Override the print method
596     virtual void Print(void);
597 
598     /// Print domain information
599     void PrintMasterAlign(const string& header = "# ") const;
600 
601     void SetAirrFormatData(CScope& scope,
602                            const CRef<blast::CIgAnnotation> &annot,
603                            const CBioseq_Handle& query_handle,
604                            CConstRef<CSeq_align_set> align_result,
605                            const CConstRef<blast::CIgBlastOptions>& ig_opts);
606 
607     void PrintAirrRearrangement(CScope& scope,
608                                 const CRef<blast::CIgAnnotation> &annot,
609                                 const string& program_version,
610                                 const CBioseq& query_bioseq,
611                                 const string& dbname,
612                                 const string& domain_sys,
613                                 const string& rid,
614                                 unsigned int iteration,
615                                 const CSeq_align_set* align_set,
616                                 CConstRef<CBioseq> subj_bioseq,
617                                 CNcbiMatrix<int>* matrix,
618                                 bool print_airr_format_header,
619                                 const CConstRef<blast::CIgBlastOptions>& ig_opts);
620 
621     /// Print Html style summary
622     void PrintHtmlSummary() const;
623 
624     /// Set out-of-frame information
SetFrame(const string & frame="N/A")625     void SetFrame(const string &frame = "N/A") {
626         m_FrameInfo = frame;
627     };
628 
629     /// Set strand information
SetMinusStrand(bool minus=true)630     void SetMinusStrand(bool minus = true) {
631         m_IsMinusStrand = minus;
632     };
633 
634     /// Set sequence type
SetSeqType(bool isNucl)635     void SetSeqType(bool isNucl) {
636         m_IsNucl = isNucl;
637     };
638 
639     /// Set domain info
AddIgDomain(const string & name,int start,int end,int s_start=-1,int s_end=-1)640     void AddIgDomain(const string &name, int start, int end,
641                      int s_start=-1, int s_end=-1) {
642         if (start <0 || end <= start) return;
643         SIgDomain * domain = new SIgDomain(name, start, end, s_start, s_end);
644         x_ComputeIgDomain(*domain);
645         m_IgDomains.push_back(domain);
646     };
647 
648 
649     /// Set gene info
SetVGene(const string & id,int s,int e)650     void SetVGene(const string &id, int s, int e) {
651         m_VGene.Set(id, s,e);
652     }
653 
654     /// Set gene info
SetDGene(const string & id,int s,int e)655     void SetDGene(const string &id, int s, int e) {
656         m_DGene.Set(id, s,e);
657     }
658 
659     /// Set gene info
SetJGene(const string & id,int s,int e)660     void SetJGene(const string &id, int s, int e) {
661         m_JGene.Set(id, s,e);
662     }
663 
664     /// One method to set all annotation information
665     void SetIgAnnotation(const CRef<blast::CIgAnnotation> &annot,
666                          const CConstRef<blast::CIgBlastOptions> &ig_opts,
667                          CConstRef<CSeq_align_set>& align_result,
668                          CScope& scope);
669 
670     ///Getter
GetIgInfo(string & v,string & d,string & j,string & master_chain_to_show,string & cdr3_nuc,string & cdr3_aa,string & productive) const671     void GetIgInfo (string& v,
672                     string& d,
673                     string& j,
674                     string& master_chain_to_show,
675                     string& cdr3_nuc,
676                     string& cdr3_aa,
677                     string& productive) const {
678         v = m_VGene.sid;
679         d = m_DGene.sid;
680         j = m_JGene.sid;
681         master_chain_to_show = m_MasterChainTypeToShow;
682         cdr3_nuc = m_Cdr3Seq;
683         cdr3_aa = m_Cdr3SeqTrans;
684         productive = m_OtherInfo[4];
685     }
686 
687     ///Get Ig domain
GetIgDomains() const688     const vector<SIgDomain*>& GetIgDomains() const {
689 
690         return m_IgDomains;
691     }
692 
693 
694 protected:
695     void x_ResetIgFields();
696     void x_PrintIgGenes(bool isHtml=false, const string& header="# ") const;
697     void x_ComputeIgDomain(SIgDomain &domain);
698     void x_PrintIgDomain(const SIgDomain &domain) const;
699     void x_PrintIgDomainHtml(const SIgDomain &domain) const;
700     void x_PrintPartialQuery(int start, int end, bool isHtml=false) const;
701 
702 
703 private:
704     string m_Query;
705     bool m_IsNucl;
706     bool m_IsMinusStrand;
707     string m_FrameInfo;
708     string m_VFrameShift;
709     string m_ChainType;
710     string m_MasterChainTypeToShow;
711     SIgGene m_VGene;
712     SIgGene m_DGene;
713     SIgGene m_JGene;
714     vector<SIgDomain *> m_IgDomains;
715 
716     //index 0-2, not currently being used
717     // index 4, productive/non-productive
718     // index 3, stop codon or not
719     static const int num_otherinfo = 5;
720     string m_OtherInfo[num_otherinfo];
721     int m_Cdr3Start;
722     int m_Cdr3End;
723     int m_Fwr4Start;
724     int m_Fwr4End;
725 
726     string m_Fwr1Seq;
727     string m_Fwr1SeqTrans;
728     string m_Cdr1Seq;
729     string m_Cdr1SeqTrans;
730     string m_Fwr2Seq;
731     string m_Fwr2SeqTrans;
732     string m_Cdr2Seq;
733     string m_Cdr2SeqTrans;
734     string m_Fwr3Seq;
735     string m_Fwr3SeqTrans;
736     string m_Fwr4Seq;
737     string m_Fwr4SeqTrans;
738     string m_Cdr3Seq;
739     string m_Cdr3SeqTrans;
740 
741     string m_AirrCdr3Seq;
742     string m_AirrCdr3SeqTrans;
743     CRef<CSeq_align> m_TopAlign_V;
744     CRef<CSeq_align> m_TopAlign_D;
745 
746     string m_QueryVAlign;
747     string m_VAlign;
748     int m_QueryVAlignStart;
749     int m_VAlignStart;
750     int m_QueryVAlignEnd;
751     CRef<CSeq_align> m_TopAlign_J;
752     map<string, string> m_AirrData;
753     int m_QueryAlignSeqEnd;
754 };
755 
756 END_SCOPE(align_format)
757 END_NCBI_SCOPE
758 
759 #endif /* OBJTOOLS_ALIGN_FORMAT___TABULAR_HPP */
760