1 /* $Id: tabular.hpp 616915 2020-09-22 19:19:18Z jianye $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's offical duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Ilya Dondoshansky
27 *
28 * ===========================================================================
29 */
30
31 /// @file: tabular.hpp
32 /// Formatting of pairwise sequence alignments in tabular form.
33
34 #ifndef OBJTOOLS_ALIGN_FORMAT___TABULAR_HPP
35 #define OBJTOOLS_ALIGN_FORMAT___TABULAR_HPP
36
37 #include <corelib/ncbistre.hpp>
38 #include <objects/seqalign/Seq_align.hpp>
39 #include <objects/seqloc/Seq_id.hpp>
40 #include <objmgr/scope.hpp>
41 #include <objtools/align_format/align_format_util.hpp>
42 #include <objtools/blast/seqdb_reader/seqdb.hpp>
43 #include <algo/blast/igblast/igblast.hpp>
44 #include <objects/blastdb/Blast_def_line_set.hpp>
45
46 #include <algorithm>
47
48 BEGIN_NCBI_SCOPE
49 BEGIN_SCOPE(align_format)
50
51
52 /// Class containing information needed for tabular formatting of BLAST
53 /// results.
54 class NCBI_ALIGN_FORMAT_EXPORT CBlastTabularInfo : public CObject
55 {
56 public:
57 /// In what form should the sequence identifiers be shown?
58 enum ESeqIdType {
59 eFullId = 0, ///< Show full seq-id, with multiple ids concatenated.
60 eAccession, ///< Show only best accession
61 eAccVersion, ///< Show only best accession.version
62 eGi ///< Show only gi
63 };
64
65 /// What delimiter to use between fields in each row of the tabular output.
66 enum EFieldDelimiter {
67 eTab = 0, ///< Tab
68 eSpace, ///< Space
69 eComma, ///< Comma
70 eCustom ///<Custom
71 };
72
73 /// Constructor
74 /// @param ostr Stream to write output to [in]
75 /// @param format Output format - what fields to include in the output [in]
76 /// @param delim Delimiter to use between tabular fields [in]
77 /// @note fields that are not recognized will be ignored, if no fields are
78 /// specified (or left after purging those that are not recognized), the
79 /// default format is assumed
80 CBlastTabularInfo(CNcbiOstream& ostr,
81 const string& format = kDfltArgTabularOutputFmt,
82 EFieldDelimiter delim = eTab,
83 bool parse_local_ids = false);
84
85 /// Destructor
86 ~CBlastTabularInfo();
87 /// Set query id from a objects::CSeq_id
88 /// @param id List of Seq-ids to use [in]
89 void SetQueryId(list<CRef<objects::CSeq_id> >& id);
90 /// Set query id from a Bioseq handle
91 /// @param bh Bioseq handle to get Seq-ids from
92 void SetQueryId(const objects::CBioseq_Handle& bh);
93 ///Get query seqid list
GetQueryId() const94 const list<CRef<CSeq_id> >& GetQueryId() const {
95 return m_QueryId;
96 };
97 /// Set subject id from a objects::CSeq_id
98 /// @param id List of Seq-ids to use [in]
99 void SetSubjectId(list<CRef<objects::CSeq_id> >& id);
100 /// Set subject id from a Bioseq handle
101 /// @param bh Bioseq handle to get Seq-ids from
102 void SetSubjectId(const objects::CBioseq_Handle& bh);
103 /// Set the HSP scores
104 /// @param score Raw score [in]
105 /// @param bit_score Bit score [in]
106 /// @param evalue Expect value [in]
107 void SetScores(int score, double bit_score, double evalue);
108 /// Set the HSP endpoints. Note that if alignment is on opposite strands,
109 /// the subject offsets must be reversed.
110 /// @param q_start Starting offset in query [in]
111 /// @param q_end Ending offset in query [in]
112 /// @param s_start Starting offset in subject [in]
113 /// @param s_end Ending offset in subject [in]
114 void SetEndpoints(int q_start, int q_end, int s_start, int s_end);
115 /// Set various counts/lengths
116 /// @param num_ident Number of identities [in]
117 /// @param length Alignment length [in]
118 /// @param gaps Total number of gaps [in]
119 /// @param gap_opens Number of gap openings [in]
120 /// @param positives Number of positives [in]
121 void SetCounts(int num_ident, int length, int gaps, int gap_opens,
122 int positives =0, int query_frame = 1,
123 int subject_frame = 1);
124 /// Sets the Blast-traceback-operations string.
125 /// @param btop_string string for blast traceback operations [in]
126 void SetBTOP(string btop_string);
127 /// Set all member fields, given a Seq-align
128 /// @param sal Seq-align to get data from [in]
129 /// @param scope Scope for Bioseq retrieval [in]
130 /// @param matrix Matrix to calculate positives; NULL if not applicable. [in]
131 /// @return 0 on success, 1 if query or subject Bioseq is not found.
132 int SetFields(const objects::CSeq_align& sal,
133 objects::CScope& scope,
134 CNcbiMatrix<int>* matrix=0);
SetCustomDelim(string customDelim)135 void SetCustomDelim(string customDelim) {
136 x_SetFieldDelimiter(eCustom,customDelim);
137 }
138 /// Print one line of tabular output
139 virtual void Print(void);
140 /// Print the tabular output header
141 /// @param program Program name to show in the header [in]
142 /// @param bioseq Query Bioseq [in]
143 /// @param dbname Search database name [in]
144 /// @param rid the search RID (if not applicable, it should be empty
145 /// the string) [in]
146 /// @param iteration Iteration number (for PSI-BLAST), use default
147 /// parameter value when not applicable [in]
148 /// @param align_set All alignments for this query [in]
149 void PrintHeader(const string& program,
150 const objects::CBioseq& bioseq,
151 const string& dbname,
152 const string& rid = kEmptyStr,
153 unsigned int iteration =
154 numeric_limits<unsigned int>::max(),
155 const objects::CSeq_align_set* align_set=0,
156 CConstRef<objects::CBioseq> subj_bioseq
157 = CConstRef<objects::CBioseq>());
158
159 /// Prints number of queries processed.
160 /// @param num_queries number of queries processed [in]
161 void PrintNumProcessed(int num_queries);
162
163 /// Return all field names supported in the format string.
164 list<string> GetAllFieldNames(void);
165
166 /// Should local IDs be parsed or not?
167 /// @param val value to set [in]
168 /// Returns true if the field was requested in the format specification
169 /// @param field Which field to test [in]
SetParseLocalIds(bool val)170 void SetParseLocalIds(bool val) { m_ParseLocalIds = val; }
171
172 /// Should subject deflien be parsed for id or not?
173 /// @param val value to set [in]
SetParseSubjectDefline(bool val)174 void SetParseSubjectDefline(bool val) { m_ParseSubjectDefline = val; }
175
176 /// Avoid fetching sequence (if possible)
177 /// If the sequence is needed (e.g., will be formatted, it will be fetched)
178 /// @param nofetch Do not fetch if true [in]
179 void SetNoFetch(bool nofetch);
180 /// Avoid fetch of sequence if true returned
181 bool GetNoFetch();
182
183 // Set Genetic code for translating seqs
SetQueryGeneticCode(int q_gc)184 void SetQueryGeneticCode(int q_gc) {m_QueryGeneticCode = q_gc;}
SetDbGeneticCode(int db_gc)185 void SetDbGeneticCode(int db_gc) {m_DbGeneticCode = db_gc;}
186
187 /// Set query range
188 /// @param query range [in]
SetQueryRange(TSeqRange & q_range)189 void SetQueryRange(TSeqRange & q_range) { m_QueryRange = q_range;}
190
191 protected:
192 bool x_IsFieldRequested(ETabularField field);
193 /// Add a field to the list of fields to show, if it is not yet present in
194 /// the list of fields.
195 /// @param field Which field to add? [in]
196 void x_AddFieldToShow(ETabularField field);
197 /// Delete a field from the list of fields to show
198 /// @param field Which field to delete? [in]
199 void x_DeleteFieldToShow(ETabularField field);
200 /// Add a default set of fields to show.
201 void x_AddDefaultFieldsToShow(void);
202 /// Set fields to show, given an output format string
203 /// @param format Output format [in]
204 void x_SetFieldsToShow(const string& format);
205 /// Reset values of all fields.
206 void x_ResetFields(void);
207 /// Set the tabular fields delimiter.
208 /// @param delim Which delimiter to use
209 void x_SetFieldDelimiter(EFieldDelimiter delim, string customDelim = "");
210 /// Print the names of all supported fields
211 void x_PrintFieldNames(void);
212 /// Print the value of a given field
213 /// @param field Which field to show? [in]
214 void x_PrintField(ETabularField field);
215 /// Print query Seq-id
216 void x_PrintQuerySeqId(void) const;
217 /// Print query gi
218 void x_PrintQueryGi(void);
219 /// Print query accession
220 void x_PrintQueryAccession(void);
221 /// Print query accession.version
222 void x_PrintQueryAccessionVersion(void);
223 /// Print query and database names
224 void x_PrintQueryAndDbNames(const string& program,
225 const objects::CBioseq& bioseq,
226 const string& dbname,
227 const string& rid,
228 unsigned int iteration,
229 CConstRef<objects::CBioseq> subj_bioseq);
230 /// Print subject Seq-id
231 void x_PrintSubjectSeqId(void);
232 /// Print all Seq-ids associated with this subject, separated by ';'
233 void x_PrintSubjectAllSeqIds(void);
234 /// Print subject gi
235 void x_PrintSubjectGi(void);
236 /// Print all gis associated with this subject, separated by ';'
237 void x_PrintSubjectAllGis(void);
238 /// Print subject accession
239 void x_PrintSubjectAccession(void);
240 /// Print subject accession.version
241 void x_PrintSubjectAccessionVersion(void);
242 /// Print all accessions associated with this subject, separated by ';'
243 void x_PrintSubjectAllAccessions(void);
244 /// Print aligned part of query sequence
245 void x_PrintQuerySeq(void);
246 /// Print aligned part of subject sequence
247 void x_PrintSubjectSeq(void);
248 /// Print query start
249 void x_PrintQueryStart(void);
250 /// Print query end
251 void x_PrintQueryEnd(void);
252 /// Print subject start
253 void x_PrintSubjectStart(void);
254 /// Print subject end
255 void x_PrintSubjectEnd(void);
256 /// Print e-value
257 void x_PrintEvalue(void);
258 /// Print bit score
259 void x_PrintBitScore(void);
260 /// Print raw score
261 void x_PrintScore(void);
262 /// Print alignment length
263 void x_PrintAlignmentLength(void);
264 /// Print percent of identical matches
265 void x_PrintPercentIdentical(void);
266 /// Print number of identical matches
267 void x_PrintNumIdentical(void);
268 /// Print number of mismatches
269 void x_PrintMismatches(void);
270 /// Print number of positive matches
271 void x_PrintNumPositives(void);
272 /// Print number of gap openings
273 void x_PrintGapOpenings(void);
274 /// Print total number of gaps
275 void x_PrintGaps(void);
276 /// Print percent positives
277 void x_PrintPercentPositives();
278 /// Print frames
279 void x_PrintFrames();
280 void x_PrintQueryFrame();
281 void x_PrintSubjectFrame();
282 void x_PrintBTOP();
283 /// Print the query sequence length
284 void x_PrintQueryLength();
285 /// Print the subject sequence length
286 void x_PrintSubjectLength();
287 /// Print subject tax info
288 void x_PrintSubjectTaxIds();
289 void x_PrintSubjectSciNames();
290 void x_PrintSubjectCommonNames();
291 void x_PrintSubjectBlastNames();
292 void x_PrintSubjectSuperKingdoms();
293 void x_PrintSubjectTaxId();
294 void x_PrintSubjectSciName();
295 void x_PrintSubjectCommonName();
296 void x_PrintSubjectBlastName();
297 void x_PrintSubjectSuperKingdom();
298 void x_PrintSubjectTitle();
299 void x_PrintSubjectAllTitles();
300 void x_PrintSubjectStrand();
301 void x_PrintSeqalignCoverage();
302 void x_PrintSubjectCoverage();
303 void x_PrintUniqSubjectCoverage();
304 void x_SetTaxInfo(const objects::CBioseq_Handle & handle, const CRef<objects::CBlast_def_line_set> & bdlRef);
305 void x_SetTaxInfoAll(const objects::CBioseq_Handle & handle, const CRef<objects::CBlast_def_line_set> & bdlRef);
306 void x_SetSubjectIds(const objects::CBioseq_Handle& bh, const CRef<objects::CBlast_def_line_set> & bdlRef);
307 void x_SetQueryCovSubject(const objects::CSeq_align & align);
308 void x_SetQueryCovUniqSubject(const objects::CSeq_align & align);
309 void x_SetQueryCovSeqalign(const CSeq_align & align, int query_len);
310 void x_CheckTaxDB();
311
312 CNcbiOstream& m_Ostream; ///< Stream to write output to
313 string m_FieldDelimiter; ///< Delimiter character for fields to print.
314 string m_QuerySeq; ///< Aligned part of the query sequence
315 string m_SubjectSeq; ///< Aligned part of the subject sequence
316 int m_QueryStart; ///< Starting offset in query
317 int m_QueryEnd; ///< Ending offset in query
318 int m_QueryFrame; ///< query frame
319 int m_SubjectStart; ///< Starting offset in subject
320 int m_SubjectEnd; ///< Ending offset in subject
321 int m_SubjectFrame; ///< subject frame
322 bool m_NoFetch; ///< program as a string
323
324 private:
325
326 list<CRef<objects::CSeq_id> > m_QueryId; ///< List of query ids for this HSP
327 list<CRef<objects::CSeq_id> > m_SubjectId;
328 /// All subject sequence ids for this HSP
329 vector<list<CRef<objects::CSeq_id> > > m_SubjectIds;
330 TSeqPos m_QueryLength; ///< Length of query sequence
331 TSeqPos m_SubjectLength; ///< Length of subject sequence
332 int m_Score; ///< Raw score of this HSP
333 string m_BitScore; ///< Bit score of this HSP, in appropriate format
334 string m_Evalue; ///< E-value of this HSP, in appropriate format
335 int m_AlignLength; ///< Alignment length of this HSP
336 int m_NumGaps; ///< Total number of gaps in this HSP
337 int m_NumGapOpens; ///< Number of gap openings in this HSP
338 int m_NumIdent; ///< Number of identities in this HSP
339 int m_NumPositives; ///< Number of positives in this HSP
340 /// Map of field enum values to field names.
341 map<string, ETabularField> m_FieldMap;
342 list<ETabularField> m_FieldsToShow; ///< Which fields to show?
343 /// Should the query deflines be parsed for local IDs?
344 bool m_ParseLocalIds;
345 /// Parse subejct defline?
346 bool m_ParseSubjectDefline;
347 string m_BTOP; /// Blast-traceback-operations.
348
349 //TaxInfo
350 set<TTaxId> m_SubjectTaxIds;
351 vector<string> m_SubjectSciNames;
352 vector<string> m_SubjectCommonNames;
353 set<string> m_SubjectBlastNames;
354 set<string> m_SubjectSuperKingdoms;
355 TTaxId m_SubjectTaxId;
356 string m_SubjectSciName;
357 string m_SubjectCommonName;
358 string m_SubjectBlastName;
359 string m_SubjectSuperKingdom;
360 CRef<CBlast_def_line_set> m_SubjectDefline;
361
362 string m_SubjectStrand;
363 pair<string, int> m_QueryCovSubject;
364 pair<string, int> m_QueryCovUniqSubject;
365 int m_QueryCovSeqalign;
366
367 int m_QueryGeneticCode;
368 int m_DbGeneticCode;
369
370 TSeqRange m_QueryRange;
371 string m_CustomDelim;
372 };
373
374
x_PrintQuerySeq(void)375 inline void CBlastTabularInfo::x_PrintQuerySeq(void)
376 {
377 m_Ostream << m_QuerySeq;
378 }
379
x_PrintSubjectSeq(void)380 inline void CBlastTabularInfo::x_PrintSubjectSeq(void)
381 {
382 m_Ostream << m_SubjectSeq;
383 }
384
x_PrintQueryStart(void)385 inline void CBlastTabularInfo::x_PrintQueryStart(void)
386 {
387 m_Ostream << m_QueryStart;
388 }
389
x_PrintQueryEnd(void)390 inline void CBlastTabularInfo::x_PrintQueryEnd(void)
391 {
392 m_Ostream << m_QueryEnd;
393 }
394
x_PrintSubjectStart(void)395 inline void CBlastTabularInfo::x_PrintSubjectStart(void)
396 {
397 m_Ostream << m_SubjectStart;
398 }
399
x_PrintSubjectEnd(void)400 inline void CBlastTabularInfo::x_PrintSubjectEnd(void)
401 {
402 m_Ostream << m_SubjectEnd;
403 }
404
x_PrintEvalue(void)405 inline void CBlastTabularInfo::x_PrintEvalue(void)
406 {
407 m_Ostream << m_Evalue;
408 }
409
x_PrintBitScore(void)410 inline void CBlastTabularInfo::x_PrintBitScore(void)
411 {
412 m_Ostream << m_BitScore;
413 }
414
x_PrintScore(void)415 inline void CBlastTabularInfo::x_PrintScore(void)
416 {
417 m_Ostream << m_Score;
418 }
419
x_PrintAlignmentLength(void)420 inline void CBlastTabularInfo::x_PrintAlignmentLength(void)
421 {
422 m_Ostream << m_AlignLength;
423 }
424
x_PrintPercentIdentical(void)425 inline void CBlastTabularInfo::x_PrintPercentIdentical(void)
426 {
427 double perc_ident =
428 (m_AlignLength > 0 ? ((double)m_NumIdent)/m_AlignLength * 100 : 0);
429 m_Ostream << NStr::DoubleToString(perc_ident, 3);
430 }
431
x_PrintPercentPositives(void)432 inline void CBlastTabularInfo::x_PrintPercentPositives(void)
433 {
434 double perc_positives =
435 (m_AlignLength > 0 ? ((double)m_NumPositives)/m_AlignLength * 100 : 0);
436 m_Ostream << NStr::DoubleToString(perc_positives, 2);
437 }
438
x_PrintFrames(void)439 inline void CBlastTabularInfo::x_PrintFrames(void)
440 {
441 m_Ostream << m_QueryFrame << "/" << m_SubjectFrame;
442 }
443
x_PrintQueryFrame(void)444 inline void CBlastTabularInfo::x_PrintQueryFrame(void)
445 {
446 m_Ostream << m_QueryFrame;
447 }
448
x_PrintSubjectFrame(void)449 inline void CBlastTabularInfo::x_PrintSubjectFrame(void)
450 {
451 m_Ostream << m_SubjectFrame;
452 }
453
x_PrintBTOP(void)454 inline void CBlastTabularInfo::x_PrintBTOP(void)
455 {
456 m_Ostream << m_BTOP;
457 }
458
x_PrintNumIdentical(void)459 inline void CBlastTabularInfo::x_PrintNumIdentical(void)
460 {
461 m_Ostream << m_NumIdent;
462 }
463
x_PrintMismatches(void)464 inline void CBlastTabularInfo::x_PrintMismatches(void)
465 {
466 int num_mismatches = m_AlignLength - m_NumIdent - m_NumGaps;
467 m_Ostream << num_mismatches;
468 }
469
x_PrintNumPositives(void)470 inline void CBlastTabularInfo::x_PrintNumPositives(void)
471 {
472 m_Ostream << m_NumPositives;
473 }
474
475 // FIXME; do this via a bit field
x_IsFieldRequested(ETabularField field)476 inline bool CBlastTabularInfo::x_IsFieldRequested(ETabularField field)
477 {
478 return find(m_FieldsToShow.begin(),
479 m_FieldsToShow.end(),
480 field) != m_FieldsToShow.end();
481 }
482
x_PrintGapOpenings(void)483 inline void CBlastTabularInfo::x_PrintGapOpenings(void)
484 {
485 m_Ostream << m_NumGapOpens;
486 }
487
x_PrintGaps(void)488 inline void CBlastTabularInfo::x_PrintGaps(void)
489 {
490 m_Ostream << m_NumGaps;
491 }
x_PrintQueryLength(void)492 inline void CBlastTabularInfo::x_PrintQueryLength(void)
493 {
494 m_Ostream << m_QueryLength;
495 }
496
x_PrintSubjectLength(void)497 inline void CBlastTabularInfo::x_PrintSubjectLength(void)
498 {
499 m_Ostream << m_SubjectLength;
500 }
501
SetNoFetch(bool nofetch)502 inline void CBlastTabularInfo::SetNoFetch(bool nofetch)
503 {
504 m_NoFetch = nofetch;
505 }
506
GetNoFetch(void)507 inline bool CBlastTabularInfo::GetNoFetch(void)
508 {
509 return m_NoFetch;
510 }
511
512 /// Class containing information needed for tabular formatting of BLAST
513 /// results.
514 class NCBI_ALIGN_FORMAT_EXPORT CIgBlastTabularInfo : public CBlastTabularInfo
515 {
516 public:
517
518 /// struct containing annotated domain information
519 struct SIgDomain {
SIgDomainCIgBlastTabularInfo::SIgDomain520 SIgDomain(const string& n, int s, int e, int ss, int se):
521 name(n), start(s), end(e),
522 s_start(ss), s_end(se), length(0),
523 num_match(0), num_mismatch(0), num_gap(0) {};
524 const string name;
525 int start;
526 int end; // actual end + 1
527 int s_start;
528 int s_end; // actual end + 1
529 int length;
530 int num_match;
531 int num_mismatch;
532 int num_gap;
533 };
534
535 /// struct containing annotated gene information
536 struct SIgGene {
SetCIgBlastTabularInfo::SIgGene537 void Set(const string id, int s, int e) {
538 if (id.substr(0,4) == "lcl|") {
539 sid = id.substr(4, id.size());
540 } else {
541 sid = id;
542 }
543 start = s;
544 end = e;
545 }
ResetCIgBlastTabularInfo::SIgGene546 void Reset() {
547 sid = "";
548 start = -1;
549 end = -1;
550 };
551 string sid;
552 int start;
553 int end;
554 };
555
556 /// What delimiter to use between fields in each row of the tabular output.
557 /// Constructor
558 /// @param ostr Stream to write output to [in]
559 /// @param format Output format - what fields to include in the output [in]
CIgBlastTabularInfo(CNcbiOstream & ostr,const string & format=kDfltArgTabularOutputFmt,EFieldDelimiter delim=eTab)560 CIgBlastTabularInfo(CNcbiOstream& ostr,
561 const string& format = kDfltArgTabularOutputFmt,
562 EFieldDelimiter delim = eTab)
563 : CBlastTabularInfo(ostr, format, delim) { };
564
565 /// Destructor
~CIgBlastTabularInfo()566 ~CIgBlastTabularInfo() {
567 x_ResetIgFields();
568 };
569
570 void PrintHeader(const string& program,
571 const objects::CBioseq& bioseq,
572 const string& dbname,
573 const string& domain_sys,
574 const string& rid = kEmptyStr,
575 unsigned int iteration =
576 numeric_limits<unsigned int>::max(),
577 const objects::CSeq_align_set* align_set=0,
578 CConstRef<objects::CBioseq> subj_bioseq
579 = CConstRef<objects::CBioseq>());
580
581 /// Set fields for master alignment
582 int SetMasterFields(const objects::CSeq_align& align,
583 objects::CScope& scope,
584 const string& chain_type,
585 const string& master_chain_type_to_show,
586 CNcbiMatrix<int>* matrix=0);
587
588 /// Set fields for all other alignments
589 int SetFields(const objects::CSeq_align& align,
590 objects::CScope& scope,
591 const string& chain_type,
592 const string& master_chain_type_to_show,
593 CNcbiMatrix<int>* matrix=0);
594
595 /// Override the print method
596 virtual void Print(void);
597
598 /// Print domain information
599 void PrintMasterAlign(const string& header = "# ") const;
600
601 void SetAirrFormatData(CScope& scope,
602 const CRef<blast::CIgAnnotation> &annot,
603 const CBioseq_Handle& query_handle,
604 CConstRef<CSeq_align_set> align_result,
605 const CConstRef<blast::CIgBlastOptions>& ig_opts);
606
607 void PrintAirrRearrangement(CScope& scope,
608 const CRef<blast::CIgAnnotation> &annot,
609 const string& program_version,
610 const CBioseq& query_bioseq,
611 const string& dbname,
612 const string& domain_sys,
613 const string& rid,
614 unsigned int iteration,
615 const CSeq_align_set* align_set,
616 CConstRef<CBioseq> subj_bioseq,
617 CNcbiMatrix<int>* matrix,
618 bool print_airr_format_header,
619 const CConstRef<blast::CIgBlastOptions>& ig_opts);
620
621 /// Print Html style summary
622 void PrintHtmlSummary() const;
623
624 /// Set out-of-frame information
SetFrame(const string & frame="N/A")625 void SetFrame(const string &frame = "N/A") {
626 m_FrameInfo = frame;
627 };
628
629 /// Set strand information
SetMinusStrand(bool minus=true)630 void SetMinusStrand(bool minus = true) {
631 m_IsMinusStrand = minus;
632 };
633
634 /// Set sequence type
SetSeqType(bool isNucl)635 void SetSeqType(bool isNucl) {
636 m_IsNucl = isNucl;
637 };
638
639 /// Set domain info
AddIgDomain(const string & name,int start,int end,int s_start=-1,int s_end=-1)640 void AddIgDomain(const string &name, int start, int end,
641 int s_start=-1, int s_end=-1) {
642 if (start <0 || end <= start) return;
643 SIgDomain * domain = new SIgDomain(name, start, end, s_start, s_end);
644 x_ComputeIgDomain(*domain);
645 m_IgDomains.push_back(domain);
646 };
647
648
649 /// Set gene info
SetVGene(const string & id,int s,int e)650 void SetVGene(const string &id, int s, int e) {
651 m_VGene.Set(id, s,e);
652 }
653
654 /// Set gene info
SetDGene(const string & id,int s,int e)655 void SetDGene(const string &id, int s, int e) {
656 m_DGene.Set(id, s,e);
657 }
658
659 /// Set gene info
SetJGene(const string & id,int s,int e)660 void SetJGene(const string &id, int s, int e) {
661 m_JGene.Set(id, s,e);
662 }
663
664 /// One method to set all annotation information
665 void SetIgAnnotation(const CRef<blast::CIgAnnotation> &annot,
666 const CConstRef<blast::CIgBlastOptions> &ig_opts,
667 CConstRef<CSeq_align_set>& align_result,
668 CScope& scope);
669
670 ///Getter
GetIgInfo(string & v,string & d,string & j,string & master_chain_to_show,string & cdr3_nuc,string & cdr3_aa,string & productive) const671 void GetIgInfo (string& v,
672 string& d,
673 string& j,
674 string& master_chain_to_show,
675 string& cdr3_nuc,
676 string& cdr3_aa,
677 string& productive) const {
678 v = m_VGene.sid;
679 d = m_DGene.sid;
680 j = m_JGene.sid;
681 master_chain_to_show = m_MasterChainTypeToShow;
682 cdr3_nuc = m_Cdr3Seq;
683 cdr3_aa = m_Cdr3SeqTrans;
684 productive = m_OtherInfo[4];
685 }
686
687 ///Get Ig domain
GetIgDomains() const688 const vector<SIgDomain*>& GetIgDomains() const {
689
690 return m_IgDomains;
691 }
692
693
694 protected:
695 void x_ResetIgFields();
696 void x_PrintIgGenes(bool isHtml=false, const string& header="# ") const;
697 void x_ComputeIgDomain(SIgDomain &domain);
698 void x_PrintIgDomain(const SIgDomain &domain) const;
699 void x_PrintIgDomainHtml(const SIgDomain &domain) const;
700 void x_PrintPartialQuery(int start, int end, bool isHtml=false) const;
701
702
703 private:
704 string m_Query;
705 bool m_IsNucl;
706 bool m_IsMinusStrand;
707 string m_FrameInfo;
708 string m_VFrameShift;
709 string m_ChainType;
710 string m_MasterChainTypeToShow;
711 SIgGene m_VGene;
712 SIgGene m_DGene;
713 SIgGene m_JGene;
714 vector<SIgDomain *> m_IgDomains;
715
716 //index 0-2, not currently being used
717 // index 4, productive/non-productive
718 // index 3, stop codon or not
719 static const int num_otherinfo = 5;
720 string m_OtherInfo[num_otherinfo];
721 int m_Cdr3Start;
722 int m_Cdr3End;
723 int m_Fwr4Start;
724 int m_Fwr4End;
725
726 string m_Fwr1Seq;
727 string m_Fwr1SeqTrans;
728 string m_Cdr1Seq;
729 string m_Cdr1SeqTrans;
730 string m_Fwr2Seq;
731 string m_Fwr2SeqTrans;
732 string m_Cdr2Seq;
733 string m_Cdr2SeqTrans;
734 string m_Fwr3Seq;
735 string m_Fwr3SeqTrans;
736 string m_Fwr4Seq;
737 string m_Fwr4SeqTrans;
738 string m_Cdr3Seq;
739 string m_Cdr3SeqTrans;
740
741 string m_AirrCdr3Seq;
742 string m_AirrCdr3SeqTrans;
743 CRef<CSeq_align> m_TopAlign_V;
744 CRef<CSeq_align> m_TopAlign_D;
745
746 string m_QueryVAlign;
747 string m_VAlign;
748 int m_QueryVAlignStart;
749 int m_VAlignStart;
750 int m_QueryVAlignEnd;
751 CRef<CSeq_align> m_TopAlign_J;
752 map<string, string> m_AirrData;
753 int m_QueryAlignSeqEnd;
754 };
755
756 END_SCOPE(align_format)
757 END_NCBI_SCOPE
758
759 #endif /* OBJTOOLS_ALIGN_FORMAT___TABULAR_HPP */
760