1 /* $Id: blastfmtutil.cpp 500404 2016-05-04 14:59:01Z camacho $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Jian Ye
27 * 12/2004
28 * File Description:
29 * blast formatter utilities
30 *
31 */
32 #include <ncbi_pch.hpp>
33
34 #include <algo/blast/format/blastfmtutil.hpp>
35 #include <algo/blast/api/pssm_engine.hpp> // for CScorematPssmConverter
36 #include <objects/scoremat/Pssm.hpp>
37 #include <objects/scoremat/PssmParameters.hpp>
38
39 #include <stdio.h>
40 #include <sstream>
41 #include <iomanip>
42
43 #include <objects/seq/seqport_util.hpp>
44
45 #include <algo/blast/api/blast_types.hpp> // for CScorematPssmConverter
46 #include <objects/general/User_object.hpp>
47 #include <objects/general/User_field.hpp>
48 #include <objects/general/Object_id.hpp>
49
50 #include <objtools/blast/seqdb_reader/seqdb.hpp>
51 #include <util/range.hpp>
52
53
54
55 BEGIN_NCBI_SCOPE
56 USING_SCOPE(ncbi);
57 USING_SCOPE(objects);
58 USING_SCOPE(align_format);
59
BlastGetVersion(const string program)60 string CBlastFormatUtil::BlastGetVersion(const string program)
61 {
62 string program_uc = program;
63 return NStr::ToUpper(program_uc) + " " + blast::CBlastVersion().Print();
64 }
65
BlastPrintVersionInfo(const string program,bool html,CNcbiOstream & out)66 void CBlastFormatUtil::BlastPrintVersionInfo(const string program, bool html,
67 CNcbiOstream& out)
68 {
69 if (html)
70 out << "<b>" << BlastGetVersion(program) << "</b>" << "\n";
71 else
72 out << BlastGetVersion(program) << "\n";
73 }
74
75 void
BlastPrintReference(bool html,size_t line_len,CNcbiOstream & out,blast::CReference::EPublication pub,bool is_psiblast)76 CBlastFormatUtil::BlastPrintReference(bool html, size_t line_len,
77 CNcbiOstream& out,
78 blast::CReference::EPublication pub,
79 bool is_psiblast /* = false */)
80 {
81 string reference("Reference");
82 if (pub == blast::CReference::eCompAdjustedMatrices) {
83 reference += " for compositional score matrix adjustment";
84 } else if (pub == blast::CReference::eCompBasedStats) {
85 reference += " for composition-based statistics";
86 if (is_psiblast) {
87 reference += " starting in round 2";
88 }
89 } else if (pub == blast::CReference::eIndexedMegablast) {
90 reference += " for database indexing";
91 } else if (pub == blast::CReference::eDeltaBlast) {
92 reference += " for DELTA-BLAST";
93 }
94
95 ostringstream str;
96 if(html)
97 {
98 CNcbiIfstream config_file(".ncbirc");
99 CNcbiRegistry config_reg(config_file);
100 string httpProt = "https:";
101 if(!config_reg.Empty()) {
102 if(config_reg.HasEntry("BLASTFMTUTIL","PROTOCOL")) {
103 httpProt = config_reg.Get("BLASTFMTUTIL","PROTOCOL");
104 }
105 }
106 str << "<b><a href=\""
107 << httpProt
108 << blast::CReference::GetPubmedUrl(pub)
109 << "\">" << reference << "</a>:</b>"
110 << "\n";
111 x_WrapOutputLine(str.str() + blast::CReference::GetString(pub),
112 line_len, out);
113 }
114 else
115 {
116 str << reference << ": ";
117 x_WrapOutputLine(str.str() + blast::CReference::GetHTMLFreeString(pub),
118 line_len, out);
119 }
120
121 out << "\n";
122 }
123
PrintDbInformation(size_t line_len,string definition_line,int nNumSeqs,Uint8 nTotalLength,bool html,bool with_links,CNcbiOstream & out)124 void CBlastFormatUtil::PrintDbInformation(size_t line_len,
125 string definition_line,
126 int nNumSeqs,
127 Uint8 nTotalLength,
128 bool html,
129 bool with_links,
130 CNcbiOstream& out)
131
132
133 {
134 ostringstream str;
135 string dbString = (html) ? "<b>Database:</b> " : "Database: ";
136 str << dbString << definition_line << endl;
137 if(!(html && with_links)) x_WrapOutputLine(str.str(),line_len, out);
138
139 out << " " << NStr::IntToString(nNumSeqs,NStr::fWithCommas) << " sequences; " << NStr::UInt8ToString(nTotalLength,NStr::fWithCommas) << " total letters" << endl;
140 }
141
142 /** Standard order of letters according to S. Altschul
143 * FIXME: Move to blast_encoding.[hc] ?
144 */
145 static int RESIDUE_ORDER[] = {
146 1, /* A */
147 16, /* R */
148 13, /* N */
149 4, /* D */
150 3, /* C */
151 15, /* Q */
152 5, /* E */
153 7, /* G */
154 8, /* H */
155 9, /* I */
156 11, /* L */
157 10, /* K */
158 12, /* M */
159 6, /* F */
160 14, /* P */
161 17, /* S */
162 18, /* T */
163 20, /* W */
164 22, /* Y */
165 19 /* V */
166 };
167
168 typedef CNcbiMatrix<int> TNcbiMatrixInt;
169 typedef CNcbiMatrix<double> TNcbiMatrixDouble;
170
171 void
PrintAsciiPssm(const objects::CPssmWithParameters & pssm_with_params,CConstRef<blast::CBlastAncillaryData> ancillary_data,CNcbiOstream & out)172 CBlastFormatUtil::PrintAsciiPssm
173 (const objects::CPssmWithParameters& pssm_with_params,
174 CConstRef<blast::CBlastAncillaryData> ancillary_data,
175 CNcbiOstream& out)
176 {
177 _ASSERT(ancillary_data.NotEmpty());
178 static const Uint1 kXResidue = AMINOACID_TO_NCBISTDAA[(int)'X'];
179 vector<double> info_content, gapless_col_weights, sigma;
180 blast::CScorematPssmConverter::GetInformationContent(pssm_with_params,
181 info_content);
182 blast::CScorematPssmConverter::GetGaplessColumnWeights(pssm_with_params,
183 gapless_col_weights);
184 blast::CScorematPssmConverter::GetSigma(pssm_with_params, sigma);
185
186 // We use whether the information content is available to assume whether
187 // the PSSM computation was done or not
188 bool pssm_calculation_done = info_content.empty() ? false : true;
189
190 if (pssm_calculation_done) {
191 out << "\nLast position-specific scoring matrix computed, weighted ";
192 out << "observed percentages rounded down, information per position, ";
193 out << "and relative weight of gapless real matches to pseudocounts\n";
194 } else {
195 out << "\nLast position-specific scoring matrix computed\n";
196 }
197
198 // will need psiblast statistics: posCount, intervalSizes,
199 // sigma,
200 // posCounts can be calculated from residue_frequencies
201
202 const SIZE_TYPE kQueryLength = pssm_with_params.GetPssm().GetQueryLength();
203 _ASSERT(kQueryLength ==
204 (SIZE_TYPE)pssm_with_params.GetPssm().GetNumColumns());
205 auto_ptr< TNcbiMatrixInt > pssm
206 (blast::CScorematPssmConverter::GetScores(pssm_with_params));
207 auto_ptr< TNcbiMatrixDouble > weighted_res_freqs
208 (blast::CScorematPssmConverter::
209 GetWeightedResidueFrequencies(pssm_with_params));
210 vector<int> interval_sizes, num_matching_seqs;
211 blast::CScorematPssmConverter::GetIntervalSizes(pssm_with_params,
212 interval_sizes);
213 blast::CScorematPssmConverter::GetNumMatchingSeqs(pssm_with_params,
214 num_matching_seqs);
215
216 // find score width
217 // find maximum score
218 int max_score = 0;
219 ITERATE (TNcbiMatrixInt::TData, it, pssm->GetData()) {
220 if (*it <= BLAST_SCORE_MIN) {
221 continue;
222 }
223
224 if (*it > max_score) {
225 max_score = *it;
226 }
227
228 if (-*it > max_score) {
229 max_score = -*it;
230 }
231 }
232
233 // find number of digits in maximum score
234 int num_digits = 0;
235 while (max_score > 0) {
236 max_score /= 10;
237 num_digits++;
238 }
239 int width = num_digits + 2;
240
241 out << " ";
242 // print the header for the last PSSM computed
243 for (size_t c = 0; c < DIM(RESIDUE_ORDER); c++) {
244 out << setw(width) << NCBISTDAA_TO_AMINOACID[RESIDUE_ORDER[c]];
245 }
246 if (pssm_calculation_done) {
247 // print the header for the weigthed observed percentages
248 for (size_t c = 0; c < DIM(RESIDUE_ORDER); c++) {
249 out << " " << NCBISTDAA_TO_AMINOACID[RESIDUE_ORDER[c]];
250 }
251 }
252
253 CNCBIstdaa query;
254 pssm_with_params.GetPssm().GetQuerySequenceData(query);
255 const vector<char>& query_seq = query.Get();
256
257 out << fixed;
258 for (SIZE_TYPE i = 0; i < kQueryLength; i++) {
259 // print the residue for position i
260 out << "\n" << setw(5) << (i+1) << " " <<
261 NCBISTDAA_TO_AMINOACID[(int)query_seq[i]] << " ";
262
263 // print the PSSM
264 for (SIZE_TYPE c = 0; c < DIM(RESIDUE_ORDER); c++) {
265 if ((*pssm)(RESIDUE_ORDER[c], i) == BLAST_SCORE_MIN) {
266 out << "-I ";
267 } else {
268 out << setw(width) << (*pssm)(RESIDUE_ORDER[c], i);
269 }
270 }
271 out << " ";
272
273 if (pssm_calculation_done) {
274 // Print the weighted observed
275 for (SIZE_TYPE c = 0; c < DIM(RESIDUE_ORDER); c++) {
276 if ((*pssm)(RESIDUE_ORDER[c], i) != BLAST_SCORE_MIN) {
277 double value = 100;
278 value *= (*weighted_res_freqs)(RESIDUE_ORDER[c], i);
279 // round to the nearest integer
280 value = (int)(value + (value > 0. ? 0.5 : -0.5));
281 out << setw(4) << (int)value;
282 }
283 }
284
285 // print the information content
286 out << " " << setprecision(2) << info_content[i] << " ";
287
288 // print the relative weight of gapless real matches to pseudocounts
289 if ((num_matching_seqs[i] > 1) && (query_seq[i] != kXResidue)) {
290 out << setprecision(2) << gapless_col_weights[i];
291 } else {
292 out << " 0.00";
293 }
294 }
295 }
296
297 const Blast_KarlinBlk* ungapped_kbp =
298 ancillary_data->GetUngappedKarlinBlk();
299 const Blast_KarlinBlk* gapped_kbp =
300 ancillary_data->GetGappedKarlinBlk();
301 const Blast_KarlinBlk* psi_ungapped_kbp =
302 ancillary_data->GetPsiUngappedKarlinBlk();
303 const Blast_KarlinBlk* psi_gapped_kbp =
304 ancillary_data->GetPsiGappedKarlinBlk();
305 out << "\n\n" << setprecision(4);
306 out << " K Lambda\n";
307 if (ungapped_kbp) {
308 out << "Standard Ungapped "
309 << ungapped_kbp->K << " "
310 << ungapped_kbp->Lambda << "\n";
311 }
312 if (gapped_kbp) {
313 out << "Standard Gapped "
314 << gapped_kbp->K << " "
315 << gapped_kbp->Lambda << "\n";
316 }
317 if (psi_ungapped_kbp) {
318 out << "PSI Ungapped "
319 << psi_ungapped_kbp->K << " "
320 << psi_ungapped_kbp->Lambda << "\n";
321 }
322 if (psi_gapped_kbp) {
323 out << "PSI Gapped "
324 << psi_gapped_kbp->K << " "
325 << psi_gapped_kbp->Lambda << "\n";
326 }
327 }
328
329
330 CRef<objects::CSeq_annot>
CreateSeqAnnotFromSeqAlignSet(const objects::CSeq_align_set & alnset,blast::EProgram program,const string & db_name,const string & db_title,bool vdb_search)331 CBlastFormatUtil::CreateSeqAnnotFromSeqAlignSet(const objects::CSeq_align_set & alnset,
332 blast::EProgram program,
333 const string & db_name,
334 const string & db_title,
335 bool vdb_search)
336 {
337 CRef<CSeq_annot> retval(new CSeq_annot);
338
339 //Fill in Hist Seqalign
340 CRef<CUser_object> hist_align_obj(new CUser_object);
341 static const string kHistSeqalign("Hist Seqalign");
342 hist_align_obj->SetType().SetStr(kHistSeqalign);
343 hist_align_obj->AddField(kHistSeqalign, true);
344 retval->AddUserObject(*hist_align_obj);
345
346 //Fill in Blast Type
347 CRef<CUser_object> blast_type(new CUser_object);
348 static const string kBlastType("Blast Type");
349 blast_type->SetType().SetStr(kBlastType);
350 blast_type->AddField(blast::EProgramToTaskName(program), program);
351 retval->AddUserObject(*blast_type);
352
353 //Fill in DB Title
354 CRef<CUser_object> blast_db_info(new CUser_object);
355 if(vdb_search)
356 {
357 static const string kVDBNames("Database Names");
358 blast_db_info->SetType().SetStr(kVDBNames);
359 blast_db_info->AddField( db_name, true );
360
361 }
362 else
363 {
364 static const string kBlastDBTitle("Blast Database Title");
365 blast_db_info->SetType().SetStr(kBlastDBTitle);
366 if(0 == db_name.size() || 0 == NStr::CompareNocase(db_name, "n/a"))
367 {
368 blast_db_info->AddField( "n/a", false );
369 }
370 else if(0 == NStr::CompareNocase(db_name, "SRA"))
371 {
372 blast_db_info->AddField( db_name, true );
373 }
374 else
375 {
376 bool is_nucl = Blast_SubjectIsNucleotide(EProgramToEBlastProgramType(program));
377 blast_db_info->AddField( db_title, is_nucl );
378 }
379 }
380
381 retval->AddUserObject(*blast_db_info);
382
383 //Fill in data -- Seq align
384 retval->SetData().SetAlign();
385 ITERATE(CSeq_align_set::Tdata, itr, alnset.Get()) {
386 retval->SetData().SetAlign().push_back(*itr);
387 }
388
389 return retval;
390 }
391
CBlastFormattingMatrix(int ** data,unsigned int nrows,unsigned int ncols)392 CBlastFormattingMatrix::CBlastFormattingMatrix(int** data, unsigned int nrows,
393 unsigned int ncols)
394 {
395 const int kAsciiSize = 256;
396 Resize(kAsciiSize, kAsciiSize, INT_MIN);
397
398 // Create a CSeq_data object from a vector of values from 0 to the size of
399 // the matrix (26).
400 const int kNumValues = max(ncols, nrows);
401 vector<char> ncbistdaa_values(kNumValues);
402 for (int index = 0; index < kNumValues; ++index)
403 ncbistdaa_values[index] = (char) index;
404
405 CSeq_data ncbistdaa_seq(ncbistdaa_values, CSeq_data::e_Ncbistdaa);
406
407 // Convert to IUPACaa using the CSeqportUtil::Convert method.
408 CSeq_data iupacaa_seq;
409 CSeqportUtil::Convert(ncbistdaa_seq, &iupacaa_seq, CSeq_data::e_Iupacaa);
410
411 // Extract the IUPACaa values
412 vector<char> iupacaa_values(kNumValues);
413 for (int index = 0; index < kNumValues; ++index)
414 iupacaa_values[index] = iupacaa_seq.GetIupacaa().Get()[index];
415
416 // Fill the 256x256 output matrix.
417 for (unsigned int row = 0; row < nrows; ++row) {
418 for (unsigned int col = 0; col < ncols; ++col) {
419 if (iupacaa_values[row] >= 0 && iupacaa_values[col] >= 0) {
420 (*this)((int)iupacaa_values[row], (int)iupacaa_values[col]) =
421 data[row][col];
422 }
423 }
424 }
425 }
426
427
428 /// Auxiliary structure used for sorting CRange<int> objects in increasing
429 /// order of starting positions.
430 struct SRangeStartSort {
operator ()SRangeStartSort431 bool operator()(CRange<int> const& range1, CRange<int> const& range2)
432 {
433 return (range1.GetFrom() < range2.GetFrom());
434 }
435 };
436
437 /// Masks a query sequence string corresponding to an alignment, given a list
438 /// of mask locations.
439 /// @param alnvec One alignment [in]
440 /// @param query_seq Query string corresponding to this alignment [in] [out]
441 /// @param mask_info List of masking locations [in]
442 /// @param mask_char How should sequence be masked? [in]
443 /// @param query_frame If query is translated, what query frame is this
444 /// alignment for?
445 static void
s_MaskQuerySeq(CAlnVec & alnvec,string & query_seq,const ncbi::TMaskedQueryRegions & mask_info,align_format::CDisplaySeqalign::SeqLocCharOption mask_char,int query_frame)446 s_MaskQuerySeq(CAlnVec& alnvec, string& query_seq,
447 const ncbi::TMaskedQueryRegions& mask_info,
448 align_format::CDisplaySeqalign::SeqLocCharOption mask_char,
449 int query_frame)
450 {
451 const int kNumSegs = alnvec.GetNumSegs();
452 vector<CRange<int> > segs_v;
453 for (int index = 0; index < kNumSegs; ++index) {
454 CRange<int> range(alnvec.GetAlnStart(index),
455 alnvec.GetAlnStop(index));
456 segs_v.push_back(range);
457 }
458
459 vector<CRange<int> > masks_v;
460 int aln_stop = query_seq.size() - 1;
461 ITERATE(ncbi::TMaskedQueryRegions, mask_iter, mask_info) {
462 if ((*mask_iter)->GetFrame() != query_frame)
463 continue;
464 int start =
465 alnvec.GetAlnPosFromSeqPos(0,
466 (*mask_iter)->GetInterval().GetFrom());
467 int stop =
468 alnvec.GetAlnPosFromSeqPos(0,
469 (*mask_iter)->GetInterval().GetTo());
470 // For negative frames, start and stop must be swapped.
471 if (query_frame < 0) {
472 int tmp = start;
473 start = stop;
474 stop = tmp;
475 }
476 if (start >= 0) {
477 if (stop < 0)
478 stop = aln_stop;
479 CRange<int> range(start, stop);
480 masks_v.push_back(range);
481 }
482 }
483
484 sort(masks_v.begin(), masks_v.end(), SRangeStartSort());
485
486 // Mask the sequence
487 int mask_index = 0;
488 for (int seg_index = 0;
489 seg_index < (int) segs_v.size() && mask_index < (int) masks_v.size();
490 ++seg_index) {
491 if (segs_v[seg_index].Empty())
492 continue;
493 int seg_start = segs_v[seg_index].GetFrom();
494 int seg_stop = segs_v[seg_index].GetTo();
495 int mask_pos;
496 while (mask_index < (int) masks_v.size() &&
497 (mask_pos = max(seg_start, masks_v[mask_index].GetFrom()))
498 <= seg_stop) {
499 int mask_stop = min(seg_stop, masks_v[mask_index].GetTo());
500 // Mask the respective part of the sequence
501 for ( ; mask_pos <= mask_stop; ++mask_pos) {
502 if( query_seq[mask_pos] == '-' ) continue; // preserve gap
503 if (mask_char == CDisplaySeqalign::eX) {
504 query_seq[mask_pos] = 'X';
505 } else if (mask_char == CDisplaySeqalign::eN){
506 query_seq[mask_pos]='N';
507 } else if (mask_char == CDisplaySeqalign::eLowerCase) {
508 query_seq[mask_pos] =
509 tolower((unsigned char)query_seq[mask_pos]);
510 }
511 }
512 // Advance to the next mask if this mask is done with. Otherwise
513 // break out of the loop.
514 if (mask_pos < seg_stop)
515 ++mask_index;
516 else
517 break;
518 }
519 }
520 }
521
522 static void
s_GetQueryAndSubjectStrings(CAlnVec & aln_vec,string & query,string & subject,int master_gen_code,int slave_gen_code)523 s_GetQueryAndSubjectStrings(CAlnVec & aln_vec,
524 string & query,
525 string & subject,
526 int master_gen_code,
527 int slave_gen_code)
528 {
529 //Note: do not switch the set order per calnvec specs.
530 aln_vec.SetGenCode(slave_gen_code);
531 aln_vec.SetGenCode(master_gen_code, 0);
532
533 aln_vec.SetGapChar('-');
534 aln_vec.GetWholeAlnSeqString(0, query);
535 aln_vec.GetWholeAlnSeqString(1, subject);
536 }
537
538 void
GetWholeAlnSeqStrings(string & query,string & subject,const objects::CDense_seg & ds,objects::CScope & scope,int master_gen_code,int slave_gen_code)539 CBlastFormatUtil::GetWholeAlnSeqStrings(string & query,
540 string & subject,
541 const objects::CDense_seg& ds,
542 objects::CScope& scope,
543 int master_gen_code,
544 int slave_gen_code)
545 {
546 CAlnVec aln_vec(ds, scope);
547 aln_vec.SetAaCoding(CSeq_data::e_Ncbieaa);
548 s_GetQueryAndSubjectStrings(aln_vec, query, subject, master_gen_code, slave_gen_code);
549 }
550
551 void
GetWholeAlnSeqStrings(string & query,string & masked_query,string & subject,const objects::CDense_seg & ds,objects::CScope & scope,int master_gen_code,int slave_gen_code,const ncbi::TMaskedQueryRegions & mask_info,align_format::CDisplaySeqalign::SeqLocCharOption mask_char,int query_frame)552 CBlastFormatUtil::GetWholeAlnSeqStrings(string & query,
553 string & masked_query,
554 string & subject,
555 const objects::CDense_seg & ds,
556 objects::CScope & scope,
557 int master_gen_code,
558 int slave_gen_code,
559 const ncbi::TMaskedQueryRegions& mask_info,
560 align_format::CDisplaySeqalign::SeqLocCharOption mask_char,
561 int query_frame)
562 {
563 CAlnVec aln_vec(ds, scope);
564 aln_vec.SetAaCoding(CSeq_data::e_Ncbieaa);
565
566 s_GetQueryAndSubjectStrings(aln_vec, query, subject, master_gen_code, slave_gen_code);
567
568 masked_query = query;
569 s_MaskQuerySeq(aln_vec, masked_query, mask_info, mask_char, query_frame);
570 }
571
InsertSubjectScores(CSeq_align_set & org_align_set,const CBioseq_Handle & query_handle,TSeqRange query_range,ESubjectScores score_type)572 void CBlastFormatUtil::InsertSubjectScores (CSeq_align_set & org_align_set,
573 const CBioseq_Handle & query_handle,
574 TSeqRange query_range,
575 ESubjectScores score_type)
576 {
577
578 if(!org_align_set.IsSet() || org_align_set.Get().empty()) {
579 _TRACE("Empty seq_align_set");
580 return;
581 }
582 // Seq align set from
583 int dont_care = 0;
584 unsigned int check_type = score_type;
585 if(org_align_set.Get().front()->GetNamedScore("seq_percent_coverage", dont_care)) {
586 check_type &= (~eQueryCovPerSubj);
587 }
588 if (org_align_set.Get().front()->GetNamedScore("uniq_seq_percent_coverage", dont_care)) {
589 check_type &= (~eQueryCovPerUniqSubj);
590 }
591 if(check_type == eNoQuerySubjCov){
592 return;
593 }
594
595 CConstRef<CBioseq> query_bioseq = query_handle.GetCompleteBioseq();
596 int query_len = 0;
597 if(query_range.NotEmpty())
598 {
599 query_len = query_range.GetLength();
600 }
601 else if (!query_bioseq.Empty() && query_bioseq->IsSetLength())
602 {
603 query_len = query_bioseq->GetLength();
604 }
605
606 if(query_len <= 0)
607 {
608 _TRACE("Invalid Query Length");
609 return;
610 }
611
612 CSeq_align_set tmp_align_set;
613 list<CRef<CSeq_align> > & tmp_align_list = tmp_align_set.Set();
614 list<CRef<CSeq_align> > & org_align_list = org_align_set.Set();
615
616 list<CRef<CSeq_align> >::iterator left_it = org_align_list.begin();
617 list<CRef<CSeq_align> >::iterator right_it = org_align_list.begin();
618
619 while(left_it != org_align_list.end())
620 {
621 const CSeq_id & cur_id = (*left_it)->GetSeq_id(1);
622 ++ right_it;
623
624 for (; right_it != org_align_list.end(); ++right_it)
625 {
626 const CSeq_id & id = (*right_it)->GetSeq_id(1);
627 if (!id.Match(cur_id))
628 break;
629 }
630
631 tmp_align_list.assign(left_it, right_it);
632 if((check_type & eQueryCovPerSubj)) {
633 int master_coverage = align_format::CAlignFormatUtil::GetMasterCoverage(tmp_align_set);
634
635 if (master_coverage)
636 {
637 double subj_coverage = 100.0 * (double) master_coverage/ (double) query_len;
638 //cerr << "Query Length: " << query_len << endl;
639 //cerr << "Query coverage Length: " << master_coverage << endl;
640 if(subj_coverage < 99)
641 subj_coverage +=0.5;
642
643 (*left_it)->SetNamedScore ("seq_percent_coverage", (int) subj_coverage);
644 }
645 }
646 if((check_type & eQueryCovPerUniqSubj)) {
647 int uniq_coverage = align_format::CAlignFormatUtil::GetUniqSeqCoverage(tmp_align_set);
648 if (uniq_coverage)
649 {
650 double uniq_subj_coverage = 100.0 * (double) uniq_coverage/ (double) query_len;
651 //cerr << "Query Uniq coverage Length: " << uniq_coverage << endl;
652 //cerr << uniq_coverage << endl;
653 if(uniq_subj_coverage < 99)
654 uniq_subj_coverage +=0.5;
655
656 (*left_it)->SetNamedScore ("uniq_seq_percent_coverage", (int) uniq_subj_coverage);
657 }
658 }
659 left_it = right_it;
660 }
661 }
662
SBlastXMLIncremental()663 SBlastXMLIncremental::SBlastXMLIncremental()
664 : m_IterationNum(0)
665 {
666 m_SerialXmlEnd = "";
667 }
668 END_NCBI_SCOPE
669