1 /*  $Id: blastfmtutil.cpp 500404 2016-05-04 14:59:01Z camacho $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Jian Ye
27  * 12/2004
28  * File Description:
29  *   blast formatter utilities
30  *
31  */
32 #include <ncbi_pch.hpp>
33 
34 #include <algo/blast/format/blastfmtutil.hpp>
35 #include <algo/blast/api/pssm_engine.hpp>   // for CScorematPssmConverter
36 #include <objects/scoremat/Pssm.hpp>
37 #include <objects/scoremat/PssmParameters.hpp>
38 
39 #include <stdio.h>
40 #include <sstream>
41 #include <iomanip>
42 
43 #include <objects/seq/seqport_util.hpp>
44 
45 #include <algo/blast/api/blast_types.hpp>   // for CScorematPssmConverter
46 #include <objects/general/User_object.hpp>
47 #include <objects/general/User_field.hpp>
48 #include <objects/general/Object_id.hpp>
49 
50 #include <objtools/blast/seqdb_reader/seqdb.hpp>
51 #include <util/range.hpp>
52 
53 
54 
55 BEGIN_NCBI_SCOPE
56 USING_SCOPE(ncbi);
57 USING_SCOPE(objects);
58 USING_SCOPE(align_format);
59 
BlastGetVersion(const string program)60 string CBlastFormatUtil::BlastGetVersion(const string program)
61 {
62     string program_uc = program;
63     return NStr::ToUpper(program_uc) + " " + blast::CBlastVersion().Print();
64 }
65 
BlastPrintVersionInfo(const string program,bool html,CNcbiOstream & out)66 void CBlastFormatUtil::BlastPrintVersionInfo(const string program, bool html,
67                                              CNcbiOstream& out)
68 {
69     if (html)
70         out << "<b>" << BlastGetVersion(program) << "</b>" << "\n";
71     else
72         out << BlastGetVersion(program) << "\n";
73 }
74 
75 void
BlastPrintReference(bool html,size_t line_len,CNcbiOstream & out,blast::CReference::EPublication pub,bool is_psiblast)76 CBlastFormatUtil::BlastPrintReference(bool html, size_t line_len,
77                                       CNcbiOstream& out,
78                                       blast::CReference::EPublication pub,
79                                       bool is_psiblast /* = false */)
80 {
81     string reference("Reference");
82     if (pub == blast::CReference::eCompAdjustedMatrices) {
83         reference += " for compositional score matrix adjustment";
84     } else if (pub == blast::CReference::eCompBasedStats) {
85         reference += " for composition-based statistics";
86         if (is_psiblast) {
87             reference += " starting in round 2";
88         }
89     } else if (pub == blast::CReference::eIndexedMegablast) {
90         reference += " for database indexing";
91     } else if (pub == blast::CReference::eDeltaBlast) {
92         reference += " for DELTA-BLAST";
93     }
94 
95     ostringstream str;
96     if(html)
97     {
98         CNcbiIfstream  config_file(".ncbirc");
99         CNcbiRegistry config_reg(config_file);
100         string httpProt = "https:";
101         if(!config_reg.Empty()) {
102             if(config_reg.HasEntry("BLASTFMTUTIL","PROTOCOL")) {
103                 httpProt = config_reg.Get("BLASTFMTUTIL","PROTOCOL");
104             }
105         }
106         str << "<b><a href=\""
107             << httpProt
108             << blast::CReference::GetPubmedUrl(pub)
109             << "\">" << reference << "</a>:</b>"
110             << "\n";
111         x_WrapOutputLine(str.str() + blast::CReference::GetString(pub),
112                    line_len, out);
113     }
114     else
115     {
116         str << reference << ": ";
117         x_WrapOutputLine(str.str() + blast::CReference::GetHTMLFreeString(pub),
118                    line_len, out);
119     }
120 
121     out << "\n";
122 }
123 
PrintDbInformation(size_t line_len,string definition_line,int nNumSeqs,Uint8 nTotalLength,bool html,bool with_links,CNcbiOstream & out)124 void CBlastFormatUtil::PrintDbInformation(size_t line_len,
125                                           string definition_line,
126                                           int nNumSeqs,
127                                           Uint8 nTotalLength,
128                                           bool html,
129                                           bool with_links,
130                                           CNcbiOstream& out)
131 
132 
133 {
134     ostringstream str;
135     string dbString = (html) ? "<b>Database:</b> " : "Database: ";
136     str << dbString << definition_line << endl;
137     if(!(html && with_links)) x_WrapOutputLine(str.str(),line_len, out);
138 
139     out << "           " << NStr::IntToString(nNumSeqs,NStr::fWithCommas) << " sequences; " << NStr::UInt8ToString(nTotalLength,NStr::fWithCommas) << " total letters" << endl;
140 }
141 
142 /** Standard order of letters according to S. Altschul
143  * FIXME: Move to blast_encoding.[hc] ?
144  */
145 static int RESIDUE_ORDER[] = {
146      1,     /* A */
147     16,     /* R */
148     13,     /* N */
149      4,     /* D */
150      3,     /* C */
151     15,     /* Q */
152      5,     /* E */
153      7,     /* G */
154      8,     /* H */
155      9,     /* I */
156     11,     /* L */
157     10,     /* K */
158     12,     /* M */
159      6,     /* F */
160     14,     /* P */
161     17,     /* S */
162     18,     /* T */
163     20,     /* W */
164     22,     /* Y */
165     19      /* V */
166 };
167 
168 typedef CNcbiMatrix<int> TNcbiMatrixInt;
169 typedef CNcbiMatrix<double> TNcbiMatrixDouble;
170 
171 void
PrintAsciiPssm(const objects::CPssmWithParameters & pssm_with_params,CConstRef<blast::CBlastAncillaryData> ancillary_data,CNcbiOstream & out)172 CBlastFormatUtil::PrintAsciiPssm
173     (const objects::CPssmWithParameters& pssm_with_params,
174      CConstRef<blast::CBlastAncillaryData> ancillary_data,
175      CNcbiOstream& out)
176 {
177     _ASSERT(ancillary_data.NotEmpty());
178     static const Uint1 kXResidue = AMINOACID_TO_NCBISTDAA[(int)'X'];
179     vector<double> info_content, gapless_col_weights, sigma;
180     blast::CScorematPssmConverter::GetInformationContent(pssm_with_params,
181                                                          info_content);
182     blast::CScorematPssmConverter::GetGaplessColumnWeights(pssm_with_params,
183                                                            gapless_col_weights);
184     blast::CScorematPssmConverter::GetSigma(pssm_with_params, sigma);
185 
186     // We use whether the information content is available to assume whether
187     // the PSSM computation was done or not
188     bool pssm_calculation_done = info_content.empty() ? false : true;
189 
190     if (pssm_calculation_done) {
191         out << "\nLast position-specific scoring matrix computed, weighted ";
192         out << "observed percentages rounded down, information per position, ";
193         out << "and relative weight of gapless real matches to pseudocounts\n";
194     } else {
195         out << "\nLast position-specific scoring matrix computed\n";
196     }
197 
198     // will need psiblast statistics: posCount, intervalSizes,
199     // sigma,
200     // posCounts can be calculated from residue_frequencies
201 
202     const SIZE_TYPE kQueryLength = pssm_with_params.GetPssm().GetQueryLength();
203     _ASSERT(kQueryLength ==
204             (SIZE_TYPE)pssm_with_params.GetPssm().GetNumColumns());
205     auto_ptr< TNcbiMatrixInt > pssm
206         (blast::CScorematPssmConverter::GetScores(pssm_with_params));
207     auto_ptr< TNcbiMatrixDouble > weighted_res_freqs
208         (blast::CScorematPssmConverter::
209             GetWeightedResidueFrequencies(pssm_with_params));
210     vector<int> interval_sizes, num_matching_seqs;
211     blast::CScorematPssmConverter::GetIntervalSizes(pssm_with_params,
212                                                     interval_sizes);
213     blast::CScorematPssmConverter::GetNumMatchingSeqs(pssm_with_params,
214                                                       num_matching_seqs);
215 
216     // find score width
217     // find maximum score
218     int max_score = 0;
219     ITERATE (TNcbiMatrixInt::TData, it, pssm->GetData()) {
220         if (*it <= BLAST_SCORE_MIN) {
221             continue;
222         }
223 
224         if (*it > max_score) {
225             max_score = *it;
226         }
227 
228         if (-*it > max_score) {
229             max_score = -*it;
230         }
231     }
232 
233     // find number of digits in maximum score
234     int num_digits = 0;
235     while (max_score > 0) {
236         max_score /= 10;
237         num_digits++;
238     }
239     int width = num_digits + 2;
240 
241     out << "         ";
242     // print the header for the last PSSM computed
243     for (size_t c = 0; c < DIM(RESIDUE_ORDER); c++) {
244         out << setw(width) << NCBISTDAA_TO_AMINOACID[RESIDUE_ORDER[c]];
245     }
246     if (pssm_calculation_done) {
247         // print the header for the weigthed observed percentages
248         for (size_t c = 0; c < DIM(RESIDUE_ORDER); c++) {
249             out << "   " << NCBISTDAA_TO_AMINOACID[RESIDUE_ORDER[c]];
250         }
251     }
252 
253     CNCBIstdaa query;
254     pssm_with_params.GetPssm().GetQuerySequenceData(query);
255     const vector<char>& query_seq = query.Get();
256 
257     out << fixed;
258     for (SIZE_TYPE i = 0; i < kQueryLength; i++) {
259         // print the residue for position i
260         out << "\n" << setw(5) << (i+1) << " " <<
261             NCBISTDAA_TO_AMINOACID[(int)query_seq[i]] << "  ";
262 
263         // print the PSSM
264         for (SIZE_TYPE c = 0; c < DIM(RESIDUE_ORDER); c++) {
265             if ((*pssm)(RESIDUE_ORDER[c], i) == BLAST_SCORE_MIN) {
266                 out << "-I ";
267             } else {
268                 out << setw(width) << (*pssm)(RESIDUE_ORDER[c], i);
269             }
270         }
271         out << " ";
272 
273         if (pssm_calculation_done) {
274             // Print the weighted observed
275             for (SIZE_TYPE c = 0; c < DIM(RESIDUE_ORDER); c++) {
276                 if ((*pssm)(RESIDUE_ORDER[c], i) != BLAST_SCORE_MIN) {
277                     double value = 100;
278                     value *= (*weighted_res_freqs)(RESIDUE_ORDER[c], i);
279                     // round to the nearest integer
280                     value = (int)(value + (value > 0. ? 0.5 : -0.5));
281                     out << setw(4) << (int)value;
282                 }
283             }
284 
285             // print the information content
286             out << "  " << setprecision(2) << info_content[i] << " ";
287 
288             // print the relative weight of gapless real matches to pseudocounts
289             if ((num_matching_seqs[i] > 1) && (query_seq[i] != kXResidue)) {
290                 out << setprecision(2) << gapless_col_weights[i];
291             } else {
292                 out << "    0.00";
293             }
294         }
295     }
296 
297     const Blast_KarlinBlk* ungapped_kbp =
298         ancillary_data->GetUngappedKarlinBlk();
299     const Blast_KarlinBlk* gapped_kbp =
300         ancillary_data->GetGappedKarlinBlk();
301     const Blast_KarlinBlk* psi_ungapped_kbp =
302         ancillary_data->GetPsiUngappedKarlinBlk();
303     const Blast_KarlinBlk* psi_gapped_kbp =
304         ancillary_data->GetPsiGappedKarlinBlk();
305     out << "\n\n" << setprecision(4);
306     out << "                      K         Lambda\n";
307     if (ungapped_kbp) {
308         out << "Standard Ungapped    "
309             << ungapped_kbp->K << "     "
310             << ungapped_kbp->Lambda << "\n";
311     }
312     if (gapped_kbp) {
313         out << "Standard Gapped      "
314             << gapped_kbp->K << "     "
315             << gapped_kbp->Lambda << "\n";
316     }
317     if (psi_ungapped_kbp) {
318         out << "PSI Ungapped         "
319             << psi_ungapped_kbp->K << "     "
320             << psi_ungapped_kbp->Lambda << "\n";
321     }
322     if (psi_gapped_kbp) {
323         out << "PSI Gapped           "
324             << psi_gapped_kbp->K << "     "
325             << psi_gapped_kbp->Lambda << "\n";
326     }
327 }
328 
329 
330 CRef<objects::CSeq_annot>
CreateSeqAnnotFromSeqAlignSet(const objects::CSeq_align_set & alnset,blast::EProgram program,const string & db_name,const string & db_title,bool vdb_search)331 CBlastFormatUtil::CreateSeqAnnotFromSeqAlignSet(const objects::CSeq_align_set & alnset,
332 												blast::EProgram program,
333 												const string & db_name,
334                                                 const string & db_title,
335 												bool vdb_search)
336 {
337     CRef<CSeq_annot> retval(new CSeq_annot);
338 
339     //Fill in Hist Seqalign
340     CRef<CUser_object> hist_align_obj(new CUser_object);
341     static const string kHistSeqalign("Hist Seqalign");
342     hist_align_obj->SetType().SetStr(kHistSeqalign);
343     hist_align_obj->AddField(kHistSeqalign, true);
344     retval->AddUserObject(*hist_align_obj);
345 
346     //Fill in Blast Type
347     CRef<CUser_object> blast_type(new CUser_object);
348     static const string kBlastType("Blast Type");
349     blast_type->SetType().SetStr(kBlastType);
350     blast_type->AddField(blast::EProgramToTaskName(program), program);
351     retval->AddUserObject(*blast_type);
352 
353     //Fill in DB Title
354     CRef<CUser_object> blast_db_info(new CUser_object);
355     if(vdb_search)
356     {
357     	static const string kVDBNames("Database Names");
358     	blast_db_info->SetType().SetStr(kVDBNames);
359    		blast_db_info->AddField( db_name, true );
360 
361     }
362     else
363     {
364     	static const string kBlastDBTitle("Blast Database Title");
365     	blast_db_info->SetType().SetStr(kBlastDBTitle);
366     	if(0 == db_name.size() || 0 == NStr::CompareNocase(db_name, "n/a"))
367     	{
368     		blast_db_info->AddField( "n/a", false );
369     	}
370     	else if(0 == NStr::CompareNocase(db_name, "SRA"))
371     	{
372     		blast_db_info->AddField( db_name, true );
373     	}
374     	else
375     	{
376     		bool is_nucl = Blast_SubjectIsNucleotide(EProgramToEBlastProgramType(program));
377     		blast_db_info->AddField( db_title, is_nucl );
378     	}
379     }
380 
381    	retval->AddUserObject(*blast_db_info);
382 
383    	//Fill in data -- Seq align
384 	retval->SetData().SetAlign();
385     ITERATE(CSeq_align_set::Tdata, itr, alnset.Get()) {
386         retval->SetData().SetAlign().push_back(*itr);
387     }
388 
389     return retval;
390 }
391 
CBlastFormattingMatrix(int ** data,unsigned int nrows,unsigned int ncols)392 CBlastFormattingMatrix::CBlastFormattingMatrix(int** data, unsigned int nrows,
393                                                unsigned int ncols)
394 {
395     const int kAsciiSize = 256;
396     Resize(kAsciiSize, kAsciiSize, INT_MIN);
397 
398     // Create a CSeq_data object from a vector of values from 0 to the size of
399     // the matrix (26).
400     const int kNumValues = max(ncols, nrows);
401     vector<char> ncbistdaa_values(kNumValues);
402     for (int index = 0; index < kNumValues; ++index)
403         ncbistdaa_values[index] = (char) index;
404 
405     CSeq_data ncbistdaa_seq(ncbistdaa_values, CSeq_data::e_Ncbistdaa);
406 
407     // Convert to IUPACaa using the CSeqportUtil::Convert method.
408     CSeq_data iupacaa_seq;
409     CSeqportUtil::Convert(ncbistdaa_seq, &iupacaa_seq, CSeq_data::e_Iupacaa);
410 
411     // Extract the IUPACaa values
412     vector<char> iupacaa_values(kNumValues);
413     for (int index = 0; index < kNumValues; ++index)
414         iupacaa_values[index] = iupacaa_seq.GetIupacaa().Get()[index];
415 
416     // Fill the 256x256 output matrix.
417     for (unsigned int row = 0; row < nrows; ++row) {
418         for (unsigned int col = 0; col < ncols; ++col) {
419             if (iupacaa_values[row] >= 0 && iupacaa_values[col] >= 0) {
420                 (*this)((int)iupacaa_values[row], (int)iupacaa_values[col]) =
421                     data[row][col];
422             }
423         }
424     }
425 }
426 
427 
428 /// Auxiliary structure used for sorting CRange<int> objects in increasing
429 /// order of starting positions.
430 struct SRangeStartSort {
operator ()SRangeStartSort431     bool operator()(CRange<int> const& range1, CRange<int> const& range2)
432     {
433         return (range1.GetFrom() < range2.GetFrom());
434     }
435 };
436 
437 /// Masks a query sequence string corresponding to an alignment, given a list
438 /// of mask locations.
439 /// @param alnvec One alignment [in]
440 /// @param query_seq Query string corresponding to this alignment [in] [out]
441 /// @param mask_info List of masking locations [in]
442 /// @param mask_char How should sequence be masked? [in]
443 /// @param query_frame If query is translated, what query frame is this
444 ///                    alignment for?
445 static void
s_MaskQuerySeq(CAlnVec & alnvec,string & query_seq,const ncbi::TMaskedQueryRegions & mask_info,align_format::CDisplaySeqalign::SeqLocCharOption mask_char,int query_frame)446 s_MaskQuerySeq(CAlnVec& alnvec, string& query_seq,
447                const ncbi::TMaskedQueryRegions& mask_info,
448                align_format::CDisplaySeqalign::SeqLocCharOption mask_char,
449                int query_frame)
450 {
451     const int kNumSegs = alnvec.GetNumSegs();
452     vector<CRange<int> > segs_v;
453     for (int index = 0; index < kNumSegs; ++index) {
454         CRange<int> range(alnvec.GetAlnStart(index),
455                           alnvec.GetAlnStop(index));
456         segs_v.push_back(range);
457     }
458 
459     vector<CRange<int> > masks_v;
460     int aln_stop = query_seq.size() - 1;
461     ITERATE(ncbi::TMaskedQueryRegions, mask_iter, mask_info) {
462         if ((*mask_iter)->GetFrame() != query_frame)
463             continue;
464         int start =
465             alnvec.GetAlnPosFromSeqPos(0,
466                                        (*mask_iter)->GetInterval().GetFrom());
467         int stop =
468             alnvec.GetAlnPosFromSeqPos(0,
469                                        (*mask_iter)->GetInterval().GetTo());
470         // For negative frames, start and stop must be swapped.
471         if (query_frame < 0) {
472             int tmp = start;
473             start = stop;
474             stop = tmp;
475         }
476         if (start >= 0) {
477             if (stop < 0)
478                 stop = aln_stop;
479             CRange<int>  range(start, stop);
480             masks_v.push_back(range);
481         }
482     }
483 
484     sort(masks_v.begin(), masks_v.end(), SRangeStartSort());
485 
486     // Mask the sequence
487     int mask_index = 0;
488     for (int seg_index = 0;
489          seg_index < (int) segs_v.size() && mask_index < (int) masks_v.size();
490          ++seg_index) {
491         if (segs_v[seg_index].Empty())
492             continue;
493         int seg_start = segs_v[seg_index].GetFrom();
494         int seg_stop = segs_v[seg_index].GetTo();
495         int mask_pos;
496         while (mask_index < (int) masks_v.size() &&
497                (mask_pos = max(seg_start, masks_v[mask_index].GetFrom()))
498                <= seg_stop) {
499             int mask_stop = min(seg_stop, masks_v[mask_index].GetTo());
500             // Mask the respective part of the sequence
501             for ( ; mask_pos <= mask_stop; ++mask_pos) {
502 		if(  query_seq[mask_pos] == '-' ) continue; // preserve gap
503                 if (mask_char == CDisplaySeqalign::eX) {
504                     query_seq[mask_pos] = 'X';
505                 } else if (mask_char == CDisplaySeqalign::eN){
506                     query_seq[mask_pos]='N';
507                 } else if (mask_char == CDisplaySeqalign::eLowerCase) {
508                     query_seq[mask_pos] =
509                         tolower((unsigned char)query_seq[mask_pos]);
510                 }
511             }
512             // Advance to the next mask if this mask is done with. Otherwise
513             // break out of the loop.
514             if (mask_pos < seg_stop)
515                 ++mask_index;
516             else
517                 break;
518         }
519     }
520 }
521 
522 static void
s_GetQueryAndSubjectStrings(CAlnVec & aln_vec,string & query,string & subject,int master_gen_code,int slave_gen_code)523 s_GetQueryAndSubjectStrings(CAlnVec & aln_vec,
524 							string & query,
525 							string & subject,
526 							int master_gen_code,
527 							int slave_gen_code)
528 {
529 	//Note: do not switch the set order per calnvec specs.
530 	aln_vec.SetGenCode(slave_gen_code);
531 	aln_vec.SetGenCode(master_gen_code, 0);
532 
533     aln_vec.SetGapChar('-');
534     aln_vec.GetWholeAlnSeqString(0, query);
535     aln_vec.GetWholeAlnSeqString(1, subject);
536 }
537 
538 void
GetWholeAlnSeqStrings(string & query,string & subject,const objects::CDense_seg & ds,objects::CScope & scope,int master_gen_code,int slave_gen_code)539 CBlastFormatUtil::GetWholeAlnSeqStrings(string & query,
540 								 	   string & subject,
541 								 	   const objects::CDense_seg& ds,
542 								 	   objects::CScope& scope,
543 								 	   int master_gen_code,
544 								 	   int slave_gen_code)
545 {
546 	CAlnVec aln_vec(ds, scope);
547 	aln_vec.SetAaCoding(CSeq_data::e_Ncbieaa);
548 	s_GetQueryAndSubjectStrings(aln_vec, query, subject, master_gen_code, slave_gen_code);
549 }
550 
551 void
GetWholeAlnSeqStrings(string & query,string & masked_query,string & subject,const objects::CDense_seg & ds,objects::CScope & scope,int master_gen_code,int slave_gen_code,const ncbi::TMaskedQueryRegions & mask_info,align_format::CDisplaySeqalign::SeqLocCharOption mask_char,int query_frame)552 CBlastFormatUtil::GetWholeAlnSeqStrings(string & query,
553 						   	   	   	   	string & masked_query,
554 						   	   	   	   	string & subject,
555 						   	   	   	   	const objects::CDense_seg & ds,
556 						   	   	   	   	objects::CScope & scope,
557 						   	   	   	   	int master_gen_code,
558 						   	   	   	   	int slave_gen_code,
559 						   	   	   	   	const ncbi::TMaskedQueryRegions& mask_info,
560 						   	   	   	   	align_format::CDisplaySeqalign::SeqLocCharOption mask_char,
561 						   	   	   	   	int query_frame)
562 {
563 	CAlnVec aln_vec(ds, scope);
564 	aln_vec.SetAaCoding(CSeq_data::e_Ncbieaa);
565 
566 	s_GetQueryAndSubjectStrings(aln_vec, query, subject, master_gen_code, slave_gen_code);
567 
568 	masked_query = query;
569 	s_MaskQuerySeq(aln_vec, masked_query, mask_info, mask_char, query_frame);
570 }
571 
InsertSubjectScores(CSeq_align_set & org_align_set,const CBioseq_Handle & query_handle,TSeqRange query_range,ESubjectScores score_type)572 void CBlastFormatUtil::InsertSubjectScores (CSeq_align_set & org_align_set,
573 										    const CBioseq_Handle & query_handle,
574 										    TSeqRange  query_range,
575 										    ESubjectScores score_type)
576 {
577 
578 	if(!org_align_set.IsSet() || org_align_set.Get().empty()) {
579 		_TRACE("Empty seq_align_set");
580 		return;
581 	}
582 	// Seq align set from
583 	int dont_care = 0;
584 	unsigned int check_type = score_type;
585 	if(org_align_set.Get().front()->GetNamedScore("seq_percent_coverage", dont_care)) {
586 		check_type &= (~eQueryCovPerSubj);
587 	}
588 	if (org_align_set.Get().front()->GetNamedScore("uniq_seq_percent_coverage", dont_care)) {
589 		check_type &= (~eQueryCovPerUniqSubj);
590 	}
591 	if(check_type == eNoQuerySubjCov){
592 		return;
593 	}
594 
595     CConstRef<CBioseq> query_bioseq = query_handle.GetCompleteBioseq();
596     int query_len = 0;
597     if(query_range.NotEmpty())
598     {
599     	query_len = query_range.GetLength();
600     }
601     else if (!query_bioseq.Empty() && query_bioseq->IsSetLength())
602     {
603     	query_len = query_bioseq->GetLength();
604     }
605 
606     if(query_len <= 0)
607     {
608     	_TRACE("Invalid Query Length");
609     	return;
610     }
611 
612     CSeq_align_set tmp_align_set;
613     list<CRef<CSeq_align> > & tmp_align_list = tmp_align_set.Set();
614     list<CRef<CSeq_align> > &  org_align_list = org_align_set.Set();
615 
616     list<CRef<CSeq_align> >::iterator left_it = org_align_list.begin();
617     list<CRef<CSeq_align> >::iterator right_it = org_align_list.begin();
618 
619     while(left_it != org_align_list.end())
620     {
621     	const CSeq_id & cur_id = (*left_it)->GetSeq_id(1);
622     	++ right_it;
623 
624     	for (; right_it != org_align_list.end(); ++right_it)
625     	{
626           	const CSeq_id  & id = (*right_it)->GetSeq_id(1);
627           	if (!id.Match(cur_id))
628           		break;
629         }
630 
631        	tmp_align_list.assign(left_it, right_it);
632        	if((check_type & eQueryCovPerSubj)) {
633        		int master_coverage = align_format::CAlignFormatUtil::GetMasterCoverage(tmp_align_set);
634 
635        		if (master_coverage)
636         	{
637            		double subj_coverage = 100.0 * (double) master_coverage/ (double) query_len;
638            		//cerr << "Query Length: " << query_len << endl;
639            		//cerr << "Query coverage Length: " << master_coverage << endl;
640            		if(subj_coverage < 99)
641            			subj_coverage +=0.5;
642 
643            		(*left_it)->SetNamedScore ("seq_percent_coverage", (int) subj_coverage);
644         	}
645        	}
646        	if((check_type & eQueryCovPerUniqSubj)) {
647        		int uniq_coverage = align_format::CAlignFormatUtil::GetUniqSeqCoverage(tmp_align_set);
648        		if (uniq_coverage)
649        	    {
650        	    	double uniq_subj_coverage = 100.0 * (double) uniq_coverage/ (double) query_len;
651            		//cerr << "Query Uniq coverage Length: " << uniq_coverage << endl;
652            		//cerr << uniq_coverage << endl;
653        	    	if(uniq_subj_coverage < 99)
654        	        	uniq_subj_coverage +=0.5;
655 
656        	         (*left_it)->SetNamedScore ("uniq_seq_percent_coverage", (int) uniq_subj_coverage);
657        	    }
658        	}
659         left_it = right_it;
660     }
661 }
662 
SBlastXMLIncremental()663 SBlastXMLIncremental::SBlastXMLIncremental()
664 : m_IterationNum(0)
665 {
666       m_SerialXmlEnd = "";
667 }
668 END_NCBI_SCOPE
669