1 /*  $Id: alnvecprint.cpp 339805 2011-10-03 17:28:28Z grichenk $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:  Kamen Todorov, NCBI
27 *
28 * File Description:
29 *   CAlnVec printer.
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <objtools/alnmgr/alnvec.hpp>
36 
37 USING_SCOPE(ncbi);
38 USING_SCOPE(objects);
39 
40 
CAlnVecPrinter(const CAlnVec & aln_vec,CNcbiOstream & out)41 CAlnVecPrinter::CAlnVecPrinter(const CAlnVec& aln_vec,
42                                CNcbiOstream&  out)
43     : CAlnMapPrinter(aln_vec, out),
44       m_AlnVec(aln_vec)
45 {
46 }
47 
48 
49 void
x_SetChars()50 CAlnVecPrinter::x_SetChars()
51 {
52     CAlnVec& aln_vec = const_cast<CAlnVec&>(m_AlnVec);
53 
54     m_OrigSetGapChar = aln_vec.IsSetGapChar();
55     if (m_OrigSetGapChar) {
56         m_OrigGapChar = aln_vec.GetGapChar(0);
57     }
58     aln_vec.SetGapChar('-');
59 
60     m_OrigSetEndChar = aln_vec.IsSetEndChar();
61     if (m_OrigSetEndChar) {
62         m_OrigEndChar = aln_vec.GetEndChar();
63     }
64     aln_vec.SetEndChar('-');
65 }
66 
67 
68 void
x_UnsetChars()69 CAlnVecPrinter::x_UnsetChars()
70 {
71     CAlnVec& aln_vec = const_cast<CAlnVec&>(m_AlnVec);
72 
73     if (m_OrigSetGapChar) {
74         aln_vec.SetGapChar(m_OrigGapChar);
75     } else {
76         aln_vec.UnsetGapChar();
77     }
78 
79     if (m_OrigSetEndChar) {
80         aln_vec.SetEndChar(m_OrigEndChar);
81     } else {
82         aln_vec.UnsetEndChar();
83     }
84 }
85 
86 
PopsetStyle(int scrn_width,EAlgorithm algorithm)87 void CAlnVecPrinter::PopsetStyle(int scrn_width,
88                                  EAlgorithm algorithm)
89 {
90     x_SetChars();
91 
92     switch(algorithm) {
93     case eUseSeqString:
94         {
95             TSeqPos aln_len = m_AlnVec.GetAlnStop() + 1;
96             const CAlnMap::TNumrow nrows = m_NumRows;
97             const CAlnMap::TNumseg nsegs = m_AlnVec.GetNumSegs();
98             const CDense_seg::TStarts& starts = m_AlnVec.GetDenseg().GetStarts();
99             const CDense_seg::TLens& lens = m_AlnVec.GetDenseg().GetLens();
100 
101             vector<string> buffer(nrows);
102             for (CAlnMap::TNumrow row = 0; row < nrows; row++) {
103 
104                 // allocate space for the row
105                 buffer[row].reserve(aln_len + 1);
106                 string buff;
107 
108                 int seg, pos, left_seg = -1, right_seg = -1;
109                 TSignedSeqPos start;
110                 TSeqPos len;
111 
112                 // determine the ending right seg
113                 for (seg = nsegs - 1, pos = seg * nrows + row;
114                      seg >= 0; --seg, pos -= nrows) {
115                     if (starts[pos] >= 0) {
116                         right_seg = seg;
117                         break;
118                     }
119                 }
120 
121                 for (seg = 0, pos = row;  seg < nsegs; ++seg, pos += nrows) {
122                     len = lens[seg];
123                     if ((start = starts[pos]) >= 0) {
124 
125                         left_seg = seg; // ending left seg is at most here
126 
127                         m_AlnVec.GetSeqString(buff,
128                                               row,
129                                               start,
130                                               start + len * m_AlnVec.GetWidth(row) - 1);
131                         buffer[row] += buff;
132                     } else {
133                         // add appropriate number of gap/end chars
134                         char* ch_buff = new char[len+1];
135                         char fill_ch;
136                         if (left_seg < 0  ||  seg > right_seg  &&  right_seg > 0) {
137                             fill_ch = m_AlnVec.GetEndChar();
138                         } else {
139                             fill_ch = m_AlnVec.GetGapChar(row);
140                         }
141                         memset(ch_buff, fill_ch, len);
142                         ch_buff[len] = 0;
143                         buffer[row] += ch_buff;
144                         delete[] ch_buff;
145                     }
146                 }
147             }
148 
149             TSeqPos pos = 0;
150             do {
151                 for (CAlnMap::TNumrow row = 0; row < nrows; row++) {
152                     PrintNumRow(row);
153                     PrintId(row);
154                     PrintSeqPos(m_AlnVec.GetSeqPosFromAlnPos(row, pos, CAlnMap::eLeft));
155                     *m_Out << buffer[row].substr(pos, scrn_width)
156                            << "  "
157                            << m_AlnVec.GetSeqPosFromAlnPos(row, pos + scrn_width - 1,
158                                                            CAlnMap::eLeft)
159                            << endl;
160                 }
161                 *m_Out << endl;
162                 pos += scrn_width;
163                 if (pos + scrn_width > aln_len) {
164                     scrn_width = aln_len - pos;
165                 }
166             } while (pos < aln_len);
167             break;
168         }
169     case eUseAlnSeqString:
170         {
171             TSeqPos aln_pos = 0;
172             CAlnMap::TSignedRange rng;
173 
174             do {
175                 // create range
176                 rng.Set(aln_pos, aln_pos + scrn_width - 1);
177 
178                 string aln_seq_str;
179                 aln_seq_str.reserve(scrn_width + 1);
180                 // for each sequence
181                 for (CAlnMap::TNumrow row = 0; row < m_NumRows; row++) {
182                     PrintNumRow(row);
183                     PrintId(row);
184                     PrintSeqPos(m_AlnVec.GetSeqPosFromAlnPos(row, rng.GetFrom(),
185                                                              CAlnMap::eLeft));
186                     *m_Out << m_AlnVec.GetAlnSeqString(aln_seq_str, row, rng)
187                            << " "
188                            << m_AlnVec.GetSeqPosFromAlnPos(row, rng.GetTo(),
189                                                            CAlnMap::eLeft)
190                            << endl;
191                 }
192                 *m_Out << endl;
193                 aln_pos += scrn_width;
194             } while (aln_pos < m_AlnVec.GetAlnStop());
195             break;
196         }
197     case eUseWholeAlnSeqString:
198         {
199             CAlnMap::TNumrow row, nrows = m_NumRows;
200 
201             vector<string> buffer(nrows);
202             vector<CAlnMap::TSeqPosList> insert_aln_starts(nrows);
203             vector<CAlnMap::TSeqPosList> insert_starts(nrows);
204             vector<CAlnMap::TSeqPosList> insert_lens(nrows);
205             vector<CAlnMap::TSeqPosList> scrn_lefts(nrows);
206             vector<CAlnMap::TSeqPosList> scrn_rights(nrows);
207 
208             // Fill in the vectors for each row
209             for (row = 0; row < nrows; row++) {
210                 m_AlnVec.GetWholeAlnSeqString
211                     (row,
212                      buffer[row],
213                      &insert_aln_starts[row],
214                      &insert_starts[row],
215                      &insert_lens[row],
216                      scrn_width,
217                      &scrn_lefts[row],
218                      &scrn_rights[row]);
219             }
220 
221             // Visualization
222             TSeqPos pos = 0, aln_len = m_AlnVec.GetAlnStop() + 1;
223             do {
224                 for (row = 0; row < nrows; row++) {
225                     PrintNumRow(row);
226                     PrintId(row);
227                     PrintSeqPos(scrn_lefts[row].front());
228                     *m_Out << buffer[row].substr(pos, scrn_width)
229                            << " "
230                            << scrn_rights[row].front()
231                            << endl;
232                     scrn_lefts[row].pop_front();
233                     scrn_rights[row].pop_front();
234                 }
235                 *m_Out << endl;
236                 pos += scrn_width;
237                 if (pos + scrn_width > aln_len) {
238                     scrn_width = aln_len - pos;
239                 }
240             } while (pos < aln_len);
241 
242             break;
243         }
244     }
245     x_UnsetChars();
246 }
247 
248 
ClustalStyle(int scrn_width,EAlgorithm algorithm)249 void CAlnVecPrinter::ClustalStyle(int scrn_width,
250                                   EAlgorithm algorithm)
251 {
252     x_SetChars();
253 
254     *m_Out << "CLUSTAL W (1.83) multiple sequence alignment" << endl << endl;
255 
256     switch(algorithm) {
257     case eUseSeqString:
258         {
259             TSeqPos aln_len = m_AlnVec.GetAlnStop() + 1;
260             const CAlnMap::TNumseg nsegs = m_AlnVec.GetNumSegs();
261             const CDense_seg::TStarts& starts = m_AlnVec.GetDenseg().GetStarts();
262             const CDense_seg::TLens& lens = m_AlnVec.GetDenseg().GetLens();
263             CAlnMap::TNumrow row;
264 
265             vector<string> buffer(m_NumRows+1);
266             for (row = 0; row < m_NumRows; row++) {
267 
268                 // allocate space for the row
269                 buffer[row].reserve(aln_len + 1);
270                 string buff;
271 
272                 int seg, pos, left_seg = -1, right_seg = -1;
273                 TSignedSeqPos start;
274                 TSeqPos len;
275 
276                 // determine the ending right seg
277                 for (seg = nsegs - 1, pos = seg * m_NumRows + row;
278                      seg >= 0; --seg, pos -= m_NumRows) {
279                     if (starts[pos] >= 0) {
280                         right_seg = seg;
281                         break;
282                     }
283                 }
284 
285                 for (seg = 0, pos = row;  seg < nsegs; ++seg, pos += m_NumRows) {
286                     len = lens[seg];
287                     if ((start = starts[pos]) >= 0) {
288 
289                         left_seg = seg; // ending left seg is at most here
290 
291                         m_AlnVec.GetSeqString(buff,
292                                               row,
293                                               start,
294                                               start + len * m_AlnVec.GetWidth(row) - 1);
295                         buffer[row] += buff;
296                     } else {
297                         // add appropriate number of gap/end chars
298                         char* ch_buff = new char[len+1];
299                         char fill_ch;
300                         if (left_seg < 0  ||  seg > right_seg  &&  right_seg > 0) {
301                             fill_ch = m_AlnVec.GetEndChar();
302                         } else {
303                             fill_ch = m_AlnVec.GetGapChar(row);
304                         }
305                         memset(ch_buff, fill_ch, len);
306                         ch_buff[len] = 0;
307                         buffer[row] += ch_buff;
308                         delete[] ch_buff;
309                     }
310                 }
311             }
312             // Find identities
313             buffer[m_NumRows].resize(aln_len);
314             for (TSeqPos pos = 0; pos < aln_len; pos++) {
315                 bool identity = true;
316                 char residue = buffer[0][pos];
317                 for (row = 1; row < m_NumRows; row++) {
318                     if (buffer[row][pos] != residue) {
319                         identity = false;
320                         break;
321                     }
322                 }
323                 buffer[m_NumRows][pos] = (identity ? '*' : ' ');
324             }
325 
326 
327             TSeqPos aln_pos = 0;
328             do {
329                 for (CAlnMap::TNumrow row = 0; row < m_NumRows; row++) {
330                     PrintId(row);
331                     *m_Out << buffer[row].substr(aln_pos, scrn_width)
332                            << endl;
333                 }
334                 m_Out->width(m_IdFieldLen);
335                 *m_Out << "";
336                 *m_Out << buffer[m_NumRows].substr(aln_pos, scrn_width)
337                        << endl << endl;
338 
339                 aln_pos += scrn_width;
340                 if (aln_pos + scrn_width > aln_len) {
341                     scrn_width = aln_len - aln_pos;
342                 }
343             } while (aln_pos < aln_len);
344             break;
345         }
346     case eUseAlnSeqString:
347         {
348             TSeqPos aln_pos = 0;
349             TSeqPos aln_stop = m_AlnVec.GetAlnStop();
350             CAlnMap::TSignedRange rng;
351 
352             string identities_str;
353             identities_str.reserve(scrn_width + 1);
354 
355             do {
356                 // create range
357                 rng.Set(aln_pos, min(aln_pos + scrn_width - 1, aln_stop));
358 
359                 string aln_seq_str;
360                 aln_seq_str.reserve(scrn_width + 1);
361 
362                 // for each sequence
363                 for (CAlnMap::TNumrow row = 0; row < m_NumRows; row++) {
364                     PrintId(row);
365                     *m_Out << m_AlnVec.GetAlnSeqString(aln_seq_str, row, rng)
366                            << endl;
367 
368                     if (row == 0) {
369                         identities_str = aln_seq_str;
370                     } else {
371                         for (size_t i = 0; i < aln_seq_str.length(); i++) {
372                             if (aln_seq_str[i] != identities_str[i]) {
373                                 identities_str[i] = ' ';
374                             }
375                         }
376                     }
377                 }
378                 for (size_t i = 0; i < identities_str.length(); i++) {
379                     if (identities_str[i] != ' ') {
380                         identities_str[i] = '*';
381                     }
382                 }
383                 m_Out->width(m_IdFieldLen);
384                 *m_Out << "";
385                 *m_Out << identities_str
386                        << endl << endl;
387                 aln_pos += scrn_width;
388             } while (aln_pos < m_AlnVec.GetAlnStop());
389             break;
390         }
391     case eUseWholeAlnSeqString:
392         {
393             CAlnMap::TNumrow row;
394 
395             vector<string> buffer(m_NumRows+1);
396 
397             // Fill in the vectors for each row
398             for (row = 0; row < m_NumRows; row++) {
399                 m_AlnVec.GetWholeAlnSeqString(row, buffer[row]);
400             }
401 
402             TSeqPos pos = 0;
403             const TSeqPos aln_len = m_AlnVec.GetAlnStop() + 1;
404 
405             // Find identities
406             buffer[m_NumRows].resize(aln_len);
407             for (pos = 0; pos < aln_len; pos++) {
408                 bool identity = true;
409                 char residue = buffer[0][pos];
410                 for (row = 1; row < m_NumRows; row++) {
411                     if (buffer[row][pos] != residue) {
412                         identity = false;
413                         break;
414                     }
415                 }
416                 buffer[m_NumRows][pos] = (identity ? '*' : ' ');
417             }
418 
419 
420             // Visualization
421             pos = 0;
422             do {
423                 for (row = 0; row < m_NumRows; row++) {
424                     PrintId(row);
425                     *m_Out << buffer[row].substr(pos, scrn_width)
426                         << endl;
427                 }
428                 m_Out->width(m_IdFieldLen);
429                 *m_Out << "";
430                 *m_Out << buffer[m_NumRows].substr(pos, scrn_width)
431                        << endl << endl;
432 
433                 pos += scrn_width;
434                 if (pos + scrn_width > aln_len) {
435                     scrn_width = aln_len - pos;
436                 }
437             } while (pos < aln_len);
438 
439             break;
440         }
441     }
442     x_UnsetChars();
443 }
444