1 /* $Id: alnvecprint.cpp 339805 2011-10-03 17:28:28Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Kamen Todorov, NCBI
27 *
28 * File Description:
29 * CAlnVec printer.
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35 #include <objtools/alnmgr/alnvec.hpp>
36
37 USING_SCOPE(ncbi);
38 USING_SCOPE(objects);
39
40
CAlnVecPrinter(const CAlnVec & aln_vec,CNcbiOstream & out)41 CAlnVecPrinter::CAlnVecPrinter(const CAlnVec& aln_vec,
42 CNcbiOstream& out)
43 : CAlnMapPrinter(aln_vec, out),
44 m_AlnVec(aln_vec)
45 {
46 }
47
48
49 void
x_SetChars()50 CAlnVecPrinter::x_SetChars()
51 {
52 CAlnVec& aln_vec = const_cast<CAlnVec&>(m_AlnVec);
53
54 m_OrigSetGapChar = aln_vec.IsSetGapChar();
55 if (m_OrigSetGapChar) {
56 m_OrigGapChar = aln_vec.GetGapChar(0);
57 }
58 aln_vec.SetGapChar('-');
59
60 m_OrigSetEndChar = aln_vec.IsSetEndChar();
61 if (m_OrigSetEndChar) {
62 m_OrigEndChar = aln_vec.GetEndChar();
63 }
64 aln_vec.SetEndChar('-');
65 }
66
67
68 void
x_UnsetChars()69 CAlnVecPrinter::x_UnsetChars()
70 {
71 CAlnVec& aln_vec = const_cast<CAlnVec&>(m_AlnVec);
72
73 if (m_OrigSetGapChar) {
74 aln_vec.SetGapChar(m_OrigGapChar);
75 } else {
76 aln_vec.UnsetGapChar();
77 }
78
79 if (m_OrigSetEndChar) {
80 aln_vec.SetEndChar(m_OrigEndChar);
81 } else {
82 aln_vec.UnsetEndChar();
83 }
84 }
85
86
PopsetStyle(int scrn_width,EAlgorithm algorithm)87 void CAlnVecPrinter::PopsetStyle(int scrn_width,
88 EAlgorithm algorithm)
89 {
90 x_SetChars();
91
92 switch(algorithm) {
93 case eUseSeqString:
94 {
95 TSeqPos aln_len = m_AlnVec.GetAlnStop() + 1;
96 const CAlnMap::TNumrow nrows = m_NumRows;
97 const CAlnMap::TNumseg nsegs = m_AlnVec.GetNumSegs();
98 const CDense_seg::TStarts& starts = m_AlnVec.GetDenseg().GetStarts();
99 const CDense_seg::TLens& lens = m_AlnVec.GetDenseg().GetLens();
100
101 vector<string> buffer(nrows);
102 for (CAlnMap::TNumrow row = 0; row < nrows; row++) {
103
104 // allocate space for the row
105 buffer[row].reserve(aln_len + 1);
106 string buff;
107
108 int seg, pos, left_seg = -1, right_seg = -1;
109 TSignedSeqPos start;
110 TSeqPos len;
111
112 // determine the ending right seg
113 for (seg = nsegs - 1, pos = seg * nrows + row;
114 seg >= 0; --seg, pos -= nrows) {
115 if (starts[pos] >= 0) {
116 right_seg = seg;
117 break;
118 }
119 }
120
121 for (seg = 0, pos = row; seg < nsegs; ++seg, pos += nrows) {
122 len = lens[seg];
123 if ((start = starts[pos]) >= 0) {
124
125 left_seg = seg; // ending left seg is at most here
126
127 m_AlnVec.GetSeqString(buff,
128 row,
129 start,
130 start + len * m_AlnVec.GetWidth(row) - 1);
131 buffer[row] += buff;
132 } else {
133 // add appropriate number of gap/end chars
134 char* ch_buff = new char[len+1];
135 char fill_ch;
136 if (left_seg < 0 || seg > right_seg && right_seg > 0) {
137 fill_ch = m_AlnVec.GetEndChar();
138 } else {
139 fill_ch = m_AlnVec.GetGapChar(row);
140 }
141 memset(ch_buff, fill_ch, len);
142 ch_buff[len] = 0;
143 buffer[row] += ch_buff;
144 delete[] ch_buff;
145 }
146 }
147 }
148
149 TSeqPos pos = 0;
150 do {
151 for (CAlnMap::TNumrow row = 0; row < nrows; row++) {
152 PrintNumRow(row);
153 PrintId(row);
154 PrintSeqPos(m_AlnVec.GetSeqPosFromAlnPos(row, pos, CAlnMap::eLeft));
155 *m_Out << buffer[row].substr(pos, scrn_width)
156 << " "
157 << m_AlnVec.GetSeqPosFromAlnPos(row, pos + scrn_width - 1,
158 CAlnMap::eLeft)
159 << endl;
160 }
161 *m_Out << endl;
162 pos += scrn_width;
163 if (pos + scrn_width > aln_len) {
164 scrn_width = aln_len - pos;
165 }
166 } while (pos < aln_len);
167 break;
168 }
169 case eUseAlnSeqString:
170 {
171 TSeqPos aln_pos = 0;
172 CAlnMap::TSignedRange rng;
173
174 do {
175 // create range
176 rng.Set(aln_pos, aln_pos + scrn_width - 1);
177
178 string aln_seq_str;
179 aln_seq_str.reserve(scrn_width + 1);
180 // for each sequence
181 for (CAlnMap::TNumrow row = 0; row < m_NumRows; row++) {
182 PrintNumRow(row);
183 PrintId(row);
184 PrintSeqPos(m_AlnVec.GetSeqPosFromAlnPos(row, rng.GetFrom(),
185 CAlnMap::eLeft));
186 *m_Out << m_AlnVec.GetAlnSeqString(aln_seq_str, row, rng)
187 << " "
188 << m_AlnVec.GetSeqPosFromAlnPos(row, rng.GetTo(),
189 CAlnMap::eLeft)
190 << endl;
191 }
192 *m_Out << endl;
193 aln_pos += scrn_width;
194 } while (aln_pos < m_AlnVec.GetAlnStop());
195 break;
196 }
197 case eUseWholeAlnSeqString:
198 {
199 CAlnMap::TNumrow row, nrows = m_NumRows;
200
201 vector<string> buffer(nrows);
202 vector<CAlnMap::TSeqPosList> insert_aln_starts(nrows);
203 vector<CAlnMap::TSeqPosList> insert_starts(nrows);
204 vector<CAlnMap::TSeqPosList> insert_lens(nrows);
205 vector<CAlnMap::TSeqPosList> scrn_lefts(nrows);
206 vector<CAlnMap::TSeqPosList> scrn_rights(nrows);
207
208 // Fill in the vectors for each row
209 for (row = 0; row < nrows; row++) {
210 m_AlnVec.GetWholeAlnSeqString
211 (row,
212 buffer[row],
213 &insert_aln_starts[row],
214 &insert_starts[row],
215 &insert_lens[row],
216 scrn_width,
217 &scrn_lefts[row],
218 &scrn_rights[row]);
219 }
220
221 // Visualization
222 TSeqPos pos = 0, aln_len = m_AlnVec.GetAlnStop() + 1;
223 do {
224 for (row = 0; row < nrows; row++) {
225 PrintNumRow(row);
226 PrintId(row);
227 PrintSeqPos(scrn_lefts[row].front());
228 *m_Out << buffer[row].substr(pos, scrn_width)
229 << " "
230 << scrn_rights[row].front()
231 << endl;
232 scrn_lefts[row].pop_front();
233 scrn_rights[row].pop_front();
234 }
235 *m_Out << endl;
236 pos += scrn_width;
237 if (pos + scrn_width > aln_len) {
238 scrn_width = aln_len - pos;
239 }
240 } while (pos < aln_len);
241
242 break;
243 }
244 }
245 x_UnsetChars();
246 }
247
248
ClustalStyle(int scrn_width,EAlgorithm algorithm)249 void CAlnVecPrinter::ClustalStyle(int scrn_width,
250 EAlgorithm algorithm)
251 {
252 x_SetChars();
253
254 *m_Out << "CLUSTAL W (1.83) multiple sequence alignment" << endl << endl;
255
256 switch(algorithm) {
257 case eUseSeqString:
258 {
259 TSeqPos aln_len = m_AlnVec.GetAlnStop() + 1;
260 const CAlnMap::TNumseg nsegs = m_AlnVec.GetNumSegs();
261 const CDense_seg::TStarts& starts = m_AlnVec.GetDenseg().GetStarts();
262 const CDense_seg::TLens& lens = m_AlnVec.GetDenseg().GetLens();
263 CAlnMap::TNumrow row;
264
265 vector<string> buffer(m_NumRows+1);
266 for (row = 0; row < m_NumRows; row++) {
267
268 // allocate space for the row
269 buffer[row].reserve(aln_len + 1);
270 string buff;
271
272 int seg, pos, left_seg = -1, right_seg = -1;
273 TSignedSeqPos start;
274 TSeqPos len;
275
276 // determine the ending right seg
277 for (seg = nsegs - 1, pos = seg * m_NumRows + row;
278 seg >= 0; --seg, pos -= m_NumRows) {
279 if (starts[pos] >= 0) {
280 right_seg = seg;
281 break;
282 }
283 }
284
285 for (seg = 0, pos = row; seg < nsegs; ++seg, pos += m_NumRows) {
286 len = lens[seg];
287 if ((start = starts[pos]) >= 0) {
288
289 left_seg = seg; // ending left seg is at most here
290
291 m_AlnVec.GetSeqString(buff,
292 row,
293 start,
294 start + len * m_AlnVec.GetWidth(row) - 1);
295 buffer[row] += buff;
296 } else {
297 // add appropriate number of gap/end chars
298 char* ch_buff = new char[len+1];
299 char fill_ch;
300 if (left_seg < 0 || seg > right_seg && right_seg > 0) {
301 fill_ch = m_AlnVec.GetEndChar();
302 } else {
303 fill_ch = m_AlnVec.GetGapChar(row);
304 }
305 memset(ch_buff, fill_ch, len);
306 ch_buff[len] = 0;
307 buffer[row] += ch_buff;
308 delete[] ch_buff;
309 }
310 }
311 }
312 // Find identities
313 buffer[m_NumRows].resize(aln_len);
314 for (TSeqPos pos = 0; pos < aln_len; pos++) {
315 bool identity = true;
316 char residue = buffer[0][pos];
317 for (row = 1; row < m_NumRows; row++) {
318 if (buffer[row][pos] != residue) {
319 identity = false;
320 break;
321 }
322 }
323 buffer[m_NumRows][pos] = (identity ? '*' : ' ');
324 }
325
326
327 TSeqPos aln_pos = 0;
328 do {
329 for (CAlnMap::TNumrow row = 0; row < m_NumRows; row++) {
330 PrintId(row);
331 *m_Out << buffer[row].substr(aln_pos, scrn_width)
332 << endl;
333 }
334 m_Out->width(m_IdFieldLen);
335 *m_Out << "";
336 *m_Out << buffer[m_NumRows].substr(aln_pos, scrn_width)
337 << endl << endl;
338
339 aln_pos += scrn_width;
340 if (aln_pos + scrn_width > aln_len) {
341 scrn_width = aln_len - aln_pos;
342 }
343 } while (aln_pos < aln_len);
344 break;
345 }
346 case eUseAlnSeqString:
347 {
348 TSeqPos aln_pos = 0;
349 TSeqPos aln_stop = m_AlnVec.GetAlnStop();
350 CAlnMap::TSignedRange rng;
351
352 string identities_str;
353 identities_str.reserve(scrn_width + 1);
354
355 do {
356 // create range
357 rng.Set(aln_pos, min(aln_pos + scrn_width - 1, aln_stop));
358
359 string aln_seq_str;
360 aln_seq_str.reserve(scrn_width + 1);
361
362 // for each sequence
363 for (CAlnMap::TNumrow row = 0; row < m_NumRows; row++) {
364 PrintId(row);
365 *m_Out << m_AlnVec.GetAlnSeqString(aln_seq_str, row, rng)
366 << endl;
367
368 if (row == 0) {
369 identities_str = aln_seq_str;
370 } else {
371 for (size_t i = 0; i < aln_seq_str.length(); i++) {
372 if (aln_seq_str[i] != identities_str[i]) {
373 identities_str[i] = ' ';
374 }
375 }
376 }
377 }
378 for (size_t i = 0; i < identities_str.length(); i++) {
379 if (identities_str[i] != ' ') {
380 identities_str[i] = '*';
381 }
382 }
383 m_Out->width(m_IdFieldLen);
384 *m_Out << "";
385 *m_Out << identities_str
386 << endl << endl;
387 aln_pos += scrn_width;
388 } while (aln_pos < m_AlnVec.GetAlnStop());
389 break;
390 }
391 case eUseWholeAlnSeqString:
392 {
393 CAlnMap::TNumrow row;
394
395 vector<string> buffer(m_NumRows+1);
396
397 // Fill in the vectors for each row
398 for (row = 0; row < m_NumRows; row++) {
399 m_AlnVec.GetWholeAlnSeqString(row, buffer[row]);
400 }
401
402 TSeqPos pos = 0;
403 const TSeqPos aln_len = m_AlnVec.GetAlnStop() + 1;
404
405 // Find identities
406 buffer[m_NumRows].resize(aln_len);
407 for (pos = 0; pos < aln_len; pos++) {
408 bool identity = true;
409 char residue = buffer[0][pos];
410 for (row = 1; row < m_NumRows; row++) {
411 if (buffer[row][pos] != residue) {
412 identity = false;
413 break;
414 }
415 }
416 buffer[m_NumRows][pos] = (identity ? '*' : ' ');
417 }
418
419
420 // Visualization
421 pos = 0;
422 do {
423 for (row = 0; row < m_NumRows; row++) {
424 PrintId(row);
425 *m_Out << buffer[row].substr(pos, scrn_width)
426 << endl;
427 }
428 m_Out->width(m_IdFieldLen);
429 *m_Out << "";
430 *m_Out << buffer[m_NumRows].substr(pos, scrn_width)
431 << endl << endl;
432
433 pos += scrn_width;
434 if (pos + scrn_width > aln_len) {
435 scrn_width = aln_len - pos;
436 }
437 } while (pos < aln_len);
438
439 break;
440 }
441 }
442 x_UnsetChars();
443 }
444