1 /* $Id: update_viewer.cpp 620017 2020-11-13 19:16:01Z hurwitz $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Paul Thiessen
27 *
28 * File Description:
29 * implementation of non-GUI part of update viewer
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbistl.hpp>
37 #include <objtools/readers/fasta.hpp>
38 #include <util/line_reader.hpp>
39
40 #include <objects/cdd/Cdd.hpp>
41 #include <objects/cdd/Update_align.hpp>
42 #include <objects/cdd/Update_comment.hpp>
43 #include <objects/seq/Seq_annot.hpp>
44 #include <objects/seq/Bioseq.hpp>
45 #include <objects/seqloc/PDB_seq_id.hpp>
46 #include <objects/seqset/Seq_entry.hpp>
47 #include <objects/seqset/Bioseq_set.hpp>
48 #include <objects/ncbimime/Ncbi_mime_asn1.hpp>
49 #include <objects/ncbimime/Biostruc_seq.hpp>
50 #include <objects/mmdb2/Model_type.hpp>
51 #include <objects/mmdb2/Biostruc_model.hpp>
52 #include <objects/mmdb1/Biostruc_graph.hpp>
53 #include <objects/mmdb1/Biostruc_descr.hpp>
54 #include <objects/general/Dbtag.hpp>
55 #include <objects/general/Object_id.hpp>
56 #include <objects/mmdb1/Biostruc_id.hpp>
57 #include <objects/mmdb1/Mmdb_id.hpp>
58 #include <objects/mmdb1/Biostruc_annot_set.hpp>
59 #include <objects/mmdb3/Biostruc_feature_set.hpp>
60 #include <objects/mmdb3/Chem_graph_alignment.hpp>
61 #include <objects/mmdb3/Chem_graph_pntrs.hpp>
62 #include <objects/mmdb3/Residue_pntrs.hpp>
63 #include <objects/mmdb3/Residue_interval_pntr.hpp>
64 #include <objects/mmdb1/Molecule_id.hpp>
65 #include <objects/mmdb1/Residue_id.hpp>
66 #include <objects/mmdb3/Biostruc_feature_set_id.hpp>
67 #include <objects/mmdb3/Biostruc_feature_id.hpp>
68
69 #include <memory>
70 #include <algorithm>
71
72 #include "remove_header_conflicts.hpp"
73
74 #include "update_viewer.hpp"
75 #include "asn_reader.hpp"
76 #include "update_viewer_window.hpp"
77 #include "messenger.hpp"
78 #include "sequence_display.hpp"
79 #include "cn3d_colors.hpp"
80 #include "alignment_manager.hpp"
81 #include "cn3d_threader.hpp"
82 #include "structure_set.hpp"
83 #include "molecule.hpp"
84 #include "cn3d_tools.hpp"
85 #include "cn3d_blast.hpp"
86 #include "molecule_identifier.hpp"
87 #include "cn3d_cache.hpp"
88 #include "cn3d_ba_interface.hpp"
89 #include "cn3d_pssm.hpp"
90
91 #include <wx/tokenzr.h>
92
93 USING_NCBI_SCOPE;
94 USING_SCOPE(objects);
95
96
BEGIN_SCOPE(Cn3D)97 BEGIN_SCOPE(Cn3D)
98
99 UpdateViewer::UpdateViewer(AlignmentManager *alnMgr) :
100 // not sure why this cast is necessary, but MSVC requires it...
101 ViewerBase(reinterpret_cast<ViewerWindowBase**>(&updateWindow), alnMgr),
102 updateWindow(NULL)
103 {
104 // when first created, start with blank display
105 AlignmentList emptyAlignmentList;
106 SequenceDisplay *blankDisplay = new SequenceDisplay(true, viewerWindow);
107 InitData(&emptyAlignmentList, blankDisplay);
108 EnableStacks();
109 }
110
~UpdateViewer(void)111 UpdateViewer::~UpdateViewer(void)
112 {
113 }
114
SetInitialState(void)115 void UpdateViewer::SetInitialState(void)
116 {
117 KeepCurrent();
118 EnableStacks();
119 }
120
SaveDialog(bool prompt)121 void UpdateViewer::SaveDialog(bool prompt)
122 {
123 if (updateWindow) updateWindow->SaveDialog(prompt, false);
124 }
125
CreateUpdateWindow(void)126 void UpdateViewer::CreateUpdateWindow(void)
127 {
128 if (updateWindow) {
129 updateWindow->Show(true);
130 GlobalMessenger()->PostRedrawSequenceViewer(this);
131 } else {
132 SequenceDisplay *display = GetCurrentDisplay();
133 if (display) {
134 updateWindow = new UpdateViewerWindow(this);
135 updateWindow->NewDisplay(display, false);
136 updateWindow->ScrollToColumn(display->GetStartingColumn());
137 updateWindow->Show(true);
138 // ScrollTo causes immediate redraw, so don't need a second one
139 GlobalMessenger()->UnPostRedrawSequenceViewer(this);
140 }
141 }
142 }
143
AddAlignments(const AlignmentList & newAlignments)144 void UpdateViewer::AddAlignments(const AlignmentList& newAlignments)
145 {
146 AlignmentList& alignments = GetCurrentAlignments();
147 SequenceDisplay *display = GetCurrentDisplay();
148
149 // populate successive lines of the display with each alignment, with blank lines inbetween
150 AlignmentList::const_iterator a, ae = newAlignments.end();
151 for (a=newAlignments.begin(); a!=ae; ++a) {
152 if ((*a)->NRows() != 2) {
153 ERRORMSG("UpdateViewer::AddAlignments() - got alignment with " << (*a)->NRows() << " rows");
154 continue;
155 }
156
157 // add alignment to stack list
158 alignments.push_back(*a);
159 (*a)->ShowGeometryViolations(updateWindow ? updateWindow->GeometryViolationsShown() : false);
160
161 // add alignment to the display, including block row since editor is always on
162 if (display->NRows() != 0) display->AddRowFromString("");
163 display->AddBlockBoundaryRow(*a);
164 for (unsigned int row=0; row<2; ++row)
165 display->AddRowFromAlignment(row, *a);
166 }
167
168 if (alignments.size() > 0)
169 display->SetStartingColumn(alignments.front()->GetFirstAlignedBlockPosition() - 5);
170
171 Save(); // make this an undoable operation
172 if (updateWindow) updateWindow->UpdateDisplay(display);
173 }
174
ReplaceAlignments(const AlignmentList & alignmentList)175 void UpdateViewer::ReplaceAlignments(const AlignmentList& alignmentList)
176 {
177 // empty out the current alignment list and display (but not the undo stacks!)
178 DELETE_ALL_AND_CLEAR(GetCurrentAlignments(), AlignmentList);
179 GetCurrentDisplay()->Empty();
180
181 AddAlignments(alignmentList);
182 }
183
DeleteAlignment(BlockMultipleAlignment * toDelete)184 void UpdateViewer::DeleteAlignment(BlockMultipleAlignment *toDelete)
185 {
186 AlignmentList keepAlignments;
187 AlignmentList::const_iterator a, ae = GetCurrentAlignments().end();
188 for (a=GetCurrentAlignments().begin(); a!=ae; ++a)
189 if (*a != toDelete)
190 keepAlignments.push_back((*a)->Clone());
191
192 ReplaceAlignments(keepAlignments);
193 }
194
SaveAlignments(void)195 void UpdateViewer::SaveAlignments(void)
196 {
197 SetInitialState();
198
199 // construct a new list of ASN Update-aligns
200 CCdd::TPending updates;
201 map < CUpdate_align * , bool > usedUpdateAligns;
202
203 AlignmentList::const_iterator a, ae = GetCurrentAlignments().end();
204 for (a=GetCurrentAlignments().begin(); a!=ae; ++a) {
205
206 // create a Seq-align (with Dense-diags) out of this update
207 if ((*a)->NRows() != 2) {
208 ERRORMSG("CreateSeqAlignFromUpdate() - can only save pairwise updates");
209 continue;
210 }
211 BlockMultipleAlignment::UngappedAlignedBlockList blocks;
212 (*a)->GetUngappedAlignedBlocks(&blocks);
213 CSeq_align *newSeqAlign = CreatePairwiseSeqAlignFromMultipleRow(*a, blocks, 1);
214 if (!newSeqAlign) continue;
215
216 // the Update-align and Seq-annot's list of Seq-aligns where this new Seq-align will go
217 CUpdate_align *updateAlign = (*a)->updateOrigin.GetPointer();
218 CSeq_annot::C_Data::TAlign *seqAlignList = NULL;
219
220 // if this alignment came from an existing Update-align, then replace just the Seq-align
221 // so that the rest of the Update-align's comments/annotations are preserved
222 if (updateAlign) {
223 if (!updateAlign->IsSetSeqannot() || !updateAlign->GetSeqannot().GetData().IsAlign()) {
224 ERRORMSG("UpdateViewer::SaveAlignments() - confused by Update-align format");
225 continue;
226 }
227 }
228
229 // if this is a new update, create a new Update-align and tag it
230 else {
231 updateAlign = new CUpdate_align();
232
233 // set type field depending on whether demoted sequence has structure
234 updateAlign->SetType((*a)->GetSequenceOfRow(1)->molecule ?
235 CUpdate_align::eType_demoted_3d : CUpdate_align::eType_demoted);
236
237 // add a text comment
238 CUpdate_comment *comment = new CUpdate_comment();
239 comment->SetComment("Created by demotion or import in Cn3D");
240 updateAlign->SetDescription().resize(updateAlign->GetDescription().size() + 1);
241 updateAlign->SetDescription().back().Reset(comment);
242
243 // create a new Seq-annot
244 CRef < CSeq_annot > seqAnnotRef(new CSeq_annot());
245 seqAnnotRef->SetData().SetAlign();
246 updateAlign->SetSeqannot(*seqAnnotRef);
247 }
248
249 // get Seq-align list
250 if (!updateAlign || !(seqAlignList = &(updateAlign->SetSeqannot().SetData().SetAlign()))) {
251 ERRORMSG("UpdateViewer::SaveAlignments() - can't find Update-align and/or Seq-align list");
252 continue;
253 }
254
255 // if this is the first re-use of this Update-align, then empty out all existing
256 // Seq-aligns and push it onto the new update list
257 if (usedUpdateAligns.find(updateAlign) == usedUpdateAligns.end()) {
258 seqAlignList->clear();
259 updates.resize(updates.size() + 1);
260 updates.back().Reset(updateAlign);
261 usedUpdateAligns[updateAlign] = true;
262 }
263
264 // finally, add the new Seq-align to the list
265 seqAlignList->resize(seqAlignList->size() + 1);
266 seqAlignList->back().Reset(newSeqAlign);
267 }
268
269 // save these changes to the ASN data
270 alignmentManager->ReplaceUpdatesInASN(updates);
271 }
272
GetMasterSequence(void) const273 const Sequence * UpdateViewer::GetMasterSequence(void) const
274 {
275 const Sequence *master = NULL;
276
277 // if there's a multiple alignment in the sequence viewer, use same master as that
278 if (alignmentManager->GetCurrentMultipleAlignment()) {
279 master = alignmentManager->GetCurrentMultipleAlignment()->GetMaster();
280 }
281 // if there's already an update present, use same master as that
282 else if (GetCurrentAlignments().size() > 0) {
283 master = GetCurrentAlignments().front()->GetMaster();
284 }
285 // otherwise, this must be a single structure with no updates, so we need to pick one of its
286 // chains as the new master
287 else {
288 vector < const Sequence * > chains;
289 if (alignmentManager->GetStructureProteins(&chains)) {
290 if (chains.size() == 1) {
291 master = chains[0];
292 } else {
293 wxString *titles = new wxString[chains.size()];
294 int choice;
295 for (choice=0; choice<(int)chains.size(); ++choice)
296 titles[choice] = chains[choice]->identifier->ToString().c_str();
297 choice = wxGetSingleChoiceIndex("Align to which protein chain?",
298 "Select Chain", chains.size(), titles);
299 if (choice >= 0)
300 master = chains[choice];
301 else // cancelled
302 return NULL;
303 }
304 }
305 }
306 if (!master)
307 ERRORMSG("UpdateViewer::GetMasterSequence() - no master sequence defined");
308 return master;
309 }
310
FetchSequencesViaHTTP(SequenceList * newSequences,StructureSet * sSet) const311 void UpdateViewer::FetchSequencesViaHTTP(SequenceList *newSequences, StructureSet *sSet) const
312 {
313 wxString ids =
314 wxGetTextFromUser("Enter a list of protein GIs or Accessions:",
315 "Input Identifier", "", *viewerWindow);
316 if (ids.size() == 0) return;
317
318 wxStringTokenizer tkz(ids, " ,;\t\r\n", wxTOKEN_STRTOK);
319 while (tkz.HasMoreTokens()) {
320 wxString id = tkz.GetNextToken();
321 CRef < CBioseq > bioseq = FetchSequenceViaHTTP(WX_TO_STD(id));
322 if (bioseq.Empty())
323 return;
324 const Sequence *sequence = sSet->FindOrCreateSequence(*bioseq);
325 if (sequence) {
326 if (sequence->isProtein)
327 newSequences->push_back(sequence);
328 else
329 ERRORMSG("The sequence must be a protein");
330 }
331 }
332 }
333
ReadSequencesFromFile(SequenceList * newSequences,StructureSet * sSet) const334 void UpdateViewer::ReadSequencesFromFile(SequenceList *newSequences, StructureSet *sSet) const
335 {
336 newSequences->clear();
337
338 wxString fastaFile = wxFileSelector("Choose a FASTA file from which to import",
339 "", "", "", "*.*", wxFD_OPEN | wxFD_FILE_MUST_EXIST, *viewerWindow);
340 if (fastaFile.size() > 0) {
341
342 // get Seq-entry of all sequences in the file
343 CNcbiIfstream ifs(WX_TO_STD(fastaFile).c_str(), IOS_BASE::in);
344 if (!ifs) {
345 ERRORMSG("Unable to open file " << fastaFile.c_str());
346 return;
347 }
348 CStreamLineReader slr(ifs);
349 CFastaReader reader(slr, CFastaReader::fAssumeProt);
350 CRef < CSeq_entry > se = reader.ReadSet();
351 // string err;
352 // WriteASNToFile("Seq-entry.txt", *se, false, &err);
353
354 // create Sequences from each one
355 if (se->IsSeq()) {
356 const Sequence *sequence = sSet->FindOrCreateSequence(se->SetSeq());
357 if (sequence)
358 newSequences->push_back(sequence);
359 } else if (se->IsSet() && se->GetSet().GetSeq_set().size() > 0) {
360 CBioseq_set::TSeq_set::iterator b, be = se->SetSet().SetSeq_set().end();
361 for (b=se->SetSet().SetSeq_set().begin(); b!=be; ++b) {
362 if (!(*b)->IsSeq()) {
363 WARNINGMSG("CFastaReader returned nested Bioseq-set");
364 } else {
365 const Sequence *sequence = sSet->FindOrCreateSequence((*b)->SetSeq());
366 if (sequence)
367 newSequences->push_back(sequence);
368 }
369 }
370 } else {
371 WARNINGMSG("CFastaReader returned unknown format or no Bioseqs");
372 }
373 }
374 }
375
MakeEmptyAlignment(const Sequence * master,const Sequence * dependent)376 static BlockMultipleAlignment * MakeEmptyAlignment(const Sequence *master, const Sequence *dependent)
377 {
378 BlockMultipleAlignment::SequenceList *seqs = new BlockMultipleAlignment::SequenceList(2);
379 (*seqs)[0] = master;
380 (*seqs)[1] = dependent;
381 BlockMultipleAlignment *newAlignment =
382 new BlockMultipleAlignment(seqs, master->parentSet->alignmentManager);
383 if (!newAlignment->AddUnalignedBlocks() || !newAlignment->UpdateBlockMapAndColors(false)) {
384 ERRORMSG("MakeEmptyAlignment() - error finalizing alignment");
385 delete newAlignment;
386 return NULL;
387 }
388 return newAlignment;
389 }
390
MakeEmptyAlignments(const SequenceList & newSequences,const Sequence * master,AlignmentList * newAlignments) const391 void UpdateViewer::MakeEmptyAlignments(const SequenceList& newSequences,
392 const Sequence *master, AlignmentList *newAlignments) const
393 {
394 newAlignments->clear();
395 SequenceList::const_iterator s, se = newSequences.end();
396 for (s=newSequences.begin(); s!=se; ++s) {
397 BlockMultipleAlignment *newAlignment = MakeEmptyAlignment(master, *s);
398 if (newAlignment) newAlignments->push_back(newAlignment);
399 }
400 }
401
FetchSequences(StructureSet * sSet,SequenceList * newSequences) const402 void UpdateViewer::FetchSequences(StructureSet *sSet, SequenceList *newSequences) const
403 {
404 // choose import type
405 static const wxString choiceStrings[] = { "Network via GI/Accession", "From a FASTA File" };
406 enum choiceValues { FROM_GI=0, FROM_FASTA, N_CHOICES };
407 int importFrom = wxGetSingleChoiceIndex("From what source would you like to import sequences?",
408 "Select Import Source", N_CHOICES, choiceStrings, *viewerWindow);
409 if (importFrom < 0) return; // cancelled
410
411 // network import
412 if (importFrom == FROM_GI)
413 FetchSequencesViaHTTP(newSequences, sSet);
414
415 // FASTA import
416 else if (importFrom == FROM_FASTA)
417 ReadSequencesFromFile(newSequences, sSet);
418 }
419
ImportSequences(void)420 void UpdateViewer::ImportSequences(void)
421 {
422 // determine the master sequence for new alignments
423 const Sequence *master = GetMasterSequence();
424 if (!master) return;
425
426 // import the new sequence(s)
427 SequenceList newSequences;
428 FetchSequences(master->parentSet, &newSequences);
429 if (newSequences.size() == 0) {
430 WARNINGMSG("UpdateViewer::ImportSequence() - no sequences were imported");
431 return;
432 }
433
434 // create null-alignments for each sequence
435 AlignmentList newAlignments;
436 MakeEmptyAlignments(newSequences, master, &newAlignments);
437
438 // add new alignments to update list
439 if (newAlignments.size() > 0)
440 AddAlignments(newAlignments);
441 else
442 ERRORMSG("UpdateViewer::ImportSequence() - no new alignments were created");
443 }
444
GetVASTAlignments(const SequenceList & newSequences,const Sequence * master,AlignmentList * newAlignments,PendingStructureAlignments * structureAlignments,unsigned int masterFrom,unsigned int masterTo) const445 void UpdateViewer::GetVASTAlignments(const SequenceList& newSequences,
446 const Sequence *master, AlignmentList *newAlignments,
447 PendingStructureAlignments *structureAlignments,
448 unsigned int masterFrom, unsigned int masterTo) const
449 {
450 if (master->identifier->pdbID.size() == 0) {
451 WARNINGMSG("UpdateViewer::GetVASTAlignments() - "
452 "can't be called with non-MMDB master " << master->identifier->ToString());
453 return;
454 }
455
456 SequenceList::const_iterator s, se = newSequences.end();
457 for (s=newSequences.begin(); s!=se; ++s) {
458 if ((*s)->identifier->pdbID.size() == 0) {
459 WARNINGMSG("UpdateViewer::GetVASTAlignments() - "
460 "can't be called with non-MMDB dependent " << (*s)->identifier->ToString());
461 BlockMultipleAlignment *newAlignment = MakeEmptyAlignment(master, *s);
462 if (newAlignment) newAlignments->push_back(newAlignment);
463 continue;
464 }
465
466 // set up URL
467 string
468 host = "www.ncbi.nlm.nih.gov",
469 path = "/Structure/VA/vastalign.cgi", err;
470 CNcbiOstrstream argstr;
471 argstr << "main=" << master->identifier->ToString()
472 << "&neighbor=" << (*s)->identifier->ToString();
473 if (masterFrom <= masterTo && masterFrom < master->Length() && masterTo < master->Length())
474 argstr << "&from=" << (masterFrom+1) << "&to=" << (masterTo+1); // URL #'s are 1-based
475 string args((string) CNcbiOstrstreamToString(argstr));
476
477 // connect to vastalign.cgi
478 CBiostruc_annot_set structureAlignment;
479 INFOMSG("trying to load VAST alignment data from " << host << path << '?' << args);
480
481 if (!GetAsnDataViaHTTPS(host, path, args, &structureAlignment, &err)) {
482 ERRORMSG("Error calling vastalign.cgi: " << err);
483 BlockMultipleAlignment *newAlignment = MakeEmptyAlignment(master, *s);
484 if (newAlignment) newAlignments->push_back(newAlignment);
485 continue;
486 }
487 INFOMSG("successfully loaded data from vastalign.cgi");
488 #ifdef _DEBUG
489 WriteASNToFile("vastalign.dat.txt", structureAlignment, false, &err);
490 #endif
491
492 // create initially empty alignment
493 BlockMultipleAlignment::SequenceList *seqs = new BlockMultipleAlignment::SequenceList(2);
494 (*seqs)[0] = master;
495 (*seqs)[1] = *s;
496 BlockMultipleAlignment *newAlignment =
497 new BlockMultipleAlignment(seqs, master->parentSet->alignmentManager);
498
499 // skip if no VAST alignment found
500 if (structureAlignment.IsSetId() && structureAlignment.GetId().front()->IsMmdb_id() &&
501 structureAlignment.GetId().front()->GetMmdb_id().Get() == 0)
502 {
503 WARNINGMSG("VAST found no alignment for these chains");
504 }
505
506 else try {
507
508 if (!structureAlignment.IsSetId() || !structureAlignment.GetId().front()->IsMmdb_id() ||
509 structureAlignment.GetFeatures().size() != 1 ||
510 structureAlignment.GetFeatures().front()->GetFeatures().size() != 1 ||
511 !structureAlignment.GetFeatures().front()->GetFeatures().front()->IsSetLocation() ||
512 !structureAlignment.GetFeatures().front()->GetFeatures().front()->GetLocation().IsAlignment())
513 {
514 throw "VAST data does not contain exactly one alignment of recognized format - "
515 "possibly a problem with vastalign.cgi";
516 }
517
518 if (structureAlignment.GetId().front()->GetMmdb_id().Get() != master->identifier->mmdbID)
519 throw "Master structure MMDB ID mismatch - check to see if this structure been updated";
520
521 // load in alignment, check format
522 const CChem_graph_alignment& alignment =
523 structureAlignment.GetFeatures().front()->GetFeatures().front()->GetLocation().GetAlignment();
524 if (alignment.GetDimension() != 2 || alignment.GetAlignment().size() != 2 ||
525 alignment.GetBiostruc_ids().size() != 2 ||
526 !alignment.GetBiostruc_ids().front()->IsMmdb_id() ||
527 alignment.GetBiostruc_ids().front()->GetMmdb_id().Get() != master->identifier->mmdbID ||
528 !alignment.GetBiostruc_ids().back()->IsMmdb_id() ||
529 alignment.GetBiostruc_ids().back()->GetMmdb_id().Get() != (*s)->identifier->mmdbID ||
530 !alignment.GetAlignment().front()->IsResidues() ||
531 !alignment.GetAlignment().front()->GetResidues().IsInterval() ||
532 !alignment.GetAlignment().back()->IsResidues() ||
533 !alignment.GetAlignment().back()->GetResidues().IsInterval() ||
534 alignment.GetAlignment().front()->GetResidues().GetInterval().size() !=
535 alignment.GetAlignment().back()->GetResidues().GetInterval().size())
536 {
537 throw "Unrecognized VAST data format";
538 }
539
540 // construct alignment from residue intervals
541 CResidue_pntrs::TInterval::const_iterator i, j,
542 ie = alignment.GetAlignment().front()->GetResidues().GetInterval().end();
543 for (i=alignment.GetAlignment().front()->GetResidues().GetInterval().begin(),
544 j=alignment.GetAlignment().back()->GetResidues().GetInterval().begin(); i!=ie; ++i, ++j)
545 {
546 if ((*i)->GetMolecule_id().Get() != master->identifier->moleculeID ||
547 (*j)->GetMolecule_id().Get() != (*s)->identifier->moleculeID)
548 {
549 throw "Mismatch in molecule ids in alignment interval block";
550 }
551 UngappedAlignedBlock *newBlock = new UngappedAlignedBlock(newAlignment);
552 newBlock->SetRangeOfRow(0, (*i)->GetFrom().Get() - 1, (*i)->GetTo().Get() - 1);
553 newBlock->SetRangeOfRow(1, (*j)->GetFrom().Get() - 1, (*j)->GetTo().Get() - 1);
554 newBlock->width = (*i)->GetTo().Get() - (*i)->GetFrom().Get() + 1;
555 newAlignment->AddAlignedBlockAtEnd(newBlock);
556 }
557
558 // add structure alignment to list
559 if (alignment.GetTransform().size() == 1)
560 {
561 structureAlignments->resize(structureAlignments->size() + 1);
562 structureAlignments->back().structureAlignment =
563 structureAlignment.SetFeatures().front()->SetFeatures().front();
564 structureAlignments->back().masterDomainID =
565 structureAlignment.GetFeatures().front()->GetId().Get();
566 structureAlignments->back().dependentDomainID =
567 structureAlignment.GetFeatures().front()->GetFeatures().front()->GetId().Get();
568 } else
569 throw "No structure alignment in VAST data blob";
570
571 } catch (const char *err) {
572 ERRORMSG("Failed to import VAST alignment: " << err);
573 }
574
575 // finalize alignment
576 if (!newAlignment->AddUnalignedBlocks() || !newAlignment->UpdateBlockMapAndColors(false)) {
577 ERRORMSG("MakeEmptyAlignment() - error finalizing alignment");
578 delete newAlignment;
579 continue;
580 }
581 newAlignments->push_back(newAlignment);
582 }
583 }
584
ImportStructure(void)585 void UpdateViewer::ImportStructure(void)
586 {
587 // determine the master sequence for new alignments
588 const Sequence *master = GetMasterSequence();
589 if (!master) return;
590 if (master->parentSet->objects.size() == 1 && master->parentSet->frameMap.size() != 1) {
591 ERRORMSG("Can't import another structure when current structure has multiple models");
592 return;
593 }
594
595 // choose import type for structure
596 static const wxString choiceStrings[] = { "Via Network", "From a File" };
597 enum choiceValues { FROM_NETWORK=0, FROM_FILE, N_CHOICES };
598 int importFrom = wxGetSingleChoiceIndex(
599 "From what source would you like to import the structure?", "Select Import Source",
600 N_CHOICES, choiceStrings, *viewerWindow);
601 if (importFrom < 0) return; // cancelled
602
603 CRef < CBiostruc > biostruc;
604 BioseqRefList bioseqs;
605
606 if (importFrom == FROM_NETWORK) {
607 wxString id = wxGetTextFromUser("Enter a PDB or MMDB ID:", "Input Identifier", "", *viewerWindow);
608 biostruc.Reset(new CBiostruc());
609 if (!LoadStructureViaCache(WX_TO_STD(id),
610 (master->parentSet->isAlphaOnly ? eModel_type_ncbi_backbone : eModel_type_ncbi_all_atom),
611 0,
612 biostruc, &bioseqs)) {
613 ERRORMSG("Failed to load structure " << id.c_str());
614 return;
615 }
616 }
617
618 else if (importFrom == FROM_FILE) {
619 string filename = WX_TO_STD(wxFileSelector("Choose a single-structure file:",
620 GetUserDir().c_str(), "", "", "*.*", wxFD_OPEN | wxFD_FILE_MUST_EXIST, *viewerWindow));
621 if (filename.size() == 0) return;
622 bool readOK = false;
623 string err;
624 TRACEMSG("trying to read file '" << filename << "' as binary mime");
625 CRef < CNcbi_mime_asn1 > mime(new CNcbi_mime_asn1());
626 SetDiagPostLevel(eDiag_Fatal); // ignore all but Fatal errors while reading data
627 readOK = ReadASNFromFile(filename.c_str(), mime.GetPointer(), true, &err);
628 SetDiagPostLevel(eDiag_Info);
629 if (!readOK) {
630 TRACEMSG("error: " << err);
631 TRACEMSG("trying to read file '" << filename << "' as ascii mime");
632 mime.Reset(new CNcbi_mime_asn1());
633 SetDiagPostLevel(eDiag_Fatal); // ignore all but Fatal errors while reading data
634 readOK = ReadASNFromFile(filename.c_str(), mime.GetPointer(), false, &err);
635 SetDiagPostLevel(eDiag_Info);
636 }
637 if (!readOK) {
638 TRACEMSG("error: " << err);
639 ERRORMSG("Couldn't read structure from " << filename);
640 return;
641 }
642
643 ExtractBiostrucAndBioseqs(*mime, biostruc, &bioseqs);
644 }
645
646 int mmdbID;
647 if (biostruc->GetId().size() == 0 || !biostruc->GetId().front()->IsMmdb_id()) {
648 ERRORMSG("Can't get MMDB ID from loaded structure");
649 return;
650 }
651 mmdbID = biostruc->GetId().front()->GetMmdb_id().Get();
652
653 // make sure Biostruc is of correct type
654 if (master->molecule) {
655 if (biostruc->GetModel().size() != 1 ||
656 (master->parentSet->isAlphaOnly &&
657 biostruc->GetModel().front()->GetType() != eModel_type_ncbi_backbone) ||
658 (!master->parentSet->isAlphaOnly &&
659 biostruc->GetModel().front()->GetType() != eModel_type_ncbi_all_atom)) {
660 ERRORMSG("Biostruc does not match current data - should be "
661 << (master->parentSet->isAlphaOnly ? "alpha-only" : "one-coordinate-per-atom") << " model");
662 return;
663 }
664 }
665
666 // make list of protein chains in this structure
667 vector < pair < const CSeq_id * , string > > chains; // holds Seq-id and chain name
668 map < const CSeq_id * , int > moleculeIDs; // maps Seq-id -> molecule ID within MMDB object
669 CBiostruc_graph::TDescr::const_iterator d, de;
670 CBiostruc_graph::TMolecule_graphs::const_iterator
671 m, me = biostruc->GetChemical_graph().GetMolecule_graphs().end();
672 for (m=biostruc->GetChemical_graph().GetMolecule_graphs().begin(); m!=me; ++m) {
673 bool isProtein = false;
674 const CSeq_id *sid = NULL;
675 char name = 0;
676 string full_name = "";
677
678 // check descr for chain name/type
679 de = (*m)->GetDescr().end();
680 for (d=(*m)->GetDescr().begin(); d!=de; ++d) {
681 if ((*d)->IsName()) {
682 full_name = (*d)->GetName(); // long chain-id
683 name = (*d)->GetName()[0]; // this only works for single-char chain-ids
684 }
685 else if ((*d)->IsMolecule_type() &&
686 (*d)->GetMolecule_type() == CBiomol_descr::eMolecule_type_protein)
687 isProtein = true;
688 if (isProtein && (full_name != "")) break;
689 }
690
691 // get gi
692 if ((*m)->IsSetSeq_id())
693 sid = &((*m)->GetSeq_id());
694
695 // add protein to list
696 if (isProtein && (full_name != "" ) && sid != NULL) {
697 moleculeIDs[sid] = (*m)->GetId().Get();
698 chains.push_back(make_pair(sid, full_name));
699 }
700 }
701 if (chains.size() == 0) {
702 ERRORMSG("No protein chains found in this structure!");
703 return;
704 }
705
706 // get protein name (PDB identifier)
707 string pdbID;
708 CBiostruc::TDescr::const_iterator n, ne = biostruc->GetDescr().end();
709 for (n=biostruc->GetDescr().begin(); n!=ne; ++n) {
710 if ((*n)->IsName()) {
711 pdbID = (*n)->GetName(); // assume first 'name' is pdb id
712 break;
713 }
714 }
715
716 // which chains to align?
717 vector < const CSeq_id * > sids;
718 vector < string > sids_chain_ids;
719 if (chains.size() == 1) {
720 sids.push_back(chains[0].first);
721 sids_chain_ids.push_back(chains[0].second);
722 } else {
723 wxString *choices = new wxString[chains.size()];
724 int choice;
725 for (choice=0; choice<(int)chains.size(); ++choice)
726 choices[choice].Printf("%s_%s %s",
727 pdbID.c_str(), chains[choice].second, chains[choice].first->GetSeqIdString().c_str());
728 wxArrayInt selections;
729 // selections.Add(0); // select first by default
730 int nsel = wxGetSelectedChoices(selections, "Which chain(s) do you want to align?",
731 "Select Chain", chains.size(), choices, *viewerWindow);
732 if (nsel == 0) return;
733 for (choice = 0; choice < nsel; ++choice) {
734 sids.push_back(chains[selections[choice]].first); // chains[i].first: pdb-id
735 sids_chain_ids.push_back(chains[selections[choice]].second); // chains[i].second: chain-id
736 }
737 }
738
739 SequenceList newSequences;
740 SequenceSet::SequenceList::const_iterator s, se = master->parentSet->sequenceSet->sequences.end();
741 map < const Sequence * , const CSeq_id * > seq2id;
742 for (unsigned int j=0; j<sids.size(); ++j) {
743
744 // first check to see if this sequence is already present
745 for (s=master->parentSet->sequenceSet->sequences.begin(); s!=se; ++s) {
746 if ((*s)->identifier->MatchesSeqId(*(sids[j]))) {
747 TRACEMSG("using existing sequence for " << sids[j]->GetSeqIdString());
748 newSequences.push_back(*s);
749 seq2id[*s] = sids[j];
750 break;
751 }
752 }
753
754 if (s == se) {
755 // if not, find the sequence in the list from the structure file
756 BioseqRefList::iterator b, be = bioseqs.end();
757 for (b=bioseqs.begin(); b!=be; ++b) {
758 CBioseq::TId::const_iterator i, ie = (*b)->GetId().end();
759 for (i=(*b)->GetId().begin(); i!=ie; ++i) {
760 if ((*i)->Match(*(sids[j])))
761 break;
762 }
763 if (i != ie) {
764
765 //=======================================================================================================
766 // This is for long chain-id testing. This is section 1.
767 // Get the gi of the (**b) bioseq to use later.
768 // This is so we can find this bioseq later.
769 //=======================================================================================================
770 CBioseq::TId IDs = (*b)->GetId();
771 CBioseq::TId::const_iterator i;
772 TGi bioseq_GI;
773 for (i = IDs.begin(); i != IDs.end(); ++i) {
774 if ((*i)->IsGi()) {
775 bioseq_GI = (*i)->GetGi();
776 }
777 }
778 //=======================================================================================================
779 // end of long chain-id testing section 1.
780 //=======================================================================================================
781
782 const Sequence *sequence = master->parentSet->FindOrCreateSequence(**b);
783 if (sequence) {
784
785 //=======================================================================================================
786 // This is for long chain-id testing. This is section 2.
787 // Some of our test data has biostrucs with long chain-ids and bioseqs with single-char chain-ids.
788 // Replace the single-char chain-id in this sequence (from the bioseqs)
789 // with a long chain-id from sids (from the biostruc) when a matching gi is found
790 //=======================================================================================================
791 // iterate through sids (these come from the biostruc)
792 for (unsigned int j=0; j<sids.size(); ++j) {
793 // if a matching gi is found
794 TGi sequence_GI = sequence->identifier->gi;
795 TGi sid_GI = sids[j]->GetGi();
796 if (bioseq_GI == sid_GI) {
797 // replace the pdb-id in the newSequence with the pdb-id from the biostruc
798 (const_cast<MoleculeIdentifier*>(sequence->identifier))->pdbID = pdbID;
799 (const_cast<MoleculeIdentifier*>(sequence->identifier))->pdbChain = sids_chain_ids[j];
800 // check to see if they match
801 string test1 = sequence->identifier->ToString();
802 string test2 = sids[j]->GetSeqIdString();
803 }
804 }
805 //=======================================================================================================
806 // end of long chain-id testing section 2.
807 //=======================================================================================================
808
809 TRACEMSG("found Bioseq for " << sids[j]->GetSeqIdString());
810
811 newSequences.push_back(sequence);
812 seq2id[sequence] = sids[j];
813 break;
814 }
815 }
816 }
817 if (b == be) {
818 ERRORMSG("ImportStructure() - can't find " << sids[j]->GetSeqIdString() << " in bioseq list!");
819 // return;
820 }
821 }
822 }
823
824 SequenceList::const_iterator w, we = newSequences.end();
825 for (w=newSequences.begin(); w!=we; ++w) {
826
827 // add MMDB id tag to Bioseq if not present already
828 (*w)->AddMMDBAnnotTag(mmdbID);
829
830 // add MMDB and molecule id to identifier if not already set
831 if ((*w)->identifier->mmdbID == MoleculeIdentifier::VALUE_NOT_SET) {
832 (const_cast<MoleculeIdentifier*>((*w)->identifier))->mmdbID = mmdbID;
833 } else {
834 if ((*w)->identifier->mmdbID != mmdbID)
835 ERRORMSG("MMDB ID mismatch in sequence " << (*w)->identifier->ToString()
836 << "; " << (*w)->identifier->mmdbID << " vs " << mmdbID);
837 }
838 if (moleculeIDs.find(seq2id[*w]) != moleculeIDs.end()) {
839 if ((*w)->identifier->moleculeID == MoleculeIdentifier::VALUE_NOT_SET) {
840 (const_cast<MoleculeIdentifier*>((*w)->identifier))->moleculeID =
841 moleculeIDs[seq2id[*w]];
842 } else {
843 if ((*w)->identifier->moleculeID != moleculeIDs[seq2id[*w]])
844 ERRORMSG("Molecule ID mismatch in sequence " << (*w)->identifier->ToString());
845 }
846 } else
847 ERRORMSG("No matching id for MMDB sequence " << (*w)->identifier->ToString());
848 }
849
850 // create VAST alignments (or null-alignments if master is unstructured)
851 AlignmentList newAlignments;
852 if (master->molecule) {
853 int masterFrom = -1, masterTo = -1;
854 const BlockMultipleAlignment *multiple = alignmentManager->GetCurrentMultipleAlignment();
855 if (multiple) {
856 BlockMultipleAlignment::UngappedAlignedBlockList aBlocks;
857 multiple->GetUngappedAlignedBlocks(&aBlocks);
858 if (aBlocks.size() > 0) {
859 masterFrom = aBlocks.front()->GetRangeOfRow(0)->from;
860 masterTo = aBlocks.back()->GetRangeOfRow(0)->to;
861 }
862 }
863 GetVASTAlignments(newSequences, master, &newAlignments,
864 &pendingStructureAlignments, masterFrom, masterTo);
865 } else {
866 SequenceList::const_iterator s, se = newSequences.end();
867 for (s=newSequences.begin(); s!=se; ++s) {
868 // create initially empty alignment
869 BlockMultipleAlignment::SequenceList *seqs = new BlockMultipleAlignment::SequenceList(2);
870 (*seqs)[0] = master;
871 (*seqs)[1] = *s;
872 BlockMultipleAlignment *newAlignment =
873 new BlockMultipleAlignment(seqs, master->parentSet->alignmentManager);
874 // finalize alignment
875 if (!newAlignment->AddUnalignedBlocks() || !newAlignment->UpdateBlockMapAndColors(false)) {
876 ERRORMSG("ImportStructure() - error finalizing alignment");
877 delete newAlignment;
878 continue;
879 }
880 newAlignments.push_back(newAlignment);
881 }
882 }
883
884 // add new alignment to update list
885 if (newAlignments.size() == newSequences.size())
886 AddAlignments(newAlignments);
887 else {
888 ERRORMSG("UpdateViewer::ImportStructure() - no new alignments were created");
889 DELETE_ALL_AND_CLEAR(newAlignments, AlignmentList);
890 return;
891 }
892
893 // will add Biostruc and structure alignments to ASN data later on, after merge
894 if (master->molecule) {
895 pendingStructures.push_back(biostruc);
896 wxMessageBox("The structure has been successfully imported! However, it will not appear until you\n"
897 "save this data to a file and then re-load it in a new session. And depending on the type\n"
898 "of data, it still might not appear unless the corresponding new pairwise alignment has\n"
899 "been merged into the multiple alignment.",
900 "Structure Added", wxOK | wxICON_INFORMATION, *viewerWindow);
901 } else {
902 // sort of assumes alignment with non-structured master is CDD, or uses cache mechanism...
903 wxMessageBox("The selected sequence from this structure has been imported! However, the structure\n"
904 "itself can not appear in the structure window until the sequence has been merged into the\n"
905 "multiple alignment and the alignment has been remastered to a structured sequence.",
906 "Sequence Added", wxOK | wxICON_INFORMATION, *viewerWindow);
907 }
908 }
909
SavePendingStructures(void)910 void UpdateViewer::SavePendingStructures(void)
911 {
912 TRACEMSG("saving pending imported structures and structure alignments");
913 StructureSet *sSet =
914 (alignmentManager->GetCurrentMultipleAlignment() &&
915 alignmentManager->GetCurrentMultipleAlignment()->GetMaster()) ?
916 alignmentManager->GetCurrentMultipleAlignment()->GetMaster()->parentSet : NULL;
917 if (!sSet) return;
918 while (pendingStructures.size() > 0) {
919 if (!sSet->AddBiostrucToASN(pendingStructures.front().GetPointer())) {
920 ERRORMSG("UpdateViewer::SavePendingStructures() - error saving Biostruc");
921 return;
922 }
923 pendingStructures.pop_front();
924 }
925 while (pendingStructureAlignments.size() > 0) {
926 sSet->AddStructureAlignment(pendingStructureAlignments.front().structureAlignment.GetPointer(),
927 pendingStructureAlignments.front().masterDomainID,
928 pendingStructureAlignments.front().dependentDomainID);
929 pendingStructureAlignments.pop_front();
930 }
931 }
932
BlastUpdate(BlockMultipleAlignment * alignment,bool usePSSMFromMultiple)933 void UpdateViewer::BlastUpdate(BlockMultipleAlignment *alignment, bool usePSSMFromMultiple)
934 {
935 const BlockMultipleAlignment *multipleForPSSM = alignmentManager->GetCurrentMultipleAlignment();
936 if (usePSSMFromMultiple && !multipleForPSSM) {
937 ERRORMSG("Can't do BLAST/PSSM when no multiple alignment is present");
938 return;
939 }
940
941 // find alignment, and replace it with BLAST result
942 AlignmentList::iterator a, ae = GetCurrentAlignments().end();
943 for (a=GetCurrentAlignments().begin(); a!=ae; ++a) {
944 if (*a != alignment) continue;
945
946 // run BLAST between master and first dependent (should be only one dependent...)
947 BLASTer::AlignmentList toRealign;
948 toRealign.push_back(alignment);
949 BLASTer::AlignmentList newAlignments;
950 alignmentManager->blaster->CreateNewPairwiseAlignmentsByBlast(
951 multipleForPSSM, toRealign, &newAlignments, usePSSMFromMultiple);
952 if (newAlignments.size() != 1) {
953 ERRORMSG("UpdateViewer::BlastUpdate() - CreateNewPairwiseAlignmentsByBlast() failed");
954 return;
955 }
956 if (newAlignments.front()->NAlignedBlocks() == 0) {
957 WARNINGMSG("alignment unchanged");
958 delete newAlignments.front();
959 return;
960 }
961
962 // replace alignment with BLAST result
963 TRACEMSG("BLAST succeeded - replacing alignment");
964 delete alignment;
965 *a = newAlignments.front();
966 break;
967 }
968
969 // recreate alignment display with new alignment
970 AlignmentList copy = GetCurrentAlignments();
971 GetCurrentAlignments().clear();
972 GetCurrentDisplay()->Empty();
973 AddAlignments(copy);
974 // (*viewerWindow)->ScrollToColumn(GetCurrentDisplay()->GetStartingColumn());
975 }
976
MapDependentToMaster(const BlockMultipleAlignment * alignment,int dependentRow,vector<int> * dependent2master)977 static void MapDependentToMaster(const BlockMultipleAlignment *alignment,
978 int dependentRow, vector < int > *dependent2master)
979 {
980 dependent2master->clear();
981 dependent2master->resize(alignment->GetSequenceOfRow(dependentRow)->Length(), -1);
982 BlockMultipleAlignment::UngappedAlignedBlockList uaBlocks;
983 alignment->GetUngappedAlignedBlocks(&uaBlocks);
984 BlockMultipleAlignment::UngappedAlignedBlockList::const_iterator b, be = uaBlocks.end();
985 for (b=uaBlocks.begin(); b!=be; ++b) {
986 const Block::Range
987 *masterRange = (*b)->GetRangeOfRow(0),
988 *dependentRange = (*b)->GetRangeOfRow(dependentRow);
989 for (unsigned int i=0; i<(*b)->width; ++i)
990 (*dependent2master)[dependentRange->from + i] = masterRange->from + i;
991 }
992 }
993
GetAlignmentByBestNeighbor(const BlockMultipleAlignment * multiple,const BLASTer::AlignmentList rowAlignments)994 static BlockMultipleAlignment * GetAlignmentByBestNeighbor(
995 const BlockMultipleAlignment *multiple,
996 const BLASTer::AlignmentList rowAlignments)
997 {
998 if (rowAlignments.size() != multiple->NRows()) {
999 ERRORMSG("GetAlignmentByBestNeighbor: wrong # alignments");
1000 return NULL;
1001 }
1002
1003 // find best-scoring aligment above some threshold; assumes E-value is in RowDouble
1004 const BlockMultipleAlignment *bestMatchFromMultiple = NULL;
1005 unsigned int b, bestRow = 0;
1006 BLASTer::AlignmentList::const_iterator p, pe = rowAlignments.end();
1007 for (b=0, p=rowAlignments.begin(); p!=pe; ++b, ++p) {
1008 if (!bestMatchFromMultiple || (*p)->GetRowDouble(0) < bestMatchFromMultiple->GetRowDouble(0)) {
1009 bestMatchFromMultiple = *p;
1010 bestRow = b;
1011 }
1012 }
1013 if (!bestMatchFromMultiple || bestMatchFromMultiple->GetRowDouble(0) > 0.000001) {
1014 WARNINGMSG("GetAlignmentByBestNeighbor: no significant hit found");
1015 return NULL;
1016 }
1017 INFOMSG("Closest neighbor from multiple: sequence "
1018 << bestMatchFromMultiple->GetMaster()->identifier->ToString()
1019 << ", E-value: " << bestMatchFromMultiple->GetRowDouble(0));
1020 GlobalMessenger()->RemoveAllHighlights(true);
1021 GlobalMessenger()->AddHighlights(
1022 bestMatchFromMultiple->GetMaster(), 0, bestMatchFromMultiple->GetMaster()->Length()-1);
1023
1024 // if the best match is the multiple's master, then just use that alignment
1025 if (bestRow == 0) return bestMatchFromMultiple->Clone();
1026
1027 // otherwise, use best match as a guide alignment to align the dependent with the multiple's master
1028 vector < int > import2dependent, dependent2master;
1029 MapDependentToMaster(bestMatchFromMultiple, 1, &import2dependent);
1030 MapDependentToMaster(multiple, bestRow, &dependent2master);
1031
1032 const Sequence *importSeq = bestMatchFromMultiple->GetSequenceOfRow(1);
1033 BlockMultipleAlignment::SequenceList *seqs = new BlockMultipleAlignment::SequenceList(2);
1034 (*seqs)[0] = multiple->GetSequenceOfRow(0);
1035 (*seqs)[1] = importSeq;
1036 BlockMultipleAlignment *newAlignment =
1037 new BlockMultipleAlignment(seqs, importSeq->parentSet->alignmentManager);
1038
1039 // create maximally sized blocks
1040 int masterStart=-1, importStart, importLoc, dependentLoc, masterLoc, len=0;
1041 for (importStart=-1, importLoc=0; importLoc<=(int)importSeq->Length(); ++importLoc) {
1042
1043 // map import -> dependent -> master
1044 dependentLoc = (importLoc < (int)importSeq->Length()) ? import2dependent[importLoc] : -1;
1045 masterLoc = (dependentLoc >= 0) ? dependent2master[dependentLoc] : -1;
1046
1047 // if we're currently inside a block..
1048 if (importStart >= 0) {
1049
1050 // add another residue to a continuously aligned block
1051 if (masterLoc >= 0 && masterLoc-masterStart == importLoc-importStart) {
1052 ++len;
1053 }
1054
1055 // terminate block
1056 else {
1057 UngappedAlignedBlock *newBlock = new UngappedAlignedBlock(newAlignment);
1058 newBlock->SetRangeOfRow(0, masterStart, masterStart + len - 1);
1059 newBlock->SetRangeOfRow(1, importStart, importStart + len - 1);
1060 newBlock->width = len;
1061 newAlignment->AddAlignedBlockAtEnd(newBlock);
1062 importStart = -1;
1063 }
1064 }
1065
1066 // search for start of block
1067 if (importStart < 0) {
1068 if (masterLoc >= 0) {
1069 masterStart = masterLoc;
1070 importStart = importLoc;
1071 len = 1;
1072 }
1073 }
1074 }
1075
1076 // finalize and and add new alignment to list
1077 if (!newAlignment->AddUnalignedBlocks() || !newAlignment->UpdateBlockMapAndColors(false)) {
1078 ERRORMSG("error finalizing alignment");
1079 delete newAlignment;
1080 return NULL;
1081 }
1082
1083 return newAlignment;
1084 }
1085
BlastNeighbor(BlockMultipleAlignment * update)1086 void UpdateViewer::BlastNeighbor(BlockMultipleAlignment *update)
1087 {
1088 const BlockMultipleAlignment *multiple = alignmentManager->GetCurrentMultipleAlignment();
1089 if (!multiple) {
1090 ERRORMSG("Can't do BLAST Neighbor when no multiple alignment is present");
1091 return;
1092 }
1093 BlockMultipleAlignment::UngappedAlignedBlockList uaBlocks;
1094 multiple->GetUngappedAlignedBlocks(&uaBlocks);
1095 if (uaBlocks.size() == 0) {
1096 ERRORMSG("Can't do BLAST Neighbor with null multiple alignment");
1097 return;
1098 }
1099 const Sequence *updateSeq = update->GetSequenceOfRow(1);
1100
1101 // find alignment, to replace it with BLAST result
1102 AlignmentList::iterator a, ae = GetCurrentAlignments().end();
1103 for (a=GetCurrentAlignments().begin(); a!=ae; ++a)
1104 if (*a == update) break;
1105 if (a == GetCurrentAlignments().end()) return;
1106
1107 // do BLAST-2-sequences between update dependent and each sequence from the multiple
1108 BLASTer::AlignmentList newAlignments;
1109 for (unsigned int row=0; row<multiple->NRows(); ++row) {
1110 BLASTer::AlignmentList toRealign;
1111 BlockMultipleAlignment::SequenceList *seqs = new BlockMultipleAlignment::SequenceList(2);
1112 (*seqs)[0] = multiple->GetSequenceOfRow(row);
1113 (*seqs)[1] = updateSeq;
1114 BlockMultipleAlignment *newAlignment =
1115 new BlockMultipleAlignment(seqs, updateSeq->parentSet->alignmentManager);
1116 if (newAlignment->AddUnalignedBlocks() && newAlignment->UpdateBlockMapAndColors(false))
1117 {
1118 int excess = 0;
1119 if (!RegistryGetInteger(REG_ADVANCED_SECTION, REG_FOOTPRINT_RES, &excess))
1120 WARNINGMSG("Can't get footprint excess residues from registry");
1121 newAlignment->alignMasterFrom = uaBlocks.front()->GetRangeOfRow(row)->from - excess;
1122 if (newAlignment->alignMasterFrom < 0)
1123 newAlignment->alignMasterFrom = 0;
1124 newAlignment->alignMasterTo = uaBlocks.back()->GetRangeOfRow(row)->to + excess;
1125 if (newAlignment->alignMasterTo >= ((int)((*seqs)[0]->Length())))
1126 newAlignment->alignMasterTo = (*seqs)[0]->Length() - 1;
1127 newAlignment->alignDependentFrom = update->alignDependentFrom;
1128 newAlignment->alignDependentTo = update->alignDependentTo;
1129 toRealign.push_back(newAlignment);
1130 } else {
1131 ERRORMSG("error finalizing alignment");
1132 delete newAlignment;
1133 }
1134 BLASTer::AlignmentList result;
1135 SetDiagPostLevel(eDiag_Error);
1136 alignmentManager->blaster->CreateNewPairwiseAlignmentsByBlast(NULL, toRealign, &result, false);
1137 SetDiagPostLevel(eDiag_Info);
1138 DELETE_ALL_AND_CLEAR(toRealign, BLASTer::AlignmentList);
1139 if (result.size() != 1) {
1140 ERRORMSG("UpdateViewer::BlastUpdate() - CreateNewPairwiseAlignmentsByBlast() failed");
1141 return;
1142 }
1143 newAlignments.push_back(result.front());
1144 }
1145
1146 // replace alignment with result
1147 BlockMultipleAlignment *alignmentByNeighbor = GetAlignmentByBestNeighbor(multiple, newAlignments);
1148 DELETE_ALL_AND_CLEAR(newAlignments, BLASTer::AlignmentList);
1149 if (!alignmentByNeighbor) {
1150 WARNINGMSG("alignment unchanged");
1151 return;
1152 }
1153 TRACEMSG("BLAST Neighbor succeeded - replacing alignment");
1154 delete update;
1155 *a = alignmentByNeighbor;
1156
1157 // recreate alignment display with new alignment
1158 AlignmentList copy = GetCurrentAlignments();
1159 GetCurrentAlignments().clear();
1160 GetCurrentDisplay()->Empty();
1161 AddAlignments(copy);
1162 // (*viewerWindow)->ScrollToColumn(GetCurrentDisplay()->GetStartingColumn());
1163 }
1164
1165 // comparison function: if CompareRows(a, b) == true, then row a moves up
1166 typedef bool (*CompareUpdates)(BlockMultipleAlignment *a, BlockMultipleAlignment *b);
1167
CompareUpdatesByIdentifier(BlockMultipleAlignment * a,BlockMultipleAlignment * b)1168 static bool CompareUpdatesByIdentifier(BlockMultipleAlignment *a, BlockMultipleAlignment *b)
1169 {
1170 return MoleculeIdentifier::CompareIdentifiers(
1171 a->GetSequenceOfRow(1)->identifier, // sort by first dependent row
1172 b->GetSequenceOfRow(1)->identifier);
1173 }
1174
1175 // assumes scores are stored in the master row double
CompareUpdatesByScore(BlockMultipleAlignment * a,BlockMultipleAlignment * b)1176 static bool CompareUpdatesByScore(BlockMultipleAlignment *a, BlockMultipleAlignment *b)
1177 {
1178 return (a->GetRowDouble(0) > b->GetRowDouble(0)); // sort in descending order of score
1179 }
1180
1181 static CompareUpdates updateComparisonFunction = NULL;
1182
SortByIdentifier(void)1183 void UpdateViewer::SortByIdentifier(void)
1184 {
1185 TRACEMSG("sorting updates by identifier");
1186 updateComparisonFunction = CompareUpdatesByIdentifier;
1187 SortUpdates();
1188 }
1189
SortByPSSM(void)1190 void UpdateViewer::SortByPSSM(void)
1191 {
1192 TRACEMSG("sorting updates by PSSM");
1193
1194 const BlockMultipleAlignment *multiple = alignmentManager->GetCurrentMultipleAlignment();
1195 if (!multiple) {
1196 ERRORMSG("Can't do sort by PSSM when no multiple alignment is present");
1197 return;
1198 }
1199
1200 AlignmentList& currentAlignments = GetCurrentAlignments();
1201 AlignmentList::const_iterator a, ae = currentAlignments.end();
1202 for (a=currentAlignments.begin(); a!=ae; ++a) {
1203
1204 const Sequence *updateSeq = (*a)->GetSequenceOfRow(1);
1205 BlockMultipleAlignment::UngappedAlignedBlockList uaBlocks;
1206 (*a)->GetUngappedAlignedBlocks(&uaBlocks);
1207 BlockMultipleAlignment::UngappedAlignedBlockList::const_iterator b, be = uaBlocks.end();
1208
1209 // compute scores of aligned residues vs PSSM from the multiple alignment (not the PSSM for the 2-row update alignment)
1210 int score = 0;
1211 for (b=uaBlocks.begin(); b!=be; ++b) {
1212 const Block::Range *m = (*b)->GetRangeOfRow(0), *u = (*b)->GetRangeOfRow(1);
1213 for (unsigned int i=0; i<(*b)->width; ++i)
1214 score += multiple->GetPSSM().GetPSSMScore(
1215 LookupNCBIStdaaNumberFromCharacter(updateSeq->sequenceString[u->from + i]),
1216 m->from + i);
1217 }
1218
1219 // store score in update alignment
1220 (*a)->SetRowStatusLine(0, string("Score vs. PSSM: ") + NStr::IntToString(score));
1221 (*a)->SetRowStatusLine(1, (*a)->GetRowStatusLine(0));
1222 (*a)->SetRowDouble(0, score);
1223 (*a)->SetRowDouble(1, score);
1224 }
1225
1226 updateComparisonFunction = CompareUpdatesByScore;
1227 SortUpdates();
1228 }
1229
SortUpdates(void)1230 void UpdateViewer::SortUpdates(void)
1231 {
1232 if (!updateComparisonFunction) {
1233 ERRORMSG("UpdateViewer::SortUpdates() - must first set comparison function");
1234 return;
1235 }
1236
1237 // make vector of alignments
1238 AlignmentList& currentAlignments = GetCurrentAlignments();
1239 if (currentAlignments.size() < 2) return;
1240 vector < BlockMultipleAlignment * > sortedVector(currentAlignments.size());
1241 AlignmentList::const_iterator a, ae = currentAlignments.end();
1242 unsigned int i = 0;
1243 for (a=currentAlignments.begin(); a!=ae; ++a) sortedVector[i++] = *a;
1244
1245 // sort them
1246 stable_sort(sortedVector.begin(), sortedVector.end(), updateComparisonFunction);
1247 updateComparisonFunction = NULL;
1248
1249 // replace window contents with sorted list
1250 currentAlignments.clear();
1251 GetCurrentDisplay()->Empty();
1252
1253 AlignmentList sortedList;
1254 for (i=0; i<sortedVector.size(); ++i) sortedList.push_back(sortedVector[i]);
1255 AddAlignments(sortedList);
1256 }
1257
1258 END_SCOPE(Cn3D)
1259