1 /*  $Id: update_viewer.cpp 620017 2020-11-13 19:16:01Z hurwitz $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors:  Paul Thiessen
27 *
28 * File Description:
29 *      implementation of non-GUI part of update viewer
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbistl.hpp>
37 #include <objtools/readers/fasta.hpp>
38 #include <util/line_reader.hpp>
39 
40 #include <objects/cdd/Cdd.hpp>
41 #include <objects/cdd/Update_align.hpp>
42 #include <objects/cdd/Update_comment.hpp>
43 #include <objects/seq/Seq_annot.hpp>
44 #include <objects/seq/Bioseq.hpp>
45 #include <objects/seqloc/PDB_seq_id.hpp>
46 #include <objects/seqset/Seq_entry.hpp>
47 #include <objects/seqset/Bioseq_set.hpp>
48 #include <objects/ncbimime/Ncbi_mime_asn1.hpp>
49 #include <objects/ncbimime/Biostruc_seq.hpp>
50 #include <objects/mmdb2/Model_type.hpp>
51 #include <objects/mmdb2/Biostruc_model.hpp>
52 #include <objects/mmdb1/Biostruc_graph.hpp>
53 #include <objects/mmdb1/Biostruc_descr.hpp>
54 #include <objects/general/Dbtag.hpp>
55 #include <objects/general/Object_id.hpp>
56 #include <objects/mmdb1/Biostruc_id.hpp>
57 #include <objects/mmdb1/Mmdb_id.hpp>
58 #include <objects/mmdb1/Biostruc_annot_set.hpp>
59 #include <objects/mmdb3/Biostruc_feature_set.hpp>
60 #include <objects/mmdb3/Chem_graph_alignment.hpp>
61 #include <objects/mmdb3/Chem_graph_pntrs.hpp>
62 #include <objects/mmdb3/Residue_pntrs.hpp>
63 #include <objects/mmdb3/Residue_interval_pntr.hpp>
64 #include <objects/mmdb1/Molecule_id.hpp>
65 #include <objects/mmdb1/Residue_id.hpp>
66 #include <objects/mmdb3/Biostruc_feature_set_id.hpp>
67 #include <objects/mmdb3/Biostruc_feature_id.hpp>
68 
69 #include <memory>
70 #include <algorithm>
71 
72 #include "remove_header_conflicts.hpp"
73 
74 #include "update_viewer.hpp"
75 #include "asn_reader.hpp"
76 #include "update_viewer_window.hpp"
77 #include "messenger.hpp"
78 #include "sequence_display.hpp"
79 #include "cn3d_colors.hpp"
80 #include "alignment_manager.hpp"
81 #include "cn3d_threader.hpp"
82 #include "structure_set.hpp"
83 #include "molecule.hpp"
84 #include "cn3d_tools.hpp"
85 #include "cn3d_blast.hpp"
86 #include "molecule_identifier.hpp"
87 #include "cn3d_cache.hpp"
88 #include "cn3d_ba_interface.hpp"
89 #include "cn3d_pssm.hpp"
90 
91 #include <wx/tokenzr.h>
92 
93 USING_NCBI_SCOPE;
94 USING_SCOPE(objects);
95 
96 
BEGIN_SCOPE(Cn3D)97 BEGIN_SCOPE(Cn3D)
98 
99 UpdateViewer::UpdateViewer(AlignmentManager *alnMgr) :
100     // not sure why this cast is necessary, but MSVC requires it...
101     ViewerBase(reinterpret_cast<ViewerWindowBase**>(&updateWindow), alnMgr),
102     updateWindow(NULL)
103 {
104     // when first created, start with blank display
105     AlignmentList emptyAlignmentList;
106     SequenceDisplay *blankDisplay = new SequenceDisplay(true, viewerWindow);
107     InitData(&emptyAlignmentList, blankDisplay);
108     EnableStacks();
109 }
110 
~UpdateViewer(void)111 UpdateViewer::~UpdateViewer(void)
112 {
113 }
114 
SetInitialState(void)115 void UpdateViewer::SetInitialState(void)
116 {
117     KeepCurrent();
118     EnableStacks();
119 }
120 
SaveDialog(bool prompt)121 void UpdateViewer::SaveDialog(bool prompt)
122 {
123     if (updateWindow) updateWindow->SaveDialog(prompt, false);
124 }
125 
CreateUpdateWindow(void)126 void UpdateViewer::CreateUpdateWindow(void)
127 {
128     if (updateWindow) {
129         updateWindow->Show(true);
130         GlobalMessenger()->PostRedrawSequenceViewer(this);
131     } else {
132         SequenceDisplay *display = GetCurrentDisplay();
133         if (display) {
134             updateWindow = new UpdateViewerWindow(this);
135             updateWindow->NewDisplay(display, false);
136             updateWindow->ScrollToColumn(display->GetStartingColumn());
137             updateWindow->Show(true);
138             // ScrollTo causes immediate redraw, so don't need a second one
139             GlobalMessenger()->UnPostRedrawSequenceViewer(this);
140         }
141     }
142 }
143 
AddAlignments(const AlignmentList & newAlignments)144 void UpdateViewer::AddAlignments(const AlignmentList& newAlignments)
145 {
146     AlignmentList& alignments = GetCurrentAlignments();
147     SequenceDisplay *display = GetCurrentDisplay();
148 
149     // populate successive lines of the display with each alignment, with blank lines inbetween
150     AlignmentList::const_iterator a, ae = newAlignments.end();
151     for (a=newAlignments.begin(); a!=ae; ++a) {
152         if ((*a)->NRows() != 2) {
153             ERRORMSG("UpdateViewer::AddAlignments() - got alignment with " << (*a)->NRows() << " rows");
154             continue;
155         }
156 
157         // add alignment to stack list
158         alignments.push_back(*a);
159         (*a)->ShowGeometryViolations(updateWindow ? updateWindow->GeometryViolationsShown() : false);
160 
161         // add alignment to the display, including block row since editor is always on
162         if (display->NRows() != 0) display->AddRowFromString("");
163         display->AddBlockBoundaryRow(*a);
164         for (unsigned int row=0; row<2; ++row)
165             display->AddRowFromAlignment(row, *a);
166     }
167 
168     if (alignments.size() > 0)
169         display->SetStartingColumn(alignments.front()->GetFirstAlignedBlockPosition() - 5);
170 
171     Save();    // make this an undoable operation
172     if (updateWindow) updateWindow->UpdateDisplay(display);
173 }
174 
ReplaceAlignments(const AlignmentList & alignmentList)175 void UpdateViewer::ReplaceAlignments(const AlignmentList& alignmentList)
176 {
177     // empty out the current alignment list and display (but not the undo stacks!)
178     DELETE_ALL_AND_CLEAR(GetCurrentAlignments(), AlignmentList);
179     GetCurrentDisplay()->Empty();
180 
181     AddAlignments(alignmentList);
182 }
183 
DeleteAlignment(BlockMultipleAlignment * toDelete)184 void UpdateViewer::DeleteAlignment(BlockMultipleAlignment *toDelete)
185 {
186     AlignmentList keepAlignments;
187     AlignmentList::const_iterator a, ae = GetCurrentAlignments().end();
188     for (a=GetCurrentAlignments().begin(); a!=ae; ++a)
189         if (*a != toDelete)
190             keepAlignments.push_back((*a)->Clone());
191 
192     ReplaceAlignments(keepAlignments);
193 }
194 
SaveAlignments(void)195 void UpdateViewer::SaveAlignments(void)
196 {
197     SetInitialState();
198 
199     // construct a new list of ASN Update-aligns
200     CCdd::TPending updates;
201     map < CUpdate_align * , bool > usedUpdateAligns;
202 
203     AlignmentList::const_iterator a, ae = GetCurrentAlignments().end();
204     for (a=GetCurrentAlignments().begin(); a!=ae; ++a) {
205 
206         // create a Seq-align (with Dense-diags) out of this update
207         if ((*a)->NRows() != 2) {
208             ERRORMSG("CreateSeqAlignFromUpdate() - can only save pairwise updates");
209             continue;
210         }
211         BlockMultipleAlignment::UngappedAlignedBlockList blocks;
212         (*a)->GetUngappedAlignedBlocks(&blocks);
213         CSeq_align *newSeqAlign = CreatePairwiseSeqAlignFromMultipleRow(*a, blocks, 1);
214         if (!newSeqAlign) continue;
215 
216         // the Update-align and Seq-annot's list of Seq-aligns where this new Seq-align will go
217         CUpdate_align *updateAlign = (*a)->updateOrigin.GetPointer();
218         CSeq_annot::C_Data::TAlign *seqAlignList = NULL;
219 
220         // if this alignment came from an existing Update-align, then replace just the Seq-align
221         // so that the rest of the Update-align's comments/annotations are preserved
222         if (updateAlign) {
223             if (!updateAlign->IsSetSeqannot() || !updateAlign->GetSeqannot().GetData().IsAlign()) {
224                 ERRORMSG("UpdateViewer::SaveAlignments() - confused by Update-align format");
225                 continue;
226             }
227         }
228 
229         // if this is a new update, create a new Update-align and tag it
230         else {
231             updateAlign = new CUpdate_align();
232 
233             // set type field depending on whether demoted sequence has structure
234             updateAlign->SetType((*a)->GetSequenceOfRow(1)->molecule ?
235                 CUpdate_align::eType_demoted_3d : CUpdate_align::eType_demoted);
236 
237             // add a text comment
238             CUpdate_comment *comment = new CUpdate_comment();
239             comment->SetComment("Created by demotion or import in Cn3D");
240             updateAlign->SetDescription().resize(updateAlign->GetDescription().size() + 1);
241             updateAlign->SetDescription().back().Reset(comment);
242 
243             // create a new Seq-annot
244             CRef < CSeq_annot > seqAnnotRef(new CSeq_annot());
245             seqAnnotRef->SetData().SetAlign();
246             updateAlign->SetSeqannot(*seqAnnotRef);
247         }
248 
249         // get Seq-align list
250         if (!updateAlign || !(seqAlignList = &(updateAlign->SetSeqannot().SetData().SetAlign()))) {
251             ERRORMSG("UpdateViewer::SaveAlignments() - can't find Update-align and/or Seq-align list");
252             continue;
253         }
254 
255         // if this is the first re-use of this Update-align, then empty out all existing
256         // Seq-aligns and push it onto the new update list
257         if (usedUpdateAligns.find(updateAlign) == usedUpdateAligns.end()) {
258             seqAlignList->clear();
259             updates.resize(updates.size() + 1);
260             updates.back().Reset(updateAlign);
261             usedUpdateAligns[updateAlign] = true;
262         }
263 
264         // finally, add the new Seq-align to the list
265         seqAlignList->resize(seqAlignList->size() + 1);
266         seqAlignList->back().Reset(newSeqAlign);
267     }
268 
269     // save these changes to the ASN data
270     alignmentManager->ReplaceUpdatesInASN(updates);
271 }
272 
GetMasterSequence(void) const273 const Sequence * UpdateViewer::GetMasterSequence(void) const
274 {
275     const Sequence *master = NULL;
276 
277     // if there's a multiple alignment in the sequence viewer, use same master as that
278     if (alignmentManager->GetCurrentMultipleAlignment()) {
279         master = alignmentManager->GetCurrentMultipleAlignment()->GetMaster();
280     }
281     // if there's already an update present, use same master as that
282     else if (GetCurrentAlignments().size() > 0) {
283         master = GetCurrentAlignments().front()->GetMaster();
284     }
285     // otherwise, this must be a single structure with no updates, so we need to pick one of its
286     // chains as the new master
287     else {
288         vector < const Sequence * > chains;
289         if (alignmentManager->GetStructureProteins(&chains)) {
290             if (chains.size() == 1) {
291                 master = chains[0];
292             } else {
293                 wxString *titles = new wxString[chains.size()];
294                 int choice;
295                 for (choice=0; choice<(int)chains.size(); ++choice)
296                     titles[choice] = chains[choice]->identifier->ToString().c_str();
297                 choice = wxGetSingleChoiceIndex("Align to which protein chain?",
298                     "Select Chain", chains.size(), titles);
299                 if (choice >= 0)
300                     master = chains[choice];
301                 else    // cancelled
302                     return NULL;
303             }
304         }
305     }
306     if (!master)
307         ERRORMSG("UpdateViewer::GetMasterSequence() - no master sequence defined");
308     return master;
309 }
310 
FetchSequencesViaHTTP(SequenceList * newSequences,StructureSet * sSet) const311 void UpdateViewer::FetchSequencesViaHTTP(SequenceList *newSequences, StructureSet *sSet) const
312 {
313     wxString ids =
314         wxGetTextFromUser("Enter a list of protein GIs or Accessions:",
315             "Input Identifier", "", *viewerWindow);
316     if (ids.size() == 0) return;
317 
318     wxStringTokenizer tkz(ids, " ,;\t\r\n", wxTOKEN_STRTOK);
319     while (tkz.HasMoreTokens()) {
320         wxString id = tkz.GetNextToken();
321         CRef < CBioseq > bioseq = FetchSequenceViaHTTP(WX_TO_STD(id));
322         if (bioseq.Empty())
323             return;
324         const Sequence *sequence = sSet->FindOrCreateSequence(*bioseq);
325         if (sequence) {
326             if (sequence->isProtein)
327                 newSequences->push_back(sequence);
328             else
329                 ERRORMSG("The sequence must be a protein");
330         }
331     }
332 }
333 
ReadSequencesFromFile(SequenceList * newSequences,StructureSet * sSet) const334 void UpdateViewer::ReadSequencesFromFile(SequenceList *newSequences, StructureSet *sSet) const
335 {
336     newSequences->clear();
337 
338     wxString fastaFile = wxFileSelector("Choose a FASTA file from which to import",
339         "", "", "", "*.*", wxFD_OPEN | wxFD_FILE_MUST_EXIST, *viewerWindow);
340     if (fastaFile.size() > 0) {
341 
342         // get Seq-entry of all sequences in the file
343         CNcbiIfstream ifs(WX_TO_STD(fastaFile).c_str(), IOS_BASE::in);
344         if (!ifs) {
345             ERRORMSG("Unable to open file " << fastaFile.c_str());
346             return;
347         }
348         CStreamLineReader slr(ifs);
349         CFastaReader reader(slr, CFastaReader::fAssumeProt);
350         CRef < CSeq_entry > se = reader.ReadSet();
351 //        string err;
352 //        WriteASNToFile("Seq-entry.txt", *se, false, &err);
353 
354         // create Sequences from each one
355         if (se->IsSeq()) {
356             const Sequence *sequence = sSet->FindOrCreateSequence(se->SetSeq());
357             if (sequence)
358                  newSequences->push_back(sequence);
359         } else if (se->IsSet() && se->GetSet().GetSeq_set().size() > 0)  {
360             CBioseq_set::TSeq_set::iterator b, be = se->SetSet().SetSeq_set().end();
361             for (b=se->SetSet().SetSeq_set().begin(); b!=be; ++b) {
362                 if (!(*b)->IsSeq()) {
363                     WARNINGMSG("CFastaReader returned nested Bioseq-set");
364                 } else {
365                     const Sequence *sequence = sSet->FindOrCreateSequence((*b)->SetSeq());
366                     if (sequence)
367                          newSequences->push_back(sequence);
368                 }
369             }
370         } else {
371             WARNINGMSG("CFastaReader returned unknown format or no Bioseqs");
372         }
373     }
374 }
375 
MakeEmptyAlignment(const Sequence * master,const Sequence * dependent)376 static BlockMultipleAlignment * MakeEmptyAlignment(const Sequence *master, const Sequence *dependent)
377 {
378     BlockMultipleAlignment::SequenceList *seqs = new BlockMultipleAlignment::SequenceList(2);
379     (*seqs)[0] = master;
380     (*seqs)[1] = dependent;
381     BlockMultipleAlignment *newAlignment =
382         new BlockMultipleAlignment(seqs, master->parentSet->alignmentManager);
383     if (!newAlignment->AddUnalignedBlocks() || !newAlignment->UpdateBlockMapAndColors(false)) {
384         ERRORMSG("MakeEmptyAlignment() - error finalizing alignment");
385         delete newAlignment;
386         return NULL;
387     }
388     return newAlignment;
389 }
390 
MakeEmptyAlignments(const SequenceList & newSequences,const Sequence * master,AlignmentList * newAlignments) const391 void UpdateViewer::MakeEmptyAlignments(const SequenceList& newSequences,
392     const Sequence *master, AlignmentList *newAlignments) const
393 {
394     newAlignments->clear();
395     SequenceList::const_iterator s, se = newSequences.end();
396     for (s=newSequences.begin(); s!=se; ++s) {
397         BlockMultipleAlignment *newAlignment = MakeEmptyAlignment(master, *s);
398         if (newAlignment) newAlignments->push_back(newAlignment);
399     }
400 }
401 
FetchSequences(StructureSet * sSet,SequenceList * newSequences) const402 void UpdateViewer::FetchSequences(StructureSet *sSet, SequenceList *newSequences) const
403 {
404     // choose import type
405     static const wxString choiceStrings[] = { "Network via GI/Accession", "From a FASTA File" };
406     enum choiceValues { FROM_GI=0, FROM_FASTA, N_CHOICES };
407     int importFrom = wxGetSingleChoiceIndex("From what source would you like to import sequences?",
408         "Select Import Source", N_CHOICES, choiceStrings, *viewerWindow);
409     if (importFrom < 0) return;     // cancelled
410 
411     // network import
412     if (importFrom == FROM_GI)
413         FetchSequencesViaHTTP(newSequences, sSet);
414 
415     // FASTA import
416     else if (importFrom == FROM_FASTA)
417         ReadSequencesFromFile(newSequences, sSet);
418 }
419 
ImportSequences(void)420 void UpdateViewer::ImportSequences(void)
421 {
422     // determine the master sequence for new alignments
423     const Sequence *master = GetMasterSequence();
424     if (!master) return;
425 
426     // import the new sequence(s)
427     SequenceList newSequences;
428     FetchSequences(master->parentSet, &newSequences);
429     if (newSequences.size() == 0) {
430         WARNINGMSG("UpdateViewer::ImportSequence() - no sequences were imported");
431         return;
432     }
433 
434     // create null-alignments for each sequence
435     AlignmentList newAlignments;
436     MakeEmptyAlignments(newSequences, master, &newAlignments);
437 
438     // add new alignments to update list
439     if (newAlignments.size() > 0)
440         AddAlignments(newAlignments);
441     else
442         ERRORMSG("UpdateViewer::ImportSequence() - no new alignments were created");
443 }
444 
GetVASTAlignments(const SequenceList & newSequences,const Sequence * master,AlignmentList * newAlignments,PendingStructureAlignments * structureAlignments,unsigned int masterFrom,unsigned int masterTo) const445 void UpdateViewer::GetVASTAlignments(const SequenceList& newSequences,
446     const Sequence *master, AlignmentList *newAlignments,
447     PendingStructureAlignments *structureAlignments,
448     unsigned int masterFrom, unsigned int masterTo) const
449 {
450     if (master->identifier->pdbID.size() == 0) {
451         WARNINGMSG("UpdateViewer::GetVASTAlignments() - "
452             "can't be called with non-MMDB master " << master->identifier->ToString());
453         return;
454     }
455 
456     SequenceList::const_iterator s, se = newSequences.end();
457     for (s=newSequences.begin(); s!=se; ++s) {
458         if ((*s)->identifier->pdbID.size() == 0) {
459             WARNINGMSG("UpdateViewer::GetVASTAlignments() - "
460                 "can't be called with non-MMDB dependent " << (*s)->identifier->ToString());
461             BlockMultipleAlignment *newAlignment = MakeEmptyAlignment(master, *s);
462             if (newAlignment) newAlignments->push_back(newAlignment);
463             continue;
464         }
465 
466         // set up URL
467         string
468             host = "www.ncbi.nlm.nih.gov",
469             path = "/Structure/VA/vastalign.cgi", err;
470         CNcbiOstrstream argstr;
471         argstr << "main=" << master->identifier->ToString()
472             << "&neighbor=" << (*s)->identifier->ToString();
473         if (masterFrom <= masterTo && masterFrom < master->Length() && masterTo < master->Length())
474             argstr << "&from=" << (masterFrom+1) << "&to=" << (masterTo+1); // URL #'s are 1-based
475         string args((string) CNcbiOstrstreamToString(argstr));
476 
477         // connect to vastalign.cgi
478         CBiostruc_annot_set structureAlignment;
479         INFOMSG("trying to load VAST alignment data from " << host << path << '?' << args);
480 
481         if (!GetAsnDataViaHTTPS(host, path, args, &structureAlignment, &err)) {
482                 ERRORMSG("Error calling vastalign.cgi: " << err);
483             BlockMultipleAlignment *newAlignment = MakeEmptyAlignment(master, *s);
484             if (newAlignment) newAlignments->push_back(newAlignment);
485             continue;
486         }
487         INFOMSG("successfully loaded data from vastalign.cgi");
488 #ifdef _DEBUG
489         WriteASNToFile("vastalign.dat.txt", structureAlignment, false, &err);
490 #endif
491 
492         // create initially empty alignment
493         BlockMultipleAlignment::SequenceList *seqs = new BlockMultipleAlignment::SequenceList(2);
494         (*seqs)[0] = master;
495         (*seqs)[1] = *s;
496         BlockMultipleAlignment *newAlignment =
497             new BlockMultipleAlignment(seqs, master->parentSet->alignmentManager);
498 
499         // skip if no VAST alignment found
500         if (structureAlignment.IsSetId() && structureAlignment.GetId().front()->IsMmdb_id() &&
501             structureAlignment.GetId().front()->GetMmdb_id().Get() == 0)
502         {
503             WARNINGMSG("VAST found no alignment for these chains");
504         }
505 
506         else try {
507 
508             if (!structureAlignment.IsSetId() || !structureAlignment.GetId().front()->IsMmdb_id() ||
509                 structureAlignment.GetFeatures().size() != 1 ||
510                 structureAlignment.GetFeatures().front()->GetFeatures().size() != 1 ||
511                 !structureAlignment.GetFeatures().front()->GetFeatures().front()->IsSetLocation() ||
512                 !structureAlignment.GetFeatures().front()->GetFeatures().front()->GetLocation().IsAlignment())
513             {
514                 throw "VAST data does not contain exactly one alignment of recognized format - "
515                     "possibly a problem with vastalign.cgi";
516             }
517 
518             if (structureAlignment.GetId().front()->GetMmdb_id().Get() != master->identifier->mmdbID)
519                 throw "Master structure MMDB ID mismatch - check to see if this structure been updated";
520 
521             // load in alignment, check format
522             const CChem_graph_alignment& alignment =
523                 structureAlignment.GetFeatures().front()->GetFeatures().front()->GetLocation().GetAlignment();
524             if (alignment.GetDimension() != 2 || alignment.GetAlignment().size() != 2 ||
525                 alignment.GetBiostruc_ids().size() != 2 ||
526                 !alignment.GetBiostruc_ids().front()->IsMmdb_id() ||
527                 alignment.GetBiostruc_ids().front()->GetMmdb_id().Get() != master->identifier->mmdbID ||
528                 !alignment.GetBiostruc_ids().back()->IsMmdb_id() ||
529                 alignment.GetBiostruc_ids().back()->GetMmdb_id().Get() != (*s)->identifier->mmdbID ||
530                 !alignment.GetAlignment().front()->IsResidues() ||
531                 !alignment.GetAlignment().front()->GetResidues().IsInterval() ||
532                 !alignment.GetAlignment().back()->IsResidues() ||
533                 !alignment.GetAlignment().back()->GetResidues().IsInterval() ||
534                 alignment.GetAlignment().front()->GetResidues().GetInterval().size() !=
535                     alignment.GetAlignment().back()->GetResidues().GetInterval().size())
536             {
537                 throw "Unrecognized VAST data format";
538             }
539 
540             // construct alignment from residue intervals
541             CResidue_pntrs::TInterval::const_iterator i, j,
542                 ie = alignment.GetAlignment().front()->GetResidues().GetInterval().end();
543             for (i=alignment.GetAlignment().front()->GetResidues().GetInterval().begin(),
544                  j=alignment.GetAlignment().back()->GetResidues().GetInterval().begin(); i!=ie; ++i, ++j)
545             {
546                 if ((*i)->GetMolecule_id().Get() != master->identifier->moleculeID ||
547                     (*j)->GetMolecule_id().Get() != (*s)->identifier->moleculeID)
548                 {
549                     throw "Mismatch in molecule ids in alignment interval block";
550                 }
551                 UngappedAlignedBlock *newBlock = new UngappedAlignedBlock(newAlignment);
552                 newBlock->SetRangeOfRow(0, (*i)->GetFrom().Get() - 1, (*i)->GetTo().Get() - 1);
553                 newBlock->SetRangeOfRow(1, (*j)->GetFrom().Get() - 1, (*j)->GetTo().Get() - 1);
554                 newBlock->width = (*i)->GetTo().Get() - (*i)->GetFrom().Get() + 1;
555                 newAlignment->AddAlignedBlockAtEnd(newBlock);
556             }
557 
558             // add structure alignment to list
559             if (alignment.GetTransform().size() == 1)
560             {
561                 structureAlignments->resize(structureAlignments->size() + 1);
562                 structureAlignments->back().structureAlignment =
563                     structureAlignment.SetFeatures().front()->SetFeatures().front();
564                 structureAlignments->back().masterDomainID =
565                     structureAlignment.GetFeatures().front()->GetId().Get();
566                 structureAlignments->back().dependentDomainID =
567                     structureAlignment.GetFeatures().front()->GetFeatures().front()->GetId().Get();
568             } else
569                 throw "No structure alignment in VAST data blob";
570 
571         } catch (const char *err) {
572             ERRORMSG("Failed to import VAST alignment: " << err);
573         }
574 
575         // finalize alignment
576         if (!newAlignment->AddUnalignedBlocks() || !newAlignment->UpdateBlockMapAndColors(false)) {
577             ERRORMSG("MakeEmptyAlignment() - error finalizing alignment");
578             delete newAlignment;
579             continue;
580         }
581         newAlignments->push_back(newAlignment);
582     }
583 }
584 
ImportStructure(void)585 void UpdateViewer::ImportStructure(void)
586 {
587     // determine the master sequence for new alignments
588     const Sequence *master = GetMasterSequence();
589     if (!master) return;
590     if (master->parentSet->objects.size() == 1 && master->parentSet->frameMap.size() != 1) {
591         ERRORMSG("Can't import another structure when current structure has multiple models");
592         return;
593     }
594 
595     // choose import type for structure
596     static const wxString choiceStrings[] = { "Via Network", "From a File" };
597     enum choiceValues { FROM_NETWORK=0, FROM_FILE, N_CHOICES };
598     int importFrom = wxGetSingleChoiceIndex(
599         "From what source would you like to import the structure?", "Select Import Source",
600         N_CHOICES, choiceStrings, *viewerWindow);
601     if (importFrom < 0) return;     // cancelled
602 
603     CRef < CBiostruc > biostruc;
604     BioseqRefList bioseqs;
605 
606     if (importFrom == FROM_NETWORK) {
607         wxString id = wxGetTextFromUser("Enter a PDB or MMDB ID:", "Input Identifier", "", *viewerWindow);
608         biostruc.Reset(new CBiostruc());
609         if (!LoadStructureViaCache(WX_TO_STD(id),
610                 (master->parentSet->isAlphaOnly ? eModel_type_ncbi_backbone : eModel_type_ncbi_all_atom),
611                 0,
612                 biostruc, &bioseqs)) {
613             ERRORMSG("Failed to load structure " << id.c_str());
614             return;
615         }
616     }
617 
618     else if (importFrom == FROM_FILE) {
619         string filename = WX_TO_STD(wxFileSelector("Choose a single-structure file:",
620             GetUserDir().c_str(), "", "", "*.*", wxFD_OPEN | wxFD_FILE_MUST_EXIST, *viewerWindow));
621         if (filename.size() == 0) return;
622         bool readOK = false;
623         string err;
624         TRACEMSG("trying to read file '" << filename << "' as binary mime");
625         CRef < CNcbi_mime_asn1 > mime(new CNcbi_mime_asn1());
626         SetDiagPostLevel(eDiag_Fatal); // ignore all but Fatal errors while reading data
627         readOK = ReadASNFromFile(filename.c_str(), mime.GetPointer(), true, &err);
628         SetDiagPostLevel(eDiag_Info);
629         if (!readOK) {
630             TRACEMSG("error: " << err);
631             TRACEMSG("trying to read file '" << filename << "' as ascii mime");
632             mime.Reset(new CNcbi_mime_asn1());
633             SetDiagPostLevel(eDiag_Fatal); // ignore all but Fatal errors while reading data
634             readOK = ReadASNFromFile(filename.c_str(), mime.GetPointer(), false, &err);
635             SetDiagPostLevel(eDiag_Info);
636         }
637         if (!readOK) {
638             TRACEMSG("error: " << err);
639             ERRORMSG("Couldn't read structure from " << filename);
640             return;
641         }
642 
643         ExtractBiostrucAndBioseqs(*mime, biostruc, &bioseqs);
644     }
645 
646     int mmdbID;
647     if (biostruc->GetId().size() == 0 || !biostruc->GetId().front()->IsMmdb_id()) {
648         ERRORMSG("Can't get MMDB ID from loaded structure");
649         return;
650     }
651     mmdbID = biostruc->GetId().front()->GetMmdb_id().Get();
652 
653     // make sure Biostruc is of correct type
654     if (master->molecule) {
655         if (biostruc->GetModel().size() != 1 ||
656             (master->parentSet->isAlphaOnly &&
657                 biostruc->GetModel().front()->GetType() != eModel_type_ncbi_backbone) ||
658             (!master->parentSet->isAlphaOnly &&
659                 biostruc->GetModel().front()->GetType() != eModel_type_ncbi_all_atom)) {
660             ERRORMSG("Biostruc does not match current data - should be "
661                 << (master->parentSet->isAlphaOnly ? "alpha-only" : "one-coordinate-per-atom") << " model");
662             return;
663         }
664     }
665 
666     // make list of protein chains in this structure
667     vector < pair < const CSeq_id * , string > > chains;  // holds Seq-id and chain name
668     map < const CSeq_id * , int > moleculeIDs;          // maps Seq-id -> molecule ID within MMDB object
669     CBiostruc_graph::TDescr::const_iterator d, de;
670     CBiostruc_graph::TMolecule_graphs::const_iterator
671         m, me = biostruc->GetChemical_graph().GetMolecule_graphs().end();
672     for (m=biostruc->GetChemical_graph().GetMolecule_graphs().begin(); m!=me; ++m) {
673         bool isProtein = false;
674         const CSeq_id *sid = NULL;
675         char name = 0;
676         string full_name = "";
677 
678         // check descr for chain name/type
679         de = (*m)->GetDescr().end();
680         for (d=(*m)->GetDescr().begin(); d!=de; ++d) {
681             if ((*d)->IsName()) {
682                 full_name = (*d)->GetName();  // long chain-id
683                 name = (*d)->GetName()[0];  // this only works for single-char chain-ids
684             }
685             else if ((*d)->IsMolecule_type() &&
686                      (*d)->GetMolecule_type() == CBiomol_descr::eMolecule_type_protein)
687                 isProtein = true;
688             if (isProtein && (full_name != "")) break;
689         }
690 
691         // get gi
692         if ((*m)->IsSetSeq_id())
693             sid = &((*m)->GetSeq_id());
694 
695         // add protein to list
696         if (isProtein && (full_name != "" ) && sid != NULL) {
697             moleculeIDs[sid] = (*m)->GetId().Get();
698             chains.push_back(make_pair(sid, full_name));
699         }
700     }
701     if (chains.size() == 0) {
702         ERRORMSG("No protein chains found in this structure!");
703         return;
704     }
705 
706     // get protein name (PDB identifier)
707     string pdbID;
708     CBiostruc::TDescr::const_iterator n, ne = biostruc->GetDescr().end();
709     for (n=biostruc->GetDescr().begin(); n!=ne; ++n) {
710         if ((*n)->IsName()) {
711             pdbID = (*n)->GetName();    // assume first 'name' is pdb id
712             break;
713         }
714     }
715 
716     // which chains to align?
717     vector < const CSeq_id * > sids;
718     vector < string > sids_chain_ids;
719     if (chains.size() == 1) {
720         sids.push_back(chains[0].first);
721         sids_chain_ids.push_back(chains[0].second);
722     } else {
723         wxString *choices = new wxString[chains.size()];
724         int choice;
725         for (choice=0; choice<(int)chains.size(); ++choice)
726             choices[choice].Printf("%s_%s %s",
727                 pdbID.c_str(), chains[choice].second, chains[choice].first->GetSeqIdString().c_str());
728         wxArrayInt selections;
729 //        selections.Add(0);    // select first by default
730         int nsel = wxGetSelectedChoices(selections, "Which chain(s) do you want to align?",
731             "Select Chain", chains.size(), choices, *viewerWindow);
732         if (nsel == 0) return;
733         for (choice = 0; choice < nsel; ++choice) {
734             sids.push_back(chains[selections[choice]].first);               // chains[i].first: pdb-id
735             sids_chain_ids.push_back(chains[selections[choice]].second);    // chains[i].second: chain-id
736         }
737     }
738 
739     SequenceList newSequences;
740     SequenceSet::SequenceList::const_iterator s, se = master->parentSet->sequenceSet->sequences.end();
741     map < const Sequence * , const CSeq_id * > seq2id;
742     for (unsigned int j=0; j<sids.size(); ++j) {
743 
744         // first check to see if this sequence is already present
745         for (s=master->parentSet->sequenceSet->sequences.begin(); s!=se; ++s) {
746             if ((*s)->identifier->MatchesSeqId(*(sids[j]))) {
747                 TRACEMSG("using existing sequence for " << sids[j]->GetSeqIdString());
748                 newSequences.push_back(*s);
749                 seq2id[*s] = sids[j];
750                 break;
751             }
752         }
753 
754         if (s == se) {
755             // if not, find the sequence in the list from the structure file
756             BioseqRefList::iterator b, be = bioseqs.end();
757             for (b=bioseqs.begin(); b!=be; ++b) {
758                 CBioseq::TId::const_iterator i, ie = (*b)->GetId().end();
759                 for (i=(*b)->GetId().begin(); i!=ie; ++i) {
760                     if ((*i)->Match(*(sids[j])))
761                         break;
762                 }
763                 if (i != ie) {
764 
765                     //=======================================================================================================
766                     // This is for long chain-id testing. This is section 1.
767                     // Get the gi of the (**b) bioseq to use later.
768                     // This is so we can find this bioseq later.
769                     //=======================================================================================================
770                     CBioseq::TId IDs = (*b)->GetId();
771                     CBioseq::TId::const_iterator i;
772                     TGi bioseq_GI;
773                     for (i = IDs.begin(); i != IDs.end(); ++i) {
774                         if ((*i)->IsGi()) {
775                             bioseq_GI = (*i)->GetGi();
776                         }
777                     }
778                     //=======================================================================================================
779                     // end of long chain-id testing section 1.
780                     //=======================================================================================================
781 
782                     const Sequence *sequence = master->parentSet->FindOrCreateSequence(**b);
783                     if (sequence) {
784 
785                         //=======================================================================================================
786                         // This is for long chain-id testing. This is section 2.
787                         // Some of our test data has biostrucs with long chain-ids and bioseqs with single-char chain-ids.
788                         // Replace the single-char chain-id in this sequence (from the bioseqs)
789                         // with a long chain-id from sids (from the biostruc) when a matching gi is found
790                         //=======================================================================================================
791                         // iterate through sids (these come from the biostruc)
792                         for (unsigned int j=0; j<sids.size(); ++j) {
793                             // if a matching gi is found
794                             TGi sequence_GI = sequence->identifier->gi;
795                             TGi sid_GI = sids[j]->GetGi();
796                             if (bioseq_GI == sid_GI) {
797                                 // replace the pdb-id in the newSequence with the pdb-id from the biostruc
798                                 (const_cast<MoleculeIdentifier*>(sequence->identifier))->pdbID = pdbID;
799                                 (const_cast<MoleculeIdentifier*>(sequence->identifier))->pdbChain = sids_chain_ids[j];
800                                 // check to see if they match
801                                 string test1 = sequence->identifier->ToString();
802                                 string test2 = sids[j]->GetSeqIdString();
803                             }
804                         }
805                         //=======================================================================================================
806                         // end of long chain-id testing section 2.
807                         //=======================================================================================================
808 
809                         TRACEMSG("found Bioseq for " << sids[j]->GetSeqIdString());
810 
811                         newSequences.push_back(sequence);
812                         seq2id[sequence] = sids[j];
813                         break;
814                     }
815                 }
816             }
817             if (b == be) {
818                 ERRORMSG("ImportStructure() - can't find " << sids[j]->GetSeqIdString() << " in bioseq list!");
819 //                return;
820             }
821         }
822     }
823 
824     SequenceList::const_iterator w, we = newSequences.end();
825     for (w=newSequences.begin(); w!=we; ++w) {
826 
827         // add MMDB id tag to Bioseq if not present already
828         (*w)->AddMMDBAnnotTag(mmdbID);
829 
830         // add MMDB and molecule id to identifier if not already set
831         if ((*w)->identifier->mmdbID == MoleculeIdentifier::VALUE_NOT_SET) {
832             (const_cast<MoleculeIdentifier*>((*w)->identifier))->mmdbID = mmdbID;
833         } else {
834             if ((*w)->identifier->mmdbID != mmdbID)
835                 ERRORMSG("MMDB ID mismatch in sequence " << (*w)->identifier->ToString()
836                     << "; " << (*w)->identifier->mmdbID << " vs " << mmdbID);
837         }
838         if (moleculeIDs.find(seq2id[*w]) != moleculeIDs.end()) {
839             if ((*w)->identifier->moleculeID == MoleculeIdentifier::VALUE_NOT_SET) {
840                 (const_cast<MoleculeIdentifier*>((*w)->identifier))->moleculeID =
841                     moleculeIDs[seq2id[*w]];
842             } else {
843                 if ((*w)->identifier->moleculeID != moleculeIDs[seq2id[*w]])
844                     ERRORMSG("Molecule ID mismatch in sequence " << (*w)->identifier->ToString());
845             }
846         } else
847             ERRORMSG("No matching id for MMDB sequence " << (*w)->identifier->ToString());
848     }
849 
850     // create VAST alignments (or null-alignments if master is unstructured)
851     AlignmentList newAlignments;
852     if (master->molecule) {
853         int masterFrom = -1, masterTo = -1;
854         const BlockMultipleAlignment *multiple = alignmentManager->GetCurrentMultipleAlignment();
855         if (multiple) {
856             BlockMultipleAlignment::UngappedAlignedBlockList aBlocks;
857             multiple->GetUngappedAlignedBlocks(&aBlocks);
858             if (aBlocks.size() > 0) {
859                 masterFrom = aBlocks.front()->GetRangeOfRow(0)->from;
860                 masterTo = aBlocks.back()->GetRangeOfRow(0)->to;
861             }
862         }
863         GetVASTAlignments(newSequences, master, &newAlignments,
864             &pendingStructureAlignments, masterFrom, masterTo);
865     } else {
866         SequenceList::const_iterator s, se = newSequences.end();
867         for (s=newSequences.begin(); s!=se; ++s) {
868             // create initially empty alignment
869             BlockMultipleAlignment::SequenceList *seqs = new BlockMultipleAlignment::SequenceList(2);
870             (*seqs)[0] = master;
871             (*seqs)[1] = *s;
872             BlockMultipleAlignment *newAlignment =
873                 new BlockMultipleAlignment(seqs, master->parentSet->alignmentManager);
874             // finalize alignment
875             if (!newAlignment->AddUnalignedBlocks() || !newAlignment->UpdateBlockMapAndColors(false)) {
876                 ERRORMSG("ImportStructure() - error finalizing alignment");
877                 delete newAlignment;
878                 continue;
879             }
880             newAlignments.push_back(newAlignment);
881         }
882     }
883 
884     // add new alignment to update list
885     if (newAlignments.size() == newSequences.size())
886         AddAlignments(newAlignments);
887     else {
888         ERRORMSG("UpdateViewer::ImportStructure() - no new alignments were created");
889         DELETE_ALL_AND_CLEAR(newAlignments, AlignmentList);
890         return;
891     }
892 
893     // will add Biostruc and structure alignments to ASN data later on, after merge
894     if (master->molecule) {
895         pendingStructures.push_back(biostruc);
896         wxMessageBox("The structure has been successfully imported! However, it will not appear until you\n"
897             "save this data to a file and then re-load it in a new session. And depending on the type\n"
898             "of data, it still might not appear unless the corresponding new pairwise alignment has\n"
899             "been merged into the multiple alignment.",
900             "Structure Added", wxOK | wxICON_INFORMATION, *viewerWindow);
901     } else {
902         // sort of assumes alignment with non-structured master is CDD, or uses cache mechanism...
903         wxMessageBox("The selected sequence from this structure has been imported! However, the structure\n"
904             "itself can not appear in the structure window until the sequence has been merged into the\n"
905             "multiple alignment and the alignment has been remastered to a structured sequence.",
906             "Sequence Added", wxOK | wxICON_INFORMATION, *viewerWindow);
907     }
908 }
909 
SavePendingStructures(void)910 void UpdateViewer::SavePendingStructures(void)
911 {
912     TRACEMSG("saving pending imported structures and structure alignments");
913     StructureSet *sSet =
914         (alignmentManager->GetCurrentMultipleAlignment() &&
915          alignmentManager->GetCurrentMultipleAlignment()->GetMaster()) ?
916          alignmentManager->GetCurrentMultipleAlignment()->GetMaster()->parentSet : NULL;
917     if (!sSet) return;
918     while (pendingStructures.size() > 0) {
919         if (!sSet->AddBiostrucToASN(pendingStructures.front().GetPointer())) {
920             ERRORMSG("UpdateViewer::SavePendingStructures() - error saving Biostruc");
921             return;
922         }
923         pendingStructures.pop_front();
924     }
925     while (pendingStructureAlignments.size() > 0) {
926         sSet->AddStructureAlignment(pendingStructureAlignments.front().structureAlignment.GetPointer(),
927             pendingStructureAlignments.front().masterDomainID,
928             pendingStructureAlignments.front().dependentDomainID);
929         pendingStructureAlignments.pop_front();
930     }
931 }
932 
BlastUpdate(BlockMultipleAlignment * alignment,bool usePSSMFromMultiple)933 void UpdateViewer::BlastUpdate(BlockMultipleAlignment *alignment, bool usePSSMFromMultiple)
934 {
935     const BlockMultipleAlignment *multipleForPSSM = alignmentManager->GetCurrentMultipleAlignment();
936     if (usePSSMFromMultiple && !multipleForPSSM) {
937         ERRORMSG("Can't do BLAST/PSSM when no multiple alignment is present");
938         return;
939     }
940 
941     // find alignment, and replace it with BLAST result
942     AlignmentList::iterator a, ae = GetCurrentAlignments().end();
943     for (a=GetCurrentAlignments().begin(); a!=ae; ++a) {
944         if (*a != alignment) continue;
945 
946         // run BLAST between master and first dependent (should be only one dependent...)
947         BLASTer::AlignmentList toRealign;
948         toRealign.push_back(alignment);
949         BLASTer::AlignmentList newAlignments;
950         alignmentManager->blaster->CreateNewPairwiseAlignmentsByBlast(
951             multipleForPSSM, toRealign, &newAlignments, usePSSMFromMultiple);
952         if (newAlignments.size() != 1) {
953             ERRORMSG("UpdateViewer::BlastUpdate() - CreateNewPairwiseAlignmentsByBlast() failed");
954             return;
955         }
956         if (newAlignments.front()->NAlignedBlocks() == 0) {
957             WARNINGMSG("alignment unchanged");
958             delete newAlignments.front();
959             return;
960         }
961 
962         // replace alignment with BLAST result
963         TRACEMSG("BLAST succeeded - replacing alignment");
964         delete alignment;
965         *a = newAlignments.front();
966         break;
967     }
968 
969     // recreate alignment display with new alignment
970     AlignmentList copy = GetCurrentAlignments();
971     GetCurrentAlignments().clear();
972     GetCurrentDisplay()->Empty();
973     AddAlignments(copy);
974 //    (*viewerWindow)->ScrollToColumn(GetCurrentDisplay()->GetStartingColumn());
975 }
976 
MapDependentToMaster(const BlockMultipleAlignment * alignment,int dependentRow,vector<int> * dependent2master)977 static void MapDependentToMaster(const BlockMultipleAlignment *alignment,
978     int dependentRow, vector < int > *dependent2master)
979 {
980     dependent2master->clear();
981     dependent2master->resize(alignment->GetSequenceOfRow(dependentRow)->Length(), -1);
982     BlockMultipleAlignment::UngappedAlignedBlockList uaBlocks;
983     alignment->GetUngappedAlignedBlocks(&uaBlocks);
984     BlockMultipleAlignment::UngappedAlignedBlockList::const_iterator b, be = uaBlocks.end();
985     for (b=uaBlocks.begin(); b!=be; ++b) {
986         const Block::Range
987             *masterRange = (*b)->GetRangeOfRow(0),
988             *dependentRange = (*b)->GetRangeOfRow(dependentRow);
989         for (unsigned int i=0; i<(*b)->width; ++i)
990             (*dependent2master)[dependentRange->from + i] = masterRange->from + i;
991     }
992 }
993 
GetAlignmentByBestNeighbor(const BlockMultipleAlignment * multiple,const BLASTer::AlignmentList rowAlignments)994 static BlockMultipleAlignment * GetAlignmentByBestNeighbor(
995     const BlockMultipleAlignment *multiple,
996     const BLASTer::AlignmentList rowAlignments)
997 {
998     if (rowAlignments.size() != multiple->NRows()) {
999         ERRORMSG("GetAlignmentByBestNeighbor: wrong # alignments");
1000         return NULL;
1001     }
1002 
1003     // find best-scoring aligment above some threshold; assumes E-value is in RowDouble
1004     const BlockMultipleAlignment *bestMatchFromMultiple = NULL;
1005     unsigned int b, bestRow = 0;
1006     BLASTer::AlignmentList::const_iterator p, pe = rowAlignments.end();
1007     for (b=0, p=rowAlignments.begin(); p!=pe; ++b, ++p) {
1008         if (!bestMatchFromMultiple || (*p)->GetRowDouble(0) < bestMatchFromMultiple->GetRowDouble(0)) {
1009             bestMatchFromMultiple = *p;
1010             bestRow = b;
1011         }
1012     }
1013     if (!bestMatchFromMultiple || bestMatchFromMultiple->GetRowDouble(0) > 0.000001) {
1014         WARNINGMSG("GetAlignmentByBestNeighbor: no significant hit found");
1015         return NULL;
1016     }
1017     INFOMSG("Closest neighbor from multiple: sequence "
1018         << bestMatchFromMultiple->GetMaster()->identifier->ToString()
1019         << ", E-value: " << bestMatchFromMultiple->GetRowDouble(0));
1020     GlobalMessenger()->RemoveAllHighlights(true);
1021     GlobalMessenger()->AddHighlights(
1022         bestMatchFromMultiple->GetMaster(), 0, bestMatchFromMultiple->GetMaster()->Length()-1);
1023 
1024     // if the best match is the multiple's master, then just use that alignment
1025     if (bestRow == 0) return bestMatchFromMultiple->Clone();
1026 
1027     // otherwise, use best match as a guide alignment to align the dependent with the multiple's master
1028     vector < int > import2dependent, dependent2master;
1029     MapDependentToMaster(bestMatchFromMultiple, 1, &import2dependent);
1030     MapDependentToMaster(multiple, bestRow, &dependent2master);
1031 
1032     const Sequence *importSeq = bestMatchFromMultiple->GetSequenceOfRow(1);
1033     BlockMultipleAlignment::SequenceList *seqs = new BlockMultipleAlignment::SequenceList(2);
1034     (*seqs)[0] = multiple->GetSequenceOfRow(0);
1035     (*seqs)[1] = importSeq;
1036     BlockMultipleAlignment *newAlignment =
1037         new BlockMultipleAlignment(seqs, importSeq->parentSet->alignmentManager);
1038 
1039     // create maximally sized blocks
1040     int masterStart=-1, importStart, importLoc, dependentLoc, masterLoc, len=0;
1041     for (importStart=-1, importLoc=0; importLoc<=(int)importSeq->Length(); ++importLoc) {
1042 
1043         // map import -> dependent -> master
1044         dependentLoc = (importLoc < (int)importSeq->Length()) ? import2dependent[importLoc] : -1;
1045         masterLoc = (dependentLoc >= 0) ? dependent2master[dependentLoc] : -1;
1046 
1047         // if we're currently inside a block..
1048         if (importStart >= 0) {
1049 
1050             // add another residue to a continuously aligned block
1051             if (masterLoc >= 0 && masterLoc-masterStart == importLoc-importStart) {
1052                 ++len;
1053             }
1054 
1055             // terminate block
1056             else {
1057                 UngappedAlignedBlock *newBlock = new UngappedAlignedBlock(newAlignment);
1058                 newBlock->SetRangeOfRow(0, masterStart, masterStart + len - 1);
1059                 newBlock->SetRangeOfRow(1, importStart, importStart + len - 1);
1060                 newBlock->width = len;
1061                 newAlignment->AddAlignedBlockAtEnd(newBlock);
1062                 importStart = -1;
1063             }
1064         }
1065 
1066         // search for start of block
1067         if (importStart < 0) {
1068             if (masterLoc >= 0) {
1069                 masterStart = masterLoc;
1070                 importStart = importLoc;
1071                 len = 1;
1072             }
1073         }
1074     }
1075 
1076     // finalize and and add new alignment to list
1077     if (!newAlignment->AddUnalignedBlocks() || !newAlignment->UpdateBlockMapAndColors(false)) {
1078         ERRORMSG("error finalizing alignment");
1079         delete newAlignment;
1080         return NULL;
1081     }
1082 
1083     return newAlignment;
1084 }
1085 
BlastNeighbor(BlockMultipleAlignment * update)1086 void UpdateViewer::BlastNeighbor(BlockMultipleAlignment *update)
1087 {
1088     const BlockMultipleAlignment *multiple = alignmentManager->GetCurrentMultipleAlignment();
1089     if (!multiple) {
1090         ERRORMSG("Can't do BLAST Neighbor when no multiple alignment is present");
1091         return;
1092     }
1093     BlockMultipleAlignment::UngappedAlignedBlockList uaBlocks;
1094     multiple->GetUngappedAlignedBlocks(&uaBlocks);
1095     if (uaBlocks.size() == 0) {
1096         ERRORMSG("Can't do BLAST Neighbor with null multiple alignment");
1097         return;
1098     }
1099     const Sequence *updateSeq = update->GetSequenceOfRow(1);
1100 
1101     // find alignment, to replace it with BLAST result
1102     AlignmentList::iterator a, ae = GetCurrentAlignments().end();
1103     for (a=GetCurrentAlignments().begin(); a!=ae; ++a)
1104         if (*a == update) break;
1105     if (a == GetCurrentAlignments().end()) return;
1106 
1107     // do BLAST-2-sequences between update dependent and each sequence from the multiple
1108     BLASTer::AlignmentList newAlignments;
1109     for (unsigned int row=0; row<multiple->NRows(); ++row) {
1110         BLASTer::AlignmentList toRealign;
1111         BlockMultipleAlignment::SequenceList *seqs = new BlockMultipleAlignment::SequenceList(2);
1112         (*seqs)[0] = multiple->GetSequenceOfRow(row);
1113         (*seqs)[1] = updateSeq;
1114         BlockMultipleAlignment *newAlignment =
1115             new BlockMultipleAlignment(seqs, updateSeq->parentSet->alignmentManager);
1116         if (newAlignment->AddUnalignedBlocks() && newAlignment->UpdateBlockMapAndColors(false))
1117         {
1118             int excess = 0;
1119             if (!RegistryGetInteger(REG_ADVANCED_SECTION, REG_FOOTPRINT_RES, &excess))
1120                 WARNINGMSG("Can't get footprint excess residues from registry");
1121             newAlignment->alignMasterFrom = uaBlocks.front()->GetRangeOfRow(row)->from - excess;
1122             if (newAlignment->alignMasterFrom < 0)
1123                 newAlignment->alignMasterFrom = 0;
1124             newAlignment->alignMasterTo = uaBlocks.back()->GetRangeOfRow(row)->to + excess;
1125             if (newAlignment->alignMasterTo >= ((int)((*seqs)[0]->Length())))
1126                 newAlignment->alignMasterTo = (*seqs)[0]->Length() - 1;
1127             newAlignment->alignDependentFrom = update->alignDependentFrom;
1128             newAlignment->alignDependentTo = update->alignDependentTo;
1129             toRealign.push_back(newAlignment);
1130         } else {
1131             ERRORMSG("error finalizing alignment");
1132             delete newAlignment;
1133         }
1134         BLASTer::AlignmentList result;
1135         SetDiagPostLevel(eDiag_Error);
1136         alignmentManager->blaster->CreateNewPairwiseAlignmentsByBlast(NULL, toRealign, &result, false);
1137         SetDiagPostLevel(eDiag_Info);
1138         DELETE_ALL_AND_CLEAR(toRealign, BLASTer::AlignmentList);
1139         if (result.size() != 1) {
1140             ERRORMSG("UpdateViewer::BlastUpdate() - CreateNewPairwiseAlignmentsByBlast() failed");
1141             return;
1142         }
1143         newAlignments.push_back(result.front());
1144     }
1145 
1146     // replace alignment with result
1147     BlockMultipleAlignment *alignmentByNeighbor = GetAlignmentByBestNeighbor(multiple, newAlignments);
1148     DELETE_ALL_AND_CLEAR(newAlignments, BLASTer::AlignmentList);
1149     if (!alignmentByNeighbor) {
1150         WARNINGMSG("alignment unchanged");
1151         return;
1152     }
1153     TRACEMSG("BLAST Neighbor succeeded - replacing alignment");
1154     delete update;
1155     *a = alignmentByNeighbor;
1156 
1157     // recreate alignment display with new alignment
1158     AlignmentList copy = GetCurrentAlignments();
1159     GetCurrentAlignments().clear();
1160     GetCurrentDisplay()->Empty();
1161     AddAlignments(copy);
1162 //    (*viewerWindow)->ScrollToColumn(GetCurrentDisplay()->GetStartingColumn());
1163 }
1164 
1165 // comparison function: if CompareRows(a, b) == true, then row a moves up
1166 typedef bool (*CompareUpdates)(BlockMultipleAlignment *a, BlockMultipleAlignment *b);
1167 
CompareUpdatesByIdentifier(BlockMultipleAlignment * a,BlockMultipleAlignment * b)1168 static bool CompareUpdatesByIdentifier(BlockMultipleAlignment *a, BlockMultipleAlignment *b)
1169 {
1170     return MoleculeIdentifier::CompareIdentifiers(
1171         a->GetSequenceOfRow(1)->identifier, // sort by first dependent row
1172         b->GetSequenceOfRow(1)->identifier);
1173 }
1174 
1175 // assumes scores are stored in the master row double
CompareUpdatesByScore(BlockMultipleAlignment * a,BlockMultipleAlignment * b)1176 static bool CompareUpdatesByScore(BlockMultipleAlignment *a, BlockMultipleAlignment *b)
1177 {
1178     return (a->GetRowDouble(0) > b->GetRowDouble(0));    // sort in descending order of score
1179 }
1180 
1181 static CompareUpdates updateComparisonFunction = NULL;
1182 
SortByIdentifier(void)1183 void UpdateViewer::SortByIdentifier(void)
1184 {
1185     TRACEMSG("sorting updates by identifier");
1186     updateComparisonFunction = CompareUpdatesByIdentifier;
1187     SortUpdates();
1188 }
1189 
SortByPSSM(void)1190 void UpdateViewer::SortByPSSM(void)
1191 {
1192     TRACEMSG("sorting updates by PSSM");
1193 
1194     const BlockMultipleAlignment *multiple = alignmentManager->GetCurrentMultipleAlignment();
1195     if (!multiple) {
1196         ERRORMSG("Can't do sort by PSSM when no multiple alignment is present");
1197         return;
1198     }
1199 
1200     AlignmentList& currentAlignments = GetCurrentAlignments();
1201     AlignmentList::const_iterator a, ae = currentAlignments.end();
1202     for (a=currentAlignments.begin(); a!=ae; ++a) {
1203 
1204         const Sequence *updateSeq = (*a)->GetSequenceOfRow(1);
1205         BlockMultipleAlignment::UngappedAlignedBlockList uaBlocks;
1206         (*a)->GetUngappedAlignedBlocks(&uaBlocks);
1207         BlockMultipleAlignment::UngappedAlignedBlockList::const_iterator b, be = uaBlocks.end();
1208 
1209         // compute scores of aligned residues vs PSSM from the multiple alignment (not the PSSM for the 2-row update alignment)
1210         int score = 0;
1211         for (b=uaBlocks.begin(); b!=be; ++b) {
1212             const Block::Range *m = (*b)->GetRangeOfRow(0), *u = (*b)->GetRangeOfRow(1);
1213             for (unsigned int i=0; i<(*b)->width; ++i)
1214                 score += multiple->GetPSSM().GetPSSMScore(
1215                     LookupNCBIStdaaNumberFromCharacter(updateSeq->sequenceString[u->from + i]),
1216                     m->from + i);
1217         }
1218 
1219         // store score in update alignment
1220         (*a)->SetRowStatusLine(0, string("Score vs. PSSM: ") + NStr::IntToString(score));
1221         (*a)->SetRowStatusLine(1, (*a)->GetRowStatusLine(0));
1222         (*a)->SetRowDouble(0, score);
1223         (*a)->SetRowDouble(1, score);
1224     }
1225 
1226     updateComparisonFunction = CompareUpdatesByScore;
1227     SortUpdates();
1228 }
1229 
SortUpdates(void)1230 void UpdateViewer::SortUpdates(void)
1231 {
1232     if (!updateComparisonFunction) {
1233         ERRORMSG("UpdateViewer::SortUpdates() - must first set comparison function");
1234         return;
1235     }
1236 
1237     // make vector of alignments
1238     AlignmentList& currentAlignments = GetCurrentAlignments();
1239     if (currentAlignments.size() < 2) return;
1240     vector < BlockMultipleAlignment * > sortedVector(currentAlignments.size());
1241     AlignmentList::const_iterator a, ae = currentAlignments.end();
1242     unsigned int i = 0;
1243     for (a=currentAlignments.begin(); a!=ae; ++a) sortedVector[i++] = *a;
1244 
1245     // sort them
1246     stable_sort(sortedVector.begin(), sortedVector.end(), updateComparisonFunction);
1247     updateComparisonFunction = NULL;
1248 
1249     // replace window contents with sorted list
1250     currentAlignments.clear();
1251     GetCurrentDisplay()->Empty();
1252 
1253     AlignmentList sortedList;
1254     for (i=0; i<sortedVector.size(); ++i) sortedList.push_back(sortedVector[i]);
1255     AddAlignments(sortedList);
1256 }
1257 
1258 END_SCOPE(Cn3D)
1259