1 /*  $Id: structure_set.cpp 591063 2019-08-09 15:27:34Z wangjiy $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors:  Paul Thiessen
27 *
28 * File Description:
29 *      Classes to hold sets of structure data
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbistre.hpp>
37 #include <corelib/ncbi_limits.h>
38 #include <corelib/ncbistl.hpp>
39 
40 #include <deque>
41 
42 #include <objects/ncbimime/Biostruc_seq.hpp>
43 #include <objects/ncbimime/Biostruc_seqs.hpp>
44 #include <objects/ncbimime/Biostruc_align.hpp>
45 #include <objects/ncbimime/Entrez_general.hpp>
46 #include <objects/mmdb1/Biostruc_id.hpp>
47 #include <objects/mmdb1/Mmdb_id.hpp>
48 #include <objects/mmdb1/Biostruc_descr.hpp>
49 #include <objects/mmdb2/Biostruc_model.hpp>
50 #include <objects/mmdb2/Model_type.hpp>
51 #include <objects/mmdb3/Biostruc_feature.hpp>
52 #include <objects/mmdb3/Biostruc_feature_id.hpp>
53 #include <objects/mmdb3/Chem_graph_alignment.hpp>
54 #include <objects/mmdb3/Transform.hpp>
55 #include <objects/mmdb3/Move.hpp>
56 #include <objects/mmdb3/Trans_matrix.hpp>
57 #include <objects/mmdb3/Rot_matrix.hpp>
58 #include <objects/mmdb3/Chem_graph_pntrs.hpp>
59 #include <objects/mmdb3/Residue_pntrs.hpp>
60 #include <objects/mmdb3/Residue_interval_pntr.hpp>
61 #include <objects/seqset/Bioseq_set.hpp>
62 #include <objects/cn3d/Cn3d_style_dictionary.hpp>
63 #include <objects/cn3d/Cn3d_user_annotations.hpp>
64 #include <objects/seqalign/Dense_diag.hpp>
65 #include <objects/seqalign/Dense_seg.hpp>
66 #include <objects/mmdb3/Biostruc_feature_set_id.hpp>
67 #include <objects/mmdb1/Molecule_id.hpp>
68 #include <objects/mmdb1/Residue_id.hpp>
69 #include <objects/cdd/Reject_id.hpp>
70 #include <objects/cdd/Update_comment.hpp>
71 
72 #include "remove_header_conflicts.hpp"
73 
74 #include "structure_set.hpp"
75 #include "data_manager.hpp"
76 #include "coord_set.hpp"
77 #include "chemical_graph.hpp"
78 #include "atom_set.hpp"
79 #include "opengl_renderer.hpp"
80 #include "show_hide_manager.hpp"
81 #include "style_manager.hpp"
82 #include "sequence_set.hpp"
83 #include "alignment_set.hpp"
84 #include "alignment_manager.hpp"
85 #include "messenger.hpp"
86 #include "block_multiple_alignment.hpp"
87 #include "cn3d_tools.hpp"
88 #include "molecule_identifier.hpp"
89 #include "cn3d_cache.hpp"
90 #include "molecule.hpp"
91 #include "residue.hpp"
92 #include "show_hide_dialog.hpp"
93 
94 USING_NCBI_SCOPE;
95 USING_SCOPE(objects);
96 
97 
98 BEGIN_SCOPE(Cn3D)
99 
100 const unsigned int
101     StructureSet::ePSSMData                   = 0x01,
102     StructureSet::eRowOrderData               = 0x02,
103     StructureSet::eAnyAlignmentData           = 0x04,
104     StructureSet::eStructureAlignmentData     = 0x08,
105     StructureSet::eSequenceData               = 0x10,
106     StructureSet::eUpdateData                 = 0x20,
107     StructureSet::eStyleData                  = 0x40,
108     StructureSet::eUserAnnotationData         = 0x80,
109     StructureSet::eCDDData                    = 0x100,
110     StructureSet::eOtherData                  = 0x200;
111 
112 const unsigned int
113     StructureSet::eSelectProtein              = 0x01,
114     StructureSet::eSelectNucleotide           = 0x02,
115     StructureSet::eSelectHeterogen            = 0x04,
116     StructureSet::eSelectSolvent              = 0x08,
117     StructureSet::eSelectOtherMoleculesOnly   = 0x10;
118 
StructureSet(CNcbi_mime_asn1 * mime,unsigned int structureLimit,OpenGLRenderer * r)119 StructureSet::StructureSet(CNcbi_mime_asn1 *mime, unsigned int structureLimit, OpenGLRenderer *r) :
120     StructureBase(NULL), renderer(r)
121 {
122     dataManager = new ASNDataManager(mime);
123     Load(structureLimit);
124 }
125 
StructureSet(CCdd * cdd,unsigned int structureLimit,OpenGLRenderer * r)126 StructureSet::StructureSet(CCdd *cdd, unsigned int structureLimit, OpenGLRenderer *r) :
127     StructureBase(NULL), renderer(r)
128 {
129     dataManager = new ASNDataManager(cdd);
130     Load(structureLimit);
131 }
132 
LoadSequencesForSingleStructure(void)133 void StructureSet::LoadSequencesForSingleStructure(void)
134 {
135     sequenceSet = new SequenceSet(this, *(dataManager->GetSequences()));
136 
137     if (objects.size() > 1)
138         ERRORMSG("LoadSequencesForSingleStructure() called, but there is > 1 structure");
139     if (objects.size() != 1) return;
140 
141     // look for biopolymer molecules
142     ChemicalGraph::MoleculeMap::const_iterator m, me = objects.front()->graph->molecules.end();
143     SequenceSet::SequenceList::const_iterator s, se = sequenceSet->sequences.end();
144     for (m=objects.front()->graph->molecules.begin(); m!=me; ++m) {
145         if (!m->second->IsProtein() && !m->second->IsNucleotide()) continue;
146 
147         // find matching sequence for each biopolymer
148         for (s=sequenceSet->sequences.begin(); s!=se; ++s) {
149             if ((*s)->molecule != NULL) continue; // skip already-matched sequences
150 
151             if (m->second->identifier == (*s)->identifier) {
152 
153                 // verify length
154                 if (m->second->residues.size() != (*s)->Length()) {
155                     ERRORMSG(
156                         "LoadSequencesForSingleStructure() - length mismatch between sequence gi "
157                         << "and matching molecule " << m->second->identifier->ToString());
158                     continue;
159                 }
160                 TRACEMSG("matched sequence " << " gi " << (*s)->identifier->gi << " with object "
161                     << objects.front()->GetPDBID() << " moleculeID " << m->second->id);
162 
163                 (const_cast<Molecule*>(m->second))->sequence = *s;
164                 (const_cast<Sequence*>(*s))->molecule = m->second;
165                 break;
166             }
167         }
168         if (s == se)
169             ERRORMSG("LoadSequencesForSingleStructure() - can't find sequence for molecule "
170                 << m->second->identifier->ToString());
171     }
172 }
173 
LoadMaster(int masterMMDBID)174 bool StructureSet::LoadMaster(int masterMMDBID)
175 {
176     if (objects.size() > 0) return false;
177     TRACEMSG("loading master " << masterMMDBID);
178 
179     if (dataManager->GetMasterStructure()) {
180         objects.push_back(new StructureObject(this, *(dataManager->GetMasterStructure()), true));
181         if (masterMMDBID != MoleculeIdentifier::VALUE_NOT_SET && objects.front()->mmdbID != masterMMDBID)
182             ERRORMSG("StructureSet::LoadMaster() - mismatched master MMDB ID");
183     } else if (masterMMDBID != MoleculeIdentifier::VALUE_NOT_SET && dataManager->GetStructureList()) {
184         ASNDataManager::BiostrucList::const_iterator b, be = dataManager->GetStructureList()->end();
185         for (b=dataManager->GetStructureList()->begin(); b!=be; ++b) {
186             if ((*b)->GetId().front()->IsMmdb_id() &&
187                 (*b)->GetId().front()->GetMmdb_id().Get() == masterMMDBID) {
188                 objects.push_back(new StructureObject(this, **b, true));
189                 usedStructures[b->GetPointer()] = true;
190                 break;
191             }
192         }
193     }
194     if (masterMMDBID != MoleculeIdentifier::VALUE_NOT_SET && objects.size() == 0) {
195         CRef < CBiostruc > biostruc;
196         if (LoadStructureViaCache(NStr::IntToString(masterMMDBID), dataManager->GetBiostrucModelType(), 0, biostruc, NULL))
197             objects.push_back(new StructureObject(this, *biostruc, true));
198     }
199     return (objects.size() > 0);
200 }
201 
MatchSequenceToMoleculeInObject(const Sequence * seq,const StructureObject * obj,const Sequence ** seqHandle)202 bool StructureSet::MatchSequenceToMoleculeInObject(const Sequence *seq,
203     const StructureObject *obj, const Sequence **seqHandle)
204 {
205     ChemicalGraph::MoleculeMap::const_iterator m, me = obj->graph->molecules.end();
206     for (m=obj->graph->molecules.begin(); m!=me; ++m) {
207         if (!(m->second->IsProtein() || m->second->IsNucleotide())) continue;
208 
209         if (m->second->identifier == seq->identifier) {
210 
211             // verify length
212             if (m->second->residues.size() != seq->Length()) {
213                 ERRORMSG(
214                     "MatchSequenceToMoleculeInObject() - length mismatch between sequence gi "
215                     << "and matching molecule " << m->second->identifier->ToString());
216                 continue;
217             }
218             TRACEMSG("matched sequence " << " gi " << seq->identifier->gi << " with object "
219                 << obj->GetPDBID() << " moleculeID " << m->second->id);
220 
221             // sanity check
222             if (m->second->sequence) {
223                 ERRORMSG("Molecule " << m->second->identifier->ToString()
224                     << " already has an associated sequence");
225                 continue;
226             }
227 
228             // automatically duplicate Sequence if it's already associated with a molecule
229             if (seq->molecule) {
230                 TRACEMSG("duplicating sequence " << seq->identifier->ToString());
231 				SequenceSet *seqSetMod = const_cast<SequenceSet*>(sequenceSet);
232                 CBioseq& bioseqMod = const_cast<CBioseq&>(seq->bioseqASN.GetObject());
233                 seq = new Sequence(seqSetMod, bioseqMod);
234                 seqSetMod->sequences.push_back(seq);
235                 // update Sequence handle, which should be a handle to a MasterDependentAlignment dependent,
236                 // so that this new Sequence* is correctly loaded into the BlockMultipleAlignment
237                 if (seqHandle) *seqHandle = seq;
238             }
239 
240             // do the cross-match
241             (const_cast<Molecule*>(m->second))->sequence = seq;
242             (const_cast<Sequence*>(seq))->molecule = m->second;
243             break;
244         }
245     }
246     return (m != me);
247 }
248 
SetStructureRowFlags(const AlignmentSet * alignmentSet,unsigned int * structureLimit,vector<bool> * dontLoadRowStructure)249 static void SetStructureRowFlags(const AlignmentSet *alignmentSet, unsigned int *structureLimit,
250     vector < bool > *dontLoadRowStructure)
251 {
252     vector < string > titles;
253     vector < unsigned int > rows;
254 
255     // find dependent rows with associated structure
256     AlignmentSet::AlignmentList::const_iterator l, le = alignmentSet->alignments.end();
257     unsigned int row;
258     for (l=alignmentSet->alignments.begin(), row=0; l!=le; ++l, ++row) {
259         if ((*l)->dependent->identifier->mmdbID != MoleculeIdentifier::VALUE_NOT_SET) {
260             titles.push_back((*l)->dependent->identifier->ToString());
261             rows.push_back(row);
262         }
263     }
264 
265     if (*structureLimit - 1 >= titles.size()) return;
266 
267     // let user select which dependents to load
268     wxString *items = new wxString[titles.size()];
269     vector < bool > itemsOn(titles.size(), false);
270     for (row=0; row<titles.size(); ++row) {
271         items[row] = titles[row].c_str();
272         if (row < *structureLimit - 1)      // by default, first N-1 are selected
273             itemsOn[row] = true;
274     }
275     ShowHideDialog dialog(items, &itemsOn, NULL, true, NULL, -1, "Choose structures:");
276 
277     if (dialog.ShowModal() == wxOK) {
278         // figure out which rows the user selected, and adjust structureLimit accordingly
279         *structureLimit = 1;     // master always visible
280         for (row=0; row<itemsOn.size(); ++row) {
281             if (itemsOn[row])
282                 (*structureLimit)++;                        // structure should be loaded
283             else
284                 (*dontLoadRowStructure)[rows[row]] = true;  // structure should not be loaded
285         }
286     }
287 
288     delete[] items;
289 }
290 
LoadAlignmentsAndStructures(unsigned int structureLimit)291 void StructureSet::LoadAlignmentsAndStructures(unsigned int structureLimit)
292 {
293     // try to determine the master structure
294     int masterMMDBID = MoleculeIdentifier::VALUE_NOT_SET;
295     // explicit master structure
296     if (dataManager->GetMasterStructure() &&
297         dataManager->GetMasterStructure()->GetId().front()->IsMmdb_id())
298         masterMMDBID = dataManager->GetMasterStructure()->GetId().front()->GetMmdb_id().Get();
299     // master of structure alignments
300     else if (dataManager->GetStructureAlignments() &&
301                 dataManager->GetStructureAlignments()->IsSetId() &&
302                 dataManager->GetStructureAlignments()->GetId().front()->IsMmdb_id())
303         masterMMDBID = dataManager->GetStructureAlignments()->GetId().front()->GetMmdb_id().Get();
304     // SPECIAL CASE: if there's no explicit master structure, but there is a structure
305     // list that contains a single structure, then assume that it is the master structure
306     else if (dataManager->GetStructureList() &&
307                 dataManager->GetStructureList()->size() == 1 &&
308                 dataManager->GetStructureList()->front()->GetId().front()->IsMmdb_id())
309         masterMMDBID = dataManager->GetStructureList()->front()->GetId().front()->GetMmdb_id().Get();
310 
311     // assume data manager has already screened the alignment list
312     ASNDataManager::SeqAnnotList *alignments = dataManager->GetSequenceAlignments();
313     typedef list < CRef < CSeq_align > > SeqAlignList;
314     const SeqAlignList& seqAligns = alignments->front()->GetData().GetAlign();
315 
316     // we need to determine the identity of the master sequence; most rigorous way is to look
317     // for a Seq-id that is present in all pairwise alignments
318     const Sequence *seq1 = NULL, *seq2 = NULL, *master = NULL;
319     bool seq1PresentInAll = true, seq2PresentInAll = true;
320 
321     // first, find sequences for first pairwise alignment
322     const CSeq_id& frontSid = seqAligns.front()->GetSegs().IsDendiag() ?
323         seqAligns.front()->GetSegs().GetDendiag().front()->GetIds().front().GetObject() :
324         seqAligns.front()->GetSegs().GetDenseg().GetIds().front().GetObject();
325     const CSeq_id& backSid = seqAligns.front()->GetSegs().IsDendiag() ?
326         seqAligns.front()->GetSegs().GetDendiag().front()->GetIds().back().GetObject() :
327         seqAligns.front()->GetSegs().GetDenseg().GetIds().back().GetObject();
328     SequenceSet::SequenceList::const_iterator s, se = sequenceSet->sequences.end();
329     for (s=sequenceSet->sequences.begin(); s!=se; ++s) {
330         if ((*s)->identifier->MatchesSeqId(frontSid)) seq1 = *s;
331         if ((*s)->identifier->MatchesSeqId(backSid)) seq2 = *s;
332         if (seq1 && seq2) break;
333     }
334     if (!(seq1 && seq2)) {
335         ERRORMSG("Can't match first pair of Seq-ids to Sequences");
336         return;
337     }
338 
339     // now, make sure one of these sequences is present in all the other pairwise alignments
340     SeqAlignList::const_iterator a = seqAligns.begin(), ae = seqAligns.end();
341     for (++a; a!=ae; ++a) {
342         const CSeq_id& frontSid2 = (*a)->GetSegs().IsDendiag() ?
343             (*a)->GetSegs().GetDendiag().front()->GetIds().front().GetObject() :
344             (*a)->GetSegs().GetDenseg().GetIds().front().GetObject();
345         const CSeq_id& backSid2 = (*a)->GetSegs().IsDendiag() ?
346             (*a)->GetSegs().GetDendiag().front()->GetIds().back().GetObject() :
347             (*a)->GetSegs().GetDenseg().GetIds().back().GetObject();
348         if (!seq1->identifier->MatchesSeqId(frontSid2) && !seq1->identifier->MatchesSeqId(backSid2))
349             seq1PresentInAll = false;
350         if (!seq2->identifier->MatchesSeqId(frontSid2) && !seq2->identifier->MatchesSeqId(backSid2))
351             seq2PresentInAll = false;
352     }
353     if (!seq1PresentInAll && !seq2PresentInAll) {
354         ERRORMSG("All pairwise sequence alignments must have a common master sequence");
355         return;
356     } else if (seq1PresentInAll && !seq2PresentInAll)
357         master = seq1;
358     else if (seq2PresentInAll && !seq1PresentInAll)
359         master = seq2;
360     else if (seq1PresentInAll && seq2PresentInAll && seq1 == seq2)
361         master = seq1;
362 
363     // if still ambiguous, see if master3d is set in CDD data
364     if (!master && dataManager->GetCDDMaster3d()) {
365         if (seq1->identifier->MatchesSeqId(*(dataManager->GetCDDMaster3d())))
366             master = seq1;
367         else if (seq2->identifier->MatchesSeqId(*(dataManager->GetCDDMaster3d())))
368             master = seq2;
369         else
370             ERRORMSG("Unable to match CDD's master3d with either sequence in first pairwise alignment");
371     }
372 
373     // if still ambiguous, try to use master structure info to find master sequence
374     if (!master && masterMMDBID != MoleculeIdentifier::VALUE_NOT_SET) {
375         // load master - has side affect of matching gi's with PDB/molecule ID during graph evaluation
376         if (structureLimit > 0)
377             LoadMaster(masterMMDBID);
378 
379         // see if there's a sequence in the master structure that matches
380         if (seq1->identifier->mmdbID != MoleculeIdentifier::VALUE_NOT_SET &&
381             seq1->identifier->mmdbID != seq2->identifier->mmdbID) {
382             if (masterMMDBID == seq1->identifier->mmdbID)
383                 master = seq1;
384             else if (masterMMDBID == seq2->identifier->mmdbID)
385                 master = seq2;
386             else {
387                 ERRORMSG("Structure master does not contain either sequence in first pairwise alignment");
388                 return;
389             }
390         }
391     }
392 
393     // if still ambiguous, just use the first one
394     if (!master) {
395         WARNINGMSG("Ambiguous master; using " << seq1->identifier->ToString());
396         master = seq1;
397     }
398 
399     TRACEMSG("determined that master sequence is " << master->identifier->ToString());
400 
401     // load alignments now that we know the identity of the master
402     alignmentSet = new AlignmentSet(this, master, *(dataManager->GetSequenceAlignments()));
403 
404     // check mmdb id's and load master if not already present (and if master has structure)
405     if (masterMMDBID == MoleculeIdentifier::VALUE_NOT_SET) {
406         masterMMDBID = master->identifier->mmdbID;
407     } else if (master->identifier->mmdbID != MoleculeIdentifier::VALUE_NOT_SET &&
408                master->identifier->mmdbID != masterMMDBID) {
409         ERRORMSG("master structure (" << masterMMDBID <<
410             ") disagrees with master sequence (apparently from " << master->identifier->mmdbID << ')');
411         return;
412     }
413     if (objects.size() == 0 && structureLimit > 0 && masterMMDBID != MoleculeIdentifier::VALUE_NOT_SET &&
414             master->identifier->GetLabel() != "consensus")   // special case for looking at "raw" CD's
415         LoadMaster(masterMMDBID);
416 
417     // cross-match master sequence and structure
418     if (objects.size() == 1 && !MatchSequenceToMoleculeInObject(master, objects.front())) {
419         ERRORMSG("MatchSequenceToMoleculeInObject() - can't find molecule in object "
420             << objects.front()->GetPDBID() << " to match master sequence "
421             << master->identifier->ToString());
422         return;
423     }
424 
425     // IFF there's a master structure, then also load dependent structures and cross-match sequences
426     if (objects.size() == 1 && structureLimit > 1) {
427         ASNDataManager::BiostrucList::const_iterator b, be;
428         if (dataManager->GetStructureList()) be = dataManager->GetStructureList()->end();
429         int row;
430         vector < bool > loadedStructureForDependentRow(alignmentSet->alignments.size(), false);
431 
432         // first, load each remaining dependent structure, and for each one, find the first dependent
433         // sequence that matches it (and that doesn't already have structure)
434         AlignmentSet::AlignmentList::const_iterator l, le = alignmentSet->alignments.end();
435         if (dataManager->GetStructureList()) {
436             for (b=dataManager->GetStructureList()->begin(); b!=be && objects.size()<structureLimit; ++b) {
437 
438                 // load structure
439                 if (usedStructures.find(b->GetPointer()) != usedStructures.end()) continue;
440                 StructureObject *object = new StructureObject(this, **b, false);
441                 objects.push_back(object);
442                 if (dataManager->GetStructureAlignments())
443                     object->SetTransformToMaster(
444                         *(dataManager->GetStructureAlignments()), master->identifier->mmdbID);
445                 usedStructures[b->GetPointer()] = true;
446 
447                 // find matching unstructured dependent sequence
448                 for (l=alignmentSet->alignments.begin(), row=0; l!=le; ++l, ++row) {
449                     if (loadedStructureForDependentRow[row]) continue;
450                     if (MatchSequenceToMoleculeInObject((*l)->dependent, object,
451                             &((const_cast<MasterDependentAlignment*>(*l))->dependent))) {
452                         loadedStructureForDependentRow[row] = true;
453                         break;
454                     }
455                 }
456                 if (l == le)
457                     ERRORMSG("Warning: Structure " << object->GetPDBID()
458                         << " doesn't have a matching dependent sequence in the multiple alignment");
459             }
460         }
461 
462         // now loop through dependent rows of the alignment; if the dependent
463         // sequence has an MMDB ID but no structure yet, then load it.
464         if (objects.size() < structureLimit && (dataManager->IsCDD() || dataManager->IsGeneralMime())) {
465 
466             // for CDD's, ask user which structures to load if structureLimit is low
467             if (dataManager->IsCDD())
468                 SetStructureRowFlags(alignmentSet, &structureLimit, &loadedStructureForDependentRow);
469 
470             for (l=alignmentSet->alignments.begin(), row=0; l!=le && objects.size()<structureLimit; ++l, ++row) {
471 
472                 if ((*l)->dependent->identifier->mmdbID != MoleculeIdentifier::VALUE_NOT_SET &&
473                     !loadedStructureForDependentRow[row]) {
474 
475                     // first check the biostruc list to see if this structure is present already
476                     CRef < CBiostruc > biostruc;
477                     if (dataManager->GetStructureList()) {
478                         for (b=dataManager->GetStructureList()->begin(); b!=be ; ++b) {
479                             if ((*b)->GetId().front()->IsMmdb_id() &&
480                                 (*b)->GetId().front()->GetMmdb_id().Get() == (*l)->dependent->identifier->mmdbID) {
481                                 biostruc = *b;
482                                 break;
483                             }
484                         }
485                     }
486 
487                     // if not in list, load Biostruc via HTTP/cache
488                     if (biostruc.Empty()) {
489                         if (!LoadStructureViaCache(NStr::IntToString((*l)->dependent->identifier->mmdbID),
490                                 dataManager->GetBiostrucModelType(), 0, biostruc, NULL)) {
491                             ERRORMSG("Failed to load MMDB #" << (*l)->dependent->identifier->mmdbID);
492                             continue;
493                         }
494                     }
495 
496                     // create StructureObject and cross-match
497                     StructureObject *object = new StructureObject(this, *biostruc, false);
498                     objects.push_back(object);
499                     if (dataManager->GetStructureAlignments())
500                         object->SetTransformToMaster(
501                             *(dataManager->GetStructureAlignments()), master->identifier->mmdbID);
502                     if (!MatchSequenceToMoleculeInObject((*l)->dependent, object,
503                             &((const_cast<MasterDependentAlignment*>(*l))->dependent)))
504                         ERRORMSG("Failed to match any molecule in structure " << object->GetPDBID()
505                             << " with sequence " << (*l)->dependent->identifier->ToString());
506                     loadedStructureForDependentRow[row] = true;
507                 }
508             }
509         }
510     }
511 }
512 
Load(unsigned int structureLimit)513 void StructureSet::Load(unsigned int structureLimit)
514 {
515     // member data initialization
516     lastAtomName = OpenGLRenderer::NO_NAME;
517     lastDisplayList = OpenGLRenderer::NO_LIST;
518     sequenceSet = NULL;
519     alignmentSet = NULL;
520     alignmentManager = NULL;
521     nDomains = 0;
522     isAlphaOnly = false;
523     parentSet = this;
524     showHideManager = new ShowHideManager();
525     styleManager = new StyleManager(this);
526     havePrevPickedAtomCoord = false;
527     hasUserStyle = false;
528 
529     // if this is a single structure, then there should be one sequence per biopolymer
530     if (dataManager->IsSingleStructure()) {
531         const CBiostruc *masterBiostruc = dataManager->GetMasterStructure();
532         if (!masterBiostruc && dataManager->GetStructureList() && dataManager->GetStructureList()->size() == 1)
533             masterBiostruc = dataManager->GetStructureList()->front().GetPointer();
534         if (masterBiostruc)
535             objects.push_back(new StructureObject(this, *masterBiostruc, true));
536         if (dataManager->GetSequences())
537             LoadSequencesForSingleStructure();
538     }
539 
540     // multiple structure: should have exactly one sequence per structure (plus unstructured sequences)
541     else {
542         if (!dataManager->GetSequences() || !dataManager->GetSequenceAlignments()) {
543             ERRORMSG("Data interpreted as multiple alignment, "
544                 "but missing sequences and/or sequence alignments");
545             return;
546         }
547         sequenceSet = new SequenceSet(this, *(dataManager->GetSequences()));
548         LoadAlignmentsAndStructures(structureLimit);
549     }
550 
551     // find center of coordinates
552     SetCenter();
553 
554     // create alignment manager
555     if (sequenceSet) {
556         if (dataManager->GetUpdates())
557             // if updates present, alignment manager will load those into update viewer
558             alignmentManager = new AlignmentManager(sequenceSet, alignmentSet, *(dataManager->GetUpdates()));
559         else
560             alignmentManager = new AlignmentManager(sequenceSet, alignmentSet);
561     }
562 
563     VerifyFrameMap();
564 
565     // load style dictionary and user annotations
566     const CCn3d_style_dictionary *styles = dataManager->GetStyleDictionary();
567     if (styles) {
568         if (!styleManager->LoadFromASNStyleDictionary(*styles) ||
569             !styleManager->CheckGlobalStyleSettings())
570             ERRORMSG("Error loading style dictionary");
571         dataManager->RemoveStyleDictionary();   // remove now; recreated with current settings upon save
572         hasUserStyle = true;
573     }
574 
575     const CCn3d_user_annotations *annots = dataManager->GetUserAnnotations();
576     if (annots) {
577         if (!styleManager->LoadFromASNUserAnnotations(*annots) ||
578             !renderer->LoadFromASNViewSettings(*annots))
579             ERRORMSG("Error loading user annotations or camera settings");
580         dataManager->RemoveUserAnnotations();   // remove now; recreated with current settings upon save
581     }
582 
583     // setup show/hide items
584     showHideManager->ConstructShowHideArray(this);
585 
586     // alignments always start with aligned domains only
587     if (alignmentSet)
588         showHideManager->ShowAlignedOrAnnotatedDomains(this);
589 
590     dataManager->SetDataUnchanged();
591 }
592 
~StructureSet(void)593 StructureSet::~StructureSet(void)
594 {
595     delete dataManager;
596     delete showHideManager;
597     delete styleManager;
598     if (alignmentManager) delete alignmentManager;
599 
600     GlobalMessenger()->RemoveAllHighlights(false);
601     MoleculeIdentifier::ClearIdentifiers();
602 }
603 
AddBiostrucToASN(ncbi::objects::CBiostruc * biostruc)604 bool StructureSet::AddBiostrucToASN(ncbi::objects::CBiostruc *biostruc)
605 {
606     bool added = dataManager->AddBiostrucToASN(biostruc);
607     if (added && objects.size() == 1)
608         InitStructureAlignments(objects.front()->mmdbID);
609     return added;
610 }
611 
612 static const int NO_DOMAIN = -1, MULTI_DOMAIN = 0;
613 
InitStructureAlignments(int masterMMDBID)614 void StructureSet::InitStructureAlignments(int masterMMDBID)
615 {
616     // create or empty the Biostruc-annot-set that will contain these alignments
617     // in the asn data, erasing any structure alignments currently stored there
618     CBiostruc_annot_set *structureAlignments = dataManager->GetStructureAlignments();
619     if (structureAlignments) {
620         structureAlignments->SetId().clear();
621         structureAlignments->SetDescr().clear();
622         structureAlignments->SetFeatures().clear();
623     } else {
624         structureAlignments = new CBiostruc_annot_set();
625         dataManager->SetStructureAlignments(structureAlignments);
626     }
627 
628     // set up the skeleton of the new Biostruc-annot-set
629     // new Mmdb-id
630     structureAlignments->SetId().resize(1);
631     structureAlignments->SetId().front().Reset(new CBiostruc_id());
632     CMmdb_id *mid = new CMmdb_id(masterMMDBID);
633     structureAlignments->SetId().front().GetObject().SetMmdb_id(*mid);
634     // new Biostruc-feature-set
635     CRef<CBiostruc_feature_set> featSet(new CBiostruc_feature_set());
636     featSet->SetId().Set(NO_DOMAIN);
637     featSet->SetFeatures();    // just create an empty list
638     structureAlignments->SetFeatures().resize(1, featSet);
639 
640     // flag a change in data
641     SetDataChanged(eStructureAlignmentData);
642 }
643 
AddStructureAlignment(CBiostruc_feature * feature,int masterDomainID,int dependentDomainID)644 void StructureSet::AddStructureAlignment(CBiostruc_feature *feature,
645     int masterDomainID, int dependentDomainID)
646 {
647     CBiostruc_annot_set *structureAlignments = dataManager->GetStructureAlignments();
648     if (!structureAlignments) {
649         WARNINGMSG("StructureSet::AddStructureAlignment() - creating new structure alignment list");
650         InitStructureAlignments(objects.front()->mmdbID);
651         structureAlignments = dataManager->GetStructureAlignments();
652     }
653 
654     // check master domain ID, to see if alignments have crossed master's domain boundaries
655     int *currentMasterDomainID = &(structureAlignments->SetFeatures().front().GetObject().SetId().Set());
656     if (*currentMasterDomainID == NO_DOMAIN)
657         *currentMasterDomainID = masterDomainID;
658     else if ((*currentMasterDomainID % 100) != (masterDomainID % 100))
659         *currentMasterDomainID = (*currentMasterDomainID / 100) * 100;
660 
661     // check to see if this dependent domain already has an alignment; if so, increment alignment #
662     CBiostruc_feature_set::TFeatures::const_iterator
663         f, fe = structureAlignments->GetFeatures().front().GetObject().GetFeatures().end();
664     for (f=structureAlignments->GetFeatures().front().GetObject().GetFeatures().begin(); f!=fe; ++f) {
665         if ((f->GetObject().GetId().Get() / 10) == (dependentDomainID / 10))
666             ++dependentDomainID;
667     }
668     CBiostruc_feature_id id(dependentDomainID);
669     feature->SetId(id);
670 
671     CRef<CBiostruc_feature> featureRef(feature);
672     structureAlignments->SetFeatures().front().GetObject().SetFeatures().resize(
673         structureAlignments->GetFeatures().front().GetObject().GetFeatures().size() + 1, featureRef);
674 
675     // flag a change in data
676     SetDataChanged(eStructureAlignmentData);
677 }
678 
RemoveStructureAlignments(void)679 void StructureSet::RemoveStructureAlignments(void)
680 {
681     dataManager->SetStructureAlignments(NULL);
682     // flag a change in data
683     SetDataChanged(eStructureAlignmentData);
684 }
685 
ReplaceAlignmentSet(AlignmentSet * newAlignmentSet)686 void StructureSet::ReplaceAlignmentSet(AlignmentSet *newAlignmentSet)
687 {
688     ASNDataManager::SeqAnnotList *seqAnnots = dataManager->GetOrCreateSequenceAlignments();
689     if (!seqAnnots) {
690         ERRORMSG("StructureSet::ReplaceAlignmentSet() - "
691             << "can't figure out where in the asn the alignments are to go");
692         return;
693     }
694 
695     // update the AlignmentSet
696     if (alignmentSet)
697         delete alignmentSet;
698     alignmentSet = newAlignmentSet;
699 
700     // update the asn alignments
701     seqAnnots->resize(alignmentSet->newAsnAlignmentData->size());
702     ASNDataManager::SeqAnnotList::iterator o = seqAnnots->begin();
703     ASNDataManager::SeqAnnotList::iterator n, ne = alignmentSet->newAsnAlignmentData->end();
704     for (n=alignmentSet->newAsnAlignmentData->begin(); n!=ne; ++n, ++o)
705         o->Reset(n->GetPointer());   // copy each Seq-annot CRef
706 
707     // don't set data PSSM/row order flags here; done by AlignmentManager::SavePairwiseFromMultiple()
708     SetDataChanged(eAnyAlignmentData);
709 }
710 
ReplaceUpdates(ncbi::objects::CCdd::TPending & newUpdates)711 void StructureSet::ReplaceUpdates(ncbi::objects::CCdd::TPending& newUpdates)
712 {
713     dataManager->ReplaceUpdates(newUpdates);
714 }
715 
RemoveUnusedSequences(void)716 void StructureSet::RemoveUnusedSequences(void)
717 {
718 	ASNDataManager::SequenceList updateSequences;
719     if (alignmentManager) alignmentManager->GetUpdateSequences(&updateSequences);
720     dataManager->RemoveUnusedSequences(alignmentSet, updateSequences);
721 }
722 
MonitorAlignments(void) const723 bool StructureSet::MonitorAlignments(void) const
724 {
725     return dataManager->MonitorAlignments();
726 }
727 
SaveASNData(const char * filename,bool doBinary,unsigned int * changeFlags)728 bool StructureSet::SaveASNData(const char *filename, bool doBinary, unsigned int *changeFlags)
729 {
730     // force a save of any edits to alignment and updates first (it's okay if this has already been done)
731     GlobalMessenger()->SequenceWindowsSave(true);
732     /*if (dataManager->HasDataChanged())*/ RemoveUnusedSequences();
733 
734     // create and temporarily attach a style dictionary, and annotation set + camera info
735     // to the data (and then remove it again, so it's never out of date)
736     CRef < CCn3d_style_dictionary > styleDictionary(styleManager->CreateASNStyleDictionary());
737     dataManager->SetStyleDictionary(*styleDictionary);
738     CRef < CCn3d_user_annotations > userAnnotations(new CCn3d_user_annotations());
739     if (!styleManager->SaveToASNUserAnnotations(userAnnotations.GetPointer()) ||
740         (objects.size() >= 1 && !renderer->SaveToASNViewSettings(userAnnotations.GetPointer()))) {
741         ERRORMSG("StructureSet::SaveASNData() - error creating user annotations blob");
742         return false;
743     }
744     if (userAnnotations->IsSetAnnotations() || userAnnotations->IsSetView())
745         dataManager->SetUserAnnotations(*userAnnotations);
746 
747     string err;
748     bool writeOK = MonitorAlignments();
749     if (!writeOK)
750         err = "MonitorAlignments() returned error, no file written";
751     else
752         writeOK = dataManager->WriteDataToFile(filename, doBinary, &err, eFNP_Replace);
753 
754     // remove style dictionary and annotations from asn
755     dataManager->RemoveStyleDictionary();
756     dataManager->RemoveUserAnnotations();
757 
758     if (writeOK) {
759         *changeFlags = dataManager->GetDataChanged();
760         dataManager->SetDataUnchanged();
761     } else {
762         ERRORMSG("Write failed: " << err);
763     }
764     return writeOK;
765 }
766 
767 // because the frame map (for each frame, a list of diplay lists) is complicated
768 // to create, this just verifies that all display lists occur exactly once
769 // in the map. Also, make sure that total # display lists in all frames adds up.
VerifyFrameMap(void) const770 void StructureSet::VerifyFrameMap(void) const
771 {
772     TRACEMSG("# display lists: " << (lastDisplayList - OpenGLRenderer::FIRST_LIST + 1));
773     for (unsigned int l=OpenGLRenderer::FIRST_LIST; l<=lastDisplayList; ++l) {
774         bool found = false;
775         for (unsigned int f=0; f<frameMap.size(); ++f) {
776             DisplayLists::const_iterator d, de=frameMap[f].end();
777             for (d=frameMap[f].begin(); d!=de; ++d) {
778                 if (*d == l) {
779                     if (!found)
780                         found = true;
781                     else
782                         ERRORMSG("frameMap: repeated display list " << l);
783                 }
784             }
785         }
786         if (!found)
787             ERRORMSG("display list " << l << " not in frameMap");
788     }
789 
790     unsigned int nLists = 0;
791     for (unsigned int f=0; f<frameMap.size(); ++f) {
792         DisplayLists::const_iterator d, de=frameMap[f].end();
793         for (d=frameMap[f].begin(); d!=de; ++d) ++nLists;
794     }
795     if (nLists != lastDisplayList)
796         ERRORMSG("frameMap has too many display lists");
797 }
798 
SetCenter(const Vector * given)799 void StructureSet::SetCenter(const Vector *given)
800 {
801     Vector siteSum;
802     int nAtoms = 0;
803     double dist;
804     maxDistFromCenter = 0.0;
805 
806     // set new center if given one
807     if (given) center = *given;
808 
809     // loop trough all atoms twice - once to get average center, then once to
810     // find max distance from this center
811     for (int i=0; i<2; ++i) {
812         if (given && i==0) continue; // skip center calculation if given one
813         ObjectList::const_iterator o, oe=objects.end();
814         for (o=objects.begin(); o!=oe; ++o) {
815             StructureObject::CoordSetList::const_iterator c, ce=(*o)->coordSets.end();
816             for (c=(*o)->coordSets.begin(); c!=ce; ++c) {
817                 AtomSet::AtomMap::const_iterator a, ae=(*c)->atomSet->atomMap.end();
818                 for (a=(*c)->atomSet->atomMap.begin(); a!=ae; ++a) {
819                     Vector site(a->second.front()->site);
820                     if ((*o)->IsDependent() && (*o)->transformToMaster)
821                         ApplyTransformation(&site, *((*o)->transformToMaster));
822                     if (i==0) {
823                         siteSum += site;
824                         ++nAtoms;
825                     } else {
826                         dist = (site - center).length();
827                         if (dist > maxDistFromCenter)
828                             maxDistFromCenter = dist;
829                     }
830                 }
831             }
832         }
833         if (i==0) {
834             if (nAtoms == 0) {
835                 center.Set(0.0, 0.0, 0.0);
836                 break;
837             }
838             center = siteSum / nAtoms;
839         }
840     }
841     TRACEMSG("center: " << center << ", maxDistFromCenter " << maxDistFromCenter);
842     rotationCenter = center;
843 }
844 
CenterViewOnStructure(void)845 void StructureSet::CenterViewOnStructure(void)
846 {
847     if (objects.size() != 1)
848         return;
849 
850     TRACEMSG("Computing structure view...");
851     Vector alphaCenter;
852     double alphaRadius = 0.0;
853     int nAlphas = 0;
854 
855     // loop through twice - once to get average center, then once to
856     // find max distance from this center
857     for (int i=0; i<2; ++i) {
858 
859         // find all biopolymer alpha residues
860         ChemicalGraph::MoleculeMap::const_iterator m, me = objects.front()->graph->molecules.end();
861         for (m=objects.front()->graph->molecules.begin(); m!=me; ++m) {
862             if (!m->second->IsBiopolymer())
863                 continue;
864             Molecule::ResidueMap::const_iterator r, re = m->second->residues.end();
865             for (r=m->second->residues.begin(); r!=re; ++r) {
866                 if (r->second->alphaID == Residue::NO_ALPHA_ID) continue;
867                 AtomPntr ap(m->second->id, r->first, r->second->alphaID);
868                 const AtomCoord* atom = objects.front()->coordSets.front()->atomSet->GetAtom(ap, true, true);
869                 if (atom) {
870                     if (i == 0) {
871                         alphaCenter += atom->site;
872                         ++nAlphas;
873                     } else {
874                         double dist = (atom->site - alphaCenter).length();
875                         if (dist > alphaRadius)
876                             alphaRadius = dist;
877                     }
878                 }
879             }
880         }
881 
882         if (i == 0) {
883             if (nAlphas == 0)
884                 return;
885             alphaCenter /= nAlphas;
886         }
887     }
888 
889     renderer->CenterView(alphaCenter, alphaRadius);
890     TRACEMSG("Centered view at " << alphaCenter << " radius " << alphaRadius);
891 }
892 
CenterViewOnAlignedResidues(void)893 bool StructureSet::CenterViewOnAlignedResidues(void)
894 {
895     const BlockMultipleAlignment *alignment = alignmentManager->GetCurrentMultipleAlignment();
896     if (!alignment || !alignment->GetSequenceOfRow(0) || !alignment->GetSequenceOfRow(0))
897         return false;;                     // no alignment
898     const Molecule *masterMolecule = alignment->GetSequenceOfRow(0)->molecule;
899     if (!masterMolecule) return false;    // no structured master
900     const StructureObject *masterObject;
901     if (!masterMolecule->GetParentOfType(&masterObject)) return false;
902 
903     // get coords of all aligned c-alphas
904     deque < Vector > coords;
905     Molecule::ResidueMap::const_iterator r, re = masterMolecule->residues.end();
906     for (r=masterMolecule->residues.begin(); r!=re; ++r) {
907         if (!alignment->IsAligned(0U, r->first - 1)) continue;
908         if (r->second->alphaID == Residue::NO_ALPHA_ID) continue;
909         AtomPntr ap(masterMolecule->id, r->first, r->second->alphaID);
910         const AtomCoord* atom = masterObject->coordSets.front()->atomSet->GetAtom(ap, true, true);
911         if (atom) coords.push_back(atom->site);
912     }
913     if (coords.size() == 0)
914         return false;
915 
916     // calculate center
917     unsigned int i;
918     Vector alignedCenter;
919     for (i=0; i<coords.size(); ++i) alignedCenter += coords[i];
920     alignedCenter /= coords.size();
921 
922     // find radius
923     double radius = 0.0, d;
924     for (i=0; i<coords.size(); ++i) {
925         d = (coords[i] - alignedCenter).length();
926         if (d > radius) radius = d;
927     }
928 
929     // set view
930     renderer->CenterView(alignedCenter, radius);
931     TRACEMSG("Centered view at " << alignedCenter << " radius " << radius);
932     return true;
933 }
934 
Draw(const AtomSet * atomSet) const935 bool StructureSet::Draw(const AtomSet *atomSet) const
936 {
937     TRACEMSG("drawing StructureSet");
938     if (!styleManager->CheckGlobalStyleSettings()) return false;
939     return true;
940 }
941 
CreateName(const Residue * residue,int atomID)942 unsigned int StructureSet::CreateName(const Residue *residue, int atomID)
943 {
944     ++lastAtomName;
945     nameMap[lastAtomName] = make_pair(residue, atomID);
946     return lastAtomName;
947 }
948 
GetAtomFromName(unsigned int name,const Residue ** residue,int * atomID) const949 bool StructureSet::GetAtomFromName(unsigned int name, const Residue **residue, int *atomID) const
950 {
951     NameMap::const_iterator i = nameMap.find(name);
952     if (i == nameMap.end()) return false;
953     *residue = i->second.first;
954     *atomID = i->second.second;
955 	return true;
956 }
957 
SelectedAtom(unsigned int name,bool setCenter)958 void StructureSet::SelectedAtom(unsigned int name, bool setCenter)
959 {
960     const Residue *residue;
961     int atomID;
962 
963     if (name == OpenGLRenderer::NO_NAME || !GetAtomFromName(name, &residue, &atomID)) {
964         INFOMSG("nothing selected");
965         return;
966     }
967 
968     // add highlight
969     const Molecule *molecule;
970     if (!residue->GetParentOfType(&molecule)) return;
971     GlobalMessenger()->ToggleHighlight(molecule, residue->id, true);
972     wxString molresid;
973     if (molecule->IsHeterogen() || molecule->IsSolvent()) {
974         const StructureObject *object;
975         if (molecule->GetParentOfType(&object)) {
976             // assume hets/solvents are single residue
977             if (object->GetPDBID().size() > 0)
978                 molresid.Printf("%s heterogen/solvent molecule %i", object->GetPDBID().c_str(), molecule->id);
979             else
980                 molresid = molecule->identifier->ToString().c_str();
981         }
982     } else
983         molresid.Printf("chain %s residue %i", molecule->identifier->ToString().c_str(), residue->id);
984     INFOMSG("selected " << molresid.c_str() << " (PDB: " << residue->nameGraph << ' ' << residue->namePDB
985         << ") atom " << atomID << " (PDB: " << residue->GetAtomInfo(atomID)->name << ')');
986 
987     // get coordinate of picked atom, in coordinates of master frame
988     const StructureObject *object;
989     if (!molecule->GetParentOfType(&object)) return;
990     object->coordSets.front()->atomSet->SetActiveEnsemble(NULL);    // don't actually know which alternate...
991 	const AtomCoord *pickedAtom = object->coordSets.front()->atomSet->
992         GetAtom(AtomPntr(molecule->id, residue->id, atomID));
993 	if (!pickedAtom) {
994 		WARNINGMSG("Can't get coordinates for this atom (in the first coordinate set)");
995 		return;
996 	}
997 	Vector pickedAtomCoord = pickedAtom->site;
998     if (object->IsDependent() && object->transformToMaster)
999         ApplyTransformation(&pickedAtomCoord, *(object->transformToMaster));
1000 
1001     // print out distance to previous picked atom
1002     if (havePrevPickedAtomCoord)
1003         INFOMSG("distance to previously selected atom: " << setprecision(3) <<
1004             (pickedAtomCoord - prevPickedAtomCoord).length() << setprecision(6) << " A");
1005     prevPickedAtomCoord = pickedAtomCoord;
1006     havePrevPickedAtomCoord = true;
1007 
1008     // if indicated, use atom site as rotation center; use coordinate from first CoordSet, default altConf
1009     if (setCenter) {
1010         INFOMSG("rotating about " << object->GetPDBID()
1011             << " molecule " << molecule->id << " residue " << residue->id << ", atom " << atomID);
1012         rotationCenter = pickedAtomCoord;
1013     }
1014 }
1015 
SelectByDistance(double cutoff,unsigned int options) const1016 void StructureSet::SelectByDistance(double cutoff, unsigned int options) const
1017 {
1018     StructureObject::ResidueMap residuesToHighlight;
1019 
1020     // add residues to highlight to master list, based on proximities within objects
1021     ObjectList::const_iterator o, oe = objects.end();
1022     for (o=objects.begin(); o!=oe; ++o)
1023         (*o)->SelectByDistance(cutoff, options, &residuesToHighlight);
1024 
1025     // now actually add highlights for new selected residues
1026     StructureObject::ResidueMap::const_iterator r, re = residuesToHighlight.end();
1027     for (r=residuesToHighlight.begin(); r!=re; ++r)
1028         if (!GlobalMessenger()->IsHighlighted(r->second, r->first->id))
1029             GlobalMessenger()->ToggleHighlight(r->second, r->first->id, false);
1030 }
1031 
FindOrCreateSequence(ncbi::objects::CBioseq & bioseq)1032 const Sequence * StructureSet::FindOrCreateSequence(ncbi::objects::CBioseq& bioseq)
1033 {
1034     // see if we have this sequence already
1035     const Sequence *seq = sequenceSet->FindMatchingSequence(bioseq.GetId());
1036     if (seq)
1037         return seq;
1038 
1039     // if not, add new Sequence to SequenceSet
1040     SequenceSet *modifiableSet = const_cast<SequenceSet*>(sequenceSet);
1041     seq = new Sequence(modifiableSet, bioseq);
1042     if (!seq->identifier) {
1043         ERRORMSG("StructureSet::FindOrCreateSequence() - identifier conflict, no new sequence created");
1044         delete seq;
1045         return NULL;
1046     }
1047     modifiableSet->sequences.push_back(seq);
1048 
1049     // add asn sequence to asn data
1050     if (dataManager->GetSequences()) {
1051         CSeq_entry *se = new CSeq_entry();
1052         se->SetSeq(bioseq);
1053         dataManager->GetSequences()->push_back(CRef<CSeq_entry>(se));
1054     } else
1055         ERRORMSG("StructureSet::FindOrCreateSequence() - no sequence list in asn data");
1056     SetDataChanged(eSequenceData);
1057 
1058     return seq;
1059 }
1060 
RejectAndPurgeSequence(const Sequence * reject,string reason,bool purge)1061 void StructureSet::RejectAndPurgeSequence(const Sequence *reject, string reason, bool purge)
1062 {
1063     if (!dataManager->IsCDD() || !reject || reason.size() == 0) return;
1064 
1065     CReject_id *rejectID = new CReject_id();
1066     rejectID->SetIds() = reject->bioseqASN->GetId();    // copy Seq-id lists
1067 
1068     CUpdate_comment *comment = new CUpdate_comment();
1069     comment->SetComment(reason);
1070     rejectID->SetDescription().push_back(CRef < CUpdate_comment > (comment));
1071 
1072     dataManager->AddReject(rejectID);
1073 
1074     if (purge)
1075         alignmentManager->PurgeSequence(reject->identifier);
1076 }
1077 
GetRejects(void) const1078 const StructureSet::RejectList * StructureSet::GetRejects(void) const
1079 {
1080     return dataManager->GetRejects();
1081 }
1082 
ShowRejects(void) const1083 void StructureSet::ShowRejects(void) const
1084 {
1085     const RejectList *rejects = GetRejects();
1086     if (!rejects) {
1087         INFOMSG("No rejects in this CD");
1088         return;
1089     }
1090 
1091     INFOMSG("Rejects:");
1092     RejectList::const_iterator r, re = rejects->end();
1093     for (r=rejects->begin(); r!=re; ++r) {
1094         string idstr;
1095         CReject_id::TIds::const_iterator i, ie = (*r)->GetIds().end();
1096         for (i=(*r)->GetIds().begin(); i!=ie; ++i)
1097             idstr += (*i)->AsFastaString() + ", ";
1098         INFOMSG(idstr << "Reason: " <<
1099             (((*r)->IsSetDescription() && (*r)->GetDescription().front()->IsComment()) ?
1100                 (*r)->GetDescription().front()->GetComment() : string("none given")));
1101     }
1102 }
1103 
ConvertMimeDataToCDD(const std::string & cddName)1104 bool StructureSet::ConvertMimeDataToCDD(const std::string& cddName)
1105 {
1106     if (!dataManager->ConvertMimeDataToCDD(cddName))
1107         return false;
1108 
1109     // make sure all structured sequences have MMDB annot tags
1110     SequenceSet::SequenceList::const_iterator s, se = sequenceSet->sequences.end();
1111     for (s=sequenceSet->sequences.begin(); s!=se; ++s) {
1112         if ((*s)->molecule) {
1113             if ((*s)->identifier->mmdbID == MoleculeIdentifier::VALUE_NOT_SET) {
1114                 ERRORMSG("sequence " << (*s)->identifier->ToString()
1115                     << " has associated molecule but no MMDB id");
1116                 return false;
1117             }
1118             (*s)->AddMMDBAnnotTag((*s)->identifier->mmdbID);
1119         }
1120     }
1121 
1122     return true;
1123 }
1124 
1125 // trivial methods...
IsMultiStructure(void) const1126 bool StructureSet::IsMultiStructure(void) const { return !dataManager->IsSingleStructure(); }
HasDataChanged(void) const1127 bool StructureSet::HasDataChanged(void) const { return dataManager->HasDataChanged(); }
SetDataChanged(unsigned int what) const1128 void StructureSet::SetDataChanged(unsigned int what) const { dataManager->SetDataChanged(what); }
IsCDD(void) const1129 bool StructureSet::IsCDD(void) const { return dataManager->IsCDD(); }
IsCDDInMime(void) const1130 bool StructureSet::IsCDDInMime(void) const { return dataManager->IsCDDInMime(); }
GetCDDName(void) const1131 const string& StructureSet::GetCDDName(void) const { return dataManager->GetCDDName(); }
SetCDDName(const string & name)1132 bool StructureSet::SetCDDName(const string& name) { return dataManager->SetCDDName(name); }
GetCDDDescription(void) const1133 const string& StructureSet::GetCDDDescription(void) const { return dataManager->GetCDDDescription(); }
SetCDDDescription(const string & descr)1134 bool StructureSet::SetCDDDescription(const string& descr) { return dataManager->SetCDDDescription(descr); }
GetCDDNotes(StructureSet::TextLines * lines) const1135 bool StructureSet::GetCDDNotes(StructureSet::TextLines *lines) const { return dataManager->GetCDDNotes(lines); }
SetCDDNotes(const StructureSet::TextLines & lines)1136 bool StructureSet::SetCDDNotes(const StructureSet::TextLines& lines) { return dataManager->SetCDDNotes(lines); }
GetCDDDescrSet(void)1137 ncbi::objects::CCdd_descr_set * StructureSet::GetCDDDescrSet(void) { return dataManager->GetCDDDescrSet(); }
GetCDDAnnotSet(void)1138 ncbi::objects::CAlign_annot_set * StructureSet::GetCDDAnnotSet(void) { return dataManager->GetCDDAnnotSet(); }
1139 
HasStructuredMaster(void) const1140 bool StructureSet::HasStructuredMaster(void) const { return (alignmentSet && alignmentSet->master && alignmentSet->master->identifier->HasStructure()); }
1141 
1142 
1143 ///// StructureObject stuff /////
1144 
1145 const int StructureObject::NO_MMDB_ID = -1;
1146 const double StructureObject::NO_TEMPERATURE = kMin_Double;
1147 
StructureObject(StructureBase * parent,const CBiostruc & biostruc,bool master)1148 StructureObject::StructureObject(StructureBase *parent, const CBiostruc& biostruc, bool master) :
1149     StructureBase(parent), isMaster(master), mmdbID(NO_MMDB_ID), transformToMaster(NULL),
1150     minTemperature(NO_TEMPERATURE), maxTemperature(NO_TEMPERATURE)
1151 {
1152     // set numerical id simply based on # objects in parentSet
1153     id = parentSet->objects.size() + 1;
1154 
1155     // get MMDB id
1156     CBiostruc::TId::const_iterator j, je=biostruc.GetId().end();
1157     for (j=biostruc.GetId().begin(); j!=je; ++j) {
1158         if (j->GetObject().IsMmdb_id()) {
1159             mmdbID = j->GetObject().GetMmdb_id().Get();
1160             break;
1161         }
1162     }
1163     TRACEMSG("MMDB id " << mmdbID);
1164 
1165     // get PDB id
1166     if (biostruc.IsSetDescr()) {
1167         CBiostruc::TDescr::const_iterator k, ke=biostruc.GetDescr().end();
1168         for (k=biostruc.GetDescr().begin(); k!=ke; ++k) {
1169             if (k->GetObject().IsName()) {
1170                 pdbIDs.push_back(k->GetObject().GetName());
1171                 TRACEMSG("PDB id " << pdbIDs.back());
1172             }
1173         }
1174     }
1175 
1176     // get atom and feature spatial coordinates
1177     if (biostruc.IsSetModel()) {
1178         // iterate SEQUENCE OF Biostruc-model
1179         CBiostruc::TModel::const_iterator i, ie=biostruc.GetModel().end();
1180         for (i=biostruc.GetModel().begin(); i!=ie; ++i) {
1181 
1182             // don't know how to deal with these...
1183             if (i->GetObject().GetType() == eModel_type_ncbi_vector ||
1184                 i->GetObject().GetType() == eModel_type_other) continue;
1185 
1186             // otherwise, assume all models in this set are of same type
1187             if (i->GetObject().GetType() == eModel_type_ncbi_backbone)
1188                 parentSet->isAlphaOnly = true;
1189             else
1190                 parentSet->isAlphaOnly = false;
1191 
1192             // load each Biostruc-model into a CoordSet
1193             if (i->GetObject().IsSetModel_coordinates()) {
1194                 CoordSet *coordSet =
1195                     new CoordSet(this, i->GetObject().GetModel_coordinates());
1196                 coordSets.push_back(coordSet);
1197             }
1198         }
1199     }
1200 
1201     TRACEMSG("temperature range: " << minTemperature << " to " << maxTemperature);
1202 
1203     // get graph - must be done after atom coordinates are loaded, so we can
1204     // avoid storing graph nodes for atoms not present in the model
1205     graph = new ChemicalGraph(this, biostruc.GetChemical_graph(), biostruc.GetFeatures());
1206 }
1207 
GetPDBID(char separator) const1208 std::string StructureObject::GetPDBID(char separator) const
1209 {
1210     string s;
1211     for (unsigned int i=0; i<pdbIDs.size(); ++i) {
1212         if (i > 0)
1213             s += separator;
1214         s += pdbIDs[i];
1215     }
1216     return s;
1217 }
1218 
1219 
1220 
SetTransformToMaster(const CBiostruc_annot_set & annot,int masterMMDBID)1221 bool StructureObject::SetTransformToMaster(const CBiostruc_annot_set& annot, int masterMMDBID)
1222 {
1223     CBiostruc_annot_set::TFeatures::const_iterator f1, f1e=annot.GetFeatures().end();
1224     for (f1=annot.GetFeatures().begin(); f1!=f1e; ++f1) {
1225         CBiostruc_feature_set::TFeatures::const_iterator f2, f2e=f1->GetObject().GetFeatures().end();
1226         for (f2=f1->GetObject().GetFeatures().begin(); f2!=f2e; ++f2) {
1227 
1228             // skip if already used
1229             if (f2->GetObject().IsSetId() &&
1230                     parentSet->usedFeatures.find(f2->GetObject().GetId().Get()) !=
1231                         parentSet->usedFeatures.end())
1232                 continue;
1233 
1234             // look for alignment feature
1235             if (f2->GetObject().IsSetType() &&
1236 				f2->GetObject().GetType() == CBiostruc_feature::eType_alignment &&
1237                 f2->GetObject().IsSetLocation() &&
1238                 f2->GetObject().GetLocation().IsAlignment()) {
1239                 const CChem_graph_alignment& graphAlign =
1240 					f2->GetObject().GetLocation().GetAlignment();
1241 
1242                 // find transform alignment of this object with master
1243                 if (graphAlign.GetDimension() == 2 &&
1244                     graphAlign.GetBiostruc_ids().size() == 2 &&
1245                     graphAlign.IsSetTransform() &&
1246                     graphAlign.GetTransform().size() == 1 &&
1247                     graphAlign.GetBiostruc_ids().front().GetObject().IsMmdb_id() &&
1248                     graphAlign.GetBiostruc_ids().front().GetObject().GetMmdb_id().Get() == masterMMDBID &&
1249                     graphAlign.GetBiostruc_ids().back().GetObject().IsMmdb_id() &&
1250                     graphAlign.GetBiostruc_ids().back().GetObject().GetMmdb_id().Get() == mmdbID) {
1251 
1252                     // mark feature as used
1253                     if (f2->GetObject().IsSetId())
1254                         parentSet->usedFeatures[f2->GetObject().GetId().Get()] = true;
1255                     TRACEMSG("got transform for " << GetPDBID() << "->master");
1256 
1257                     // unpack transform into matrix, moves in reverse order;
1258                     Matrix xform;
1259                     transformToMaster = new Matrix();
1260                     CTransform::TMoves::const_iterator
1261                         m, me=graphAlign.GetTransform().front().GetObject().GetMoves().end();
1262                     for (m=graphAlign.GetTransform().front().GetObject().GetMoves().begin(); m!=me; ++m) {
1263                         Matrix xmat;
1264                         double scale;
1265                         if (m->GetObject().IsTranslate()) {
1266                             const CTrans_matrix& trans = m->GetObject().GetTranslate();
1267                             scale = 1.0 / trans.GetScale_factor();
1268                             SetTranslationMatrix(&xmat,
1269                                 Vector(scale * trans.GetTran_1(),
1270                                        scale * trans.GetTran_2(),
1271                                        scale * trans.GetTran_3()));
1272                         } else { // rotate
1273                             const CRot_matrix& rot = m->GetObject().GetRotate();
1274                             scale = 1.0 / rot.GetScale_factor();
1275                             xmat.m[0]=scale*rot.GetRot_11(); xmat.m[1]=scale*rot.GetRot_21(); xmat.m[2]= scale*rot.GetRot_31(); xmat.m[3]=0;
1276                             xmat.m[4]=scale*rot.GetRot_12(); xmat.m[5]=scale*rot.GetRot_22(); xmat.m[6]= scale*rot.GetRot_32(); xmat.m[7]=0;
1277                             xmat.m[8]=scale*rot.GetRot_13(); xmat.m[9]=scale*rot.GetRot_23(); xmat.m[10]=scale*rot.GetRot_33(); xmat.m[11]=0;
1278                             xmat.m[12]=0;                    xmat.m[13]=0;                    xmat.m[14]=0;                     xmat.m[15]=1;
1279                         }
1280                         ComposeInto(transformToMaster, xmat, xform);
1281                         xform = *transformToMaster;
1282                     }
1283                     return true;
1284                 }
1285             }
1286         }
1287     }
1288 
1289     WARNINGMSG("Can't get structure alignment for dependent " << GetPDBID()
1290         << " with master " << masterMMDBID << ";\nwill likely require manual realignment");
1291     return false;
1292 }
1293 
AddDomain(int * domain,const Molecule * molecule,const Block::Range * range)1294 static void AddDomain(int *domain, const Molecule *molecule, const Block::Range *range)
1295 {
1296     const StructureObject *object;
1297     if (!molecule->GetParentOfType(&object)) return;
1298     for (int l=range->from; l<=range->to; ++l) {
1299         if (molecule->residueDomains[l] != Molecule::NO_DOMAIN_SET) {
1300             if (*domain == NO_DOMAIN)
1301                 *domain = object->domainID2MMDB.find(molecule->residueDomains[l])->second;
1302             else if (*domain != MULTI_DOMAIN &&
1303                      *domain != object->domainID2MMDB.find(molecule->residueDomains[l])->second)
1304                 *domain = MULTI_DOMAIN;
1305         }
1306     }
1307 }
1308 
RealignStructure(int nCoords,const Vector * const * masterCoords,const Vector * const * dependentCoords,const double * weights,int dependentRow)1309 void StructureObject::RealignStructure(int nCoords,
1310     const Vector * const *masterCoords, const Vector * const *dependentCoords,
1311     const double *weights, int dependentRow)
1312 {
1313     Vector masterCOM, dependentCOM; // centers of mass for master, dependent
1314     Matrix dependentRotation;       // rotation to align dependent with master
1315     if (!transformToMaster) transformToMaster = new Matrix();
1316 
1317     // do the fit
1318     RigidBodyFit(nCoords, masterCoords, dependentCoords, weights, masterCOM, dependentCOM, dependentRotation);
1319 
1320     // apply the resulting transform elements from the fit to this object's transform Matrix
1321     Matrix single, combined;
1322     SetTranslationMatrix(&single, -dependentCOM);
1323     ComposeInto(transformToMaster, dependentRotation, single);
1324     combined = *transformToMaster;
1325     SetTranslationMatrix(&single, masterCOM);
1326     ComposeInto(transformToMaster, single, combined);
1327 
1328     // print out RMSD
1329     INFOMSG("RMSD of alpha coordinates used to align master structure and " << GetPDBID() << ": "
1330         << setprecision(3) << ComputeRMSD(nCoords, masterCoords, dependentCoords, transformToMaster) << setprecision(6) << " A");
1331 
1332     // create a new Biostruc-feature that contains this alignment
1333     CBiostruc_feature *feature = new CBiostruc_feature();
1334     feature->SetType(CBiostruc_feature::eType_alignment);
1335     CRef<CBiostruc_feature::C_Location> location(new CBiostruc_feature::C_Location());
1336     feature->SetLocation(*location);
1337     CChem_graph_alignment *graphAlignment = new CChem_graph_alignment();
1338     location.GetObject().SetAlignment(*graphAlignment);
1339 
1340     // fill out the Chem-graph-alignment
1341     graphAlignment->SetDimension(2);
1342     CMmdb_id
1343         *masterMID = new CMmdb_id(parentSet->objects.front()->mmdbID),
1344         *dependentMID = new CMmdb_id(mmdbID);
1345     CBiostruc_id
1346         *masterBID = new CBiostruc_id(),
1347         *dependentBID = new CBiostruc_id();
1348     masterBID->SetMmdb_id(*masterMID);
1349     dependentBID->SetMmdb_id(*dependentMID);
1350     graphAlignment->SetBiostruc_ids().resize(2);
1351     graphAlignment->SetBiostruc_ids().front().Reset(masterBID);
1352     graphAlignment->SetBiostruc_ids().back().Reset(dependentBID);
1353     graphAlignment->SetAlignment().resize(2);
1354 
1355     // fill out sequence alignment intervals, tracking domains in alignment
1356     const BlockMultipleAlignment *multiple = parentSet->alignmentManager->GetCurrentMultipleAlignment();
1357     int masterDomain = NO_DOMAIN, dependentDomain = NO_DOMAIN;
1358     const Molecule
1359         *masterMolecule = multiple->GetSequenceOfRow(0)->molecule,
1360         *dependentMolecule = multiple->GetSequenceOfRow(dependentRow)->molecule;
1361     BlockMultipleAlignment::UngappedAlignedBlockList blocks;
1362     multiple->GetUngappedAlignedBlocks(&blocks);
1363     if (blocks.size() > 0) {
1364         CChem_graph_pntrs
1365             *masterCGPs = new CChem_graph_pntrs(),
1366             *dependentCGPs = new CChem_graph_pntrs();
1367         graphAlignment->SetAlignment().front().Reset(masterCGPs);
1368         graphAlignment->SetAlignment().back().Reset(dependentCGPs);
1369         CResidue_pntrs
1370             *masterRPs = new CResidue_pntrs(),
1371             *dependentRPs = new CResidue_pntrs();
1372         masterCGPs->SetResidues(*masterRPs);
1373         dependentCGPs->SetResidues(*dependentRPs);
1374 
1375         masterRPs->SetInterval().resize(blocks.size());
1376         dependentRPs->SetInterval().resize(blocks.size());
1377         BlockMultipleAlignment::UngappedAlignedBlockList::const_iterator b, be = blocks.end();
1378         CResidue_pntrs::TInterval::iterator
1379             mi = masterRPs->SetInterval().begin(),
1380             si = dependentRPs->SetInterval().begin();
1381         for (b=blocks.begin(); b!=be; ++b, ++mi, ++si) {
1382             CResidue_interval_pntr
1383                 *masterRIP = new CResidue_interval_pntr(),
1384                 *dependentRIP = new CResidue_interval_pntr();
1385             mi->Reset(masterRIP);
1386             si->Reset(dependentRIP);
1387 
1388             masterRIP->SetMolecule_id().Set(masterMolecule->id);
1389             dependentRIP->SetMolecule_id().Set(dependentMolecule->id);
1390 
1391             const Block::Range *range = (*b)->GetRangeOfRow(0);
1392             masterRIP->SetFrom().Set(range->from + 1); // +1 to convert seqLoc to residueID
1393             masterRIP->SetTo().Set(range->to + 1);
1394             AddDomain(&masterDomain, masterMolecule, range);
1395 
1396             range = (*b)->GetRangeOfRow(dependentRow);
1397             dependentRIP->SetFrom().Set(range->from + 1);
1398             dependentRIP->SetTo().Set(range->to + 1);
1399             AddDomain(&dependentDomain, dependentMolecule, range);
1400         }
1401     }
1402 
1403     // fill out structure alignment transform
1404     CTransform *xform = new CTransform();
1405     graphAlignment->SetTransform().resize(1);
1406     graphAlignment->SetTransform().front().Reset(xform);
1407     xform->SetId(1);
1408     xform->SetMoves().resize(3);
1409     CTransform::TMoves::iterator m = xform->SetMoves().begin();
1410     for (int i=0; i<3; ++i, ++m) {
1411         CMove *move = new CMove();
1412         m->Reset(move);
1413         static const int scaleFactor = 100000;
1414         if (i == 0) {   // translate dependent so its COM is at origin
1415             CTrans_matrix *trans = new CTrans_matrix();
1416             move->SetTranslate(*trans);
1417             trans->SetScale_factor(scaleFactor);
1418             trans->SetTran_1((int)(-(dependentCOM.x * scaleFactor)));
1419             trans->SetTran_2((int)(-(dependentCOM.y * scaleFactor)));
1420             trans->SetTran_3((int)(-(dependentCOM.z * scaleFactor)));
1421         } else if (i == 1) {
1422             CRot_matrix *rot = new CRot_matrix();
1423             move->SetRotate(*rot);
1424             rot->SetScale_factor(scaleFactor);
1425             rot->SetRot_11((int)(dependentRotation[0] * scaleFactor));
1426             rot->SetRot_12((int)(dependentRotation[4] * scaleFactor));
1427             rot->SetRot_13((int)(dependentRotation[8] * scaleFactor));
1428             rot->SetRot_21((int)(dependentRotation[1] * scaleFactor));
1429             rot->SetRot_22((int)(dependentRotation[5] * scaleFactor));
1430             rot->SetRot_23((int)(dependentRotation[9] * scaleFactor));
1431             rot->SetRot_31((int)(dependentRotation[2] * scaleFactor));
1432             rot->SetRot_32((int)(dependentRotation[6] * scaleFactor));
1433             rot->SetRot_33((int)(dependentRotation[10] * scaleFactor));
1434         } else if (i == 2) {    // translate dependent so its COM is at COM of master
1435             CTrans_matrix *trans = new CTrans_matrix();
1436             move->SetTranslate(*trans);
1437             trans->SetScale_factor(scaleFactor);
1438             trans->SetTran_1((int)(masterCOM.x * scaleFactor));
1439             trans->SetTran_2((int)(masterCOM.y * scaleFactor));
1440             trans->SetTran_3((int)(masterCOM.z * scaleFactor));
1441         }
1442     }
1443 
1444     // store the new alignment in the Biostruc-annot-set,
1445     // setting the feature id depending on the aligned domain(s)
1446     if (masterDomain == NO_DOMAIN) masterDomain = 0;    // can happen if single domain chain
1447     if (dependentDomain == NO_DOMAIN) dependentDomain = 0;
1448     const StructureObject *masterObject;
1449     if (!masterMolecule->GetParentOfType(&masterObject)) return;
1450     int
1451         masterDomainID = masterObject->mmdbID*10000 + masterMolecule->id*100 + masterDomain,
1452         dependentDomainID = mmdbID*100000 + dependentMolecule->id*1000 + dependentDomain*10 + 1;
1453     parentSet->AddStructureAlignment(feature, masterDomainID, dependentDomainID);
1454 
1455     // for backward-compatibility with Cn3D 3.5, need name to encode chain/domain
1456     CNcbiOstrstream oss;
1457 	#ifdef _STRUCTURE_USE_LONG_PDB_CHAINS_
1458 		oss << masterMolecule->identifier->pdbID << masterMolecule->identifier->pdbChain << masterDomain << ' '
1459 			<< dependentMolecule->identifier->pdbID << dependentMolecule->identifier->pdbChain << dependentDomain << ' '
1460 			<< "Structure alignment of dependent " << multiple->GetSequenceOfRow(dependentRow)->identifier->ToString()
1461 			<< " with master " << multiple->GetSequenceOfRow(0)->identifier->ToString()
1462 			<< ", as computed by Cn3D";
1463 	#else
1464 		oss << masterMolecule->identifier->pdbID << ((char) masterMolecule->identifier->pdbChain) << masterDomain << ' '
1465 			<< dependentMolecule->identifier->pdbID << ((char) dependentMolecule->identifier->pdbChain) << dependentDomain << ' '
1466 			<< "Structure alignment of dependent " << multiple->GetSequenceOfRow(dependentRow)->identifier->ToString()
1467 			<< " with master " << multiple->GetSequenceOfRow(0)->identifier->ToString()
1468 			<< ", as computed by Cn3D";
1469 	#endif
1470 
1471     feature->SetName((string) CNcbiOstrstreamToString(oss));
1472 }
1473 
SelectByDistance(double cutoff,unsigned int options,ResidueMap * selectedResidues) const1474 void StructureObject::SelectByDistance(double cutoff, unsigned int options,
1475     ResidueMap *selectedResidues) const
1476 {
1477     // first make a list of coordinates of atoms in selected residues,
1478     typedef vector < const AtomCoord * > CoordList;
1479     CoordList highlightedAtoms;
1480     typedef map < const Molecule * , bool > MoleculeList;
1481     MoleculeList moleculesWithHighlights;
1482 
1483     ChemicalGraph::MoleculeMap::const_iterator m, me = graph->molecules.end();
1484     for (m=graph->molecules.begin(); m!=me; ++m) {
1485         Molecule::ResidueMap::const_iterator r, re = m->second->residues.end();
1486         for (r=m->second->residues.begin(); r!=re; ++r) {
1487 
1488             if (GlobalMessenger()->IsHighlighted(m->second, r->second->id)) {
1489                 const Residue::AtomInfoMap& atomInfos = r->second->GetAtomInfos();
1490                 Residue::AtomInfoMap::const_iterator a, ae = atomInfos.end();
1491                 for (a=atomInfos.begin(); a!=ae; ++a) {
1492                     const AtomCoord *atomCoord = coordSets.front()->atomSet->
1493                         GetAtom(AtomPntr(m->second->id, r->second->id, a->first), true, true);
1494                     if (atomCoord) highlightedAtoms.push_back(atomCoord);
1495                 }
1496                 moleculesWithHighlights[m->second] = true;
1497             }
1498         }
1499     }
1500     if (highlightedAtoms.size() == 0) return;
1501 
1502     // now make a list of unselected residues to check for proximity
1503     typedef vector < const Residue * > ResidueList;
1504     ResidueList unhighlightedResidues;
1505 
1506     for (m=graph->molecules.begin(); m!=me; ++m) {
1507         Molecule::ResidueMap::const_iterator r, re = m->second->residues.end();
1508 
1509         if ((options & StructureSet::eSelectOtherMoleculesOnly) &&
1510                 moleculesWithHighlights.find(m->second) != moleculesWithHighlights.end())
1511             continue;
1512 
1513         for (r=m->second->residues.begin(); r!=re; ++r) {
1514 
1515             if (!GlobalMessenger()->IsHighlighted(m->second, r->second->id) &&
1516                     (((options & StructureSet::eSelectProtein) && m->second->IsProtein()) ||
1517                      ((options & StructureSet::eSelectNucleotide) && m->second->IsNucleotide()) ||
1518                      ((options & StructureSet::eSelectHeterogen) && m->second->IsHeterogen()) ||
1519                      ((options & StructureSet::eSelectSolvent) && m->second->IsSolvent())))
1520                 unhighlightedResidues.push_back(r->second);
1521         }
1522     }
1523     if (unhighlightedResidues.size() == 0) return;
1524 
1525     // now check all unhighlighted residues, to see if any atoms are within cutoff distance
1526     // of any highlighted atoms; if so, add to residue selection list
1527     CoordList::const_iterator h, he = highlightedAtoms.end();
1528     ResidueList::const_iterator u, ue = unhighlightedResidues.end();
1529     for (u=unhighlightedResidues.begin(); u!=ue; ++u) {
1530         const Molecule *molecule;
1531         if (!(*u)->GetParentOfType(&molecule)) continue;
1532 
1533         const Residue::AtomInfoMap& atomInfos = (*u)->GetAtomInfos();
1534         Residue::AtomInfoMap::const_iterator a, ae = atomInfos.end();
1535         for (a=atomInfos.begin(); a!=ae; ++a) {
1536             const AtomCoord *uAtomCoord = coordSets.front()->atomSet->
1537                 GetAtom(AtomPntr(molecule->id, (*u)->id, a->first), true, true);
1538             if (!uAtomCoord) continue;
1539 
1540             // for each unhighlighted atom, check for proximity to a highlighted atom
1541             for (h=highlightedAtoms.begin(); h!=he; ++h) {
1542                 if ((uAtomCoord->site - (*h)->site).length() <= cutoff)
1543                     break;
1544             }
1545             if (h != he)
1546                 break;
1547         }
1548 
1549         // if any atom of this residue is near a highlighted atom, add to selection list
1550         if (a != ae)
1551             (*selectedResidues)[*u] = molecule;
1552     }
1553 }
1554 
1555 END_SCOPE(Cn3D)
1556