1 /*  $Id: structure_set.hpp 410904 2013-08-22 17:32:35Z thiessen $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors:  Paul Thiessen
27 *
28 * File Description:
29 *      Classes to hold sets of structure data
30 *
31 * ===========================================================================
32 */
33 
34 #ifndef CN3D_STRUCTURESET__HPP
35 #define CN3D_STRUCTURESET__HPP
36 
37 #include <corelib/ncbistd.hpp>
38 #include <corelib/ncbistl.hpp>
39 
40 #include <objects/ncbimime/Ncbi_mime_asn1.hpp>
41 #include <objects/cdd/Cdd.hpp>
42 #include <objects/mmdb1/Biostruc.hpp>
43 #include <objects/mmdb1/Biostruc_annot_set.hpp>
44 #include <objects/mmdb3/Biostruc_feature.hpp>
45 #include <objects/cdd/Align_annot_set.hpp>
46 #include <objects/seq/Bioseq.hpp>
47 #include <objects/cdd/Reject_id.hpp>
48 
49 #include <string>
50 #include <map>
51 #include <vector>
52 #include <list>
53 
54 #include "structure_base.hpp"
55 #include "vector_math.hpp"
56 
57 
58 BEGIN_SCOPE(Cn3D)
59 
// StructureSet is the top-level container. It holds a set of StructureObjects;
// a StructureObject is basically the contents of one PDB entry.
62 
// Forward declarations for types that this header only uses through
// pointers or references (each name is declared exactly once).
class StructureObject;
class OpenGLRenderer;
class ShowHideManager;
class StyleManager;
class Residue;
class SequenceSet;
class AlignmentSet;
class AlignmentManager;
class SequenceViewer;
class Messenger;
class Colors;
class Molecule;
class BlockMultipleAlignment;
class Sequence;
class ChemicalGraph;
class CoordSet;
class ASNDataManager;
81 
82 class StructureSet : public StructureBase
83 {
84 public:
85     StructureSet(ncbi::objects::CNcbi_mime_asn1 *mime, unsigned int structureLimit, OpenGLRenderer *r);
86     StructureSet(ncbi::objects::CCdd *cdd, unsigned int structureLimit, OpenGLRenderer *r);
87     ~StructureSet(void);
88 
89     // public data
90 
91     bool isAlphaOnly;   // assume if one Object is alpha-only, then they all are
92     int nDomains;       // total number of domains over all objects
93     bool hasUserStyle;  // whether there's a global style in the original data
94 
95     typedef std::list < const StructureObject * > ObjectList;
96     ObjectList objects;
97 
98     // sequence and alignment information
99     const SequenceSet *sequenceSet;
100     const AlignmentSet *alignmentSet;
101     AlignmentManager *alignmentManager;
102 
103     OpenGLRenderer *renderer;
104     ShowHideManager *showHideManager;
105     StyleManager *styleManager;
106 
107     Vector center; // center of structure (relative to Master's coordinates)
108     double maxDistFromCenter; // max distance of any atom from center
109     Vector rotationCenter; // center of rotation (relative to Master's coordinates)
110 
111     // for assigning display lists and frames
112     unsigned int lastDisplayList;
113 
114     typedef std::list < unsigned int > DisplayLists;
115     typedef std::vector < DisplayLists > FrameMap;
116     FrameMap frameMap;
117 
118     // to map display list -> dependent transform
119     typedef std::map < unsigned int, const Matrix * const * > TransformMap;
120     TransformMap transformMap;
121 
122     // for ensuring unique structure<->structure alignments for repeated structures
123     std::map < int, bool > usedFeatures;
124 
125 
126     // public methods
127 
128     bool IsMultiStructure(void) const;
129 
130     // set screen and rotation center of model (coordinate relative to Master);
131     // if NULL, will calculate average geometric center
132     void SetCenter(const Vector *setTo = NULL);
133 
134     // try to find an "optimal" view of a single structure
135     void CenterViewOnStructure(void);
136 
137     // center rotation and view on aligned residues only
138     bool CenterViewOnAlignedResidues(void);
139 
140     bool Draw(const AtomSet *atomSet) const;
141 
142     // keep a list of names to look atoms from GL selection
143     unsigned int CreateName(const Residue *residue, int atomID);
144     bool GetAtomFromName(unsigned int name, const Residue **residue, int *atomID) const;
145 
146     // called when an atom is selected in the GL window. If setCenter == true, then
147     // the atom's location is used as the global rotation center
148     void SelectedAtom(unsigned int name, bool setCenter);
149 
150     // select various components based on distance
151     static const unsigned int
152         eSelectProtein,
153         eSelectNucleotide,
154         eSelectHeterogen,
155         eSelectSolvent,
156         eSelectOtherMoleculesOnly;
157     void SelectByDistance(double cutoff, unsigned int options) const;
158 
159     // updates sequences in the asn, to remove any sequences
160     // that are not used by the current alignmentSet or updates
161     void RemoveUnusedSequences(void);
162 
163     // put in new AlignmentSet - e.g. when alignment has been edited
164     void ReplaceAlignmentSet(AlignmentSet *newAlignmentSet);
165 
166     // replace the ASN update list with the current updates
167     void ReplaceUpdates(ncbi::objects::CCdd::TPending& newUpdates);
168 
169     // bit flags to tell whether various parts of the data have been changed
170     static const unsigned int
171         ePSSMData,                  // PSSM values have changed
172         eRowOrderData,              // row order has changed
173         eAnyAlignmentData,          // any change to alignment (including edits that don't change prev two)
174         eStructureAlignmentData,
175         eSequenceData,
176         eUpdateData,
177         eStyleData,
178         eUserAnnotationData,
179         eCDDData,
180         eOtherData;
181 
182     bool HasDataChanged(void) const;
183     void SetDataChanged(unsigned int what) const;
184 
185     // CDD-specific data accessors
186     bool IsCDD(void) const;
187     bool IsCDDInMime(void) const;
188     bool HasStructuredMaster(void) const;
189     const std::string& GetCDDName(void) const;
190     bool SetCDDName(const std::string& name);
191     const std::string& GetCDDDescription(void) const;
192     bool SetCDDDescription(const std::string& descr);
193     typedef std::vector < std::string > TextLines;
194     bool GetCDDNotes(TextLines *lines) const;
195     bool SetCDDNotes(const TextLines& lines);
196     ncbi::objects::CCdd_descr_set * GetCDDDescrSet(void);
197     ncbi::objects::CAlign_annot_set * GetCDDAnnotSet(void);
198 
199     // convert underlying data from mime to cdd
200     bool ConvertMimeDataToCDD(const std::string& cddName);
201 
202     // writes data to a file; returns true on success
203     bool SaveASNData(const char *filename, bool doBinary, unsigned int *changeFlags);
204 
205     // adds a new Sequence to the SequenceSet if it doesn't exist already
206     const Sequence * FindOrCreateSequence(ncbi::objects::CBioseq& bioseq);
207 
208     // reject sequence (from CDD)
209     void RejectAndPurgeSequence(const Sequence *reject, std::string reason, bool purge);
210     typedef std::list < ncbi::CRef < ncbi::objects::CReject_id > > RejectList;
211     const RejectList * GetRejects(void) const;
212     void ShowRejects(void) const;
213 
214     // adds a new Biostruc to the asn data, if appropriate
215     bool AddBiostrucToASN(ncbi::objects::CBiostruc *biostruc);
216 
217     // for manipulating structure alignment features
218     void InitStructureAlignments(int masterMMDBID);
219     void AddStructureAlignment(ncbi::objects::CBiostruc_feature *feature,
220         int masterDomainID, int dependentDomainID);
221     void RemoveStructureAlignments(void);
222 
223     bool MonitorAlignments(void) const;
224 
225 private:
226     ASNDataManager *dataManager;
227 
228     // data preparation methods
229     void Load(unsigned int structureLimit);
230     void LoadSequencesForSingleStructure(void);             // for single structures
231     void LoadAlignmentsAndStructures(unsigned int structureLimit);   // for alignments
232     std::map < const ncbi::objects::CBiostruc * , bool > usedStructures;
233     bool MatchSequenceToMoleculeInObject(const Sequence *seq,
234         const StructureObject *obj, const Sequence **seqHandle = NULL);
235     bool LoadMaster(int masterMMDBID);
236     void VerifyFrameMap(void) const;
237 
238     // to keep track of gl "name" -> atom correspondence (for structure picking)
239     typedef std::pair < const Residue*, int > NamePair;
240     typedef std::map < unsigned int, NamePair > NameMap;
241     NameMap nameMap;
242     unsigned int lastAtomName;
243 
244     // for printing out distances between successively picked atoms
245     Vector prevPickedAtomCoord;
246     bool havePrevPickedAtomCoord;
247 };
248 
249 class StructureObject : public StructureBase
250 {
251 private:
252     const bool isMaster;
253 
254 public:
255     // biostruc must not be "raw" mmdb data - will get confused by presence of different models
256     StructureObject(StructureBase *parent, const ncbi::objects::CBiostruc& biostruc, bool isMaster);
~StructureObject(void)257     ~StructureObject(void) { if (transformToMaster) delete transformToMaster; }
258 
259     // public data
260 
261     static const int NO_MMDB_ID;
262     int id, mmdbID;
263     std::vector < std::string > pdbIDs;
264     Matrix *transformToMaster;
265 
266     // an object has one ChemicalGraph that can be applied to one or more
267     // CoordSets to generate the object's model(s)
268     const ChemicalGraph *graph;
269     typedef std::list < const CoordSet * > CoordSetList;
270     CoordSetList coordSets;
271 
272     // min and max atomic temperatures
273     static const double NO_TEMPERATURE;
274     double minTemperature, maxTemperature;
275 
276     // map of internal domainID -> Molecule and MMDB-assigned id
277     typedef std::map < int, const Molecule * > DomainMap;
278     DomainMap domainMap;
279     typedef std::map < int, int > DomainIDMap;
280     DomainIDMap domainID2MMDB;
281 
282     // public methods
283 
284     std::string GetPDBID(char separator = '_') const;
285 
286     typedef std::map < const Residue *, const Molecule * > ResidueMap;
287     void SelectByDistance(double cutoff, unsigned int options, ResidueMap *selectedResidues) const;
288 
289     // set transform based on asn1 data
290     bool SetTransformToMaster(const ncbi::objects::CBiostruc_annot_set& annot, int masterMMDBID);
291 
292     // set transform based on rigid body fit of given coordinates
293     void RealignStructure(int nCoords,
294         const Vector * const *masterCoords, const Vector * const *dependentCoords,
295         const double *weights, int dependentRow);
296 
IsMaster(void) const297     bool IsMaster(void) const { return isMaster; }
IsDependent(void) const298     bool IsDependent(void) const { return !isMaster; }
NDomains(void) const299     int NDomains(void) const { return domainMap.size(); }
300 };
301 
302 END_SCOPE(Cn3D)
303 
304 #endif // CN3D_STRUCTURESET__HPP
305