1 /*  $Id: molecule.hpp 103491 2007-05-04 17:18:18Z kazimird $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors:  Paul Thiessen
27 *
28 * File Description:
29 *      Classes to hold molecules
30 *
31 * ===========================================================================
32 */
33 
34 #ifndef CN3D_MOLECULE__HPP
35 #define CN3D_MOLECULE__HPP
36 
37 #include <corelib/ncbistl.hpp>
38 
39 #include <map>
40 #include <string>
41 #include <vector>
42 
43 #include <objects/mmdb1/Molecule_graph.hpp>
44 #include <objects/mmdb1/Residue_graph.hpp>
45 #include <objects/mmdb1/Biomol_descr.hpp>
46 
47 #include "structure_base.hpp"
48 #include "residue.hpp"
49 #include "vector_math.hpp"
50 
51 
52 BEGIN_SCOPE(Cn3D)
53 
54 // A Molecule is generally a fully connected set of atoms - e.g. a protein chain,
55 // heterogen, etc. For proteins and nucleotides, it is divided into an ordered
56 // sequence of Residues, along with inter-residue bonds.
57 
// list of ncbi::CRef-wrapped MMDB residue graphs; the Molecule constructor below takes its
// standard and local residue dictionaries in this form
typedef std::list< ncbi::CRef< ncbi::objects::CResidue_graph > > ResidueGraphList;

// forward declarations - within this header these types are only used through pointers
class ChemicalGraph;
class Bond;
class Sequence;
class MoleculeIdentifier;
64 
65 class Molecule : public StructureBase
66 {
67 public:
68     Molecule(ChemicalGraph *parentGraph,
69         const ncbi::objects::CMolecule_graph& graph,
70         const ResidueGraphList& standardDictionary,
71         const ResidueGraphList& localDictionary);
72 
73     // public data
74     enum eType {
75         eDNA = ncbi::objects::CBiomol_descr::eMolecule_type_dna,
76         eRNA = ncbi::objects::CBiomol_descr::eMolecule_type_rna,
77         eProtein = ncbi::objects::CBiomol_descr::eMolecule_type_protein,
78         eBiopolymer = ncbi::objects::CBiomol_descr::eMolecule_type_other_biopolymer,
79         eSolvent = ncbi::objects::CBiomol_descr::eMolecule_type_solvent,
80         eNonpolymer = ncbi::objects::CBiomol_descr::eMolecule_type_other_nonpolymer,
81         eOther = ncbi::objects::CBiomol_descr::eMolecule_type_other
82     };
83     eType type;
84     int id;
85     std::string name;
86     const MoleculeIdentifier *identifier;
87 
88     typedef std::map < int, const Residue * > ResidueMap;
89     ResidueMap residues;
90     typedef std::list < const Bond * > BondList;
91     BondList interResidueBonds; // includes virtual and disulfide bonds
92 
93     // ints are residue IDs; tracks intramolecular disulfides (mainly for fast lookup by threader)
94     typedef std::map < int, int > DisulfideMap;
95     DisulfideMap disulfideMap;
96 
97     // maps of sequence location ( = residueID - 1) to secondary structure and domains
98     static const int NO_DOMAIN_SET;
99     enum eSecStruc {
100         eHelix,
101         eStrand,
102         eCoil
103     };
104     std::vector < eSecStruc > residueSecondaryStructures;
105     std::vector < int > residueDomains;
106     int nDomains;
107 
108     // corresponding sequence (if present)
109     const Sequence *sequence;
110 
111     typedef std::list < unsigned int > DisplayListList;
112     DisplayListList displayLists;
113 
114     // public methods
IsProtein(void) const115     bool IsProtein(void) const { return (type == eProtein); }
IsNucleotide(void) const116     bool IsNucleotide(void) const { return (type == eDNA || type == eRNA); }
IsBiopolymer(void) const117     bool IsBiopolymer(void) const { return (type == eProtein || type == eDNA || type == eRNA || type == eBiopolymer); }
IsSolvent(void) const118     bool IsSolvent(void) const { return (type == eSolvent); }
IsHeterogen(void) const119     bool IsHeterogen(void) const { return (!IsProtein() && !IsNucleotide() && !IsSolvent()); }
120 
NResidues(void) const121     unsigned int NResidues(void) const { return residues.size(); }
GetAtomInfo(int rID,int aID) const122     const Residue::AtomInfo * GetAtomInfo(int rID, int aID) const
123     {
124         ResidueMap::const_iterator info=residues.find(rID);
125         if (info != residues.end()) return (*info).second->GetAtomInfo(aID);
126         ERR_POST(ncbi::Warning << "Molecule #" << id << ": can't find residue #" << rID);
127         return NULL;
128     }
129 
130     // residue color method - called by sequence/alignment viewer - note
131     // that sequenceIndex is numbered from zero.
132     Vector GetResidueColor(int sequenceIndex) const;
133 
134     // get coordinates for alpha atoms of residues with given sequence indexes;
135     // returns actual # coordinates retrieved if successful, -1 on failure
136     int GetAlphaCoords(int nResidues, const int *seqIndexes, const Vector * *coords) const;
137 
138     // secondary structure query methods
139 
IsResidueInHelix(int residueID) const140     bool IsResidueInHelix(int residueID) const
141         { return (IsProtein() && residueSecondaryStructures[residueID - 1] == eHelix); }
IsResidueInStrand(int residueID) const142     bool IsResidueInStrand(int residueID) const
143         { return (IsProtein() && residueSecondaryStructures[residueID - 1] == eStrand); }
IsResidueInCoil(int residueID) const144     bool IsResidueInCoil(int residueID) const
145         { return (!IsProtein() || residueSecondaryStructures[residueID - 1] == eCoil); }
146 
147     // domain query
ResidueDomainID(int residueID) const148     int ResidueDomainID(int residueID) const
149         { return residueDomains[residueID - 1]; }
150 
151     // drawing - include chain termini labels
152     bool DrawAllWithTerminiLabels(const AtomSet *atomSet = NULL) const;
153 };
154 
155 END_SCOPE(Cn3D)
156 
157 #endif // CN3D_MOLECULE__HPP
158