1 /*  $Id: molecule_identifier.hpp 591063 2019-08-09 15:27:34Z wangjiy $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors:  Paul Thiessen
27 *
28 * File Description:
29 *      Class to hold, and factory to generate, general
30 *      (instance-independent) identifier for any molecule
31 *
32 * ===========================================================================
33 */
34 
35 #ifndef CN3D_MOLECULE_IDENTIFIER__HPP
36 #define CN3D_MOLECULE_IDENTIFIER__HPP
37 
38 #include <corelib/ncbistl.hpp>
39 
40 #include <objects/seqloc/Seq_id.hpp>
41 
42 #include <list>
43 #include <string>
44 
45 
46 BEGIN_SCOPE(Cn3D)
47 
// forward declarations - this header only refers to these types through pointers
class Molecule;
class Sequence;

// make sure string-valued PDB chain identifiers are selected, unless the
// macro has already been defined before this header was included
#if !defined(_STRUCTURE_USE_LONG_PDB_CHAINS_)
#   define _STRUCTURE_USE_LONG_PDB_CHAINS_
#endif
54 
55 class MoleculeIdentifier
56 {
57 public:
58     static const int VALUE_NOT_SET;
59 
60     // store all Seq-ids, and also mmdb info
61     typedef std::list < ncbi::CRef < ncbi::objects::CSeq_id > > SeqIdList;
62     SeqIdList seqIDs;
63     int mmdbID, moleculeID, gi;
64 
65 	#ifdef _STRUCTURE_USE_LONG_PDB_CHAINS_
66 	  std::string pdbChain;
67 	#else
68 	  int pdbChain;
69 	#endif
70 
71     std::string pdbID;
72 
73     // # residues (1 for non-biopolymers - hets, solvents)
74     unsigned int nResidues;
75 
76     // get title string based on identifiers present
77     std::string ToString(void) const;
78 
79     // create, or retrieve if known, an identifier for an entity
80     static const MoleculeIdentifier * GetIdentifier(const Molecule *molecule, const SeqIdList& ids);
81     static const MoleculeIdentifier * GetIdentifier(const Sequence *sequence, int mmdbID, const SeqIdList& ids);
82 
83     // get identifier for MMDB ID + molecule (NULL if not found)
84     static const MoleculeIdentifier * FindIdentifier(int mmdbID, int moleculeID);
85 
86     // test for Seq-id match
87     bool MatchesSeqId(const ncbi::objects::CSeq_id& sid) const;
88 
89     // clear identifier store (e.g. when a new file is loaded)
90     static void ClearIdentifiers(void);
91 
92     // does this molecule have structure?
HasStructure(void) const93     bool HasStructure(void) const
94     {
95 
96 		#ifdef _STRUCTURE_USE_LONG_PDB_CHAINS_
97 			return (
98 				(mmdbID != VALUE_NOT_SET && moleculeID != VALUE_NOT_SET) ||
99 				(pdbID.size() > 0 && !pdbChain.empty()));
100 		#else
101 			return (
102 				(mmdbID != VALUE_NOT_SET && moleculeID != VALUE_NOT_SET) ||
103 				(pdbID.size() > 0 && pdbChain != VALUE_NOT_SET));
104 		#endif
105     }
106 
107     // comparison of identifiers (e.g. for sorting) - floats PDB's to top, then gi's in numerical order
108     static bool CompareIdentifiers(const MoleculeIdentifier *a, const MoleculeIdentifier *b);
109 
110     // get general label (e.g. an accession)
GetLabel(void) const111     std::string GetLabel(void) const
112     {
113         std::string label;
114         if (seqIDs.size() > 0)
115             seqIDs.front()->GetLabel(&label, ncbi::objects::CSeq_id::eContent, 0);
116         return label;
117     }
118 
119 private:
120     // can't create one of these directly - must use GetIdentifier()
121 	#ifdef _STRUCTURE_USE_LONG_PDB_CHAINS_
MoleculeIdentifier(void)122 		MoleculeIdentifier(void) :
123 			mmdbID(VALUE_NOT_SET), moleculeID(VALUE_NOT_SET), pdbChain(""), gi(VALUE_NOT_SET), nResidues(0)
124 			{ }
125 	#else
126 		MoleculeIdentifier(void) :
127 			mmdbID(VALUE_NOT_SET), moleculeID(VALUE_NOT_SET), pdbChain(VALUE_NOT_SET), gi(VALUE_NOT_SET), nResidues(0)
128 			{ }
129 	#endif
130 
131     // get identifier based on Seq-id match
132     static MoleculeIdentifier * GetIdentifier(const SeqIdList& ids);
133 
134     // get identifier based on MMDB ID + molecule, for stuff like ligands that don't have Seq-id
135     static MoleculeIdentifier * GetIdentifier(int mmdbID, int moleculeID);
136 
137     // save and fill out special id fields from Seq-ids
138     void AddFields(const SeqIdList& ids);
139 };
140 
141 END_SCOPE(Cn3D)
142 
143 #endif // CN3D_MOLECULE_IDENTIFIER__HPP
144