1 /********************************************************************** 2 chains.h - Parse for macromolecule chains and residues 3 4 Copyright (C) 1998-2001 by OpenEye Scientific Software, Inc. 5 Some portions Copyright (C) 2001-2006 by Geoffrey R. Hutchison 6 Some portions Copyright (C) 2008 by Tim Vandermeersch 7 8 This file is part of the Open Babel project. 9 For more information, see <http://openbabel.org/> 10 11 This program is free software; you can redistribute it and/or modify 12 it under the terms of the GNU General Public License as published by 13 the Free Software Foundation version 2 of the License. 14 15 This program is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 GNU General Public License for more details. 19 ***********************************************************************/ 20 21 #ifndef OB_CHAINS_H 22 #define OB_CHAINS_H 23 24 #define MaxMonoAtom 20 25 #define MaxMonoBond 20 26 27 #include <openbabel/babelconfig.h> 28 #include <vector> 29 30 namespace OpenBabel 31 { 32 33 class OBAtom; 34 class OBMol; 35 36 //! Structure template for atomic patterns in residues for OBChainsParser 37 // implementation in chains.cpp 38 struct Template; 39 typedef struct Template Template; 40 41 /** @class OBChainsParser chains.h <openbabel/chains.h> 42 @brief Perceives peptide or nucleotide chains and residues in an OBMol 43 44 Perceive peptide or nucleotide chains and residues from atom connectivity. 45 Based on original RasMol code by Roger Sayle and modified by Joe Corkery. 46 For more on Roger's original talk, see: 47 http://www.daylight.com/meetings/mug96/sayle/sayle.html 48 */ 49 class OBAPI OBChainsParser 50 { 51 public: 52 53 OBChainsParser(void); 54 ~OBChainsParser(void); 55 56 /** 57 * Perceive macromolecular (peptide and nucleotide) residues and chains 58 * @param mol The molecule to parse and update 59 * @param nukeSingleResidue If only one residue is found, clear information 60 * default = false -- single residue files should still be recognized. 61 */ 62 bool PerceiveChains(OBMol &mol, bool nukeSingleResidue = false); 63 64 private: // internal methods 65 66 //! @name Step 1: Determine hetero atoms 67 //@{ 68 /** 69 * Determine HETATOM records for all atoms with a heavy valance of 0. 70 * This includes HOH, Cl, Fe, ... 71 * 72 * Sets resids[i] & hetflags[i] for these atoms. 73 * @todo add ions (Cl, Fe, ...) 74 */ 75 bool DetermineHetAtoms(OBMol &); 76 //@} 77 78 //! @name Step 2: Determine connected chains 79 //@{ 80 /** 81 * Determine connected chains (e.g., subunits). Chains will be labeled A, B, C, ... 82 * Ligands also get assigned a chain label. The chain for ligands will later be 83 * replaced by ' '. The residue numbers will also be updated in this process to 84 * make sure all ligands, HOH, ions, etc. have a unique residue number in the ' ' 85 * chain. 86 * 87 * Sets chains[i] for all atoms. (through RecurseChain()) 88 */ 89 bool DetermineConnectedChains(OBMol &); 90 /** 91 * Perform the actual work for DetermineConnectedChains(). Set chains[i] 92 * to @p c for all atoms of the recursed chain. 93 * @param mol The molecule. 94 * @param i Index for the current atom. (RecurseChain() will be called for all neighbours) 95 * @param c The chain which we are recusring. ('A' + count) 96 * @return The number of heavy atoms in the recursed chain. 97 */ 98 unsigned int RecurseChain(OBMol &mol, unsigned int i, int c); 99 //@} 100 101 //! @name Step 3: Determine peptide backbone 102 //@{ 103 /** 104 * Walk a peptide "backbone" atom sequence, from one residue to the next. This 105 * function will look for N-CA-C-O sequences and mark these atoms. 106 * 107 * Sets bitmaks[i] for these atoms. (through ConstrainBackbone()) 108 * Sets resnos[i] for these atoms. (through TracePeptideChain()) 109 */ 110 bool DeterminePeptideBackbone(OBMol &); 111 /** 112 * First the bitmasks[i] will be OR-ed with Template::flag for all atoms based on 113 * on Template::element and Template::count. 114 * 115 * Next, the bitmasks[i] are iteratively resolved by matching the 116 * constraints in OpenBabel::Peptide or OpenBabel::Nucleotide. 117 * @param mol The molecule. 118 * @param templ OpenBabel::Peptide or OpenBabel::Nucleotide 119 * @param tmax Number of entries in @p templ 120 */ 121 void ConstrainBackbone(OBMol &mol, Template *templ, int tmax); 122 /** 123 * @return True if the bitmasks[i] for @p atom matches @p mask. 124 */ 125 bool MatchConstraint(OBAtom *atom, int mask); 126 /** 127 * @return True if atom @p na and @p nb match the Template::n1 and 128 * Template::n2. 129 */ 130 bool Match2Constraints(Template *templ, OBAtom *na, OBAtom *nb); 131 /** 132 * @return True if atom @p na, @p nb and @p nc match the Template::n1, 133 * Template::n2 and Template::n3. 134 */ 135 bool Match3Constraints(Template *templ, OBAtom *na, OBAtom *nb, OBAtom *nc); 136 /** 137 * @return True if atom @p na, @p nb, @p nc and @p nd match the Template::n1, 138 * Template::n2, Template::n3 and Template::n4. 139 */ 140 bool Match4Constraints(Template *templ, OBAtom *na, OBAtom *nb, OBAtom *nc, OBAtom *nd); 141 /** 142 * Now we have the constrained bitmaks[i], trace N-CA-C-O-... and set 143 * resnos[i] and atomids[i] for each N-CA-C-O sequence. 144 * 145 * Also adds BF_DOUBLE to flags[b] for< each carbonyl bond in N-CA-C=O. 146 * @param mol The molecule. 147 * @param i Index for the current atom. (TracePeptideChain() will be called for all neighbours) 148 * @param r The residue number which we are tracing. 149 */ 150 void TracePeptideChain(OBMol &mol, unsigned int i, int r); 151 //@} 152 153 //! @name Step 4: Determine peptide side chains 154 //@{ 155 /** 156 * Look for atoms with atomids[i] CA and identify their side chain. 157 * 158 * Sets resnos[i] and resids[i] for all identified residues (including the N-CA-C-O). 159 * (through IdentifyResidue() and AssignResidue()) 160 */ 161 bool DeterminePeptideSidechains(OBMol &); 162 /** 163 * Identify a residue based on the @p tree ByteCode. 164 * 165 * Sets resnos[i] for all sidechain atoms to the residue number of 166 * the seed CA/C1 atom. 167 * @param tree Bytecode for the residues. (OBChainsParser::PDecisionTree or OBChainsParser::NDecisionTree) 168 * @param mol The molecule. 169 * @param seed Atom index for the CA (peptides) or C1 (nucleotides) atom. 170 * @param resno The residue number for this residue. 171 * @return The resids[i] for the identified residue. 172 */ 173 int IdentifyResidue(void *tree, OBMol &mol, unsigned int seed, int resno); // ByteCode * 174 /** 175 * Set resids[i] for all atoms where resids[i] = @p r and chains[i] = @p c. 176 * @param mol The molecule. 177 * @param r The residue number. 178 * @param c The chain number. 179 * @param i The residue id (resids[i] returned by IdentifyResidue()) 180 */ 181 void AssignResidue(OBMol &mol, int r, int c, int i); 182 //@} 183 184 //! @name Step 5: Assign hydrogens 185 //@{ 186 /** 187 * Assign the resids[i], resnos[i], ... for all hydrogens based on the 188 * atom they are bound to. 189 */ 190 bool DetermineHydrogens(OBMol &); 191 //@} 192 193 //! @name Step 6: Set the residue information 194 //@{ 195 /** 196 * Convert the private data vectors to OBResidue objects and add them to @p mol. 197 * @param mol The molecule to parse and update 198 * @param nukeSingleResidue If only one residue is found, clear information 199 * default = false -- single residue files should still be recognized. 200 */ 201 void SetResidueInformation(OBMol &, bool nukeSingleResidue); 202 //@} 203 204 //! @name Nucleic acids (analog to peptides) 205 //@{ 206 /** 207 * Walk a nucleic "backbone" atom sequence, from one residue to the next. This 208 * function will look for ribose-5-P sequences and mark these atoms. 209 * 210 * Sets bitmaks[i] for these atoms. (through ConstrainBackbone()) 211 * Sets resnos[i] for these atoms. (through TraceNucleicChain()) 212 */ 213 bool DetermineNucleicBackbone(OBMol &); 214 /** 215 * Now we have the constrained bitmaks[i], trace nucleic backbone and set 216 * resnos[i] and atomids[i] for each ribose-5-P sequence. 217 * @param mol The molecule. 218 * @param i Index for the current atom. (TraceNucleicChain() will be called for all neighbours) 219 * @param r The residue number which we are tracing. 220 */ 221 void TraceNucleicChain(OBMol &, unsigned int i, int r); 222 /** 223 * Look for atoms with atomids[i] C1 and identify their side chain. 224 * 225 * Sets resnos[i] and resids[i] for all identified residues. 226 * (through IdentifyResidue() and AssignResidue()) 227 */ 228 bool DetermineNucleicSidechains(OBMol &); 229 //@} 230 231 /** 232 * Set up the chain perception to operate on the supplied molecule 233 * by resizing and initializing the private data vectors. 234 */ 235 void SetupMol(OBMol &); 236 /** 237 * Delete all residues in @p mol 238 */ 239 void ClearResidueInformation(OBMol &mol); 240 /** 241 * Clear all private data vectors 242 */ 243 void CleanupMol(); 244 /** 245 * Construct and add ByteCode to the @p tree for a single residue. 246 * @param tree Bytecode for the residues. (OBChainsParser::PDecisionTree or OBChainsParser::NDecisionTree) 247 * @param resid The residue id. 248 * @param smiles The pseudo-smiles string (from OpenBabel::AminoAcids or OpenBabel::Nucleotides) 249 */ 250 void DefineMonomer(void **tree, int resid, const char *smiles); // ByteCode ** 251 /** 252 * @param ptr Element id (from OpenBabel::ChainsAtomName) 253 * @return The element number. 254 */ 255 int IdentifyElement(char *ptr); 256 /** 257 * Parse a pseudo smiles from OpenBabel::AminoAcids or OpenBabel::Nucleotides. 258 * @param smiles The pseudo-smiles string. 259 * @param prev The previous position (used for recursing, use -1 to start). 260 */ 261 const char *ParseSmiles(const char *smiles, int prev); 262 /** 263 * Debugging function. 264 */ 265 void DumpState(); 266 267 void *PDecisionTree; //!< ByteCode decision tree for peptides 268 void *NDecisionTree; //!< ByteCode decision tree for nucleotides 269 270 int ResMonoAtom[MaxMonoAtom]; 271 int ResMonoBond[MaxMonoBond]; 272 273 std::vector<unsigned short> bitmasks; 274 std::vector<bool> visits; //!< mark visits to prevent looping 275 std::vector<unsigned char> resids; 276 std::vector<unsigned char> flags; 277 std::vector<bool> hetflags; 278 std::vector<int> atomids; 279 std::vector<short> resnos; 280 std::vector<short> sernos; //!< array of residue serial numbers 281 std::vector<char> hcounts; 282 std::vector<char> chains; 283 }; 284 285 //! Global OBChainsParser for detecting macromolecular chains and residues 286 OB_EXTERN OBChainsParser chainsparser; 287 288 } 289 #endif // OB_CHAINS_H 290 291 //! \file chains.h 292 //! \brief Parse for macromolecule chains and residues. 293