1 // 2 // Copyright (C) 2001-2021 Greg Landrum and Rational Discovery LLC 3 // Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc. 4 // 5 // @@ All Rights Reserved @@ 6 // This file is part of the RDKit. 7 // The contents are covered by the terms of the BSD license 8 // which is included in the file license.txt, found at the root 9 // of the RDKit source tree. 10 // 11 #include <RDGeneral/export.h> 12 #ifndef _RD_MOL_OPS_H_ 13 #define _RD_MOL_OPS_H_ 14 15 #include <vector> 16 #include <map> 17 #include <list> 18 #include <RDGeneral/BoostStartInclude.h> 19 #include <boost/smart_ptr.hpp> 20 #include <boost/dynamic_bitset.hpp> 21 #include <RDGeneral/BoostEndInclude.h> 22 #include <RDGeneral/types.h> 23 #include "SanitException.h" 24 25 RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF; 26 namespace RDKit { 27 class ROMol; 28 class RWMol; 29 class Atom; 30 class Bond; 31 class Conformer; 32 typedef std::vector<double> INVAR_VECT; 33 typedef INVAR_VECT::iterator INVAR_VECT_I; 34 typedef INVAR_VECT::const_iterator INVAR_VECT_CI; 35 36 //! \brief Groups a variety of molecular query and transformation operations. 37 namespace MolOps { 38 39 //! return the number of electrons available on an atom to donate for 40 // aromaticity 41 /*! 42 The result is determined using the default valency, number of lone pairs, 43 number of bonds and the formal charge. Note that the atom may not donate 44 all of these electrons to a ring for aromaticity (also used in Conjugation 45 and hybridization code). 46 47 \param at the atom of interest 48 49 \return the number of electrons 50 */ 51 RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at); 52 53 //! sums up all atomic formal charges and returns the result 54 RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol); 55 56 //! returns whether or not the given Atom is involved in a conjugated bond 57 RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at); 58 59 //! 
find fragments (disconnected components of the molecular graph) 60 /*! 61 62 \param mol the molecule of interest 63 \param mapping used to return the mapping of Atoms->fragments. 64 On return \c mapping will be <tt>mol->getNumAtoms()</tt> long 65 and will contain the fragment assignment for each Atom 66 67 \return the number of fragments found. 68 69 */ 70 RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol, 71 std::vector<int> &mapping); 72 //! find fragments (disconnected components of the molecular graph) 73 /*! 74 75 \param mol the molecule of interest 76 \param frags used to return the Atoms in each fragment 77 On return \c mapping will be \c numFrags long, and each entry 78 will contain the indices of the Atoms in that fragment. 79 80 \return the number of fragments found. 81 82 */ 83 RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags( 84 const ROMol &mol, std::vector<std::vector<int>> &frags); 85 86 //! splits a molecule into its component fragments 87 // (disconnected components of the molecular graph) 88 /*! 89 90 \param mol the molecule of interest 91 \param sanitizeFrags toggles sanitization of the fragments after 92 they are built 93 \param frags used to return the mapping of Atoms->fragments. 94 if provided, \c frags will be <tt>mol->getNumAtoms()</tt> long 95 on return and will contain the fragment assignment for each Atom 96 \param fragsMolAtomMapping used to return the Atoms in each fragment 97 On return \c mapping will be \c numFrags long, and each entry 98 will contain the indices of the Atoms in that fragment. 
99 \param copyConformers toggles copying conformers of the fragments after 100 they are built 101 \return a vector of the fragments as smart pointers to ROMols 102 103 */ 104 RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags( 105 const ROMol &mol, bool sanitizeFrags = true, 106 std::vector<int> *frags = nullptr, 107 std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr, 108 bool copyConformers = true); 109 110 //! splits a molecule into pieces based on labels assigned using a query 111 /*! 112 113 \param mol the molecule of interest 114 \param query the query used to "label" the molecule for fragmentation 115 \param sanitizeFrags toggles sanitization of the fragments after 116 they are built 117 \param whiteList if provided, only labels in the list will be kept 118 \param negateList if true, the white list logic will be inverted: only labels 119 not in the list will be kept 120 121 \return a map of the fragments and their labels 122 123 */ 124 template <typename T> 125 RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>> 126 getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *), 127 bool sanitizeFrags = true, 128 const std::vector<T> *whiteList = nullptr, 129 bool negateList = false); 130 131 #if 0 132 //! finds a molecule's minimum spanning tree (MST) 133 /*! 134 \param mol the molecule of interest 135 \param mst used to return the MST as a vector of bond indices 136 */ 137 RDKIT_GRAPHMOL_EXPORT void findSpanningTree(const ROMol &mol,std::vector<int> &mst); 138 #endif 139 140 //! calculates Balaban's J index for the molecule 141 /*! 
142 \param mol the molecule of interest 143 \param useBO toggles inclusion of the bond order in the calculation 144 (when false, we're not really calculating the J value) 145 \param force forces the calculation (instead of using cached results) 146 \param bondPath when included, only paths using bonds whose indices occur 147 in this vector will be included in the calculation 148 \param cacheIt If this is true, the calculated value will be cached 149 as a property on the molecule 150 \return the J index 151 152 */ 153 RDKIT_GRAPHMOL_EXPORT double computeBalabanJ( 154 const ROMol &mol, bool useBO = true, bool force = false, 155 const std::vector<int> *bondPath = nullptr, bool cacheIt = true); 156 //! \overload 157 RDKIT_GRAPHMOL_EXPORT double computeBalabanJ(double *distMat, int nb, int nAts); 158 159 //! \name Dealing with hydrogens 160 //{@ 161 162 //! returns a copy of a molecule with hydrogens added in as explicit Atoms 163 /*! 164 \param mol the molecule to add Hs to 165 \param explicitOnly (optional) if this \c true, only explicit Hs will be 166 added 167 \param addCoords (optional) If this is true, estimates for the atomic 168 coordinates 169 of the added Hs will be used. 170 \param onlyOnAtoms (optional) if provided, this should be a vector of 171 IDs of the atoms that will be considered for H addition. 172 \param addResidueInfo (optional) if this is true, add residue info to 173 hydrogen atoms (useful for PDB files). 174 175 \return the new molecule 176 177 <b>Notes:</b> 178 - it makes no sense to use the \c addCoords option if the molecule's 179 heavy 180 atoms don't already have coordinates. 181 - the caller is responsible for <tt>delete</tt>ing the pointer this 182 returns. 183 */ 184 RDKIT_GRAPHMOL_EXPORT ROMol *addHs(const ROMol &mol, bool explicitOnly = false, 185 bool addCoords = false, 186 const UINT_VECT *onlyOnAtoms = nullptr, 187 bool addResidueInfo = false); 188 //! 
\overload 189 // modifies the molecule in place 190 RDKIT_GRAPHMOL_EXPORT void addHs(RWMol &mol, bool explicitOnly = false, 191 bool addCoords = false, 192 const UINT_VECT *onlyOnAtoms = nullptr, 193 bool addResidueInfo = false); 194 195 //! Sets Cartesian coordinates for a terminal atom. 196 //! Useful for growing an atom off a molecule with sensible 197 //! coordinates based on the geometry of the neighbor. 198 /*! 199 NOTE: this sets appropriate coordinates in all of the molecule's conformers. 200 \param mol the molecule the atoms belong to 201 \param idx index of the terminal atom whose coordinates are set 202 \param otherIdx index of the bonded neighbor atom 203 */ 204 205 RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, 206 unsigned int otherIdx); 207 208 //! returns a copy of a molecule with hydrogens removed 209 /*! 210 \param mol the molecule to remove Hs from 211 \param implicitOnly (optional) if this \c true, only implicit Hs will be 212 removed 213 \param updateExplicitCount (optional) If this is \c true, when explicit Hs 214 are removed 215 from the graph, the heavy atom to which they are bound will have its 216 counter of 217 explicit Hs increased. 218 \param sanitize: (optional) If this is \c true, the final molecule will be 219 sanitized 220 221 \return the new molecule 222 223 <b>Notes:</b> 224 - Hydrogens which aren't connected to a heavy atom will not be 225 removed. This prevents molecules like <tt>"[H][H]"</tt> from having 226 all atoms removed. 227 - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1), 228 will not be removed. 229 - two coordinate Hs, like the central H in C[H-]C, will not be removed 230 - Hs connected to dummy atoms will not be removed 231 - Hs that are part of the definition of double bond Stereochemistry 232 will not be removed 233 - Hs that are not connected to anything else will not be removed 234 - Hs that have a query defined (i.e. 
hasQuery() returns true) will not 235 be removed 236 237 - the caller is responsible for <tt>delete</tt>ing the pointer this 238 returns. 239 */ 240 241 RDKIT_GRAPHMOL_EXPORT ROMol *removeHs(const ROMol &mol, 242 bool implicitOnly = false, 243 bool updateExplicitCount = false, 244 bool sanitize = true); 245 //! \overload 246 // modifies the molecule in place 247 RDKIT_GRAPHMOL_EXPORT void removeHs(RWMol &mol, bool implicitOnly = false, 248 bool updateExplicitCount = false, 249 bool sanitize = true); 250 struct RDKIT_GRAPHMOL_EXPORT RemoveHsParameters { 251 bool removeDegreeZero = false; /**< hydrogens that have no bonds */ 252 bool removeHigherDegrees = false; /**< hydrogens with two (or more) bonds */ 253 bool removeOnlyHNeighbors = 254 false; /**< hydrogens with bonds only to other hydrogens */ 255 bool removeIsotopes = false; /**< hydrogens with non-default isotopes */ 256 bool removeAndTrackIsotopes = false; /**< removes hydrogens with non-default 257 isotopes and keeps track of the heavy atom the isotopes were attached to in 258 the private _isotopicHs atom property, so they are re-added by AddHs() as the 259 original isotopes if possible*/ 260 bool removeDummyNeighbors = 261 false; /**< hydrogens with at least one dummy-atom neighbor */ 262 bool removeDefiningBondStereo = 263 false; /**< hydrogens defining bond stereochemistry */ 264 bool removeWithWedgedBond = true; /**< hydrogens with wedged bonds to them */ 265 bool removeWithQuery = false; /**< hydrogens with queries defined */ 266 bool removeMapped = true; /**< mapped hydrogens */ 267 bool removeInSGroups = false; /**< part of a SubstanceGroup */ 268 bool showWarnings = true; /**< display warnings for Hs that are not removed */ 269 bool removeNonimplicit = true; /**< DEPRECATED equivalent of !implicitOnly */ 270 bool updateExplicitCount = 271 false; /**< DEPRECATED equivalent of updateExplicitCount */ 272 bool removeHydrides = true; /**< Removing Hydrides */ 273 }; 274 //! 
\overload 275 // modifies the molecule in place 276 RDKIT_GRAPHMOL_EXPORT void removeHs(RWMol &mol, const RemoveHsParameters &ps, 277 bool sanitize = true); 278 //! \overload 279 // The caller owns the pointer this returns 280 RDKIT_GRAPHMOL_EXPORT ROMol *removeHs(const ROMol &mol, 281 const RemoveHsParameters &ps, 282 bool sanitize = true); 283 284 //! removes all Hs from a molecule 285 RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize = true); 286 //! \overload 287 // The caller owns the pointer this returns 288 RDKIT_GRAPHMOL_EXPORT ROMol *removeAllHs(const ROMol &mol, 289 bool sanitize = true); 290 291 //! returns a copy of a molecule with hydrogens removed and added as queries 292 //! to the heavy atoms to which they are bound. 293 /*! 294 This is really intended to be used with molecules that contain QueryAtoms 295 296 \param mol the molecule to remove Hs from 297 298 \return the new molecule 299 300 <b>Notes:</b> 301 - Atoms that do not already have hydrogen count queries will have one 302 added, other H-related queries will not be touched. Examples: 303 - C[H] -> [C;!H0] 304 - [C;H1][H] -> [C;H1] 305 - [C;H2][H] -> [C;H2] 306 - Hydrogens which aren't connected to a heavy atom will not be 307 removed. This prevents molecules like <tt>"[H][H]"</tt> from having 308 all atoms removed. 309 - the caller is responsible for <tt>delete</tt>ing the pointer this 310 returns. 311 - By default all hydrogens are removed, however if 312 mergeUnmappedOnly is true, any hydrogen participating 313 in an atom map will be retained 314 315 */ 316 RDKIT_GRAPHMOL_EXPORT ROMol *mergeQueryHs(const ROMol &mol, 317 bool mergeUnmappedOnly = false); 318 //! 
\overload 319 // modifies the molecule in place 320 RDKIT_GRAPHMOL_EXPORT void mergeQueryHs(RWMol &mol, 321 bool mergeUnmappedOnly = false); 322 323 typedef enum { 324 ADJUST_IGNORENONE = 0x0, 325 ADJUST_IGNORECHAINS = 0x1, 326 ADJUST_IGNORERINGS = 0x4, 327 ADJUST_IGNOREDUMMIES = 0x2, 328 ADJUST_IGNORENONDUMMIES = 0x8, 329 ADJUST_IGNOREMAPPED = 0x10, 330 ADJUST_IGNOREALL = 0xFFFFFFF 331 } AdjustQueryWhichFlags; 332 333 //! Parameters controlling the behavior of MolOps::adjustQueryProperties 334 /*! 335 336 Note that some of the options here are either directly contradictory or make 337 no sense when combined with each other. We generally assume that client code 338 is doing something sensible and don't attempt to detect possible conflicts or 339 problems. 340 341 */ 342 struct RDKIT_GRAPHMOL_EXPORT AdjustQueryParameters { 343 bool adjustDegree = true; /**< add degree queries */ 344 std::uint32_t adjustDegreeFlags = ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS; 345 346 bool adjustRingCount = false; /**< add ring-count queries */ 347 std::uint32_t adjustRingCountFlags = 348 ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS; 349 350 bool makeDummiesQueries = true; /**< convert dummy atoms without isotope 351 labels to any-atom queries */ 352 353 bool aromatizeIfPossible = true; /**< perceive and set aromaticity */ 354 355 bool makeBondsGeneric = 356 false; /**< convert bonds to generic queries (any bonds) */ 357 std::uint32_t makeBondsGenericFlags = ADJUST_IGNORENONE; 358 359 bool makeAtomsGeneric = 360 false; /**< convert atoms to generic queries (any atoms) */ 361 std::uint32_t makeAtomsGenericFlags = ADJUST_IGNORENONE; 362 363 bool adjustHeavyDegree = false; /**< adjust the heavy-atom degree instead of 364 overall degree */ 365 std::uint32_t adjustHeavyDegreeFlags = 366 ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS; 367 368 bool adjustRingChain = false; /**< add ring-chain queries */ 369 std::uint32_t adjustRingChainFlags = ADJUST_IGNORENONE; 370 371 bool useStereoCareForBonds 
= 372 false; /**< remove stereochemistry info from double bonds that do not have 373 the stereoCare property set */ 374 375 bool adjustConjugatedFiveRings = 376 false; /**< sets bond queries in conjugated five-rings to 377 SINGLE|DOUBLE|AROMATIC */ 378 379 bool setMDLFiveRingAromaticity = 380 false; /**< uses the 5-ring aromaticity behavior of the (former) MDL 381 software as documented in the Chemical Representation Guide */ 382 383 bool adjustSingleBondsToDegreeOneNeighbors = 384 false; /**< sets single bonds between aromatic atoms and degree one 385 neighbors to SINGLE|AROMATIC */ 386 387 bool adjustSingleBondsBetweenAromaticAtoms = 388 false; /**< sets non-ring single bonds between two aromatic atoms to 389 SINGLE|AROMATIC */ 390 //! \brief returns an AdjustQueryParameters object with all adjustments 391 //! disabled noAdjustmentsAdjustQueryParameters392 static AdjustQueryParameters noAdjustments() { 393 AdjustQueryParameters res; 394 res.adjustDegree = false; 395 res.makeDummiesQueries = false; 396 res.aromatizeIfPossible = false; 397 return res; 398 } AdjustQueryParametersAdjustQueryParameters399 AdjustQueryParameters() {} 400 }; 401 402 //! updates an AdjustQueryParameters object from a JSON string 403 RDKIT_GRAPHMOL_EXPORT void parseAdjustQueryParametersFromJSON( 404 MolOps::AdjustQueryParameters &p, const std::string &json); 405 406 //! returns a copy of a molecule with query properties adjusted 407 /*! 408 \param mol the molecule to adjust 409 \param params controls the adjustments made 410 411 \return the new molecule, the caller owns the memory 412 */ 413 RDKIT_GRAPHMOL_EXPORT ROMol *adjustQueryProperties( 414 const ROMol &mol, const AdjustQueryParameters *params = nullptr); 415 //! \overload 416 // modifies the molecule in place 417 RDKIT_GRAPHMOL_EXPORT void adjustQueryProperties( 418 RWMol &mol, const AdjustQueryParameters *params = nullptr); 419 420 //! returns a copy of a molecule with the atoms renumbered 421 /*! 
422 423 \param mol the molecule to work with 424 \param newOrder the new ordering of the atoms (should be numAtoms long) 425 for example: if newOrder is [3,2,0,1], then atom 3 in the original 426 molecule will be atom 0 in the new one 427 428 \return the new molecule 429 430 <b>Notes:</b> 431 - the caller is responsible for <tt>delete</tt>ing the pointer this 432 returns. 433 434 */ 435 RDKIT_GRAPHMOL_EXPORT ROMol *renumberAtoms( 436 const ROMol &mol, const std::vector<unsigned int> &newOrder); 437 438 //@} 439 440 //! \name Sanitization 441 //@{ 442 443 typedef enum { 444 SANITIZE_NONE = 0x0, 445 SANITIZE_CLEANUP = 0x1, 446 SANITIZE_PROPERTIES = 0x2, 447 SANITIZE_SYMMRINGS = 0x4, 448 SANITIZE_KEKULIZE = 0x8, 449 SANITIZE_FINDRADICALS = 0x10, 450 SANITIZE_SETAROMATICITY = 0x20, 451 SANITIZE_SETCONJUGATION = 0x40, 452 SANITIZE_SETHYBRIDIZATION = 0x80, 453 SANITIZE_CLEANUPCHIRALITY = 0x100, 454 SANITIZE_ADJUSTHS = 0x200, 455 SANITIZE_ALL = 0xFFFFFFF 456 } SanitizeFlags; 457 458 //! \brief carries out a collection of tasks for cleaning up a molecule and 459 // ensuring 460 //! that it makes "chemical sense" 461 /*! 462 This functions calls the following in sequence 463 -# MolOps::cleanUp() 464 -# mol.updatePropertyCache() 465 -# MolOps::symmetrizeSSSR() 466 -# MolOps::Kekulize() 467 -# MolOps::assignRadicals() 468 -# MolOps::setAromaticity() 469 -# MolOps::setConjugation() 470 -# MolOps::setHybridization() 471 -# MolOps::cleanupChirality() 472 -# MolOps::adjustHs() 473 474 \param mol : the RWMol to be cleaned 475 476 \param operationThatFailed : the first (if any) sanitization operation that 477 fails is set here. 478 The values are taken from the \c SanitizeFlags 479 enum. On success, the value is \c 480 SanitizeFlags::SANITIZE_NONE 481 482 \param sanitizeOps : the bits here are used to set which sanitization 483 operations are carried out. The elements of the \c 484 SanitizeFlags enum define the operations. 
485 486 <b>Notes:</b> 487 - If there is a failure in the sanitization, a \c MolSanitizeException 488 will be thrown. 489 - in general the user of this function should cast the molecule following 490 this function to a ROMol, so that new atoms and bonds cannot be added to 491 the molecule and screw up the sanitizing that has been done here 492 */ 493 RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol, 494 unsigned int &operationThatFailed, 495 unsigned int sanitizeOps = SANITIZE_ALL); 496 //! \overload 497 RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol); 498 499 //! \brief Identifies chemistry problems (things that don't make chemical 500 //! sense) in a molecule 501 /*! 502 This functions uses the operations in sanitizeMol but does not change 503 the input structure and returns a list of the problems encountered instead 504 of stopping at the first failure, 505 506 The problems this looks for come from the sanitization operations: 507 -# mol.updatePropertyCache() : Unreasonable valences 508 -# MolOps::Kekulize() : Unkekulizable ring systems, aromatic atoms not 509 in rings, aromatic bonds to non-aromatic atoms. 510 511 \param mol : the ROMol to be cleaned 512 513 \param sanitizeOps : the bits here are used to set which sanitization 514 operations are carried out. The elements of the \c 515 SanitizeFlags enum define the operations. 516 517 \return a vector of \c MolSanitizeException values that indicate what 518 problems were encountered 519 520 */ 521 RDKIT_GRAPHMOL_EXPORT 522 std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems( 523 const ROMol &mol, unsigned int sanitizeOps = SANITIZE_ALL); 524 525 //! Possible aromaticity models 526 /*! 
527 - \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT 528 - \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the RDKit 529 Book) 530 - \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it 531 does not consider the outer envelope of fused rings) 532 - \c AROMATICITY_MDL 533 - \c AROMATICITY_CUSTOM uses a caller-provided function 534 */ 535 typedef enum { 536 AROMATICITY_DEFAULT = 0x0, ///< future proofing 537 AROMATICITY_RDKIT = 0x1, 538 AROMATICITY_SIMPLE = 0x2, 539 AROMATICITY_MDL = 0x4, 540 AROMATICITY_CUSTOM = 0xFFFFFFF ///< use a function 541 } AromaticityModel; 542 543 //! Sets up the aromaticity for a molecule 544 /*! 545 546 This is what happens here: 547 -# find all the simple rings by calling the findSSSR function 548 -# loop over all the Atoms in each ring and mark them if they are 549 candidates 550 for aromaticity. A ring atom is a candidate if it can spare electrons 551 to the ring and if it's from the first two rows of the periodic table. 552 -# based on the candidate atoms, mark the rings to be either candidates 553 or non-candidates. A ring is a candidate only if all its atoms are 554 candidates 555 -# apply Hueckel rule to each of the candidate rings to check if the ring 556 can be 557 aromatic 558 559 \param mol the RWMol of interest 560 \param model the aromaticity model to use 561 \param func a custom function for assigning aromaticity (only used when 562 model=\c AROMATICITY_CUSTOM) 563 564 \return >0 on success, <= 0 otherwise 565 566 <b>Assumptions:</b> 567 - Kekulization has been done (i.e. \c MolOps::Kekulize() has already 568 been called) 569 570 */ 571 RDKIT_GRAPHMOL_EXPORT int setAromaticity( 572 RWMol &mol, AromaticityModel model = AROMATICITY_DEFAULT, 573 int (*func)(RWMol &) = nullptr); 574 575 //! Designed to be called by the sanitizer to handle special cases before 576 // anything is done. 577 /*! 
578 579 Currently this: 580 - modifies nitro groups, so that the nitrogen does not have an 581 unreasonable valence of 5, as follows: 582 - the nitrogen gets a positive charge 583 - one of the oxygens gets a negative chage and the double bond to 584 this oxygen is changed to a single bond The net result is that nitro groups 585 can be counted on to be: \c "[N+](=O)[O-]" 586 - modifies halogen-oxygen containing species as follows: 587 \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O 588 \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O 589 \c [Cl,Br,I](=O)O -> [X+]([O-])O 590 - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-* 591 592 \param mol the molecule of interest 593 594 */ 595 RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol); 596 597 //! Called by the sanitizer to assign radical counts to atoms 598 RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol); 599 600 //! adjust the number of implicit and explicit Hs for special cases 601 /*! 602 603 Currently this: 604 - modifies aromatic nitrogens so that, when appropriate, they have an 605 explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1" 606 607 \param mol the molecule of interest 608 609 <b>Assumptions</b> 610 - this is called after the molecule has been sanitized, 611 aromaticity has been perceived, and the implicit valence of 612 everything has been calculated. 613 614 */ 615 RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol); 616 617 //! Kekulizes the molecule 618 /*! 619 620 \param mol the molecule of interest 621 \param markAtomsBonds if this is set to true, \c isAromatic boolean 622 settings on both the Bonds and Atoms are turned to false following the 623 Kekulization, otherwise they are left alone in their original state. \param 624 maxBackTracks the maximum number of attempts at back-tracking. 
The 625 algorithm 626 uses a back-tracking procedure to revisit a previous 627 setting of 628 double bond if we hit a wall in the kekulization 629 process 630 631 <b>Notes:</b> 632 - even if \c markAtomsBonds is \c false the \c BondType for all aromatic 633 bonds will be changed from \c RDKit::Bond::AROMATIC to \c 634 RDKit::Bond::SINGLE 635 or RDKit::Bond::DOUBLE during Kekulization. 636 637 */ 638 RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds = true, 639 unsigned int maxBackTracks = 100); 640 641 //! flags the molecule's conjugated bonds 642 RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol); 643 644 //! calculates and sets the hybridization of all a molecule's Stoms 645 RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol); 646 647 // @} 648 649 //! \name Ring finding and SSSR 650 //@{ 651 652 //! finds a molecule's Smallest Set of Smallest Rings 653 /*! 654 Currently this implements a modified form of Figueras algorithm 655 (JCICS - Vol. 36, No. 5, 1996, 986-991) 656 657 \param mol the molecule of interest 658 \param res used to return the vector of rings. Each entry is a vector with 659 atom indices. This information is also stored in the molecule's 660 RingInfo structure, so this argument is optional (see overload) 661 662 \return number of smallest rings found 663 664 Base algorithm: 665 - The original algorithm starts by finding representative degree 2 666 nodes. 667 - Representative because if a series of deg 2 nodes are found only 668 one of them is picked. 669 - The smallest ring around each of them is found. 670 - The bonds that connect to this degree 2 node are them chopped off, 671 yielding 672 new deg two nodes 673 - The process is repeated on the new deg 2 nodes. 674 - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring 675 with it is found. A bond from this is "carefully" (look in the paper) 676 selected and chopped, yielding deg 2 nodes. The process is same as 677 above once this is done. 
678 679 Our Modifications: 680 - If available, more than one smallest ring around a representative deg 2 681 node will be computed and stored 682 - Typically 3 rings are found around a degree 3 node (when no deg 2s are 683 available) 684 and all the bond to that node are chopped. 685 - The extra rings that were found in this process are removed after all 686 the nodes have been covered. 687 688 These changes were motivated by several factors: 689 - We believe the original algorithm fails to find the correct SSSR 690 (finds the correct number of them but the wrong ones) on some sample 691 mols 692 - Since SSSR may not be unique, a post-SSSR step to symmetrize may be 693 done. The extra rings this process adds can be quite useful. 694 */ 695 RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol, 696 std::vector<std::vector<int>> &res); 697 //! \overload 698 RDKIT_GRAPHMOL_EXPORT int findSSSR( 699 const ROMol &mol, std::vector<std::vector<int>> *res = nullptr); 700 701 //! use a DFS algorithm to identify ring bonds and atoms in a molecule 702 /*! 703 \b NOTE: though the RingInfo structure is populated by this function, 704 the only really reliable calls that can be made are to check if 705 mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx) 706 return values >0 707 */ 708 RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol); 709 710 RDKIT_GRAPHMOL_EXPORT void findRingFamilies(const ROMol &mol); 711 712 //! symmetrize the molecule's Smallest Set of Smallest Rings 713 /*! 714 SSSR rings obatined from "findSSSR" can be non-unique in some case. 715 For example, cubane has five SSSR rings, not six as one would hope. 716 717 This function adds additional rings to the SSSR list if necessary 718 to make the list symmetric, e.g. all atoms in cubane will be part of the 719 same number of SSSRs. This function choses these extra rings from the extra 720 rings computed and discarded during findSSSR. 
The new ring are chosen such 721 that: 722 - replacing a same sized ring in the SSSR list with an extra ring yields 723 the same union of bond IDs as the original SSSR list 724 725 \param mol - the molecule of interest 726 \param res used to return the vector of rings. Each entry is a vector with 727 atom indices. This information is also stored in the molecule's 728 RingInfo structure, so this argument is optional (see overload) 729 730 \return the total number of rings = (new rings + old SSSRs) 731 732 <b>Notes:</b> 733 - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called 734 first 735 */ 736 RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol, 737 std::vector<std::vector<int>> &res); 738 //! \overload 739 RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol); 740 741 //@} 742 743 //! \name Shortest paths and other matrices 744 //@{ 745 746 //! returns a molecule's adjacency matrix 747 /*! 748 \param mol the molecule of interest 749 \param useBO toggles use of bond orders in the matrix 750 \param emptyVal sets the empty value (for non-adjacent atoms) 751 \param force forces calculation of the matrix, even if already 752 computed 753 \param propNamePrefix used to set the cached property name 754 755 \return the adjacency matrix. 756 757 <b>Notes</b> 758 - The result of this is cached in the molecule's local property 759 dictionary, which will handle deallocation. The caller should <b>not</b> \c 760 delete this pointer. 761 762 */ 763 RDKIT_GRAPHMOL_EXPORT double *getAdjacencyMatrix( 764 const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false, 765 const char *propNamePrefix = nullptr, 766 const boost::dynamic_bitset<> *bondsToUse = nullptr); 767 768 //! Computes the molecule's topological distance matrix 769 /*! 770 Uses the Floyd-Warshall all-pairs-shortest-paths algorithm. 
\param mol the molecule of interest
\param useBO toggles use of bond orders in the matrix
\param useAtomWts sets the diagonal elements of the result to
       6.0/(atomic number) so that the matrix can be used to calculate
       Balaban J values. This does not affect the bond weights.
\param force forces calculation of the matrix, even if already
       computed
\param propNamePrefix used to set the cached property name

\return the distance matrix.

<b>Notes</b>
- The result of this is cached in the molecule's local property
dictionary, which will handle deallocation. The caller should <b>not</b>
\c delete this pointer.


*/
RDKIT_GRAPHMOL_EXPORT double *getDistanceMat(
    const ROMol &mol, bool useBO = false, bool useAtomWts = false,
    bool force = false, const char *propNamePrefix = nullptr);

//! Computes the molecule's topological distance matrix
/*!
  Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.

  \param mol the molecule of interest
  \param activeAtoms only elements corresponding to these atom indices
         will be included in the calculation
  \param bonds only bonds found in this list will be included in the
         calculation
  \param useBO toggles use of bond orders in the matrix
  \param useAtomWts sets the diagonal elements of the result to
         6.0/(atomic number) so that the matrix can be used to calculate
         Balaban J values. This does not affect the bond weights.

  \return the distance matrix.

  <b>Notes</b>
  - The results of this call are not cached, the caller <b>should</b>
    \c delete this pointer.


*/
// NOTE(review): the return type is a raw double*; if the definition allocates
// the matrix with new[], callers need delete[] rather than delete - confirm
// against the implementation.
RDKIT_GRAPHMOL_EXPORT double *getDistanceMat(
    const ROMol &mol, const std::vector<int> &activeAtoms,
    const std::vector<const Bond *> &bonds, bool useBO = false,
    bool useAtomWts = false);

//! Computes the molecule's 3D distance matrix
/*!

  \param mol the molecule of interest
  \param confId the conformer to use
  \param useAtomWts sets the diagonal elements of the result to
         6.0/(atomic number)
  \param force forces calculation of the matrix, even if already
         computed
  \param propNamePrefix used to set the cached property name
         (if set to an empty string, the matrix will not be
         cached)

  \return the distance matrix.

  <b>Notes</b>
  - If propNamePrefix is not empty the result of this is cached in the
    molecule's local property dictionary, which will handle deallocation.
    In other cases the caller is responsible for freeing the memory.

*/
// NOTE(review): the default for propNamePrefix is nullptr, while the text above
// only describes the empty-string case; confirm how nullptr is treated with
// respect to caching and ownership of the returned pointer.
RDKIT_GRAPHMOL_EXPORT double *get3DDistanceMat(
    const ROMol &mol, int confId = -1, bool useAtomWts = false,
    bool force = false, const char *propNamePrefix = nullptr);
//! Find the shortest path between two atoms
/*!
  Uses the Bellman-Ford algorithm

  \param mol  molecule of interest
  \param aid1 index of the first atom
  \param aid2 index of the second atom

  \return an std::list with the indices of the atoms along the shortest
          path

  <b>Notes:</b>
  - the starting and end atoms are included in the path
  - if no path is found, an empty path is returned

*/
RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
                                                     int aid2);

//@}

//! \name Stereochemistry
//@{

//! removes bogus chirality markers (those on non-sp3 centers):
RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol);

//! \brief Uses a conformer to assign ChiralType to a molecule's atoms
/*!
  \param mol                  the molecule of interest
  \param confId               the conformer to use
  \param replaceExistingTags  if this flag is true, any existing atomic chiral
                              tags will be replaced

  If the conformer provided is not a 3D conformer, nothing will be done.
*/
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(
    ROMol &mol, int confId = -1, bool replaceExistingTags = true);

//! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
//! stereo flags to its bonds
/*!

  \param mol                  the molecule of interest
  \param confId               the conformer to use
  \param replaceExistingTags  if this flag is true, any existing info about
                              stereochemistry will be replaced

  If the conformer provided is not a 3D conformer, nothing will be done.
*/
RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(
    ROMol &mol, int confId = -1, bool replaceExistingTags = true);

//! \brief Use bond directions to assign ChiralTypes to a molecule's atoms and
//! stereo flags to its bonds
/*!

  \param mol                  the molecule of interest
  \param confId               the conformer to use
  \param replaceExistingTags  if this flag is true, any existing info about
                              stereochemistry will be replaced
*/
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromBondDirs(
    ROMol &mol, int confId = -1, bool replaceExistingTags = true);

//! \deprecated: this function will be removed in a future release. Use
//! setDoubleBondNeighborDirections() instead
RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol,
                                                     int confId = -1);
//! Sets bond directions based on double bond stereochemistry
RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(
    ROMol &mol, const Conformer *conf = nullptr);

//! Assign CIS/TRANS bond stereochemistry tags based on neighboring directions
RDKIT_GRAPHMOL_EXPORT void setBondStereoFromDirections(ROMol &mol);

//! Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
/*!
  Does the CIP stereochemistry assignment for the molecule's atoms
  (R/S) and double bond (Z/E). Chiral atoms will have a property
  '_CIPCode' indicating their chiral code.

  \param mol     the molecule to use
  \param cleanIt if true, any existing values of the property `_CIPCode`
                 will be cleared, atoms with a chiral specifier that aren't
                 actually chiral (e.g. atoms with duplicate
                 substituents or only 2 substituents, etc.) will have
                 their chiral code set to CHI_UNSPECIFIED. Bonds with
                 STEREOCIS/STEREOTRANS specified that have duplicate
                 substituents based upon the CIP atom ranks will be
                 marked STEREONONE.
  \param force   causes the calculation to be repeated even if it has
                 already been done
  \param flagPossibleStereoCenters   set the _ChiralityPossible property on
                 atoms that are possible stereocenters

  <b>Notes:</b>
  - Throughout we assume that we're working with a hydrogen-suppressed
    graph.

*/
RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(
    ROMol &mol, bool cleanIt = false, bool force = false,
    bool flagPossibleStereoCenters = false);
//! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
//! (i.e. Z/E)
/*!

  \param mol     the molecule of interest
*/
RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol);

//! \brief finds bonds that could be cis/trans in a molecule and mark them as
//! Bond::STEREOANY.
/*!
  \param mol     the molecule of interest
  \param cleanIt toggles removal of stereo flags from double bonds that can
                 not have stereochemistry

  This function finds any double bonds that can potentially be part of
  a cis/trans system. No attempt is made here to mark them cis or
  trans. No attempt is made to detect double bond stereo in ring systems.

  This function is useful in the following situations:
    - when parsing a mol file; for the bonds marked here, coordinate
      information on the neighbors can be used to identify cis or trans states
    - when writing a mol file; bonds that can be cis/trans but not marked as
      either need to be specially marked in the mol file
    - finding double bonds with unspecified stereochemistry so they
      can be enumerated for downstream 3D tools

  The CIPranks on the neighboring atoms are checked in this function. The
  _CIPCode property if set to any on the double bond.
*/
// NOTE(review): the last sentence of the description above reads as garbled
// ("if set to any"); its intended meaning should be confirmed against the
// implementation before it is reworded.
RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol,
                                                    bool cleanIt = false);
//! \brief Uses the molParity atom property to assign ChiralType to a molecule's
//! atoms
/*!
  \param mol                  the molecule of interest
  \param replaceExistingTags  if this flag is true, any existing atomic chiral
                              tags will be replaced
*/
RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromMolParity(
    ROMol &mol, bool replaceExistingTags = true);

//@}

//! returns the number of atoms which have a particular property set
// NOTE(review): the function name says "distinct property" while the summary
// says "a particular property set"; confirm what exactly is counted.
RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(
    const ROMol &mol, std::string prop);

//! returns whether or not a molecule needs to have Hs added to it.
RDKIT_GRAPHMOL_EXPORT bool needsHs(const ROMol &mol);

namespace details {
//! not recommended for use in other code
RDKIT_GRAPHMOL_EXPORT void KekulizeFragment(
    RWMol &mol, const boost::dynamic_bitset<> &atomsToUse,
    const boost::dynamic_bitset<> &bondsToUse, bool markAtomsBonds = true,
    unsigned int maxBackTracks = 100);
}  // namespace details

}  // namespace MolOps
}  // namespace RDKit

#endif  // _RD_MOL_OPS_H_