1 //
2 //  Copyright (C) 2001-2021 Greg Landrum and Rational Discovery LLC
3 //  Copyright (c) 2014, Novartis Institutes for BioMedical Research Inc.
4 //
5 //   @@ All Rights Reserved @@
6 //  This file is part of the RDKit.
7 //  The contents are covered by the terms of the BSD license
8 //  which is included in the file license.txt, found at the root
9 //  of the RDKit source tree.
10 //
11 #include <RDGeneral/export.h>
12 #ifndef _RD_MOL_OPS_H_
13 #define _RD_MOL_OPS_H_
14 
15 #include <vector>
16 #include <map>
17 #include <list>
18 #include <RDGeneral/BoostStartInclude.h>
19 #include <boost/smart_ptr.hpp>
20 #include <boost/dynamic_bitset.hpp>
21 #include <RDGeneral/BoostEndInclude.h>
22 #include <RDGeneral/types.h>
23 #include "SanitException.h"
24 
25 RDKIT_GRAPHMOL_EXPORT extern const int ci_LOCAL_INF;
26 namespace RDKit {
27 class ROMol;
28 class RWMol;
29 class Atom;
30 class Bond;
31 class Conformer;
//! Vector of double-precision values, with aliases for its iterator types.
// NOTE(review): the name suggests these hold atom invariants - confirm.
using INVAR_VECT = std::vector<double>;
using INVAR_VECT_I = INVAR_VECT::iterator;
using INVAR_VECT_CI = INVAR_VECT::const_iterator;
35 
36 //! \brief Groups a variety of molecular query and transformation operations.
37 namespace MolOps {
38 
39 //! return the number of electrons available on an atom to donate for
40 // aromaticity
41 /*!
42    The result is determined using the default valency, number of lone pairs,
43    number of bonds and the formal charge. Note that the atom may not donate
44    all of these electrons to a ring for aromaticity (also used in Conjugation
45    and hybridization code).
46 
47    \param at the atom of interest
48 
49    \return the number of electrons
50 */
51 RDKIT_GRAPHMOL_EXPORT int countAtomElec(const Atom *at);
52 
53 //! sums up all atomic formal charges and returns the result
54 RDKIT_GRAPHMOL_EXPORT int getFormalCharge(const ROMol &mol);
55 
56 //! returns whether or not the given Atom is involved in a conjugated bond
57 RDKIT_GRAPHMOL_EXPORT bool atomHasConjugatedBond(const Atom *at);
58 
59 //! find fragments (disconnected components of the molecular graph)
60 /*!
61 
62   \param mol     the molecule of interest
63   \param mapping used to return the mapping of Atoms->fragments.
64      On return \c mapping will be <tt>mol->getNumAtoms()</tt> long
65      and will contain the fragment assignment for each Atom
66 
67   \return the number of fragments found.
68 
69 */
70 RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(const ROMol &mol,
71                                                std::vector<int> &mapping);
72 //! find fragments (disconnected components of the molecular graph)
73 /*!
74 
75   \param mol    the molecule of interest
76   \param frags  used to return the Atoms in each fragment
77      On return \c mapping will be \c numFrags long, and each entry
78      will contain the indices of the Atoms in that fragment.
79 
80   \return the number of fragments found.
81 
82 */
83 RDKIT_GRAPHMOL_EXPORT unsigned int getMolFrags(
84     const ROMol &mol, std::vector<std::vector<int>> &frags);
85 
86 //! splits a molecule into its component fragments
87 //  (disconnected components of the molecular graph)
88 /*!
89 
90   \param mol     the molecule of interest
91   \param sanitizeFrags  toggles sanitization of the fragments after
92                         they are built
93   \param frags used to return the mapping of Atoms->fragments.
94      if provided, \c frags will be <tt>mol->getNumAtoms()</tt> long
95          on return and will contain the fragment assignment for each Atom
96   \param fragsMolAtomMapping  used to return the Atoms in each fragment
97      On return \c mapping will be \c numFrags long, and each entry
98      will contain the indices of the Atoms in that fragment.
99    \param copyConformers  toggles copying conformers of the fragments after
100                         they are built
101   \return a vector of the fragments as smart pointers to ROMols
102 
103 */
104 RDKIT_GRAPHMOL_EXPORT std::vector<boost::shared_ptr<ROMol>> getMolFrags(
105     const ROMol &mol, bool sanitizeFrags = true,
106     std::vector<int> *frags = nullptr,
107     std::vector<std::vector<int>> *fragsMolAtomMapping = nullptr,
108     bool copyConformers = true);
109 
110 //! splits a molecule into pieces based on labels assigned using a query
111 /*!
112 
113   \param mol     the molecule of interest
114   \param query   the query used to "label" the molecule for fragmentation
115   \param sanitizeFrags  toggles sanitization of the fragments after
116                         they are built
117   \param whiteList  if provided, only labels in the list will be kept
118   \param negateList if true, the white list logic will be inverted: only labels
119                     not in the list will be kept
120 
121   \return a map of the fragments and their labels
122 
123 */
124 template <typename T>
125 RDKIT_GRAPHMOL_EXPORT std::map<T, boost::shared_ptr<ROMol>>
126 getMolFragsWithQuery(const ROMol &mol, T (*query)(const ROMol &, const Atom *),
127                      bool sanitizeFrags = true,
128                      const std::vector<T> *whiteList = nullptr,
129                      bool negateList = false);
130 
131 #if 0
132     //! finds a molecule's minimum spanning tree (MST)
133     /*!
134       \param mol  the molecule of interest
135       \param mst  used to return the MST as a vector of bond indices
136     */
137     RDKIT_GRAPHMOL_EXPORT void findSpanningTree(const ROMol &mol,std::vector<int> &mst);
138 #endif
139 
140 //! calculates Balaban's J index for the molecule
141 /*!
142   \param mol      the molecule of interest
143   \param useBO    toggles inclusion of the bond order in the calculation
144                   (when false, we're not really calculating the J value)
145   \param force    forces the calculation (instead of using cached results)
146   \param bondPath when included, only paths using bonds whose indices occur
147                   in this vector will be included in the calculation
148   \param cacheIt  If this is true, the calculated value will be cached
149                   as a property on the molecule
150   \return the J index
151 
152 */
153 RDKIT_GRAPHMOL_EXPORT double computeBalabanJ(
154     const ROMol &mol, bool useBO = true, bool force = false,
155     const std::vector<int> *bondPath = nullptr, bool cacheIt = true);
156 //! \overload
157 RDKIT_GRAPHMOL_EXPORT double computeBalabanJ(double *distMat, int nb, int nAts);
158 
159 //! \name Dealing with hydrogens
//@{
161 
162 //! returns a copy of a molecule with hydrogens added in as explicit Atoms
163 /*!
164     \param mol          the molecule to add Hs to
165     \param explicitOnly (optional) if this \c true, only explicit Hs will be
166    added
167     \param addCoords    (optional) If this is true, estimates for the atomic
168    coordinates
169                 of the added Hs will be used.
170     \param onlyOnAtoms   (optional) if provided, this should be a vector of
171                 IDs of the atoms that will be considered for H addition.
172     \param addResidueInfo   (optional) if this is true, add residue info to
173                 hydrogen atoms (useful for PDB files).
174 
175     \return the new molecule
176 
177     <b>Notes:</b>
178        - it makes no sense to use the \c addCoords option if the molecule's
179    heavy
180          atoms don't already have coordinates.
181        - the caller is responsible for <tt>delete</tt>ing the pointer this
182    returns.
183  */
184 RDKIT_GRAPHMOL_EXPORT ROMol *addHs(const ROMol &mol, bool explicitOnly = false,
185                                    bool addCoords = false,
186                                    const UINT_VECT *onlyOnAtoms = nullptr,
187                                    bool addResidueInfo = false);
188 //! \overload
189 // modifies the molecule in place
190 RDKIT_GRAPHMOL_EXPORT void addHs(RWMol &mol, bool explicitOnly = false,
191                                  bool addCoords = false,
192                                  const UINT_VECT *onlyOnAtoms = nullptr,
193                                  bool addResidueInfo = false);
194 
195 //! Sets Cartesian coordinates for a terminal atom.
196 //! Useful for growing an atom off a molecule with sensible
197 //! coordinates based on the geometry of the neighbor.
198 /*!
199     NOTE: this sets appropriate coordinates in all of the molecule's conformers.
200     \param mol       the molecule the atoms belong to
201     \param idx       index of the terminal atom whose coordinates are set
202     \param otherIdx  index of the bonded neighbor atom
203 */
204 
205 RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx,
206                                                  unsigned int otherIdx);
207 
208 //! returns a copy of a molecule with hydrogens removed
209 /*!
210     \param mol          the molecule to remove Hs from
211     \param implicitOnly (optional) if this \c true, only implicit Hs will be
212    removed
213     \param updateExplicitCount  (optional) If this is \c true, when explicit Hs
214    are removed
215          from the graph, the heavy atom to which they are bound will have its
216    counter of
217          explicit Hs increased.
218     \param sanitize:  (optional) If this is \c true, the final molecule will be
219    sanitized
220 
221     \return the new molecule
222 
223     <b>Notes:</b>
224        - Hydrogens which aren't connected to a heavy atom will not be
225          removed.  This prevents molecules like <tt>"[H][H]"</tt> from having
226          all atoms removed.
227        - Labelled hydrogen (e.g. atoms with atomic number=1, but mass > 1),
228          will not be removed.
229        - two coordinate Hs, like the central H in C[H-]C, will not be removed
230        - Hs connected to dummy atoms will not be removed
231        - Hs that are part of the definition of double bond Stereochemistry
232          will not be removed
233        - Hs that are not connected to anything else will not be removed
234        - Hs that have a query defined (i.e. hasQuery() returns true) will not
235          be removed
236 
237        - the caller is responsible for <tt>delete</tt>ing the pointer this
238    returns.
239 */
240 
241 RDKIT_GRAPHMOL_EXPORT ROMol *removeHs(const ROMol &mol,
242                                       bool implicitOnly = false,
243                                       bool updateExplicitCount = false,
244                                       bool sanitize = true);
245 //! \overload
246 // modifies the molecule in place
247 RDKIT_GRAPHMOL_EXPORT void removeHs(RWMol &mol, bool implicitOnly = false,
248                                     bool updateExplicitCount = false,
249                                     bool sanitize = true);
250 struct RDKIT_GRAPHMOL_EXPORT RemoveHsParameters {
251   bool removeDegreeZero = false;    /**< hydrogens that have no bonds */
252   bool removeHigherDegrees = false; /**< hydrogens with two (or more) bonds */
253   bool removeOnlyHNeighbors =
254       false; /**< hydrogens with bonds only to other hydrogens */
255   bool removeIsotopes = false; /**< hydrogens with non-default isotopes */
256   bool removeAndTrackIsotopes = false; /**< removes hydrogens with non-default
257    isotopes and keeps track of the heavy atom the isotopes were attached to in
258    the private _isotopicHs atom property, so they are re-added by AddHs() as the
259    original isotopes if possible*/
260   bool removeDummyNeighbors =
261       false; /**< hydrogens with at least one dummy-atom neighbor */
262   bool removeDefiningBondStereo =
263       false; /**< hydrogens defining bond stereochemistry */
264   bool removeWithWedgedBond = true; /**< hydrogens with wedged bonds to them */
265   bool removeWithQuery = false;     /**< hydrogens with queries defined */
266   bool removeMapped = true;         /**< mapped hydrogens */
267   bool removeInSGroups = false;     /**< part of a SubstanceGroup */
268   bool showWarnings = true; /**< display warnings for Hs that are not removed */
269   bool removeNonimplicit = true; /**< DEPRECATED equivalent of !implicitOnly */
270   bool updateExplicitCount =
271       false; /**< DEPRECATED equivalent of updateExplicitCount */
272   bool removeHydrides = true; /**< Removing Hydrides */
273 };
274 //! \overload
275 // modifies the molecule in place
276 RDKIT_GRAPHMOL_EXPORT void removeHs(RWMol &mol, const RemoveHsParameters &ps,
277                                     bool sanitize = true);
278 //! \overload
279 // The caller owns the pointer this returns
280 RDKIT_GRAPHMOL_EXPORT ROMol *removeHs(const ROMol &mol,
281                                       const RemoveHsParameters &ps,
282                                       bool sanitize = true);
283 
284 //! removes all Hs from a molecule
285 RDKIT_GRAPHMOL_EXPORT void removeAllHs(RWMol &mol, bool sanitize = true);
286 //! \overload
287 // The caller owns the pointer this returns
288 RDKIT_GRAPHMOL_EXPORT ROMol *removeAllHs(const ROMol &mol,
289                                          bool sanitize = true);
290 
291 //! returns a copy of a molecule with hydrogens removed and added as queries
292 //!  to the heavy atoms to which they are bound.
293 /*!
294   This is really intended to be used with molecules that contain QueryAtoms
295 
296   \param mol the molecule to remove Hs from
297 
298   \return the new molecule
299 
300   <b>Notes:</b>
301     - Atoms that do not already have hydrogen count queries will have one
302             added, other H-related queries will not be touched. Examples:
303           - C[H] -> [C;!H0]
304           - [C;H1][H] -> [C;H1]
305           - [C;H2][H] -> [C;H2]
306     - Hydrogens which aren't connected to a heavy atom will not be
307       removed.  This prevents molecules like <tt>"[H][H]"</tt> from having
308       all atoms removed.
309     - the caller is responsible for <tt>delete</tt>ing the pointer this
310   returns.
311     - By default all hydrogens are removed, however if
312       mergeUnmappedOnly is true, any hydrogen participating
313       in an atom map will be retained
314 
315 */
316 RDKIT_GRAPHMOL_EXPORT ROMol *mergeQueryHs(const ROMol &mol,
317                                           bool mergeUnmappedOnly = false);
318 //! \overload
319 // modifies the molecule in place
320 RDKIT_GRAPHMOL_EXPORT void mergeQueryHs(RWMol &mol,
321                                         bool mergeUnmappedOnly = false);
322 
//! Bit flags used for the \c *Flags members of AdjustQueryParameters.
// The values are distinct bits, so they can be combined with bitwise OR.
// NOTE(review): judging by the names, each flag excludes one class of
// atoms/bonds from an adjustment - confirm against the implementation.
enum AdjustQueryWhichFlags {
  ADJUST_IGNORENONE = 0x0,
  ADJUST_IGNORECHAINS = 0x1,
  ADJUST_IGNOREDUMMIES = 0x2,
  ADJUST_IGNORERINGS = 0x4,
  ADJUST_IGNORENONDUMMIES = 0x8,
  ADJUST_IGNOREMAPPED = 0x10,
  ADJUST_IGNOREALL = 0xFFFFFFF
};
332 
333 //! Parameters controlling the behavior of MolOps::adjustQueryProperties
334 /*!
335 
336   Note that some of the options here are either directly contradictory or make
337   no sense when combined with each other. We generally assume that client code
338   is doing something sensible and don't attempt to detect possible conflicts or
339   problems.
340 
341 */
342 struct RDKIT_GRAPHMOL_EXPORT AdjustQueryParameters {
343   bool adjustDegree = true; /**< add degree queries */
344   std::uint32_t adjustDegreeFlags = ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS;
345 
346   bool adjustRingCount = false; /**< add ring-count queries */
347   std::uint32_t adjustRingCountFlags =
348       ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS;
349 
350   bool makeDummiesQueries = true; /**< convert dummy atoms without isotope
351                                 labels to any-atom queries */
352 
353   bool aromatizeIfPossible = true; /**< perceive and set aromaticity */
354 
355   bool makeBondsGeneric =
356       false; /**< convert bonds to generic queries (any bonds) */
357   std::uint32_t makeBondsGenericFlags = ADJUST_IGNORENONE;
358 
359   bool makeAtomsGeneric =
360       false; /**< convert atoms to generic queries (any atoms) */
361   std::uint32_t makeAtomsGenericFlags = ADJUST_IGNORENONE;
362 
363   bool adjustHeavyDegree = false; /**< adjust the heavy-atom degree instead of
364                                overall degree */
365   std::uint32_t adjustHeavyDegreeFlags =
366       ADJUST_IGNOREDUMMIES | ADJUST_IGNORECHAINS;
367 
368   bool adjustRingChain = false; /**< add ring-chain queries */
369   std::uint32_t adjustRingChainFlags = ADJUST_IGNORENONE;
370 
371   bool useStereoCareForBonds =
372       false; /**< remove stereochemistry info from double bonds that do not have
373                 the stereoCare property set */
374 
375   bool adjustConjugatedFiveRings =
376       false; /**< sets bond queries in conjugated five-rings to
377                 SINGLE|DOUBLE|AROMATIC */
378 
379   bool setMDLFiveRingAromaticity =
380       false; /**< uses the 5-ring aromaticity behavior of the (former) MDL
381                 software as documented in the Chemical Representation Guide */
382 
383   bool adjustSingleBondsToDegreeOneNeighbors =
384       false; /**<  sets single bonds between aromatic atoms and degree one
385                 neighbors to SINGLE|AROMATIC */
386 
387   bool adjustSingleBondsBetweenAromaticAtoms =
388       false; /**<  sets non-ring single bonds between two aromatic atoms to
389                 SINGLE|AROMATIC */
390   //! \brief returns an AdjustQueryParameters object with all adjustments
391   //! disabled
noAdjustmentsAdjustQueryParameters392   static AdjustQueryParameters noAdjustments() {
393     AdjustQueryParameters res;
394     res.adjustDegree = false;
395     res.makeDummiesQueries = false;
396     res.aromatizeIfPossible = false;
397     return res;
398   }
AdjustQueryParametersAdjustQueryParameters399   AdjustQueryParameters() {}
400 };
401 
402 //! updates an AdjustQueryParameters object from a JSON string
403 RDKIT_GRAPHMOL_EXPORT void parseAdjustQueryParametersFromJSON(
404     MolOps::AdjustQueryParameters &p, const std::string &json);
405 
406 //! returns a copy of a molecule with query properties adjusted
407 /*!
408   \param mol the molecule to adjust
409   \param params controls the adjustments made
410 
411   \return the new molecule, the caller owns the memory
412 */
413 RDKIT_GRAPHMOL_EXPORT ROMol *adjustQueryProperties(
414     const ROMol &mol, const AdjustQueryParameters *params = nullptr);
415 //! \overload
416 // modifies the molecule in place
417 RDKIT_GRAPHMOL_EXPORT void adjustQueryProperties(
418     RWMol &mol, const AdjustQueryParameters *params = nullptr);
419 
420 //! returns a copy of a molecule with the atoms renumbered
421 /*!
422 
423   \param mol the molecule to work with
424   \param newOrder the new ordering of the atoms (should be numAtoms long)
425      for example: if newOrder is [3,2,0,1], then atom 3 in the original
426      molecule will be atom 0 in the new one
427 
428   \return the new molecule
429 
430   <b>Notes:</b>
431     - the caller is responsible for <tt>delete</tt>ing the pointer this
432   returns.
433 
434 */
435 RDKIT_GRAPHMOL_EXPORT ROMol *renumberAtoms(
436     const ROMol &mol, const std::vector<unsigned int> &newOrder);
437 
438 //@}
439 
440 //! \name Sanitization
441 //@{
442 
//! Bit flags defining the operations MolOps::sanitizeMol() can carry out.
// The values are combined with bitwise OR to build a \c sanitizeOps
// argument; SANITIZE_NONE is also the value reported when nothing failed.
enum SanitizeFlags {
  SANITIZE_NONE = 0x0,
  SANITIZE_CLEANUP = 0x1,
  SANITIZE_PROPERTIES = 0x2,
  SANITIZE_SYMMRINGS = 0x4,
  SANITIZE_KEKULIZE = 0x8,
  SANITIZE_FINDRADICALS = 0x10,
  SANITIZE_SETAROMATICITY = 0x20,
  SANITIZE_SETCONJUGATION = 0x40,
  SANITIZE_SETHYBRIDIZATION = 0x80,
  SANITIZE_CLEANUPCHIRALITY = 0x100,
  SANITIZE_ADJUSTHS = 0x200,
  SANITIZE_ALL = 0xFFFFFFF
};
457 
458 //! \brief carries out a collection of tasks for cleaning up a molecule and
459 // ensuring
460 //! that it makes "chemical sense"
461 /*!
462    This functions calls the following in sequence
463      -# MolOps::cleanUp()
464      -# mol.updatePropertyCache()
465      -# MolOps::symmetrizeSSSR()
466      -# MolOps::Kekulize()
467      -# MolOps::assignRadicals()
468      -# MolOps::setAromaticity()
469      -# MolOps::setConjugation()
470      -# MolOps::setHybridization()
471      -# MolOps::cleanupChirality()
472      -# MolOps::adjustHs()
473 
474    \param mol : the RWMol to be cleaned
475 
476    \param operationThatFailed : the first (if any) sanitization operation that
477                                 fails is set here.
478                                 The values are taken from the \c SanitizeFlags
479                                 enum. On success, the value is \c
480                                 SanitizeFlags::SANITIZE_NONE
481 
482    \param sanitizeOps : the bits here are used to set which sanitization
483                         operations are carried out. The elements of the \c
484                         SanitizeFlags enum define the operations.
485 
486    <b>Notes:</b>
487     - If there is a failure in the sanitization, a \c MolSanitizeException
488       will be thrown.
489     - in general the user of this function should cast the molecule following
490       this function to a ROMol, so that new atoms and bonds cannot be added to
491       the molecule and screw up the sanitizing that has been done here
492 */
493 RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol,
494                                        unsigned int &operationThatFailed,
495                                        unsigned int sanitizeOps = SANITIZE_ALL);
496 //! \overload
497 RDKIT_GRAPHMOL_EXPORT void sanitizeMol(RWMol &mol);
498 
499 //! \brief Identifies chemistry problems (things that don't make chemical
500 //! sense) in a molecule
501 /*!
502    This functions uses the operations in sanitizeMol but does not change
503    the input structure and returns a list of the problems encountered instead
504    of stopping at the first failure,
505 
506    The problems this looks for come from the sanitization operations:
507      -# mol.updatePropertyCache()  : Unreasonable valences
508      -# MolOps::Kekulize()  : Unkekulizable ring systems, aromatic atoms not
509    in rings, aromatic bonds to non-aromatic atoms.
510 
511    \param mol : the ROMol to be cleaned
512 
513    \param sanitizeOps : the bits here are used to set which sanitization
514                         operations are carried out. The elements of the \c
515                         SanitizeFlags enum define the operations.
516 
517    \return a vector of \c MolSanitizeException values that indicate what
518            problems were encountered
519 
520 */
521 RDKIT_GRAPHMOL_EXPORT
522 std::vector<std::unique_ptr<MolSanitizeException>> detectChemistryProblems(
523     const ROMol &mol, unsigned int sanitizeOps = SANITIZE_ALL);
524 
//! The aromaticity models that can be requested from
//! MolOps::setAromaticity()
/*!
- \c AROMATICITY_DEFAULT at the moment always uses \c AROMATICITY_RDKIT
- \c AROMATICITY_RDKIT is the standard RDKit model (as documented in the
  RDKit Book)
- \c AROMATICITY_SIMPLE only considers 5- and 6-membered simple rings (it
  does not consider the outer envelope of fused rings)
- \c AROMATICITY_MDL
- \c AROMATICITY_CUSTOM uses a caller-provided function
*/
enum AromaticityModel {
  AROMATICITY_DEFAULT = 0x0,  ///< future proofing
  AROMATICITY_RDKIT = 0x1,
  AROMATICITY_SIMPLE = 0x2,
  AROMATICITY_MDL = 0x4,
  AROMATICITY_CUSTOM = 0xFFFFFFF  ///< use a function
};
542 
543 //! Sets up the aromaticity for a molecule
544 /*!
545 
546   This is what happens here:
547      -# find all the simple rings by calling the findSSSR function
548      -# loop over all the Atoms in each ring and mark them if they are
549   candidates
550         for aromaticity. A ring atom is a candidate if it can spare electrons
551         to the ring and if it's from the first two rows of the periodic table.
552      -# based on the candidate atoms, mark the rings to be either candidates
553         or non-candidates. A ring is a candidate only if all its atoms are
554   candidates
555      -# apply Hueckel rule to each of the candidate rings to check if the ring
556   can be
557         aromatic
558 
559   \param mol the RWMol of interest
560   \param model the aromaticity model to use
561   \param func a custom function for assigning aromaticity (only used when
562   model=\c AROMATICITY_CUSTOM)
563 
564   \return >0 on success, <= 0 otherwise
565 
566   <b>Assumptions:</b>
567     - Kekulization has been done (i.e. \c MolOps::Kekulize() has already
568       been called)
569 
570 */
571 RDKIT_GRAPHMOL_EXPORT int setAromaticity(
572     RWMol &mol, AromaticityModel model = AROMATICITY_DEFAULT,
573     int (*func)(RWMol &) = nullptr);
574 
575 //! Designed to be called by the sanitizer to handle special cases before
576 // anything is done.
577 /*!
578 
579     Currently this:
580      - modifies nitro groups, so that the nitrogen does not have an
581    unreasonable valence of 5, as follows:
582          - the nitrogen gets a positive charge
583          - one of the oxygens gets a negative chage and the double bond to
584    this oxygen is changed to a single bond The net result is that nitro groups
585    can be counted on to be: \c "[N+](=O)[O-]"
586      - modifies halogen-oxygen containing species as follows:
587         \c [Cl,Br,I](=O)(=O)(=O)O -> [X+3]([O-])([O-])([O-])O
588         \c [Cl,Br,I](=O)(=O)O -> [X+3]([O-])([O-])O
589         \c [Cl,Br,I](=O)O -> [X+]([O-])O
590      - converts the substructure [N,C]=P(=O)-* to [N,C]=[P+](-[O-])-*
591 
592    \param mol    the molecule of interest
593 
594 */
595 RDKIT_GRAPHMOL_EXPORT void cleanUp(RWMol &mol);
596 
597 //! Called by the sanitizer to assign radical counts to atoms
598 RDKIT_GRAPHMOL_EXPORT void assignRadicals(RWMol &mol);
599 
600 //! adjust the number of implicit and explicit Hs for special cases
601 /*!
602 
603     Currently this:
604      - modifies aromatic nitrogens so that, when appropriate, they have an
605        explicit H marked (e.g. so that we get things like \c "c1cc[nH]cc1"
606 
607     \param mol    the molecule of interest
608 
609     <b>Assumptions</b>
610        - this is called after the molecule has been sanitized,
611          aromaticity has been perceived, and the implicit valence of
612          everything has been calculated.
613 
614 */
615 RDKIT_GRAPHMOL_EXPORT void adjustHs(RWMol &mol);
616 
617 //! Kekulizes the molecule
618 /*!
619 
620    \param mol             the molecule of interest
621    \param markAtomsBonds  if this is set to true, \c isAromatic boolean
622    settings on both the Bonds and Atoms are turned to false following the
623    Kekulization, otherwise they are left alone in their original state. \param
624    maxBackTracks   the maximum number of attempts at back-tracking. The
625    algorithm
626                           uses a back-tracking procedure to revisit a previous
627    setting of
628                           double bond if we hit a wall in the kekulization
629    process
630 
631    <b>Notes:</b>
632      - even if \c markAtomsBonds is \c false the \c BondType for all aromatic
633        bonds will be changed from \c RDKit::Bond::AROMATIC to \c
634    RDKit::Bond::SINGLE
635        or RDKit::Bond::DOUBLE during Kekulization.
636 
637 */
638 RDKIT_GRAPHMOL_EXPORT void Kekulize(RWMol &mol, bool markAtomsBonds = true,
639                                     unsigned int maxBackTracks = 100);
640 
641 //! flags the molecule's conjugated bonds
642 RDKIT_GRAPHMOL_EXPORT void setConjugation(ROMol &mol);
643 
644 //! calculates and sets the hybridization of all a molecule's Stoms
645 RDKIT_GRAPHMOL_EXPORT void setHybridization(ROMol &mol);
646 
//@}
648 
649 //! \name Ring finding and SSSR
650 //@{
651 
652 //! finds a molecule's Smallest Set of Smallest Rings
653 /*!
654   Currently this implements a modified form of Figueras algorithm
655     (JCICS - Vol. 36, No. 5, 1996, 986-991)
656 
657   \param mol the molecule of interest
658   \param res used to return the vector of rings. Each entry is a vector with
659       atom indices.  This information is also stored in the molecule's
660       RingInfo structure, so this argument is optional (see overload)
661 
662   \return number of smallest rings found
663 
664   Base algorithm:
665     - The original algorithm starts by finding representative degree 2
666       nodes.
667     - Representative because if a series of deg 2 nodes are found only
668       one of them is picked.
669     - The smallest ring around each of them is found.
670     - The bonds that connect to this degree 2 node are them chopped off,
671   yielding
672       new deg two nodes
673     - The process is repeated on the new deg 2 nodes.
674     - If no deg 2 nodes are found, a deg 3 node is picked. The smallest ring
675       with it is found. A bond from this is "carefully" (look in the paper)
676       selected and chopped, yielding deg 2 nodes. The process is same as
677       above once this is done.
678 
679   Our Modifications:
680     - If available, more than one smallest ring around a representative deg 2
681       node will be computed and stored
682     - Typically 3 rings are found around a degree 3 node (when no deg 2s are
683   available)
684       and all the bond to that node are chopped.
685     - The extra rings that were found in this process are removed after all
686   the nodes have been covered.
687 
688   These changes were motivated by several factors:
689     - We believe the original algorithm fails to find the correct SSSR
690       (finds the correct number of them but the wrong ones) on some sample
691   mols
692     - Since SSSR may not be unique, a post-SSSR step to symmetrize may be
693   done. The extra rings this process adds can be quite useful.
694 */
695 RDKIT_GRAPHMOL_EXPORT int findSSSR(const ROMol &mol,
696                                    std::vector<std::vector<int>> &res);
697 //! \overload
698 RDKIT_GRAPHMOL_EXPORT int findSSSR(
699     const ROMol &mol, std::vector<std::vector<int>> *res = nullptr);
700 
701 //! use a DFS algorithm to identify ring bonds and atoms in a molecule
702 /*!
703   \b NOTE: though the RingInfo structure is populated by this function,
704   the only really reliable calls that can be made are to check if
705   mol.getRingInfo().numAtomRings(idx) or mol.getRingInfo().numBondRings(idx)
706   return values >0
707 */
708 RDKIT_GRAPHMOL_EXPORT void fastFindRings(const ROMol &mol);
709 
710 RDKIT_GRAPHMOL_EXPORT void findRingFamilies(const ROMol &mol);
711 
712 //! symmetrize the molecule's Smallest Set of Smallest Rings
713 /*!
714    SSSR rings obatined from "findSSSR" can be non-unique in some case.
715    For example, cubane has five SSSR rings, not six as one would hope.
716 
717    This function adds additional rings to the SSSR list if necessary
718    to make the list symmetric, e.g. all atoms in cubane will be part of the
719   same number of SSSRs. This function choses these extra rings from the extra
720   rings computed and discarded during findSSSR. The new ring are chosen such
721   that:
722     - replacing a same sized ring in the SSSR list with an extra ring yields
723       the same union of bond IDs as the original SSSR list
724 
725   \param mol - the molecule of interest
726   \param res used to return the vector of rings. Each entry is a vector with
727       atom indices.  This information is also stored in the molecule's
728       RingInfo structure, so this argument is optional (see overload)
729 
730   \return the total number of rings = (new rings + old SSSRs)
731 
732   <b>Notes:</b>
733    - if no SSSR rings are found on the molecule - MolOps::findSSSR() is called
734   first
735 */
736 RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol,
737                                          std::vector<std::vector<int>> &res);
738 //! \overload
739 RDKIT_GRAPHMOL_EXPORT int symmetrizeSSSR(ROMol &mol);
740 
741 //@}
742 
743 //! \name Shortest paths and other matrices
744 //@{
745 
746 //! returns a molecule's adjacency matrix
747 /*!
748   \param mol             the molecule of interest
749   \param useBO           toggles use of bond orders in the matrix
750   \param emptyVal        sets the empty value (for non-adjacent atoms)
751   \param force           forces recalculation, even if already computed
752   \param propNamePrefix  used to set the cached property name
753   \param bondsToUse      optional bitset of bonds; presumably restricts the
754                          matrix to those bonds (NOTE(review): confirm)
755 
756   \return the adjacency matrix.
757 
758   <b>Notes</b>
759     - The result of this is cached in the molecule's local property
760   dictionary, which will handle deallocation. The caller should <b>not</b> \c
761   delete this pointer.
762 */
763 RDKIT_GRAPHMOL_EXPORT double *getAdjacencyMatrix(
764     const ROMol &mol, bool useBO = false, int emptyVal = 0, bool force = false,
765     const char *propNamePrefix = nullptr,
766     const boost::dynamic_bitset<> *bondsToUse = nullptr);
767 
768 //! Computes the molecule's topological distance matrix
769 /*!
770    Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
771 
772   \param mol             the molecule of interest
773   \param useBO           toggles use of bond orders in the matrix
774   \param useAtomWts      sets the diagonal elements of the result to
775            6.0/(atomic number) so that the matrix can be used to calculate
776            Balaban J values.  This does not affect the bond weights.
777   \param force           forces calculation of the matrix, even if already
778                          computed
779   \param propNamePrefix  used to set the cached property name
780 
781   \return the distance matrix.
782 
783   <b>Notes</b>
784     - The result of this is cached in the molecule's local property
785   dictionary, which will handle deallocation. The caller should <b>not</b> \c
786   delete this pointer.
787     - For an uncached matrix limited to chosen atoms and bonds, use the
788   overload that takes \c activeAtoms and \c bonds.
789 */
790 RDKIT_GRAPHMOL_EXPORT double *getDistanceMat(
791     const ROMol &mol, bool useBO = false, bool useAtomWts = false,
792     bool force = false, const char *propNamePrefix = nullptr);
793 
794 //! Computes the topological distance matrix for a subset of the molecule
795 /*!
796    Uses the Floyd-Warshall all-pairs-shortest-paths algorithm.
797 
798   \param mol             the molecule of interest
799   \param activeAtoms     only elements corresponding to these atom indices
800                          will be included in the calculation
801   \param bonds           only bonds found in this list will be included in the
802                          calculation
803   \param useBO           toggles use of bond orders in the matrix
804   \param useAtomWts      sets the diagonal elements of the result to
805            6.0/(atomic number) so that the matrix can be used to calculate
806            Balaban J values.  This does not affect the bond weights.
807 
808   \return the distance matrix.
809 
810   <b>Notes</b>
811     - The results of this call are not cached; the caller <b>should</b> \c
812       delete this pointer.
813 
814 
815 
816 */
817 RDKIT_GRAPHMOL_EXPORT double *getDistanceMat(
818     const ROMol &mol, const std::vector<int> &activeAtoms,
819     const std::vector<const Bond *> &bonds, bool useBO = false,
820     bool useAtomWts = false);
821 
822 //! Computes the molecule's 3D distance matrix
823 /*!
824 
825   \param mol             the molecule of interest
826   \param confId          the conformer to use
827   \param useAtomWts      sets the diagonal elements of the result to
828            6.0/(atomic number)
829   \param force           forces calculation of the matrix, even if already
830                          computed
831   \param propNamePrefix  used to set the cached property name
832                          (if set to an empty string, the matrix will not be
833                          cached)
834 
835   \return the distance matrix.
836 
837   <b>Notes</b>
838     - If propNamePrefix is not empty the result of this is cached in the
839       molecule's local property dictionary, which will handle deallocation.
840       In other cases the caller is responsible for freeing the memory.
841     - NOTE(review): the default is nullptr; confirm whether that counts as empty
842 */
843 RDKIT_GRAPHMOL_EXPORT double *get3DDistanceMat(
844     const ROMol &mol, int confId = -1, bool useAtomWts = false,
845     bool force = false, const char *propNamePrefix = nullptr);
846 //! Find the shortest path between two atoms
847 /*!
848   Uses the Bellman-Ford algorithm
849 
850  \param mol  molecule of interest
851  \param aid1 index of the first atom
852  \param aid2 index of the second atom
853 
854  \return a std::list with the indices of the atoms along the shortest
855     path
856 
857  <b>Notes:</b>
858    - the starting and end atoms are included in the path
859    - if no path is found, an empty path is returned
860 
861 */
862 RDKIT_GRAPHMOL_EXPORT std::list<int> getShortestPath(const ROMol &mol, int aid1,
863                                                      int aid2);
864 
865 //@}
866 
867 //! \name Stereochemistry
868 //@{
869 
870 //! removes bogus chirality markers (those on non-sp3 centers)
871 RDKIT_GRAPHMOL_EXPORT void cleanupChirality(RWMol &mol);
872 
873 //! \brief Uses a conformer to assign ChiralType to a molecule's atoms
874 /*!
875   \param mol                  the molecule of interest
876   \param confId               the conformer to use
877   \param replaceExistingTags  if this flag is true (the default), any existing
878                               atomic chiral tags will be replaced
879 
880   If the conformer provided is not a 3D conformer, nothing will be done.
881 */
882 RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFrom3D(
883     ROMol &mol, int confId = -1, bool replaceExistingTags = true);
884 
885 //! \brief Uses a conformer to assign ChiralTypes to a molecule's atoms and
886 //! stereo flags to its bonds
887 /*!
888 
889   \param mol                  the molecule of interest
890   \param confId               the conformer to use
891   \param replaceExistingTags  if this flag is true (the default), any existing
892                               info about stereochemistry will be replaced
893 
894   If the conformer provided is not a 3D conformer, nothing will be done.
895 */
896 RDKIT_GRAPHMOL_EXPORT void assignStereochemistryFrom3D(
897     ROMol &mol, int confId = -1, bool replaceExistingTags = true);
898 
899 //! \brief Use bond directions to assign ChiralTypes to a molecule's atoms and
900 //! stereo flags to its bonds
901 /*!
902 
903   \param mol                  the molecule of interest
904   \param confId               the conformer to use
905   \param replaceExistingTags  if this flag is true (the default), any existing
906                               info about stereochemistry will be replaced
907 */
908 RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromBondDirs(
909     ROMol &mol, int confId = -1, bool replaceExistingTags = true);
910 
911 //! \deprecated this function will be removed in a future release. Use
912 //! setDoubleBondNeighborDirections() instead
913 RDKIT_GRAPHMOL_EXPORT void detectBondStereochemistry(ROMol &mol,
914                                                      int confId = -1);
915 //! Sets bond directions based on double bond stereochemistry (\c conf optional)
916 RDKIT_GRAPHMOL_EXPORT void setDoubleBondNeighborDirections(
917     ROMol &mol, const Conformer *conf = nullptr);
918 
919 //! Assigns CIS/TRANS bond stereochemistry tags based on neighboring directions
920 RDKIT_GRAPHMOL_EXPORT void setBondStereoFromDirections(ROMol &mol);
921 
922 //! Assign stereochemistry tags to atoms (i.e. R/S) and bonds (i.e. Z/E)
923 /*!
924   Does the CIP stereochemistry assignment for the molecule's atoms
925   (R/S) and double bonds (Z/E). Chiral atoms will have a property
926   '_CIPCode' indicating their chiral code.
927 
928   \param mol     the molecule to use
929   \param cleanIt if true, any existing values of the property `_CIPCode`
930                  will be cleared, atoms with a chiral specifier that aren't
931                  actually chiral (e.g. atoms with duplicate
932                  substituents or only 2 substituents, etc.) will have
933                  their chiral code set to CHI_UNSPECIFIED. Bonds with
934                  STEREOCIS/STEREOTRANS specified that have duplicate
935                  substituents based upon the CIP atom ranks will be
936                  marked STEREONONE. Off by default.
937   \param force   causes the calculation to be repeated even if it has
938                  already been done. Off by default.
939   \param flagPossibleStereoCenters   set the _ChiralityPossible property on
940                                      atoms that are possible stereocenters
941 
942   <b>Notes:</b>
943     - Throughout we assume that we're working with a hydrogen-suppressed
944       graph.
945 
946 */
947 RDKIT_GRAPHMOL_EXPORT void assignStereochemistry(
948     ROMol &mol, bool cleanIt = false, bool force = false,
949     bool flagPossibleStereoCenters = false);
950 //! Removes all stereochemistry information from atoms (i.e. R/S) and bonds
951 //! (i.e. Z/E)
952 /*!
953 
954   \param mol     the molecule of interest
955 */
956 RDKIT_GRAPHMOL_EXPORT void removeStereochemistry(ROMol &mol);
957 
958 //! \brief finds bonds that could be cis/trans in a molecule and marks them as
959 //!  Bond::STEREOANY.
960 /*!
961   \param mol     the molecule of interest
962   \param cleanIt toggles removal of stereo flags from double bonds that can
963                  not have stereochemistry
964 
965   This function finds any double bonds that can potentially be part of
966   a cis/trans system. No attempt is made here to mark them cis or
967   trans. No attempt is made to detect double bond stereo in ring systems.
968 
969   This function is useful in the following situations:
970     - when parsing a mol file; for the bonds marked here, coordinate
971       information on the neighbors can be used to identify cis or trans states
972     - when writing a mol file; bonds that can be cis/trans but not marked as
973       either need to be specially marked in the mol file
974     - finding double bonds with unspecified stereochemistry so they
975       can be enumerated for downstream 3D tools
976 
977   The CIPranks on the neighboring atoms are checked in this function. The
978   _CIPCode property if set to any on the double bond.
979 */
980 RDKIT_GRAPHMOL_EXPORT void findPotentialStereoBonds(ROMol &mol,
981                                                     bool cleanIt = false);
982 //! \brief Uses the molParity atom property to assign ChiralType to a molecule's
983 //! atoms
984 /*!
985   \param mol                  the molecule of interest
986   \param replaceExistingTags  if this flag is true (the default), any existing
987                               atomic chiral tags will be replaced
988 */
989 RDKIT_GRAPHMOL_EXPORT void assignChiralTypesFromMolParity(
990     ROMol &mol, bool replaceExistingTags = true);
991 
992 //@}
993 
994 //! returns the number of atoms which have a particular property (\c prop) set
995 RDKIT_GRAPHMOL_EXPORT unsigned getNumAtomsWithDistinctProperty(
996     const ROMol &mol, std::string prop);
997 
998 //! returns true if the molecule needs to have Hs added to it, false otherwise
999 RDKIT_GRAPHMOL_EXPORT bool needsHs(const ROMol &mol);
1000 
1001 namespace details {
1002 //! implementation detail: not recommended for use in other code
1003 RDKIT_GRAPHMOL_EXPORT void KekulizeFragment(
1004     RWMol &mol, const boost::dynamic_bitset<> &atomsToUse,
1005     const boost::dynamic_bitset<> &bondsToUse, bool markAtomsBonds = true,
1006     unsigned int maxBackTracks = 100);
1007 }  // namespace details
1008 
1009 }  // namespace MolOps
1010 }  // namespace RDKit
1011 
1012 #endif
1013