1 //
2 //  Copyright (c) 2010-2013, Novartis Institutes for BioMedical Research Inc.
3 //  All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 //     * Redistributions of source code must retain the above copyright
10 //       notice, this list of conditions and the following disclaimer.
11 //     * Redistributions in binary form must reproduce the above
12 //       copyright notice, this list of conditions and the following
13 //       disclaimer in the documentation and/or other materials provided
14 //       with the distribution.
15 //     * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 //       nor the names of its contributors may be used to endorse or promote
17 //       products derived from this software without specific prior written
18 //       permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 #include <GraphMol/RDKitBase.h>
33 #include <GraphMol/MolPickler.h>
34 #include <GraphMol/ChemReactions/ReactionPickler.h>
35 #include <GraphMol/ChemReactions/ReactionParser.h>
36 #include <GraphMol/ChemReactions/Reaction.h>
37 #include <GraphMol/SmilesParse/SmilesParse.h>
38 #include <GraphMol/SmilesParse/SmartsWrite.h>
39 #include <GraphMol/SmilesParse/SmilesWrite.h>
40 #include <GraphMol/Fingerprints/Fingerprints.h>
41 #include <GraphMol/FileParsers/FileParsers.h>
42 #include <GraphMol/Depictor/RDDepictor.h>
43 #include <GraphMol/Fingerprints/AtomPairs.h>
44 #include <GraphMol/Fingerprints/MorganFingerprints.h>
45 #include <GraphMol/Fingerprints/MACCS.h>
46 #include <GraphMol/Substruct/SubstructMatch.h>
47 #include <GraphMol/Descriptors/MolDescriptors.h>
48 #include <GraphMol/ChemTransforms/ChemTransforms.h>
49 #include <GraphMol/MolHash/MolHash.h>
50 #include <GraphMol/FMCS/FMCS.h>
51 #include <DataStructs/BitOps.h>
52 #include <DataStructs/SparseIntVect.h>
53 #include <GraphMol/MolDraw2D/MolDraw2D.h>
54 #include <GraphMol/MolDraw2D/MolDraw2DSVG.h>
55 #include <GraphMol/MolDraw2D/MolDraw2DUtils.h>
56 
57 #include <RDGeneral/BoostStartInclude.h>
58 #include <boost/integer_traits.hpp>
59 #include <boost/property_tree/ptree.hpp>
60 #include <boost/property_tree/json_parser.hpp>
61 #include <boost/algorithm/string.hpp>
62 #include <boost/tokenizer.hpp>
63 #include <RDGeneral/BoostEndInclude.h>
64 
65 #ifdef RDK_BUILD_INCHI_SUPPORT
66 #include <INCHI-API/inchi.h>
67 #endif
68 #ifdef RDK_BUILD_AVALON_SUPPORT
69 #include <AvalonTools/AvalonTools.h>
70 #endif
71 #include <GraphMol/ChemReactions/ReactionFingerprints.h>
72 #include <GraphMol/ChemReactions/ReactionUtils.h>
73 
74 #include "rdkit.h"
75 #include "guc.h"
76 #include "bitstring.h"
77 
78 using namespace std;
79 using namespace RDKit;
80 
81 class ByteA : public std::string {
82  public:
ByteA()83   ByteA() : string(){};
ByteA(bytea * b)84   ByteA(bytea *b) : string(VARDATA(b), VARSIZE(b) - VARHDRSZ){};
ByteA(string & s)85   ByteA(string &s) : string(s){};
86 
87   /*
88    * Convert string to bytea. Convertaion is in pgsql's memory
89    */
toByteA()90   bytea *toByteA() {
91     bytea *res;
92     int len;
93 
94     len = this->size();
95     res = (bytea *)palloc(VARHDRSZ + len);
96     memcpy(VARDATA(res), this->data(), len);
97     SET_VARSIZE(res, VARHDRSZ + len);
98 
99     return res;
100   };
101 
102   /* Just the copy of string's method */
operator =(const string & __str)103   ByteA &operator=(const string &__str) {
104     return (ByteA &)this->assign(__str);
105   };
106 };
107 
108 /*
109  * Constant io
110  */
111 static string StringData;
112 
113 /*
114  * Real sparse vector
115  */
116 
117 typedef SparseIntVect<std::uint32_t> SparseFP;
118 
119 /*******************************************
120  *        ROMol transformation             *
121  *******************************************/
122 
freeCROMol(CROMol data)123 extern "C" void freeCROMol(CROMol data) {
124   auto *mol = (ROMol *)data;
125   delete mol;
126 }
127 
constructROMol(Mol * data)128 extern "C" CROMol constructROMol(Mol *data) {
129   auto *mol = new ROMol();
130 
131   try {
132     ByteA b(data);
133     MolPickler::molFromPickle(b, mol);
134   } catch (MolPicklerException &e) {
135     elog(ERROR, "molFromPickle: %s", e.what());
136   } catch (...) {
137     elog(ERROR, "constructROMol: Unknown exception");
138   }
139 
140   return (CROMol)mol;
141 }
142 
deconstructROMol(CROMol data)143 extern "C" Mol *deconstructROMol(CROMol data) {
144   auto *mol = (ROMol *)data;
145   ByteA b;
146 
147   try {
148     MolPickler::pickleMol(mol, b);
149   } catch (MolPicklerException &e) {
150     elog(ERROR, "pickleMol: %s", e.what());
151   } catch (...) {
152     elog(ERROR, "deconstructROMol: Unknown exception");
153   }
154 
155   return (Mol *)b.toByteA();
156 }
157 
parseMolText(char * data,bool asSmarts,bool warnOnFail,bool asQuery)158 extern "C" CROMol parseMolText(char *data, bool asSmarts, bool warnOnFail,
159                                bool asQuery) {
160   RWMol *mol = nullptr;
161 
162   try {
163     if (!asSmarts) {
164       if (!asQuery) {
165         mol = SmilesToMol(data);
166       } else {
167         mol = SmilesToMol(data, 0, false);
168         if (mol != nullptr) {
169           MolOps::sanitizeMol(*mol);
170           MolOps::mergeQueryHs(*mol);
171         }
172       }
173     } else {
174       mol = SmartsToMol(data, 0, false);
175     }
176   } catch (...) {
177     mol = nullptr;
178   }
179   if (mol == nullptr) {
180     if (warnOnFail) {
181       ereport(WARNING,
182               (errcode(ERRCODE_WARNING),
183                errmsg("could not create molecule from SMILES '%s'", data)));
184     } else {
185       ereport(ERROR,
186               (errcode(ERRCODE_DATA_EXCEPTION),
187                errmsg("could not create molecule from SMILES '%s'", data)));
188     }
189   }
190 
191   return (CROMol)mol;
192 }
193 
parseMolBlob(char * data,int len)194 extern "C" CROMol parseMolBlob(char *data, int len) {
195   ROMol *mol = nullptr;
196 
197   try {
198     string binStr(data, len);
199     mol = new ROMol(binStr);
200   } catch (...) {
201     ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION),
202                     errmsg("problem generating molecule from blob data")));
203   }
204   if (mol == nullptr) {
205     ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION),
206                     errmsg("blob data could not be parsed")));
207   }
208 
209   return (CROMol)mol;
210 }
211 
parseMolCTAB(char * data,bool keepConformer,bool warnOnFail,bool asQuery)212 extern "C" CROMol parseMolCTAB(char *data, bool keepConformer, bool warnOnFail,
213                                bool asQuery) {
214   RWMol *mol = nullptr;
215 
216   try {
217     if (!asQuery) {
218       mol = MolBlockToMol(data);
219     } else {
220       mol = MolBlockToMol(data, true, false);
221       if (mol != nullptr) {
222         MolOps::mergeQueryHs(*mol);
223       }
224     }
225   } catch (...) {
226     mol = nullptr;
227   }
228   if (mol == nullptr) {
229     if (warnOnFail) {
230       ereport(WARNING,
231               (errcode(ERRCODE_WARNING),
232                errmsg("could not create molecule from CTAB '%s'", data)));
233 
234     } else {
235       ereport(ERROR,
236               (errcode(ERRCODE_DATA_EXCEPTION),
237                errmsg("could not create molecule from CTAB '%s'", data)));
238     }
239   } else {
240     if (!keepConformer) {
241       mol->clearConformers();
242     }
243   }
244 
245   return (CROMol)mol;
246 }
247 
isValidSmiles(char * data)248 extern "C" bool isValidSmiles(char *data) {
249   RWMol *mol = nullptr;
250   bool res;
251   try {
252     string str(data);
253     if (str.empty()) {
254       // Pass the test - No-Structure input is allowed. No cleanup necessary.
255       return true;
256     }
257     mol = SmilesToMol(str, 0, 0);
258     if (mol) {
259       MolOps::cleanUp(*mol);
260       mol->updatePropertyCache();
261       MolOps::Kekulize(*mol);
262       MolOps::assignRadicals(*mol);
263       MolOps::setAromaticity(*mol);
264       MolOps::adjustHs(*mol);
265     }
266   } catch (...) {
267     mol = nullptr;
268   }
269   if (mol == nullptr) {
270     res = false;
271   } else {
272     res = true;
273     delete mol;
274   }
275   return res;
276 }
277 
isValidSmarts(char * data)278 extern "C" bool isValidSmarts(char *data) {
279   ROMol *mol = nullptr;
280   bool res;
281   try {
282     mol = SmartsToMol(data);
283   } catch (...) {
284     mol = nullptr;
285   }
286   if (mol == nullptr) {
287     res = false;
288   } else {
289     res = true;
290     delete mol;
291   }
292   return res;
293 }
294 
isValidCTAB(char * data)295 extern "C" bool isValidCTAB(char *data) {
296   RWMol *mol = nullptr;
297   bool res;
298   try {
299     mol = MolBlockToMol(data, false, false);
300     if (mol) {
301       MolOps::cleanUp(*mol);
302       mol->updatePropertyCache();
303       MolOps::Kekulize(*mol);
304       MolOps::assignRadicals(*mol);
305       MolOps::setAromaticity(*mol);
306       MolOps::adjustHs(*mol);
307     }
308   } catch (...) {
309     mol = nullptr;
310   }
311   if (mol == nullptr) {
312     res = false;
313   } else {
314     res = true;
315     delete mol;
316   }
317   return res;
318 }
319 
isValidMolBlob(char * data,int len)320 extern "C" bool isValidMolBlob(char *data, int len) {
321   ROMol *mol = nullptr;
322   bool res = false;
323   try {
324     string binStr(data, len);
325     mol = new ROMol(binStr);
326   } catch (...) {
327     mol = nullptr;
328   }
329   if (mol == nullptr) {
330     res = false;
331   } else {
332     delete mol;
333     res = true;
334   }
335   return res;
336 }
337 
makeMolText(CROMol data,int * len,bool asSmarts,bool cxSmiles)338 extern "C" char *makeMolText(CROMol data, int *len, bool asSmarts,
339                              bool cxSmiles) {
340   auto *mol = (ROMol *)data;
341 
342   try {
343     if (!asSmarts) {
344       if (!cxSmiles) {
345         StringData = MolToSmiles(*mol);
346       } else {
347         StringData = MolToCXSmiles(*mol);
348       }
349     } else {
350       StringData = MolToSmarts(*mol, false);
351     }
352   } catch (...) {
353     ereport(
354         WARNING,
355         (errcode(ERRCODE_WARNING),
356          errmsg("makeMolText: problems converting molecule to SMILES/SMARTS")));
357     StringData = "";
358   }
359 
360   *len = StringData.size();
361   return (char *)StringData.c_str();
362 }
363 
makeCtabText(CROMol data,int * len,bool createDepictionIfMissing)364 extern "C" char *makeCtabText(CROMol data, int *len,
365                               bool createDepictionIfMissing) {
366   auto *mol = (ROMol *)data;
367 
368   try {
369     if (createDepictionIfMissing && mol->getNumConformers() == 0) {
370       RDDepict::compute2DCoords(*mol);
371     }
372     StringData = MolToMolBlock(*mol);
373   } catch (...) {
374     ereport(WARNING,
375             (errcode(ERRCODE_WARNING),
376              errmsg("makeCtabText: problems converting molecule to CTAB")));
377     StringData = "";
378   }
379 
380   *len = StringData.size();
381   return (char *)StringData.c_str();
382 }
383 
makeMolBlob(CROMol data,int * len)384 extern "C" char *makeMolBlob(CROMol data, int *len) {
385   auto *mol = (ROMol *)data;
386   StringData.clear();
387   try {
388     MolPickler::pickleMol(*mol, StringData);
389   } catch (...) {
390     elog(ERROR, "makeMolBlob: Unknown exception");
391   }
392 
393   *len = StringData.size();
394   return (char *)StringData.data();
395 }
396 
makeMolSignature(CROMol data)397 extern "C" bytea *makeMolSignature(CROMol data) {
398   auto *mol = (ROMol *)data;
399   ExplicitBitVect *res = nullptr;
400   bytea *ret = nullptr;
401 
402   try {
403     res = RDKit::PatternFingerprintMol(*mol, getSubstructFpSize());
404     // res =
405     // RDKit::LayeredFingerprintMol(*mol,RDKit::substructLayers,1,5,SSS_FP_SIZE);
406 
407     if (res) {
408       std::string sres = BitVectToBinaryText(*res);
409 
410       unsigned int varsize = VARHDRSZ + sres.size();
411       ret = (bytea *)palloc0(varsize);
412       memcpy(VARDATA(ret), sres.data(), sres.size());
413       SET_VARSIZE(ret, varsize);
414 
415       delete res;
416       res = nullptr;
417     }
418   } catch (...) {
419     elog(ERROR, "makeMolSignature: Unknown exception");
420     if (res) {
421       delete res;
422     }
423   }
424 
425   return ret;
426 }
427 
molcmp(CROMol i,CROMol a)428 extern "C" int molcmp(CROMol i, CROMol a) {
429   auto *im = (ROMol *)i;
430   auto *am = (ROMol *)a;
431 
432   if (!im) {
433     if (!am) {
434       return 0;
435     }
436     return -1;
437   }
438   if (!am) {
439     return 1;
440   }
441 
442   int res = im->getNumAtoms() - am->getNumAtoms();
443   if (res) {
444     return res;
445   }
446 
447   res = im->getNumBonds() - am->getNumBonds();
448   if (res) {
449     return res;
450   }
451 
452   res = int(RDKit::Descriptors::calcAMW(*im, false)) -
453         int(RDKit::Descriptors::calcAMW(*am, false));
454   if (res) {
455     return res;
456   }
457 
458   res = im->getRingInfo()->numRings() - am->getRingInfo()->numRings();
459   if (res) {
460     return res;
461   }
462 
463   RDKit::MatchVectType matchVect;
464   bool recursionPossible = false;
465   bool doChiralMatch = getDoChiralSSS();
466   bool ss1 = RDKit::SubstructMatch(*im, *am, matchVect, recursionPossible,
467                                    doChiralMatch);
468   bool ss2 = RDKit::SubstructMatch(*am, *im, matchVect, recursionPossible,
469                                    doChiralMatch);
470   if (ss1 && !ss2) {
471     return 1;
472   } else if (!ss1 && ss2) {
473     return -1;
474   }
475 
476   // the above can still fail in some chirality cases
477   std::string smi1 = MolToSmiles(*im, doChiralMatch);
478   std::string smi2 = MolToSmiles(*am, doChiralMatch);
479   return smi1 == smi2 ? 0 : (smi1 < smi2 ? -1 : 1);
480 }
481 
MolSubstruct(CROMol i,CROMol a)482 extern "C" int MolSubstruct(CROMol i, CROMol a) {
483   auto *im = (ROMol *)i;
484   auto *am = (ROMol *)a;
485   RDKit::MatchVectType matchVect;
486 
487   return RDKit::SubstructMatch(*im, *am, matchVect, true, getDoChiralSSS());
488 }
489 
MolSubstructCount(CROMol i,CROMol a,bool uniquify)490 extern "C" int MolSubstructCount(CROMol i, CROMol a, bool uniquify) {
491   auto *im = (ROMol *)i;
492   auto *am = (ROMol *)a;
493   std::vector<RDKit::MatchVectType> matchVect;
494 
495   return static_cast<int>(RDKit::SubstructMatch(*im, *am, matchVect, uniquify,
496                                                 true, getDoChiralSSS()));
497 }
498 
499 /*******************************************
500  *     Molecule operations                 *
501  *******************************************/
502 #define MOLDESCR(name, func, ret)      \
503   extern "C" ret Mol##name(CROMol i) { \
504     const ROMol *im = (ROMol *)i;      \
505     return func(*im);                  \
506   }
MOLDESCR(FractionCSP3,RDKit::Descriptors::calcFractionCSP3,double)507 MOLDESCR(FractionCSP3, RDKit::Descriptors::calcFractionCSP3, double)
508 MOLDESCR(TPSA, RDKit::Descriptors::calcTPSA, double)
509 MOLDESCR(AMW, RDKit::Descriptors::calcAMW, double)
510 MOLDESCR(HBA, RDKit::Descriptors::calcLipinskiHBA, int)
511 MOLDESCR(HBD, RDKit::Descriptors::calcLipinskiHBD, int)
512 MOLDESCR(NumHeteroatoms, RDKit::Descriptors::calcNumHeteroatoms, int)
513 MOLDESCR(NumRings, RDKit::Descriptors::calcNumRings, int)
514 MOLDESCR(NumAromaticRings, RDKit::Descriptors::calcNumAromaticRings, int)
515 MOLDESCR(NumAliphaticRings, RDKit::Descriptors::calcNumAliphaticRings, int)
516 MOLDESCR(NumSaturatedRings, RDKit::Descriptors::calcNumSaturatedRings, int)
517 MOLDESCR(NumAromaticHeterocycles,
518          RDKit::Descriptors::calcNumAromaticHeterocycles, int)
519 MOLDESCR(NumAliphaticHeterocycles,
520          RDKit::Descriptors::calcNumAliphaticHeterocycles, int)
521 MOLDESCR(NumSaturatedHeterocycles,
522          RDKit::Descriptors::calcNumSaturatedHeterocycles, int)
523 MOLDESCR(NumAromaticCarbocycles, RDKit::Descriptors::calcNumAromaticCarbocycles,
524          int)
525 MOLDESCR(NumAliphaticCarbocycles,
526          RDKit::Descriptors::calcNumAliphaticCarbocycles, int)
527 MOLDESCR(NumSaturatedCarbocycles,
528          RDKit::Descriptors::calcNumSaturatedCarbocycles, int)
529 MOLDESCR(NumHeterocycles, RDKit::Descriptors::calcNumHeterocycles, int)
530 
531 MOLDESCR(NumRotatableBonds, RDKit::Descriptors::calcNumRotatableBonds, int)
532 MOLDESCR(Chi0v, RDKit::Descriptors::calcChi0v, double)
533 MOLDESCR(Chi1v, RDKit::Descriptors::calcChi1v, double)
534 MOLDESCR(Chi2v, RDKit::Descriptors::calcChi2v, double)
535 MOLDESCR(Chi3v, RDKit::Descriptors::calcChi3v, double)
536 MOLDESCR(Chi4v, RDKit::Descriptors::calcChi4v, double)
537 MOLDESCR(Chi0n, RDKit::Descriptors::calcChi0n, double)
538 MOLDESCR(Chi1n, RDKit::Descriptors::calcChi1n, double)
539 MOLDESCR(Chi2n, RDKit::Descriptors::calcChi2n, double)
540 MOLDESCR(Chi3n, RDKit::Descriptors::calcChi3n, double)
541 MOLDESCR(Chi4n, RDKit::Descriptors::calcChi4n, double)
542 MOLDESCR(Kappa1, RDKit::Descriptors::calcKappa1, double)
543 MOLDESCR(Kappa2, RDKit::Descriptors::calcKappa2, double)
544 MOLDESCR(Kappa3, RDKit::Descriptors::calcKappa3, double)
545 MOLDESCR(NumSpiroAtoms, RDKit::Descriptors::calcNumSpiroAtoms, int)
546 MOLDESCR(NumBridgeheadAtoms, RDKit::Descriptors::calcNumBridgeheadAtoms, int)
547 
548 extern "C" double MolLogP(CROMol i) {
549   double logp, mr;
550   RDKit::Descriptors::calcCrippenDescriptors(*(ROMol *)i, logp, mr);
551   return logp;
552 }
MolNumAtoms(CROMol i)553 extern "C" int MolNumAtoms(CROMol i) {
554   const ROMol *im = (ROMol *)i;
555   return im->getNumAtoms(false);
556 }
MolNumHeavyAtoms(CROMol i)557 extern "C" int MolNumHeavyAtoms(CROMol i) {
558   const ROMol *im = (ROMol *)i;
559   return im->getNumHeavyAtoms();
560 }
561 
makeMolFormulaText(CROMol data,int * len,bool separateIsotopes,bool abbreviateHIsotopes)562 extern "C" char *makeMolFormulaText(CROMol data, int *len,
563                                     bool separateIsotopes,
564                                     bool abbreviateHIsotopes) {
565   auto *mol = (ROMol *)data;
566 
567   try {
568     StringData = RDKit::Descriptors::calcMolFormula(*mol, separateIsotopes,
569                                                     abbreviateHIsotopes);
570   } catch (...) {
571     ereport(WARNING,
572             (errcode(ERRCODE_WARNING),
573              errmsg("makeMolFormulaText: problems converting molecule to "
574                     "sum formula")));
575     StringData = "";
576   }
577 
578   *len = StringData.size();
579   return (char *)StringData.c_str();
580 }
581 
MolInchi(CROMol i,const char * opts)582 extern "C" const char *MolInchi(CROMol i, const char *opts) {
583   std::string inchi = "InChI not available";
584 #ifdef RDK_BUILD_INCHI_SUPPORT
585   const ROMol *im = (ROMol *)i;
586   ExtraInchiReturnValues rv;
587   try {
588     std::string sopts = "/AuxNone /WarnOnEmptyStructure";
589     if (strlen(opts)) {
590       sopts += std::string(" ") + std::string(opts);
591     }
592     inchi = MolToInchi(*im, rv, sopts.c_str());
593   } catch (MolSanitizeException &e) {
594     inchi = "";
595     elog(ERROR, "MolInchi: cannot kekulize molecule");
596   } catch (...) {
597     inchi = "";
598     elog(ERROR, "MolInchi: Unknown exception");
599   }
600 #endif
601   return strdup(inchi.c_str());
602 }
MolInchiKey(CROMol i,const char * opts)603 extern "C" const char *MolInchiKey(CROMol i, const char *opts) {
604   std::string key = "InChI not available";
605 #ifdef RDK_BUILD_INCHI_SUPPORT
606   const ROMol *im = (ROMol *)i;
607   ExtraInchiReturnValues rv;
608   try {
609     std::string sopts = "/AuxNone /WarnOnEmptyStructure";
610     if (strlen(opts)) {
611       sopts += std::string(" ") + std::string(opts);
612     }
613     std::string inchi = MolToInchi(*im, rv, sopts.c_str());
614     key = InchiToInchiKey(inchi);
615   } catch (MolSanitizeException &e) {
616     key = "";
617     elog(ERROR, "MolInchiKey: cannot kekulize molecule");
618   } catch (...) {
619     key = "";
620     elog(ERROR, "MolInchiKey: Unknown exception");
621   }
622 #endif
623   return strdup(key.c_str());
624 }
625 
MolMurckoScaffold(CROMol i)626 extern "C" CROMol MolMurckoScaffold(CROMol i) {
627   const ROMol *im = (ROMol *)i;
628   ROMol *mol = MurckoDecompose(*im);
629   if (mol && !mol->getNumAtoms()) {
630     delete mol;
631     mol = nullptr;
632   } else {
633     try {
634       MolOps::sanitizeMol(*(RWMol *)mol);
635     } catch (...) {
636       delete mol;
637       mol = nullptr;
638     }
639   }
640   return (CROMol)mol;
641 }
642 
MolAdjustQueryProperties(CROMol i,const char * params)643 extern "C" CROMol MolAdjustQueryProperties(CROMol i, const char *params) {
644   const ROMol *im = (ROMol *)i;
645 
646   MolOps::AdjustQueryParameters p;
647 
648   if (params && strlen(params)) {
649     std::string pstring(params);
650     try {
651       MolOps::parseAdjustQueryParametersFromJSON(p, pstring);
652     } catch (const ValueErrorException &e) {
653       elog(ERROR, e.what());
654     } catch (...) {
655       elog(WARNING,
656            "adjustQueryProperties: Invalid argument \'params\' ignored");
657     }
658   }
659   ROMol *mol = MolOps::adjustQueryProperties(*im, &p);
660   return (CROMol)mol;
661 }
662 
MolGetSVG(CROMol i,unsigned int w,unsigned int h,const char * legend,const char * params)663 extern "C" char *MolGetSVG(CROMol i, unsigned int w, unsigned int h,
664                            const char *legend, const char *params) {
665   // SVG routines need an RWMol since they change the
666   // molecule as they prepare it for drawing. We don't
667   // want a plain SQL function (mol_to_svg) to have
668   // unexpected side effects, so take a copy and render
669   // (and change) that.
670   RWMol input_copy(*(ROMol *)i);
671 
672   MolDraw2DUtils::prepareMolForDrawing(input_copy);
673   std::string slegend(legend ? legend : "");
674   MolDraw2DSVG drawer(w, h);
675   if (params && strlen(params)) {
676     try {
677       MolDraw2DUtils::updateDrawerParamsFromJSON(drawer, params);
678     } catch (...) {
679       elog(WARNING,
680            "adjustQueryProperties: Invalid argument \'params\' ignored");
681     }
682   }
683   drawer.drawMolecule(input_copy, legend);
684   drawer.finishDrawing();
685   std::string txt = drawer.getDrawingText();
686   return strdup(txt.c_str());
687 }
688 
ReactionGetSVG(CChemicalReaction i,unsigned int w,unsigned int h,bool highlightByReactant,const char * params)689 extern "C" char *ReactionGetSVG(CChemicalReaction i, unsigned int w,
690                                 unsigned int h, bool highlightByReactant,
691                                 const char *params) {
692   auto *rxn = (ChemicalReaction *)i;
693 
694   MolDraw2DSVG drawer(w, h);
695   if (params && strlen(params)) {
696     try {
697       MolDraw2DUtils::updateDrawerParamsFromJSON(drawer, params);
698     } catch (...) {
699       elog(WARNING,
700            "adjustQueryProperties: Invalid argument \'params\' ignored");
701     }
702   }
703   drawer.drawReaction(*rxn, highlightByReactant);
704   drawer.finishDrawing();
705   std::string txt = drawer.getDrawingText();
706   return strdup(txt.c_str());
707 }
708 
709 /*******************************************
710  *     CBfp transformation                 *
711  *******************************************/
712 
freeCBfp(CBfp data)713 extern "C" void freeCBfp(CBfp data) {
714   auto *fp = (std::string *)data;
715   delete fp;
716 }
717 
constructCBfp(Bfp * data)718 extern "C" CBfp constructCBfp(Bfp *data) {
719   std::string *ebv = nullptr;
720 
721   try {
722     ebv = new std::string(VARDATA(data), VARSIZE(data) - VARHDRSZ);
723   } catch (...) {
724     elog(ERROR, "constructMolFingerPrint: Unknown exception");
725   }
726 
727   return (CBfp)ebv;
728 }
729 
deconstructCBfp(CBfp data)730 extern "C" Bfp *deconstructCBfp(CBfp data) {
731   auto *ebv = (std::string *)data;
732   ByteA b;
733 
734   try {
735     b = *ebv;
736   } catch (...) {
737     elog(ERROR, "deconstructMolFingerPrint: Unknown exception");
738   }
739 
740   return b.toByteA();
741 }
742 
makeBfpSignature(CBfp data)743 extern "C" BfpSignature *makeBfpSignature(CBfp data) {
744   auto *ebv = (std::string *)data;
745   int siglen = ebv->size();
746 
747   unsigned int varsize = sizeof(BfpSignature) + siglen;
748   BfpSignature *res = (BfpSignature *)palloc0(varsize);
749   SET_VARSIZE(res, varsize);
750 
751   res->weight = bitstringWeight(siglen, (uint8 *)ebv->data());
752   memcpy(res->fp, ebv->data(), siglen);
753 
754   return res;
755 }
756 
CBfpSize(CBfp a)757 extern "C" int CBfpSize(CBfp a) {
758   auto *ebv = (std::string *)a;
759   int numBits = ebv->size() * 8;
760   return numBits;
761 }
762 
calcBitmapTanimotoSml(CBfp a,CBfp b)763 extern "C" double calcBitmapTanimotoSml(CBfp a, CBfp b) {
764   auto *abv = (std::string *)a;
765   auto *bbv = (std::string *)b;
766   const auto *afp = (const unsigned char *)abv->c_str();
767   const auto *bfp = (const unsigned char *)bbv->c_str();
768   /* return CalcBitmapTanimoto(afp, bfp, abv->size()); */
769   return bitstringTanimotoSimilarity(abv->size(), (uint8 *)afp, (uint8 *)bfp);
770 }
771 
calcBitmapDiceSml(CBfp a,CBfp b)772 extern "C" double calcBitmapDiceSml(CBfp a, CBfp b) {
773   auto *abv = (std::string *)a;
774   auto *bbv = (std::string *)b;
775   const auto *afp = (const unsigned char *)abv->c_str();
776   const auto *bfp = (const unsigned char *)bbv->c_str();
777   return CalcBitmapDice(afp, bfp, abv->size());
778 }
779 
calcBitmapTverskySml(CBfp a,CBfp b,float ca,float cb)780 double calcBitmapTverskySml(CBfp a, CBfp b, float ca, float cb) {
781   auto *abv = (std::string *)a;
782   auto *bbv = (std::string *)b;
783   const auto *afp = (const unsigned char *)abv->c_str();
784   const auto *bfp = (const unsigned char *)bbv->c_str();
785   return CalcBitmapTversky(afp, bfp, abv->size(), ca, cb);
786 }
787 
788 /*******************************************
789  *     CSfp transformation                 *
790  *******************************************/
791 
freeCSfp(CSfp data)792 extern "C" void freeCSfp(CSfp data) {
793   auto *fp = (SparseFP *)data;
794   delete fp;
795 }
796 
constructCSfp(Sfp * data)797 extern "C" CSfp constructCSfp(Sfp *data) {
798   SparseFP *ebv = nullptr;
799 
800   try {
801     ebv = new SparseFP(VARDATA(data), VARSIZE(data) - VARHDRSZ);
802   } catch (...) {
803     elog(ERROR, "constructMolFingerPrint: Unknown exception");
804   }
805 
806   return (CSfp)ebv;
807 }
808 
deconstructCSfp(CSfp data)809 extern "C" Sfp *deconstructCSfp(CSfp data) {
810   auto *ebv = (SparseFP *)data;
811   ByteA b;
812 
813   try {
814     b = ebv->toString();
815   } catch (...) {
816     elog(ERROR, "deconstructMolFingerPrint: Unknown exception");
817   }
818 
819   return b.toByteA();
820 }
821 
makeSfpSignature(CSfp data,int numBits)822 extern "C" bytea *makeSfpSignature(CSfp data, int numBits) {
823   auto *v = (SparseFP *)data;
824   int n, numBytes;
825   bytea *res;
826   unsigned char *s;
827   SparseFP::StorageType::const_iterator iter;
828 
829   numBytes = VARHDRSZ + (numBits / 8);
830   if ((numBits % 8) != 0) {
831     numBytes++;
832   }
833 
834   res = (bytea *)palloc0(numBytes);
835   SET_VARSIZE(res, numBytes);
836   s = (unsigned char *)VARDATA(res);
837 
838   for (iter = v->getNonzeroElements().begin();
839        iter != v->getNonzeroElements().end(); iter++) {
840     n = iter->first % numBits;
841     s[n / 8] |= 1 << (n % 8);
842   }
843 
844   return res;
845 }
846 
makeLowSparseFingerPrint(CSfp data,int numInts)847 extern "C" bytea *makeLowSparseFingerPrint(CSfp data, int numInts) {
848   auto *v = (SparseFP *)data;
849   int numBytes;
850   bytea *res;
851   IntRange *s;
852   int n;
853   SparseFP::StorageType::const_iterator iter;
854 
855   numBytes = VARHDRSZ + (numInts * sizeof(IntRange));
856 
857   res = (bytea *)palloc0(numBytes);
858   SET_VARSIZE(res, numBytes);
859   s = (IntRange *)VARDATA(res);
860 
861   for (iter = v->getNonzeroElements().begin();
862        iter != v->getNonzeroElements().end(); iter++) {
863     uint32 iterV = (uint32)iter->second;
864     n = iter->first % numInts;
865 
866     if (iterV > INTRANGEMAX) {
867 #if 0
868         elog(ERROR, "sparse fingerprint is too big, increase INTRANGEMAX in rdkit.h");
869 #else
870       iterV = INTRANGEMAX;
871 #endif
872     }
873 
874     if (s[n].low == 0 || s[n].low > iterV) s[n].low = iterV;
875     if (s[n].high < iterV) s[n].high = iterV;
876   }
877 
878   return res;
879 }
880 
countOverlapValues(bytea * sign,CSfp data,int numBits,int * sum,int * overlapSum,int * overlapN)881 extern "C" void countOverlapValues(bytea *sign, CSfp data, int numBits,
882                                    int *sum, int *overlapSum, int *overlapN) {
883   auto *v = (SparseFP *)data;
884   SparseFP::StorageType::const_iterator iter;
885 
886   *sum = *overlapSum = *overlapN = 0;
887 
888   if (sign) {
889     unsigned char *s = (unsigned char *)VARDATA(sign);
890     int n;
891 
892     for (iter = v->getNonzeroElements().begin();
893          iter != v->getNonzeroElements().end(); iter++) {
894       *sum += iter->second;
895       n = iter->first % numBits;
896       if (s[n / 8] & (1 << (n % 8))) {
897         *overlapSum += iter->second;
898         *overlapN += 1;
899       }
900     }
901   } else {
902     /* Assume, sign has only true bits */
903     for (iter = v->getNonzeroElements().begin();
904          iter != v->getNonzeroElements().end(); iter++)
905       *sum += iter->second;
906 
907     *overlapSum = *sum;
908     *overlapN = v->getNonzeroElements().size();
909   }
910 }
911 
countLowOverlapValues(bytea * sign,CSfp data,int numInts,int * querySum,int * keySum,int * overlapUp,int * overlapDown)912 extern "C" void countLowOverlapValues(bytea *sign, CSfp data, int numInts,
913                                       int *querySum, int *keySum,
914                                       int *overlapUp, int *overlapDown) {
915   auto *v = (SparseFP *)data;
916   SparseFP::StorageType::const_iterator iter;
917   IntRange *s = (IntRange *)VARDATA(sign);
918   int n;
919 
920   *querySum = *keySum = *overlapUp = *overlapDown = 0;
921 
922   for (iter = v->getNonzeroElements().begin();
923        iter != v->getNonzeroElements().end(); iter++) {
924     *querySum += iter->second;
925     n = iter->first % numInts;
926     if (s[n].low == 0) {
927       Assert(s[n].high == 0);
928       continue;
929     }
930 
931     *overlapDown += Min(s[n].low, (uint32)iter->second);
932     *overlapUp += Min(s[n].high, (uint32)iter->second);
933   }
934 
935   Assert(*overlapDown <= *overlapUp);
936 
937   for (n = 0; n < numInts; n++) {
938     *keySum += s[n].low;
939     if (s[n].low != s[n].high)
940       *keySum += s[n].high; /* there is at least two key mapped into current
941                                backet */
942   }
943 
944   Assert(*overlapUp <= *keySum);
945 }
946 
calcSparseTanimotoSml(CSfp a,CSfp b)947 extern "C" double calcSparseTanimotoSml(CSfp a, CSfp b) {
948   double res = -1.0;
949 
950   /*
951    * Nsame / (Na + Nb - Nsame)
952    */
953 
954   try {
955     res = TanimotoSimilarity(*(SparseFP *)a, *(SparseFP *)b);
956   } catch (ValueErrorException &e) {
957     elog(ERROR, "TanimotoSimilarity: %s", e.what());
958   } catch (...) {
959     elog(ERROR, "calcSparseTanimotoSml: Unknown exception");
960   }
961 
962   return res;
963 }
964 
calcSparseDiceSml(CSfp a,CSfp b)965 extern "C" double calcSparseDiceSml(CSfp a, CSfp b) {
966   double res = -1.0;
967 
968   /*
969    * 2 * Nsame / (Na + Nb)
970    */
971 
972   try {
973     res = DiceSimilarity(*(SparseFP *)a, *(SparseFP *)b);
974   } catch (ValueErrorException &e) {
975     elog(ERROR, "DiceSimilarity: %s", e.what());
976   } catch (...) {
977     elog(ERROR, "calcSparseDiceSml: Unknown exception");
978   }
979 
980   return res;
981 }
982 
calcSparseStringDiceSml(const char * a,unsigned int sza,const char * b,unsigned int szb)983 extern "C" double calcSparseStringDiceSml(const char *a, unsigned int sza,
984                                           const char *b, unsigned int szb) {
985   const auto *t1 = (const unsigned char *)a;
986   const auto *t2 = (const unsigned char *)b;
987 
988   std::uint32_t tmp;
989   tmp = *(reinterpret_cast<const std::uint32_t *>(t1));
990   t1 += sizeof(std::uint32_t);
991   if (tmp != (std::uint32_t)ci_SPARSEINTVECT_VERSION) {
992     elog(ERROR, "calcSparseStringDiceSml: could not convert argument 1");
993   }
994   tmp = *(reinterpret_cast<const std::uint32_t *>(t2));
995   t2 += sizeof(std::uint32_t);
996   if (tmp != (std::uint32_t)ci_SPARSEINTVECT_VERSION) {
997     elog(ERROR, "calcSparseStringDiceSml: could not convert argument 2");
998   }
999 
1000   // check the element size:
1001   tmp = *(reinterpret_cast<const std::uint32_t *>(t1));
1002   t1 += sizeof(std::uint32_t);
1003   if (tmp != sizeof(std::uint32_t)) {
1004     elog(ERROR,
1005          "calcSparseStringDiceSml: could not convert argument 1 -> uint32_t");
1006   }
1007   tmp = *(reinterpret_cast<const std::uint32_t *>(t2));
1008   t2 += sizeof(std::uint32_t);
1009   if (tmp != sizeof(std::uint32_t)) {
1010     elog(ERROR,
1011          "calcSparseStringDiceSml: could not convert argument 2 -> uint32_t");
1012   }
1013 
1014   double res = 0.;
1015   // start reading:
1016   std::uint32_t len1, len2;
1017   len1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1018   t1 += sizeof(std::uint32_t);
1019   len2 = *(reinterpret_cast<const std::uint32_t *>(t2));
1020   t2 += sizeof(std::uint32_t);
1021   if (len1 != len2) {
1022     elog(ERROR, "attempt to compare fingerprints of different length");
1023   }
1024 
1025   std::uint32_t nElem1, nElem2;
1026   nElem1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1027   t1 += sizeof(std::uint32_t);
1028   nElem2 = *(reinterpret_cast<const std::uint32_t *>(t2));
1029   t2 += sizeof(std::uint32_t);
1030 
1031   if (!nElem1 || !nElem2) {
1032     return 0.0;
1033   }
1034 
1035   double v1Sum = 0, v2Sum = 0, numer = 0;
1036   std::uint32_t idx1 = 0;
1037   std::int32_t v1;
1038   std::uint32_t idx2 = 0;
1039   std::int32_t v2;
1040   idx1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1041   t1 += sizeof(std::uint32_t);
1042   v1 = *(reinterpret_cast<const std::int32_t *>(t1));
1043   t1 += sizeof(std::int32_t);
1044   nElem1--;
1045   v1Sum += v1;
1046 
1047   idx2 = *(reinterpret_cast<const std::uint32_t *>(t2));
1048   t2 += sizeof(std::uint32_t);
1049   v2 = *(reinterpret_cast<const std::int32_t *>(t2));
1050   t2 += sizeof(std::int32_t);
1051   nElem2--;
1052   v2Sum += v2;
1053 
1054   while (1) {
1055     while (nElem2 && idx2 < idx1) {
1056       idx2 = *(reinterpret_cast<const std::uint32_t *>(t2));
1057       t2 += sizeof(std::uint32_t);
1058       v2 = *(reinterpret_cast<const std::int32_t *>(t2));
1059       t2 += sizeof(std::int32_t);
1060       nElem2--;
1061       v2Sum += v2;
1062     }
1063     if (idx2 == idx1) {
1064       // std::cerr<<"   --- "<<idx1<<" "<<v1<<" - "<<idx2<<" "<<v2<<std::endl;
1065       numer += std::min(v1, v2);
1066     }
1067     if (nElem1) {
1068       idx1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1069       t1 += sizeof(std::uint32_t);
1070       v1 = *(reinterpret_cast<const std::int32_t *>(t1));
1071       t1 += sizeof(std::int32_t);
1072       nElem1--;
1073       v1Sum += v1;
1074     } else {
1075       break;
1076     }
1077   }
1078   while (nElem2) {
1079     idx2 = *(reinterpret_cast<const std::uint32_t *>(t2));
1080     t2 += sizeof(std::uint32_t);
1081     v2 = *(reinterpret_cast<const std::int32_t *>(t2));
1082     t2 += sizeof(std::int32_t);
1083     nElem2--;
1084     v2Sum += v2;
1085   }
1086   double denom = v1Sum + v2Sum;
1087   if (fabs(denom) < 1e-6) {
1088     res = 0.0;
1089   } else {
1090     res = 2. * numer / denom;
1091   }
1092 
1093   return res;
1094 }
1095 
calcSparseStringAllValsGT(const char * a,unsigned int sza,int tgt)1096 extern "C" bool calcSparseStringAllValsGT(const char *a, unsigned int sza,
1097                                           int tgt) {
1098   const auto *t1 = (const unsigned char *)a;
1099 
1100   std::uint32_t tmp;
1101   tmp = *(reinterpret_cast<const std::uint32_t *>(t1));
1102   t1 += sizeof(std::uint32_t);
1103   if (tmp != (std::uint32_t)ci_SPARSEINTVECT_VERSION) {
1104     elog(ERROR, "calcSparseStringAllValsGT: could not convert argument 1");
1105   }
1106   // check the element size:
1107   tmp = *(reinterpret_cast<const std::uint32_t *>(t1));
1108   t1 += sizeof(std::uint32_t);
1109   if (tmp != sizeof(std::uint32_t)) {
1110     elog(ERROR,
1111          "calcSparseStringAllValsGT: could not convert argument 1 -> "
1112          "uint32_t");
1113   }
1114 
1115   // std::uint32_t len1;
1116   // len1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1117   t1 += sizeof(std::uint32_t);
1118 
1119   std::uint32_t nElem1;
1120   nElem1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1121   t1 += sizeof(std::uint32_t);
1122 
1123   while (nElem1) {
1124     --nElem1;
1125     // skip the index:
1126     t1 += sizeof(std::uint32_t);
1127     std::int32_t v1 = *(reinterpret_cast<const std::int32_t *>(t1));
1128     t1 += sizeof(std::int32_t);
1129 
1130     if (v1 <= tgt) {
1131       return false;
1132     }
1133   }
1134   return true;
1135 }
calcSparseStringAllValsLT(const char * a,unsigned int sza,int tgt)1136 extern "C" bool calcSparseStringAllValsLT(const char *a, unsigned int sza,
1137                                           int tgt) {
1138   const auto *t1 = (const unsigned char *)a;
1139 
1140   std::uint32_t tmp;
1141   tmp = *(reinterpret_cast<const std::uint32_t *>(t1));
1142   t1 += sizeof(std::uint32_t);
1143   if (tmp != (std::uint32_t)ci_SPARSEINTVECT_VERSION) {
1144     elog(ERROR, "calcSparseStringAllValsGT: could not convert argument 1");
1145   }
1146   // check the element size:
1147   tmp = *(reinterpret_cast<const std::uint32_t *>(t1));
1148   t1 += sizeof(std::uint32_t);
1149   if (tmp != sizeof(std::uint32_t)) {
1150     elog(ERROR,
1151          "calcSparseStringAllValsGT: could not convert argument 1 -> "
1152          "uint32_t");
1153   }
1154 
1155   // std::uint32_t len1;
1156   // len1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1157   t1 += sizeof(std::uint32_t);
1158 
1159   std::uint32_t nElem1;
1160   nElem1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1161   t1 += sizeof(std::uint32_t);
1162 
1163   while (nElem1) {
1164     --nElem1;
1165     // skip the index:
1166     t1 += sizeof(std::uint32_t);
1167     std::int32_t v1 = *(reinterpret_cast<const std::int32_t *>(t1));
1168     t1 += sizeof(std::int32_t);
1169 
1170     if (v1 >= tgt) {
1171       return false;
1172     }
1173   }
1174   return true;
1175 }
1176 
addSFP(CSfp a,CSfp b)1177 extern "C" CSfp addSFP(CSfp a, CSfp b) {
1178   SparseFP *res = nullptr;
1179   try {
1180     SparseFP tmp = (*(SparseFP *)a + *(SparseFP *)b);
1181     res = (SparseFP *)new SparseFP(tmp);
1182   } catch (...) {
1183     elog(ERROR, "addSFP: Unknown exception");
1184   }
1185   return (CSfp)res;
1186 }
1187 
subtractSFP(CSfp a,CSfp b)1188 extern "C" CSfp subtractSFP(CSfp a, CSfp b) {
1189   SparseFP *res = nullptr;
1190   try {
1191     SparseFP tmp = (*(SparseFP *)a - *(SparseFP *)b);
1192     res = (SparseFP *)new SparseFP(tmp);
1193   } catch (...) {
1194     elog(ERROR, "addSFP: Unknown exception");
1195   }
1196   return (CSfp)res;
1197 }
1198 
1199 /*
1200  * Mol -> fp
1201  */
makeLayeredBFP(CROMol data)1202 extern "C" CBfp makeLayeredBFP(CROMol data) {
1203   auto *mol = (ROMol *)data;
1204   ExplicitBitVect *res = nullptr;
1205 
1206   try {
1207     res = RDKit::LayeredFingerprintMol(*mol, 0xFFFFFFFF, 1, 7,
1208                                        getLayeredFpSize());
1209   } catch (...) {
1210     elog(ERROR, "makeLayeredBFP: Unknown exception");
1211     if (res) {
1212       delete res;
1213     }
1214     res = nullptr;
1215   }
1216   if (res) {
1217     std::string *sres = new std::string(BitVectToBinaryText(*res));
1218     delete res;
1219     return (CBfp)sres;
1220   } else {
1221     return nullptr;
1222   }
1223 }
1224 
makeRDKitBFP(CROMol data)1225 extern "C" CBfp makeRDKitBFP(CROMol data) {
1226   auto *mol = (ROMol *)data;
1227   ExplicitBitVect *res = nullptr;
1228 
1229   try {
1230     res = RDKit::RDKFingerprintMol(*mol, 1, 6, getRDKitFpSize(), 2);
1231   } catch (...) {
1232     elog(ERROR, "makeRDKitBFP: Unknown exception");
1233     if (res) {
1234       delete res;
1235     }
1236     res = nullptr;
1237   }
1238 
1239   if (res) {
1240     std::string *sres = new std::string(BitVectToBinaryText(*res));
1241     delete res;
1242     return (CBfp)sres;
1243   } else {
1244     return nullptr;
1245   }
1246 }
1247 
makeMorganSFP(CROMol data,int radius)1248 extern "C" CSfp makeMorganSFP(CROMol data, int radius) {
1249   auto *mol = (ROMol *)data;
1250   SparseFP *res = nullptr;
1251   std::vector<std::uint32_t> invars(mol->getNumAtoms());
1252   try {
1253     RDKit::MorganFingerprints::getConnectivityInvariants(*mol, invars, true);
1254     res = (SparseFP *)RDKit::MorganFingerprints::getFingerprint(*mol, radius,
1255                                                                 &invars);
1256   } catch (...) {
1257     elog(ERROR, "makeMorganSFP: Unknown exception");
1258   }
1259 
1260   return (CSfp)res;
1261 }
1262 
makeMorganBFP(CROMol data,int radius)1263 extern "C" CBfp makeMorganBFP(CROMol data, int radius) {
1264   auto *mol = (ROMol *)data;
1265   ExplicitBitVect *res = nullptr;
1266   std::vector<std::uint32_t> invars(mol->getNumAtoms());
1267   try {
1268     RDKit::MorganFingerprints::getConnectivityInvariants(*mol, invars, true);
1269     res = RDKit::MorganFingerprints::getFingerprintAsBitVect(
1270         *mol, radius, getMorganFpSize(), &invars);
1271   } catch (...) {
1272     elog(ERROR, "makeMorganBFP: Unknown exception");
1273   }
1274 
1275   if (res) {
1276     std::string *sres = new std::string(BitVectToBinaryText(*res));
1277     delete res;
1278     return (CBfp)sres;
1279   } else {
1280     return nullptr;
1281   }
1282 }
1283 
makeFeatMorganSFP(CROMol data,int radius)1284 extern "C" CSfp makeFeatMorganSFP(CROMol data, int radius) {
1285   auto *mol = (ROMol *)data;
1286   SparseFP *res = nullptr;
1287   std::vector<std::uint32_t> invars(mol->getNumAtoms());
1288   try {
1289     RDKit::MorganFingerprints::getFeatureInvariants(*mol, invars);
1290     res = (SparseFP *)RDKit::MorganFingerprints::getFingerprint(*mol, radius,
1291                                                                 &invars);
1292   } catch (...) {
1293     elog(ERROR, "makeMorganSFP: Unknown exception");
1294   }
1295 
1296   return (CSfp)res;
1297 }
1298 
makeFeatMorganBFP(CROMol data,int radius)1299 extern "C" CBfp makeFeatMorganBFP(CROMol data, int radius) {
1300   auto *mol = (ROMol *)data;
1301   ExplicitBitVect *res = nullptr;
1302   std::vector<std::uint32_t> invars(mol->getNumAtoms());
1303   try {
1304     RDKit::MorganFingerprints::getFeatureInvariants(*mol, invars);
1305     res = RDKit::MorganFingerprints::getFingerprintAsBitVect(
1306         *mol, radius, getFeatMorganFpSize(), &invars);
1307   } catch (...) {
1308     elog(ERROR, "makeMorganBFP: Unknown exception");
1309   }
1310 
1311   if (res) {
1312     std::string *sres = new std::string(BitVectToBinaryText(*res));
1313     delete res;
1314     return (CBfp)sres;
1315   } else {
1316     return nullptr;
1317   }
1318 }
1319 
makeAtomPairSFP(CROMol data)1320 extern "C" CSfp makeAtomPairSFP(CROMol data) {
1321   auto *mol = (ROMol *)data;
1322   SparseFP *res = nullptr;
1323 #ifdef UNHASHED_PAIR_FPS
1324   try {
1325     SparseIntVect<std::int32_t> *afp =
1326         RDKit::AtomPairs::getAtomPairFingerprint(*mol);
1327     res = new SparseFP(1 << RDKit::AtomPairs::numAtomPairFingerprintBits);
1328     for (SparseIntVect<std::int32_t>::StorageType::const_iterator iter =
1329              afp->getNonzeroElements().begin();
1330          iter != afp->getNonzeroElements().end(); ++iter) {
1331       res->setVal(iter->first, iter->second);
1332     }
1333     delete afp;
1334   } catch (...) {
1335     elog(ERROR, "makeAtomPairSFP: Unknown exception");
1336   }
1337 #else
1338   try {
1339     SparseIntVect<std::int32_t> *afp =
1340         RDKit::AtomPairs::getHashedAtomPairFingerprint(
1341             *mol, getHashedAtomPairFpSize());
1342     res = new SparseFP(getHashedAtomPairFpSize());
1343     for (const auto &iter : afp->getNonzeroElements()) {
1344       res->setVal(iter.first, iter.second);
1345     }
1346     delete afp;
1347   } catch (...) {
1348     elog(ERROR, "makeAtomPairSFP: Unknown exception");
1349   }
1350 #endif
1351   return (CSfp)res;
1352 }
1353 
makeTopologicalTorsionSFP(CROMol data)1354 extern "C" CSfp makeTopologicalTorsionSFP(CROMol data) {
1355   auto *mol = (ROMol *)data;
1356   SparseFP *res = nullptr;
1357 
1358 #ifdef UNHASHED_PAIR_FPS
1359   try {
1360     SparseIntVect<boost::int64_t> *afp =
1361         RDKit::AtomPairs::getHashedTopologicalTorsionFingerprint(
1362             *mol, boost::integer_traits<std::uint32_t>::const_max);
1363     res = new SparseFP(boost::integer_traits<std::uint32_t>::const_max);
1364     for (SparseIntVect<boost::int64_t>::StorageType::const_iterator iter =
1365              afp->getNonzeroElements().begin();
1366          iter != afp->getNonzeroElements().end(); ++iter) {
1367       res->setVal(iter->first, iter->second);
1368     }
1369     delete afp;
1370   } catch (...) {
1371     elog(ERROR, "makeTopologicalTorsionSFP: Unknown exception");
1372   }
1373 #else
1374   try {
1375     SparseIntVect<boost::int64_t> *afp =
1376         RDKit::AtomPairs::getHashedTopologicalTorsionFingerprint(
1377             *mol, getHashedTorsionFpSize());
1378     res = new SparseFP(getHashedTorsionFpSize());
1379     for (const auto &iter : afp->getNonzeroElements()) {
1380       res->setVal(iter.first, iter.second);
1381     }
1382     delete afp;
1383   } catch (...) {
1384     elog(ERROR, "makeTopologicalTorsionSFP: Unknown exception");
1385   }
1386 #endif
1387   return (CSfp)res;
1388 }
1389 
makeAtomPairBFP(CROMol data)1390 extern "C" CBfp makeAtomPairBFP(CROMol data) {
1391   auto *mol = (ROMol *)data;
1392   ExplicitBitVect *res = nullptr;
1393   try {
1394     res = RDKit::AtomPairs::getHashedAtomPairFingerprintAsBitVect(
1395         *mol, getHashedAtomPairFpSize());
1396   } catch (...) {
1397     elog(ERROR, "makeAtomPairBFP: Unknown exception");
1398   }
1399   if (res) {
1400     std::string *sres = new std::string(BitVectToBinaryText(*res));
1401     delete res;
1402     return (CBfp)sres;
1403   } else {
1404     return nullptr;
1405   }
1406 }
1407 
makeTopologicalTorsionBFP(CROMol data)1408 extern "C" CBfp makeTopologicalTorsionBFP(CROMol data) {
1409   auto *mol = (ROMol *)data;
1410   ExplicitBitVect *res = nullptr;
1411   try {
1412     res = RDKit::AtomPairs::getHashedTopologicalTorsionFingerprintAsBitVect(
1413         *mol, getHashedTorsionFpSize());
1414   } catch (...) {
1415     elog(ERROR, "makeTopologicalTorsionBFP: Unknown exception");
1416   }
1417   if (res) {
1418     std::string *sres = new std::string(BitVectToBinaryText(*res));
1419     delete res;
1420     return (CBfp)sres;
1421   } else {
1422     return nullptr;
1423   }
1424 }
1425 
makeMACCSBFP(CROMol data)1426 extern "C" CBfp makeMACCSBFP(CROMol data) {
1427   auto *mol = (ROMol *)data;
1428   ExplicitBitVect *res = nullptr;
1429   try {
1430     res = RDKit::MACCSFingerprints::getFingerprintAsBitVect(*mol);
1431   } catch (...) {
1432     elog(ERROR, "makeMACCSBFP: Unknown exception");
1433   }
1434   if (res) {
1435     std::string *sres = new std::string(BitVectToBinaryText(*res));
1436     delete res;
1437     return (CBfp)sres;
1438   } else {
1439     return nullptr;
1440   }
1441 }
1442 
makeAvalonBFP(CROMol data,bool isQuery,unsigned int bitFlags)1443 extern "C" CBfp makeAvalonBFP(CROMol data, bool isQuery,
1444                               unsigned int bitFlags) {
1445 #ifdef RDK_BUILD_AVALON_SUPPORT
1446   auto *mol = (ROMol *)data;
1447   ExplicitBitVect *res = nullptr;
1448   try {
1449     res = new ExplicitBitVect(getAvalonFpSize());
1450     AvalonTools::getAvalonFP(*mol, *res, getAvalonFpSize(), isQuery, true,
1451                              bitFlags);
1452   } catch (...) {
1453     elog(ERROR, "makeAvalonBFP: Unknown exception");
1454   }
1455 
1456   if (res) {
1457     std::string *sres = new std::string(BitVectToBinaryText(*res));
1458     delete res;
1459     return (CBfp)sres;
1460   } else {
1461     return nullptr;
1462   }
1463 #else
1464   elog(ERROR, "Avalon support not enabled");
1465   return NULL;
1466 #endif
1467 }
1468 
1469 /* chemical reactions */
1470 
freeChemReaction(CChemicalReaction data)1471 extern "C" void freeChemReaction(CChemicalReaction data) {
1472   auto *rxn = (ChemicalReaction *)data;
1473   delete rxn;
1474 }
1475 
constructChemReact(Reaction * data)1476 extern "C" CChemicalReaction constructChemReact(Reaction *data) {
1477   auto *rxn = new ChemicalReaction();
1478 
1479   try {
1480     ByteA b(data);
1481     ReactionPickler::reactionFromPickle(b, rxn);
1482   } catch (ReactionPicklerException &e) {
1483     elog(ERROR, "reactionFromPickle: %s", e.what());
1484   } catch (...) {
1485     elog(ERROR, "constructChemReact: Unknown exception");
1486   }
1487 
1488   return (CChemicalReaction)rxn;
1489 }
1490 
deconstructChemReact(CChemicalReaction data)1491 extern "C" Reaction *deconstructChemReact(CChemicalReaction data) {
1492   auto *rxn = (ChemicalReaction *)data;
1493   ByteA b;
1494 
1495   try {
1496     ReactionPickler::pickleReaction(rxn, b);
1497   } catch (ReactionPicklerException &e) {
1498     elog(ERROR, "pickleReaction: %s", e.what());
1499   } catch (...) {
1500     elog(ERROR, "deconstructChemReact: Unknown exception");
1501   }
1502 
1503   return (Reaction *)b.toByteA();
1504 }
1505 
parseChemReactText(char * data,bool asSmarts,bool warnOnFail)1506 extern "C" CChemicalReaction parseChemReactText(char *data, bool asSmarts,
1507                                                 bool warnOnFail) {
1508   ChemicalReaction *rxn = nullptr;
1509 
1510   try {
1511     if (asSmarts) {
1512       rxn = RxnSmartsToChemicalReaction(data);
1513     } else {
1514       rxn = RxnSmartsToChemicalReaction(data, nullptr, true);
1515     }
1516     if (getInitReaction()) {
1517       rxn->initReactantMatchers();
1518     }
1519     if (getMoveUnmappedReactantsToAgents() && hasReactionAtomMapping(*rxn)) {
1520       rxn->removeUnmappedReactantTemplates(getThresholdUnmappedReactantAtoms());
1521     }
1522   } catch (...) {
1523     rxn = nullptr;
1524   }
1525   if (rxn == nullptr) {
1526     if (warnOnFail) {
1527       ereport(WARNING,
1528               (errcode(ERRCODE_WARNING),
1529                errmsg("could not create chemical reaction from SMILES '%s'",
1530                       data)));
1531     } else {
1532       ereport(ERROR,
1533               (errcode(ERRCODE_DATA_EXCEPTION),
1534                errmsg("could not create chemical reaction  from SMILES '%s'",
1535                       data)));
1536     }
1537   }
1538 
1539   return (CChemicalReaction)rxn;
1540 }
1541 
parseChemReactBlob(char * data,int len)1542 extern "C" CChemicalReaction parseChemReactBlob(char *data, int len) {
1543   ChemicalReaction *rxn = nullptr;
1544 
1545   try {
1546     string binStr(data, len);
1547     rxn = new ChemicalReaction(binStr);
1548     if (getInitReaction()) {
1549       rxn->initReactantMatchers();
1550     }
1551     if (getMoveUnmappedReactantsToAgents() && hasReactionAtomMapping(*rxn)) {
1552       rxn->removeUnmappedReactantTemplates(getThresholdUnmappedReactantAtoms());
1553     }
1554   } catch (...) {
1555     ereport(ERROR,
1556             (errcode(ERRCODE_DATA_EXCEPTION),
1557              errmsg("problem generating chemical reaction from blob data")));
1558   }
1559   if (rxn == nullptr) {
1560     ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION),
1561                     errmsg("blob data could not be parsed")));
1562   }
1563 
1564   return (CChemicalReaction)rxn;
1565 }
1566 
makeChemReactText(CChemicalReaction data,int * len,bool asSmarts)1567 extern "C" char *makeChemReactText(CChemicalReaction data, int *len,
1568                                    bool asSmarts) {
1569   auto *rxn = (ChemicalReaction *)data;
1570 
1571   try {
1572     if (!asSmarts) {
1573       StringData = ChemicalReactionToRxnSmiles(*rxn);
1574     } else {
1575       StringData = ChemicalReactionToRxnSmarts(*rxn);
1576     }
1577   } catch (...) {
1578     ereport(WARNING, (errcode(ERRCODE_WARNING),
1579                       errmsg("makeChemReactText: problems converting chemical "
1580                              "reaction  to SMILES/SMARTS")));
1581     StringData = "";
1582   }
1583 
1584   *len = StringData.size();
1585   return (char *)StringData.c_str();
1586 }
1587 
makeChemReactBlob(CChemicalReaction data,int * len)1588 extern "C" char *makeChemReactBlob(CChemicalReaction data, int *len) {
1589   auto *rxn = (ChemicalReaction *)data;
1590   StringData.clear();
1591   try {
1592     ReactionPickler::pickleReaction(*rxn, StringData);
1593   } catch (...) {
1594     elog(ERROR, "makeChemReactBlob: Unknown exception");
1595   }
1596 
1597   *len = StringData.size();
1598   return (char *)StringData.data();
1599 }
1600 
parseChemReactCTAB(char * data,bool warnOnFail)1601 extern "C" CChemicalReaction parseChemReactCTAB(char *data, bool warnOnFail) {
1602   ChemicalReaction *rxn = nullptr;
1603 
1604   try {
1605     rxn = RxnBlockToChemicalReaction(data);
1606     if (getInitReaction()) {
1607       rxn->initReactantMatchers();
1608     }
1609     if (getMoveUnmappedReactantsToAgents() && hasReactionAtomMapping(*rxn)) {
1610       rxn->removeUnmappedReactantTemplates(getThresholdUnmappedReactantAtoms());
1611     }
1612   } catch (...) {
1613     rxn = nullptr;
1614   }
1615   if (rxn == nullptr) {
1616     if (warnOnFail) {
1617       ereport(WARNING,
1618               (errcode(ERRCODE_WARNING),
1619                errmsg("could not create reaction from CTAB '%s'", data)));
1620 
1621     } else {
1622       ereport(ERROR,
1623               (errcode(ERRCODE_DATA_EXCEPTION),
1624                errmsg("could not create reaction from CTAB '%s'", data)));
1625     }
1626   }
1627 
1628   return (CChemicalReaction)rxn;
1629 }
1630 
makeCTABChemReact(CChemicalReaction data,int * len)1631 extern "C" char *makeCTABChemReact(CChemicalReaction data, int *len) {
1632   auto *rxn = (ChemicalReaction *)data;
1633 
1634   try {
1635     StringData = ChemicalReactionToRxnBlock(*rxn);
1636   } catch (...) {
1637     ereport(
1638         WARNING,
1639         (errcode(ERRCODE_WARNING),
1640          errmsg("makeCTABChemReact: problems converting reaction to CTAB")));
1641     StringData = "";
1642   }
1643 
1644   *len = StringData.size();
1645   return (char *)StringData.c_str();
1646 }
1647 
ChemReactNumReactants(CChemicalReaction crxn)1648 extern "C" int ChemReactNumReactants(CChemicalReaction crxn) {
1649   const ChemicalReaction *rxn = (ChemicalReaction *)crxn;
1650   return rxn->getNumReactantTemplates();
1651 }
1652 
ChemReactNumProducts(CChemicalReaction crxn)1653 extern "C" int ChemReactNumProducts(CChemicalReaction crxn) {
1654   const ChemicalReaction *rxn = (ChemicalReaction *)crxn;
1655   return rxn->getNumProductTemplates();
1656 }
1657 
ChemReactNumAgents(CChemicalReaction crxn)1658 extern "C" int ChemReactNumAgents(CChemicalReaction crxn) {
1659   const ChemicalReaction *rxn = (ChemicalReaction *)crxn;
1660   return rxn->getNumAgentTemplates();
1661 }
1662 
makeReactionSign(CChemicalReaction data)1663 extern "C" bytea *makeReactionSign(CChemicalReaction data) {
1664   auto *rxn = (ChemicalReaction *)data;
1665   ExplicitBitVect *res = nullptr;
1666   bytea *ret = nullptr;
1667 
1668   try {
1669     RDKit::ReactionFingerprintParams params;
1670     params.fpType = static_cast<FingerprintType>(getReactionSubstructFpType());
1671     params.fpSize = getReactionSubstructFpSize();
1672     params.includeAgents = (!getIgnoreReactionAgents());
1673     params.bitRatioAgents = getReactionStructuralFPAgentBitRatio();
1674     res = RDKit::StructuralFingerprintChemReaction(*rxn, params);
1675 
1676     if (res) {
1677       std::string sres = BitVectToBinaryText(*res);
1678 
1679       unsigned int varsize = VARHDRSZ + sres.size();
1680       ret = (bytea *)palloc0(varsize);
1681       memcpy(VARDATA(ret), sres.data(), sres.size());
1682       SET_VARSIZE(ret, varsize);
1683 
1684       delete res;
1685       res = nullptr;
1686     }
1687   } catch (...) {
1688     elog(ERROR, "makeReactionSign: Unknown exception");
1689     if (res) {
1690       delete res;
1691     }
1692   }
1693   return ret;
1694 }
1695 
ReactionSubstruct(CChemicalReaction rxn,CChemicalReaction rxn2)1696 extern "C" int ReactionSubstruct(CChemicalReaction rxn,
1697                                  CChemicalReaction rxn2) {
1698   auto *rxnm = (ChemicalReaction *)rxn;
1699   auto *rxn2m = (ChemicalReaction *)rxn2;
1700 
1701   /* Reaction search */
1702   if (rxn2m->getNumReactantTemplates() != 0 &&
1703       rxn2m->getNumProductTemplates() != 0) {
1704     return hasReactionSubstructMatch(*rxnm, *rxn2m,
1705                                      (!getIgnoreReactionAgents()));
1706   }
1707   /* Product search */
1708   if (rxn2m->getNumReactantTemplates() == 0 &&
1709       rxn2m->getNumProductTemplates() != 0) {
1710     if (rxn2m->getNumAgentTemplates() != 0 && !getIgnoreReactionAgents()) {
1711       return (hasProductTemplateSubstructMatch(*rxnm, *rxn2m) &&
1712               hasAgentTemplateSubstructMatch(*rxnm, *rxn2m));
1713     }
1714     return hasProductTemplateSubstructMatch(*rxnm, *rxn2m);
1715   }
1716   /* Reactant search */
1717   if (rxn2m->getNumReactantTemplates() != 0 &&
1718       rxn2m->getNumProductTemplates() == 0) {
1719     if (rxn2m->getNumAgentTemplates() != 0 && !getIgnoreReactionAgents()) {
1720       return (hasReactantTemplateSubstructMatch(*rxnm, *rxn2m) &&
1721               hasAgentTemplateSubstructMatch(*rxnm, *rxn2m));
1722     }
1723     return hasReactantTemplateSubstructMatch(*rxnm, *rxn2m);
1724   }
1725   /* Agent search */
1726   if (rxn2m->getNumReactantTemplates() == 0 &&
1727       rxn2m->getNumProductTemplates() == 0 &&
1728       rxn2m->getNumAgentTemplates() != 0) {
1729     return hasAgentTemplateSubstructMatch(*rxnm, *rxn2m);
1730   }
1731 
1732   return false;
1733 }
1734 
ReactionSubstructFP(CChemicalReaction rxn,CChemicalReaction rxnquery)1735 extern "C" int ReactionSubstructFP(CChemicalReaction rxn,
1736                                    CChemicalReaction rxnquery) {
1737   auto *rxnm = (ChemicalReaction *)rxn;
1738   auto *rxnqm = (ChemicalReaction *)rxnquery;
1739 
1740   RDKit::ReactionFingerprintParams params;
1741   params.fpType = static_cast<FingerprintType>(getReactionSubstructFpType());
1742   params.fpSize = getReactionSubstructFpSize();
1743   params.includeAgents = (!getIgnoreReactionAgents());
1744   params.bitRatioAgents = getReactionStructuralFPAgentBitRatio();
1745 
1746   ExplicitBitVect *fp1 = StructuralFingerprintChemReaction(*rxnm, params);
1747   ExplicitBitVect *fp2 = StructuralFingerprintChemReaction(*rxnqm, params);
1748 
1749   if (fp1->getNumOnBits() < fp2->getNumOnBits()) {
1750     return false;
1751   }
1752   for (unsigned i = 0; i < fp1->getNumBits(); i++) {
1753     if ((fp1->getBit(i) & fp2->getBit(i)) != fp2->getBit(i)) {
1754       return false;
1755     }
1756   }
1757   return true;
1758 }
1759 
1760 // some helper functions in anonymous namespace
1761 namespace {
1762 
1763 struct MoleculeDescriptors {
MoleculeDescriptors__anonedffb5b40111::MoleculeDescriptors1764   MoleculeDescriptors() {}
1765   unsigned nAtoms{0};
1766   unsigned nBonds{0};
1767   unsigned nRings{0};
1768   double MW{0.0};
1769 };
1770 
calcMolecularDescriptorsReaction(RDKit::ChemicalReaction * rxn,RDKit::ReactionMoleculeType t)1771 MoleculeDescriptors *calcMolecularDescriptorsReaction(
1772     RDKit::ChemicalReaction *rxn, RDKit::ReactionMoleculeType t) {
1773   auto *des = new MoleculeDescriptors();
1774   auto begin = getStartIterator(*rxn, t);
1775   auto end = getEndIterator(*rxn, t);
1776   for (; begin != end; ++begin) {
1777     des->nAtoms += begin->get()->getNumHeavyAtoms();
1778     des->nBonds += begin->get()->getNumBonds(true);
1779     des->MW = RDKit::Descriptors::calcAMW(*begin->get(), true);
1780     if (!begin->get()->getRingInfo()->isInitialized()) {
1781       begin->get()->updatePropertyCache();
1782       RDKit::MolOps::findSSSR(*begin->get());
1783     }
1784     des->nRings += begin->get()->getRingInfo()->numRings();
1785   }
1786   return des;
1787 }
1788 
compareMolDescriptors(const MoleculeDescriptors & md1,const MoleculeDescriptors & md2)1789 int compareMolDescriptors(const MoleculeDescriptors &md1,
1790                           const MoleculeDescriptors &md2) {
1791   int res = md1.nAtoms - md2.nAtoms;
1792   if (res) {
1793     return res;
1794   }
1795   res = md1.nBonds - md2.nBonds;
1796   if (res) {
1797     return res;
1798   }
1799   res = md1.nRings - md2.nRings;
1800   if (res) {
1801     return res;
1802   }
1803   res = int(md1.MW - md2.MW);
1804   if (res) {
1805     return res;
1806   }
1807   return 0;
1808 }
1809 }  // namespace
1810 
reactioncmp(CChemicalReaction rxn,CChemicalReaction rxn2)1811 extern "C" int reactioncmp(CChemicalReaction rxn, CChemicalReaction rxn2) {
1812   auto *rxnm = (ChemicalReaction *)rxn;
1813   auto *rxn2m = (ChemicalReaction *)rxn2;
1814 
1815   if (!rxnm) {
1816     if (!rxn2m) {
1817       return 0;
1818     }
1819     return -1;
1820   }
1821   if (!rxn2m) {
1822     return 1;
1823   }
1824 
1825   int res = rxnm->getNumReactantTemplates() - rxn2m->getNumReactantTemplates();
1826   if (res) {
1827     return res;
1828   }
1829   res = rxnm->getNumProductTemplates() - rxn2m->getNumProductTemplates();
1830   if (res) {
1831     return res;
1832   }
1833   if (!getIgnoreReactionAgents()) {
1834     res = rxnm->getNumAgentTemplates() - rxn2m->getNumAgentTemplates();
1835     if (res) {
1836       return res;
1837     }
1838   }
1839 
1840   MoleculeDescriptors *rxn_react =
1841       calcMolecularDescriptorsReaction(rxnm, Reactant);
1842   MoleculeDescriptors *rxn2_react =
1843       calcMolecularDescriptorsReaction(rxn2m, Reactant);
1844   res = compareMolDescriptors(*rxn_react, *rxn2_react);
1845   delete (rxn_react);
1846   delete (rxn2_react);
1847   if (res) {
1848     return res;
1849   }
1850   MoleculeDescriptors *rxn_product =
1851       calcMolecularDescriptorsReaction(rxnm, Product);
1852   MoleculeDescriptors *rxn2_product =
1853       calcMolecularDescriptorsReaction(rxn2m, Product);
1854   res = compareMolDescriptors(*rxn_product, *rxn2_product);
1855   delete (rxn_product);
1856   delete (rxn2_product);
1857   if (res) {
1858     return res;
1859   }
1860   if (!getIgnoreReactionAgents()) {
1861     MoleculeDescriptors *rxn_agent =
1862         calcMolecularDescriptorsReaction(rxnm, Agent);
1863     MoleculeDescriptors *rxn2_agent =
1864         calcMolecularDescriptorsReaction(rxn2m, Agent);
1865     res = compareMolDescriptors(*rxn_agent, *rxn2_agent);
1866     delete (rxn_agent);
1867     delete (rxn2_agent);
1868     if (res) {
1869       return res;
1870     }
1871   }
1872 
1873   RDKit::MatchVectType matchVect;
1874   if (hasReactionSubstructMatch(*rxnm, *rxn2m, (!getIgnoreReactionAgents()))) {
1875     return 0;
1876   }
1877   return -1;
1878 }
1879 
makeReactionDifferenceSFP(CChemicalReaction data,int size,int fpType)1880 extern "C" CSfp makeReactionDifferenceSFP(CChemicalReaction data, int size,
1881                                           int fpType) {
1882   auto *rxn = (ChemicalReaction *)data;
1883   SparseFP *res = nullptr;
1884 
1885   try {
1886     if (fpType > 3 || fpType < 1) {
1887       elog(ERROR, "makeReactionDifferenceSFP: Unknown Fingerprint type");
1888     }
1889     auto fp = static_cast<RDKit::FingerprintType>(fpType);
1890     RDKit::ReactionFingerprintParams params;
1891     params.fpType = static_cast<FingerprintType>(fpType);
1892     params.fpSize = size;
1893     params.includeAgents = (!getIgnoreReactionAgents());
1894     params.agentWeight = getReactionDifferenceFPWeightAgents();
1895     params.nonAgentWeight = getReactionDifferenceFPWeightNonagents();
1896     res = (SparseFP *)RDKit::DifferenceFingerprintChemReaction(*rxn, params);
1897   } catch (...) {
1898     elog(ERROR, "makeReactionDifferenceSFP: Unknown exception");
1899   }
1900   return (CSfp)res;
1901 }
1902 
makeReactionBFP(CChemicalReaction data,int size,int fpType)1903 extern "C" CBfp makeReactionBFP(CChemicalReaction data, int size, int fpType) {
1904   auto *rxn = (ChemicalReaction *)data;
1905   ExplicitBitVect *res = nullptr;
1906 
1907   try {
1908     if (fpType > 5 || fpType < 1) {
1909       elog(ERROR, "makeReactionBFP: Unknown Fingerprint type");
1910     }
1911     auto fp = static_cast<RDKit::FingerprintType>(fpType);
1912     RDKit::ReactionFingerprintParams params;
1913     params.fpType = static_cast<FingerprintType>(fpType);
1914     params.fpSize = size;
1915     params.includeAgents = (!getIgnoreReactionAgents());
1916     params.bitRatioAgents = getReactionStructuralFPAgentBitRatio();
1917     res = (ExplicitBitVect *)RDKit::StructuralFingerprintChemReaction(*rxn,
1918                                                                       params);
1919   } catch (...) {
1920     elog(ERROR, "makeReactionBFP: Unknown exception");
1921   }
1922 
1923   if (res) {
1924     std::string *sres = new std::string(BitVectToBinaryText(*res));
1925     delete res;
1926     return (CBfp)sres;
1927   } else {
1928     return nullptr;
1929   }
1930 }
1931 
computeNMMolHash(CROMol data,const char * which)1932 extern "C" char *computeNMMolHash(CROMol data, const char *which) {
1933   RWMol mol(*(ROMol *)data);
1934 
1935   RDKit::MolHash::HashFunction func =
1936       RDKit::MolHash::HashFunction::AnonymousGraph;
1937   if (!strcmp(which, "AnonymousGraph")) {
1938     func = RDKit::MolHash::HashFunction::AnonymousGraph;
1939   } else if (!strcmp(which, "ElementGraph")) {
1940     func = RDKit::MolHash::HashFunction::ElementGraph;
1941   } else if (!strcmp(which, "CanonicalSmiles")) {
1942     func = RDKit::MolHash::HashFunction::CanonicalSmiles;
1943   } else if (!strcmp(which, "MurckoScaffold")) {
1944     func = RDKit::MolHash::HashFunction::MurckoScaffold;
1945   } else if (!strcmp(which, "ExtendedMurcko")) {
1946     func = RDKit::MolHash::HashFunction::ExtendedMurcko;
1947   } else if (!strcmp(which, "MolFormula")) {
1948     func = RDKit::MolHash::HashFunction::MolFormula;
1949   } else if (!strcmp(which, "AtomBondCounts")) {
1950     func = RDKit::MolHash::HashFunction::AtomBondCounts;
1951   } else if (!strcmp(which, "DegreeVector")) {
1952     func = RDKit::MolHash::HashFunction::DegreeVector;
1953   } else if (!strcmp(which, "Mesomer")) {
1954     func = RDKit::MolHash::HashFunction::Mesomer;
1955   } else if (!strcmp(which, "HetAtomTautomer")) {
1956     func = RDKit::MolHash::HashFunction::HetAtomTautomer;
1957   } else if (!strcmp(which, "HetAtomProtomer")) {
1958     func = RDKit::MolHash::HashFunction::HetAtomProtomer;
1959   } else if (!strcmp(which, "RedoxPair")) {
1960     func = RDKit::MolHash::HashFunction::RedoxPair;
1961   } else if (!strcmp(which, "Regioisomer")) {
1962     func = RDKit::MolHash::HashFunction::Regioisomer;
1963   } else if (!strcmp(which, "NetCharge")) {
1964     func = RDKit::MolHash::HashFunction::NetCharge;
1965   } else if (!strcmp(which, "SmallWorldIndexBR")) {
1966     func = RDKit::MolHash::HashFunction::SmallWorldIndexBR;
1967   } else if (!strcmp(which, "SmallWorldIndexBRL")) {
1968     func = RDKit::MolHash::HashFunction::SmallWorldIndexBRL;
1969   } else if (!strcmp(which, "ArthorSubstructureOrder")) {
1970     func = RDKit::MolHash::HashFunction::ArthorSubstructureOrder;
1971   } else {
1972     ereport(
1973         WARNING,
1974         (errcode(ERRCODE_WARNING),
1975          errmsg(
1976              "computeNMMolHash: hash %s not recognized, using AnonymousGraph",
1977              which)));
1978   }
1979 
1980   string text;
1981   try {
1982     text = RDKit::MolHash::MolHash(&mol, func);
1983   } catch (...) {
1984     ereport(WARNING,
1985             (errcode(ERRCODE_WARNING), errmsg("computeMolHash: failed")));
1986   }
1987   return strdup(text.c_str());
1988 }
1989 
findMCSsmiles(char * smiles,char * params)1990 extern "C" char *findMCSsmiles(char *smiles, char *params) {
1991   static string mcs;
1992   mcs.clear();
1993 
1994   char *str = smiles;
1995   char *s = str;
1996   int len, nmols = 0;
1997   std::vector<RDKit::ROMOL_SPTR> molecules;
1998   while (*s && *s <= ' ') {
1999     s++;
2000   }
2001   while (*s > ' ') {
2002     len = 0;
2003     while (s[len] > ' ') {
2004       len++;
2005     }
2006     s[len] = '\0';
2007     if (0 == strlen(s)) {
2008       continue;
2009     }
2010     ROMol *molptr = nullptr;
2011     try {
2012       molptr = RDKit::SmilesToMol(s);
2013     } catch (...) {
2014       molptr = nullptr;
2015     }
2016     if (molptr == nullptr) {
2017       ereport(
2018           ERROR,
2019           (errcode(ERRCODE_DATA_EXCEPTION),
2020            errmsg("findMCS: could not create molecule from SMILES '%s'", s)));
2021       return strdup("");
2022     }
2023     molecules.push_back(RDKit::ROMOL_SPTR(molptr));
2024     // elog(WARNING, s);
2025     s += len;
2026     s++;  // do s++; while(*s && *s <= ' ');
2027   }
2028 
2029   RDKit::MCSParameters p;
2030 
2031   if (params && 0 != strlen(params)) {
2032     try {
2033       RDKit::parseMCSParametersJSON(params, &p);
2034     } catch (...) {
2035       ereport(WARNING, (errcode(ERRCODE_WARNING),
2036                         errmsg("findMCS: Invalid argument \'params\'")));
2037       return strdup("");
2038     }
2039   }
2040 
2041   try {
2042     MCSResult res = RDKit::findMCS(molecules, &p);
2043     mcs = res.SmartsString;
2044     if (!res.isCompleted())
2045       ereport(WARNING, (errcode(ERRCODE_WARNING),
2046                         errmsg("findMCS timed out, result is not maximal")));
2047   } catch (...) {
2048     ereport(WARNING, (errcode(ERRCODE_WARNING), errmsg("findMCS: failed")));
2049     mcs.clear();
2050   }
2051   return mcs.empty() ? strdup("") : strdup(mcs.c_str());
2052 }
2053 
addMol2list(void * lst,Mol * mol)2054 extern "C" void *addMol2list(void *lst, Mol *mol) {
2055   try {
2056     if (!lst) {
2057       // elog(WARNING, "addMol2list: allocate new list");
2058       lst = new std::vector<RDKit::ROMOL_SPTR>;
2059     }
2060     std::vector<RDKit::ROMOL_SPTR> &mlst =
2061         *(std::vector<RDKit::ROMOL_SPTR> *)lst;
2062     // elog(WARNING, "addMol2list: create a copy of new mol");
2063     auto *m = (ROMol *)constructROMol(
2064         mol);  // new ROMol(*(const ROMol*)mol, false); // create a copy
2065     // elog(WARNING, "addMol2list: append new mol into list");
2066     mlst.push_back(RDKit::ROMOL_SPTR(m));
2067     // elog(WARNING, "addMol2list: finished");
2068   } catch (...) {
2069     // elog(WARNING, "addMol2list: ERROR");
2070     ereport(WARNING, (errcode(ERRCODE_WARNING), errmsg("addMol2list: failed")));
2071   }
2072   return lst;
2073 }
2074 
findMCS(void * vmols,char * params)2075 extern "C" char *findMCS(void *vmols, char *params) {
2076   static string mcs;
2077   mcs.clear();
2078   std::vector<RDKit::ROMOL_SPTR> *molecules =
2079       (std::vector<RDKit::ROMOL_SPTR> *)vmols;
2080   // char t[256];
2081   // sprintf(t,"findMCS(): lst=%p, size=%u", molecules, molecules->size());
2082   // elog(WARNING, t);
2083 
2084   RDKit::MCSParameters p;
2085 
2086   if (params && 0 != strlen(params)) {
2087     try {
2088       RDKit::parseMCSParametersJSON(params, &p);
2089     } catch (...) {
2090       // mcs = params; //DEBUG
2091       ereport(WARNING, (errcode(ERRCODE_WARNING),
2092                         errmsg("findMCS: Invalid argument \'params\'")));
2093       return strdup(mcs.c_str());
2094     }
2095   }
2096 
2097   try {
2098     MCSResult res = RDKit::findMCS(*molecules, &p);
2099     if (!res.isCompleted())
2100       ereport(WARNING, (errcode(ERRCODE_WARNING),
2101                         errmsg("findMCS timed out, result is not maximal")));
2102     mcs = res.SmartsString;
2103   } catch (...) {
2104     ereport(WARNING, (errcode(ERRCODE_WARNING), errmsg("findMCS: failed")));
2105     mcs.clear();
2106   }
2107   // sprintf(t,"findMCS(): MCS='%s'", mcs.c_str());
2108   // elog(WARNING, t);
2109   delete molecules;
2110   // elog(WARNING, "findMCS(): molecules deleted. FINISHED.");
2111   return strdup(mcs.c_str());
2112 }
2113