1 //
2 // Copyright (c) 2010-2013, Novartis Institutes for BioMedical Research Inc.
3 // All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following
13 // disclaimer in the documentation and/or other materials provided
14 // with the distribution.
15 // * Neither the name of Novartis Institutes for BioMedical Research Inc.
16 // nor the names of its contributors may be used to endorse or promote
17 // products derived from this software without specific prior written
18 // permission.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //
32 #include <GraphMol/RDKitBase.h>
33 #include <GraphMol/MolPickler.h>
34 #include <GraphMol/ChemReactions/ReactionPickler.h>
35 #include <GraphMol/ChemReactions/ReactionParser.h>
36 #include <GraphMol/ChemReactions/Reaction.h>
37 #include <GraphMol/SmilesParse/SmilesParse.h>
38 #include <GraphMol/SmilesParse/SmartsWrite.h>
39 #include <GraphMol/SmilesParse/SmilesWrite.h>
40 #include <GraphMol/Fingerprints/Fingerprints.h>
41 #include <GraphMol/FileParsers/FileParsers.h>
42 #include <GraphMol/Depictor/RDDepictor.h>
43 #include <GraphMol/Fingerprints/AtomPairs.h>
44 #include <GraphMol/Fingerprints/MorganFingerprints.h>
45 #include <GraphMol/Fingerprints/MACCS.h>
46 #include <GraphMol/Substruct/SubstructMatch.h>
47 #include <GraphMol/Descriptors/MolDescriptors.h>
48 #include <GraphMol/ChemTransforms/ChemTransforms.h>
49 #include <GraphMol/MolHash/MolHash.h>
50 #include <GraphMol/FMCS/FMCS.h>
51 #include <DataStructs/BitOps.h>
52 #include <DataStructs/SparseIntVect.h>
53 #include <GraphMol/MolDraw2D/MolDraw2D.h>
54 #include <GraphMol/MolDraw2D/MolDraw2DSVG.h>
55 #include <GraphMol/MolDraw2D/MolDraw2DUtils.h>
56
57 #include <RDGeneral/BoostStartInclude.h>
58 #include <boost/integer_traits.hpp>
59 #include <boost/property_tree/ptree.hpp>
60 #include <boost/property_tree/json_parser.hpp>
61 #include <boost/algorithm/string.hpp>
62 #include <boost/tokenizer.hpp>
63 #include <RDGeneral/BoostEndInclude.h>
64
65 #ifdef RDK_BUILD_INCHI_SUPPORT
66 #include <INCHI-API/inchi.h>
67 #endif
68 #ifdef RDK_BUILD_AVALON_SUPPORT
69 #include <AvalonTools/AvalonTools.h>
70 #endif
71 #include <GraphMol/ChemReactions/ReactionFingerprints.h>
72 #include <GraphMol/ChemReactions/ReactionUtils.h>
73
74 #include "rdkit.h"
75 #include "guc.h"
76 #include "bitstring.h"
77
78 using namespace std;
79 using namespace RDKit;
80
81 class ByteA : public std::string {
82 public:
ByteA()83 ByteA() : string(){};
ByteA(bytea * b)84 ByteA(bytea *b) : string(VARDATA(b), VARSIZE(b) - VARHDRSZ){};
ByteA(string & s)85 ByteA(string &s) : string(s){};
86
87 /*
88 * Convert string to bytea. Convertaion is in pgsql's memory
89 */
toByteA()90 bytea *toByteA() {
91 bytea *res;
92 int len;
93
94 len = this->size();
95 res = (bytea *)palloc(VARHDRSZ + len);
96 memcpy(VARDATA(res), this->data(), len);
97 SET_VARSIZE(res, VARHDRSZ + len);
98
99 return res;
100 };
101
102 /* Just the copy of string's method */
operator =(const string & __str)103 ByteA &operator=(const string &__str) {
104 return (ByteA &)this->assign(__str);
105 };
106 };
107
108 /*
109 * Constant io
110 */
111 static string StringData;
112
113 /*
114 * Real sparse vector
115 */
116
117 typedef SparseIntVect<std::uint32_t> SparseFP;
118
119 /*******************************************
120 * ROMol transformation *
121 *******************************************/
122
freeCROMol(CROMol data)123 extern "C" void freeCROMol(CROMol data) {
124 auto *mol = (ROMol *)data;
125 delete mol;
126 }
127
constructROMol(Mol * data)128 extern "C" CROMol constructROMol(Mol *data) {
129 auto *mol = new ROMol();
130
131 try {
132 ByteA b(data);
133 MolPickler::molFromPickle(b, mol);
134 } catch (MolPicklerException &e) {
135 elog(ERROR, "molFromPickle: %s", e.what());
136 } catch (...) {
137 elog(ERROR, "constructROMol: Unknown exception");
138 }
139
140 return (CROMol)mol;
141 }
142
deconstructROMol(CROMol data)143 extern "C" Mol *deconstructROMol(CROMol data) {
144 auto *mol = (ROMol *)data;
145 ByteA b;
146
147 try {
148 MolPickler::pickleMol(mol, b);
149 } catch (MolPicklerException &e) {
150 elog(ERROR, "pickleMol: %s", e.what());
151 } catch (...) {
152 elog(ERROR, "deconstructROMol: Unknown exception");
153 }
154
155 return (Mol *)b.toByteA();
156 }
157
parseMolText(char * data,bool asSmarts,bool warnOnFail,bool asQuery)158 extern "C" CROMol parseMolText(char *data, bool asSmarts, bool warnOnFail,
159 bool asQuery) {
160 RWMol *mol = nullptr;
161
162 try {
163 if (!asSmarts) {
164 if (!asQuery) {
165 mol = SmilesToMol(data);
166 } else {
167 mol = SmilesToMol(data, 0, false);
168 if (mol != nullptr) {
169 MolOps::sanitizeMol(*mol);
170 MolOps::mergeQueryHs(*mol);
171 }
172 }
173 } else {
174 mol = SmartsToMol(data, 0, false);
175 }
176 } catch (...) {
177 mol = nullptr;
178 }
179 if (mol == nullptr) {
180 if (warnOnFail) {
181 ereport(WARNING,
182 (errcode(ERRCODE_WARNING),
183 errmsg("could not create molecule from SMILES '%s'", data)));
184 } else {
185 ereport(ERROR,
186 (errcode(ERRCODE_DATA_EXCEPTION),
187 errmsg("could not create molecule from SMILES '%s'", data)));
188 }
189 }
190
191 return (CROMol)mol;
192 }
193
parseMolBlob(char * data,int len)194 extern "C" CROMol parseMolBlob(char *data, int len) {
195 ROMol *mol = nullptr;
196
197 try {
198 string binStr(data, len);
199 mol = new ROMol(binStr);
200 } catch (...) {
201 ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION),
202 errmsg("problem generating molecule from blob data")));
203 }
204 if (mol == nullptr) {
205 ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION),
206 errmsg("blob data could not be parsed")));
207 }
208
209 return (CROMol)mol;
210 }
211
parseMolCTAB(char * data,bool keepConformer,bool warnOnFail,bool asQuery)212 extern "C" CROMol parseMolCTAB(char *data, bool keepConformer, bool warnOnFail,
213 bool asQuery) {
214 RWMol *mol = nullptr;
215
216 try {
217 if (!asQuery) {
218 mol = MolBlockToMol(data);
219 } else {
220 mol = MolBlockToMol(data, true, false);
221 if (mol != nullptr) {
222 MolOps::mergeQueryHs(*mol);
223 }
224 }
225 } catch (...) {
226 mol = nullptr;
227 }
228 if (mol == nullptr) {
229 if (warnOnFail) {
230 ereport(WARNING,
231 (errcode(ERRCODE_WARNING),
232 errmsg("could not create molecule from CTAB '%s'", data)));
233
234 } else {
235 ereport(ERROR,
236 (errcode(ERRCODE_DATA_EXCEPTION),
237 errmsg("could not create molecule from CTAB '%s'", data)));
238 }
239 } else {
240 if (!keepConformer) {
241 mol->clearConformers();
242 }
243 }
244
245 return (CROMol)mol;
246 }
247
isValidSmiles(char * data)248 extern "C" bool isValidSmiles(char *data) {
249 RWMol *mol = nullptr;
250 bool res;
251 try {
252 string str(data);
253 if (str.empty()) {
254 // Pass the test - No-Structure input is allowed. No cleanup necessary.
255 return true;
256 }
257 mol = SmilesToMol(str, 0, 0);
258 if (mol) {
259 MolOps::cleanUp(*mol);
260 mol->updatePropertyCache();
261 MolOps::Kekulize(*mol);
262 MolOps::assignRadicals(*mol);
263 MolOps::setAromaticity(*mol);
264 MolOps::adjustHs(*mol);
265 }
266 } catch (...) {
267 mol = nullptr;
268 }
269 if (mol == nullptr) {
270 res = false;
271 } else {
272 res = true;
273 delete mol;
274 }
275 return res;
276 }
277
isValidSmarts(char * data)278 extern "C" bool isValidSmarts(char *data) {
279 ROMol *mol = nullptr;
280 bool res;
281 try {
282 mol = SmartsToMol(data);
283 } catch (...) {
284 mol = nullptr;
285 }
286 if (mol == nullptr) {
287 res = false;
288 } else {
289 res = true;
290 delete mol;
291 }
292 return res;
293 }
294
isValidCTAB(char * data)295 extern "C" bool isValidCTAB(char *data) {
296 RWMol *mol = nullptr;
297 bool res;
298 try {
299 mol = MolBlockToMol(data, false, false);
300 if (mol) {
301 MolOps::cleanUp(*mol);
302 mol->updatePropertyCache();
303 MolOps::Kekulize(*mol);
304 MolOps::assignRadicals(*mol);
305 MolOps::setAromaticity(*mol);
306 MolOps::adjustHs(*mol);
307 }
308 } catch (...) {
309 mol = nullptr;
310 }
311 if (mol == nullptr) {
312 res = false;
313 } else {
314 res = true;
315 delete mol;
316 }
317 return res;
318 }
319
isValidMolBlob(char * data,int len)320 extern "C" bool isValidMolBlob(char *data, int len) {
321 ROMol *mol = nullptr;
322 bool res = false;
323 try {
324 string binStr(data, len);
325 mol = new ROMol(binStr);
326 } catch (...) {
327 mol = nullptr;
328 }
329 if (mol == nullptr) {
330 res = false;
331 } else {
332 delete mol;
333 res = true;
334 }
335 return res;
336 }
337
makeMolText(CROMol data,int * len,bool asSmarts,bool cxSmiles)338 extern "C" char *makeMolText(CROMol data, int *len, bool asSmarts,
339 bool cxSmiles) {
340 auto *mol = (ROMol *)data;
341
342 try {
343 if (!asSmarts) {
344 if (!cxSmiles) {
345 StringData = MolToSmiles(*mol);
346 } else {
347 StringData = MolToCXSmiles(*mol);
348 }
349 } else {
350 StringData = MolToSmarts(*mol, false);
351 }
352 } catch (...) {
353 ereport(
354 WARNING,
355 (errcode(ERRCODE_WARNING),
356 errmsg("makeMolText: problems converting molecule to SMILES/SMARTS")));
357 StringData = "";
358 }
359
360 *len = StringData.size();
361 return (char *)StringData.c_str();
362 }
363
makeCtabText(CROMol data,int * len,bool createDepictionIfMissing)364 extern "C" char *makeCtabText(CROMol data, int *len,
365 bool createDepictionIfMissing) {
366 auto *mol = (ROMol *)data;
367
368 try {
369 if (createDepictionIfMissing && mol->getNumConformers() == 0) {
370 RDDepict::compute2DCoords(*mol);
371 }
372 StringData = MolToMolBlock(*mol);
373 } catch (...) {
374 ereport(WARNING,
375 (errcode(ERRCODE_WARNING),
376 errmsg("makeCtabText: problems converting molecule to CTAB")));
377 StringData = "";
378 }
379
380 *len = StringData.size();
381 return (char *)StringData.c_str();
382 }
383
makeMolBlob(CROMol data,int * len)384 extern "C" char *makeMolBlob(CROMol data, int *len) {
385 auto *mol = (ROMol *)data;
386 StringData.clear();
387 try {
388 MolPickler::pickleMol(*mol, StringData);
389 } catch (...) {
390 elog(ERROR, "makeMolBlob: Unknown exception");
391 }
392
393 *len = StringData.size();
394 return (char *)StringData.data();
395 }
396
makeMolSignature(CROMol data)397 extern "C" bytea *makeMolSignature(CROMol data) {
398 auto *mol = (ROMol *)data;
399 ExplicitBitVect *res = nullptr;
400 bytea *ret = nullptr;
401
402 try {
403 res = RDKit::PatternFingerprintMol(*mol, getSubstructFpSize());
404 // res =
405 // RDKit::LayeredFingerprintMol(*mol,RDKit::substructLayers,1,5,SSS_FP_SIZE);
406
407 if (res) {
408 std::string sres = BitVectToBinaryText(*res);
409
410 unsigned int varsize = VARHDRSZ + sres.size();
411 ret = (bytea *)palloc0(varsize);
412 memcpy(VARDATA(ret), sres.data(), sres.size());
413 SET_VARSIZE(ret, varsize);
414
415 delete res;
416 res = nullptr;
417 }
418 } catch (...) {
419 elog(ERROR, "makeMolSignature: Unknown exception");
420 if (res) {
421 delete res;
422 }
423 }
424
425 return ret;
426 }
427
molcmp(CROMol i,CROMol a)428 extern "C" int molcmp(CROMol i, CROMol a) {
429 auto *im = (ROMol *)i;
430 auto *am = (ROMol *)a;
431
432 if (!im) {
433 if (!am) {
434 return 0;
435 }
436 return -1;
437 }
438 if (!am) {
439 return 1;
440 }
441
442 int res = im->getNumAtoms() - am->getNumAtoms();
443 if (res) {
444 return res;
445 }
446
447 res = im->getNumBonds() - am->getNumBonds();
448 if (res) {
449 return res;
450 }
451
452 res = int(RDKit::Descriptors::calcAMW(*im, false)) -
453 int(RDKit::Descriptors::calcAMW(*am, false));
454 if (res) {
455 return res;
456 }
457
458 res = im->getRingInfo()->numRings() - am->getRingInfo()->numRings();
459 if (res) {
460 return res;
461 }
462
463 RDKit::MatchVectType matchVect;
464 bool recursionPossible = false;
465 bool doChiralMatch = getDoChiralSSS();
466 bool ss1 = RDKit::SubstructMatch(*im, *am, matchVect, recursionPossible,
467 doChiralMatch);
468 bool ss2 = RDKit::SubstructMatch(*am, *im, matchVect, recursionPossible,
469 doChiralMatch);
470 if (ss1 && !ss2) {
471 return 1;
472 } else if (!ss1 && ss2) {
473 return -1;
474 }
475
476 // the above can still fail in some chirality cases
477 std::string smi1 = MolToSmiles(*im, doChiralMatch);
478 std::string smi2 = MolToSmiles(*am, doChiralMatch);
479 return smi1 == smi2 ? 0 : (smi1 < smi2 ? -1 : 1);
480 }
481
MolSubstruct(CROMol i,CROMol a)482 extern "C" int MolSubstruct(CROMol i, CROMol a) {
483 auto *im = (ROMol *)i;
484 auto *am = (ROMol *)a;
485 RDKit::MatchVectType matchVect;
486
487 return RDKit::SubstructMatch(*im, *am, matchVect, true, getDoChiralSSS());
488 }
489
MolSubstructCount(CROMol i,CROMol a,bool uniquify)490 extern "C" int MolSubstructCount(CROMol i, CROMol a, bool uniquify) {
491 auto *im = (ROMol *)i;
492 auto *am = (ROMol *)a;
493 std::vector<RDKit::MatchVectType> matchVect;
494
495 return static_cast<int>(RDKit::SubstructMatch(*im, *am, matchVect, uniquify,
496 true, getDoChiralSSS()));
497 }
498
499 /*******************************************
500 * Molecule operations *
501 *******************************************/
502 #define MOLDESCR(name, func, ret) \
503 extern "C" ret Mol##name(CROMol i) { \
504 const ROMol *im = (ROMol *)i; \
505 return func(*im); \
506 }
MOLDESCR(FractionCSP3,RDKit::Descriptors::calcFractionCSP3,double)507 MOLDESCR(FractionCSP3, RDKit::Descriptors::calcFractionCSP3, double)
508 MOLDESCR(TPSA, RDKit::Descriptors::calcTPSA, double)
509 MOLDESCR(AMW, RDKit::Descriptors::calcAMW, double)
510 MOLDESCR(HBA, RDKit::Descriptors::calcLipinskiHBA, int)
511 MOLDESCR(HBD, RDKit::Descriptors::calcLipinskiHBD, int)
512 MOLDESCR(NumHeteroatoms, RDKit::Descriptors::calcNumHeteroatoms, int)
513 MOLDESCR(NumRings, RDKit::Descriptors::calcNumRings, int)
514 MOLDESCR(NumAromaticRings, RDKit::Descriptors::calcNumAromaticRings, int)
515 MOLDESCR(NumAliphaticRings, RDKit::Descriptors::calcNumAliphaticRings, int)
516 MOLDESCR(NumSaturatedRings, RDKit::Descriptors::calcNumSaturatedRings, int)
517 MOLDESCR(NumAromaticHeterocycles,
518 RDKit::Descriptors::calcNumAromaticHeterocycles, int)
519 MOLDESCR(NumAliphaticHeterocycles,
520 RDKit::Descriptors::calcNumAliphaticHeterocycles, int)
521 MOLDESCR(NumSaturatedHeterocycles,
522 RDKit::Descriptors::calcNumSaturatedHeterocycles, int)
523 MOLDESCR(NumAromaticCarbocycles, RDKit::Descriptors::calcNumAromaticCarbocycles,
524 int)
525 MOLDESCR(NumAliphaticCarbocycles,
526 RDKit::Descriptors::calcNumAliphaticCarbocycles, int)
527 MOLDESCR(NumSaturatedCarbocycles,
528 RDKit::Descriptors::calcNumSaturatedCarbocycles, int)
529 MOLDESCR(NumHeterocycles, RDKit::Descriptors::calcNumHeterocycles, int)
530
531 MOLDESCR(NumRotatableBonds, RDKit::Descriptors::calcNumRotatableBonds, int)
532 MOLDESCR(Chi0v, RDKit::Descriptors::calcChi0v, double)
533 MOLDESCR(Chi1v, RDKit::Descriptors::calcChi1v, double)
534 MOLDESCR(Chi2v, RDKit::Descriptors::calcChi2v, double)
535 MOLDESCR(Chi3v, RDKit::Descriptors::calcChi3v, double)
536 MOLDESCR(Chi4v, RDKit::Descriptors::calcChi4v, double)
537 MOLDESCR(Chi0n, RDKit::Descriptors::calcChi0n, double)
538 MOLDESCR(Chi1n, RDKit::Descriptors::calcChi1n, double)
539 MOLDESCR(Chi2n, RDKit::Descriptors::calcChi2n, double)
540 MOLDESCR(Chi3n, RDKit::Descriptors::calcChi3n, double)
541 MOLDESCR(Chi4n, RDKit::Descriptors::calcChi4n, double)
542 MOLDESCR(Kappa1, RDKit::Descriptors::calcKappa1, double)
543 MOLDESCR(Kappa2, RDKit::Descriptors::calcKappa2, double)
544 MOLDESCR(Kappa3, RDKit::Descriptors::calcKappa3, double)
545 MOLDESCR(NumSpiroAtoms, RDKit::Descriptors::calcNumSpiroAtoms, int)
546 MOLDESCR(NumBridgeheadAtoms, RDKit::Descriptors::calcNumBridgeheadAtoms, int)
547
548 extern "C" double MolLogP(CROMol i) {
549 double logp, mr;
550 RDKit::Descriptors::calcCrippenDescriptors(*(ROMol *)i, logp, mr);
551 return logp;
552 }
MolNumAtoms(CROMol i)553 extern "C" int MolNumAtoms(CROMol i) {
554 const ROMol *im = (ROMol *)i;
555 return im->getNumAtoms(false);
556 }
MolNumHeavyAtoms(CROMol i)557 extern "C" int MolNumHeavyAtoms(CROMol i) {
558 const ROMol *im = (ROMol *)i;
559 return im->getNumHeavyAtoms();
560 }
561
makeMolFormulaText(CROMol data,int * len,bool separateIsotopes,bool abbreviateHIsotopes)562 extern "C" char *makeMolFormulaText(CROMol data, int *len,
563 bool separateIsotopes,
564 bool abbreviateHIsotopes) {
565 auto *mol = (ROMol *)data;
566
567 try {
568 StringData = RDKit::Descriptors::calcMolFormula(*mol, separateIsotopes,
569 abbreviateHIsotopes);
570 } catch (...) {
571 ereport(WARNING,
572 (errcode(ERRCODE_WARNING),
573 errmsg("makeMolFormulaText: problems converting molecule to "
574 "sum formula")));
575 StringData = "";
576 }
577
578 *len = StringData.size();
579 return (char *)StringData.c_str();
580 }
581
MolInchi(CROMol i,const char * opts)582 extern "C" const char *MolInchi(CROMol i, const char *opts) {
583 std::string inchi = "InChI not available";
584 #ifdef RDK_BUILD_INCHI_SUPPORT
585 const ROMol *im = (ROMol *)i;
586 ExtraInchiReturnValues rv;
587 try {
588 std::string sopts = "/AuxNone /WarnOnEmptyStructure";
589 if (strlen(opts)) {
590 sopts += std::string(" ") + std::string(opts);
591 }
592 inchi = MolToInchi(*im, rv, sopts.c_str());
593 } catch (MolSanitizeException &e) {
594 inchi = "";
595 elog(ERROR, "MolInchi: cannot kekulize molecule");
596 } catch (...) {
597 inchi = "";
598 elog(ERROR, "MolInchi: Unknown exception");
599 }
600 #endif
601 return strdup(inchi.c_str());
602 }
MolInchiKey(CROMol i,const char * opts)603 extern "C" const char *MolInchiKey(CROMol i, const char *opts) {
604 std::string key = "InChI not available";
605 #ifdef RDK_BUILD_INCHI_SUPPORT
606 const ROMol *im = (ROMol *)i;
607 ExtraInchiReturnValues rv;
608 try {
609 std::string sopts = "/AuxNone /WarnOnEmptyStructure";
610 if (strlen(opts)) {
611 sopts += std::string(" ") + std::string(opts);
612 }
613 std::string inchi = MolToInchi(*im, rv, sopts.c_str());
614 key = InchiToInchiKey(inchi);
615 } catch (MolSanitizeException &e) {
616 key = "";
617 elog(ERROR, "MolInchiKey: cannot kekulize molecule");
618 } catch (...) {
619 key = "";
620 elog(ERROR, "MolInchiKey: Unknown exception");
621 }
622 #endif
623 return strdup(key.c_str());
624 }
625
MolMurckoScaffold(CROMol i)626 extern "C" CROMol MolMurckoScaffold(CROMol i) {
627 const ROMol *im = (ROMol *)i;
628 ROMol *mol = MurckoDecompose(*im);
629 if (mol && !mol->getNumAtoms()) {
630 delete mol;
631 mol = nullptr;
632 } else {
633 try {
634 MolOps::sanitizeMol(*(RWMol *)mol);
635 } catch (...) {
636 delete mol;
637 mol = nullptr;
638 }
639 }
640 return (CROMol)mol;
641 }
642
MolAdjustQueryProperties(CROMol i,const char * params)643 extern "C" CROMol MolAdjustQueryProperties(CROMol i, const char *params) {
644 const ROMol *im = (ROMol *)i;
645
646 MolOps::AdjustQueryParameters p;
647
648 if (params && strlen(params)) {
649 std::string pstring(params);
650 try {
651 MolOps::parseAdjustQueryParametersFromJSON(p, pstring);
652 } catch (const ValueErrorException &e) {
653 elog(ERROR, e.what());
654 } catch (...) {
655 elog(WARNING,
656 "adjustQueryProperties: Invalid argument \'params\' ignored");
657 }
658 }
659 ROMol *mol = MolOps::adjustQueryProperties(*im, &p);
660 return (CROMol)mol;
661 }
662
MolGetSVG(CROMol i,unsigned int w,unsigned int h,const char * legend,const char * params)663 extern "C" char *MolGetSVG(CROMol i, unsigned int w, unsigned int h,
664 const char *legend, const char *params) {
665 // SVG routines need an RWMol since they change the
666 // molecule as they prepare it for drawing. We don't
667 // want a plain SQL function (mol_to_svg) to have
668 // unexpected side effects, so take a copy and render
669 // (and change) that.
670 RWMol input_copy(*(ROMol *)i);
671
672 MolDraw2DUtils::prepareMolForDrawing(input_copy);
673 std::string slegend(legend ? legend : "");
674 MolDraw2DSVG drawer(w, h);
675 if (params && strlen(params)) {
676 try {
677 MolDraw2DUtils::updateDrawerParamsFromJSON(drawer, params);
678 } catch (...) {
679 elog(WARNING,
680 "adjustQueryProperties: Invalid argument \'params\' ignored");
681 }
682 }
683 drawer.drawMolecule(input_copy, legend);
684 drawer.finishDrawing();
685 std::string txt = drawer.getDrawingText();
686 return strdup(txt.c_str());
687 }
688
ReactionGetSVG(CChemicalReaction i,unsigned int w,unsigned int h,bool highlightByReactant,const char * params)689 extern "C" char *ReactionGetSVG(CChemicalReaction i, unsigned int w,
690 unsigned int h, bool highlightByReactant,
691 const char *params) {
692 auto *rxn = (ChemicalReaction *)i;
693
694 MolDraw2DSVG drawer(w, h);
695 if (params && strlen(params)) {
696 try {
697 MolDraw2DUtils::updateDrawerParamsFromJSON(drawer, params);
698 } catch (...) {
699 elog(WARNING,
700 "adjustQueryProperties: Invalid argument \'params\' ignored");
701 }
702 }
703 drawer.drawReaction(*rxn, highlightByReactant);
704 drawer.finishDrawing();
705 std::string txt = drawer.getDrawingText();
706 return strdup(txt.c_str());
707 }
708
709 /*******************************************
710 * CBfp transformation *
711 *******************************************/
712
freeCBfp(CBfp data)713 extern "C" void freeCBfp(CBfp data) {
714 auto *fp = (std::string *)data;
715 delete fp;
716 }
717
constructCBfp(Bfp * data)718 extern "C" CBfp constructCBfp(Bfp *data) {
719 std::string *ebv = nullptr;
720
721 try {
722 ebv = new std::string(VARDATA(data), VARSIZE(data) - VARHDRSZ);
723 } catch (...) {
724 elog(ERROR, "constructMolFingerPrint: Unknown exception");
725 }
726
727 return (CBfp)ebv;
728 }
729
deconstructCBfp(CBfp data)730 extern "C" Bfp *deconstructCBfp(CBfp data) {
731 auto *ebv = (std::string *)data;
732 ByteA b;
733
734 try {
735 b = *ebv;
736 } catch (...) {
737 elog(ERROR, "deconstructMolFingerPrint: Unknown exception");
738 }
739
740 return b.toByteA();
741 }
742
makeBfpSignature(CBfp data)743 extern "C" BfpSignature *makeBfpSignature(CBfp data) {
744 auto *ebv = (std::string *)data;
745 int siglen = ebv->size();
746
747 unsigned int varsize = sizeof(BfpSignature) + siglen;
748 BfpSignature *res = (BfpSignature *)palloc0(varsize);
749 SET_VARSIZE(res, varsize);
750
751 res->weight = bitstringWeight(siglen, (uint8 *)ebv->data());
752 memcpy(res->fp, ebv->data(), siglen);
753
754 return res;
755 }
756
CBfpSize(CBfp a)757 extern "C" int CBfpSize(CBfp a) {
758 auto *ebv = (std::string *)a;
759 int numBits = ebv->size() * 8;
760 return numBits;
761 }
762
calcBitmapTanimotoSml(CBfp a,CBfp b)763 extern "C" double calcBitmapTanimotoSml(CBfp a, CBfp b) {
764 auto *abv = (std::string *)a;
765 auto *bbv = (std::string *)b;
766 const auto *afp = (const unsigned char *)abv->c_str();
767 const auto *bfp = (const unsigned char *)bbv->c_str();
768 /* return CalcBitmapTanimoto(afp, bfp, abv->size()); */
769 return bitstringTanimotoSimilarity(abv->size(), (uint8 *)afp, (uint8 *)bfp);
770 }
771
calcBitmapDiceSml(CBfp a,CBfp b)772 extern "C" double calcBitmapDiceSml(CBfp a, CBfp b) {
773 auto *abv = (std::string *)a;
774 auto *bbv = (std::string *)b;
775 const auto *afp = (const unsigned char *)abv->c_str();
776 const auto *bfp = (const unsigned char *)bbv->c_str();
777 return CalcBitmapDice(afp, bfp, abv->size());
778 }
779
calcBitmapTverskySml(CBfp a,CBfp b,float ca,float cb)780 double calcBitmapTverskySml(CBfp a, CBfp b, float ca, float cb) {
781 auto *abv = (std::string *)a;
782 auto *bbv = (std::string *)b;
783 const auto *afp = (const unsigned char *)abv->c_str();
784 const auto *bfp = (const unsigned char *)bbv->c_str();
785 return CalcBitmapTversky(afp, bfp, abv->size(), ca, cb);
786 }
787
788 /*******************************************
789 * CSfp transformation *
790 *******************************************/
791
freeCSfp(CSfp data)792 extern "C" void freeCSfp(CSfp data) {
793 auto *fp = (SparseFP *)data;
794 delete fp;
795 }
796
constructCSfp(Sfp * data)797 extern "C" CSfp constructCSfp(Sfp *data) {
798 SparseFP *ebv = nullptr;
799
800 try {
801 ebv = new SparseFP(VARDATA(data), VARSIZE(data) - VARHDRSZ);
802 } catch (...) {
803 elog(ERROR, "constructMolFingerPrint: Unknown exception");
804 }
805
806 return (CSfp)ebv;
807 }
808
deconstructCSfp(CSfp data)809 extern "C" Sfp *deconstructCSfp(CSfp data) {
810 auto *ebv = (SparseFP *)data;
811 ByteA b;
812
813 try {
814 b = ebv->toString();
815 } catch (...) {
816 elog(ERROR, "deconstructMolFingerPrint: Unknown exception");
817 }
818
819 return b.toByteA();
820 }
821
makeSfpSignature(CSfp data,int numBits)822 extern "C" bytea *makeSfpSignature(CSfp data, int numBits) {
823 auto *v = (SparseFP *)data;
824 int n, numBytes;
825 bytea *res;
826 unsigned char *s;
827 SparseFP::StorageType::const_iterator iter;
828
829 numBytes = VARHDRSZ + (numBits / 8);
830 if ((numBits % 8) != 0) {
831 numBytes++;
832 }
833
834 res = (bytea *)palloc0(numBytes);
835 SET_VARSIZE(res, numBytes);
836 s = (unsigned char *)VARDATA(res);
837
838 for (iter = v->getNonzeroElements().begin();
839 iter != v->getNonzeroElements().end(); iter++) {
840 n = iter->first % numBits;
841 s[n / 8] |= 1 << (n % 8);
842 }
843
844 return res;
845 }
846
makeLowSparseFingerPrint(CSfp data,int numInts)847 extern "C" bytea *makeLowSparseFingerPrint(CSfp data, int numInts) {
848 auto *v = (SparseFP *)data;
849 int numBytes;
850 bytea *res;
851 IntRange *s;
852 int n;
853 SparseFP::StorageType::const_iterator iter;
854
855 numBytes = VARHDRSZ + (numInts * sizeof(IntRange));
856
857 res = (bytea *)palloc0(numBytes);
858 SET_VARSIZE(res, numBytes);
859 s = (IntRange *)VARDATA(res);
860
861 for (iter = v->getNonzeroElements().begin();
862 iter != v->getNonzeroElements().end(); iter++) {
863 uint32 iterV = (uint32)iter->second;
864 n = iter->first % numInts;
865
866 if (iterV > INTRANGEMAX) {
867 #if 0
868 elog(ERROR, "sparse fingerprint is too big, increase INTRANGEMAX in rdkit.h");
869 #else
870 iterV = INTRANGEMAX;
871 #endif
872 }
873
874 if (s[n].low == 0 || s[n].low > iterV) s[n].low = iterV;
875 if (s[n].high < iterV) s[n].high = iterV;
876 }
877
878 return res;
879 }
880
countOverlapValues(bytea * sign,CSfp data,int numBits,int * sum,int * overlapSum,int * overlapN)881 extern "C" void countOverlapValues(bytea *sign, CSfp data, int numBits,
882 int *sum, int *overlapSum, int *overlapN) {
883 auto *v = (SparseFP *)data;
884 SparseFP::StorageType::const_iterator iter;
885
886 *sum = *overlapSum = *overlapN = 0;
887
888 if (sign) {
889 unsigned char *s = (unsigned char *)VARDATA(sign);
890 int n;
891
892 for (iter = v->getNonzeroElements().begin();
893 iter != v->getNonzeroElements().end(); iter++) {
894 *sum += iter->second;
895 n = iter->first % numBits;
896 if (s[n / 8] & (1 << (n % 8))) {
897 *overlapSum += iter->second;
898 *overlapN += 1;
899 }
900 }
901 } else {
902 /* Assume, sign has only true bits */
903 for (iter = v->getNonzeroElements().begin();
904 iter != v->getNonzeroElements().end(); iter++)
905 *sum += iter->second;
906
907 *overlapSum = *sum;
908 *overlapN = v->getNonzeroElements().size();
909 }
910 }
911
countLowOverlapValues(bytea * sign,CSfp data,int numInts,int * querySum,int * keySum,int * overlapUp,int * overlapDown)912 extern "C" void countLowOverlapValues(bytea *sign, CSfp data, int numInts,
913 int *querySum, int *keySum,
914 int *overlapUp, int *overlapDown) {
915 auto *v = (SparseFP *)data;
916 SparseFP::StorageType::const_iterator iter;
917 IntRange *s = (IntRange *)VARDATA(sign);
918 int n;
919
920 *querySum = *keySum = *overlapUp = *overlapDown = 0;
921
922 for (iter = v->getNonzeroElements().begin();
923 iter != v->getNonzeroElements().end(); iter++) {
924 *querySum += iter->second;
925 n = iter->first % numInts;
926 if (s[n].low == 0) {
927 Assert(s[n].high == 0);
928 continue;
929 }
930
931 *overlapDown += Min(s[n].low, (uint32)iter->second);
932 *overlapUp += Min(s[n].high, (uint32)iter->second);
933 }
934
935 Assert(*overlapDown <= *overlapUp);
936
937 for (n = 0; n < numInts; n++) {
938 *keySum += s[n].low;
939 if (s[n].low != s[n].high)
940 *keySum += s[n].high; /* there is at least two key mapped into current
941 backet */
942 }
943
944 Assert(*overlapUp <= *keySum);
945 }
946
calcSparseTanimotoSml(CSfp a,CSfp b)947 extern "C" double calcSparseTanimotoSml(CSfp a, CSfp b) {
948 double res = -1.0;
949
950 /*
951 * Nsame / (Na + Nb - Nsame)
952 */
953
954 try {
955 res = TanimotoSimilarity(*(SparseFP *)a, *(SparseFP *)b);
956 } catch (ValueErrorException &e) {
957 elog(ERROR, "TanimotoSimilarity: %s", e.what());
958 } catch (...) {
959 elog(ERROR, "calcSparseTanimotoSml: Unknown exception");
960 }
961
962 return res;
963 }
964
calcSparseDiceSml(CSfp a,CSfp b)965 extern "C" double calcSparseDiceSml(CSfp a, CSfp b) {
966 double res = -1.0;
967
968 /*
969 * 2 * Nsame / (Na + Nb)
970 */
971
972 try {
973 res = DiceSimilarity(*(SparseFP *)a, *(SparseFP *)b);
974 } catch (ValueErrorException &e) {
975 elog(ERROR, "DiceSimilarity: %s", e.what());
976 } catch (...) {
977 elog(ERROR, "calcSparseDiceSml: Unknown exception");
978 }
979
980 return res;
981 }
982
calcSparseStringDiceSml(const char * a,unsigned int sza,const char * b,unsigned int szb)983 extern "C" double calcSparseStringDiceSml(const char *a, unsigned int sza,
984 const char *b, unsigned int szb) {
985 const auto *t1 = (const unsigned char *)a;
986 const auto *t2 = (const unsigned char *)b;
987
988 std::uint32_t tmp;
989 tmp = *(reinterpret_cast<const std::uint32_t *>(t1));
990 t1 += sizeof(std::uint32_t);
991 if (tmp != (std::uint32_t)ci_SPARSEINTVECT_VERSION) {
992 elog(ERROR, "calcSparseStringDiceSml: could not convert argument 1");
993 }
994 tmp = *(reinterpret_cast<const std::uint32_t *>(t2));
995 t2 += sizeof(std::uint32_t);
996 if (tmp != (std::uint32_t)ci_SPARSEINTVECT_VERSION) {
997 elog(ERROR, "calcSparseStringDiceSml: could not convert argument 2");
998 }
999
1000 // check the element size:
1001 tmp = *(reinterpret_cast<const std::uint32_t *>(t1));
1002 t1 += sizeof(std::uint32_t);
1003 if (tmp != sizeof(std::uint32_t)) {
1004 elog(ERROR,
1005 "calcSparseStringDiceSml: could not convert argument 1 -> uint32_t");
1006 }
1007 tmp = *(reinterpret_cast<const std::uint32_t *>(t2));
1008 t2 += sizeof(std::uint32_t);
1009 if (tmp != sizeof(std::uint32_t)) {
1010 elog(ERROR,
1011 "calcSparseStringDiceSml: could not convert argument 2 -> uint32_t");
1012 }
1013
1014 double res = 0.;
1015 // start reading:
1016 std::uint32_t len1, len2;
1017 len1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1018 t1 += sizeof(std::uint32_t);
1019 len2 = *(reinterpret_cast<const std::uint32_t *>(t2));
1020 t2 += sizeof(std::uint32_t);
1021 if (len1 != len2) {
1022 elog(ERROR, "attempt to compare fingerprints of different length");
1023 }
1024
1025 std::uint32_t nElem1, nElem2;
1026 nElem1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1027 t1 += sizeof(std::uint32_t);
1028 nElem2 = *(reinterpret_cast<const std::uint32_t *>(t2));
1029 t2 += sizeof(std::uint32_t);
1030
1031 if (!nElem1 || !nElem2) {
1032 return 0.0;
1033 }
1034
1035 double v1Sum = 0, v2Sum = 0, numer = 0;
1036 std::uint32_t idx1 = 0;
1037 std::int32_t v1;
1038 std::uint32_t idx2 = 0;
1039 std::int32_t v2;
1040 idx1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1041 t1 += sizeof(std::uint32_t);
1042 v1 = *(reinterpret_cast<const std::int32_t *>(t1));
1043 t1 += sizeof(std::int32_t);
1044 nElem1--;
1045 v1Sum += v1;
1046
1047 idx2 = *(reinterpret_cast<const std::uint32_t *>(t2));
1048 t2 += sizeof(std::uint32_t);
1049 v2 = *(reinterpret_cast<const std::int32_t *>(t2));
1050 t2 += sizeof(std::int32_t);
1051 nElem2--;
1052 v2Sum += v2;
1053
1054 while (1) {
1055 while (nElem2 && idx2 < idx1) {
1056 idx2 = *(reinterpret_cast<const std::uint32_t *>(t2));
1057 t2 += sizeof(std::uint32_t);
1058 v2 = *(reinterpret_cast<const std::int32_t *>(t2));
1059 t2 += sizeof(std::int32_t);
1060 nElem2--;
1061 v2Sum += v2;
1062 }
1063 if (idx2 == idx1) {
1064 // std::cerr<<" --- "<<idx1<<" "<<v1<<" - "<<idx2<<" "<<v2<<std::endl;
1065 numer += std::min(v1, v2);
1066 }
1067 if (nElem1) {
1068 idx1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1069 t1 += sizeof(std::uint32_t);
1070 v1 = *(reinterpret_cast<const std::int32_t *>(t1));
1071 t1 += sizeof(std::int32_t);
1072 nElem1--;
1073 v1Sum += v1;
1074 } else {
1075 break;
1076 }
1077 }
1078 while (nElem2) {
1079 idx2 = *(reinterpret_cast<const std::uint32_t *>(t2));
1080 t2 += sizeof(std::uint32_t);
1081 v2 = *(reinterpret_cast<const std::int32_t *>(t2));
1082 t2 += sizeof(std::int32_t);
1083 nElem2--;
1084 v2Sum += v2;
1085 }
1086 double denom = v1Sum + v2Sum;
1087 if (fabs(denom) < 1e-6) {
1088 res = 0.0;
1089 } else {
1090 res = 2. * numer / denom;
1091 }
1092
1093 return res;
1094 }
1095
calcSparseStringAllValsGT(const char * a,unsigned int sza,int tgt)1096 extern "C" bool calcSparseStringAllValsGT(const char *a, unsigned int sza,
1097 int tgt) {
1098 const auto *t1 = (const unsigned char *)a;
1099
1100 std::uint32_t tmp;
1101 tmp = *(reinterpret_cast<const std::uint32_t *>(t1));
1102 t1 += sizeof(std::uint32_t);
1103 if (tmp != (std::uint32_t)ci_SPARSEINTVECT_VERSION) {
1104 elog(ERROR, "calcSparseStringAllValsGT: could not convert argument 1");
1105 }
1106 // check the element size:
1107 tmp = *(reinterpret_cast<const std::uint32_t *>(t1));
1108 t1 += sizeof(std::uint32_t);
1109 if (tmp != sizeof(std::uint32_t)) {
1110 elog(ERROR,
1111 "calcSparseStringAllValsGT: could not convert argument 1 -> "
1112 "uint32_t");
1113 }
1114
1115 // std::uint32_t len1;
1116 // len1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1117 t1 += sizeof(std::uint32_t);
1118
1119 std::uint32_t nElem1;
1120 nElem1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1121 t1 += sizeof(std::uint32_t);
1122
1123 while (nElem1) {
1124 --nElem1;
1125 // skip the index:
1126 t1 += sizeof(std::uint32_t);
1127 std::int32_t v1 = *(reinterpret_cast<const std::int32_t *>(t1));
1128 t1 += sizeof(std::int32_t);
1129
1130 if (v1 <= tgt) {
1131 return false;
1132 }
1133 }
1134 return true;
1135 }
calcSparseStringAllValsLT(const char * a,unsigned int sza,int tgt)1136 extern "C" bool calcSparseStringAllValsLT(const char *a, unsigned int sza,
1137 int tgt) {
1138 const auto *t1 = (const unsigned char *)a;
1139
1140 std::uint32_t tmp;
1141 tmp = *(reinterpret_cast<const std::uint32_t *>(t1));
1142 t1 += sizeof(std::uint32_t);
1143 if (tmp != (std::uint32_t)ci_SPARSEINTVECT_VERSION) {
1144 elog(ERROR, "calcSparseStringAllValsGT: could not convert argument 1");
1145 }
1146 // check the element size:
1147 tmp = *(reinterpret_cast<const std::uint32_t *>(t1));
1148 t1 += sizeof(std::uint32_t);
1149 if (tmp != sizeof(std::uint32_t)) {
1150 elog(ERROR,
1151 "calcSparseStringAllValsGT: could not convert argument 1 -> "
1152 "uint32_t");
1153 }
1154
1155 // std::uint32_t len1;
1156 // len1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1157 t1 += sizeof(std::uint32_t);
1158
1159 std::uint32_t nElem1;
1160 nElem1 = *(reinterpret_cast<const std::uint32_t *>(t1));
1161 t1 += sizeof(std::uint32_t);
1162
1163 while (nElem1) {
1164 --nElem1;
1165 // skip the index:
1166 t1 += sizeof(std::uint32_t);
1167 std::int32_t v1 = *(reinterpret_cast<const std::int32_t *>(t1));
1168 t1 += sizeof(std::int32_t);
1169
1170 if (v1 >= tgt) {
1171 return false;
1172 }
1173 }
1174 return true;
1175 }
1176
addSFP(CSfp a,CSfp b)1177 extern "C" CSfp addSFP(CSfp a, CSfp b) {
1178 SparseFP *res = nullptr;
1179 try {
1180 SparseFP tmp = (*(SparseFP *)a + *(SparseFP *)b);
1181 res = (SparseFP *)new SparseFP(tmp);
1182 } catch (...) {
1183 elog(ERROR, "addSFP: Unknown exception");
1184 }
1185 return (CSfp)res;
1186 }
1187
subtractSFP(CSfp a,CSfp b)1188 extern "C" CSfp subtractSFP(CSfp a, CSfp b) {
1189 SparseFP *res = nullptr;
1190 try {
1191 SparseFP tmp = (*(SparseFP *)a - *(SparseFP *)b);
1192 res = (SparseFP *)new SparseFP(tmp);
1193 } catch (...) {
1194 elog(ERROR, "addSFP: Unknown exception");
1195 }
1196 return (CSfp)res;
1197 }
1198
1199 /*
1200 * Mol -> fp
1201 */
makeLayeredBFP(CROMol data)1202 extern "C" CBfp makeLayeredBFP(CROMol data) {
1203 auto *mol = (ROMol *)data;
1204 ExplicitBitVect *res = nullptr;
1205
1206 try {
1207 res = RDKit::LayeredFingerprintMol(*mol, 0xFFFFFFFF, 1, 7,
1208 getLayeredFpSize());
1209 } catch (...) {
1210 elog(ERROR, "makeLayeredBFP: Unknown exception");
1211 if (res) {
1212 delete res;
1213 }
1214 res = nullptr;
1215 }
1216 if (res) {
1217 std::string *sres = new std::string(BitVectToBinaryText(*res));
1218 delete res;
1219 return (CBfp)sres;
1220 } else {
1221 return nullptr;
1222 }
1223 }
1224
makeRDKitBFP(CROMol data)1225 extern "C" CBfp makeRDKitBFP(CROMol data) {
1226 auto *mol = (ROMol *)data;
1227 ExplicitBitVect *res = nullptr;
1228
1229 try {
1230 res = RDKit::RDKFingerprintMol(*mol, 1, 6, getRDKitFpSize(), 2);
1231 } catch (...) {
1232 elog(ERROR, "makeRDKitBFP: Unknown exception");
1233 if (res) {
1234 delete res;
1235 }
1236 res = nullptr;
1237 }
1238
1239 if (res) {
1240 std::string *sres = new std::string(BitVectToBinaryText(*res));
1241 delete res;
1242 return (CBfp)sres;
1243 } else {
1244 return nullptr;
1245 }
1246 }
1247
makeMorganSFP(CROMol data,int radius)1248 extern "C" CSfp makeMorganSFP(CROMol data, int radius) {
1249 auto *mol = (ROMol *)data;
1250 SparseFP *res = nullptr;
1251 std::vector<std::uint32_t> invars(mol->getNumAtoms());
1252 try {
1253 RDKit::MorganFingerprints::getConnectivityInvariants(*mol, invars, true);
1254 res = (SparseFP *)RDKit::MorganFingerprints::getFingerprint(*mol, radius,
1255 &invars);
1256 } catch (...) {
1257 elog(ERROR, "makeMorganSFP: Unknown exception");
1258 }
1259
1260 return (CSfp)res;
1261 }
1262
makeMorganBFP(CROMol data,int radius)1263 extern "C" CBfp makeMorganBFP(CROMol data, int radius) {
1264 auto *mol = (ROMol *)data;
1265 ExplicitBitVect *res = nullptr;
1266 std::vector<std::uint32_t> invars(mol->getNumAtoms());
1267 try {
1268 RDKit::MorganFingerprints::getConnectivityInvariants(*mol, invars, true);
1269 res = RDKit::MorganFingerprints::getFingerprintAsBitVect(
1270 *mol, radius, getMorganFpSize(), &invars);
1271 } catch (...) {
1272 elog(ERROR, "makeMorganBFP: Unknown exception");
1273 }
1274
1275 if (res) {
1276 std::string *sres = new std::string(BitVectToBinaryText(*res));
1277 delete res;
1278 return (CBfp)sres;
1279 } else {
1280 return nullptr;
1281 }
1282 }
1283
makeFeatMorganSFP(CROMol data,int radius)1284 extern "C" CSfp makeFeatMorganSFP(CROMol data, int radius) {
1285 auto *mol = (ROMol *)data;
1286 SparseFP *res = nullptr;
1287 std::vector<std::uint32_t> invars(mol->getNumAtoms());
1288 try {
1289 RDKit::MorganFingerprints::getFeatureInvariants(*mol, invars);
1290 res = (SparseFP *)RDKit::MorganFingerprints::getFingerprint(*mol, radius,
1291 &invars);
1292 } catch (...) {
1293 elog(ERROR, "makeMorganSFP: Unknown exception");
1294 }
1295
1296 return (CSfp)res;
1297 }
1298
makeFeatMorganBFP(CROMol data,int radius)1299 extern "C" CBfp makeFeatMorganBFP(CROMol data, int radius) {
1300 auto *mol = (ROMol *)data;
1301 ExplicitBitVect *res = nullptr;
1302 std::vector<std::uint32_t> invars(mol->getNumAtoms());
1303 try {
1304 RDKit::MorganFingerprints::getFeatureInvariants(*mol, invars);
1305 res = RDKit::MorganFingerprints::getFingerprintAsBitVect(
1306 *mol, radius, getFeatMorganFpSize(), &invars);
1307 } catch (...) {
1308 elog(ERROR, "makeMorganBFP: Unknown exception");
1309 }
1310
1311 if (res) {
1312 std::string *sres = new std::string(BitVectToBinaryText(*res));
1313 delete res;
1314 return (CBfp)sres;
1315 } else {
1316 return nullptr;
1317 }
1318 }
1319
makeAtomPairSFP(CROMol data)1320 extern "C" CSfp makeAtomPairSFP(CROMol data) {
1321 auto *mol = (ROMol *)data;
1322 SparseFP *res = nullptr;
1323 #ifdef UNHASHED_PAIR_FPS
1324 try {
1325 SparseIntVect<std::int32_t> *afp =
1326 RDKit::AtomPairs::getAtomPairFingerprint(*mol);
1327 res = new SparseFP(1 << RDKit::AtomPairs::numAtomPairFingerprintBits);
1328 for (SparseIntVect<std::int32_t>::StorageType::const_iterator iter =
1329 afp->getNonzeroElements().begin();
1330 iter != afp->getNonzeroElements().end(); ++iter) {
1331 res->setVal(iter->first, iter->second);
1332 }
1333 delete afp;
1334 } catch (...) {
1335 elog(ERROR, "makeAtomPairSFP: Unknown exception");
1336 }
1337 #else
1338 try {
1339 SparseIntVect<std::int32_t> *afp =
1340 RDKit::AtomPairs::getHashedAtomPairFingerprint(
1341 *mol, getHashedAtomPairFpSize());
1342 res = new SparseFP(getHashedAtomPairFpSize());
1343 for (const auto &iter : afp->getNonzeroElements()) {
1344 res->setVal(iter.first, iter.second);
1345 }
1346 delete afp;
1347 } catch (...) {
1348 elog(ERROR, "makeAtomPairSFP: Unknown exception");
1349 }
1350 #endif
1351 return (CSfp)res;
1352 }
1353
makeTopologicalTorsionSFP(CROMol data)1354 extern "C" CSfp makeTopologicalTorsionSFP(CROMol data) {
1355 auto *mol = (ROMol *)data;
1356 SparseFP *res = nullptr;
1357
1358 #ifdef UNHASHED_PAIR_FPS
1359 try {
1360 SparseIntVect<boost::int64_t> *afp =
1361 RDKit::AtomPairs::getHashedTopologicalTorsionFingerprint(
1362 *mol, boost::integer_traits<std::uint32_t>::const_max);
1363 res = new SparseFP(boost::integer_traits<std::uint32_t>::const_max);
1364 for (SparseIntVect<boost::int64_t>::StorageType::const_iterator iter =
1365 afp->getNonzeroElements().begin();
1366 iter != afp->getNonzeroElements().end(); ++iter) {
1367 res->setVal(iter->first, iter->second);
1368 }
1369 delete afp;
1370 } catch (...) {
1371 elog(ERROR, "makeTopologicalTorsionSFP: Unknown exception");
1372 }
1373 #else
1374 try {
1375 SparseIntVect<boost::int64_t> *afp =
1376 RDKit::AtomPairs::getHashedTopologicalTorsionFingerprint(
1377 *mol, getHashedTorsionFpSize());
1378 res = new SparseFP(getHashedTorsionFpSize());
1379 for (const auto &iter : afp->getNonzeroElements()) {
1380 res->setVal(iter.first, iter.second);
1381 }
1382 delete afp;
1383 } catch (...) {
1384 elog(ERROR, "makeTopologicalTorsionSFP: Unknown exception");
1385 }
1386 #endif
1387 return (CSfp)res;
1388 }
1389
makeAtomPairBFP(CROMol data)1390 extern "C" CBfp makeAtomPairBFP(CROMol data) {
1391 auto *mol = (ROMol *)data;
1392 ExplicitBitVect *res = nullptr;
1393 try {
1394 res = RDKit::AtomPairs::getHashedAtomPairFingerprintAsBitVect(
1395 *mol, getHashedAtomPairFpSize());
1396 } catch (...) {
1397 elog(ERROR, "makeAtomPairBFP: Unknown exception");
1398 }
1399 if (res) {
1400 std::string *sres = new std::string(BitVectToBinaryText(*res));
1401 delete res;
1402 return (CBfp)sres;
1403 } else {
1404 return nullptr;
1405 }
1406 }
1407
makeTopologicalTorsionBFP(CROMol data)1408 extern "C" CBfp makeTopologicalTorsionBFP(CROMol data) {
1409 auto *mol = (ROMol *)data;
1410 ExplicitBitVect *res = nullptr;
1411 try {
1412 res = RDKit::AtomPairs::getHashedTopologicalTorsionFingerprintAsBitVect(
1413 *mol, getHashedTorsionFpSize());
1414 } catch (...) {
1415 elog(ERROR, "makeTopologicalTorsionBFP: Unknown exception");
1416 }
1417 if (res) {
1418 std::string *sres = new std::string(BitVectToBinaryText(*res));
1419 delete res;
1420 return (CBfp)sres;
1421 } else {
1422 return nullptr;
1423 }
1424 }
1425
makeMACCSBFP(CROMol data)1426 extern "C" CBfp makeMACCSBFP(CROMol data) {
1427 auto *mol = (ROMol *)data;
1428 ExplicitBitVect *res = nullptr;
1429 try {
1430 res = RDKit::MACCSFingerprints::getFingerprintAsBitVect(*mol);
1431 } catch (...) {
1432 elog(ERROR, "makeMACCSBFP: Unknown exception");
1433 }
1434 if (res) {
1435 std::string *sres = new std::string(BitVectToBinaryText(*res));
1436 delete res;
1437 return (CBfp)sres;
1438 } else {
1439 return nullptr;
1440 }
1441 }
1442
makeAvalonBFP(CROMol data,bool isQuery,unsigned int bitFlags)1443 extern "C" CBfp makeAvalonBFP(CROMol data, bool isQuery,
1444 unsigned int bitFlags) {
1445 #ifdef RDK_BUILD_AVALON_SUPPORT
1446 auto *mol = (ROMol *)data;
1447 ExplicitBitVect *res = nullptr;
1448 try {
1449 res = new ExplicitBitVect(getAvalonFpSize());
1450 AvalonTools::getAvalonFP(*mol, *res, getAvalonFpSize(), isQuery, true,
1451 bitFlags);
1452 } catch (...) {
1453 elog(ERROR, "makeAvalonBFP: Unknown exception");
1454 }
1455
1456 if (res) {
1457 std::string *sres = new std::string(BitVectToBinaryText(*res));
1458 delete res;
1459 return (CBfp)sres;
1460 } else {
1461 return nullptr;
1462 }
1463 #else
1464 elog(ERROR, "Avalon support not enabled");
1465 return NULL;
1466 #endif
1467 }
1468
1469 /* chemical reactions */
1470
freeChemReaction(CChemicalReaction data)1471 extern "C" void freeChemReaction(CChemicalReaction data) {
1472 auto *rxn = (ChemicalReaction *)data;
1473 delete rxn;
1474 }
1475
constructChemReact(Reaction * data)1476 extern "C" CChemicalReaction constructChemReact(Reaction *data) {
1477 auto *rxn = new ChemicalReaction();
1478
1479 try {
1480 ByteA b(data);
1481 ReactionPickler::reactionFromPickle(b, rxn);
1482 } catch (ReactionPicklerException &e) {
1483 elog(ERROR, "reactionFromPickle: %s", e.what());
1484 } catch (...) {
1485 elog(ERROR, "constructChemReact: Unknown exception");
1486 }
1487
1488 return (CChemicalReaction)rxn;
1489 }
1490
deconstructChemReact(CChemicalReaction data)1491 extern "C" Reaction *deconstructChemReact(CChemicalReaction data) {
1492 auto *rxn = (ChemicalReaction *)data;
1493 ByteA b;
1494
1495 try {
1496 ReactionPickler::pickleReaction(rxn, b);
1497 } catch (ReactionPicklerException &e) {
1498 elog(ERROR, "pickleReaction: %s", e.what());
1499 } catch (...) {
1500 elog(ERROR, "deconstructChemReact: Unknown exception");
1501 }
1502
1503 return (Reaction *)b.toByteA();
1504 }
1505
parseChemReactText(char * data,bool asSmarts,bool warnOnFail)1506 extern "C" CChemicalReaction parseChemReactText(char *data, bool asSmarts,
1507 bool warnOnFail) {
1508 ChemicalReaction *rxn = nullptr;
1509
1510 try {
1511 if (asSmarts) {
1512 rxn = RxnSmartsToChemicalReaction(data);
1513 } else {
1514 rxn = RxnSmartsToChemicalReaction(data, nullptr, true);
1515 }
1516 if (getInitReaction()) {
1517 rxn->initReactantMatchers();
1518 }
1519 if (getMoveUnmappedReactantsToAgents() && hasReactionAtomMapping(*rxn)) {
1520 rxn->removeUnmappedReactantTemplates(getThresholdUnmappedReactantAtoms());
1521 }
1522 } catch (...) {
1523 rxn = nullptr;
1524 }
1525 if (rxn == nullptr) {
1526 if (warnOnFail) {
1527 ereport(WARNING,
1528 (errcode(ERRCODE_WARNING),
1529 errmsg("could not create chemical reaction from SMILES '%s'",
1530 data)));
1531 } else {
1532 ereport(ERROR,
1533 (errcode(ERRCODE_DATA_EXCEPTION),
1534 errmsg("could not create chemical reaction from SMILES '%s'",
1535 data)));
1536 }
1537 }
1538
1539 return (CChemicalReaction)rxn;
1540 }
1541
parseChemReactBlob(char * data,int len)1542 extern "C" CChemicalReaction parseChemReactBlob(char *data, int len) {
1543 ChemicalReaction *rxn = nullptr;
1544
1545 try {
1546 string binStr(data, len);
1547 rxn = new ChemicalReaction(binStr);
1548 if (getInitReaction()) {
1549 rxn->initReactantMatchers();
1550 }
1551 if (getMoveUnmappedReactantsToAgents() && hasReactionAtomMapping(*rxn)) {
1552 rxn->removeUnmappedReactantTemplates(getThresholdUnmappedReactantAtoms());
1553 }
1554 } catch (...) {
1555 ereport(ERROR,
1556 (errcode(ERRCODE_DATA_EXCEPTION),
1557 errmsg("problem generating chemical reaction from blob data")));
1558 }
1559 if (rxn == nullptr) {
1560 ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION),
1561 errmsg("blob data could not be parsed")));
1562 }
1563
1564 return (CChemicalReaction)rxn;
1565 }
1566
makeChemReactText(CChemicalReaction data,int * len,bool asSmarts)1567 extern "C" char *makeChemReactText(CChemicalReaction data, int *len,
1568 bool asSmarts) {
1569 auto *rxn = (ChemicalReaction *)data;
1570
1571 try {
1572 if (!asSmarts) {
1573 StringData = ChemicalReactionToRxnSmiles(*rxn);
1574 } else {
1575 StringData = ChemicalReactionToRxnSmarts(*rxn);
1576 }
1577 } catch (...) {
1578 ereport(WARNING, (errcode(ERRCODE_WARNING),
1579 errmsg("makeChemReactText: problems converting chemical "
1580 "reaction to SMILES/SMARTS")));
1581 StringData = "";
1582 }
1583
1584 *len = StringData.size();
1585 return (char *)StringData.c_str();
1586 }
1587
makeChemReactBlob(CChemicalReaction data,int * len)1588 extern "C" char *makeChemReactBlob(CChemicalReaction data, int *len) {
1589 auto *rxn = (ChemicalReaction *)data;
1590 StringData.clear();
1591 try {
1592 ReactionPickler::pickleReaction(*rxn, StringData);
1593 } catch (...) {
1594 elog(ERROR, "makeChemReactBlob: Unknown exception");
1595 }
1596
1597 *len = StringData.size();
1598 return (char *)StringData.data();
1599 }
1600
parseChemReactCTAB(char * data,bool warnOnFail)1601 extern "C" CChemicalReaction parseChemReactCTAB(char *data, bool warnOnFail) {
1602 ChemicalReaction *rxn = nullptr;
1603
1604 try {
1605 rxn = RxnBlockToChemicalReaction(data);
1606 if (getInitReaction()) {
1607 rxn->initReactantMatchers();
1608 }
1609 if (getMoveUnmappedReactantsToAgents() && hasReactionAtomMapping(*rxn)) {
1610 rxn->removeUnmappedReactantTemplates(getThresholdUnmappedReactantAtoms());
1611 }
1612 } catch (...) {
1613 rxn = nullptr;
1614 }
1615 if (rxn == nullptr) {
1616 if (warnOnFail) {
1617 ereport(WARNING,
1618 (errcode(ERRCODE_WARNING),
1619 errmsg("could not create reaction from CTAB '%s'", data)));
1620
1621 } else {
1622 ereport(ERROR,
1623 (errcode(ERRCODE_DATA_EXCEPTION),
1624 errmsg("could not create reaction from CTAB '%s'", data)));
1625 }
1626 }
1627
1628 return (CChemicalReaction)rxn;
1629 }
1630
makeCTABChemReact(CChemicalReaction data,int * len)1631 extern "C" char *makeCTABChemReact(CChemicalReaction data, int *len) {
1632 auto *rxn = (ChemicalReaction *)data;
1633
1634 try {
1635 StringData = ChemicalReactionToRxnBlock(*rxn);
1636 } catch (...) {
1637 ereport(
1638 WARNING,
1639 (errcode(ERRCODE_WARNING),
1640 errmsg("makeCTABChemReact: problems converting reaction to CTAB")));
1641 StringData = "";
1642 }
1643
1644 *len = StringData.size();
1645 return (char *)StringData.c_str();
1646 }
1647
ChemReactNumReactants(CChemicalReaction crxn)1648 extern "C" int ChemReactNumReactants(CChemicalReaction crxn) {
1649 const ChemicalReaction *rxn = (ChemicalReaction *)crxn;
1650 return rxn->getNumReactantTemplates();
1651 }
1652
ChemReactNumProducts(CChemicalReaction crxn)1653 extern "C" int ChemReactNumProducts(CChemicalReaction crxn) {
1654 const ChemicalReaction *rxn = (ChemicalReaction *)crxn;
1655 return rxn->getNumProductTemplates();
1656 }
1657
ChemReactNumAgents(CChemicalReaction crxn)1658 extern "C" int ChemReactNumAgents(CChemicalReaction crxn) {
1659 const ChemicalReaction *rxn = (ChemicalReaction *)crxn;
1660 return rxn->getNumAgentTemplates();
1661 }
1662
makeReactionSign(CChemicalReaction data)1663 extern "C" bytea *makeReactionSign(CChemicalReaction data) {
1664 auto *rxn = (ChemicalReaction *)data;
1665 ExplicitBitVect *res = nullptr;
1666 bytea *ret = nullptr;
1667
1668 try {
1669 RDKit::ReactionFingerprintParams params;
1670 params.fpType = static_cast<FingerprintType>(getReactionSubstructFpType());
1671 params.fpSize = getReactionSubstructFpSize();
1672 params.includeAgents = (!getIgnoreReactionAgents());
1673 params.bitRatioAgents = getReactionStructuralFPAgentBitRatio();
1674 res = RDKit::StructuralFingerprintChemReaction(*rxn, params);
1675
1676 if (res) {
1677 std::string sres = BitVectToBinaryText(*res);
1678
1679 unsigned int varsize = VARHDRSZ + sres.size();
1680 ret = (bytea *)palloc0(varsize);
1681 memcpy(VARDATA(ret), sres.data(), sres.size());
1682 SET_VARSIZE(ret, varsize);
1683
1684 delete res;
1685 res = nullptr;
1686 }
1687 } catch (...) {
1688 elog(ERROR, "makeReactionSign: Unknown exception");
1689 if (res) {
1690 delete res;
1691 }
1692 }
1693 return ret;
1694 }
1695
ReactionSubstruct(CChemicalReaction rxn,CChemicalReaction rxn2)1696 extern "C" int ReactionSubstruct(CChemicalReaction rxn,
1697 CChemicalReaction rxn2) {
1698 auto *rxnm = (ChemicalReaction *)rxn;
1699 auto *rxn2m = (ChemicalReaction *)rxn2;
1700
1701 /* Reaction search */
1702 if (rxn2m->getNumReactantTemplates() != 0 &&
1703 rxn2m->getNumProductTemplates() != 0) {
1704 return hasReactionSubstructMatch(*rxnm, *rxn2m,
1705 (!getIgnoreReactionAgents()));
1706 }
1707 /* Product search */
1708 if (rxn2m->getNumReactantTemplates() == 0 &&
1709 rxn2m->getNumProductTemplates() != 0) {
1710 if (rxn2m->getNumAgentTemplates() != 0 && !getIgnoreReactionAgents()) {
1711 return (hasProductTemplateSubstructMatch(*rxnm, *rxn2m) &&
1712 hasAgentTemplateSubstructMatch(*rxnm, *rxn2m));
1713 }
1714 return hasProductTemplateSubstructMatch(*rxnm, *rxn2m);
1715 }
1716 /* Reactant search */
1717 if (rxn2m->getNumReactantTemplates() != 0 &&
1718 rxn2m->getNumProductTemplates() == 0) {
1719 if (rxn2m->getNumAgentTemplates() != 0 && !getIgnoreReactionAgents()) {
1720 return (hasReactantTemplateSubstructMatch(*rxnm, *rxn2m) &&
1721 hasAgentTemplateSubstructMatch(*rxnm, *rxn2m));
1722 }
1723 return hasReactantTemplateSubstructMatch(*rxnm, *rxn2m);
1724 }
1725 /* Agent search */
1726 if (rxn2m->getNumReactantTemplates() == 0 &&
1727 rxn2m->getNumProductTemplates() == 0 &&
1728 rxn2m->getNumAgentTemplates() != 0) {
1729 return hasAgentTemplateSubstructMatch(*rxnm, *rxn2m);
1730 }
1731
1732 return false;
1733 }
1734
ReactionSubstructFP(CChemicalReaction rxn,CChemicalReaction rxnquery)1735 extern "C" int ReactionSubstructFP(CChemicalReaction rxn,
1736 CChemicalReaction rxnquery) {
1737 auto *rxnm = (ChemicalReaction *)rxn;
1738 auto *rxnqm = (ChemicalReaction *)rxnquery;
1739
1740 RDKit::ReactionFingerprintParams params;
1741 params.fpType = static_cast<FingerprintType>(getReactionSubstructFpType());
1742 params.fpSize = getReactionSubstructFpSize();
1743 params.includeAgents = (!getIgnoreReactionAgents());
1744 params.bitRatioAgents = getReactionStructuralFPAgentBitRatio();
1745
1746 ExplicitBitVect *fp1 = StructuralFingerprintChemReaction(*rxnm, params);
1747 ExplicitBitVect *fp2 = StructuralFingerprintChemReaction(*rxnqm, params);
1748
1749 if (fp1->getNumOnBits() < fp2->getNumOnBits()) {
1750 return false;
1751 }
1752 for (unsigned i = 0; i < fp1->getNumBits(); i++) {
1753 if ((fp1->getBit(i) & fp2->getBit(i)) != fp2->getBit(i)) {
1754 return false;
1755 }
1756 }
1757 return true;
1758 }
1759
1760 // some helper functions in anonymous namespace
1761 namespace {
1762
1763 struct MoleculeDescriptors {
MoleculeDescriptors__anonedffb5b40111::MoleculeDescriptors1764 MoleculeDescriptors() {}
1765 unsigned nAtoms{0};
1766 unsigned nBonds{0};
1767 unsigned nRings{0};
1768 double MW{0.0};
1769 };
1770
calcMolecularDescriptorsReaction(RDKit::ChemicalReaction * rxn,RDKit::ReactionMoleculeType t)1771 MoleculeDescriptors *calcMolecularDescriptorsReaction(
1772 RDKit::ChemicalReaction *rxn, RDKit::ReactionMoleculeType t) {
1773 auto *des = new MoleculeDescriptors();
1774 auto begin = getStartIterator(*rxn, t);
1775 auto end = getEndIterator(*rxn, t);
1776 for (; begin != end; ++begin) {
1777 des->nAtoms += begin->get()->getNumHeavyAtoms();
1778 des->nBonds += begin->get()->getNumBonds(true);
1779 des->MW = RDKit::Descriptors::calcAMW(*begin->get(), true);
1780 if (!begin->get()->getRingInfo()->isInitialized()) {
1781 begin->get()->updatePropertyCache();
1782 RDKit::MolOps::findSSSR(*begin->get());
1783 }
1784 des->nRings += begin->get()->getRingInfo()->numRings();
1785 }
1786 return des;
1787 }
1788
compareMolDescriptors(const MoleculeDescriptors & md1,const MoleculeDescriptors & md2)1789 int compareMolDescriptors(const MoleculeDescriptors &md1,
1790 const MoleculeDescriptors &md2) {
1791 int res = md1.nAtoms - md2.nAtoms;
1792 if (res) {
1793 return res;
1794 }
1795 res = md1.nBonds - md2.nBonds;
1796 if (res) {
1797 return res;
1798 }
1799 res = md1.nRings - md2.nRings;
1800 if (res) {
1801 return res;
1802 }
1803 res = int(md1.MW - md2.MW);
1804 if (res) {
1805 return res;
1806 }
1807 return 0;
1808 }
1809 } // namespace
1810
reactioncmp(CChemicalReaction rxn,CChemicalReaction rxn2)1811 extern "C" int reactioncmp(CChemicalReaction rxn, CChemicalReaction rxn2) {
1812 auto *rxnm = (ChemicalReaction *)rxn;
1813 auto *rxn2m = (ChemicalReaction *)rxn2;
1814
1815 if (!rxnm) {
1816 if (!rxn2m) {
1817 return 0;
1818 }
1819 return -1;
1820 }
1821 if (!rxn2m) {
1822 return 1;
1823 }
1824
1825 int res = rxnm->getNumReactantTemplates() - rxn2m->getNumReactantTemplates();
1826 if (res) {
1827 return res;
1828 }
1829 res = rxnm->getNumProductTemplates() - rxn2m->getNumProductTemplates();
1830 if (res) {
1831 return res;
1832 }
1833 if (!getIgnoreReactionAgents()) {
1834 res = rxnm->getNumAgentTemplates() - rxn2m->getNumAgentTemplates();
1835 if (res) {
1836 return res;
1837 }
1838 }
1839
1840 MoleculeDescriptors *rxn_react =
1841 calcMolecularDescriptorsReaction(rxnm, Reactant);
1842 MoleculeDescriptors *rxn2_react =
1843 calcMolecularDescriptorsReaction(rxn2m, Reactant);
1844 res = compareMolDescriptors(*rxn_react, *rxn2_react);
1845 delete (rxn_react);
1846 delete (rxn2_react);
1847 if (res) {
1848 return res;
1849 }
1850 MoleculeDescriptors *rxn_product =
1851 calcMolecularDescriptorsReaction(rxnm, Product);
1852 MoleculeDescriptors *rxn2_product =
1853 calcMolecularDescriptorsReaction(rxn2m, Product);
1854 res = compareMolDescriptors(*rxn_product, *rxn2_product);
1855 delete (rxn_product);
1856 delete (rxn2_product);
1857 if (res) {
1858 return res;
1859 }
1860 if (!getIgnoreReactionAgents()) {
1861 MoleculeDescriptors *rxn_agent =
1862 calcMolecularDescriptorsReaction(rxnm, Agent);
1863 MoleculeDescriptors *rxn2_agent =
1864 calcMolecularDescriptorsReaction(rxn2m, Agent);
1865 res = compareMolDescriptors(*rxn_agent, *rxn2_agent);
1866 delete (rxn_agent);
1867 delete (rxn2_agent);
1868 if (res) {
1869 return res;
1870 }
1871 }
1872
1873 RDKit::MatchVectType matchVect;
1874 if (hasReactionSubstructMatch(*rxnm, *rxn2m, (!getIgnoreReactionAgents()))) {
1875 return 0;
1876 }
1877 return -1;
1878 }
1879
makeReactionDifferenceSFP(CChemicalReaction data,int size,int fpType)1880 extern "C" CSfp makeReactionDifferenceSFP(CChemicalReaction data, int size,
1881 int fpType) {
1882 auto *rxn = (ChemicalReaction *)data;
1883 SparseFP *res = nullptr;
1884
1885 try {
1886 if (fpType > 3 || fpType < 1) {
1887 elog(ERROR, "makeReactionDifferenceSFP: Unknown Fingerprint type");
1888 }
1889 auto fp = static_cast<RDKit::FingerprintType>(fpType);
1890 RDKit::ReactionFingerprintParams params;
1891 params.fpType = static_cast<FingerprintType>(fpType);
1892 params.fpSize = size;
1893 params.includeAgents = (!getIgnoreReactionAgents());
1894 params.agentWeight = getReactionDifferenceFPWeightAgents();
1895 params.nonAgentWeight = getReactionDifferenceFPWeightNonagents();
1896 res = (SparseFP *)RDKit::DifferenceFingerprintChemReaction(*rxn, params);
1897 } catch (...) {
1898 elog(ERROR, "makeReactionDifferenceSFP: Unknown exception");
1899 }
1900 return (CSfp)res;
1901 }
1902
makeReactionBFP(CChemicalReaction data,int size,int fpType)1903 extern "C" CBfp makeReactionBFP(CChemicalReaction data, int size, int fpType) {
1904 auto *rxn = (ChemicalReaction *)data;
1905 ExplicitBitVect *res = nullptr;
1906
1907 try {
1908 if (fpType > 5 || fpType < 1) {
1909 elog(ERROR, "makeReactionBFP: Unknown Fingerprint type");
1910 }
1911 auto fp = static_cast<RDKit::FingerprintType>(fpType);
1912 RDKit::ReactionFingerprintParams params;
1913 params.fpType = static_cast<FingerprintType>(fpType);
1914 params.fpSize = size;
1915 params.includeAgents = (!getIgnoreReactionAgents());
1916 params.bitRatioAgents = getReactionStructuralFPAgentBitRatio();
1917 res = (ExplicitBitVect *)RDKit::StructuralFingerprintChemReaction(*rxn,
1918 params);
1919 } catch (...) {
1920 elog(ERROR, "makeReactionBFP: Unknown exception");
1921 }
1922
1923 if (res) {
1924 std::string *sres = new std::string(BitVectToBinaryText(*res));
1925 delete res;
1926 return (CBfp)sres;
1927 } else {
1928 return nullptr;
1929 }
1930 }
1931
computeNMMolHash(CROMol data,const char * which)1932 extern "C" char *computeNMMolHash(CROMol data, const char *which) {
1933 RWMol mol(*(ROMol *)data);
1934
1935 RDKit::MolHash::HashFunction func =
1936 RDKit::MolHash::HashFunction::AnonymousGraph;
1937 if (!strcmp(which, "AnonymousGraph")) {
1938 func = RDKit::MolHash::HashFunction::AnonymousGraph;
1939 } else if (!strcmp(which, "ElementGraph")) {
1940 func = RDKit::MolHash::HashFunction::ElementGraph;
1941 } else if (!strcmp(which, "CanonicalSmiles")) {
1942 func = RDKit::MolHash::HashFunction::CanonicalSmiles;
1943 } else if (!strcmp(which, "MurckoScaffold")) {
1944 func = RDKit::MolHash::HashFunction::MurckoScaffold;
1945 } else if (!strcmp(which, "ExtendedMurcko")) {
1946 func = RDKit::MolHash::HashFunction::ExtendedMurcko;
1947 } else if (!strcmp(which, "MolFormula")) {
1948 func = RDKit::MolHash::HashFunction::MolFormula;
1949 } else if (!strcmp(which, "AtomBondCounts")) {
1950 func = RDKit::MolHash::HashFunction::AtomBondCounts;
1951 } else if (!strcmp(which, "DegreeVector")) {
1952 func = RDKit::MolHash::HashFunction::DegreeVector;
1953 } else if (!strcmp(which, "Mesomer")) {
1954 func = RDKit::MolHash::HashFunction::Mesomer;
1955 } else if (!strcmp(which, "HetAtomTautomer")) {
1956 func = RDKit::MolHash::HashFunction::HetAtomTautomer;
1957 } else if (!strcmp(which, "HetAtomProtomer")) {
1958 func = RDKit::MolHash::HashFunction::HetAtomProtomer;
1959 } else if (!strcmp(which, "RedoxPair")) {
1960 func = RDKit::MolHash::HashFunction::RedoxPair;
1961 } else if (!strcmp(which, "Regioisomer")) {
1962 func = RDKit::MolHash::HashFunction::Regioisomer;
1963 } else if (!strcmp(which, "NetCharge")) {
1964 func = RDKit::MolHash::HashFunction::NetCharge;
1965 } else if (!strcmp(which, "SmallWorldIndexBR")) {
1966 func = RDKit::MolHash::HashFunction::SmallWorldIndexBR;
1967 } else if (!strcmp(which, "SmallWorldIndexBRL")) {
1968 func = RDKit::MolHash::HashFunction::SmallWorldIndexBRL;
1969 } else if (!strcmp(which, "ArthorSubstructureOrder")) {
1970 func = RDKit::MolHash::HashFunction::ArthorSubstructureOrder;
1971 } else {
1972 ereport(
1973 WARNING,
1974 (errcode(ERRCODE_WARNING),
1975 errmsg(
1976 "computeNMMolHash: hash %s not recognized, using AnonymousGraph",
1977 which)));
1978 }
1979
1980 string text;
1981 try {
1982 text = RDKit::MolHash::MolHash(&mol, func);
1983 } catch (...) {
1984 ereport(WARNING,
1985 (errcode(ERRCODE_WARNING), errmsg("computeMolHash: failed")));
1986 }
1987 return strdup(text.c_str());
1988 }
1989
findMCSsmiles(char * smiles,char * params)1990 extern "C" char *findMCSsmiles(char *smiles, char *params) {
1991 static string mcs;
1992 mcs.clear();
1993
1994 char *str = smiles;
1995 char *s = str;
1996 int len, nmols = 0;
1997 std::vector<RDKit::ROMOL_SPTR> molecules;
1998 while (*s && *s <= ' ') {
1999 s++;
2000 }
2001 while (*s > ' ') {
2002 len = 0;
2003 while (s[len] > ' ') {
2004 len++;
2005 }
2006 s[len] = '\0';
2007 if (0 == strlen(s)) {
2008 continue;
2009 }
2010 ROMol *molptr = nullptr;
2011 try {
2012 molptr = RDKit::SmilesToMol(s);
2013 } catch (...) {
2014 molptr = nullptr;
2015 }
2016 if (molptr == nullptr) {
2017 ereport(
2018 ERROR,
2019 (errcode(ERRCODE_DATA_EXCEPTION),
2020 errmsg("findMCS: could not create molecule from SMILES '%s'", s)));
2021 return strdup("");
2022 }
2023 molecules.push_back(RDKit::ROMOL_SPTR(molptr));
2024 // elog(WARNING, s);
2025 s += len;
2026 s++; // do s++; while(*s && *s <= ' ');
2027 }
2028
2029 RDKit::MCSParameters p;
2030
2031 if (params && 0 != strlen(params)) {
2032 try {
2033 RDKit::parseMCSParametersJSON(params, &p);
2034 } catch (...) {
2035 ereport(WARNING, (errcode(ERRCODE_WARNING),
2036 errmsg("findMCS: Invalid argument \'params\'")));
2037 return strdup("");
2038 }
2039 }
2040
2041 try {
2042 MCSResult res = RDKit::findMCS(molecules, &p);
2043 mcs = res.SmartsString;
2044 if (!res.isCompleted())
2045 ereport(WARNING, (errcode(ERRCODE_WARNING),
2046 errmsg("findMCS timed out, result is not maximal")));
2047 } catch (...) {
2048 ereport(WARNING, (errcode(ERRCODE_WARNING), errmsg("findMCS: failed")));
2049 mcs.clear();
2050 }
2051 return mcs.empty() ? strdup("") : strdup(mcs.c_str());
2052 }
2053
addMol2list(void * lst,Mol * mol)2054 extern "C" void *addMol2list(void *lst, Mol *mol) {
2055 try {
2056 if (!lst) {
2057 // elog(WARNING, "addMol2list: allocate new list");
2058 lst = new std::vector<RDKit::ROMOL_SPTR>;
2059 }
2060 std::vector<RDKit::ROMOL_SPTR> &mlst =
2061 *(std::vector<RDKit::ROMOL_SPTR> *)lst;
2062 // elog(WARNING, "addMol2list: create a copy of new mol");
2063 auto *m = (ROMol *)constructROMol(
2064 mol); // new ROMol(*(const ROMol*)mol, false); // create a copy
2065 // elog(WARNING, "addMol2list: append new mol into list");
2066 mlst.push_back(RDKit::ROMOL_SPTR(m));
2067 // elog(WARNING, "addMol2list: finished");
2068 } catch (...) {
2069 // elog(WARNING, "addMol2list: ERROR");
2070 ereport(WARNING, (errcode(ERRCODE_WARNING), errmsg("addMol2list: failed")));
2071 }
2072 return lst;
2073 }
2074
findMCS(void * vmols,char * params)2075 extern "C" char *findMCS(void *vmols, char *params) {
2076 static string mcs;
2077 mcs.clear();
2078 std::vector<RDKit::ROMOL_SPTR> *molecules =
2079 (std::vector<RDKit::ROMOL_SPTR> *)vmols;
2080 // char t[256];
2081 // sprintf(t,"findMCS(): lst=%p, size=%u", molecules, molecules->size());
2082 // elog(WARNING, t);
2083
2084 RDKit::MCSParameters p;
2085
2086 if (params && 0 != strlen(params)) {
2087 try {
2088 RDKit::parseMCSParametersJSON(params, &p);
2089 } catch (...) {
2090 // mcs = params; //DEBUG
2091 ereport(WARNING, (errcode(ERRCODE_WARNING),
2092 errmsg("findMCS: Invalid argument \'params\'")));
2093 return strdup(mcs.c_str());
2094 }
2095 }
2096
2097 try {
2098 MCSResult res = RDKit::findMCS(*molecules, &p);
2099 if (!res.isCompleted())
2100 ereport(WARNING, (errcode(ERRCODE_WARNING),
2101 errmsg("findMCS timed out, result is not maximal")));
2102 mcs = res.SmartsString;
2103 } catch (...) {
2104 ereport(WARNING, (errcode(ERRCODE_WARNING), errmsg("findMCS: failed")));
2105 mcs.clear();
2106 }
2107 // sprintf(t,"findMCS(): MCS='%s'", mcs.c_str());
2108 // elog(WARNING, t);
2109 delete molecules;
2110 // elog(WARNING, "findMCS(): molecules deleted. FINISHED.");
2111 return strdup(mcs.c_str());
2112 }
2113