1 // Copyright (C) 2018 Lorton
2 //
3 // @@ All Rights Reserved @@
4 // This file is part of the RDKit.
5 // The contents are covered by the terms of the BSD license
6 // which is included in the file license.txt, found at the root
7 // of the RDKit source tree.
8 //
9
10 #define NO_IMPORT_ARRAY
11
12 #include <RDBoost/python.h>
13
#include <fstream>
#include <memory>
#include <string>
16
17 // ours
18 #include <RDGeneral/BadFileException.h>
19 #include <RDGeneral/FileParseException.h>
20 #include <GraphMol/FileParsers/MolSupplier.h>
21 #include <GraphMol/RDKitBase.h>
22 #include <RDBoost/python_streambuf.h>
23
24 #include <maeparser/MaeConstants.hpp>
25 #include <maeparser/Reader.hpp>
26
27 #include "MolSupplier.h"
28 #include "ContextManagers.h"
29
30 namespace python = boost::python;
31
32 using namespace schrodinger;
33 using boost_adaptbx::python::streambuf;
34 namespace {
35
streamIsGoodOrExhausted(std::istream * stream)36 bool streamIsGoodOrExhausted(std::istream *stream) {
37 PRECONDITION(stream, "bad stream");
38 return stream->good() || (stream->eof() && stream->fail() && !stream->bad());
39 }
40
41 class LocalMaeMolSupplier : public RDKit::MaeMolSupplier {
42 public:
LocalMaeMolSupplier(python::object & input,bool sanitize,bool removeHs)43 LocalMaeMolSupplier(python::object &input, bool sanitize, bool removeHs) {
44 // FIX: minor leak here
45 auto *sb = new streambuf(input);
46 dp_inStream = new streambuf::istream(*sb);
47 dp_sInStream.reset(dp_inStream);
48 df_owner = true;
49 df_sanitize = sanitize;
50 df_removeHs = removeHs;
51 d_reader.reset(new mae::Reader(dp_sInStream));
52 CHECK_INVARIANT(streamIsGoodOrExhausted(dp_inStream), "bad instream");
53
54 try {
55 d_next_struct = d_reader->next(mae::CT_BLOCK);
56 } catch (const mae::read_exception &e) {
57 throw RDKit::FileParseException(e.what());
58 }
59 }
LocalMaeMolSupplier(streambuf & input,bool sanitize,bool removeHs)60 LocalMaeMolSupplier(streambuf &input, bool sanitize, bool removeHs) {
61 dp_inStream = new streambuf::istream(input);
62 dp_sInStream.reset(dp_inStream);
63 df_owner = true;
64 df_sanitize = sanitize;
65 df_removeHs = removeHs;
66 d_reader.reset(new mae::Reader(dp_sInStream));
67 CHECK_INVARIANT(streamIsGoodOrExhausted(dp_inStream), "bad instream");
68
69 try {
70 d_next_struct = d_reader->next(mae::CT_BLOCK);
71 } catch (const mae::read_exception &e) {
72 throw RDKit::FileParseException(e.what());
73 }
74 }
75
LocalMaeMolSupplier(const std::string & fname,bool sanitize=true,bool removeHs=true)76 LocalMaeMolSupplier(const std::string &fname, bool sanitize = true,
77 bool removeHs = true)
78 : RDKit::MaeMolSupplier(fname, sanitize, removeHs) {}
79 }; // namespace
80
FwdMolSupplIter(LocalMaeMolSupplier * self)81 LocalMaeMolSupplier *FwdMolSupplIter(LocalMaeMolSupplier *self) { return self; }
82 } // namespace
83
84 namespace RDKit {
85
86 std::string maeMolSupplierClassDoc =
87 "A class which supplies molecules from file-like object containing Maestro data.\n\
88 \n\
89 Usage examples:\n\
90 \n\
91 1) Lazy evaluation: the molecules are not constructed until we ask for them:\n\n\
92 >>> suppl = MaeMolSupplier(file('in.mae'))\n\
93 >>> for mol in suppl:\n\
94 ... if mol is not None: mol.GetNumAtoms()\n\
95 \n\
96 2) we can also read from compressed files: \n\n\
97 >>> import gzip\n\
98 >>> suppl = MaeMolSupplier(gzip.open('in.maegz'))\n\
99 >>> for mol in suppl:\n\
100 ... if mol is not None: print mol.GetNumAtoms()\n\
101 \n\
102 Properties in the Maestro file are used to set properties on each molecule.\n\
103 The properties are accessible using the mol.GetProp(propName) method.\n\
104 \n";
105 struct maemolsup_wrap {
wrapRDKit::maemolsup_wrap106 static void wrap() {
107 python::class_<LocalMaeMolSupplier, boost::noncopyable>(
108 "MaeMolSupplier", maeMolSupplierClassDoc.c_str(), python::no_init)
109 .def(python::init<python::object &, bool, bool>(
110 (python::arg("fileobj"), python::arg("sanitize") = true,
111 python::arg("removeHs") =
112 true))[python::with_custodian_and_ward_postcall<0, 2>()])
113 .def(python::init<streambuf &, bool, bool>(
114 (python::arg("streambuf"), python::arg("sanitize") = true,
115 python::arg("removeHs") =
116 true))[python::with_custodian_and_ward_postcall<0, 2>()])
117 .def(python::init<std::string, bool, bool>(
118 (python::arg("filename"), python::arg("sanitize") = true,
119 python::arg("removeHs") = true)))
120 .def("__enter__", &MolIOEnter<LocalMaeMolSupplier>,
121 python::return_internal_reference<>())
122 .def("__exit__", &MolIOExit<LocalMaeMolSupplier>)
123 .def("__next__", &MolSupplNext<LocalMaeMolSupplier>,
124 "Returns the next molecule in the file. Raises _StopIteration_ "
125 "on EOF.\n",
126 python::return_value_policy<python::manage_new_object>())
127 .def("atEnd", &MaeMolSupplier::atEnd,
128 "Returns whether or not we have hit EOF.\n")
129 .def("__iter__", &FwdMolSupplIter,
130 python::return_internal_reference<1>());
131 };
132 };
133 } // namespace RDKit
134
wrap_maesupplier()135 void wrap_maesupplier() { RDKit::maemolsup_wrap::wrap(); }
136