1 /**********************************************************************
sort.cpp - An OBOp for sorting molecules during conversion.
3
4 Copyright (C) 2009 by Chris Morley
5
6 This file is part of the Open Babel project.
7 For more information, see <http://openbabel.org/>
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation version 2 of the License.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17 ***********************************************************************/
18 #include <openbabel/babelconfig.h>
19 #include <openbabel/op.h>
20 #include <openbabel/mol.h>
21 #include <openbabel/obconversion.h>
22 #include <openbabel/descriptor.h>
23 #include <openbabel/obutil.h>
24 #include "deferred.h"
25 #include <set>
26 #include <algorithm>
27
28 namespace OpenBabel
29 {
30
31 template<class T>
32 struct Order : public std::binary_function<std::pair<OBBase*,T>, std::pair<OBBase*,T>, bool>
33 {
OrderOpenBabel::Order34 Order(OBDescriptor* pDesc, bool rev) : _pDesc(pDesc), _rev(rev){}
operator ()OpenBabel::Order35 bool operator()(std::pair<OBBase*,T> p1, std::pair<OBBase*,T> p2) const
36 {
37 return _rev ?
38 _pDesc->Order(p2.second, p1.second) :
39 _pDesc->Order(p1.second, p2.second);
40 }
41 OBDescriptor* _pDesc;
42 bool _rev;
43 };
44 //*****************************************************************
45 class OpSort : public OBOp
46 {
47 public:
OpSort(const char * ID)48 OpSort(const char* ID) : OBOp(ID, false)
49 {
50 OBConversion::RegisterOptionParam(ID, nullptr, 1, OBConversion::GENOPTIONS);
51 }
52
Description()53 const char* Description(){ return "<desc> Sort by descriptor(~desc for reverse)"
54 "\n Follow descriptor with + to also add it to the title, e.g. MW+ "
55 "\n Custom ordering is possible; see inchi descriptor"; }
56
WorksWith(OBBase * pOb) const57 virtual bool WorksWith(OBBase* pOb) const { return dynamic_cast<OBMol*>(pOb) != nullptr; }
58 virtual bool Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv);
59 virtual bool ProcessVec(std::vector<OBBase*>& vec);
60 private:
61 OBDescriptor* _pDesc;
62 std::string _pDescOption;
63 bool _rev;
64 bool _addDescToTitle;
65 };
66
67 /////////////////////////////////////////////////////////////////
// Global instance: constructs the op with the ID "sort". The constructor
// registers that ID as an option with OBConversion.
OpSort theOpSort("sort"); //Global instance
69
70 /////////////////////////////////////////////////////////////////
Do(OBBase * pOb,const char * OptionText,OpMap * pmap,OBConversion * pConv)71 bool OpSort::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv)
72 {
73 if(pConv && pConv->IsFirstInput())
74 {
75 _rev=false;
76 if(*OptionText=='~')
77 {
78 _rev=true;
79 ++OptionText;
80 }
81
82 const char* pLast = OptionText + strlen(OptionText)-1;
83 _addDescToTitle = *(OptionText + strlen(OptionText)-1)=='+';//last char
84 if(_addDescToTitle)
85 *const_cast<char*>(pLast)='\0';
86
87 std::istringstream optionStream(OptionText);
88 std::pair<std::string,std::string> spair = OBDescriptor::GetIdentifier(optionStream);
89 _pDesc = OBDescriptor::FindType(spair.first.c_str());
90 if(!_pDesc)
91 {
92 obErrorLog.ThrowError(__FUNCTION__,
93 std::string("Unknown descriptor ") + OptionText, obError, onceOnly);
94 return false;
95 }
96 _pDescOption = spair.second;
97 _pDesc->Init();//needed to clear cache of InChIFilter
98
99
100 //Make a deferred format and divert the output to it
101 new DeferredFormat(pConv, this); //it will delete itself
102 }
103 return true;
104 }
105
106 //****************************************************************
ProcessVec(std::vector<OBBase * > & vec)107 bool OpSort::ProcessVec(std::vector<OBBase*>& vec)
108 {
109 // Make a vector containing both the OBBase* and the descriptor value and the sort it
110 if(!IsNan(_pDesc->Predict(vec[0], &_pDescOption)))
111 {
112 //a numerical descriptor
113 //Copy into a pair vector
114 std::vector<std::pair<OBBase*,double> > valvec;
115 valvec.reserve(vec.size());
116 std::vector<OBBase*>::iterator iter;
117 for(iter=vec.begin();iter!=vec.end();++iter)
118 valvec.push_back(std::make_pair<OBBase*,double>(&(**iter), _pDesc->Predict(*iter, &_pDescOption)));
119
120 //Sort
121 std::sort(valvec.begin(),valvec.end(), Order<double>(_pDesc, _rev));
122
123 //Copy back
124 std::vector<std::pair<OBBase*,double> >::iterator valiter;
125 iter=vec.begin();
126 for(valiter=valvec.begin();valiter!=valvec.end();++valiter, ++iter)
127 {
128 *iter = valiter->first;
129 if(_addDescToTitle)
130 {
131 std::stringstream ss;
132 ss << (*iter)->GetTitle() << ' ' << valiter->second;
133 (*iter)->SetTitle(ss.str().c_str());
134 }
135 }
136 }
137 else
138 {
139 //a string descriptor
140 //Copy into a pair vector
141 std::vector<std::pair<OBBase*,std::string> > valvec;
142 valvec.reserve(vec.size());
143 std::vector<OBBase*>::iterator iter;
144 std::string s;
145 for(iter=vec.begin();iter!=vec.end();++iter)
146 {
147 _pDesc->GetStringValue(*iter, s, &_pDescOption);
148 valvec.push_back(std::pair<OBBase*,std::string>(&(**iter), s));
149 }
150
151 //Sort
152 std::sort(valvec.begin(),valvec.end(), Order<std::string>(_pDesc, _rev));
153
154 //Copy back
155 std::vector<std::pair<OBBase*,std::string> >::iterator valiter;
156 iter=vec.begin();
157 for(valiter=valvec.begin();valiter!=valvec.end();++valiter, ++iter)
158 {
159 *iter = valiter->first;
160 if(_addDescToTitle)
161 {
162 std::stringstream ss;
163 ss << (*iter)->GetTitle() << ' ' << valiter->second;
164 (*iter)->SetTitle(ss.str().c_str());
165 }
166 }
167 }
168
169 return true;
170 }
/*
This started as a nice compact piece of code! The need to handle descriptors
which return either numbers or strings was originally met without testing
the type here, by using LessThan() in the descriptor. But this meant that the
descriptor value was recalculated every time it was needed, which is inappropriate
for sorting. A local cache of InChI values was implemented, but a more general
solution was needed. The values are now calculated once and stored here in a
vector, which stores numbers or strings, and the code is extensively duplicated
because of this. But using templates was not much shorter, because four templated
functions were needed, and the code was more difficult to understand.
*/
182
183 }//namespace
184