1 /**********************************************************************
sort.cpp - An OBOp for sorting molecules during conversion.
3 
4 Copyright (C) 2009 by Chris Morley
5 
6 This file is part of the Open Babel project.
7 For more information, see <http://openbabel.org/>
8 
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation version 2 of the License.
12 
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 ***********************************************************************/
18 #include <openbabel/babelconfig.h>
19 #include <openbabel/op.h>
20 #include <openbabel/mol.h>
21 #include <openbabel/obconversion.h>
22 #include <openbabel/descriptor.h>
23 #include <openbabel/obutil.h>
24 #include "deferred.h"
25 #include <set>
26 #include <algorithm>
27 
28 namespace OpenBabel
29 {
30 
31 template<class T>
32 struct Order : public std::binary_function<std::pair<OBBase*,T>, std::pair<OBBase*,T>, bool>
33 {
OrderOpenBabel::Order34   Order(OBDescriptor* pDesc, bool rev) : _pDesc(pDesc), _rev(rev){}
operator ()OpenBabel::Order35   bool operator()(std::pair<OBBase*,T> p1, std::pair<OBBase*,T> p2) const
36   {
37     return _rev ?
38       _pDesc->Order(p2.second, p1.second) :
39       _pDesc->Order(p1.second, p2.second);
40   }
41   OBDescriptor* _pDesc;
42   bool _rev;
43 };
44 //*****************************************************************
45 class OpSort : public OBOp
46 {
47 public:
OpSort(const char * ID)48   OpSort(const char* ID) : OBOp(ID, false)
49   {
50     OBConversion::RegisterOptionParam(ID, nullptr, 1, OBConversion::GENOPTIONS);
51   }
52 
Description()53   const char* Description(){ return "<desc> Sort by descriptor(~desc for reverse)"
54     "\n Follow descriptor with + to also add it to the title, e.g. MW+ "
55     "\n Custom ordering is possible; see inchi descriptor"; }
56 
WorksWith(OBBase * pOb) const57   virtual bool WorksWith(OBBase* pOb) const { return dynamic_cast<OBMol*>(pOb) != nullptr; }
58   virtual bool Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv);
59   virtual bool ProcessVec(std::vector<OBBase*>& vec);
60 private:
61   OBDescriptor* _pDesc;
62   std::string _pDescOption;
63   bool _rev;
64   bool _addDescToTitle;
65 };
66 
67 /////////////////////////////////////////////////////////////////
// Constructing this object runs the OpSort constructor, which registers the
// "sort" option parameter with OBConversion.
OpSort theOpSort("sort"); //Global instance
69 
70 /////////////////////////////////////////////////////////////////
Do(OBBase * pOb,const char * OptionText,OpMap * pmap,OBConversion * pConv)71 bool OpSort::Do(OBBase* pOb, const char* OptionText, OpMap* pmap, OBConversion* pConv)
72 {
73   if(pConv && pConv->IsFirstInput())
74   {
75     _rev=false;
76     if(*OptionText=='~')
77     {
78       _rev=true;
79       ++OptionText;
80     }
81 
82     const char* pLast = OptionText + strlen(OptionText)-1;
83     _addDescToTitle = *(OptionText + strlen(OptionText)-1)=='+';//last char
84     if(_addDescToTitle)
85       *const_cast<char*>(pLast)='\0';
86 
87     std::istringstream optionStream(OptionText);
88     std::pair<std::string,std::string> spair = OBDescriptor::GetIdentifier(optionStream);
89     _pDesc = OBDescriptor::FindType(spair.first.c_str());
90     if(!_pDesc)
91     {
92      obErrorLog.ThrowError(__FUNCTION__,
93               std::string("Unknown descriptor ") + OptionText, obError, onceOnly);
94      return false;
95     }
96     _pDescOption = spair.second;
97     _pDesc->Init();//needed  to clear cache of InChIFilter
98 
99 
100     //Make a deferred format and divert the output to it
101     new DeferredFormat(pConv, this); //it will delete itself
102   }
103   return true;
104 }
105 
106 //****************************************************************
ProcessVec(std::vector<OBBase * > & vec)107 bool OpSort::ProcessVec(std::vector<OBBase*>& vec)
108 {
109   // Make a vector containing both the OBBase* and the descriptor value and the sort it
110   if(!IsNan(_pDesc->Predict(vec[0], &_pDescOption)))
111   {
112     //a numerical descriptor
113     //Copy into a pair vector
114     std::vector<std::pair<OBBase*,double> > valvec;
115     valvec.reserve(vec.size());
116     std::vector<OBBase*>::iterator iter;
117     for(iter=vec.begin();iter!=vec.end();++iter)
118       valvec.push_back(std::make_pair<OBBase*,double>(&(**iter), _pDesc->Predict(*iter, &_pDescOption)));
119 
120     //Sort
121     std::sort(valvec.begin(),valvec.end(), Order<double>(_pDesc, _rev));
122 
123     //Copy back
124     std::vector<std::pair<OBBase*,double> >::iterator valiter;
125     iter=vec.begin();
126     for(valiter=valvec.begin();valiter!=valvec.end();++valiter, ++iter)
127     {
128       *iter = valiter->first;
129       if(_addDescToTitle)
130       {
131         std::stringstream ss;
132         ss << (*iter)->GetTitle() << ' ' << valiter->second;
133         (*iter)->SetTitle(ss.str().c_str());
134       }
135     }
136   }
137   else
138   {
139     //a string descriptor
140     //Copy into a pair vector
141     std::vector<std::pair<OBBase*,std::string> > valvec;
142     valvec.reserve(vec.size());
143     std::vector<OBBase*>::iterator iter;
144     std::string s;
145     for(iter=vec.begin();iter!=vec.end();++iter)
146     {
147       _pDesc->GetStringValue(*iter, s, &_pDescOption);
148       valvec.push_back(std::pair<OBBase*,std::string>(&(**iter), s));
149     }
150 
151     //Sort
152     std::sort(valvec.begin(),valvec.end(), Order<std::string>(_pDesc, _rev));
153 
154     //Copy back
155     std::vector<std::pair<OBBase*,std::string> >::iterator valiter;
156     iter=vec.begin();
157     for(valiter=valvec.begin();valiter!=valvec.end();++valiter, ++iter)
158     {
159       *iter = valiter->first;
160       if(_addDescToTitle)
161       {
162         std::stringstream ss;
163         ss << (*iter)->GetTitle() << ' ' << valiter->second;
164         (*iter)->SetTitle(ss.str().c_str());
165       }
166     }
167   }
168 
169   return true;
170 }
171 /*
172 This started as a nice compact piece of code! The need to handle descriptors
173 which return either numbers or strings was originally achieved without testing
174 the type here by using LessThan() in the descriptor. But this meant that the
175 descriptor value was recalculated every time it was needed, which is inappropriate
176 for sorting. A local cache of InChI values was implemented but a more general
177 solution was needed. The values are now calculated once and stored here in a
178 vector, which stores numbers or strings and the code is extensively duplicated
179 because of this. But using templates was not much shorter because four templated
180 functions were needed, and the code more difficult to understand.
181 */
182 
183 }//namespace
184