1 //
2 //  Copyright (C) 2004-2008 Greg Landrum and Rational Discovery LLC
3 //
4 //   @@ All Rights Reserved @@
5 //  This file is part of the RDKit.
6 //  The contents are covered by the terms of the BSD license
7 //  which is included in the file license.txt, found at the root
8 //  of the RDKit source tree.
9 //
10 #include <RDGeneral/export.h>
11 #ifndef __RD_VECTOR_H__
12 #define __RD_VECTOR_H__
13 
14 #include <RDGeneral/Invariant.h>
15 #include <RDGeneral/utils.h>
16 #include <cmath>
17 #include <iostream>
18 #include <iomanip>
19 #include <cstdlib>
20 #include <cstring>
21 #include <ctime>
22 #include <boost/random.hpp>
23 #include <boost/smart_ptr.hpp>
24 
25 namespace RDNumeric {
26 
27 //! A class to represent vectors of numbers.
28 template <class TYPE>
29 class Vector {
30  public:
31   typedef boost::shared_array<TYPE> DATA_SPTR;
32 
33   //! Initialize with only a size.
Vector(unsigned int N)34   explicit Vector(unsigned int N) {
35     d_size = N;
36     TYPE *data = new TYPE[N];
37     memset(static_cast<void *>(data), 0, d_size * sizeof(TYPE));
38     d_data.reset(data);
39   }
40 
41   //! Initialize with a size and default value.
Vector(unsigned int N,TYPE val)42   Vector(unsigned int N, TYPE val) {  //: Vector(N) {
43     d_size = N;
44     TYPE *data = new TYPE[N];
45 
46     unsigned int i;
47     for (i = 0; i < N; i++) {
48       data[i] = val;
49     }
50     d_data.reset(data);
51   }
52 
53   //! Initialize from a smart pointer.
54   /*!
55     <b>NOTE:</b> the data is not copied in this case
56   */
Vector(unsigned int N,DATA_SPTR data)57   Vector(unsigned int N, DATA_SPTR data) {  // TYPE *data) {
58     d_size = N;
59     d_data = data;
60   }
61 
62   //! copy constructor
63   /*! We make a copy of the other vector's data.
64    */
Vector(const Vector & other)65   Vector(const Vector &other) {
66     d_size = other.size();
67     const TYPE *otherData = other.getData();
68     TYPE *data = new TYPE[d_size];
69 
70     memcpy(static_cast<void *>(data), static_cast<const void *>(otherData),
71            d_size * sizeof(TYPE));
72     d_data.reset(data);
73   }
74 
~Vector()75   ~Vector() {}
76 
77   //! return the size (dimension) of the vector
size()78   unsigned int size() const { return d_size; }
79 
80   //! returns the value at a particular index
getVal(unsigned int i)81   inline TYPE getVal(unsigned int i) const {
82     PRECONDITION(i < d_size, "bad index");
83     return d_data[i];
84   }
85 
86   //! sets the index at a particular value
setVal(unsigned int i,TYPE val)87   inline void setVal(unsigned int i, TYPE val) {
88     PRECONDITION(i < d_size, "bad index");
89     d_data[i] = val;
90   }
91 
92   inline TYPE operator[](unsigned int i) const {
93     PRECONDITION(i < d_size, "bad index");
94     return d_data[i];
95   }
96 
97   inline TYPE &operator[](unsigned int i) {
98     PRECONDITION(i < d_size, "bad index");
99     return d_data[i];
100   }
101 
102   //! returns a pointer to our data array
getData()103   inline TYPE *getData() { return d_data.get(); }
104 
105   //! returns a const pointer to our data array
getData()106   inline const TYPE *getData() const {
107     // return dp_data;
108     return d_data.get();
109   }
110 
111   //! Copy operator.
112   /*! We make a copy of the other Vector's data.
113    */
114 
assign(const Vector<TYPE> & other)115   Vector<TYPE> &assign(const Vector<TYPE> &other) {
116     PRECONDITION(d_size == other.size(), "Size mismatch in vector copying");
117     const TYPE *otherData = other.getData();
118     memcpy(static_cast<void *>(d_data.get()),
119            static_cast<const void *>(otherData), d_size * sizeof(TYPE));
120     return *this;
121   }
122 
123   //! elementwise addition, vectors must be the same size.
124   Vector<TYPE> &operator+=(const Vector<TYPE> &other) {
125     PRECONDITION(d_size == other.size(), "Size mismatch in vector addition");
126     const TYPE *otherData = other.getData();
127     TYPE *data = d_data.get();
128     unsigned int i;
129     for (i = 0; i < d_size; i++) {
130       data[i] += otherData[i];
131     }
132     return *this;
133   }
134 
135   //! elementwise subtraction, vectors must be the same size.
136   Vector<TYPE> &operator-=(const Vector<TYPE> &other) {
137     PRECONDITION(d_size == other.size(), "Size mismatch in vector subtraction");
138     const TYPE *otherData = other.getData();
139     TYPE *data = d_data.get();
140     unsigned int i;
141     for (i = 0; i < d_size; i++) {
142       data[i] -= otherData[i];
143     }
144     return *this;
145   }
146 
147   //! multiplication by a scalar
148   Vector<TYPE> &operator*=(TYPE scale) {
149     unsigned int i;
150     for (i = 0; i < d_size; i++) {
151       d_data[i] *= scale;
152     }
153     return *this;
154   }
155 
156   //! division by a scalar
157   Vector<TYPE> &operator/=(TYPE scale) {
158     unsigned int i;
159     for (i = 0; i < d_size; i++) {
160       d_data[i] /= scale;
161     }
162     return *this;
163   }
164 
165   //! L2 norm squared
normL2Sq()166   inline TYPE normL2Sq() const {
167     TYPE res = (TYPE)0.0;
168     unsigned int i;
169     TYPE *data = d_data.get();
170     for (i = 0; i < d_size; i++) {
171       res += data[i] * data[i];
172     }
173     return res;
174   }
175 
176   //! L2 norm
normL2()177   inline TYPE normL2() const { return sqrt(this->normL2Sq()); }
178 
179   //! L1 norm
normL1()180   inline TYPE normL1() const {
181     TYPE res = (TYPE)0.0;
182     unsigned int i;
183     TYPE *data = d_data.get();
184     for (i = 0; i < d_size; i++) {
185       res += fabs(data[i]);
186     }
187     return res;
188   }
189 
190   //! L-infinity norm
normLinfinity()191   inline TYPE normLinfinity() const {
192     TYPE res = (TYPE)(-1.0);
193     unsigned int i;
194     TYPE *data = d_data.get();
195     for (i = 0; i < d_size; i++) {
196       if (fabs(data[i]) > res) {
197         res = fabs(data[i]);
198       }
199     }
200     return res;
201   }
202 
203   //! \brief Gets the ID of the entry that has the largest absolute value
204   //! i.e. the entry being used for the L-infinity norm
largestAbsValId()205   inline unsigned int largestAbsValId() const {
206     TYPE res = (TYPE)(-1.0);
207     unsigned int i, id = d_size;
208     TYPE *data = d_data.get();
209     for (i = 0; i < d_size; i++) {
210       if (fabs(data[i]) > res) {
211         res = fabs(data[i]);
212         id = i;
213       }
214     }
215     return id;
216   }
217 
218   //! \brief Gets the ID of the entry that has the largest value
largestValId()219   inline unsigned int largestValId() const {
220     TYPE res = (TYPE)(-1.e8);
221     unsigned int i, id = d_size;
222     TYPE *data = d_data.get();
223     for (i = 0; i < d_size; i++) {
224       if (data[i] > res) {
225         res = data[i];
226         id = i;
227       }
228     }
229     return id;
230   }
231 
232   //! \brief Gets the ID of the entry that has the smallest value
smallestValId()233   inline unsigned int smallestValId() const {
234     TYPE res = (TYPE)(1.e8);
235     unsigned int i, id = d_size;
236     TYPE *data = d_data.get();
237     for (i = 0; i < d_size; i++) {
238       if (data[i] < res) {
239         res = data[i];
240         id = i;
241       }
242     }
243     return id;
244   }
245 
246   //! returns the dot product between two Vectors
dotProduct(const Vector<TYPE> other)247   inline TYPE dotProduct(const Vector<TYPE> other) const {
248     PRECONDITION(d_size == other.size(),
249                  "Size mismatch in vector doct product");
250     const TYPE *oData = other.getData();
251     unsigned int i;
252     TYPE res = (TYPE)(0.0);
253     TYPE *data = d_data.get();
254     for (i = 0; i < d_size; i++) {
255       res += (data[i] * oData[i]);
256     }
257     return res;
258   }
259 
260   //! Normalize the vector using the L2 norm
normalize()261   inline void normalize() {
262     TYPE val = this->normL2();
263     (*this) /= val;
264   }
265 
266   //! Set to a random unit vector
267   inline void setToRandom(unsigned int seed = 0) {
268     // we want to get our own RNG here instead of using the global
269     // one.  This is related to Issue285.
270     RDKit::rng_type generator(42u);
271     RDKit::uniform_double dist(0, 1.0);
272     RDKit::double_source_type randSource(generator, dist);
273     if (seed > 0) {
274       generator.seed(seed);
275     } else {
276       // we can't initialize using only clock(), because it's possible
277       // that we'll get here fast enough that clock() will return 0
278       // and generator.seed(0) is an error:
279       generator.seed(clock() + 1);
280     }
281 
282     unsigned int i;
283     TYPE *data = d_data.get();
284     for (i = 0; i < d_size; i++) {
285       data[i] = randSource();
286     }
287     this->normalize();
288   }
289 
290  private:
291   unsigned int d_size;  //! < our length
292   DATA_SPTR d_data;
293   Vector<TYPE> &operator=(const Vector<TYPE> &other);
294 };
295 
296 typedef Vector<double> DoubleVector;
297 
298 //! returns the algebraic tanimoto similarity [defn' from JCIM 46:587-96 (2006)]
299 template <typename T>
TanimotoSimilarity(const Vector<T> & v1,const Vector<T> & v2)300 double TanimotoSimilarity(const Vector<T> &v1, const Vector<T> &v2) {
301   double numer = v1.dotProduct(v2);
302   if (numer == 0.0) return 0.0;
303   double denom = v1.normL2Sq() + v2.normL2Sq() - numer;
304   if (denom == 0.0) return 0.0;
305   return numer / denom;
306 }
307 }  // end of namespace RDNumeric
308 
309 //! ostream operator for Vectors
310 template <typename TYPE>
311 std::ostream &operator<<(std::ostream &target,
312                          const RDNumeric::Vector<TYPE> &vec) {
313   unsigned int siz = vec.size();
314   target << "Size: " << siz << " [";
315   unsigned int i;
316   for (i = 0; i < siz; i++) {
317     target << std::setw(7) << std::setprecision(3) << vec.getVal(i) << ", ";
318   }
319   target << "]\n";
320   return target;
321 }
322 
323 #endif
324