1 // Copyright (C) 2017  Davis E. King (davis@dlib.net)
2 // License: Boost Software License   See LICENSE.txt for the full license.
3 #ifndef DLIB_UPPER_bOUND_FUNCTION_Hh_
4 #define DLIB_UPPER_bOUND_FUNCTION_Hh_
5 
6 #include "upper_bound_function_abstract.h"
7 #include "../svm/svm_c_linear_dcd_trainer.h"
8 #include "../statistics.h"
9 #include <limits>
10 #include <utility>
11 
12 namespace dlib
13 {
14 
15 // ----------------------------------------------------------------------------------------
16 
17     struct function_evaluation
18     {
19         function_evaluation() = default;
function_evaluationfunction_evaluation20         function_evaluation(const matrix<double,0,1>& x, double y) :x(x), y(y) {}
21 
22         matrix<double,0,1> x;
23         double y = std::numeric_limits<double>::quiet_NaN();
24     };
25 
26 // ----------------------------------------------------------------------------------------
27 
28     class upper_bound_function
29     {
30 
31     public:
32 
33         upper_bound_function(
34         ) = default;
35 
upper_bound_function(const double relative_noise_magnitude,const double solver_eps)36         upper_bound_function(
37             const double relative_noise_magnitude,
38             const double solver_eps
39         ) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps)
40         {
41             DLIB_CASSERT(relative_noise_magnitude >= 0);
42             DLIB_CASSERT(solver_eps > 0);
43         }
44 
45         explicit upper_bound_function(
46             const std::vector<function_evaluation>& _points,
47             const double relative_noise_magnitude = 0.001,
48             const double solver_eps = 0.0001
relative_noise_magnitude(relative_noise_magnitude)49         ) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps), points(_points)
50         {
51             DLIB_CASSERT(relative_noise_magnitude >= 0);
52             DLIB_CASSERT(solver_eps > 0);
53 
54             if (points.size() > 1)
55             {
56                 DLIB_CASSERT(points[0].x.size() > 0, "The vectors can't be empty.");
57 
58                 const long dims = points[0].x.size();
59                 for (auto& p : points)
60                     DLIB_CASSERT(p.x.size() == dims, "All the vectors given to upper_bound_function must have the same dimensionality.");
61 
62                 learn_params();
63             }
64 
65         }
66 
add(const function_evaluation & point)67         void add (
68             const function_evaluation& point
69         )
70         {
71             DLIB_CASSERT(point.x.size() != 0, "The vectors can't be empty.");
72             if (points.size() == 0)
73             {
74                 points.push_back(point);
75                 return;
76             }
77 
78             DLIB_CASSERT(point.x.size() == dimensionality(), "All the vectors given to upper_bound_function must have the same dimensionality.");
79 
80             if (points.size() < 4)
81             {
82                 points.push_back(point);
83                 *this = upper_bound_function(points, relative_noise_magnitude, solver_eps);
84                 return;
85             }
86 
87             points.push_back(point);
88             // add constraints between the new point and the old points
89             for (size_t i = 0; i < points.size()-1; ++i)
90                 active_constraints.push_back(std::make_pair(i,points.size()-1));
91 
92             learn_params();
93         }
94 
num_points()95         long num_points(
96         ) const
97         {
98             return points.size();
99         }
100 
dimensionality()101         long dimensionality(
102         ) const
103         {
104             if (points.size() == 0)
105                 return 0;
106             else
107                 return points[0].x.size();
108         }
109 
get_points()110         const std::vector<function_evaluation>& get_points(
111         ) const
112         {
113             return points;
114         }
115 
operator()116         double operator() (
117             const matrix<double,0,1>& x
118         ) const
119         {
120             DLIB_CASSERT(num_points() > 0);
121             DLIB_CASSERT(x.size() == dimensionality());
122 
123 
124 
125             double upper_bound = std::numeric_limits<double>::infinity();
126 
127             for (size_t i = 0; i < points.size(); ++i)
128             {
129                 const double local_bound = points[i].y + std::sqrt(offsets[i] + dot(slopes, squared(x-points[i].x)));
130                 upper_bound = std::min(upper_bound, local_bound);
131             }
132 
133             return upper_bound;
134         }
135 
136     private:
137 
learn_params()138         void learn_params (
139         )
140         {
141             const long dims = points[0].x.size();
142 
143             using sample_type = std::vector<std::pair<size_t,double>>;
144             using kernel_type = sparse_linear_kernel<sample_type>;
145             std::vector<sample_type> x;
146             std::vector<double> y;
147 
148             // We are going to normalize the data so the values aren't extreme.  First, we
149             // collect statistics on our data.
150             std::vector<running_stats<double>> x_rs(dims);
151             running_stats<double> y_rs;
152             for (auto& v : points)
153             {
154                 for (long i = 0; i < v.x.size(); ++i)
155                     x_rs[i].add(v.x(i));
156                 y_rs.add(v.y);
157             }
158 
159 
160             // compute normalization vectors for the data.  The only reason we do this is
161             // to make the optimization well conditioned.  In particular, scaling the y
162             // values will prevent numerical errors in the 1-diff*diff computation below that
163             // would otherwise result when diff is really big.  Also, scaling the xvalues
164             // to be about 1 will similarly make the optimization more stable and it also
165             // has the added benefit of keeping the relative_noise_magnitude's scale
166             // constant regardless of the size of x values.
167             const double yscale = 1.0/y_rs.stddev();
168             std::vector<double> xscale(dims);
169             for (size_t i = 0; i < xscale.size(); ++i)
170                 xscale[i] = 1.0/(x_rs[i].stddev()*yscale); // make it so that xscale[i]*yscale ==  1/x_rs[i].stddev()
171 
172             sample_type samp;
173             auto add_constraint = [&](long i, long j) {
174                 samp.clear();
175                 for (long k = 0; k < dims; ++k)
176                 {
177                     double temp = (points[i].x(k) - points[j].x(k))*xscale[k]*yscale;
178                     samp.push_back(std::make_pair(k, temp*temp));
179                 }
180 
181                 if (points[i].y > points[j].y)
182                     samp.push_back(std::make_pair(dims + j, relative_noise_magnitude));
183                 else
184                     samp.push_back(std::make_pair(dims + i, relative_noise_magnitude));
185 
186                 const double diff = (points[i].y - points[j].y)*yscale;
187                 samp.push_back(std::make_pair(dims + points.size(), 1-diff*diff));
188 
189                 x.push_back(samp);
190                 y.push_back(1);
191             };
192 
193             if (active_constraints.size() == 0)
194             {
195                 x.reserve(points.size()*(points.size()-1)/2);
196                 y.reserve(points.size()*(points.size()-1)/2);
197                 for (size_t i = 0; i < points.size(); ++i)
198                 {
199                     for (size_t j = i+1; j < points.size(); ++j)
200                     {
201                         add_constraint(i,j);
202                     }
203                 }
204             }
205             else
206             {
207                 for (auto& p : active_constraints)
208                     add_constraint(p.first, p.second);
209             }
210 
211 
212 
213 
214             svm_c_linear_dcd_trainer<kernel_type> trainer;
215             trainer.set_c(std::numeric_limits<double>::infinity());
216             //trainer.be_verbose();
217             trainer.force_last_weight_to_1(true);
218             trainer.set_epsilon(solver_eps);
219 
220             svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state;
221             auto df = trainer.train(x,y, state);
222 
223             // save the active constraints for later so we can use them inside add() to add
224             // new points efficiently.
225             if (active_constraints.size() == 0)
226             {
227                 long k = 0;
228                 for (size_t i = 0; i < points.size(); ++i)
229                 {
230                     for (size_t j = i+1; j < points.size(); ++j)
231                     {
232                         if (state.get_alpha()[k++] != 0)
233                             active_constraints.push_back(std::make_pair(i,j));
234                     }
235                 }
236             }
237             else
238             {
239                 DLIB_CASSERT(state.get_alpha().size() == active_constraints.size());
240                 new_active_constraints.clear();
241                 for (size_t i = 0; i < state.get_alpha().size(); ++i)
242                 {
243                     if (state.get_alpha()[i] != 0)
244                         new_active_constraints.push_back(active_constraints[i]);
245                 }
246                 active_constraints.swap(new_active_constraints);
247             }
248 
249             //std::cout << "points.size(): " << points.size() << std::endl;
250             //std::cout << "active_constraints.size(): " << active_constraints.size() << std::endl;
251 
252 
253             const auto& bv = df.basis_vectors(0);
254             slopes.set_size(dims);
255             for (long i = 0; i < dims; ++i)
256                 slopes(i) = bv[i].second*xscale[i]*xscale[i];
257 
258             //std::cout << "slopes:" << trans(slopes);
259 
260             offsets.assign(points.size(),0);
261 
262 
263             for (size_t i = 0; i < points.size(); ++i)
264             {
265                 offsets[i] += bv[slopes.size()+i].second*relative_noise_magnitude;
266             }
267         }
268 
269 
270 
271         double relative_noise_magnitude = 0.001;
272         double solver_eps = 0.0001;
273         std::vector<std::pair<size_t,size_t>> active_constraints, new_active_constraints;
274 
275         std::vector<function_evaluation> points;
276         std::vector<double> offsets; // offsets.size() == points.size()
277         matrix<double,0,1> slopes; // slopes.size() == points[0].first.size()
278     };
279 
280 // ----------------------------------------------------------------------------------------
281 
282 }
283 
284 #endif // DLIB_UPPER_bOUND_FUNCTION_Hh_
285 
286 
287