1 // Copyright (C) 2017 Davis E. King (davis@dlib.net) 2 // License: Boost Software License See LICENSE.txt for the full license. 3 #ifndef DLIB_UPPER_bOUND_FUNCTION_Hh_ 4 #define DLIB_UPPER_bOUND_FUNCTION_Hh_ 5 6 #include "upper_bound_function_abstract.h" 7 #include "../svm/svm_c_linear_dcd_trainer.h" 8 #include "../statistics.h" 9 #include <limits> 10 #include <utility> 11 12 namespace dlib 13 { 14 15 // ---------------------------------------------------------------------------------------- 16 17 struct function_evaluation 18 { 19 function_evaluation() = default; function_evaluationfunction_evaluation20 function_evaluation(const matrix<double,0,1>& x, double y) :x(x), y(y) {} 21 22 matrix<double,0,1> x; 23 double y = std::numeric_limits<double>::quiet_NaN(); 24 }; 25 26 // ---------------------------------------------------------------------------------------- 27 28 class upper_bound_function 29 { 30 31 public: 32 33 upper_bound_function( 34 ) = default; 35 upper_bound_function(const double relative_noise_magnitude,const double solver_eps)36 upper_bound_function( 37 const double relative_noise_magnitude, 38 const double solver_eps 39 ) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps) 40 { 41 DLIB_CASSERT(relative_noise_magnitude >= 0); 42 DLIB_CASSERT(solver_eps > 0); 43 } 44 45 explicit upper_bound_function( 46 const std::vector<function_evaluation>& _points, 47 const double relative_noise_magnitude = 0.001, 48 const double solver_eps = 0.0001 relative_noise_magnitude(relative_noise_magnitude)49 ) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps), points(_points) 50 { 51 DLIB_CASSERT(relative_noise_magnitude >= 0); 52 DLIB_CASSERT(solver_eps > 0); 53 54 if (points.size() > 1) 55 { 56 DLIB_CASSERT(points[0].x.size() > 0, "The vectors can't be empty."); 57 58 const long dims = points[0].x.size(); 59 for (auto& p : points) 60 DLIB_CASSERT(p.x.size() == dims, "All the vectors given to upper_bound_function must have the same dimensionality."); 61 62 learn_params(); 63 } 64 65 } 66 add(const function_evaluation & point)67 void add ( 68 const function_evaluation& point 69 ) 70 { 71 DLIB_CASSERT(point.x.size() != 0, "The vectors can't be empty."); 72 if (points.size() == 0) 73 { 74 points.push_back(point); 75 return; 76 } 77 78 DLIB_CASSERT(point.x.size() == dimensionality(), "All the vectors given to upper_bound_function must have the same dimensionality."); 79 80 if (points.size() < 4) 81 { 82 points.push_back(point); 83 *this = upper_bound_function(points, relative_noise_magnitude, solver_eps); 84 return; 85 } 86 87 points.push_back(point); 88 // add constraints between the new point and the old points 89 for (size_t i = 0; i < points.size()-1; ++i) 90 active_constraints.push_back(std::make_pair(i,points.size()-1)); 91 92 learn_params(); 93 } 94 num_points()95 long num_points( 96 ) const 97 { 98 return points.size(); 99 } 100 dimensionality()101 long dimensionality( 102 ) const 103 { 104 if (points.size() == 0) 105 return 0; 106 else 107 return points[0].x.size(); 108 } 109 get_points()110 const std::vector<function_evaluation>& get_points( 111 ) const 112 { 113 return points; 114 } 115 operator()116 double operator() ( 117 const matrix<double,0,1>& x 118 ) const 119 { 120 DLIB_CASSERT(num_points() > 0); 121 DLIB_CASSERT(x.size() == dimensionality()); 122 123 124 125 double upper_bound = std::numeric_limits<double>::infinity(); 126 127 for (size_t i = 0; i < points.size(); ++i) 128 { 129 const double local_bound = points[i].y + std::sqrt(offsets[i] + dot(slopes, squared(x-points[i].x))); 130 upper_bound = std::min(upper_bound, local_bound); 131 } 132 133 return upper_bound; 134 } 135 136 private: 137 learn_params()138 void learn_params ( 139 ) 140 { 141 const long dims = points[0].x.size(); 142 143 using sample_type = std::vector<std::pair<size_t,double>>; 144 using kernel_type = sparse_linear_kernel<sample_type>; 145 std::vector<sample_type> x; 146 std::vector<double> y; 147 148 // We are going to normalize the data so the values aren't extreme. First, we 149 // collect statistics on our data. 150 std::vector<running_stats<double>> x_rs(dims); 151 running_stats<double> y_rs; 152 for (auto& v : points) 153 { 154 for (long i = 0; i < v.x.size(); ++i) 155 x_rs[i].add(v.x(i)); 156 y_rs.add(v.y); 157 } 158 159 160 // compute normalization vectors for the data. The only reason we do this is 161 // to make the optimization well conditioned. In particular, scaling the y 162 // values will prevent numerical errors in the 1-diff*diff computation below that 163 // would otherwise result when diff is really big. Also, scaling the xvalues 164 // to be about 1 will similarly make the optimization more stable and it also 165 // has the added benefit of keeping the relative_noise_magnitude's scale 166 // constant regardless of the size of x values. 167 const double yscale = 1.0/y_rs.stddev(); 168 std::vector<double> xscale(dims); 169 for (size_t i = 0; i < xscale.size(); ++i) 170 xscale[i] = 1.0/(x_rs[i].stddev()*yscale); // make it so that xscale[i]*yscale == 1/x_rs[i].stddev() 171 172 sample_type samp; 173 auto add_constraint = [&](long i, long j) { 174 samp.clear(); 175 for (long k = 0; k < dims; ++k) 176 { 177 double temp = (points[i].x(k) - points[j].x(k))*xscale[k]*yscale; 178 samp.push_back(std::make_pair(k, temp*temp)); 179 } 180 181 if (points[i].y > points[j].y) 182 samp.push_back(std::make_pair(dims + j, relative_noise_magnitude)); 183 else 184 samp.push_back(std::make_pair(dims + i, relative_noise_magnitude)); 185 186 const double diff = (points[i].y - points[j].y)*yscale; 187 samp.push_back(std::make_pair(dims + points.size(), 1-diff*diff)); 188 189 x.push_back(samp); 190 y.push_back(1); 191 }; 192 193 if (active_constraints.size() == 0) 194 { 195 x.reserve(points.size()*(points.size()-1)/2); 196 y.reserve(points.size()*(points.size()-1)/2); 197 for (size_t i = 0; i < points.size(); ++i) 198 { 199 for (size_t j = i+1; j < points.size(); ++j) 200 { 201 add_constraint(i,j); 202 } 203 } 204 } 205 else 206 { 207 for (auto& p : active_constraints) 208 add_constraint(p.first, p.second); 209 } 210 211 212 213 214 svm_c_linear_dcd_trainer<kernel_type> trainer; 215 trainer.set_c(std::numeric_limits<double>::infinity()); 216 //trainer.be_verbose(); 217 trainer.force_last_weight_to_1(true); 218 trainer.set_epsilon(solver_eps); 219 220 svm_c_linear_dcd_trainer<kernel_type>::optimizer_state state; 221 auto df = trainer.train(x,y, state); 222 223 // save the active constraints for later so we can use them inside add() to add 224 // new points efficiently. 225 if (active_constraints.size() == 0) 226 { 227 long k = 0; 228 for (size_t i = 0; i < points.size(); ++i) 229 { 230 for (size_t j = i+1; j < points.size(); ++j) 231 { 232 if (state.get_alpha()[k++] != 0) 233 active_constraints.push_back(std::make_pair(i,j)); 234 } 235 } 236 } 237 else 238 { 239 DLIB_CASSERT(state.get_alpha().size() == active_constraints.size()); 240 new_active_constraints.clear(); 241 for (size_t i = 0; i < state.get_alpha().size(); ++i) 242 { 243 if (state.get_alpha()[i] != 0) 244 new_active_constraints.push_back(active_constraints[i]); 245 } 246 active_constraints.swap(new_active_constraints); 247 } 248 249 //std::cout << "points.size(): " << points.size() << std::endl; 250 //std::cout << "active_constraints.size(): " << active_constraints.size() << std::endl; 251 252 253 const auto& bv = df.basis_vectors(0); 254 slopes.set_size(dims); 255 for (long i = 0; i < dims; ++i) 256 slopes(i) = bv[i].second*xscale[i]*xscale[i]; 257 258 //std::cout << "slopes:" << trans(slopes); 259 260 offsets.assign(points.size(),0); 261 262 263 for (size_t i = 0; i < points.size(); ++i) 264 { 265 offsets[i] += bv[slopes.size()+i].second*relative_noise_magnitude; 266 } 267 } 268 269 270 271 double relative_noise_magnitude = 0.001; 272 double solver_eps = 0.0001; 273 std::vector<std::pair<size_t,size_t>> active_constraints, new_active_constraints; 274 275 std::vector<function_evaluation> points; 276 std::vector<double> offsets; // offsets.size() == points.size() 277 matrix<double,0,1> slopes; // slopes.size() == points[0].first.size() 278 }; 279 280 // ---------------------------------------------------------------------------------------- 281 282 } 283 284 #endif // DLIB_UPPER_bOUND_FUNCTION_Hh_ 285 286 287