// Copyright (C) 2017 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_UPPER_bOUND_FUNCTION_Hh_ #define DLIB_UPPER_bOUND_FUNCTION_Hh_ #include "upper_bound_function_abstract.h" #include "../svm/svm_c_linear_dcd_trainer.h" #include "../statistics.h" #include #include namespace dlib { // ---------------------------------------------------------------------------------------- struct function_evaluation { function_evaluation() = default; function_evaluation(const matrix& x, double y) :x(x), y(y) {} matrix x; double y = std::numeric_limits::quiet_NaN(); }; // ---------------------------------------------------------------------------------------- class upper_bound_function { public: upper_bound_function( ) = default; upper_bound_function( const double relative_noise_magnitude, const double solver_eps ) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps) { DLIB_CASSERT(relative_noise_magnitude >= 0); DLIB_CASSERT(solver_eps > 0); } explicit upper_bound_function( const std::vector& _points, const double relative_noise_magnitude = 0.001, const double solver_eps = 0.0001 ) : relative_noise_magnitude(relative_noise_magnitude), solver_eps(solver_eps), points(_points) { DLIB_CASSERT(relative_noise_magnitude >= 0); DLIB_CASSERT(solver_eps > 0); if (points.size() > 1) { DLIB_CASSERT(points[0].x.size() > 0, "The vectors can't be empty."); const long dims = points[0].x.size(); for (auto& p : points) DLIB_CASSERT(p.x.size() == dims, "All the vectors given to upper_bound_function must have the same dimensionality."); learn_params(); } } void add ( const function_evaluation& point ) { DLIB_CASSERT(point.x.size() != 0, "The vectors can't be empty."); if (points.size() == 0) { points.push_back(point); return; } DLIB_CASSERT(point.x.size() == dimensionality(), "All the vectors given to upper_bound_function must have the same dimensionality."); if (points.size() < 4) { points.push_back(point); *this = upper_bound_function(points, relative_noise_magnitude, solver_eps); return; } points.push_back(point); // add constraints between the new point and the old points for (size_t i = 0; i < points.size()-1; ++i) active_constraints.push_back(std::make_pair(i,points.size()-1)); learn_params(); } long num_points( ) const { return points.size(); } long dimensionality( ) const { if (points.size() == 0) return 0; else return points[0].x.size(); } const std::vector& get_points( ) const { return points; } double operator() ( const matrix& x ) const { DLIB_CASSERT(num_points() > 0); DLIB_CASSERT(x.size() == dimensionality()); double upper_bound = std::numeric_limits::infinity(); for (size_t i = 0; i < points.size(); ++i) { const double local_bound = points[i].y + std::sqrt(offsets[i] + dot(slopes, squared(x-points[i].x))); upper_bound = std::min(upper_bound, local_bound); } return upper_bound; } private: void learn_params ( ) { const long dims = points[0].x.size(); using sample_type = std::vector>; using kernel_type = sparse_linear_kernel; std::vector x; std::vector y; // We are going to normalize the data so the values aren't extreme. First, we // collect statistics on our data. std::vector> x_rs(dims); running_stats y_rs; for (auto& v : points) { for (long i = 0; i < v.x.size(); ++i) x_rs[i].add(v.x(i)); y_rs.add(v.y); } // compute normalization vectors for the data. The only reason we do this is // to make the optimization well conditioned. In particular, scaling the y // values will prevent numerical errors in the 1-diff*diff computation below that // would otherwise result when diff is really big. Also, scaling the xvalues // to be about 1 will similarly make the optimization more stable and it also // has the added benefit of keeping the relative_noise_magnitude's scale // constant regardless of the size of x values. const double yscale = 1.0/y_rs.stddev(); std::vector xscale(dims); for (size_t i = 0; i < xscale.size(); ++i) xscale[i] = 1.0/(x_rs[i].stddev()*yscale); // make it so that xscale[i]*yscale == 1/x_rs[i].stddev() sample_type samp; auto add_constraint = [&](long i, long j) { samp.clear(); for (long k = 0; k < dims; ++k) { double temp = (points[i].x(k) - points[j].x(k))*xscale[k]*yscale; samp.push_back(std::make_pair(k, temp*temp)); } if (points[i].y > points[j].y) samp.push_back(std::make_pair(dims + j, relative_noise_magnitude)); else samp.push_back(std::make_pair(dims + i, relative_noise_magnitude)); const double diff = (points[i].y - points[j].y)*yscale; samp.push_back(std::make_pair(dims + points.size(), 1-diff*diff)); x.push_back(samp); y.push_back(1); }; if (active_constraints.size() == 0) { x.reserve(points.size()*(points.size()-1)/2); y.reserve(points.size()*(points.size()-1)/2); for (size_t i = 0; i < points.size(); ++i) { for (size_t j = i+1; j < points.size(); ++j) { add_constraint(i,j); } } } else { for (auto& p : active_constraints) add_constraint(p.first, p.second); } svm_c_linear_dcd_trainer trainer; trainer.set_c(std::numeric_limits::infinity()); //trainer.be_verbose(); trainer.force_last_weight_to_1(true); trainer.set_epsilon(solver_eps); svm_c_linear_dcd_trainer::optimizer_state state; auto df = trainer.train(x,y, state); // save the active constraints for later so we can use them inside add() to add // new points efficiently. if (active_constraints.size() == 0) { long k = 0; for (size_t i = 0; i < points.size(); ++i) { for (size_t j = i+1; j < points.size(); ++j) { if (state.get_alpha()[k++] != 0) active_constraints.push_back(std::make_pair(i,j)); } } } else { DLIB_CASSERT(state.get_alpha().size() == active_constraints.size()); new_active_constraints.clear(); for (size_t i = 0; i < state.get_alpha().size(); ++i) { if (state.get_alpha()[i] != 0) new_active_constraints.push_back(active_constraints[i]); } active_constraints.swap(new_active_constraints); } //std::cout << "points.size(): " << points.size() << std::endl; //std::cout << "active_constraints.size(): " << active_constraints.size() << std::endl; const auto& bv = df.basis_vectors(0); slopes.set_size(dims); for (long i = 0; i < dims; ++i) slopes(i) = bv[i].second*xscale[i]*xscale[i]; //std::cout << "slopes:" << trans(slopes); offsets.assign(points.size(),0); for (size_t i = 0; i < points.size(); ++i) { offsets[i] += bv[slopes.size()+i].second*relative_noise_magnitude; } } double relative_noise_magnitude = 0.001; double solver_eps = 0.0001; std::vector> active_constraints, new_active_constraints; std::vector points; std::vector offsets; // offsets.size() == points.size() matrix slopes; // slopes.size() == points[0].first.size() }; // ---------------------------------------------------------------------------------------- } #endif // DLIB_UPPER_bOUND_FUNCTION_Hh_