1 // Copyright (C) 2011 Davis E. King (davis@dlib.net) 2 // License: Boost Software License See LICENSE.txt for the full license. 3 #ifndef DLIB_FIND_MAX_FACTOR_GRAPH_nMPLP_Hh_ 4 #define DLIB_FIND_MAX_FACTOR_GRAPH_nMPLP_Hh_ 5 6 #include "find_max_factor_graph_nmplp_abstract.h" 7 #include <vector> 8 #include <map> 9 #include "../matrix.h" 10 #include "../hash.h" 11 12 13 namespace dlib 14 { 15 16 // ---------------------------------------------------------------------------------------- 17 18 namespace impl 19 { 20 class simple_hash_map 21 { 22 public: 23 simple_hash_map()24 simple_hash_map( 25 ) : 26 scan_dist(6) 27 { 28 data.resize(5000); 29 } 30 insert(const unsigned long a,const unsigned long b,const unsigned long value)31 void insert ( 32 const unsigned long a, 33 const unsigned long b, 34 const unsigned long value 35 ) 36 /*! 37 requires 38 - a != std::numeric_limits<unsigned long>::max() 39 ensures 40 - #(*this)(a,b) == value 41 !*/ 42 { 43 const uint32 h = murmur_hash3_2(a,b)%(data.size()-scan_dist); 44 45 const unsigned long empty_bucket = std::numeric_limits<unsigned long>::max(); 46 47 for (uint32 i = 0; i < scan_dist; ++i) 48 { 49 if (data[i+h].key1 == empty_bucket) 50 { 51 data[i+h].key1 = a; 52 data[i+h].key2 = b; 53 data[i+h].value = value; 54 return; 55 } 56 } 57 58 // if we get this far it means the hash table is filling up. So double its size. 59 std::vector<bucket> new_data; 60 new_data.resize(data.size()*2); 61 new_data.swap(data); 62 for (uint32 i = 0; i < new_data.size(); ++i) 63 { 64 if (new_data[i].key1 != empty_bucket) 65 { 66 insert(new_data[i].key1, new_data[i].key2, new_data[i].value); 67 } 68 } 69 70 insert(a,b,value); 71 } 72 operator()73 unsigned long operator() ( 74 const unsigned long a, 75 const unsigned long b 76 ) const 77 /*! 78 requires 79 - this->insert(a,b,some_value) has been called 80 ensures 81 - returns the value stored at key (a,b) 82 !*/ 83 { 84 DLIB_ASSERT(a != b, "An invalid map_problem was given to find_max_factor_graph_nmplp()." 85 << "\nNode " << a << " is listed as being a neighbor with itself, which is illegal."); 86 87 uint32 h = murmur_hash3_2(a,b)%(data.size()-scan_dist); 88 89 90 for (unsigned long i = 0; i < scan_dist; ++i) 91 { 92 if (data[h].key1 == a && data[h].key2 == b) 93 { 94 return data[h].value; 95 } 96 ++h; 97 } 98 99 100 // this should never happen (since this function requires (a,b) to be in the hash table 101 DLIB_ASSERT(false, "An invalid map_problem was given to find_max_factor_graph_nmplp()." 102 << "\nThe nodes in the map_problem are inconsistent because node "<<a<<" is in the neighbor list" 103 << "\nof node "<<b<< " but node "<<b<<" isn't in the neighbor list of node "<<a<<". The neighbor relationship" 104 << "\nis supposed to be symmetric." 105 ); 106 return 0; 107 } 108 109 private: 110 111 struct bucket 112 { 113 // having max() in key1 indicates that the bucket isn't used. bucketbucket114 bucket() : key1(std::numeric_limits<unsigned long>::max()) {} 115 unsigned long key1; 116 unsigned long key2; 117 unsigned long value; 118 }; 119 120 std::vector<bucket> data; 121 const unsigned int scan_dist; 122 }; 123 } 124 125 // ---------------------------------------------------------------------------------------- 126 127 template < 128 typename map_problem 129 > find_max_factor_graph_nmplp(const map_problem & prob,std::vector<unsigned long> & map_assignment,unsigned long max_iter,double eps)130 void find_max_factor_graph_nmplp ( 131 const map_problem& prob, 132 std::vector<unsigned long>& map_assignment, 133 unsigned long max_iter, 134 double eps 135 ) 136 { 137 // make sure requires clause is not broken 138 DLIB_ASSERT( eps > 0, 139 "\t void find_max_factor_graph_nmplp()" 140 << "\n\t eps must be greater than zero" 141 << "\n\t eps: " << eps 142 ); 143 144 /* 145 This function is an implementation of the NMPLP algorithm introduced in the 146 following papers: 147 Fixing Max-Product: Convergent Message Passing Algorithms for MAP LP-Relaxations (2008) 148 by Amir Globerson and Tommi Jaakkola 149 150 Introduction to dual decomposition for inference (2011) 151 by David Sontag, Amir Globerson, and Tommi Jaakkola 152 153 In particular, this function implements the star MPLP update equations shown as 154 equation 1.20 from the paper Introduction to dual decomposition for inference 155 (the method was called NMPLP in the first paper). It should also be noted that 156 the original description of the NMPLP in the first paper had an error in the 157 equations and the second paper contains corrected equations, which is what this 158 function uses. 159 */ 160 161 typedef typename map_problem::node_iterator node_iterator; 162 typedef typename map_problem::neighbor_iterator neighbor_iterator; 163 164 map_assignment.resize(prob.number_of_nodes()); 165 166 167 if (prob.number_of_nodes() == 0) 168 return; 169 170 171 std::vector<double> delta_elements; 172 delta_elements.reserve(prob.number_of_nodes()*prob.num_states(prob.begin())*3); 173 174 impl::simple_hash_map delta_idx; 175 176 177 178 // Initialize delta to zero and fill up the hash table with the appropriate values 179 // so we can index into delta later on. 180 for (node_iterator i = prob.begin(); i != prob.end(); ++i) 181 { 182 const unsigned long id_i = prob.node_id(i); 183 184 for (neighbor_iterator j = prob.begin(i); j != prob.end(i); ++j) 185 { 186 const unsigned long id_j = prob.node_id(j); 187 delta_idx.insert(id_i, id_j, delta_elements.size()); 188 189 const unsigned long num_states_xj = prob.num_states(j); 190 for (unsigned long xj = 0; xj < num_states_xj; ++xj) 191 delta_elements.push_back(0); 192 } 193 } 194 195 196 std::vector<double> gamma_i; 197 std::vector<std::vector<double> > gamma_ji; 198 std::vector<std::vector<double> > delta_to_j_no_i; 199 // These arrays will end up with a length equal to the maximum number of neighbors 200 // of any node in the graph. So reserve a bigish number of slots so that we are 201 // very unlikely to need to preform an expensive reallocation during the 202 // optimization. 203 gamma_ji.reserve(10000); 204 delta_to_j_no_i.reserve(10000); 205 206 207 double max_change = eps + 1; 208 // Now do the main body of the optimization. 209 unsigned long iter; 210 for (iter = 0; iter < max_iter && max_change > eps; ++iter) 211 { 212 max_change = -std::numeric_limits<double>::infinity(); 213 214 for (node_iterator i = prob.begin(); i != prob.end(); ++i) 215 { 216 const unsigned long id_i = prob.node_id(i); 217 const unsigned long num_states_xi = prob.num_states(i); 218 gamma_i.assign(num_states_xi, 0); 219 220 double num_neighbors = 0; 221 222 unsigned int jcnt = 0; 223 // first we fill in the gamma vectors 224 for (neighbor_iterator j = prob.begin(i); j != prob.end(i); ++j) 225 { 226 // Make sure these arrays are big enough to hold all the neighbor 227 // information. 228 if (jcnt >= gamma_ji.size()) 229 { 230 gamma_ji.resize(gamma_ji.size()+1); 231 delta_to_j_no_i.resize(delta_to_j_no_i.size()+1); 232 } 233 234 ++num_neighbors; 235 const unsigned long id_j = prob.node_id(j); 236 const unsigned long num_states_xj = prob.num_states(j); 237 238 gamma_ji[jcnt].assign(num_states_xi, -std::numeric_limits<double>::infinity()); 239 delta_to_j_no_i[jcnt].assign(num_states_xj, 0); 240 241 // compute delta_j^{-i} and store it in delta_to_j_no_i[jcnt] 242 for (neighbor_iterator k = prob.begin(j); k != prob.end(j); ++k) 243 { 244 const unsigned long id_k = prob.node_id(k); 245 if (id_k==id_i) 246 continue; 247 const double* const delta_kj = &delta_elements[delta_idx(id_k,id_j)]; 248 for (unsigned long xj = 0; xj < num_states_xj; ++xj) 249 { 250 delta_to_j_no_i[jcnt][xj] += delta_kj[xj]; 251 } 252 } 253 254 // now compute gamma values 255 for (unsigned long xi = 0; xi < num_states_xi; ++xi) 256 { 257 for (unsigned long xj = 0; xj < num_states_xj; ++xj) 258 { 259 gamma_ji[jcnt][xi] = std::max(gamma_ji[jcnt][xi], prob.factor_value(i,j,xi,xj) + delta_to_j_no_i[jcnt][xj]); 260 } 261 gamma_i[xi] += gamma_ji[jcnt][xi]; 262 } 263 ++jcnt; 264 } 265 266 // now update the delta values 267 jcnt = 0; 268 for (neighbor_iterator j = prob.begin(i); j != prob.end(i); ++j) 269 { 270 const unsigned long id_j = prob.node_id(j); 271 const unsigned long num_states_xj = prob.num_states(j); 272 273 // messages from j to i 274 double* const delta_ji = &delta_elements[delta_idx(id_j,id_i)]; 275 276 // messages from i to j 277 double* const delta_ij = &delta_elements[delta_idx(id_i,id_j)]; 278 279 for (unsigned long xj = 0; xj < num_states_xj; ++xj) 280 { 281 double best_val = -std::numeric_limits<double>::infinity(); 282 283 for (unsigned long xi = 0; xi < num_states_xi; ++xi) 284 { 285 double val = prob.factor_value(i,j,xi,xj) + 2/(num_neighbors+1)*gamma_i[xi] -gamma_ji[jcnt][xi]; 286 if (val > best_val) 287 best_val = val; 288 } 289 best_val = -0.5*delta_to_j_no_i[jcnt][xj] + 0.5*best_val; 290 291 if (std::abs(delta_ij[xj] - best_val) > max_change) 292 max_change = std::abs(delta_ij[xj] - best_val); 293 294 delta_ij[xj] = best_val; 295 } 296 297 for (unsigned long xi = 0; xi < num_states_xi; ++xi) 298 { 299 double new_val = -1/(num_neighbors+1)*gamma_i[xi] + gamma_ji[jcnt][xi]; 300 if (std::abs(delta_ji[xi] - new_val) > max_change) 301 max_change = std::abs(delta_ji[xi] - new_val); 302 delta_ji[xi] = new_val; 303 } 304 ++jcnt; 305 } 306 } 307 } 308 309 310 // now decode the "beliefs" 311 std::vector<double> b; 312 for (node_iterator i = prob.begin(); i != prob.end(); ++i) 313 { 314 const unsigned long id_i = prob.node_id(i); 315 b.assign(prob.num_states(i), 0); 316 317 for (neighbor_iterator k = prob.begin(i); k != prob.end(i); ++k) 318 { 319 const unsigned long id_k = prob.node_id(k); 320 321 for (unsigned long xi = 0; xi < b.size(); ++xi) 322 { 323 const double* const delta_ki = &delta_elements[delta_idx(id_k,id_i)]; 324 b[xi] += delta_ki[xi]; 325 } 326 } 327 328 map_assignment[id_i] = index_of_max(mat(b)); 329 } 330 } 331 332 // ---------------------------------------------------------------------------------------- 333 334 } 335 336 #endif // DLIB_FIND_MAX_FACTOR_GRAPH_nMPLP_Hh_ 337 338