1 // Copyright (C) 2011  Davis E. King (davis@dlib.net)
2 // License: Boost Software License   See LICENSE.txt for the full license.
3 #ifndef DLIB_FIND_MAX_FACTOR_GRAPH_nMPLP_Hh_
4 #define DLIB_FIND_MAX_FACTOR_GRAPH_nMPLP_Hh_
5 
6 #include "find_max_factor_graph_nmplp_abstract.h"
7 #include <vector>
8 #include <map>
9 #include "../matrix.h"
10 #include "../hash.h"
11 
12 
13 namespace dlib
14 {
15 
16 // ----------------------------------------------------------------------------------------
17 
18     namespace impl
19     {
20         class simple_hash_map
21         {
22         public:
23 
simple_hash_map()24             simple_hash_map(
25             ) :
26                 scan_dist(6)
27             {
28                 data.resize(5000);
29             }
30 
insert(const unsigned long a,const unsigned long b,const unsigned long value)31             void insert (
32                 const unsigned long a,
33                 const unsigned long b,
34                 const unsigned long value
35             )
36             /*!
37                 requires
38                     - a != std::numeric_limits<unsigned long>::max()
39                 ensures
40                     - #(*this)(a,b) == value
41             !*/
42             {
43                 const uint32 h = murmur_hash3_2(a,b)%(data.size()-scan_dist);
44 
45                 const unsigned long empty_bucket = std::numeric_limits<unsigned long>::max();
46 
47                 for (uint32 i = 0; i < scan_dist; ++i)
48                 {
49                     if (data[i+h].key1 == empty_bucket)
50                     {
51                         data[i+h].key1 = a;
52                         data[i+h].key2 = b;
53                         data[i+h].value = value;
54                         return;
55                     }
56                 }
57 
58                 // if we get this far it means the hash table is filling up.  So double its size.
59                 std::vector<bucket> new_data;
60                 new_data.resize(data.size()*2);
61                 new_data.swap(data);
62                 for (uint32 i = 0; i < new_data.size(); ++i)
63                 {
64                     if (new_data[i].key1 != empty_bucket)
65                     {
66                         insert(new_data[i].key1, new_data[i].key2, new_data[i].value);
67                     }
68                 }
69 
70                 insert(a,b,value);
71             }
72 
operator()73             unsigned long operator() (
74                 const unsigned long a,
75                 const unsigned long b
76             ) const
77             /*!
78                 requires
79                     - this->insert(a,b,some_value) has been called
80                 ensures
81                     - returns the value stored at key (a,b)
82             !*/
83             {
84                 DLIB_ASSERT(a != b, "An invalid map_problem was given to find_max_factor_graph_nmplp()."
85                             << "\nNode " << a << " is listed as being a neighbor with itself, which is illegal.");
86 
87                 uint32 h = murmur_hash3_2(a,b)%(data.size()-scan_dist);
88 
89 
90                 for (unsigned long i = 0; i < scan_dist; ++i)
91                 {
92                     if (data[h].key1 == a && data[h].key2 == b)
93                     {
94                         return data[h].value;
95                     }
96                     ++h;
97                 }
98 
99 
100                 // this should never happen (since this function requires (a,b) to be in the hash table
101                 DLIB_ASSERT(false, "An invalid map_problem was given to find_max_factor_graph_nmplp()."
102                             << "\nThe nodes in the map_problem are inconsistent because node "<<a<<" is in the neighbor list"
103                             << "\nof node "<<b<< " but node "<<b<<" isn't in the neighbor list of node "<<a<<".  The neighbor relationship"
104                             << "\nis supposed to be symmetric."
105                             );
106                 return 0;
107             }
108 
109         private:
110 
111             struct bucket
112             {
113                 // having max() in key1 indicates that the bucket isn't used.
bucketbucket114                 bucket() : key1(std::numeric_limits<unsigned long>::max()) {}
115                 unsigned long key1;
116                 unsigned long key2;
117                 unsigned long value;
118             };
119 
120             std::vector<bucket> data;
121             const unsigned int scan_dist;
122         };
123     }
124 
125 // ----------------------------------------------------------------------------------------
126 
127     template <
128         typename map_problem
129         >
find_max_factor_graph_nmplp(const map_problem & prob,std::vector<unsigned long> & map_assignment,unsigned long max_iter,double eps)130     void find_max_factor_graph_nmplp (
131         const map_problem& prob,
132         std::vector<unsigned long>& map_assignment,
133         unsigned long max_iter,
134         double eps
135     )
136     {
137         // make sure requires clause is not broken
138         DLIB_ASSERT( eps > 0,
139                      "\t void find_max_factor_graph_nmplp()"
140                      << "\n\t eps must be greater than zero"
141                      << "\n\t eps:  " << eps
142                 );
143 
144         /*
145             This function is an implementation of the NMPLP algorithm introduced in the
146             following papers:
147                 Fixing Max-Product: Convergent Message Passing Algorithms for MAP LP-Relaxations (2008)
148                 by Amir Globerson and Tommi Jaakkola
149 
150                 Introduction to dual decomposition for inference (2011)
151                 by David Sontag, Amir Globerson, and Tommi Jaakkola
152 
153             In particular, this function implements the star MPLP update equations shown as
154             equation 1.20 from the paper Introduction to dual decomposition for inference
155             (the method was called NMPLP in the first paper).  It should also be noted that
156             the original description of the NMPLP in the first paper had an error in the
157             equations and the second paper contains corrected equations, which is what this
158             function uses.
159         */
160 
161         typedef typename map_problem::node_iterator node_iterator;
162         typedef typename map_problem::neighbor_iterator neighbor_iterator;
163 
164         map_assignment.resize(prob.number_of_nodes());
165 
166 
167         if (prob.number_of_nodes() == 0)
168             return;
169 
170 
171         std::vector<double> delta_elements;
172         delta_elements.reserve(prob.number_of_nodes()*prob.num_states(prob.begin())*3);
173 
174         impl::simple_hash_map delta_idx;
175 
176 
177 
178         // Initialize delta to zero and fill up the hash table with the appropriate values
179         // so we can index into delta later on.
180         for (node_iterator i = prob.begin(); i != prob.end(); ++i)
181         {
182             const unsigned long id_i = prob.node_id(i);
183 
184             for (neighbor_iterator j = prob.begin(i); j != prob.end(i); ++j)
185             {
186                 const unsigned long id_j = prob.node_id(j);
187                 delta_idx.insert(id_i, id_j, delta_elements.size());
188 
189                 const unsigned long num_states_xj = prob.num_states(j);
190                 for (unsigned long xj = 0; xj < num_states_xj; ++xj)
191                     delta_elements.push_back(0);
192             }
193         }
194 
195 
196         std::vector<double> gamma_i;
197         std::vector<std::vector<double> > gamma_ji;
198         std::vector<std::vector<double> > delta_to_j_no_i;
199         // These arrays will end up with a length equal to the maximum number of neighbors
200         // of any node in the graph.  So reserve a bigish number of slots so that we are
201         // very unlikely to need to preform an expensive reallocation during the
202         // optimization.
203         gamma_ji.reserve(10000);
204         delta_to_j_no_i.reserve(10000);
205 
206 
207         double max_change = eps + 1;
208         // Now do the main body of the optimization.
209         unsigned long iter;
210         for (iter = 0; iter < max_iter && max_change > eps; ++iter)
211         {
212             max_change = -std::numeric_limits<double>::infinity();
213 
214             for (node_iterator i = prob.begin(); i != prob.end(); ++i)
215             {
216                 const unsigned long id_i = prob.node_id(i);
217                 const unsigned long num_states_xi = prob.num_states(i);
218                 gamma_i.assign(num_states_xi, 0);
219 
220                 double num_neighbors = 0;
221 
222                 unsigned int jcnt = 0;
223                 // first we fill in the gamma vectors
224                 for (neighbor_iterator j = prob.begin(i); j != prob.end(i); ++j)
225                 {
226                     // Make sure these arrays are big enough to hold all the neighbor
227                     // information.
228                     if (jcnt >= gamma_ji.size())
229                     {
230                         gamma_ji.resize(gamma_ji.size()+1);
231                         delta_to_j_no_i.resize(delta_to_j_no_i.size()+1);
232                     }
233 
234                     ++num_neighbors;
235                     const unsigned long id_j = prob.node_id(j);
236                     const unsigned long num_states_xj = prob.num_states(j);
237 
238                     gamma_ji[jcnt].assign(num_states_xi, -std::numeric_limits<double>::infinity());
239                     delta_to_j_no_i[jcnt].assign(num_states_xj, 0);
240 
241                     // compute delta_j^{-i} and store it in delta_to_j_no_i[jcnt]
242                     for (neighbor_iterator k = prob.begin(j); k != prob.end(j); ++k)
243                     {
244                         const unsigned long id_k = prob.node_id(k);
245                         if (id_k==id_i)
246                             continue;
247                         const double* const delta_kj = &delta_elements[delta_idx(id_k,id_j)];
248                         for (unsigned long xj = 0; xj < num_states_xj; ++xj)
249                         {
250                             delta_to_j_no_i[jcnt][xj] += delta_kj[xj];
251                         }
252                     }
253 
254                     // now compute gamma values
255                     for (unsigned long xi = 0; xi < num_states_xi; ++xi)
256                     {
257                         for (unsigned long xj = 0; xj < num_states_xj; ++xj)
258                         {
259                             gamma_ji[jcnt][xi] = std::max(gamma_ji[jcnt][xi], prob.factor_value(i,j,xi,xj) + delta_to_j_no_i[jcnt][xj]);
260                         }
261                         gamma_i[xi] += gamma_ji[jcnt][xi];
262                     }
263                     ++jcnt;
264                 }
265 
266                 // now update the delta values
267                 jcnt = 0;
268                 for (neighbor_iterator j = prob.begin(i); j != prob.end(i); ++j)
269                 {
270                     const unsigned long id_j = prob.node_id(j);
271                     const unsigned long num_states_xj = prob.num_states(j);
272 
273                     // messages from j to i
274                     double* const delta_ji = &delta_elements[delta_idx(id_j,id_i)];
275 
276                     // messages from i to j
277                     double* const delta_ij = &delta_elements[delta_idx(id_i,id_j)];
278 
279                     for (unsigned long xj = 0; xj < num_states_xj; ++xj)
280                     {
281                         double best_val = -std::numeric_limits<double>::infinity();
282 
283                         for (unsigned long xi = 0; xi < num_states_xi; ++xi)
284                         {
285                             double val = prob.factor_value(i,j,xi,xj) + 2/(num_neighbors+1)*gamma_i[xi] -gamma_ji[jcnt][xi];
286                             if (val > best_val)
287                                 best_val = val;
288                         }
289                         best_val = -0.5*delta_to_j_no_i[jcnt][xj] + 0.5*best_val;
290 
291                         if (std::abs(delta_ij[xj] - best_val) > max_change)
292                             max_change = std::abs(delta_ij[xj] - best_val);
293 
294                         delta_ij[xj] = best_val;
295                     }
296 
297                     for (unsigned long xi = 0; xi < num_states_xi; ++xi)
298                     {
299                         double new_val = -1/(num_neighbors+1)*gamma_i[xi] + gamma_ji[jcnt][xi];
300                         if (std::abs(delta_ji[xi] - new_val) > max_change)
301                             max_change = std::abs(delta_ji[xi] - new_val);
302                         delta_ji[xi] = new_val;
303                     }
304                     ++jcnt;
305                 }
306             }
307         }
308 
309 
310         // now decode the "beliefs"
311         std::vector<double> b;
312         for (node_iterator i = prob.begin(); i != prob.end(); ++i)
313         {
314             const unsigned long id_i = prob.node_id(i);
315             b.assign(prob.num_states(i), 0);
316 
317             for (neighbor_iterator k = prob.begin(i); k != prob.end(i); ++k)
318             {
319                 const unsigned long id_k = prob.node_id(k);
320 
321                 for (unsigned long xi = 0; xi < b.size(); ++xi)
322                 {
323                     const double* const delta_ki = &delta_elements[delta_idx(id_k,id_i)];
324                     b[xi] += delta_ki[xi];
325                 }
326             }
327 
328             map_assignment[id_i] = index_of_max(mat(b));
329         }
330     }
331 
332 // ----------------------------------------------------------------------------------------
333 
334 }
335 
336 #endif // DLIB_FIND_MAX_FACTOR_GRAPH_nMPLP_Hh_
337 
338