/*
Copyright (c) by respective owners including Yahoo!, Microsoft, and
individual contributors. All rights reserved.  Released under a BSD (revised)
license as described in the file LICENSE.
 */
#include "search_graph.h"
#include "vw.h"
#include "rand48.h"
#include "gd.h"

/*
example format:

ALL NODES
ALL EDGES
<blank>
ALL NODES
ALL EDGES
<blank>

and so on

node lines look like normal vw examples with unary features:

label:weight |n features
label:weight |n features
...

nodes are *implicitly* assigned ids starting at 1. (the namespace
needn't be called n.) if weight is omitted, it is assumed to be 1.0.

edge lines look like:

n1 n2 n3 ... |e features
n1 n2 n3 ... |e features
...

here, n1 n2 n3 are integer node ids, starting at one. technically
these are hyperedges, since they can touch more than two nodes. in the
canonical representation, there will just be n1 and n2.

the only thing that differentiates an edge line from a node line is
that an edge line lists more than one node id.
*/
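
/*
for concreteness, a (hypothetical) three-node, two-edge input following
the format above might look like:

  2 |n red circle
  1 |n blue circle
  2 |n red square
  1 2 |e touching
  2 3 |e touching
  <blank>

the node lines get implicit ids 1, 2, 3; the edge lines connect nodes
1-2 and 2-3.
*/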

namespace GraphTask {
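  // registration record for this task: its name ("graph") and the
  // callbacks the search framework will invoke.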
  Search::search_task task = { "graph", run, initialize, finish, setup, takedown };

  struct task_data {
    // global data
    size_t num_loops;
    size_t K;  // number of labels, *NOT* including the +1 for 'unlabeled'
    bool   use_structure;

    // per-example data
    uint32_t N;  // number of nodes
    uint32_t E;  // number of edges
    vector< vector<size_t> > adj;  // adj[n] is a vector of *edge example ids* that contain n
    vector<uint32_t> bfs;   // order of nodes to process
    vector<size_t>   pred;  // predictions
    example* cur_node;      // pointer to the current node for add_edge_features_fn
    float* neighbor_predictions;  // histogram (size K+1) of neighbor predictions for add_edge_features_fn
  };

  bool example_is_test(void* ld) {
    // TODO
    return false;
  }

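  // parse task-specific options and allocate shared state. an invocation
  // might look like (illustrative; exact flags depend on your vw build):
  //   vw --search <K> --search_task graph --search_graph_num_loops 2 train.graph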
  void initialize(Search::search& sch, size_t& num_actions, po::variables_map& vm) {
    task_data * D = new task_data();
    po::options_description sspan_opts("search graphtask options");
    sspan_opts.add_options()("search_graph_num_loops", po::value<size_t>(), "how many loops to run [def: 2]");
    sspan_opts.add_options()("search_graph_no_structure", "turn off edge features");
    sch.add_program_options(vm, sspan_opts);

    D->num_loops = 2;
    D->use_structure = true;
    if (vm.count("search_graph_num_loops"))      D->num_loops = vm["search_graph_num_loops"].as<size_t>();
    if (vm.count("search_graph_no_structure"))   D->use_structure = false;

    D->K = num_actions;
    D->neighbor_predictions = calloc_or_die<float>(D->K+1);

    sch.set_task_data<task_data>(D);
    sch.set_options( Search::AUTO_HAMMING_LOSS );
    sch.set_label_parser( COST_SENSITIVE::cs_label, example_is_test );
  }

  void finish(Search::search& sch) {
    task_data * D = sch.get_task_data<task_data>();
    free(D->neighbor_predictions);
    delete D;
  }

  inline bool example_is_edge(example* e) { return e->l.cs.costs.size() > 1; }

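  // compute a breadth-first ordering of the nodes (0-based ids in D.bfs),
  // restarting from an untouched node whenever a connected component is
  // exhausted. e.g., with edges 1-2 and 2-3 and node 4 isolated, the
  // resulting order is 0, 1, 2, 3.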
  void run_bfs(task_data &D, vector<example*>& ec) {
    D.bfs.clear();
    if (D.N == 0) return;
    vector<bool> touched;
    for (size_t n=0; n<D.N; n++) touched.push_back(false);

    touched[0] = true;
    D.bfs.push_back(0);

    size_t i = 0;
    while (D.bfs.size() < D.N) {
      while (i < D.bfs.size()) {
        uint32_t n = D.bfs[i];
        for (size_t id : D.adj[n])
          for (size_t j=0; j<ec[id]->l.cs.costs.size(); j++) {
            uint32_t m = ec[id]->l.cs.costs[j].class_index - 1;
            if (!touched[m]) {
              D.bfs.push_back(m);
              touched[m] = true;
            }
          }
        i++;
      }

      if (D.bfs.size() < D.N)
        // we finished a connected component; start a new one from any
        // untouched node
        for (uint32_t n=0; n<D.N; n++)
          if (! touched[n]) {
            touched[n] = true;
            D.bfs.push_back(n);
            break;
          }
    }
  }

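  // per-example setup: count nodes and edges (all nodes must precede all
  // edges), build the node -> incident-edge adjacency lists, compute the
  // bfs ordering, and reset every prediction to the 'unlabeled' class K+1.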
  void setup(Search::search& sch, vector<example*>& ec) {
    task_data& D = *sch.get_task_data<task_data>();

    D.N = 0;
    D.E = 0;
    for (size_t i=0; i<ec.size(); i++)
      if (example_is_edge(ec[i]))
        D.E++;
      else { // it's a node!
        if (D.E > 0) { cerr << "error: got a node after getting edges!" << endl; throw exception(); }
        D.N++;
      }

    if ((D.N == 0) && (D.E > 0)) { cerr << "error: got edges without any nodes!" << endl; throw exception(); }

    D.adj = vector<vector<size_t>>(D.N, vector<size_t>(0));

    for (size_t i=D.N; i<ec.size(); i++) {
      for (size_t n=0; n<ec[i]->l.cs.costs.size(); n++) {
        uint32_t id = ec[i]->l.cs.costs[n].class_index;
        if ((id < 1) || (id > D.N)) {
          cerr << "error: edge refers to an out-of-range node id: " << id << " (valid ids are 1.." << D.N << ")" << endl;
          throw exception();
        }
      }
      for (size_t n=0; n<ec[i]->l.cs.costs.size(); n++) {
        size_t nn = ec[i]->l.cs.costs[n].class_index - 1;
        if ((D.adj[nn].size() == 0) || (D.adj[nn][D.adj[nn].size()-1] != i)) // don't allow duplicate edge ids
          D.adj[nn].push_back(i);
      }
    }

    run_bfs(D, ec);

    D.pred.clear();
    for (size_t n=0; n<D.N; n++)
      D.pred.push_back( D.K+1 );
  }

  void takedown(Search::search& sch, vector<example*>& ec) {
    task_data& D = *sch.get_task_data<task_data>();
    D.bfs.clear();
    D.pred.clear();
    D.adj.clear();
  }

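  // callback for GD::foreach_feature over an edge example: for each raw
  // feature (fv, fx) of the edge and each neighbor-label k with nonzero
  // count, emit a crossed feature into the current node's neighbor
  // namespace, offsetting the hash by a per-label constant so distinct
  // labels land in distinct slots.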
  void add_edge_features_fn(task_data& D, float fv, uint32_t fx) {
    example* node = D.cur_node;
    for (size_t k=0; k<=D.K; k++) {
      if (D.neighbor_predictions[k] == 0.) continue;
      feature f = { fv * D.neighbor_predictions[k], (uint32_t) ( fx + 348919043 * k ) };
      node->atomics[neighbor_namespace].push_back(f);
      node->sum_feat_sq[neighbor_namespace] += f.x * f.x;
    }
    // TODO: audit
  }

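  // for node n, build a histogram over the current predictions of its
  // neighbors on each incident edge (slot K counts still-unlabeled
  // neighbors), then cross that histogram with the edge's own features
  // via add_edge_features_fn and attach the result to node n.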
  void add_edge_features(Search::search& sch, task_data& D, uint32_t n, vector<example*>& ec) {
    D.cur_node = ec[n];

    for (size_t i : D.adj[n]) {
      for (size_t k=0; k<D.K+1; k++) D.neighbor_predictions[k] = 0.;
      float pred_total = 0.;

      for (size_t j=0; j<ec[i]->l.cs.costs.size(); j++) {
        size_t m = ec[i]->l.cs.costs[j].class_index - 1;
        if (m == n) continue;
        D.neighbor_predictions[ D.pred[m]-1 ] += 1.;
        pred_total += 1.;
      }

      if (pred_total == 0.) continue;
      //for (size_t k=0; k<D.K+1; k++) D.neighbor_predictions[k] /= pred_total;
      example& edge = *ec[i];
      GD::foreach_feature<task_data,uint32_t,add_edge_features_fn>(sch.get_vw_pointer_unsafe(), edge, D);
    }
    ec[n]->indices.push_back(neighbor_namespace);
    ec[n]->total_sum_feat_sq += ec[n]->sum_feat_sq[neighbor_namespace];
    ec[n]->num_features += ec[n]->atomics[neighbor_namespace].size();
  }

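  // undo add_edge_features: pop the neighbor namespace from node n and
  // reset its accumulated feature statistics.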
  void del_edge_features(task_data& D, uint32_t n, vector<example*>& ec) {
    ec[n]->indices.pop();
    ec[n]->total_sum_feat_sq -= ec[n]->sum_feat_sq[neighbor_namespace];
    ec[n]->num_features -= ec[n]->atomics[neighbor_namespace].size();
    ec[n]->atomics[neighbor_namespace].erase();
    ec[n]->sum_feat_sq[neighbor_namespace] = 0.;
  }

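  // main search loop: make num_loops passes over the nodes in bfs order,
  // reversing direction on alternate passes so information propagates both
  // ways. for each node: optionally add structural (neighbor) features,
  // condition on neighbors' current predictions, predict, then strip the
  // temporary features again.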
  void run(Search::search& sch, vector<example*>& ec) {
    task_data& D = *sch.get_task_data<task_data>();

    for (size_t n=0; n<D.N; n++) D.pred[n] = D.K+1;

    for (size_t loop=0; loop<D.num_loops; loop++) {
      int start = 0; int end = D.N; int step = 1;
      if (loop % 2 == 1) { start = D.N-1; end=-1; step = -1; } // go backward on odd loops
      for (int n_id = start; n_id != end; n_id += step) {
        uint32_t n = D.bfs[n_id];

        if (D.use_structure) add_edge_features(sch, D, n, ec);
        Search::predictor P = Search::predictor(sch, n+1);
        P.set_input(*ec[n]);
        P.set_oracle(ec[n]->l.cs.costs[0].class_index); // TODO: check if it exists for test data
        // condition on the predictions of all neighbors across incident edges
        for (size_t i : D.adj[n]) {
          for (size_t j=0; j<ec[i]->l.cs.costs.size(); j++) {
            uint32_t m = ec[i]->l.cs.costs[j].class_index - 1;
            if (m == n) continue;
            P.add_condition(m+1, 'e');
          }
        }

        // make the prediction
        D.pred[n] = P.predict();

        if (D.use_structure) del_edge_features(D, n, ec);
      }
    }

    if (sch.output().good())
      for (uint32_t n=0; n<D.N; n++)
        sch.output() << D.pred[n] << ' ';
  }
}