/*
Copyright (c) by respective owners including Yahoo!, Microsoft, and
individual contributors. All rights reserved. Released under a BSD (revised)
license as described in the file LICENSE.
*/
#include "search_graph.h"
#include "vw.h"
#include "rand48.h"
#include "gd.h"

/*
example format:

ALL NODES
ALL EDGES
<blank>
ALL NODES
ALL EDGES
<blank>

and so on

node lines look like normal vw examples with unary features:

label:weight |n features
label:weight |n features
...

they are *implicitly* labeled starting at 1. (note the namespace
needn't be called n.) if weight is omitted it is assumed to be 1.0.

edge lines look like:

n1 n2 n3 ... |e features
n1 n2 n3 ... |e features
...

here, n1 n2 n3 are integer node ids, starting at one. technically
these are hyperedges, since they can touch more than two nodes. in the
canonical representation, there will just be n1 and n2.

the only thing that differentiates edges from nodes is that edges have
>1 input.
*/

namespace GraphTask {
// Registration record: binds the task name "graph" to the callbacks below;
// the Search framework drives the task through this table.
Search::search_task task = { "graph", run, initialize, finish, setup, takedown };

struct task_data {
  // global data (set once in initialize)
  size_t num_loops;      // how many sweeps over the nodes run() performs per example
  size_t K;              // number of labels, *NOT* including the +1 for 'unlabeled'
  bool use_structure;    // when false, neighbor (edge) features are not added

  // per-example data (rebuilt in setup)
  uint32_t N;            // number of nodes
  uint32_t E;            // number of edges
  vector< vector<size_t> > adj;  // adj[n] is a vector of *edge example ids* that contain n
  vector<uint32_t> bfs;  // order of nodes to process
  vector<size_t> pred;   // predictions
  example* cur_node;     // pointer to the current node for add_edge_features_fn
  float* neighbor_predictions;   // prediction on this neighbor for add_edge_features_fn
};

// Label-parser hook: should report whether an example carries no gold label
// (i.e. is test data). NOTE(review): currently always answers "labeled";
// presumably it should inspect the cost-sensitive label (e.g. empty costs
// => test) -- confirm against the framework before relying on it.
bool example_is_test(void* ld) {
  // TODO
  return false;
}

// Task initialization: registers this task's command-line options, reads
// them out of vm, and stashes the configuration in a freshly allocated
// task_data owned by the Search object (released in finish()).
void initialize(Search::search& sch, size_t& num_actions, po::variables_map& vm) {
  task_data* D = new task_data();
  po::options_description sspan_opts("search graphtask options");
  sspan_opts.add_options()("search_graph_num_loops", po::value<size_t>(), "how many loops to run [def: 2]");
  sspan_opts.add_options()("search_graph_no_structure", "turn off edge features");
  sch.add_program_options(vm, sspan_opts);

  D->num_loops = 2;
  D->use_structure = true;
  if (vm.count("search_graph_num_loops")) D->num_loops = vm["search_graph_num_loops"].as<size_t>();
  if (vm.count("search_graph_no_structure")) D->use_structure = false;

  D->K = num_actions;
  // K real label slots plus one extra bucket for "neighbor not yet labeled"
  D->neighbor_predictions = calloc_or_die<float>(D->K+1);

  sch.set_task_data<task_data>(D);
  sch.set_options( Search::AUTO_HAMMING_LOSS );
  sch.set_label_parser( COST_SENSITIVE::cs_label, example_is_test );
}

// Counterpart to initialize(): releases the per-task allocations.
void finish(Search::search& sch) {
  task_data* D = sch.get_task_data<task_data>();
  free(D->neighbor_predictions);  // allocated via calloc_or_die, so free(), not delete[]
  delete D;
}
example_is_edge(example * e)97 inline bool example_is_edge(example*e) { return e->l.cs.costs.size() > 1; } 98 run_bfs(task_data & D,vector<example * > & ec)99 void run_bfs(task_data &D, vector<example*>& ec) { 100 D.bfs.clear(); 101 vector<bool> touched; 102 for (size_t n=0; n<D.N; n++) touched.push_back(false); 103 104 touched[0] = true; 105 D.bfs.push_back(0); 106 107 size_t i = 0; 108 while (D.bfs.size() < D.N) { 109 while (i < D.bfs.size()) { 110 uint32_t n = D.bfs[i]; 111 for (size_t id : D.adj[n]) 112 for (size_t j=0; j<ec[id]->l.cs.costs.size(); j++) { 113 uint32_t m = ec[id]->l.cs.costs[j].class_index - 1; 114 if (!touched[m]) { 115 D.bfs.push_back(m); 116 touched[m] = true; 117 } 118 } 119 i++; 120 } 121 122 if (D.bfs.size() < D.N) 123 // we finished a SCC, need to find another 124 for (uint32_t n=0; n<D.N; n++) 125 if (! touched[n]) { 126 touched[n] = true; 127 D.bfs.push_back(n); 128 break; 129 } 130 } 131 } 132 setup(Search::search & sch,vector<example * > & ec)133 void setup(Search::search& sch, vector<example*>& ec) { 134 task_data& D = *sch.get_task_data<task_data>(); 135 136 D.N = 0; 137 D.E = 0; 138 for (size_t i=0; i<ec.size(); i++) 139 if (example_is_edge(ec[i])) 140 D.E++; 141 else { // it's a node! 142 if (D.E > 0) { cerr << "error: got a node after getting edges!" << endl; throw exception(); } 143 D.N++; 144 } 145 146 if ((D.N == 0) && (D.E > 0)) { cerr << "error: got edges without any nodes!" 
<< endl; throw exception(); } 147 148 D.adj = vector<vector<size_t>>(D.N, vector<size_t>(0)); 149 150 for (size_t i=D.N; i<ec.size(); i++) { 151 for (size_t n=0; n<ec[i]->l.cs.costs.size(); n++) { 152 if (ec[i]->l.cs.costs[n].class_index > D.N) { 153 cerr << "error: edge source points to too large of a node id: " << (ec[i]->l.cs.costs[n].class_index) << " > " << D.N << endl; 154 throw exception(); 155 } 156 } 157 for (size_t n=0; n<ec[i]->l.cs.costs.size(); n++) { 158 size_t nn = ec[i]->l.cs.costs[n].class_index - 1; 159 if ((D.adj[nn].size() == 0) || (D.adj[nn][D.adj[nn].size()-1] != i)) // don't allow dups 160 D.adj[nn].push_back(i); 161 } 162 } 163 164 run_bfs(D, ec); 165 166 D.pred.clear(); 167 for (size_t n=0; n<D.N; n++) 168 D.pred.push_back( D.K+1 ); 169 } 170 takedown(Search::search & sch,vector<example * > & ec)171 void takedown(Search::search& sch, vector<example*>& ec) { 172 task_data& D = *sch.get_task_data<task_data>(); 173 D.bfs.clear(); 174 D.pred.clear(); 175 D.adj.clear(); 176 } 177 add_edge_features_fn(task_data & D,float fv,uint32_t fx)178 void add_edge_features_fn(task_data&D, float fv, uint32_t fx) { 179 example*node = D.cur_node; 180 for (size_t k=0; k<=D.K; k++) { 181 if (D.neighbor_predictions[k] == 0.) 
continue; 182 feature f = { fv * D.neighbor_predictions[k], (uint32_t) ( fx + 348919043 * k ) }; 183 node->atomics[neighbor_namespace].push_back(f); 184 node->sum_feat_sq[neighbor_namespace] += f.x * f.x; 185 } 186 /* 187 feature f = { fv, (uint32_t) ( fx + 348919043 * D.neighbor_prediction ) }; 188 node->atomics[neighbor_namespace].push_back(f); 189 node->sum_feat_sq[neighbor_namespace] += f.x * f.x; 190 */ 191 // TODO: audit 192 } 193 add_edge_features(Search::search & sch,task_data & D,uint32_t n,vector<example * > & ec)194 void add_edge_features(Search::search&sch, task_data&D, uint32_t n, vector<example*>&ec) { 195 D.cur_node = ec[n]; 196 197 for (size_t i : D.adj[n]) { 198 for (size_t k=0; k<D.K+1; k++) D.neighbor_predictions[k] = 0.; 199 float pred_total = 0.; 200 201 for (size_t j=0; j<ec[i]->l.cs.costs.size(); j++) { 202 size_t m = ec[i]->l.cs.costs[j].class_index - 1; 203 if (m == n) continue; 204 D.neighbor_predictions[ D.pred[m]-1 ] += 1.; 205 pred_total += 1.; 206 } 207 208 if (pred_total == 0.) 
continue; 209 //for (size_t k=0; k<D.K+1; k++) D.neighbor_predictions[k] /= pred_total; 210 example&edge = *ec[i]; 211 GD::foreach_feature<task_data,uint32_t,add_edge_features_fn>(sch.get_vw_pointer_unsafe(), edge, D); 212 } 213 ec[n]->indices.push_back(neighbor_namespace); 214 ec[n]->total_sum_feat_sq += ec[n]->sum_feat_sq[neighbor_namespace]; 215 ec[n]->num_features += ec[n]->atomics[neighbor_namespace].size(); 216 } 217 del_edge_features(task_data & D,uint32_t n,vector<example * > & ec)218 void del_edge_features(task_data&D, uint32_t n, vector<example*>&ec) { 219 ec[n]->indices.pop(); 220 ec[n]->total_sum_feat_sq -= ec[n]->sum_feat_sq[neighbor_namespace]; 221 ec[n]->num_features -= ec[n]->atomics[neighbor_namespace].size(); 222 ec[n]->atomics[neighbor_namespace].erase(); 223 ec[n]->sum_feat_sq[neighbor_namespace] = 0.; 224 } 225 run(Search::search & sch,vector<example * > & ec)226 void run(Search::search& sch, vector<example*>& ec) { 227 task_data& D = *sch.get_task_data<task_data>(); 228 229 for (size_t n=0; n<D.N; n++) D.pred[n] = D.K+1; 230 231 for (size_t loop=0; loop<D.num_loops; loop++) { 232 int start = 0; int end = D.N; int step = 1; 233 if (loop % 2 == 1) { start = D.N-1; end=-1; step = -1; } // go inward on odd loops 234 for (int n_id = start; n_id != end; n_id += step) { 235 uint32_t n = D.bfs[n_id]; 236 237 if (D.use_structure) add_edge_features(sch, D, n, ec); 238 Search::predictor P = Search::predictor(sch, n+1); 239 P.set_input(*ec[n]); 240 P.set_oracle(ec[n]->l.cs.costs[0].class_index); // TODO: check if it exists for test data 241 // add all the conditioning 242 for (size_t i=0; i<D.adj[n].size(); i++) { 243 for (size_t j=0; j<ec[i]->l.cs.costs.size(); j++) { 244 uint32_t m = ec[i]->l.cs.costs[j].class_index - 1; 245 if (m == n) continue; 246 P.add_condition(m+1, 'e'); 247 } 248 } 249 250 // make the prediction 251 D.pred[n] = P.predict(); 252 253 if (D.use_structure) del_edge_features(D, n, ec); 254 } 255 } 256 257 if (sch.output().good()) 258 
for (uint32_t n=0; n<D.N; n++) 259 sch.output() << D.pred[n] << ' '; 260 } 261 } 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280