1 #include <float.h> 2 #include <cassert> 3 4 #include "gd.h" 5 #include "accumulate.h" 6 #include "reductions.h" 7 #include "vw.h" 8 9 //#define MAGIC_ARGUMENT //MAY IT NEVER DIE 10 11 using namespace std; 12 using namespace LEARNER; 13 14 static const uint32_t parent_bit = 1; 15 static const uint32_t cycle_bit = 2; 16 static const uint32_t tree_atomics = 134; 17 static const float tolerance = 1e-9f; 18 static const uint32_t indicator_bit = 128; 19 static const uint32_t default_depth = 127; 20 21 struct sort_data { 22 float wval; 23 uint32_t wid; 24 }; 25 26 struct stagewise_poly 27 { 28 vw *all; // many uses, unmodular reduction 29 30 float sched_exponent; 31 uint32_t batch_sz; 32 bool batch_sz_double; 33 34 sort_data *sd; 35 uint32_t sd_len; 36 uint8_t *depthsbits; //interleaved array storing depth information and parent/cycle bits 37 38 uint64_t sum_sparsity; //of synthetic example 39 uint64_t sum_input_sparsity; //of input example 40 uint64_t num_examples; 41 //following three are for parallel (see end_pass()) 42 uint64_t sum_sparsity_sync; 43 uint64_t sum_input_sparsity_sync; 44 uint64_t num_examples_sync; 45 46 example synth_ec; 47 //following is bookkeeping in synth_ec creation (dfs) 48 feature synth_rec_f; 49 example *original_ec; 50 uint32_t cur_depth; 51 bool training; 52 uint64_t last_example_counter; 53 size_t numpasses; 54 uint32_t next_batch_sz; 55 bool update_support; 56 57 #ifdef DEBUG 58 uint32_t max_depth; 59 uint32_t depths[100000]; 60 #endif //DEBUG 61 62 #ifdef MAGIC_ARGUMENT 63 float magic_argument; 64 #endif //MAGIC_ARGUMENT 65 }; 66 67 stride_shift(const stagewise_poly & poly,uint32_t idx)68 inline uint32_t stride_shift(const stagewise_poly &poly, uint32_t idx) 69 { 70 return idx << poly.all->reg.stride_shift; 71 } 72 stride_un_shift(const stagewise_poly & poly,uint32_t idx)73 inline uint32_t stride_un_shift(const stagewise_poly &poly, uint32_t idx) 74 { 75 return idx >> poly.all->reg.stride_shift; 76 } 77 do_ft_offset(const stagewise_poly & poly,uint32_t idx)78 inline uint32_t do_ft_offset(const stagewise_poly &poly, uint32_t idx) 79 { 80 //cout << poly.synth_ec.ft_offset << " " << poly.original_ec->ft_offset << endl; 81 assert(!poly.original_ec || poly.synth_ec.ft_offset == poly.original_ec->ft_offset); 82 return idx + poly.synth_ec.ft_offset; 83 } 84 un_ft_offset(const stagewise_poly & poly,uint32_t idx)85 inline uint32_t un_ft_offset(const stagewise_poly &poly, uint32_t idx) 86 { 87 assert(!poly.original_ec || poly.synth_ec.ft_offset == poly.original_ec->ft_offset); 88 if (poly.synth_ec.ft_offset == 0) 89 return idx; 90 else { 91 while (idx < poly.synth_ec.ft_offset) { 92 idx += (uint32_t) poly.all->length() << poly.all->reg.stride_shift; 93 } 94 return idx - poly.synth_ec.ft_offset; 95 } 96 } 97 wid_mask(const stagewise_poly & poly,uint32_t wid)98 inline uint32_t wid_mask(const stagewise_poly &poly, uint32_t wid) 99 { 100 return wid & poly.all->reg.weight_mask; 101 } 102 wid_mask_un_shifted(const stagewise_poly & poly,uint32_t wid)103 inline uint32_t wid_mask_un_shifted(const stagewise_poly &poly, uint32_t wid) 104 { 105 return stride_un_shift(poly, wid & poly.all->reg.weight_mask); 106 } 107 constant_feat(const stagewise_poly & poly)108 inline uint32_t constant_feat(const stagewise_poly &poly) 109 { 110 return stride_shift(poly, constant * poly.all->wpp); 111 } 112 constant_feat_masked(const stagewise_poly & poly)113 inline uint32_t constant_feat_masked(const stagewise_poly &poly) 114 { 115 return wid_mask(poly, constant_feat(poly)); 116 } 117 118 depthsbits_sizeof(const stagewise_poly & poly)119 inline uint32_t depthsbits_sizeof(const stagewise_poly &poly) 120 { 121 return (uint32_t)(2 * poly.all->length() * sizeof(uint8_t)); 122 } 123 depthsbits_create(stagewise_poly & poly)124 void depthsbits_create(stagewise_poly &poly) 125 { 126 poly.depthsbits = calloc_or_die<uint8_t>(2 * poly.all->length()); 127 for (uint32_t i = 0; i < poly.all->length() * 2; i += 2) { 128 poly.depthsbits[i] = default_depth; 129 poly.depthsbits[i+1] = indicator_bit; 130 } 131 } 132 depthsbits_destroy(stagewise_poly & poly)133 void depthsbits_destroy(stagewise_poly &poly) 134 { 135 free(poly.depthsbits); 136 } 137 parent_get(const stagewise_poly & poly,uint32_t wid)138 inline bool parent_get(const stagewise_poly &poly, uint32_t wid) 139 { 140 assert(wid % stride_shift(poly, 1) == 0); 141 assert(do_ft_offset(poly, wid) % stride_shift(poly, 1) == 0); 142 return poly.depthsbits[wid_mask_un_shifted(poly, do_ft_offset(poly, wid)) * 2 + 1] & parent_bit; 143 } 144 parent_toggle(stagewise_poly & poly,uint32_t wid)145 inline void parent_toggle(stagewise_poly &poly, uint32_t wid) 146 { 147 assert(wid % stride_shift(poly, 1) == 0); 148 assert(do_ft_offset(poly, wid) % stride_shift(poly, 1) == 0); 149 poly.depthsbits[wid_mask_un_shifted(poly, do_ft_offset(poly, wid)) * 2 + 1] ^= parent_bit; 150 } 151 cycle_get(const stagewise_poly & poly,uint32_t wid)152 inline bool cycle_get(const stagewise_poly &poly, uint32_t wid) 153 { 154 //note: intentionally leaving out ft_offset. 155 assert(wid % stride_shift(poly, 1) == 0); 156 if ((poly.depthsbits[wid_mask_un_shifted(poly, wid) * 2 + 1] & cycle_bit) > 0) 157 return true; 158 else 159 return false; 160 } 161 cycle_toggle(stagewise_poly & poly,uint32_t wid)162 inline void cycle_toggle(stagewise_poly &poly, uint32_t wid) 163 { 164 //note: intentionally leaving out ft_offset. 165 assert(wid % stride_shift(poly, 1) == 0); 166 poly.depthsbits[wid_mask_un_shifted(poly, wid) * 2 + 1] ^= cycle_bit; 167 } 168 min_depths_get(const stagewise_poly & poly,uint32_t wid)169 inline uint8_t min_depths_get(const stagewise_poly &poly, uint32_t wid) 170 { 171 assert(wid % stride_shift(poly, 1) == 0); 172 assert(do_ft_offset(poly, wid) % stride_shift(poly, 1) == 0); 173 return poly.depthsbits[stride_un_shift(poly, do_ft_offset(poly, wid)) * 2]; 174 } 175 min_depths_set(stagewise_poly & poly,uint32_t wid,uint8_t depth)176 inline void min_depths_set(stagewise_poly &poly, uint32_t wid, uint8_t depth) 177 { 178 assert(wid % stride_shift(poly, 1) == 0); 179 assert(do_ft_offset(poly, wid) % stride_shift(poly, 1) == 0); 180 poly.depthsbits[stride_un_shift(poly, do_ft_offset(poly, wid)) * 2] = depth; 181 } 182 183 #ifndef NDEBUG sanity_check_state(stagewise_poly & poly)184 void sanity_check_state(stagewise_poly &poly) 185 { 186 for (uint32_t i = 0; i != poly.all->length(); ++i) 187 { 188 uint32_t wid = stride_shift(poly, i); 189 190 assert( ! cycle_get(poly,wid) ); 191 192 assert( ! (min_depths_get(poly, wid) == default_depth && parent_get(poly, wid)) ); 193 194 assert( ! (min_depths_get(poly, wid) == default_depth && fabsf(poly.all->reg.weight_vector[wid]) > 0) ); 195 //assert( min_depths_get(poly, wid) != default_depth && fabsf(poly.all->reg.weight_vector[wid]) < tolerance ); 196 197 assert( ! (poly.depthsbits[wid_mask_un_shifted(poly, wid) * 2 + 1] & ~(parent_bit + cycle_bit + indicator_bit)) ); 198 } 199 } 200 #endif //NDEBUG 201 202 //Note. OUTPUT & INPUT masked. 203 //It is very important that this function is invariant to stride. child_wid(const stagewise_poly & poly,uint32_t wi_atomic,uint32_t wi_general)204 inline uint32_t child_wid(const stagewise_poly &poly, uint32_t wi_atomic, uint32_t wi_general) 205 { 206 assert(wi_atomic == wid_mask(poly, wi_atomic)); 207 assert(wi_general == wid_mask(poly, wi_general)); 208 assert((wi_atomic & (stride_shift(poly, 1) - 1)) == 0); 209 assert((wi_general & (stride_shift(poly, 1) - 1)) == 0); 210 211 if (wi_atomic == constant_feat_masked(poly)) 212 return wi_general; 213 else if (wi_general == constant_feat_masked(poly)) 214 return wi_atomic; 215 else { 216 //This is basically the "Fowler–Noll–Vo" hash. Ideally, the hash would be invariant 217 //to the monomial, whereas this here is sensitive to the path followed, but whatever. 218 //the two main big differences with FNV are: (1) the "*constant" case should also have 219 //a big prime (so the default hash shouldn't be identity on small things, and (2) the 220 //size should not just be a power of 2, but some big prime. 221 return wid_mask(poly, stride_shift(poly, stride_un_shift(poly, wi_atomic) 222 ^ (16777619 * stride_un_shift(poly, wi_general)))); 223 } 224 } 225 sort_data_create(stagewise_poly & poly)226 void sort_data_create(stagewise_poly &poly) 227 { 228 poly.sd = NULL; 229 poly.sd_len = 0; 230 } 231 sort_data_ensure_sz(stagewise_poly & poly,uint32_t len)232 void sort_data_ensure_sz(stagewise_poly &poly, uint32_t len) 233 { 234 if (poly.sd_len < len) { 235 uint32_t len_candidate = 2 * len; 236 #ifdef DEBUG 237 cout << "resizing sort buffer; current size " << poly.sd_len; 238 #endif //DEBUG 239 poly.sd_len = (len_candidate > poly.all->length()) ? (uint32_t)poly.all->length() : len_candidate; 240 #ifdef DEBUG 241 cout << ", new size " << poly.sd_len << endl; 242 #endif //DEBUG 243 free(poly.sd); //okay for null. 244 poly.sd = calloc_or_die<sort_data>(poly.sd_len); 245 } 246 assert(len <= poly.sd_len); 247 } 248 sort_data_destroy(stagewise_poly & poly)249 void sort_data_destroy(stagewise_poly &poly) 250 { 251 free(poly.sd); 252 } 253 254 #ifdef DEBUG sort_data_compar(const void * a_v,const void * b_v)255 int sort_data_compar(const void *a_v, const void *b_v) 256 { 257 return 2 * ( ((sort_data *) a_v)->wval < ((sort_data *) b_v)->wval ) - 1; 258 } 259 #endif //DEBUG 260 sort_data_compar_heap(sort_data & a_v,sort_data & b_v)261 int sort_data_compar_heap(sort_data &a_v, sort_data &b_v) 262 { 263 return (a_v.wval > b_v.wval); 264 } 265 266 /* 267 * Performance note. 268 * 269 * On my laptop (Intel(R) Core(TM) i7-3520M CPU @ 2.90GHz), with compiler 270 * optimizations enabled, this routine takes ~0.001 seconds with -b 18 and 271 * ~0.06 seconds with -b 24. Since it is intended to run ~8 times in 1-pass 272 * mode and otherwise once per pass, it is considered adequate. 273 * 274 * Another choice (implemented in another version) is to never process the 275 * whole weight vector (e.g., by updating a hash set of nonzero weights after 276 * creating the synthetic example, and only processing that here). This 277 * choice was implemented in a similar algorithm and performed well. 278 */ sort_data_update_support(stagewise_poly & poly)279 void sort_data_update_support(stagewise_poly &poly) 280 { 281 assert(poly.num_examples); 282 283 //ft_offset affects parent_set / parent_get. This state must be reset at end. 284 uint32_t pop_ft_offset = poly.original_ec->ft_offset; 285 poly.synth_ec.ft_offset = 0; 286 assert(poly.original_ec); 287 poly.original_ec->ft_offset = 0; 288 289 uint32_t num_new_features = (uint32_t)pow(poly.sum_input_sparsity * 1.0f / poly.num_examples, poly.sched_exponent); 290 num_new_features = (num_new_features > poly.all->length()) ? (uint32_t)poly.all->length() : num_new_features; 291 sort_data_ensure_sz(poly, num_new_features); 292 293 sort_data *heap_end = poly.sd; 294 make_heap(poly.sd, heap_end, sort_data_compar_heap); //redundant 295 for (uint32_t i = 0; i != poly.all->length(); ++i) { 296 uint32_t wid = stride_shift(poly, i); 297 if (!parent_get(poly, wid) && wid != constant_feat_masked(poly)) { 298 float wval = (fabsf(poly.all->reg.weight_vector[wid]) 299 * poly.all->reg.weight_vector[poly.all->normalized_idx + (wid)]) 300 /* 301 * here's some depth penalization code. It was found to not improve 302 * statistical performance, and meanwhile it is verified as giving 303 * a nontrivial computational hit, thus commented out. 304 * 305 * - poly.magic_argument 306 * sqrtf(min_depths_get(poly, stride_shift(poly, i)) * 1.0 / poly.num_examples) 307 */ 308 ; 309 if (wval > tolerance) { 310 assert(heap_end >= poly.sd); 311 assert(heap_end <= poly.sd + num_new_features); 312 313 if (heap_end - poly.sd == (int)num_new_features && poly.sd->wval < wval) { 314 pop_heap(poly.sd, heap_end, sort_data_compar_heap); 315 --heap_end; 316 } 317 318 assert(heap_end >= poly.sd); 319 assert(heap_end < poly.sd + poly.sd_len); 320 321 if (heap_end - poly.sd < (int)num_new_features) { 322 heap_end->wval = wval; 323 heap_end->wid = wid; 324 ++heap_end; 325 push_heap(poly.sd, heap_end, sort_data_compar_heap); 326 } 327 } 328 } 329 } 330 num_new_features = (uint32_t) (heap_end - poly.sd); 331 332 #ifdef DEBUG 333 //eyeballing weights a pain if unsorted. 334 qsort(poly.sd, num_new_features, sizeof(sort_data), sort_data_compar); 335 #endif //DEBUG 336 337 for (uint32_t pos = 0; pos < num_new_features && pos < poly.sd_len; ++pos) { 338 assert(!parent_get(poly, poly.sd[pos].wid) 339 && poly.sd[pos].wval > tolerance 340 && poly.sd[pos].wid != constant_feat_masked(poly)); 341 parent_toggle(poly, poly.sd[pos].wid); 342 #ifdef DEBUG 343 cout 344 << "Adding feature " << pos << "/" << num_new_features 345 << " || wid " << poly.sd[pos].wid 346 << " || sort value " << poly.sd[pos].wval 347 << endl; 348 #endif //DEBUG 349 } 350 351 #ifdef DEBUG 352 cout << "depths:"; 353 for (uint32_t depth = 0; depth <= poly.max_depth && depth < sizeof(poly.depths) / sizeof(*poly.depths); ++depth) 354 cout << " [" << depth << "] = " << poly.depths[depth]; 355 cout << endl; 356 357 cout << "Sanity check after sort... " << flush; 358 sanity_check_state(poly); 359 cout << "done" << endl; 360 #endif //DEBUG 361 362 //it's okay that these may have been initially unequal; synth_ec value irrelevant so far. 363 poly.original_ec->ft_offset = pop_ft_offset; 364 poly.synth_ec.ft_offset = pop_ft_offset; 365 } 366 synthetic_reset(stagewise_poly & poly,example & ec)367 void synthetic_reset(stagewise_poly &poly, example &ec) 368 { 369 poly.synth_ec.l = ec.l; 370 poly.synth_ec.tag = ec.tag; 371 poly.synth_ec.example_counter = ec.example_counter; 372 373 /** 374 * Some comments on ft_offset. 375 * 376 * The plan is to do the feature mapping dfs with weight indices ignoring 377 * the ft_offset. This is because ft_offset is then added at the end, 378 * guaranteeing local/strided access on synth_ec. This might not matter 379 * too much in this implementation (where, e.g., --oaa runs one after the 380 * other, not interleaved), but who knows. 381 * 382 * (The other choice is to basically ignore adjusting for ft_offset when 383 * doing the traversal, which means synth_ec.ft_offset is 0 here...) 384 * 385 * Anyway, so here is how ft_offset matters: 386 * - synthetic_create_rec must "normalize it out" of the fed weight value 387 * - parent and min_depths set/get are adjusted for it. 388 * - cycle set/get are not adjusted for it, since it doesn't matter for them. 389 * - operations on the whole weight vector (sorting, save_load, all_reduce) 390 * ignore ft_offset, just treat the thing as a flat vector. 391 **/ 392 poly.synth_ec.ft_offset = ec.ft_offset; 393 394 poly.synth_ec.test_only = ec.test_only; 395 poly.synth_ec.end_pass = ec.end_pass; 396 poly.synth_ec.sorted = ec.sorted; 397 poly.synth_ec.in_use = ec.in_use; 398 399 poly.synth_ec.atomics[tree_atomics].erase(); 400 poly.synth_ec.audit_features[tree_atomics].erase(); 401 poly.synth_ec.num_features = 0; 402 poly.synth_ec.total_sum_feat_sq = 0; 403 poly.synth_ec.sum_feat_sq[tree_atomics] = 0; 404 poly.synth_ec.example_t = ec.example_t; 405 406 if (poly.synth_ec.indices.size()==0) 407 poly.synth_ec.indices.push_back(tree_atomics); 408 } 409 synthetic_decycle(stagewise_poly & poly)410 void synthetic_decycle(stagewise_poly &poly) 411 { 412 for (feature *f = poly.synth_ec.atomics[tree_atomics].begin; 413 f != poly.synth_ec.atomics[tree_atomics].end; ++f) { 414 assert(cycle_get(poly, f->weight_index)); 415 cycle_toggle(poly, f->weight_index); 416 } 417 } 418 synthetic_create_rec(stagewise_poly & poly,float v,float & w)419 void synthetic_create_rec(stagewise_poly &poly, float v, float &w) 420 { 421 //Note: need to un_ft_shift since gd::foreach_feature bakes in the offset. 422 uint32_t wid_atomic = wid_mask(poly, un_ft_offset(poly, (uint32_t)((&w - poly.all->reg.weight_vector)))); 423 uint32_t wid_cur = child_wid(poly, wid_atomic, poly.synth_rec_f.weight_index); 424 assert(wid_atomic % stride_shift(poly, 1) == 0); 425 426 //Note: only mutate learner state when in training mode. This is because 427 //the average test errors across multiple data sets should be equal to 428 //the test error on the merged dataset (which is violated if the code 429 //below is run at training time). 430 if (poly.cur_depth < min_depths_get(poly, wid_cur) && poly.training) { 431 if (parent_get(poly, wid_cur)) { 432 #ifdef DEBUG 433 cout 434 << "FOUND A TRANSPLANT!!! moving [" << wid_cur 435 << "] from depth " << (uint32_t) min_depths_get(poly, wid_cur) 436 << " to depth " << poly.cur_depth << endl; 437 #endif //DEBUG 438 //XXX arguably, should also fear transplants that occured with 439 //a different ft_offset ; e.g., need to look out for cross-reduction 440 //collisions. Have not played with this issue yet... 441 parent_toggle(poly, wid_cur); 442 } 443 min_depths_set(poly, wid_cur, poly.cur_depth); 444 } 445 446 if ( ! cycle_get(poly, wid_cur) 447 && ((poly.cur_depth > default_depth ? default_depth : poly.cur_depth) == min_depths_get(poly, wid_cur)) 448 ) { 449 cycle_toggle(poly, wid_cur); 450 451 #ifdef DEBUG 452 ++poly.depths[poly.cur_depth]; 453 #endif //DEBUG 454 455 feature new_f = { v * poly.synth_rec_f.x, wid_cur }; 456 poly.synth_ec.atomics[tree_atomics].push_back(new_f); 457 poly.synth_ec.num_features++; 458 poly.synth_ec.sum_feat_sq[tree_atomics] += new_f.x * new_f.x; 459 460 if (parent_get(poly, new_f.weight_index)) { 461 feature parent_f = poly.synth_rec_f; 462 poly.synth_rec_f = new_f; 463 ++poly.cur_depth; 464 #ifdef DEBUG 465 poly.max_depth = (poly.max_depth > poly.cur_depth) ? poly.max_depth : poly.cur_depth; 466 #endif //DEBUG 467 GD::foreach_feature<stagewise_poly, synthetic_create_rec>(*(poly.all), *(poly.original_ec), poly); 468 --poly.cur_depth; 469 poly.synth_rec_f = parent_f; 470 } 471 } 472 } 473 synthetic_create(stagewise_poly & poly,example & ec,bool training)474 void synthetic_create(stagewise_poly &poly, example &ec, bool training) 475 { 476 synthetic_reset(poly, ec); 477 478 poly.cur_depth = 0; 479 480 poly.synth_rec_f.x = 1.0; 481 poly.synth_rec_f.weight_index = constant_feat_masked(poly); //note: not ft_offset'd 482 poly.training = training; 483 /* 484 * Another choice is to mark the constant feature as the single initial 485 * parent, and recurse just on that feature (which arguably correctly interprets poly.cur_depth). 486 * Problem with this is if there is a collision with the root... 487 */ 488 GD::foreach_feature<stagewise_poly, synthetic_create_rec>(*poly.all, *poly.original_ec, poly); 489 synthetic_decycle(poly); 490 poly.synth_ec.total_sum_feat_sq = poly.synth_ec.sum_feat_sq[tree_atomics]; 491 492 if (training) { 493 poly.sum_sparsity += poly.synth_ec.num_features; 494 poly.sum_input_sparsity += ec.num_features; 495 poly.num_examples += 1; 496 } 497 } 498 predict(stagewise_poly & poly,base_learner & base,example & ec)499 void predict(stagewise_poly &poly, base_learner &base, example &ec) 500 { 501 poly.original_ec = &ec; 502 synthetic_create(poly, ec, false); 503 base.predict(poly.synth_ec); 504 ec.partial_prediction = poly.synth_ec.partial_prediction; 505 ec.updated_prediction = poly.synth_ec.updated_prediction; 506 ec.pred.scalar = poly.synth_ec.pred.scalar; 507 } 508 learn(stagewise_poly & poly,base_learner & base,example & ec)509 void learn(stagewise_poly &poly, base_learner &base, example &ec) 510 { 511 bool training = poly.all->training && ec.l.simple.label != FLT_MAX; 512 poly.original_ec = &ec; 513 514 if (training) { 515 if(poly.update_support) { 516 sort_data_update_support(poly); 517 poly.update_support = false; 518 } 519 520 synthetic_create(poly, ec, training); 521 base.learn(poly.synth_ec); 522 ec.partial_prediction = poly.synth_ec.partial_prediction; 523 ec.updated_prediction = poly.synth_ec.updated_prediction; 524 ec.pred.scalar = poly.synth_ec.pred.scalar; 525 526 if (ec.example_counter 527 //following line is to avoid repeats when multiple reductions on same example. 528 //XXX ideally, would get all "copies" of an example before scheduling the support 529 //update, but how do we know? 530 && poly.last_example_counter != ec.example_counter 531 && poly.batch_sz 532 && ( (poly.batch_sz_double && !(ec.example_counter % poly.next_batch_sz)) 533 || (!poly.batch_sz_double && !(ec.example_counter % poly.batch_sz)))) { 534 poly.next_batch_sz *= 2; //no effect when !poly.batch_sz_double 535 poly.update_support = (poly.all->span_server == "" || poly.numpasses == 1); 536 } 537 poly.last_example_counter = ec.example_counter; 538 } else 539 predict(poly, base, ec); 540 } 541 542 reduce_min(uint8_t & v1,const uint8_t & v2)543 void reduce_min(uint8_t &v1,const uint8_t &v2) 544 { 545 if(v1 == default_depth) 546 v1 = v2; 547 else if(v2 != default_depth) 548 v1 = (v1 <= v2) ? v1 : v2; 549 } 550 reduce_min_max(uint8_t & v1,const uint8_t & v2)551 void reduce_min_max(uint8_t &v1,const uint8_t &v2) 552 { 553 bool parent_or_depth; 554 if (v1 & indicator_bit) 555 parent_or_depth = true; 556 else 557 parent_or_depth = false; 558 bool p_or_d2; 559 if (v2 & indicator_bit) 560 p_or_d2 = true; 561 else 562 p_or_d2 = false; 563 if(parent_or_depth != p_or_d2) { 564 #ifdef DEBUG 565 cout << "Reducing parent with depth!!!!!"; 566 #endif //DEBUG 567 return; 568 } 569 570 if(parent_or_depth) 571 v1 = (v1 >= v2) ? v1 : v2; 572 else { 573 if(v1 == default_depth) 574 v1 = v2; 575 else if(v2 != default_depth) 576 v1 = (v1 <= v2) ? v1 : v2; 577 } 578 } 579 end_pass(stagewise_poly & poly)580 void end_pass(stagewise_poly &poly) 581 { 582 if (!!poly.batch_sz || (poly.all->span_server != "" && poly.numpasses > 1)) 583 return; 584 585 uint64_t sum_sparsity_inc = poly.sum_sparsity - poly.sum_sparsity_sync; 586 uint64_t sum_input_sparsity_inc = poly.sum_input_sparsity - poly.sum_input_sparsity_sync; 587 uint64_t num_examples_inc = poly.num_examples - poly.num_examples_sync; 588 589 #ifdef DEBUG 590 cout << "Sanity before allreduce\n"; 591 sanity_check_state(poly); 592 #endif //DEBUG 593 594 vw &all = *poly.all; 595 if (all.span_server != "") { 596 /* 597 * The following is inconsistent with the transplant code in 598 * synthetic_create_rec(), which clears parent bits on depth mismatches. 599 * But it's unclear what the right behavior is in general for either 600 * case... 601 */ 602 all_reduce<uint8_t, reduce_min_max>(poly.depthsbits, depthsbits_sizeof(poly), 603 all.span_server, all.unique_id, all.total, all.node, all.socks); 604 605 sum_input_sparsity_inc = (uint64_t)accumulate_scalar(all, all.span_server, (float)sum_input_sparsity_inc); 606 sum_sparsity_inc = (uint64_t)accumulate_scalar(all, all.span_server, (float)sum_sparsity_inc); 607 num_examples_inc = (uint64_t)accumulate_scalar(all, all.span_server, (float)num_examples_inc); 608 } 609 610 poly.sum_input_sparsity_sync = poly.sum_input_sparsity_sync + sum_input_sparsity_inc; 611 poly.sum_input_sparsity = poly.sum_input_sparsity_sync; 612 poly.sum_sparsity_sync = poly.sum_sparsity_sync + sum_sparsity_inc; 613 poly.sum_sparsity = poly.sum_sparsity_sync; 614 poly.num_examples_sync = poly.num_examples_sync + num_examples_inc; 615 poly.num_examples = poly.num_examples_sync; 616 617 #ifdef DEBUG 618 cout << "Sanity after allreduce\n"; 619 sanity_check_state(poly); 620 #endif //DEBUG 621 622 if (poly.numpasses != poly.all->numpasses) { 623 poly.update_support = true; 624 poly.numpasses++; 625 } 626 } 627 finish_example(vw & all,stagewise_poly & poly,example & ec)628 void finish_example(vw &all, stagewise_poly &poly, example &ec) 629 { 630 size_t temp_num_features = ec.num_features; 631 ec.num_features = poly.synth_ec.num_features; 632 output_and_account_example(all, ec); 633 ec.num_features = temp_num_features; 634 VW::finish_example(all, &ec); 635 } 636 finish(stagewise_poly & poly)637 void finish(stagewise_poly &poly) 638 { 639 #ifdef DEBUG 640 cout << "total feature number (after poly expansion!) = " << poly.sum_sparsity << endl; 641 #endif //DEBUG 642 643 poly.synth_ec.atomics[tree_atomics].delete_v(); 644 sort_data_destroy(poly); 645 depthsbits_destroy(poly); 646 } 647 648 save_load(stagewise_poly & poly,io_buf & model_file,bool read,bool text)649 void save_load(stagewise_poly &poly, io_buf &model_file, bool read, bool text) 650 { 651 if (model_file.files.size() > 0) 652 bin_text_read_write_fixed(model_file, (char *) poly.depthsbits, depthsbits_sizeof(poly), "", read, "", 0, text); 653 654 //unfortunately, following can't go here since save_load called before gd::save_load and thus 655 //weight vector state uninitialiazed. 656 //#ifdef DEBUG 657 // cout << "Sanity check after save_load... " << flush; 658 // sanity_check_state(poly); 659 // cout << "done" << endl; 660 //#endif //DEBUG 661 } 662 stagewise_poly_setup(vw & all)663 base_learner *stagewise_poly_setup(vw &all) 664 { 665 if (missing_option(all, true, "stage_poly", "use stagewise polynomial feature learning")) 666 return NULL; 667 668 new_options(all, "Stagewise poly options") 669 ("sched_exponent", po::value<float>(), "exponent controlling quantity of included features") 670 ("batch_sz", po::value<uint32_t>(), "multiplier on batch size before including more features") 671 ("batch_sz_no_doubling", "batch_sz does not double") 672 #ifdef MAGIC_ARGUMENT 673 ("magic_argument", po::value<float>(), "magical feature flag") 674 #endif //MAGIC_ARGUMENT 675 ; 676 add_options(all); 677 678 po::variables_map &vm = all.vm; 679 stagewise_poly& poly = calloc_or_die<stagewise_poly>(); 680 poly.all = &all; 681 depthsbits_create(poly); 682 sort_data_create(poly); 683 684 poly.sched_exponent = vm.count("sched_exponent") ? vm["sched_exponent"].as<float>() : 1.f; 685 poly.batch_sz = vm.count("batch_sz") ? vm["batch_sz"].as<uint32_t>() : 1000; 686 poly.batch_sz_double = vm.count("batch_sz_no_doubling") ? false : true; 687 #ifdef MAGIC_ARGUMENT 688 poly.magic_argument = vm.count("magic_argument") ? vm["magic_argument"].as<float>() : 0.; 689 #endif //MAGIC_ARGUMENT 690 691 poly.sum_sparsity = 0; 692 poly.sum_input_sparsity = 0; 693 poly.num_examples = 0; 694 poly.sum_sparsity_sync = 0; 695 poly.sum_input_sparsity_sync = 0; 696 poly.num_examples_sync = 0; 697 poly.last_example_counter = -1; 698 poly.numpasses = 1; 699 poly.update_support = false; 700 poly.original_ec = NULL; 701 poly.next_batch_sz = poly.batch_sz; 702 703 learner<stagewise_poly>& l = init_learner(&poly, setup_base(all), learn, predict); 704 l.set_finish(finish); 705 l.set_save_load(save_load); 706 l.set_finish_example(finish_example); 707 l.set_end_pass(end_pass); 708 709 return make_base(l); 710 } 711