1 /*++ 2 Copyright (c) 2006 Microsoft Corporation 3 4 Module Name: 5 6 dl_sparse_table.cpp 7 8 Abstract: 9 10 <abstract> 11 12 Author: 13 14 Krystof Hoder (t-khoder) 2010-09-24. 15 16 Revision History: 17 18 --*/ 19 20 #include<utility> 21 #include "muz/base/dl_context.h" 22 #include "muz/base/dl_util.h" 23 #include "muz/rel/dl_sparse_table.h" 24 25 namespace datalog { 26 27 28 // ----------------------------------- 29 // 30 // entry_storage 31 // 32 // ----------------------------------- 33 insert_or_get_reserve_content()34 entry_storage::store_offset entry_storage::insert_or_get_reserve_content() { 35 SASSERT(has_reserve()); 36 store_offset entry_ofs = m_data_indexer.insert_if_not_there(m_reserve); 37 if (m_reserve == entry_ofs) { 38 //entry inserted, so reserve is no longer a reserve 39 m_reserve = NO_RESERVE; 40 } 41 return entry_ofs; 42 } insert_reserve_content()43 bool entry_storage::insert_reserve_content() { 44 SASSERT(has_reserve()); 45 store_offset entry_ofs = m_data_indexer.insert_if_not_there(m_reserve); 46 if (m_reserve == entry_ofs) { 47 //entry inserted, so reserve is no longer a reserve 48 m_reserve = NO_RESERVE; 49 return true; 50 } 51 return false; 52 } 53 remove_reserve_content()54 bool entry_storage::remove_reserve_content() { 55 SASSERT(has_reserve()); 56 store_offset entry_ofs; 57 if (!find_reserve_content(entry_ofs)) { 58 //the fact was not in the table 59 return false; 60 } 61 remove_offset(entry_ofs); 62 return true; 63 } 64 remove_offset(store_offset ofs)65 void entry_storage::remove_offset(store_offset ofs) { 66 m_data_indexer.remove(ofs); 67 store_offset last_ofs = after_last_offset() - m_entry_size; 68 if (ofs!=last_ofs) { 69 SASSERT(ofs + m_entry_size <= last_ofs); 70 //we don't want any holes, so we put the last element at the place 71 //of the removed one 72 m_data_indexer.remove(last_ofs); 73 char * base = &m_data.get(0); 74 memcpy(base+ofs, base+last_ofs, m_entry_size); 75 m_data_indexer.insert(ofs); 76 } 77 if (has_reserve()) { 78 //we already have a reserve, so we need to shrink a little to keep having just one 79 resize_data(m_data_size-m_entry_size); 80 } 81 m_reserve=last_ofs; 82 } 83 get_size_estimate_bytes() const84 unsigned entry_storage::get_size_estimate_bytes() const { 85 size_t sz = m_data.capacity(); 86 sz += m_data_indexer.capacity()*sizeof(storage_indexer::entry); 87 return static_cast<unsigned>(sz); 88 } 89 90 // ----------------------------------- 91 // 92 // sparse_table::column_layout 93 // 94 // ----------------------------------- 95 get_domain_length(uint64_t dom_size)96 unsigned get_domain_length(uint64_t dom_size) { 97 SASSERT(dom_size>0); 98 99 unsigned length = 0; 100 101 unsigned dom_size_sm; 102 if (dom_size>UINT_MAX) { 103 dom_size_sm = static_cast<unsigned>(dom_size>>32); 104 length += 32; 105 if ( (dom_size&UINT_MAX)!=0 && dom_size_sm!=UINT_MAX ) { 106 dom_size_sm++; 107 } 108 } 109 else { 110 dom_size_sm=static_cast<unsigned>(dom_size); 111 } 112 if (dom_size_sm == 1) { 113 length += 1; //unary domains 114 } 115 else if (dom_size_sm > 0x80000000u) { 116 length += 32; 117 } 118 else { 119 length += get_num_1bits(next_power_of_two(dom_size_sm)-1); //ceil(log2(dom_size)) 120 } 121 return length; 122 } 123 column_layout(const table_signature & sig)124 sparse_table::column_layout::column_layout(const table_signature & sig) 125 : m_functional_col_cnt(sig.functional_columns()) { 126 SASSERT(sig.size() > 0); 127 unsigned ofs = 0; 128 unsigned sig_sz = sig.size(); 129 unsigned first_functional = sig_sz-m_functional_col_cnt; 130 for (unsigned i=0; i<sig_sz; i++) { 131 uint64_t dom_size = sig[i]; 132 unsigned length = get_domain_length(dom_size); 133 SASSERT(length>0); 134 SASSERT(length<=64); 135 136 if (size() > 0 && (length > 54 || i == first_functional)) { 137 //large domains must start byte-aligned, as well as functional columns 138 make_byte_aligned_end(size()-1); 139 ofs = back().next_ofs(); 140 } 141 142 push_back(column_info(ofs, length)); 143 ofs += length; 144 } 145 make_byte_aligned_end(size()-1); 146 SASSERT(back().next_ofs()%8 == 0);//the entries must be aligned to whole bytes 147 m_entry_size = back().next_ofs()/8; 148 if (m_functional_col_cnt) { 149 SASSERT((*this)[first_functional].m_offset%8 == 0); 150 m_functional_part_size = m_entry_size - (*this)[first_functional].m_offset/8; 151 } 152 else { 153 m_functional_part_size = 0; 154 } 155 } 156 make_byte_aligned_end(unsigned col_index0)157 void sparse_table::column_layout::make_byte_aligned_end(unsigned col_index0) { 158 unsigned ofs = (*this)[col_index0].next_ofs(); 159 unsigned ofs_bit_part = ofs%8; 160 unsigned rounded_ofs = (ofs_bit_part == 0) ? ofs : (ofs+8-ofs_bit_part); 161 162 if (rounded_ofs!=ofs) { 163 SASSERT(rounded_ofs>ofs); 164 int diff = rounded_ofs-ofs; 165 unsigned col_idx = col_index0+1; 166 while(diff!=0) { 167 //we should always be able to fix the alignment by the time we reach zero 168 SASSERT(col_idx>0); 169 col_idx--; 170 column_info & ci = (*this)[col_idx]; 171 unsigned new_length = ci.m_length; 172 if (ci.m_length < 64) { 173 unsigned swallowed = std::min(64-static_cast<int>(ci.m_length), diff); 174 diff -= swallowed; 175 new_length += swallowed; 176 } 177 unsigned new_ofs = ci.m_offset+diff; 178 ci = column_info(new_ofs, new_length); 179 } 180 } 181 182 SASSERT(rounded_ofs%8 == 0); 183 SASSERT((*this)[col_index0].next_ofs()%8 == 0); 184 } 185 186 // ----------------------------------- 187 // 188 // sparse_table 189 // 190 // ----------------------------------- 191 192 class sparse_table::our_iterator_core : public iterator_core { 193 194 class our_row : public row_interface { 195 const our_iterator_core & m_parent; 196 public: our_row(const sparse_table & t,const our_iterator_core & parent)197 our_row(const sparse_table & t, const our_iterator_core & parent) : 198 row_interface(t), 199 m_parent(parent) {} 200 operator [](unsigned col) const201 table_element operator[](unsigned col) const override { 202 return m_parent.m_layout.get(m_parent.m_ptr, col); 203 } 204 205 }; 206 207 const char * m_end; 208 const char * m_ptr; 209 unsigned m_fact_size; 210 our_row m_row_obj; 211 const column_layout & m_layout; 212 213 public: our_iterator_core(const sparse_table & t,bool finished)214 our_iterator_core(const sparse_table & t, bool finished) : 215 m_end(t.m_data.after_last()), 216 m_ptr(finished ? m_end : t.m_data.begin()), 217 m_fact_size(t.m_fact_size), 218 m_row_obj(t, *this), 219 m_layout(t.m_column_layout) {} 220 is_finished() const221 bool is_finished() const override { 222 return m_ptr == m_end; 223 } 224 operator *()225 row_interface & operator*() override { 226 SASSERT(!is_finished()); 227 return m_row_obj; 228 } operator ++()229 void operator++() override { 230 SASSERT(!is_finished()); 231 m_ptr+=m_fact_size; 232 } 233 }; 234 235 class sparse_table::key_indexer { 236 protected: 237 unsigned_vector m_key_cols; 238 public: 239 typedef const store_offset * offset_iterator; 240 241 /** 242 Iterators returned by \c begin() and \c end() are valid only as long as the \c query_result 243 object that returned them exists. 244 */ 245 struct query_result { 246 private: 247 bool m_singleton; 248 union { 249 store_offset m_single_result; 250 struct { 251 offset_iterator begin; 252 offset_iterator end; 253 } m_many; 254 }; 255 public: 256 /** 257 \brief Empty result. 258 */ query_resultdatalog::sparse_table::key_indexer::query_result259 query_result() : m_singleton(false) { 260 m_many.begin = nullptr; 261 m_many.end = nullptr; 262 } query_resultdatalog::sparse_table::key_indexer::query_result263 query_result(offset_iterator begin, offset_iterator end) : m_singleton(false) { 264 m_many.begin = begin; 265 m_many.end = end; 266 } query_resultdatalog::sparse_table::key_indexer::query_result267 query_result(store_offset single_result) : m_singleton(true), m_single_result(single_result) {} 268 begindatalog::sparse_table::key_indexer::query_result269 offset_iterator begin() const { return m_singleton ? &m_single_result : m_many.begin; } enddatalog::sparse_table::key_indexer::query_result270 offset_iterator end() const { return m_singleton ? (&m_single_result+1) : m_many.end; } emptydatalog::sparse_table::key_indexer::query_result271 bool empty() const { return begin() == end(); } 272 }; 273 key_indexer(unsigned key_len,const unsigned * key_cols)274 key_indexer(unsigned key_len, const unsigned * key_cols) 275 : m_key_cols(key_len, key_cols) {} 276 ~key_indexer()277 virtual ~key_indexer() {} 278 update(const sparse_table & t)279 virtual void update(const sparse_table & t) {} 280 281 virtual query_result get_matching_offsets(const key_value & key) const = 0; 282 }; 283 284 285 class sparse_table::general_key_indexer : public key_indexer { 286 typedef svector<store_offset> offset_vector; 287 typedef size_t_map<offset_vector> index_map; 288 289 index_map m_map; 290 mutable entry_storage m_keys; 291 store_offset m_first_nonindexed; 292 293 key_to_reserve(const key_value & key) const294 void key_to_reserve(const key_value & key) const { 295 m_keys.ensure_reserve(); 296 m_keys.write_into_reserve((char *)(key.c_ptr())); 297 } 298 get_matching_offset_vector(const key_value & key)299 offset_vector & get_matching_offset_vector(const key_value & key) { 300 key_to_reserve(key); 301 store_offset ofs = m_keys.insert_or_get_reserve_content(); 302 index_map::entry * e = m_map.find_core(ofs); 303 if (!e) { 304 TRACE("dl_table_relation", tout << "inserting\n";); 305 e = m_map.insert_if_not_there3(ofs, offset_vector()); 306 } 307 return e->get_data().m_value; 308 } 309 public: general_key_indexer(unsigned key_len,const unsigned * key_cols)310 general_key_indexer(unsigned key_len, const unsigned * key_cols) 311 : key_indexer(key_len, key_cols), 312 m_keys(key_len*sizeof(table_element)), 313 m_first_nonindexed(0) {} 314 update(const sparse_table & t)315 void update(const sparse_table & t) override { 316 if (m_first_nonindexed == t.m_data.after_last_offset()) { 317 return; 318 } 319 SASSERT(m_first_nonindexed<t.m_data.after_last_offset()); 320 //we need to add new facts into the index 321 322 unsigned key_len = m_key_cols.size(); 323 324 store_offset ofs = m_first_nonindexed; 325 store_offset after_last = t.m_data.after_last_offset(); 326 327 key_value key; 328 key.resize(key_len); 329 330 offset_vector * index_entry = nullptr; 331 bool key_modified = true; 332 333 for (; ofs!=after_last; ofs+=t.m_fact_size) { 334 for (unsigned i=0; i<key_len; i++) { 335 table_element col_val = t.get_cell(ofs, m_key_cols[i]); 336 if (key[i]!=col_val) { 337 key[i] = col_val; 338 key_modified = true; 339 } 340 } 341 342 if (key_modified) { 343 index_entry = &get_matching_offset_vector(key); 344 key_modified = false; 345 } 346 SASSERT(index_entry); 347 //here we insert the offset of the fact in m_data vector into the indexer 348 index_entry->insert(ofs); 349 } 350 351 m_first_nonindexed = t.m_data.after_last_offset(); 352 } 353 get_matching_offsets(const key_value & key) const354 query_result get_matching_offsets(const key_value & key) const override { 355 key_to_reserve(key); 356 store_offset ofs; 357 if (!m_keys.find_reserve_content(ofs)) { 358 return query_result(); 359 } 360 index_map::entry * e = m_map.find_core(ofs); 361 if (!e) { 362 return query_result(); 363 } 364 const offset_vector & res = e->get_data().m_value; 365 return query_result(res.begin(), res.end()); 366 } 367 }; 368 369 /** 370 When doing lookup using this index, the content of the reserve in sparse_table::m_data changes. 371 */ 372 class sparse_table::full_signature_key_indexer : public key_indexer { 373 const sparse_table & m_table; 374 375 /** 376 Permutation of key columns to make it into table facts. If empty, no permutation is necessary. 377 */ 378 unsigned_vector m_permutation; 379 mutable table_fact m_key_fact; 380 public: 381 can_handle(unsigned key_len,const unsigned * key_cols,const sparse_table & t)382 static bool can_handle(unsigned key_len, const unsigned * key_cols, const sparse_table & t) { 383 unsigned non_func_cols = t.get_signature().first_functional(); 384 if (key_len!=non_func_cols) { 385 return false; 386 } 387 counter ctr; 388 ctr.count(key_len, key_cols); 389 if (ctr.get_max_counter_value()!=1 || ctr.get_max_positive()!=non_func_cols-1) { 390 return false; 391 } 392 SASSERT(ctr.get_positive_count() == non_func_cols); 393 return true; 394 } 395 full_signature_key_indexer(unsigned key_len,const unsigned * key_cols,const sparse_table & t)396 full_signature_key_indexer(unsigned key_len, const unsigned * key_cols, const sparse_table & t) 397 : key_indexer(key_len, key_cols), 398 m_table(t) { 399 SASSERT(can_handle(key_len, key_cols, t)); 400 401 m_permutation.resize(key_len); 402 for (unsigned i=0; i<key_len; i++) { 403 //m_permutation[m_key_cols[i]] = i; 404 m_permutation[i] = m_key_cols[i]; 405 } 406 m_key_fact.resize(t.get_signature().size()); 407 } 408 ~full_signature_key_indexer()409 ~full_signature_key_indexer() override {} 410 get_matching_offsets(const key_value & key) const411 query_result get_matching_offsets(const key_value & key) const override { 412 unsigned key_len = m_key_cols.size(); 413 for (unsigned i=0; i<key_len; i++) { 414 m_key_fact[m_permutation[i]] = key[i]; 415 } 416 //We will change the content of the reserve; which does not change the 'high-level' 417 //content of the table. 418 sparse_table & t = const_cast<sparse_table&>(m_table); 419 t.write_into_reserve(m_key_fact.c_ptr()); 420 421 store_offset res; 422 if (!t.m_data.find_reserve_content(res)) { 423 return query_result(); 424 } 425 return query_result(res); 426 } 427 }; 428 sparse_table(sparse_table_plugin & p,const table_signature & sig,unsigned init_capacity)429 sparse_table::sparse_table(sparse_table_plugin & p, const table_signature & sig, unsigned init_capacity) 430 : table_base(p, sig), 431 m_column_layout(sig), 432 m_fact_size(m_column_layout.m_entry_size), 433 m_data(m_fact_size, m_column_layout.m_functional_part_size, init_capacity) {} 434 sparse_table(const sparse_table & t)435 sparse_table::sparse_table(const sparse_table & t) 436 : table_base(t.get_plugin(), t.get_signature()), 437 m_column_layout(t.m_column_layout), 438 m_fact_size(t.m_fact_size), 439 m_data(t.m_data) {} 440 clone() const441 table_base * sparse_table::clone() const { 442 return get_plugin().mk_clone(*this); 443 } 444 ~sparse_table()445 sparse_table::~sparse_table() { 446 reset_indexes(); 447 } 448 reset()449 void sparse_table::reset() { 450 reset_indexes(); 451 m_data.reset(); 452 } 453 begin() const454 table_base::iterator sparse_table::begin() const { 455 return mk_iterator(alloc(our_iterator_core, *this, false)); 456 } 457 end() const458 table_base::iterator sparse_table::end() const { 459 return mk_iterator(alloc(our_iterator_core, *this, true)); 460 } 461 get_key_indexer(unsigned key_len,const unsigned * key_cols) const462 sparse_table::key_indexer& sparse_table::get_key_indexer(unsigned key_len, 463 const unsigned * key_cols) const { 464 verbose_action _va("get_key_indexer"); 465 466 #if Z3DEBUG 467 //We allow indexes only on non-functional columns because we want to be able to modify them 468 //without having to worry about updating indexes. 469 //Maybe we might keep a list of indexes that contain functional columns and on an update reset 470 //only those. 471 SASSERT(key_len == 0 || 472 counter().count(key_len, key_cols).get_max_positive()<get_signature().first_functional()); 473 #endif 474 key_spec kspec; 475 kspec.append(key_len, key_cols); 476 key_index_map::entry * key_map_entry = m_key_indexes.insert_if_not_there3(kspec, nullptr); 477 if (!key_map_entry->get_data().m_value) { 478 if (full_signature_key_indexer::can_handle(key_len, key_cols, *this)) { 479 key_map_entry->get_data().m_value = alloc(full_signature_key_indexer, key_len, key_cols, *this); 480 } 481 else { 482 key_map_entry->get_data().m_value = alloc(general_key_indexer, key_len, key_cols); 483 } 484 } 485 key_indexer & indexer = *key_map_entry->get_data().m_value; 486 indexer.update(*this); 487 return indexer; 488 } 489 reset_indexes()490 void sparse_table::reset_indexes() { 491 key_index_map::iterator kmit = m_key_indexes.begin(); 492 key_index_map::iterator kmend = m_key_indexes.end(); 493 for (; kmit!=kmend; ++kmit) { 494 dealloc((*kmit).m_value); 495 } 496 m_key_indexes.reset(); 497 } 498 write_into_reserve(const table_element * f)499 void sparse_table::write_into_reserve(const table_element* f) { 500 TRACE("dl_table_relation", tout << "\n";); 501 m_data.ensure_reserve(); 502 char * reserve = m_data.get_reserve_ptr(); 503 unsigned col_cnt = m_column_layout.size(); 504 for (unsigned i = 0; i < col_cnt; ++i) { 505 SASSERT(f[i] < get_signature()[i]); //the value fits into the table signature 506 m_column_layout.set(reserve, i, f[i]); 507 } 508 } 509 add_fact(const char * data)510 bool sparse_table::add_fact(const char * data) { 511 verbose_action _va("add_fact", 10); 512 m_data.write_into_reserve(data); 513 return add_reserve_content(); 514 } 515 add_fact(const table_fact & f)516 void sparse_table::add_fact(const table_fact & f) { 517 write_into_reserve(f.c_ptr()); 518 add_reserve_content(); 519 } 520 add_reserve_content()521 bool sparse_table::add_reserve_content() { 522 return m_data.insert_reserve_content(); 523 } 524 contains_fact(const table_fact & f) const525 bool sparse_table::contains_fact(const table_fact & f) const { 526 verbose_action _va("contains_fact", 2); 527 sparse_table & t = const_cast<sparse_table &>(*this); 528 t.write_into_reserve(f.c_ptr()); 529 unsigned func_col_cnt = get_signature().functional_columns(); 530 if (func_col_cnt == 0) { 531 return t.m_data.reserve_content_already_present(); 532 } 533 else { 534 store_offset ofs; 535 if (!t.m_data.find_reserve_content(ofs)) { 536 return false; 537 } 538 unsigned sz = get_signature().size(); 539 for (unsigned i=func_col_cnt; i<sz; i++) { 540 if (t.get_cell(ofs, i)!=f[i]) { 541 return false; 542 } 543 } 544 return true; 545 } 546 } 547 fetch_fact(table_fact & f) const548 bool sparse_table::fetch_fact(table_fact & f) const { 549 verbose_action _va("fetch_fact", 2); 550 const table_signature & sig = get_signature(); 551 SASSERT(f.size() == sig.size()); 552 if (sig.functional_columns() == 0) { 553 return contains_fact(f); 554 } 555 else { 556 sparse_table & t = const_cast<sparse_table &>(*this); 557 t.write_into_reserve(f.c_ptr()); 558 store_offset ofs; 559 if (!t.m_data.find_reserve_content(ofs)) { 560 return false; 561 } 562 unsigned sz = sig.size(); 563 for (unsigned i=sig.first_functional(); i<sz; i++) { 564 f[i] = t.get_cell(ofs, i); 565 } 566 return true; 567 } 568 } 569 570 /** 571 In this function we modify the content of table functional columns without resetting indexes. 572 This is ok as long as we do not allow indexing on functional columns. 573 */ ensure_fact(const table_fact & f)574 void sparse_table::ensure_fact(const table_fact & f) { 575 verbose_action _va("ensure_fact", 2); 576 const table_signature & sig = get_signature(); 577 if (sig.functional_columns() == 0) { 578 add_fact(f); 579 } 580 else { 581 write_into_reserve(f.c_ptr()); 582 store_offset ofs; 583 if (!m_data.find_reserve_content(ofs)) { 584 add_fact(f); 585 return; 586 } 587 unsigned sz = sig.size(); 588 for (unsigned i=sig.first_functional(); i<sz; i++) { 589 set_cell(ofs, i, f[i]); 590 } 591 } 592 } 593 remove_fact(const table_element * f)594 void sparse_table::remove_fact(const table_element* f) { 595 verbose_action _va("remove_fact", 2); 596 //first insert the fact so that we find it's original location and remove it 597 write_into_reserve(f); 598 if (!m_data.remove_reserve_content()) { 599 //the fact was not in the table 600 return; 601 } 602 reset_indexes(); 603 } 604 copy_columns(const column_layout & src_layout,const column_layout & dest_layout,unsigned start_index,unsigned after_last,const char * src,char * dest,unsigned & dest_idx,unsigned & pre_projection_idx,const unsigned * & next_removed)605 void sparse_table::copy_columns(const column_layout & src_layout, const column_layout & dest_layout, 606 unsigned start_index, unsigned after_last, const char * src, char * dest, 607 unsigned & dest_idx, unsigned & pre_projection_idx, const unsigned * & next_removed) { 608 for (unsigned i=start_index; i<after_last; i++, pre_projection_idx++) { 609 if (*next_removed == pre_projection_idx) { 610 next_removed++; 611 continue; 612 } 613 SASSERT(*next_removed>pre_projection_idx); 614 dest_layout.set(dest, dest_idx++, src_layout.get(src, i)); 615 } 616 } 617 concatenate_rows(const column_layout & layout1,const column_layout & layout2,const column_layout & layout_res,const char * ptr1,const char * ptr2,char * res,const unsigned * removed_cols)618 void sparse_table::concatenate_rows(const column_layout & layout1, const column_layout & layout2, 619 const column_layout & layout_res, const char * ptr1, const char * ptr2, char * res, 620 const unsigned * removed_cols) { 621 unsigned t1non_func = layout1.size()-layout1.m_functional_col_cnt; 622 unsigned t2non_func = layout2.size()-layout2.m_functional_col_cnt; 623 unsigned t1cols = layout1.size(); 624 unsigned t2cols = layout2.size(); 625 unsigned orig_i = 0; 626 unsigned res_i = 0; 627 const unsigned * next_removed = removed_cols; 628 copy_columns(layout1, layout_res, 0, t1non_func, ptr1, res, res_i, orig_i, next_removed); 629 copy_columns(layout2, layout_res, 0, t2non_func, ptr2, res, res_i, orig_i, next_removed); 630 copy_columns(layout1, layout_res, t1non_func, t1cols, ptr1, res, res_i, orig_i, next_removed); 631 copy_columns(layout2, layout_res, t2non_func, t2cols, ptr2, res, res_i, orig_i, next_removed); 632 } 633 garbage_collect()634 void sparse_table::garbage_collect() { 635 if (memory::above_high_watermark()) { 636 get_plugin().garbage_collect(); 637 } 638 if (memory::above_high_watermark()) { 639 IF_VERBOSE(1, verbose_stream() << "Ran out of memory while filling table of size: " << get_size_estimate_rows() << " rows " << get_size_estimate_bytes() << " bytes\n";); 640 throw out_of_memory_error(); 641 } 642 } 643 self_agnostic_join_project(const sparse_table & t1,const sparse_table & t2,unsigned joined_col_cnt,const unsigned * t1_joined_cols,const unsigned * t2_joined_cols,const unsigned * removed_cols,bool tables_swapped,sparse_table & result)644 void sparse_table::self_agnostic_join_project(const sparse_table & t1, const sparse_table & t2, 645 unsigned joined_col_cnt, const unsigned * t1_joined_cols, const unsigned * t2_joined_cols, 646 const unsigned * removed_cols, bool tables_swapped, sparse_table & result) { 647 648 verbose_action _va("join_project", 1); 649 unsigned t1_entry_size = t1.m_fact_size; 650 unsigned t2_entry_size = t2.m_fact_size; 651 652 size_t t1idx = 0; 653 size_t t1end = t1.m_data.after_last_offset(); 654 655 TRACE("dl_table_relation", 656 tout << "joined_col_cnt: " << joined_col_cnt << "\n"; 657 tout << "t1_entry_size: " << t1_entry_size << "\n"; 658 tout << "t2_entry_size: " << t2_entry_size << "\n"; 659 t1.display(tout); 660 t2.display(tout); 661 tout << (&t1) << " " << (&t2) << " " << (&result) << "\n"; 662 ); 663 664 if (joined_col_cnt == 0) { 665 size_t t2idx = 0; 666 size_t t2end = t2.m_data.after_last_offset(); 667 668 for (; t1idx!=t1end; t1idx+=t1_entry_size) { 669 for (t2idx = 0; t2idx != t2end; t2idx += t2_entry_size) { 670 result.m_data.ensure_reserve(); 671 result.garbage_collect(); 672 char * res_reserve = result.m_data.get_reserve_ptr(); 673 char const* t1ptr = t1.get_at_offset(t1idx); 674 char const* t2ptr = t2.get_at_offset(t2idx); 675 if (tables_swapped) { 676 concatenate_rows(t2.m_column_layout, t1.m_column_layout, result.m_column_layout, 677 t2ptr, t1ptr, res_reserve, removed_cols); 678 } else { 679 concatenate_rows(t1.m_column_layout, t2.m_column_layout, result.m_column_layout, 680 t1ptr, t2ptr, res_reserve, removed_cols); 681 } 682 result.add_reserve_content(); 683 } 684 } 685 return; 686 } 687 688 key_value t1_key; 689 t1_key.resize(joined_col_cnt); 690 key_indexer& t2_indexer = t2.get_key_indexer(joined_col_cnt, t2_joined_cols); 691 692 bool key_modified = true; 693 key_indexer::query_result t2_offsets; 694 695 for (; t1idx != t1end; t1idx += t1_entry_size) { 696 for (unsigned i = 0; i < joined_col_cnt; i++) { 697 table_element val = t1.m_column_layout.get(t1.get_at_offset(t1idx), t1_joined_cols[i]); 698 TRACE("dl_table_relation", tout << "val: " << val << " " << t1idx << " " << t1_joined_cols[i] << "\n";); 699 if (t1_key[i] != val) { 700 t1_key[i] = val; 701 key_modified = true; 702 } 703 } 704 if (key_modified) { 705 t2_offsets = t2_indexer.get_matching_offsets(t1_key); 706 key_modified = false; 707 } 708 709 if (t2_offsets.empty()) { 710 continue; 711 } 712 713 key_indexer::offset_iterator t2ofs_it = t2_offsets.begin(); 714 key_indexer::offset_iterator t2ofs_end = t2_offsets.end(); 715 for (; t2ofs_it != t2ofs_end; ++t2ofs_it) { 716 store_offset t2ofs = *t2ofs_it; 717 result.m_data.ensure_reserve(); 718 result.garbage_collect(); 719 char * res_reserve = result.m_data.get_reserve_ptr(); 720 char const * t1ptr = t1.get_at_offset(t1idx); 721 char const * t2ptr = t2.get_at_offset(t2ofs); 722 if (tables_swapped) { 723 concatenate_rows(t2.m_column_layout, t1.m_column_layout, result.m_column_layout, 724 t2ptr, t1ptr, res_reserve, removed_cols); 725 } else { 726 concatenate_rows(t1.m_column_layout, t2.m_column_layout, result.m_column_layout, 727 t1ptr, t2ptr, res_reserve, removed_cols); 728 } 729 result.add_reserve_content(); 730 } 731 } 732 } 733 734 735 // ----------------------------------- 736 // 737 // sparse_table_plugin 738 // 739 // ----------------------------------- 740 sparse_table_plugin(relation_manager & manager)741 sparse_table_plugin::sparse_table_plugin(relation_manager & manager) 742 : table_plugin(symbol("sparse"), manager) {} 743 ~sparse_table_plugin()744 sparse_table_plugin::~sparse_table_plugin() { 745 reset(); 746 } 747 get(table_base const & t)748 sparse_table const& sparse_table_plugin::get(table_base const& t) { return dynamic_cast<sparse_table const&>(t); } get(table_base & t)749 sparse_table& sparse_table_plugin::get(table_base& t) { return dynamic_cast<sparse_table&>(t); } get(table_base const * t)750 sparse_table const* sparse_table_plugin::get(table_base const* t) { return dynamic_cast<sparse_table const*>(t); } get(table_base * t)751 sparse_table* sparse_table_plugin::get(table_base* t) { return dynamic_cast<sparse_table*>(t); } 752 753 reset()754 void sparse_table_plugin::reset() { 755 table_pool::iterator it = m_pool.begin(); 756 table_pool::iterator end = m_pool.end(); 757 for (; it!=end; ++it) { 758 sp_table_vector * vect = it->m_value; 759 sp_table_vector::iterator vit = vect->begin(); 760 sp_table_vector::iterator vend = vect->end(); 761 for (; vit!=vend; ++vit) { 762 (*vit)->destroy(); //calling deallocate() would only put the table back into the pool 763 } 764 dealloc(vect); 765 } 766 m_pool.reset(); 767 } 768 garbage_collect()769 void sparse_table_plugin::garbage_collect() { 770 IF_VERBOSE(2, verbose_stream() << "garbage collecting "<< memory::get_allocation_size() << " bytes down to ";); 771 reset(); 772 IF_VERBOSE(2, verbose_stream() << memory::get_allocation_size() << " bytes\n";); 773 } 774 recycle(sparse_table * t)775 void sparse_table_plugin::recycle(sparse_table * t) { 776 verbose_action _va("recycle", 2); 777 const table_signature & sig = t->get_signature(); 778 t->reset(); 779 780 sp_table_vector * & vect = m_pool.insert_if_not_there(sig, nullptr); 781 if (vect == nullptr) { 782 vect = alloc(sp_table_vector); 783 } 784 IF_VERBOSE(12, verbose_stream() << "Recycle: " << t->get_size_estimate_bytes() << "\n";); 785 786 vect->push_back(t); 787 } 788 mk_empty(const table_signature & s)789 table_base * sparse_table_plugin::mk_empty(const table_signature & s) { 790 SASSERT(can_handle_signature(s)); 791 792 sp_table_vector * vect; 793 if (!m_pool.find(s, vect) || vect->empty()) { 794 return alloc(sparse_table, *this, s); 795 } 796 sparse_table * res = vect->back(); 797 vect->pop_back(); 798 return res; 799 } 800 mk_clone(const sparse_table & t)801 sparse_table * sparse_table_plugin::mk_clone(const sparse_table & t) { 802 sparse_table * res = get(mk_empty(t.get_signature())); 803 res->m_data = t.m_data; 804 return res; 805 } 806 807 join_involves_functional(const table_signature & s1,const table_signature & s2,unsigned col_cnt,const unsigned * cols1,const unsigned * cols2)808 bool sparse_table_plugin::join_involves_functional(const table_signature & s1, const table_signature & s2, 809 unsigned col_cnt, const unsigned * cols1, const unsigned * cols2) { 810 if (col_cnt == 0) { 811 return false; 812 } 813 return counter().count(col_cnt, cols1).get_max_positive()>=s1.first_functional() 814 || counter().count(col_cnt, cols2).get_max_positive()>=s2.first_functional(); 815 } 816 817 818 class sparse_table_plugin::join_project_fn : public convenient_table_join_project_fn { 819 public: join_project_fn(const table_signature & t1_sig,const table_signature & t2_sig,unsigned col_cnt,const unsigned * cols1,const unsigned * cols2,unsigned removed_col_cnt,const unsigned * removed_cols)820 join_project_fn(const table_signature & t1_sig, const table_signature & t2_sig, unsigned col_cnt, 821 const unsigned * cols1, const unsigned * cols2, unsigned removed_col_cnt, 822 const unsigned * removed_cols) 823 : convenient_table_join_project_fn(t1_sig, t2_sig, col_cnt, cols1, cols2, 824 removed_col_cnt, removed_cols) { 825 m_removed_cols.push_back(UINT_MAX); 826 } 827 operator ()(const table_base & tb1,const table_base & tb2)828 table_base * operator()(const table_base & tb1, const table_base & tb2) override { 829 830 const sparse_table & t1 = get(tb1); 831 const sparse_table & t2 = get(tb2); 832 833 sparse_table_plugin & plugin = t1.get_plugin(); 834 835 sparse_table * res = get(plugin.mk_empty(get_result_signature())); 836 837 //If we join with some intersection, want to iterate over the smaller table and 838 //do indexing into the bigger one. If we simply do a product, we want the bigger 839 //one to be at the outer iteration (then the small one will hopefully fit into 840 //the cache) 841 if ( (t1.row_count() > t2.row_count()) == (!m_cols1.empty()) ) { 842 sparse_table::self_agnostic_join_project(t2, t1, m_cols1.size(), m_cols2.c_ptr(), 843 m_cols1.c_ptr(), m_removed_cols.c_ptr(), true, *res); 844 } 845 else { 846 sparse_table::self_agnostic_join_project(t1, t2, m_cols1.size(), m_cols1.c_ptr(), 847 m_cols2.c_ptr(), m_removed_cols.c_ptr(), false, *res); 848 } 849 TRACE("dl_table_relation", tb1.display(tout); tb2.display(tout); res->display(tout); ); 850 return res; 851 } 852 }; 853 mk_join_fn(const table_base & t1,const table_base & t2,unsigned col_cnt,const unsigned * cols1,const unsigned * cols2)854 table_join_fn * sparse_table_plugin::mk_join_fn(const table_base & t1, const table_base & t2, 855 unsigned col_cnt, const unsigned * cols1, const unsigned * cols2) { 856 const table_signature & sig1 = t1.get_signature(); 857 const table_signature & sig2 = t2.get_signature(); 858 if (t1.get_kind()!=get_kind() || t2.get_kind()!=get_kind() 859 || join_involves_functional(sig1, sig2, col_cnt, cols1, cols2)) { 860 //We also don't allow indexes on functional columns (and they are needed for joins) 861 return nullptr; 862 } 863 return mk_join_project_fn(t1, t2, col_cnt, cols1, cols2, 0, static_cast<unsigned*>(nullptr)); 864 } 865 mk_join_project_fn(const table_base & t1,const table_base & t2,unsigned col_cnt,const unsigned * cols1,const unsigned * cols2,unsigned removed_col_cnt,const unsigned * removed_cols)866 table_join_fn * sparse_table_plugin::mk_join_project_fn(const table_base & t1, const table_base & t2, 867 unsigned col_cnt, const unsigned * cols1, const unsigned * cols2, unsigned removed_col_cnt, 868 const unsigned * removed_cols) { 869 const table_signature & sig1 = t1.get_signature(); 870 const table_signature & sig2 = t2.get_signature(); 871 if (t1.get_kind()!=get_kind() || t2.get_kind()!=get_kind() 872 || removed_col_cnt == t1.get_signature().size()+t2.get_signature().size() 873 || join_involves_functional(sig1, sig2, col_cnt, cols1, cols2)) { 874 //We don't allow sparse tables with zero signatures (and project on all columns leads to such) 875 //We also don't allow indexes on functional columns. 876 return nullptr; 877 } 878 return alloc(join_project_fn, t1.get_signature(), t2.get_signature(), col_cnt, cols1, cols2, 879 removed_col_cnt, removed_cols); 880 } 881 882 class sparse_table_plugin::union_fn : public table_union_fn { 883 public: operator ()(table_base & tgt0,const table_base & src0,table_base * delta0)884 void operator()(table_base & tgt0, const table_base & src0, table_base * delta0) override { 885 verbose_action _va("union"); 886 sparse_table & tgt = get(tgt0); 887 const sparse_table & src = get(src0); 888 sparse_table * delta = get(delta0); 889 890 unsigned fact_size = tgt.m_fact_size; 891 const char* ptr = src.m_data.begin(); 892 const char* after_last=src.m_data.after_last(); 893 for (; ptr<after_last; ptr+=fact_size) { 894 if (tgt.add_fact(ptr) && delta) { 895 delta->add_fact(ptr); 896 } 897 } 898 } 899 }; 900 mk_union_fn(const table_base & tgt,const table_base & src,const table_base * delta)901 table_union_fn * sparse_table_plugin::mk_union_fn(const table_base & tgt, const table_base & src, 902 const table_base * delta) { 903 if (tgt.get_kind()!=get_kind() || src.get_kind()!=get_kind() 904 || (delta && delta->get_kind()!=get_kind()) 905 || tgt.get_signature()!=src.get_signature() 906 || (delta && delta->get_signature()!=tgt.get_signature())) { 907 return nullptr; 908 } 909 return alloc(union_fn); 910 } 911 912 class sparse_table_plugin::project_fn : public convenient_table_project_fn { 913 const unsigned m_inp_col_cnt; 914 const unsigned m_removed_col_cnt; 915 const unsigned m_result_col_cnt; 916 public: project_fn(const table_signature & orig_sig,unsigned removed_col_cnt,const unsigned * removed_cols)917 project_fn(const table_signature & orig_sig, unsigned removed_col_cnt, const unsigned * removed_cols) 918 : convenient_table_project_fn(orig_sig, removed_col_cnt, removed_cols), 919 m_inp_col_cnt(orig_sig.size()), 920 m_removed_col_cnt(removed_col_cnt), 921 m_result_col_cnt(orig_sig.size()-removed_col_cnt) { 922 SASSERT(removed_col_cnt>0); 923 } 924 transform_row(const char * src,char * tgt,const sparse_table::column_layout & src_layout,const sparse_table::column_layout & tgt_layout)925 virtual void transform_row(const char * src, char * tgt, 926 const sparse_table::column_layout & src_layout, 927 const sparse_table::column_layout & tgt_layout) { 928 unsigned r_idx=0; 929 unsigned tgt_i=0; 930 for (unsigned i=0; i<m_inp_col_cnt; i++) { 931 if (r_idx!=m_removed_col_cnt && i == m_removed_cols[r_idx]) { 932 SASSERT(r_idx<m_removed_col_cnt); 933 r_idx++; 934 continue; 935 } 936 tgt_layout.set(tgt, tgt_i, src_layout.get(src, i)); 937 tgt_i++; 938 } 939 SASSERT(tgt_i == m_result_col_cnt); 940 SASSERT(r_idx == m_removed_col_cnt); 941 } 942 operator ()(const table_base & tb)943 table_base * operator()(const table_base & tb) override { 944 verbose_action _va("project"); 945 const sparse_table & t = get(tb); 946 947 unsigned t_fact_size = t.m_fact_size; 948 949 sparse_table_plugin & plugin = t.get_plugin(); 950 sparse_table * res = get(plugin.mk_empty(get_result_signature())); 951 952 const sparse_table::column_layout & src_layout = t.m_column_layout; 953 const sparse_table::column_layout & tgt_layout = res->m_column_layout; 954 955 const char* t_ptr = t.m_data.begin(); 956 const char* t_end = t.m_data.after_last(); 957 for (; t_ptr!=t_end; t_ptr+=t_fact_size) { 958 SASSERT(t_ptr<t_end); 959 res->m_data.ensure_reserve(); 960 char * res_ptr = res->m_data.get_reserve_ptr(); 961 transform_row(t_ptr, res_ptr, src_layout, tgt_layout); 962 res->m_data.insert_reserve_content(); 963 } 964 return res; 965 } 966 }; 967 mk_project_fn(const table_base & t,unsigned col_cnt,const unsigned * removed_cols)968 table_transformer_fn * sparse_table_plugin::mk_project_fn(const table_base & t, unsigned col_cnt, 969 const unsigned * removed_cols) { 970 if (col_cnt == t.get_signature().size()) { 971 return nullptr; 972 } 973 return alloc(project_fn, t.get_signature(), col_cnt, removed_cols); 974 } 975 976 977 class sparse_table_plugin::select_equal_and_project_fn : public convenient_table_transformer_fn { 978 const unsigned m_col; 979 sparse_table::key_value m_key; 980 public: select_equal_and_project_fn(const table_signature & orig_sig,table_element val,unsigned col)981 select_equal_and_project_fn(const table_signature & orig_sig, table_element val, unsigned col) 982 : m_col(col) { 983 table_signature::from_project(orig_sig, 1, &col, get_result_signature()); 984 m_key.push_back(val); 985 } 986 operator ()(const table_base & tb)987 table_base * operator()(const table_base & tb) override { 988 verbose_action _va("select_equal_and_project"); 989 const sparse_table & t = get(tb); 990 991 sparse_table_plugin & plugin = t.get_plugin(); 992 sparse_table * res = get(plugin.mk_empty(get_result_signature())); 993 994 const sparse_table::column_layout & t_layout = t.m_column_layout; 995 const sparse_table::column_layout & res_layout = res->m_column_layout; 996 unsigned t_cols = t_layout.size(); 997 998 sparse_table::key_indexer & indexer = t.get_key_indexer(1, &m_col); 999 sparse_table::key_indexer::query_result t_offsets = indexer.get_matching_offsets(m_key); 1000 if (t_offsets.empty()) { 1001 //no matches 1002 return res; 1003 } 1004 sparse_table::key_indexer::offset_iterator ofs_it=t_offsets.begin(); 1005 sparse_table::key_indexer::offset_iterator ofs_end=t_offsets.end(); 1006 1007 for (; ofs_it!=ofs_end; ++ofs_it) { 1008 sparse_table::store_offset t_ofs = *ofs_it; 1009 const char * t_ptr = t.get_at_offset(t_ofs); 1010 1011 res->m_data.ensure_reserve(); 1012 char * res_reserve = res->m_data.get_reserve_ptr(); 1013 1014 unsigned res_i = 0; 1015 for (unsigned i=0; i<t_cols; i++) { 1016 if (i == m_col) { 1017 continue; 1018 } 1019 res_layout.set(res_reserve, res_i++, t_layout.get(t_ptr, i)); 1020 } 1021 res->add_reserve_content(); 1022 } 1023 return res; 1024 } 1025 }; 1026 mk_select_equal_and_project_fn(const table_base & t,const table_element & value,unsigned col)1027 table_transformer_fn * sparse_table_plugin::mk_select_equal_and_project_fn(const table_base & t, 1028 const table_element & value, unsigned col) { 1029 if (t.get_kind()!=get_kind() || t.get_signature().size() == 1 || col>=t.get_signature().first_functional()) { 1030 //We don't allow sparse tables with zero signatures (and project on a single 1031 //column table produces one). 1032 //We also don't allow indexes on functional columns. And our implementation of 1033 //select_equal_and_project uses index on \c col. 1034 return nullptr; 1035 } 1036 return alloc(select_equal_and_project_fn, t.get_signature(), value, col); 1037 } 1038 1039 1040 class sparse_table_plugin::rename_fn : public convenient_table_rename_fn { 1041 unsigned_vector m_out_of_cycle; 1042 public: rename_fn(const table_signature & orig_sig,unsigned permutation_cycle_len,const unsigned * permutation_cycle)1043 rename_fn(const table_signature & orig_sig, unsigned permutation_cycle_len, const unsigned * permutation_cycle) 1044 : convenient_table_rename_fn(orig_sig, permutation_cycle_len, permutation_cycle) { 1045 SASSERT(permutation_cycle_len>=2); 1046 idx_set cycle_cols; 1047 for (unsigned i=0; i < permutation_cycle_len; ++i) { 1048 cycle_cols.insert(permutation_cycle[i]); 1049 } 1050 for (unsigned i=0; i < orig_sig.size(); ++i) { 1051 if (!cycle_cols.contains(i)) { 1052 m_out_of_cycle.push_back(i); 1053 } 1054 } 1055 } 1056 transform_row(const char * src,char * tgt,const sparse_table::column_layout & src_layout,const sparse_table::column_layout & tgt_layout)1057 void transform_row(const char * src, char * tgt, 1058 const sparse_table::column_layout & src_layout, 1059 const sparse_table::column_layout & tgt_layout) { 1060 1061 for (unsigned i=1; i < m_cycle.size(); ++i) { 1062 tgt_layout.set(tgt, m_cycle[i-1], src_layout.get(src, m_cycle[i])); 1063 } 1064 tgt_layout.set(tgt, m_cycle[m_cycle.size()-1], src_layout.get(src, m_cycle[0])); 1065 1066 unsigned_vector::const_iterator it = m_out_of_cycle.begin(); 1067 unsigned_vector::const_iterator end = m_out_of_cycle.end(); 1068 for (; it!=end; ++it) { 1069 unsigned col = *it; 1070 tgt_layout.set(tgt, col, src_layout.get(src, col)); 1071 } 1072 } 1073 operator ()(const table_base & tb)1074 table_base * operator()(const table_base & tb) override { 1075 verbose_action _va("rename"); 1076 1077 const sparse_table & t = get(tb); 1078 1079 unsigned t_fact_size = t.m_fact_size; 1080 1081 sparse_table_plugin & plugin = t.get_plugin(); 1082 sparse_table * res = get(plugin.mk_empty(get_result_signature())); 1083 1084 size_t res_fact_size = res->m_fact_size; 1085 size_t res_data_size = res_fact_size*t.row_count(); 1086 if (res_fact_size != 0 && (res_data_size / res_fact_size) != t.row_count()) { 1087 throw default_exception("multiplication overflow"); 1088 } 1089 1090 res->m_data.resize_data(res_data_size); 1091 1092 //here we can separate data creating and insertion into hashmap, since we know 1093 //that no row will become duplicate 1094 1095 //create the data 1096 const char* t_ptr = t.m_data.begin(); 1097 char* res_ptr = res->m_data.begin(); 1098 char* res_end = res_ptr+res_data_size; 1099 for (; res_ptr!=res_end; t_ptr+=t_fact_size, res_ptr+=res_fact_size) { 1100 transform_row(t_ptr, res_ptr, t.m_column_layout, res->m_column_layout); 1101 } 1102 1103 //and insert them into the hash-map 1104 for (size_t i = 0; i != res_data_size; i += res_fact_size) { 1105 TRUSTME(res->m_data.insert_offset(i)); 1106 } 1107 1108 return res; 1109 } 1110 }; 1111 mk_rename_fn(const table_base & t,unsigned permutation_cycle_len,const unsigned * permutation_cycle)1112 table_transformer_fn * sparse_table_plugin::mk_rename_fn(const table_base & t, unsigned permutation_cycle_len, 1113 const unsigned * permutation_cycle) { 1114 if (t.get_kind()!=get_kind()) { 1115 return nullptr; 1116 } 1117 return alloc(rename_fn, t.get_signature(), permutation_cycle_len, permutation_cycle); 1118 } 1119 1120 class sparse_table_plugin::negation_filter_fn : public convenient_table_negation_filter_fn { 1121 typedef sparse_table::store_offset store_offset; 1122 typedef sparse_table::key_value key_value; 1123 typedef sparse_table::key_indexer key_indexer; 1124 1125 bool m_joining_neg_non_functional; 1126 1127 /** 1128 Used by \c collect_intersection_offsets function. 1129 If tgt_is_first is false, contains the same items as \c res. 1130 */ 1131 idx_set m_intersection_content; 1132 1133 1134 public: negation_filter_fn(const table_base & tgt,const table_base & neg,unsigned joined_col_cnt,const unsigned * t_cols,const unsigned * negated_cols)1135 negation_filter_fn(const table_base & tgt, const table_base & neg, 1136 unsigned joined_col_cnt, const unsigned * t_cols, const unsigned * negated_cols) 1137 : convenient_table_negation_filter_fn(tgt, neg, joined_col_cnt, t_cols, negated_cols) { 1138 unsigned neg_first_func = neg.get_signature().first_functional(); 1139 counter ctr; 1140 ctr.count(m_cols2); 1141 m_joining_neg_non_functional = ctr.get_max_counter_value() == 1 1142 && ctr.get_positive_count() == neg_first_func 1143 && (neg_first_func == 0 || ctr.get_max_positive() == neg_first_func-1); 1144 } 1145 1146 /** 1147 Collect offsets of rows in \c t1 or \c t2 (depends on whether \c tgt_is_first is true or false) 1148 that have a match in the other table into \c res. Offsets in \c res are in ascending order. 1149 */ collect_intersection_offsets(const sparse_table & t1,const sparse_table & t2,bool tgt_is_first,svector<store_offset> & res)1150 void collect_intersection_offsets(const sparse_table & t1, const sparse_table & t2, 1151 bool tgt_is_first, svector<store_offset> & res) { 1152 SASSERT(res.empty()); 1153 1154 m_intersection_content.reset(); 1155 1156 unsigned joined_col_cnt = m_cols1.size(); 1157 unsigned t1_entry_size = t1.m_data.entry_size(); 1158 1159 const unsigned * cols1 = tgt_is_first ? m_cols1.c_ptr() : m_cols2.c_ptr(); 1160 const unsigned * cols2 = tgt_is_first ? m_cols2.c_ptr() : m_cols1.c_ptr(); 1161 1162 key_value t1_key; 1163 t1_key.resize(joined_col_cnt); 1164 key_indexer & t2_indexer = t2.get_key_indexer(joined_col_cnt, cols2); 1165 1166 bool key_modified=true; 1167 key_indexer::query_result t2_offsets; 1168 store_offset t1_after_last = t1.m_data.after_last_offset(); 1169 for (store_offset t1_ofs=0; t1_ofs<t1_after_last; t1_ofs+=t1_entry_size) { 1170 1171 for (unsigned i=0; i<joined_col_cnt; i++) { 1172 table_element val = t1.get_cell(t1_ofs, cols1[i]); 1173 if (t1_key[i]!=val) { 1174 t1_key[i]=val; 1175 key_modified=true; 1176 } 1177 } 1178 if (key_modified) { 1179 t2_offsets = t2_indexer.get_matching_offsets(t1_key); 1180 key_modified = false; 1181 } 1182 1183 if (t2_offsets.empty()) { 1184 continue; 1185 } 1186 if (tgt_is_first) { 1187 res.push_back(t1_ofs); 1188 } 1189 else { 1190 key_indexer::offset_iterator it = t2_offsets.begin(); 1191 key_indexer::offset_iterator end = t2_offsets.end(); 1192 for (; it!=end; ++it) { 1193 store_offset ofs = *it; 1194 unsigned offs2 = static_cast<unsigned>(ofs); 1195 if (ofs != offs2) { 1196 throw default_exception("Z3 cannot perform negation with excessively large tables"); 1197 } 1198 if (!m_intersection_content.contains(offs2)) { 1199 m_intersection_content.insert(offs2); 1200 res.push_back(ofs); 1201 } 1202 } 1203 } 1204 } 1205 1206 if (!tgt_is_first) { 1207 //in this case \c res now may be in arbitrary order 1208 std::sort(res.begin(), res.end()); 1209 } 1210 } 1211 operator ()(table_base & tgt0,const table_base & neg0)1212 void operator()(table_base & tgt0, const table_base & neg0) override { 1213 sparse_table & tgt = get(tgt0); 1214 const sparse_table & neg = get(neg0); 1215 1216 verbose_action _va("filter_by_negation"); 1217 1218 if (m_cols1.empty()) { 1219 if (!neg.empty()) { 1220 tgt.reset(); 1221 } 1222 return; 1223 } 1224 1225 svector<store_offset> to_remove; //offsets here are in increasing order 1226 1227 //We don't do just the simple tgt.row_count()>neg.row_count() because the swapped case is 1228 //more expensive. The constant 4 is, however, just my guess what the ratio might be. 1229 if (tgt.row_count()/4>neg.row_count()) { 1230 collect_intersection_offsets(neg, tgt, false, to_remove); 1231 } 1232 else { 1233 collect_intersection_offsets(tgt, neg, true, to_remove); 1234 } 1235 1236 1237 //the largest offsets are at the end, so we can remove them one by one 1238 while (!to_remove.empty()) { 1239 store_offset removed_ofs = to_remove.back(); 1240 to_remove.pop_back(); 1241 tgt.m_data.remove_offset(removed_ofs); 1242 } 1243 tgt.reset_indexes(); 1244 } 1245 1246 }; 1247 mk_filter_by_negation_fn(const table_base & t,const table_base & negated_obj,unsigned joined_col_cnt,const unsigned * t_cols,const unsigned * negated_cols)1248 table_intersection_filter_fn * sparse_table_plugin::mk_filter_by_negation_fn(const table_base & t, 1249 const table_base & negated_obj, unsigned joined_col_cnt, 1250 const unsigned * t_cols, const unsigned * negated_cols) { 1251 if (!check_kind(t) || !check_kind(negated_obj) 1252 || join_involves_functional(t.get_signature(), negated_obj.get_signature(), joined_col_cnt, 1253 t_cols, negated_cols) ) { 1254 return nullptr; 1255 } 1256 return alloc(negation_filter_fn, t, negated_obj, joined_col_cnt, t_cols, negated_cols); 1257 } 1258 1259 /** 1260 T \ (S1 Join S2) 1261 1262 t_cols - columns from T 1263 s_cols - columns from (S1 Join S2) that are equated 1264 src1_cols - columns from S1 equated with columns from S2 1265 src2_cols - columns from S2 equated with columns from S1 1266 1267 t1_cols - columns from T that map into S1 1268 s1_cols - matching columns from s_cols for t1_cols 1269 t2s1_cols - columns from T that map into S2, and columns from src1 that join src2 1270 s2_cols - matching columns from t2s1_cols 1271 1272 columns from s2 that are equal to a column from s1 that is in s_cols: 1273 1274 - ... 1275 1276 */ 1277 1278 class sparse_table_plugin::negated_join_fn : public table_intersection_join_filter_fn { 1279 typedef sparse_table::store_offset store_offset; 1280 typedef sparse_table::key_value key_value; 1281 typedef sparse_table::key_indexer key_indexer; 1282 unsigned_vector m_t1_cols; 1283 unsigned_vector m_s1_cols; 1284 unsigned_vector m_t2_cols; 1285 unsigned_vector m_s2_cols; 1286 unsigned_vector m_src1_cols; 1287 public: negated_join_fn(table_base const & src1,unsigned_vector const & t_cols,unsigned_vector const & src_cols,unsigned_vector const & src1_cols,unsigned_vector const & src2_cols)1288 negated_join_fn( 1289 table_base const& src1, 1290 unsigned_vector const& t_cols, 1291 unsigned_vector const& src_cols, 1292 unsigned_vector const& src1_cols, 1293 unsigned_vector const& src2_cols): 1294 m_src1_cols(src1_cols) { 1295 1296 // split t_cols and src_cols according to src1, and src2 1297 1298 unsigned src1_size = src1.get_signature().size(); 1299 for (unsigned i = 0; i < t_cols.size(); ++i) { 1300 if (src_cols[i] < src1_size) { 1301 m_t1_cols.push_back(t_cols[i]); 1302 m_s1_cols.push_back(src_cols[i]); 1303 } 1304 else { 1305 m_t2_cols.push_back(t_cols[i]); 1306 m_s2_cols.push_back(src_cols[i]); 1307 } 1308 } 1309 m_s2_cols.append(src2_cols); 1310 } 1311 operator ()(table_base & _t,const table_base & _s1,const table_base & _s2)1312 void operator()(table_base & _t, const table_base & _s1, const table_base& _s2) override { 1313 1314 verbose_action _va("negated_join"); 1315 sparse_table& t = get(_t); 1316 svector<store_offset> to_remove; 1317 collect_to_remove(t, get(_s1), get(_s2), to_remove); 1318 for (unsigned i = 0; i < to_remove.size(); ++i) { 1319 t.m_data.remove_offset(to_remove[i]); 1320 } 1321 t.reset_indexes(); 1322 } 1323 1324 private: collect_to_remove(sparse_table & t,sparse_table const & s1,sparse_table const & s2,svector<store_offset> & to_remove)1325 void collect_to_remove(sparse_table& t, sparse_table const& s1, sparse_table const& s2, svector<store_offset>& to_remove) { 1326 key_value s1_key, s2_key; 1327 SASSERT(&s1 != &s2); 1328 SASSERT(m_s1_cols.size() == m_t1_cols.size()); 1329 SASSERT(m_s2_cols.size() == m_t2_cols.size() + m_src1_cols.size()); 1330 s1_key.resize(m_s1_cols.size()); 1331 s2_key.resize(m_s2_cols.size()); 1332 key_indexer & s1_indexer = s1.get_key_indexer(m_s1_cols.size(), m_s1_cols.c_ptr()); 1333 key_indexer & s2_indexer = s2.get_key_indexer(m_s2_cols.size(), m_s2_cols.c_ptr()); 1334 1335 store_offset t_after_last = t.m_data.after_last_offset(); 1336 key_indexer::query_result s1_offsets, s2_offsets; 1337 unsigned t_entry_size = t.m_data.entry_size(); 1338 for (store_offset t_ofs = 0; t_ofs < t_after_last; t_ofs += t_entry_size) { 1339 1340 if (update_key(s1_key, 0, t, t_ofs, m_t1_cols)) { 1341 s1_offsets = s1_indexer.get_matching_offsets(s1_key); 1342 } 1343 key_indexer::offset_iterator it = s1_offsets.begin(); 1344 key_indexer::offset_iterator end = s1_offsets.end(); 1345 for (; it != end; ++it) { 1346 store_offset s1_ofs = *it; 1347 bool upd1 = update_key(s2_key, 0, t, t_ofs, m_t2_cols); 1348 bool upd2 = update_key(s2_key, m_t2_cols.size(), s1, s1_ofs, m_src1_cols); 1349 if (upd1 || upd2) { 1350 s2_offsets = s2_indexer.get_matching_offsets(s2_key); 1351 } 1352 if (!s2_offsets.empty()) { 1353 to_remove.push_back(t_ofs); 1354 break; 1355 } 1356 } 1357 } 1358 } 1359 update_key(key_value & key,unsigned key_offset,sparse_table const & t,store_offset ofs,unsigned_vector const & cols)1360 inline bool update_key(key_value& key, unsigned key_offset, sparse_table const& t, store_offset ofs, unsigned_vector const& cols) { 1361 bool modified = false; 1362 unsigned sz = cols.size(); 1363 for (unsigned i = 0; i < sz; ++i) { 1364 table_element val = t.get_cell(ofs, cols[i]); 1365 modified = update_key(key[i+key_offset], val) || modified; 1366 } 1367 return modified; 1368 } 1369 update_key(table_element & tgt,table_element src)1370 inline bool update_key(table_element& tgt, table_element src) { 1371 if (tgt == src) { 1372 return false; 1373 } 1374 else { 1375 tgt = src; 1376 return true; 1377 } 1378 } 1379 1380 }; 1381 mk_filter_by_negated_join_fn(const table_base & t,const table_base & src1,const table_base & src2,unsigned_vector const & t_cols,unsigned_vector const & src_cols,unsigned_vector const & src1_cols,unsigned_vector const & src2_cols)1382 table_intersection_join_filter_fn* sparse_table_plugin::mk_filter_by_negated_join_fn( 1383 const table_base & t, 1384 1385 1386 const table_base & src1, 1387 const table_base & src2, 1388 unsigned_vector const& t_cols, 1389 unsigned_vector const& src_cols, 1390 unsigned_vector const& src1_cols, 1391 unsigned_vector const& src2_cols) { 1392 if (check_kind(t) && check_kind(src1) && check_kind(src2)) { 1393 return alloc(negated_join_fn, src1, t_cols, src_cols, src1_cols, src2_cols); 1394 } 1395 else { 1396 return nullptr; 1397 } 1398 } 1399 1400 get_size_estimate_bytes() const1401 unsigned sparse_table::get_size_estimate_bytes() const { 1402 unsigned sz = 0; 1403 sz += m_data.get_size_estimate_bytes(); 1404 sz += m_key_indexes.capacity()*8; // TBD 1405 return sz; 1406 } 1407 1408 1409 }; 1410 1411