1 /*++ 2 Copyright (c) 2006 Microsoft Corporation 3 4 Module Name: 5 6 dl_table.h 7 8 Abstract: 9 10 <abstract> 11 12 Author: 13 14 Krystof Hoder (t-khoder) 2010-09-01. 15 16 Revision History: 17 18 --*/ 19 20 #pragma once 21 22 #include<iostream> 23 #include<list> 24 #include<utility> 25 26 #include "ast/ast.h" 27 #include "util/bit_vector.h" 28 #include "util/buffer.h" 29 #include "util/hashtable.h" 30 #include "util/map.h" 31 #include "util/ref_vector.h" 32 #include "util/vector.h" 33 34 #include "muz/rel/dl_base.h" 35 36 37 namespace datalog { 38 class sparse_table; 39 40 class sparse_table_plugin : public table_plugin { 41 friend class sparse_table; 42 protected: 43 class join_project_fn; 44 class union_fn; 45 class transformer_fn; 46 class rename_fn; 47 class project_fn; 48 class negation_filter_fn; 49 class select_equal_and_project_fn; 50 class negated_join_fn; 51 52 typedef ptr_vector<sparse_table> sp_table_vector; 53 typedef map<table_signature, sp_table_vector *, 54 table_signature::hash, table_signature::eq > table_pool; 55 56 table_pool m_pool; 57 58 void recycle(sparse_table * t); 59 60 void garbage_collect(); 61 62 void reset(); 63 64 static bool join_involves_functional(const table_signature & s1, const table_signature & s2, 65 unsigned col_cnt, const unsigned * cols1, const unsigned * cols2); 66 67 public: 68 typedef sparse_table table; 69 70 sparse_table_plugin(relation_manager & manager); 71 ~sparse_table_plugin() override; 72 can_handle_signature(const table_signature & s)73 bool can_handle_signature(const table_signature & s) override 74 { return !s.empty(); } 75 76 table_base * mk_empty(const table_signature & s) override; 77 sparse_table * mk_clone(const sparse_table & t); 78 79 protected: 80 table_join_fn * mk_join_fn(const table_base & t1, const table_base & t2, 81 unsigned col_cnt, const unsigned * cols1, const unsigned * cols2) override; 82 table_join_fn * mk_join_project_fn(const table_base & t1, const table_base & t2, 83 unsigned col_cnt, const unsigned * cols1, const unsigned * cols2, unsigned removed_col_cnt, 84 const unsigned * removed_cols) override; 85 table_union_fn * mk_union_fn(const table_base & tgt, const table_base & src, 86 const table_base * delta) override; 87 table_transformer_fn * mk_project_fn(const table_base & t, unsigned col_cnt, 88 const unsigned * removed_cols) override; 89 table_transformer_fn * mk_rename_fn(const table_base & t, unsigned permutation_cycle_len, 90 const unsigned * permutation_cycle) override; 91 table_transformer_fn * mk_select_equal_and_project_fn(const table_base & t, 92 const table_element & value, unsigned col) override; 93 table_intersection_filter_fn * mk_filter_by_negation_fn(const table_base & t, 94 const table_base & negated_obj, unsigned joined_col_cnt, 95 const unsigned * t_cols, const unsigned * negated_cols) override; 96 table_intersection_join_filter_fn* mk_filter_by_negated_join_fn( 97 const table_base & t, 98 const table_base & src1, 99 const table_base & src2, 100 unsigned_vector const& t_cols, 101 unsigned_vector const& src_cols, 102 unsigned_vector const& src1_cols, 103 unsigned_vector const& src2_cols) override; 104 105 static sparse_table const& get(table_base const&); 106 static sparse_table& get(table_base&); 107 static sparse_table const* get(table_base const*); 108 static sparse_table* get(table_base*); 109 110 }; 111 112 class entry_storage { 113 public: 114 typedef size_t store_offset; 115 private: 116 typedef svector<char, size_t> storage; 117 118 class offset_hash_proc { 119 storage & m_storage; 120 unsigned m_unique_entry_size; 121 public: offset_hash_proc(storage & s,unsigned unique_entry_sz)122 offset_hash_proc(storage & s, unsigned unique_entry_sz) 123 : m_storage(s), m_unique_entry_size(unique_entry_sz) {} operator()124 unsigned operator()(store_offset ofs) const { 125 return string_hash(m_storage.c_ptr()+ofs, m_unique_entry_size, 0); 126 } 127 }; 128 129 class offset_eq_proc { 130 storage & m_storage; 131 unsigned m_unique_entry_size; 132 public: offset_eq_proc(storage & s,unsigned unique_entry_sz)133 offset_eq_proc(storage & s, unsigned unique_entry_sz) 134 : m_storage(s), m_unique_entry_size(unique_entry_sz) {} operator()135 bool operator()(store_offset o1, store_offset o2) const { 136 const char * base = m_storage.c_ptr(); 137 return memcmp(base+o1, base+o2, m_unique_entry_size)==0; 138 } 139 }; 140 141 typedef hashtable<store_offset, offset_hash_proc, offset_eq_proc> storage_indexer; 142 143 static const store_offset NO_RESERVE = UINT_MAX; 144 145 unsigned m_entry_size; 146 unsigned m_unique_part_size; 147 size_t m_data_size; 148 /** 149 Invariant: Every or all but one blocks of length \c m_entry_size in the \c m_data vector 150 are unique sequences of bytes and have their offset stored in the \c m_data_indexer hashtable. 151 If the offset of the last block is not stored in the hashtable, it is stored in the \c m_reserve 152 variable. Otherwise \c m_reserve==NO_RESERVE. 153 154 The size of m_data is actually 8 bytes larger than stated in m_data_size, so that we may 155 deref an uint64_t pointer at the end of the array. 156 */ 157 storage m_data; 158 storage_indexer m_data_indexer; 159 store_offset m_reserve; 160 public: 161 entry_storage(unsigned entry_size, unsigned functional_size = 0, unsigned init_size = 0) m_entry_size(entry_size)162 : m_entry_size(entry_size), 163 m_unique_part_size(entry_size-functional_size), 164 m_data_indexer(next_power_of_two(std::max(8u,init_size)), 165 offset_hash_proc(m_data, m_unique_part_size), offset_eq_proc(m_data, m_unique_part_size)), 166 m_reserve(NO_RESERVE) { 167 SASSERT(entry_size>0); 168 SASSERT(functional_size<=entry_size); 169 resize_data(init_size); 170 resize_data(0); 171 } entry_storage(const entry_storage & s)172 entry_storage(const entry_storage &s) 173 : m_entry_size(s.m_entry_size), 174 m_unique_part_size(s.m_unique_part_size), 175 m_data_size(s.m_data_size), 176 m_data(s.m_data), 177 m_data_indexer(next_power_of_two(std::max(8u,s.entry_count())), 178 offset_hash_proc(m_data, m_unique_part_size), offset_eq_proc(m_data, m_unique_part_size)), 179 m_reserve(s.m_reserve) { 180 store_offset after_last=after_last_offset(); 181 for(store_offset i=0; i<after_last; i+=m_entry_size) { 182 m_data_indexer.insert(i); 183 } 184 } 185 186 entry_storage & operator=(const entry_storage & o) { 187 m_data_indexer.reset(); 188 m_entry_size = o.m_entry_size; 189 m_unique_part_size = o.m_unique_part_size; 190 m_data_size = o.m_data_size; 191 m_data = o.m_data; 192 m_reserve = o.m_reserve; 193 store_offset after_last=after_last_offset(); 194 for(store_offset i=0; i<after_last; i+=m_entry_size) { 195 m_data_indexer.insert(i); 196 } 197 return *this; 198 } 199 reset()200 void reset() { 201 resize_data(0); 202 m_data_indexer.reset(); 203 m_reserve = NO_RESERVE; 204 } 205 entry_size()206 unsigned entry_size() const { return m_entry_size; } 207 unsigned get_size_estimate_bytes() const; get(store_offset ofs)208 char * get(store_offset ofs) { return m_data.begin()+ofs; } get(store_offset ofs)209 const char * get(store_offset ofs) const 210 { return const_cast<entry_storage *>(this)->get(ofs); } 211 entry_count()212 unsigned entry_count() const { return m_data_indexer.size(); } 213 after_last_offset()214 store_offset after_last_offset() const { 215 return (m_reserve==NO_RESERVE) ? m_data_size : m_reserve; 216 } 217 begin()218 char * begin() { return get(0); } begin()219 const char * begin() const { return get(0); } after_last()220 const char * after_last() const { return get(after_last_offset()); } 221 222 has_reserve()223 bool has_reserve() const { return m_reserve!=NO_RESERVE; } reserve()224 store_offset reserve() const { SASSERT(has_reserve()); return m_reserve; } 225 ensure_reserve()226 void ensure_reserve() { 227 if(has_reserve()) { 228 SASSERT(m_reserve==m_data_size-m_entry_size); 229 return; 230 } 231 m_reserve = m_data_size; 232 resize_data(m_data_size+m_entry_size); 233 } 234 235 /** 236 \brief Return pointer to the reserve. 237 238 The reserve must exist when the function is called. 239 */ get_reserve_ptr()240 char * get_reserve_ptr() { 241 SASSERT(has_reserve()); 242 return &m_data.get(reserve()); 243 } 244 reserve_content_already_present()245 bool reserve_content_already_present() const { 246 SASSERT(has_reserve()); 247 return m_data_indexer.contains(reserve()); 248 } 249 find_reserve_content(store_offset & result)250 bool find_reserve_content(store_offset & result) const { 251 SASSERT(has_reserve()); 252 storage_indexer::entry * indexer_entry = m_data_indexer.find_core(reserve()); 253 if(!indexer_entry) { 254 return false; 255 } 256 result = indexer_entry->get_data(); 257 return true; 258 } 259 260 /** 261 \brief Write fact \c f into the reserve at the end of the \c m_data storage. 262 263 If the reserve does not exist, this function creates it. 264 */ write_into_reserve(const char * data)265 void write_into_reserve(const char * data) { 266 ensure_reserve(); 267 memcpy(get_reserve_ptr(), data, m_entry_size); 268 } 269 270 /** 271 \brief If the fact in reserve is not in the table, insert it there and return true; 272 otherwise return false. 273 274 When a fact is inserted into the table, the reserve becomes part of the table and 275 is no longer a reserve. 276 */ 277 bool insert_reserve_content(); 278 store_offset insert_or_get_reserve_content(); 279 bool remove_reserve_content(); 280 /** 281 Remove data at the offset \c ofs. 282 283 Data with offset lower than \c ofs are not be modified by this function, data with 284 higher offset may be moved. 285 */ 286 void remove_offset(store_offset ofs); 287 288 289 //the following two operations allow breaking of the object invariant! resize_data(size_t sz)290 void resize_data(size_t sz) { 291 m_data_size = sz; 292 if (sz + sizeof(uint64_t) < sz) { 293 throw default_exception("overflow resizing data section for sparse table"); 294 } 295 m_data.resize(sz + sizeof(uint64_t)); 296 } 297 insert_offset(store_offset ofs)298 bool insert_offset(store_offset ofs) { 299 return m_data_indexer.insert_if_not_there(ofs)==ofs; 300 } 301 }; 302 303 class sparse_table : public table_base { 304 friend class sparse_table_plugin; 305 friend class sparse_table_plugin::join_project_fn; 306 friend class sparse_table_plugin::union_fn; 307 friend class sparse_table_plugin::transformer_fn; 308 friend class sparse_table_plugin::rename_fn; 309 friend class sparse_table_plugin::project_fn; 310 friend class sparse_table_plugin::negation_filter_fn; 311 friend class sparse_table_plugin::select_equal_and_project_fn; 312 313 class our_iterator_core; 314 class key_indexer; 315 class general_key_indexer; 316 class full_signature_key_indexer; 317 typedef entry_storage::store_offset store_offset; 318 319 320 class column_info { 321 unsigned m_big_offset; 322 unsigned m_small_offset; 323 uint64_t m_mask; 324 uint64_t m_write_mask; 325 public: 326 unsigned m_offset; //!< in bits 327 unsigned m_length; //!< in bits 328 column_info(unsigned offset,unsigned length)329 column_info(unsigned offset, unsigned length) 330 : m_big_offset(offset / 8), 331 m_small_offset(offset % 8), 332 m_mask( length == 64 ? ULLONG_MAX : (static_cast<uint64_t>(1)<<length)-1 ), 333 m_write_mask( ~(m_mask << m_small_offset) ), 334 m_offset(offset), 335 m_length(length) { 336 SASSERT(length <= 64); 337 SASSERT(length + m_small_offset <= 64); 338 } get(const char * rec)339 table_element get(const char * rec) const { 340 341 uint64_t res; 342 memcpy(&res, rec + m_big_offset, sizeof(res)); 343 res >>= m_small_offset; 344 res &= m_mask; 345 return res; 346 } set(char * rec,table_element val)347 void set(char * rec, table_element val) const { 348 SASSERT( (val&~m_mask)==0 ); //the value fits into the column 349 uint64_t cell; 350 memcpy(&cell, rec + m_big_offset, sizeof(cell)); 351 cell &= m_write_mask; 352 cell |= val << m_small_offset; 353 memcpy(rec + m_big_offset, &cell, sizeof(cell)); 354 } next_ofs()355 unsigned next_ofs() const { return m_offset+m_length; } 356 }; 357 class column_layout : public svector<column_info> { 358 359 void make_byte_aligned_end(unsigned col_index); 360 public: 361 362 unsigned m_entry_size; 363 /** 364 Number of last bytes which correspond to functional columns in the signature. 365 */ 366 unsigned m_functional_part_size; 367 unsigned m_functional_col_cnt; 368 369 column_layout(const table_signature & sig); 370 get(const char * rec,unsigned col)371 table_element get(const char * rec, unsigned col) const { 372 return (*this)[col].get(rec); 373 } set(char * rec,unsigned col,table_element val)374 void set(char * rec, unsigned col, table_element val) const { 375 return (*this)[col].set(rec, val); 376 } 377 }; 378 379 380 typedef svector<unsigned> key_spec; //sequence of columns in a key 381 typedef svector<table_element> key_value; //values of key columns 382 typedef map<key_spec, key_indexer*, svector_hash_proc<unsigned_hash>, 383 vector_eq_proc<key_spec> > key_index_map; 384 385 static const store_offset NO_RESERVE = UINT_MAX; 386 387 column_layout m_column_layout; 388 unsigned m_fact_size; 389 entry_storage m_data; 390 mutable key_index_map m_key_indexes; 391 392 get_at_offset(store_offset i)393 const char * get_at_offset(store_offset i) const { 394 return m_data.get(i); 395 } 396 get_cell(store_offset ofs,unsigned column)397 table_element get_cell(store_offset ofs, unsigned column) const { 398 return m_column_layout.get(m_data.get(ofs), column); 399 } 400 set_cell(store_offset ofs,unsigned column,table_element val)401 void set_cell(store_offset ofs, unsigned column, table_element val) { 402 m_column_layout.set(m_data.get(ofs), column, val); 403 } 404 405 void write_into_reserve(const table_element* f); 406 407 /** 408 \brief Return reference to an indexer over columns in \c key_cols. 409 410 An indexer can retrieve a sequence of offsets that with \c key_cols columns equal to 411 the specified key. Indexers are populated lazily -- they remember the position of the 412 last fact they contain, and when an indexer is retrieved by the \c get_key_indexer function, 413 all the new facts are added into the indexer. 414 415 When a fact is removed from the table, all indexers are destroyed. This is not an extra 416 expense in the current use scenario, because we first perform all fact removals and do the 417 joins only after that (joins are the only operations that lead to index construction). 418 */ 419 key_indexer& get_key_indexer(unsigned key_len, const unsigned * key_cols) const; 420 421 void reset_indexes(); 422 423 static void copy_columns(const column_layout & src_layout, const column_layout & dest_layout, 424 unsigned start_index, unsigned after_last, const char * src, char * dest, 425 unsigned & dest_idx, unsigned & pre_projection_idx, const unsigned * & next_removed); 426 427 /** 428 \c array \c removed_cols contains column indexes to be removed in ascending order and 429 is terminated by a number greater than the highest column index of a join the two tables. 430 This is to simplify the traversal of the array when building facts. 431 */ 432 static void concatenate_rows(const column_layout & layout1, const column_layout & layout2, 433 const column_layout & layout_res, const char * ptr1, const char * ptr2, char * res, 434 const unsigned * removed_cols); 435 436 /** 437 \brief Perform join-project between t1 and t2 iterating through t1 and retrieving relevant 438 columns from t2 using indexing. 439 440 \c array \c removed_cols contains column indexes to be removed in ascending order and 441 is terminated by a number greater than the highest column index of a join the two tables. 442 This is to simplify the traversal of the array when building facts. 443 444 \c tables_swapped value means that the resulting facts should contain facts from t2 first, 445 instead of the default behavior that would concatenate the two facts as \c (t1,t2). 446 447 \remark The function is called \c self_agnostic_join since, unlike the virtual method 448 \c join, it is static and therefore allows to easily swap the roles of the two joined 449 tables (the indexed and iterated one) in a way that is expected to give better performance. 450 */ 451 static void self_agnostic_join_project(const sparse_table & t1, const sparse_table & t2, 452 unsigned joined_col_cnt, const unsigned * t1_joined_cols, const unsigned * t2_joined_cols, 453 const unsigned * removed_cols, bool tables_swapped, sparse_table & result); 454 455 456 /** 457 If the fact at \c data (in table's native representation) is not in the table, 458 add it and return true. Otherwise return false. 459 */ 460 bool add_fact(const char * data); 461 462 bool add_reserve_content(); 463 464 void garbage_collect(); 465 466 sparse_table(sparse_table_plugin & p, const table_signature & sig, unsigned init_capacity=0); 467 sparse_table(const sparse_table & t); 468 ~sparse_table() override; 469 public: 470 deallocate()471 void deallocate() override { 472 get_plugin().recycle(this); 473 } 474 row_count()475 unsigned row_count() const { return m_data.entry_count(); } 476 get_plugin()477 sparse_table_plugin & get_plugin() const 478 { return static_cast<sparse_table_plugin &>(table_base::get_plugin()); } 479 empty()480 bool empty() const override { return row_count()==0; } 481 void add_fact(const table_fact & f) override; 482 bool contains_fact(const table_fact & f) const override; 483 bool fetch_fact(table_fact & f) const override; 484 void ensure_fact(const table_fact & f) override; 485 void remove_fact(const table_element* fact) override; 486 void reset() override; 487 488 table_base * clone() const override; 489 490 table_base::iterator begin() const override; 491 table_base::iterator end() const override; 492 get_size_estimate_rows()493 unsigned get_size_estimate_rows() const override { return row_count(); } 494 unsigned get_size_estimate_bytes() const override; knows_exact_size()495 bool knows_exact_size() const override { return true; } 496 }; 497 498 }; 499 500