/*++
Copyright (c) 2006 Microsoft Corporation

Module Name:

    dl_sparse_table.cpp

Abstract:

    <abstract>

Author:

    Krystof Hoder (t-khoder) 2010-09-24.

Revision History:

--*/

#include<utility>
#include "muz/base/dl_context.h"
#include "muz/base/dl_util.h"
#include "muz/rel/dl_sparse_table.h"

namespace datalog {


    // -----------------------------------
    //
    // entry_storage
    //
    // -----------------------------------

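    // entry_storage hands out a one-entry "reserve" slot at the end of its data
    // area: callers fill it via write_into_reserve() and then either commit it
    // (insert_reserve_content) or merely probe for an equal entry
    // (find_reserve_content). A sketch of the pattern used throughout this file:
    //
    //   storage.ensure_reserve();
    //   storage.write_into_reserve(serialized_fact);
    //   if (storage.insert_reserve_content()) { /* the fact was new */ }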
    entry_storage::store_offset entry_storage::insert_or_get_reserve_content() {
        SASSERT(has_reserve());
        store_offset entry_ofs = m_data_indexer.insert_if_not_there(m_reserve);
        if (m_reserve == entry_ofs) {
            //entry inserted, so reserve is no longer a reserve
            m_reserve = NO_RESERVE;
        }
        return entry_ofs;
    }
    bool entry_storage::insert_reserve_content() {
        SASSERT(has_reserve());
        store_offset entry_ofs = m_data_indexer.insert_if_not_there(m_reserve);
        if (m_reserve == entry_ofs) {
            //entry inserted, so reserve is no longer a reserve
            m_reserve = NO_RESERVE;
            return true;
        }
        return false;
    }

    bool entry_storage::remove_reserve_content() {
        SASSERT(has_reserve());
        store_offset entry_ofs;
        if (!find_reserve_content(entry_ofs)) {
            //the fact was not in the table
            return false;
        }
        remove_offset(entry_ofs);
        return true;
    }

    void entry_storage::remove_offset(store_offset ofs) {
        m_data_indexer.remove(ofs);
        store_offset last_ofs = after_last_offset() - m_entry_size;
        if (ofs!=last_ofs) {
            SASSERT(ofs + m_entry_size <= last_ofs);
            //we don't want any holes, so we put the last element at the place
            //of the removed one
            m_data_indexer.remove(last_ofs);
            char * base = &m_data.get(0);
            memcpy(base+ofs, base+last_ofs, m_entry_size);
            m_data_indexer.insert(ofs);
        }
        if (has_reserve()) {
            //we already have a reserve, so we need to shrink a little to keep having just one
            resize_data(m_data_size-m_entry_size);
        }
        m_reserve=last_ofs;
    }

    unsigned entry_storage::get_size_estimate_bytes() const {
        size_t sz = m_data.capacity();
        sz += m_data_indexer.capacity()*sizeof(storage_indexer::entry);
        return static_cast<unsigned>(sz);
    }

    // -----------------------------------
    //
    // sparse_table::column_layout
    //
    // -----------------------------------

    unsigned get_domain_length(uint64_t dom_size) {
        SASSERT(dom_size>0);

        unsigned length = 0;

        unsigned dom_size_sm;
        if (dom_size>UINT_MAX) {
            dom_size_sm = static_cast<unsigned>(dom_size>>32);
            length += 32;
            if ( (dom_size&UINT_MAX)!=0 && dom_size_sm!=UINT_MAX ) {
                dom_size_sm++;
            }
        }
        else {
            dom_size_sm=static_cast<unsigned>(dom_size);
        }
        if (dom_size_sm == 1) {
            length += 1; //unary domains
        }
        else if (dom_size_sm > 0x80000000u) {
            length += 32;
        }
        else {
            length += get_num_1bits(next_power_of_two(dom_size_sm)-1); //ceil(log2(dom_size))
        }
        return length;
    }
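    // For illustration (values derived from the branches above): get_domain_length(1)
    // and get_domain_length(2) both yield 1, get_domain_length(256) yields 8 and
    // get_domain_length(257) yields 9, i.e. ceil(log2(dom_size)) with a minimum of
    // one bit; a domain exceeding UINT_MAX additionally pays 32 bits for its low word.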

    sparse_table::column_layout::column_layout(const table_signature & sig)
            : m_functional_col_cnt(sig.functional_columns()) {
        SASSERT(sig.size() > 0);
        unsigned ofs = 0;
        unsigned sig_sz = sig.size();
        unsigned first_functional = sig_sz-m_functional_col_cnt;
        for (unsigned i=0; i<sig_sz; i++) {
            uint64_t dom_size = sig[i];
            unsigned length = get_domain_length(dom_size);
            SASSERT(length>0);
            SASSERT(length<=64);

            if (size() > 0 && (length > 54 || i == first_functional)) {
                //large domains must start byte-aligned, as well as functional columns
                make_byte_aligned_end(size()-1);
                ofs = back().next_ofs();
            }

            push_back(column_info(ofs, length));
            ofs += length;
        }
        make_byte_aligned_end(size()-1);
        SASSERT(back().next_ofs()%8 == 0);//the entries must be aligned to whole bytes
        m_entry_size = back().next_ofs()/8;
        if (m_functional_col_cnt) {
            SASSERT((*this)[first_functional].m_offset%8 == 0);
            m_functional_part_size = m_entry_size - (*this)[first_functional].m_offset/8;
        }
        else {
            m_functional_part_size = 0;
        }
    }

    void sparse_table::column_layout::make_byte_aligned_end(unsigned col_index0) {
        unsigned ofs = (*this)[col_index0].next_ofs();
        unsigned ofs_bit_part = ofs%8;
        unsigned rounded_ofs = (ofs_bit_part == 0) ? ofs : (ofs+8-ofs_bit_part);

        if (rounded_ofs!=ofs) {
            SASSERT(rounded_ofs>ofs);
            int diff = rounded_ofs-ofs;
            unsigned col_idx = col_index0+1;
            while(diff!=0) {
                //we should always be able to fix the alignment by the time we reach zero
                SASSERT(col_idx>0);
                col_idx--;
                column_info & ci = (*this)[col_idx];
                unsigned new_length = ci.m_length;
                if (ci.m_length < 64) {
                    unsigned swallowed = std::min(64-static_cast<int>(ci.m_length), diff);
                    diff -= swallowed;
                    new_length += swallowed;
                }
                unsigned new_ofs = ci.m_offset+diff;
                ci = column_info(new_ofs, new_length);
            }
        }

        SASSERT(rounded_ofs%8 == 0);
        SASSERT((*this)[col_index0].next_ofs()%8 == 0);
    }
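
    // A small worked example of the padding above: two columns of 3 and 4 bits
    // end at bit 7, so make_byte_aligned_end widens the last column to 5 bits and
    // the entry ends at bit 8. A column that is already 64 bits wide cannot
    // swallow any padding; it is shifted up instead and an earlier column in the
    // loop absorbs the difference.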

    // -----------------------------------
    //
    // sparse_table
    //
    // -----------------------------------

    class sparse_table::our_iterator_core : public iterator_core {

        class our_row : public row_interface {
            const our_iterator_core & m_parent;
        public:
            our_row(const sparse_table & t, const our_iterator_core & parent) :
              row_interface(t),
              m_parent(parent) {}

            table_element operator[](unsigned col) const override {
                return m_parent.m_layout.get(m_parent.m_ptr, col);
            }

        };

        const char * m_end;
        const char * m_ptr;
        unsigned m_fact_size;
        our_row m_row_obj;
        const column_layout & m_layout;

    public:
        our_iterator_core(const sparse_table & t, bool finished) :
          m_end(t.m_data.after_last()),
          m_ptr(finished ? m_end : t.m_data.begin()),
          m_fact_size(t.m_fact_size),
          m_row_obj(t, *this),
          m_layout(t.m_column_layout) {}

        bool is_finished() const override {
            return m_ptr == m_end;
        }

        row_interface & operator*() override {
            SASSERT(!is_finished());
            return m_row_obj;
        }
        void operator++() override {
            SASSERT(!is_finished());
            m_ptr+=m_fact_size;
        }
    };

    class sparse_table::key_indexer {
    protected:
        unsigned_vector m_key_cols;
    public:
        typedef const store_offset * offset_iterator;

        /**
           Iterators returned by \c begin() and \c end() are valid only as long as the \c query_result
           object that returned them exists.
        */
        struct query_result {
        private:
            bool m_singleton;
            union {
                store_offset m_single_result;
                struct {
                    offset_iterator begin;
                    offset_iterator end;
                } m_many;
            };
        public:
            /**
               \brief Empty result.
            */
            query_result() : m_singleton(false) {
                m_many.begin = nullptr;
                m_many.end = nullptr;
            }
            query_result(offset_iterator begin, offset_iterator end) : m_singleton(false) {
                m_many.begin = begin;
                m_many.end = end;
            }
            query_result(store_offset single_result) : m_singleton(true), m_single_result(single_result) {}

            offset_iterator begin() const { return m_singleton ? &m_single_result : m_many.begin; }
            offset_iterator end() const { return m_singleton ? (&m_single_result+1) : m_many.end; }
            bool empty() const { return begin() == end(); }
        };

        key_indexer(unsigned key_len, const unsigned * key_cols)
            : m_key_cols(key_len, key_cols) {}

        virtual ~key_indexer() {}

        virtual void update(const sparse_table & t) {}

        virtual query_result get_matching_offsets(const key_value & key) const = 0;
    };

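    // A sketch of the lookup pattern the table operations below follow
    // (get_key_indexer brings the indexer up to date before returning it):
    //
    //   key_indexer & idx = t.get_key_indexer(key_len, key_cols);
    //   key_indexer::query_result offsets = idx.get_matching_offsets(key);
    //   for (key_indexer::offset_iterator it = offsets.begin(); it != offsets.end(); ++it) {
    //       //process the fact stored at offset *it
    //   }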

    class sparse_table::general_key_indexer : public key_indexer {
        typedef svector<store_offset> offset_vector;
        typedef size_t_map<offset_vector> index_map;

        index_map m_map;
        mutable entry_storage m_keys;
        store_offset m_first_nonindexed;


        void key_to_reserve(const key_value & key) const {
            m_keys.ensure_reserve();
            m_keys.write_into_reserve((char *)(key.c_ptr()));
        }

        offset_vector & get_matching_offset_vector(const key_value & key) {
            key_to_reserve(key);
            store_offset ofs = m_keys.insert_or_get_reserve_content();
            index_map::entry * e = m_map.find_core(ofs);
            if (!e) {
                TRACE("dl_table_relation", tout << "inserting\n";);
                e = m_map.insert_if_not_there3(ofs, offset_vector());
            }
            return e->get_data().m_value;
        }
    public:
        general_key_indexer(unsigned key_len, const unsigned * key_cols)
            : key_indexer(key_len, key_cols),
            m_keys(key_len*sizeof(table_element)),
            m_first_nonindexed(0) {}

        void update(const sparse_table & t) override {
            if (m_first_nonindexed == t.m_data.after_last_offset()) {
                return;
            }
            SASSERT(m_first_nonindexed<t.m_data.after_last_offset());
            //we need to add new facts into the index

            unsigned key_len = m_key_cols.size();

            store_offset ofs = m_first_nonindexed;
            store_offset after_last = t.m_data.after_last_offset();

            key_value key;
            key.resize(key_len);

            offset_vector * index_entry = nullptr;
            bool key_modified = true;

            for (; ofs!=after_last; ofs+=t.m_fact_size) {
                for (unsigned i=0; i<key_len; i++) {
                    table_element col_val = t.get_cell(ofs, m_key_cols[i]);
                    if (key[i]!=col_val) {
                        key[i] = col_val;
                        key_modified = true;
                    }
                }

                if (key_modified) {
                    index_entry = &get_matching_offset_vector(key);
                    key_modified = false;
                }
                SASSERT(index_entry);
                //here we insert the offset of the fact in m_data vector into the indexer
                index_entry->insert(ofs);
            }

            m_first_nonindexed = t.m_data.after_last_offset();
        }

        query_result get_matching_offsets(const key_value & key) const override {
            key_to_reserve(key);
            store_offset ofs;
            if (!m_keys.find_reserve_content(ofs)) {
                return query_result();
            }
            index_map::entry * e = m_map.find_core(ofs);
            if (!e) {
                return query_result();
            }
            const offset_vector & res = e->get_data().m_value;
            return query_result(res.begin(), res.end());
        }
    };

    /**
       When doing lookup using this index, the content of the reserve in sparse_table::m_data changes.
    */
    class sparse_table::full_signature_key_indexer : public key_indexer {
        const sparse_table & m_table;

        /**
           Permutation mapping key positions to the corresponding table fact columns.
           If empty, no permutation is necessary.
        */
        unsigned_vector m_permutation;
        mutable table_fact m_key_fact;
    public:

        static bool can_handle(unsigned key_len, const unsigned * key_cols, const sparse_table & t) {
            unsigned non_func_cols = t.get_signature().first_functional();
            if (key_len!=non_func_cols) {
                return false;
            }
            counter ctr;
            ctr.count(key_len, key_cols);
            if (ctr.get_max_counter_value()!=1 || ctr.get_max_positive()!=non_func_cols-1) {
                return false;
            }
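            //together with key_len == non_func_cols, a max counter value of 1 (no
            //repeated columns) and a max positive index of non_func_cols-1 imply
            //that the key columns are exactly {0, ..., non_func_cols-1}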
            SASSERT(ctr.get_positive_count() == non_func_cols);
            return true;
        }

        full_signature_key_indexer(unsigned key_len, const unsigned * key_cols, const sparse_table & t)
                : key_indexer(key_len, key_cols),
                m_table(t) {
            SASSERT(can_handle(key_len, key_cols, t));

            m_permutation.resize(key_len);
            for (unsigned i=0; i<key_len; i++) {
                //m_permutation[m_key_cols[i]] = i;
                m_permutation[i] = m_key_cols[i];
            }
            m_key_fact.resize(t.get_signature().size());
        }

        ~full_signature_key_indexer() override {}

        query_result get_matching_offsets(const key_value & key) const override {
            unsigned key_len = m_key_cols.size();
            for (unsigned i=0; i<key_len; i++) {
                m_key_fact[m_permutation[i]] = key[i];
            }
            //We will change the content of the reserve, which does not change the
            //'high-level' content of the table.
            sparse_table & t = const_cast<sparse_table&>(m_table);
            t.write_into_reserve(m_key_fact.c_ptr());

            store_offset res;
            if (!t.m_data.find_reserve_content(res)) {
                return query_result();
            }
            return query_result(res);
        }
    };

    sparse_table::sparse_table(sparse_table_plugin & p, const table_signature & sig, unsigned init_capacity)
            : table_base(p, sig),
            m_column_layout(sig),
            m_fact_size(m_column_layout.m_entry_size),
            m_data(m_fact_size, m_column_layout.m_functional_part_size, init_capacity) {}

    sparse_table::sparse_table(const sparse_table & t)
            : table_base(t.get_plugin(), t.get_signature()),
            m_column_layout(t.m_column_layout),
            m_fact_size(t.m_fact_size),
            m_data(t.m_data) {}

    table_base * sparse_table::clone() const {
        return get_plugin().mk_clone(*this);
    }

    sparse_table::~sparse_table() {
        reset_indexes();
    }

    void sparse_table::reset() {
        reset_indexes();
        m_data.reset();
    }

    table_base::iterator sparse_table::begin() const {
        return mk_iterator(alloc(our_iterator_core, *this, false));
    }

    table_base::iterator sparse_table::end() const {
        return mk_iterator(alloc(our_iterator_core, *this, true));
    }

    sparse_table::key_indexer& sparse_table::get_key_indexer(unsigned key_len,
            const unsigned * key_cols) const {
            verbose_action  _va("get_key_indexer");

#if Z3DEBUG
        //We allow indexes only on non-functional columns because we want to be able to modify them
        //without having to worry about updating indexes.
        //We might keep a list of indexes that contain functional columns and on an update reset
        //only those.
        SASSERT(key_len == 0 ||
            counter().count(key_len, key_cols).get_max_positive()<get_signature().first_functional());
#endif
        key_spec kspec;
        kspec.append(key_len, key_cols);
        key_index_map::entry * key_map_entry = m_key_indexes.insert_if_not_there3(kspec, nullptr);
        if (!key_map_entry->get_data().m_value) {
            if (full_signature_key_indexer::can_handle(key_len, key_cols, *this)) {
                key_map_entry->get_data().m_value = alloc(full_signature_key_indexer, key_len, key_cols, *this);
            }
            else {
                key_map_entry->get_data().m_value = alloc(general_key_indexer, key_len, key_cols);
            }
        }
        key_indexer & indexer = *key_map_entry->get_data().m_value;
        indexer.update(*this);
        return indexer;
    }

    void sparse_table::reset_indexes() {
        key_index_map::iterator kmit = m_key_indexes.begin();
        key_index_map::iterator kmend = m_key_indexes.end();
        for (; kmit!=kmend; ++kmit) {
            dealloc((*kmit).m_value);
        }
        m_key_indexes.reset();
    }

    void sparse_table::write_into_reserve(const table_element* f) {
        TRACE("dl_table_relation", tout << "\n";);
        m_data.ensure_reserve();
        char * reserve = m_data.get_reserve_ptr();
        unsigned col_cnt = m_column_layout.size();
        for (unsigned i = 0; i < col_cnt; ++i) {
            SASSERT(f[i] < get_signature()[i]); //the value fits into the table signature
            m_column_layout.set(reserve, i, f[i]);
        }
    }

    bool sparse_table::add_fact(const char * data) {
        verbose_action  _va("add_fact", 10);
        m_data.write_into_reserve(data);
        return add_reserve_content();
    }

    void sparse_table::add_fact(const table_fact & f) {
        write_into_reserve(f.c_ptr());
        add_reserve_content();
    }

    bool sparse_table::add_reserve_content() {
        return m_data.insert_reserve_content();
    }

    bool sparse_table::contains_fact(const table_fact & f) const {
        verbose_action  _va("contains_fact", 2);
        sparse_table & t = const_cast<sparse_table &>(*this);
        t.write_into_reserve(f.c_ptr());
        unsigned func_col_cnt = get_signature().functional_columns();
        if (func_col_cnt == 0) {
            return t.m_data.reserve_content_already_present();
        }
        else {
            store_offset ofs;
            if (!t.m_data.find_reserve_content(ofs)) {
                return false;
            }
            unsigned sz = get_signature().size();
            for (unsigned i=func_col_cnt; i<sz; i++) {
                if (t.get_cell(ofs, i)!=f[i]) {
                    return false;
                }
            }
            return true;
        }
    }

    bool sparse_table::fetch_fact(table_fact & f) const {
        verbose_action  _va("fetch_fact", 2);
        const table_signature & sig = get_signature();
        SASSERT(f.size() == sig.size());
        if (sig.functional_columns() == 0) {
            return contains_fact(f);
        }
        else {
            sparse_table & t = const_cast<sparse_table &>(*this);
            t.write_into_reserve(f.c_ptr());
            store_offset ofs;
            if (!t.m_data.find_reserve_content(ofs)) {
                return false;
            }
            unsigned sz = sig.size();
            for (unsigned i=sig.first_functional(); i<sz; i++) {
                f[i] = t.get_cell(ofs, i);
            }
            return true;
        }
    }

    /**
       In this function we modify the content of table functional columns without resetting indexes.
       This is ok as long as we do not allow indexing on functional columns.
    */
    void sparse_table::ensure_fact(const table_fact & f) {
        verbose_action  _va("ensure_fact", 2);
        const table_signature & sig = get_signature();
        if (sig.functional_columns() == 0) {
            add_fact(f);
        }
        else {
            write_into_reserve(f.c_ptr());
            store_offset ofs;
            if (!m_data.find_reserve_content(ofs)) {
                add_fact(f);
                return;
            }
            unsigned sz = sig.size();
            for (unsigned i=sig.first_functional(); i<sz; i++) {
                set_cell(ofs, i, f[i]);
            }
        }
    }

    void sparse_table::remove_fact(const table_element* f) {
        verbose_action  _va("remove_fact", 2);
        //first write the fact into the reserve so that we can find its original location and remove it
        write_into_reserve(f);
        if (!m_data.remove_reserve_content()) {
            //the fact was not in the table
            return;
        }
        reset_indexes();
    }

    void sparse_table::copy_columns(const column_layout & src_layout, const column_layout & dest_layout,
            unsigned start_index, unsigned after_last, const char * src, char * dest,
            unsigned & dest_idx, unsigned & pre_projection_idx, const unsigned * & next_removed) {
        for (unsigned i=start_index; i<after_last; i++, pre_projection_idx++) {
            if (*next_removed == pre_projection_idx) {
                next_removed++;
                continue;
            }
            SASSERT(*next_removed>pre_projection_idx);
            dest_layout.set(dest, dest_idx++, src_layout.get(src, i));
        }
    }

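    //The four copy_columns calls below assemble the result row in the order:
    //non-functional columns of the first table, non-functional columns of the
    //second, then the functional columns of each; removed_cols is interpreted
    //as indices into this concatenated pre-projection ordering.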
    void sparse_table::concatenate_rows(const column_layout & layout1, const column_layout & layout2,
            const column_layout & layout_res, const char * ptr1, const char * ptr2, char * res,
            const unsigned * removed_cols) {
        unsigned t1non_func = layout1.size()-layout1.m_functional_col_cnt;
        unsigned t2non_func = layout2.size()-layout2.m_functional_col_cnt;
        unsigned t1cols = layout1.size();
        unsigned t2cols = layout2.size();
        unsigned orig_i = 0;
        unsigned res_i = 0;
        const unsigned * next_removed = removed_cols;
        copy_columns(layout1, layout_res, 0, t1non_func, ptr1, res, res_i, orig_i, next_removed);
        copy_columns(layout2, layout_res, 0, t2non_func, ptr2, res, res_i, orig_i, next_removed);
        copy_columns(layout1, layout_res, t1non_func, t1cols, ptr1, res, res_i, orig_i, next_removed);
        copy_columns(layout2, layout_res, t2non_func, t2cols, ptr2, res, res_i, orig_i, next_removed);
    }

    void sparse_table::garbage_collect() {
        if (memory::above_high_watermark()) {
            get_plugin().garbage_collect();
        }
        if (memory::above_high_watermark()) {
            IF_VERBOSE(1, verbose_stream() << "Ran out of memory while filling table of size: " << get_size_estimate_rows() << " rows " << get_size_estimate_bytes() << " bytes\n";);
            throw out_of_memory_error();
        }
    }

    void sparse_table::self_agnostic_join_project(const sparse_table & t1, const sparse_table & t2,
            unsigned joined_col_cnt, const unsigned * t1_joined_cols, const unsigned * t2_joined_cols,
            const unsigned * removed_cols, bool tables_swapped, sparse_table & result) {

        verbose_action _va("join_project", 1);
        unsigned t1_entry_size = t1.m_fact_size;
        unsigned t2_entry_size = t2.m_fact_size;

        size_t t1idx = 0;
        size_t t1end = t1.m_data.after_last_offset();

        TRACE("dl_table_relation",
              tout << "joined_col_cnt: " << joined_col_cnt << "\n";
              tout << "t1_entry_size:  " << t1_entry_size << "\n";
              tout << "t2_entry_size:  " << t2_entry_size << "\n";
              t1.display(tout);
              t2.display(tout);
              tout << (&t1) << " " << (&t2) << " " << (&result) << "\n";
              );

        if (joined_col_cnt == 0) {
            size_t t2idx = 0;
            size_t t2end = t2.m_data.after_last_offset();

            for (; t1idx!=t1end; t1idx+=t1_entry_size) {
                for (t2idx = 0; t2idx != t2end; t2idx += t2_entry_size) {
                    result.m_data.ensure_reserve();
                    result.garbage_collect();
                    char * res_reserve = result.m_data.get_reserve_ptr();
                    char const* t1ptr = t1.get_at_offset(t1idx);
                    char const* t2ptr = t2.get_at_offset(t2idx);
                    if (tables_swapped) {
                        concatenate_rows(t2.m_column_layout, t1.m_column_layout, result.m_column_layout,
                            t2ptr, t1ptr, res_reserve, removed_cols);
                    } else {
                        concatenate_rows(t1.m_column_layout, t2.m_column_layout, result.m_column_layout,
                            t1ptr, t2ptr, res_reserve, removed_cols);
                    }
                    result.add_reserve_content();
                }
            }
            return;
        }

        key_value t1_key;
        t1_key.resize(joined_col_cnt);
        key_indexer& t2_indexer = t2.get_key_indexer(joined_col_cnt, t2_joined_cols);

        bool key_modified = true;
        key_indexer::query_result t2_offsets;

        for (; t1idx != t1end; t1idx += t1_entry_size) {
            for (unsigned i = 0; i < joined_col_cnt; i++) {
                table_element val = t1.m_column_layout.get(t1.get_at_offset(t1idx), t1_joined_cols[i]);
                TRACE("dl_table_relation", tout << "val: " << val << " " << t1idx << " " << t1_joined_cols[i] << "\n";);
                if (t1_key[i] != val) {
                    t1_key[i] = val;
                    key_modified = true;
                }
            }
            if (key_modified) {
                t2_offsets = t2_indexer.get_matching_offsets(t1_key);
                key_modified = false;
            }

            if (t2_offsets.empty()) {
                continue;
            }

            key_indexer::offset_iterator t2ofs_it  = t2_offsets.begin();
            key_indexer::offset_iterator t2ofs_end = t2_offsets.end();
            for (; t2ofs_it != t2ofs_end; ++t2ofs_it) {
                store_offset t2ofs = *t2ofs_it;
                result.m_data.ensure_reserve();
                result.garbage_collect();
                char * res_reserve = result.m_data.get_reserve_ptr();
                char const * t1ptr = t1.get_at_offset(t1idx);
                char const * t2ptr = t2.get_at_offset(t2ofs);
                if (tables_swapped) {
                    concatenate_rows(t2.m_column_layout, t1.m_column_layout, result.m_column_layout,
                        t2ptr, t1ptr, res_reserve, removed_cols);
                } else {
                    concatenate_rows(t1.m_column_layout, t2.m_column_layout, result.m_column_layout,
                        t1ptr, t2ptr, res_reserve, removed_cols);
                }
                result.add_reserve_content();
            }
        }
    }


    // -----------------------------------
    //
    // sparse_table_plugin
    //
    // -----------------------------------

    sparse_table_plugin::sparse_table_plugin(relation_manager & manager)
        : table_plugin(symbol("sparse"), manager) {}

    sparse_table_plugin::~sparse_table_plugin() {
        reset();
    }

    sparse_table const& sparse_table_plugin::get(table_base const& t) { return dynamic_cast<sparse_table const&>(t); }
    sparse_table& sparse_table_plugin::get(table_base& t) { return dynamic_cast<sparse_table&>(t); }
    sparse_table const* sparse_table_plugin::get(table_base const* t) { return dynamic_cast<sparse_table const*>(t); }
    sparse_table* sparse_table_plugin::get(table_base* t) { return dynamic_cast<sparse_table*>(t); }


    void sparse_table_plugin::reset() {
        table_pool::iterator it = m_pool.begin();
        table_pool::iterator end = m_pool.end();
        for (; it!=end; ++it) {
            sp_table_vector * vect = it->m_value;
            sp_table_vector::iterator vit = vect->begin();
            sp_table_vector::iterator vend = vect->end();
            for (; vit!=vend; ++vit) {
                (*vit)->destroy(); //calling deallocate() would only put the table back into the pool
            }
            dealloc(vect);
        }
        m_pool.reset();
    }

    void sparse_table_plugin::garbage_collect() {
        IF_VERBOSE(2, verbose_stream() << "garbage collecting "<< memory::get_allocation_size() << " bytes down to ";);
        reset();
        IF_VERBOSE(2, verbose_stream() << memory::get_allocation_size() << " bytes\n";);
    }

    void sparse_table_plugin::recycle(sparse_table * t) {
        verbose_action  _va("recycle", 2);
        const table_signature & sig = t->get_signature();
        t->reset();

        sp_table_vector * & vect = m_pool.insert_if_not_there(sig, nullptr);
        if (vect == nullptr) {
            vect = alloc(sp_table_vector);
        }
        IF_VERBOSE(12, verbose_stream() << "Recycle: " << t->get_size_estimate_bytes() << "\n";);

        vect->push_back(t);
    }

    table_base * sparse_table_plugin::mk_empty(const table_signature & s) {
        SASSERT(can_handle_signature(s));

        sp_table_vector * vect;
        if (!m_pool.find(s, vect) || vect->empty()) {
            return alloc(sparse_table, *this, s);
        }
        sparse_table * res = vect->back();
        vect->pop_back();
        return res;
    }

    sparse_table * sparse_table_plugin::mk_clone(const sparse_table & t) {
        sparse_table * res = get(mk_empty(t.get_signature()));
        res->m_data = t.m_data;
        return res;
    }


    bool sparse_table_plugin::join_involves_functional(const table_signature & s1, const table_signature & s2,
        unsigned col_cnt, const unsigned * cols1, const unsigned * cols2) {
        if (col_cnt == 0) {
            return false;
        }
        return counter().count(col_cnt, cols1).get_max_positive()>=s1.first_functional()
            || counter().count(col_cnt, cols2).get_max_positive()>=s2.first_functional();
    }


    class sparse_table_plugin::join_project_fn : public convenient_table_join_project_fn {
    public:
        join_project_fn(const table_signature & t1_sig, const table_signature & t2_sig, unsigned col_cnt,
                const unsigned * cols1, const unsigned * cols2, unsigned removed_col_cnt,
                const unsigned * removed_cols)
                : convenient_table_join_project_fn(t1_sig, t2_sig, col_cnt, cols1, cols2,
                removed_col_cnt, removed_cols) {
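            //append a sentinel so that copy_columns can always safely dereference
            //next_removed, even after all genuinely removed columns have been passed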
            m_removed_cols.push_back(UINT_MAX);
        }

        table_base * operator()(const table_base & tb1, const table_base & tb2) override {

            const sparse_table & t1 = get(tb1);
            const sparse_table & t2 = get(tb2);

            sparse_table_plugin & plugin = t1.get_plugin();

            sparse_table * res = get(plugin.mk_empty(get_result_signature()));

            //If we join on at least one column, we want to iterate over the smaller
            //table and do indexed lookups into the bigger one. If we simply compute
            //a product, we want the bigger one in the outer loop (then the small one
            //will hopefully fit into the cache).
            if ( (t1.row_count() > t2.row_count()) == (!m_cols1.empty()) ) {
                sparse_table::self_agnostic_join_project(t2, t1, m_cols1.size(), m_cols2.c_ptr(),
                    m_cols1.c_ptr(), m_removed_cols.c_ptr(), true, *res);
            }
            else {
                sparse_table::self_agnostic_join_project(t1, t2, m_cols1.size(), m_cols1.c_ptr(),
                    m_cols2.c_ptr(), m_removed_cols.c_ptr(), false, *res);
            }
            TRACE("dl_table_relation", tb1.display(tout); tb2.display(tout); res->display(tout); );
            return res;
        }
    };

    table_join_fn * sparse_table_plugin::mk_join_fn(const table_base & t1, const table_base & t2,
            unsigned col_cnt, const unsigned * cols1, const unsigned * cols2) {
        const table_signature & sig1 = t1.get_signature();
        const table_signature & sig2 = t2.get_signature();
        if (t1.get_kind()!=get_kind() || t2.get_kind()!=get_kind()
            || join_involves_functional(sig1, sig2, col_cnt, cols1, cols2)) {
            //We also don't allow indexes on functional columns (and they are needed for joins)
            return nullptr;
        }
        return mk_join_project_fn(t1, t2, col_cnt, cols1, cols2, 0, static_cast<unsigned*>(nullptr));
    }

    table_join_fn * sparse_table_plugin::mk_join_project_fn(const table_base & t1, const table_base & t2,
            unsigned col_cnt, const unsigned * cols1, const unsigned * cols2, unsigned removed_col_cnt,
            const unsigned * removed_cols) {
        const table_signature & sig1 = t1.get_signature();
        const table_signature & sig2 = t2.get_signature();
        if (t1.get_kind()!=get_kind() || t2.get_kind()!=get_kind()
            || removed_col_cnt == t1.get_signature().size()+t2.get_signature().size()
            || join_involves_functional(sig1, sig2, col_cnt, cols1, cols2)) {
            //We don't allow sparse tables with zero signatures (and project on all columns leads to such)
            //We also don't allow indexes on functional columns.
            return nullptr;
        }
        return alloc(join_project_fn, t1.get_signature(), t2.get_signature(), col_cnt, cols1, cols2,
            removed_col_cnt, removed_cols);
    }

    class sparse_table_plugin::union_fn : public table_union_fn {
    public:
        void operator()(table_base & tgt0, const table_base & src0, table_base * delta0) override {
            verbose_action  _va("union");
            sparse_table & tgt = get(tgt0);
            const sparse_table & src = get(src0);
            sparse_table * delta = get(delta0);

            unsigned fact_size = tgt.m_fact_size;
            const char* ptr = src.m_data.begin();
            const char* after_last=src.m_data.after_last();
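            //add_fact returns true only when the row was newly inserted, so
            //delta (when supplied) accumulates exactly the facts missing from tgt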
            for (; ptr<after_last; ptr+=fact_size) {
                if (tgt.add_fact(ptr) && delta) {
                    delta->add_fact(ptr);
                }
            }
        }
    };

    table_union_fn * sparse_table_plugin::mk_union_fn(const table_base & tgt, const table_base & src,
            const table_base * delta) {
        if (tgt.get_kind()!=get_kind() || src.get_kind()!=get_kind()
            || (delta && delta->get_kind()!=get_kind())
            || tgt.get_signature()!=src.get_signature()
            || (delta && delta->get_signature()!=tgt.get_signature())) {
            return nullptr;
        }
        return alloc(union_fn);
    }

    class sparse_table_plugin::project_fn : public convenient_table_project_fn {
        const unsigned m_inp_col_cnt;
        const unsigned m_removed_col_cnt;
        const unsigned m_result_col_cnt;
    public:
        project_fn(const table_signature & orig_sig, unsigned removed_col_cnt, const unsigned * removed_cols)
            : convenient_table_project_fn(orig_sig, removed_col_cnt, removed_cols),
            m_inp_col_cnt(orig_sig.size()),
            m_removed_col_cnt(removed_col_cnt),
            m_result_col_cnt(orig_sig.size()-removed_col_cnt) {
                SASSERT(removed_col_cnt>0);
        }

        virtual void transform_row(const char * src, char * tgt,
            const sparse_table::column_layout & src_layout,
            const sparse_table::column_layout & tgt_layout) {
                unsigned r_idx=0;
                unsigned tgt_i=0;
                for (unsigned i=0; i<m_inp_col_cnt; i++) {
                    if (r_idx!=m_removed_col_cnt && i == m_removed_cols[r_idx]) {
                        SASSERT(r_idx<m_removed_col_cnt);
                        r_idx++;
                        continue;
                    }
                    tgt_layout.set(tgt, tgt_i, src_layout.get(src, i));
                    tgt_i++;
                }
                SASSERT(tgt_i == m_result_col_cnt);
                SASSERT(r_idx == m_removed_col_cnt);
        }

        table_base * operator()(const table_base & tb) override {
            verbose_action  _va("project");
            const sparse_table & t = get(tb);

            unsigned t_fact_size = t.m_fact_size;

            sparse_table_plugin & plugin = t.get_plugin();
            sparse_table * res = get(plugin.mk_empty(get_result_signature()));

            const sparse_table::column_layout & src_layout = t.m_column_layout;
            const sparse_table::column_layout & tgt_layout = res->m_column_layout;

            const char* t_ptr = t.m_data.begin();
            const char* t_end = t.m_data.after_last();
            for (; t_ptr!=t_end; t_ptr+=t_fact_size) {
                SASSERT(t_ptr<t_end);
                res->m_data.ensure_reserve();
                char * res_ptr = res->m_data.get_reserve_ptr();
                transform_row(t_ptr, res_ptr, src_layout, tgt_layout);
                res->m_data.insert_reserve_content();
            }
            return res;
        }
    };

    table_transformer_fn * sparse_table_plugin::mk_project_fn(const table_base & t, unsigned col_cnt,
            const unsigned * removed_cols) {
        if (col_cnt == t.get_signature().size()) {
            return nullptr;
        }
        return alloc(project_fn, t.get_signature(), col_cnt, removed_cols);
    }


    class sparse_table_plugin::select_equal_and_project_fn : public convenient_table_transformer_fn {
        const unsigned m_col;
        sparse_table::key_value m_key;
    public:
        select_equal_and_project_fn(const table_signature & orig_sig, table_element val, unsigned col)
                : m_col(col) {
            table_signature::from_project(orig_sig, 1, &col, get_result_signature());
            m_key.push_back(val);
        }

        table_base * operator()(const table_base & tb) override {
            verbose_action  _va("select_equal_and_project");
            const sparse_table & t = get(tb);

            sparse_table_plugin & plugin = t.get_plugin();
            sparse_table * res = get(plugin.mk_empty(get_result_signature()));

            const sparse_table::column_layout & t_layout = t.m_column_layout;
            const sparse_table::column_layout & res_layout = res->m_column_layout;
            unsigned t_cols = t_layout.size();

            sparse_table::key_indexer & indexer = t.get_key_indexer(1, &m_col);
            sparse_table::key_indexer::query_result t_offsets = indexer.get_matching_offsets(m_key);
            if (t_offsets.empty()) {
                //no matches
                return res;
            }
            sparse_table::key_indexer::offset_iterator ofs_it=t_offsets.begin();
            sparse_table::key_indexer::offset_iterator ofs_end=t_offsets.end();

            for (; ofs_it!=ofs_end; ++ofs_it) {
                sparse_table::store_offset t_ofs = *ofs_it;
                const char * t_ptr = t.get_at_offset(t_ofs);

                res->m_data.ensure_reserve();
                char * res_reserve = res->m_data.get_reserve_ptr();

                unsigned res_i = 0;
                for (unsigned i=0; i<t_cols; i++) {
                    if (i == m_col) {
                        continue;
                    }
                    res_layout.set(res_reserve, res_i++, t_layout.get(t_ptr, i));
                }
                res->add_reserve_content();
            }
            return res;
        }
    };

    table_transformer_fn * sparse_table_plugin::mk_select_equal_and_project_fn(const table_base & t,
            const table_element & value, unsigned col) {
        if (t.get_kind()!=get_kind() || t.get_signature().size() == 1 || col>=t.get_signature().first_functional()) {
            //We don't allow sparse tables with zero signatures (and project on a single
            //column table produces one).
            //We also don't allow indexes on functional columns. And our implementation of
            //select_equal_and_project uses index on \c col.
            return nullptr;
        }
        return alloc(select_equal_and_project_fn, t.get_signature(), value, col);
    }


    class sparse_table_plugin::rename_fn : public convenient_table_rename_fn {
        unsigned_vector m_out_of_cycle;
    public:
        rename_fn(const table_signature & orig_sig, unsigned permutation_cycle_len, const unsigned * permutation_cycle)
            : convenient_table_rename_fn(orig_sig, permutation_cycle_len, permutation_cycle) {
                SASSERT(permutation_cycle_len>=2);
                idx_set cycle_cols;
                for (unsigned i=0; i < permutation_cycle_len; ++i) {
                    cycle_cols.insert(permutation_cycle[i]);
                }
                for (unsigned i=0; i < orig_sig.size(); ++i) {
                    if (!cycle_cols.contains(i)) {
                        m_out_of_cycle.push_back(i);
                    }
                }
        }

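        //the rotation below moves the value of column m_cycle[i] into column
        //m_cycle[i-1], and the value of column m_cycle[0] into the last column
        //of the cycle; columns outside the cycle are copied unchanged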
        void transform_row(const char * src, char * tgt,
            const sparse_table::column_layout & src_layout,
            const sparse_table::column_layout & tgt_layout) {

                for (unsigned i=1; i < m_cycle.size(); ++i) {
                    tgt_layout.set(tgt, m_cycle[i-1], src_layout.get(src, m_cycle[i]));
                }
                tgt_layout.set(tgt, m_cycle[m_cycle.size()-1], src_layout.get(src, m_cycle[0]));

                unsigned_vector::const_iterator it = m_out_of_cycle.begin();
                unsigned_vector::const_iterator end = m_out_of_cycle.end();
                for (; it!=end; ++it) {
                    unsigned col = *it;
                    tgt_layout.set(tgt, col, src_layout.get(src, col));
                }
        }

        table_base * operator()(const table_base & tb) override {
            verbose_action  _va("rename");

            const sparse_table & t = get(tb);

            unsigned t_fact_size = t.m_fact_size;

            sparse_table_plugin & plugin = t.get_plugin();
            sparse_table * res = get(plugin.mk_empty(get_result_signature()));

            size_t res_fact_size = res->m_fact_size;
            size_t res_data_size = res_fact_size*t.row_count();
            if (res_fact_size != 0 && (res_data_size / res_fact_size) != t.row_count()) {
                throw default_exception("multiplication overflow");
            }

            res->m_data.resize_data(res_data_size);

            //here we can separate creating the data from inserting it into the
            //hash map, since we know that no row will become a duplicate

            //create the data
            const char* t_ptr = t.m_data.begin();
            char* res_ptr = res->m_data.begin();
            char* res_end = res_ptr+res_data_size;
            for (; res_ptr!=res_end; t_ptr+=t_fact_size, res_ptr+=res_fact_size) {
                transform_row(t_ptr, res_ptr, t.m_column_layout, res->m_column_layout);
            }

            //and insert them into the hash-map
            for (size_t i = 0; i != res_data_size; i += res_fact_size) {
                TRUSTME(res->m_data.insert_offset(i));
            }

            return res;
        }
    };

    table_transformer_fn * sparse_table_plugin::mk_rename_fn(const table_base & t, unsigned permutation_cycle_len,
            const unsigned * permutation_cycle) {
        if (t.get_kind()!=get_kind()) {
            return nullptr;
        }
        return alloc(rename_fn, t.get_signature(), permutation_cycle_len, permutation_cycle);
    }

    class sparse_table_plugin::negation_filter_fn : public convenient_table_negation_filter_fn {
        typedef sparse_table::store_offset store_offset;
        typedef sparse_table::key_value key_value;
        typedef sparse_table::key_indexer key_indexer;

        bool m_joining_neg_non_functional;

        /**
           Used by the \c collect_intersection_offsets function.
           If \c tgt_is_first is false, it contains the same items as \c res.
        */
1131         idx_set m_intersection_content;
1132 
1133 
1134     public:
negation_filter_fn(const table_base & tgt,const table_base & neg,unsigned joined_col_cnt,const unsigned * t_cols,const unsigned * negated_cols)1135         negation_filter_fn(const table_base & tgt, const table_base & neg,
1136                     unsigned joined_col_cnt, const unsigned * t_cols, const unsigned * negated_cols)
1137                 : convenient_table_negation_filter_fn(tgt, neg, joined_col_cnt, t_cols, negated_cols) {
1138             unsigned neg_first_func = neg.get_signature().first_functional();
1139             counter ctr;
1140             ctr.count(m_cols2);
1141             m_joining_neg_non_functional = ctr.get_max_counter_value() == 1
1142                 && ctr.get_positive_count() == neg_first_func
1143                 && (neg_first_func == 0 || ctr.get_max_positive() == neg_first_func-1);
1144         }
1145 
1146         /**
1147            Collect offsets of rows in \c t1 or \c t2 (depends on whether \c tgt_is_first is true or false)
1148            that have a match in the other table into \c res. Offsets in \c res are in ascending order.
1149         */
collect_intersection_offsets(const sparse_table & t1,const sparse_table & t2,bool tgt_is_first,svector<store_offset> & res)1150         void collect_intersection_offsets(const sparse_table & t1, const sparse_table & t2,
1151                 bool tgt_is_first, svector<store_offset> & res) {
1152             SASSERT(res.empty());
1153 
1154             m_intersection_content.reset();
1155 
1156             unsigned joined_col_cnt = m_cols1.size();
1157             unsigned t1_entry_size = t1.m_data.entry_size();
1158 
1159             const unsigned * cols1 = tgt_is_first ? m_cols1.c_ptr() : m_cols2.c_ptr();
1160             const unsigned * cols2 = tgt_is_first ? m_cols2.c_ptr() : m_cols1.c_ptr();
1161 
1162             key_value t1_key;
1163             t1_key.resize(joined_col_cnt);
1164             key_indexer & t2_indexer = t2.get_key_indexer(joined_col_cnt, cols2);
1165 
            bool key_modified=true;
            key_indexer::query_result t2_offsets;
            store_offset t1_after_last = t1.m_data.after_last_offset();
            for (store_offset t1_ofs=0; t1_ofs<t1_after_last; t1_ofs+=t1_entry_size) {

                for (unsigned i=0; i<joined_col_cnt; i++) {
                    table_element val = t1.get_cell(t1_ofs, cols1[i]);
                    if (t1_key[i]!=val) {
                        t1_key[i]=val;
                        key_modified=true;
                    }
                }
                if (key_modified) {
                    t2_offsets = t2_indexer.get_matching_offsets(t1_key);
                    key_modified = false;
                }

                if (t2_offsets.empty()) {
                    continue;
                }
                if (tgt_is_first) {
                    res.push_back(t1_ofs);
                }
                else {
                    key_indexer::offset_iterator it  = t2_offsets.begin();
                    key_indexer::offset_iterator end = t2_offsets.end();
                    for (; it!=end; ++it) {
                        store_offset ofs = *it;
                        unsigned offs2 = static_cast<unsigned>(ofs);
                        if (ofs != offs2) {
                            throw default_exception("Z3 cannot perform negation with excessively large tables");
                        }
                        if (!m_intersection_content.contains(offs2)) {
                            m_intersection_content.insert(offs2);
                            res.push_back(ofs);
                        }
                    }
                }
            }

            if (!tgt_is_first) {
                //in this case \c res now may be in arbitrary order
                std::sort(res.begin(), res.end());
            }
        }

        void operator()(table_base & tgt0, const table_base & neg0) override {
            sparse_table & tgt = get(tgt0);
            const sparse_table & neg = get(neg0);

            verbose_action  _va("filter_by_negation");

            if (m_cols1.empty()) {
                if (!neg.empty()) {
                    tgt.reset();
                }
                return;
            }

            svector<store_offset> to_remove; //offsets here are in increasing order

            //We don't do just the simple tgt.row_count() > neg.row_count() test because
            //the swapped case is more expensive. The constant 4 is, however, just my
            //guess at what the ratio might be.
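            //Worked example with hypothetical sizes: for 1000 rows in tgt and
            //200 rows in neg we have 1000/4 = 250 > 200, so we iterate over the
            //smaller neg and collect matching tgt offsets through tgt's index.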
            if (tgt.row_count()/4>neg.row_count()) {
                collect_intersection_offsets(neg, tgt, false, to_remove);
            }
            else {
                collect_intersection_offsets(tgt, neg, true, to_remove);
            }

            //the largest offsets are at the end, so we can remove them one by one
            while (!to_remove.empty()) {
                store_offset removed_ofs = to_remove.back();
                to_remove.pop_back();
                tgt.m_data.remove_offset(removed_ofs);
            }
            tgt.reset_indexes();
        }

    };

    table_intersection_filter_fn * sparse_table_plugin::mk_filter_by_negation_fn(const table_base & t,
            const table_base & negated_obj, unsigned joined_col_cnt,
            const unsigned * t_cols, const unsigned * negated_cols) {
        if (!check_kind(t) || !check_kind(negated_obj)
            || join_involves_functional(t.get_signature(), negated_obj.get_signature(), joined_col_cnt,
                t_cols, negated_cols) ) {
            return nullptr;
        }
        return alloc(negation_filter_fn, t, negated_obj, joined_col_cnt, t_cols, negated_cols);
    }

    /**
           T \ (S1 Join S2)

           t_cols    - columns from T
           s_cols    - columns from (S1 Join S2) that are equated
           src1_cols - columns from S1 equated with columns from S2
           src2_cols - columns from S2 equated with columns from S1

           t1_cols    - columns from T that map into S1
           s1_cols    - matching columns from s_cols for t1_cols
           t2s1_cols  - columns from T that map into S2, and columns from src1 that join src2
           s2_cols    - matching columns from s_cols for t2s1_cols

           columns from S2 that are equal to a column from S1 that is in s_cols:

           - ...

    */
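    // Illustrative sketch on hypothetical relations: let T = {(1,2)},
    // S1 = {(1,5)}, S2 = {(5,2)}, with t_cols = [0,1], src_cols = [0,3]
    // (equating T[0] = S1[0] and T[1] = S2[1]) and src1_cols = [1],
    // src2_cols = [0] (equating S1[1] = S2[0]). Then (1,5) joins (5,2)
    // into (1,5,5,2), which matches (1,2), so that row is removed from T.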

    class sparse_table_plugin::negated_join_fn : public table_intersection_join_filter_fn {
        typedef sparse_table::store_offset store_offset;
        typedef sparse_table::key_value key_value;
        typedef sparse_table::key_indexer key_indexer;
        unsigned_vector m_t1_cols;
        unsigned_vector m_s1_cols;
        unsigned_vector m_t2_cols;
        unsigned_vector m_s2_cols;
        unsigned_vector m_src1_cols;
    public:
        negated_join_fn(
            table_base const& src1,
            unsigned_vector const& t_cols,
            unsigned_vector const& src_cols,
            unsigned_vector const& src1_cols,
            unsigned_vector const& src2_cols):
            m_src1_cols(src1_cols) {

            // split t_cols and src_cols according to src1 and src2

            unsigned src1_size = src1.get_signature().size();
            for (unsigned i = 0; i < t_cols.size(); ++i) {
                if (src_cols[i] < src1_size) {
                    m_t1_cols.push_back(t_cols[i]);
                    m_s1_cols.push_back(src_cols[i]);
                }
                else {
                    m_t2_cols.push_back(t_cols[i]);
                    //src_cols indexes the joined signature; make it relative to src2
                    m_s2_cols.push_back(src_cols[i] - src1_size);
                }
            }
            m_s2_cols.append(src2_cols);
        }
        void operator()(table_base & _t, const table_base & _s1, const table_base& _s2) override {

            verbose_action  _va("negated_join");
            sparse_table& t = get(_t);
            svector<store_offset> to_remove; //collected in increasing order of offsets
            collect_to_remove(t, get(_s1), get(_s2), to_remove);
            //remove the largest offsets first: remove_offset() moves the last row
            //into the freed slot, so ascending removal could leave pending offsets
            //pointing past the shrunken table
            while (!to_remove.empty()) {
                t.m_data.remove_offset(to_remove.back());
                to_remove.pop_back();
            }
            t.reset_indexes();
        }

    private:
        void collect_to_remove(sparse_table& t, sparse_table const& s1, sparse_table const& s2, svector<store_offset>& to_remove) {
            key_value s1_key, s2_key;
            SASSERT(&s1 != &s2);
            SASSERT(m_s1_cols.size() == m_t1_cols.size());
            SASSERT(m_s2_cols.size() == m_t2_cols.size() + m_src1_cols.size());
            s1_key.resize(m_s1_cols.size());
            s2_key.resize(m_s2_cols.size());
            key_indexer & s1_indexer = s1.get_key_indexer(m_s1_cols.size(), m_s1_cols.c_ptr());
            key_indexer & s2_indexer = s2.get_key_indexer(m_s2_cols.size(), m_s2_cols.c_ptr());

            store_offset t_after_last = t.m_data.after_last_offset();
            key_indexer::query_result s1_offsets, s2_offsets;
            unsigned t_entry_size = t.m_data.entry_size();
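            // For each row of t: look up the s1 rows that agree with it on the
            // t1/s1 columns; for each such s1 row, assemble the s2 key from t's
            // t2 columns and s1's src1 columns. If any s2 row matches, the row
            // of t has a counterpart in the join and is scheduled for removal.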
            for (store_offset t_ofs = 0; t_ofs < t_after_last; t_ofs += t_entry_size) {

                if (update_key(s1_key, 0, t, t_ofs, m_t1_cols)) {
                    s1_offsets = s1_indexer.get_matching_offsets(s1_key);
                }
                key_indexer::offset_iterator it  = s1_offsets.begin();
                key_indexer::offset_iterator end = s1_offsets.end();
                for (; it != end; ++it) {
                    store_offset s1_ofs = *it;
                    bool upd1 = update_key(s2_key, 0, t, t_ofs, m_t2_cols);
                    bool upd2 = update_key(s2_key, m_t2_cols.size(), s1, s1_ofs, m_src1_cols);
                    if (upd1 || upd2) {
                        s2_offsets = s2_indexer.get_matching_offsets(s2_key);
                    }
                    if (!s2_offsets.empty()) {
                        to_remove.push_back(t_ofs);
                        break;
                    }
                }
            }
        }

        inline bool update_key(key_value& key, unsigned key_offset, sparse_table const& t, store_offset ofs, unsigned_vector const& cols) {
            bool modified = false;
            unsigned sz = cols.size();
            for (unsigned i = 0; i < sz; ++i) {
                table_element val = t.get_cell(ofs, cols[i]);
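                // update_key is called first so that "|| modified" cannot
                // short-circuit it away; every key position must be refreshed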
                modified = update_key(key[i+key_offset], val) || modified;
            }
            return modified;
        }

        inline bool update_key(table_element& tgt, table_element src) {
            if (tgt == src) {
                return false;
            }
            else {
                tgt = src;
                return true;
            }
        }

    };

    table_intersection_join_filter_fn* sparse_table_plugin::mk_filter_by_negated_join_fn(
        const table_base & t,
        const table_base & src1,
        const table_base & src2,
        unsigned_vector const& t_cols,
        unsigned_vector const& src_cols,
        unsigned_vector const& src1_cols,
        unsigned_vector const& src2_cols) {
        if (check_kind(t) && check_kind(src1) && check_kind(src2)) {
            return alloc(negated_join_fn, src1, t_cols, src_cols, src1_cols, src2_cols);
        }
        else {
            return nullptr;
        }
    }


    unsigned sparse_table::get_size_estimate_bytes() const {
        unsigned sz = 0;
        sz += m_data.get_size_estimate_bytes();
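        // rough approximation: 8 bytes per slot of the key-index map; the
        // indexers' own storage is not counted (hence the TBD below)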
        sz += m_key_indexes.capacity()*8; // TBD
        return sz;
    }


};