1 /*
2 ** Copyright (c) 2007 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the Simplified BSD License (also
6 ** known as the "2-Clause License" or "FreeBSD License".)
7 
8 ** This program is distributed in the hope that it will be useful,
9 ** but without any warranty; without even the implied warranty of
10 ** merchantability or fitness for a particular purpose.
11 **
12 ** Author contact information:
13 **   drh@hwaci.com
14 **   http://www.hwaci.com/drh/
15 **
16 *******************************************************************************
17 **
18 ** This file contains code used to implement a "bag" of integers.
19 ** A bag is an unordered collection without duplicates.  In this
20 ** implementation, all elements must be positive integers.
21 */
22 #include "config.h"
23 #include "bag.h"
24 #include <assert.h>
25 
26 
27 #if INTERFACE
28 /*
29 ** An integer can appear in the bag at most once.
30 ** Integers must be positive.
31 **
32 ** On a hash collision, search continues to the next slot in the array,
33 ** looping back to the beginning of the array when we reach the end.
34 ** The search stops when a match is found or upon encountering a 0 entry.
35 **
36 ** When an entry is deleted, its value is changed to -1.
37 **
38 ** Bag.cnt is the number of live entries in the table.  Bag.used is
39 ** the number of live entries plus the number of deleted entries.  So
40 ** Bag.used>=Bag.cnt.  We want to keep Bag.used-Bag.cnt as small as
41 ** possible.
42 **
43 ** The length of a search increases as the hash table fills up.  So
44 ** the table is enlarged whenever Bag.used reaches half of Bag.sz.  That
45 ** way, the expected collision length never exceeds 2.
46 */
47 struct Bag {
48   int cnt;   /* Number of integers in the bag */
49   int sz;    /* Number of slots in a[] */
50   int used;  /* Number of used slots in a[] */
51   int *a;    /* Hash table of integers that are in the bag */
52 };
53 /*
54 ** An expression for statically initializing a Bag instance, to be
55 ** assigned to Bag instances at their declaration point. It has
56 ** the same effect as passing the Bag to bag_init().
57 */
58 #define Bag_INIT {0,0,0,0}
59 #endif
60 
61 /*
62 ** Initialize a Bag structure
63 */
bag_init(Bag * p)64 void bag_init(Bag *p){
65   memset(p, 0, sizeof(*p));
66 }
67 
68 /*
69 ** Destroy a Bag.  Delete all of its content.
70 */
bag_clear(Bag * p)71 void bag_clear(Bag *p){
72   free(p->a);
73   bag_init(p);
74 }
75 
76 /*
77 ** The hash function
78 */
79 #define bag_hash(i)  (i*101)
80 
81 /*
82 ** Change the size of the hash table on a bag so that
83 ** it contains N slots
84 **
85 ** Completely reconstruct the hash table from scratch.  Deleted
86 ** entries (indicated by a -1) are removed.  When finished, it
87 ** should be the case that p->cnt==p->used.
88 */
bag_resize(Bag * p,int newSize)89 static void bag_resize(Bag *p, int newSize){
90   int i;
91   Bag old;
92   int nDel = 0;   /* Number of deleted entries */
93   int nLive = 0;  /* Number of live entries */
94 
95   old = *p;
96   assert( newSize>old.cnt );
97   p->a = fossil_malloc( sizeof(p->a[0])*newSize );
98   p->sz = newSize;
99   memset(p->a, 0, sizeof(p->a[0])*newSize );
100   for(i=0; i<old.sz; i++){
101     int e = old.a[i];
102     if( e>0 ){
103       unsigned h = bag_hash(e)%newSize;
104       while( p->a[h] ){
105         h++;
106         if( h==newSize ) h = 0;
107       }
108       p->a[h] = e;
109       nLive++;
110     }else if( e<0 ){
111       nDel++;
112     }
113   }
114   assert( p->cnt == nLive );
115   assert( p->used == nLive+nDel );
116   p->used = p->cnt;
117   bag_clear(&old);
118 }
119 
120 /*
121 ** Insert element e into the bag if it is not there already.
122 ** Return TRUE if the insert actually occurred.  Return FALSE
123 ** if the element was already in the bag.
124 */
bag_insert(Bag * p,int e)125 int bag_insert(Bag *p, int e){
126   unsigned h;
127   int rc = 0;
128   assert( e>0 );
129   if( p->used+1 >= p->sz/2 ){
130     int n = p->sz*2;
131     bag_resize(p,  n + 20 );
132   }
133   h = bag_hash(e)%p->sz;
134   while( p->a[h]>0 && p->a[h]!=e ){
135     h++;
136     if( h>=p->sz ) h = 0;
137   }
138   if( p->a[h]<=0 ){
139     if( p->a[h]==0 ) p->used++;
140     p->a[h] = e;
141     p->cnt++;
142     rc = 1;
143   }
144   return rc;
145 }
146 
147 /*
148 ** Return true if e in the bag.  Return false if it is no.
149 */
bag_find(Bag * p,int e)150 int bag_find(Bag *p, int e){
151   unsigned h;
152   assert( e>0 );
153   if( p->sz==0 ){
154     return 0;
155   }
156   h = bag_hash(e)%p->sz;
157   while( p->a[h] && p->a[h]!=e ){
158     h++;
159     if( h>=p->sz ) h = 0;
160   }
161   return p->a[h]==e;
162 }
163 
164 /*
165 ** Remove element e from the bag if it exists in the bag.
166 ** If e is not in the bag, this is a no-op.
167 */
bag_remove(Bag * p,int e)168 void bag_remove(Bag *p, int e){
169   unsigned h;
170   assert( e>0 );
171   if( p->sz==0 ) return;
172   h = bag_hash(e)%p->sz;
173   while( p->a[h] && p->a[h]!=e ){
174     h++;
175     if( h>=p->sz ) h = 0;
176   }
177   if( p->a[h] ){
178     int nx = h+1;
179     if( nx>=p->sz ) nx = 0;
180     if( p->a[nx]==0 ){
181       p->a[h] = 0;
182       p->used--;
183     }else{
184       p->a[h] = -1;
185     }
186     p->cnt--;
187     if( p->cnt==0 ){
188       memset(p->a, 0, p->sz*sizeof(p->a[0]));
189       p->used = 0;
190     }else if( p->sz>40 && p->cnt<p->sz/8 ){
191       bag_resize(p, p->sz/2);
192     }
193   }
194 }
195 
196 /*
197 ** Return the first element in the bag.  Return 0 if the bag
198 ** is empty.
199 */
bag_first(Bag * p)200 int bag_first(Bag *p){
201   int i;
202   for(i=0; i<p->sz && p->a[i]<=0; i++){}
203   if( i<p->sz ){
204     return p->a[i];
205   }else{
206     return 0;
207   }
208 }
209 
210 /*
211 ** Return the next element in the bag after e.  Return 0 if
212 ** is the last element in the bag.  Any insert or removal from
213 ** the bag might reorder the bag.
214 */
bag_next(Bag * p,int e)215 int bag_next(Bag *p, int e){
216   unsigned h;
217   assert( p->sz>0 );
218   assert( e>0 );
219   h = bag_hash(e)%p->sz;
220   while( p->a[h] && p->a[h]!=e ){
221     h++;
222     if( h>=p->sz ) h = 0;
223   }
224   assert( p->a[h] );
225   h++;
226   while( h<p->sz && p->a[h]<=0 ){
227     h++;
228   }
229   return h<p->sz ? p->a[h] : 0;
230 }
231 
232 /*
233 ** Return the number of elements in the bag.
234 */
bag_count(Bag * p)235 int bag_count(Bag *p){
236   return p->cnt;
237 }
238