1 /*
2 ** Copyright (c) 2007 D. Richard Hipp
3 **
4 ** This program is free software; you can redistribute it and/or
5 ** modify it under the terms of the Simplified BSD License (also
6 ** known as the "2-Clause License" or "FreeBSD License".)
7
8 ** This program is distributed in the hope that it will be useful,
9 ** but without any warranty; without even the implied warranty of
10 ** merchantability or fitness for a particular purpose.
11 **
12 ** Author contact information:
13 ** drh@hwaci.com
14 ** http://www.hwaci.com/drh/
15 **
16 *******************************************************************************
17 **
18 ** This file contains code used to implement a "bag" of integers.
19 ** A bag is an unordered collection without duplicates. In this
20 ** implementation, all elements must be positive integers.
21 */
22 #include "config.h"
23 #include "bag.h"
24 #include <assert.h>
25
26
27 #if INTERFACE
28 /*
29 ** An integer can appear in the bag at most once.
30 ** Integers must be positive.
31 **
32 ** On a hash collision, search continues to the next slot in the array,
33 ** looping back to the beginning of the array when we reach the end.
34 ** The search stops when a match is found or upon encountering a 0 entry.
35 **
36 ** When an entry is deleted, its value is changed to -1.
37 **
38 ** Bag.cnt is the number of live entries in the table. Bag.used is
39 ** the number of live entries plus the number of deleted entries. So
40 ** Bag.used>=Bag.cnt. We want to keep Bag.used-Bag.cnt as small as
41 ** possible.
42 **
43 ** The length of a search increases as the hash table fills up. So
44 ** the table is enlarged whenever Bag.used reaches half of Bag.sz. That
45 ** way, the expected collision length never exceeds 2.
46 */
47 struct Bag {
48 int cnt; /* Number of integers in the bag */
49 int sz; /* Number of slots in a[] */
50 int used; /* Number of used slots in a[] */
51 int *a; /* Hash table of integers that are in the bag */
52 };
53 /*
54 ** An expression for statically initializing a Bag instance, to be
55 ** assigned to Bag instances at their declaration point. It has
56 ** the same effect as passing the Bag to bag_init().
57 */
58 #define Bag_INIT {0,0,0,0}
59 #endif
60
61 /*
62 ** Initialize a Bag structure
63 */
bag_init(Bag * p)64 void bag_init(Bag *p){
65 memset(p, 0, sizeof(*p));
66 }
67
68 /*
69 ** Destroy a Bag. Delete all of its content.
70 */
bag_clear(Bag * p)71 void bag_clear(Bag *p){
72 free(p->a);
73 bag_init(p);
74 }
75
76 /*
77 ** The hash function
78 */
79 #define bag_hash(i) (i*101)
80
81 /*
82 ** Change the size of the hash table on a bag so that
83 ** it contains N slots
84 **
85 ** Completely reconstruct the hash table from scratch. Deleted
86 ** entries (indicated by a -1) are removed. When finished, it
87 ** should be the case that p->cnt==p->used.
88 */
bag_resize(Bag * p,int newSize)89 static void bag_resize(Bag *p, int newSize){
90 int i;
91 Bag old;
92 int nDel = 0; /* Number of deleted entries */
93 int nLive = 0; /* Number of live entries */
94
95 old = *p;
96 assert( newSize>old.cnt );
97 p->a = fossil_malloc( sizeof(p->a[0])*newSize );
98 p->sz = newSize;
99 memset(p->a, 0, sizeof(p->a[0])*newSize );
100 for(i=0; i<old.sz; i++){
101 int e = old.a[i];
102 if( e>0 ){
103 unsigned h = bag_hash(e)%newSize;
104 while( p->a[h] ){
105 h++;
106 if( h==newSize ) h = 0;
107 }
108 p->a[h] = e;
109 nLive++;
110 }else if( e<0 ){
111 nDel++;
112 }
113 }
114 assert( p->cnt == nLive );
115 assert( p->used == nLive+nDel );
116 p->used = p->cnt;
117 bag_clear(&old);
118 }
119
120 /*
121 ** Insert element e into the bag if it is not there already.
122 ** Return TRUE if the insert actually occurred. Return FALSE
123 ** if the element was already in the bag.
124 */
bag_insert(Bag * p,int e)125 int bag_insert(Bag *p, int e){
126 unsigned h;
127 int rc = 0;
128 assert( e>0 );
129 if( p->used+1 >= p->sz/2 ){
130 int n = p->sz*2;
131 bag_resize(p, n + 20 );
132 }
133 h = bag_hash(e)%p->sz;
134 while( p->a[h]>0 && p->a[h]!=e ){
135 h++;
136 if( h>=p->sz ) h = 0;
137 }
138 if( p->a[h]<=0 ){
139 if( p->a[h]==0 ) p->used++;
140 p->a[h] = e;
141 p->cnt++;
142 rc = 1;
143 }
144 return rc;
145 }
146
147 /*
148 ** Return true if e in the bag. Return false if it is no.
149 */
bag_find(Bag * p,int e)150 int bag_find(Bag *p, int e){
151 unsigned h;
152 assert( e>0 );
153 if( p->sz==0 ){
154 return 0;
155 }
156 h = bag_hash(e)%p->sz;
157 while( p->a[h] && p->a[h]!=e ){
158 h++;
159 if( h>=p->sz ) h = 0;
160 }
161 return p->a[h]==e;
162 }
163
164 /*
165 ** Remove element e from the bag if it exists in the bag.
166 ** If e is not in the bag, this is a no-op.
167 */
bag_remove(Bag * p,int e)168 void bag_remove(Bag *p, int e){
169 unsigned h;
170 assert( e>0 );
171 if( p->sz==0 ) return;
172 h = bag_hash(e)%p->sz;
173 while( p->a[h] && p->a[h]!=e ){
174 h++;
175 if( h>=p->sz ) h = 0;
176 }
177 if( p->a[h] ){
178 int nx = h+1;
179 if( nx>=p->sz ) nx = 0;
180 if( p->a[nx]==0 ){
181 p->a[h] = 0;
182 p->used--;
183 }else{
184 p->a[h] = -1;
185 }
186 p->cnt--;
187 if( p->cnt==0 ){
188 memset(p->a, 0, p->sz*sizeof(p->a[0]));
189 p->used = 0;
190 }else if( p->sz>40 && p->cnt<p->sz/8 ){
191 bag_resize(p, p->sz/2);
192 }
193 }
194 }
195
196 /*
197 ** Return the first element in the bag. Return 0 if the bag
198 ** is empty.
199 */
bag_first(Bag * p)200 int bag_first(Bag *p){
201 int i;
202 for(i=0; i<p->sz && p->a[i]<=0; i++){}
203 if( i<p->sz ){
204 return p->a[i];
205 }else{
206 return 0;
207 }
208 }
209
210 /*
211 ** Return the next element in the bag after e. Return 0 if
212 ** is the last element in the bag. Any insert or removal from
213 ** the bag might reorder the bag.
214 */
bag_next(Bag * p,int e)215 int bag_next(Bag *p, int e){
216 unsigned h;
217 assert( p->sz>0 );
218 assert( e>0 );
219 h = bag_hash(e)%p->sz;
220 while( p->a[h] && p->a[h]!=e ){
221 h++;
222 if( h>=p->sz ) h = 0;
223 }
224 assert( p->a[h] );
225 h++;
226 while( h<p->sz && p->a[h]<=0 ){
227 h++;
228 }
229 return h<p->sz ? p->a[h] : 0;
230 }
231
232 /*
233 ** Return the number of elements in the bag.
234 */
bag_count(Bag * p)235 int bag_count(Bag *p){
236 return p->cnt;
237 }
238