1 #include "system.h"
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <pthread.h>
5 #include <rpm/rpmstring.h>
6 #include <rpm/rpmstrpool.h>
7 #include "debug.h"
8 
9 #define STRDATA_CHUNKS 1024
10 #define STRDATA_CHUNK 65536
11 #define STROFFS_CHUNK 2048
12 /* XXX this is ridiculously small... */
13 #define STRHASH_INITSIZE 1024
14 
15 static int pool_debug = 0;
16 
17 typedef struct poolHash_s * poolHash;
18 typedef struct poolHashBucket_s poolHashBucket;
19 
20 struct poolHashBucket_s {
21     rpmsid keyid;
22 };
23 
24 struct poolHash_s {
25     int numBuckets;
26     poolHashBucket * buckets;
27     int keyCount;
28 };
29 
30 struct rpmstrPool_s {
31     const char ** offs;		/* pointers into data area */
32     rpmsid offs_size;		/* largest offset index */;
33     rpmsid offs_alloced;	/* offsets allocation size */
34 
35     char ** chunks;		/* memory chunks for storing the strings */
36     size_t chunks_size;		/* current chunk */
37     size_t chunks_allocated;	/* allocated size of the chunks array */
38     size_t chunk_allocated;	/* size of the current chunk */
39     size_t chunk_used;		/* usage of the current chunk */
40 
41     poolHash hash;		/* string -> sid hash table */
42     int frozen;			/* are new id additions allowed? */
43     int nrefs;			/* refcount */
44     pthread_rwlock_t lock;
45 };
46 
47 static inline const char *id2str(rpmstrPool pool, rpmsid sid);
48 
poolLock(rpmstrPool pool,int write)49 static inline void poolLock(rpmstrPool pool, int write)
50 {
51     if (write)
52 	pthread_rwlock_wrlock(&pool->lock);
53     else
54 	pthread_rwlock_rdlock(&pool->lock);
55 }
56 
poolUnlock(rpmstrPool pool)57 static inline void poolUnlock(rpmstrPool pool)
58 {
59     pthread_rwlock_unlock(&pool->lock);
60 }
61 
62 /* calculate hash and string length on at once */
rstrlenhash(const char * str,size_t * len)63 static inline unsigned int rstrlenhash(const char * str, size_t * len)
64 {
65     /* Jenkins One-at-a-time hash */
66     unsigned int hash = 0xe4721b68;
67     const char * s = str;
68 
69     while (*s != '\0') {
70       hash += *s;
71       hash += (hash << 10);
72       hash ^= (hash >> 6);
73       s++;
74     }
75     hash += (hash << 3);
76     hash ^= (hash >> 11);
77     hash += (hash << 15);
78 
79     if (len)
80 	*len = (s - str);
81 
82     return hash;
83 }
84 
rstrnlenhash(const char * str,size_t n,size_t * len)85 static inline unsigned int rstrnlenhash(const char * str, size_t n, size_t * len)
86 {
87     /* Jenkins One-at-a-time hash */
88     unsigned int hash = 0xe4721b68;
89     const char * s = str;
90 
91     while (n > 0 && *s != '\0') {
92       hash += *s;
93       hash += (hash << 10);
94       hash ^= (hash >> 6);
95       s++;
96       n--;
97     }
98     hash += (hash << 3);
99     hash ^= (hash >> 11);
100     hash += (hash << 15);
101 
102     if (len)
103 	*len = (s - str);
104 
105     return hash;
106 }
107 
rstrnhash(const char * string,size_t n)108 static inline unsigned int rstrnhash(const char * string, size_t n)
109 {
110     return rstrnlenhash(string, n, NULL);
111 }
112 
rstrhash(const char * string)113 unsigned int rstrhash(const char * string)
114 {
115     return rstrlenhash(string, NULL);
116 }
117 
hashbucket(unsigned int hash,unsigned int number)118 static inline unsigned int hashbucket(unsigned int hash, unsigned int number)
119 {
120     return hash + number*number;
121 }
122 
poolHashCreate(int numBuckets)123 static poolHash poolHashCreate(int numBuckets)
124 {
125     poolHash ht;
126 
127     ht = xmalloc(sizeof(*ht));
128     ht->numBuckets = numBuckets;
129     ht->buckets = xcalloc(numBuckets, sizeof(*ht->buckets));
130     ht->keyCount = 0;
131     return ht;
132 }
133 
poolHashResize(rpmstrPool pool,int numBuckets)134 static void poolHashResize(rpmstrPool pool, int numBuckets)
135 {
136     poolHash ht = pool->hash;
137     poolHashBucket * buckets = xcalloc(numBuckets, sizeof(*ht->buckets));
138 
139     for (int i=0; i<ht->numBuckets; i++) {
140         if (!ht->buckets[i].keyid) continue;
141         unsigned int keyHash = rstrhash(id2str(pool, ht->buckets[i].keyid));
142         for (unsigned int j=0;;j++) {
143             unsigned int hash = hashbucket(keyHash, j) % numBuckets;
144             if (!buckets[hash].keyid) {
145                 buckets[hash].keyid = ht->buckets[i].keyid;
146                 break;
147             }
148         }
149     }
150     free((void *)(ht->buckets));
151     ht->buckets = buckets;
152     ht->numBuckets = numBuckets;
153 }
154 
poolHashAddHEntry(rpmstrPool pool,const char * key,unsigned int keyHash,rpmsid keyid)155 static void poolHashAddHEntry(rpmstrPool pool, const char * key, unsigned int keyHash, rpmsid keyid)
156 {
157     poolHash ht = pool->hash;
158 
159     /* keep load factor between 0.25 and 0.5 */
160     if (2*(ht->keyCount) > ht->numBuckets) {
161         poolHashResize(pool, ht->numBuckets * 2);
162     }
163 
164     for (unsigned int i=0;;i++) {
165         unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets;
166         if (!ht->buckets[hash].keyid) {
167             ht->buckets[hash].keyid = keyid;
168             ht->keyCount++;
169             break;
170         } else if (!strcmp(id2str(pool, ht->buckets[hash].keyid), key)) {
171             return;
172         }
173     }
174 }
175 
poolHashAddEntry(rpmstrPool pool,const char * key,rpmsid keyid)176 static void poolHashAddEntry(rpmstrPool pool, const char * key, rpmsid keyid)
177 {
178     poolHashAddHEntry(pool, key, rstrhash(key), keyid);
179 }
180 
poolHashEmpty(poolHash ht)181 static void poolHashEmpty( poolHash ht)
182 {
183     unsigned int i;
184 
185     if (ht->keyCount == 0) return;
186 
187     for (i = 0; i < ht->numBuckets; i++) {
188 	ht->buckets[i].keyid = 0;
189     }
190     ht->keyCount = 0;
191 }
192 
poolHashFree(poolHash ht)193 static poolHash poolHashFree(poolHash ht)
194 {
195     if (ht==NULL)
196         return ht;
197     poolHashEmpty(ht);
198     ht->buckets = _free(ht->buckets);
199     ht = _free(ht);
200 
201     return NULL;
202 }
203 
poolHashPrintStats(rpmstrPool pool)204 static void poolHashPrintStats(rpmstrPool pool)
205 {
206     poolHash ht = pool->hash;
207     int i;
208     int collisions = 0;
209     int maxcollisions = 0;
210 
211     for (i=0; i<ht->numBuckets; i++) {
212         unsigned int keyHash = rstrhash(id2str(pool, ht->buckets[i].keyid));
213         for (unsigned int j=0;;j++) {
214             unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets;
215             if (hash==i) {
216                 collisions += j;
217                 maxcollisions = maxcollisions > j ? maxcollisions : j;
218                 break;
219             }
220         }
221     }
222     fprintf(stderr, "Hashsize: %i\n", ht->numBuckets);
223     fprintf(stderr, "Keys: %i\n", ht->keyCount);
224     fprintf(stderr, "Collisions: %i\n", collisions);
225     fprintf(stderr, "Max collisions: %i\n", maxcollisions);
226 }
227 
rpmstrPoolRehash(rpmstrPool pool)228 static void rpmstrPoolRehash(rpmstrPool pool)
229 {
230     int sizehint;
231 
232     if (pool->offs_size < STRHASH_INITSIZE)
233 	sizehint = STRHASH_INITSIZE;
234     else
235 	sizehint = pool->offs_size * 2;
236 
237     if (pool->hash)
238 	pool->hash = poolHashFree(pool->hash);
239 
240     pool->hash = poolHashCreate(sizehint);
241     for (int i = 1; i <= pool->offs_size; i++)
242 	poolHashAddEntry(pool, id2str(pool, i), i);
243 }
244 
rpmstrPoolCreate(void)245 rpmstrPool rpmstrPoolCreate(void)
246 {
247     rpmstrPool pool = xcalloc(1, sizeof(*pool));
248 
249     pool->offs_alloced = STROFFS_CHUNK;
250     pool->offs = xcalloc(pool->offs_alloced, sizeof(*pool->offs));
251 
252     pool->chunks_allocated = STRDATA_CHUNKS;
253     pool->chunks = xcalloc(pool->chunks_allocated, sizeof(*pool->chunks));
254     pool->chunks_size = 1;
255     pool->chunk_allocated = STRDATA_CHUNK;
256     pool->chunks[pool->chunks_size] = xcalloc(1, pool->chunk_allocated);
257     pool->offs[1] = pool->chunks[pool->chunks_size];
258 
259     rpmstrPoolRehash(pool);
260     pool->nrefs = 1;
261     pthread_rwlock_init(&pool->lock, NULL);
262     return pool;
263 }
264 
rpmstrPoolFree(rpmstrPool pool)265 rpmstrPool rpmstrPoolFree(rpmstrPool pool)
266 {
267     if (pool) {
268 	poolLock(pool, 1);
269 	if (pool->nrefs > 1) {
270 	    pool->nrefs--;
271 	    poolUnlock(pool);
272 	} else {
273 	    if (pool_debug)
274 		poolHashPrintStats(pool);
275 	    poolHashFree(pool->hash);
276 	    free(pool->offs);
277 	    for (int i=1;i<=pool->chunks_size;i++) {
278 		pool->chunks[i] = _free(pool->chunks[i]);
279 	    }
280 	    free(pool->chunks);
281 	    poolUnlock(pool);
282 	    pthread_rwlock_destroy(&pool->lock);
283 	    free(pool);
284 	}
285     }
286     return NULL;
287 }
288 
rpmstrPoolLink(rpmstrPool pool)289 rpmstrPool rpmstrPoolLink(rpmstrPool pool)
290 {
291     if (pool) {
292 	poolLock(pool, 1);
293 	pool->nrefs++;
294 	poolUnlock(pool);
295     }
296     return pool;
297 }
298 
rpmstrPoolFreeze(rpmstrPool pool,int keephash)299 void rpmstrPoolFreeze(rpmstrPool pool, int keephash)
300 {
301     if (!pool)
302 	return;
303 
304     poolLock(pool, 1);
305     if (!pool->frozen) {
306 	if (!keephash) {
307 	    pool->hash = poolHashFree(pool->hash);
308 	}
309 	pool->offs_alloced = pool->offs_size + 2; /* space for end marker */
310 	pool->offs = xrealloc(pool->offs,
311 			      pool->offs_alloced * sizeof(*pool->offs));
312 	pool->frozen = 1;
313     }
314     poolUnlock(pool);
315 }
316 
rpmstrPoolUnfreeze(rpmstrPool pool)317 void rpmstrPoolUnfreeze(rpmstrPool pool)
318 {
319     if (pool) {
320 	poolLock(pool, 1);
321 	if (pool->hash == NULL) {
322 	    rpmstrPoolRehash(pool);
323 	}
324 	pool->frozen = 0;
325 	poolUnlock(pool);
326     }
327 }
328 
rpmstrPoolPut(rpmstrPool pool,const char * s,size_t slen,unsigned int hash)329 static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigned int hash)
330 {
331     char *t = NULL;
332     size_t ssize = slen + 1;
333 
334     pool->offs_size += 1;
335     if (pool->offs_alloced <= pool->offs_size) {
336 	pool->offs_alloced += STROFFS_CHUNK;
337 	pool->offs = xrealloc(pool->offs,
338 			      pool->offs_alloced * sizeof(*pool->offs));
339     }
340 
341     /* Do we need a new chunk to store the string? */
342     if (ssize > pool->chunk_allocated - pool->chunk_used) {
343 	pool->chunks_size += 1;
344 	/* Grow chunks array if needed */
345 	if (pool->chunks_size >= pool->chunks_allocated) {
346 	    pool->chunks_allocated += pool->chunks_allocated;
347 	    pool->chunks = xrealloc(pool->chunks,
348 				pool->chunks_allocated * sizeof(*pool->chunks));
349 	}
350 
351 	/* Ensure the string fits in the new chunk we're about to allocate */
352 	if (ssize > pool->chunk_allocated) {
353 	    pool->chunk_allocated = 2 * ssize;
354 	}
355 
356 	pool->chunks[pool->chunks_size] = xcalloc(1, pool->chunk_allocated);
357 	pool->chunk_used = 0;
358     }
359 
360     /* Copy the string into current chunk, ensure termination */
361     t = memcpy(pool->chunks[pool->chunks_size] + pool->chunk_used, s, slen);
362     t[slen] = '\0';
363     pool->chunk_used += ssize;
364 
365     /* Actually add the string to the pool */
366     pool->offs[pool->offs_size] = t;
367     poolHashAddHEntry(pool, t, hash, pool->offs_size);
368 
369     return pool->offs_size;
370 }
371 
rpmstrPoolGet(rpmstrPool pool,const char * key,size_t keylen,unsigned int keyHash)372 static rpmsid rpmstrPoolGet(rpmstrPool pool, const char * key, size_t keylen,
373 			    unsigned int keyHash)
374 {
375     poolHash ht = pool->hash;
376     const char * s;
377 
378 
379     for (unsigned int i=0;; i++) {
380         unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets;
381 
382         if (!ht->buckets[hash].keyid) {
383             return 0;
384         }
385 
386 	s = id2str(pool, ht->buckets[hash].keyid);
387 	/* pool string could be longer than keylen, require exact matche */
388 	if (strncmp(s, key, keylen) == 0 && s[keylen] == '\0')
389 	    return ht->buckets[hash].keyid;
390     }
391 }
392 
strn2id(rpmstrPool pool,const char * s,size_t slen,unsigned int hash,int create)393 static inline rpmsid strn2id(rpmstrPool pool, const char *s, size_t slen,
394 			     unsigned int hash, int create)
395 {
396     rpmsid sid = 0;
397 
398     if (pool->hash) {
399 	sid = rpmstrPoolGet(pool, s, slen, hash);
400 	if (sid == 0 && create && !pool->frozen)
401 	    sid = rpmstrPoolPut(pool, s, slen, hash);
402     }
403     return sid;
404 }
405 
id2str(rpmstrPool pool,rpmsid sid)406 static inline const char *id2str(rpmstrPool pool, rpmsid sid)
407 {
408     const char *s = NULL;
409     if (sid > 0 && sid <= pool->offs_size)
410 	s = pool->offs[sid];
411     return s;
412 }
413 
rpmstrPoolIdn(rpmstrPool pool,const char * s,size_t slen,int create)414 rpmsid rpmstrPoolIdn(rpmstrPool pool, const char *s, size_t slen, int create)
415 {
416     rpmsid sid = 0;
417 
418     if (pool && s) {
419 	unsigned int hash = rstrnhash(s, slen);
420 	poolLock(pool, create);
421 	sid = strn2id(pool, s, slen, hash, create);
422 	poolUnlock(pool);
423     }
424     return sid;
425 }
426 
rpmstrPoolId(rpmstrPool pool,const char * s,int create)427 rpmsid rpmstrPoolId(rpmstrPool pool, const char *s, int create)
428 {
429     rpmsid sid = 0;
430 
431     if (pool && s) {
432 	size_t slen;
433 	unsigned int hash = rstrlenhash(s, &slen);
434 	poolLock(pool, create);
435 	sid = strn2id(pool, s, slen, hash, create);
436 	poolUnlock(pool);
437     }
438     return sid;
439 }
440 
rpmstrPoolStr(rpmstrPool pool,rpmsid sid)441 const char * rpmstrPoolStr(rpmstrPool pool, rpmsid sid)
442 {
443     const char *s = NULL;
444     if (pool) {
445 	poolLock(pool, 0);
446 	s = id2str(pool, sid);
447 	poolUnlock(pool);
448     }
449     return s;
450 }
451 
rpmstrPoolStrlen(rpmstrPool pool,rpmsid sid)452 size_t rpmstrPoolStrlen(rpmstrPool pool, rpmsid sid)
453 {
454     size_t slen = 0;
455     if (pool) {
456 	poolLock(pool, 0);
457 	const char *s = id2str(pool, sid);
458 	if (s)
459 	    slen = strlen(s);
460 	poolUnlock(pool);
461     }
462     return slen;
463 }
464 
rpmstrPoolStreq(rpmstrPool poolA,rpmsid sidA,rpmstrPool poolB,rpmsid sidB)465 int rpmstrPoolStreq(rpmstrPool poolA, rpmsid sidA,
466 		    rpmstrPool poolB, rpmsid sidB)
467 {
468     int eq;
469     if (poolA == poolB)
470 	 eq = (sidA == sidB);
471     else {
472 	poolLock(poolA, 0);
473 	poolLock(poolB, 0);
474 	const char *a = rpmstrPoolStr(poolA, sidA);
475 	const char *b = rpmstrPoolStr(poolB, sidB);
476 	eq = rstreq(a, b);
477 	poolUnlock(poolA);
478 	poolUnlock(poolB);
479     }
480     return eq;
481 }
482 
rpmstrPoolNumStr(rpmstrPool pool)483 rpmsid rpmstrPoolNumStr(rpmstrPool pool)
484 {
485     rpmsid n = 0;
486     if (pool) {
487 	poolLock(pool, 0);
488 	n = pool->offs_size;
489 	poolUnlock(pool);
490     }
491     return n;
492 }
493