1 /*
2  * Copyright 1993, 1995 Christopher Seiwald.
3  * Copyright 2011 Steven Watanabe
4  *
5  * This file is part of Jam - see jam.c for Copyright information.
6  */
7 
8 /*
9  * object.c - object manipulation routines
10  *
11  * External functions:
12  *    object_new()       - create an object from a string
13  *    object_new_range() - create an object from a string of given length
14  *    object_copy()      - return a copy of an object
15  *    object_free()      - free an object
16  *    object_str()       - get the string value of an object
17  *    object_done()      - free string tables
18  *
19  * This implementation builds a hash table of all strings, so that multiple
20  * calls of object_new() on the same string allocate memory for the string once.
21  * Strings are never actually freed.
22  */
23 
24 #include "jam.h"
25 #include "object.h"
26 #include "output.h"
27 
28 #include <assert.h>
29 #include <stddef.h>
30 #include <stdlib.h>
31 
32 
33 #define OBJECT_MAGIC 0xa762e0e3u
34 
35 #ifndef object_copy
36 
37 struct hash_header
38 {
39 #ifndef NDEBUG
40     unsigned int magic;
41 #endif
42     unsigned int hash;
43     struct hash_item * next;
44 };
45 
46 #endif
47 
48 struct hash_item
49 {
50     struct hash_header header;
51     char data[ 1 ];
52 };
53 
54 #define ALLOC_ALIGNMENT (sizeof(struct hash_item) - sizeof(struct hash_header))
55 
56 typedef struct string_set
57 {
58     unsigned int num;
59     unsigned int size;
60     struct hash_item * * data;
61 } string_set;
62 
63 static string_set strhash;
64 static int strtotal = 0;
65 static int strcount_in = 0;
66 static int strcount_out = 0;
67 
68 
69 /*
70  * Immortal string allocator implementation speeds string allocation and cuts
71  * down on internal fragmentation.
72  */
73 
74 #define STRING_BLOCK 4096
75 typedef struct strblock
76 {
77     struct strblock * next;
78     char data[ STRING_BLOCK ];
79 } strblock;
80 
81 static strblock * strblock_chain = 0;
82 
83 /* Storage remaining in the current strblock */
84 static char * storage_start = 0;
85 static char * storage_finish = 0;
86 
87 
88 /*
89  * allocate() - Allocate n bytes of immortal string storage.
90  */
91 
allocate(size_t n)92 static char * allocate( size_t n )
93 {
94 #ifdef BJAM_NEWSTR_NO_ALLOCATE
95     return (char *)BJAM_MALLOC( n );
96 #else
97     /* See if we can grab storage from an existing block. */
98     size_t remaining = storage_finish - storage_start;
99     n = ( ( n + ALLOC_ALIGNMENT - 1 ) / ALLOC_ALIGNMENT ) * ALLOC_ALIGNMENT;
100     if ( remaining >= n )
101     {
102         char * result = storage_start;
103         storage_start += n;
104         return result;
105     }
106     else  /* Must allocate a new block. */
107     {
108         strblock * new_block;
109         size_t nalloc = n;
110         if ( nalloc < STRING_BLOCK )
111             nalloc = STRING_BLOCK;
112 
113         /* Allocate a new block and link into the chain. */
114         new_block = (strblock *)BJAM_MALLOC( offsetof( strblock, data[ 0 ] ) +
115             nalloc * sizeof( new_block->data[ 0 ] ) );
116         if ( new_block == 0 )
117             return 0;
118         new_block->next = strblock_chain;
119         strblock_chain = new_block;
120 
121         /* Take future allocations out of the larger remaining space. */
122         if ( remaining < nalloc - n )
123         {
124             storage_start = new_block->data + n;
125             storage_finish = new_block->data + nalloc;
126         }
127         return new_block->data;
128     }
129 #endif
130 }
131 
132 
hash_keyval(char const * key,int const size)133 static unsigned int hash_keyval( char const * key, int const size )
134 {
135     unsigned int const magic = 2147059363;
136     unsigned int hash = 0;
137 
138     unsigned int i;
139     for ( i = 0; i < size / sizeof( unsigned int ); ++i )
140     {
141         unsigned int val;
142         memcpy( &val, key, sizeof( unsigned int ) );
143         hash = hash * magic + val;
144         key += sizeof( unsigned int );
145     }
146 
147     {
148         unsigned int val = 0;
149         memcpy( &val, key, size % sizeof( unsigned int ) );
150         hash = hash * magic + val;
151     }
152 
153     return hash + ( hash >> 17 );
154 }
155 
156 
string_set_init(string_set * set)157 static void string_set_init( string_set * set )
158 {
159     set->size = 0;
160     set->num = 4;
161     set->data = (struct hash_item * *)BJAM_MALLOC( set->num * sizeof( struct hash_item * ) );
162     memset( set->data, 0, set->num * sizeof( struct hash_item * ) );
163 }
164 
165 
string_set_done(string_set * set)166 static void string_set_done( string_set * set )
167 {
168     BJAM_FREE( set->data );
169 }
170 
171 
string_set_resize(string_set * set)172 static void string_set_resize( string_set * set )
173 {
174     unsigned i;
175     string_set new_set;
176     new_set.num = set->num * 2;
177     new_set.size = set->size;
178     new_set.data = (struct hash_item * *)BJAM_MALLOC( sizeof( struct hash_item *
179         ) * new_set.num );
180     memset( new_set.data, 0, sizeof( struct hash_item * ) * new_set.num );
181     for ( i = 0; i < set->num; ++i )
182     {
183         while ( set->data[ i ] )
184         {
185             struct hash_item * temp = set->data[ i ];
186             unsigned pos = temp->header.hash % new_set.num;
187             set->data[ i ] = temp->header.next;
188             temp->header.next = new_set.data[ pos ];
189             new_set.data[ pos ] = temp;
190         }
191     }
192     BJAM_FREE( set->data );
193     *set = new_set;
194 }
195 
196 
string_set_insert(string_set * set,char const * string,int const size)197 static char const * string_set_insert( string_set * set, char const * string,
198     int const size )
199 {
200     unsigned hash = hash_keyval( string, size );
201     unsigned pos = hash % set->num;
202 
203     struct hash_item * result;
204 
205     for ( result = set->data[ pos ]; result; result = result->header.next )
206         if ( !strncmp( result->data, string, size ) && !result->data[ size ] )
207             return result->data;
208 
209     if ( set->size >= set->num )
210     {
211         string_set_resize( set );
212         pos = hash % set->num;
213     }
214 
215     result = (struct hash_item *)allocate( sizeof( struct hash_header ) + size +
216         1 );
217     result->header.hash = hash;
218     result->header.next = set->data[ pos ];
219 #ifndef NDEBUG
220     result->header.magic = OBJECT_MAGIC;
221 #endif
222     memcpy( result->data, string, size );
223     result->data[ size ] = '\0';
224     assert( hash_keyval( result->data, size ) == result->header.hash );
225     set->data[ pos ] = result;
226     strtotal += size + 1;
227     ++set->size;
228 
229     return result->data;
230 }
231 
232 
object_get_item(OBJECT * obj)233 static struct hash_item * object_get_item( OBJECT * obj )
234 {
235     return (struct hash_item *)( (char *)obj - offsetof( struct hash_item, data
236         ) );
237 }
238 
239 
object_validate(OBJECT * obj)240 static void object_validate( OBJECT * obj )
241 {
242     assert( obj );
243     assert( object_get_item( obj )->header.magic == OBJECT_MAGIC );
244 }
245 
246 
247 /*
248  * object_new_range() - create an object from a string of given length
249  */
250 
object_new_range(char const * const string,int const size)251 OBJECT * object_new_range( char const * const string, int const size )
252 {
253     ++strcount_in;
254 
255 #ifdef BJAM_NO_MEM_CACHE
256     {
257         struct hash_item * const m = (struct hash_item *)BJAM_MALLOC( sizeof(
258             struct hash_header ) + size + 1 );
259         strtotal += size + 1;
260         memcpy( m->data, string, size );
261         m->data[ size ] = '\0';
262 #ifndef NDEBUG
263         m->header.magic = OBJECT_MAGIC;
264 #endif
265         return (OBJECT *)m->data;
266     }
267 #else
268     if ( !strhash.data )
269         string_set_init( &strhash );
270     return (OBJECT *)string_set_insert( &strhash, string, size );
271 #endif
272 }
273 
274 
275 /*
276  * object_new() - create an object from a string
277  */
278 
object_new(char const * const string)279 OBJECT * object_new( char const * const string )
280 {
281     return object_new_range( string, strlen( string ) );
282 }
283 
284 
285 #ifndef object_copy
286 
287 /*
288  * object_copy() - return a copy of an object
289  */
290 
object_copy(OBJECT * obj)291 OBJECT * object_copy( OBJECT * obj )
292 {
293     object_validate( obj );
294 #ifdef BJAM_NO_MEM_CACHE
295     return object_new( object_str( obj ) );
296 #else
297     ++strcount_in;
298     return obj;
299 #endif
300 }
301 
302 
303 /*
304  * object_free() - free an object
305  */
306 
object_free(OBJECT * obj)307 void object_free( OBJECT * obj )
308 {
309     object_validate( obj );
310 #ifdef BJAM_NO_MEM_CACHE
311     BJAM_FREE( object_get_item( obj ) );
312 #endif
313     ++strcount_out;
314 }
315 
316 
317 /*
318  * object_str() - return the OBJECT's internal C string
319  */
320 
object_str(OBJECT * obj)321 char const * object_str( OBJECT * obj )
322 {
323     object_validate( obj );
324     return (char const *)obj;
325 }
326 
327 
328 /*
329  * object_equal() - compare two objects
330  */
331 
object_equal(OBJECT * lhs,OBJECT * rhs)332 int object_equal( OBJECT * lhs, OBJECT * rhs )
333 {
334     object_validate( lhs );
335     object_validate( rhs );
336 #ifdef BJAM_NO_MEM_CACHE
337     return !strcmp( object_str( lhs ), object_str( rhs ) );
338 #else
339     assert( ( lhs == rhs ) == !strcmp( object_str( lhs ), object_str( rhs ) ) );
340     return lhs == rhs;
341 #endif
342 }
343 
344 
345 /*
346  * object_hash() - returns the hash value of an object
347  */
348 
object_hash(OBJECT * obj)349 unsigned int object_hash( OBJECT * obj )
350 {
351     object_validate( obj );
352 #ifdef BJAM_NO_MEM_CACHE
353     return hash_keyval( object_str( obj ), strlen( object_str( obj ) ) );
354 #else
355     return object_get_item( obj )->header.hash;
356 #endif
357 }
358 
359 #endif
360 
361 /*
362  * object_done() - free string tables.
363  */
364 
object_done()365 void object_done()
366 {
367 #ifdef BJAM_NEWSTR_NO_ALLOCATE
368     unsigned i;
369     for ( i = 0; i < strhash.num; ++i )
370     {
371         while ( strhash.data[ i ] )
372         {
373             struct hash_item * item = strhash.data[ i ];
374             strhash.data[ i ] = item->header.next;
375             BJAM_FREE( item );
376         }
377     }
378 #else
379     /* Reclaim string blocks. */
380     while ( strblock_chain )
381     {
382         strblock * const n = strblock_chain->next;
383         BJAM_FREE( strblock_chain );
384         strblock_chain = n;
385     }
386 #endif
387 
388     string_set_done( &strhash );
389 
390     if ( DEBUG_MEM )
391     {
392         out_printf( "%dK in strings\n", strtotal / 1024 );
393         if ( strcount_in != strcount_out )
394             out_printf( "--- %d strings of %d dangling\n", strcount_in -
395                 strcount_out, strcount_in );
396     }
397 }
398