1 /* Copyright (C) 2011 G.P. Halkes
2 This program is free software: you can redistribute it and/or modify
3 it under the terms of the GNU General Public License version 3, as
4 published by the Free Software Foundation.
5
6 This program is distributed in the hope that it will be useful,
7 but WITHOUT ANY WARRANTY; without even the implied warranty of
8 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 GNU General Public License for more details.
10
11 You should have received a copy of the GNU General Public License
12 along with this program. If not, see <http://www.gnu.org/licenses/>.
13 */
14
15 #include <stdlib.h>
16 #include <string.h>
17 #include <stddef.h>
18 #include "definitions.h"
19 #include "hashtable.h"
20
21 #define MEMBLOCK_SIZE 32768
22 #define ALIGNOF(type) offsetof(struct { char c; type member; }, member)
23 #define ROUNDUP(x, to) (((x + (to - 1)) / to) * to)
24
25 typedef struct MemBlock {
26 struct MemBlock *next;
27 size_t size;
28 size_t idx;
29 } MemBlock;
30
31 static MemBlock *head;
32
33 #define HASHTABLE_SIZE 8091
34
35 typedef struct Tuple {
36 ValueType value;
37 size_t stringLength;
38 struct Tuple *next;
39 char string[1];
40 } Tuple;
41
42 static Tuple *hashtable[HASHTABLE_SIZE];
43 static ValueType nextValue;
44
45 ValueType baseHashMax;
46
allocFromBlock(size_t size)47 static Tuple *allocFromBlock(size_t size) {
48 Tuple *result;
49
50 size = ROUNDUP(size, ALIGNOF(Tuple));
51 if (head == NULL || size > head->size - head->idx) {
52 MemBlock *newBlock;
53 if (size > 2048) {
54 newBlock = safe_malloc(size + sizeof(MemBlock));
55 newBlock->size = size + sizeof(MemBlock);
56 } else {
57 newBlock = safe_malloc(MEMBLOCK_SIZE);
58 newBlock->size = MEMBLOCK_SIZE;
59 }
60 newBlock->idx = ROUNDUP(sizeof(MemBlock), ALIGNOF(Tuple));
61 newBlock->next = head;
62 head = newBlock;
63 }
64 result = (Tuple *)(((char *) head) + head->idx);
65 head->idx += size;
66 return result;
67 }
68
freeBlocks(void)69 static void freeBlocks(void) {
70 MemBlock *ptr;
71 while (head != NULL) {
72 ptr = head;
73 head = head->next;
74 free(ptr);
75 }
76 }
77
78 #ifdef PROFILE_HASH
79 static int collisions;
80 static int hits;
81
printHashStatistics(void)82 void printHashStatistics(void) {
83 fprintf(stderr, "Hash statistics: unique words: %d, collisions %d (%.2f%%), hits %d\n",
84 (int) nextValue, collisions, (double) collisions * 100.0 / nextValue, hits);
85 collisions = 0;
86 hits = 0;
87 }
88 #endif
89
90 /** Calculate a hash value for a string.
91 @param key The string to hash.
92 @return The hash value associated with the string.
93
94 This function uses the djb2 hash function.
95 */
hash(void * data,size_t size)96 static unsigned int hash(void *data, size_t size) {
97 const unsigned char *ptr;
98 size_t i;
99 unsigned int hashValue = 5381;
100
101 ptr = data;
102
103 for (i = 0; i < size; i++)
104 hashValue = (hashValue << 5) + hashValue + (unsigned int) *ptr++;
105
106 return hashValue;
107 }
108
109 /** Get the value associated with a word.
110 @param word The word to get the value for.
111
112 This function gets the value associated with a word. If the word has not been
113 seen before a new value will be given.
114 */
getValueFromContext(CharBuffer * word)115 ValueType getValueFromContext(CharBuffer *word) {
116 return getValue(word->data, word->used);
117 }
118
getValue(void * data,size_t size)119 ValueType getValue(void *data, size_t size) {
120 Tuple *tuple;
121 unsigned int hashValue = hash(data, size) % HASHTABLE_SIZE;
122
123 tuple = hashtable[hashValue];
124 /* Search the singly linked list. */
125 while (tuple != NULL && !(size == tuple->stringLength && memcmp(data, tuple->string, size) == 0))
126 tuple = tuple->next;
127
128 if (tuple == NULL) {
129 ASSERT(nextValue != VALUE_MAX);
130 #ifdef PROFILE_HASH
131 if (hashtable[hashValue] != NULL)
132 collisions++;
133 #endif
134 tuple = allocFromBlock(sizeof(Tuple) - 1 + size);
135
136 tuple->value = nextValue++;
137 tuple->stringLength = size;
138 memcpy(tuple->string, data, size);
139 tuple->next = hashtable[hashValue];
140 hashtable[hashValue] = tuple;
141 }
142 #ifdef PROFILE_HASH
143 else
144 hits++;
145 #endif
146 return tuple->value;
147 }
148
getHashMax(void)149 ValueType getHashMax(void) {
150 ValueType hashMax = nextValue;
151 int i;
152
153 #ifdef PROFILE_HASH
154 printHashStatistics();
155 #endif
156
157 /* Reset the hashtable for the next iteration. */
158 for (i = 0; i < HASHTABLE_SIZE; i++)
159 hashtable[i] = NULL;
160 freeBlocks();
161
162 nextValue = 0;
163
164 return hashMax;
165 }
166