1 /*************************************************************************/
2 /* Copyright (c) 2004 */
3 /* Daniel Sleator, David Temperley, and John Lafferty */
4 /* Copyright 2019 Amir Plivatsky */
5 /* All rights reserved */
6 /* */
7 /* Use of the link grammar parsing system is subject to the terms of the */
8 /* license set forth in the LICENSE file included with this software. */
9 /* This license allows free redistribution and use in source and binary */
10 /* forms, with or without modification, subject to certain conditions. */
11 /* */
12 /*************************************************************************/
13
14 #ifdef DEBUG
15 #include <inttypes.h> // format macros
16 #endif
17
18 #include "const-prime.h"
19 #include "connectors.h"
20 #include "tracon-set.h"
21 #include "utilities.h"
22
23 /**
24 * This is an adaptation of the string_set module for detecting unique
25 * connector trailing sequences (aka tracons).
26 *
27 * It is used to generate tracon encoding for the parsing and for the
28 * pruning steps. Not like string_set, the actual hash values here
29 * are external.
30 *
31 * A tracon is identified by (the address of) its first connector.
32 *
33 * The API here is similar to that of string_set, with these differences:
34 *
35 * 1. tracon_set_add() returns a pointer to the hash slot of the tracon if
36 * it finds it. Else it returns a pointer to a NULL hash slot, and
37 * the caller is expected to assign to it the tracon (its address
38 * after it is copied to the destination buffer).
39 *
40 * 2. A new API tracon_set_shallow() is used to require that tracons
41 * which start with a shallow connector will not be considered the same
42 * as ones that start with a deep connector. The power pruning algo
43 * depends on that.
44 *
45 * 3. A new API tracon_set_reset() is used to clear the hash table slots
46 * (only, their value remains intact).
47 */
48
hash_connectors(const Connector * c,unsigned int shallow)49 static unsigned int hash_connectors(const Connector *c, unsigned int shallow)
50 {
51 unsigned int accum = shallow && c->shallow;
52
53 for (; c != NULL; c = c->next)
54 {
55 accum = (19 * accum) +
56 ((c->desc->uc_num)<<18) +
57 (((unsigned int)c->multi)<<31) +
58 (unsigned int)c->desc->lc_letters;
59 }
60
61 return accum;
62 }
63
find_prime_for(size_t count)64 static unsigned int find_prime_for(size_t count)
65 {
66 size_t i;
67 for (i = 0; i < MAX_S_PRIMES; i ++)
68 if (count < MAX_TRACON_SET_TABLE_SIZE(s_prime[i])) return i;
69
70 assert(0, "find_prime_for(%zu): Absurdly big count", count);
71 return 0;
72 }
73
tracon_set_reset(Tracon_set * ss)74 void tracon_set_reset(Tracon_set *ss)
75 {
76 size_t ncount = MAX(ss->count, ss->ocount);
77
78 /* Table sizing heuristic: The number of tracons as a function of
79 * word number is usually first increasing and then decreasing.
80 * Continue the trend of the last 2 words. */
81 if (ss->count > ss->ocount)
82 ncount = ncount * 3 / 4;
83 else
84 ncount = ncount * 4 / 3;
85 unsigned int prime_idx = find_prime_for(ncount);
86 if (prime_idx < ss->prime_idx) ss->prime_idx = prime_idx;
87
88 ss->size = s_prime[ss->prime_idx];
89 ss->mod_func = prime_mod_func[ss->prime_idx];
90 memset(ss->table, 0, ss->size * sizeof(clist_slot));
91 ss->ocount = ss->count;
92 ss->count = 0;
93 ss->available_count = MAX_TRACON_SET_TABLE_SIZE(ss->size);
94 }
95
tracon_set_create(void)96 Tracon_set *tracon_set_create(void)
97 {
98 Tracon_set *ss = (Tracon_set *) malloc(sizeof(Tracon_set));
99
100 ss->prime_idx = 0;
101 ss->size = s_prime[ss->prime_idx];
102 ss->mod_func = prime_mod_func[ss->prime_idx];
103 ss->table = (clist_slot *) malloc(ss->size * sizeof(clist_slot));
104 memset(ss->table, 0, ss->size * sizeof(clist_slot));
105 ss->count = ss->ocount = 0;
106 ss->shallow = false;
107 ss->available_count = MAX_TRACON_SET_TABLE_SIZE(ss->size);
108
109 return ss;
110 }
111
112 /**
113 * The connectors must be exactly equal.
114 */
connector_equal(const Connector * c1,const Connector * c2)115 static bool connector_equal(const Connector *c1, const Connector *c2)
116 {
117 return (c1->desc == c2->desc) && (c1->multi == c2->multi);
118 }
119
120 /** Return TRUE iff the tracon is exactly the same. */
connector_list_equal(const Connector * c1,const Connector * c2)121 static bool connector_list_equal(const Connector *c1, const Connector *c2)
122 {
123 while ((c1 != NULL) && (c2 != NULL)) {
124 if (!connector_equal(c1, c2)) return false;
125 c1 = c1->next;
126 c2 = c2->next;
127 }
128 return (c1 == NULL) && (c2 == NULL);
129 }
130
131 #if defined DEBUG || defined TRACON_SET_DEBUG
132 uint64_t fp_count;
133 uint64_t coll_count;
prt_stat(void)134 static void prt_stat(void)
135 {
136 lgdebug(+5, "%"PRIu64" accesses, chain %.4f\n",
137 fp_count, 1.*(fp_count+coll_count)/fp_count);
138 }
139 #define PRT_STAT(...) __VA_ARGS__
140 #else
141 #define PRT_STAT(...)
142 #endif
143
place_found(const Connector * c,const clist_slot * slot,unsigned int hash,Tracon_set * ss)144 static bool place_found(const Connector *c, const clist_slot *slot,
145 unsigned int hash, Tracon_set *ss)
146 {
147 if (slot->clist == NULL) return true;
148 if (hash != slot->hash) return false;
149 if (!connector_list_equal(slot->clist, c)) return false;
150 if (ss->shallow && (slot->clist->shallow != c->shallow)) return false;
151 return connector_list_equal(slot->clist, c);
152 }
153
154 /**
155 * lookup the given string in the table. Return an index
156 * to the place it is, or the place where it should be.
157 */
find_place(const Connector * c,unsigned int h,Tracon_set * ss)158 static unsigned int find_place(const Connector *c, unsigned int h,
159 Tracon_set *ss)
160 {
161 PRT_STAT(if (fp_count == 0) atexit(prt_stat); fp_count++;)
162 unsigned int coll_num = 0;
163 unsigned int key = ss->mod_func(h);
164
165 /* Quadratic probing. */
166 while (!place_found(c, &ss->table[key], h, ss))
167 {
168 PRT_STAT(coll_count++;)
169 key += 2 * ++coll_num - 1;
170 if (key >= ss->size) key = ss->mod_func(key);
171 }
172
173 return key;
174 }
175
grow_table(Tracon_set * ss)176 static void grow_table(Tracon_set *ss)
177 {
178 Tracon_set old = *ss;
179
180 PRT_STAT(uint64_t fp_count_save = fp_count;)
181 ss->prime_idx++;
182 ss->size = s_prime[ss->prime_idx];
183 ss->mod_func = prime_mod_func[ss->prime_idx];
184 ss->table = (clist_slot *)malloc(ss->size * sizeof(clist_slot));
185 memset(ss->table, 0, ss->size*sizeof(clist_slot));
186 for (size_t i = 0; i < old.size; i++)
187 {
188 if (old.table[i].clist != NULL)
189 {
190 unsigned int p = find_place(old.table[i].clist, old.table[i].hash, ss);
191 ss->table[p] = old.table[i];
192 }
193 }
194 ss->available_count = MAX_STRING_SET_TABLE_SIZE(ss->size);
195
196 /* printf("growing from %zu to %zu\n", old.size, ss->size); */
197 PRT_STAT(fp_count = fp_count_save);
198 free(old.table);
199 }
200
tracon_set_shallow(bool shallow,Tracon_set * ss)201 void tracon_set_shallow(bool shallow, Tracon_set *ss)
202 {
203 ss->shallow = shallow;
204 }
205
tracon_set_add(Connector * clist,Tracon_set * ss)206 Connector **tracon_set_add(Connector *clist, Tracon_set *ss)
207 {
208 assert(clist != NULL, "Connector-ID: Can't insert a null list");
209
210 /* We may need to add it to the table. If the table got too big,
211 * first we grow it. */
212 if (ss->available_count == 0) grow_table(ss);
213
214 unsigned int h = hash_connectors(clist, ss->shallow);
215 unsigned int p = find_place(clist, h, ss);
216
217 if (ss->table[p].clist != NULL)
218 return &ss->table[p].clist;
219
220 ss->table[p].hash = h;
221 ss->count++;
222 ss->available_count--;
223
224 return &ss->table[p].clist;
225 }
226
tracon_set_lookup(const Connector * clist,Tracon_set * ss)227 Connector *tracon_set_lookup(const Connector *clist, Tracon_set *ss)
228 {
229 unsigned int h = hash_connectors(clist, ss->shallow);
230 unsigned int p = find_place(clist, h, ss);
231 return ss->table[p].clist;
232 }
233
tracon_set_delete(Tracon_set * ss)234 void tracon_set_delete(Tracon_set *ss)
235 {
236 if (ss == NULL) return;
237 free(ss->table);
238 free(ss);
239 }
240