1 /*************************************************************************/
2 /* Copyright (c) 2004                                                    */
3 /* Daniel Sleator, David Temperley, and John Lafferty                    */
4 /* Copyright 2019 Amir Plivatsky                                         */
5 /* All rights reserved                                                   */
6 /*                                                                       */
7 /* Use of the link grammar parsing system is subject to the terms of the */
8 /* license set forth in the LICENSE file included with this software.    */
9 /* This license allows free redistribution and use in source and binary  */
10 /* forms, with or without modification, subject to certain conditions.   */
11 /*                                                                       */
12 /*************************************************************************/
13 
14 #ifdef DEBUG
15 #include <inttypes.h>                   // format macros
16 #endif
17 
18 #include "const-prime.h"
19 #include "connectors.h"
20 #include "tracon-set.h"
21 #include "utilities.h"
22 
23 /**
24  * This is an adaptation of the string_set module for detecting unique
25  * connector trailing sequences (aka tracons).
26  *
27  * It is used to generate tracon encoding for the parsing and for the
28  * pruning steps. Not like string_set, the actual hash values here
29  * are external.
30  *
31  * A tracon is identified by (the address of) its first connector.
32  *
33  * The API here is similar to that of string_set, with these differences:
34  *
35  * 1. tracon_set_add() returns a pointer to the hash slot of the tracon if
36  * it finds it. Else it returns a pointer to a NULL hash slot, and
37  * the caller is expected to assign to it the tracon (its address
38  * after it is copied to the destination buffer).
39  *
40  * 2. A new API tracon_set_shallow() is used to require that tracons
41  * which start with a shallow connector will not be considered the same
42  * as ones that start with a deep connector. The power pruning algo
43  * depends on that.
44  *
45  * 3. A new API tracon_set_reset() is used to clear the hash table slots
46  * (only, their value remains intact).
47  */
48 
hash_connectors(const Connector * c,unsigned int shallow)49 static unsigned int hash_connectors(const Connector *c, unsigned int shallow)
50 {
51 	unsigned int accum = shallow && c->shallow;
52 
53 	for (; c != NULL; c = c->next)
54 	{
55 		accum = (19 * accum) +
56 		((c->desc->uc_num)<<18) +
57 		(((unsigned int)c->multi)<<31) +
58 		(unsigned int)c->desc->lc_letters;
59 	}
60 
61 	return accum;
62 }
63 
find_prime_for(size_t count)64 static unsigned int find_prime_for(size_t count)
65 {
66 	size_t i;
67 	for (i = 0; i < MAX_S_PRIMES; i ++)
68 	   if (count < MAX_TRACON_SET_TABLE_SIZE(s_prime[i])) return i;
69 
70 	assert(0, "find_prime_for(%zu): Absurdly big count", count);
71 	return 0;
72 }
73 
tracon_set_reset(Tracon_set * ss)74 void tracon_set_reset(Tracon_set *ss)
75 {
76 	size_t ncount = MAX(ss->count, ss->ocount);
77 
78 	/* Table sizing heuristic: The number of tracons as a function of
79 	 * word number is usually first increasing and then decreasing.
80 	 * Continue the trend of the last 2 words. */
81 	if (ss->count > ss->ocount)
82 		ncount = ncount * 3 / 4;
83 	else
84 		ncount = ncount * 4 / 3;
85 	unsigned int prime_idx = find_prime_for(ncount);
86 	if (prime_idx < ss->prime_idx) ss->prime_idx = prime_idx;
87 
88 	ss->size = s_prime[ss->prime_idx];
89 	ss->mod_func = prime_mod_func[ss->prime_idx];
90 	memset(ss->table, 0, ss->size * sizeof(clist_slot));
91 	ss->ocount = ss->count;
92 	ss->count = 0;
93 	ss->available_count = MAX_TRACON_SET_TABLE_SIZE(ss->size);
94 }
95 
tracon_set_create(void)96 Tracon_set *tracon_set_create(void)
97 {
98 	Tracon_set *ss = (Tracon_set *) malloc(sizeof(Tracon_set));
99 
100 	ss->prime_idx = 0;
101 	ss->size = s_prime[ss->prime_idx];
102 	ss->mod_func = prime_mod_func[ss->prime_idx];
103 	ss->table = (clist_slot *) malloc(ss->size * sizeof(clist_slot));
104 	memset(ss->table, 0, ss->size * sizeof(clist_slot));
105 	ss->count = ss->ocount = 0;
106 	ss->shallow = false;
107 	ss->available_count = MAX_TRACON_SET_TABLE_SIZE(ss->size);
108 
109 	return ss;
110 }
111 
112 /**
113  * The connectors must be exactly equal.
114  */
connector_equal(const Connector * c1,const Connector * c2)115 static bool connector_equal(const Connector *c1, const Connector *c2)
116 {
117 	return (c1->desc == c2->desc) && (c1->multi == c2->multi);
118 }
119 
120 /** Return TRUE iff the tracon is exactly the same. */
connector_list_equal(const Connector * c1,const Connector * c2)121 static bool connector_list_equal(const Connector *c1, const Connector *c2)
122 {
123 	while ((c1 != NULL) && (c2 != NULL)) {
124 		if (!connector_equal(c1, c2)) return false;
125 		c1 = c1->next;
126 		c2 = c2->next;
127 	}
128 	return (c1 == NULL) && (c2 == NULL);
129 }
130 
131 #if defined DEBUG || defined TRACON_SET_DEBUG
132 uint64_t fp_count;
133 uint64_t coll_count;
prt_stat(void)134 static void prt_stat(void)
135 {
136 	lgdebug(+5, "%"PRIu64" accesses, chain %.4f\n",
137 	        fp_count, 1.*(fp_count+coll_count)/fp_count);
138 }
139 #define PRT_STAT(...) __VA_ARGS__
140 #else
141 #define PRT_STAT(...)
142 #endif
143 
place_found(const Connector * c,const clist_slot * slot,unsigned int hash,Tracon_set * ss)144 static bool place_found(const Connector *c, const clist_slot *slot,
145                         unsigned int hash, Tracon_set *ss)
146 {
147 	if (slot->clist == NULL) return true;
148 	if (hash != slot->hash) return false;
149 	if (!connector_list_equal(slot->clist, c)) return false;
150 	if (ss->shallow && (slot->clist->shallow != c->shallow)) return false;
151 	return connector_list_equal(slot->clist, c);
152 }
153 
154 /**
155  * lookup the given string in the table.  Return an index
156  * to the place it is, or the place where it should be.
157  */
find_place(const Connector * c,unsigned int h,Tracon_set * ss)158 static unsigned int find_place(const Connector *c, unsigned int h,
159                                Tracon_set *ss)
160 {
161 	PRT_STAT(if (fp_count == 0) atexit(prt_stat); fp_count++;)
162 	unsigned int coll_num = 0;
163 	unsigned int key = ss->mod_func(h);
164 
165 	/* Quadratic probing. */
166 	while (!place_found(c, &ss->table[key], h, ss))
167 	{
168 		PRT_STAT(coll_count++;)
169 		key += 2 * ++coll_num - 1;
170 		if (key >= ss->size) key = ss->mod_func(key);
171 	}
172 
173 	return key;
174 }
175 
grow_table(Tracon_set * ss)176 static void grow_table(Tracon_set *ss)
177 {
178 	Tracon_set old = *ss;
179 
180 	PRT_STAT(uint64_t fp_count_save = fp_count;)
181 	ss->prime_idx++;
182 	ss->size = s_prime[ss->prime_idx];
183 	ss->mod_func = prime_mod_func[ss->prime_idx];
184 	ss->table = (clist_slot *)malloc(ss->size * sizeof(clist_slot));
185 	memset(ss->table, 0, ss->size*sizeof(clist_slot));
186 	for (size_t i = 0; i < old.size; i++)
187 	{
188 		if (old.table[i].clist != NULL)
189 		{
190 			unsigned int p = find_place(old.table[i].clist, old.table[i].hash, ss);
191 			ss->table[p] = old.table[i];
192 		}
193 	}
194 	ss->available_count = MAX_STRING_SET_TABLE_SIZE(ss->size);
195 
196 	/* printf("growing from %zu to %zu\n", old.size, ss->size); */
197 	PRT_STAT(fp_count = fp_count_save);
198 	free(old.table);
199 }
200 
tracon_set_shallow(bool shallow,Tracon_set * ss)201 void tracon_set_shallow(bool shallow, Tracon_set *ss)
202 {
203 	ss->shallow = shallow;
204 }
205 
tracon_set_add(Connector * clist,Tracon_set * ss)206 Connector **tracon_set_add(Connector *clist, Tracon_set *ss)
207 {
208 	assert(clist != NULL, "Connector-ID: Can't insert a null list");
209 
210 	/* We may need to add it to the table. If the table got too big,
211 	 * first we grow it. */
212 	if (ss->available_count == 0) grow_table(ss);
213 
214 	unsigned int h = hash_connectors(clist, ss->shallow);
215 	unsigned int p = find_place(clist, h, ss);
216 
217 	if (ss->table[p].clist != NULL)
218 		return &ss->table[p].clist;
219 
220 	ss->table[p].hash = h;
221 	ss->count++;
222 	ss->available_count--;
223 
224 	return &ss->table[p].clist;
225 }
226 
tracon_set_lookup(const Connector * clist,Tracon_set * ss)227 Connector *tracon_set_lookup(const Connector *clist, Tracon_set *ss)
228 {
229 	unsigned int h = hash_connectors(clist, ss->shallow);
230 	unsigned int p = find_place(clist, h, ss);
231 	return ss->table[p].clist;
232 }
233 
tracon_set_delete(Tracon_set * ss)234 void tracon_set_delete(Tracon_set *ss)
235 {
236 	if (ss == NULL) return;
237 	free(ss->table);
238 	free(ss);
239 }
240