1 # include "indConfig.h"
2
3 # include <stdlib.h>
4
5 # include <uniUtf8.h>
6 # include "indlocal.h"
7 # include <appDebugon.h>
8
9 /************************************************************************/
10 /* Allocate a new node. In Theory, it might be advisable to look for */
11 /* a hole in the array of nodes. As in the current version of the */
12 /* software, no nodes are really freed. this does not really make */
13 /* sense. */
14 /************************************************************************/
15
indTNmake(IND * ind)16 int indTNmake( IND * ind )
17 {
18 TrieNode * node;
19
20 if ( ind->indNodeCount >= ind->indAllocatedNodes )
21 {
22 int newm= ( ind->indAllocatedNodes+ TNsBLOCK )/ TNsBLOCK;
23 unsigned sz= newm* sizeof(TrieNode *);
24 TrieNode ** fresh;
25 TrieNode * nwpg;
26
27 if ( ind->indNodePages )
28 { fresh= (TrieNode **) realloc( (char *)ind->indNodePages, sz );}
29 else{ fresh= (TrieNode **) malloc( sz ); }
30 if ( ! fresh )
31 { return -1; }
32 ind->indNodePages= fresh;
33 nwpg= (TrieNode *)malloc( TNsBLOCK* sizeof( TrieNode ) );
34 if ( ! nwpg )
35 { return -1; }
36 ind->indNodePages[newm-1]= nwpg;
37 ind->indAllocatedNodes= newm* TNsBLOCK;
38 }
39
40 node= NODE(ind,ind->indNodeCount);
41 node->tn_transitions= -1;
42 node->tn_ntrans= 0;
43 node->tn_flags= TNfUSED;
44 node->tn_unused= 0;
45
46 /*appDebug( "NODE %7d of %7d\n", ind->indNodeCount, ind->indAllocatedNodes );*/
47 return ind->indNodeCount++;
48 }
49
indTNfree(IND * ind,int tn)50 void indTNfree( IND * ind,
51 int tn )
52 {
53 TrieNode * node= NODE(ind,tn);
54
55 if ( node->tn_transitions >= 0 )
56 { indTLfree( ind, node->tn_transitions ); node->tn_transitions= -1; }
57 node->tn_flags= 0;
58 node->tn_ntrans= 0;
59
60 while( ind->indNodeCount > 0)
61 {
62 node= NODE(ind,ind->indNodeCount-1);
63
64 if ( node->tn_flags == 0 )
65 { ind->indNodeCount--; }
66 else{ break; }
67 }
68 }
69
70 /************************************************************************/
71 /* */
72 /* Follow one transition in the automaton. */
73 /* */
74 /* Return: The index of the transition from this node in *pTrans. */
75 /* If there is no transition for the input symbol, *pTrans */
76 /* is set to the candidate insertion point. */
77 /* */
78 /* Return: The number in the automaton of the next state (If any) */
79 /* Return: -1 if there is no state for this value. */
80 /* */
81 /************************************************************************/
82
indINDstep(int * pTrans,IND * ind,int tn,int sym)83 int indINDstep( int * pTrans,
84 IND * ind,
85 int tn,
86 int sym )
87 {
88 TrieNode * node;
89 int l, r;
90 int transitions;
91
92 node= NODE(ind,tn);
93
94 l= 0;
95 r= node->tn_ntrans;
96 *pTrans= ( l+ r )/2;
97 transitions= node->tn_transitions;
98
99 if ( r <= 0 )
100 { return -1; }
101
102 while( l < *pTrans )
103 {
104 if ( sym < LINK(ind,transitions+*pTrans)->tl_key )
105 { r= *pTrans; }
106 else{ l= *pTrans; }
107 *pTrans= ( l+ r )/2;
108 }
109
110 if ( sym != LINK(ind,transitions+*pTrans)->tl_key )
111 { return -1; }
112
113 return LINK(ind,transitions+*pTrans)->tl_to;
114 }
115
116 /************************************************************************/
117 /* */
118 /* Retrieve a word in an automaton. */
119 /* */
120 /************************************************************************/
121
indINDgetUtf8(int * paccept,IND * ind,int tn,const char * key)122 int indINDgetUtf8( int * paccept,
123 IND * ind,
124 int tn,
125 const char * key )
126 {
127 int m;
128
129 if ( tn < 0 || tn >= ind->indNodeCount )
130 { LLDEB(tn,ind->indNodeCount); return -1; }
131
132 for (;;)
133 {
134 int step;
135 unsigned short symbol;
136
137 if ( ! * key )
138 {
139 if ( NODE(ind,tn)->tn_flags & TNfACCEPTS )
140 { *paccept= 1; }
141 else{ *paccept= 0; }
142
143 return tn;
144 }
145
146 step= uniGetUtf8( &symbol, key );
147 if ( step < 1 )
148 { LDEB(step); return -1; }
149 key += step;
150
151 tn= indINDstep( &m, ind, tn, symbol );
152
153 if ( tn < 0 )
154 { return tn; }
155 }
156 }
157
indINDgetUtf16(int * paccept,IND * ind,int tn,const unsigned short * key)158 int indINDgetUtf16( int * paccept,
159 IND * ind,
160 int tn,
161 const unsigned short * key )
162 {
163 int m;
164
165 if ( tn < 0 || tn >= ind->indNodeCount )
166 { LLDEB(tn,ind->indNodeCount); return -1; }
167
168 for (;;)
169 {
170 unsigned short symbol;
171
172 if ( ! * key )
173 {
174 if ( NODE(ind,tn)->tn_flags & TNfACCEPTS )
175 { *paccept= 1; }
176 else{ *paccept= 0; }
177
178 return tn;
179 }
180
181 symbol= *key;
182 key += 1;
183
184 tn= indINDstep( &m, ind, tn, symbol );
185
186 if ( tn < 0 )
187 { return tn; }
188 }
189 }
190
191 /************************************************************************/
192 /* */
193 /* Remove a word from the automaton. */
194 /* Simply remove the 'accepts' flag from its final node. */
195 /* */
196 /* This is not correct in a minimized automaton. */
197 /* This is not correct if the automaton is used to find out whether */
198 /* strings are a prefix of accepted strings. */
199 /* */
200 /************************************************************************/
201
indINDforget(IND * ind,const char * key)202 int indINDforget( IND * ind,
203 const char * key )
204 {
205 int accepted;
206 int tn= indINDgetUtf8( &accepted, ind, ind->ind_start, key );
207 TrieNode * node;
208
209 if ( tn < 0 || ! accepted )
210 { return -1; }
211
212 node= NODE(ind,tn);
213
214 node->tn_flags &= ~TNfACCEPTS;
215
216 return 0;
217 }
218
219 /************************************************************************/
220
indAddLinkToNode(IND * ind,TrieNode * node,int m,int symbol,int to)221 static int indAddLinkToNode( IND * ind,
222 TrieNode * node,
223 int m,
224 int symbol,
225 int to )
226 {
227 int transitions= node->tn_transitions;
228
229 if ( node->tn_flags & TNfREAD_ONLY )
230 { XXDEB(node->tn_flags,TNfREAD_ONLY); return -1; }
231
232 if ( node->tn_ntrans > 0 )
233 {
234 int r;
235
236 transitions= indTLalloc( ind, transitions, node->tn_ntrans+ 1 );
237 if ( transitions < 0 )
238 { LLDEB(node->tn_ntrans,transitions); return -1; }
239 else{ node->tn_transitions= transitions; }
240
241 if ( symbol > LINK(ind,transitions+m)->tl_key )
242 { m++; }
243 r= node->tn_ntrans++;
244 while( r > m )
245 {
246 *LINK(ind,transitions+r)= *LINK(ind,transitions+r-1);
247 r--;
248 }
249 LINK(ind,transitions+m)->tl_to= to;
250 LINK(ind,transitions+m)->tl_key= symbol;
251 }
252 else{
253 node->tn_transitions= transitions= indTLalloc( ind, -1, 1 );
254 if ( transitions < 0 )
255 { LDEB(transitions); return -1; }
256
257 node->tn_ntrans= 1;
258 LINK(ind,transitions)->tl_to= to;
259 LINK(ind,transitions)->tl_key= symbol;
260 m= 0;
261 }
262
263 return transitions+ m;
264 }
265
indAddNodeToNode(IND * ind,TrieNode * node,int m,int symbol)266 static int indAddNodeToNode( IND * ind,
267 TrieNode * node,
268 int m,
269 int symbol )
270 {
271 int to;
272
273 if ( node->tn_flags & TNfREAD_ONLY )
274 { XXDEB(node->tn_flags,TNfREAD_ONLY); return -1; }
275
276 to= indTNmake( ind );
277 if ( to < 0 )
278 { LDEB(to); return -1; }
279
280 if ( indAddLinkToNode( ind, node, m, symbol, to ) < 0 )
281 { LLDEB(to,symbol); return -1; }
282
283 return to;
284 }
285
286 /************************************************************************/
287 /* */
288 /* Store a word in the automaton. */
289 /* */
290 /************************************************************************/
291
indINDputUtf8(IND * ind,int tn,const char * key)292 int indINDputUtf8( IND * ind,
293 int tn,
294 const char * key )
295 {
296 for (;;)
297 {
298 TrieNode * node;
299 int m;
300
301 int step;
302 unsigned short symbol;
303
304 if ( tn < 0 || tn >= ind->indNodeCount )
305 { LLDEB(tn,ind->indNodeCount); return -1; }
306 node= NODE(ind,tn);
307
308 /*indTLwalk(ind);*/
309
310 if ( ! * key )
311 {
312 if ( node->tn_flags & TNfREAD_ONLY )
313 { XXDEB(node->tn_flags,TNfREAD_ONLY); return -1; }
314
315 node->tn_flags |= TNfACCEPTS;
316 return tn;
317 }
318
319 step= uniGetUtf8( &symbol, key );
320 if ( step < 1 )
321 { LDEB(step); return -1; }
322 key += step;
323
324 tn= indINDstep( &m, ind, tn, symbol );
325 if ( tn < 0 )
326 {
327 tn= indAddNodeToNode( ind, node, m, symbol );
328 if ( tn < 0 )
329 { LDEB(tn); return -1; }
330 }
331 }
332 }
333
indINDputUtf16(IND * ind,int tn,const unsigned short * key)334 int indINDputUtf16( IND * ind,
335 int tn,
336 const unsigned short * key )
337 {
338 for (;;)
339 {
340 TrieNode * node;
341 int m;
342
343 unsigned short symbol;
344
345 if ( tn < 0 || tn >= ind->indNodeCount )
346 { LLDEB(tn,ind->indNodeCount); return -1; }
347 node= NODE(ind,tn);
348
349 /*indTLwalk(ind);*/
350
351 if ( ! * key )
352 {
353 if ( node->tn_flags & TNfREAD_ONLY )
354 { XXDEB(node->tn_flags,TNfREAD_ONLY); return -1; }
355
356 node->tn_flags |= TNfACCEPTS;
357 return tn;
358 }
359
360 symbol= *key;
361 key += 1;
362
363 tn= indINDstep( &m, ind, tn, symbol );
364 if ( tn < 0 )
365 {
366 tn= indAddNodeToNode( ind, node, m, symbol );
367 if ( tn < 0 )
368 { LDEB(tn); return -1; }
369 }
370 }
371 }
372
373
374 /************************************************************************/
375 /* */
376 /* Do something for all nodes in a trie. */
377 /* NOTE: The approach is naive and loops on cyclic structures. */
378 /* */
379 /************************************************************************/
380
indINDforall(IND * ind,int tn,void * through,IndForAllFun fun)381 int indINDforall( IND * ind,
382 int tn,
383 void * through,
384 IndForAllFun fun )
385 {
386 const TrieNode * frNode;
387 int i;
388 int n;
389
390 if ( tn < 0 )
391 { return 0; }
392 frNode= NODE(ind,tn);
393 n= frNode->tn_ntrans;
394
395 for ( i= 0; i < n; i++ )
396 {
397 const TrieLink * toLink= LINK(ind,frNode->tn_transitions+i);
398 const TrieNode * toNode= NODE(ind,toLink->tl_to);
399
400 if ( (*fun)( through, +1, tn, toLink->tl_to, toLink->tl_key,
401 (toNode->tn_flags & TNfACCEPTS) != 0 ) < 0 )
402 { LLLDEB(tn,toLink->tl_to,toLink->tl_key); return -1; }
403
404 if ( indINDforall( ind, toLink->tl_to, through, fun ) < 0 )
405 { LDEB(1); return -1; }
406
407 if ( (*fun)( through, -1, tn, toLink->tl_to, toLink->tl_key,
408 (toNode->tn_flags & TNfACCEPTS) != 0 ) < 0 )
409 { LLLDEB(tn,toLink->tl_to,toLink->tl_key); return -1; }
410 }
411
412 return 0;
413 }
414
415 /************************************************************************/
416 /* */
417 /* Add a (static) suffix to a node. */
418 /* */
419 /************************************************************************/
420
indINDaddSuffix(IND * ind,int tnTo,int tnSuf)421 int indINDaddSuffix( IND * ind,
422 int tnTo,
423 int tnSuf )
424 {
425 TrieNode * nodeTo= NODE(ind,tnTo);
426 const TrieNode * nodeSuf= NODE(ind,tnSuf);
427
428 int m;
429
430 int i;
431
432 if ( nodeTo->tn_flags & TNfREAD_ONLY )
433 { XXDEB(nodeTo->tn_flags,TNfREAD_ONLY); return -1; }
434 if ( nodeSuf->tn_flags & TNfACCEPTS )
435 { nodeTo->tn_flags |= TNfACCEPTS; }
436
437 i= nodeSuf->tn_ntrans;
438 while( --i >= 0 )
439 {
440 const TrieLink * linkSuf= LINK(ind,nodeSuf->tn_transitions+i);
441
442 tnSuf= linkSuf->tl_to;
443
444 tnTo= indINDstep( &m, ind, tnTo, linkSuf->tl_key );
445 if ( tnTo < 0 )
446 {
447 /* Connect suffix here */
448 int tlTo;
449
450 tlTo= indAddLinkToNode( ind, nodeTo, m, linkSuf->tl_key, tnSuf );
451 if ( tlTo < 0 )
452 { LDEB(tlTo); return -1; }
453
454 continue; /* Ready with this branch */
455 }
456
457 if ( i > 0 )
458 {
459 if ( indINDaddSuffix( ind, tnTo, tnSuf ) < 0 )
460 { LLDEB(tnTo,tnSuf); }
461 }
462 else{
463 /* Avoid chain recursion stay in same stack frame */
464 nodeTo= NODE(ind,tnTo);
465 nodeSuf= NODE(ind,tnSuf);
466 i= nodeSuf->tn_ntrans;
467
468 if ( nodeTo->tn_flags & TNfREAD_ONLY )
469 { XXDEB(nodeTo->tn_flags,TNfREAD_ONLY); return -1; }
470 if ( nodeSuf->tn_flags & TNfACCEPTS )
471 { nodeTo->tn_flags |= TNfACCEPTS; }
472 }
473 }
474
475 return 0;
476 }
477