1 /*  Part of SWI-Prolog
2 
3     Author:        Jan Wielemaker
4     E-mail:        J.Wielemaker@vu.nl
5     WWW:           http://www.swi-prolog.org
6     Copyright (c)  1985-2019, University of Amsterdam
7                               VU University Amsterdam
8     All rights reserved.
9 
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions
12     are met:
13 
14     1. Redistributions of source code must retain the above copyright
15        notice, this list of conditions and the following disclaimer.
16 
17     2. Redistributions in binary form must reproduce the above copyright
18        notice, this list of conditions and the following disclaimer in
19        the documentation and/or other materials provided with the
20        distribution.
21 
22     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
26     COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
30     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
32     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33     POSSIBILITY OF SUCH DAMAGE.
34 */
35 
36 /*#define O_DEBUG 1*/
37 #include "pl-incl.h"
38 #include "os/pl-ctype.h"
39 #undef LD
40 #define LD LOCAL_LD
41 
42 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
43 Implementation issues
44 ---------------------
45 
46 There are two parts in the atom   administration. One is a dynamic array
47 (called buffer) atom_array, which is there to   find  the atoms back. An
48 atom   as   it   appears   is   of   type   word   and   of   the   form
49 (n<<LMASK_BITS)|TAG_ATOM. The atom structure is   located by getting the
50 n-th pointer from the atom_array  dynamic   array.  See  atomValue() for
51 translating the word into the address of the structure.
52 
53 Next, there is a hash-table. Thanks to the work by Keri Harris, the atom
54 table now uses a lock-free algorithm.  This works as follows:
55 
56   - Atoms have a ->next pointer, organizing them as an open hash table.
57   - The head pointers for the hash-buckets are in a struct atom_table,
58     of which the most recent is in GD->atoms.table.  This structure
59     contains a pointer to older atom_tables (before resizing).  A
60     resize allocates a new struct atom_table, copies all atoms
61     (updating Atom->next) and makes the new atom-table current.
    Lookup and creation work reliably during this process because
63     - If the bucket scan accidentally finds the right atom, great.
64     - If not, but the atom table has changed we retry, now using
65       the new table where all atoms are properly linked again.
66     - If the resize is in progress though, we may not find the
67       atom.  In that case we create a new one.  As our hash-table
68       is still too small, we lock and call rehashAtoms().  So,
69       when we get to linking the new atom into the hash-table,
70       we will find the table is old, destroy the atom and redo.
71 
72   - The creation of an atom needs to guarantee that it is added
73     to the latest table and only added once.  We do this by creating
74     a RESERVED atom and fully preparing it.  Now, if we managed to
75     CAS it into the bucket, we know we are the only one that created
76     the atom and we make the atom VALID, so others can find it.  If
77     not, we redo the lookup, possibly finding that someone created it
78     for us.  It is possible that another thread inserted another atom
79     into the same bucket.  In that case we have bad luck and must
80     recreate the atom.  Hash collisions should be fairly rare.
81 
82 Atom garbage collection
83 -----------------------
84 
85 There are various categories of atoms:
86 
87 	* Built-in atoms
88 	These are used directly in the C-source of the system and cannot
	be removed. These are the atoms up to a certain number. This
90 	number is GD->atoms.builtin
91 
92 	* Foreign referenced atoms
	These are references held in foreign code by means of
94 	PL_new_atom() or other calls returning an atom. The system has
95 	no way to determine the lifetime of them.  Foreign code must
96 	keep track of references to atoms using these two functions:
97 
98 	      - PL_register_atom(atom_t atom)
99 	      - PL_unregister_atom(atom_t atom)
100 
101 	* References from the Prolog stacks
102 	Reference counting is unacceptable here, which implies we must
103 	mark atoms that are accessible from the stacks.  This is done
104 	by markAtomsOnStacks().
105 
106 	* References from other structures
107 	Various of the structures contain or may contain atom
108 	references.  There are two options: lock/unlock them using
109 	PL_register_atom() on creation/destruction of the structure
110 	or enumerate them and flag the atoms.  Currently, we use
111 	registration everywhere, except for temporary structures
112 	used by findall/3 and message queues, which are marked
113 	by markAtomsThreads().
114 
115 	* References from compiled code and records
116 	This uses PL_register_atom(), except for the cases mentioned
117 	above.
118 
119 Reclaiming
120 ----------
121 
122 Atoms are reclaimed by collectAtoms(), which is a two pass process.
123 
124   - First, the atom-array is scanned and unmarked atoms without
125     references are handed to invalidateAtom(), which
126     - Uses CAS to reset VALID
127     - Removes the atom from the hash bucket
128     - adds atom to chain of invalidated atoms
129     - sets length to 0;
130   - In the second pass, we call destroyAtom() for all atoms in
131     the invalidated chain.  destroyAtom() only destroys
132     the atom if pl_atom_bucket_in_use() returns FALSE.  This serves
133     two purposes:
      - Guarantee that Atom->name is valid
135       - Avoid a race between looking up the atom and destroying it:
136 
137 	thread1                      thread2
138 	--------------------------------------------
139 	lookupBlob(s, length, type)
140 	  v = hash(s)
141 	  a = atoms[v]
142 	  length == a.length
143 	  type == a.type
144 	  memcmp(s, a.name)
145 	  // we have a match!
146 				     AGC
147 				       invalidate atom
148 				       free atom
149 				     lookupBlob(s2)
150 				       v = hash(s2)
151 				       not found so insert at atoms[v]
152 	  CAS ref -> ref+1
153 
154 The dynamic array gets holes and  we   remember  the  first free hole to
155 avoid too much search. Alternatively, we could  turn the holes into some
156 form of linked list, for example by   encoding an integer that expresses
157 the location of the next hole. We   cannot  shrink the array, unless all
158 atoms above a certain index are gone.
159 
160 Atom GC and multi-threading
161 ---------------------------
162 
163 This is a hard problem. Atom-GC cannot   run  while some thread performs
164 normal GC because the pointer relocation makes it extremely hard to find
165 referenced atoms. Otherwise, ask all  threads   to  mark their reachable
166 atoms and run collectAtoms() to reclaim the unreferenced atoms. The lock
167 LD->thread.scan_lock is used to ensure garbage   collection does not run
168 concurrently with atom garbage collection.
169 
170 Atom-GC asynchronously walks  the  stacks  of   all  threads  and  marks
171 everything  that  looks  `atom-like',   i.e.,    our   collector   is  a
172 `conservative' collector. While agc is running the VM will mark atoms as
173 it pushes them onto the stacks. See e.g.,   the H_ATOM VMI. An atom that
174 is unregistered (PL_unregister_atom()) just before   AGC  starts may not
175 get marked this way. This is fixed by setting LD->atoms.unregistering.
176 
177 Note that threads can mark their atoms and continue execution because:
178 
179   - If a marked atom is no longer needed it is merely not reclaimed this
180     time (but might be in the next collection).
181   - New atoms are added to the stacks using pushVolatileAtom(), which
182     marks the atom if AGC is active.
183   - Finally, message queues and bags as used by findall/3 complicate
184     the issue.  An atom sent to these structures subsequently may
185     become inaccessible from the stack (the normal case for findall/3,
186     which backtracks).  If the atom is copied back from the structure
187     to the stack ('$collect_findall_bag'/3 or thread_get_message/1,2),
188     the atom can no longer be marked from the structure but is added
189     to the stacks without locking.   We resolve this issue as follows:
190     - Run marking from queues/bags after marking the stack.  This
191       ensures that atoms added to the these structures get marked,
192       also if the atom is no longer on the stack.
193     - If an atom is copied to the stack from such a structure while
194       AGC is running, we are ok, because this is merely the same issue
195       as atoms living on the stack.  TBD: redesign the structures such
196       that they can safely be walked.
197 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
198 
/* Forward declarations; atom-GC debug tracking state lives below under
   O_DEBUG_ATOMGC. */
static int	rehashAtoms(void);
static void	considerAGC(void);
static unsigned int register_atom(volatile Atom p);
static unsigned int unregister_atom(volatile Atom p);
#ifdef O_DEBUG_ATOMGC
static int	tracking(const Atom a);
IOSTREAM *atomLogFd = 0;		/* stream logging tracked-atom events */
#endif
207 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
bump_atom_references() tries to increment the reference count of `a`,
starting from the count `ref` that the caller has just read.  Returns
TRUE if the count was incremented or is pegged at its maximum (such an
atom is never reclaimed, so counting further is pointless).  Returns
FALSE if the atom lost its VALID bit while we were racing, i.e., AGC
invalidated it; the caller must then re-create the atom.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static inline int
bump_atom_references(Atom a, unsigned int ref)
{ for(;;)
  { unsigned int nref = ref+1;

    if ( ATOM_REF_COUNT(nref) == 0 )
      return TRUE;			/* reached max references */

    if ( COMPARE_AND_SWAP_UINT(&a->references, ref, nref) )
    { if ( ATOM_REF_COUNT(ref) == 0 )	/* was unregistered; it is referenced again */
	ATOMIC_DEC(&GD->atoms.unregistered);
      return TRUE;
    } else
    { ref = a->references;		/* CAS lost a race: re-read and retry */
      if ( !ATOM_IS_VALID(ref) )
	return FALSE;			/* atom was invalidated by AGC */
    }
  }
}
227 
228 		 /*******************************
229 		 *	      TYPES		*
230 		 *******************************/
231 
/* Blob type for ordinary text atoms. */
static PL_blob_t text_atom =
{ PL_BLOB_MAGIC,
  PL_BLOB_UNIQUE|PL_BLOB_TEXT,		/* unique representation of text */
  "text"
};


/* Blob type that discarded blobs are re-typed to by
   PL_unregister_blob_type(), so stale references remain printable. */
static PL_blob_t unregistered_blob_atom =
{ PL_BLOB_MAGIC,
  PL_BLOB_NOCOPY|PL_BLOB_TEXT,
  "unregistered"
};
244 
245 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PL_register_blob_type() links `type` into the global chain of blob types
(GD->atoms.types) and assigns its rank and padding.  Uses double-checked
locking on type->registered so the common already-registered path is
lock-free.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

void
PL_register_blob_type(PL_blob_t *type)
{ if ( !type->registered )
  { PL_LOCK(L_MISC);

    if ( !type->registered )		/* re-check under the lock */
    { if ( !GD->atoms.types )
      { GD->atoms.types = type;
      } else
      { PL_blob_t *t = GD->atoms.types;	/* append at the end of the chain */

	while(t->next)
	  t = t->next;

	t->next = type;
      }
      /* Rank orders blob types in the standard order of terms: text
	 types count upward, non-text types downward. */
      if ( true(type, PL_BLOB_TEXT) )
      { type->rank = ++GD->atoms.text_rank;
	if ( true(type, PL_BLOB_WCHAR) )
	  type->padding = sizeof(pl_wchar_t);
	else
	  type->padding = sizeof(char);
      } else
      { type->rank = --GD->atoms.nontext_rank;
      }

      /* During bootstrap, mark registered BEFORE creating the name atom:
	 PL_new_atom() -> lookupBlob() calls PL_register_blob_type() for
	 unregistered types, which would recurse/deadlock otherwise. */
      if ( !GD->atoms.initialised )
	type->registered = TRUE;
      if ( !type->atom_name )
	type->atom_name = PL_new_atom(type->name);
      type->registered = TRUE;
    }

    PL_UNLOCK(L_MISC);
  }
}
282 
283 
284 PL_blob_t *
PL_find_blob_type(const char * name)285 PL_find_blob_type(const char *name)
286 { PL_blob_t *t;
287 
288   PL_LOCK(L_MISC);
289   for(t = GD->atoms.types; t; t = t->next)
290   { if ( streq(name, t->name) )
291       break;
292   }
293   PL_UNLOCK(L_MISC);
294 
295   return t;
296 }
297 
298 
299 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
PL_unregister_blob_type() removes `type` from the global type chain and
re-types every live atom of this type to `unregistered_blob_atom` with
the name "<discarded blob>", so stale handles stay safe to print.
Returns TRUE if no atoms had to be discarded, FALSE otherwise.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

int
PL_unregister_blob_type(PL_blob_t *type)
{ size_t index;
  int i, last=FALSE;
  PL_blob_t **t;
  int discarded = 0;

  PL_LOCK(L_MISC);
  for(t = &GD->atoms.types; *t; t = &(*t)->next)
  { if ( *t == type )
    { *t = type->next;			/* unlink from the chain */
      type->next = NULL;
    }
  }
  PL_UNLOCK(L_MISC);

  PL_register_blob_type(&unregistered_blob_atom);

  /* Walk the atom array block-wise: block i holds indices [2^i, 2^(i+1)).
     See allocateAtomBlock() for why b+index is valid addressing. */
  for(index=1, i=0; !last; i++)
  { size_t upto = (size_t)2<<i;
    size_t high = GD->atoms.highest;
    Atom b = GD->atoms.array.blocks[i];

    if ( upto >= high )
    { upto = high;
      last = TRUE;
    }

    for(; index<upto; index++)
    { Atom atom = b + index;
      unsigned int refs = atom->references;
      PL_blob_t *btype = atom->type;

      /* bump_atom_references() keeps the atom alive while we mutate it;
	 it fails only if AGC invalidated the atom concurrently. */
      if ( ATOM_IS_VALID(refs) && btype == type &&
	   bump_atom_references(atom, refs) )
      { atom->type = &unregistered_blob_atom;

	atom->name = "<discarded blob>";
	atom->length = strlen(atom->name);
	discarded++;
	PL_unregister_atom(atom->atom);
      }
    }
  }

  return discarded == 0 ? TRUE : FALSE;
}
347 
348 
349 		 /*******************************
350 		 *      BUILT-IN ATOM TABLE	*
351 		 *******************************/
352 
/* Names of the built-in atoms.  pl-atom.ic is generated at build time;
   the array is terminated by a NULL entry. */
#define ATOM(s) s

typedef const char * ccharp;
static const ccharp atoms[] = {
#include "pl-atom.ic"
  ATOM((char *)NULL)
};
#undef ATOM
361 
#ifdef O_PLMT

/* Lock-free access protocol: before scanning the atom table (or one of
   its buckets) a thread publishes the pointer it is about to use in its
   thread-local `access' record.  The reclamation code (see
   maybe_free_atom_tables() and pl_atom_bucket_in_use(), used by
   destroyAtom()) consults these records and refrains from freeing a
   table or bucket some thread may still be scanning. */

#define acquire_atom_table(t, b) \
  { LD->thread.info->access.atom_table = GD->atoms.table; \
    t = LD->thread.info->access.atom_table->table; \
    b = LD->thread.info->access.atom_table->buckets; \
  }

#define release_atom_table() \
  { LD->thread.info->access.atom_table = NULL; \
    LD->thread.info->access.atom_bucket = NULL; \
  }

#define acquire_atom_bucket(b) \
  { LD->thread.info->access.atom_bucket = (b); \
  }

#define release_atom_bucket() \
  { LD->thread.info->access.atom_bucket = NULL; \
  }

#else

/* Single-threaded build: nobody can free tables under us, so the
   acquire/release bookkeeping reduces to plain reads and no-ops. */

#define acquire_atom_table(t, b) \
  { t = GD->atoms.table->table; \
    b = GD->atoms.table->buckets; \
  }

#define release_atom_table() (void)0

#define acquire_atom_bucket(b) (void)0

#define release_atom_bucket() (void)0

#endif
397 
398 /* Note that we use PL_malloc_uncollectable() here because the pointer in
399    our block is not the real memory pointer.  Probably it is better to
400    have two pointers; one to the allocated memory and one with the
401    necessary offset.
402 */
403 
404 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
405 It might be wise to  provide  for   an  option  that does not reallocate
atoms. In that case accessing a GC'ed   atom  causes a crash rather than
407 another atom.
408 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
409 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
allocateAtomBlock() allocates block `idx` of the atom array.  Block idx
holds 2^idx atoms for the global indices [2^idx, 2^(idx+1)); we store
newblock-bs so that blocks[idx][global_index] addresses the right slot
without subtracting the base offset at every access (see the note above
on PL_malloc_uncollectable()).  Lock-free: on a lost CAS race the block
allocated by the loser is simply freed again.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static void
allocateAtomBlock(int idx)
{ if ( !GD->atoms.array.blocks[idx] )
  { size_t bs = (size_t)1<<idx;
    size_t i;
    Atom newblock;

    if ( !(newblock=PL_malloc_uncollectable(bs*sizeof(struct atom))) )
      outOfCore();

    memset(newblock, 0, bs*sizeof(struct atom));
    for(i=0; i<bs; i++)
    { newblock[i].type = ATOM_TYPE_INVALID;	/* mark every slot free */
      newblock[i].name = "<virgin>";
    }
    if ( !COMPARE_AND_SWAP_PTR(&GD->atoms.array.blocks[idx],
			   NULL, newblock-bs) )
      PL_free(newblock);		/* done by someone else */
  }
}
430 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
reserveAtom() claims a free slot in the atom array and returns it with
its reference word set to ATOM_RESERVED_REFERENCE and ->atom filled in.
With O_ATOMGC we first scan for a hole left by a reclaimed atom,
starting at GD->atoms.no_hole_before; failing that, we extend the array
at GD->atoms.highest.  All claiming is done with CAS, so concurrent
reservations simply retry on the next slot.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static Atom
reserveAtom(void)
{ size_t index;
#ifdef O_ATOMGC				/* try to find a hole! */
  int i;
  int last = FALSE;
  Atom a;
  unsigned int ref;
  int idx;

  for(index=GD->atoms.no_hole_before, i=MSB(index); !last; i++)
  { size_t upto = (size_t)2<<i;
    size_t high = GD->atoms.highest;
    Atom b = GD->atoms.array.blocks[i];

    if ( upto >= high )
    { upto = high;			/* last (partially used) block */
      last = TRUE;
    }

    for(; index<upto; index++)
    { a = b + index;
      ref = a->references;

      if ( ATOM_IS_FREE(ref) &&
	   COMPARE_AND_SWAP_UINT(&a->references, ref, ATOM_RESERVED_REFERENCE) )
      { assert(a->type == ATOM_TYPE_INVALID);
	GD->atoms.no_hole_before = index+1;
        a->atom = (index<<LMASK_BITS)|TAG_ATOM;	/* tagged word for this slot */

	return a;
      }
    }
  }
  GD->atoms.no_hole_before = index+1;	/* no holes until the array end */
#endif /*O_ATOMGC*/

  for(;;)
  { index = GD->atoms.highest;
    idx = MSB(index);
    assert(index >= 0);

    if ( !GD->atoms.array.blocks[idx] )
      allocateAtomBlock(idx);

    a = &GD->atoms.array.blocks[idx][index];
    ref = a->references;

    if ( ATOM_IS_FREE(ref) &&
	 COMPARE_AND_SWAP_UINT(&a->references, ref, ATOM_RESERVED_REFERENCE) )
    { /* advance highest; losing this CAS is fine: the winner advanced it */
      COMPARE_AND_SWAP_SIZE(&GD->atoms.highest, index, index+1);
      a->atom = (index<<LMASK_BITS)|TAG_ATOM;

      return a;
    }
  }
}
488 
489 
490 		 /*******************************
491 		 *	  GENERAL LOOKUP	*
492 		 *******************************/
493 
494 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
495 (*) AGC starting. As we cannot run AGC if   we  are not in a safe state,
496 AGC is started using a   software interrupt using PL_raise(SIG_ATOM_GC).
497 Earlier versions only fired the signal   at exactly (last+margin) atoms,
498 but it is possible the signal is not  handled due to the thread dying or
499 the thread starting an indefinite  wait.   Therefore  we keep signalling
500 every 128 new atoms. Sooner or later   some  actually active thread will
501 pick up the request and process it.
502 
503 PL_handle_signals() decides on the actual invocation of atom-gc and will
504 treat the signal as bogus if agc has already been performed.
505 
506 (**)   Without   this    check,    some     threads    may    pass   the
507 PL_LOCK(L_REHASH_ATOMS) around rehashAtoms() and create   their atom. If
508 they manage to register the atom in   the old table before rehashAtoms()
509 activates the new table the insertion   is successful, but rehashAtoms()
510 may not have moved the atom to the new   table. Now we will repeat if we
511 bypassed the LOCK as either GD->atoms.rehashing is TRUE or the new table
512 is activated.
513 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
514 
515 static int
same_name(const Atom a,const char * s,size_t length,const PL_blob_t * type)516 same_name(const Atom a, const char *s, size_t length, const PL_blob_t *type)
517 { if ( false(type, PL_BLOB_NOCOPY) )
518     return length == 0 || memcmp(s, a->name, length) == 0;
519   else
520     return s == a->name;
521 }
522 
523 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
lookupBlob() returns (as a tagged word) the atom whose name is the
`length` bytes at `s` and whose blob type is `type`, creating it if it
does not yet exist.  *new is set to TRUE iff this call created the atom.
The lock-free lookup/insert protocol is described in the large comment
at the top of this file.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

word
lookupBlob(const char *s, size_t length, PL_blob_t *type, int *new)
{ GET_LD
  unsigned int v0, v, ref;
  Atom *table;
  int buckets;
  Atom a, head;

  if ( !type->registered )		/* avoid deadlock */
    PL_register_blob_type(type);
  v0 = MurmurHashAligned2(s, length, MURMUR_SEED);

redo:

  acquire_atom_table(table, buckets);

  v  = v0 & (buckets-1);		/* buckets is a power of two */
  head = table[v];			/* remember head for the CAS insert */
  acquire_atom_bucket(table+v);
  DEBUG(MSG_HASH_STAT, GD->atoms.lookups++);

  if ( true(type, PL_BLOB_UNIQUE) )
  { for(a = table[v]; a; a = a->next)
    { DEBUG(MSG_HASH_STAT, GD->atoms.cmps++);
      ref = a->references;
      if ( ATOM_IS_RESERVED(ref) &&
	   length == a->length &&
	   type == a->type &&
	   same_name(a, s, length, type) )
      { if ( !ATOM_IS_VALID(ref) )	/* creator not done yet; retry */
	  goto redo;
#ifdef O_ATOMGC
	/* built-in atoms are not reference counted */
        if ( indexAtom(a->atom) >= GD->atoms.builtin &&
	     !likely(bump_atom_references(a, ref)) )
	  break;			/* atom was just GCed.  Re-create */
#endif
#ifdef O_DEBUG_ATOMGC
        if ( atomLogFd && tracking(a) )
          Sfprintf(atomLogFd, "Lookup `%s' at (#%d)\n",
		   a->name, indexAtom(a->atom));
#endif
        *new = FALSE;
	release_atom_table();
	release_atom_bucket();
	return a->atom;
      }
    }
  }

  /* not found: grow the hash table first if the load factor exceeds 2 */
  if ( GD->atoms.table->buckets * 2 < GD->statistics.atoms )
  { int rc;

    PL_LOCK(L_REHASH_ATOMS);
    rc = rehashAtoms();
    PL_UNLOCK(L_REHASH_ATOMS);

    if ( !rc )
      outOfCore();
  }

  /* table replaced or bucket changed while scanning: rescan */
  if ( !( table == GD->atoms.table->table && head == table[v] ) )
    goto redo;

  a = reserveAtom();
  a->length = length;
  a->type = type;
  if ( false(type, PL_BLOB_NOCOPY) )	/* copy the name into the atom */
  { if ( type->padding )
    { size_t pad = type->padding;

      a->name = PL_malloc_atomic(length+pad);
      memcpy(a->name, s, length);
      memset(a->name+length, 0, pad);	/* zero padding terminates text */
      ATOMIC_ADD(&GD->statistics.atom_string_space, length+pad);
    } else
    { a->name = PL_malloc(length);
      memcpy(a->name, s, length);
      ATOMIC_ADD(&GD->statistics.atom_string_space, length);
    }
  } else
  { a->name = (char *)s;		/* NOCOPY: share the caller's buffer */
  }

#ifdef O_TERMHASH
  a->hash_value = v0;
#endif

  if ( true(type, PL_BLOB_UNIQUE) )
  { a->next = table[v];
    if ( !( !GD->atoms.rehashing &&	/* See (**) above */
            COMPARE_AND_SWAP_PTR(&table[v], head, a) &&
	    table == GD->atoms.table->table ) )
    { /* lost the insert race or the table changed: undo and retry */
      if ( false(type, PL_BLOB_NOCOPY) )
        PL_free(a->name);
      a->type = ATOM_TYPE_INVALID;
      a->name = "<race>";
      MEMORY_BARRIER();
      a->references = 0;		/* return the slot to the free pool */
      goto redo;
    }
  }

#ifdef O_ATOMGC
  /* publish: VALID makes the atom findable by other threads */
  a->references = 1 | ATOM_VALID_REFERENCE | ATOM_RESERVED_REFERENCE;
#endif

#ifdef O_DEBUG_ATOMGC
  if ( atomLogFd && tracking(a) )
    Sfprintf(atomLogFd, "Created `%s' at (#%d)\n",
	     a->name, indexAtom(a->atom));
#endif
  *new = TRUE;
  if ( type->acquire )
    (*type->acquire)(a->atom);

  release_atom_table();
  release_atom_bucket();

  if ( ATOMIC_INC(&GD->statistics.atoms) % 128 == 0 )
    considerAGC();			/* see (*) above */

  return a->atom;
}
647 
648 
649 word
lookupAtom(const char * s,size_t length)650 lookupAtom(const char *s, size_t length)
651 { int new;
652 
653   return lookupBlob(s, length, &text_atom, &new);
654 }
655 
656 
657 		 /*******************************
658 		 *	      ATOM-GC		*
659 		 *******************************/
660 
661 #ifdef O_ATOMGC
662 
663 #ifdef O_DEBUG_ATOMGC
664 static char *t_tracking;
665 
666 static int
tracking(const Atom a)667 tracking(const Atom a)
668 { return ( a->type == &text_atom &&
669 	   strcmp(a->name, t_tracking) == 0 );
670 }
671 
672 
/* Debug wrapper for PL_register_atom(): performs the registration and
   logs the new reference count when the atom is being tracked.  Built-in
   atoms (index < GD->atoms.builtin) are not reference counted. */
void
_PL_debug_register_atom(atom_t a,
			const char *file, int line, const char *func)
{ size_t i  = indexAtom(a);
  size_t mx = GD->atoms.highest;

  assert(i>=0 && i<mx);
  if ( i >= GD->atoms.builtin )
  { Atom atom = fetchAtomArray(i);
    unsigned int refs;

    refs = ATOM_REF_COUNT(register_atom(atom));
    if ( atomLogFd && tracking(atom) )
      Sfprintf(atomLogFd, "%s:%d: %s(): ++ (%d) for `%s' (#%d)\n",
	       file, line, func, refs, atom->name, i);
  }
}
690 
691 
/* Debug wrapper for PL_unregister_atom(): performs the unregistration
   and logs the remaining reference count when the atom is tracked.
   Built-in atoms are not reference counted. */
void
_PL_debug_unregister_atom(atom_t a,
			  const char *file, int line, const char *func)
{ size_t i  = indexAtom(a);
  size_t mx = GD->atoms.highest;

  assert(i>=0 && i<mx);
  if ( i >= GD->atoms.builtin )
  { Atom atom = fetchAtomArray(i);
    unsigned int refs;

    refs = unregister_atom(atom);
    if ( atomLogFd && tracking(atom) )
      Sfprintf(atomLogFd, "%s:%d: %s(): -- (%d) for `%s' (#%d)\n",
	       file, line, func, refs, atom->name, i);
  }
}
709 
710 
/* track_atom(+Name, +Stream) implementation: log creation, registration
   and GC events for the text atom Name to Stream.  Passing [] for
   Stream stops tracking.  The previous tracking state is always cleared
   first. */
word
pl_track_atom(term_t which, term_t stream)
{ char *s;

  if ( t_tracking )
    remove_string(t_tracking);
  t_tracking = NULL;
  atomLogFd = NULL;

  if ( PL_get_nil(stream) )		/* [] means: stop tracking */
    succeed;

  if ( !PL_get_chars(which, &s, CVT_LIST|CVT_STRING|CVT_EXCEPTION) ||
       !PL_get_stream_handle(stream, &atomLogFd) )
    return FALSE;

  /* NOTE(review): the stream is released immediately; atomLogFd is used
     without a lock afterwards — presumably acceptable for a debug
     facility. */
  PL_release_stream(atomLogFd);

  t_tracking = store_string(s);

  succeed;
}
733 #endif /*O_DEBUG_ATOMGC*/
734 
735 
736 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
737 lockAtoms() discards all currently defined atoms for garbage collection.
738 To be used after loading the program,   so we won't traverse the program
739 atoms each pass.
740 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
741 
static void
lockAtoms(void)
{ GD->atoms.builtin      = GD->atoms.highest;	/* everything below is permanent */
  GD->atoms.unregistered = 0;
}
747 
748 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
749 Mark an atom from the stacks.  We must be prepared to handle fake-atoms!
750 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
751 
/* Mark atom `a` as reachable for the current AGC pass.  Because the
   stack scan is conservative, `a` may be any word that merely looks
   like an atom: out-of-range indices and built-in (locked) atoms are
   silently ignored. */
void
markAtom(atom_t a)
{ size_t i = indexAtom(a);
  Atom ap;

  if ( i >= GD->atoms.highest )
    return;				/* not an atom */
  if ( i < GD->atoms.builtin )
    return;				/* locked range */

  ap = fetchAtomArray(i);

  if ( ATOM_IS_VALID(ap->references) && !ATOM_IS_MARKED(ap->references) )
  {
#ifdef O_DEBUG_ATOMGC
    if ( atomLogFd && tracking(ap) )
      Sfprintf(atomLogFd, "Marked `%s' at (#%d)\n", ap->name, i);
#endif
    ATOMIC_OR(&ap->references, ATOM_MARKED_REFERENCE);
  }
}
773 
/* Clear the MARKED bit on all non-builtin atoms, preparing a fresh AGC
   marking pass.  Walks the atom array block-wise (block i covers
   indices [2^i, 2^(i+1))). */
void
unmarkAtoms(void)
{ size_t index;
  int i, last=FALSE;

  for(index=GD->atoms.builtin, i=MSB(index); !last; i++)
  { size_t upto = (size_t)2<<i;
    size_t high = GD->atoms.highest;
    Atom b = GD->atoms.array.blocks[i];

    if ( upto >= high )
    { upto = high;
      last = TRUE;
    }

    for(; index<upto; index++)
    { Atom a = b + index;

      if ( ATOM_IS_MARKED(a->references) )
      { ATOMIC_AND(&a->references, ~ATOM_MARKED_REFERENCE);
      }
    }
  }
}
798 
799 
/* Free superseded atom hash tables (kept on the ->prev chain after a
   rehash) that no thread has published in its `access' record anymore.
   The current table (head of the chain) is never freed. */
void
maybe_free_atom_tables(void)
{
  AtomTable t = GD->atoms.table;
  while ( t )
  { AtomTable t2 = t->prev;
    if ( t2 && !pl_atom_table_in_use(t2) )
    { t->prev = t2->prev;		/* unlink before freeing */
      freeHeap(t2->table, t2->buckets * sizeof(Atom));
      freeHeap(t2, sizeof(atom_table));
    }
    t = t->prev;
  }
}
814 
815 
816 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
817 destroyAtom()  actually  discards  an  atom.
818 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
819 
/* Flag stored in the low bit of ->next_invalid: the name was allocated
   by us and must be freed by destroyAtom(). */
#define ATOM_NAME_MUST_FREE 0x1

/* Chain of atoms invalidated in pass one of collectAtoms(), linked
   through ->next_invalid (only AGC touches this, under its own lock). */
static Atom invalid_atoms = NULL;

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
invalidateAtom() is pass one of reclaiming atom `a` whose reference word
the caller read as `ref`: clear VALID via CAS, give the blob release
hook (or the global gc hook) a veto, unlink the atom from its hash
bucket and push it onto the invalid_atoms chain.  Returns FALSE if the
CAS lost a race or a hook refused; TRUE on success.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static int
invalidateAtom(Atom a, unsigned int ref)
{ Atom *ap;

#define ATOM_PRE_DESTROY_REFERENCE \
	(ATOM_DESTROY_REFERENCE|ATOM_RESERVED_REFERENCE)

  if ( !COMPARE_AND_SWAP_UINT(&a->references, ref, ATOM_PRE_DESTROY_REFERENCE) )
  { return FALSE;			/* someone re-referenced the atom */
  }

  if ( a->type->release )
  { if ( !(*a->type->release)(a->atom) )
    { COMPARE_AND_SWAP_UINT(&a->references, ATOM_PRE_DESTROY_REFERENCE, ref);
      return FALSE;			/* release hook says `no': restore */
    }
  } else if ( GD->atoms.gc_hook )
  { if ( !(*GD->atoms.gc_hook)(a->atom) )
    { COMPARE_AND_SWAP_UINT(&a->references, ATOM_PRE_DESTROY_REFERENCE, ref);
      return FALSE;				/* foreign hooks says `no' */
    }
  }

  a->references = ATOM_DESTROY_REFERENCE;

#ifdef O_DEBUG_ATOMGC
  if ( atomLogFd && tracking(a) )
    Sfprintf(atomLogFd, "Invalidated `%s'\n", a->name);
#endif

  /* Unlink from the hash bucket.  Only the bucket head is contended
     (lookupBlob() inserts at the head), hence CAS there and a plain
     walk for interior nodes. */
  if ( true(a->type, PL_BLOB_UNIQUE) )
  { AtomTable table;
    uintptr_t mask;

  redo:
    table = GD->atoms.table;
    mask = table->buckets-1;
    ap = &table->table[a->hash_value & mask];

    if ( *ap == a )
    { if ( !COMPARE_AND_SWAP_PTR(&table->table[a->hash_value & mask], a, a->next) )
      { goto redo;
      }
    }
    else
    { for( ; ; ap = &(*ap)->next )
      { assert(*ap);		// MT: TBD: failed a few times!?

        if ( *ap == a )
        { *ap = a->next;
          break;
        }
      }
    }
  }

  /* Queue for pass two; remember whether the name must be freed.  The
     name itself stays valid until destroyAtom() so concurrent lookups
     can still compare against it safely. */
  if ( false(a->type, PL_BLOB_NOCOPY) )
  { size_t slen = a->length + a->type->padding;
    ATOMIC_SUB(&GD->statistics.atom_string_space, slen);
    ATOMIC_ADD(&GD->statistics.atom_string_space_freed, slen);
    a->next_invalid = (uintptr_t)invalid_atoms | ATOM_NAME_MUST_FREE;
  } else
  { a->next_invalid = (uintptr_t)invalid_atoms;
  }
  invalid_atoms = a;
  a->length = 0;

  return TRUE;
}
893 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
destroyAtom() is pass two: actually reclaim an invalidated atom.  We
refuse (return FALSE) if any thread has published (in `buckets`, the
NULL-terminated result of pl_atom_buckets_in_use()) the bucket this atom
hashes to in any live table: that thread may still be comparing against
a->name.  See the race scenario in the top-of-file comment.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static int
destroyAtom(Atom a, Atom **buckets)
{ unsigned int v;
  AtomTable t;
  size_t index;

  while ( buckets && *buckets )
  { t = GD->atoms.table;
    while ( t )
    { v = a->hash_value & (t->buckets-1);
      if ( *buckets == t->table+v )
      { return FALSE;			/* bucket in use: try next AGC */
      }
      t = t->prev;
    }
    buckets++;
  }

#ifdef O_DEBUG_ATOMGC
  /* tracking() always returns FALSE as the type is lost */
  if ( atomLogFd && tracking(a) )
    Sfprintf(atomLogFd, "Deleted `%s'\n", a->name);
#endif

  if ( a->next_invalid & ATOM_NAME_MUST_FREE )
  { PL_free(a->name);
  }

  a->name = "<reclaimed>";
  a->type = ATOM_TYPE_INVALID;
  MEMORY_BARRIER();			/* publish before freeing the slot */
  a->references = 0;

  index = indexAtom(a->atom);
  if ( GD->atoms.no_hole_before > index )
    GD->atoms.no_hole_before = index;	/* new hole for reserveAtom() */

  return TRUE;
}
933 
934 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
collectAtoms() performs the two reclamation passes described at the top
of this file.  Pass one scans the atom array: unmarked atoms with a zero
reference count are invalidated; all other atoms get their MARKED bit
cleared.  Pass two walks the invalid_atoms chain and destroys whatever
destroyAtom() permits, splicing survivors back into the chain for the
next AGC.  Returns the number of atoms reclaimed.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static size_t
collectAtoms(void)
{ size_t reclaimed = 0;
  size_t unregistered = 0;
  size_t index;
  int i, last=FALSE;
  Atom temp, next, prev = NULL;	 /* = NULL to keep compiler happy */

  for(index=GD->atoms.builtin, i=MSB(index); !last; i++)
  { size_t upto = (size_t)2<<i;
    size_t high = GD->atoms.highest;
    Atom b = GD->atoms.array.blocks[i];

    if ( upto >= high )
    { upto = high;
      last = TRUE;
    }

    for(; index<upto; index++)
    { Atom a = b + index;
      unsigned int ref = a->references;

      if ( !ATOM_IS_VALID(ref) )
      { continue;
      }

      if ( !ATOM_IS_MARKED(ref) && (ATOM_REF_COUNT(ref) == 0) )
      { invalidateAtom(a, ref);		/* garbage: queue for pass two */
      } else
      {	ATOMIC_AND(&a->references, ~ATOM_MARKED_REFERENCE);
        if ( ATOM_REF_COUNT(ref) == 0 )	/* alive only via the mark */
	  unregistered++;
      }
    }
  }

  Atom** buckets = pl_atom_buckets_in_use();

  /* First, pop destroyable atoms from the head of the chain ... */
  temp = invalid_atoms;
  while ( temp && temp == invalid_atoms )
  { next = (Atom)(temp->next_invalid & ~ATOM_NAME_MUST_FREE);
    if ( destroyAtom(temp, buckets) )
    { reclaimed++;
      invalid_atoms = next;
    }
    prev = temp;
    temp = next;
  }
  /* ... then unlink destroyable interior atoms, preserving prev's
     ATOM_NAME_MUST_FREE flag bit while rewriting its link. */
  while ( temp )
  { next = (Atom)(temp->next_invalid & ~ATOM_NAME_MUST_FREE);
    if ( destroyAtom(temp, buckets) )
    { reclaimed++;
      prev->next_invalid = ((uintptr_t)next | (prev->next_invalid & ATOM_NAME_MUST_FREE));
    } else
    { prev = temp;
    }
    temp = next;
  }

  if ( buckets )
    PL_free(buckets);
  maybe_free_atom_tables();

  GD->atoms.unregistered = GD->atoms.non_garbage = unregistered;

  return reclaimed;
}
1002 
1003 
1004 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1005 pl_garbage_collect_atoms() realised the atom   garbage  collector (AGC).
1006 
1007 Issues around the design of the atom  garbage collector are explained at
1008 the start of this file.
1009 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1010 
foreign_t
pl_garbage_collect_atoms(void)
{ GET_LD
  int64_t oldcollected;
  int verbose = truePrologFlag(PLFLAG_TRACE_GC) && !LD->in_print_message;
  double t;
  sigset_t set;
  size_t reclaimed;
  int rc = TRUE;

  if ( GD->cleaning != CLN_NORMAL )	/* Cleaning up */
    return TRUE;

  /* Only one AGC at a time; CAS acts as the mutual-exclusion gate */
  if ( !COMPARE_AND_SWAP_INT(&GD->atoms.gc_active, FALSE, TRUE) )
    return TRUE;

  if ( verbose )
  { if ( !printMessage(ATOM_informational,
		       PL_FUNCTOR_CHARS, "agc", 1,
		         PL_CHARS, "start") )
    { GD->atoms.gc_active = FALSE;	/* release the gate before failing */
      return FALSE;
    }
  }

  /* Mark phase: keep the hash table stable and block signals while we
     mark atoms reachable from all stacks and message queues */
  PL_LOCK(L_REHASH_ATOMS);
  blockSignals(&set);
  t = CpuTime(CPU_USER);
  unmarkAtoms();
  markAtomsOnStacks(LD);
#ifdef O_PLMT
  forThreadLocalDataUnsuspended(markAtomsOnStacks, 0);
  markAtomsMessageQueues();
#endif
  oldcollected = GD->atoms.collected;
  reclaimed = collectAtoms();		/* sweep phase */
  GD->atoms.collected += reclaimed;
  ATOMIC_SUB(&GD->statistics.atoms, reclaimed);
  t = CpuTime(CPU_USER) - t;
  GD->atoms.gc_time += t;
  GD->atoms.gc++;
  unblockSignals(&set);
  PL_UNLOCK(L_REHASH_ATOMS);

  if ( verbose )
    rc = printMessage(ATOM_informational,
		      PL_FUNCTOR_CHARS, "agc", 1,
		        PL_FUNCTOR_CHARS, "done", 3,
		          PL_INT64, GD->atoms.collected - oldcollected,
		          PL_INT, GD->statistics.atoms,
		          PL_DOUBLE, (double)t);

  GD->atoms.gc_active = FALSE;

  return rc;
}
1067 
1068 
1069 PL_agc_hook_t
PL_agc_hook(PL_agc_hook_t new)1070 PL_agc_hook(PL_agc_hook_t new)
1071 { PL_agc_hook_t old = GD->atoms.gc_hook;
1072   GD->atoms.gc_hook = new;
1073 
1074   return old;
1075 }
1076 
1077 
1078 static void
considerAGC(void)1079 considerAGC(void)
1080 { if ( GD->atoms.margin != 0 &&
1081        GD->atoms.unregistered >= GD->atoms.non_garbage + GD->atoms.margin )
1082   { signalGCThread(SIG_ATOM_GC);
1083   }
1084 }
1085 
1086 
1087 #endif /*O_ATOMGC*/
1088 
1089 #undef PL_register_atom
1090 #undef PL_unregister_atom
1091 
/* resetAtoms() is the reset hook for the atom subsystem.  Currently a
   no-op: nothing in the atom administration needs restoring. */
void
resetAtoms()
{
}
1096 
1097 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1098 (un)register atoms. If  possible,  this   is  implemented  using  atomic
1099 operations. This should be safe because:
1100 
1101     - When we register an atom, it must be referenced from somewhere
1102       else.
1103     - When we unregister an atom, it must have at least one reference.
1104 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1105 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
register_atom() increments the reference count of  `p`  using a CAS
loop and returns the new reference word.   If  incrementing would wrap
the count field to 0 the count is saturated:  it is left untouched and
the old word is returned, making the  atom effectively permanent.  If
the count goes 0 -> 1 the atom no longer counts as "unregistered".
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static unsigned int
register_atom(volatile Atom p)
{ for(;;)
  { unsigned int ref  = p->references;
    unsigned int nref = ref+1;

    if ( ATOM_REF_COUNT(nref) != 0 )	/* count field does not overflow */
    { if ( COMPARE_AND_SWAP_UINT(&p->references, ref, nref) )
      { if ( ATOM_REF_COUNT(nref) == 1 )
	  ATOMIC_DEC(&GD->atoms.unregistered);
	return nref;
      }					/* lost the race: retry */
    } else
    { return ref;			/* saturated: atom is permanent */
    }
  }
}
1123 
1124 
1125 void
PL_register_atom(atom_t a)1126 PL_register_atom(atom_t a)
1127 {
1128 #ifdef O_ATOMGC
1129   size_t index = indexAtom(a);
1130 
1131   if ( index >= GD->atoms.builtin )
1132   { Atom p = fetchAtomArray(index);
1133 
1134     register_atom(p);
1135   }
1136 #endif
1137 }
1138 
1139 
/* dbgAtomName() returns a printable name for atom `a` for debugging.
   For plain text atoms *enc is set to 'L' (Latin-1); for UCS atoms to
   'W' (wide), and the stored name is returned directly.  For any other
   blob type the blob's write() hook is rendered into a memory stream;
   the buffer is returned through *buf and must be freed by the caller. */
static char *
dbgAtomName(Atom a, char *enc, char **buf)
{ if ( a->type == &text_atom )
  { if ( enc ) *enc = 'L';
    return a->name;
  } else if ( isUCSAtom(a) )
  { if ( enc ) *enc = 'W';
    return a->name;
  } else
  { size_t len = 0;
    IOSTREAM *fd = Sopenmem(buf, &len, "w");
    (a->type->write)(fd, a->atom, 0);
    Sclose(fd);
    return *buf;
  }
}
1156 
1157 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1158 Foreign code reduces the reference count. This is safe, unless we are in
1159 the following scenario:
1160 
1161   - A threads has done its atom-marking during a GC and is continued.
1162   - Now, it fetches an atom from foreign code and the foreign code calls
1163     PL_unregister_atom() which drops the reference count to zero. We can
1164     now get into the position where the atom is no longer accessible
1165     from foreign code and has not be seen while marking atoms from the
1166     stack.
1167 
1168 The locking version of this code  is   not  a  problem, as the reference
1169 count cannot be dropped as  long  as   AGC  is  running. In the unlocked
1170 version, we need  to  replace  1   by  ATOM_MARKED_REFERENCE  if  AGC is
1171 running. We can be a bit sloppy here:  if   we  do this while AGC is not
1172 running we merely prevent the atom to be  collected in the next AGC. The
1173 next AGC resets the flag  and  thus   the  atom  becomes a candidate for
1174 collection afterwards.  So, basically we must do something like this:
1175 
1176   if ( agc_running )
1177   { do
1178     { unsigned int oldref = p->references;
1179       unsigned int newref = oldref == 1 ? ATOM_MARKED_REFERENCE : oldref-1;
1180     } while( !compare_and_swap(&p->references, oldref, newref) );
1181   } else
1182   { atomic_dec(&p->references);
1183   }
1184 
1185 But, this fails because AGC might kick in between agc_running was tested
1186 FALSE the atomic decrement. This is  fixed   by  putting the atom we are
1187 unregistering  in  LD->atoms.unregistered  and  mark    this  atom  from
1188 markAtomsOnStacks().
1189 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1190 
/* unregister_atom() drops one reference from atom `p` and returns the
   remaining reference count.  See the scenario described in the comment
   above for why the behaviour differs depending on whether AGC is
   currently active. */
static unsigned int
unregister_atom(volatile Atom p)
{ unsigned int refs;

  if ( !ATOM_IS_VALID(p->references) )
  { Sdprintf("OOPS: PL_unregister_atom('%s'): invalid atom\n", p->name);
    trap_gdb();
  }

  /* Saturated counter: the atom became permanent (see register_atom()) */
  if ( unlikely(ATOM_REF_COUNT(p->references+1) == 0) )
    return ATOM_REF_COUNT(~(unsigned int)0);

  if ( GD->atoms.gc_active )
  { unsigned int oldref, newref;

    /* CAS loop: decrement, but set the mark bit when the count drops
       to 0 so the running AGC will not collect this atom. */
    do
    { oldref = p->references;
      newref = oldref - 1;

      if ( ATOM_REF_COUNT(newref) == 0 )
	newref |= ATOM_MARKED_REFERENCE;
    } while( !COMPARE_AND_SWAP_UINT(&p->references, oldref, newref) );
    refs = ATOM_REF_COUNT(newref);
#ifdef O_DEBUG_ATOMGC
    if ( refs == 0 && atomLogFd && tracking(p) )
      Sfprintf(atomLogFd, "Marked '%s' at (#%d) (unregistered)\n",
	       p->name, indexAtom(p->atom));
#endif
  } else
  { GET_LD

    /* No AGC active: publish the atom in LD->atoms.unregistering so an
       AGC that starts right now still marks it from markAtomsOnStacks() */
    if ( HAS_LD )
    { LD->atoms.unregistering = p->atom;
#ifdef O_DEBUG_ATOMGC
    if ( atomLogFd && tracking(p) )
      Sfprintf(atomLogFd, "Set atoms.unregistering for '%s' at (#%d)\n",
	       p->name, indexAtom(p->atom));
#endif
    }
    if ( (refs=ATOM_REF_COUNT(ATOMIC_DEC(&p->references))) == 0 )
      ATOMIC_INC(&GD->atoms.unregistered);
  }

  if ( refs == ATOM_REF_COUNT((unsigned int)-1) )	/* underflow: too many unregisters */
  { char fmt[100];
    char *enc;
    char *buf = NULL;

    strcpy(fmt, "OOPS: PL_unregister_atom('%Ls'): -1 references\n");
    enc = strchr(fmt, '%')+1;		/* dbgAtomName() patches 'L' -> 'W' here */

    Sdprintf(fmt, dbgAtomName(p, enc, &buf));
    if ( buf )
      PL_free(buf);
    trap_gdb();
  }

  return refs;
}
1250 
1251 
1252 void
PL_unregister_atom(atom_t a)1253 PL_unregister_atom(atom_t a)
1254 {
1255 #ifdef O_ATOMGC
1256   size_t index = indexAtom(a);
1257 
1258   if ( index >= GD->atoms.builtin )
1259   { Atom p;
1260 
1261     p = fetchAtomArray(index);
1262     unregister_atom(p);
1263   }
1264 #endif
1265 }
1266 
1267 
1268 /* TRUE if `a` is a possible candidate for AGC
1269 */
1270 int
is_volatile_atom(atom_t a)1271 is_volatile_atom(atom_t a)
1272 {
1273 #ifdef O_ATOMGC
1274   size_t index = indexAtom(a);
1275 
1276   if ( index >= GD->atoms.builtin )
1277   { Atom p = fetchAtomArray(index);
1278     return !p->references;
1279   }
1280 #endif
1281   return FALSE;
1282 }
1283 
1284 
1285 		 /*******************************
1286 		 *	      CHECK		*
1287 		 *******************************/
1288 
1289 #ifdef O_DEBUG
1290 
1291 static int
findAtomSelf(Atom a)1292 findAtomSelf(Atom a)
1293 { GET_LD
1294   Atom *table;
1295   int buckets;
1296   Atom head, ap;
1297   unsigned int v;
1298 
1299 redo:
1300   acquire_atom_table(table, buckets);
1301   v = a->hash_value & (buckets-1);
1302   head = table[v];
1303   acquire_atom_bucket(table+v);
1304 
1305   for(ap=head; ap; ap = ap->next )
1306   { if ( ap == a )
1307     { release_atom_table();
1308       release_atom_bucket();
1309       return TRUE;
1310     }
1311   }
1312 
1313   if ( !( table == GD->atoms.table->table && head == table[v] ) )
1314     goto redo;
1315 
1316   return FALSE;
1317 }
1318 
1319 
1320 int
checkAtoms_src(const char * file,int line)1321 checkAtoms_src(const char *file, int line)
1322 { size_t index;
1323   int i, last=FALSE;
1324   int errors = 0;
1325 
1326   for(index=1, i=0; !last; i++)
1327   { size_t upto = (size_t)2<<i;
1328     size_t high = GD->atoms.highest;
1329     Atom b = GD->atoms.array.blocks[i];
1330 
1331     if ( upto >= high )
1332     { upto = high;
1333       last = TRUE;
1334     }
1335 
1336     for(; index<upto; index++)
1337     { Atom a = b + index;
1338 
1339       if ( ATOM_IS_VALID(a->references) )
1340       { if ( !a->type || !a->name || (int)ATOM_REF_COUNT(a->references) < 0 )
1341 	{ size_t bs = (size_t)1<<i;
1342 	  Sdprintf("%s%d: invalid atom %p at index %zd in "
1343 		   "block at %p (size %d)\n",
1344 		   file, line, a, index, b+bs, bs);
1345 	  errors++;
1346 	  trap_gdb();
1347 	}
1348 
1349 	if ( true(a->type, PL_BLOB_UNIQUE) )
1350 	{ if ( !findAtomSelf(a) )
1351 	  { Sdprintf("%s%d: cannot find self: %p\n", file, line, a);
1352 	  }
1353 	}
1354       }
1355     }
1356   }
1357 
1358   return errors;
1359 }
1360 
1361 #endif /*O_DEBUG*/
1362 
1363 
1364 		 /*******************************
1365 		 *	    REHASH TABLE	*
1366 		 *******************************/
1367 
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rehashAtoms() doubles the atom hash table when  it  holds  fewer than
half as many buckets as there are  atoms.  The old table is kept linked
through newtab->prev so concurrent readers holding the old pointer stay
valid; old tables are freed later (see maybe_free_atom_tables() usage in
collectAtoms()).  NOTE(review): presumably called with L_REHASH_ATOMS
held -- confirm at the call sites, which are outside this view.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

static int
rehashAtoms(void)
{ AtomTable newtab;
  uintptr_t mask;
  size_t index;
  int i, last=FALSE;

  if ( GD->cleaning != CLN_NORMAL )
    return TRUE;			/* no point anymore and foreign ->type */
					/* pointers may have gone */

  if ( GD->atoms.table->buckets * 2 >= GD->statistics.atoms )
    return TRUE;			/* table is still big enough */

  if ( !(newtab = allocHeap(sizeof(*newtab))) )
    return FALSE;
  newtab->buckets = GD->atoms.table->buckets * 2;
  if ( !(newtab->table = allocHeapOrHalt(newtab->buckets * sizeof(Atom))) )
  { freeHeap(newtab, sizeof(*newtab));
    return FALSE;
  }
  memset(newtab->table, 0, newtab->buckets * sizeof(Atom));
  newtab->prev = GD->atoms.table;	/* keep old table for readers */
  mask = newtab->buckets-1;

  DEBUG(MSG_HASH_STAT,
	Sdprintf("rehashing atoms (%d --> %d)\n",
		 GD->atoms.table->buckets, newtab->buckets));

  GD->atoms.rehashing = TRUE;

  for(index=1, i=0; !last; i++)
  { size_t upto = (size_t)2<<i;		/* exclusive end of array block i */
    size_t high = GD->atoms.highest;
    Atom b = GD->atoms.array.blocks[i];

    if ( upto >= high )
    { upto = high;
      last = TRUE;
    }

    for(; index<upto; index++)
    { volatile Atom a = b + index;
      unsigned int ref;
redo:
      ref = a->references;
      if ( ATOM_IS_RESERVED(ref) )
      { if ( !ATOM_IS_VALID(ref) )
	  goto redo;			/* spin: atom being created */
	if ( true(a->type, PL_BLOB_UNIQUE) )
	{ size_t v;
	  v = a->hash_value & mask;	/* link into new, larger table */
	  a->next = newtab->table[v];
	  newtab->table[v] = a;
	}
      }
    }
  }

  GD->atoms.table = newtab;		/* publish the new table */
  GD->atoms.rehashing = FALSE;

  return TRUE;
}
1432 
1433 
1434 word
pl_atom_hashstat(term_t idx,term_t n)1435 pl_atom_hashstat(term_t idx, term_t n)
1436 { GET_LD
1437   long i, m;
1438   int buckets;
1439   Atom *table;
1440   Atom a;
1441 
1442   acquire_atom_table(table, buckets);
1443 
1444   if ( !PL_get_long(idx, &i) || i < 0 || i >= (long)buckets )
1445   { release_atom_table();
1446     fail;
1447   }
1448   for(m = 0, a = table[i]; a; a = a->next)
1449     m++;
1450 
1451   release_atom_table();
1452 
1453   return PL_unify_integer(n, m);
1454 }
1455 
1456 
1457 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1458 resetListAtoms() resets the atom '[|]' to point  to '.' and switches the
1459 type for '[]' back to  the  normal   text_atom  type.  This is needed to
1460 switch to traditional mode if the atom table has been initialised.
1461 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1462 
int
resetListAtoms(void)
{ Atom a = atomValue(ATOM_dot);

  if ( strcmp(a->name, ".") != 0 )	/* currently '[|]': rename to '.' */
  { Atom *ap2 = &GD->atoms.table->table[a->hash_value & (GD->atoms.table->buckets-1)];
    unsigned int v;
    static char *s = ".";

    DEBUG(0, Sdprintf("Resetting list constructor to ./2\n"));

    /* Unlink the atom from its current hash chain */
    for( ; ; ap2 = &(*ap2)->next )
    { assert(*ap2);		/* MT: TBD: failed a few times!? */

      if ( *ap2 == a )
      { *ap2 = a->next;
	goto modify;
      }
    }
    assert(0);

  modify:
    /* Rename in place and re-insert in the bucket for the new hash */
    a->name   = s;
    a->length = strlen(s);
    a->hash_value = MurmurHashAligned2(s, a->length, MURMUR_SEED);
    v = a->hash_value & (GD->atoms.table->buckets-1);

    a->next      = GD->atoms.table->table[v];
    GD->atoms.table->table[v] = a;
  }

  a = atomValue(ATOM_nil);
  a->type = &text_atom;			/* '[]' becomes a plain text atom */

  return TRUE;
}
1499 
1500 
1501 static void
registerBuiltinAtoms(void)1502 registerBuiltinAtoms(void)
1503 { int size = sizeof(atoms)/sizeof(char *) - 1;
1504   Atom a;
1505   const ccharp *sp;
1506   size_t index;
1507   int idx;
1508 
1509   GD->statistics.atoms = size;
1510 
1511   for( sp = atoms, index = GD->atoms.highest; *sp; sp++, index++ )
1512   { const char *s = *sp;
1513     size_t len = strlen(s);
1514     unsigned int v0, v;
1515 
1516     idx = MSB(index);
1517 
1518     if ( !GD->atoms.array.blocks[idx] )
1519     { allocateAtomBlock(idx);
1520     }
1521 
1522     if ( *s == '.' && len == 1 && !GD->options.traditional )
1523     { s = "[|]";
1524       len = strlen(s);
1525     }
1526 
1527     v0 = MurmurHashAligned2(s, len, MURMUR_SEED);
1528     v  = v0 & (GD->atoms.table->buckets-1);
1529 
1530     a = &GD->atoms.array.blocks[idx][index];
1531     a->atom       = (index<<LMASK_BITS)|TAG_ATOM;
1532     a->name       = (char *)s;
1533     a->length     = len;
1534     a->type       = &text_atom;
1535 #ifdef O_ATOMGC
1536     a->references = ATOM_VALID_REFERENCE | ATOM_RESERVED_REFERENCE;
1537 #endif
1538 #ifdef O_TERMHASH
1539     a->hash_value = v0;
1540 #endif
1541     a->next       = GD->atoms.table->table[v];
1542     GD->atoms.table->table[v]  = a;
1543 
1544     GD->atoms.no_hole_before = index+1;
1545     GD->atoms.highest = index+1;
1546   }
1547 }
1548 
1549 
1550 #if O_DEBUG
/* exitAtoms() is a halt hook (debug builds only; see do_init_atoms())
   that prints hash statistics gathered by lookupAtom(). */
static int
exitAtoms(int status, void *context)
{ (void)status;
  (void)context;

  Sdprintf("hashstat: %d lookupAtom() calls used %d strcmp() calls\n",
	   GD->atoms.lookups, GD->atoms.cmps);

  return 0;
}
1561 #endif
1562 
1563 
/* do_init_atoms() initialises the atom subsystem exactly once: create
   the hash table, register the builtin atoms and the text blob type,
   and set the AGC margin.  Guarded by L_INIT_ATOMS so concurrent
   callers are safe; a second call is a no-op. */
void
do_init_atoms(void)
{ PL_LOCK(L_INIT_ATOMS);
  if ( !GD->atoms.initialised )			/* Atom hash table */
  { GD->atoms.table = allocHeapOrHalt(sizeof(*GD->atoms.table));
    GD->atoms.table->buckets = ATOMHASHSIZE;
    GD->atoms.table->table = allocHeapOrHalt(ATOMHASHSIZE * sizeof(Atom));
    memset(GD->atoms.table->table, 0, ATOMHASHSIZE * sizeof(Atom));
    GD->atoms.table->prev = NULL;

    GD->atoms.highest = 1;		/* index 0 is not used */
    GD->atoms.no_hole_before = 1;
    registerBuiltinAtoms();
#ifdef O_ATOMGC
    GD->atoms.margin = 10000;		/* default AGC trigger margin */
    lockAtoms();
#endif
    text_atom.atom_name = ATOM_text;
    PL_register_blob_type(&text_atom);

    DEBUG(MSG_HASH_STAT, PL_on_halt(exitAtoms, NULL));
#ifdef O_RESERVED_SYMBOLS
    initReservedSymbols();
#endif
    GD->atoms.initialised = TRUE;
  }
  PL_UNLOCK(L_INIT_ATOMS);
}
1592 
1593 
1594 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1595 cleanupAtoms() is called at shutdown. There are three possible scenarios
1596 these days: (1) do not cleanup at  all, (2) cleanup the main structures,
1597 leaving the rest to GC or (3) cleanup the whole thing.
1598 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
1599 
1600 void
cleanupAtoms(void)1601 cleanupAtoms(void)
1602 { AtomTable table;
1603   size_t index;
1604   int i, last=FALSE;
1605 
1606   for(index=GD->atoms.builtin, i=MSB(index); !last; i++)
1607   { size_t upto = (size_t)2<<i;
1608     size_t high = GD->atoms.highest;
1609     Atom b = GD->atoms.array.blocks[i];
1610 
1611     if ( upto >= high )
1612     { upto = high;
1613       last = TRUE;
1614     }
1615 
1616     for(; index<upto; index++)
1617     { Atom a = b + index;
1618       unsigned int ref = a->references;
1619 
1620       if ( !ATOM_IS_VALID(ref) )
1621         continue;
1622 
1623       if ( a->type->release )
1624         (*a->type->release)(a->atom);
1625       else if ( GD->atoms.gc_hook )
1626         (*GD->atoms.gc_hook)(a->atom);
1627 
1628       if ( false(a->type, PL_BLOB_NOCOPY) )
1629         PL_free(a->name);
1630     }
1631   }
1632 
1633   i = 0;
1634   while( GD->atoms.array.blocks[i] )
1635   { size_t bs = (size_t)1<<i;
1636     PL_free(GD->atoms.array.blocks[i++] + bs);
1637   }
1638 
1639   for(i=0; i<256; i++)			/* char-code -> char-atom map */
1640   { atom_t *p;
1641 
1642     if ( (p=GD->atoms.for_code[i]) )
1643     { GD->atoms.for_code[i] = NULL;
1644       PL_free(p);
1645     }
1646   }
1647 
1648   table = GD->atoms.table;
1649   while ( table )
1650   { AtomTable prev = table->prev;
1651     freeHeap(table->table, table->buckets * sizeof(Atom));
1652     freeHeap(table, sizeof(atom_table));
1653     table = prev;
1654   }
1655   if ( GD->atoms.table )
1656   { GD->atoms.table = NULL;
1657   }
1658 }
1659 
1660 
/* current_blob() is the shared implementation of current_blob/2 and
   current_atom/1 (type == 0).  As a non-deterministic foreign predicate
   it enumerates the atom array; `state` is the index to resume from on
   FRG_REDO.  Enumerated atoms get their reference count bumped around
   the unification so AGC cannot reclaim them while in use. */
static word
current_blob(term_t a, term_t type, frg_code call, intptr_t state ARG_LD)
{ atom_t type_name = 0;
  size_t index;
  int i, last=0;

  switch( call )
  { case FRG_FIRST_CALL:
    { PL_blob_t *bt;

      if ( PL_is_blob(a, &bt) )		/* deterministic: blob given */
      { if ( type )
	  return PL_unify_atom(type, bt->atom_name);
	else if ( false(bt, PL_BLOB_TEXT) )
	  return FALSE;			/* current_atom/1: text blobs only */

	succeed;
      }
      if ( !PL_is_variable(a) )
	return FALSE;

      index = 1;			/* start enumeration */
      break;
    }
    case FRG_REDO:
      index = state;			/* resume where we stopped */
      break;
    case FRG_CUTTED:
    default:
      return TRUE;
  }

  if ( type )
  { if ( !PL_is_variable(type) &&
	 !PL_get_atom_ex(type, &type_name) )
      return FALSE;			/* type must be atom or variable */
  }

  for(i=MSB(index); !last; i++)
  { size_t upto = (size_t)2<<i;		/* exclusive end of array block i */
    size_t high = GD->atoms.highest;
    Atom b = GD->atoms.array.blocks[i];

    if ( upto >= high )
    { upto = high;
      last = TRUE;
    }

    for(; index<upto; index++)
    { Atom atom = b + index;
      unsigned int refs = atom->references;
      PL_blob_t *btype = atom->type;
      int rc;

      if ( ATOM_IS_VALID(refs) && btype &&
	   (!type_name || type_name == btype->atom_name) &&
	   atom->atom != ATOM_garbage_collected &&
	   bump_atom_references(atom, refs) )	/* protect from AGC */
      { DEBUG(CHK_ATOM_GARBAGE_COLLECTED,
	      /* avoid trap through linkVal__LD() check */
	      if ( atom->atom == ATOM_garbage_collected )
	      { PL_unregister_atom(atom->atom);
	        continue;
	      });

	if ( type )
	{ if ( !type_name )
	  { if ( !PL_unify_atom(type, btype->atom_name) )
	    { PL_unregister_atom(atom->atom);
	      return FALSE;
	    }
	  }
	} else if ( false(btype, PL_BLOB_TEXT) )
	{ PL_unregister_atom(atom->atom);	/* current_atom/1: skip non-text */
	  continue;
	}

	rc = PL_unify_atom(a, atom->atom);
	PL_unregister_atom(atom->atom);
	if ( rc )
	  ForeignRedoInt(index+1);	/* leave a choice point */
	else
	  return rc;
      }
    }
  }

  return FALSE;
}
1750 
1751 
/** current_blob(?Blob, ?Type) is nondet.

    Enumerate blobs and their types.  See current_blob() above.
*/
static
PRED_IMPL("current_blob", 2, current_blob, PL_FA_NONDETERMINISTIC)
{ PRED_LD

  return current_blob(A1, A2, CTX_CNTRL, CTX_INT PASS_LD);
}
1758 
1759 
/** current_atom(?Atom) is nondet.

    Enumerate text atoms.  Implemented by current_blob() with type 0,
    which restricts the enumeration to PL_BLOB_TEXT blobs.
*/
static
PRED_IMPL("current_atom", 1, current_atom, PL_FA_NONDETERMINISTIC)
{ PRED_LD

  return current_blob(A1, 0, CTX_CNTRL, CTX_INT PASS_LD);
}
1766 
1767 
1768 /** blob(@Term, ?Type) is semidet
1769 
1770 Type-test for a blob.
1771 */
1772 
1773 static
1774 PRED_IMPL("blob", 2, blob, 0)
1775 { PRED_LD
1776   PL_blob_t *bt;
1777 
1778   if ( PL_is_blob(A1, &bt) )
1779     return PL_unify_atom(A2, bt->atom_name);
1780 
1781   return FALSE;
1782 }
1783 
1784 
1785 static
1786 PRED_IMPL("$atom_references", 2, atom_references, 0)
1787 { PRED_LD
1788   atom_t atom;
1789 
1790   if ( PL_get_atom_ex(A1, &atom) )
1791   { Atom av = atomValue(atom);
1792 
1793     return PL_unify_integer(A2, ATOM_REF_COUNT(av->references));
1794   }
1795 
1796   fail;
1797 }
1798 
1799 		 /*******************************
1800 		 *	 ATOM COMPLETION	*
1801 		 *******************************/
1802 
1803 #define ALT_SIZ 80		/* maximum length of one alternative */
1804 #define ALT_MAX 256		/* maximum number of alternatives */
1805 #define stringMatch(m)	((m)->name->name)
1806 
1807 typedef struct match
1808 { Atom		name;
1809   size_t	length;
1810 } *Match;
1811 
1812 
1813 /* An atom without references cannot be part of the program
1814 */
1815 
1816 static int
global_atom(Atom a)1817 global_atom(Atom a)
1818 { return ( ATOM_REF_COUNT(a->references) != 0 ||
1819 	   indexAtom(a->atom) < GD->atoms.builtin );
1820 }
1821 
1822 
1823 static int
is_identifier_text(PL_chars_t * txt)1824 is_identifier_text(PL_chars_t *txt)
1825 { if ( txt->length == 0 )
1826     return FALSE;
1827 
1828   switch(txt->encoding)
1829   { case ENC_ISO_LATIN_1:
1830     { const unsigned char *s = (const unsigned char *)txt->text.t;
1831       const unsigned char *e = &s[txt->length];
1832 
1833       if ( !f_is_prolog_atom_start(*s) )
1834 	return FALSE;
1835 
1836       for(s++; s<e; s++)
1837       { if ( !f_is_prolog_identifier_continue(*s) )
1838 	  return FALSE;
1839       }
1840       return TRUE;
1841     }
1842     case ENC_WCHAR:
1843     { const pl_wchar_t *s = (const pl_wchar_t*)txt->text.w;
1844       const pl_wchar_t *e = &s[txt->length];
1845 
1846       if ( !f_is_prolog_atom_start(*s) )
1847 	return FALSE;
1848 
1849       for(s++; s<e; s++)
1850       { if ( !f_is_prolog_identifier_continue(*s) )
1851 	  return FALSE;
1852       }
1853       return TRUE;
1854     }
1855     default:
1856       assert(0);
1857       return FALSE;
1858   }
1859 }
1860 
1861 
/* extendAtom() computes the longest common extension of `prefix` over
   all global text atoms shorter than LINESIZ.  The extension (the text
   after the prefix) is copied into `common`; *unique is set to FALSE as
   soon as two distinct completions are seen.  Returns TRUE if at least
   one atom matched.  Caller must provide a `common` buffer of at least
   LINESIZ bytes. */
static int
extendAtom(char *prefix, bool *unique, char *common)
{ size_t index;
  int i, last=FALSE;
  bool first = TRUE;
  size_t lp = strlen(prefix);

  *unique = TRUE;

  for(index=1, i=0; !last; i++)
  { size_t upto = (size_t)2<<i;		/* exclusive end of array block i */
    size_t high = GD->atoms.highest;
    Atom b = GD->atoms.array.blocks[i];

    if ( upto >= high )
    { upto = high;
      last = TRUE;
    }

    for(; index<upto; index++)
    { Atom a = b + index;

      if ( ATOM_IS_VALID(a->references) && a->type == &text_atom &&
	   global_atom(a) &&
	   strprefix(a->name, prefix) &&
	   strlen(a->name) < LINESIZ )
      { if ( first == TRUE )
	{ strcpy(common, a->name+lp);	/* first hit: candidate extension */
	  first = FALSE;
	} else
	{ char *s = common;		/* clip to common part of both */
	  char *q = a->name+lp;
	  while( *s && *s == *q )
	    s++, q++;
	  *s = EOS;
	  *unique = FALSE;
	}
      }
    }
  }

  return !first;
}
1905 
1906 
1907 /** '$complete_atom'(+Prefix, -Common, -Unique) is semidet.
1908 
1909 True when Prefix can be extended based on currently defined atoms.
1910 
1911 @arg Common is a code list consisting of the characters from Prefix
1912      and the common text for all possible completions
1913 @arg Unique is either =unique= or =not_unique=.  In the second case,
1914      this implies that there are longer atoms that have the prefix
1915      Common.
1916 @see '$atom_completions'/2.
1917 @bug This version only handles ISO Latin 1 text
1918 */
1919 
static
PRED_IMPL("$complete_atom", 3, complete_atom, 0)
{ PRED_LD
  term_t prefix = A1;
  term_t common = A2;
  term_t unique = A3;

  char *p;
  size_t len;
  bool u;
  char buf[LINESIZ];
  char cmm[LINESIZ];

  if ( !PL_get_nchars(prefix, &len, &p, CVT_ALL|CVT_EXCEPTION) ||
       len >= sizeof(buf) )
    return FALSE;
  strcpy(buf, p);

  /* extendAtom() only matches atoms shorter than LINESIZ, so
     buf (prefix) + cmm (extension) cannot overflow buf here */
  if ( extendAtom(p, &u, cmm) )
  { strcat(buf, cmm);
    if ( PL_unify_list_codes(common, buf) &&
	 PL_unify_atom(unique, u ? ATOM_unique
				 : ATOM_not_unique) )
      return TRUE;
  }

  return FALSE;
}
1948 
1949 
1950 static int
compareMatch(const void * m1,const void * m2)1951 compareMatch(const void *m1, const void *m2)
1952 { return strcmp(stringMatch((Match)m1), stringMatch((Match)m2));
1953 }
1954 
1955 
/* extend_alternatives() collects into altv[] (capacity ALT_MAX) all
   global atoms that start with `prefix`, are valid identifiers and are
   shorter than ALT_SIZ, then sorts them by name.  Checks for signals
   every 256 atoms; returns FALSE if interrupted, TRUE otherwise. */
static int
extend_alternatives(PL_chars_t *prefix, struct match *altv, int *altn)
{ size_t index;
  int i, last=FALSE;

  *altn = 0;
  for(index=1, i=0; !last; i++)
  { size_t upto = (size_t)2<<i;		/* exclusive end of array block i */
    size_t high = GD->atoms.highest;
    Atom b = GD->atoms.array.blocks[i];

    if ( upto >= high )
    { upto = high;
      last = TRUE;
    }

    for(; index<upto; index++)
    { Atom a = b + index;
      PL_chars_t hit;

      if ( index % 256 == 0 && PL_handle_signals() < 0 )
	return FALSE;			/* interrupted */

      if ( ATOM_IS_VALID(a->references) &&
	   global_atom(a) &&
	   get_atom_ptr_text(a, &hit) &&
	   hit.length < ALT_SIZ &&
	   PL_cmp_text(prefix, 0, &hit, 0, prefix->length) == 0 &&
	   is_identifier_text(&hit) )
      { Match m = &altv[(*altn)++];

	m->name = a;
	m->length = a->length;
	if ( *altn >= ALT_MAX )		/* capacity reached: stop early */
	  goto out;
      }
    }
  }

out:
  qsort(altv, *altn, sizeof(struct match), compareMatch);

  return TRUE;
}
2000 
2001 
2002 /** '$atom_completions'(+Prefix, -Alternatives:list(atom)) is det.
2003 
2004 True when Alternatives is a list of   all  atoms that have prefix Prefix
2005 and are considered completion  candidates.   Completions  candidates are
2006 atoms that
2007 
2008   - Are built-in or referenced from some static datastructure
2009   - All characters are legal characters for unquoted atoms
2010   - The atom is at most 80 characters long
2011 */
2012 
static
PRED_IMPL("$atom_completions", 2, atom_completions, 0)
{ PRED_LD
  term_t prefix = A1;
  term_t alternatives = A2;

  PL_chars_t p_text;
  struct match altv[ALT_MAX];
  int altn;
  int i;
  term_t alts = PL_copy_term_ref(alternatives);
  term_t head = PL_new_term_ref();

  if ( !PL_get_text(prefix, &p_text, CVT_ALL|CVT_EXCEPTION) )
    return FALSE;

  if ( !extend_alternatives(&p_text, altv, &altn) )
    return FALSE;			/* interrupt */

  /* Build the (sorted) result list */
  for(i=0; i<altn; i++)
  { if ( !PL_unify_list(alts, head, alts) ||
	 !PL_unify_atom(head, altv[i].name->atom) )
      return FALSE;
  }

  return PL_unify_nil(alts);
}
2040 
2041 
2042 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2043 Completeness generation for the GNU readline library. This function uses
2044 a state variable to indicate  the   generator  should maintain/reset its
2045 state. Horrible!
2046 
2047 We must use thread-local data here.  Worse   is  we can't use the normal
2048 Prolog one as there might  not  be   a  Prolog  engine associated to the
2049 thread.
2050 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
2051 
2052 #ifdef O_PLMT
2053 #include <pthread.h>
2054 static pthread_once_t key_created = PTHREAD_ONCE_INIT;
2055 static pthread_key_t key;
2056 
/* One-time (pthread_once) creation of the TLS key that stores the
   completion-generator state for threads without a Prolog engine. */
static void
atom_generator_create_key(void)
{ pthread_key_create(&key, NULL);
}
2061 #endif
2062 
/* atom_generator() finds the next global atom matching `prefix` that is
   a valid identifier shorter than ALT_SIZ, filling *hit.  `state` FALSE
   restarts the scan; TRUE resumes from the position saved in
   LD->atoms.generator or, for engine-less threads, in the pthread TLS
   key.  Returns FALSE when exhausted or interrupted. */
static int
atom_generator(PL_chars_t *prefix, PL_chars_t *hit, int state)
{ GET_LD
  size_t index;
  int i, last=FALSE;

#ifdef O_PLMT
  if ( !LD )				/* no engine: need the TLS key */
    pthread_once(&key_created, atom_generator_create_key);
#endif

  if ( !state )
  { index = 1;				/* fresh scan */
  } else
  { if ( HAS_LD )
      index = LD->atoms.generator;	/* resume saved position */
#ifdef O_PLMT
    else
      index = (size_t)pthread_getspecific(key);
#endif
  }

  for(i=MSB(index); !last; i++)
  { size_t upto = (size_t)2<<i;		/* exclusive end of array block i */
    size_t high = GD->atoms.highest;
    Atom b = GD->atoms.array.blocks[i];

    if ( upto >= high )
    { upto = high;
      last = TRUE;
    }

    for(; index<upto; index++)
    { Atom a = b + index;

      if ( is_signalled(PASS_LD1) )	/* Notably allow windows version */
      { if ( PL_handle_signals() < 0 )	/* to break out on ^C */
	  return FALSE;
      }

      if ( ATOM_IS_VALID(a->references) && global_atom(a) &&
	   get_atom_ptr_text(a, hit) &&
	   hit->length < ALT_SIZ &&
	   PL_cmp_text(prefix, 0, hit, 0, prefix->length) == 0 &&
	   is_identifier_text(hit) )
      { if ( HAS_LD )
	  LD->atoms.generator = index+1;	/* save resume position */
#ifdef O_PLMT
	else
	  pthread_setspecific(key, (void *)(index+1));
#endif

        return TRUE;
      }
    }
  }

  return FALSE;
}
2122 
2123 
2124 char *
PL_atom_generator(const char * prefix,int state)2125 PL_atom_generator(const char *prefix, int state)
2126 { PL_chars_t txt, hit;
2127 
2128   PL_init_text(&txt);
2129   txt.text.t   = (char *)prefix;
2130   txt.encoding = ENC_ISO_LATIN_1;
2131   txt.length   = strlen(prefix);
2132 
2133   while ( atom_generator(&txt, &hit, state) )
2134   { if ( hit.encoding == ENC_ISO_LATIN_1 )
2135       return hit.text.t;		/* text is from atoms, thus static */
2136     state = TRUE;
2137   }
2138 
2139   return NULL;
2140 }
2141 
2142 
2143 pl_wchar_t *
PL_atom_generator_w(const pl_wchar_t * prefix,pl_wchar_t * buffer,size_t buflen,int state)2144 PL_atom_generator_w(const pl_wchar_t *prefix,
2145 		    pl_wchar_t *buffer,
2146 		    size_t buflen,
2147 		    int state)
2148 { PL_chars_t txt, hit;
2149 
2150   PL_init_text(&txt);
2151   txt.text.w   = (pl_wchar_t *)prefix;
2152   txt.encoding = ENC_WCHAR;
2153   txt.length   = wcslen(prefix);
2154 
2155   for( ; atom_generator(&txt, &hit, state); state = TRUE )
2156   { if ( buflen > hit.length+1 )
2157     { if ( hit.encoding == ENC_WCHAR )
2158       { wcscpy(buffer, hit.text.w);
2159       } else
2160       { const unsigned char *s = (const unsigned char *)hit.text.t;
2161 	const unsigned char *e = &s[hit.length];
2162 	pl_wchar_t *o;
2163 
2164 	for(o=buffer; s<e;)
2165 	  *o++ = *s++;
2166 	*o = EOS;
2167       }
2168 
2169       return buffer;
2170     }
2171   }
2172 
2173   return NULL;
2174 }
2175 
2176 
2177 size_t
atom_space(void)2178 atom_space(void)
2179 { size_t array = ((size_t)2<<MSB(GD->atoms.highest))*sizeof(struct atom);
2180   size_t index;
2181   int i, last=FALSE;
2182   size_t table = GD->atoms.table->buckets * sizeof(Atom);
2183   size_t data = 0;
2184 
2185   for(index=1, i=0; !last; i++)
2186   { size_t upto = (size_t)2<<i;
2187     size_t high = GD->atoms.highest;
2188     Atom b = GD->atoms.array.blocks[i];
2189 
2190     if ( upto >= high )
2191     { upto = high;
2192       last = TRUE;
2193     }
2194 
2195     for(; index<upto; index++)
2196     { Atom a = b + index;
2197 
2198       if ( ATOM_IS_VALID(a->references) )
2199       { data += a->length;		/* TBD: malloc rounding? */
2200       }
2201     }
2202   }
2203 
2204   return array+table+data;
2205 }
2206 
2207 
		 /*******************************
		 *      PUBLISH PREDICATES	*
		 *******************************/

/* Registration table binding the C predicate implementations in this
   file to their Prolog names/arities (macros from pl-incl.h). */
BeginPredDefs(atom)
  PRED_DEF("current_blob",  2, current_blob, PL_FA_NONDETERMINISTIC)
  PRED_DEF("current_atom", 1, current_atom, PL_FA_NONDETERMINISTIC)
  PRED_DEF("blob", 2, blob, 0)
  PRED_DEF("$atom_references", 2, atom_references, 0)
  PRED_DEF("$atom_completions", 2, atom_completions, 0)
  PRED_DEF("$complete_atom", 3, complete_atom, 0)
EndPredDefs
2220