/* set.c -- Set routines for Khepera
 * Created: Wed Nov  9 13:31:24 1994 by faith@dict.org
 * Copyright 1994-1997, 2002 Rickard E. Faith (faith@dict.org)
 * Copyright 2002-2008 Aleksey Cheusov (vle@gmx.net)
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * \section{Set Routines}
 *
 * \intro The set implementation is similar to the hash table
 * implementation, except that the set object does not associate a |datum|
 * with a |key|.  For sets, keys are called elements.  All of the hash
 * table functions are supported, with the addition of a membership test
 * and several set operations.
 *
 * The underlying data structure is a hash table of prime length, with
 * self-organizing linked lists \cite[pp.~398--9]{faith:Knuth73c} used for
 * collision resolution.  The hash table automatically grows as necessary
 * to preserve efficient access.
 *
 */

#include "maaP.h"

typedef struct bucket {
	const void    *elem;
	unsigned long  hash;
	struct bucket *next;
} *bucketType;

typedef struct set {
#if MAA_MAGIC
	int           magic;
#endif
	unsigned long prime;
	unsigned long entries;
	bucketType    *buckets;
	unsigned long resizings;
	unsigned long retrievals;
	unsigned long hits;
	unsigned long misses;
	unsigned long (*hash)(const void *);
	int           (*compare)(const void *, const void *);
	int           readonly;
} *setType;

static void _set_check(setType t, const char *function)
{
	if (!t) err_internal(function, "set is null");
#if MAA_MAGIC
	if (t->magic != SET_MAGIC)
		err_internal(function,
					 "Bad magic: 0x%08x (should be 0x%08x)",
					 t->magic,
					 SET_MAGIC);
#endif
}

static set_Set _set_create(unsigned long seed,
						   set_HashFunction hash,
						   set_CompareFunction compare)
{
	setType       t;
	unsigned long i;
	unsigned long prime = prm_next_prime(seed);

	t               = xmalloc(sizeof(struct set));
#if MAA_MAGIC
	t->magic        = SET_MAGIC;
#endif
	t->prime        = prime;
	t->entries      = 0;
	t->buckets      = xmalloc(t->prime * sizeof(struct bucket));
	t->resizings    = 0;
	t->retrievals   = 0;
	t->hits         = 0;
	t->misses       = 0;
	t->hash         = hash ? hash : hsh_string_hash;
	t->compare      = compare ? compare : hsh_string_compare;
	t->readonly     = 0;

	for (i = 0; i < t->prime; i++) t->buckets[i] = NULL;

	return t;
}

/* \doc The |set_create| function initilizes a set object.  Elements are
   pointers to "void".

   The internal representation of the set will grow automatically when an
   insertion is performed and the table is more than half full.

   The |hash| function should take a pointer to a |elem| and return an
   "unsigned int".  If |hash| is "NULL", then the |elem| is assumed to be a
   pointer to a null-terminated string, and the function shown in
   \grindref{fig:hshstringhash} will be used for |hash|.

   The |compare| function should take a pair of pointers to elements and
   return zero if the elements are the same and non-zero if they are
   different.  If |compare| is "NULL", then the elements are assumed to
   point to null-terminated strings, and the |strcmp| function will be used
   for |compare|.

   Additionally, the |hsh_pointer_hash| and |hsh_pointer_compare| functions
   are available and can be used to treat the \emph{value} of the "void"
   pointer as the element.  These functions are often useful for
   maintaining sets of objects. */

set_Set set_create(set_HashFunction hash, set_CompareFunction compare)
{
	return _set_create(0, hash, compare);
}

set_HashFunction set_get_hash(set_Set set)
{
	setType t = (setType)set;

	_set_check(t, __func__);
	return t->hash;
}

set_CompareFunction set_get_compare(set_Set set)
{
	setType t = (setType)set;

	_set_check(t, __func__);
	return t->compare;
}

static void _set_destroy_buckets(set_Set set)
{
	unsigned long i;
	setType       t = (setType)set;

	_set_check(t, __func__);
	for (i = 0; i < t->prime; i++) {
		bucketType b = t->buckets[i];

		while (b) {
			bucketType next = b->next;
	 
			xfree(b);		/* terminal */
			b = next;
		}
	}

	xfree(t->buckets);		/* terminal */
	t->buckets = NULL;
}

static void _set_destroy_table(set_Set set)
{
	setType t = (setType)set;
   
	_set_check(t, __func__);
   
#if MAA_MAGIC
	t->magic = SET_MAGIC_FREED;
#endif
	xfree(t);			/* terminal */
}

/* \doc |set_destroy| frees all of the memory associated with the set
   object.

   The memory used by elements is \emph{not} freed---this memory is the
   responsibility of the user.  However, a call to |set_iterate| can be
   used to free this memory \emph{immediately} before a call to
   |set_destroy|. */

void set_destroy(set_Set set)
{
	setType       t = (setType)set;

	_set_check(t, __func__);
	if (t->readonly)
		err_internal(__func__, "Attempt to destroy readonly set");
	_set_destroy_buckets(set);
	_set_destroy_table(set);
}

static void _set_insert(set_Set set, unsigned long hash, const void *elem)
{
	setType       t = (setType)set;
	unsigned long h = hash % t->prime;
	bucketType    b;

	_set_check(t, __func__);
   
	b        = xmalloc(sizeof(struct bucket));
	b->hash  = hash;
	b->elem  = elem;
	b->next  = NULL;
   
	if (t->buckets[h]) b->next = t->buckets[h];
	t->buckets[h] = b;
	++t->entries;
}

/* \doc |set_insert| inserts a new |elem| into the |set|.  If the insertion
   is successful, zero is returned.  If the |elem| already exists, 1 is
   returned.

   If the internal representation of the set becomes more than half full,
   its size is increased automatically.  At present, this requires that all
   of the element pointers are copied into a new set.  Rehashing is not
   required, however, since the hash values are stored for each element. */

int set_insert(set_Set set, const void *elem)
{
	setType       t         = (setType)set;
	unsigned long hashValue = t->hash(elem);
	unsigned long h;

	_set_check(t, __func__);
	if (t->readonly)
		err_internal(__func__, "Attempt to insert into readonly set");
   
	/* Keep table less than half full */
	if (t->entries * 2 > t->prime) {
		setType       new = _set_create(t->prime * 3, t->hash, t->compare);
		unsigned long i;

		for (i = 0; i < t->prime; i++) {
			if (t->buckets[i]) {
				bucketType pt;

				for (pt = t->buckets[i]; pt; pt = pt->next)
					_set_insert(new, pt->hash, pt->elem);
			}
		}

		/* fixup values */
		_set_destroy_buckets(t);
		t->prime = new->prime;
		t->buckets = new->buckets;
		_set_destroy_table(new);
		++t->resizings;
	}
   
	h = hashValue % t->prime;

	if (t->buckets[h]) {		/* Assert uniqueness */
		bucketType pt;
	  
		for (pt = t->buckets[h]; pt; pt = pt->next)
			if (!t->compare(pt->elem, elem)) return 1;
	}

	_set_insert(t, hashValue, elem);
	return 0;
}

/* \doc |set_delete| removes an |elem| from the |set|.  Zero is returned if
   the |elem| was present.  Otherwise, 1 is returned. */

int set_delete(set_Set set, const void *elem)
{
	setType       t = (setType)set;
	unsigned long h = t->hash(elem) % t->prime;

	_set_check(t, __func__);
	if (t->readonly)
		err_internal(__func__, "Attempt to delete from readonly set");
   
	if (t->buckets[h]) {
		bucketType pt;
		bucketType prev;
	  
		for (prev = NULL, pt = t->buckets[h]; pt; prev = pt, pt = pt->next)
			if (!t->compare(pt->elem, elem)) {
				--t->entries;
	       
				if (!prev) t->buckets[h] = pt->next;
				else       prev->next = pt->next;
	       
				xfree(pt);
				return 0;
			}
	}
   
	return 1;
}

/* \doc |set_member| returns 1 if |elem| is in |set|.  Otherwise, zero is
   returned. */

int set_member(set_Set set, const void *elem)
{
	setType       t = (setType)set;
	unsigned long h = t->hash(elem) % t->prime;

	_set_check(t, __func__);
   
	++t->retrievals;
	if (t->buckets[h]) {
		bucketType pt;
		bucketType prev;
	  
		for (prev = NULL, pt = t->buckets[h]; pt; prev = pt, pt = pt->next)
			if (!t->compare(pt->elem, elem)) {
				if (!prev) {
					++t->hits;
				} else if (!t->readonly) {
					/* Self organize */
					prev->next    = pt->next;
					pt->next      = t->buckets[h];
					t->buckets[h] = pt;
				}
				return 1;
			}
	}

	++t->misses;
	return 0;
}

/* \doc |set_iterate| is used to iterate a function over every |elem| in
   the |set|.  The function, |iterator|, is passed each |elem|.  If
   |iterator| returns a non-zero value, the iterations stop, and
   |set_iterate| returns.  Note that the elements are in some arbitrary
   order, and that this order may change between two successive calls to
   |set_iterate|. */

int set_iterate(set_Set set,
				int (*iterator)(const void *elem))
{
	setType       t = (setType)set;
	unsigned long i;
	int           savedReadonly;

	_set_check(t, __func__);

	savedReadonly = t->readonly;
	t->readonly   = 1;
   
	for (i = 0; i < t->prime; i++) {
		if (t->buckets[i]) {
			bucketType pt;
	 
			for (pt = t->buckets[i]; pt; pt = pt->next)
				if (iterator(pt->elem)) {
					t->readonly = savedReadonly;
					return 1;
				}
		}
	}
   
	t->readonly = savedReadonly;
	return 0;
}

/* \doc |set_iterate_arg| is used to iterate a function over every |elem|
   in the |set|.  The function, |iterator|, is passed each |elem|.  If
   |iterator| returns a non-zero value, the iterations stop, and
   |set_iterate| returns.  Note that the elements are in some arbitrary
   order, and that this order may change between two successive calls to
   |set_iterate|. */

int set_iterate_arg(set_Set set,
					int (*iterator)(const void *elem, void *arg),
					void *arg)
{
	setType       t = (setType)set;
	unsigned long i;
	int           savedReadonly;

	_set_check(t, __func__);

	savedReadonly = t->readonly;
	t->readonly   = 1;
   
	for (i = 0; i < t->prime; i++) {
		if (t->buckets[i]) {
			bucketType pt;
	 
			for (pt = t->buckets[i]; pt; pt = pt->next)
				if (iterator(pt->elem, arg)) {
					t->readonly = savedReadonly;
					return 1;
				}
		}
	}
   
	t->readonly = savedReadonly;
	return 0;
}

/* \doc |set_init_position| returns a position marker for some arbitary
   first element in the set.  This marker can be used with
   |set_next_position| and |set_get_position|. */

set_Position set_init_position(set_Set set)
{
	setType       t = (setType)set;
	unsigned long i;

	_set_check(t, __func__);
	for (i = 0; i < t->prime; i++) if (t->buckets[i]) {
			t->readonly = 1;
			return t->buckets[i];
		}
	return NULL;
}

/* \doc |set_next_position| returns a position marker for the next element
   in the set.  Elements are in arbitrary order based on their positions in
   the hash table. */

set_Position set_next_position(set_Set set, set_Position position)
{
	setType       t = (setType)set;
	bucketType    b = (bucketType)position;
	unsigned long i;
	unsigned long h;

	_set_check(t, __func__);
   
	if (!b) {
		t->readonly = 0;
		return NULL;
	}
   
	if (b->next) return b->next;

	for (h = b->hash % t->prime, i = h + 1; i < t->prime; i++)
		if (t->buckets[i]) return t->buckets[i];

	t->readonly = 0;
	return NULL;
}

/* \doc |set_get_position| returns the element associated with the
   |position| marker, or "NULL" if there is no such element. */

void *set_get_position(set_Position position)
{
	bucketType b = (bucketType)position;

	if (!b) return NULL;
	return __UNCONST(b->elem);	/* Discard const */
}

/* \doc |set_readonly| sets the |readonly| flag for the |set| to |flag|.
   |flag| should be 0 or 1.  The value of the previous flag is returned.
   When a set is marked as readonly, self-organization of the
   bucket-overflow lists will not take place, and any attempt to modify the
   list (e.g., insertion or deletion) will result in an error. */

int set_readonly(set_Set set, int flag)
{
	setType t = (setType)set;
	int     current;

	_set_check(t, __func__);

	current     = t->readonly;
	t->readonly = flag;
	return current;
}

/* \doc |set_add| returns |set1|, which now contains all of the elements
   in |set1| and |set2|.  Only pointers to elements are copied, \emph{not}
   the data pointed (this has memory management implications).  The |hash|
   and |compare| functions must be identical for the two sets.  |set2| is
   not changed. */

set_Set set_add(set_Set set1, set_Set set2)
{
	setType       t1 = (setType)set1;
	setType       t2 = (setType)set2;
	unsigned long i;

	_set_check(t1, __func__);
	_set_check(t2, __func__);
   
	if (t1->hash != t2->hash)
		err_fatal(__func__,
				  "Sets do not have identical hash functions");

	if (t1->compare != t2->compare)
		err_fatal(__func__,
				  "Sets do not have identical comparison functions");

	for (i = 0; i < t2->prime; i++) {
		if (t2->buckets[i]) {
			bucketType pt;
	 
			for (pt = t2->buckets[i]; pt; pt = pt->next)
				set_insert(set1, pt->elem);
		}
	}

	return set1;
}

/* \doc |set_del| returns |set1|, which now contains all of the elements in
   |set1| other than those in |set2|.  Only pointers to elements are
   copied, \emph{not} the data pointed (this has memory management
   implications).  The |hash| and |compare| functions must be identical for
   the two sets.  |set2| is not changed. */

set_Set set_del(set_Set set1, set_Set set2)
{
	setType       t1 = (setType)set1;
	setType       t2 = (setType)set2;
	unsigned long i;

	_set_check(t1, __func__);
	_set_check(t2, __func__);
   
	if (t1->hash != t2->hash)
		err_fatal(__func__,
				  "Sets do not have identical hash functions");

	if (t1->compare != t2->compare)
		err_fatal(__func__,
				  "Sets do not have identical comparison functions");

	for (i = 0; i < t2->prime; i++) {
		if (t2->buckets[i]) {
			bucketType pt;
	 
			for (pt = t2->buckets[i]; pt; pt = pt->next)
				set_delete(set1, pt->elem);
		}
	}

	return set1;
}

/* \doc |set_union| returns a new set which is the union of |set1| and
   |set2|.  Only pointers to elements are copied, \emph{not} the data
   pointed (this has memory management implications).  The |hash| and
   |compare| functions must be identical for the two sets. */

set_Set set_union(set_Set set1, set_Set set2)
{
	setType       t1 = (setType)set1;
	setType       t2 = (setType)set2;
	set_Set       set;
	unsigned long i;

	_set_check(t1, __func__);
	_set_check(t2, __func__);
   
	if (t1->hash != t2->hash)
		err_fatal(__func__,
				  "Sets do not have identical hash functions");

	if (t1->compare != t2->compare)
		err_fatal(__func__,
				  "Sets do not have identical comparison functions");

	set = set_create(t1->hash, t1->compare);

	for (i = 0; i < t1->prime; i++) {
		if (t1->buckets[i]) {
			bucketType pt;
	 
			for (pt = t1->buckets[i]; pt; pt = pt->next)
				set_insert(set, pt->elem);
		}
	}
   
	for (i = 0; i < t2->prime; i++) {
		if (t2->buckets[i]) {
			bucketType pt;
	 
			for (pt = t2->buckets[i]; pt; pt = pt->next)
				set_insert(set, pt->elem);
		}
	}

	return set;
}

/* \doc |set_inter| returns a new set which is the intersection of |set1|
   and |set2|.  Only pointers to elements are copied, \emph{not} the data
   pointed (this has memory management implications).  The |hash| and
   |compare| functions must be identical for the two sets. */

set_Set set_inter(set_Set set1, set_Set set2)
{
	setType       t1 = (setType)set1;
	setType       t2 = (setType)set2;
	set_Set       set;
	unsigned long i;
	int           savedReadonly;

	_set_check(t1, __func__);
	_set_check(t2, __func__);
   
	if (t1->hash != t2->hash)
		err_fatal(__func__,
				  "Sets do not have identical hash functions");

	if (t1->compare != t2->compare)
		err_fatal(__func__,
				  "Sets do not have identical comparison functions");

	set = set_create(t1->hash, t1->compare);

	savedReadonly = t2->readonly;
	t2->readonly  = 1;		/* save time on set_member */
	for (i = 0; i < t1->prime; i++) {
		if (t1->buckets[i]) {
			bucketType pt;
	 
			for (pt = t1->buckets[i]; pt; pt = pt->next)
				if (set_member(t2, pt->elem))
					set_insert(set, pt->elem);
		}
	}
	t2->readonly = savedReadonly;

	return set;
}

/* \doc |set_diff| returns a new set which is the difference resulting from
   removing every element in |set2| from the elements in |set1|.  Only
   pointers to elements are copied, \emph{not} the data pointed (this has
   memory management implications).  The |hash| and |compare| functions
   must be identical for the two sets. */

set_Set set_diff(set_Set set1, set_Set set2)
{
	setType       t1 = (setType)set1;
	setType       t2 = (setType)set2;
	set_Set       set;
	unsigned long i;
	int           savedReadonly;

	_set_check(t1, __func__);
	_set_check(t2, __func__);
   
	if (t1->hash != t2->hash)
		err_fatal(__func__,
				  "Sets do not have identical hash functions");

	if (t1->compare != t2->compare)
		err_fatal(__func__,
				  "Sets do not have identical comparison functions");

	set = set_create(t1->hash, t1->compare);

	savedReadonly = t2->readonly;
	t2->readonly  = 1;		/* save time on set_member */
	for (i = 0; i < t1->prime; i++) {
		if (t1->buckets[i]) {
			bucketType pt;
	 
			for (pt = t1->buckets[i]; pt; pt = pt->next)
				if (!set_member(t2, pt->elem))
					set_insert(set, pt->elem);
		}
	}
	t2->readonly = savedReadonly;

	return set;
}

/* \doc |set_equal| returns non-zero if |set1| and |set2| contain the same
   number of elements, and all of the elements in |set1| are also in
   |set2|.  The |hash| and |compare| functions must be identical for the
   two sets. */

int set_equal(set_Set set1, set_Set set2)
{
	setType       t1 = (setType)set1;
	setType       t2 = (setType)set2;
	unsigned long i;
	int           savedReadonly;

	_set_check(t1, __func__);
	_set_check(t2, __func__);
   
	if (t1->hash != t2->hash)
		err_fatal(__func__,
				  "Sets do not have identical hash functions");

	if (t1->compare != t2->compare)
		err_fatal(__func__,
				  "Sets do not have identical comparison functions");

	if (t1->entries != t2->entries) return 0; /* not equal */

	savedReadonly = t2->readonly;
	t2->readonly  = 1;		/* save time on set_member */
	for (i = 0; i < t1->prime; i++) {
		if (t1->buckets[i]) {
			bucketType pt;
	 
			for (pt = t1->buckets[i]; pt; pt = pt->next)
				if (!set_member(t2, pt->elem)) {
					t2->readonly = savedReadonly;
					return 0; /* not equal */
				}
		}
	}
	t2->readonly = savedReadonly;

	return 1;			/* equal */
}

int set_count(set_Set set)
{
   setType t = (setType)set;

   _set_check(t, __func__);
   return t->entries;
}

/* \doc |set_get_stats| returns statistics about the |set|.  The
   |set_Stats| structure is shown in \grind{set_Stats}. */

set_Stats set_get_stats(set_Set set)
{
	setType       t = (setType)set;
	set_Stats     s = xmalloc(sizeof(struct set_Stats));
	unsigned long i;
	unsigned long count;

	_set_check(t, __func__);
   
	s->size           = t->prime;
	s->resizings      = t->resizings;
	s->entries        = 0;
	s->buckets_used   = 0;
	s->singletons     = 0;
	s->maximum_length = 0;
	s->retrievals     = t->retrievals;
	s->hits           = t->hits;
	s->misses         = t->misses;
   
	for (i = 0; i < t->prime; i++) {
		if (t->buckets[i]) {
			bucketType pt;
	 
			++s->buckets_used;
			for (count = 0, pt = t->buckets[i]; pt; ++count, pt = pt->next);
			if (count == 1) ++s->singletons;
			s->maximum_length = max(s->maximum_length, count);
			s->entries += count;
		}
	}

	if (t->entries != s->entries)
		err_internal(__func__,
					 "Incorrect count for entries: %lu vs. %lu",
					 t->entries,
					 s->entries);


	return s;
}

/* \doc |set_print_stats| prints the statistics for |set| on the specified
   |stream|.  If |stream| is "NULL", then "stdout" will be used. */

void set_print_stats(set_Set set, FILE *stream)
{
	setType    t   = (setType)set;
	FILE      *str = stream ? stream : stdout;
	set_Stats  s   = set_get_stats(t);

	_set_check(t, __func__);
   
	fprintf(str, "Statistics for set at %p:\n", set);
	fprintf(str, "   %lu resizings to %lu total\n", s->resizings, s->size);
	fprintf(str, "   %lu entries (%lu buckets used, %lu without overflow)\n",
			s->entries, s->buckets_used, s->singletons);
	fprintf(str, "   maximum list length is %lu", s->maximum_length);
	if (s->buckets_used)
		fprintf(str, " (optimal is %.1f)\n",
				(double)s->entries / (double)s->buckets_used);
	else
		fprintf(str, "\n");
	fprintf(str, "   %lu retrievals (%lu from top, %lu failed)\n",
			s->retrievals, s->hits, s->misses);

	xfree(s);			/* rare */
}