1 
2 /*===========================================================================*/
3 
4 /*
5  *  Copyright (C) 1997 Jason Hutchens
6  *
7  *  This program is free software; you can redistribute it and/or modify it
8  *  under the terms of the GNU General Public License as published by the Free
9  *  Software Foundation; either version 2 of the license or (at your option)
10  *  any later version.
11  *
12  *  This program is distributed in the hope that it will be useful, but
13  *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14  *  or FITNESS FOR A PARTICULAR PURPOSE.  See the Gnu Public License for more
15  *  details.
16  *
17  *  You should have received a copy of the GNU General Public License along
18  *  with this program; if not, write to the Free Software Foundation, Inc.,
19  *  675 Mass Ave, Cambridge, MA 02139, USA.
20  */
21 
22 /*===========================================================================*/
23 
24 /*
25  *	$Id: hal.c,v 1.6 2001/08/21 06:18:37 devinteske Exp $
26  *
27  *	File:		megahal.c
28  *
29  *	Program:	MegaHAL v8
30  *
31  *	Purpose:	To simulate a natural language conversation with a psychotic
32  *			computer.  This is achieved by learning from the user's
33  *			input using a third-order Markov model on the word level.
34  *			Words are considered to be sequences of characters separated
35  *			by whitespace and punctuation.  Replies are generated
36  *			randomly based on a keyword, and they are scored using
37  *			measures of surprise.
38  *
39  *	Author:		Mr. Jason L. Hutchens
40  *
41  *	WWW:		http://ciips.ee.uwa.edu.au/~hutch/hal/
42  *
43  *	E-Mail:		hutch@ciips.ee.uwa.edu.au
44  *
45  *	Contact:	The Centre for Intelligent Information Processing Systems
46  *			Department of Electrical and Electronic Engineering
47  *			The University of Western Australia
48  *			AUSTRALIA 6907
49  *
50  *	Phone:		+61-8-9380-3856
51  *
52  *	Facsimile:	+61-8-9380-1168
53  *
54  *	Notes:		This file is best viewed with tabstops set to three spaces.
55  */
56 
57 /*===========================================================================*/
58 
59 #include <stdio.h>
60 #include <stdlib.h>
61 #include <stdarg.h>
62 #include <string.h>
63 #include <unistd.h>
64 #include <math.h>
65 #include <time.h>
66 #include <ctype.h>
67 #include "hx.h"
68 #include "xmalloc.h"
69 
70 /*===========================================================================*/
71 
72 #define P_THINK		40
73 #define D_KEY		100000
74 #define V_KEY		50000
75 #define D_THINK		500000
76 #define V_THINK		250000
77 
78 #ifndef MIN
79 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
80 #endif
81 
82 #define COOKIE "MegaHALv8"
83 
84 /*===========================================================================*/
85 
86 typedef enum {FALSE, TRUE} bool;
87 
88 typedef struct {
89 	u_int8_t length;
90 	char *word;
91 } STRING;
92 
93 typedef struct {
94 	u_int32_t size;
95 	STRING *entry;
96 	u_int16_t *index;
97 } DICTIONARY;
98 
99 typedef struct {
100 	u_int16_t size;
101 	STRING *from;
102 	STRING *to;
103 } SWAP;
104 
105 typedef struct NODE {
106 	u_int16_t symbol;
107 	u_int32_t usage;
108 	u_int16_t count;
109 	u_int16_t branch;
110 	struct NODE **tree;
111 } TREE;
112 
113 typedef struct {
114 	u_int8_t order;
115 	TREE *forward;
116 	TREE *backward;
117 	TREE **context;
118 	DICTIONARY *dictionary;
119 } MODEL;
120 
121 /*===========================================================================*/
122 
123 void add_aux (MODEL *, DICTIONARY *, STRING);
124 void add_key (MODEL *, DICTIONARY *, STRING);
125 void add_node (TREE *, TREE *, int);
126 void add_swap (SWAP *, char *, char *);
127 TREE *add_symbol (TREE *, u_int16_t);
128 u_int16_t add_word (DICTIONARY *, STRING);
129 int babble (MODEL *, DICTIONARY *, DICTIONARY *);
130 bool boundary (char *, unsigned int);
131 void capitalize (char *);
132 void delay (struct htlc_conn *, char *);
133 bool dissimilar (DICTIONARY *, DICTIONARY *);
134 void error (const char *, const char *, ...);
135 float evaluate_reply (MODEL *, DICTIONARY *, DICTIONARY *);
136 void exithal (void);
137 TREE *find_symbol (TREE *, int);
138 TREE *find_symbol_add (TREE *, int);
139 u_int16_t find_word (DICTIONARY *, STRING);
140 void free_dictionary (DICTIONARY *);
141 char *generate_reply (MODEL *, DICTIONARY *);
142 void initialize_context (MODEL *);
143 void initialize_dictionary (DICTIONARY *);
144 void initialize_error (const char *);
145 DICTIONARY *initialize_list (const char *);
146 void initialize_status (const char *);
147 SWAP *initialize_swap (const char *);
148 void learn (MODEL *, DICTIONARY *);
149 void load_dictionary (FILE *, DICTIONARY *);
150 bool load_model (const char *, MODEL *);
151 void load_tree (FILE *, TREE *);
152 void load_word (FILE *, DICTIONARY *);
153 void make_greeting (DICTIONARY *);
154 DICTIONARY *make_keywords (MODEL *, DICTIONARY *);
155 char *make_output (DICTIONARY *);
156 void make_words (char *, DICTIONARY *);
157 DICTIONARY *new_dictionary (void);
158 MODEL *new_model (int);
159 TREE *new_node (void);
160 SWAP *new_swap (void);
161 void print_header (FILE *);
162 DICTIONARY *reply (MODEL *, DICTIONARY *);
163 void save_dictionary (FILE *, DICTIONARY *);
164 void save_model (const char *, MODEL *);
165 void save_tree (FILE *, TREE *);
166 void save_word (FILE *, STRING);
167 int search_dictionary (DICTIONARY *, STRING, bool *);
168 int search_node (TREE *, int, bool *);
169 int seed (MODEL *, DICTIONARY *);
170 void show_dictionary (DICTIONARY *);
171 void status (const char *, ...);
172 void train (MODEL *, const char *);
173 void update_context (MODEL *, int);
174 void update_model (MODEL *, int);
175 void upper (char *);
176 int wordcmp (STRING, STRING);
177 bool word_exists (DICTIONARY *, STRING);
178 void write_input (char *, char *);
179 void write_output (struct htlc_conn *, char *);
180 
181 /*===========================================================================*/
182 #define ORDER	5
183 #define TIMEOUT	2
184 
185 int hal_active = 0;
186 bool used_key, typing_delay = TRUE;
187 DICTIONARY *g_words = 0, *g_ban = 0, *g_aux = 0, *g_grt = 0;
188 MODEL *g_model = 0;
189 SWAP *g_swp = 0;
190 FILE *errorfp, *statusfp;
191 
192 #include "getopt.h"
193 
194 static struct option hal_opts[] = {
195 	{"delay",	0, 0,	'd'},
196 	{"quit",	0, 0,	'q'},
197 	{"restart",	0, 0,	'r'},
198 	{"save",	0, 0,	's'},
199 	{0, 0, 0, 0}
200 };
201 
202 void
cmd_hal(int argc,char ** argv,char * str,struct htlc_conn * htlc,struct hx_chat * chat)203 cmd_hal (int argc, char **argv, char *str, struct htlc_conn *htlc, struct hx_chat *chat)
204 {
205 	int longind, o;
206 	struct opt_r opt;
207 
208 	if (str) {} /* removes unused parameter compiler warning */
209 	if (argc < 2) {
210 usage:		hx_printf(htlc, chat, "usage: %s [-sdrq] [--save] [--delay] [--restart] [--quit]\n", argv[0]);
211 		return;
212 	}
213 
214 	opt.ind = 0;
215 	opt.err_printf = 0;
216 	while ((o = getopt_long_r(argc, argv, "dqrs", hal_opts, &longind, &opt)) != EOF) {
217 		if (o == 0)
218 			o = hal_opts[longind].val;
219 		switch (o) {
220 			case 's':
221 				if (g_model) {
222 					save_model("megahal.brn", g_model);
223 					make_greeting(g_words);
224 				}
225 				break;
226 			case 'd':
227 				typing_delay = typing_delay == TRUE ? FALSE : TRUE;
228 				break;
229 			case 'r':
230 				hal_active = 1;
231 				break;
232 			case 'q':
233 				if (g_model)
234 					save_model("megahal.brn", g_model);
235 				exithal();
236 				break;
237 			default:
238 				goto usage;
239 		}
240 	}
241 }
242 
243 void
hal_rcv(struct htlc_conn * htlc,char * input_p,char * user)244 hal_rcv (struct htlc_conn *htlc, char *input_p, char *user)
245 {
246 	char *output, *input = xstrdup(input_p);
247 
248 	if (!g_words) {
249 		errorfp = stderr;
250 		statusfp = stdout;
251 		/*
252 		 *	Create a dictionary which will be used to hold the segmented
253 		 *	version of the user's input.
254 		 */
255 		g_words = new_dictionary();
256 
257 		/*
258 		 *	Do some initialisation
259 		 */
260 		initialize_error("megahal.log");
261 		srandom(time(0) + clock() + getpid());
262 
263 		/*
264 		 *	Create a language model.
265 		 */
266 		g_model = new_model(ORDER);
267 
268 		/*
269 		 *	Train the model on a text if one exists
270 		 */
271 		if (load_model("megahal.brn", g_model) == FALSE)
272 			train(g_model, "megahal.trn");
273 
274 		/*
275 		 *	Read a dictionary containing banned keywords, auxiliary keywords,
276 		 *	greeting keywords and swap keywords
277 		 */
278 		g_ban = initialize_list("megahal.ban");
279 		g_aux = initialize_list("megahal.aux");
280 		g_grt = initialize_list("megahal.grt");
281 		g_swp = initialize_swap("megahal.swp");
282 
283 		initialize_status("megahal.txt");
284 	}
285 
286 	write_input(input, user);
287 	upper(input);
288 	make_words(input, g_words);
289 	learn(g_model, g_words);
290 	output = generate_reply(g_model, g_words);
291 	write_output(htlc, output);
292 	xfree(output);
293 	xfree(input);
294 }
295 
296 /*---------------------------------------------------------------------------*/
297 
298 /*
299  *	Function:	exithal
300  *
301  *	Purpose:	Terminate the program.
302  */
303 void
exithal(void)304 exithal (void)
305 {
306 	hal_active = 0;
307 }
308 
309 /*---------------------------------------------------------------------------*/
310 
311 /*
312  *	Function:	initialize_error
313  *
314  *	Purpose:	Close the current error file pointer, and open a new one.
315  */
316 void
initialize_error(const char * filename)317 initialize_error (const char *filename)
318 {
319 	if (errorfp != stderr)
320 		fclose(errorfp);
321 	if (!filename || !(errorfp = fopen(filename, "a"))) {
322 		errorfp = stderr;
323 		return;
324 	}
325 
326 	print_header(errorfp);
327 }
328 
329 /*---------------------------------------------------------------------------*/
330 
331 /*
332  *	Function:	error
333  *
334  *	Purpose:	Print the specified message to the error file.
335  */
336 void
error(const char * title,const char * fmt,...)337 error (const char *title, const char *fmt, ...)
338 {
339 	va_list argp;
340 
341 	fprintf(errorfp, "%s: ", title);
342 	va_start(argp, fmt);
343 	vfprintf(errorfp, fmt, argp);
344 	va_end(argp);
345 	fprintf(errorfp, ".\n");
346 	fflush(errorfp);
347 }
348 
349 /*---------------------------------------------------------------------------*/
350 
351 /*
352  *	Function:	initialize_status
353  *
354  *	Purpose:	Close the current status file pointer, and open a new one.
355  */
356 void
initialize_status(const char * filename)357 initialize_status (const char *filename)
358 {
359 	if (statusfp != stdout)
360 		fclose(statusfp);
361 	if (!filename || !(statusfp = fopen(filename, "a"))) {
362 		statusfp = stdout;
363 		return;
364 	}
365 
366 	print_header(statusfp);
367 }
368 
369 /*---------------------------------------------------------------------------*/
370 
371 /*
372  *	Function:	status
373  *
374  *	Purpose		Print the specified message to the status file.
375  */
376 void
status(const char * fmt,...)377 status (const char *fmt, ...)
378 {
379 	va_list argp;
380 
381 	va_start(argp, fmt);
382 	vfprintf(statusfp, fmt, argp);
383 	va_end(argp);
384 	fflush(statusfp);
385 }
386 
387 /*---------------------------------------------------------------------------*/
388 
389 /*
390  *	Function:	print_header
391  *
392  *	Purpose:	Display a copyright message and timestamp.
393  */
394 void
print_header(FILE * fp)395 print_header (FILE *fp)
396 {
397 	time_t t;
398 	char timestamp[1024];
399 	struct tm *local;
400 
401 	t = time(0);
402 	local = localtime(&t);
403 	strftime(timestamp, 1024, "Start at: [%Y/%m/%d %H:%M:%S]", local);
404 
405 	fprintf(fp,	"(c)1998 Cambridge Center For Behavioral Studies all rights reserved\n"
406 			"[MegaHALv8][Jason Hutchens]\n"
407 			"%s\n", timestamp);
408 	fflush(fp);
409 }
410 
411 /*---------------------------------------------------------------------------*/
412 
413 /*
414  *    Function:   write_output
415  *
416  *    Purpose:    Display the output string.
417  */
418 void
write_output(struct htlc_conn * htlc,char * output)419 write_output (struct htlc_conn *htlc, char *output)
420 {
421 	time_t t;
422 	char timestamp[1024];
423 	struct tm *local;
424 
425 	capitalize(output);
426 	t = time(0);
427 	local = localtime(&t);
428 	strftime(timestamp, 1024, "HAL[%H:%M:%S]", local);
429 
430 	delay(htlc, output);
431 
432 	status("%s %s\n", timestamp, output);
433 }
434 
435 /*---------------------------------------------------------------------------*/
436 
437 /*
438  *	Function:	capitalize
439  *
440  *	Purpose:	Convert a string to look nice.
441  */
442 void
capitalize(char * str)443 capitalize (char *str)
444 {
445 	register unsigned int i;
446 	bool start = TRUE;
447 
448 	for (i = 0; i < 3 && str[i]; i++)
449 		if (isalpha(str[i])) {
450 			if (start == TRUE)
451 				str[i] = toupper(str[i]);
452 			else
453 				str[i] = tolower(str[i]);
454 			start = FALSE;
455 		}
456 
457 	for (i = 3; str[i]; i++) {
458 		if (isalpha(str[i])) {
459 			if (start == TRUE)
460 				str[i] = toupper(str[i]);
461 			else
462 				str[i] = tolower(str[i]);
463 			start = FALSE;
464 		}
465 		if (isspace(str[i])) {
466 			switch (str[i - 1]) {
467 				case '?':
468 				case '.':
469 				case '!':
470 					start = TRUE;
471 			}
472 		} else {
473 			switch (str[i]) {
474 				case '"':
475 					start = TRUE;
476 			}
477 		}
478 	}
479 }
480 
481 /*---------------------------------------------------------------------------*/
482 
483 /*
484  *	Function:	upper
485  *
486  *	Purpose:	Convert a string to its uppercase representation.
487  */
488 void
upper(char * str)489 upper (char *str)
490 {
491 	register unsigned int i;
492 
493 	for (i = 0; str[i]; i++)
494 		str[i] = toupper(str[i]);
495 }
496 
497 /*---------------------------------------------------------------------------*/
498 
499 /*
500  *	Function:	write_input
501  *
502  *	Purpose:	Log the user's input
503  */
504 void
write_input(char * input,char * user)505 write_input (char *input, char *user)
506 {
507 	time_t t;
508 	char timestamp[1024], tmp[1024];
509 	struct tm *local;
510 
511 	t = time(0);
512 	local = localtime(&t);
513 	strftime(tmp, 1024, "[%H:%M:%S]", local);
514 	sprintf(timestamp, "%s%s", user, tmp);
515 
516 	status("%s %s\n", timestamp, input);
517 }
518 
519 /*---------------------------------------------------------------------------*/
520 
521 /*
522  *	Function:	add_word
523  *
524  *	Purpose:	Add a word to a dictionary, and return the identifier
525  *			assigned to the word.  If the word already exists in
526  *			the dictionary, then return its current identifier
527  *			without adding it again.
528  */
529 u_int16_t
add_word(DICTIONARY * dictionary,STRING word)530 add_word (DICTIONARY *dictionary, STRING word)
531 {
532 	register int i;
533 	int position;
534 	bool found;
535 
536 	/*
537 	 *	If the word's already in the dictionary, there is no need to add it
538 	 */
539 	position = search_dictionary(dictionary, word, &found);
540 	if (found == TRUE)
541 		goto succeed;
542 
543 	/*
544 	 *	Increase the number of words in the dictionary
545 	 */
546 	dictionary->size++;
547 
548 	/*
549 	 *	Allocate one more entry for the word index
550 	 */
551 	dictionary->index = xrealloc(dictionary->index, 2 * dictionary->size);
552 
553 	/*
554 	 *	Allocate one more entry for the word array
555 	 */
556 	dictionary->entry = xrealloc(dictionary->entry, sizeof(STRING) * dictionary->size);
557 
558 	/*
559 	 *	Copy the new word into the word array
560 	 */
561 	dictionary->entry[dictionary->size - 1].length = word.length;
562 	dictionary->entry[dictionary->size - 1].word = xmalloc(word.length);
563 	memcpy(dictionary->entry[dictionary->size - 1].word, word.word, word.length);
564 
565 	/*
566 	 *	Shuffle the word index to keep it sorted alphabetically
567 	 */
568 	for (i = dictionary->size - 1; i > position; i--)
569 		dictionary->index[i] = dictionary->index[i - 1];
570 
571 	/*
572 	 *	Copy the new symbol identifier into the word index
573 	 */
574 	dictionary->index[position] = dictionary->size - 1;
575 
576 succeed:
577 	return dictionary->index[position];
578 }
579 
580 /*---------------------------------------------------------------------------*/
581 
582 /*
583  *	Function:	search_dictionary
584  *
585  *	Purpose:	Search the dictionary for the specified word, returning its
586  *			position in the index if found, or the position where it
587  *			should be inserted otherwise.
588  */
589 int
search_dictionary(DICTIONARY * dictionary,STRING word,bool * find)590 search_dictionary (DICTIONARY *dictionary, STRING word, bool *find)
591 {
592 	int position, min, max, middle, compar;
593 
594 	/*
595 	 *	If the dictionary is empty, then obviously the word won't be found
596 	 */
597 	if (!dictionary->size) {
598 		position = 0;
599 		goto notfound;
600 	}
601 
602 	/*
603 	 *	Initialize the lower and upper bounds of the search
604 	 */
605 	min = 0;
606 	max = dictionary->size - 1;
607 	/*
608 	 *	Search repeatedly, halving the search space each time, until either
609 	 *	the entry is found, or the search space becomes empty
610 	 */
611 	while (TRUE) {
612 		/*
613 		 *	See whether the middle element of the search space is greater
614 		 *	than, equal to, or less than the element being searched for.
615 		 */
616 		middle = (min + max) / 2;
617 		compar = wordcmp(word, dictionary->entry[dictionary->index[middle]]);
618 		/*
619 		 *	If it is equal then we have found the element.  Otherwise we
620 		 *	can halve the search space accordingly.
621 		 */
622 		if (!compar) {
623 			position = middle;
624 			goto found;
625 		} else if (compar > 0) {
626 			if (max == middle) {
627 				position = middle + 1;
628 				goto notfound;
629 			}
630 			min = middle + 1;
631 		} else {
632 			if (min == middle) {
633 				position = middle;
634 				goto notfound;
635 			}
636 			max = middle - 1;
637 		}
638 	}
639 
640 found:
641 	*find = TRUE;
642 
643 	return position;
644 
645 notfound:
646 	*find = FALSE;
647 
648 	return position;
649 }
650 
651 /*---------------------------------------------------------------------------*/
652 
653 /*
654  *	Function:	find_word
655  *
656  *	Purpose:	Return the symbol corresponding to the word specified.
657  *			We assume that the word with index zero is equal to a
658  *			NULL word, indicating an error condition.
659  */
660 u_int16_t
find_word(DICTIONARY * dictionary,STRING word)661 find_word (DICTIONARY *dictionary, STRING word)
662 {
663 	int position;
664 	bool found;
665 
666 	position = search_dictionary(dictionary, word, &found);
667 
668 	if (found == TRUE)
669 		return dictionary->index[position];
670 	else
671 		return 0;
672 }
673 
674 /*---------------------------------------------------------------------------*/
675 
676 /*
677  *	Function:	wordcmp
678  *
679  *	Purpose:	Compare two words, and return an integer indicating whether
680  *			the first word is less than, equal to or greater than the
681  *			second word.
682  */
683 int
wordcmp(STRING word1,STRING word2)684 wordcmp (STRING word1, STRING word2)
685 {
686 	register int i;
687 	int bound;
688 
689 	bound = MIN(word1.length,word2.length);
690 
691 	for (i = 0; i < bound; i++)
692 		if (word1.word[i] != word2.word[i])
693 			return (int)(word1.word[i] - word2.word[i]);
694 
695 	if (word1.length < word2.length)
696 		return -1;
697 	if (word1.length > word2.length)
698 		return 1;
699 
700 	return 0;
701 }
702 
703 /*---------------------------------------------------------------------------*/
704 
705 /*
706  *	Function:	free_dictionary
707  *
708  *	Purpose:	Release the memory consumed by the dictionary.
709  */
710 void
free_dictionary(DICTIONARY * dictionary)711 free_dictionary (DICTIONARY *dictionary)
712 {
713 	register u_int32_t i;
714 
715 	if (!dictionary->size)
716 		return;
717 	for (i = 0; i < dictionary->size; i++)
718 		xfree(dictionary->entry[i].word);
719 	xfree(dictionary->entry);
720 	dictionary->entry = 0;
721 	xfree(dictionary->index);
722 	dictionary->index = 0;
723 	dictionary->size = 0;
724 
725 	initialize_dictionary(dictionary);
726 }
727 
728 /*---------------------------------------------------------------------------*/
729 
730 /*
731  *	Function:	initialize_dictionary
732  *
733  *	Purpose:	Add dummy words to the dictionary.
734  */
735 void
initialize_dictionary(DICTIONARY * dictionary)736 initialize_dictionary (DICTIONARY *dictionary)
737 {
738 	char err[] = "<ERROR>", fin[] = "<FIN>";
739 	STRING word, end;
740 
741 	word.word = err;
742 	word.length = 7;
743 	end.word = fin;
744 	end.length = 5;
745 
746 	add_word(dictionary, word);
747 	add_word(dictionary, end);
748 }
749 
750 /*---------------------------------------------------------------------------*/
751 
752 /*
753  *	Function:	new_dictionary
754  *
755  *	Purpose:	Allocate room for a new dictionary.
756  */
757 DICTIONARY *
new_dictionary(void)758 new_dictionary (void)
759 {
760 	DICTIONARY *dictionary;
761 
762 	dictionary = xmalloc(sizeof(*dictionary));
763 
764 	dictionary->size = 0;
765 	dictionary->index = 0;
766 	dictionary->entry = 0;
767 
768 	initialize_dictionary(dictionary);
769 
770 	return dictionary;
771 }
772 
773 /*---------------------------------------------------------------------------*/
774 
775 /*
776  *	Function:	save_dictionary
777  *
778  *	Purpose:	Save a dictionary to the specified file.
779  */
780 void
save_dictionary(FILE * fp,DICTIONARY * dictionary)781 save_dictionary (FILE *fp, DICTIONARY *dictionary)
782 {
783 	register u_int32_t i;
784 
785 	fwrite(&(dictionary->size), sizeof(dictionary->size), 1, fp);
786 	for (i = 0; i < dictionary->size; i++)
787 		save_word(fp, dictionary->entry[i]);
788 }
789 
790 /*---------------------------------------------------------------------------*/
791 
792 /*
793  *	Function:	load_dictionary
794  *
795  *	Purpose:	Load a dictionary from the specified file.
796  */
797 void
load_dictionary(FILE * fp,DICTIONARY * dictionary)798 load_dictionary (FILE *fp, DICTIONARY *dictionary)
799 {
800 	register u_int32_t i;
801 	u_int32_t size;
802 
803 	fread(&size, sizeof(size), 1, fp);
804 	for (i = 0; i < size; i++)
805 		load_word(fp, dictionary);
806 }
807 
808 /*---------------------------------------------------------------------------*/
809 
810 /*
811  *	Function:	save_word
812  *
813  *	Purpose:	Save a dictionary word to a file.
814  */
815 void
save_word(FILE * fp,STRING word)816 save_word (FILE *fp, STRING word)
817 {
818 	fwrite(&(word.length), sizeof(word.length), 1, fp);
819 	fwrite(word.word, sizeof(*(word.word)), word.length, fp);
820 }
821 
822 /*---------------------------------------------------------------------------*/
823 
824 /*
825  *	Function:	load_word
826  *
827  *	Purpose:	Load a dictionary word from a file.
828  */
829 void
load_word(FILE * fp,DICTIONARY * dictionary)830 load_word (FILE *fp, DICTIONARY *dictionary)
831 {
832 	char buf[0xff];
833 	STRING word;
834 
835 	fread(&(word.length), sizeof(word.length), 1, fp);
836 	word.word = buf;
837 	fread(word.word, sizeof(*(word.word)), word.length, fp);
838 	add_word(dictionary, word);
839 }
840 
841 /*---------------------------------------------------------------------------*/
842 
843 /*
844  *	Function:	new_node
845  *
846  *	Purpose:	Allocate a new node for the n-gram tree, and initialise
847  *			its contents to sensible values.
848  */
849 TREE *
new_node(void)850 new_node (void)
851 {
852 	TREE *node;
853 
854 	/*
855 	 *	Allocate memory for the new node
856 	 */
857 	node = xmalloc(sizeof(*node));
858 
859 	/*
860 	 *	Initialise the contents of the node
861 	 */
862 	node->symbol = 0;
863 	node->usage = 0;
864 	node->count = 0;
865 	node->branch = 0;
866 	node->tree = 0;
867 
868 	return node;
869 }
870 
871 /*---------------------------------------------------------------------------*/
872 
873 /*
874  *	Function:	new_model
875  *
876  *	Purpose:	Create and initialise a new ngram model.
877  */
878 MODEL *
new_model(int order)879 new_model (int order)
880 {
881 	MODEL *model;
882 
883 	model = xmalloc(sizeof(*model));
884 
885 	model->order = order;
886 	model->forward = new_node();
887 	model->backward = new_node();
888 	model->context = xmalloc(sizeof(TREE *) * (order + 2));
889 	initialize_context(model);
890 	model->dictionary = new_dictionary();
891 
892 	return model;
893 }
894 
895 /*---------------------------------------------------------------------------*/
896 
897 /*
898  *	Function:	update_model
899  *
900  *	Purpose:	Update the model with the specified symbol.
901  */
902 void
update_model(MODEL * model,int symbol)903 update_model (MODEL *model, int symbol)
904 {
905 	register u_int16_t i;
906 
907 	/*
908 	 *	Update all of the models in the current context with the specified
909 	 *	symbol.
910 	 */
911 	for (i = model->order + 1; i > 0; i--)
912 		if (model->context[i - 1])
913 			model->context[i] = add_symbol(model->context[i - 1], (u_int16_t)symbol);
914 }
915 
916 /*---------------------------------------------------------------------------*/
917 
918 /*
919  *	Function:	update_context
920  *
921  *	Purpose:	Update the context of the model without adding the symbol.
922  */
923 void
update_context(MODEL * model,int symbol)924 update_context (MODEL *model, int symbol)
925 {
926 	register u_int16_t i;
927 
928 	for (i = model->order + 1; i > 0; i--)
929 		if (model->context[i - 1])
930 			model->context[i] = find_symbol(model->context[i - 1], symbol);
931 }
932 
933 /*---------------------------------------------------------------------------*/
934 
935 /*
936  *	Function:	add_symbol
937  *
938  *	Purpose:	Update the statistics of the specified tree with the
939  *			specified symbol, which may mean growing the tree if the
940  *			symbol hasn't been seen in this context before.
941  */
942 TREE *
add_symbol(TREE * tree,u_int16_t symbol)943 add_symbol (TREE *tree, u_int16_t symbol)
944 {
945 	TREE *node = 0;
946 
947 	/*
948 	 *	Search for the symbol in the subtree of the tree node.
949 	 */
950 	node = find_symbol_add(tree, symbol);
951 
952 	/*
953 	 *Increment the symbol counts
954 	 */
955 	if (node->count < 65535) {
956 		node->count++;
957 		tree->usage++;
958 	}
959 
960 	return node;
961 }
962 
963 /*---------------------------------------------------------------------------*/
964 
965 /*
966  *	Function:	find_symbol
967  *
968  *	Purpose:	Return a pointer to the child node, if one exists, which
969  *			contains the specified symbol.
970  */
971 TREE *
find_symbol(TREE * node,int symbol)972 find_symbol (TREE *node, int symbol)
973 {
974 	register int i;
975 	TREE *found = 0;
976 	bool found_symbol = FALSE;
977 
978 	/*
979 	 *	Perform a binary search for the symbol.
980 	 */
981 	i = search_node(node, symbol, &found_symbol);
982 	if (found_symbol == TRUE)
983 		found = node->tree[i];
984 
985 	return found;
986 }
987 
988 /*---------------------------------------------------------------------------*/
989 
990 /*
991  *	Function:	find_symbol_add
992  *
993  *	Purpose:	This function is conceptually similar to find_symbol,
994  *			apart from the fact that if the symbol is not found,
995  *			a new node is automatically allocated and added to the
996  *			tree.
997  */
998 TREE *
find_symbol_add(TREE * node,int symbol)999 find_symbol_add (TREE *node, int symbol)
1000 {
1001 	register int i;
1002 	TREE *found = 0;
1003 	bool found_symbol = FALSE;
1004 
1005 	/*
1006 	 *	Perform a binary search for the symbol.  If the symbol isn't found,
1007 	 *	attach a new sub-node to the tree node so that it remains sorted.
1008 	 */
1009 	i = search_node(node, symbol, &found_symbol);
1010 	if (found_symbol == TRUE) {
1011 		found = node->tree[i];
1012 	} else {
1013 		found = new_node();
1014 		found->symbol = symbol;
1015 		add_node(node, found, i);
1016 	}
1017 
1018 	return found;
1019 }
1020 
1021 /*---------------------------------------------------------------------------*/
1022 
1023 /*
1024  *	Function:	add_node
1025  *
1026  *	Purpose:	Attach a new child node to the sub-tree of the tree
1027  *			specified.
1028  */
1029 void
add_node(TREE * tree,TREE * node,int position)1030 add_node (TREE *tree, TREE *node, int position)
1031 {
1032 	register int i;
1033 
1034 	/*
1035 	 *	Allocate room for one more child node, which may mean allocating
1036 	 *	the sub-tree from scratch.
1037 	 */
1038 	tree->tree = xrealloc(tree->tree, sizeof(TREE *) * (tree->branch + 1));
1039 
1040 	/*
1041 	 *	Shuffle the nodes down so that we can insert the new node at the
1042 	 *	subtree index given by position.
1043 	 */
1044 	for (i = tree->branch; i > position; i--)
1045 		tree->tree[i] = tree->tree[i - 1];
1046 
1047 	/*
1048 	 *	Add the new node to the sub-tree.
1049 	 */
1050 	tree->tree[position] = node;
1051 	tree->branch++;
1052 }
1053 
1054 /*---------------------------------------------------------------------------*/
1055 
1056 /*
1057  *	Function:	search_node
1058  *
1059  *	Purpose:	Perform a binary search for the specified symbol on the
1060  *			subtree of the given node.  Return the position of the
1061  *			child node in the subtree if the symbol was found, or the
1062  *			position where it should be inserted to keep the subtree
1063  *			sorted if it wasn't.
1064  */
1065 int
search_node(TREE * node,int symbol,bool * found_symbol)1066 search_node (TREE *node, int symbol, bool *found_symbol)
1067 {
1068 	register int position;
1069 	int min, max, middle, compar;
1070 
1071 	/*
1072 	 *	Handle the special case where the subtree is empty.
1073 	 */
1074 	if (!node->branch) {
1075 		position = 0;
1076 		goto notfound;
1077 	}
1078 
1079 	/*
1080 	 *	Perform a binary search on the subtree.
1081 	 */
1082 	min = 0;
1083 	max = node->branch - 1;
1084 	while (TRUE) {
1085 		middle = (min + max) / 2;
1086 		compar = symbol-node->tree[middle]->symbol;
1087 		if (!compar) {
1088 			position = middle;
1089 			goto found;
1090 		} else if (compar > 0) {
1091 			if (max == middle) {
1092 				position = middle + 1;
1093 				goto notfound;
1094 			}
1095 			min = middle + 1;
1096 		} else {
1097 			if (min == middle) {
1098 				position = middle;
1099 				goto notfound;
1100 			}
1101 			max = middle - 1;
1102 		}
1103 	}
1104 
1105 found:
1106 	*found_symbol = TRUE;
1107 
1108 	return position;
1109 
1110 notfound:
1111 	*found_symbol = FALSE;
1112 
1113 	return position;
1114 }
1115 
1116 /*---------------------------------------------------------------------------*/
1117 
1118 /*
1119  *	Function:	initialize_context
1120  *
1121  *	Purpose:	Set the context of the model to a default value.
1122  */
1123 void
initialize_context(MODEL * model)1124 initialize_context (MODEL *model)
1125 {
1126 	register u_int16_t i;
1127 
1128 	for (i = 0; i <= model->order; i++)
1129 		model->context[i] = 0;
1130 }
1131 
1132 /*---------------------------------------------------------------------------*/
1133 
1134 /*
1135  *	Function:	learn
1136  *
1137  *	Purpose:	Learn from the user's input.
1138  */
1139 void
learn(MODEL * model,DICTIONARY * words)1140 learn (MODEL *model, DICTIONARY *words)
1141 {
1142 	register int i;
1143 	u_int16_t symbol;
1144 
1145 	/*
1146 	 *	We only learn from inputs which are long enough
1147 	 */
1148 	if (words->size <= model->order)
1149 		return;
1150 
1151 	/*
1152 	 *	Train the model in the forwards direction.  Start by initializing
1153 	 *	the context of the model.
1154 	 */
1155 	initialize_context(model);
1156 	model->context[0] = model->forward;
1157 	for (i = 0; i < (signed)words->size; i++) {
1158 		/*
1159 		 *	Add the symbol to the model's dictionary if necessary, and then
1160 		 *	update the forward model accordingly.
1161 		 */
1162 		symbol = add_word(model->dictionary, words->entry[i]);
1163 		update_model(model, symbol);
1164 	}
1165 	/*
1166 	 *	Add the sentence-terminating symbol.
1167 	 */
1168 	update_model(model, 1);
1169 
1170 	/*
1171 	 *	Train the model in the backwards direction.  Start by initializing
1172 	 *	the context of the model.
1173 	 */
1174 	initialize_context(model);
1175 	model->context[0] = model->backward;
1176 	for (i = words->size - 1; i >= 0; --i) {
1177 		/*
1178 		 *	Find the symbol in the model's dictionary, and then update
1179 		 *	the backward model accordingly.
1180 		 */
1181 		symbol = find_word(model->dictionary, words->entry[i]);
1182 		update_model(model, symbol);
1183 	}
1184 	/*
1185 	 *	Add the sentence-terminating symbol.
1186 	 */
1187 	update_model(model, 1);
1188 
1189 	return;
1190 }
1191 
1192 /*---------------------------------------------------------------------------*/
1193 
1194 /*
1195  *	Function:	train
1196  *
1197  *	Purpose:	Infer a MegaHAL brain from the contents of a text file.
1198  */
1199 void
train(MODEL * model,const char * filename)1200 train (MODEL *model, const char *filename)
1201 {
1202 	FILE *fp;
1203 	char buffer[1024];
1204 	DICTIONARY *words = 0;
1205 
1206 	if (!filename || !(fp = fopen(filename, "r")))
1207 		return;
1208 
1209 	words = new_dictionary();
1210 
1211 	while (fgets(buffer, sizeof(buffer), fp)) {
1212 		if (buffer[0] == '#')
1213 			continue;
1214 		buffer[strlen(buffer) - 1] = 0;
1215 		upper(buffer);
1216 		make_words(buffer, words);
1217 		learn(model, words);
1218 	}
1219 	fclose(fp);
1220 
1221 	xfree(words->entry);
1222 	xfree(words);
1223 }
1224 
1225 /*---------------------------------------------------------------------------*/
1226 
1227 /*
1228  *	Function:	show_dictionary
1229  *
1230  *	Purpose:	Display the dictionary for training purposes.
1231  */
1232 void
show_dictionary(DICTIONARY * dictionary)1233 show_dictionary (DICTIONARY *dictionary)
1234 {
1235 	register u_int32_t i;
1236 	FILE *fp;
1237 
1238 	if (!(fp = fopen("megahal.dic", "w"))) {
1239 		error("show_dictionary", "Unable to open file");
1240 		return;
1241 	}
1242 
1243 	for (i = 0; i < dictionary->size; i++)
1244 		fprintf(fp, "%.*s\n", dictionary->entry[i].length, dictionary->entry[i].word);
1245 
1246 	fclose(fp);
1247 }
1248 
1249 /*---------------------------------------------------------------------------*/
1250 
1251 /*
1252  *	Function:	save_model
1253  *
1254  *	Purpose:	Save the current state to a MegaHAL brain file.
1255  */
1256 void
save_model(const char * filename,MODEL * model)1257 save_model (const char *filename, MODEL *model)
1258 {
1259 	FILE *fp;
1260 
1261 	show_dictionary(model->dictionary);
1262 
1263 	if (!filename || !(fp = fopen(filename, "w"))) {
1264 		error("save_model", "Unable to open file `%s'", filename);
1265 		return;
1266 	}
1267 
1268 	fwrite(COOKIE, sizeof(char), sizeof(COOKIE) - 1, fp);
1269 	fwrite(&(model->order), sizeof(model->order), 1, fp);
1270 	save_tree(fp, model->forward);
1271 	save_tree(fp, model->backward);
1272 	save_dictionary(fp, model->dictionary);
1273 
1274 	fclose(fp);
1275 }
1276 
1277 /*---------------------------------------------------------------------------*/
1278 
1279 /*
1280  *	Function:	save_tree
1281  *
1282  *	Purpose:	Save a tree structure to the specified file.
1283  */
1284 void
save_tree(FILE * fp,TREE * node)1285 save_tree (FILE *fp, TREE *node)
1286 {
1287 	register u_int16_t i;
1288 
1289 	fwrite(&(node->symbol), sizeof(node->symbol), 1, fp);
1290 	fwrite(&(node->usage), sizeof(node->usage), 1, fp);
1291 	fwrite(&(node->count), sizeof(node->count), 1, fp);
1292 	fwrite(&(node->branch), sizeof(node->branch), 1, fp);
1293 
1294 	for (i = 0; i < node->branch; i++)
1295 		save_tree(fp, node->tree[i]);
1296 }
1297 
1298 /*---------------------------------------------------------------------------*/
1299 
1300 /*
1301  *	Function:	load_tree
1302  *
1303  *	Purpose:	Load a tree structure from the specified file.
1304  */
1305 void
load_tree(FILE * fp,TREE * node)1306 load_tree (FILE *fp, TREE *node)
1307 {
1308 	register u_int16_t i;
1309 
1310 	fread(&(node->symbol), sizeof(node->symbol), 1, fp);
1311 	fread(&(node->usage), sizeof(node->usage), 1, fp);
1312 	fread(&(node->count), sizeof(node->count), 1, fp);
1313 	fread(&(node->branch), sizeof(node->branch), 1, fp);
1314 
1315 	if (!node->branch)
1316 		return;
1317 
1318 	node->tree = xmalloc(sizeof(TREE *) * node->branch);
1319 
1320 	for (i = 0; i < node->branch; i++) {
1321 		node->tree[i] = new_node();
1322 		load_tree(fp, node->tree[i]);
1323 	}
1324 }
1325 
1326 /*---------------------------------------------------------------------------*/
1327 
1328 /*
1329  *	Function:	load_model
1330  *
1331  *	Purpose:	Load a model into memory.
1332  */
1333 bool
load_model(const char * filename,MODEL * model)1334 load_model (const char *filename, MODEL *model)
1335 {
1336 	FILE *fp;
1337 	char cookie[sizeof(COOKIE)];
1338 
1339 	if (!filename || !(fp = fopen(filename, "r"))) {
1340 		error("load_model", "Unable to open file `%s'", filename);
1341 		return FALSE;
1342 	}
1343 
1344 	fread(cookie, sizeof(*cookie), sizeof(COOKIE) - 1, fp);
1345 	if (memcmp(cookie, COOKIE, sizeof(COOKIE) - 1)) {
1346 		error("load_model", "File `%s' is not a MegaHAL brain (bad cookie %.*s)",
1347 			filename, sizeof(COOKIE) - 1, cookie);
1348 		fclose(fp);
1349 		return FALSE;
1350 	}
1351 
1352 	fread(&(model->order), sizeof(model->order), 1, fp);
1353 	load_tree(fp, model->forward);
1354 	load_tree(fp, model->backward);
1355 	load_dictionary(fp, model->dictionary);
1356 
1357 	fclose(fp);
1358 
1359 	return TRUE;
1360 }
1361 
1362 static char period_str[] = ".";
1363 
1364 /*---------------------------------------------------------------------------*/
1365 
1366 /*
1367  *    Function:   make_words
1368  *
1369  *    Purpose:    Break a string into an array of words.
1370  */
1371 void
make_words(char * input,DICTIONARY * words)1372 make_words (char *input, DICTIONARY *words)
1373 {
1374 	unsigned int offset = 0;
1375 
1376 	/*
1377 	 *	Clear the entries in the dictionary
1378 	 */
1379 	words->size = 0;
1380 
1381 	/*
1382 	 *	If the string is empty then do nothing, for it contains no words.
1383 	 */
1384 	if (!input[0])
1385 		return;
1386 
1387 	/*
1388 	 *	Loop forever.
1389 	 */
1390 	for (;;) {
1391 		/*
1392 		 *	If the current character is of the same type as the previous
1393 		 *	character, then include it in the word.  Otherwise, terminate
1394 		 *	the current word.
1395 		 */
1396 		if (boundary(input, offset)) {
1397 			/*
1398 			 *	Add the word to the dictionary
1399 			 */
1400 			words->entry = xrealloc(words->entry, (words->size + 1) * sizeof(STRING));
1401 			words->entry[words->size].length = offset;
1402 			words->entry[words->size].word = input;
1403 			words->size++;
1404 			if (offset == strlen(input))
1405 				break;
1406 			input += offset;
1407 			offset = 0;
1408 		} else {
1409 			offset++;
1410 		}
1411 	}
1412 
1413 	/*
1414 	 *	If the last word isn't punctuation, then replace it with a
1415 	 *	full-stop character.
1416 	 */
1417 	if (isalnum(words->entry[words->size - 1].word[0])) {
1418 		words->entry = xrealloc(words->entry, (words->size + 1) * sizeof(STRING));
1419 		words->entry[words->size].length = 1;
1420 		words->entry[words->size].word = period_str;
1421 		words->size++;
1422 	} else if (!strchr("!.?", words->entry[words->size - 1].word[0])) {
1423 		words->entry[words->size - 1].length = 1;
1424 		words->entry[words->size - 1].word = period_str;
1425 	}
1426 }
1427 
1428 /*---------------------------------------------------------------------------*/
1429 /*
1430  *	Function:	boundary
1431  *
1432  *	Purpose:	Return whether or not a word boundary exists in a string
1433  *			at the specified location.
1434  */
1435 bool
boundary(char * str,unsigned int position)1436 boundary (char *str, unsigned int position)
1437 {
1438 	if (!position)
1439 		return FALSE;
1440 
1441 	if (position == strlen(str))
1442 		return TRUE;
1443 
1444 	if (
1445 		str[position] == '\'' &&
1446 		isalpha(str[position - 1]) &&
1447 		isalpha(str[position + 1])
1448 	)
1449 		return FALSE;
1450 
1451 	if (
1452 		position > 1 &&
1453 		str[position - 1] == '\'' &&
1454 		isalpha(str[position - 2]) &&
1455 		isalpha(str[position])
1456 	)
1457 		return FALSE;
1458 
1459 	if (
1460 		isalpha(str[position]) &&
1461 		!isalpha(str[position - 1])
1462 	)
1463 		return TRUE;
1464 
1465 	if (
1466 		!isalpha(str[position]) &&
1467 		isalpha(str[position - 1])
1468 	)
1469 		return TRUE;
1470 
1471 	if (isdigit(str[position]) != isdigit(str[position - 1]))
1472 		return TRUE;
1473 
1474 	return FALSE;
1475 }
1476 
1477 /*---------------------------------------------------------------------------*/
1478 /*
1479  *	Function:	make_greeting
1480  *
1481  *	Purpose:	Put some special words into the dictionary so that the
1482  *			program will respond as if to a new judge.
1483  */
1484 void
make_greeting(DICTIONARY * words)1485 make_greeting (DICTIONARY *words)
1486 {
1487 	words->size = 0;
1488 	if (g_grt->size > 2)
1489 		add_word(words, g_grt->entry[random() % (g_grt->size - 2) + 2]);
1490 }
1491 
1492 /*---------------------------------------------------------------------------*/
1493 /*
1494  *    Function:   generate_reply
1495  *
1496  *    Purpose:    Take a string of user input and return a string of output
1497  *                which may vaguely be construed as containing a reply to
1498  *                whatever is in the input string.
1499  */
1500 char *
generate_reply(MODEL * model,DICTIONARY * words)1501 generate_reply (MODEL *model, DICTIONARY *words)
1502 {
1503 	static DICTIONARY *dummy = 0;
1504 	DICTIONARY *replywords, *keywords;
1505 	float surprise, max_surprise;
1506 	char *output = 0;
1507 	int count, basetime;
1508 
1509 	/*
1510 	 *	Create an array of keywords from the words in the user's input
1511 	 */
1512 	keywords = make_keywords(model, words);
1513 
1514 	/*
1515 	 *	Make sure some sort of reply exists
1516 	 */
1517 	if (!dummy)
1518 		dummy = new_dictionary();
1519 	replywords = reply(model, dummy);
1520 	if (dissimilar(words, replywords) == TRUE)
1521 		output = make_output(replywords);
1522 
1523 	/*
1524 	 *	Loop for the specified waiting period, generating and evaluating
1525 	 *	replies
1526 	 */
1527 	max_surprise = (float)-1.0;
1528 	count = 0;
1529 	basetime = time(0);
1530 	do {
1531 		replywords = reply(model, keywords);
1532 		surprise = evaluate_reply(model, keywords, replywords);
1533 		++count;
1534 		if ((surprise > max_surprise) && (dissimilar(words, replywords) == TRUE)) {
1535 			max_surprise = surprise;
1536 			output = make_output(replywords);
1537 		}
1538 	} while ((time(0) - basetime) < TIMEOUT);
1539 
1540 	/*
1541 	 *	Return the best answer we generated
1542 	 */
1543 	return output ? output : xstrdup("I forgot what i was gonna say!");
1544 }
1545 
1546 /*---------------------------------------------------------------------------*/
1547 
1548 /*
1549  *	Function:	dissimilar
1550  *
1551  *	Purpose:	Return TRUE or FALSE depending on whether the dictionaries
1552  *			are the same or not.
1553  */
1554 bool
dissimilar(DICTIONARY * words1,DICTIONARY * words2)1555 dissimilar (DICTIONARY *words1, DICTIONARY *words2)
1556 {
1557 	register unsigned int i;
1558 
1559 	if (words1->size != words2->size)
1560 		return TRUE;
1561 	for (i = 0; i < words1->size; i++)
1562 		if (wordcmp(words1->entry[i], words2->entry[i]))
1563 			return TRUE;
1564 
1565 	return FALSE;
1566 }
1567 
1568 /*---------------------------------------------------------------------------*/
1569 
1570 /*
1571  *	Function:	make_keywords
1572  *
1573  *	Purpose:	Put all the interesting words from the user's input into
1574  *			a keywords dictionary, which will be used when generating
1575  *			a reply.
1576  */
1577 DICTIONARY *
make_keywords(MODEL * model,DICTIONARY * words)1578 make_keywords (MODEL *model, DICTIONARY *words)
1579 {
1580 	static DICTIONARY *keys = 0;
1581 	register unsigned int i, j;
1582 	int c;
1583 
1584 	if (!keys)
1585 		keys = new_dictionary();
1586 	else
1587 		free_dictionary(keys);
1588 
1589 	for (i = 0; i < words->size; i++) {
1590 		/*
1591 		 *	Find the symbol ID of the word.  If it doesn't exist in
1592 		 *	the model, or if it begins with a non-alphanumeric
1593 		 *	character, or if it is in the exclusion array, then
1594 		 *	skip over it.
1595 		 */
1596 		c = 0;
1597 		for (j = 0; j < g_swp->size; j++)
1598 			if (!wordcmp(g_swp->from[j], words->entry[i])) {
1599 				add_key(model, keys, g_swp->to[j]);
1600 				c = 1;
1601 			}
1602 		if (!c)
1603 			add_key(model, keys, words->entry[i]);
1604 	}
1605 
1606 	if (keys->size > 2)
1607 		for (i = 0; i < words->size; i++) {
1608 			c = 0;
1609 			for (j = 0; j < g_swp->size; j++)
1610 				if (!wordcmp(g_swp->from[j], words->entry[i])) {
1611 					add_aux(model, keys, g_swp->to[j]);
1612 					c = 1;
1613 				}
1614 			if (!c)
1615 				add_aux(model, keys, words->entry[i]);
1616 	}
1617 
1618 	return keys;
1619 }
1620 
1621 /*---------------------------------------------------------------------------*/
1622 
1623 /*
1624  *	Function:	add_key
1625  *
1626  *	Purpose:	Add a word to the keyword dictionary.
1627  */
1628 void
add_key(MODEL * model,DICTIONARY * keys,STRING word)1629 add_key (MODEL *model, DICTIONARY *keys, STRING word)
1630 {
1631 	int symbol;
1632 
1633 	symbol = find_word(model->dictionary, word);
1634 	if (!symbol)
1635 		return;
1636 	if (!isalnum(word.word[0]))
1637 		return;
1638 	symbol = find_word(g_ban, word);
1639 	if (symbol)
1640 		return;
1641 	symbol = find_word(g_aux, word);
1642 	if (symbol)
1643 		return;
1644 
1645 	add_word(keys, word);
1646 }
1647 
1648 /*---------------------------------------------------------------------------*/
1649 
1650 /*
1651  *	Function:	add_aux
1652  *
1653  *	Purpose:	Add an auxilliary keyword to the keyword dictionary.
1654  */
1655 void
add_aux(MODEL * model,DICTIONARY * keys,STRING word)1656 add_aux (MODEL *model, DICTIONARY *keys, STRING word)
1657 {
1658 	int symbol;
1659 
1660 	symbol = find_word(model->dictionary, word);
1661 	if (!symbol)
1662 		return;
1663 	if (!isalnum(word.word[0]))
1664 		return;
1665 	symbol = find_word(g_aux, word);
1666 	if (!symbol)
1667 		return;
1668 	add_word(keys, word);
1669 }
1670 
1671 /*---------------------------------------------------------------------------*/
1672 
1673 /*
1674  *	Function:	reply
1675  *
1676  *	Purpose:	Generate a dictionary of reply words appropriate to the
1677  *			given dictionary of keywords.
1678  */
1679 DICTIONARY *
reply(MODEL * model,DICTIONARY * keys)1680 reply (MODEL *model, DICTIONARY *keys)
1681 {
1682 	DICTIONARY *replies;
1683 	register int i;
1684 	int symbol;
1685 	bool start = TRUE;
1686 
1687 	replies = new_dictionary();
1688 	replies->size = 0;
1689 
1690 	/*
1691 	 *	Start off by making sure that the model's context is empty.
1692 	 */
1693 	initialize_context(model);
1694 	model->context[0] = model->forward;
1695 	used_key = FALSE;
1696 
1697 	/*
1698 	 *	Generate the reply in the forward direction.
1699 	 */
1700 	while (TRUE) {
1701 		/*
1702 		 *	Get a random symbol from the current context.
1703 		 */
1704 		if (start == TRUE)
1705 			symbol = seed(model, keys);
1706 		else
1707 			symbol = babble(model, keys, replies);
1708 		if (!symbol || symbol == 1)
1709 			break;
1710 		start = FALSE;
1711 
1712 		/*
1713 		 *	Append the symbol to the reply dictionary.
1714 		 */
1715 		replies->entry = xrealloc(replies->entry, (replies->size + 1) * sizeof(STRING));
1716 
1717 		replies->entry[replies->size].length =
1718 			model->dictionary->entry[symbol].length;
1719 		replies->entry[replies->size].word =
1720 			model->dictionary->entry[symbol].word;
1721 		replies->size++;
1722 
1723 		/*
1724 		 *	Extend the current context of the model with the current symbol.
1725 		 */
1726 		update_context(model, symbol);
1727 	}
1728 
1729 	/*
1730 	 *	Start off by making sure that the model's context is empty.
1731 	 */
1732 	initialize_context(model);
1733 	model->context[0] = model->backward;
1734 
1735 	/*
1736 	 *	Re-create the context of the model from the current reply
1737 	 *	dictionary so that we can generate backwards to reach the
1738 	 *	beginning of the string.
1739 	 */
1740 	if (replies->size > 0)
1741 		for (i = MIN(replies->size - 1, model->order); i >= 0; --i) {
1742 			symbol = find_word(model->dictionary, replies->entry[i]);
1743 			update_context(model, symbol);
1744 		}
1745 
1746 	/*
1747 	 *	Generate the reply in the backward direction.
1748 	 */
1749 	while (TRUE) {
1750 		/*
1751 		 *	Get a random symbol from the current context.
1752 		 */
1753 		symbol = babble(model, keys, replies);
1754 		if (!symbol || symbol == 1)
1755 			break;
1756 
1757 		/*
1758 		 *	Prepend the symbol to the reply dictionary.
1759 		 */
1760 		replies->entry = xrealloc(replies->entry, (replies->size + 1) * sizeof(STRING));
1761 
1762 		/*
1763 		 *	Shuffle everything up for the prepend.
1764 		 */
1765 		for (i = replies->size; i > 0; --i) {
1766 			replies->entry[i].length = replies->entry[i - 1].length;
1767 			replies->entry[i].word = replies->entry[i - 1].word;
1768 		}
1769 
1770 		replies->entry[0].length = model->dictionary->entry[symbol].length;
1771 		replies->entry[0].word = model->dictionary->entry[symbol].word;
1772 		replies->size++;
1773 
1774 		/*
1775 		 *	Extend the current context of the model with the current symbol.
1776 		 */
1777 		update_context(model, symbol);
1778 	}
1779 
1780 	return replies;
1781 }
1782 
1783 /*---------------------------------------------------------------------------*/
1784 
1785 /*
1786  *	Function:	evaluate_reply
1787  *
1788  *	Purpose:	Measure the average surprise of keywords relative to the
1789  *			language model.
1790  */
1791 float
evaluate_reply(MODEL * model,DICTIONARY * keys,DICTIONARY * words)1792 evaluate_reply (MODEL *model, DICTIONARY *keys, DICTIONARY *words)
1793 {
1794 	register unsigned int j;
1795 	register int i;
1796 	int symbol, count, num = 0;
1797 	float probability, entropy = (float)0.0;
1798 	TREE *node;
1799 
1800 	if (words->size <= 0)
1801 		return (float)0.0;
1802 	initialize_context(model);
1803 	model->context[0] = model->forward;
1804 	for (i = 0; i < (signed)words->size; i++) {
1805 		symbol = find_word(model->dictionary, words->entry[i]);
1806 		if (find_word(keys, words->entry[i])) {
1807 			probability = (float)0.0;
1808 			count = 0;
1809 			++num;
1810 			for (j = 0; j < model->order; j++)
1811 				if (model->context[j]) {
1812 					node = find_symbol(model->context[j], symbol);
1813 					probability += (float)(node->count) /
1814 						(float)(model->context[j]->usage);
1815 					++count;
1816 				}
1817 			if (count > 0.0)
1818 				entropy -= (float)log(probability / (float)count);
1819 		}
1820 		update_context(model, symbol);
1821 	}
1822 
1823 	initialize_context(model);
1824 	model->context[0] = model->backward;
1825 	for (i = words->size - 1; i >= 0; --i) {
1826 		symbol = find_word(model->dictionary, words->entry[i]);
1827 		if (find_word(keys, words->entry[i])) {
1828 			probability = (float)0.0;
1829 			count = 0;
1830 			++num;
1831 			for (j = 0; j < model->order; j++)
1832 				if (model->context[j]) {
1833 					node = find_symbol(model->context[j], symbol);
1834 					probability += (float)(node->count) /
1835 						(float)(model->context[j]->usage);
1836 					++count;
1837 				}
1838 			if (count > 0.0)
1839 				entropy -= (float)log(probability / (float)count);
1840 		}
1841 		update_context(model, symbol);
1842 	}
1843 
1844 	if (num >= 8)
1845 		entropy /= (float)sqrt(num - 1);
1846 	if (num >= 16)
1847 		entropy /= (float)num;
1848 
1849 	return entropy;
1850 }
1851 
1852 /*---------------------------------------------------------------------------*/
1853 
1854 /*
1855  *	Function:	make_output
1856  *
1857  *	Purpose:	Generate a string from the dictionary of reply words.
1858  */
1859 char *
make_output(DICTIONARY * words)1860 make_output (DICTIONARY *words)
1861 {
1862 	char *output = 0;
1863 	register unsigned int i, len;
1864 
1865 	if (!words->size)
1866 		return xstrdup("I am utterly speechless!");
1867 
1868 	len = 1;
1869 	for (i = 0; i < words->size; i++)
1870 		len += words->entry[i].length;
1871 	output = xmalloc(len + 1);
1872 	len = 0;
1873 	for (i = 0; i < words->size; i++) {
1874 		memcpy(&(output[len]), words->entry[i].word, words->entry[i].length);
1875 		len += words->entry[i].length;
1876 	}
1877 	output[len] = 0;
1878 
1879 	return output;
1880 }
1881 
1882 /*---------------------------------------------------------------------------*/
1883 
1884 /*
1885  *	Function:	babble
1886  *
1887  *	Purpose:	Return a random symbol from the current context, or a
1888  *			zero symbol identifier if we've reached either the
1889  *			start or end of the sentence.  Select the symbol based
1890  *			on probabilities, favouring keywords.  In all cases,
1891  *			use the longest available context to choose the symbol.
1892  */
1893 int
babble(MODEL * model,DICTIONARY * keys,DICTIONARY * words)1894 babble (MODEL *model, DICTIONARY *keys, DICTIONARY *words)
1895 {
1896 	TREE *node = 0;
1897 	register int i;
1898 	int count, symbol = 0;
1899 
1900 	/*
1901 	 *	Select the longest available context.
1902 	 */
1903 	for (i = 0; i <= model->order; i++)
1904 		if (model->context[i])
1905 			node = model->context[i];
1906 
1907 	if (!node || !node->branch)
1908 		return 0;
1909 
1910 	/*
1911 	 *	Choose a symbol at random from this context.
1912 	 */
1913 	i = random() % node->branch;
1914 	count = random() % node->usage;
1915 	while (count >= 0) {
1916 		/*
1917 		 *	If the symbol occurs as a keyword, then use it.  Only use an
1918 		 *	auxilliary keyword if a normal keyword has already been used.
1919 		 */
1920 		symbol = node->tree[i]->symbol;
1921 
1922 		if (
1923 			(find_word(keys, model->dictionary->entry[symbol])) &&
1924 			(used_key == TRUE ||
1925 			(!find_word(g_aux, model->dictionary->entry[symbol]))) &&
1926 			(word_exists(words, model->dictionary->entry[symbol]) == FALSE)
1927 		) {
1928 			used_key = TRUE;
1929 			break;
1930 		}
1931 		count -= node->tree[i]->count;
1932 		i = i >= (node->branch - 1) ? 0 : i + 1;
1933 	}
1934 
1935 	return symbol;
1936 }
1937 
1938 /*---------------------------------------------------------------------------*/
1939 
1940 /*
1941  *	Function:	word_exists
1942  *
1943  *	Purpose:	A silly brute-force searcher for the reply string.
1944  */
1945 bool
word_exists(DICTIONARY * dictionary,STRING word)1946 word_exists (DICTIONARY *dictionary, STRING word)
1947 {
1948 	register u_int32_t i;
1949 
1950 	for (i = 0; i < dictionary->size; i++)
1951 		if (!wordcmp(dictionary->entry[i], word))
1952 			return TRUE;
1953 
1954 	return FALSE;
1955 }
1956 
1957 /*---------------------------------------------------------------------------*/
1958 
1959 /*
1960  *	Function:	seed
1961  *
1962  *	Purpose:	Seed the reply by guaranteeing that it contains a
1963  *			keyword, if one exists.
1964  */
1965 int
seed(MODEL * model,DICTIONARY * keys)1966 seed (MODEL *model, DICTIONARY *keys)
1967 {
1968 	register unsigned int i, stop;
1969 	int symbol;
1970 
1971 	if (!model->context[0]->branch)
1972 		symbol = 0;
1973 	else
1974 		symbol = random() % model->context[0]->branch;
1975 
1976 	if (keys->size > 2) {
1977 		do {
1978 			i = random() % keys->size;
1979 		} while (i < 2);
1980 		stop = i;
1981 		while (TRUE) {
1982 			if (
1983 				(find_word(model->dictionary, keys->entry[i])) &&
1984 				(!find_word(g_aux, keys->entry[i]))
1985 			) {
1986 				symbol = find_word(model->dictionary, keys->entry[i]);
1987 				return symbol;
1988 			}
1989 			i++;
1990 			if (i == keys->size)
1991 				i = 2;
1992 			if (i == stop)
1993 				return symbol;
1994 		}
1995 	}
1996 
1997 	return symbol;
1998 }
1999 
2000 /*---------------------------------------------------------------------------*/
2001 
2002 /*
2003  *	Function:	new_swap
2004  *
2005  *	Purpose:	Allocate a new swap structure.
2006  */
2007 SWAP *
new_swap(void)2008 new_swap (void)
2009 {
2010 	SWAP *list;
2011 
2012 	list = xmalloc(sizeof(*list));
2013 	list->size = 0;
2014 	list->from = 0;
2015 	list->to = 0;
2016 
2017 	return list;
2018 }
2019 
2020 /*---------------------------------------------------------------------------*/
2021 
2022 /*
2023  *	Function:	add_swap
2024  *
2025  *	Purpose:	Add a new entry to the swap structure.
2026  */
2027 void
add_swap(SWAP * list,char * s,char * d)2028 add_swap (SWAP *list, char *s, char *d)
2029 {
2030 	list->size++;
2031 
2032 	list->from = xrealloc(list->from, sizeof(STRING) * (list->size));
2033 	list->to = xrealloc(list->to, sizeof(STRING) * (list->size));
2034 
2035 	list->from[list->size - 1].length = strlen(s);
2036 	list->from[list->size - 1].word = xstrdup(s);
2037 	list->to[list->size - 1].length = strlen(d);
2038 	list->to[list->size - 1].word = xstrdup(d);
2039 }
2040 
2041 /*---------------------------------------------------------------------------*/
2042 
2043 /*
2044  *	Function:	initialize_swap
2045  *
2046  *	Purpose:	Read a swap structure from a file.
2047  */
2048 SWAP *
initialize_swap(const char * filename)2049 initialize_swap (const char *filename)
2050 {
2051 	SWAP *list;
2052 	FILE *fp;
2053 	char buffer[1024], *from, *to;
2054 
2055 	list = new_swap();
2056 
2057 	if (!filename || !(fp = fopen(filename, "r")))
2058 		return list;
2059 
2060 	while (fgets(buffer, sizeof(buffer), fp)) {
2061 		if (buffer[0] == '#')
2062 			continue;
2063 		from = strtok(buffer, "\t ");
2064 		to = strtok(0, "\t \n#");
2065 		add_swap(list, from, to);
2066 	}
2067 
2068 	fclose(fp);
2069 
2070 	return list;
2071 }
2072 
2073 /*---------------------------------------------------------------------------*/
2074 
2075 /*
2076  *	Function:	initialize_list
2077  *
2078  *	Purpose:	Read a dictionary from a file.
2079  */
2080 DICTIONARY
initialize_list(const char * filename)2081 *initialize_list (const char *filename)
2082 {
2083 	DICTIONARY *list;
2084 	FILE *fp;
2085 	STRING word;
2086 	char *string, buffer[1024];
2087 
2088 	list = new_dictionary();
2089 
2090 	if (!filename || !(fp = fopen(filename, "r")))
2091 		return list;
2092 
2093 	while (fgets(buffer, sizeof(buffer), fp)) {
2094 		if (buffer[0] == '#')
2095 			continue;
2096 		string = strtok(buffer, "\t \n#");
2097 		if (string && string[0]) {
2098 			word.length = strlen(string);
2099 			word.word = xstrdup(buffer);
2100 			add_word(list, word);
2101 		}
2102 	}
2103 
2104 	fclose(fp);
2105 
2106 	return list;
2107 }
2108 
2109 /*---------------------------------------------------------------------------*/
2110 
2111 /*
2112  *	Function:	delay
2113  *
2114  *	Purpose:	Display the string to stdout as if it was typed by a human.
2115  */
2116 void
delay(struct htlc_conn * htlc,char * str)2117 delay (struct htlc_conn *htlc, char *str)
2118 {
2119 	register int i;
2120 	register char *out;
2121 
2122 	/*
2123 	 *	Don't simulate typing if the feature is turned off
2124 	 */
2125 	if (typing_delay == FALSE) {
2126 		hx_send_chat(htlc, 0, str);
2127 		return;
2128 	}
2129 
2130 	out = xmalloc(strlen(str) + 1);
2131 	for (i = 0; str[i]; i++) {
2132 		/*
2133 		 *	Standard keyboard delay
2134 		 */
2135 		usleep(D_KEY + random() % V_KEY - random() % V_KEY);
2136 		out[i] = str[i];
2137 
2138 		/*
2139 		 *	A random thinking delay
2140 		 */
2141 		if ((!isalnum(str[i])) && ((random() % 100) < P_THINK))
2142 			usleep(D_THINK + random() % V_THINK - random() % V_THINK);
2143 	}
2144 	out[i] = 0;
2145 	hx_send_chat(htlc, 0, out);
2146 	xfree(out);
2147 }
2148 
2149 /*===========================================================================*/
2150 
2151 /*
2152  *	Revision 1.8  1997/12/24 03:17:01  hutch
2153  *	More bug fixes, and hopefully the final contest version!
2154  *
2155  *	Revision 1.7  1997/12/22  13:18:09  hutch
2156  *	A few more bug fixes, and non-repeating implemented.
2157  *
2158  *	Revision 1.6  1997/12/22 04:27:04  hutch
2159  *	A few minor bug fixes.
2160  *
2161  *	Revision 1.5  1997/12/15 04:35:59  hutch
2162  *	Final Loebner version!
2163  *
2164  *	Revision 1.4  1997/12/11 05:45:29  hutch
2165  *	the almost finished version.
2166  *
2167  *	Revision 1.3  1997/12/10 09:08:09  hutch
2168  *	Now Loebner complient (tm)
2169  *
2170  *	Revision 1.2  1997/12/08 06:22:32  hutch
2171  *	Tidied up.
2172  *
2173  *	Revision 1.1  1997/12/05  07:11:44  hutch
2174  *	Initial revision
2175  *
2176  *	Revision 1.7  1997/12/04 07:07:13  hutch
2177  *	Added load and save functions, and tidied up some code/
2178  *
2179  *	Revision 1.6  1997/12/02 08:34:47  hutch
2180  *	Added the ban, aux and swp functions.
2181  *
2182  *	Revision 1.5  1997/12/02 06:03:04  hutch
2183  *	Updated to use a special terminating symbol, and to store only
2184  *	branches of maximum depth, as they are the only ones used in
2185  *	the reply.
2186  *
2187  *	Revision 1.4  1997/10/28 09:23:12  hutch
2188  *	MegaHAL is babbling nicely, but without keywords.
2189  *
2190  *	Revision 1.3  1997/10/15  09:04:03  hutch
2191  *	MegaHAL can parrot back whatever the user says.
2192  *
2193  *	Revision 1.2  1997/07/21 04:03:28  hutch
2194  *	Fully working.
2195  *
2196  *	Revision 1.1  1997/07/15 01:55:25  hutch
2197  *	Initial revision
2198  *
2199  *	Revision 1.1  1997/07/15 01:54:21  hutch
2200  *	Initial revision
2201  */
2202 
2203 /*===========================================================================*/
2204 
2205