/* KnightCap/Export/td.c */

#include "includes.h"
#include "knightcap.h"

/* Decay factor applied to the running gradient term at every step of
   td_update() (the lambda of TD(lambda)). */
#define TD_LAMBDA 0.7
/* Step size: td_update() adds TD_ALPHA*dw[i] to each coefficient.
   NOTE(review): 10/(EVAL_SCALE) is an integer division if EVAL_SCALE has
   integer type -- confirm it is a floating point constant. */
#define TD_ALPHA (10/(EVAL_SCALE))
/* td_update() only applies the accumulated update when its round counter
   equals this value. */
#define MAX_ROUNDS 4
/* NOTE(review): not referenced in the visible part of this file. */
#define MAX_SIZE 50

/* Round counter: loaded from and saved to total_rounds.dat by td_update(),
   incremented at the end of each completed td_update(), and passed by
   td_dump() to dump_coeffs() as the round number. */
static int total_rounds;

/* A value tagged with three integer indices. */
struct max_struct {
	double val;
	int i, j, k;
};

/* Three-way comparison of two entries by their val field: 1 when m1 is
   larger, 0 when both are equal, -1 in every other case (including
   comparisons that are unordered because a value is NaN). */
static int max_compare(struct max_struct *m1, struct max_struct *m2)
{
	const double lhs = m1->val;
	const double rhs = m2->val;

	if (lhs > rhs)
		return 1;

	return (lhs == rhs) ? 0 : -1;
}

/* Shared program state, defined in another translation unit. */
extern struct state *state;
extern int player;
/* Indices of coefficients that td_update() skips when computing and
   applying updates. */
extern int dont_change[];

/* Printable stage names, indexed by stage number; dump_coeffs() uses them
   for the stages OPENING through MATING. */
char *stage_name[] = {"OPENING", "MIDDLE", "ENDING", "MATING"};

#include "names.h"

/* Prints one named coefficient group on a single row: first a comment line
   carrying the group name, then every value formatted as "%7d,", then a
   newline.  The group runs from cn's index up to the index of the entry
   that follows cn.  When small is non-NULL the same text is written there
   too, with each value divided by 100. */
static void p_coeff_vector(struct coefficient_name *cn, FILE *large, FILE *small)
{
	int k = 0;

	fprintf(large, "/* %s */\n", cn->name);
	if (small != NULL)
		fprintf(small, "/* %s */\n", cn->name);

	while (k < cn[1].index - cn[0].index) {
		fprintf(large, "%7d,", coefficients[cn->index + k]);
		if (small != NULL)
			fprintf(small, "%7d,", coefficients[cn->index + k]/100);
		k++;
	}

	fputc('\n', large);
	if (small != NULL)
		fputc('\n', small);
}

/* Prints one named coefficient group ten values to a row: a comment line
   with the group name, then the values as "%7d," with a line break after
   every tenth one, then a closing newline.  The group runs from cn's index
   up to the index of the entry that follows cn.  When small is non-NULL the
   same layout is written there with each value divided by 100. */
static void p_coeff_array(struct coefficient_name *cn, FILE *large, FILE *small)
{
	int k = 0;

	fprintf(large, "/* %s */\n", cn->name);
	if (small != NULL)
		fprintf(small, "/* %s */\n", cn->name);

	while (k < cn[1].index - cn[0].index) {
		fprintf(large, "%7d,", coefficients[cn->index + k]);
		if (small != NULL)
			fprintf(small, "%7d,", coefficients[cn->index + k]/100);
		k++;

		if (k % 10 != 0)
			continue;

		/* ten values are on this row: start the next one */
		fputc('\n', large);
		if (small != NULL)
			fputc('\n', small);
	}

	fputc('\n', large);
	if (small != NULL)
		fputc('\n', small);
}


/* Prints a 64-entry coefficient group as eight rows of eight values: a
   comment line with the group name, then each value as "%7d," with a
   newline closing every row.  When small is non-NULL the same layout is
   written there with each value divided by 100. */
static void p_coeff_board(struct coefficient_name *cn, FILE *large, FILE *small)
{
	int sq;

	fprintf(large, "/* %s */\n", cn->name);
	if (small != NULL)
		fprintf(small, "/* %s */\n", cn->name);

	for (sq = 0; sq < 64; sq++) {
		fprintf(large, "%7d,", coefficients[cn->index + sq]);
		if (small != NULL)
			fprintf(small, "%7d,", coefficients[cn->index + sq]/100);

		/* last column of a row: close the row */
		if (sq % 8 == 7) {
			fputc('\n', large);
			if (small != NULL)
				fputc('\n', small);
		}
	}
}

/* Prints a 32-entry coefficient group as eight rows of four values: a
   comment line with the group name, then each value as "%7d," with a
   newline closing every row.  When small is non-NULL the same layout is
   written there with each value divided by 100. */
static void p_coeff_half_board(struct coefficient_name *cn, FILE *large, FILE *small)
{
	int cell;

	fprintf(large, "/* %s */\n", cn->name);
	if (small != NULL)
		fprintf(small, "/* %s */\n", cn->name);

	for (cell = 0; cell < 32; cell++) {
		fprintf(large, "%7d,", coefficients[cn->index + cell]);
		if (small != NULL)
			fprintf(small, "%7d,", coefficients[cn->index + cell]/100);

		/* last column of a row: close the row */
		if (cell % 4 == 3) {
			fputc('\n', large);
			if (small != NULL)
				fputc('\n', small);
		}
	}
}

/* Writes the current coefficient set (new_coefficients) out in two forms:
   as C source defining orig_coefficients[], and as a raw binary image.

   fname  path of the binary file; it is created or truncated and receives
          __TOTAL_COEFFS__ raw entries of new_coefficients.
   round  when >= 0 the C source goes to a numbered header under
          /usr/local/chess; otherwise to a fixed name in the current
          directory.

   With LARGE_ETYPE set a second header, small_coeffs.h, is written as well,
   holding every value divided by 100.  If that second file cannot be
   opened it is silently skipped.

   Side effects: copies total_rounds into state->total_rounds and leaves the
   global coefficients pointer aimed at the last stage's slice of
   new_coefficients.  Failures are reported on stderr; nothing is returned. */
void dump_coeffs(char *fname, int round)
{
	struct coefficient_name *cn;
	FILE *large, *small;
	size_t nbytes;
	int fd;
	int i;
	char fn[160];

#if LARGE_ETYPE
	if (round >= 0)
		snprintf(fn, sizeof(fn), "/usr/local/chess/large_coeffs%d.h", round);
	else
		snprintf(fn, sizeof(fn), "large_coeffs.h");
	small = fopen("small_coeffs.h", "w");
#else
	if (round >= 0)
		snprintf(fn, sizeof(fn), "/usr/local/chess/small_coeffs%d.h", round);
	else
		snprintf(fn, sizeof(fn), "small_coeffs.h");
	small = NULL;
#endif
	large = fopen(fn, "w");
	if (large == NULL) {
		/* name the header that failed to open, not the binary file */
		perror(fn);
		if (small)
			fclose(small);
		return;
	}

	state->total_rounds = total_rounds;

	fprintf(large, "etype orig_coefficients[] = {\n");
	if (small)
		fprintf(small, "etype orig_coefficients[] = {\n");
	for (i=OPENING; i<=MATING; i++) {
		fprintf(large, "\n/* %%%s%% */\n", stage_name[i]);
		if (small)
			fprintf(small, "\n/* %%%s%% */\n", stage_name[i]);
		/* the printers read through the global coefficients pointer */
		coefficients = 	new_coefficients + i*__COEFFS_PER_STAGE__;
		for (cn = &coefficient_names[0]; cn->name; cn++) {
			/* group length = distance to the next entry's index;
			   the length selects the layout */
			int n = cn[1].index - cn[0].index;
			if (n == 1) {
				fprintf(large, "/* %s */ %d,\n", cn[0].name,
					coefficients[cn[0].index]);
				if (small)
					fprintf(small, "/* %s */ %d,\n", cn[0].name,
						coefficients[cn[0].index]/100);
			} else if (n == 64) {
				p_coeff_board(cn,large,small);
			} else if (n == 32) {
				p_coeff_half_board(cn,large,small);
			} else if (n % 10 == 0) {
				p_coeff_array(cn,large,small);
			} else {
				p_coeff_vector(cn,large,small);
			}
		}
	}

	fprintf(large, "};\n");
	if (small)
		fprintf(small, "};\n");
	/* fclose flushes, so a failure here means the header is incomplete */
	if (fclose(large) != 0)
		perror(fn);
	if (small && fclose(small) != 0)
		perror("small_coeffs.h");

	fd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, 0666);
	if (fd == -1) {
		perror(fname);
		return;
	}

	nbytes = __TOTAL_COEFFS__*sizeof(new_coefficients[0]);
	if (Write(fd, (char *)new_coefficients, nbytes) != nbytes)
		fprintf(stderr, "%s: short write\n", fname);
	close(fd);
}


/* Dumps the coefficients through dump_coeffs(fname, total_rounds), then
   prints with cprintf the sum of the absolute differences between
   new_coefficients and orig_coefficients over all __TOTAL_COEFFS__ entries.
   Always returns 1.
   NOTE(review): the sum is accumulated in etype and printed with %d --
   confirm etype is wide enough for the total. */
int td_dump(char *fname)
{
	etype sum = 0;
	int k = 0;

	dump_coeffs(fname, total_rounds);

	while (k < __TOTAL_COEFFS__) {
		sum += ABS(new_coefficients[k] - orig_coefficients[k]);
		k++;
	}

	cprintf(0,"%d\n", sum);

	return 1;
}

/* routines for updating the evaluation function according to the
   method of temporal differences */

#if LEARN_EVAL


/* Appends a copy of *b to state->leaf_pos at slot state->stored_move_num,
   prints the stored board and advances state->stored_move_num.  A non-zero
   state->computer is also recorded in state->td_comp.  The slot index is
   not range-checked here.  Always returns 1. */
int td_store_pos(Position *b)
{
	Position *slot = &state->leaf_pos[state->stored_move_num];

	*slot = *b;
	print_board(slot->board);
	state->stored_move_num += 1;

	if (state->computer == 0)
		return 1;

	state->td_comp = state->computer;
	return 1;
}

/* Calculate the partial derivative of the eval function with respect to
   each of the coefficients, for every stored position.  The derivatives
   are computed numerically as a forward difference with step delta.

   big_grad  output buffer, treated as one row of __TOTAL_COEFFS__ floats
             per stored position.  Only the __COEFFS_PER_STAGE__ entries
             belonging to the position's own stage are written; the rest of
             each row is left untouched.

   Also stores the signed evaluation of each position in
   state->leaf_eval[m].v, clears FLAG_EVAL_DONE and FLAG_DONE_TACTICS on the
   stored positions, and leaves the global coefficients pointer aimed at the
   stage of the last position processed.

   Returns __COEFFS_PER_STAGE__ on success, or 0 as soon as a stored
   position has a stage outside OPENING..MATING. */
int td_gradient(float *big_grad)
{
	etype v, v2;
	int i, n, m;
	/* step added to a coefficient for the finite difference */
	etype delta = 100;
	float *grad;
	Position *b, b1;
#if TEST_GRADIENT
	float error;
	etype v3, v4;
#endif

	n = __COEFFS_PER_STAGE__;

	for (m = 0; m < state->stored_move_num; m++) {
		b = state->leaf_pos+m;
		lprintf(0, "%d ", m);
		/* sanity check */
		if (b->stage < OPENING || b->stage > MATING) {
			lprintf(0, "**Wrong stage in gradient calc: %d\n", b->stage);
			return 0;
		}

		/* clear the flags on the stored position, then evaluate a
		   scratch copy so the stored position is not modified by
		   the evaluation itself */
		b->flags &= ~FLAG_EVAL_DONE;
		b->flags &= ~FLAG_DONE_TACTICS;
		b1 = (*b);
		v = eval_etype(&b1, INFINITY, MAX_DEPTH);
		lprintf(0, "%d %d\n", v, b1.stage);

		/* sign the eval by the side to move and, outside demo mode,
		   by state->td_comp */
		state->leaf_eval[m].v = next_to_play(b)*v;
		if (!state->demo_mode) {
			state->leaf_eval[m].v *= state->td_comp;
		}

		/* select this stage's slice of the coefficients and the
		   matching slice of row m of the output */
		coefficients = new_coefficients + b->stage*__COEFFS_PER_STAGE__;
		grad = big_grad + __TOTAL_COEFFS__*m + b->stage*__COEFFS_PER_STAGE__;

		for (i=0;i<n;i++) {
			/* baseline eval with the coefficient unperturbed */
			b1 = (*b);
			v = eval_etype(&b1, INFINITY, MAX_DEPTH);

			coefficients[i] += delta;

			/* material only affects the eval indirectly
			   via the board, so update the board */

			b1 = (*b);
			if (i > IPIECE_VALUES && i < IPIECE_VALUES+KING)
				create_pboard(&b1);

			v2 = eval_etype(&b1, INFINITY, MAX_DEPTH);

			/* forward difference, signed the same way as the
			   stored leaf eval */
			grad[i] = next_to_play(&b1)*(v2 - v) / (float)delta;
			if (!state->demo_mode) {
				grad[i] *= state->td_comp;
			}
#if TEST_GRADIENT
			/* linearity check: evaluate again at +2*delta (v3)
			   and compare v3 - v against twice the measured
			   slope */
			coefficients[i] += delta;

			b1 = (*b);
			if (i > IPIECE_VALUES && i < IPIECE_VALUES+KING)
				create_pboard(&b1);

			v3 = eval_etype(&b1, INFINITY, MAX_DEPTH);

			/* undo both steps: the coefficient is back at its
			   original value from here on */
			coefficients[i] -= 2*delta;

			b1 = (*b);
			if (i > IPIECE_VALUES && i < IPIECE_VALUES+KING)
				create_pboard(&b1);

			/* v4 is taken at the restored coefficient and is
			   only used in the log line below */
			v4 = eval_etype(&b1, INFINITY, MAX_DEPTH);
			error = next_to_play(&b1)*(v3 - v);
			if (!state->demo_mode)
				error *= state->td_comp;
			error -= 2*delta*grad[i];
			error /= delta;
			if (ABS(error)>0.05) {
				/* NOTE(review): v..v4 are etype but are
				   printed with %e -- confirm this matches
				   the etype in use */
				lprintf(0,"***coeff: %d grad: %f error: %f %e %e %e %e\n",
					i,
					grad[i],
					error,
					v, v2, v3, v4);
			}
#else
			/* restore the coefficient */
			coefficients[i] -= delta;
#endif
		}
	}

	return n;

}

/* Appends the raw bytes of *b1 to the end of the file open on fd.

   fd  descriptor to write to; it is first positioned at end of file.
   b1  position to save; sizeof(Position) bytes are written.

   A write that does not transfer the whole structure is logged with the
   expected and actual byte counts.  Nothing is returned. */
void td_save_bad(int fd, Position *b1)
{
	int x;

	lseek(fd, 0, SEEK_END);

	x = Write(fd, (char *)b1, sizeof(Position));
	if (x != sizeof(Position)) {
		/* sizeof yields a size_t; cast it so the argument really is
		   the int that %d expects */
		lprintf(0,"***Error saving bad eval position %d %d\n",
			(int)sizeof(Position), x);
	}
}

/* Updates the coefficients according to the TD(lambda) algorithm.

   Does nothing when state->analysed is already set.  Otherwise it reads the
   round counters from rounds.dat and total_rounds.dat, obtains per-position
   gradients from td_gradient(), squashes the stored leaf evals with tanh,
   forms the temporal differences d[t] (the last one from the game outcome)
   and accumulates the update dw[] for every coefficient not listed in
   dont_change[].  The update is added to new_coefficients only when the
   round counter equals MAX_ROUNDS.  The angle between the current and the
   would-be updated coefficient vectors is logged and appended to angle.dat,
   total_rounds is incremented and saved, and state->analysed is set.

   With DUMPING_TD_UPDATES the accumulated update is carried between calls
   in update.dat and the round counter is saved in rounds.dat.

   Returns 0 on every path.

   NOTE(review): grad, dw and olddw are large automatic arrays; presumably
   the stack limit accommodates them -- confirm. */
int td_update()
{
#if DUMPING_TD_UPDATES
	int fd;
#endif
	int i,j,n,t;
	int argmax = -1;	/* stays -1 when every update is zero */
	int num_moves;
	int rounds = 0;
	float grad[300*__TOTAL_COEFFS__];
	double c, max;
	double dw[__TOTAL_COEFFS__];
	double olddw[__TOTAL_COEFFS__];
	double tanhv[MAX_GAME_MOVES];
	double d[MAX_GAME_MOVES];
	double oldnorm, newnorm, dotprod, angle;
	FILE *f;

	if (state->analysed)
		return 0;

	/* a missing or unreadable counter file leaves the value unchanged */
	if ((f = fopen("rounds.dat", "r")) != NULL) {
		fscanf(f, "%d\n", &rounds);
		fclose(f);
	}

	if ((f = fopen("total_rounds.dat", "r")) != NULL) {
		fscanf(f, "%d\n", &total_rounds);
		fclose(f);
	}

	memset(dw, 0, __TOTAL_COEFFS__*sizeof(dw[0]));
	memset(olddw, 0, __TOTAL_COEFFS__*sizeof(olddw[0]));

#if DUMPING_TD_UPDATES
	/* resume from the update accumulated by earlier rounds, if any */
	fd = open("update.dat", O_RDONLY);
	if (fd != -1) {
		if (read(fd, olddw, __TOTAL_COEFFS__*sizeof(olddw[0])) !=
		    __TOTAL_COEFFS__*sizeof(olddw[0])) {
			lprintf(0, "update file corrupt\n");
		} else {
			memcpy(dw, olddw,  __TOTAL_COEFFS__*sizeof(olddw[0]));
		}
		close(fd);
	}
#endif

	/* grad holds at most 300 rows */
	if (state->stored_move_num == 0 || state->stored_move_num > 300) {
		lprintf(0, "no gradient information: %d\n", state->stored_move_num);
		return 0;
	}

	memset(grad, 0, 300*__TOTAL_COEFFS__*sizeof(grad[0]));

	if (state->ics_robot && result() == TIME_FORFEIT)
		num_moves = state->stored_move_num-1;
	else
		num_moves = state->stored_move_num;

	lprintf(0,"***moves: %d\n", num_moves);

	/* tanhv[] and d[] are indexed up to num_moves-1; a time forfeit
	   after a single stored position would otherwise index them at -1 */
	if (num_moves < 1 || num_moves > MAX_GAME_MOVES) {
		lprintf(0, "no gradient information: %d\n", num_moves);
		return 0;
	}

	n = __TOTAL_COEFFS__;

	if (td_gradient(grad)) {
		lprintf(0,"gradients calculated\n");
	} else {
		lprintf(0,"gradient error\n");
		return 0;
	}

	/* Squash the evals and compute the temporal differences */
	tanhv[0] =  tanh(EVAL_SCALE*state->leaf_eval[0].v);
	for (t=0; t<num_moves-1; t++) {
		tanhv[t+1] = tanh(EVAL_SCALE*state->leaf_eval[t+1].v);
		d[t] = tanhv[t+1] - tanhv[t];
		if (state->predicted_move[t+1] == -1 &&
		    !state->demo_mode &&
		    state->rating_change < 0)
			d[t] = RAMP(d[t]);
	}

	/* work out the outcome; an outcome that matches none of the cases
	   below leaves the final difference at zero instead of
	   uninitialised */
	d[num_moves-1] = 0.0;
	if (state->demo_mode) {
		switch (state->won) {
		case STALEMATE: {
			if (NO_STALEMATE_LEARN)
				return 0;
			d[num_moves-1] = tanh(EVAL_SCALE*DRAW_VALUE)
				- tanhv[num_moves-1];
			break;
		}
		case 1: {
			d[num_moves-1] = 1.0 - tanhv[num_moves-1];
			break;
		}
		case 0: {
			d[num_moves-1] = -1.0 - tanhv[num_moves-1];
			break;
		}
		}
	} else {
		switch (result()) {
		case STALEMATE: {
			if (NO_STALEMATE_LEARN)
				return 0;
			d[num_moves-1] = tanh(EVAL_SCALE*DRAW_VALUE)
				- tanhv[num_moves-1];
			break;
		}
		case 1: {
			d[num_moves-1] = 1.0 - tanhv[num_moves-1];
			break;
		}
		case 0: {
			d[num_moves-1] = -1.0 - tanhv[num_moves-1];
			break;
		}
		/* for time forfeited or resigned games we just assume the
		   final eval was correct */
		case TIME_FORFEIT: {
			d[num_moves-1] = 0.0;
			break;
		}
		}
	}

	if (state->predicted_move[num_moves] == -1 &&
	    !state->demo_mode && state->rating_change < 0)
		d[num_moves-1] = RAMP(d[num_moves-1]);

	lprintf(0,"outcome: %d %d %d\n", state->won, state->colour, state->position.winner);

	for (i=0; i<num_moves; i++) {
		lprintf(0, "%d %d %lf\n", i, state->leaf_eval[i].v, d[i]);
	}

	/* calculate the coefficient updates */
	max = 0.0;
	j=0;
	for (i=0; i<n; i++) {
		/* "FACTORS" are multiplicative and have disproportionally
		   high derivatives so we don't adjust them */
		if (i==dont_change[j]) {
			++j;
			continue;
		}
		/* c is the lambda-discounted sum of the gradients of the
		   squashed eval over the positions seen so far */
		c = (1.0 - tanhv[0]*tanhv[0])*EVAL_SCALE*grad[i];

		for (t=0; t<num_moves; t++) {
			dw[i] += d[t]*c;
			if (t<num_moves-1) {
				c = TD_LAMBDA*c + (1-tanhv[t+1]*tanhv[t+1])*
					EVAL_SCALE*grad[(t+1)*n+i];
			}
		}
		if (ABS(dw[i]) > max) {
			max = ABS(dw[i]);
			argmax = i;
		}
	}

	lprintf(0,"max: %lf %d\n", TD_ALPHA*max, argmax);

	/* angle between the current coefficient vector and the vector the
	   update would produce */
	oldnorm = 0.0;
	newnorm = 0.0;
	dotprod = 0.0;
	for (i=0; i<n; i++) {
		oldnorm += ((double)new_coefficients[i]*(double)new_coefficients[i]);
		newnorm += (new_coefficients[i]+TD_ALPHA*dw[i])*
			(new_coefficients[i]+TD_ALPHA*dw[i]);
		dotprod += (new_coefficients[i] + TD_ALPHA*dw[i])*new_coefficients[i];
	}
	angle = 0.0;
	if (oldnorm != 0 && newnorm != 0) {
		double cosine = dotprod/sqrt(oldnorm*newnorm);

		/* rounding can push the ratio just outside [-1,1], where
		   acos has no defined result */
		if (cosine > 1.0)
			cosine = 1.0;
		else if (cosine < -1.0)
			cosine = -1.0;
		angle = 180*acos(cosine)/PI;
	}
	lprintf(0, "change in angle: %lg\n", angle);
	f = fopen("angle.dat", "a");
	if (f != NULL) {
		fprintf(f, "%g\n", angle);
		fclose(f);
	} else {
		lprintf(0, "cannot open angle.dat\n");
	}

	/* the accumulated update is only applied on the MAX_ROUNDS-th round */
	if (rounds == MAX_ROUNDS) {
		j = 0;
		for (i=0; i<n; i++) {
			if (i==dont_change[j]) {
				++j;
				continue;
			}

			new_coefficients[i] += TD_ALPHA*dw[i];
		}
	}

#if DUMPING_TD_UPDATES
	fd = open("update.dat", O_WRONLY | O_CREAT | O_TRUNC, 0666);
	/* once applied, the accumulator and the round counter start over */
	if (rounds == MAX_ROUNDS) {
		memset(dw, 0,  __TOTAL_COEFFS__*sizeof(dw[0]));
		rounds = 0;
	}
	if (fd == -1) {
		lprintf(0,"failed to write updates\n");
	} else {
		if (Write(fd, (char *)dw, __TOTAL_COEFFS__*sizeof(dw[0])) !=
		    __TOTAL_COEFFS__*sizeof(dw[0])) {
			lprintf(0,"failed to write updates\n");
		}
		close(fd);
	}

	++rounds;
	f = fopen("rounds.dat", "w");
	if (f != NULL) {
		fprintf(f, "%d\n", rounds);
		fclose(f);
	} else {
		lprintf(0, "cannot open rounds.dat\n");
	}
#endif
	lprintf(0,"updated coefficients\n");

	++total_rounds;
	f = fopen("total_rounds.dat", "w");
	if (f != NULL) {
		fprintf(f, "%d\n", total_rounds);
		fclose(f);
	} else {
		lprintf(0, "cannot open total_rounds.dat\n");
	}

	state->analysed = 1;
	return 0;
}

#else
/* Empty placeholder compiled in place of the learning routines when
   LEARN_EVAL is off (presumably so this file still defines something). */
void td_dummy(void)
{
	/* intentionally empty */
}
#endif