1 #include "includes.h"
2 #include "knightcap.h"
3 
4 #define TD_LAMBDA 0.7
5 #define TD_ALPHA (10/(EVAL_SCALE))
6 #define MAX_ROUNDS 4
7 #define MAX_SIZE 50
8 
9 static int total_rounds;
10 
11 struct max_struct {
12 	double val;
13 	int i,j,k;
14 };
15 
max_compare(struct max_struct * m1,struct max_struct * m2)16 static int max_compare(struct max_struct *m1, struct max_struct *m2)
17 {
18 	if (m1->val > m2->val)
19 		return 1;
20 	else if (m1->val == m2->val)
21 		return 0;
22 
23 	return -1;
24 }
25 
26 extern struct state *state;
27 extern int player;
28 extern int dont_change[];
29 
30 char *stage_name[] = {"OPENING", "MIDDLE", "ENDING", "MATING"};
31 
32 #include "names.h"
33 
p_coeff_vector(struct coefficient_name * cn,FILE * large,FILE * small)34 static void p_coeff_vector(struct coefficient_name *cn, FILE *large, FILE *small)
35 {
36 	int x;
37 	fprintf(large,"/* %s */\n", cn->name);
38 	if (small)
39 		fprintf(small,"/* %s */\n", cn->name);
40 	for (x=0; x<(cn+1)->index - cn->index; x++) {
41 		fprintf(large,"%7d,", coefficients[cn->index + x]);
42 		if (small)
43 			fprintf(small,"%7d,", coefficients[cn->index + x]/100);
44 	}
45 	fprintf(large,"\n");
46 	if (small)
47 		fprintf(small,"\n");
48 }
49 
p_coeff_array(struct coefficient_name * cn,FILE * large,FILE * small)50 static void p_coeff_array(struct coefficient_name *cn, FILE *large, FILE *small)
51 {
52 	int x;
53 	fprintf(large,"/* %s */\n", cn->name);
54 	if (small)
55 		fprintf(small,"/* %s */\n", cn->name);
56 	for (x=0; x<(cn+1)->index - cn->index; x++) {
57 		fprintf(large,"%7d,", coefficients[cn->index + x]);
58 		if (small)
59 			fprintf(small,"%7d,", coefficients[cn->index + x]/100);
60 		if ((x+1)%10 == 0) {
61 			fprintf(large, "\n");
62 			if (small)
63 				fprintf(small, "\n");
64 		}
65 	}
66 	fprintf(large,"\n");
67 	if (small)
68 		fprintf(small,"\n");
69 }
70 
71 
p_coeff_board(struct coefficient_name * cn,FILE * large,FILE * small)72 static void p_coeff_board(struct coefficient_name *cn, FILE *large, FILE *small)
73 {
74 	int x, y;
75 	fprintf(large,"/* %s */\n", cn->name);
76 	if (small)
77 		fprintf(small,"/* %s */\n", cn->name);
78 	for (y=0; y<8; y++) {
79 		for (x=0; x<8; x++) {
80 			fprintf(large,"%7d,", coefficients[cn->index + x + y*8]);
81 			if (small)
82 				fprintf(small,"%7d,", coefficients[cn->index + x + y*8]/100);
83 		}
84 		fprintf(large,"\n");
85 		if (small)
86 			fprintf(small,"\n");
87 	}
88 }
89 
p_coeff_half_board(struct coefficient_name * cn,FILE * large,FILE * small)90 static void p_coeff_half_board(struct coefficient_name *cn, FILE *large, FILE *small)
91 {
92 	int x, y;
93 	fprintf(large,"/* %s */\n", cn->name);
94 	if (small)
95 		fprintf(small,"/* %s */\n", cn->name);
96 	for (y=0; y<8; y++) {
97 		for (x=0; x<4; x++) {
98 			fprintf(large,"%7d,", coefficients[cn->index + x + y*4]);
99 			if (small)
100 				fprintf(small,"%7d,", coefficients[cn->index + x + y*4]/100);
101 		}
102 		fprintf(large,"\n");
103 		if (small)
104 			fprintf(small,"\n");
105 	}
106 }
107 
dump_coeffs(char * fname,int round)108 void dump_coeffs(char *fname, int round)
109 {
110         struct coefficient_name *cn;
111         FILE *large, *small;
112         int fd;
113 	int i;
114 	char fn[160];
115 
116 #if LARGE_ETYPE
117 	if (round >= 0)
118 		sprintf(fn, "/usr/local/chess/large_coeffs%d.h", round);
119 	else
120 		sprintf(fn,"large_coeffs.h");
121 	large = (FILE *)fopen(fn, "w");
122 	sprintf(fn, "small_coeffs.h");
123 	small = (FILE *)fopen(fn, "w");
124 #else
125 	if (round >= 0)
126 		sprintf(fn, "/usr/local/chess/small_coeffs%d.h", round);
127 	else
128 		sprintf(fn, "small_coeffs.h");
129 	large = (FILE *)fopen(fn, "w");
130 	small = NULL;
131 #endif
132 	if (large == NULL) {
133                 perror(fname);
134                 return;
135         }
136 
137 	state->total_rounds = total_rounds;
138 
139         fprintf(large, "etype orig_coefficients[] = {\n");
140 	if (small)
141 		fprintf(small, "etype orig_coefficients[] = {\n");
142 	for (i=OPENING; i<=MATING; i++) {
143 		fprintf(large, "\n/* %%%s%% */\n", stage_name[i]);
144 		if (small)
145 			fprintf(small, "\n/* %%%s%% */\n", stage_name[i]);
146 		cn = &coefficient_names[0];
147 		coefficients = 	new_coefficients + i*__COEFFS_PER_STAGE__;
148 		while (cn->name) {
149 			int n = cn[1].index - cn[0].index;
150 			if (n == 1) {
151 				fprintf(large, "/* %s */ %d,\n", cn[0].name,
152 					coefficients[cn[0].index]);
153 				if (small)
154 					fprintf(small, "/* %s */ %d,\n", cn[0].name,
155 						coefficients[cn[0].index]/100);
156 			} else if (n == 64) {
157 				p_coeff_board(cn,large,small);
158 			} else if (n == 32) {
159 				p_coeff_half_board(cn,large,small);
160 			} else if (n % 10 == 0) {
161 				p_coeff_array(cn,large,small);
162 			} else {
163 				p_coeff_vector(cn,large,small);
164 			}
165 			cn++;
166 		}
167 	}
168 
169         fprintf(large, "};\n");
170 	if (small)
171 		fprintf(small, "};\n");
172         fclose(large);
173 	if (small)
174 		fclose(small);
175 
176         fd = open(fname, O_WRONLY | O_CREAT | O_TRUNC, 0666);
177         if (fd == -1) {
178                 perror(fname);
179                 return;
180         }
181 
182         Write(fd, (char *)new_coefficients, __TOTAL_COEFFS__*sizeof(new_coefficients[0]));
183         close(fd);
184 
185         return;
186 }
187 
188 
td_dump(char * fname)189 int td_dump(char *fname)
190 {
191 	int i;
192 	etype sum;
193 
194 	dump_coeffs(fname, total_rounds);
195 
196 	sum = 0.0;
197 	for (i=0; i<__TOTAL_COEFFS__; i++) {
198 		sum += ABS(new_coefficients[i] - orig_coefficients[i]);
199 	}
200 
201 	cprintf(0,"%d\n", sum);
202 	return 1;
203 }
204 
205 /* routines for updating the evaluation function according to the
206    method of temporal differences */
207 
208 #if LEARN_EVAL
209 
210 
211 
td_store_pos(Position * b)212 int td_store_pos(Position *b)
213 {
214 	state->leaf_pos[state->stored_move_num] = *b;
215 	print_board(state->leaf_pos[state->stored_move_num].board);
216 	++state->stored_move_num;
217 	if (state->computer != 0)
218 		state->td_comp = state->computer;
219 
220 	return 1;
221 }
222 
223 /* calculate the partial derivative of the eval function with
224    respect to each of the coefficients. computed
225    numerically */
td_gradient(float * big_grad)226 int td_gradient(float *big_grad)
227 {
228 	etype v, v2;
229 	int i, n, m;
230 	etype delta = 100;
231 	float *grad;
232 	Position *b, b1;
233 #if TEST_GRADIENT
234 	float error;
235 	etype v3, v4;
236 #endif
237 
238 	n = __COEFFS_PER_STAGE__;
239 
240 	for (m = 0; m < state->stored_move_num; m++) {
241 		b = state->leaf_pos+m;
242 		lprintf(0, "%d ", m);
243 		/* sanity check */
244 		if (b->stage < OPENING || b->stage > MATING) {
245 			lprintf(0, "**Wrong stage in gradient calc: %d\n", b->stage);
246 			return 0;
247 		}
248 
249 		b->flags &= ~FLAG_EVAL_DONE;
250 		b->flags &= ~FLAG_DONE_TACTICS;
251 		b1 = (*b);
252 		v = eval_etype(&b1, INFINITY, MAX_DEPTH);
253 		lprintf(0, "%d %d\n", v, b1.stage);
254 
255 		state->leaf_eval[m].v = next_to_play(b)*v;
256 		if (!state->demo_mode) {
257 			state->leaf_eval[m].v *= state->td_comp;
258 		}
259 
260 		coefficients = new_coefficients + b->stage*__COEFFS_PER_STAGE__;
261 		grad = big_grad + __TOTAL_COEFFS__*m + b->stage*__COEFFS_PER_STAGE__;
262 
263 		for (i=0;i<n;i++) {
264 			b1 = (*b);
265 			v = eval_etype(&b1, INFINITY, MAX_DEPTH);
266 
267 			coefficients[i] += delta;
268 
269 			/* material only affects the eval indirectly
270 			   via the board, so update the board */
271 
272 			b1 = (*b);
273 			if (i > IPIECE_VALUES && i < IPIECE_VALUES+KING)
274 				create_pboard(&b1);
275 
276 			v2 = eval_etype(&b1, INFINITY, MAX_DEPTH);
277 
278 			grad[i] = next_to_play(&b1)*(v2 - v) / (float)delta;
279 			if (!state->demo_mode) {
280 				grad[i] *= state->td_comp;
281 			}
282 #if TEST_GRADIENT
283 			coefficients[i] += delta;
284 
285 			b1 = (*b);
286 			if (i > IPIECE_VALUES && i < IPIECE_VALUES+KING)
287 				create_pboard(&b1);
288 
289 			v3 = eval_etype(&b1, INFINITY, MAX_DEPTH);
290 
291 			coefficients[i] -= 2*delta;
292 
293 			b1 = (*b);
294 			if (i > IPIECE_VALUES && i < IPIECE_VALUES+KING)
295 				create_pboard(&b1);
296 
297 			v4 = eval_etype(&b1, INFINITY, MAX_DEPTH);
298 			error = next_to_play(&b1)*(v3 - v);
299 			if (!state->demo_mode)
300 				error *= state->td_comp;
301 			error -= 2*delta*grad[i];
302 			error /= delta;
303 			if (ABS(error)>0.05) {
304 				lprintf(0,"***coeff: %d grad: %f error: %f %e %e %e %e\n",
305 					i,
306 					grad[i],
307 					error,
308 					v, v2, v3, v4);
309 			}
310 #else
311 			coefficients[i] -= delta;
312 #endif
313 		}
314 	}
315 
316 	return n;
317 
318 }
319 
td_save_bad(int fd,Position * b1)320 void td_save_bad(int fd, Position *b1)
321 {
322 	int x;
323 
324 	lseek(fd, 0, SEEK_END);
325 
326 	if ((x = Write(fd, (char *)b1, sizeof(Position))) != sizeof(Position)) {
327 		lprintf(0,"***Error saving bad eval position %d %d\n",
328 			sizeof(Position), x);
329 	}
330 }
331 
332 /* Updates the 	coefficients according to the TD(lambda) algorithm. */
td_update()333 int td_update()
334 {
335 	int fd;
336         int i,j,n,t;
337 	int argmax;
338 	int num_moves;
339 	int rounds = 0;
340 	float grad[300*__TOTAL_COEFFS__];
341 	double c, max;
342 	double dw[__TOTAL_COEFFS__];
343 	double olddw[__TOTAL_COEFFS__];
344 	double tanhv[MAX_GAME_MOVES];
345 	double d[MAX_GAME_MOVES];
346 	double oldnorm, newnorm, dotprod, angle;
347 	FILE *f;
348 
349 	if (state->analysed)
350 		return 0;
351 
352 	if ((f = (FILE *)fopen("rounds.dat", "r")) != NULL) {
353 		fscanf(f, "%d\n", &rounds);
354 		fclose(f);
355 	}
356 
357 	if ((f = (FILE *)fopen("total_rounds.dat", "r")) != NULL) {
358 		fscanf(f, "%d\n", &total_rounds);
359 		fclose(f);
360 	}
361 
362 	memset(dw, 0, __TOTAL_COEFFS__*sizeof(dw[0]));
363 	memset(olddw, 0, __TOTAL_COEFFS__*sizeof(dw[0]));
364 
365 #if DUMPING_TD_UPDATES
366 	fd = open("update.dat", O_RDONLY);
367 	if (fd != -1) {
368 		if (read(fd, olddw, __TOTAL_COEFFS__*sizeof(olddw[0])) !=
369 		    __TOTAL_COEFFS__*sizeof(olddw[0])) {
370 			lprintf(0, "update file corrupt\n");
371 		} else {
372 			memcpy(dw, olddw,  __TOTAL_COEFFS__*sizeof(olddw[0]));
373 		}
374 	}
375 	close(fd);
376 #endif
377 
378 	if (state->stored_move_num == 0 || state->stored_move_num > 300) {
379 		lprintf(0, "no gradient information: %d\n", state->stored_move_num);
380 		return 0;
381 	}
382 
383 	memset(grad, 0, 300*__TOTAL_COEFFS__*sizeof(grad[0]));
384 
385 	if (state->ics_robot && result() == TIME_FORFEIT)
386 		num_moves = state->stored_move_num-1;
387 	else
388 		num_moves = state->stored_move_num;
389 
390 	lprintf(0,"***moves: %d\n", num_moves);
391 	n = __TOTAL_COEFFS__;
392 
393 	if (td_gradient(grad)) {
394 		lprintf(0,"gradients calculated\n");
395 	} else {
396 		lprintf(0,"gradient error\n");
397 		return 0;
398 	}
399 
400 	/* Squash the evals and compute the temporal differences */
401 	tanhv[0] =  tanh(EVAL_SCALE*state->leaf_eval[0].v);
402 	for (t=0; t<num_moves-1; t++) {
403 		tanhv[t+1] = tanh(EVAL_SCALE*state->leaf_eval[t+1].v);
404 		d[t] = tanhv[t+1] - tanhv[t];
405 		if (state->predicted_move[t+1] == -1 &&
406 		    !state->demo_mode &&
407 		    state->rating_change < 0)
408 			d[t] = RAMP(d[t]);
409 	}
410 
411 	/* work out the outcome */
412 	if (state->demo_mode) {
413 		switch (state->won) {
414 		case STALEMATE: {
415 			if (NO_STALEMATE_LEARN)
416 				return 0;
417 			d[num_moves-1] = tanh(EVAL_SCALE*DRAW_VALUE)
418 				- tanhv[num_moves-1];
419 			break;
420 		}
421 		case 1: {
422 			d[num_moves-1] = 1.0 - tanhv[num_moves-1];
423 			break;
424 		}
425 		case 0: {
426 			d[num_moves-1] = -1.0 - tanhv[num_moves-1];
427 			break;
428 		}
429 		}
430 	} else {
431 		switch (result()) {
432 		case STALEMATE: {
433 			if (NO_STALEMATE_LEARN)
434 				return 0;
435 			d[num_moves-1] = tanh(EVAL_SCALE*DRAW_VALUE)
436 				- tanhv[num_moves-1];
437 			break;
438 		}
439 		case 1: {
440 			d[num_moves-1] = 1.0 - tanhv[num_moves-1];
441 			break;
442 		}
443 		case 0: {
444 			d[num_moves-1] = -1.0 - tanhv[num_moves-1];
445 			break;
446 		}
447 		/* for time forfeited or resigned games we just assume the
448 		   final eval was correct */
449 		case TIME_FORFEIT: {
450 			d[num_moves-1] = 0.0;
451 			break;
452 		}
453 		}
454 	}
455 
456 	if (state->predicted_move[num_moves] == -1 &&
457 	    !state->demo_mode && state->rating_change < 0)
458 		d[num_moves-1] = RAMP(d[num_moves-1]);
459 
460 	lprintf(0,"outcome: %d %d %d\n", state->won, state->colour, state->position.winner);
461 
462 	for (i=0; i<num_moves; i++) {
463 		lprintf(0, "%d %d %lf\n", i, state->leaf_eval[i].v, d[i]);
464 	}
465 
466 	/* calculate the coefficient updates */
467 	max = 0.0;
468 	j=0;
469 	for (i=0; i<n; i++) {
470 		/* "FACTORS" are multiplicative and have disproportionally
471 		   high derivatives so we don't adjust them */
472 		if (dont_change && i==dont_change[j]) {
473 			++j;
474 			continue;
475 		}
476 		c = (1.0 - tanhv[0]*tanhv[0])*EVAL_SCALE*grad[i];
477 
478 		for (t=0; t<num_moves; t++) {
479 			dw[i] += d[t]*c;
480 			if (t<num_moves-1) {
481 				c = TD_LAMBDA*c + (1-tanhv[t+1]*tanhv[t+1])*
482 					EVAL_SCALE*grad[(t+1)*n+i];
483 			}
484 		}
485 		if (ABS(dw[i]) > max) {
486 			max = ABS(dw[i]);
487 			argmax = i;
488 		}
489 	}
490 
491 	lprintf(0,"max: %lf %d\n", TD_ALPHA*max, argmax);
492 
493 	oldnorm = 0.0;
494 	newnorm = 0.0;
495 	dotprod = 0.0;
496 	for (i=0; i<n; i++) {
497 		oldnorm += ((double)new_coefficients[i]*(double)new_coefficients[i]);
498 		newnorm += (new_coefficients[i]+TD_ALPHA*dw[i])*
499 			(new_coefficients[i]+TD_ALPHA*dw[i]);
500 		dotprod += (new_coefficients[i] + TD_ALPHA*dw[i])*new_coefficients[i];
501 	}
502 	angle = 0.0;
503 	if (oldnorm != 0)
504 		angle = 180*acos(dotprod/sqrt(oldnorm*newnorm))/PI;
505 	lprintf(0, "change in angle: %lg\n", angle);
506 	f = (FILE *)fopen("angle.dat", "a");
507 	fprintf(f, "%g\n", angle);
508 	fclose(f);
509 
510 	j = 0;
511 	for (i=0; i<n; i++) {
512 		if (dont_change && i==dont_change[j]) {
513 			++j;
514 			continue;
515 		}
516 
517 		if (rounds == MAX_ROUNDS)
518 			new_coefficients[i] += TD_ALPHA*dw[i];
519 	}
520 
521 #if DUMPING_TD_UPDATES
522 	fd = open("update.dat", O_WRONLY | O_CREAT | O_TRUNC, 0666);
523 	if (rounds == MAX_ROUNDS) {
524 		memset(dw, 0,  __TOTAL_COEFFS__*sizeof(dw[0]));
525 		rounds = 0;
526 	}
527 	if (Write(fd, (char *)dw, __TOTAL_COEFFS__*sizeof(dw[0])) !=
528 	    __TOTAL_COEFFS__*sizeof(dw[0])) {
529 		lprintf(0,"failed to write updates\n");
530 	}
531 	close(fd);
532 
533 	++rounds;
534 	f = (FILE *)fopen("rounds.dat", "w");
535 	fprintf(f, "%d\n", rounds);
536 	fclose(f);
537 #endif
538 	lprintf(0,"updated coefficients\n");
539 
540 	++total_rounds;
541 	f = (FILE *)fopen("total_rounds.dat", "w");
542 	fprintf(f, "%d\n", total_rounds);
543 	fclose(f);
544 
545 	state->analysed = 1;
546 	return 0;
547 }
548 
549 #else
td_dummy(void)550 void td_dummy(void)
551 {}
552 #endif
553 
554 
555