#include <math.h>
#include <time.h>
#include "chess.h"
#include "data.h"
#if defined(UNIX)
#  include <unistd.h>
#endif

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnAdjust() is used to scale the learn value, which can be used to      *
 *   limit the aggressiveness of the learning algorithm. All we do here is     *
 *   divide the learn value passed in by "learning / 10".                      *
 *                                                                             *
 *******************************************************************************
 */
int LearnAdjust(int value) {

  if (learning / 10 > 0)
    return value / (learning / 10);
  else
    return 0;
}
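
/*
   Illustrative example (not from the original source, assuming the "learning"
   option is set to a typical value of 100): learning / 10 = 10, so a learn
   value of -280 is scaled down to -28.  If learning is set below 10, the
   divisor would be zero, so LearnAdjust() simply returns 0 and no learning
   is applied.
 */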

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnBook() is used to update the book database when a game ends for any  *
 *   reason. It uses the global "learn_value" variable and updates the book    *
 *   based on the moves played and the value that was "learned".               *
 *                                                                             *
 *   The global learn_value has two possible sources. If a game ends with a    *
 *   real result (win, lose or draw) then the learn_value will be set to a     *
 *   number in the interval [-300, 300] depending on the result. If there is   *
 *   no result (the operator exits the program prior to reaching a conclusion  *
 *   via quit, end, or ^C), then we will use the values from the first few     *
 *   searches after leaving book to compute a learn_value (see LearnValue()    *
 *   comments later in this file).                                             *
 *                                                                             *
 *******************************************************************************
 */
void LearnBook() {
  float book_learn[64], t_learn_value;
  int nplies = 0, thisply = 0, i, j, v, cluster;
  unsigned char buf32[4];

/*
 ************************************************************
 *                                                           *
 *  If learning is disabled, or there is no book file, we    *
 *  have nothing to do. Otherwise the learn value is         *
 *  scaled (see LearnAdjust()) unless it already has the     *
 *  full "learning" magnitude, and the book database         *
 *  update begins.                                           *
 *                                                           *
 ************************************************************
 */
  if (!book_file)
    return;
  if (!learn)
    return;
  if (Abs(learn_value) != learning)
    learn_value = LearnAdjust(learn_value);
  learn = 0;
  Print(32, "LearnBook() updating book database\n");
/*
 ************************************************************
 *                                                           *
 *  Now we build a vector of book learning results. We       *
 *  give every book move below the last point where there    *
 *  were alternatives 100% of the learned score. We give     *
 *  the book move played at that point 100% of the learned   *
 *  score as well. Then we divide the learned score by the   *
 *  number of alternatives, and propagate this score back    *
 *  until there was another alternative, where we do this    *
 *  again and again until we reach the top of the book       *
 *  tree.                                                    *
 *                                                           *
 ************************************************************
 */
  t_learn_value = ((float) learn_value) / 100.0;
  for (i = 0; i < 64; i++)
    if (learn_nmoves[i] > 1)
      nplies++;
  nplies = Max(nplies, 1);
  for (i = 0; i < 64; i++) {
    if (learn_nmoves[i] > 1)
      thisply++;
    book_learn[i] = t_learn_value * (float) thisply / (float) nplies;
  }
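/*
   Worked example (illustrative only): suppose learn_value = -300 (a loss), so
   t_learn_value = -3.0, and the book line offered more than one alternative
   at plies 2 and 5 only, giving nplies = 2.  Then book_learn[] is 0.0 for
   plies 0-1, -1.5 (half the learned score) for plies 2-4, and -3.0 (the full
   score) from ply 5 on, which matches the back-propagation scheme described
   above.
 */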
/*
 ************************************************************
 *                                                           *
 *  Now find the appropriate cluster, find the key we were   *
 *  passed, and update the resulting learn value.            *
 *                                                           *
 ************************************************************
 */
  for (i = 0; i < 64 && learn_seekto[i]; i++) {
    if (learn_seekto[i] > 0) {
      fseek(book_file, learn_seekto[i], SEEK_SET);
      v = fread(buf32, 4, 1, book_file);
      if (v <= 0)
        perror("Learn() fread error: ");
      cluster = BookIn32(buf32);
      if (cluster)
        BookClusterIn(book_file, cluster, book_buffer);
      for (j = 0; j < cluster; j++)
        if (!(learn_key[i] ^ book_buffer[j].position))
          break;
      if (j >= cluster)
        return;
      if (fabs(book_buffer[j].learn) < 0.0001)
        book_buffer[j].learn = book_learn[i];
      else
        book_buffer[j].learn = (book_buffer[j].learn + book_learn[i]) / 2.0;
      fseek(book_file, learn_seekto[i] + 4, SEEK_SET);
      if (cluster)
        BookClusterOut(book_file, cluster, book_buffer);
      fflush(book_file);
    }
  }
}
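
/*
   Note on the update rule above (an illustration, not from the original
   source): a position whose stored learn value is still essentially zero
   simply takes the new book_learn[i] value, while a position that was
   learned before is averaged with the new result, e.g. an old value of 0.8
   combined with a new value of -3.0 becomes (0.8 + -3.0) / 2 = -1.1.
 */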

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnFunction() is called to compute the adjustment value added to the    *
 *   learn counter in the opening book. It takes three pieces of information   *
 *   into consideration to do this: the search value, the search depth that    *
 *   produced this value, and the rating difference (Crafty-opponent) so that  *
 *   + numbers mean Crafty is expected to win, - numbers mean Crafty is ex-    *
 *   pected to lose.                                                           *
 *                                                                             *
 *******************************************************************************
 */
int LearnFunction(int sv, int search_depth, int rating_difference,
    int trusted_value) {
  float multiplier;
  static const float rating_mult_t[11] = { .00625, .0125, .025, .05, .075, .1,
    0.15, 0.2, 0.25, 0.3, 0.35
  };
  static const float rating_mult_ut[11] = { .25, .2, .15, .1, .05, .025, .012,
    .006, .003, .001
  };
  int sd, rd, value;

  sd = Max(Min(search_depth - 10, 19), 0);
  rd = Max(Min(rating_difference / 200, 5), -5) + 5;
  if (trusted_value)
    multiplier = rating_mult_t[rd] * sd;
  else
    multiplier = rating_mult_ut[rd] * sd;
  sv = Max(Min(sv, 5 * learning), -5 * learning);
  value = (int) (sv * multiplier);
  return value;
}
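
/*
   Worked example (illustrative only, assuming sv lies inside the
   +/- 5 * learning clamp): sv = 80, search_depth = 22, rating_difference =
   +200 and trusted_value = 1 give sd = Min(22 - 10, 19) = 12 and
   rd = 200 / 200 + 5 = 6, so multiplier = rating_mult_t[6] * 12 =
   0.15 * 12 = 1.8 and the function returns (int) (80 * 1.8) = 144.
 */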

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnValue() is used to monitor the scores over the first N moves out of  *
 *   book. After these moves have been played, the evaluations are then used   *
 *   to decide whether the last book move played was a reasonable choice or    *
 *   not. (N is set by the #define LEARN_INTERVAL definition.)                 *
 *                                                                             *
 *   This procedure does not directly update the book. Rather, it sets the     *
 *   global learn_value variable to represent the goodness or badness of the   *
 *   position where we left the opening book. This will be used later to       *
 *   update the book in the event the game ends without any sort of actual     *
 *   result. In a normal situation, we will base our learning on the result    *
 *   of the game, win-lose-draw. But it is possible that the game ends before  *
 *   the final result is known. In this case, we will use the score from the   *
 *   learn_value we compute here so that we learn _something_ from playing a   *
 *   game fragment.                                                            *
 *                                                                             *
 *   There are three cases to be handled. (1) If the evaluation is bad right   *
 *   out of book, or it drops enough to be considered a bad line, then the     *
 *   book move will have its "learn" value reduced to discourage playing this  *
 *   move again. (2) If the evaluation is even after N moves, then the learn   *
 *   value will be increased, but by a relatively modest amount, so that a few *
 *   even results will offset one bad result. (3) If the evaluation is very    *
 *   good after N moves, the learn value will be increased by a large amount   *
 *   so that this move will be favored the next time the game is played.       *
 *                                                                             *
 *******************************************************************************
 */
void LearnValue(int search_value, int search_depth) {
  int i, interval;
  int best_eval = -999999, best_eval_p = 0;
  int worst_eval = 999999, worst_eval_p = 0;
  int best_after_worst_eval = -999999, worst_after_best_eval = 999999;

/*
 ************************************************************
 *                                                           *
 *  If we have not been "out of book" for N moves, all we    *
 *  need to do is take the search evaluation for the search  *
 *  just completed and tuck it away in the book learning     *
 *  array (book_learn_eval[]) for use later.                 *
 *                                                           *
 ************************************************************
 */
  if (!book_file)
    return;
  if (!learn || learn_value != 0)
    return;
  if (moves_out_of_book <= LEARN_INTERVAL) {
    if (moves_out_of_book) {
      book_learn_eval[moves_out_of_book - 1] = search_value;
      book_learn_depth[moves_out_of_book - 1] = search_depth;
    }
  }
/*
 ************************************************************
 *                                                           *
 *  Check the evaluations we've seen so far. If they are     *
 *  within reason (+/- 1/3 of a pawn or so) we simply keep   *
 *  playing and leave the book alone. If the eval is much    *
 *  better or worse, we need to update the learning data.    *
 *                                                           *
 ************************************************************
 */
  else if (moves_out_of_book == LEARN_INTERVAL + 1) {
    if (moves_out_of_book < 1)
      return;
    interval = Min(LEARN_INTERVAL, moves_out_of_book);
    if (interval < 2)
      return;
    for (i = 0; i < interval; i++) {
      if (book_learn_eval[i] > best_eval) {
        best_eval = book_learn_eval[i];
        best_eval_p = i;
      }
      if (book_learn_eval[i] < worst_eval) {
        worst_eval = book_learn_eval[i];
        worst_eval_p = i;
      }
    }
    if (best_eval_p < interval - 1) {
      for (i = best_eval_p; i < interval; i++)
        if (book_learn_eval[i] < worst_after_best_eval)
          worst_after_best_eval = book_learn_eval[i];
    } else
      worst_after_best_eval = book_learn_eval[interval - 1];
    if (worst_eval_p < interval - 1) {
      for (i = worst_eval_p; i < interval; i++)
        if (book_learn_eval[i] > best_after_worst_eval)
          best_after_worst_eval = book_learn_eval[i];
    } else
      best_after_worst_eval = book_learn_eval[interval - 1];
/*
 ************************************************************
 *                                                           *
 *  We now have the best eval for the first N moves out of   *
 *  book, the worst eval for the first N moves out of book,  *
 *  and the worst eval that follows the best eval. This      *
 *  will be used to recognize the following cases of         *
 *  results that follow a book move:                         *
 *                                                           *
 ************************************************************
 */
/*
 ************************************************************
 *                                                           *
 *  (1) The best score is very good, and it doesn't drop     *
 *  after following the game further. This case detects      *
 *  those moves in book that are "good" and should be        *
 *  played whenever possible, while avoiding the sound       *
 *  gambits that leave us ahead in material for a short      *
 *  while until the score starts to drop as the gambit       *
 *  begins to show its effect.                               *
 *                                                           *
 ************************************************************
 */
    if (best_eval == best_after_worst_eval) {
      learn_value = best_eval;
      for (i = 0; i < interval; i++)
        if (learn_value == book_learn_eval[i])
          search_depth = Max(search_depth, book_learn_depth[i]);
    }
/*
 ************************************************************
 *                                                           *
 *  (2) The worst score is bad, and doesn't improve any      *
 *  after the worst point, indicating that the book move     *
 *  chosen was "bad" and should be avoided in the future.    *
 *                                                           *
 ************************************************************
 */
    else if (worst_eval == worst_after_best_eval) {
      learn_value = worst_eval;
      for (i = 0; i < interval; i++)
        if (learn_value == book_learn_eval[i])
          search_depth = Max(search_depth, book_learn_depth[i]);
    }
/*
 ************************************************************
 *                                                           *
 *  (3) Things seem even out of book and remain that way     *
 *  for N moves. We will just average the 10 scores and      *
 *  use that as an approximation.                            *
 *                                                           *
 ************************************************************
 */
    else {
      learn_value = 0;
      search_depth = 0;
      for (i = 0; i < interval; i++) {
        learn_value += book_learn_eval[i];
        search_depth += book_learn_depth[i];
      }
      learn_value /= interval;
      search_depth /= interval;
    }
    learn_value =
        LearnFunction(learn_value, search_depth,
        crafty_rating - opponent_rating, learn_value < 0);
  }
}
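
/*
   Illustrative example (not part of the original source, assuming
   LEARN_INTERVAL is 10): with out-of-book evals of {10, 30, 80, 75, 60, 40,
   20, 5, -10, -20}, best_eval is 80 and worst_eval is -20, which occurs on
   the last ply.  The worst eval after the best one is also -20, so case (2)
   applies: learn_value becomes -20 (a line that looked fine but went bad),
   and it is then scaled by LearnFunction() with trusted_value = 1 since the
   score is negative.
 */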