#include <math.h>
#include <time.h>
#include "chess.h"
#include "data.h"
#if defined(UNIX)
#  include <unistd.h>
#endif

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnAdjust() is used to scale the learn value, which can be used to      *
 *   limit the aggressiveness of the learning algorithm.  All we do here is    *
 *   divide the learn value passed in by "learning / 10".                      *
 *                                                                             *
 *******************************************************************************
 */
int LearnAdjust(int value) {

  if (learning / 10 > 0)
    return value / (learning / 10);
  else
    return 0;
}
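
/*
 *   A hypothetical worked example (assuming the "learning" option is at
 *   its presumed default of 100):  learning / 10 = 10, so a learn value
 *   of +300 is scaled down to +30 and -150 to -15.  If learning < 10,
 *   the divisor would be zero, which is why that case returns 0 instead.
 */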

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnBook() is used to update the book database when a game ends for any  *
 *   reason.  It uses the global "learn_value" variable and updates the book   *
 *   based on the moves played and the value that was "learned".               *
 *                                                                             *
 *   The global learn_value has two possible sources.  If a game ends with a   *
 *   real result (win, lose or draw), then the learn_value will be set to a    *
 *   number in the interval [-300, 300] depending on the result.  If there is  *
 *   no result because the operator exits the program prior to reaching a      *
 *   conclusion (quit, end, ^C), then we will use the values from the first    *
 *   few searches after leaving book to compute a learn_value (see the         *
 *   LearnValue() comments later in this file).                                *
 *                                                                             *
 *******************************************************************************
 */
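/*
 *   For concreteness (a presumption, since learn_value is set elsewhere
 *   in the program):  a decisive result would map near the endpoints,
 *   roughly +300 for a win and -300 for a loss, with a draw scored near
 *   zero, while the game-fragment path produces intermediate values via
 *   LearnValue().
 */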
void LearnBook() {
  float book_learn[64], t_learn_value;
  int nplies = 0, thisply = 0, i, j, v, cluster;
  unsigned char buf32[4];

/*
 ************************************************************
 *                                                          *
 *  If learning is disabled, or no book file is open, we    *
 *  have nothing to update.  Otherwise we scale the learn   *
 *  value (see LearnAdjust()) and clear the learn flag so   *
 *  that the book is only updated once per game.            *
 *                                                          *
 ************************************************************
 */
  if (!book_file)
    return;
  if (!learn)
    return;
  if (Abs(learn_value) != learning)
    learn_value = LearnAdjust(learn_value);
  learn = 0;
  Print(32, "LearnBook() updating book database\n");
/*
 ************************************************************
 *                                                          *
 *  Now we build a vector of book learning results.  We     *
 *  give every book move below the last point where there   *
 *  were alternatives 100% of the learned score.  We give   *
 *  the book move played at that point 100% of the learned  *
 *  score as well.  Then we divide the learned score by the *
 *  number of alternatives, and propagate this reduced      *
 *  score back to the previous branch point, repeating      *
 *  until we reach the top of the book tree.                *
 *                                                          *
 ************************************************************
 */
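/*
 *  A hypothetical worked example:  with learn_value = +100
 *  (t_learn_value = 1.0) and alternatives at plies 3, 7 and
 *  12 (nplies = 3), book_learn[] is 0.0 for plies 0-2, 0.33
 *  from ply 3, 0.67 from ply 7, and 1.0 from ply 12 on, so
 *  a branch point deeper in the book line absorbs a larger
 *  share of the learned result.
 */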
  t_learn_value = ((float) learn_value) / 100.0;
  for (i = 0; i < 64; i++)
    if (learn_nmoves[i] > 1)
      nplies++;
  nplies = Max(nplies, 1);
  for (i = 0; i < 64; i++) {
    if (learn_nmoves[i] > 1)
      thisply++;
    book_learn[i] = t_learn_value * (float) thisply / (float) nplies;
  }
/*
 ************************************************************
 *                                                          *
 *  Now find the appropriate cluster, find the key we were  *
 *  passed, and update the resulting learn value.           *
 *                                                          *
 ************************************************************
 */
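/*
 *  The first learned result for a position is stored
 *  outright (a stored value near zero is treated as unset),
 *  while later results are averaged in:  for example, a
 *  stored +0.50 combined with a new -0.30 becomes +0.10.
 */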
  for (i = 0; i < 64 && learn_seekto[i]; i++) {
    if (learn_seekto[i] > 0) {
      fseek(book_file, learn_seekto[i], SEEK_SET);
      v = fread(buf32, 4, 1, book_file);
      if (v <= 0)
        perror("LearnBook() fread error: ");
      cluster = BookIn32(buf32);
      if (cluster)
        BookClusterIn(book_file, cluster, book_buffer);
      for (j = 0; j < cluster; j++)
        if (!(learn_key[i] ^ book_buffer[j].position))
          break;
      if (j >= cluster)
        return;
      if (fabs(book_buffer[j].learn) < 0.0001)
        book_buffer[j].learn = book_learn[i];
      else
        book_buffer[j].learn = (book_buffer[j].learn + book_learn[i]) / 2.0;
      fseek(book_file, learn_seekto[i] + 4, SEEK_SET);
      if (cluster)
        BookClusterOut(book_file, cluster, book_buffer);
      fflush(book_file);
    }
  }
}

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnFunction() is called to compute the adjustment value added to the    *
 *   learn counter in the opening book.  It takes three pieces of information  *
 *   into consideration to do this:  the search value, the search depth that   *
 *   produced this value, and the rating difference (Crafty-opponent), where   *
 *   positive numbers mean Crafty is expected to win and negative numbers      *
 *   mean Crafty is expected to lose.                                          *
 *                                                                             *
 *******************************************************************************
 */
int LearnFunction(int sv, int search_depth, int rating_difference,
    int trusted_value) {
  float multiplier;
  /* rd below falls in 0..10, so each rating table has 11 entries */
  static const float rating_mult_t[11] = { .00625, .0125, .025, .05, .075, .1,
    0.15, 0.2, 0.25, 0.3, 0.35
  };
  static const float rating_mult_ut[11] = { .25, .2, .15, .1, .05, .025, .012,
    .006, .003, .001, 0.0
  };
  int sd, rd, value;

  sd = Max(Min(search_depth - 10, 19), 0);
  rd = Max(Min(rating_difference / 200, 5), -5) + 5;
  if (trusted_value)
    multiplier = rating_mult_t[rd] * sd;
  else
    multiplier = rating_mult_ut[rd] * sd;
  sv = Max(Min(sv, 5 * learning), -5 * learning);
  value = (int) (sv * multiplier);
  return value;
}
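
/*
 *   A hypothetical worked example:  sv = -80, search_depth = 16,
 *   rating_difference = +200 and trusted_value = 1 gives
 *   sd = Max(Min(16 - 10, 19), 0) = 6 and
 *   rd = Max(Min(200 / 200, 5), -5) + 5 = 6, so multiplier =
 *   rating_mult_t[6] * 6 = 0.15 * 6 = 0.9 and (assuming the
 *   +/- 5 * learning clamp does not bind) the returned value is
 *   (int) (-80 * 0.9) = -72.
 */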

/* last modified 02/24/14 */
/*
 *******************************************************************************
 *                                                                             *
 *   LearnValue() is used to monitor the scores over the first N moves out of  *
 *   book.  After these moves have been played, the evaluations are then used  *
 *   to decide whether the last book move played was a reasonable choice or    *
 *   not.  (N is set by the #define LEARN_INTERVAL definition.)                *
 *                                                                             *
 *   This procedure does not directly update the book.  Rather, it sets the    *
 *   global learn_value variable to represent the goodness or badness of the   *
 *   position where we left the opening book.  This will be used later to      *
 *   update the book in the event the game ends without any sort of actual     *
 *   result.  In a normal situation, we will base our learning on the result   *
 *   of the game, win-lose-draw.  But it is possible that the game ends before *
 *   the final result is known.  In this case, we will use the score from the  *
 *   learn_value we compute here so that we learn _something_ from playing a   *
 *   game fragment.                                                            *
 *                                                                             *
 *   There are three cases to be handled.  (1) If the evaluation is bad right  *
 *   out of book, or it drops enough to be considered a bad line, then the     *
 *   book move will have its "learn" value reduced to discourage playing this  *
 *   move again.  (2) If the evaluation is even after N moves, then the learn  *
 *   value will be increased, but by a relatively modest amount, so that a few *
 *   even results will offset one bad result.  (3) If the evaluation is very   *
 *   good after N moves, the learn value will be increased by a large amount   *
 *   so that this move will be favored the next time the game is played.       *
 *                                                                             *
 *******************************************************************************
 */
void LearnValue(int search_value, int search_depth) {
  int i, interval;
  int best_eval = -999999, best_eval_p = 0;
  int worst_eval = 999999, worst_eval_p = 0;
  int best_after_worst_eval = -999999, worst_after_best_eval = 999999;

/*
 ************************************************************
 *                                                          *
 *  If we have not been "out of book" for N moves, all we   *
 *  need to do is take the search evaluation for the search *
 *  just completed and tuck it away in the book learning    *
 *  array (book_learn_eval[]) for use later.                *
 *                                                          *
 ************************************************************
 */
  if (!book_file)
    return;
  if (!learn || learn_value != 0)
    return;
  if (moves_out_of_book <= LEARN_INTERVAL) {
    if (moves_out_of_book) {
      book_learn_eval[moves_out_of_book - 1] = search_value;
      book_learn_depth[moves_out_of_book - 1] = search_depth;
    }
  }
/*
 ************************************************************
 *                                                          *
 *  Check the evaluations we've seen so far.  If they are   *
 *  within reason (+/- 1/3 of a pawn or so) we simply keep  *
 *  playing and leave the book alone.  If the eval is much  *
 *  better or worse, we need to update the learning data.   *
 *                                                          *
 ************************************************************
 */
  else if (moves_out_of_book == LEARN_INTERVAL + 1) {
    if (moves_out_of_book < 1)
      return;
    interval = Min(LEARN_INTERVAL, moves_out_of_book);
    if (interval < 2)
      return;
    for (i = 0; i < interval; i++) {
      if (book_learn_eval[i] > best_eval) {
        best_eval = book_learn_eval[i];
        best_eval_p = i;
      }
      if (book_learn_eval[i] < worst_eval) {
        worst_eval = book_learn_eval[i];
        worst_eval_p = i;
      }
    }
    if (best_eval_p < interval - 1) {
      for (i = best_eval_p; i < interval; i++)
        if (book_learn_eval[i] < worst_after_best_eval)
          worst_after_best_eval = book_learn_eval[i];
    } else
      worst_after_best_eval = book_learn_eval[interval - 1];
    if (worst_eval_p < interval - 1) {
      for (i = worst_eval_p; i < interval; i++)
        if (book_learn_eval[i] > best_after_worst_eval)
          best_after_worst_eval = book_learn_eval[i];
    } else
      best_after_worst_eval = book_learn_eval[interval - 1];
/*
 ************************************************************
 *                                                          *
 *  We now have the best eval for the first N moves out of  *
 *  book, the worst eval for the first N moves out of book, *
 *  and the worst eval that follows the best eval.  This    *
 *  will be used to recognize the following cases of        *
 *  results that follow a book move:                        *
 *                                                          *
 ************************************************************
 */
/*
 ************************************************************
 *                                                          *
 *  (1) The best score is very good, and it doesn't drop    *
 *  after following the game further.  This case detects    *
 *  those moves in book that are "good" and should be       *
 *  played whenever possible, while avoiding the sound      *
 *  gambits that leave us ahead in material for a short     *
 *  while until the score starts to drop as the gambit      *
 *  begins to show its effect.                              *
 *                                                          *
 ************************************************************
 */
    if (best_eval == best_after_worst_eval) {
      learn_value = best_eval;
      for (i = 0; i < interval; i++)
        if (learn_value == book_learn_eval[i])
          search_depth = Max(search_depth, book_learn_depth[i]);
    }
/*
 ************************************************************
 *                                                          *
 *  (2) The worst score is bad, and doesn't improve any     *
 *  after the worst point, indicating that the book move    *
 *  chosen was "bad" and should be avoided in the future.   *
 *                                                          *
 ************************************************************
 */
    else if (worst_eval == worst_after_best_eval) {
      learn_value = worst_eval;
      for (i = 0; i < interval; i++)
        if (learn_value == book_learn_eval[i])
          search_depth = Max(search_depth, book_learn_depth[i]);
    }
/*
 ************************************************************
 *                                                          *
 *  (3) Things seem even out of book and remain that way    *
 *  for N moves.  We will just average the N scores and     *
 *  use that as an approximation.                           *
 *                                                          *
 ************************************************************
 */
    else {
      learn_value = 0;
      search_depth = 0;
      for (i = 0; i < interval; i++) {
        learn_value += book_learn_eval[i];
        search_depth += book_learn_depth[i];
      }
      learn_value /= interval;
      search_depth /= interval;
    }
    learn_value =
        LearnFunction(learn_value, search_depth,
        crafty_rating - opponent_rating, learn_value < 0);
  }
}
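
/*
 *   A hypothetical trace of the three cases above, with interval = 10
 *   and book_learn_eval[] = {+120, +110, +90, +60, +30, 0, -20, -30,
 *   -35, -40}:  best_eval = +120 at ply 0 and worst_eval = -40 at ply
 *   9, so worst_after_best_eval = -40 (the line keeps falling after
 *   its peak) and best_after_worst_eval = -40 as well.  Case (1)
 *   fails since +120 != -40, but case (2) fires because worst_eval
 *   equals worst_after_best_eval, so learn_value = -40 and this
 *   steadily declining line is learned as "bad".
 */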