1 #include "chess.h"
2 #include "data.h"
3 /* last modified 08/26/15 */
4 /*
5 *******************************************************************************
6 * *
7 * AutoTune() is used to tune the parallel search parameters and optimize *
8 * them for the current hardware and a specific time per move target. The *
9 * syntax of the command is *
10 * *
11 * autotune time accuracy *
12 * *
13 * "time" is the target time to optimize for. Longer time limits require *
14 * somewhat different tuning values, so this should be set to the typical *
15 * time per move. The default is 30 seconds per move if not specified. *
16 * *
17 * "accuracy" is normally set to 4. Since SMP search results and times are *
18 * non-deterministic, running tests 1 time can be inaccurate. This value is *
19 * used to determine how many times to run each test. If you set it to two, *
20 * the entire test will take 1/2 as long. Bigger numbers are better, but *
21 * it is easy to make the test run for hours if you go too far. A big value *
22 * will work best if allowed to run overnight. Crafty will display a time *
23 * estimate after determining the optimal benchmark settings. If this time *
24 * is excessive, a ^C will let you re-start Crafty and pick a more *
25 * reasonable time/accuracy setting. *
26 * *
27 * AutoTune() will tune the primary SMP controls, namely the values set by *
28 * the commands smpgroup, smpmin, smpsd and smppsl. It will NEVER change *
29 * smpmt (max threads), smproot (split at root) and smpaffinity. Those are *
30 * user choices and in general the default is optimal. In general the *
31 * values have min and max settings defined in data.c (search for autotune), *
32 * and this code will try multiple values in the given range to find an *
33 * optimal setting. For some of the values, it will test each value in the *
34 * interval, but for values with a large range it will try reasonable *
35 * (again, see data.c and the "tune" array) intervals. If desired, the *
36 * low/high limits can be changed along with the interval between samples, *
37 * by modifying the autotune data in data.c. *
38 * *
39 * Note that this command is best used before you go to eat or something as *
40 * it will run a while. If you ramp up the accuracy setting, it will take *
41 * multiples of accuracy times longer. Best results are likely obtained *
42 * with a larger accuracy setting, but it needs to run overnight. *
43 * *
44 *******************************************************************************
45 */
AutoTune(int nargs,char * args[])46 void AutoTune(int nargs, char *args[]) {
47 unsigned int target_time = 3000, accuracy = 4, atstart, atend;
48 unsigned int time, current, setting[64], times[64], last_time, stageii;
49 int benchd, i, v, p, best, bestv, samples;
50 FILE *craftyrc = fopen(".craftyrc", "a");
51
52 /*
53 ************************************************************
54 * *
55 * Initialize. *
56 * *
57 ************************************************************
58 */
59 if (smp_max_threads < 2) {
60 Print(4095, "ERROR: smpmt must be set to > 1 for tuning to work\n");
61 fclose(craftyrc);
62 return;
63 }
64 if (nargs > 1)
65 target_time = atoi(args[1]) * 100;
66 if (nargs > 2)
67 accuracy = atoi(args[2]);
68 Print(4095, "AutoTune() time=%s accuracy=%d\n",
69 DisplayHHMMSS(target_time), accuracy);
70 /*
71 ************************************************************
72 * *
73 * First task is to find the benchmark setting that will *
74 * run in the alotted time. The Bench() command runs six *
75 * positions, so we want the command to run in no more *
76 * than six times the autotune time limit to average the *
77 * specified time per move. We break out of the loop when *
78 * bench takes more than 6x this time limit and use the *
79 * previous value which just fit inside the limit. *
80 * *
81 ************************************************************
82 */
83 atstart = ReadClock();
84 stageii = 0;
85 for (v = 0; v < autotune_params; v++)
86 for (current = tune[v].min; current <= tune[v].max;
87 current += tune[v].increment)
88 stageii++;
89 Print(4095, "Calculating optimal benchmark setting.\n");
90 Print(4095, "Target time average = %s.\n", DisplayHHMMSS(6 * target_time));
91 Print(4095, "Estimated run time (stage I) is %s.\n",
92 DisplayHHMMSS(accuracy * 12 * target_time));
93 Print(4095, "Estimated run time (stage II) is %s.\n",
94 DisplayHHMMSS(accuracy * stageii * 4 * target_time));
95 Print(4095, "\nBegin stage I (calibration)\n");
96 last_time = 0;
97 for (benchd = -5; benchd < 10; benchd++) {
98 Print(4095, "bench %2d:", benchd);
99 time = 0;
100 for (v = 0; v < accuracy; v++)
101 time += Bench(benchd, 1);
102 time /= accuracy;
103 Print(4095, " ->%s\n", DisplayHHMMSS(time));
104 if (time > 6 * target_time)
105 break;
106 last_time = time;
107 }
108 benchd--;
109 Print(4095, "Optimal setting is " "bench %d" "\n", benchd);
110 atend = ReadClock();
111 Print(4095, "Actual runtime for Stage I: %s\n",
112 DisplayHHMMSS(atend - atstart));
113 Print(4095, "New estimated run time (stage II) is %s.\n",
114 DisplayHHMMSS(accuracy * stageii * last_time));
115 Print(4095, "\nBegin stage II (SMP testing).\n");
116 atstart = ReadClock();
117 /*
118 ************************************************************
119 * *
120 * Next we simply take each option, one by one, and try *
121 * reasonable values between the min/max values as defined *
122 * in data.c. *
123 * *
124 * The process is fairly simple, but very time-consuming. *
125 * We will start at the min value for a single paramenter, *
126 * and run bench "accuracy" times and compute the average *
127 * of the times. We then repeat for the next step in the *
128 * parameter, and continue until we try the max value that *
129 * is allowed. We choose the parameter value that used *
130 * the least amount of time which optimizes this value for *
131 * minimum time-to-depth. *
132 * *
133 ************************************************************
134 */
135 for (v = 0; v < autotune_params; v++) {
136 Print(4095, "auto-tuning %s (%d ~ %d by %d)\n", tune[v].description,
137 tune[v].min, tune[v].max, tune[v].increment);
138 current = *tune[v].parameter;
139 samples = 0;
140 if (v == 0 && tune[v].min > smp_max_threads) {
141 samples = 1;
142 times[0] = 0;
143 setting[0] = smp_max_threads;
144 } else
145 for (current = tune[v].min; current <= tune[v].max;
146 current += tune[v].increment) {
147 Print(4095, "Testing %d: ", current);
148 *tune[v].parameter = current;
149 time = 0;
150 for (p = 0; p < accuracy; p++)
151 time += Bench(benchd, 1);
152 time /= accuracy;
153 times[samples] = time;
154 setting[samples++] = current;
155 Print(4095, " ->%s\n", DisplayHHMMSS(time));
156 }
157 best = 0;
158 bestv = times[0];
159 for (i = 1; i < samples; i++)
160 if (bestv > times[i]) {
161 bestv = times[i];
162 best = i;
163 }
164 fprintf(craftyrc, "%s=%d\n", tune[v].command, setting[best]);
165 Print(4095, "adding " "%s=%d" " to .craftyrc file.\n", tune[v].command,
166 setting[best]);
167 }
168 atend = ReadClock();
169 Print(4095, "Runtime for StageII: %s\n", DisplayHHMMSS(atend - atstart));
170 fclose(craftyrc);
171 }
172