1 /*
2     This file is part of Leela Zero.
3     Copyright (C) 2017-2019 Gian-Carlo Pascutto and contributors
4 
5     Leela Zero is free software: you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation, either version 3 of the License, or
8     (at your option) any later version.
9 
10     Leela Zero is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14 
15     You should have received a copy of the GNU General Public License
16     along with Leela Zero.  If not, see <http://www.gnu.org/licenses/>.
17 
18     Additional permission under GNU GPL version 3 section 7
19 
20     If you modify this Program, or any covered work, by linking or
21     combining it with NVIDIA Corporation's libraries from the
22     NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural
23     Network library and/or the NVIDIA TensorRT inference library
24     (or a modified version of those libraries), containing parts covered
25     by the terms of the respective license agreement, the licensors of
26     this Program grant you additional permission to convey the resulting
27     work.
28 */
29 
30 #ifndef NETWORK_H_INCLUDED
31 #define NETWORK_H_INCLUDED
32 
33 #include "config.h"
34 
35 #include <deque>
36 #include <array>
37 #include <memory>
38 #include <string>
39 #include <utility>
40 #include <vector>
41 #include <fstream>
42 
43 #include "NNCache.h"
44 #include "FastState.h"
45 #ifdef USE_OPENCL
46 #include "OpenCLScheduler.h"
47 #endif
48 #include "GameState.h"
49 #include "ForwardPipe.h"
50 #ifdef USE_OPENCL
51 #include "OpenCLScheduler.h"
52 #endif
53 #ifdef USE_OPENCL_SELFCHECK
54 #include "SMP.h"
55 #endif
56 
57 
58 // Winograd filter transformation changes 3x3 filters to M + 3 - 1
59 constexpr auto WINOGRAD_M = 4;
60 constexpr auto WINOGRAD_ALPHA = WINOGRAD_M + 3 - 1;
61 constexpr auto WINOGRAD_WTILES = BOARD_SIZE / WINOGRAD_M + (BOARD_SIZE % WINOGRAD_M != 0);
62 constexpr auto WINOGRAD_TILE = WINOGRAD_ALPHA * WINOGRAD_ALPHA;
63 constexpr auto WINOGRAD_P = WINOGRAD_WTILES * WINOGRAD_WTILES;
64 constexpr auto SQ2 = 1.4142135623730951f; // Square root of 2
65 
66 class Network {
67     using ForwardPipeWeights = ForwardPipe::ForwardPipeWeights;
68 public:
69     static constexpr auto NUM_SYMMETRIES = 8;
70     static constexpr auto IDENTITY_SYMMETRY = 0;
71     enum Ensemble {
72         DIRECT, RANDOM_SYMMETRY, AVERAGE
73     };
74     using PolicyVertexPair = std::pair<float,int>;
75     using Netresult = NNCache::Netresult;
76 
77     Netresult get_output(const GameState* const state,
78                          const Ensemble ensemble,
79                          const int symmetry = -1,
80                          const bool read_cache = true,
81                          const bool write_cache = true,
82                          const bool force_selfcheck = false);
83 
84     static constexpr auto INPUT_MOVES = 8;
85     static constexpr auto INPUT_CHANNELS = 2 * INPUT_MOVES + 2;
86     static constexpr auto OUTPUTS_POLICY = 2;
87     static constexpr auto OUTPUTS_VALUE = 1;
88     static constexpr auto VALUE_LAYER = 256;
89 
90     void initialize(int playouts, const std::string & weightsfile);
91 
92     float benchmark_time(int centiseconds);
93     void benchmark(const GameState * const state,
94                    const int iterations = 1600);
95     static void show_heatmap(const FastState * const state,
96                              const Netresult & netres, const bool topmoves);
97 
98     static std::vector<float> gather_features(const GameState* const state,
99                                               const int symmetry);
100     static std::pair<int, int> get_symmetry(const std::pair<int, int>& vertex,
101                                             const int symmetry,
102                                             const int board_size = BOARD_SIZE);
103 
104     size_t get_estimated_size();
105     size_t get_estimated_cache_size();
106     void nncache_resize(int max_count);
107 
108 private:
109     std::pair<int, int> load_v1_network(std::istream& wtfile);
110     std::pair<int, int> load_network_file(const std::string& filename);
111 
112     static std::vector<float> winograd_transform_f(const std::vector<float>& f,
113                                                    const int outputs, const int channels);
114     static std::vector<float> zeropad_U(const std::vector<float>& U,
115                                         const int outputs, const int channels,
116                                         const int outputs_pad, const int channels_pad);
117     static void winograd_transform_in(const std::vector<float>& in,
118                                       std::vector<float>& V,
119                                       const int C);
120     static void winograd_transform_out(const std::vector<float>& M,
121                                        std::vector<float>& Y,
122                                        const int K);
123     static void winograd_convolve3(const int outputs,
124                                    const std::vector<float>& input,
125                                    const std::vector<float>& U,
126                                    std::vector<float>& V,
127                                    std::vector<float>& M,
128                                    std::vector<float>& output);
129     static void winograd_sgemm(const std::vector<float>& U,
130                                const std::vector<float>& V,
131                                std::vector<float>& M, const int C, const int K);
132     Netresult get_output_internal(const GameState* const state,
133                                   const int symmetry, bool selfcheck = false);
134     static void fill_input_plane_pair(const FullBoard& board,
135                                       std::vector<float>::iterator black,
136                                       std::vector<float>::iterator white,
137                                       const int symmetry);
138     bool probe_cache(const GameState* const state, Network::Netresult& result);
139     std::unique_ptr<ForwardPipe>&& init_net(int channels,
140                                             std::unique_ptr<ForwardPipe>&& pipe);
141 #ifdef USE_HALF
142     void select_precision(int channels);
143 #endif
144     std::unique_ptr<ForwardPipe> m_forward;
145 #ifdef USE_OPENCL_SELFCHECK
146     void compare_net_outputs(const Netresult& data, const Netresult& ref);
147     std::unique_ptr<ForwardPipe> m_forward_cpu;
148 #endif
149 
150     NNCache m_nncache;
151 
152     size_t estimated_size{0};
153 
154     // Residual tower
155     std::shared_ptr<ForwardPipeWeights> m_fwd_weights;
156 
157     // Policy head
158     std::array<float, OUTPUTS_POLICY> m_bn_pol_w1;
159     std::array<float, OUTPUTS_POLICY> m_bn_pol_w2;
160 
161     std::array<float, OUTPUTS_POLICY
162                       * NUM_INTERSECTIONS
163                       * POTENTIAL_MOVES> m_ip_pol_w;
164     std::array<float, POTENTIAL_MOVES> m_ip_pol_b;
165 
166     // Value head
167     std::array<float, OUTPUTS_VALUE> m_bn_val_w1;
168     std::array<float, OUTPUTS_VALUE> m_bn_val_w2;
169 
170     std::array<float, OUTPUTS_VALUE
171                       * NUM_INTERSECTIONS
172                       * VALUE_LAYER> m_ip1_val_w;
173     std::array<float, VALUE_LAYER> m_ip1_val_b;
174 
175     std::array<float, VALUE_LAYER> m_ip2_val_w;
176     std::array<float, 1> m_ip2_val_b;
177     bool m_value_head_not_stm;
178 };
179 #endif
180