/*
    This file is part of Leela Zero.
    Copyright (C) 2018-2019 Junhee Yoo and contributors

    Leela Zero is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Leela Zero is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Leela Zero.  If not, see <http://www.gnu.org/licenses/>.

    Additional permission under GNU GPL version 3 section 7

    If you modify this Program, or any covered work, by linking or
    combining it with NVIDIA Corporation's libraries from the
    NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural
    Network library and/or the NVIDIA TensorRT inference library
    (or a modified version of those libraries), containing parts covered
    by the terms of the respective license agreement, the licensors of
    this Program grant you additional permission to convey the resulting
    work.
*/

#ifndef OPENCLSCHEDULER_H_INCLUDED
#define OPENCLSCHEDULER_H_INCLUDED
#include "config.h"

#include <atomic>
#include <condition_variable>
#include <list>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>

#include "SMP.h"
#include "ForwardPipe.h"
#include "OpenCL.h"
#include "ThreadPool.h"

#ifndef NDEBUG
// Pair of atomic evaluation counters. The type and the global below exist
// only when NDEBUG is not defined, so any code touching them has to sit
// behind the same preprocessor guard.
//
// Both counters start at zero. This header only declares them; the code that
// increments or reports them is not visible here.
struct batch_stats_t {
    // Going by the name: number of single (non-batch) evaluations.
    // NOTE(review): confirm against the increment site.
    std::atomic<size_t> single_evals{0};
    // Going by the name: number of batched evaluations.
    // NOTE(review): confirm against the increment site.
    std::atomic<size_t> batch_evals{0};
};
// Declaration only: exactly one translation unit has to provide the
// definition of this object.
extern batch_stats_t batch_stats;
#endif

51 template <typename net_t>
52 class OpenCLScheduler : public ForwardPipe {
53     class ForwardQueueEntry {
54     public:
55         std::mutex mutex;
56         std::condition_variable cv;
57         const std::vector<float>& in;
58         std::vector<float>& out_p;
59         std::vector<float>& out_v;
ForwardQueueEntry(const std::vector<float> & input,std::vector<float> & output_pol,std::vector<float> & output_val)60         ForwardQueueEntry(const std::vector<float>& input,
61                           std::vector<float>& output_pol,
62                           std::vector<float>& output_val)
63         : in(input), out_p(output_pol), out_v(output_val)
64           {}
65     };
66 public:
67     virtual ~OpenCLScheduler();
68     OpenCLScheduler();
69 
70     virtual void initialize(const int channels);
71     virtual void forward(const std::vector<float>& input,
72                          std::vector<float>& output_pol,
73                          std::vector<float>& output_val);
74     virtual bool needs_autodetect();
75     virtual void push_weights(unsigned int filter_size,
76                               unsigned int channels,
77                               unsigned int outputs,
78                               std::shared_ptr<const ForwardPipeWeights> weights);
79 private:
80     bool m_running = true;
81     std::vector<std::unique_ptr<OpenCL_Network<net_t>>> m_networks;
82     std::vector<std::unique_ptr<OpenCL<net_t>>> m_opencl;
83 
84     std::mutex m_mutex;
85     std::condition_variable m_cv;
86 
87     // start with 10 milliseconds : lock protected
88     int m_waittime{10};
89 
90     // set to true when single (non-batch) eval is in progress
91     std::atomic<bool> m_single_eval_in_progress{false};
92 
93     std::list<std::shared_ptr<ForwardQueueEntry>> m_forward_queue;
94     std::list<std::thread> m_worker_threads;
95 
96     void batch_worker(const size_t gnum);
97     void push_input_convolution(unsigned int filter_size,
98                                 unsigned int channels,
99                                 unsigned int outputs,
100                                 const std::vector<float>& weights,
101                                 const std::vector<float>& means,
102                                 const std::vector<float>& variances);
103 
104     void push_residual(unsigned int filter_size,
105                        unsigned int channels,
106                        unsigned int outputs,
107                        const std::vector<float>& weights_1,
108                        const std::vector<float>& means_1,
109                        const std::vector<float>& variances_1,
110                        const std::vector<float>& weights_2,
111                        const std::vector<float>& means_2,
112                        const std::vector<float>& variances_2);
113 
114     void push_convolve(unsigned int filter_size,
115                        unsigned int channels,
116                        unsigned int outputs,
117                        const std::vector<float>& weights);
118 };
119 
#endif