/*
    This file is part of Leela Zero.
    Copyright (C) 2018-2019 Junhee Yoo and contributors

    Leela Zero is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    Leela Zero is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with Leela Zero.  If not, see <http://www.gnu.org/licenses/>.

    Additional permission under GNU GPL version 3 section 7

    If you modify this Program, or any covered work, by linking or
    combining it with NVIDIA Corporation's libraries from the
    NVIDIA CUDA Toolkit and/or the NVIDIA CUDA Deep Neural
    Network library and/or the NVIDIA TensorRT inference library
    (or a modified version of those libraries), containing parts covered
    by the terms of the respective license agreement, the licensors of
    this Program grant you additional permission to convey the resulting
    work.
28 */ 29 30 #ifndef OPENCLSCHEDULER_H_INCLUDED 31 #define OPENCLSCHEDULER_H_INCLUDED 32 #include "config.h" 33 34 #include <list> 35 #include <vector> 36 #include <thread> 37 38 #include "SMP.h" 39 #include "ForwardPipe.h" 40 #include "OpenCL.h" 41 #include "ThreadPool.h" 42 43 #ifndef NDEBUG 44 struct batch_stats_t { 45 std::atomic<size_t> single_evals{0}; 46 std::atomic<size_t> batch_evals{0}; 47 }; 48 extern batch_stats_t batch_stats; 49 #endif 50 51 template <typename net_t> 52 class OpenCLScheduler : public ForwardPipe { 53 class ForwardQueueEntry { 54 public: 55 std::mutex mutex; 56 std::condition_variable cv; 57 const std::vector<float>& in; 58 std::vector<float>& out_p; 59 std::vector<float>& out_v; ForwardQueueEntry(const std::vector<float> & input,std::vector<float> & output_pol,std::vector<float> & output_val)60 ForwardQueueEntry(const std::vector<float>& input, 61 std::vector<float>& output_pol, 62 std::vector<float>& output_val) 63 : in(input), out_p(output_pol), out_v(output_val) 64 {} 65 }; 66 public: 67 virtual ~OpenCLScheduler(); 68 OpenCLScheduler(); 69 70 virtual void initialize(const int channels); 71 virtual void forward(const std::vector<float>& input, 72 std::vector<float>& output_pol, 73 std::vector<float>& output_val); 74 virtual bool needs_autodetect(); 75 virtual void push_weights(unsigned int filter_size, 76 unsigned int channels, 77 unsigned int outputs, 78 std::shared_ptr<const ForwardPipeWeights> weights); 79 private: 80 bool m_running = true; 81 std::vector<std::unique_ptr<OpenCL_Network<net_t>>> m_networks; 82 std::vector<std::unique_ptr<OpenCL<net_t>>> m_opencl; 83 84 std::mutex m_mutex; 85 std::condition_variable m_cv; 86 87 // start with 10 milliseconds : lock protected 88 int m_waittime{10}; 89 90 // set to true when single (non-batch) eval is in progress 91 std::atomic<bool> m_single_eval_in_progress{false}; 92 93 std::list<std::shared_ptr<ForwardQueueEntry>> m_forward_queue; 94 std::list<std::thread> m_worker_threads; 95 96 
void batch_worker(const size_t gnum); 97 void push_input_convolution(unsigned int filter_size, 98 unsigned int channels, 99 unsigned int outputs, 100 const std::vector<float>& weights, 101 const std::vector<float>& means, 102 const std::vector<float>& variances); 103 104 void push_residual(unsigned int filter_size, 105 unsigned int channels, 106 unsigned int outputs, 107 const std::vector<float>& weights_1, 108 const std::vector<float>& means_1, 109 const std::vector<float>& variances_1, 110 const std::vector<float>& weights_2, 111 const std::vector<float>& means_2, 112 const std::vector<float>& variances_2); 113 114 void push_convolve(unsigned int filter_size, 115 unsigned int channels, 116 unsigned int outputs, 117 const std::vector<float>& weights); 118 }; 119 120 #endif 121