1 /*! 2 * Copyright 2021 by XGBoost Contributors 3 */ 4 #ifndef XGBOOST_TREE_HIST_HISTOGRAM_H_ 5 #define XGBOOST_TREE_HIST_HISTOGRAM_H_ 6 7 #include <algorithm> 8 #include <limits> 9 #include <vector> 10 11 #include "rabit/rabit.h" 12 #include "xgboost/tree_model.h" 13 #include "../../common/hist_util.h" 14 15 namespace xgboost { 16 namespace tree { 17 template <typename GradientSumT, typename ExpandEntry> class HistogramBuilder { 18 using GradientPairT = xgboost::detail::GradientPairInternal<GradientSumT>; 19 using GHistRowT = common::GHistRow<GradientSumT>; 20 21 /*! \brief culmulative histogram of gradients. */ 22 common::HistCollection<GradientSumT> hist_; 23 /*! \brief culmulative local parent histogram of gradients. */ 24 common::HistCollection<GradientSumT> hist_local_worker_; 25 common::GHistBuilder<GradientSumT> builder_; 26 common::ParallelGHistBuilder<GradientSumT> buffer_; 27 rabit::Reducer<GradientPairT, GradientPairT::Reduce> reducer_; 28 int32_t max_bin_ {-1}; 29 int32_t n_threads_ {-1}; 30 // Whether XGBoost is running in distributed environment. 31 bool is_distributed_ {false}; 32 33 public: 34 /** 35 * \param total_bins Total number of bins across all features 36 * \param max_bin_per_feat Maximum number of bins per feature, same as the `max_bin` 37 * training parameter. 38 * \param n_threads Number of threads. 39 * \param is_distributed Mostly used for testing to allow injecting parameters instead 40 * of using global rabit variable. 41 */ 42 void Reset(uint32_t total_bins, int32_t max_bin_per_feat, int32_t n_threads, 43 bool is_distributed = rabit::IsDistributed()) { 44 CHECK_GE(n_threads, 1); 45 n_threads_ = n_threads; 46 CHECK_GE(max_bin_per_feat, 2); 47 max_bin_ = max_bin_per_feat; 48 hist_.Init(total_bins); 49 hist_local_worker_.Init(total_bins); 50 buffer_.Init(total_bins); 51 builder_ = common::GHistBuilder<GradientSumT>(n_threads, total_bins); 52 is_distributed_ = is_distributed; 53 } 54 55 template <bool any_missing> 56 void BuildLocalHistograms(DMatrix * p_fmat,std::vector<ExpandEntry> nodes_for_explicit_hist_build,common::RowSetCollection const & row_set_collection,const std::vector<GradientPair> & gpair_h)57 BuildLocalHistograms(DMatrix *p_fmat, 58 std::vector<ExpandEntry> nodes_for_explicit_hist_build, 59 common::RowSetCollection const &row_set_collection, 60 const std::vector<GradientPair> &gpair_h) { 61 const size_t n_nodes = nodes_for_explicit_hist_build.size(); 62 63 // create space of size (# rows in each node) 64 common::BlockedSpace2d space( 65 n_nodes, 66 [&](size_t node) { 67 const int32_t nid = nodes_for_explicit_hist_build[node].nid; 68 return row_set_collection[nid].Size(); 69 }, 70 256); 71 72 std::vector<GHistRowT> target_hists(n_nodes); 73 for (size_t i = 0; i < n_nodes; ++i) { 74 const int32_t nid = nodes_for_explicit_hist_build[i].nid; 75 target_hists[i] = hist_[nid]; 76 } 77 buffer_.Reset(this->n_threads_, n_nodes, space, target_hists); 78 79 // Parallel processing by nodes and data in each node 80 for (auto const &gmat : p_fmat->GetBatches<GHistIndexMatrix>( 81 BatchParam{GenericParameter::kCpuId, max_bin_})) { 82 common::ParallelFor2d( 83 space, this->n_threads_, [&](size_t nid_in_set, common::Range1d r) { 84 const auto tid = static_cast<unsigned>(omp_get_thread_num()); 85 const int32_t nid = nodes_for_explicit_hist_build[nid_in_set].nid; 86 87 auto start_of_row_set = row_set_collection[nid].begin; 88 auto rid_set = common::RowSetCollection::Elem( 89 start_of_row_set + r.begin(), start_of_row_set + r.end(), nid); 90 builder_.template BuildHist<any_missing>( 91 gpair_h, rid_set, gmat, 92 buffer_.GetInitializedHist(tid, nid_in_set)); 93 }); 94 } 95 } 96 97 void AddHistRows(int * starting_index,int * sync_count,std::vector<ExpandEntry> const & nodes_for_explicit_hist_build,std::vector<ExpandEntry> const & nodes_for_subtraction_trick,RegTree * p_tree)98 AddHistRows(int *starting_index, int *sync_count, 99 std::vector<ExpandEntry> const &nodes_for_explicit_hist_build, 100 std::vector<ExpandEntry> const &nodes_for_subtraction_trick, 101 RegTree *p_tree) { 102 if (is_distributed_) { 103 this->AddHistRowsDistributed(starting_index, sync_count, 104 nodes_for_explicit_hist_build, 105 nodes_for_subtraction_trick, p_tree); 106 } else { 107 this->AddHistRowsLocal(starting_index, sync_count, 108 nodes_for_explicit_hist_build, 109 nodes_for_subtraction_trick); 110 } 111 } 112 113 /* Main entry point of this class, build histogram for tree nodes. */ BuildHist(DMatrix * p_fmat,RegTree * p_tree,common::RowSetCollection const & row_set_collection,std::vector<ExpandEntry> const & nodes_for_explicit_hist_build,std::vector<ExpandEntry> const & nodes_for_subtraction_trick,std::vector<GradientPair> const & gpair)114 void BuildHist(DMatrix *p_fmat, RegTree *p_tree, 115 common::RowSetCollection const &row_set_collection, 116 std::vector<ExpandEntry> const &nodes_for_explicit_hist_build, 117 std::vector<ExpandEntry> const &nodes_for_subtraction_trick, 118 std::vector<GradientPair> const &gpair) { 119 int starting_index = std::numeric_limits<int>::max(); 120 int sync_count = 0; 121 this->AddHistRows(&starting_index, &sync_count, 122 nodes_for_explicit_hist_build, 123 nodes_for_subtraction_trick, p_tree); 124 if (p_fmat->IsDense()) { 125 BuildLocalHistograms<false>(p_fmat, nodes_for_explicit_hist_build, 126 row_set_collection, gpair); 127 } else { 128 BuildLocalHistograms<true>(p_fmat, nodes_for_explicit_hist_build, 129 row_set_collection, gpair); 130 } 131 if (is_distributed_) { 132 this->SyncHistogramDistributed(p_tree, nodes_for_explicit_hist_build, 133 nodes_for_subtraction_trick, 134 starting_index, sync_count); 135 } else { 136 this->SyncHistogramLocal(p_tree, nodes_for_explicit_hist_build, 137 nodes_for_subtraction_trick, starting_index, 138 sync_count); 139 } 140 } 141 SyncHistogramDistributed(RegTree * p_tree,std::vector<ExpandEntry> const & nodes_for_explicit_hist_build,std::vector<ExpandEntry> const & nodes_for_subtraction_trick,int starting_index,int sync_count)142 void SyncHistogramDistributed( 143 RegTree *p_tree, 144 std::vector<ExpandEntry> const &nodes_for_explicit_hist_build, 145 std::vector<ExpandEntry> const &nodes_for_subtraction_trick, 146 int starting_index, int sync_count) { 147 const size_t nbins = builder_.GetNumBins(); 148 common::BlockedSpace2d space( 149 nodes_for_explicit_hist_build.size(), [&](size_t) { return nbins; }, 150 1024); 151 common::ParallelFor2d( 152 space, n_threads_, [&](size_t node, common::Range1d r) { 153 const auto &entry = nodes_for_explicit_hist_build[node]; 154 auto this_hist = this->hist_[entry.nid]; 155 // Merging histograms from each thread into once 156 buffer_.ReduceHist(node, r.begin(), r.end()); 157 // Store posible parent node 158 auto this_local = hist_local_worker_[entry.nid]; 159 common::CopyHist(this_local, this_hist, r.begin(), r.end()); 160 161 if (!(*p_tree)[entry.nid].IsRoot()) { 162 const size_t parent_id = (*p_tree)[entry.nid].Parent(); 163 const int subtraction_node_id = 164 nodes_for_subtraction_trick[node].nid; 165 auto parent_hist = this->hist_local_worker_[parent_id]; 166 auto sibling_hist = this->hist_[subtraction_node_id]; 167 common::SubtractionHist(sibling_hist, parent_hist, this_hist, 168 r.begin(), r.end()); 169 // Store posible parent node 170 auto sibling_local = hist_local_worker_[subtraction_node_id]; 171 common::CopyHist(sibling_local, sibling_hist, r.begin(), r.end()); 172 } 173 }); 174 175 reducer_.Allreduce(this->hist_[starting_index].data(), 176 builder_.GetNumBins() * sync_count); 177 178 ParallelSubtractionHist(space, nodes_for_explicit_hist_build, 179 nodes_for_subtraction_trick, p_tree); 180 181 common::BlockedSpace2d space2( 182 nodes_for_subtraction_trick.size(), [&](size_t) { return nbins; }, 183 1024); 184 ParallelSubtractionHist(space2, nodes_for_subtraction_trick, 185 nodes_for_explicit_hist_build, p_tree); 186 } 187 SyncHistogramLocal(RegTree * p_tree,std::vector<ExpandEntry> const & nodes_for_explicit_hist_build,std::vector<ExpandEntry> const & nodes_for_subtraction_trick,int starting_index,int sync_count)188 void SyncHistogramLocal( 189 RegTree *p_tree, 190 std::vector<ExpandEntry> const &nodes_for_explicit_hist_build, 191 std::vector<ExpandEntry> const &nodes_for_subtraction_trick, 192 int starting_index, int sync_count) { 193 const size_t nbins = this->builder_.GetNumBins(); 194 common::BlockedSpace2d space( 195 nodes_for_explicit_hist_build.size(), [&](size_t) { return nbins; }, 196 1024); 197 198 common::ParallelFor2d( 199 space, this->n_threads_, [&](size_t node, common::Range1d r) { 200 const auto &entry = nodes_for_explicit_hist_build[node]; 201 auto this_hist = this->hist_[entry.nid]; 202 // Merging histograms from each thread into once 203 this->buffer_.ReduceHist(node, r.begin(), r.end()); 204 205 if (!(*p_tree)[entry.nid].IsRoot()) { 206 const size_t parent_id = (*p_tree)[entry.nid].Parent(); 207 const int subtraction_node_id = 208 nodes_for_subtraction_trick[node].nid; 209 auto parent_hist = this->hist_[parent_id]; 210 auto sibling_hist = this->hist_[subtraction_node_id]; 211 common::SubtractionHist(sibling_hist, parent_hist, this_hist, 212 r.begin(), r.end()); 213 } 214 }); 215 } 216 217 public: 218 /* Getters for tests. */ Histogram()219 common::HistCollection<GradientSumT> const& Histogram() { 220 return hist_; 221 } Buffer()222 auto& Buffer() { return buffer_; } 223 224 private: 225 void ParallelSubtractionHist(const common::BlockedSpace2d & space,const std::vector<ExpandEntry> & nodes,const std::vector<ExpandEntry> & subtraction_nodes,const RegTree * p_tree)226 ParallelSubtractionHist(const common::BlockedSpace2d &space, 227 const std::vector<ExpandEntry> &nodes, 228 const std::vector<ExpandEntry> &subtraction_nodes, 229 const RegTree *p_tree) { 230 common::ParallelFor2d( 231 space, this->n_threads_, [&](size_t node, common::Range1d r) { 232 const auto &entry = nodes[node]; 233 if (!((*p_tree)[entry.nid].IsLeftChild())) { 234 auto this_hist = this->hist_[entry.nid]; 235 236 if (!(*p_tree)[entry.nid].IsRoot()) { 237 const int subtraction_node_id = subtraction_nodes[node].nid; 238 auto parent_hist = hist_[(*p_tree)[entry.nid].Parent()]; 239 auto sibling_hist = hist_[subtraction_node_id]; 240 common::SubtractionHist(this_hist, parent_hist, sibling_hist, 241 r.begin(), r.end()); 242 } 243 } 244 }); 245 } 246 247 // Add a tree node to histogram buffer in local training environment. AddHistRowsLocal(int * starting_index,int * sync_count,std::vector<ExpandEntry> const & nodes_for_explicit_hist_build,std::vector<ExpandEntry> const & nodes_for_subtraction_trick)248 void AddHistRowsLocal( 249 int *starting_index, int *sync_count, 250 std::vector<ExpandEntry> const &nodes_for_explicit_hist_build, 251 std::vector<ExpandEntry> const &nodes_for_subtraction_trick) { 252 for (auto const &entry : nodes_for_explicit_hist_build) { 253 int nid = entry.nid; 254 this->hist_.AddHistRow(nid); 255 (*starting_index) = std::min(nid, (*starting_index)); 256 } 257 (*sync_count) = nodes_for_explicit_hist_build.size(); 258 259 for (auto const &node : nodes_for_subtraction_trick) { 260 this->hist_.AddHistRow(node.nid); 261 } 262 this->hist_.AllocateAllData(); 263 } 264 AddHistRowsDistributed(int * starting_index,int * sync_count,std::vector<ExpandEntry> const & nodes_for_explicit_hist_build,std::vector<ExpandEntry> const & nodes_for_subtraction_trick,RegTree * p_tree)265 void AddHistRowsDistributed( 266 int *starting_index, int *sync_count, 267 std::vector<ExpandEntry> const &nodes_for_explicit_hist_build, 268 std::vector<ExpandEntry> const &nodes_for_subtraction_trick, 269 RegTree *p_tree) { 270 const size_t explicit_size = nodes_for_explicit_hist_build.size(); 271 const size_t subtaction_size = nodes_for_subtraction_trick.size(); 272 std::vector<int> merged_node_ids(explicit_size + subtaction_size); 273 for (size_t i = 0; i < explicit_size; ++i) { 274 merged_node_ids[i] = nodes_for_explicit_hist_build[i].nid; 275 } 276 for (size_t i = 0; i < subtaction_size; ++i) { 277 merged_node_ids[explicit_size + i] = nodes_for_subtraction_trick[i].nid; 278 } 279 std::sort(merged_node_ids.begin(), merged_node_ids.end()); 280 int n_left = 0; 281 for (auto const &nid : merged_node_ids) { 282 if ((*p_tree)[nid].IsLeftChild()) { 283 this->hist_.AddHistRow(nid); 284 (*starting_index) = std::min(nid, (*starting_index)); 285 n_left++; 286 this->hist_local_worker_.AddHistRow(nid); 287 } 288 } 289 for (auto const &nid : merged_node_ids) { 290 if (!((*p_tree)[nid].IsLeftChild())) { 291 this->hist_.AddHistRow(nid); 292 this->hist_local_worker_.AddHistRow(nid); 293 } 294 } 295 this->hist_.AllocateAllData(); 296 this->hist_local_worker_.AllocateAllData(); 297 (*sync_count) = std::max(1, n_left); 298 } 299 }; 300 } // namespace tree 301 } // namespace xgboost 302 #endif // XGBOOST_TREE_HIST_HISTOGRAM_H_ 303