1 /*!
2  * Copyright 2019-2021 by XGBoost Contributors
3  */
4 #include <thrust/host_vector.h>
5 
6 #include "test_ranking_obj.cc"
7 #include "../../../src/objective/rank_obj.cu"
8 
9 namespace xgboost {
10 
11 template <typename T = uint32_t, typename Comparator = thrust::greater<T>>
12 std::unique_ptr<dh::SegmentSorter<T>>
RankSegmentSorterTestImpl(const std::vector<uint32_t> & group_indices,const std::vector<T> & hlabels,const std::vector<T> & expected_sorted_hlabels,const std::vector<uint32_t> & expected_orig_pos)13 RankSegmentSorterTestImpl(const std::vector<uint32_t> &group_indices,
14                           const std::vector<T> &hlabels,
15                           const std::vector<T> &expected_sorted_hlabels,
16                           const std::vector<uint32_t> &expected_orig_pos
17                           ) {
18   std::unique_ptr<dh::SegmentSorter<T>> seg_sorter_ptr(new dh::SegmentSorter<T>);
19   dh::SegmentSorter<T> &seg_sorter(*seg_sorter_ptr);
20 
21   // Create a bunch of unsorted labels on the device and sort it via the segment sorter
22   dh::device_vector<T> dlabels(hlabels);
23   seg_sorter.SortItems(dlabels.data().get(), dlabels.size(), group_indices, Comparator());
24 
25   auto num_items = seg_sorter.GetItemsSpan().size();
26   EXPECT_EQ(num_items, group_indices.back());
27   EXPECT_EQ(seg_sorter.GetNumGroups(), group_indices.size() - 1);
28 
29   // Check the labels
30   dh::device_vector<T> sorted_dlabels(num_items);
31   sorted_dlabels.assign(dh::tcbegin(seg_sorter.GetItemsSpan()),
32                         dh::tcend(seg_sorter.GetItemsSpan()));
33   thrust::host_vector<T> sorted_hlabels(sorted_dlabels);
34   EXPECT_EQ(expected_sorted_hlabels, sorted_hlabels);
35 
36   // Check the indices
37   dh::device_vector<uint32_t> dorig_pos(num_items);
38   dorig_pos.assign(dh::tcbegin(seg_sorter.GetOriginalPositionsSpan()),
39                    dh::tcend(seg_sorter.GetOriginalPositionsSpan()));
40   dh::device_vector<uint32_t> horig_pos(dorig_pos);
41   EXPECT_EQ(expected_orig_pos, horig_pos);
42 
43   return seg_sorter_ptr;
44 }
45 
TEST(Objective,RankSegmentSorterTest)46 TEST(Objective, RankSegmentSorterTest) {
47   RankSegmentSorterTestImpl({0, 2, 4, 7, 10, 14, 18, 22, 26},  // Groups
48                             {1, 1,                             // Labels
49                              1, 2,
50                              3, 2, 1,
51                              1, 2, 1,
52                              1, 3, 4, 2,
53                              1, 2, 1, 1,
54                              1, 2, 2, 3,
55                              3, 3, 1, 2},
56                             {1, 1,                             // Expected sorted labels
57                              2, 1,
58                              3, 2, 1,
59                              2, 1, 1,
60                              4, 3, 2, 1,
61                              2, 1, 1, 1,
62                              3, 2, 2, 1,
63                              3, 3, 2, 1},
64                             {0, 1,                             // Expected original positions
65                              3, 2,
66                              4, 5, 6,
67                              8, 7, 9,
68                              12, 11, 13, 10,
69                              15, 14, 16, 17,
70                              21, 19, 20, 18,
71                              22, 23, 25, 24});
72 }
73 
TEST(Objective,RankSegmentSorterSingleGroupTest)74 TEST(Objective, RankSegmentSorterSingleGroupTest) {
75   RankSegmentSorterTestImpl({0, 7},                  // Groups
76                             {6, 1, 4, 3, 0, 5, 2},   // Labels
77                             {6, 5, 4, 3, 2, 1, 0},   // Expected sorted labels
78                             {0, 5, 2, 3, 6, 1, 4});  // Expected original positions
79 }
80 
TEST(Objective,RankSegmentSorterAscendingTest)81 TEST(Objective, RankSegmentSorterAscendingTest) {
82   RankSegmentSorterTestImpl<uint32_t, thrust::less<uint32_t>>(
83                                                     {0, 4, 7},    // Groups
84                                                     {3, 1, 4, 2,  // Labels
85                                                      6, 5, 7},
86                                                     {1, 2, 3, 4,  // Expected sorted labels
87                                                      5, 6, 7},
88                                                     {1, 3, 0, 2,  // Expected original positions
89                                                      5, 4, 6});
90 }
91 
92 using CountFunctor = uint32_t (*)(const int *, uint32_t, int);
RankItemCountImpl(const std::vector<int> & sorted_items,CountFunctor f,int find_val,uint32_t exp_val)93 void RankItemCountImpl(const std::vector<int> &sorted_items, CountFunctor f,
94                        int find_val, uint32_t exp_val) {
95   EXPECT_NE(std::find(sorted_items.begin(), sorted_items.end(), find_val), sorted_items.end());
96   EXPECT_EQ(f(&sorted_items[0], sorted_items.size(), find_val), exp_val);
97 }
98 
TEST(Objective,RankItemCountOnLeft)99 TEST(Objective, RankItemCountOnLeft) {
100   // Items sorted descendingly
101   std::vector<int> sorted_items{10, 10, 6, 4, 4, 4, 4, 1, 1, 1, 1, 1, 0};
102   RankItemCountImpl(sorted_items, &xgboost::obj::CountNumItemsToTheLeftOf,
103                     10, static_cast<uint32_t>(0));
104   RankItemCountImpl(sorted_items, &xgboost::obj::CountNumItemsToTheLeftOf,
105                     6, static_cast<uint32_t>(2));
106   RankItemCountImpl(sorted_items, &xgboost::obj::CountNumItemsToTheLeftOf,
107                     4, static_cast<uint32_t>(3));
108   RankItemCountImpl(sorted_items, &xgboost::obj::CountNumItemsToTheLeftOf,
109                     1, static_cast<uint32_t>(7));
110   RankItemCountImpl(sorted_items, &xgboost::obj::CountNumItemsToTheLeftOf,
111                     0, static_cast<uint32_t>(12));
112 }
113 
TEST(Objective,RankItemCountOnRight)114 TEST(Objective, RankItemCountOnRight) {
115   // Items sorted descendingly
116   std::vector<int> sorted_items{10, 10, 6, 4, 4, 4, 4, 1, 1, 1, 1, 1, 0};
117   RankItemCountImpl(sorted_items, &xgboost::obj::CountNumItemsToTheRightOf,
118                     10, static_cast<uint32_t>(11));
119   RankItemCountImpl(sorted_items, &xgboost::obj::CountNumItemsToTheRightOf,
120                     6, static_cast<uint32_t>(10));
121   RankItemCountImpl(sorted_items, &xgboost::obj::CountNumItemsToTheRightOf,
122                     4, static_cast<uint32_t>(6));
123   RankItemCountImpl(sorted_items, &xgboost::obj::CountNumItemsToTheRightOf,
124                     1, static_cast<uint32_t>(1));
125   RankItemCountImpl(sorted_items, &xgboost::obj::CountNumItemsToTheRightOf,
126                     0, static_cast<uint32_t>(0));
127 }
128 
TEST(Objective,NDCGLambdaWeightComputerTest)129 TEST(Objective, NDCGLambdaWeightComputerTest) {
130   std::vector<float> hlabels = {3.1f, 1.2f, 2.3f, 4.4f,        // Labels
131                                 7.8f, 5.01f, 6.96f,
132                                 10.3f, 8.7f, 11.4f, 9.45f, 11.4f};
133   dh::device_vector<bst_float> dlabels(hlabels);
134 
135   auto segment_label_sorter = RankSegmentSorterTestImpl<float>(
136     {0, 4, 7, 12},                  // Groups
137     hlabels,
138     {4.4f, 3.1f, 2.3f, 1.2f,        // Expected sorted labels
139      7.8f, 6.96f, 5.01f,
140      11.4f, 11.4f, 10.3f, 9.45f, 8.7f},
141     {3, 0, 2, 1,                    // Expected original positions
142      4, 6, 5,
143      9, 11, 7, 10, 8});
144 
145   // Created segmented predictions for the labels from above
146   std::vector<bst_float> hpreds{-9.78f, 24.367f, 0.908f, -11.47f,
147                                 -1.03f, -2.79f, -3.1f,
148                                 104.22f, 103.1f, -101.7f, 100.5f, 45.1f};
149   dh::device_vector<bst_float> dpreds(hpreds);
150 
151   xgboost::obj::NDCGLambdaWeightComputer ndcg_lw_computer(dpreds.data().get(),
152                                                           dlabels.data().get(),
153                                                           *segment_label_sorter);
154 
155   // Where will the predictions move from its current position, if they were sorted
156   // descendingly?
157   auto dsorted_pred_pos = ndcg_lw_computer.GetPredictionSorter().GetIndexableSortedPositionsSpan();
158   std::vector<uint32_t> hsorted_pred_pos(segment_label_sorter->GetNumItems());
159   dh::CopyDeviceSpanToVector(&hsorted_pred_pos, dsorted_pred_pos);
160   std::vector<uint32_t> expected_sorted_pred_pos{2, 0, 1, 3,
161                                                  4, 5, 6,
162                                                  7, 8, 11, 9, 10};
163   EXPECT_EQ(expected_sorted_pred_pos, hsorted_pred_pos);
164 
165   // Check group DCG values
166   std::vector<float> hgroup_dcgs(segment_label_sorter->GetNumGroups());
167   dh::CopyDeviceSpanToVector(&hgroup_dcgs, ndcg_lw_computer.GetGroupDcgsSpan());
168   std::vector<uint32_t> hgroups(segment_label_sorter->GetNumGroups() + 1);
169   dh::CopyDeviceSpanToVector(&hgroups, segment_label_sorter->GetGroupsSpan());
170   EXPECT_EQ(hgroup_dcgs.size(), segment_label_sorter->GetNumGroups());
171   std::vector<float> hsorted_labels(segment_label_sorter->GetNumItems());
172   dh::CopyDeviceSpanToVector(&hsorted_labels, segment_label_sorter->GetItemsSpan());
173   for (auto i = 0; i < hgroup_dcgs.size(); ++i) {
174     // Compute group DCG value on CPU and compare
175     auto gbegin = hgroups[i];
176     auto gend = hgroups[i + 1];
177     EXPECT_NEAR(
178       hgroup_dcgs[i],
179       xgboost::obj::NDCGLambdaWeightComputer::ComputeGroupDCGWeight(&hsorted_labels[gbegin],
180                                                                     gend - gbegin),
181       0.01f);
182   }
183 }
184 
TEST(Objective,IndexableSortedItemsTest)185 TEST(Objective, IndexableSortedItemsTest) {
186   std::vector<float> hlabels = {3.1f, 1.2f, 2.3f, 4.4f,        // Labels
187                                 7.8f, 5.01f, 6.96f,
188                                 10.3f, 8.7f, 11.4f, 9.45f, 11.4f};
189   dh::device_vector<bst_float> dlabels(hlabels);
190 
191   auto segment_label_sorter = RankSegmentSorterTestImpl<float>(
192     {0, 4, 7, 12},                  // Groups
193     hlabels,
194     {4.4f, 3.1f, 2.3f, 1.2f,        // Expected sorted labels
195      7.8f, 6.96f, 5.01f,
196      11.4f, 11.4f, 10.3f, 9.45f, 8.7f},
197     {3, 0, 2, 1,                    // Expected original positions
198      4, 6, 5,
199      9, 11, 7, 10, 8});
200 
201   segment_label_sorter->CreateIndexableSortedPositions();
202   std::vector<uint32_t> sorted_indices(segment_label_sorter->GetNumItems());
203   dh::CopyDeviceSpanToVector(&sorted_indices,
204                              segment_label_sorter->GetIndexableSortedPositionsSpan());
205   std::vector<uint32_t> expected_sorted_indices = {
206     1, 3, 2, 0,
207     4, 6, 5,
208     9, 11, 7, 10, 8};
209   EXPECT_EQ(expected_sorted_indices, sorted_indices);
210 }
211 
TEST(Objective,ComputeAndCompareMAPStatsTest)212 TEST(Objective, ComputeAndCompareMAPStatsTest) {
213   std::vector<float> hlabels = {3.1f, 0.0f, 2.3f, 4.4f,        // Labels
214                                 0.0f, 5.01f, 0.0f,
215                                 10.3f, 0.0f, 11.4f, 9.45f, 11.4f};
216   dh::device_vector<bst_float> dlabels(hlabels);
217 
218   auto segment_label_sorter = RankSegmentSorterTestImpl<float>(
219     {0, 4, 7, 12},                  // Groups
220     hlabels,
221     {4.4f, 3.1f, 2.3f, 0.0f,        // Expected sorted labels
222      5.01f, 0.0f, 0.0f,
223      11.4f, 11.4f, 10.3f, 9.45f, 0.0f},
224     {3, 0, 2, 1,                    // Expected original positions
225      5, 4, 6,
226      9, 11, 7, 10, 8});
227 
228   // Create MAP stats on the device first using the objective
229   std::vector<bst_float> hpreds{-9.78f, 24.367f, 0.908f, -11.47f,
230                                 -1.03f, -2.79f, -3.1f,
231                                 104.22f, 103.1f, -101.7f, 100.5f, 45.1f};
232   dh::device_vector<bst_float> dpreds(hpreds);
233 
234   xgboost::obj::MAPLambdaWeightComputer map_lw_computer(dpreds.data().get(),
235                                                         dlabels.data().get(),
236                                                         *segment_label_sorter);
237 
238   // Get the device MAP stats on host
239   std::vector<xgboost::obj::MAPLambdaWeightComputer::MAPStats> dmap_stats(
240     segment_label_sorter->GetNumItems());
241   dh::CopyDeviceSpanToVector(&dmap_stats, map_lw_computer.GetMapStatsSpan());
242 
243   // Compute the MAP stats on host next to compare
244   std::vector<uint32_t> hgroups(segment_label_sorter->GetNumGroups() + 1);
245   dh::CopyDeviceSpanToVector(&hgroups, segment_label_sorter->GetGroupsSpan());
246 
247   for (auto i = 0; i < hgroups.size() - 1; ++i) {
248     auto gbegin = hgroups[i];
249     auto gend = hgroups[i + 1];
250     std::vector<xgboost::obj::ListEntry> lst_entry;
251     for (auto j = gbegin; j < gend; ++j) {
252       lst_entry.emplace_back(hpreds[j], hlabels[j], j);
253     }
254     std::stable_sort(lst_entry.begin(), lst_entry.end(), xgboost::obj::ListEntry::CmpPred);
255 
256     // Compute the MAP stats with this list and compare with the ones computed on the device
257     std::vector<xgboost::obj::MAPLambdaWeightComputer::MAPStats> hmap_stats;
258     xgboost::obj::MAPLambdaWeightComputer::GetMAPStats(lst_entry, &hmap_stats);
259     for (auto j = gbegin; j < gend; ++j) {
260       EXPECT_EQ(dmap_stats[j].hits, hmap_stats[j - gbegin].hits);
261       EXPECT_NEAR(dmap_stats[j].ap_acc, hmap_stats[j - gbegin].ap_acc, 0.01f);
262       EXPECT_NEAR(dmap_stats[j].ap_acc_miss, hmap_stats[j - gbegin].ap_acc_miss, 0.01f);
263       EXPECT_NEAR(dmap_stats[j].ap_acc_add, hmap_stats[j - gbegin].ap_acc_add, 0.01f);
264     }
265   }
266 }
267 
268 }  // namespace xgboost
269