1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
#include <faiss/Clustering.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/gpu/perf/IndexWrapper.h>
#include <faiss/gpu/utils/DeviceUtils.h>
#include <faiss/gpu/utils/Timer.h>
#include <faiss/utils/random.h>
#include <gflags/gflags.h>
#include <cstdint>
#include <cstdio>
#include <ctime>
#include <memory>
#include <vector>

#include <cuda_profiler_api.h>
20 
21 DEFINE_int32(num, 10000, "# of vecs");
22 DEFINE_int32(k, 100, "# of clusters");
23 DEFINE_int32(dim, 128, "# of dimensions");
24 DEFINE_int32(niter, 10, "# of iterations");
25 DEFINE_bool(L2_metric, true, "If true, use L2 metric. If false, use IP metric");
26 DEFINE_bool(use_float16, false, "use float16 vectors and math");
27 DEFINE_bool(transposed, false, "transposed vector storage");
28 DEFINE_bool(verbose, false, "turn on clustering logging");
29 DEFINE_int64(seed, -1, "specify random seed");
30 DEFINE_int32(num_gpus, 1, "number of gpus to use");
31 DEFINE_int64(
32         min_paging_size,
33         -1,
34         "minimum size to use CPU -> GPU paged copies");
35 DEFINE_int64(pinned_mem, -1, "pinned memory allocation to use");
36 DEFINE_int32(max_points, -1, "max points per centroid");
37 
38 using namespace faiss::gpu;
39 
main(int argc,char ** argv)40 int main(int argc, char** argv) {
41     gflags::ParseCommandLineFlags(&argc, &argv, true);
42 
43     cudaProfilerStop();
44 
45     auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
46     printf("using seed %ld\n", seed);
47 
48     std::vector<float> vecs((size_t)FLAGS_num * FLAGS_dim);
49     faiss::float_rand(vecs.data(), vecs.size(), seed);
50 
51     printf("K-means metric %s dim %d centroids %d num train %d niter %d\n",
52            FLAGS_L2_metric ? "L2" : "IP",
53            FLAGS_dim,
54            FLAGS_k,
55            FLAGS_num,
56            FLAGS_niter);
57     printf("float16 math %s\n", FLAGS_use_float16 ? "enabled" : "disabled");
58     printf("transposed storage %s\n",
59            FLAGS_transposed ? "enabled" : "disabled");
60     printf("verbose %s\n", FLAGS_verbose ? "enabled" : "disabled");
61 
62     auto initFn = [](faiss::gpu::GpuResourcesProvider* res,
63                      int dev) -> std::unique_ptr<faiss::gpu::GpuIndexFlat> {
64         if (FLAGS_pinned_mem >= 0) {
65             ((faiss::gpu::StandardGpuResources*)res)
66                     ->setPinnedMemory(FLAGS_pinned_mem);
67         }
68 
69         GpuIndexFlatConfig config;
70         config.device = dev;
71         config.useFloat16 = FLAGS_use_float16;
72         config.storeTransposed = FLAGS_transposed;
73 
74         auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>(
75                 FLAGS_L2_metric
76                         ? (faiss::gpu::GpuIndexFlat*)new faiss::gpu::
77                                   GpuIndexFlatL2(res, FLAGS_dim, config)
78                         : (faiss::gpu::GpuIndexFlat*)new faiss::gpu::
79                                   GpuIndexFlatIP(res, FLAGS_dim, config));
80 
81         if (FLAGS_min_paging_size >= 0) {
82             p->setMinPagingSize(FLAGS_min_paging_size);
83         }
84         return p;
85     };
86 
87     IndexWrapper<faiss::gpu::GpuIndexFlat> gpuIndex(FLAGS_num_gpus, initFn);
88 
89     CUDA_VERIFY(cudaProfilerStart());
90     faiss::gpu::synchronizeAllDevices();
91 
92     float gpuTime = 0.0f;
93 
94     faiss::ClusteringParameters cp;
95     cp.niter = FLAGS_niter;
96     cp.verbose = FLAGS_verbose;
97 
98     if (FLAGS_max_points > 0) {
99         cp.max_points_per_centroid = FLAGS_max_points;
100     }
101 
102     faiss::Clustering kmeans(FLAGS_dim, FLAGS_k, cp);
103 
104     // Time k-means
105     {
106         CpuTimer timer;
107 
108         kmeans.train(FLAGS_num, vecs.data(), *(gpuIndex.getIndex()));
109 
110         // There is a device -> host copy above, so no need to time
111         // additional synchronization with the GPU
112         gpuTime = timer.elapsedMilliseconds();
113     }
114 
115     CUDA_VERIFY(cudaProfilerStop());
116     printf("k-means time %.3f ms\n", gpuTime);
117 
118     CUDA_VERIFY(cudaDeviceSynchronize());
119 
120     return 0;
121 }
122