1 /**
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
#include <faiss/Clustering.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/gpu/perf/IndexWrapper.h>
#include <faiss/gpu/utils/DeviceUtils.h>
#include <faiss/gpu/utils/Timer.h>
#include <faiss/utils/random.h>
#include <gflags/gflags.h>
#include <cstdio>
#include <ctime>
#include <memory>
#include <vector>

#include <cuda_profiler_api.h>
20
// Command-line flags (gflags) that configure the k-means benchmark run by
// main(). Each flag is read in main() through its FLAGS_<name> variable.

// Problem size and clustering settings.
DEFINE_int32(num, 10000, "# of vecs");
DEFINE_int32(k, 100, "# of clusters");
DEFINE_int32(dim, 128, "# of dimensions");
DEFINE_int32(niter, 10, "# of iterations");

// Index configuration: selects GpuIndexFlatL2 vs. GpuIndexFlatIP and is
// forwarded to GpuIndexFlatConfig (useFloat16 / storeTransposed).
DEFINE_bool(L2_metric, true, "If true, use L2 metric. If false, use IP metric");
DEFINE_bool(use_float16, false, "use float16 vectors and math");
DEFINE_bool(transposed, false, "transposed vector storage");

// Forwarded to faiss::ClusteringParameters::verbose.
DEFINE_bool(verbose, false, "turn on clustering logging");

// The default of -1 makes main() take the seed from the current time instead.
DEFINE_int64(seed, -1, "specify random seed");
DEFINE_int32(num_gpus, 1, "number of gpus to use");

// Only applied (via setMinPagingSize) when >= 0; the default of -1 leaves the
// index's own setting untouched.
DEFINE_int64(
        min_paging_size,
        -1,
        "minimum size to use CPU -> GPU paged copies");

// Only applied (via setPinnedMemory) when >= 0.
DEFINE_int64(pinned_mem, -1, "pinned memory allocation to use");

// Only applied (as max_points_per_centroid) when > 0.
DEFINE_int32(max_points, -1, "max points per centroid");
37
38 using namespace faiss::gpu;
39
main(int argc,char ** argv)40 int main(int argc, char** argv) {
41 gflags::ParseCommandLineFlags(&argc, &argv, true);
42
43 cudaProfilerStop();
44
45 auto seed = FLAGS_seed != -1L ? FLAGS_seed : time(nullptr);
46 printf("using seed %ld\n", seed);
47
48 std::vector<float> vecs((size_t)FLAGS_num * FLAGS_dim);
49 faiss::float_rand(vecs.data(), vecs.size(), seed);
50
51 printf("K-means metric %s dim %d centroids %d num train %d niter %d\n",
52 FLAGS_L2_metric ? "L2" : "IP",
53 FLAGS_dim,
54 FLAGS_k,
55 FLAGS_num,
56 FLAGS_niter);
57 printf("float16 math %s\n", FLAGS_use_float16 ? "enabled" : "disabled");
58 printf("transposed storage %s\n",
59 FLAGS_transposed ? "enabled" : "disabled");
60 printf("verbose %s\n", FLAGS_verbose ? "enabled" : "disabled");
61
62 auto initFn = [](faiss::gpu::GpuResourcesProvider* res,
63 int dev) -> std::unique_ptr<faiss::gpu::GpuIndexFlat> {
64 if (FLAGS_pinned_mem >= 0) {
65 ((faiss::gpu::StandardGpuResources*)res)
66 ->setPinnedMemory(FLAGS_pinned_mem);
67 }
68
69 GpuIndexFlatConfig config;
70 config.device = dev;
71 config.useFloat16 = FLAGS_use_float16;
72 config.storeTransposed = FLAGS_transposed;
73
74 auto p = std::unique_ptr<faiss::gpu::GpuIndexFlat>(
75 FLAGS_L2_metric
76 ? (faiss::gpu::GpuIndexFlat*)new faiss::gpu::
77 GpuIndexFlatL2(res, FLAGS_dim, config)
78 : (faiss::gpu::GpuIndexFlat*)new faiss::gpu::
79 GpuIndexFlatIP(res, FLAGS_dim, config));
80
81 if (FLAGS_min_paging_size >= 0) {
82 p->setMinPagingSize(FLAGS_min_paging_size);
83 }
84 return p;
85 };
86
87 IndexWrapper<faiss::gpu::GpuIndexFlat> gpuIndex(FLAGS_num_gpus, initFn);
88
89 CUDA_VERIFY(cudaProfilerStart());
90 faiss::gpu::synchronizeAllDevices();
91
92 float gpuTime = 0.0f;
93
94 faiss::ClusteringParameters cp;
95 cp.niter = FLAGS_niter;
96 cp.verbose = FLAGS_verbose;
97
98 if (FLAGS_max_points > 0) {
99 cp.max_points_per_centroid = FLAGS_max_points;
100 }
101
102 faiss::Clustering kmeans(FLAGS_dim, FLAGS_k, cp);
103
104 // Time k-means
105 {
106 CpuTimer timer;
107
108 kmeans.train(FLAGS_num, vecs.data(), *(gpuIndex.getIndex()));
109
110 // There is a device -> host copy above, so no need to time
111 // additional synchronization with the GPU
112 gpuTime = timer.elapsedMilliseconds();
113 }
114
115 CUDA_VERIFY(cudaProfilerStop());
116 printf("k-means time %.3f ms\n", gpuTime);
117
118 CUDA_VERIFY(cudaDeviceSynchronize());
119
120 return 0;
121 }
122