1 /*
2  * Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * * Redistributions of source code must retain the above copyright
8  *   notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  *   notice, this list of conditions and the following disclaimer in the
11  *   documentation and/or other materials provided with the distribution.
12  * * Neither the name of NVIDIA CORPORATION nor the names of its
13  *   contributors may be used to endorse or promote products derived
14  *   from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30   Extended example of building kernels on the fly through the C interface.
31   The (currently disabled) tests below demonstrate different ways to load
32   source code and call kernels.
33  */
34 
35 
36 #include "AxB_dot3_cuda_tests.hpp"
37 #include "gtest/gtest.h"
38 
39 
40 //int main(int argc, char* argv[]) {
41 #if __cplusplus >= 201103L
42 
43 //#define TEST_RESULT(result) (result ? "PASSED" : "FAILED")
44 //std::cout << "Running tests..."<<std::endl;
45 
46 /* NOTE: every test and driver statement from here to the end of this block comment is disabled.
47 TEST(MergePathDot, PlusTimesffd){
48   bool test_spdot_plus_times_ffd_nu = test_spdotfactoryUM<float,float,double>(256, 32,120,"PLUS_TIMES");
49   EXPECT_EQ( true, test_spdot_plus_times_ffd_nu);
50 }
51 
52 TEST(MergePathDot, PlusTimesffdLarge) {
53   bool test_spdot_plus_times_ffd_lrg_nu = test_spdotfactoryUM<float,float,double>(4096, 256,256,"PLUS_TIMES");
54   EXPECT_EQ(true, test_spdot_plus_times_ffd_lrg_nu);
55 }
56 
57 TEST(MergePathDot, PlusTimesfff) {
58   bool test_spdot_plus_times_fff = test_spdotfactoryUM<float,float,float>(256, 32,32,"PLUS_TIMES");
59   EXPECT_EQ(true, test_spdot_plus_times_fff);
60 }
61 
62 TEST(MergePathDot, PlusTimesffdTiny) {
63   bool test_spdot_plus_times_ffd = test_spdotfactoryUM<float,float,double>(256, 32,32,"PLUS_TIMES");
64   EXPECT_EQ(true, test_spdot_plus_times_ffd);
65 }
66 
67 TEST(VSVSDot, PlusTimesfff) {
68   bool test_spdot_batch_fff = test_spdot_batch_factoryUM<float, float, float>(5, 32, 128, 128, "PLUS_TIMES");
69   EXPECT_EQ( true, test_spdot_batch_fff);
70 }
71 
72 TEST(VSVSDot, PlusTimesiii) {
73   bool test_spdot_batch_iii = test_spdot_batch_factoryUM<int, int, int>(5, 32, 128, 128, "PLUS_TIMES");
74   EXPECT_EQ( true, test_spdot_batch_iii);
75 }
76 
77 
78 
79 //  bool test_spdot_batch_fff = test_spdot_batch_factoryUM<float, float, float>(5, 32, 128, 128, "PLUS_TIMES");
80 
81   cudaSetDevice(0);
82   cudaDeviceReset();
83   bool test_spdot_batch_iii = test_spdot_batch_factoryUM<int, int, int>(5, 32, 128, 128, "PLUS_TIMES");
84   std::cout << "test_spdot_batchUM<int,int,int> uncached:       "
85             << TEST_RESULT(test_spdot_batch_iii)
86             << std::endl;
87 
88   cudaSetDevice(1);
89   cudaDeviceReset();
90 
91   bool test_spdot_batch_iii2= test_spdot_batch_factoryUM<int, int, int64_t>(5, 32, 256, 128, "PLUS_TIMES");
92   std::cout << "test_spdot_batchUM<int,int,int64> uncached:       "
93             << TEST_RESULT(test_spdot_batch_iii2)
94             << std::endl;
95 
96 
97 
98 
99 
100   bool test_dot_min_plus_iil = test_dotfactoryUM<int,int,long>(4096,"MIN_PLUS");
101   std::cout << "test_dotfactoryUM<int,int,long> uncached:       "
102             << TEST_RESULT(test_dot_min_plus_iil)
103             << std::endl;
104 
105   bool test_dot_min_plus_ffd = test_dotfactoryUM<float,float,double>(4096,"MIN_PLUS");
106   std::cout << "test_dotfactoryUM<float,float,double> uncached:       "
107             << TEST_RESULT(test_dot_min_plus_ffd)
108             << std::endl;
109 
110   bool test_dot_plus_times_ffd = test_dotfactoryUM<float,float,double>(4096,"PLUS_TIMES");
111   std::cout << "test_dotfactoryUM<float,float,double> uncached:       "
112             << TEST_RESULT(test_dot_plus_times_ffd)
113             << std::endl;
114 
115   bool test_dot_plus_times_fii = test_dotfactoryUM<float,int,int>(4096,"PLUS_TIMES");
116   std::cout << "test_dotfactoryUM<float,int,int> uncached:       "
117             << TEST_RESULT(test_dot_plus_times_fii)
118             << std::endl;
119 
120   bool test_dot_plus_times_iil = test_dotfactoryUM<int,int,long>(4096,"PLUS_TIMES");
121   std::cout << "test_dotfactoryUM<int,int,long> uncached:       "
122             << TEST_RESULT(test_dot_plus_times_iil)
123             << std::endl;
124 
125   bool test_reducefactory_float_result = test_reducefactoryUM<float>(4096, "PLUS");
126   std::cout << "test_reducefactoryUM<float> uncached:       "
127             << TEST_RESULT(test_reducefactory_float_result)
128             << std::endl;
129 
130   bool test_reducefactory_double_plus_result = test_reducefactoryUM<double>(4096, "PLUS");
131   std::cout << "test_reducefactoryUM<double> uncached:       "
132             << TEST_RESULT(test_reducefactory_double_plus_result)
133             << std::endl;
134 
135   std::cout << "testing cached kernel" <<std::endl;
136   bool test2_reducefactory_double_plus_result = test_reducefactoryUM<double>(4096, "PLUS");
137   std::cout << "test_reducefactoryUM<double> cached:       "
138             << TEST_RESULT(test2_reducefactory_double_plus_result)
139             << std::endl;
140 
141   bool test_reducefactory_float_min_result = test_reducefactoryUM<float>(32,"MIN");
142   std::cout << "test_reducefactoryUM<float> MIN uncached:       "
143             << TEST_RESULT(test_reducefactory_float_min_result)
144             << std::endl;
145 
146   bool test_reducefactory_int_min_result = test_reducefactoryUM<int>(32,"MIN");
147   std::cout << "test_reducefactoryUM<int> MIN uncached:       "
148             << TEST_RESULT(test_reducefactory_int_min_result)
149             << std::endl;
150 
151   bool test_reducefactory_int_max_result = test_reducefactoryUM<int>(32,"MAX");
152   std::cout << "test_reducefactoryUM<int> MAX uncached:       "
153             << TEST_RESULT(test_reducefactory_int_max_result)
154             << std::endl;
155 
156   bool test_reducefactory_int_result = test_reducefactoryUM<int>(4096,"PLUS");
157   std::cout << "test_reducefactoryUM<int> PLUS uncached:       "
158             << TEST_RESULT(test_reducefactory_int_result)
159             << std::endl;
160 
161   bool test_reducefactory_int_cache_result =
162                 test_reducefactoryUM<int>(4096,"PLUS");
163   std::cout << "test_reducefactoryUM<int> PLUS cached:          "
164             << TEST_RESULT(test_reducefactory_int_cache_result)
165             << std::endl;
166 */
167 #endif
168