1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 
10 #define EIGEN_TEST_NO_LONGDOUBLE
11 #define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops
12 #define EIGEN_USE_GPU
13 
14 #include "main.h"
15 #include <unsupported/Eigen/CXX11/Tensor>
16 
17 using Eigen::Tensor;
18 
19 template<typename T>
test_cuda_complex_cwise_ops()20 void test_cuda_complex_cwise_ops() {
21   const int kNumItems = 2;
22   std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>);
23 
24   std::complex<T>* d_in1;
25   std::complex<T>* d_in2;
26   std::complex<T>* d_out;
27   cudaMalloc((void**)(&d_in1), complex_bytes);
28   cudaMalloc((void**)(&d_in2), complex_bytes);
29   cudaMalloc((void**)(&d_out), complex_bytes);
30 
31   Eigen::CudaStreamDevice stream;
32   Eigen::GpuDevice gpu_device(&stream);
33 
34   Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1(
35       d_in1, kNumItems);
36   Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2(
37       d_in2, kNumItems);
38   Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out(
39       d_out, kNumItems);
40 
41   const std::complex<T> a(3.14f, 2.7f);
42   const std::complex<T> b(-10.6f, 1.4f);
43 
44   gpu_in1.device(gpu_device) = gpu_in1.constant(a);
45   gpu_in2.device(gpu_device) = gpu_in2.constant(b);
46 
47   enum CwiseOp {
48     Add = 0,
49     Sub,
50     Mul,
51     Div
52   };
53 
54   Tensor<std::complex<T>, 1, 0, int> actual(kNumItems);
55   for (int op = Add; op <= Div; op++) {
56     std::complex<T> expected;
57     switch (static_cast<CwiseOp>(op)) {
58       case Add:
59         gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
60         expected = a + b;
61         break;
62       case Sub:
63         gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;
64         expected = a - b;
65         break;
66       case Mul:
67         gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;
68         expected = a * b;
69         break;
70       case Div:
71         gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;
72         expected = a / b;
73         break;
74     }
75     assert(cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost,
76                            gpu_device.stream()) == cudaSuccess);
77     assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
78 
79     for (int i = 0; i < kNumItems; ++i) {
80       VERIFY_IS_APPROX(actual(i), expected);
81     }
82   }
83 
84   cudaFree(d_in1);
85   cudaFree(d_in2);
86   cudaFree(d_out);
87 }
88 
89 
test_cxx11_tensor_complex_cwise_ops()90 void test_cxx11_tensor_complex_cwise_ops()
91 {
92   CALL_SUBTEST(test_cuda_complex_cwise_ops<float>());
93   CALL_SUBTEST(test_cuda_complex_cwise_ops<double>());
94 }
95