// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops
#define EIGEN_USE_GPU

#if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
#include <cuda_fp16.h>
#endif
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::Tensor;
template<typename T>
void test_cuda_complex_cwise_ops() {
  const int kNumItems = 2;
  std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>);

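  // Allocate device buffers for the two inputs and the output.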
  std::complex<T>* d_in1;
  std::complex<T>* d_in2;
  std::complex<T>* d_out;
  cudaMalloc((void**)(&d_in1), complex_bytes);
  cudaMalloc((void**)(&d_in2), complex_bytes);
  cudaMalloc((void**)(&d_out), complex_bytes);

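  // Run all tensor expressions on a dedicated CUDA stream through Eigen's GpuDevice.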
  Eigen::CudaStreamDevice stream;
  Eigen::GpuDevice gpu_device(&stream);

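  // Map the raw device pointers as 1-D tensors of length kNumItems.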
  Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1(
      d_in1, kNumItems);
  Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2(
      d_in2, kNumItems);
  Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out(
      d_out, kNumItems);

  const std::complex<T> a(3.14f, 2.7f);
  const std::complex<T> b(-10.6f, 1.4f);

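  // Fill both inputs with constant complex values, evaluated on the GPU.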
  gpu_in1.device(gpu_device) = gpu_in1.constant(a);
  gpu_in2.device(gpu_device) = gpu_in2.constant(b);

  enum CwiseOp {
    Add = 0,
    Sub,
    Mul,
    Div
  };

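  // Evaluate each coefficient-wise operator on the device and compare against
  // the expected value computed with host-side std::complex arithmetic.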
  Tensor<std::complex<T>, 1, 0, int> actual(kNumItems);
  for (int op = Add; op <= Div; op++) {
    std::complex<T> expected;
    switch (static_cast<CwiseOp>(op)) {
      case Add:
        gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
        expected = a + b;
        break;
      case Sub:
        gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;
        expected = a - b;
        break;
      case Mul:
        gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;
        expected = a * b;
        break;
      case Div:
        gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;
        expected = a / b;
        break;
    }
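    // Copy the device result back on the test stream and wait for it to finish.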
    assert(cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost,
                           gpu_device.stream()) == cudaSuccess);
    assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);

    for (int i = 0; i < kNumItems; ++i) {
      VERIFY_IS_APPROX(actual(i), expected);
    }
  }

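  // Release the device buffers.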
  cudaFree(d_in1);
  cudaFree(d_in2);
  cudaFree(d_out);
}


void test_cxx11_tensor_complex_cwise_ops()
{
  CALL_SUBTEST(test_cuda_complex_cwise_ops<float>());
  CALL_SUBTEST(test_cuda_complex_cwise_ops<double>());
}