1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10 #define EIGEN_TEST_NO_LONGDOUBLE
11 #define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops
12 #define EIGEN_USE_GPU
13
14 #if defined __CUDACC_VER__ && __CUDACC_VER__ >= 70500
15 #include <cuda_fp16.h>
16 #endif
17 #include "main.h"
18 #include <unsupported/Eigen/CXX11/Tensor>
19
20 using Eigen::Tensor;
21
22 template<typename T>
test_cuda_complex_cwise_ops()23 void test_cuda_complex_cwise_ops() {
24 const int kNumItems = 2;
25 std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>);
26
27 std::complex<T>* d_in1;
28 std::complex<T>* d_in2;
29 std::complex<T>* d_out;
30 cudaMalloc((void**)(&d_in1), complex_bytes);
31 cudaMalloc((void**)(&d_in2), complex_bytes);
32 cudaMalloc((void**)(&d_out), complex_bytes);
33
34 Eigen::CudaStreamDevice stream;
35 Eigen::GpuDevice gpu_device(&stream);
36
37 Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1(
38 d_in1, kNumItems);
39 Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2(
40 d_in2, kNumItems);
41 Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out(
42 d_out, kNumItems);
43
44 const std::complex<T> a(3.14f, 2.7f);
45 const std::complex<T> b(-10.6f, 1.4f);
46
47 gpu_in1.device(gpu_device) = gpu_in1.constant(a);
48 gpu_in2.device(gpu_device) = gpu_in2.constant(b);
49
50 enum CwiseOp {
51 Add = 0,
52 Sub,
53 Mul,
54 Div
55 };
56
57 Tensor<std::complex<T>, 1, 0, int> actual(kNumItems);
58 for (int op = Add; op <= Div; op++) {
59 std::complex<T> expected;
60 switch (static_cast<CwiseOp>(op)) {
61 case Add:
62 gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
63 expected = a + b;
64 break;
65 case Sub:
66 gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;
67 expected = a - b;
68 break;
69 case Mul:
70 gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;
71 expected = a * b;
72 break;
73 case Div:
74 gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;
75 expected = a / b;
76 break;
77 }
78 assert(cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost,
79 gpu_device.stream()) == cudaSuccess);
80 assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
81
82 for (int i = 0; i < kNumItems; ++i) {
83 VERIFY_IS_APPROX(actual(i), expected);
84 }
85 }
86
87 cudaFree(d_in1);
88 cudaFree(d_in2);
89 cudaFree(d_out);
90 }
91
92
test_cxx11_tensor_complex_cwise_ops()93 void test_cxx11_tensor_complex_cwise_ops()
94 {
95 CALL_SUBTEST(test_cuda_complex_cwise_ops<float>());
96 CALL_SUBTEST(test_cuda_complex_cwise_ops<double>());
97 }
98