1 // This file is part of Eigen, a lightweight C++ template library
2 // for linear algebra.
3 //
4 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog@gmail.com>
5 //
6 // This Source Code Form is subject to the terms of the Mozilla
7 // Public License v. 2.0. If a copy of the MPL was not distributed
8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
10 #define EIGEN_TEST_NO_LONGDOUBLE
11 #define EIGEN_TEST_FUNC cxx11_tensor_complex_cwise_ops
12 #define EIGEN_USE_GPU
13
14 #include "main.h"
15 #include <unsupported/Eigen/CXX11/Tensor>
16
17 using Eigen::Tensor;
18
19 template<typename T>
test_cuda_complex_cwise_ops()20 void test_cuda_complex_cwise_ops() {
21 const int kNumItems = 2;
22 std::size_t complex_bytes = kNumItems * sizeof(std::complex<T>);
23
24 std::complex<T>* d_in1;
25 std::complex<T>* d_in2;
26 std::complex<T>* d_out;
27 cudaMalloc((void**)(&d_in1), complex_bytes);
28 cudaMalloc((void**)(&d_in2), complex_bytes);
29 cudaMalloc((void**)(&d_out), complex_bytes);
30
31 Eigen::CudaStreamDevice stream;
32 Eigen::GpuDevice gpu_device(&stream);
33
34 Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in1(
35 d_in1, kNumItems);
36 Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_in2(
37 d_in2, kNumItems);
38 Eigen::TensorMap<Eigen::Tensor<std::complex<T>, 1, 0, int>, Eigen::Aligned> gpu_out(
39 d_out, kNumItems);
40
41 const std::complex<T> a(3.14f, 2.7f);
42 const std::complex<T> b(-10.6f, 1.4f);
43
44 gpu_in1.device(gpu_device) = gpu_in1.constant(a);
45 gpu_in2.device(gpu_device) = gpu_in2.constant(b);
46
47 enum CwiseOp {
48 Add = 0,
49 Sub,
50 Mul,
51 Div
52 };
53
54 Tensor<std::complex<T>, 1, 0, int> actual(kNumItems);
55 for (int op = Add; op <= Div; op++) {
56 std::complex<T> expected;
57 switch (static_cast<CwiseOp>(op)) {
58 case Add:
59 gpu_out.device(gpu_device) = gpu_in1 + gpu_in2;
60 expected = a + b;
61 break;
62 case Sub:
63 gpu_out.device(gpu_device) = gpu_in1 - gpu_in2;
64 expected = a - b;
65 break;
66 case Mul:
67 gpu_out.device(gpu_device) = gpu_in1 * gpu_in2;
68 expected = a * b;
69 break;
70 case Div:
71 gpu_out.device(gpu_device) = gpu_in1 / gpu_in2;
72 expected = a / b;
73 break;
74 }
75 assert(cudaMemcpyAsync(actual.data(), d_out, complex_bytes, cudaMemcpyDeviceToHost,
76 gpu_device.stream()) == cudaSuccess);
77 assert(cudaStreamSynchronize(gpu_device.stream()) == cudaSuccess);
78
79 for (int i = 0; i < kNumItems; ++i) {
80 VERIFY_IS_APPROX(actual(i), expected);
81 }
82 }
83
84 cudaFree(d_in1);
85 cudaFree(d_in2);
86 cudaFree(d_out);
87 }
88
89
test_cxx11_tensor_complex_cwise_ops()90 void test_cxx11_tensor_complex_cwise_ops()
91 {
92 CALL_SUBTEST(test_cuda_complex_cwise_ops<float>());
93 CALL_SUBTEST(test_cuda_complex_cwise_ops<double>());
94 }
95