1 #ifndef VIENNACL_LINALG_CUDA_NMF_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_CUDA_NMF_OPERATIONS_HPP_
3
4 /* =========================================================================
5 Copyright (c) 2010-2016, Institute for Microelectronics,
6 Institute for Analysis and Scientific Computing,
7 TU Wien.
8 Portions of this software are copyright by UChicago Argonne, LLC.
9
10 -----------------
11 ViennaCL - The Vienna Computing Library
12 -----------------
13
14 Project Head: Karl Rupp rupp@iue.tuwien.ac.at
15
16 (A list of authors and contributors can be found in the manual)
17
18 License: MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20
21 /** @file viennacl/linalg/cuda/vector_operations.hpp
22 @brief Implementations of NMF operations using CUDA
23 */
24
25 #include "viennacl/linalg/host_based/nmf_operations.hpp"
26
27 #include "viennacl/linalg/cuda/common.hpp"
28
29 namespace viennacl
30 {
31 namespace linalg
32 {
33 namespace cuda
34 {
35
36 /** @brief Main CUDA kernel for nonnegative matrix factorization of a dense matrices. */
37 template<typename NumericT>
el_wise_mul_div(NumericT * matrix1,NumericT const * matrix2,NumericT const * matrix3,unsigned int size)38 __global__ void el_wise_mul_div(NumericT * matrix1,
39 NumericT const * matrix2,
40 NumericT const * matrix3,
41 unsigned int size)
42 {
43 for (unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i +=gridDim.x * blockDim.x)
44 {
45 NumericT val = matrix1[i] * matrix2[i];
46 NumericT divisor = matrix3[i];
47 matrix1[i] = (divisor > (NumericT) 0.00001) ? (val / divisor) : NumericT(0);
48 }
49 }
50
51 /** @brief The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized.
52 *
53 * @param V Input matrix
54 * @param W First factor
55 * @param H Second factor
56 * @param conf A configuration object holding tolerances and the like
57 */
58 template<typename NumericT>
nmf(viennacl::matrix_base<NumericT> const & V,viennacl::matrix_base<NumericT> & W,viennacl::matrix_base<NumericT> & H,viennacl::linalg::nmf_config const & conf)59 void nmf(viennacl::matrix_base<NumericT> const & V,
60 viennacl::matrix_base<NumericT> & W,
61 viennacl::matrix_base<NumericT> & H,
62 viennacl::linalg::nmf_config const & conf)
63 {
64 vcl_size_t k = W.size2();
65 conf.iters_ = 0;
66
67 if (!viennacl::linalg::norm_frobenius(W))
68 W = viennacl::scalar_matrix<NumericT>(W.size1(), W.size2(), NumericT(1.0));
69
70 if (!viennacl::linalg::norm_frobenius(H))
71 H = viennacl::scalar_matrix<NumericT>(H.size1(), H.size2(), NumericT(1.0));
72
73 viennacl::matrix_base<NumericT> wn(V.size1(), k, W.row_major());
74 viennacl::matrix_base<NumericT> wd(V.size1(), k, W.row_major());
75 viennacl::matrix_base<NumericT> wtmp(V.size1(), V.size2(), W.row_major());
76
77 viennacl::matrix_base<NumericT> hn(k, V.size2(), H.row_major());
78 viennacl::matrix_base<NumericT> hd(k, V.size2(), H.row_major());
79 viennacl::matrix_base<NumericT> htmp(k, k, H.row_major());
80
81 viennacl::matrix_base<NumericT> appr(V.size1(), V.size2(), V.row_major());
82
83 viennacl::vector<NumericT> diff(V.size1() * V.size2());
84
85 NumericT last_diff = 0;
86 NumericT diff_init = 0;
87 bool stagnation_flag = false;
88
89 for (vcl_size_t i = 0; i < conf.max_iterations(); i++)
90 {
91 conf.iters_ = i + 1;
92
93 hn = viennacl::linalg::prod(trans(W), V);
94 htmp = viennacl::linalg::prod(trans(W), W);
95 hd = viennacl::linalg::prod(htmp, H);
96
97 el_wise_mul_div<<<128, 128>>>(viennacl::cuda_arg<NumericT>(H),
98 viennacl::cuda_arg<NumericT>(hn),
99 viennacl::cuda_arg<NumericT>(hd),
100 static_cast<unsigned int>(H.internal_size1() * H.internal_size2()));
101 VIENNACL_CUDA_LAST_ERROR_CHECK("el_wise_mul_div");
102
103 wn = viennacl::linalg::prod(V, trans(H));
104 wtmp = viennacl::linalg::prod(W, H);
105 wd = viennacl::linalg::prod(wtmp, trans(H));
106
107 el_wise_mul_div<<<128, 128>>>(viennacl::cuda_arg<NumericT>(W),
108 viennacl::cuda_arg<NumericT>(wn),
109 viennacl::cuda_arg<NumericT>(wd),
110 static_cast<unsigned int>( W.internal_size1() * W.internal_size2()));
111 VIENNACL_CUDA_LAST_ERROR_CHECK("el_wise_mul_div");
112
113 if (i % conf.check_after_steps() == 0) //check for convergence
114 {
115 appr = viennacl::linalg::prod(W, H);
116
117 appr -= V;
118 NumericT diff_val = viennacl::linalg::norm_frobenius(appr);
119
120 if (i == 0)
121 diff_init = diff_val;
122
123 if (conf.print_relative_error())
124 std::cout << diff_val / diff_init << std::endl;
125
126 // Approximation check
127 if (diff_val / diff_init < conf.tolerance())
128 break;
129
130 // Stagnation check
131 if (std::fabs(diff_val - last_diff) / (diff_val * conf.check_after_steps()) < conf.stagnation_tolerance()) //avoid situations where convergence stagnates
132 {
133 if (stagnation_flag) // iteration stagnates (two iterates with no notable progress)
134 break;
135 else
136 // record stagnation in this iteration
137 stagnation_flag = true;
138 } else
139 // good progress in this iteration, so unset stagnation flag
140 stagnation_flag = false;
141
142 // prepare for next iterate:
143 last_diff = diff_val;
144 }
145 }
146 }
147
148 } //namespace cuda
149 } //namespace linalg
150 } //namespace viennacl
151
152 #endif /* VIENNACL_LINALG_CUDA_NMF_OPERATIONS_HPP_ */
153