1 #ifndef VIENNACL_LINALG_CUDA_NMF_OPERATIONS_HPP_
2 #define VIENNACL_LINALG_CUDA_NMF_OPERATIONS_HPP_
3 
4 /* =========================================================================
5    Copyright (c) 2010-2016, Institute for Microelectronics,
6                             Institute for Analysis and Scientific Computing,
7                             TU Wien.
8    Portions of this software are copyright by UChicago Argonne, LLC.
9 
10                             -----------------
11                   ViennaCL - The Vienna Computing Library
12                             -----------------
13 
14    Project Head:    Karl Rupp                   rupp@iue.tuwien.ac.at
15 
16    (A list of authors and contributors can be found in the manual)
17 
18    License:         MIT (X11), see file LICENSE in the base directory
19 ============================================================================= */
20 
21 /** @file viennacl/linalg/cuda/vector_operations.hpp
22  @brief Implementations of NMF operations using CUDA
23  */
24 
25 #include "viennacl/linalg/host_based/nmf_operations.hpp"
26 
27 #include "viennacl/linalg/cuda/common.hpp"
28 
29 namespace viennacl
30 {
31 namespace linalg
32 {
33 namespace cuda
34 {
35 
36 /** @brief Main CUDA kernel for nonnegative matrix factorization of a dense matrices. */
37 template<typename NumericT>
el_wise_mul_div(NumericT * matrix1,NumericT const * matrix2,NumericT const * matrix3,unsigned int size)38 __global__ void el_wise_mul_div(NumericT       * matrix1,
39                                 NumericT const * matrix2,
40                                 NumericT const * matrix3,
41                                 unsigned int size)
42 {
43   for (unsigned int i = blockIdx.x * blockDim.x + threadIdx.x; i < size; i +=gridDim.x * blockDim.x)
44   {
45     NumericT val = matrix1[i] * matrix2[i];
46     NumericT divisor = matrix3[i];
47     matrix1[i] = (divisor > (NumericT) 0.00001) ? (val / divisor) : NumericT(0);
48   }
49 }
50 
51 /** @brief The nonnegative matrix factorization (approximation) algorithm as suggested by Lee and Seung. Factorizes a matrix V with nonnegative entries into matrices W and H such that ||V - W*H|| is minimized.
52  *
53  * @param V     Input matrix
54  * @param W     First factor
55  * @param H     Second factor
56  * @param conf  A configuration object holding tolerances and the like
57  */
58 template<typename NumericT>
nmf(viennacl::matrix_base<NumericT> const & V,viennacl::matrix_base<NumericT> & W,viennacl::matrix_base<NumericT> & H,viennacl::linalg::nmf_config const & conf)59 void nmf(viennacl::matrix_base<NumericT> const & V,
60          viennacl::matrix_base<NumericT> & W,
61          viennacl::matrix_base<NumericT> & H,
62          viennacl::linalg::nmf_config const & conf)
63 {
64   vcl_size_t k = W.size2();
65   conf.iters_ = 0;
66 
67   if (!viennacl::linalg::norm_frobenius(W))
68     W = viennacl::scalar_matrix<NumericT>(W.size1(), W.size2(), NumericT(1.0));
69 
70   if (!viennacl::linalg::norm_frobenius(H))
71     H = viennacl::scalar_matrix<NumericT>(H.size1(), H.size2(), NumericT(1.0));
72 
73   viennacl::matrix_base<NumericT> wn(V.size1(), k, W.row_major());
74   viennacl::matrix_base<NumericT> wd(V.size1(), k, W.row_major());
75   viennacl::matrix_base<NumericT> wtmp(V.size1(), V.size2(), W.row_major());
76 
77   viennacl::matrix_base<NumericT> hn(k, V.size2(), H.row_major());
78   viennacl::matrix_base<NumericT> hd(k, V.size2(), H.row_major());
79   viennacl::matrix_base<NumericT> htmp(k, k, H.row_major());
80 
81   viennacl::matrix_base<NumericT> appr(V.size1(), V.size2(), V.row_major());
82 
83   viennacl::vector<NumericT> diff(V.size1() * V.size2());
84 
85   NumericT last_diff = 0;
86   NumericT diff_init = 0;
87   bool stagnation_flag = false;
88 
89   for (vcl_size_t i = 0; i < conf.max_iterations(); i++)
90   {
91     conf.iters_ = i + 1;
92 
93     hn = viennacl::linalg::prod(trans(W), V);
94     htmp = viennacl::linalg::prod(trans(W), W);
95     hd = viennacl::linalg::prod(htmp, H);
96 
97     el_wise_mul_div<<<128, 128>>>(viennacl::cuda_arg<NumericT>(H),
98                                   viennacl::cuda_arg<NumericT>(hn),
99                                   viennacl::cuda_arg<NumericT>(hd),
100                                   static_cast<unsigned int>(H.internal_size1() * H.internal_size2()));
101     VIENNACL_CUDA_LAST_ERROR_CHECK("el_wise_mul_div");
102 
103     wn   = viennacl::linalg::prod(V, trans(H));
104     wtmp = viennacl::linalg::prod(W, H);
105     wd   = viennacl::linalg::prod(wtmp, trans(H));
106 
107     el_wise_mul_div<<<128, 128>>>(viennacl::cuda_arg<NumericT>(W),
108                                   viennacl::cuda_arg<NumericT>(wn),
109                                   viennacl::cuda_arg<NumericT>(wd),
110                                   static_cast<unsigned int>( W.internal_size1() * W.internal_size2()));
111     VIENNACL_CUDA_LAST_ERROR_CHECK("el_wise_mul_div");
112 
113     if (i % conf.check_after_steps() == 0)  //check for convergence
114     {
115       appr = viennacl::linalg::prod(W, H);
116 
117       appr -= V;
118       NumericT diff_val = viennacl::linalg::norm_frobenius(appr);
119 
120       if (i == 0)
121         diff_init = diff_val;
122 
123       if (conf.print_relative_error())
124         std::cout << diff_val / diff_init << std::endl;
125 
126       // Approximation check
127       if (diff_val / diff_init < conf.tolerance())
128         break;
129 
130       // Stagnation check
131       if (std::fabs(diff_val - last_diff) / (diff_val * conf.check_after_steps()) < conf.stagnation_tolerance()) //avoid situations where convergence stagnates
132       {
133         if (stagnation_flag)  // iteration stagnates (two iterates with no notable progress)
134           break;
135         else
136           // record stagnation in this iteration
137           stagnation_flag = true;
138       } else
139         // good progress in this iteration, so unset stagnation flag
140         stagnation_flag = false;
141 
142       // prepare for next iterate:
143       last_diff = diff_val;
144     }
145   }
146 }
147 
148 } //namespace cuda
149 } //namespace linalg
150 } //namespace viennacl
151 
152 #endif /* VIENNACL_LINALG_CUDA_NMF_OPERATIONS_HPP_ */
153