1 #section init_code
2 
3 setup_ext_cuda();
4 
5 #section support_code_struct
6 
APPLY_SPECIFIC(magma_svd)7 int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A,
8                               PyGpuArrayObject **S,
9                               PyGpuArrayObject **U, // may be NULL
10                               PyGpuArrayObject **VT, // may be NULL
11                               PARAMS_TYPE *params) {
12   bool compute_uv = (U != NULL);
13   magma_int_t *iwork = NULL, iunused[1];
14   magma_int_t M, N, K, ldu, ldv, M_U, N_VT, info;
15   magma_vec_t jobz;
16   size_t s_dims[1], u_dims[2], vt_dims[2];
17   float *a_data = NULL, *s_data = NULL, *u_data = NULL, *vt_data = NULL,
18         *work = NULL;
19   float dummy[1];
20   int res = -1, lwork;
21 
22   if (A->ga.typecode != GA_FLOAT) {
23     PyErr_SetString(PyExc_TypeError,
24                     "GpuMagmaSVD: Unsupported data type");
25     return -1;
26   }
27 
28   // This is early to match the exit() in the fail label.
29   cuda_enter(params->context->ctx);
30 
31   if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
32     PyErr_SetString(PyExc_ValueError,
33                     "GpuMagmaSVD: requires data to be C-contiguous");
34     goto fail;
35   }
36   if (PyGpuArray_NDIM(A) != 2) {
37     PyErr_SetString(PyExc_ValueError,
38                     "GpuMagmaSVD: matrix rank error");
39     goto fail;
40   }
41 
42   // magma matrix svd
43   // reverse dimensions because MAGMA expects column-major matrices:
44   M = PyGpuArray_DIM(A, 1);
45   N = PyGpuArray_DIM(A, 0);
46   K = M < N ? M : N;
47 
48   if (MAGMA_SUCCESS !=  magma_smalloc_pinned(&a_data, M * N)) {
49     PyErr_SetString(PyExc_RuntimeError,
50                     "GpuMagmaSVD: failed to allocate memory");
51     goto fail;
52   }
53   cudaMemcpy(a_data, PyGpuArray_DEV_DATA(A), M * N * sizeof(float),
54              cudaMemcpyDeviceToDevice);
55 
56   if (MAGMA_SUCCESS !=  magma_smalloc_pinned(&s_data, K)) {
57     PyErr_SetString(PyExc_RuntimeError,
58                     "GpuMagmaSVD: failed to allocate memory");
59     goto fail;
60   }
61 
62   if (compute_uv) {
63     if (params->full_matrices) {
64       jobz = MagmaAllVec;
65     } else {
66       jobz = MagmaSomeVec;
67     }
68     M_U  = (jobz == MagmaAllVec ? M : K);
69     N_VT = (jobz == MagmaAllVec ? N : K);
70     ldu = M;
71     ldv = N_VT;
72 
73     if (MAGMA_SUCCESS != magma_smalloc_pinned(&u_data, M_U * M)) {
74       PyErr_SetString(PyExc_RuntimeError,
75                       "GpuMagmaSVD: failed to allocate memory");
76       goto fail;
77     }
78     if (MAGMA_SUCCESS != magma_smalloc_pinned(&vt_data, N * N_VT)) {
79       PyErr_SetString(PyExc_RuntimeError,
80                       "GpuMagmaSVD: failed to allocate memory");
81       goto fail;
82     }
83   } else {
84     jobz = MagmaNoVec;
85     ldu = M;
86     ldv = N;
87   }
88 
89   // query for workspace size
90   magma_sgesdd(jobz, M, N, NULL, M, NULL, NULL, ldu, NULL, ldv,
91                dummy, -1, iunused, &info);
92 
93   lwork = (magma_int_t) MAGMA_S_REAL(dummy[0]);
94   if (MAGMA_SUCCESS != magma_smalloc_pinned(&work, lwork)) {
95     PyErr_SetString(PyExc_RuntimeError,
96                     "GpuMagmaSVD: failed to allocate working memory");
97     goto fail;
98   }
99 
100   if (MAGMA_SUCCESS != magma_imalloc_cpu(&iwork, 8*K)) {
101     PyErr_SetString(PyExc_RuntimeError,
102                     "GpuMagmaSVD: failed to allocate working memory");
103     goto fail;
104   }
105 
106   // compute svd
107   magma_sgesdd(jobz, M, N, a_data, M, s_data,
108                u_data, ldu, vt_data, ldv, work, lwork, iwork, &info);
109   if (info > 0) {
110     PyErr_Format(
111         PyExc_RuntimeError,
112         "GpuMagmaSVD: the updating process of SBDSDC did not converge (error: %d)",
113         info);
114     goto fail;
115   } else if (info < 0) {
116     PyErr_Format(
117         PyExc_RuntimeError,
118         "GpuMagmaSVD: magma_sgesdd_gpu argument %d has an illegal value", -info);
119     goto fail;
120   }
121 
122   s_dims[0] = K;
123   if (theano_prep_output(S, 1, s_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){
124     PyErr_SetString(PyExc_RuntimeError,
125                     "GpuMagmaSVD: failed to allocate memory");
126     goto fail;
127   }
128   cudaMemcpy(PyGpuArray_DEV_DATA(*S), s_data, K * sizeof(float),
129              cudaMemcpyDeviceToDevice);
130 
131   if (compute_uv) {
132     u_dims[0] = N; u_dims[1] = N_VT;
133     if (theano_prep_output(U, 2, u_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){
134       PyErr_SetString(PyExc_RuntimeError,
135                       "GpuMagmaSVD: failed to allocate memory");
136       goto fail;
137     }
138     // magma expects column-major matrices. Exchange u_data -> VT and vt_data -> U
139     // to match numpy.linalg.svd output
140     cudaMemcpy(PyGpuArray_DEV_DATA(*U), vt_data, N * N_VT * sizeof(float),
141                cudaMemcpyDeviceToDevice);
142 
143     vt_dims[0] = M_U; vt_dims[1] = M;
144     if (theano_prep_output(VT, 2, vt_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){
145       PyErr_SetString(PyExc_RuntimeError,
146                       "GpuMagmaSVD: failed to allocate memory");
147       goto fail;
148     }
149     // magma expects column-major matrices. Exchange u_data -> VT and vt_data -> U
150     // to match numpy.linalg.svd output
151     cudaMemcpy(PyGpuArray_DEV_DATA(*VT), u_data, M_U * M * sizeof(float),
152                cudaMemcpyDeviceToDevice);
153   }
154   res = 0;
155 fail:
156   if (a_data != NULL)
157     magma_free_pinned(a_data);
158   if (s_data != NULL)
159     magma_free_pinned(s_data);
160   if (u_data != NULL)
161     magma_free_pinned(u_data);
162   if (vt_data != NULL)
163     magma_free_pinned(vt_data);
164   if (work != NULL)
165     magma_free_pinned(work);
166   if (iwork != NULL)
167     magma_free_cpu(iwork);
168   cuda_exit(params->context->ctx);
169   return res;
170 }
171