1 #section init_code
2
3 setup_ext_cuda();
4
5 #section support_code_struct
6
APPLY_SPECIFIC(magma_svd)7 int APPLY_SPECIFIC(magma_svd)(PyGpuArrayObject *A,
8 PyGpuArrayObject **S,
9 PyGpuArrayObject **U, // may be NULL
10 PyGpuArrayObject **VT, // may be NULL
11 PARAMS_TYPE *params) {
12 bool compute_uv = (U != NULL);
13 magma_int_t *iwork = NULL, iunused[1];
14 magma_int_t M, N, K, ldu, ldv, M_U, N_VT, info;
15 magma_vec_t jobz;
16 size_t s_dims[1], u_dims[2], vt_dims[2];
17 float *a_data = NULL, *s_data = NULL, *u_data = NULL, *vt_data = NULL,
18 *work = NULL;
19 float dummy[1];
20 int res = -1, lwork;
21
22 if (A->ga.typecode != GA_FLOAT) {
23 PyErr_SetString(PyExc_TypeError,
24 "GpuMagmaSVD: Unsupported data type");
25 return -1;
26 }
27
28 // This is early to match the exit() in the fail label.
29 cuda_enter(params->context->ctx);
30
31 if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
32 PyErr_SetString(PyExc_ValueError,
33 "GpuMagmaSVD: requires data to be C-contiguous");
34 goto fail;
35 }
36 if (PyGpuArray_NDIM(A) != 2) {
37 PyErr_SetString(PyExc_ValueError,
38 "GpuMagmaSVD: matrix rank error");
39 goto fail;
40 }
41
42 // magma matrix svd
43 // reverse dimensions because MAGMA expects column-major matrices:
44 M = PyGpuArray_DIM(A, 1);
45 N = PyGpuArray_DIM(A, 0);
46 K = M < N ? M : N;
47
48 if (MAGMA_SUCCESS != magma_smalloc_pinned(&a_data, M * N)) {
49 PyErr_SetString(PyExc_RuntimeError,
50 "GpuMagmaSVD: failed to allocate memory");
51 goto fail;
52 }
53 cudaMemcpy(a_data, PyGpuArray_DEV_DATA(A), M * N * sizeof(float),
54 cudaMemcpyDeviceToDevice);
55
56 if (MAGMA_SUCCESS != magma_smalloc_pinned(&s_data, K)) {
57 PyErr_SetString(PyExc_RuntimeError,
58 "GpuMagmaSVD: failed to allocate memory");
59 goto fail;
60 }
61
62 if (compute_uv) {
63 if (params->full_matrices) {
64 jobz = MagmaAllVec;
65 } else {
66 jobz = MagmaSomeVec;
67 }
68 M_U = (jobz == MagmaAllVec ? M : K);
69 N_VT = (jobz == MagmaAllVec ? N : K);
70 ldu = M;
71 ldv = N_VT;
72
73 if (MAGMA_SUCCESS != magma_smalloc_pinned(&u_data, M_U * M)) {
74 PyErr_SetString(PyExc_RuntimeError,
75 "GpuMagmaSVD: failed to allocate memory");
76 goto fail;
77 }
78 if (MAGMA_SUCCESS != magma_smalloc_pinned(&vt_data, N * N_VT)) {
79 PyErr_SetString(PyExc_RuntimeError,
80 "GpuMagmaSVD: failed to allocate memory");
81 goto fail;
82 }
83 } else {
84 jobz = MagmaNoVec;
85 ldu = M;
86 ldv = N;
87 }
88
89 // query for workspace size
90 magma_sgesdd(jobz, M, N, NULL, M, NULL, NULL, ldu, NULL, ldv,
91 dummy, -1, iunused, &info);
92
93 lwork = (magma_int_t) MAGMA_S_REAL(dummy[0]);
94 if (MAGMA_SUCCESS != magma_smalloc_pinned(&work, lwork)) {
95 PyErr_SetString(PyExc_RuntimeError,
96 "GpuMagmaSVD: failed to allocate working memory");
97 goto fail;
98 }
99
100 if (MAGMA_SUCCESS != magma_imalloc_cpu(&iwork, 8*K)) {
101 PyErr_SetString(PyExc_RuntimeError,
102 "GpuMagmaSVD: failed to allocate working memory");
103 goto fail;
104 }
105
106 // compute svd
107 magma_sgesdd(jobz, M, N, a_data, M, s_data,
108 u_data, ldu, vt_data, ldv, work, lwork, iwork, &info);
109 if (info > 0) {
110 PyErr_Format(
111 PyExc_RuntimeError,
112 "GpuMagmaSVD: the updating process of SBDSDC did not converge (error: %d)",
113 info);
114 goto fail;
115 } else if (info < 0) {
116 PyErr_Format(
117 PyExc_RuntimeError,
118 "GpuMagmaSVD: magma_sgesdd_gpu argument %d has an illegal value", -info);
119 goto fail;
120 }
121
122 s_dims[0] = K;
123 if (theano_prep_output(S, 1, s_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){
124 PyErr_SetString(PyExc_RuntimeError,
125 "GpuMagmaSVD: failed to allocate memory");
126 goto fail;
127 }
128 cudaMemcpy(PyGpuArray_DEV_DATA(*S), s_data, K * sizeof(float),
129 cudaMemcpyDeviceToDevice);
130
131 if (compute_uv) {
132 u_dims[0] = N; u_dims[1] = N_VT;
133 if (theano_prep_output(U, 2, u_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){
134 PyErr_SetString(PyExc_RuntimeError,
135 "GpuMagmaSVD: failed to allocate memory");
136 goto fail;
137 }
138 // magma expects column-major matrices. Exchange u_data -> VT and vt_data -> U
139 // to match numpy.linalg.svd output
140 cudaMemcpy(PyGpuArray_DEV_DATA(*U), vt_data, N * N_VT * sizeof(float),
141 cudaMemcpyDeviceToDevice);
142
143 vt_dims[0] = M_U; vt_dims[1] = M;
144 if (theano_prep_output(VT, 2, vt_dims, A->ga.typecode, GA_C_ORDER, params->context) != 0){
145 PyErr_SetString(PyExc_RuntimeError,
146 "GpuMagmaSVD: failed to allocate memory");
147 goto fail;
148 }
149 // magma expects column-major matrices. Exchange u_data -> VT and vt_data -> U
150 // to match numpy.linalg.svd output
151 cudaMemcpy(PyGpuArray_DEV_DATA(*VT), u_data, M_U * M * sizeof(float),
152 cudaMemcpyDeviceToDevice);
153 }
154 res = 0;
155 fail:
156 if (a_data != NULL)
157 magma_free_pinned(a_data);
158 if (s_data != NULL)
159 magma_free_pinned(s_data);
160 if (u_data != NULL)
161 magma_free_pinned(u_data);
162 if (vt_data != NULL)
163 magma_free_pinned(vt_data);
164 if (work != NULL)
165 magma_free_pinned(work);
166 if (iwork != NULL)
167 magma_free_cpu(iwork);
168 cuda_exit(params->context->ctx);
169 return res;
170 }
171