1 #section kernels
2 
3 #kernel triu_kernel : size, size, size, *:
4 #include "cluda.h"
5 
triu_kernel(const ga_size nthreads,const ga_size ncols,const ga_size a_off,GLOBAL_MEM DTYPE_INPUT_0 * a)6 KERNEL void triu_kernel(const ga_size nthreads, const ga_size ncols,
7                         const ga_size a_off, GLOBAL_MEM DTYPE_INPUT_0 *a) {
8   a = (GLOBAL_MEM DTYPE_INPUT_0 *)(((char *)a) + a_off);
9   // grid stride looping
10   for (ga_size index = GID_0 * LDIM_0 + LID_0; index < nthreads;
11        index += LDIM_0 * GDIM_0) {
12     const ga_size ix = index / ncols;
13     const ga_size iy = index % ncols;
14     if (ix > iy) {
15       a[index] = 0.0;
16     }
17   }
18 }
19 
20 #section init_code
21 
22 setup_ext_cuda();
23 
24 #section support_code
25 
pygpu_narrow(PyGpuArrayObject * src,size_t dim,size_t size)26 static PyGpuArrayObject *pygpu_narrow(PyGpuArrayObject *src, size_t dim,
27                                       size_t size) {
28   PyGpuArrayObject *src_view = pygpu_view(src, Py_None);
29   src_view->ga.dimensions[dim] = size;
30   GpuArray_fix_flags(&src_view->ga);
31   return pygpu_copy(src_view, GA_C_ORDER);
32 }
33 
34 #section support_code_struct
35 
APPLY_SPECIFIC(magma_qr)36 int APPLY_SPECIFIC(magma_qr)(PyGpuArrayObject *A_,
37                              PyGpuArrayObject **R,
38                              PyGpuArrayObject **Q, // may be NULL
39                              PARAMS_TYPE* params) {
40   PyGpuArrayObject *A = NULL;
41   magma_int_t M, N, K, nb, ldwork;
42   size_t n2;
43   float *tau_data = NULL;
44   gpudata *work_data = NULL;
45   int res = -1, info;
46   A = A_;
47 
48   if (A->ga.typecode != GA_FLOAT) {
49     PyErr_SetString(PyExc_TypeError,
50                     "GpuMagmaQR: Unsupported data type");
51     return -1;
52   }
53 
54   // This is early to match the exit() in the fail label.
55   cuda_enter(params->context->ctx);
56 
57   if (!GpuArray_IS_C_CONTIGUOUS(&A->ga)) {
58     PyErr_SetString(PyExc_ValueError,
59                     "GpuMagmaQR: requires data to be C-contiguous");
60     goto fail;
61   }
62   if (PyGpuArray_NDIM(A) != 2) {
63     PyErr_SetString(PyExc_ValueError, "GpuMagmaQR: matrix rank error");
64     goto fail;
65   }
66 
67   A = pygpu_copy(A_, GA_F_ORDER);
68   if (A == NULL) {
69     PyErr_SetString(PyExc_RuntimeError,
70                     "GpuMagmaQR: failed to change to column-major order");
71     goto fail;
72   }
73 
74   // magma matrix qr
75   M = PyGpuArray_DIM(A, 0);
76   N = PyGpuArray_DIM(A, 1);
77   K = M < N ? M : N;
78 
79   if (MAGMA_SUCCESS != magma_smalloc_pinned(&tau_data, N * N)) {
80     PyErr_SetString(PyExc_RuntimeError,
81                     "GpuMagmaQR: failed to allocate working memory");
82     goto fail;
83   }
84 
85   nb = magma_get_sgeqrf_nb(M, N);
86   ldwork = (2 * K + magma_roundup(N, 32)) * nb;
87   work_data = gpudata_alloc(params->context->ctx, ldwork * sizeof(float), NULL, 0, NULL);
88   if (work_data == NULL) {
89     PyErr_SetString(PyExc_RuntimeError,
90                     "GpuMagmaQR: failed to allocate working memory");
91     goto fail;
92   }
93 
94   // compute R
95   magma_sgeqrf2_gpu(M, N, (float *)PyGpuArray_DEV_DATA(A), M, tau_data, &info);
96   if (info != 0) {
97     PyErr_Format(
98         PyExc_RuntimeError,
99         "GpuMagmaQR: magma_sgeqrf2_gpu argument %d has an illegal value", -info);
100     goto fail;
101   }
102   *R = pygpu_narrow(A, 0, K);
103   if (*R == NULL) {
104     PyErr_SetString(PyExc_RuntimeError, "GpuMagmaQR: failed to narrow array");
105     goto fail;
106   }
107   n2 = K * N;
108   res = triu_kernel_scall(1, &n2, 0, n2, N, (*R)->ga.offset, (*R)->ga.data);
109   if (res != GA_NO_ERROR) {
110     PyErr_Format(PyExc_RuntimeError, "GpuMagmaQR: triu_kernel %s.",
111                  GpuKernel_error(&k_triu_kernel, res));
112     goto fail;
113   }
114 
115   if (params->complete) {
116     // compute Q
117     Py_XDECREF(A);
118     A = pygpu_copy(A_, GA_F_ORDER);
119     if (A == NULL) {
120       PyErr_SetString(PyExc_RuntimeError,
121                       "GpuMagmaQR: failed to change to column-major order");
122       return -1;
123     }
124     magma_sgeqrf_gpu(M, N, (float *)PyGpuArray_DEV_DATA(A), M, tau_data,
125                      *(float **)work_data, &info);
126     if (info != 0) {
127       PyErr_Format(
128                    PyExc_RuntimeError,
129                    "GpuMagmaQR: magma_sgeqrf_gpu argument %d has an illegal value", -info);
130       goto fail;
131     }
132 
133     magma_sorgqr_gpu(M, K, K, (float *)PyGpuArray_DEV_DATA(A), M, tau_data,
134                      *(float **)work_data, nb, &info);
135     if (info != 0) {
136       PyErr_Format(
137                    PyExc_RuntimeError,
138                    "GpuMagmaQR: magma_sorgqr_gpu argument %d has an illegal value", -info);
139       goto fail;
140     }
141     *Q = pygpu_narrow(A, 1, K);
142     if (*Q == NULL) {
143       PyErr_SetString(PyExc_RuntimeError, "GpuMagmaQR: failed to narrow array");
144       goto fail;
145     }
146   }
147   res = 0;
148 fail:
149   if (tau_data != NULL)
150     magma_free_pinned(tau_data);
151   if (work_data != NULL)
152     gpudata_release(work_data);
153   Py_XDECREF(A);
154   cuda_exit(params->context->ctx);
155   return res;
156 }
157