# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)


#
# Utility functions for GPU computation
#
import numpy as np

from ..util import gpu_init

try:
    from pycuda.reduction import ReductionKernel
    from pycuda.elementwise import ElementwiseKernel

    # log|A| where A is a lower-triangular matrix (sum of the log of the
    # diagonal); call as logDiagSum(A, A.shape[0]+1)
    logDiagSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?log(x[i]):0", arguments="double *x, int step")
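    # A minimal usage sketch (illustrative, not from the library): assumes an
    # initialized CUDA context (e.g. `import pycuda.autoinit`) and a host
    # lower-triangular Cholesky factor L with a positive diagonal:
    #   from pycuda import gpuarray
    #   L_gpu = gpuarray.to_gpu(np.asfortranarray(L))
    #   half_logdet = logDiagSum(L_gpu, np.int32(L.shape[0]+1)).get()
    #   # equals np.sum(np.log(np.diag(L))), i.e. 0.5*log(det(L.dot(L.T)))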

    # sum of every step-th element of x, starting at x[0] (e.g. the trace of
    # an n x n matrix via strideSum(A, n+1))
    strideSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?x[i]:0", arguments="double *x, int step")

    # np.trace(np.dot(A,B)) (equivalently (A*B.T).sum()); A is a1 x a2, B is a2 x a1
    traceDot = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="A[i]*B[(i%a1)*a2+i/a1]", arguments="double *A, double *B, int a1, int a2")
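    # A hedged check against NumPy (illustrative names; the flat indexing above
    # assumes column-major storage, hence the np.asfortranarray calls):
    #   A_gpu = gpuarray.to_gpu(np.asfortranarray(A))   # A: a1 x a2
    #   B_gpu = gpuarray.to_gpu(np.asfortranarray(B))   # B: a2 x a1
    #   tr = traceDot(A_gpu, B_gpu, np.int32(a1), np.int32(a2)).get()
    #   # should match np.trace(A.dot(B)) up to floating-point rounding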

    #=======================================================================================
    # Element-wise functions
    #=======================================================================================

    # log(X)
    log = ElementwiseKernel("double *in, double *out", "out[i] = log(in[i])", "log_element")

    # log(1.0-X)
    logOne = ElementwiseKernel("double *in, double *out", "out[i] = log(1.-in[i])", "logOne_element")
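    # Usage sketch for the two elementwise kernels above (illustrative names;
    # note the (in, out) argument order, and out must have the same size):
    #   X_gpu = gpuarray.to_gpu(X)
    #   out_gpu = gpuarray.empty_like(X_gpu)
    #   log(X_gpu, out_gpu)      # out_gpu[i] = log(X_gpu[i])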

    # multiplication with broadcast on the last dimension (out = shorter[:,None]*longer)
    mul_bcast = ElementwiseKernel("double *out, double *shorter, double *longer, int shorter_size", "out[i] = longer[i]*shorter[i%shorter_size]", "mul_bcast")

    # multiplication with broadcast on the first dimension (out = shorter[None,:]*longer)
    mul_bcast_first = ElementwiseKernel("double *out, double *shorter, double *longer, int first_dim", "out[i] = longer[i]*shorter[i/first_dim]", "mul_bcast_first")
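    # Usage sketch for the broadcast kernels (illustrative; `longer` is an
    # m x n Fortran-ordered array, and the integer argument is m in both calls):
    #   out_gpu = gpuarray.empty_like(longer_gpu)
    #   mul_bcast(out_gpu, s1_gpu, longer_gpu, np.int32(m))        # s1 has length m
    #   mul_bcast_first(out_gpu, s2_gpu, longer_gpu, np.int32(m))  # s2 has length n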

    # sum over the middle dimension (size_2) of a 3D array (size_1, size_2, size_3);
    # note the `+=`: out must be zero-initialized before the call
    sum_axis = ElementwiseKernel("double *out, double *in, int size_1, int size_2", "out[i] += sum_axis_element(in, size_1, size_2, i)", "sum_axis", preamble="""
        __device__ double sum_axis_element(double *in, int size_1, int size_2, int idx)
        {
            // idx indexes the (size_1, size_3) output; recover its coordinates
            int k = idx/size_1;
            int i = idx%size_1;
            double sum=0;
            // accumulate over the middle dimension
            for(int j=0;j<size_2;j++) {
                sum += in[(k*size_2+j)*size_1+i];
            }
            return sum;
        }
        """)

    # the outer product of two vectors (out = np.outer(v1, v2))
    outer_prod = ElementwiseKernel("double *out, double *v1, double *v2, int v1_size", "out[i] = v1[i%v1_size]*v2[i/v1_size]", "outer_prod")
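    # Usage sketch (illustrative; out is v1_size x v2_size, Fortran-ordered):
    #   out_gpu = gpuarray.empty((v1.size, v2.size), np.float64, order='F')
    #   outer_prod(out_gpu, v1_gpu, v2_gpu, np.int32(v1.size))
    #   # should match np.outer(v1, v2)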

    # the row-wise outer product of two matrices (out = np.einsum('na,nb->nab', m1, m2), a=dim1, b=dim2)
    join_prod = ElementwiseKernel("double *out, double *m1, double *m2, int dim1, int dim2", "out[i] = m1[(i%dim1)*dim1+(i%(dim1*dim2))/dim1]*m2[(i%dim1)*dim1+i/(dim1*dim2)]", "join_prod")
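    # Host-side reference for what join_prod computes (a sketch; m1_cpu is
    # N x dim1 and m2_cpu is N x dim2):
    #   ref = np.einsum('na,nb->nab', m1_cpu, m2_cpu)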

except Exception:
    # pycuda is unavailable or kernel construction failed; the GPU helpers
    # above are simply left undefined
    pass