# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)


#
# The utility functions for GPU computation.
#
# All kernels below are defined inside a try/except so that the module imports
# cleanly on machines without pycuda or a usable CUDA context: in that case the
# GPU helpers are simply absent (best-effort, matching the rest of GPy's GPU
# support).
#
import numpy as np

from ..util import gpu_init

try:
    from pycuda.reduction import ReductionKernel
    from pycuda.elementwise import ElementwiseKernel

    # log|A| for A a lower-triangular matrix.
    # Call as logDiagSum(A, A.shape[0]+1) so that only every step-th element
    # (i.e. the diagonal entries) contributes log(x[i]) to the sum.
    logDiagSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?log(x[i]):0", arguments="double *x, int step")

    # Strided sum: adds up every step-th element of x.
    strideSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?x[i]:0", arguments="double *x, int step")

    # np.trace(np.dot(A,B)) (also equivalent to (A*B.T).sum()); A is a1 x a2, B is a2 x a1.
    # NOTE(review): the B index (i%a1)*a2+i/a1 assumes a particular memory
    # layout for the two operands — confirm against the callers' GPUArray order.
    traceDot = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="A[i]*B[(i%a1)*a2+i/a1]", arguments="double *A, double *B, int a1, int a2")

    #=======================================================================================
    # Element-wise functions
    #=======================================================================================

    # out = log(X), element-wise
    log = ElementwiseKernel("double *in, double *out", "out[i] = log(in[i])", "log_element")

    # out = log(1.0 - X), element-wise
    logOne = ElementwiseKernel("double *in, double *out", "out[i] = log(1.-in[i])", "logOne_element")

    # multiplication with broadcast on the last dimension (out = shorter[:,None]*longer)
    mul_bcast = ElementwiseKernel("double *out, double *shorter, double *longer, int shorter_size", "out[i] = longer[i]*shorter[i%shorter_size]", "mul_bcast")

    # multiplication with broadcast on the first dimension (out = shorter[None,:]*longer)
    # FIX: the kernel label was "mul_bcast", duplicating the kernel above; it
    # now carries its own name so the two compiled kernels are distinguishable.
    mul_bcast_first = ElementwiseKernel("double *out, double *shorter, double *longer, int first_dim", "out[i] = longer[i]*shorter[i/first_dim]", "mul_bcast_first")

    # Sum over the middle dimension (size_2) of a 3D matrix (size_1, size_2, size_3).
    # NOTE: this ACCUMULATES into `out` (+=); callers must zero `out` beforehand.
    sum_axis = ElementwiseKernel("double *out, double *in, int size_1, int size_2", "out[i] += sum_axis_element(in, size_1, size_2, i)", "sum_axis", preamble="""
    __device__ double sum_axis_element(double *in, int size_1, int size_2, int idx)
    {
        int k = idx/size_1;
        int i = idx%size_1;
        double sum=0;
        for(int j=0;j<size_2;j++) {
            sum += in[(k*size_2+j)*size_1+i];
        }
        return sum;
    }
    """)

    # the outer product between two vectors (out = np.dot(v1,v2.T))
    outer_prod = ElementwiseKernel("double *out, double *v1, double *v2, int v1_size", "out[i] = v1[i%v1_size]*v2[i/v1_size]", "outer_prod")

    # the outer product between two vectors (out = np.einsum('na,nb->nab',m1,m2) a=dim1, b=dim2 )
    # NOTE(review): the index arithmetic here depends on the storage order of
    # m1/m2/out — verify against the callers before changing anything.
    join_prod = ElementwiseKernel("double *out, double *m1, double *m2, int dim1, int dim2", "out[i] = m1[(i%dim1)*dim1+(i%(dim1*dim2))/dim1]*m2[(i%dim1)*dim1+i/(dim1*dim2)]", "join_prod")

except Exception:
    # pycuda may be missing (ImportError) or CUDA may be unusable when the
    # kernels are built; either way the GPU helpers are silently unavailable,
    # preserving the original best-effort behaviour. Was a bare `except:`,
    # which also swallowed KeyboardInterrupt/SystemExit — narrowed to Exception.
    pass