1 /* ************************************************************************
2  * Copyright 2013 Advanced Micro Devices, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  * ************************************************************************/
16 
#include <sys/types.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
21 
/* Include the clBLAS header. It automatically pulls in the required OpenCL
 * header, so cl.h does not have to be included explicitly.
 */
25 #include <clBLAS.h>
26 
27 /* This example uses predefined matrices and their characteristics for
28  * simplicity purpose.
29  */
30 static const size_t N = 7;
31 static cl_float X[] = {
32     1,
33     2,
34     -11,
35     17,
36     5,
37     6,
38     800,
39     10
40 };
41 static const int incx = 1;
42 static cl_uint indexMax;
43 
44 int
main(void)45 main(void)
46 {
47     cl_int err;
48     cl_platform_id platform = 0;
49     cl_device_id device = 0;
50     cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
51     cl_context ctx = 0;
52     cl_command_queue queue = 0;
53     cl_mem bufX, scratchBuf, iMax;
54     cl_event event = NULL;
55     int ret = 0;
56 	int lenX = 1 + (N-1)*abs(incx);
57     int lenScratchBuf = N;
58 
59     /* Setup OpenCL environment. */
60     err = clGetPlatformIDs(1, &platform, NULL);
61     if (err != CL_SUCCESS) {
62         printf( "clGetPlatformIDs() failed with %d\n", err );
63         return 1;
64     }
65 
66     err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
67     if (err != CL_SUCCESS) {
68         printf( "clGetDeviceIDs() failed with %d\n", err );
69         return 1;
70     }
71 
72     props[1] = (cl_context_properties)platform;
73     ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
74     if (err != CL_SUCCESS) {
75         printf( "clCreateContext() failed with %d\n", err );
76         return 1;
77     }
78 
79     queue = clCreateCommandQueue(ctx, device, 0, &err);
80     if (err != CL_SUCCESS) {
81         printf( "clCreateCommandQueue() failed with %d\n", err );
82         clReleaseContext(ctx);
83         return 1;
84     }
85 
86     /* Setup clblas. */
87     err = clblasSetup();
88     if (err != CL_SUCCESS) {
89         printf("clblasSetup() failed with %d\n", err);
90         clReleaseCommandQueue(queue);
91         clReleaseContext(ctx);
92         return 1;
93     }
94     /* Prepare OpenCL memory objects and place matrices inside them. */
95     bufX = clCreateBuffer(ctx, CL_MEM_READ_ONLY, (lenX*sizeof(cl_float)), NULL, &err);
96 
97     // Allocate minimum of (N/64) elements. But here allocating N elements for the sake of simplicity
98     scratchBuf = clCreateBuffer(ctx, CL_MEM_READ_WRITE, (lenScratchBuf*sizeof(cl_float) * 2), NULL, &err);
99 
100     // Buffer to return the index of max absolute value in X
101     iMax = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, sizeof(cl_uint), NULL, &err);
102 
103     err = clEnqueueWriteBuffer(queue, bufX, CL_TRUE, 0, (lenX*sizeof(cl_float)) , X, 0, NULL, NULL);
104 
105     /* Call clblas function. */
106     err = clblasiSamax( N, iMax, 0, bufX, 0, incx, scratchBuf,
107                                     1, &queue, 0, NULL, &event);
108     if (err != CL_SUCCESS) {
109         printf("clblasiSamax() failed with %d\n", err);
110         ret = 1;
111     }
112     else {
113         /* Wait for calculations to be finished. */
114         err = clWaitForEvents(1, &event);
115 
116         /* Fetch results of calculations from GPU memory. */
117         err = clEnqueueReadBuffer(queue, iMax, CL_TRUE, 0, sizeof(cl_uint),
118                                     &indexMax, 0, NULL, NULL);
119         printf("Result amax: %d\n", indexMax);
120     }
121 
122     /* Release OpenCL events. */
123     clReleaseEvent(event);
124 
125     /* Release OpenCL memory objects. */
126     clReleaseMemObject(bufX);
127     clReleaseMemObject(scratchBuf);
128     clReleaseMemObject(iMax);
129 
130     /* Finalize work with clblas. */
131     clblasTeardown();
132 
133     /* Release OpenCL working objects. */
134     clReleaseCommandQueue(queue);
135     clReleaseContext(ctx);
136 
137     return ret;
138 }
139