1 /* ************************************************************************
2 * Copyright 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 * ************************************************************************/
16
#include <sys/types.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
21
/* Include the clBLAS header. It automatically includes the required OpenCL
 * header, so an explicit inclusion of cl.h is not needed.
 */
25 #include <clBLAS.h>
26
27 /* This example uses predefined matrices and their characteristics for
28 * simplicity purpose.
29 */
30 static const size_t N = 7;
31 static cl_float X[] = {
32 1,
33 2,
34 -11,
35 17,
36 5,
37 6,
38 800,
39 10
40 };
41 static const int incx = 1;
42 static cl_uint indexMax;
43
44 int
main(void)45 main(void)
46 {
47 cl_int err;
48 cl_platform_id platform = 0;
49 cl_device_id device = 0;
50 cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
51 cl_context ctx = 0;
52 cl_command_queue queue = 0;
53 cl_mem bufX, scratchBuf, iMax;
54 cl_event event = NULL;
55 int ret = 0;
56 int lenX = 1 + (N-1)*abs(incx);
57 int lenScratchBuf = N;
58
59 /* Setup OpenCL environment. */
60 err = clGetPlatformIDs(1, &platform, NULL);
61 if (err != CL_SUCCESS) {
62 printf( "clGetPlatformIDs() failed with %d\n", err );
63 return 1;
64 }
65
66 err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
67 if (err != CL_SUCCESS) {
68 printf( "clGetDeviceIDs() failed with %d\n", err );
69 return 1;
70 }
71
72 props[1] = (cl_context_properties)platform;
73 ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
74 if (err != CL_SUCCESS) {
75 printf( "clCreateContext() failed with %d\n", err );
76 return 1;
77 }
78
79 queue = clCreateCommandQueue(ctx, device, 0, &err);
80 if (err != CL_SUCCESS) {
81 printf( "clCreateCommandQueue() failed with %d\n", err );
82 clReleaseContext(ctx);
83 return 1;
84 }
85
86 /* Setup clblas. */
87 err = clblasSetup();
88 if (err != CL_SUCCESS) {
89 printf("clblasSetup() failed with %d\n", err);
90 clReleaseCommandQueue(queue);
91 clReleaseContext(ctx);
92 return 1;
93 }
94 /* Prepare OpenCL memory objects and place matrices inside them. */
95 bufX = clCreateBuffer(ctx, CL_MEM_READ_ONLY, (lenX*sizeof(cl_float)), NULL, &err);
96
97 // Allocate minimum of (N/64) elements. But here allocating N elements for the sake of simplicity
98 scratchBuf = clCreateBuffer(ctx, CL_MEM_READ_WRITE, (lenScratchBuf*sizeof(cl_float) * 2), NULL, &err);
99
100 // Buffer to return the index of max absolute value in X
101 iMax = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY, sizeof(cl_uint), NULL, &err);
102
103 err = clEnqueueWriteBuffer(queue, bufX, CL_TRUE, 0, (lenX*sizeof(cl_float)) , X, 0, NULL, NULL);
104
105 /* Call clblas function. */
106 err = clblasiSamax( N, iMax, 0, bufX, 0, incx, scratchBuf,
107 1, &queue, 0, NULL, &event);
108 if (err != CL_SUCCESS) {
109 printf("clblasiSamax() failed with %d\n", err);
110 ret = 1;
111 }
112 else {
113 /* Wait for calculations to be finished. */
114 err = clWaitForEvents(1, &event);
115
116 /* Fetch results of calculations from GPU memory. */
117 err = clEnqueueReadBuffer(queue, iMax, CL_TRUE, 0, sizeof(cl_uint),
118 &indexMax, 0, NULL, NULL);
119 printf("Result amax: %d\n", indexMax);
120 }
121
122 /* Release OpenCL events. */
123 clReleaseEvent(event);
124
125 /* Release OpenCL memory objects. */
126 clReleaseMemObject(bufX);
127 clReleaseMemObject(scratchBuf);
128 clReleaseMemObject(iMax);
129
130 /* Finalize work with clblas. */
131 clblasTeardown();
132
133 /* Release OpenCL working objects. */
134 clReleaseCommandQueue(queue);
135 clReleaseContext(ctx);
136
137 return ret;
138 }
139