1 /*
2 Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
3
4 Redistribution and use in source and binary forms, with or without
5 modification, are permitted provided that the following conditions
6 are met:
7
8 * Redistributions of source code must retain the above copyright
9 notice, this list of conditions and the following disclaimer.
10 * Redistributions in binary form must reproduce the above copyright
11 notice, this list of conditions and the following disclaimer in the
12 documentation and/or other materials provided with the distribution.
13 * Neither the name of Intel Corporation nor the names of its
14 contributors may be used to endorse or promote products derived
15 from this software without specific prior written permission.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30
31 #include "cean_util.h"
32 #include "offload_common.h"
33
34 // 1. allocate element of CeanReadRanges type
35 // 2. initialized it for reading consequently contiguous ranges
36 // described by "ap" argument
init_read_ranges_arr_desc(const Arr_Desc * ap)37 CeanReadRanges * init_read_ranges_arr_desc(const Arr_Desc *ap)
38 {
39 CeanReadRanges * res;
40
41 // find the max contiguous range
42 int64_t rank = ap->rank - 1;
43 int64_t length = ap->dim[rank].size;
44 for (; rank >= 0; rank--) {
45 if (ap->dim[rank].stride == 1) {
46 length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
47 if (rank > 0 && length != ap->dim[rank - 1].size) {
48 break;
49 }
50 }
51 else {
52 break;
53 }
54 }
55
56 res =(CeanReadRanges *)malloc(sizeof(CeanReadRanges) +
57 (ap->rank - rank) * sizeof(CeanReadDim));
58 if (res == NULL)
59 LIBOFFLOAD_ERROR(c_malloc);
60
61 res->arr_desc = const_cast<Arr_Desc*>(ap);
62 res->current_number = 0;
63 res->range_size = length;
64 res->last_noncont_ind = rank;
65
66 // calculate number of contiguous ranges inside noncontiguous dimensions
67 int count = 1;
68 bool prev_is_cont = true;
69 int64_t offset = 0;
70
71 for (; rank >= 0; rank--) {
72 res->Dim[rank].count = count;
73 res->Dim[rank].size = ap->dim[rank].stride * ap->dim[rank].size;
74 count *= (prev_is_cont && ap->dim[rank].stride == 1? 1 :
75 (ap->dim[rank].upper - ap->dim[rank].lower +
76 ap->dim[rank].stride) / ap->dim[rank].stride);
77 prev_is_cont = false;
78 offset +=(ap->dim[rank].lower - ap->dim[rank].lindex) *
79 ap->dim[rank].size;
80 }
81 res->range_max_number = count;
82 res -> ptr = (void*)ap->base;
83 res -> init_offset = offset;
84 return res;
85 }
86
87 // check if ranges described by 1 argument could be transferred into ranges
88 // described by 2-nd one
cean_ranges_match(CeanReadRanges * read_rng1,CeanReadRanges * read_rng2)89 bool cean_ranges_match(
90 CeanReadRanges * read_rng1,
91 CeanReadRanges * read_rng2
92 )
93 {
94 return ( read_rng1 == NULL || read_rng2 == NULL ||
95 (read_rng1->range_size % read_rng2->range_size == 0 ||
96 read_rng2->range_size % read_rng1->range_size == 0));
97 }
98
99 // Set next offset and length and returns true for next range.
100 // Returns false if the ranges are over.
get_next_range(CeanReadRanges * read_rng,int64_t * offset)101 bool get_next_range(
102 CeanReadRanges * read_rng,
103 int64_t *offset
104 )
105 {
106 if (++read_rng->current_number > read_rng->range_max_number) {
107 read_rng->current_number = 0;
108 return false;
109 }
110 int rank = 0;
111 int num = read_rng->current_number - 1;
112 int64_t cur_offset = 0;
113 int num_loc;
114 for (; rank <= read_rng->last_noncont_ind; rank++) {
115 num_loc = num / read_rng->Dim[rank].count;
116 cur_offset += num_loc * read_rng->Dim[rank].size;
117 num = num % read_rng->Dim[rank].count;
118 }
119 *offset = cur_offset + read_rng->init_offset;
120 return true;
121 }
122
is_arr_desc_contiguous(const Arr_Desc * ap)123 bool is_arr_desc_contiguous(const Arr_Desc *ap)
124 {
125 int64_t rank = ap->rank - 1;
126 int64_t length = ap->dim[rank].size;
127 for (; rank >= 0; rank--) {
128 if (ap->dim[rank].stride > 1 &&
129 ap->dim[rank].upper - ap->dim[rank].lower != 0) {
130 return false;
131 }
132 else if (length != ap->dim[rank].size) {
133 for (; rank >= 0; rank--) {
134 if (ap->dim[rank].upper - ap->dim[rank].lower != 0) {
135 return false;
136 }
137 }
138 return true;
139 }
140 length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
141 }
142 return true;
143 }
144
cean_get_transf_size(CeanReadRanges * read_rng)145 int64_t cean_get_transf_size(CeanReadRanges * read_rng)
146 {
147 return(read_rng->range_max_number * read_rng->range_size);
148 }
149
// Bounds of the address range that has been accumulated but not yet handed
// to the callback.  Shared by the range-generation helpers in this file.
static uint64_t last_left;
static uint64_t last_right;

// Callback type invoked for each completed [low, high] address range.
typedef void (*fpp)(
    const char *spaces,
    uint64_t    low,
    uint64_t    high,
    int         esize,
    bool        print_values
);
159
generate_one_range(const char * spaces,uint64_t lrange,uint64_t rrange,fpp fp,int esize,bool print_values)160 static void generate_one_range(
161 const char *spaces,
162 uint64_t lrange,
163 uint64_t rrange,
164 fpp fp,
165 int esize,
166 bool print_values
167 )
168 {
169 OFFLOAD_TRACE(3,
170 "%s generate_one_range(lrange=%p, rrange=%p, esize=%d)\n",
171 spaces, (void*)lrange, (void*)rrange, esize);
172 if (last_left == -1) {
173 // First range
174 last_left = lrange;
175 }
176 else {
177 if (lrange == last_right+1) {
178 // Extend previous range, don't print
179 }
180 else {
181 (*fp)(spaces, last_left, last_right, esize, print_values);
182 last_left = lrange;
183 }
184 }
185 last_right = rrange;
186 }
187
element_is_contiguous(uint64_t rank,const struct Dim_Desc * ddp)188 static bool element_is_contiguous(
189 uint64_t rank,
190 const struct Dim_Desc *ddp
191 )
192 {
193 if (rank == 1) {
194 return (ddp[0].lower == ddp[0].upper || ddp[0].stride == 1);
195 }
196 else {
197 return ((ddp[0].size == (ddp[1].upper-ddp[1].lower+1)*ddp[1].size) &&
198 element_is_contiguous(rank-1, ddp++));
199 }
200 }
201
generate_mem_ranges_one_rank(const char * spaces,uint64_t base,uint64_t rank,const struct Dim_Desc * ddp,fpp fp,int esize,bool print_values)202 static void generate_mem_ranges_one_rank(
203 const char *spaces,
204 uint64_t base,
205 uint64_t rank,
206 const struct Dim_Desc *ddp,
207 fpp fp,
208 int esize,
209 bool print_values
210 )
211 {
212 uint64_t lindex = ddp->lindex;
213 uint64_t lower = ddp->lower;
214 uint64_t upper = ddp->upper;
215 uint64_t stride = ddp->stride;
216 uint64_t size = ddp->size;
217 OFFLOAD_TRACE(3,
218 "%s "
219 "generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, "
220 "lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n",
221 spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize);
222
223 if (element_is_contiguous(rank, ddp)) {
224 uint64_t lrange, rrange;
225 lrange = base + (lower-lindex)*size;
226 rrange = lrange + (upper-lower+1)*size - 1;
227 generate_one_range(spaces, lrange, rrange, fp, esize, print_values);
228 }
229 else {
230 if (rank == 1) {
231 for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
232 uint64_t lrange, rrange;
233 lrange = base + i*size;
234 rrange = lrange + size - 1;
235 generate_one_range(spaces, lrange, rrange,
236 fp, esize, print_values);
237 }
238 }
239 else {
240 for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
241 generate_mem_ranges_one_rank(
242 spaces, base+i*size, rank-1, ddp+1,
243 fp, esize, print_values);
244
245 }
246 }
247 }
248 }
249
generate_mem_ranges(const char * spaces,const Arr_Desc * adp,bool deref,fpp fp,bool print_values)250 static void generate_mem_ranges(
251 const char *spaces,
252 const Arr_Desc *adp,
253 bool deref,
254 fpp fp,
255 bool print_values
256 )
257 {
258 uint64_t esize;
259
260 OFFLOAD_TRACE(3,
261 "%s "
262 "generate_mem_ranges(adp=%p, deref=%d, fp)\n",
263 spaces, adp, deref);
264 last_left = -1;
265 last_right = -2;
266
267 // Element size is derived from last dimension
268 esize = adp->dim[adp->rank-1].size;
269
270 generate_mem_ranges_one_rank(
271 // For c_cean_var the base addr is the address of the data
272 // For c_cean_var_ptr the base addr is dereferenced to get to the data
273 spaces, deref ? *((uint64_t*)(adp->base)) : adp->base,
274 adp->rank, &adp->dim[0], fp, esize, print_values);
275 (*fp)(spaces, last_left, last_right, esize, print_values);
276 }
277
278 // returns offset and length of the data to be transferred
__arr_data_offset_and_length(const Arr_Desc * adp,int64_t & offset,int64_t & length)279 void __arr_data_offset_and_length(
280 const Arr_Desc *adp,
281 int64_t &offset,
282 int64_t &length
283 )
284 {
285 int64_t rank = adp->rank - 1;
286 int64_t size = adp->dim[rank].size;
287 int64_t r_off = 0; // offset from right boundary
288
289 // find the rightmost dimension which takes just part of its
290 // range. We define it if the size of left rank is not equal
291 // the range's length between upper and lower boungaries
292 while (rank > 0) {
293 size *= (adp->dim[rank].upper - adp->dim[rank].lower + 1);
294 if (size != adp->dim[rank - 1].size) {
295 break;
296 }
297 rank--;
298 }
299
300 offset = (adp->dim[rank].lower - adp->dim[rank].lindex) *
301 adp->dim[rank].size;
302
303 // find gaps both from the left - offset and from the right - r_off
304 for (rank--; rank >= 0; rank--) {
305 offset += (adp->dim[rank].lower - adp->dim[rank].lindex) *
306 adp->dim[rank].size;
307 r_off += adp->dim[rank].size -
308 (adp->dim[rank + 1].upper - adp->dim[rank + 1].lindex + 1) *
309 adp->dim[rank + 1].size;
310 }
311 length = (adp->dim[0].upper - adp->dim[0].lindex + 1) *
312 adp->dim[0].size - offset - r_off;
313 }
314
315 #if OFFLOAD_DEBUG > 0
316
print_range(const char * spaces,uint64_t low,uint64_t high,int esize,bool print_values)317 static void print_range(
318 const char *spaces,
319 uint64_t low,
320 uint64_t high,
321 int esize,
322 bool print_values
323 )
324 {
325 char buffer[1024];
326 char number[32];
327
328 OFFLOAD_TRACE(3, "%s print_range(low=%p, high=%p, esize=%d)\n",
329 spaces, (void*)low, (void*)high, esize);
330
331 if (console_enabled < 4 || !print_values) {
332 return;
333 }
334 OFFLOAD_TRACE(4, "%s values:\n", spaces);
335 int count = 0;
336 buffer[0] = '\0';
337 while (low <= high)
338 {
339 switch (esize)
340 {
341 case 1:
342 sprintf(number, "%d ", *((char *)low));
343 low += 1;
344 break;
345 case 2:
346 sprintf(number, "%d ", *((short *)low));
347 low += 2;
348 break;
349 case 4:
350 sprintf(number, "%d ", *((int *)low));
351 low += 4;
352 break;
353 default:
354 sprintf(number, "0x%016x ", *((uint64_t *)low));
355 low += 8;
356 break;
357 }
358 strcat(buffer, number);
359 count++;
360 if (count == 10) {
361 OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer);
362 count = 0;
363 buffer[0] = '\0';
364 }
365 }
366 if (count != 0) {
367 OFFLOAD_TRACE(4, "%s %s\n", spaces, buffer);
368 }
369 }
370
__arr_desc_dump(const char * spaces,const char * name,const Arr_Desc * adp,bool deref,bool print_values)371 void __arr_desc_dump(
372 const char *spaces,
373 const char *name,
374 const Arr_Desc *adp,
375 bool deref,
376 bool print_values
377 )
378 {
379 OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp);
380
381 if (adp != 0) {
382 OFFLOAD_TRACE(2, "%s base=%llx, rank=%lld\n",
383 spaces, adp->base, adp->rank);
384
385 for (int i = 0; i < adp->rank; i++) {
386 OFFLOAD_TRACE(2,
387 "%s dimension %d: size=%lld, lindex=%lld, "
388 "lower=%lld, upper=%lld, stride=%lld\n",
389 spaces, i, adp->dim[i].size, adp->dim[i].lindex,
390 adp->dim[i].lower, adp->dim[i].upper,
391 adp->dim[i].stride);
392 }
393 // For c_cean_var the base addr is the address of the data
394 // For c_cean_var_ptr the base addr is dereferenced to get to the data
395 generate_mem_ranges(spaces, adp, deref, &print_range, print_values);
396 }
397 }
398
noncont_struct_dump(const char * spaces,const char * name,struct NonContigDesc * desc_p)399 void noncont_struct_dump(
400 const char *spaces,
401 const char *name,
402 struct NonContigDesc *desc_p)
403 {
404 OFFLOAD_TRACE(2, "%s%s NonCont Struct expression %p\n",
405 spaces, name, desc_p->base);
406 if (desc_p) {
407 OFFLOAD_TRACE(2, "%s%s base=%p\n", spaces, name, desc_p->base);
408 for (int i = 0; i < desc_p->interval_cnt; i++) {
409 OFFLOAD_TRACE(2,"%s dimension %d: lower=%lld, size=%lld\n",
410 spaces, i, desc_p->interval[i].lower, desc_p->interval[i].size);
411 }
412 }
413 }
414
get_noncont_struct_size(struct NonContigDesc * desc_p)415 int64_t get_noncont_struct_size(struct NonContigDesc *desc_p)
416 {
417 int index = desc_p->interval_cnt - 1;
418 return(desc_p->interval[index].lower + desc_p->interval[index].size);
419 }
420
421 #endif // OFFLOAD_DEBUG
422