1 /*
2     Copyright (c) 2014-2016 Intel Corporation.  All Rights Reserved.
3 
4     Redistribution and use in source and binary forms, with or without
5     modification, are permitted provided that the following conditions
6     are met:
7 
8       * Redistributions of source code must retain the above copyright
9         notice, this list of conditions and the following disclaimer.
10       * Redistributions in binary form must reproduce the above copyright
11         notice, this list of conditions and the following disclaimer in the
12         documentation and/or other materials provided with the distribution.
13       * Neither the name of Intel Corporation nor the names of its
14         contributors may be used to endorse or promote products derived
15         from this software without specific prior written permission.
16 
17     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20     A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29 
30 
31 #include "cean_util.h"
32 #include "offload_common.h"
33 
34 // 1. allocate element of CeanReadRanges type
35 // 2. initialized it for reading consequently contiguous ranges
36 //    described by "ap" argument
init_read_ranges_arr_desc(const Arr_Desc * ap)37 CeanReadRanges * init_read_ranges_arr_desc(const Arr_Desc *ap)
38 {
39     CeanReadRanges * res;
40 
41     // find the max contiguous range
42     int64_t rank = ap->rank - 1;
43     int64_t length = ap->dim[rank].size;
44     for (; rank >= 0; rank--) {
45         if (ap->dim[rank].stride == 1) {
46             length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
47             if (rank > 0 && length != ap->dim[rank - 1].size) {
48                 break;
49             }
50         }
51         else {
52             break;
53         }
54     }
55 
56     res =(CeanReadRanges *)malloc(sizeof(CeanReadRanges) +
57                                   (ap->rank - rank) * sizeof(CeanReadDim));
58     if (res == NULL)
59       LIBOFFLOAD_ERROR(c_malloc);
60 
61     res->arr_desc = const_cast<Arr_Desc*>(ap);
62     res->current_number = 0;
63     res->range_size = length;
64     res->last_noncont_ind = rank;
65 
66     // calculate number of contiguous ranges inside noncontiguous dimensions
67     int count = 1;
68     bool prev_is_cont = true;
69     int64_t offset = 0;
70 
71     for (; rank >= 0; rank--) {
72         res->Dim[rank].count = count;
73         res->Dim[rank].size = ap->dim[rank].stride * ap->dim[rank].size;
74         count *= (prev_is_cont && ap->dim[rank].stride == 1? 1 :
75             (ap->dim[rank].upper - ap->dim[rank].lower +
76             ap->dim[rank].stride) / ap->dim[rank].stride);
77         prev_is_cont = false;
78         offset +=(ap->dim[rank].lower - ap->dim[rank].lindex) *
79                  ap->dim[rank].size;
80     }
81     res->range_max_number = count;
82     res -> ptr = (void*)ap->base;
83     res -> init_offset = offset;
84     return res;
85 }
86 
87 // check if ranges described by 1 argument could be transferred into ranges
88 // described by 2-nd one
cean_ranges_match(CeanReadRanges * read_rng1,CeanReadRanges * read_rng2)89 bool cean_ranges_match(
90     CeanReadRanges * read_rng1,
91     CeanReadRanges * read_rng2
92 )
93 {
94     return ( read_rng1 == NULL || read_rng2 == NULL ||
95             (read_rng1->range_size % read_rng2->range_size == 0 ||
96             read_rng2->range_size % read_rng1->range_size == 0));
97 }
98 
99 // Set next offset and length and returns true for next range.
100 // Returns false if the ranges are over.
get_next_range(CeanReadRanges * read_rng,int64_t * offset)101 bool get_next_range(
102     CeanReadRanges * read_rng,
103     int64_t *offset
104 )
105 {
106     if (++read_rng->current_number > read_rng->range_max_number) {
107         read_rng->current_number = 0;
108         return false;
109     }
110     int rank = 0;
111     int num = read_rng->current_number - 1;
112     int64_t cur_offset = 0;
113     int num_loc;
114     for (; rank <= read_rng->last_noncont_ind; rank++) {
115         num_loc = num / read_rng->Dim[rank].count;
116         cur_offset += num_loc * read_rng->Dim[rank].size;
117         num = num % read_rng->Dim[rank].count;
118     }
119     *offset = cur_offset + read_rng->init_offset;
120     return true;
121 }
122 
is_arr_desc_contiguous(const Arr_Desc * ap)123 bool is_arr_desc_contiguous(const Arr_Desc *ap)
124 {
125     int64_t rank = ap->rank - 1;
126     int64_t length = ap->dim[rank].size;
127     for (; rank >= 0; rank--) {
128         if (ap->dim[rank].stride > 1 &&
129             ap->dim[rank].upper - ap->dim[rank].lower != 0) {
130                 return false;
131         }
132         else if (length != ap->dim[rank].size) {
133             for (; rank >= 0; rank--) {
134                 if (ap->dim[rank].upper - ap->dim[rank].lower != 0) {
135                     return false;
136                 }
137             }
138             return true;
139         }
140         length *= (ap->dim[rank].upper - ap->dim[rank].lower + 1);
141     }
142     return true;
143 }
144 
cean_get_transf_size(CeanReadRanges * read_rng)145 int64_t cean_get_transf_size(CeanReadRanges * read_rng)
146 {
147     return(read_rng->range_max_number * read_rng->range_size);
148 }
149 
// First and last address of the memory range currently being accumulated
// while ranges are generated.
static uint64_t last_left;
static uint64_t last_right;

// Callback that receives one finished memory range, given by its first
// address "low" and its last address "high".
typedef void (*fpp)(const char *spaces, uint64_t low, uint64_t high,
                    int esize, bool print_values);
159 
generate_one_range(const char * spaces,uint64_t lrange,uint64_t rrange,fpp fp,int esize,bool print_values)160 static void generate_one_range(
161     const char *spaces,
162     uint64_t lrange,
163     uint64_t rrange,
164     fpp fp,
165     int esize,
166     bool print_values
167 )
168 {
169     OFFLOAD_TRACE(3,
170         "%s    generate_one_range(lrange=%p, rrange=%p, esize=%d)\n",
171         spaces, (void*)lrange, (void*)rrange, esize);
172     if (last_left == -1) {
173         // First range
174         last_left = lrange;
175     }
176     else {
177         if (lrange == last_right+1) {
178             // Extend previous range, don't print
179         }
180         else {
181             (*fp)(spaces, last_left, last_right, esize, print_values);
182             last_left = lrange;
183         }
184     }
185     last_right = rrange;
186 }
187 
element_is_contiguous(uint64_t rank,const struct Dim_Desc * ddp)188 static bool element_is_contiguous(
189     uint64_t rank,
190     const struct Dim_Desc *ddp
191 )
192 {
193     if (rank == 1) {
194         return (ddp[0].lower == ddp[0].upper || ddp[0].stride == 1);
195     }
196     else {
197         return ((ddp[0].size == (ddp[1].upper-ddp[1].lower+1)*ddp[1].size) &&
198                  element_is_contiguous(rank-1, ddp++));
199     }
200 }
201 
generate_mem_ranges_one_rank(const char * spaces,uint64_t base,uint64_t rank,const struct Dim_Desc * ddp,fpp fp,int esize,bool print_values)202 static void generate_mem_ranges_one_rank(
203     const char *spaces,
204     uint64_t base,
205     uint64_t rank,
206     const struct Dim_Desc *ddp,
207     fpp fp,
208     int esize,
209     bool print_values
210 )
211 {
212     uint64_t lindex = ddp->lindex;
213     uint64_t lower = ddp->lower;
214     uint64_t upper = ddp->upper;
215     uint64_t stride = ddp->stride;
216     uint64_t size = ddp->size;
217     OFFLOAD_TRACE(3,
218         "%s    "
219         "generate_mem_ranges_one_rank(base=%p, rank=%lld, lindex=%lld, "
220         "lower=%lld, upper=%lld, stride=%lld, size=%lld, esize=%d)\n",
221         spaces, (void*)base, rank, lindex, lower, upper, stride, size, esize);
222 
223     if (element_is_contiguous(rank, ddp)) {
224         uint64_t lrange, rrange;
225         lrange = base + (lower-lindex)*size;
226         rrange = lrange + (upper-lower+1)*size - 1;
227         generate_one_range(spaces, lrange, rrange, fp, esize, print_values);
228     }
229     else {
230         if (rank == 1) {
231             for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
232                 uint64_t lrange, rrange;
233                 lrange = base + i*size;
234                 rrange = lrange + size - 1;
235                 generate_one_range(spaces, lrange, rrange,
236 		                   fp, esize, print_values);
237             }
238         }
239         else {
240             for (int i=lower-lindex; i<=upper-lindex; i+=stride) {
241                 generate_mem_ranges_one_rank(
242                     spaces, base+i*size, rank-1, ddp+1,
243                     fp, esize, print_values);
244 
245             }
246         }
247     }
248 }
249 
generate_mem_ranges(const char * spaces,const Arr_Desc * adp,bool deref,fpp fp,bool print_values)250 static void generate_mem_ranges(
251     const char *spaces,
252     const Arr_Desc *adp,
253     bool deref,
254     fpp fp,
255     bool print_values
256 )
257 {
258     uint64_t esize;
259 
260     OFFLOAD_TRACE(3,
261         "%s    "
262         "generate_mem_ranges(adp=%p, deref=%d, fp)\n",
263         spaces, adp, deref);
264     last_left = -1;
265     last_right = -2;
266 
267     // Element size is derived from last dimension
268     esize = adp->dim[adp->rank-1].size;
269 
270     generate_mem_ranges_one_rank(
271         // For c_cean_var the base addr is the address of the data
272         // For c_cean_var_ptr the base addr is dereferenced to get to the data
273         spaces, deref ? *((uint64_t*)(adp->base)) : adp->base,
274         adp->rank, &adp->dim[0], fp, esize, print_values);
275     (*fp)(spaces, last_left, last_right, esize, print_values);
276 }
277 
278 // returns offset and length of the data to be transferred
__arr_data_offset_and_length(const Arr_Desc * adp,int64_t & offset,int64_t & length)279 void __arr_data_offset_and_length(
280     const Arr_Desc *adp,
281     int64_t &offset,
282     int64_t &length
283 )
284 {
285     int64_t rank = adp->rank - 1;
286     int64_t size = adp->dim[rank].size;
287     int64_t r_off = 0; // offset from right boundary
288 
289     // find the rightmost dimension which takes just part of its
290     // range. We define it if the size of left rank is not equal
291     // the range's length between upper and lower boungaries
292     while (rank > 0) {
293         size *= (adp->dim[rank].upper - adp->dim[rank].lower + 1);
294         if (size != adp->dim[rank - 1].size) {
295             break;
296         }
297         rank--;
298     }
299 
300     offset = (adp->dim[rank].lower - adp->dim[rank].lindex) *
301              adp->dim[rank].size;
302 
303     // find gaps both from the left - offset and from the right - r_off
304     for (rank--; rank >= 0; rank--) {
305         offset += (adp->dim[rank].lower - adp->dim[rank].lindex) *
306                   adp->dim[rank].size;
307         r_off += adp->dim[rank].size -
308                  (adp->dim[rank + 1].upper - adp->dim[rank + 1].lindex + 1) *
309                  adp->dim[rank + 1].size;
310     }
311     length = (adp->dim[0].upper - adp->dim[0].lindex + 1) *
312              adp->dim[0].size - offset - r_off;
313 }
314 
315 #if OFFLOAD_DEBUG > 0
316 
print_range(const char * spaces,uint64_t low,uint64_t high,int esize,bool print_values)317 static void print_range(
318     const char *spaces,
319     uint64_t low,
320     uint64_t high,
321     int esize,
322     bool print_values
323 )
324 {
325     char buffer[1024];
326     char number[32];
327 
328     OFFLOAD_TRACE(3, "%s        print_range(low=%p, high=%p, esize=%d)\n",
329         spaces, (void*)low, (void*)high, esize);
330 
331     if (console_enabled < 4 || !print_values) {
332         return;
333     }
334     OFFLOAD_TRACE(4, "%s            values:\n", spaces);
335     int count = 0;
336     buffer[0] = '\0';
337     while (low <= high)
338     {
339         switch (esize)
340         {
341         case 1:
342             sprintf(number, "%d ", *((char *)low));
343             low += 1;
344             break;
345         case 2:
346             sprintf(number, "%d ", *((short *)low));
347             low += 2;
348             break;
349         case 4:
350             sprintf(number, "%d ", *((int *)low));
351             low += 4;
352             break;
353         default:
354             sprintf(number, "0x%016x ", *((uint64_t *)low));
355             low += 8;
356             break;
357         }
358         strcat(buffer, number);
359         count++;
360         if (count == 10) {
361             OFFLOAD_TRACE(4, "%s            %s\n", spaces, buffer);
362             count = 0;
363             buffer[0] = '\0';
364         }
365     }
366     if (count != 0) {
367         OFFLOAD_TRACE(4, "%s            %s\n", spaces, buffer);
368     }
369 }
370 
__arr_desc_dump(const char * spaces,const char * name,const Arr_Desc * adp,bool deref,bool print_values)371 void __arr_desc_dump(
372     const char *spaces,
373     const char *name,
374     const Arr_Desc *adp,
375     bool deref,
376     bool print_values
377 )
378 {
379     OFFLOAD_TRACE(2, "%s%s CEAN expression %p\n", spaces, name, adp);
380 
381     if (adp != 0) {
382         OFFLOAD_TRACE(2, "%s    base=%llx, rank=%lld\n",
383             spaces, adp->base, adp->rank);
384 
385         for (int i = 0; i < adp->rank; i++) {
386             OFFLOAD_TRACE(2,
387                           "%s    dimension %d: size=%lld, lindex=%lld, "
388                           "lower=%lld, upper=%lld, stride=%lld\n",
389                           spaces, i, adp->dim[i].size, adp->dim[i].lindex,
390                           adp->dim[i].lower, adp->dim[i].upper,
391                           adp->dim[i].stride);
392         }
393         // For c_cean_var the base addr is the address of the data
394         // For c_cean_var_ptr the base addr is dereferenced to get to the data
395         generate_mem_ranges(spaces, adp, deref, &print_range, print_values);
396     }
397 }
398 
noncont_struct_dump(const char * spaces,const char * name,struct NonContigDesc * desc_p)399 void noncont_struct_dump(
400     const char *spaces,
401     const char *name,
402     struct NonContigDesc *desc_p)
403 {
404    OFFLOAD_TRACE(2, "%s%s NonCont Struct expression %p\n",
405                  spaces, name, desc_p->base);
406    if (desc_p) {
407        OFFLOAD_TRACE(2, "%s%s    base=%p\n", spaces, name, desc_p->base);
408        for (int i = 0; i < desc_p->interval_cnt; i++) {
409            OFFLOAD_TRACE(2,"%s    dimension %d: lower=%lld, size=%lld\n",
410                spaces, i, desc_p->interval[i].lower, desc_p->interval[i].size);
411        }
412    }
413 }
414 
get_noncont_struct_size(struct NonContigDesc * desc_p)415 int64_t get_noncont_struct_size(struct NonContigDesc *desc_p)
416 {
417     int index = desc_p->interval_cnt - 1;
418     return(desc_p->interval[index].lower + desc_p->interval[index].size);
419 }
420 
421 #endif // OFFLOAD_DEBUG
422