1 /*
2  * Copyright (c) 2017, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 #include "fioMacros.h"
19 
20 /* init structures used to find cpu indexes */
21 
/*
 * Build the lookup tables that __fort_findndx uses.
 *
 * Each dimension's count and stride are copied to the output arrays and
 * paired with a multiplier equal to the product of the counts of all
 * lower-numbered dimensions.  The three output arrays are then reordered
 * together so that their entries appear in ascending stride order.
 */
void __fort_initndx(int nd,     /* number of dimensions */
                    int *cnts,  /* cpu counts by dimension */
                    int *ncnts, /* cpu counts by dimension sorted by
                                 * stride (output) */
                    int *strs,  /* cpu strides by dimension */
                    int *nstrs, /* cpu strides by dimension sorted by
                                 * stride (output) */
                    int *mults) /* multipliers by dimension sorted by
                                 * stride (output) */
{
  int d;
  int pos;
  int tmp;
  int prod = 1;

  /* fill the outputs in original dimension order */
  for (d = 0; d < nd; ++d) {
    mults[d] = prod;
    prod *= cnts[d];
    nstrs[d] = strs[d];
    ncnts[d] = cnts[d];
  }

  /*
   * Sink each entry toward the front with adjacent swaps until the stride
   * before it is no larger.  Entries with equal strides are never swapped,
   * so they keep their original relative order.
   */
  for (d = 1; d < nd; ++d) {
    for (pos = d; pos > 0 && nstrs[pos - 1] > nstrs[pos]; --pos) {
      tmp = nstrs[pos - 1];
      nstrs[pos - 1] = nstrs[pos];
      nstrs[pos] = tmp;

      tmp = ncnts[pos - 1];
      ncnts[pos - 1] = ncnts[pos];
      ncnts[pos] = tmp;

      tmp = mults[pos - 1];
      mults[pos - 1] = mults[pos];
      mults[pos] = tmp;
    }
  }
}
62 
63 /* find a cpu's index */
64 
/*
 * Convert a cpu number into its index in the cpu sequence.
 *
 * The offset of cpu from low is decomposed over the strides, walking the
 * stride-sorted tables from the last entry down to the first.  Each
 * quotient is weighted by the matching multiplier and summed; the
 * remainder is carried to the next entry.  Returns 0 when nd is not
 * positive.
 */
int __fort_findndx(int cpu,    /* cpu whose index is wanted */
                   int nd,     /* number of dimensions */
                   int low,    /* lowest numbered cpu */
                   int *nstrs, /* strides by dimension sorted by stride */
                   int *mults) /* multipliers by dimension sorted by stride */
{
  int rem = cpu - low; /* offset still to be decomposed */
  int ndx = 0;
  int d = nd;

  while (d > 0) {
    --d;
    ndx += (rem / nstrs[d]) * mults[d];
    rem %= nstrs[d];
  }

  return ndx;
}
87 
88 /* generate list of cpu numbers */
89 
__fort_genlist(nd,low,cnts,strs)90 struct cgrp *__fort_genlist(nd, low, cnts,
91                            strs) int nd; /* number of dimensions */
92 int low;                                 /* lowest cpu number */
93 int cnts[];                              /* counts per dimension */
94 int strs[];                              /* strides per dimension */
95 {
96 
97   int dim;
98   int idxs[MAXDIMS];
99   struct cgrp *g;
100   int cpu;
101   int n;
102 
103   n = 1;
104   for (dim = 0; dim < nd; dim++) {
105     idxs[dim] = 1; /* reset indices */
106     n *= cnts[dim];
107   }
108   g = (struct cgrp *)__fort_malloc(sizeof(struct cgrp) + (n - 1) * sizeof(int));
109   cpu = low;
110   g->ncpus = 0;
111   do {
112     g->cpus[g->ncpus++] = cpu;
113     for (dim = 0; dim < nd; dim++) {/* increment cpu */
114       if (idxs[dim] < cnts[dim]) {
115         ++idxs[dim];
116         cpu += strs[dim];
117         break;
118       }
119       idxs[dim] = 1;
120       cpu -= (cnts[dim] - 1) * strs[dim];
121     }
122   } while (dim < nd);
123   return (g);
124 }
125