1 /*
2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 #include "fioMacros.h"
19
20 /* init structures used to find cpu indexes */
21
/*
 * Build the stride-ordered tables used to find cpu indexes.
 *
 * First pass: for each dimension d (in the caller's order) record
 *   mults[d] = cnts[0] * ... * cnts[d-1]   (1 for d == 0)
 * and copy the stride and count into nstrs[d] / ncnts[d].
 *
 * Second pass: reorder the three output arrays together so that nstrs is in
 * ascending order.  Only adjacent entries are exchanged, and only when the
 * left stride is strictly greater, so dimensions with equal strides keep
 * their original relative order.
 */
void __fort_initndx(int nd,     /* number of dimensions */
                    int *cnts,  /* cpu counts by dimension */
                    int *ncnts, /* cpu counts by dimension sorted by
                                 * stride (output) */
                    int *strs,  /* cpu strides by dimension */
                    int *nstrs, /* cpu strides by dimension sorted by
                                 * stride (output) */
                    int *mults) /* multipliers by dimension sorted by
                                 * stride (output) */
{
  int product = 1; /* running product of the counts seen so far */
  int pos = 0;
  int tmp;

  while (pos < nd) {
    mults[pos] = product;
    product *= cnts[pos];
    nstrs[pos] = strs[pos];
    ncnts[pos] = cnts[pos];
    pos++;
  }

  pos = 0;
  while (pos < (nd - 1)) {
    /* neighbours already in order: just move forward */
    if (!(nstrs[pos] > nstrs[pos + 1])) {
      pos++;
      continue;
    }

    /* exchange the pair in all three tables so they stay in step */
    tmp = nstrs[pos];
    nstrs[pos] = nstrs[pos + 1];
    nstrs[pos + 1] = tmp;

    tmp = ncnts[pos];
    ncnts[pos] = ncnts[pos + 1];
    ncnts[pos + 1] = tmp;

    tmp = mults[pos];
    mults[pos] = mults[pos + 1];
    mults[pos + 1] = tmp;

    /* the entry moved down may also be out of order with its new left
     * neighbour, so step back unless already at the start */
    if (pos > 0) {
      pos--;
    } else {
      pos++;
    }
  }
}
62
63 /* find a cpu's index */
64
/*
 * Find a cpu's index.
 *
 * The cpu number is first made relative to `low`.  The dimensions are then
 * visited from the last table entry to the first (largest stride first when
 * nstrs is sorted ascending): the quotient by that dimension's stride is
 * weighted by the matching multiplier and added to the result, and the
 * remainder is carried on to the next dimension.
 *
 * Returns the accumulated index; 0 when nd <= 0.
 */
int __fort_findndx(int cpu,    /* cpu whose index is wanted */
                   int nd,     /* number of dimensions */
                   int low,    /* lowest numbered cpu */
                   int *nstrs, /* strides by dimension sorted by stride */
                   int *mults) /* multipliers by dimension sorted by stride */
{
  int offset = cpu - low; /* position of cpu within the cpu sequence */
  int index = 0;
  int d = nd;

  while (d-- > 0) {
    index += (offset / nstrs[d]) * mults[d];
    offset %= nstrs[d]; /* what is left for the smaller strides */
  }

  return index;
}
87
88 /* generate list of cpu numbers */
89
__fort_genlist(nd,low,cnts,strs)90 struct cgrp *__fort_genlist(nd, low, cnts,
91 strs) int nd; /* number of dimensions */
92 int low; /* lowest cpu number */
93 int cnts[]; /* counts per dimension */
94 int strs[]; /* strides per dimension */
95 {
96
97 int dim;
98 int idxs[MAXDIMS];
99 struct cgrp *g;
100 int cpu;
101 int n;
102
103 n = 1;
104 for (dim = 0; dim < nd; dim++) {
105 idxs[dim] = 1; /* reset indices */
106 n *= cnts[dim];
107 }
108 g = (struct cgrp *)__fort_malloc(sizeof(struct cgrp) + (n - 1) * sizeof(int));
109 cpu = low;
110 g->ncpus = 0;
111 do {
112 g->cpus[g->ncpus++] = cpu;
113 for (dim = 0; dim < nd; dim++) {/* increment cpu */
114 if (idxs[dim] < cnts[dim]) {
115 ++idxs[dim];
116 cpu += strs[dim];
117 break;
118 }
119 idxs[dim] = 1;
120 cpu -= (cnts[dim] - 1) * strs[dim];
121 }
122 } while (dim < nd);
123 return (g);
124 }
125