1 /* 2 * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 * 16 */ 17 18 /* clang-format off */ 19 20 /* scatter.h - parameters for scatter/gather routines */ 21 22 /* operation codes */ 23 24 typedef enum { __SCATTER, __GATHER } gathscat_dir; 25 26 /* gather-scatter parameters (not included in schedule) */ 27 28 typedef struct xstuff xstuff; 29 typedef struct gathscat_dim gathscat_dim; 30 typedef struct gathscat_parm gathscat_parm; 31 32 struct xstuff { 33 xstuff *next; /* next xlist entry */ 34 DECL_HDR_PTRS(xd); /* index array descriptor */ 35 DECL_F90_DIM_PTR(xdd); 36 DECL_DIST_DIM_PTR(xdd); /* index array descriptor dimension */ 37 __INT_T cn; /* number of cycles (blocks) */ 38 __INT_T cl; /* lower cycle loop bound (current index) */ 39 __INT_T cs; /* cycle loop stride */ 40 __INT_T clof; /* cyclic global->local index offset */ 41 __INT_T clos; /* cyclic global->local offset stride */ 42 __INT_T bn; /* block element count */ 43 int str; /* element stride */ 44 __INT_T off0; /* saved element offset */ 45 int vx; /* vectored array axis */ 46 int xx; /* index array axis */ 47 }; 48 49 struct gathscat_dim { 50 __INT_T *xb; /* index base address */ 51 DECL_HDR_PTRS(xd); /* index descriptor */ 52 __INT_T *xmap; /* map index axis -> unvectored axis */ 53 }; 54 55 struct gathscat_parm { 56 char *what; /* "GATHER"/"XXX_SCATTER" */ 57 void (*xfer_request)(struct chdr *, int, void *, long, long, int, 58 long); /* scatter: __fort_sendl; gather: __fort_recvl */ 59 void (*xfer_respond)(struct chdr *, int, void *, long, long, int, 60 long); /* scatter: __fort_recvl; gather: __fort_sendl */ 61 void (*gathscatfn)(); /* local gather-scatter-reduction function */ 62 void (*scatterfn)(); /* local scatter-reduction function */ 63 char *rb, *ab, *mb; /* base addresses */ 64 char *ub, *vb; 65 DECL_HDR_PTRS(rd); 66 DECL_HDR_PTRS(ad); 67 DECL_HDR_PTRS(md); /* descriptors */ 68 DECL_HDR_PTRS(ud); 69 DECL_HDR_PTRS(vd); 70 int *counts; /* request-response counts per cpu */ 71 int *head; /* head of linked list for each target */ 72 int *next; /* next linked list pointer */ 73 int *roff; /* offsets in remote vectored array */ 74 int *loff; /* offsets in local unvectored array */ 75 gathscat_dir dir; /* transfer direction code */ 76 77 /* masks with bits selected by vectored array dim... */ 78 int indirect; /* dims with indirect indexes */ 79 int permuted; /* dims with permuted axes */ 80 int conform_x_u; /* index conforms with unvectored */ 81 int aligned_x_u; /* index aligned with unvectored */ 82 int aligned_v_u; /* vectored aligned with unvectored */ 83 int aligned_u_v; /* unvectored aligned with vectored */ 84 85 int communicate; /* nonzero if need to communicate */ 86 int replicate; /* nonzero if need to broadcast result */ 87 int group_offset; /* my offset within replication group */ 88 int outgoing; /* total number of elements to transfer */ 89 90 int ui[MAXDIMS]; /* unvectored array index */ 91 92 /* each unvectored array axis has it own list identifying the 93 vectored axis/index axis pairs which it indexes. */ 94 95 xstuff *xfree; /* next available xlist entry */ 96 xstuff *xhead[MAXDIMS]; /* unvectored axis -> first xlist entry */ 97 xstuff xlist[MAXDIMS * (MAXDIMS + 1)]; 98 99 repl_t r_repl; /* result replication descriptor */ 100 101 gathscat_dim dim[MAXDIMS]; /* per vectored-dimension parameters */ 102 }; 103 104 /* prototypes */ 105 106 void __fort_gathscat_abort(char *what, char *msg); 107 108 sked *I8(__fort_gathscat)(gathscat_parm *z); 109 110 void *ENTFTN(COMM_START, comm_start)(); 111 void ENTFTN(COMM_FINISH, comm_finish)(); 112 void ENTFTN(COMM_FREE, comm_free)(); 113 114 void *I8(__fort_adjust_index_array)(char *what, char *idx_array, char *src, 115 int dim, F90_Desc *is, F90_Desc *bs); 116 117 void *I8(__fort_create_conforming_index_array)(char *what, char *ab, void *ib, 118 F90_Desc *as, F90_Desc *is, 119 F90_Desc *new_is); 120