/* * Copyright (c) 1995-2018, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */ /* clang-format off */ /* hpf_library, hpf_local_library, and system inquiry routines */ /* FIXME: how much (if any) of this is used/needed */ #include "stdioInterf.h" #include "fioMacros.h" static int I8(fetch_int)(void *b, F90_Desc *s) { dtype kind = TYPEKIND(s); switch (kind) { case __INT1: return (int)(*(__INT1_T *)b); case __INT2: return (int)(*(__INT2_T *)b); case __INT4: return (int)(*(__INT4_T *)b); case __INT8: return (int)(*(__INT8_T *)b); default: __fort_abort("fetch_int: invalid argument type"); return 0; } } static int I8(fetch_log)(void *b, F90_Desc *s) { dtype kind = TYPEKIND(s); switch (kind) { case __LOG1: return (*(__LOG1_T *)b & GET_DIST_MASK_LOG1) != 0; case __LOG2: return (*(__LOG2_T *)b & GET_DIST_MASK_LOG2) != 0; case __LOG4: return (*(__LOG4_T *)b & GET_DIST_MASK_LOG4) != 0; case __LOG8: return (*(__LOG8_T *)b & GET_DIST_MASK_LOG8) != 0; default: __fort_abort("fetch_log: invalid argument type"); return 0; } } static void I8(fetch_vector)(void *ab, F90_Desc *as, __INT_T *vector, int veclen) { __INT_T *la; __INT_T i; if (F90_RANK_G(as) != 1) __fort_abort("fetch_vector: incorrect argument rank"); for (i = F90_DIM_LBOUND_G(as, 0); --veclen >= 0; ++i) { la = I8(__fort_local_address)(ab, as, &i); if (la == NULL) __fort_abort("fetch_vector: argument inaccessible"); *vector++ = I8(fetch_int)(la, as); } } static void I8(store_int)(void *b, F90_Desc *s, __INT_T val) { dtype kind = TYPEKIND(s); switch (kind) { case __INT1: *(__INT1_T *)b = (__INT1_T)val; break; case __INT2: *(__INT2_T *)b = (__INT2_T)val; break; case __INT4: *(__INT4_T *)b = (__INT4_T)val; break; case __INT8: *(__INT8_T *)b = (__INT8_T)val; break; default: __fort_abort("store_int: invalid argument type (integer expected)"); } } static void I8(store_log)(void *b, F90_Desc *s, int val) { dtype kind = TYPEKIND(s); switch (kind) { case __LOG1: *(__LOG1_T *)b = val ? GET_DIST_TRUE_LOG1 : 0; break; case __LOG2: *(__LOG2_T *)b = val ? GET_DIST_TRUE_LOG2 : 0; break; case __LOG4: *(__LOG4_T *)b = val ? GET_DIST_TRUE_LOG4 : 0; break; case __LOG8: *(__LOG8_T *)b = val ? GET_DIST_TRUE_LOG8 : 0; break; default: __fort_abort("store_log: invalid argument type (logical expected)"); } } static void I8(store_element)(void *ab, F90_Desc *as, int index, int val) { __INT_T *la; __INT_T i; if (F90_RANK_G(as) != 1) __fort_abort("store_element: incorrect argument rank"); i = F90_DIM_LBOUND_G(as, 0) - 1 + index; la = I8(__fort_local_address)(ab, as, &i); if (la != NULL) I8(store_int)(la, as, val); } static void I8(store_vector)(void *ab, F90_Desc *as, __INT_T *vector, __INT_T veclen) { __INT_T *la; __INT_T i; if (F90_RANK_G(as) != 1) __fort_abort("store_vector: incorrect argument rank"); for (i = F90_DIM_LBOUND_G(as, 0); --veclen >= 0; ++i) { la = I8(__fort_local_address)(ab, as, &i); if (la != NULL) I8(store_int)(la, as, *vector); ++vector; } } static void I8(store_vector_int)(void *ab, F90_Desc *as, int *vector, __INT_T veclen) { __INT_T *la; __INT_T i; if (F90_RANK_G(as) != 1) __fort_abort("store_vector_int: incorrect argument rank"); for (i = F90_DIM_LBOUND_G(as, 0); --veclen >= 0; ++i) { la = I8(__fort_local_address)(ab, as, &i); if (la != NULL) I8(store_int)(la, as, *vector); ++vector; } } static void ftnstrcpy(char *dst, /* destination string, blank-filled */ size_t len, /* length of destination space */ char *src) /* null terminated source string */ { char *end = dst + len; while (dst < end && *src != '\0') *dst++ = *src++; while (dst < end) *dst++ = ' '; } /* FIXME: still used ? */ /* hpf_library mapping inquiry routines */ void ENTFTN(DIST_ALIGNMENT, dist_alignment)(void *alignee_b, void *lb, void *ub, void *stride, void *axis_map, void *identity_map, void *dynamic, void *ncopies, F90_Desc *alignee, F90_Desc *lb_s, F90_Desc *ub_s, F90_Desc *stride_s, F90_Desc *axis_map_s, F90_Desc *identity_map_s, F90_Desc *dynamic_s, F90_Desc *ncopies_s) { DECL_DIM_PTRS(ad); proc *p; procdim *pd; __INT_T aextent, i, idm, ncp, px, rank, textent, vector[MAXDIMS]; rank = (F90_TAG_G(alignee) == __DESC) ? F90_RANK_G(alignee) : 0; if (ISPRESENT(lb)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ad, alignee, i); vector[i] = DIST_DPTR_TSTRIDE_G(ad) * F90_DPTR_LBOUND_G(ad) + DIST_DPTR_TOFFSET_G(ad) - DIST_DPTR_TLB_G(ad) + 1; } I8(store_vector)(lb, lb_s, vector, rank); } if (ISPRESENT(ub)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ad, alignee, i); vector[i] = DIST_DPTR_TSTRIDE_G(ad) * DPTR_UBOUND_G(ad) + DIST_DPTR_TOFFSET_G(ad) - DIST_DPTR_TLB_G(ad) + 1; } I8(store_vector)(ub, ub_s, vector, rank); } if (ISPRESENT(stride)) { for (i = rank; --i >= 0;) { if (DFMT(alignee, i + 1) != DFMT_COLLAPSED) { SET_DIM_PTRS(ad, alignee, i); vector[i] = DIST_DPTR_TSTRIDE_G(ad); } else { vector[i] = 0; } } I8(store_vector)(stride, stride_s, vector, rank); } if (ISPRESENT(axis_map)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ad, alignee, i); vector[i] = DIST_DPTR_TAXIS_G(ad); } I8(store_vector)(axis_map, axis_map_s, vector, rank); } if (ISPRESENT(identity_map)) { idm = (rank == 0 || rank == F90_RANK_G(DIST_ALIGN_TARGET_G(alignee))); for (i = rank; idm && --i >= 0;) { SET_DIM_PTRS(ad, alignee, i); idm = (DIST_DPTR_TAXIS_G(ad) == i + 1 && DIST_DPTR_TSTRIDE_G(ad) == 1 && F90_DPTR_LBOUND_G(ad) == DIST_DPTR_TLB_G(ad) && DPTR_UBOUND_G(ad) == DIST_DPTR_TUB_G(ad)); } I8(store_log)(identity_map, identity_map_s, idm); } if (ISPRESENT(dynamic)) { I8(store_log)(dynamic, dynamic_s, (rank > 0 && F90_FLAGS_G(alignee) & __DYNAMIC)); } if (ISPRESENT(ncopies)) { if (rank > 0) { p = DIST_DIST_TARGET_G(alignee); ncp = 1; for (px = 0; px < p->rank; ++px) { if (DIST_REPLICATED_G(alignee) >> px & 1) { pd = &p->dim[px]; ncp *= pd->shape; } } } else ncp = GET_DIST_TCPUS; I8(store_int)(ncopies, ncopies_s, ncp); } } /* FIXME: still used */ void ENTFTN(DIST_DISTRIBUTIONA, dist_distributiona)( void *distributee_b, DCHAR(axis_type), void *axis_info, void *proc_rank, void *proc_shape, void *plb, void *pub, void *pstride, void *low_shadow, void *high_shadow, F90_Desc *distributee, F90_Desc *axis_type_s, F90_Desc *axis_info_s, F90_Desc *proc_rank_s, F90_Desc *proc_shape_s, F90_Desc *plb_s, F90_Desc *pub_s, F90_Desc *pstride_s, F90_Desc *low_shadow_s, F90_Desc *high_shadow_s DCLEN64(axis_type)) { DECL_HDR_PTRS(u); DECL_DIM_PTRS(ud); DECL_DIM_PTRS(dd); proc *p; procdim *pd; __INT_T i, rank, vector[MAXDIMS]; char *src; __CLEN_T len; if (F90_TAG_G(distributee) == __DESC) { u = DIST_ALIGN_TARGET_G(distributee); p = DIST_DIST_TARGET_G(distributee); rank = F90_RANK_G(u); } else { u = NULL; p = NULL; rank = 0; } if (ISPRESENTC(axis_type)) { len = CLEN(axis_type); for (i = rank; i > 0; --i) { switch (DFMT(u, i)) { case DFMT_COLLAPSED: src = "COLLAPSED"; break; case DFMT_BLOCK: case DFMT_BLOCK_K: src = "BLOCK"; break; case DFMT_CYCLIC: case DFMT_CYCLIC_K: src = "CYCLIC"; break; case DFMT_GEN_BLOCK: src = "GEN_BLOCK"; break; case DFMT_INDIRECT: src = "INDIRECT"; break; default: __fort_abort("DIST_DISTRIBUTION: unsupported dist-format"); } ftnstrcpy(CADR(axis_type) + (i - 1) * len, len, src); } } if (ISPRESENT(axis_info)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ud, u, i); vector[i] = DIST_DPTR_BLOCK_G(ud); } I8(store_vector)(axis_info, axis_info_s, vector, rank); } if (ISPRESENT(proc_rank)) { I8(store_int)(proc_rank, proc_rank_s, p != NULL ? p->rank : 0); } if (ISPRESENT(proc_shape) && p != NULL) { for (i = p->rank; --i >= 0;) { pd = &p->dim[i]; vector[i] = pd->shape; } I8(store_vector)(proc_shape, proc_shape_s, vector, p->rank); } if (ISPRESENT(plb)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ud, u, i); vector[i] = 1; } I8(store_vector)(plb, plb_s, vector, rank); } if (ISPRESENT(pub)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ud, u, i); vector[i] = DIST_DPTR_PSHAPE_G(ud); } I8(store_vector)(pub, pub_s, vector, rank); } if (ISPRESENT(pstride)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ud, u, i); vector[i] = DIST_DPTR_PSTRIDE_G(ud); } I8(store_vector)(pstride, pstride_s, vector, rank); } /* Return low_shadow and high_shadow values for the 'distributee' argument. HPF 2 spec makes no sense where it says these should come from the distributee's ultimate align target. */ rank = (F90_TAG_G(distributee) == __DESC) ? F90_RANK_G(distributee) : 0; if (ISPRESENT(low_shadow)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(dd, distributee, i); vector[i] = DIST_DPTR_NO_G(dd); } I8(store_vector)(low_shadow, low_shadow_s, vector, rank); } if (ISPRESENT(high_shadow)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(dd, distributee, i); vector[i] = DIST_DPTR_PO_G(dd); } I8(store_vector)(high_shadow, high_shadow_s, vector, rank); } } /* 32 bit CLEN version */ void ENTFTN(DIST_DISTRIBUTION, dist_distribution)( void *distributee_b, DCHAR(axis_type), void *axis_info, void *proc_rank, void *proc_shape, void *plb, void *pub, void *pstride, void *low_shadow, void *high_shadow, F90_Desc *distributee, F90_Desc *axis_type_s, F90_Desc *axis_info_s, F90_Desc *proc_rank_s, F90_Desc *proc_shape_s, F90_Desc *plb_s, F90_Desc *pub_s, F90_Desc *pstride_s, F90_Desc *low_shadow_s, F90_Desc *high_shadow_s DCLEN(axis_type)) { ENTFTN(DIST_DISTRIBUTIONA, dist_distributiona)(distributee_b, CADR(axis_type), axis_info, proc_rank, proc_shape, plb, pub, pstride, low_shadow, high_shadow, distributee, axis_type_s, axis_info_s, proc_rank_s, proc_shape_s, plb_s, pub_s, pstride_s, low_shadow_s, high_shadow_s, (__CLEN_T)CLEN(axis_type)); } /* FIXME: not used */ void ENTFTN(DIST_TEMPLATEA, dist_templatea)(void *alignee_b, void *template_rank, void *lb, void *ub, DCHAR(axis_type), void *axis_info, void *number_aligned, void *dynamic, F90_Desc *alignee, F90_Desc *template_rank_s, F90_Desc *lb_s, F90_Desc *ub_s, F90_Desc *axis_type_s, F90_Desc *axis_info_s, F90_Desc *number_aligned_s, F90_Desc *dynamic_s DCLEN64(axis_type)) { DECL_HDR_PTRS(u); DECL_HDR_PTRS(a); DECL_DIM_PTRS(ud); proc *p; __INT_T i, rank, n_alnd, ux; __INT_T alignee_axis[MAXDIMS], vector[MAXDIMS]; char *src; __CLEN_T len; if (F90_TAG_G(alignee) == __DESC) { u = DIST_ALIGN_TARGET_G(alignee); p = DIST_DIST_TARGET_G(alignee); rank = F90_RANK_G(u); for (i = rank; --i >= 0;) alignee_axis[i] = 0; for (i = F90_RANK_G(alignee); --i >= 0;) { ux = DIST_DIM_TAXIS_G(alignee, i); if (ux > 0) alignee_axis[ux - 1] = i + 1; } } else rank = 0; if (ISPRESENT(template_rank)) { I8(store_int)(template_rank, template_rank_s, rank); } if (ISPRESENT(lb)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ud, u, i); vector[i] = F90_DPTR_LBOUND_G(ud); } I8(store_vector)(lb, lb_s, vector, rank); } if (ISPRESENT(ub)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ud, u, i); vector[i] = DPTR_UBOUND_G(ud); } I8(store_vector)(ub, ub_s, vector, rank); } if (ISPRESENTC(axis_type)) { len = CLEN(axis_type); for (i = rank; --i >= 0;) { if (alignee_axis[i] > 0) src = "NORMAL"; else if (DIST_SINGLE_G(alignee) >> i & 1) src = "SINGLE"; else src = "REPLICATED"; ftnstrcpy(CADR(axis_type) + i * len, len, src); } } if (ISPRESENT(axis_info)) { for (i = rank; --i >= 0;) { if (alignee_axis[i] > 0) vector[i] = alignee_axis[i]; else if (DIST_SINGLE_G(alignee) >> i & 1) vector[i] = DIST_INFO_G(alignee, i); else { SET_DIM_PTRS(ud, u, i); vector[i] = (DIST_DPTR_PAXIS_G(ud) > 0) ? DIST_DPTR_PSHAPE_G(ud) : 1; } } I8(store_vector)(axis_info, axis_info_s, vector, rank); } if (ISPRESENT(number_aligned)) { if (!(F90_FLAGS_G(u) & __DYNAMIC)) { __fort_abort( "DIST_TEMPLATE: NUMBER_ALIGNED not supported for static align target"); } n_alnd = 0; if (rank > 0) { if (u) for (a = DIST_NEXT_ALIGNEE_G(u); a != NULL; a = DIST_NEXT_ALIGNEE_G(a)) { ++n_alnd; } } I8(store_int)(number_aligned, number_aligned_s, n_alnd); } if (ISPRESENT(dynamic)) { I8(store_log)(dynamic, dynamic_s, rank > 0 && F90_FLAGS_G(u) & __DYNAMIC); } } /* 32 bit CLEN version */ void ENTFTN(DIST_TEMPLATE, dist_template)(void *alignee_b, void *template_rank, void *lb, void *ub, DCHAR(axis_type), void *axis_info, void *number_aligned, void *dynamic, F90_Desc *alignee, F90_Desc *template_rank_s, F90_Desc *lb_s, F90_Desc *ub_s, F90_Desc *axis_type_s, F90_Desc *axis_info_s, F90_Desc *number_aligned_s, F90_Desc *dynamic_s DCLEN(axis_type)) { ENTFTN(DIST_TEMPLATEA, dist_templatea)(alignee_b, template_rank, lb, ub, CADR(axis_type), axis_info, number_aligned, dynamic, alignee, template_rank_s, lb_s, ub_s, axis_type_s, axis_info_s, number_aligned_s, dynamic_s, (__CLEN_T)CLEN(axis_type)); } void ENTFTN(GLOBAL_ALIGNMENT, global_alignment)(void *array_b, void *lb, void *ub, void *stride, void *axis_map, void *identity_map, void *dynamic, void *ncopies, F90_Desc *array_s, F90_Desc *lb_s, F90_Desc *ub_s, F90_Desc *stride_s, F90_Desc *axis_map_s, F90_Desc *identity_map_s, F90_Desc *dynamic_s, F90_Desc *ncopies_s) { DECL_HDR_PTRS(alignee); DECL_DIM_PTRS(ad); proc *p; __INT_T i, idm, n, rank, vector[MAXDIMS]; if (F90_TAG_G(array_s) == __DESC) { alignee = DIST_ACTUAL_ARG_G(array_s); if (alignee == NULL) __fort_abort("GLOBAL_ALIGNMENT: array is not associated" " with global actual argument"); rank = F90_RANK_G(alignee); } else rank = 0; if (ISPRESENT(lb)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ad, alignee, i); vector[i] = DIST_DPTR_TSTRIDE_G(ad) * F90_DPTR_LBOUND_G(ad) + DIST_DPTR_TOFFSET_G(ad) - DIST_DPTR_TLB_G(ad) + 1; } I8(store_vector)(lb, lb_s, vector, rank); } if (ISPRESENT(ub)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ad, alignee, i); vector[i] = DIST_DPTR_TSTRIDE_G(ad) * DPTR_UBOUND_G(ad) + DIST_DPTR_TOFFSET_G(ad) - DIST_DPTR_TLB_G(ad) + 1; } I8(store_vector)(ub, ub_s, vector, rank); } if (ISPRESENT(stride)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ad, alignee, i); vector[i] = DIST_DPTR_TSTRIDE_G(ad); } I8(store_vector)(stride, stride_s, vector, rank); } if (ISPRESENT(axis_map)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ad, alignee, i); vector[i] = DIST_DPTR_TAXIS_G(ad); } I8(store_vector)(axis_map, axis_map_s, vector, rank); } if (ISPRESENT(identity_map)) { idm = (rank == 0 || rank == F90_TAG_G(DIST_ALIGN_TARGET_G(alignee))); for (i = rank; idm && --i >= 0;) { SET_DIM_PTRS(ad, alignee, i); idm = (DIST_DPTR_TAXIS_G(ad) == i + 1 && DIST_DPTR_TSTRIDE_G(ad) == 1 && F90_DPTR_LBOUND_G(ad) == DIST_DPTR_TLB_G(ad) && DPTR_UBOUND_G(ad) == DIST_DPTR_TUB_G(ad)); } I8(store_log)(identity_map, identity_map_s, idm); } if (ISPRESENT(dynamic)) { I8(store_log)(dynamic, dynamic_s, rank > 0 && F90_FLAGS_G(alignee) & __DYNAMIC); } if (ISPRESENT(ncopies)) { if (rank > 0) { n = 1; p = DIST_DIST_TARGET_G(alignee); for (i = p->rank; --i >= 0;) { if (DIST_REPLICATED_G(alignee) >> i & 1) n *= p->dim[i].shape; } } else n = GET_DIST_TCPUS; I8(store_int)(ncopies, ncopies_s, n); } } void ENTFTN(GLOBAL_DISTRIBUTIONA, global_distributiona)( void *array_b, DCHAR(axis_type), void *axis_info, void *proc_rank, void *proc_shape, void *plb, void *pub, void *pstride, void *low_shadow, void *high_shadow, F90_Desc *array_s, F90_Desc *axis_type_s, F90_Desc *axis_info_s, F90_Desc *proc_rank_s, F90_Desc *proc_shape_s, F90_Desc *plb_s, F90_Desc *pub_s, F90_Desc *pstride_s, F90_Desc *low_shadow_s, F90_Desc *high_shadow_s DCLEN(axis_type)) { DECL_HDR_PTRS(u); DECL_HDR_PTRS(distributee); DECL_DIM_PTRS(ud); DECL_DIM_PTRS(dd); proc *p; procdim *pd; __INT_T i, rank, vector[MAXDIMS]; char *src; __CLEN_T len; if (F90_TAG_G(array_s) == __DESC) { distributee = DIST_ACTUAL_ARG_G(array_s); if (distributee == NULL) __fort_abort("GLOBAL_DISTRIBUTION: array is not associated" " with global actual argument"); u = DIST_ALIGN_TARGET_G(distributee); p = DIST_DIST_TARGET_G(distributee); rank = F90_RANK_G(u); } else { distributee = NULL; u = NULL; p = NULL; rank = 0; } if (ISPRESENTC(axis_type)) { len = CLEN(axis_type); for (i = rank; i > 0; --i) { switch (DFMT(u, i)) { case DFMT_COLLAPSED: src = "COLLAPSED"; break; case DFMT_BLOCK: case DFMT_BLOCK_K: src = "BLOCK"; break; case DFMT_CYCLIC: case DFMT_CYCLIC_K: src = "CYCLIC"; break; case DFMT_GEN_BLOCK: src = "GEN_BLOCK"; break; case DFMT_INDIRECT: src = "INDIRECT"; break; default: __fort_abort("GLOBAL_DISTRIBUTION: unsupported dist-format"); } ftnstrcpy(CADR(axis_type) + (i - 1) * len, len, src); } } if (ISPRESENT(axis_info)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ud, u, i); vector[i] = DIST_DPTR_BLOCK_G(ud); } I8(store_vector)(axis_info, axis_info_s, vector, rank); } if (ISPRESENT(proc_rank)) { I8(store_int)(proc_rank, proc_rank_s, p != NULL ? p->rank : 0); } if (ISPRESENT(proc_shape) && p != NULL) { for (i = p->rank; --i >= 0;) { pd = &p->dim[i]; vector[i] = pd->shape; } I8(store_vector)(proc_shape, proc_shape_s, vector, p->rank); } if (ISPRESENT(plb)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ud, u, i); vector[i] = 1; } I8(store_vector)(plb, plb_s, vector, rank); } if (ISPRESENT(pub)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ud, u, i); vector[i] = DIST_DPTR_PSHAPE_G(ud); } I8(store_vector)(pub, pub_s, vector, rank); } if (ISPRESENT(pstride)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ud, u, i); vector[i] = DIST_DPTR_PSTRIDE_G(ud); } I8(store_vector)(pstride, pstride_s, vector, rank); } /* Return low_shadow and high_shadow values for the 'distributee' argument. HPF 2 spec makes no sense where it says these should come from the distributee's ultimate align target. */ rank = (distributee != NULL && F90_TAG_G(distributee) == __DESC) ? F90_RANK_G(distributee) : 0; if (ISPRESENT(low_shadow)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(dd, distributee, i); vector[i] = DIST_DPTR_NO_G(dd); } I8(store_vector)(low_shadow, low_shadow_s, vector, rank); } if (ISPRESENT(high_shadow)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(dd, distributee, i); vector[i] = DIST_DPTR_PO_G(dd); } I8(store_vector)(high_shadow, high_shadow_s, vector, rank); } } /* 32 bit CLEN version */ void ENTFTN(GLOBAL_DISTRIBUTION, global_distribution)( void *array_b, DCHAR(axis_type), void *axis_info, void *proc_rank, void *proc_shape, void *plb, void *pub, void *pstride, void *low_shadow, void *high_shadow, F90_Desc *array_s, F90_Desc *axis_type_s, F90_Desc *axis_info_s, F90_Desc *proc_rank_s, F90_Desc *proc_shape_s, F90_Desc *plb_s, F90_Desc *pub_s, F90_Desc *pstride_s, F90_Desc *low_shadow_s, F90_Desc *high_shadow_s DCLEN(axis_type)) { ENTFTN(GLOBAL_DISTRIBUTIONA, global_distributiona)(array_b, CADR(axis_type), axis_info, proc_rank, proc_shape, plb, pub, pstride, low_shadow, high_shadow, array_s, axis_type_s, axis_info_s, proc_rank_s, proc_shape_s, plb_s, pub_s, pstride_s, low_shadow_s, high_shadow_s, (__CLEN_T)CLEN(axis_type)); } void ENTFTN(GLOBAL_TEMPLATEA, global_templatea)( void *array_b, void *template_rank, void *lb, void *ub, DCHAR(axis_type), void *axis_info, void *number_aligned, void *dynamic, F90_Desc *array_s, F90_Desc *template_rank_s, F90_Desc *lb_s, F90_Desc *ub_s, F90_Desc *axis_type_s, F90_Desc *axis_info_s, F90_Desc *number_aligned_s, F90_Desc *dynamic_s DCLEN64(axis_type)) { DECL_HDR_PTRS(u); DECL_HDR_PTRS(alignee); DECL_HDR_PTRS(a); DECL_DIM_PTRS(ud); proc *p; __INT_T i, rank, n_alnd, ux; __INT_T alignee_axis[MAXDIMS], vector[MAXDIMS]; char *src; __CLEN_T len; if (F90_TAG_G(array_s) == __DESC) { alignee = DIST_ACTUAL_ARG_G(array_s); if (alignee == NULL) __fort_abort("GLOBAL_TEMPLATE: array is not associated" " with global actual argument"); u = DIST_ALIGN_TARGET_G(alignee); p = DIST_DIST_TARGET_G(alignee); rank = F90_RANK_G(u); for (i = rank; --i >= 0;) alignee_axis[i] = 0; for (i = F90_RANK_G(alignee); --i >= 0;) { ux = DIST_DIM_TAXIS_G(alignee, i); if (ux > 0) alignee_axis[ux - 1] = i + 1; } } else rank = 0; if (ISPRESENT(template_rank)) { I8(store_int)(template_rank, template_rank_s, rank); } if (ISPRESENT(lb)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ud, u, i); vector[i] = F90_DPTR_LBOUND_G(ud); } I8(store_vector)(lb, lb_s, vector, rank); } if (ISPRESENT(ub)) { for (i = rank; --i >= 0;) { SET_DIM_PTRS(ud, u, i); vector[i] = DPTR_UBOUND_G(ud); } I8(store_vector)(ub, ub_s, vector, rank); } if (ISPRESENTC(axis_type)) { len = CLEN(axis_type); for (i = rank; --i >= 0;) { if (alignee_axis[i] > 0) src = "NORMAL"; else if (DIST_SINGLE_G(alignee) >> i & 1) src = "SINGLE"; else src = "REPLICATED"; ftnstrcpy(CADR(axis_type) + i * len, len, src); } } if (ISPRESENT(axis_info)) { for (i = rank; --i >= 0;) { if (alignee_axis[i] > 0) vector[i] = alignee_axis[i]; else if (DIST_SINGLE_G(alignee) >> i & 1) vector[i] = DIST_INFO_G(alignee, i); else { SET_DIM_PTRS(ud, u, i); vector[i] = (DIST_DPTR_PAXIS_G(ud) > 0) ? DIST_DPTR_PSHAPE_G(ud) : 1; } } I8(store_vector)(axis_info, axis_info_s, vector, rank); } if (ISPRESENT(number_aligned)) { n_alnd = 0; if (rank > 0) { for (a = u; a != NULL; a = DIST_NEXT_ALIGNEE_G(a)) ++n_alnd; } I8(store_int)(number_aligned, number_aligned_s, n_alnd); } if (ISPRESENT(dynamic)) { I8(store_log)(dynamic, dynamic_s, rank > 0 && F90_FLAGS_G(u) & __DYNAMIC); } } /* 32 bit CLEN version */ void ENTFTN(GLOBAL_TEMPLATE, global_template)( void *array_b, void *template_rank, void *lb, void *ub, DCHAR(axis_type), void *axis_info, void *number_aligned, void *dynamic, F90_Desc *array_s, F90_Desc *template_rank_s, F90_Desc *lb_s, F90_Desc *ub_s, F90_Desc *axis_type_s, F90_Desc *axis_info_s, F90_Desc *number_aligned_s, F90_Desc *dynamic_s DCLEN(axis_type)) { ENTFTN(GLOBAL_TEMPLATEA, global_templatea)(array_b, template_rank, lb, ub, CADR(axis_type), axis_info, number_aligned, dynamic, array_s, template_rank_s, lb_s, ub_s, axis_type_s, axis_info_s, number_aligned_s, dynamic_s, (__CLEN_T)CLEN(axis_type)); } void ENTFTN(GLOBAL_LBOUND, global_lbound)(void *lbound_b, void *array_b, void *dim_b, F90_Desc *lbound_s, F90_Desc *array_s, F90_Desc *dim_s) { DECL_HDR_PTRS(g); __INT_T i, dim, rank, vector[MAXDIMS]; if (F90_TAG_G(array_s) == __DESC) { g = DIST_ACTUAL_ARG_G(array_s); if (g == NULL) __fort_abort("GLOBAL_LBOUND: array is not associated" " with global actual argument"); rank = F90_RANK_G(g); } else rank = 0; if (ISPRESENT(dim_b)) { dim = I8(fetch_int)(dim_b, dim_s); if (dim < 1 || dim > rank) __fort_abort("GLOBAL_LBOUND: invalid dim"); I8(store_int)(lbound_b, lbound_s, F90_DIM_LBOUND_G(g, dim - 1)); } else { for (i = rank; --i >= 0;) vector[i] = F90_DIM_LBOUND_G(g, i); I8(store_vector)(lbound_b, lbound_s, vector, rank); } } void ENTFTN(GLOBAL_SHAPE, global_shape)(void *shape_b, void *source_b, F90_Desc *shape_s, F90_Desc *source_s) { DECL_HDR_PTRS(g); DECL_DIM_PTRS(gd); __INT_T i, extent, rank, vector[MAXDIMS]; if (F90_TAG_G(source_s) == __DESC) { g = DIST_ACTUAL_ARG_G(source_s); if (g == NULL) __fort_abort("GLOBAL_SHAPE: source is not associated with" " global actual argument"); rank = F90_RANK_G(g); } else rank = 0; for (i = rank; --i >= 0;) { SET_DIM_PTRS(gd, g, i); extent = F90_DPTR_EXTENT_G(gd); if (extent < 0) extent = 0; vector[i] = extent; } I8(store_vector)(shape_b, shape_s, vector, rank); } void ENTFTN(GLOBAL_SIZE, global_size)(void *size_b, void *array_b, void *dim_b, F90_Desc *size_s, F90_Desc *array_s, F90_Desc *dim_s) { DECL_HDR_PTRS(g); DECL_DIM_PTRS(gd); __INT_T i, dim, rank, size; if (F90_TAG_G(array_s) == __DESC) { g = DIST_ACTUAL_ARG_G(array_s); if (g == NULL) __fort_abort("GLOBAL_SIZE: array is not associated with" " global actual argument"); rank = F90_RANK_G(g); SET_DIM_PTRS(gd, g, 0); } else rank = 0; if (ISPRESENT(dim_b)) { dim = I8(fetch_int)(dim_b, dim_s); if (dim < 1 || dim > rank) __fort_abort("GLOBAL_SIZE: invalid dim"); SET_DIM_PTRS(gd, g, dim - 1); size = F90_DPTR_EXTENT_G(gd); if (size < 0) size = 0; } else if (rank > 0) size = F90_GSIZE_G(g); else size = 1; I8(store_int)(size_b, size_s, size); } void ENTFTN(GLOBAL_UBOUND, global_ubound)(void *ubound_b, void *array_b, void *dim_b, F90_Desc *ubound_s, F90_Desc *array_s, F90_Desc *dim_s) { DECL_HDR_PTRS(g); __INT_T i, dim, rank, vector[MAXDIMS]; if (F90_TAG_G(array_s) == __DESC) { g = DIST_ACTUAL_ARG_G(array_s); if (g == NULL) __fort_abort("GLOBAL_UBOUND: array is not associated with" "global actual argument"); rank = F90_RANK_G(g); } else rank = 0; if (ISPRESENT(dim_b)) { dim = I8(fetch_int)(dim_b, dim_s); if (dim < 1 || dim > rank) __fort_abort("GLOBAL_UBOUND: invalid dim"); I8(store_int)(ubound_b, ubound_s, DIM_UBOUND_G(g, dim - 1)); } else { for (i = rank; --i >= 0;) vector[i] = DIM_UBOUND_G(g, i); I8(store_vector)(ubound_b, ubound_s, vector, rank); } } void ENTFTN(ABSTRACT_TO_PHYSICAL, abstract_to_physical)(void *array_b, void *index_b, void *proc_b, F90_Desc *array_s, F90_Desc *index_s, F90_Desc *proc_s) { DECL_HDR_PTRS(g); proc *p; procdim *pd; __INT_T i, index[MAXDIMS], proc; if (F90_TAG_G(array_s) != __DESC) __fort_abort("ABSTRACT_TO_PHYSICAL: argument must be array"); g = DIST_ACTUAL_ARG_G(array_s); if (g == NULL) __fort_abort("ABSTRACT_TO_PHYSICAL: array is not associated" " with global actual argument"); p = DIST_DIST_TARGET_G(g); I8(fetch_vector)(index_b, index_s, index, p->rank); proc = p->base; for (i = p->rank; --i >= 0;) { pd = &p->dim[i]; if (index[i] < 1 || index[i] > pd->shape) __fort_abort("ABSTRACT_TO_PHYSICAL: invalid processor coordinate"); proc += pd->stride * (index[i] - 1); } I8(store_int)(proc_b, proc_s, proc); } void ENTFTN(PHYSICAL_TO_ABSTRACT, physical_to_abstract)(void *array_b, void *proc_b, void *index_b, F90_Desc *array_s, F90_Desc *proc_s, F90_Desc *index_s) { DECL_HDR_PTRS(g); proc *p; procdim *pd; __INT_T i, index[MAXDIMS], proc; if (F90_TAG_G(array_s) != __DESC) __fort_abort("PHYSICAL_TO_ABSTRACT: argument must be array"); g = DIST_ACTUAL_ARG_G(array_s); if (g == NULL) __fort_abort("PHYSICAL_TO_ABSTRACT: array is not associated" " with global actual argument"); p = DIST_DIST_TARGET_G(g); proc = I8(fetch_int)(proc_b, proc_s); proc -= p->base; if (proc < 0 || proc >= p->size) __fort_abort("PHYSICAL_TO_ABSTRACT: invalid processor number"); for (i = 0; i < p->rank; ++i) { pd = &p->dim[i]; RECIP_DIVMOD(&proc, &index[i], proc, pd->shape); index[i]++; } I8(store_vector)(index_b, index_s, index, p->rank); } /* Translate local indices to global indices */ void ENTFTN(LOCAL_TO_GLOBAL, local_to_global)(void *array_b, void *l_index_b, void *g_index_b, F90_Desc *array_s, F90_Desc *l_index_s, F90_Desc *g_index_s) { DECL_HDR_PTRS(gs); DECL_DIM_PTRS(gsd); DECL_DIM_PTRS(asd); __INT_T i, j, local, gof, procno, *procs; __INT_T index[MAXDIMS], pcoord[MAXDIMS]; __INT_T lb, ub, lof; __INT_T lboffset, adjindex, cyclenum, cyclepos, gstride; if (F90_TAG_G(array_s) != __DESC) __fort_abort("LOCAL_TO_GLOBAL: argument must be array"); gs = DIST_ACTUAL_ARG_G(array_s); if (gs == NULL || F90_TAG_G(gs) != __DESC) __fort_abort("LOCAL_TO_GLOBAL: array is not associated with" " global actual argument"); #if defined(DEBUG) if (F90_RANK_G(gs) != F90_RANK_G(array_s)) __fort_abort("LOCAL_TO_GLOBAL: global vs. local rank mismatch"); #endif /* get the local index vector */ I8(fetch_vector)(l_index_b, l_index_s, index, F90_RANK_G(gs)); /* translate local array indices to global array indices */ for (i = 1; i <= F90_RANK_G(gs); ++i) { /* iterate through dimensions */ SET_DIM_PTRS(asd, array_s, i - 1); SET_DIM_PTRS(gsd, gs, i - 1); /* index must be within local array bounds */ if (index[i - 1] < F90_DPTR_LBOUND_G(asd) || index[i - 1] > DPTR_UBOUND_G(asd)) { __fort_abort("LOCAL_TO_GLOBAL: index outside local array bounds\n"); } switch (DFMT(gs, i)) { case DFMT_CYCLIC: case DFMT_CYCLIC_K: if (DIST_DPTR_TSTRIDE_G(gsd) != 1) { int ii, startblocks, off; int elem_per_cycle, elem, my_cycle_lb, my_cycle_ub, first; int tstride, abs_tstride, gblock, pcoord, lbound; tstride = DIST_DPTR_TSTRIDE_G(gsd); abs_tstride = Abs(tstride); gblock = DIST_DPTR_BLOCK_G(gsd); pcoord = DIST_DPTR_PCOORD_G(gsd); lbound = F90_DPTR_LBOUND_G(gsd); off = DIST_DPTR_TOFFSET_G(gsd); first = (lbound * tstride + off) - 1; elem = first + 1; if (tstride < 0) { int start_cpu, ext, text, partialblocks, tlb, tub, cpus; tlb = DIST_DPTR_TLB_G(gsd); tub = DIST_DPTR_TUB_G(gsd); ext = elem - tlb + 1; text = tub - tlb + 1; cpus = Min(DIST_DPTR_PSHAPE_G(gsd), Ceil(text, gblock)); elem_per_cycle = (gblock * cpus); partialblocks = (ext % elem_per_cycle); if (!partialblocks) { start_cpu = cpus - 1; startblocks = gblock * Abs(pcoord - start_cpu); } else if (partialblocks <= gblock) { start_cpu = 0; startblocks = partialblocks * Abs(pcoord - start_cpu); if (!startblocks) startblocks = partialblocks - gblock; } else { RECIP_DIV(&start_cpu, partialblocks, DIST_DPTR_BLOCK_G(gsd)); if (start_cpu < 0) start_cpu += cpus; else if (start_cpu >= cpus) start_cpu -= cpus; startblocks = Abs(pcoord - start_cpu); startblocks *= (partialblocks - gblock); } elem = tub - elem + 1; first = elem - 1; } else { elem_per_cycle = DIST_DPTR_CYCLE_G(gsd); startblocks = pcoord * gblock; } my_cycle_lb = (lbound + startblocks); if (my_cycle_lb > lbound) { while (elem > my_cycle_lb) my_cycle_lb += elem_per_cycle; } my_cycle_ub = my_cycle_lb + (gblock - 1); elem -= abs_tstride; for (ii = F90_DPTR_LBOUND_G(asd); ii <= index[i - 1];) { if (elem > my_cycle_ub) { my_cycle_lb += elem_per_cycle; my_cycle_ub += elem_per_cycle; } else elem += abs_tstride; if (elem >= my_cycle_lb && elem <= my_cycle_ub) { ++ii; } } index[i - 1] = (elem - first) / abs_tstride + (elem - first) % abs_tstride; break; } if (DIST_DPTR_OLB_G(gsd) == F90_DPTR_LBOUND_G(gsd)) { /* First element */ lboffset = 0; } else { lboffset = 0; } adjindex = index[i - 1] - F90_DPTR_LBOUND_G(asd) + lboffset; RECIP_DIVMOD(&cyclenum, &cyclepos, adjindex, DIST_DPTR_BLOCK_G(gsd)); index[i - 1] = cyclenum * DIST_DPTR_CYCLE_G(gsd) + cyclepos + DIST_DPTR_OLB_G(gsd) - lboffset; break; default: /* block */ index[i - 1] += DIST_DPTR_OLB_G(gsd) - F90_DPTR_LBOUND_G(asd); } } /* return the global index vector */ I8(store_vector)(g_index_b, g_index_s, index, F90_RANK_G(gs)); } /* Translate global indices to local indices */ void ENTFTN(GLOBAL_TO_LOCAL, global_to_local)(void *array_b, void *g_index_b, void *l_index_b, void *local_b, void *ncopies_b, void *procs_b, F90_Desc *array_s, F90_Desc *g_index_s, F90_Desc *l_index_s, F90_Desc *local_s, F90_Desc *ncopies_s, F90_Desc *procs_s) { DECL_DIM_PTRS(asd); /* local array dimensions */ DECL_HDR_PTRS(gs); /* global section */ DECL_DIM_PTRS(gsd); /* global section dimensions */ proc *p; repl_t repl; /* replication descriptor */ __INT_T i, j, local, lof, n, procno; __INT_T *procs; __INT_T gindex[MAXDIMS], lindex[MAXDIMS], pcoord[MAXDIMS]; if (F90_TAG_G(array_s) != __DESC) __fort_abort("GLOBAL_TO_LOCAL: argument must be array"); gs = DIST_ACTUAL_ARG_G(array_s); if (gs == NULL || F90_TAG_G(gs) != __DESC) __fort_abort("GLOBAL_TO_LOCAL: array is not associated with" " global actual argument"); #if defined(DEBUG) if (F90_RANK_G(gs) != F90_RANK_G(array_s)) __fort_abort("GLOBAL_TO_LOCAL: global vs. local rank mismatch"); #endif /* get the global index vector */ I8(fetch_vector)(g_index_b, g_index_s, gindex, F90_RANK_G(gs)); /* check if element is local */ local = I8(__fort_islocal)(gs, gindex); if (local && ISPRESENT(l_index_b)) { for (i = F90_RANK_G(gs); i > 0; --i) { SET_DIM_PTRS(asd, array_s, i - 1); SET_DIM_PTRS(gsd, gs, i - 1); switch (DFMT(gs, i)) { case DFMT_CYCLIC: case DFMT_CYCLIC_K: { __INT_T aolb, aoub; aolb = DIST_DPTR_OLB_G(asd); aoub = DIST_DPTR_OUB_G(asd); /* compute local offset for cyclic distribution */ j = DIST_DPTR_TSTRIDE_G(gsd) * gindex[i - 1] + DIST_DPTR_TOFFSET_G(gsd) - DIST_DPTR_CLB_G(gsd); j = Abs(j); RECIP_DIV(&j, j, DIST_DPTR_CYCLE_G(gsd)); lof = j * DIST_DPTR_COFSTR_G(gsd); lindex[i - 1] = F90_DPTR_SSTRIDE_G(gsd) * gindex[i - 1] + F90_DPTR_SOFFSET_G(gsd) - lof - (DIST_DPTR_OLB_G(gsd) - aolb); while (lindex[i - 1] > aoub) { lindex[i - 1] -= (DIST_DPTR_CYCLE_G(gsd) - DIST_DPTR_BLOCK_G(gsd)); } while (lindex[i - 1] < aolb) { lindex[i - 1] += (aoub - aolb + 1); } break; } default: /* block or unmapped: subtract the difference between global and local owned bounds */ lindex[i - 1] = gindex[i - 1] - (DIST_DPTR_OLB_G(gsd) - DIST_DPTR_OLB_G(asd)); } } I8(store_vector)(l_index_b, l_index_s, lindex, F90_RANK_G(gs)); } if (ISPRESENT(local_b)) I8(store_log)(local_b, local_s, local); /* if needed, get replication info */ if (ISPRESENT(ncopies_b) || ISPRESENT(procs_b)) I8(__fort_describe_replication)(gs, &repl); if (ISPRESENT(ncopies_b)) I8(store_int)(ncopies_b, ncopies_s, repl.ncopies); if (ISPRESENT(procs_b)) { procno = I8(__fort_owner)(gs, gindex); if (repl.ncopies == 1) I8(store_vector)(procs_b, procs_s, &procno, 1); else { procs = (__INT_T *)__fort_malloc(repl.ncopies * sizeof(__INT_T)); for (i = repl.ndim; --i >= 0;) pcoord[i] = 0; i = j = 0; while (j < repl.ndim) { if (pcoord[j] < repl.pcnt[j]) { procs[i++] = procno; procno += repl.pstr[j]; ++pcoord[j]; j = 0; } else { procno -= repl.pcnt[j] * repl.pstr[j]; pcoord[j++] = 0; } } #if defined(DEBUG) if (i != repl.ncopies) __fort_abort("GLOBAL_TO_LOCAL: replication info incorrect"); #endif I8(store_vector)(procs_b, procs_s, procs, repl.ncopies); __fort_free(procs); } } } /* Return the number of non-empty blocks in all or a specified dimension. */ void ENTFTN(LOCAL_BLKCNT, local_blkcnt)(void *blkcnt_b, void *array_b, void *dim_b, void *proc_b, F90_Desc *blkcnt_s, F90_Desc *array_s, F90_Desc *dim_s, F90_Desc *proc_s) { DECL_HDR_PTRS(gs); DECL_DIM_PTRS(gsd); __INT_T dim, proc; __INT_T blkcnt[MAXDIMS]; __INT_T cl, cn, il, iu; if (F90_TAG_G(array_s) != __DESC) __fort_abort("LOCAL_BLKCNT: argument must be array"); if ((gs = DIST_ACTUAL_ARG_G(array_s)) == NULL) __fort_abort("LOCAL_BLKCNT: array is not associated with global" " actual argument"); if (ISPRESENT(dim_b)) { dim = I8(fetch_int)(dim_b, dim_s); if (dim < 1 || dim > F90_RANK_G(gs)) __fort_abort("LOCAL_BLKCNT: invalid dim"); } else dim = 0; if (ISPRESENT(proc_b)) { if ((proc = I8(fetch_int)(proc_b, proc_s)) < 0 || proc >= GET_DIST_TCPUS) __fort_abort("LOCAL_BLKCNT: invalid proc"); if (proc != GET_DIST_LCPU) __fort_abort("LOCAL_BLKCNT: proc .ne. my_processor() unsupported"); } else proc = GET_DIST_LCPU; if (dim != 0) { /* compute blkcnt for specified dimension */ blkcnt[0] = 0; if (~F90_FLAGS_G(gs) & __OFF_TEMPLATE) { I8(__fort_cycle_bounds)(gs); SET_DIM_PTRS(gsd, gs, dim - 1); for (cl = DIST_DPTR_CL_G(gsd), cn = DIST_DPTR_CN_G(gsd); --cn >= 0; cl += DIST_DPTR_CS_G(gsd)) if (I8(__fort_block_bounds)(gs, dim, cl, &il, &iu) > 0) blkcnt[0]++; } I8(store_int)(blkcnt_b, blkcnt_s, blkcnt[0]); } else { /* compute blkcnt for all dimensions */ for (dim = F90_RANK_G(gs); dim > 0; --dim) blkcnt[dim - 1] = 0; if (~F90_FLAGS_G(gs) & __OFF_TEMPLATE) { I8(__fort_cycle_bounds)(gs); for (dim = F90_RANK_G(gs); dim > 0; --dim) { SET_DIM_PTRS(gsd, gs, dim - 1); for (cl = DIST_DPTR_CL_G(gsd), cn = DIST_DPTR_CN_G(gsd); --cn >= 0; cl += DIST_DPTR_CS_G(gsd)) if (I8(__fort_block_bounds)(gs, dim, cl, &il, &iu) > 0) blkcnt[dim - 1]++; } } I8(store_vector)(blkcnt_b, blkcnt_s, blkcnt, F90_RANK_G(gs)); } } /* Return the lower indices of all non-empty blocks. */ void ENTFTN(LOCAL_LINDEX, local_lindex)(void *lindex_b, void *array_b, void *dim_b, void *proc_b, F90_Desc *lindex_s, F90_Desc *array_s, F90_Desc *dim_s, F90_Desc *proc_s) { DECL_HDR_PTRS(gs); DECL_DIM_PTRS(gsd); __INT_T dim; __INT_T proc; __INT_T blkcnt, cl, cn, il, iu; /* check array argument */ if (F90_TAG_G(array_s) != __DESC) __fort_abort("LOCAL_LINDEX: argument must be array"); if ((gs = DIST_ACTUAL_ARG_G(array_s)) == NULL) __fort_abort("LOCAL_LINDEX: array is not associated with global" " actual argument"); /* check dim argument */ dim = I8(fetch_int)(dim_b, dim_s); if (dim < 1 || dim > F90_RANK_G(gs)) __fort_abort("LOCAL_LINDEX: invalid dim argument"); /* check proc argument */ if (ISPRESENT(proc_b)) { proc = I8(fetch_int)(proc_b, proc_s); if (proc < 0 || proc >= GET_DIST_TCPUS) __fort_abort("LOCAL_LINDEX: invalid proc argument"); if (proc != GET_DIST_LCPU) __fort_abort("LOCAL_LINDEX: proc .ne. my_processor() unsupported"); } else proc = GET_DIST_LCPU; /* compute lower indices of all non-empty blocks */ if (~F90_FLAGS_G(gs) & __OFF_TEMPLATE) { I8(__fort_cycle_bounds)(gs); SET_DIM_PTRS(gsd, gs, dim - 1); blkcnt = 0; for (cl = DIST_DPTR_CL_G(gsd), cn = DIST_DPTR_CN_G(gsd); --cn >= 0; cl += DIST_DPTR_CS_G(gsd)) { if (I8(__fort_block_bounds)(gs, dim, cl, &il, &iu) > 0) { DECL_DIM_PTRS(asd); SET_DIM_PTRS(asd, array_s, dim - 1); switch (DFMT(gs, dim)) { case DFMT_CYCLIC: case DFMT_CYCLIC_K: { __INT_T aolb, aoub, j, lof; aolb = DIST_DPTR_OLB_G(asd); aoub = DIST_DPTR_OUB_G(asd); /* compute local offset for cyclic distribution */ j = DIST_DPTR_TSTRIDE_G(gsd) * il + DIST_DPTR_TOFFSET_G(gsd) - DIST_DPTR_CLB_G(gsd); j = Abs(j); RECIP_DIV(&j, j, DIST_DPTR_CYCLE_G(gsd)); lof = j * DIST_DPTR_COFSTR_G(gsd); il = F90_DPTR_SSTRIDE_G(gsd) * il + F90_DPTR_SOFFSET_G(gsd) - lof - (DIST_DPTR_OLB_G(gsd) - aolb); while (il > aoub) { il -= (DIST_DPTR_CYCLE_G(gsd) - DIST_DPTR_BLOCK_G(gsd)); } while (il < aolb) { il += (aoub - aolb + 1); } break; } default: /* block or unmapped: subtract the difference between global and local owned bounds */ il -= (DIST_DPTR_OLB_G(gsd) - DIST_DPTR_OLB_G(asd)); } I8(store_element)(lindex_b, lindex_s, ++blkcnt, il); } } } } /* Return the upper indices of all non-empty blocks */ void ENTFTN(LOCAL_UINDEX, local_uindex)(void *uindex_b, void *array_b, void *dim_b, void *proc_b, F90_Desc *uindex_s, F90_Desc *array_s, F90_Desc *dim_s, F90_Desc *proc_s) { DECL_HDR_PTRS(gs); DECL_DIM_PTRS(gsd); __INT_T dim; __INT_T proc; __INT_T blkcnt, cl, cn, il, iu; /* check array argument */ if (F90_TAG_G(array_s) != __DESC) __fort_abort("LOCAL_UINDEX: argument must be array"); if ((gs = DIST_ACTUAL_ARG_G(array_s)) == NULL) __fort_abort("LOCAL_UINDEX: array is not associated with global" " actual argument"); /* check dim argument */ dim = I8(fetch_int)(dim_b, dim_s); if (dim < 1 || dim > F90_RANK_G(gs)) __fort_abort("LOCAL_UINDEX: invalid dim argument"); /* check proc argument */ if (ISPRESENT(proc_b)) { proc = I8(fetch_int)(proc_b, proc_s); if (proc < 0 || proc >= GET_DIST_TCPUS) __fort_abort("LOCAL_UINDEX: invalid proc argument"); if (proc != GET_DIST_LCPU) __fort_abort("LOCAL_UINDEX: proc .ne. my_processor() unsupported"); } else proc = GET_DIST_LCPU; /* compute upper indices of all non-empty blocks */ if (~F90_FLAGS_G(gs) & __OFF_TEMPLATE) { I8(__fort_cycle_bounds)(gs); SET_DIM_PTRS(gsd, gs, dim - 1); blkcnt = 0; for (cl = DIST_DPTR_CL_G(gsd), cn = DIST_DPTR_CN_G(gsd); --cn >= 0; cl += DIST_DPTR_CS_G(gsd)) { if (I8(__fort_block_bounds)(gs, dim, cl, &il, &iu) > 0) { DECL_DIM_PTRS(asd); SET_DIM_PTRS(asd, array_s, dim - 1); switch (DFMT(gs, dim)) { case DFMT_CYCLIC: case DFMT_CYCLIC_K: { __INT_T aolb, aoub, j, lof; aolb = DIST_DPTR_OLB_G(asd); aoub = DIST_DPTR_OUB_G(asd); /* compute local offset for cyclic distribution */ j = DIST_DPTR_TSTRIDE_G(gsd) * iu + DIST_DPTR_TOFFSET_G(gsd) - DIST_DPTR_CLB_G(gsd); j = Abs(j); RECIP_DIV(&j, j, DIST_DPTR_CYCLE_G(gsd)); lof = j * DIST_DPTR_COFSTR_G(gsd); iu = F90_DPTR_SSTRIDE_G(gsd) * iu + F90_DPTR_SOFFSET_G(gsd) - lof - (DIST_DPTR_OLB_G(gsd) - aolb); while (iu > aoub) { iu -= (DIST_DPTR_CYCLE_G(gsd) - DIST_DPTR_BLOCK_G(gsd)); } while (iu < aolb) { iu += (aoub - aolb + 1); } break; } default: /* block or unmapped: subtract the difference between global and local owned bounds */ iu -= (DIST_DPTR_OLB_G(gsd) - DIST_DPTR_OLB_G(asd)); } I8(store_element)(uindex_b, uindex_s, ++blkcnt, iu); } } } } /* system inquiry routines */ void ENTFTN(PROCESSORS_SHAPE, processors_shape)(__INT_T *shape, F90_Desc *shape_s) { I8(store_vector_int)(shape, shape_s, GET_DIST_TCPUS_ADDR, 1); } #ifndef DESC_I8 __INT_T ENTFTN(MY_PROCESSOR, my_processor)() { return GET_DIST_LCPU; } __INT_T ENTFTN(MYPROCNUM, myprocnum)() { return GET_DIST_LCPU; } int __fort_nprocs() { return GET_DIST_TCPUS; } __INT_T ENTFTN(NPROCS, nprocs)() { return GET_DIST_TCPUS; } __INT_T ENTFTN(NUMBER_OF_PROCESSORS, number_of_processors) (__INT_T *dim, __INT_T *szdim) { int d, np; np = GET_DIST_TCPUS; if (ISPRESENT(dim)) { d = __fort_varying_int(dim, szdim); if (d != 1) np = 1; } return np; } __INT8_T ENTFTN(KNUMBER_OF_PROCESSORS, knumber_of_processors) (__INT_T *dim, __INT_T *szdim) { /* * -i8 variant of NUMBER_OF_PROCESSORS */ int d, np; np = GET_DIST_TCPUS; if (ISPRESENT(dim)) { d = __fort_varying_int(dim, szdim); if (d != 1) np = 1; } return np; } __INT_T ENTFTN(PROCESSORS_RANK, processors_rank)() { return 1; } __INT8_T ENTFTN(KPROCESSORS_RANK, kprocessors_rank)() { /* * -i8 variant of PROCESSORS_RANK */ return 1; } /* Translate processor number to processor grid coordinates. rank and shape describe the processor grid. The processor number given by procnum is translated to grid coordinates returned in coord. Grid coordinates are integers between 1 and the size of the corresponding grid dimension. If the processor number is outside the bounds of the processor grid, zeroes are returned in coord. */ void __fort_procnum_to_coord(int procnum, int rank, __INT_T *shape, __INT_T *coord) { int i, m; if (procnum >= 0) { for (i = 0; i < rank; ++i) { if (shape[i] <= 0) __fort_abort("PROCNUM_TO_COORD: invalid processor shape"); m = procnum / shape[i]; coord[i] = procnum - m * shape[i] + 1; procnum = m; } } if (procnum != 0) { for (i = rank; --i >= 0;) coord[i] = 0; } } void ENTFTN(PROCNUM_TO_COORD, procnum_to_coord)(__INT_T *procnum, __INT_T *rank, __INT_T *shape, __INT_T *coord) { __fort_procnum_to_coord(*procnum, *rank, shape, coord); } /* Translate processor grid coordinates to processor number. rank and shape describe the processor grid. The processor grid coordinates in coord are translated to a processor number. Grid coordinates are integers between 1 and the size of the corresponding grid dimension. If the coordinates are outside the bounds of the processor grid, -1 is returned. */ int __fort_coord_to_procnum(__INT_T rank, __INT_T *shape, __INT_T *coord) { int i, m, p; m = 1; p = 0; for (i = 0; i < rank; ++i) { if (shape[i] <= 0) __fort_abort("COORD_TO_PROCNUM: invalid processor shape"); if (coord[i]<1 | coord[i]> shape[i]) return -1; p += (coord[i] - 1) * m; m *= shape[i]; } return p; } __INT_T ENTFTN(COORD_TO_PROCNUM, coord_to_procnum) (__INT_T *rank, __INT_T *shape, __INT_T *coord) { return __fort_coord_to_procnum(*rank, shape, coord); } #endif