/*
 * Copyright (c) 2012-2019, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/** \file
 * \brief Fix up and optimize general IL_SMOVEJ operations.
 *
 * Smove may be added by the expander, or by other transformations, such as the
 * accelerator compiler or IPA, when adding struct assignments.
 */

#include "rmsmove.h"
#include "gbldefs.h"
#include "error.h"
#include "global.h"
#include "symtab.h"
#include "ili.h"

/*
 * Chunk descriptors used when a move is expanded into explicit load/store
 * pairs.  Entries are ordered from the widest chunk (8 bytes) down to the
 * narrowest (1 byte); rm_smove() walks them in this order so the widest
 * chunk that still fits the remaining length is always used first.
 *   msz   - MSZ_* code passed as the last operand of the load and the store
 *   msize - chunk size in bytes
 *   ld/st - ILI opcodes used for the load and the store of this chunk
 */
static struct {
  int msz, msize;
  ILI_OP ld, st;
} info[4] = {
    {MSZ_I8, 8, IL_LDKR, IL_STKR},
    {MSZ_WORD, 4, IL_LD, IL_ST},
    {MSZ_UHWORD, 2, IL_LD, IL_ST},
    {MSZ_UBYTE, 1, IL_LD, IL_ST},
};

#define SMOVE_CHUNK 8 /* using movsq */
#define SMOVE_MIN 64
#define INFO1 0 /* first info[] entry considered by rm_smove() */

/* printf-style format of the symbol names created by fixup_nme() */
#define SMOVE_NAME_FMT "..__smove__%s__%d"

/**
 * \brief Build a member NME describing one chunk of a variable being copied.
 *
 * \param nmex   NME of the whole object; only NT_VAR NMEs with a valid
 *               symbol are rewritten
 * \param msize  chunk size in bytes; 8, 4, 2 and 1 are supported
 * \param offset value recorded as the address of the generated member symbol
 * \param iter   integer appended to the generated symbol name
 * \return a new NT_MEM NME whose parent is \p nmex and whose symbol is the
 *         generated member; \p nmex itself when it is not a rewritable
 *         NT_VAR NME, when \p msize is not a supported size, or when the
 *         member name cannot be formatted
 */
static int
fixup_nme(int nmex, int msize, int offset, int iter)
{
  static char buf[100];
  SPTR new_sym;
  int new_nme;
  DTYPE new_dtype;
  int sym;
  int need;
  size_t name_len;
  char *name = NULL;
  bool is_malloced = false;

  if (nmex <= 0 || NME_TYPE(nmex) != NT_VAR || NME_SYM(nmex) <= 0)
    return nmex;

  switch (msize) {
  case 8:
    new_dtype = DT_UINT8;
    break;
  case 4:
    new_dtype = DT_INT;
    break;
  case 2:
    new_dtype = DT_USINT;
    break;
  case 1:
    new_dtype = DT_BINT;
    break;
  default:
    /* No matching member type: keep the original NME instead of giving the
     * generated symbol an indeterminate dtype. */
    return nmex;
  }

  sym = NME_SYM(nmex);

  /* Measure the formatted name first, so the buffer is large enough for any
   * value of iter rather than only for a single digit. */
  need = snprintf(NULL, 0, SMOVE_NAME_FMT, SYMNAME(sym), iter);
  if (need < 0)
    return nmex;
  name_len = (size_t)need + 1; /* + terminating NUL */

  if (name_len <= sizeof buf) {
    name = &buf[0];
  } else {
    name = (char *)malloc(name_len);
    assert(name != NULL, "Fail to malloc a buffer", nmex, ERR_Fatal);
    is_malloced = true;
  }
  snprintf(name, name_len, SMOVE_NAME_FMT, SYMNAME(sym), iter);

  new_sym = getsymbol(name);
  DTYPEP(new_sym, new_dtype);
  STYPEP(new_sym, ST_MEMBER);
  CCSYMP(new_sym, 1);
  ADDRESSP(new_sym, offset);
  new_nme = addnme(NT_MEM, SPTR_NULL, nmex, 0);
  NME_SYM(new_nme) = new_sym;

  if (is_malloced)
    free(name);
  return new_nme;
} /* fixup_nme */

void exp_remove_gsmove(void);

/**
 * \brief Replace IL_SMOVEJ ILTs by explicit loads and stores.
 *
 * Every block reachable from gbl.entbih through BIH_NEXT is scanned.  For
 * each ILT whose ILI is an IL_SMOVEJ:
 *  - if the length is greater than SMOVE_MIN, the ILI is retagged as
 *    IL_SMOVE, its third operand becomes a constant holding the number of
 *    SMOVE_CHUNK-sized units, and only the remainder is expanded below;
 *    the block is then flagged with BIH_SMOVE;
 *  - the (remaining) length is copied with load/store pairs taken from
 *    info[], widest chunk first.  The first pair reuses the current ILT when
 *    no IL_SMOVE was kept; every later pair is appended with addilt().
 */
void
rm_smove(void)
{
  int bihx, iltx, ilix, new_acon;

  /*
   * First implementation of GSMOVE will be under XBIT(2,0x800000). When this
   * is the only method, presumably, the code in exp_remove_gsmove() will be
   * moved to the ensuing loop.
   */
  if (USE_GSMOVE)
    exp_remove_gsmove();

  for (bihx = gbl.entbih; bihx; bihx = BIH_NEXT(bihx)) {
    bool have_smove = false;

    rdilts(bihx);
    for (iltx = BIH_ILTFIRST(bihx); iltx; iltx = ILT_NEXT(iltx)) {
      ilix = ILT_ILIP(iltx);
      if (ILI_OPC(ilix) == IL_SMOVEJ) {
        int srcx, src_nme, destx, dest_nme, len;
        int i, n, offset = 0;
        /* number of statements produced so far for this move, counting a
         * retained IL_SMOVE */
        int any = 0;

        /* target-dependent optimizations */
        srcx = ILI_OPND(ilix, 1);
        src_nme = ILI_OPND(ilix, 3);
        destx = ILI_OPND(ilix, 2);
        dest_nme = ILI_OPND(ilix, 4);
        len = ILI_OPND(ilix, 5);

        if (len > SMOVE_MIN) {
          /* turn the SMOVEJ into SMOVE, change the len to IL_ACON */
          ILI_OPCP(ilix, IL_SMOVE);
          ILI_OPND(ilix, 1) = srcx;
          /* For LLVM we use memcpy and do not need to chunk up the
           * copies.
           * XXX: Fortran has a special case in make_stmt (cgmain.c)
           * for the 'case STMT_SMOVE'.  Fortran will multiply the
           * length by 8 or 4 depending on the architecture.
           * C just takes the length as given, and we do not want to chunk the
           * length since we will be using memcpy and let llvm's memcpy handle
           * how it performs the copy.
           */
          n = len / SMOVE_CHUNK;
          offset = n * SMOVE_CHUNK;
          new_acon = ad_aconi(n);
          ILI_OPND(ilix, 3) = new_acon;
          /* only the tail that is not a whole chunk is expanded below */
          len -= offset;
          ++any;
          have_smove = true;
        }

        if (XBIT(2, 0x4000)) {
          src_nme = NME_UNK;
          dest_nme = NME_UNK;
        }

        for (i = INFO1; i < 4; ++i) {
          int msz = info[i].msz;
          int msize = info[i].msize;

          while (len >= msize) {
            int ilioffset, ilix2;
            int ndest_nme = dest_nme, nsrc_nme = src_nme;

            /* add the load, store */
            if (any == 1) {
              /* the second statement of this move (and all later ones)
               * refers to the addresses through IL_CSEAR */
              srcx = ad1ili(IL_CSEAR, srcx);
              destx = ad1ili(IL_CSEAR, destx);
            }
            if (!XBIT(2, 0x4000)) {
              nsrc_nme = fixup_nme(src_nme, msize, offset, i);
              ndest_nme = fixup_nme(dest_nme, msize, offset, i);
            }
            ilioffset = ad_aconi(offset);
            ilix = ad3ili(IL_AADD, srcx, ilioffset, 0);
            ilix = ad3ili(info[i].ld, ilix, nsrc_nme, msz);
            ilix2 = ad3ili(IL_AADD, destx, ilioffset, 0);
            ilix = ad4ili(info[i].st, ilix, ilix2, ndest_nme, msz);
            if (!any) {
              /* reuse this ILT */
              ILT_ILIP(iltx) = ilix;
              /* flag this as a store operation */
              ILT_ST(iltx) = 1;
            } else {
              iltx = addilt(iltx, ilix);
              /* ILT_ST gets set by addilt here */
            }
            ++any;
            offset += msize;
            len -= msize;
          }
        }
      }
    }
    wrilts(bihx);
    if (have_smove)
      BIH_SMOVE(bihx) = 1;
  }
} /* rm_smove */