1 /*
2 * Copyright (c) 2012-2019, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 /** \file
19 * \brief Fix up and optimize general IL_SMOVEJ operations.
20 *
21 * Smove may be added by the expander, or by other transformations, such as the
22 * accelerator compiler or IPA, when adding struct assignments.
23 */
24
25 #include "rmsmove.h"
26 #include "gbldefs.h"
27 #include "error.h"
28 #include "global.h"
29 #include "symtab.h"
30 #include "ili.h"
31
32 static struct {
33 int msz, msize;
34 ILI_OP ld, st;
35 } info[4] = {
36 {MSZ_I8, 8, IL_LDKR, IL_STKR},
37 {MSZ_WORD, 4, IL_LD, IL_ST},
38 {MSZ_UHWORD, 2, IL_LD, IL_ST},
39 {MSZ_UBYTE, 1, IL_LD, IL_ST},
40 };
41 #define SMOVE_CHUNK 8 /* using movsq */
42 #define SMOVE_MIN 64
43 #define INFO1 0
44
45 static int
fixup_nme(int nmex,int msize,int offset,int iter)46 fixup_nme(int nmex, int msize, int offset, int iter)
47 {
48 SPTR new_sym;
49 int new_nme;
50 DTYPE new_dtype;
51 static char buf[100];
52 int buf_len = 100;
53 int sym;
54 char *name = NULL;
55 int name_len;
56 bool is_malloced = false;
57
58 if (nmex <= 0 || NME_TYPE(nmex) != NT_VAR || NME_SYM(nmex) <= 0)
59 return nmex;
60
61 switch (msize) {
62 case 8:
63 new_dtype = DT_UINT8;
64 break;
65 case 4:
66 new_dtype = DT_INT;
67 break;
68 case 2:
69 new_dtype = DT_USINT;
70 break;
71 case 1:
72 new_dtype = DT_BINT;
73 break;
74 }
75
76 sym = NME_SYM(nmex);
77 name_len = strlen(SYMNAME(sym)) + 15;
78 if (name_len <= buf_len) {
79 name = &buf[0];
80 } else {
81 name = (char *)malloc(name_len);
82 assert(name != NULL, "Fail to malloc a buffer", nmex, ERR_Fatal);
83 is_malloced = true;
84 }
85
86 sprintf(name, "..__smove__%s__%d", SYMNAME(sym), iter);
87 new_sym = getsymbol(name);
88 DTYPEP(new_sym, new_dtype);
89 STYPEP(new_sym, ST_MEMBER);
90 CCSYMP(new_sym, 1);
91 ADDRESSP(new_sym, offset);
92 new_nme = addnme(NT_MEM, SPTR_NULL, nmex, 0);
93 NME_SYM(new_nme) = new_sym;
94
95 if (is_malloced)
96 free(name);
97 return new_nme;
98 } /* fixup_nme */
99
100 void exp_remove_gsmove(void);
101 void
rm_smove(void)102 rm_smove(void)
103 {
104 int bihx, iltx, ilix, new_acon;
105 /*
106 * First implementation of GSMOVE will be under XBIT(2,0x800000). When this
107 * is the only method, presumably, the code in exp_remove_gsmove() will be
108 * moved to the ensuing loop.
109 */
110 if (USE_GSMOVE)
111 exp_remove_gsmove();
112 for (bihx = gbl.entbih; bihx; bihx = BIH_NEXT(bihx)) {
113 bool have_smove = false;
114 rdilts(bihx);
115 for (iltx = BIH_ILTFIRST(bihx); iltx; iltx = ILT_NEXT(iltx)) {
116 ilix = ILT_ILIP(iltx);
117 if (ILI_OPC(ilix) == IL_SMOVEJ) {
118 int srcx, src_nme, destx, dest_nme, len;
119 int i, n, offset = 0, any = 0;
120 /* target-dependent optimizations */
121 srcx = ILI_OPND(ilix, 1);
122 src_nme = ILI_OPND(ilix, 3);
123 destx = ILI_OPND(ilix, 2);
124 dest_nme = ILI_OPND(ilix, 4);
125 len = ILI_OPND(ilix, 5);
126 offset = 0;
127 if (len > SMOVE_MIN) {
128 /* turn the SMOVEI into SMOVE, change the len to IL_ACON */
129 ILI_OPCP(ilix, IL_SMOVE);
130 ILI_OPND(ilix, 1) = srcx;
131
132 /* For LLVM we use memcpy and do not need to chunk up the
133 * copies.
134 * XXX: Fortran has a special case in make_stmt (cgmain.c)
135 * for the 'case STMT_SMOVE'. Fortran will multiply the
136 * length by 8 or 4 depending on the architecture.
137 * C just takes the length as given, and we do not want to chunk the
138 * length since we will be using memcpy and let llvm's memcpy handle
139 * how it performs the copy.
140 */
141 n = len / SMOVE_CHUNK;
142 offset = n * SMOVE_CHUNK;
143 new_acon = ad_aconi(n);
144 ILI_OPND(ilix, 3) = new_acon;
145 len -= offset;
146 ++any;
147 have_smove = true;
148 }
149 if (XBIT(2, 0x4000)) {
150 src_nme = NME_UNK;
151 dest_nme = NME_UNK;
152 }
153 for (i = INFO1; i < 4; ++i) {
154 int msz = info[i].msz;
155 int msize = info[i].msize;
156 while (len >= msize) {
157 int ilioffset, ilix2;
158 int ndest_nme = dest_nme, nsrc_nme = src_nme;
159 /* add the load, store */
160 if (any == 1) {
161 srcx = ad1ili(IL_CSEAR, srcx);
162 destx = ad1ili(IL_CSEAR, destx);
163 }
164 if (!XBIT(2, 0x4000)) {
165 nsrc_nme = fixup_nme(src_nme, msize, offset, i);
166 ndest_nme = fixup_nme(dest_nme, msize, offset, i);
167 }
168 ilioffset = ad_aconi(offset);
169 ilix = ad3ili(IL_AADD, srcx, ilioffset, 0);
170 ilix = ad3ili(info[i].ld, ilix, nsrc_nme, msz);
171 ilix2 = ad3ili(IL_AADD, destx, ilioffset, 0);
172 ilix = ad4ili(info[i].st, ilix, ilix2, ndest_nme, msz);
173 if (!any) {
174 /* reuse this ILT */
175 ILT_ILIP(iltx) = ilix;
176 /* flag this as a store operation */
177 ILT_ST(iltx) = 1;
178 } else {
179 iltx = addilt(iltx, ilix);
180 /* ILT_ST gets set by addilt here */
181 }
182 ++any;
183 offset += msize;
184 len -= msize;
185 }
186 }
187 }
188 }
189 wrilts(bihx);
190 if (have_smove)
191 BIH_SMOVE(bihx) = 1;
192 }
193 } /* rm_smove */
194