/*
 * Copyright (c) 2012-2019, NVIDIA CORPORATION.  All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

/** \file
 * \brief Fix up and optimize general IL_SMOVEJ operations.
 *
 * Smove may be added by the expander, or by other transformations, such as the
 * accelerator compiler or IPA, when adding struct assignments.
 */

#include "rmsmove.h"

#include <stdio.h>
#include <stdlib.h>

#include "gbldefs.h"
#include "error.h"
#include "global.h"
#include "symtab.h"
#include "ili.h"

32 static struct {
33   int msz, msize;
34   ILI_OP ld, st;
35 } info[4] = {
36     {MSZ_I8, 8, IL_LDKR, IL_STKR},
37     {MSZ_WORD, 4, IL_LD, IL_ST},
38     {MSZ_UHWORD, 2, IL_LD, IL_ST},
39     {MSZ_UBYTE, 1, IL_LD, IL_ST},
40 };
41 #define SMOVE_CHUNK 8 /* using movsq */
42 #define SMOVE_MIN 64
43 #define INFO1 0
44 
45 static int
fixup_nme(int nmex,int msize,int offset,int iter)46 fixup_nme(int nmex, int msize, int offset, int iter)
47 {
48   SPTR new_sym;
49   int new_nme;
50   DTYPE new_dtype;
51   static char buf[100];
52   int buf_len = 100;
53   int sym;
54   char *name = NULL;
55   int name_len;
56   bool is_malloced = false;
57 
58   if (nmex <= 0 || NME_TYPE(nmex) != NT_VAR || NME_SYM(nmex) <= 0)
59     return nmex;
60 
61   switch (msize) {
62   case 8:
63     new_dtype = DT_UINT8;
64     break;
65   case 4:
66     new_dtype = DT_INT;
67     break;
68   case 2:
69     new_dtype = DT_USINT;
70     break;
71   case 1:
72     new_dtype = DT_BINT;
73     break;
74   }
75 
76   sym = NME_SYM(nmex);
77   name_len = strlen(SYMNAME(sym)) + 15;
78   if (name_len <= buf_len) {
79     name = &buf[0];
80   } else {
81     name = (char *)malloc(name_len);
82     assert(name != NULL, "Fail to malloc a buffer", nmex, ERR_Fatal);
83     is_malloced = true;
84   }
85 
86   sprintf(name, "..__smove__%s__%d", SYMNAME(sym), iter);
87   new_sym = getsymbol(name);
88   DTYPEP(new_sym, new_dtype);
89   STYPEP(new_sym, ST_MEMBER);
90   CCSYMP(new_sym, 1);
91   ADDRESSP(new_sym, offset);
92   new_nme = addnme(NT_MEM, SPTR_NULL, nmex, 0);
93   NME_SYM(new_nme) = new_sym;
94 
95   if (is_malloced)
96     free(name);
97   return new_nme;
98 } /* fixup_nme */
99 
100 void exp_remove_gsmove(void);
101 void
rm_smove(void)102 rm_smove(void)
103 {
104   int bihx, iltx, ilix, new_acon;
105   /*
106    * First implementation of GSMOVE will be under XBIT(2,0x800000). When this
107    * is the only method, presumably, the code in exp_remove_gsmove() will be
108    * moved to the ensuing loop.
109    */
110   if (USE_GSMOVE)
111     exp_remove_gsmove();
112   for (bihx = gbl.entbih; bihx; bihx = BIH_NEXT(bihx)) {
113     bool have_smove = false;
114     rdilts(bihx);
115     for (iltx = BIH_ILTFIRST(bihx); iltx; iltx = ILT_NEXT(iltx)) {
116       ilix = ILT_ILIP(iltx);
117       if (ILI_OPC(ilix) == IL_SMOVEJ) {
118         int srcx, src_nme, destx, dest_nme, len;
119         int i, n, offset = 0, any = 0;
120         /* target-dependent optimizations */
121         srcx = ILI_OPND(ilix, 1);
122         src_nme = ILI_OPND(ilix, 3);
123         destx = ILI_OPND(ilix, 2);
124         dest_nme = ILI_OPND(ilix, 4);
125         len = ILI_OPND(ilix, 5);
126         offset = 0;
127         if (len > SMOVE_MIN) {
128           /* turn the SMOVEI into SMOVE, change the len to IL_ACON */
129           ILI_OPCP(ilix, IL_SMOVE);
130           ILI_OPND(ilix, 1) = srcx;
131 
132 /* For LLVM we use memcpy and do not need to chunk up the
133  * copies.
134  * XXX: Fortran has a special case in make_stmt (cgmain.c)
135  * for the 'case STMT_SMOVE'.  Fortran will multiply the
136  * length by 8 or 4 depending on the architecture.
137  * C just takes the length as given, and we do not want to chunk the
138  * length since we will be using memcpy and let llvm's memcpy handle
139  * how it performs the copy.
140  */
141           n = len / SMOVE_CHUNK;
142           offset = n * SMOVE_CHUNK;
143           new_acon = ad_aconi(n);
144           ILI_OPND(ilix, 3) = new_acon;
145           len -= offset;
146           ++any;
147           have_smove = true;
148         }
149         if (XBIT(2, 0x4000)) {
150           src_nme = NME_UNK;
151           dest_nme = NME_UNK;
152         }
153         for (i = INFO1; i < 4; ++i) {
154           int msz = info[i].msz;
155           int msize = info[i].msize;
156           while (len >= msize) {
157             int ilioffset, ilix2;
158             int ndest_nme = dest_nme, nsrc_nme = src_nme;
159             /* add the load, store */
160             if (any == 1) {
161               srcx = ad1ili(IL_CSEAR, srcx);
162               destx = ad1ili(IL_CSEAR, destx);
163             }
164             if (!XBIT(2, 0x4000)) {
165               nsrc_nme = fixup_nme(src_nme, msize, offset, i);
166               ndest_nme = fixup_nme(dest_nme, msize, offset, i);
167             }
168             ilioffset = ad_aconi(offset);
169             ilix = ad3ili(IL_AADD, srcx, ilioffset, 0);
170             ilix = ad3ili(info[i].ld, ilix, nsrc_nme, msz);
171             ilix2 = ad3ili(IL_AADD, destx, ilioffset, 0);
172             ilix = ad4ili(info[i].st, ilix, ilix2, ndest_nme, msz);
173             if (!any) {
174               /* reuse this ILT */
175               ILT_ILIP(iltx) = ilix;
176               /* flag this as a store operation */
177               ILT_ST(iltx) = 1;
178             } else {
179               iltx = addilt(iltx, ilix);
180               /* ILT_ST gets set by addilt here */
181             }
182             ++any;
183             offset += msize;
184             len -= msize;
185           }
186         }
187       }
188     }
189     wrilts(bihx);
190     if (have_smove)
191       BIH_SMOVE(bihx) = 1;
192   }
193 } /* rm_smove */
194