1 /*
2 * Copyright (c) 1994-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 */
17
18 /** \file
19 \brief Data dependence framework.
20 */
21
22 #include "gbldefs.h"
23 #include "error.h"
24 #ifndef NOVECTORIZE
25 #include "global.h"
26 #include "symtab.h"
27 #include "ast.h"
28 #include "nme.h"
29 #include "optimize.h"
30 #include "hlvect.h"
31
32 #include "induc.h"
33
34 #include "soc.h"
35 #include "direct.h"
36 #include "extern.h"
37
38 #if DEBUG
39 #define TRACE0(s) \
40 if (DBGBIT(36, 2)) \
41 fprintf(gbl.dbgfil, s)
42 #define TRACE1(s, a1) \
43 if (DBGBIT(36, 2)) \
44 fprintf(gbl.dbgfil, s, a1)
45 #define TRACE2(s, a1, a2) \
46 if (DBGBIT(36, 2)) \
47 fprintf(gbl.dbgfil, s, a1, a2)
48 #define TRACE3(s, a1, a2, a3) \
49 if (DBGBIT(36, 2)) \
50 fprintf(gbl.dbgfil, s, a1, a2, a3)
51 #define TRACE4(s, a1, a2, a3, a4) \
52 if (DBGBIT(36, 2)) \
53 fprintf(gbl.dbgfil, s, a1, a2, a3, a4)
54 #define TRACE5(s, a1, a2, a3, a4, a5) \
55 if (DBGBIT(36, 2)) \
56 fprintf(gbl.dbgfil, s, a1, a2, a3, a4, a5)
57
58 #define STRACE0(s) \
59 if (DBGBIT(36, 4)) \
60 fprintf(gbl.dbgfil, s)
61 #define STRACE1(s, a1) \
62 if (DBGBIT(36, 4)) \
63 fprintf(gbl.dbgfil, s, a1)
64 #define STRACE2(s, a1, a2) \
65 if (DBGBIT(36, 4)) \
66 fprintf(gbl.dbgfil, s, a1, a2)
67 #define STRACE3(s, a1, a2, a3) \
68 if (DBGBIT(36, 4)) \
69 fprintf(gbl.dbgfil, s, a1, a2, a3)
70 #define STRACE4(s, a1, a2, a3, a4) \
71 if (DBGBIT(36, 4)) \
72 fprintf(gbl.dbgfil, s, a1, a2, a3, a4)
73 #define STRACE5(s, a1, a2, a3, a4, a5) \
74 if (DBGBIT(36, 4)) \
75 fprintf(gbl.dbgfil, s, a1, a2, a3, a4, a5)
76
77 #define DTRACE0(s) \
78 if (DBGBIT(36, 16)) \
79 fprintf(gbl.dbgfil, s)
80 #define DTRACE1(s, a1) \
81 if (DBGBIT(36, 16)) \
82 fprintf(gbl.dbgfil, s, a1)
83 #define DTRACE2(s, a1, a2) \
84 if (DBGBIT(36, 16)) \
85 fprintf(gbl.dbgfil, s, a1, a2)
86 #define DTRACE3(s, a1, a2, a3) \
87 if (DBGBIT(36, 16)) \
88 fprintf(gbl.dbgfil, s, a1, a2, a3)
89 #define DTRACE4(s, a1, a2, a3, a4) \
90 if (DBGBIT(36, 16)) \
91 fprintf(gbl.dbgfil, s, a1, a2, a3, a4)
92 #define DTRACE5(s, a1, a2, a3, a4, a5) \
93 if (DBGBIT(36, 16)) \
94 fprintf(gbl.dbgfil, s, a1, a2, a3, a4, a5)
95
96 #define BTRACE0(s) \
97 if (DBGBIT(36, 64)) \
98 fprintf(gbl.dbgfil, s)
99 #define BTRACE1(s, a1) \
100 if (DBGBIT(36, 64)) \
101 fprintf(gbl.dbgfil, s, a1)
102 #define BTRACE2(s, a1, a2) \
103 if (DBGBIT(36, 64)) \
104 fprintf(gbl.dbgfil, s, a1, a2)
105 #define BTRACE3(s, a1, a2, a3) \
106 if (DBGBIT(36, 64)) \
107 fprintf(gbl.dbgfil, s, a1, a2, a3)
108 #define BTRACE4(s, a1, a2, a3, a4) \
109 if (DBGBIT(36, 64)) \
110 fprintf(gbl.dbgfil, s, a1, a2, a3, a4)
111 #define BTRACE5(s, a1, a2, a3, a4, a5) \
112 if (DBGBIT(36, 64)) \
113 fprintf(gbl.dbgfil, s, a1, a2, a3, a4, a5)
114
115 #else
116
117 #define TRACE0(s)
118 #define TRACE1(s, a1)
119 #define TRACE2(s, a1, a2)
120 #define TRACE3(s, a1, a2, a3)
121 #define TRACE4(s, a1, a2, a3, a4)
122 #define TRACE5(s, a1, a2, a3, a4, a5)
123 #define STRACE0(s)
124 #define STRACE1(s, a1)
125 #define STRACE2(s, a1, a2)
126 #define STRACE3(s, a1, a2, a3)
127 #define STRACE4(s, a1, a2, a3, a4)
128 #define STRACE5(s, a1, a2, a3, a4, a5)
129 #define DTRACE0(s)
130 #define DTRACE1(s, a1)
131 #define DTRACE2(s, a1, a2)
132 #define DTRACE3(s, a1, a2, a3)
133 #define DTRACE4(s, a1, a2, a3, a4)
134 #define DTRACE5(s, a1, a2, a3, a4, a5)
135 #define BTRACE0(s)
136 #define BTRACE1(s, a1)
137 #define BTRACE2(s, a1, a2)
138 #define BTRACE3(s, a1, a2, a3)
139 #define BTRACE4(s, a1, a2, a3, a4)
140 #define BTRACE5(s, a1, a2, a3, a4, a5)
141 #endif
142
143 #if DEBUG
144 static ISZ_T
DBGcnst(int s)145 DBGcnst(int s)
146 {
147 ISZ_T yy;
148 yy = get_isz_cval(s);
149 printf("CNSTG: %ld\n", yy);
150 return yy;
151 }
152 #endif
153 #define ad_icon(i) (mk_isz_cval(i, astb.bnd.dtype))
154 #define prilitree(a) (printast(a))
155 #define ILT_NEXT(s) (STD_NEXT(s))
156 #define IS_CNST(a) (A_TYPEG(a) == A_CNST)
157 #define CNSTG(a) (get_isz_cval(A_SPTRG(a)))
158 #define ZZZCNSTG(a) (DBGcnst(A_SPTRG(a)))
159 #define RESG(a) (A_DTYPEG(a))
160 #define IS_IRES(a) (DT_ISINT(RESG(a)))
161 #define IS_ARES(a) (RESG(a) == DT_ADDR)
162 #define ILI_VISIT(a) (A_VISITG(a))
163 #define ILI_REPL(a) (A_REPLG(a))
164 /* arbitrary opcode numbers */
165 #define IL_INEG 1
166 #define IL_IADD 2
167 #define IL_ISUB 3
168 #define IL_IMUL 4
169 #define IL_IDIV 5
170 #define IL_MOD 6
171 #define IL_AADD 7
172 #define IL_ASUB 8
173
174 typedef struct DV {
175 DIRVEC vec;
176 struct DV *next;
177 } DV;
178
179 #define MAX_DD (2 * MAX_LOOPS)
180 #define MAX_N MAX_DD
181 #define MAXNMESIZ 30
182 #define MAX_S MAXNMESIZ
183
184 typedef struct bound {
185 int bnd[MAX_N + 1];
186 int mplyr; /* multiplier of lhs */
187 int gcd; /* gcd of lhs terms */
188 struct bound *next;
189 } BOUND;
190
191 static void build_loop_dd(int loop);
192 static void dd_compute(void);
193 void dd_edge(int src, int sink, DIRVEC vec);
194 static void dd_succ(int loop);
195 static void dd_exact();
196
197 static void resolve_vv(void);
198 static void resolve_pv(void);
199 static void resolve_pp(void);
200 static void resolve_uv(void);
201 static void do_subscript(int nsubs);
202 static void add_dep(DIRVEC vec);
203 static void hierarchy(DIRVEC dir, int lev, DIRVEC veco);
204 static void unpack_loops(void);
205 static void chkref(int i, int j);
206 LOGICAL dd_array_conflict(int astliTriples, int astArrSrc, int astArrSink,
207 int bSinkAfterSrc);
208 /*static DIRVEC dirv_permute();*/
209 static int symbolic_mul(int a, int b);
210 static LOGICAL symbolic_divide(int num, int den, int *quot);
211 static void cln_visit(void);
212 static int ili_symbolic(int ili);
213 int dd_symbolic(int il);
214 #if DEBUG
215 static void dump_one_bound(BOUND *p, int k, LOGICAL btype);
216 static void dump_two_bound(BOUND *p, BOUND *q, int k, LOGICAL btype);
217 #endif
218 static int ad1ili(int opc, int ast1);
219 static int ad2ili(int opc, int ast1, int ast2);
220 static int ILI_OPC(int astx);
221 static int ILI_OPND(int astx, int opnd);
222
223 static struct {
224 int mr1, mr2; /* current two mem refs */
225 int subs1, subs2; /* current two subscripts */
226 int basenm1, basenm2; /* current two base names */
227 int outer_loop; /* outermost loop */
228 DIRVEC vec; /* temp */
229 DV *dvlist; /* list of direction vectors */
230 int unknown; /* dependence not proven */
231 int n1, n2; /* number of non-common loops */
232 int n; /* number of common loops */
233 int common; /* innermost common loop */
234 short lps[MAX_DD]; /* unpacked loops */
235 VPRAGMAS pragmas;
236 } ddinfo;
237
238 /*
239 * Data dependence framework:
240 * mr1 and mr2 are enclosed in n common loops, with n1 loops enclosing mr1
241 * but not mr2, and n2 loops enclosing mr2 but not mr1. common is the loop
242 * index of the innermost common loop; the nesting depth of common is n;
243 * the nesting depth of mr1 is n1+n; the nesting depth of mr2 is n2+n.
244 *
245 * L[1] DO ddinfo.outer_loop
246 * ... (ddinfo.n loops)
247 * L[n] DO ddinfo.common
248 * L1[1] DO
249 * ... (ddinfo.n1 loops)
250 * L1[n1] DO MR_LOOP(ddinfo.mr1)
251 * mr1
252 * ENDDO
253 * ...
254 * ENDDO
255 * L2[1] DO
256 * ... (ddinfo.n2 loops)
257 * L2[n2] DO MR_LOOP(ddinfo.mr2)
258 * mr2
259 * ENDDO
260 * ...
261 * ENDDO
262 * ENDDO
263 * ...
264 * ENDDO
265 *
266 * The loops are unpacked into lps as follows:
267 *
268 * L1[n1],...,L1[1], L2[n2],...,L2[1], L[n],...,L[1]
269 */
270
271 /* local data */
272 typedef BV *BVP;
273 static BVP *fgsucc;
274 static BV *fgbase;
275
276 /** \brief Build the data dependency graph. */
277 void
build_dd(int loop)278 build_dd(int loop)
279 {
280 int i;
281
282 /* go through load/stores & build data dependency graph */
283 ddinfo.n = 0;
284 ddinfo.outer_loop = loop;
285 /*adjloops(loop);*/
286 #if DEBUG
287 if (DBGBIT(36, 1)) {
288 fprintf(gbl.dbgfil, "\n------- dump before build_loop_dd, loop %d ------\n",
289 loop);
290 dump_one_vloop(loop, 0);
291 for (i = VL_CHILD(loop); i != 0; i = VL_SIBLING(i))
292 dump_vloops(i, 1);
293 }
294 #endif
295 build_loop_dd(loop);
296 cln_visit();
297 }
298
299 static void
build_nat_loop(int loop)300 build_nat_loop(int loop)
301 {
302 int i;
303
304 for (i = LP_FG(loop); i != 0; i = FG_NEXT(i)) {
305 ++hlv.fgn;
306 FG_NATNXT(i) = hlv.natural_loop;
307 hlv.natural_loop = i;
308 }
309 for (i = VL_CHILD(loop); i != 0; i = VL_SIBLING(i)) {
310 build_nat_loop(i);
311 }
312 }
313
314 static void
dd_succ(int loop)315 dd_succ(int loop)
316 {
317 /*
318 * We need to know, for two blocks b1 and b2, if b1 can execute before b2
319 * in the same iteration of the loop; if b1 cannot execute before b2,
320 * then there can be no dependence with an equals direction from a memory
321 * reference in b1 to a memory reference in b2. To do this, we create,
322 * for each flowgraph node, a bit vector giving all the successors of
323 * that node, except that successors of the loop tail are not included.
324 * Then, a transitive closure is performed on those bit vectors.
325 */
326 int i;
327 int nbits;
328 int nnodes;
329 int ntotal;
330 int offs;
331 int change;
332 PSI_P p;
333 BV *temp_set;
334
335 /*
336 * number of bits needed -- count the number of flowgraph nodes in the
337 * loop; for each of those, need a bit for each flowgraph node in the
338 * entire flowgraph.
339 */
340 hlv.fgn = 0;
341 hlv.natural_loop = 0;
342 build_nat_loop(loop);
343
344 nnodes = hlv.fgn;
345 nbits = opt.num_nodes + 1;
346 nbits = (nbits + BV_BITS - 1) / BV_BITS;
347 ntotal = nbits * (nnodes + 1);
348 NEW(fgsucc, BVP, opt.num_nodes + 1);
349 NEW(fgbase, BV, ntotal);
350
351 /* go through and initialize sets */
352 offs = 0;
353 for (i = hlv.natural_loop; i != 0; i = FG_NATNXT(i)) {
354 if (i == LP_TAIL(loop))
355 continue;
356 fgsucc[i] = fgbase + offs;
357 assert(offs < ntotal - nbits, "dd_succ: bad offs", loop, 4);
358 offs += nbits;
359 BZERO(fgsucc[i], BV, nbits);
360 /* initialize successors */
361 for (p = FG_SUCC(i); p != 0; p = PSI_NEXT(p))
362 bv_set(fgsucc[i], PSI_NODE(p));
363 }
364 /* tail has no successors */
365 fgsucc[LP_TAIL(loop)] = fgbase + offs;
366 assert(offs < ntotal - nbits, "dd_succ: bad offs", loop, 4);
367 offs += nbits;
368 BZERO(fgsucc[LP_TAIL(loop)], BV, nbits);
369
370 /* temporary set */
371 temp_set = fgbase + offs;
372 assert(offs <= ntotal - nbits, "dd_succ: bad offs", loop, 4);
373 offs += nbits;
374
375 /* now perform transitive closure */
376 do {
377 change = 0;
378 for (i = hlv.natural_loop; i != 0; i = FG_NATNXT(i)) {
379 if (i == LP_TAIL(loop))
380 continue;
381 bv_copy(temp_set, fgsucc[i], nbits);
382 /*
383 * union successor set of this node with successor sets of this
384 * node's successor
385 */
386 for (p = FG_SUCC(i); p != 0; p = PSI_NEXT(p)) {
387 bv_union(temp_set, fgsucc[PSI_NODE(p)], nbits);
388 }
389 if (bv_notequal(temp_set, fgsucc[i], nbits)) {
390 change = TRUE;
391 bv_copy(fgsucc[i], temp_set, nbits);
392 }
393 }
394 } while (change);
395 #if DEBUG
396 if (DBGBIT(36, 32)) {
397 fprintf(gbl.dbgfil, "Successor bit sets\n");
398 for (i = hlv.natural_loop; i != 0; i = FG_NATNXT(i)) {
399 fprintf(gbl.dbgfil, "%3d:", i);
400 bv_print(fgsucc[i], opt.num_nodes + 1);
401 }
402 }
403 #endif
404 }
405
406 static void
build_loop_dd(int loop)407 build_loop_dd(int loop)
408 {
409 int i, j;
410 int l1, l2;
411 int end, end1;
412
413 ++ddinfo.n;
414 TRACE2("build_loop_dd: loop %d lev %d\n", loop, ddinfo.n);
415 /* do inner loops first */
416 for (i = VL_CHILD(loop); i != 0; i = VL_SIBLING(i)) {
417 build_loop_dd(i);
418 }
419 ddinfo.common = loop;
420
421 /* build successor bit vector */
422 dd_succ(loop);
423
424 /* go through all memory references */
425
426 /* use pragmas for the common loop */
427 ddinfo.pragmas = VL_PRAGMAS(loop);
428
429 /* first, this loop */
430 end = VL_MRSTART(loop) + VL_MRCNT(loop);
431 for (i = VL_MRSTART(loop); i < end; ++i)
432 for (j = VL_MRSTART(loop); j <= i; ++j)
433 chkref(i, j);
434
435 /* next, this loop against inner loops */
436 end1 = VL_MRSTART(loop) + VL_MRECNT(loop);
437 for (i = VL_MRSTART(loop); i < end; ++i)
438 for (j = end; j < end1; ++j)
439 chkref(i, j);
440
441 /* finally, inner loops against other inner loops */
442 for (l1 = VL_CHILD(loop); l1 != 0; l1 = VL_SIBLING(l1))
443 for (l2 = VL_CHILD(loop); l2 != l1; l2 = VL_SIBLING(l2)) {
444 end = VL_MRSTART(l1) + VL_MRECNT(l1);
445 end1 = VL_MRSTART(l2) + VL_MRECNT(l2);
446 for (i = VL_MRSTART(l1); i < end; ++i)
447 for (j = VL_MRSTART(l2); j < end1; ++j)
448 chkref(i, j);
449 }
450
451 FREE(fgsucc);
452 FREE(fgbase);
453 --ddinfo.n;
454 #if DEBUG
455 if (DBGBIT(36, 8)) {
456 fprintf(gbl.dbgfil, "----Mem ref after build_dd loop %d----\n", loop);
457 dump_memrefs(VL_MRSTART(loop), VL_MRECNT(loop));
458 }
459 #endif
460 }
461
462 /* assume mr1, mr2 in same fg node; return TRUE if mr1 comes before mr2 */
463 static LOGICAL
mr_preceeds(int mr1,int mr2)464 mr_preceeds(int mr1, int mr2)
465 {
466 int ilt1, ilt2;
467
468 if (mr1 == mr2)
469 return FALSE;
470 ilt1 = MR_ILT(mr1);
471 ilt2 = MR_ILT(mr2);
472 if (ilt1 == ilt2) {
473 /* if this is the same ilt, don't allow the store to preceed the
474 * load */
475 if (MR_TYPE(mr1) != 'l') {
476 assert(MR_TYPE(mr2) == 'l', "mr_preceeds: too may st/br", mr2, 4);
477 return FALSE;
478 }
479 return TRUE;
480 }
481 while (ilt1 != 0) {
482 if (ilt1 == ilt2)
483 return TRUE;
484 ilt1 = ILT_NEXT(ilt1);
485 }
486 return FALSE;
487 }
488
489 static void
chkref(int i,int j)490 chkref(int i, int j)
491 {
492 DIRVEC exo_dirvec_ji;
493 DIRVEC exo_dirvec_ij;
494 int fg1, fg2;
495 DIRVEC vec;
496 DV *p;
497
498 if ((MR_TYPE(i) != 'l' && MR_TYPE(i) != 's') ||
499 (MR_TYPE(j) != 'l' && MR_TYPE(j) != 's'))
500 return;
501 if (MR_TYPE(i) != 's' && MR_TYPE(j) != 's')
502 return;
503
504 STRACE2("mem ref j %d i %d\n", j, i);
505
506 /*
507 * now compute data dependence vector indicating when a
508 * dependence can exist between memory references i and j
509 */
510 /* compute all possible dependence vectors */
511 ddinfo.dvlist = NULL;
512 ddinfo.mr1 = j;
513 ddinfo.mr2 = i;
514 dd_compute();
515
516 /** compute execution orders from j to i */
517 fg1 = MR_FG(j);
518 fg2 = MR_FG(i);
519 if (fg1 == fg2) {
520 if (mr_preceeds(j, i))
521 exo_dirvec_ji = dirv_exo(ddinfo.n, TRUE);
522 else
523 exo_dirvec_ji = dirv_exo(ddinfo.n, FALSE);
524 } else if (bv_mem(fgsucc[fg1], fg2))
525 exo_dirvec_ji = dirv_exo(ddinfo.n, TRUE);
526 else
527 exo_dirvec_ji = dirv_exo(ddinfo.n, FALSE);
528
529 /** compute execution orders from i to j */
530 if (fg1 == fg2) {
531 if (mr_preceeds(i, j))
532 exo_dirvec_ij = dirv_exo(ddinfo.n, TRUE);
533 else
534 exo_dirvec_ij = dirv_exo(ddinfo.n, FALSE);
535 } else if (bv_mem(fgsucc[fg2], fg1))
536 exo_dirvec_ij = dirv_exo(ddinfo.n, TRUE);
537 else
538 exo_dirvec_ij = dirv_exo(ddinfo.n, FALSE);
539
540 /*
541 * intersect the execution order direction vector with the
542 * dependence direction vectors to determine true dependence
543 * direction vector
544 */
545
546 for (p = ddinfo.dvlist; p != 0; p = p->next) {
547 /* j preceeds i */
548 vec = p->vec & exo_dirvec_ji;
549 if (!dirv_chkzero(vec, ddinfo.n))
550 dd_edge(j, i, vec);
551
552 /* i preceeds j; must invert dependence vector */
553 vec = dirv_inverse(p->vec) & exo_dirvec_ij;
554 if (!dirv_chkzero(vec, ddinfo.n))
555 dd_edge(i, j, vec);
556 }
557 freearea(HLV_AREA1);
558 }
559
560 static void
edge_func(DIRVEC vec)561 edge_func(DIRVEC vec)
562 {
563 ddinfo.vec |= vec;
564 }
565
566 void
dd_edge(int src,int sink,DIRVEC vec)567 dd_edge(int src, int sink, DIRVEC vec)
568 {
569 int type; /* type of dependence */
570 DDEDGE *p;
571
572 /* figure out type of dependence */
573 /* dependence goes from src to sink */
574 ddinfo.vec = 0;
575 dirv_gen(vec, (int *)0, 0, DIRV_BIGPOS, edge_func);
576 vec = ddinfo.vec;
577 if (vec == 0)
578 return;
579 TRACE3("dd_edge: src %d sink %d vec %s \n", src, sink, dirv_print(vec));
580 if (MR_TYPE(src) == 's' && MR_TYPE(sink) == 's')
581 type = DIRV_FOUT;
582 else if (MR_TYPE(src) == 'l' && MR_TYPE(sink) == 's') {
583 type = DIRV_FANTI;
584 /* under certain circumstances we can ignore anti dependences.
585 * If they are from the load of a scalar to the store of the same
586 * scalar, and the store has a loop-invariant RHS. There must
587 * also be only one assignment of the variable in the loop, and
588 * the memory refs must be in the same ilt.
589 * The theory is that it doesn't matter if we do the load before the
590 * store, since the same value is always stored.
591 */
592 if (MR_NME(src) == MR_NME(sink) && MR_INVAL(sink) &&
593 MR_ILT(src) == MR_ILT(sink)) {
594 int def;
595 int lp;
596 int inloop;
597
598 inloop = 0;
599 for (def = NME_DEF(MR_NME(src)); def != 0; def = DEF_NEXT(def)) {
600 /* is it in the loop? */
601 for (lp = FG_LOOP(DEF_FG(def)); lp != 0; lp = LP_PARENT(lp))
602 if (lp == ddinfo.common)
603 goto in_loop;
604 continue; /* not in loop */
605 in_loop:
606 if (inloop++)
607 goto skip;
608 }
609 TRACE0(" Ignoring edge because inv scalar\n");
610 return;
611 }
612 skip:;
613 } else if (MR_TYPE(src) == 's' && MR_TYPE(sink) == 'l')
614 type = DIRV_FFLOW;
615 else {
616 interr("unknown load/load dep in dd_edge", 0, 3);
617 type = 3;
618 }
619 /* add to list for this mem ref */
620 for (p = MR_SUCC(src); p != 0; p = DD_NEXT(p)) {
621 if (DD_SINK(p) == sink && DD_TYPE(p) == type) {
622 DD_DIRVEC(p) |= vec;
623 return;
624 }
625 }
626 /* add a new one */
627 p = (DDEDGE *)getitem(HLV_AREA, sizeof(DDEDGE));
628 DD_TYPE(p) = type;
629 DD_DIRVEC(p) = vec;
630 DD_SINK(p) = sink;
631 DD_NEXT(p) = MR_SUCC(src);
632 MR_SUCC(src) = p;
633 }
634
635 static void
add_dep(DIRVEC vec)636 add_dep(DIRVEC vec)
637 {
638 DV *p;
639
640 p = (DV *)getitem(HLV_AREA1, sizeof(DV));
641 p->vec = vec;
642 p->next = ddinfo.dvlist;
643 ddinfo.dvlist = p;
644 }
645
646 static void
dd_compute(void)647 dd_compute(void)
648 {
649 int nm1, nm2;
650 int ui, loop, mr;
651
652 STRACE4("dd_compute: mr1 %d mr2 %d common %d n %d\n", ddinfo.mr1, ddinfo.mr2,
653 ddinfo.common, ddinfo.n);
654
655 /* I believe that 'yuck' is the appropriate word here. All this
656 * deals with is initial values that the vectorizer added for induction
657 * vars with non-invariant initial values. Since the uses haven't
658 * yet been replaced, artificial checks must be done for a dependence
659 * that will be added. While ugly, the code is probably correct and
660 * reasonably efficient.
661 */
662 ui = FALSE;
663 if (MR_IVUSE(ddinfo.mr1) && MR_INIT(ddinfo.mr2)) {
664 loop = MR_LOOP(ddinfo.mr1);
665 mr = ddinfo.mr2;
666 if (VL_PREBIH(loop) == FG_TO_BIH(MR_FG(ddinfo.mr2)))
667 ui = TRUE;
668 } else if (MR_IVUSE(ddinfo.mr2) && MR_INIT(ddinfo.mr1)) {
669 loop = MR_LOOP(ddinfo.mr2);
670 mr = ddinfo.mr1;
671 if (VL_PREBIH(loop) == FG_TO_BIH(MR_FG(ddinfo.mr1)))
672 ui = TRUE;
673 }
674 if (ui) {
675 /* initial value-use situation */
676 int i;
677
678 for (i = VL_IVLIST(loop); i != 0; i = MR_NEXT(i)) {
679 if (MR_IV(i) == MR_IV(mr)) {
680 add_dep(dirv_fulldep(ddinfo.n));
681 return;
682 }
683 }
684 }
685
686 nm1 = MR_NME(ddinfo.mr1);
687 while (NME_TYPE(nm1) == NT_ARR || NME_TYPE(nm1) == NT_MEM)
688 nm1 = NME_NM(nm1);
689 ddinfo.basenm1 = nm1;
690
691 nm2 = MR_NME(ddinfo.mr2);
692 while (NME_TYPE(nm2) == NT_ARR || NME_TYPE(nm2) == NT_MEM)
693 nm2 = NME_NM(nm2);
694 ddinfo.basenm2 = nm2;
695
696 if (NME_TYPE(nm1) == NT_VAR && NME_TYPE(nm2) == NT_VAR) {
697 if (nm1 != nm2) {
698 int i;
699 /*
700 * for FORTRAN equivalence stmts... return TRUE if one symbol is
701 * in storage overlap chain of the other. Eventually we need to
702 * be more clever here.
703 */
704 int s1 = basesym_of(nm1);
705 int s2 = basesym_of(nm2);
706 if (!VP_DEPCHK(ddinfo.pragmas) || !VP_EQVCHK(ddinfo.pragmas))
707 return;
708 if (SOCPTRG(s1)) {
709 for (i = SOCPTRG(s1); i; i = SOC_NEXT(i))
710 if (SOC_SPTR(i) == s2) {
711 add_dep(dirv_fulldep(ddinfo.n));
712 return;
713 }
714 }
715 if ((POINTERG(s1) || POINTERG(s2)) &&
716 expr_dependent(MR_ILI(ddinfo.mr2), MR_ILI(ddinfo.mr1),
717 MR_ILT(ddinfo.mr2), MR_ILT(ddinfo.mr1))) {
718 add_dep(dirv_fulldep(ddinfo.n));
719 return;
720 }
721 return;
722 }
723 resolve_vv();
724 } else if ((NME_TYPE(nm1) == NT_IND && NME_TYPE(nm2) == NT_VAR) ||
725 (NME_TYPE(nm1) == NT_VAR && NME_TYPE(nm2) == NT_IND))
726 resolve_pv();
727 else if (NME_TYPE(nm1) == NT_IND && NME_TYPE(nm2) == NT_IND)
728 resolve_pp();
729 else if ((NME_TYPE(nm1) == NT_UNK && NME_TYPE(nm2) == NT_IND) ||
730 (NME_TYPE(nm1) == NT_IND && NME_TYPE(nm2) == NT_UNK) ||
731 (NME_TYPE(nm1) == NT_UNK && NME_TYPE(nm2) == NT_UNK)) {
732 if (!VP_DEPCHK(ddinfo.pragmas))
733 return;
734 add_dep(dirv_fulldep(ddinfo.n));
735 } else if ((NME_TYPE(nm1) == NT_UNK && NME_TYPE(nm2) == NT_VAR) ||
736 (NME_TYPE(nm1) == NT_VAR && NME_TYPE(nm2) == NT_UNK))
737 resolve_uv();
738 else
739 assert(0, "dd_compute: can't happen", 0, 4);
740 }
741
742 static void
resolve_pp(void)743 resolve_pp(void)
744 {
745 /* for pointer-pointer, best we can do is the -x switches */
746 open_pragma(BIH_LINENO(FG_TO_BIH(LP_HEAD(ddinfo.common))));
747 if (is_ptr_safe(ddinfo.basenm1))
748 return;
749 if (is_ptr_safe(ddinfo.basenm2))
750 return;
751 close_pragma();
752 if (!VP_DEPCHK(ddinfo.pragmas))
753 return;
754 if (ddinfo.basenm1 == ddinfo.basenm2 && MR_SUBST(ddinfo.mr1) &&
755 MR_SUBST(ddinfo.mr2)) {
756 /* do data dependence analysis */
757 unpack_loops();
758 do_subscript(1);
759 } else
760 add_dep(dirv_fulldep(ddinfo.n));
761 }
762
763 static void
resolve_pv(void)764 resolve_pv(void)
765 {
766 int ptr, var;
767 int sym;
768
769 /* use a combination of optimizer utilities & stolen optimizer code */
770 if (NME_TYPE(ddinfo.basenm1) == NT_IND) {
771 ptr = ddinfo.basenm1;
772 var = ddinfo.basenm2;
773 } else {
774 ptr = ddinfo.basenm2;
775 var = ddinfo.basenm1;
776 }
777 assert(NME_TYPE(ptr) == NT_IND && NME_TYPE(var) == NT_VAR,
778 "resolve_pv: bad ptr/var", 0, 4);
779 open_pragma(BIH_LINENO(FG_TO_BIH(LP_HEAD(ddinfo.common))));
780 if (is_ptr_safe(ptr))
781 return;
782 if (is_sym_optsafe(var, ddinfo.common))
783 return;
784 close_pragma();
785 add_dep(dirv_fulldep(ddinfo.n));
786 }
787
788 static void
resolve_uv(void)789 resolve_uv(void)
790 {
791 add_dep(dirv_fulldep(ddinfo.n));
792 }
793
794 static void
unpack_loops(void)795 unpack_loops(void)
796 {
797 int i, j;
798 /* unpack the loops */
799 i = 0;
800 ddinfo.n1 = 0;
801 for (j = MR_LOOP(ddinfo.mr1); j != ddinfo.common; j = LP_PARENT(j)) {
802 ddinfo.lps[i++] = j;
803 ++ddinfo.n1;
804 }
805 ddinfo.n2 = 0;
806 for (j = MR_LOOP(ddinfo.mr2); j != ddinfo.common; j = LP_PARENT(j)) {
807 ddinfo.lps[i++] = j;
808 ++ddinfo.n2;
809 }
810 j = ddinfo.common;
811 for (;;) {
812 ddinfo.lps[i++] = j;
813 if (j == ddinfo.outer_loop)
814 break;
815 j = LP_PARENT(j);
816 }
817 assert(i <= MAX_DD, "resolve_vv: bad i", ddinfo.common, 4);
818 assert(i == ddinfo.n1 + ddinfo.n2 + ddinfo.n, "resolve_vv: bad iA",
819 ddinfo.common, 4);
820 ddinfo.subs1 = MR_SUBST(ddinfo.mr1) + MR_SUBCNT(ddinfo.mr1);
821 ddinfo.subs2 = MR_SUBST(ddinfo.mr2) + MR_SUBCNT(ddinfo.mr2);
822 DTRACE2("resolve_vv: subs1 %d subs2 %d\n", ddinfo.subs1, ddinfo.subs2);
823 }
824
825 static void
resolve_vv(void)826 resolve_vv(void)
827 {
828 int i1, i2;
829 int nm1, nm2;
830 DIRVEC dir;
831 int buf1[MAXNMESIZ], buf2[MAXNMESIZ];
832 int i, end;
833 int j;
834 DV *p, *q, *q1, *p1;
835
836 /* check for both scalar */
837 if (MR_SCALR(ddinfo.mr1) && MR_SCALR(ddinfo.mr2)) {
838 /* complex have different nmes */
839 assert(ddinfo.basenm1 == ddinfo.basenm2, "resolve_vv: mr1 != mr2",
840 ddinfo.mr1, 4);
841 /* expanded scalars are different */
842 for (i = VL_SCLIST(ddinfo.common); i != 0; i = SCLR_NEXT(i))
843 if (MR_NME(ddinfo.mr1) == SCLR_NME(i))
844 goto found;
845 add_dep(dirv_fulldep(ddinfo.n));
846 return;
847 found:
848 /* equivalent to equals direction in last place */
849 dir = DIRV_ALLEQ;
850 for (i = 1; i < ddinfo.n; ++i)
851 DIRV_ENTRYP(dir, i, DIRV_STAR);
852 DIRV_ENTRYP(dir, 0, DIRV_EQ);
853 add_dep(dir);
854 return;
855 }
856 /* unpack names info into buf1, buf2 */
857 i1 = i2 = MAXNMESIZ;
858 nm1 = MR_NME(ddinfo.mr1);
859 while (i1 > 0) {
860 if (NME_TYPE(nm1) == NT_VAR)
861 break;
862 buf1[--i1] = nm1;
863 nm1 = NME_NM(nm1);
864 }
865 /* too big? */
866 if (NME_TYPE(nm1) != NT_VAR) {
867 add_dep(dirv_fulldep(ddinfo.n));
868 return;
869 }
870
871 nm2 = MR_NME(ddinfo.mr2);
872 while (i2 > 0) {
873 if (NME_TYPE(nm2) == NT_VAR)
874 break;
875 buf2[--i2] = nm2;
876 nm2 = NME_NM(nm2);
877 }
878 /* too big? */
879 if (NME_TYPE(nm2) != NT_VAR) {
880 add_dep(dirv_fulldep(ddinfo.n));
881 return;
882 }
883
884 unpack_loops();
885
886 /* see if we can avoid any checking */
887 i = 0;
888 for (;;) {
889 /* invariant: we have a common prefix of buf1 and buf2 */
890 /* for example: a.b.c[...] */
891 if (i1 + i >= MAXNMESIZ || i2 + i >= MAXNMESIZ)
892 break;
893 if (NME_TYPE(buf1[i1 + i]) == NT_MEM && NME_TYPE(buf2[i2 + i]) == NT_MEM) {
894 if (NME_SYM(buf1[i1 + i]) != NME_SYM(buf2[i2 + i])) {
895 ddinfo.dvlist = NULL;
896 return;
897 }
898 }
899 ++i;
900 }
901 i = 0;
902 j = 0; /* count subscripts */
903 for (;;) {
904 /* all member references match */
905 if (i1 + i >= MAXNMESIZ || i2 + i >= MAXNMESIZ)
906 break;
907 if (NME_TYPE(buf1[i1 + i]) == NT_MEM && NME_TYPE(buf2[i2 + i]) == NT_MEM) {
908 assert(NME_SYM(buf1[i1 + i]) == NME_SYM(buf2[i2 + i]),
909 "resolve_vv: diff MEM symbols", 0, 4);
910 ++i;
911 } else if (NME_TYPE(buf1[i1 + i]) == NT_ARR &&
912 NME_TYPE(buf2[i2 + i]) == NT_ARR) {
913 while (i1 + i < MAXNMESIZ && NME_TYPE(buf1[i1 + i]) == NT_ARR) {
914 assert(NME_TYPE(buf2[i2 + i]) == NT_ARR, "resolve_vv: not both arrays",
915 0, 4);
916 assert(ASD_NDIM(NME_SUB(buf1[i1 + i])) ==
917 ASD_NDIM(NME_SUB(buf1[i1 + i])),
918 "resolve_vv: ndims not equal", 0, 4);
919 j += ASD_NDIM(NME_SUB(buf1[i1 + i]));
920 ++i;
921 }
922 } else
923 assert(0, "resolve_vv: can't happen", 0, 4);
924 }
925 do_subscript(j);
926 ddinfo.subs1 -= j;
927 ddinfo.subs2 -= j;
928 }
929
930 static int nvars;
931 static int neqns;
932 static int nfree;
933
934 #define UD xUD
935 static int UD[MAX_N][MAX_S + MAX_N];
936 static int T[MAX_N];
937 static int C[MAX_S];
938 static int Dist[MAX_LOOPS];
939 static int TU[MAX_S][MAX_N];
940
941 #define BOUND_LEN (2 * (MAX_N + 1))
942 static BOUND *Bound[MAX_N + 1][2];
943 static BOUND *SaveBound[MAX_N + 1][2];
944
945 #if DEBUG
946 static void
prmat(int col,int row)947 prmat(int col, int row)
948 {
949 int i, j;
950
951 fprintf(gbl.dbgfil, "Matrix Dump: %d, %d-----\n", col, row);
952 for (i = 0; i < nvars; ++i) {
953 for (j = 0; j < nvars + neqns; ++j) {
954 prilitree(UD[i][j]);
955 fprintf(gbl.dbgfil, "\t");
956 }
957 fprintf(gbl.dbgfil, "\n");
958 }
959 }
960 #endif
961
962 static INT
gcd(INT a,INT b)963 gcd(INT a, INT b)
964 {
965 INT u, v;
966 INT r;
967
968 u = a > 0 ? a : -a;
969 v = b > 0 ? b : -b;
970 while (v != 0) {
971 r = u % v;
972 u = v;
973 v = r;
974 }
975 return u;
976 }
977
978 static int
ceilg(int t,int g)979 ceilg(int t, int g)
980 {
981 int q;
982 int a;
983
984 a = t;
985 if (t < 0)
986 a = -t;
987 /* return ceil(t/g), g is positive */
988 q = a / g;
989 if (g * q == a) {
990 if (t < 0)
991 return -q;
992 return q;
993 }
994 /* q is floor(a/g) */
995 ddinfo.unknown = 1;
996 if (t > 0)
997 return q + 1;
998 return -q;
999 }
1000
1001 static int
floorg(int t,int g)1002 floorg(int t, int g)
1003 {
1004 int q;
1005 int a;
1006
1007 a = t;
1008 if (t < 0)
1009 a = -t;
1010 /* return floor(t/g), g is positive */
1011 q = a / g;
1012 if (g * q == a) {
1013 if (t < 0)
1014 return -q;
1015 return q;
1016 }
1017 /* q is floor(a/g) */
1018 ddinfo.unknown = 1;
1019 if (t > 0)
1020 return q;
1021 return -q - 1;
1022 }
1023
1024 static LOGICAL
compare_one_bound(BOUND * low,BOUND * up,int var)1025 compare_one_bound(BOUND *low, BOUND *up, int var)
1026 {
1027 int i, j;
1028 int tmp[MAX_N + 1];
1029 LOGICAL btype;
1030 int t, g, t1;
1031 BOUND b, *p;
1032 int icon0 = ad_icon(0);
1033
1034 #if DEBUG
1035 if (DBGBIT(36, 64)) {
1036 fprintf(gbl.dbgfil, "compare_one_bound: ");
1037 dump_two_bound(low, up, var, FALSE);
1038 }
1039 #endif
1040 /* compute difference of bounds */
1041 for (j = 1; j < var; ++j) {
1042 tmp[j] = ili_symbolic(
1043 ad2ili(IL_ISUB, ad2ili(IL_IMUL, up->bnd[j], ad_icon(low->mplyr)),
1044 ad2ili(IL_IMUL, low->bnd[j], ad_icon(up->mplyr))));
1045 }
1046 /* compute difference of lower bounds */
1047 tmp[0] = ili_symbolic(ad2ili(
1048 IL_ISUB, ad2ili(IL_IMUL, up->bnd[0], ad_icon(low->mplyr * low->gcd)),
1049 ad2ili(IL_IMUL, low->bnd[0], ad_icon(up->mplyr * up->gcd))));
1050 #if DEBUG
1051 if (DBGBIT(36, 64)) {
1052 fprintf(gbl.dbgfil, "compare_one_bound: up-low: ");
1053 for (j = 0; j < var; ++j) {
1054 prilitree(tmp[j]);
1055 fprintf(gbl.dbgfil, ", ");
1056 }
1057 fprintf(gbl.dbgfil, "\n");
1058 }
1059 #endif
1060 /* tmp[0] is divided by product of gcds */
1061 /* find highest non-zero coefficient */
1062 for (j = var - 1; j > 0; --j)
1063 if (tmp[j] != icon0)
1064 break;
1065 if (j == 0) {
1066 /* check this one immediately */
1067 if (IS_CNST(tmp[0])) {
1068 if (CNSTG(tmp[0]) < 0) {
1069 DTRACE0(
1070 "compare_one_bound finds constant inconsistency, returns nodep\n");
1071 return TRUE;
1072 }
1073 return FALSE; /* constant consistent */
1074 }
1075 /* variable inconsistent, unknown data dep */
1076 ddinfo.unknown = 1;
1077 return FALSE;
1078 }
1079 /* This coefficient must be constant or we'll ignore this bound */
1080 /* since we don't know if it is upper or lower */
1081 if (!IS_CNST(tmp[j])) {
1082 DTRACE1("compare_one_bound ignores bound for %d\n", var);
1083 ddinfo.unknown = 1;
1084 return FALSE;
1085 }
1086 /* decide whether this is a lower or upper bound */
1087 t = CNSTG(tmp[j]);
1088 if (t > 0) {
1089 btype = FALSE; /* lower */
1090 for (i = 0; i < j; ++i)
1091 tmp[i] = ili_symbolic(ad1ili(IL_INEG, tmp[i]));
1092 } else {
1093 btype = TRUE;
1094 tmp[j] = ad1ili(IL_INEG, tmp[j]);
1095 t = -t;
1096 }
1097 /* compute gcd if possible */
1098 g = 1;
1099 if (t != 1) {
1100 g = 0;
1101 for (i = 1; i <= j; ++i) {
1102 if (!IS_CNST(tmp[i]))
1103 break;
1104 g = gcd(g, CNSTG(tmp[i]));
1105 }
1106 if (g != 0) {
1107 /* divide thru */
1108 t /= g;
1109 for (i = 1; i <= j; ++i) {
1110 tmp[i] = ad_icon(CNSTG(tmp[i]) / g);
1111 }
1112 /* try to handle 0th element */
1113 if (IS_CNST(tmp[0])) {
1114 t1 = CNSTG(tmp[0]);
1115 /* compute floor/ceil ( t1 / g) */
1116 if (btype)
1117 t1 = floorg(t1, g);
1118 else
1119 t1 = ceilg(t1, g);
1120 tmp[0] = ad_icon(t1);
1121 g = 1;
1122 }
1123 } else
1124 g = 1;
1125 }
1126 b.gcd = up->gcd * low->gcd * g;
1127 b.mplyr = t;
1128 for (i = 0; i < j; ++i)
1129 b.bnd[i] = tmp[i];
1130
1131 #if DEBUG
1132 if (DBGBIT(36, 64)) {
1133 fprintf(gbl.dbgfil, "compare_one_bound: new bound: ");
1134 dump_one_bound(&b, j, btype);
1135 }
1136 #endif
1137 /* add this one */
1138 for (p = Bound[j][btype]; p != 0; p = p->next) {
1139 if (b.mplyr == p->mplyr && b.gcd == p->gcd) {
1140 for (i = 0; i < j; ++i)
1141 if (p->bnd[i] != tmp[i])
1142 goto cont;
1143 goto found;
1144 }
1145 cont:;
1146 }
1147 p = (BOUND *)getitem(HLV_AREA1, sizeof(BOUND));
1148 *p = b;
1149 p->next = Bound[j][btype];
1150 Bound[j][btype] = p;
1151 #if DEBUG
1152 if (DBGBIT(36, 16)) {
1153 fprintf(gbl.dbgfil, "Add bound---:");
1154 dump_one_bound(p, j, btype);
1155 }
1156 #endif
1157 found:
1158 return FALSE;
1159 }
1160
1161 /* ibound = bound in terms of index variables */
1162 /* upflag = true if upper bound */
1163 /* var = index var for which this is a bound */
1164 static int
bound_add(int * ibound,LOGICAL upflag,int var)1165 bound_add(int *ibound, LOGICAL upflag, int var)
1166 {
1167 BOUND b1;
1168 BOUND b2;
1169 int j, k;
1170 int d1, d2;
1171 int icon0 = ad_icon(0);
1172
1173 /* express the bound in terms of the free variables */
1174 /* create a linear combination of bounds on free variables */
1175 BTRACE4("bound_add: nvars: %d, nfree: %d, upflag: %d, var: %d\n", nvars,
1176 nfree, upflag, var);
1177 for (j = 0; j <= nfree; ++j) {
1178 b1.bnd[j] = icon0;
1179 }
1180 b1.bnd[0] = ibound[0];
1181 for (j = 1; j <= nvars; ++j) {
1182 /* b1.bnd += bound[j] * TU(*,j) */
1183 for (k = 0; k <= nfree; ++k) {
1184 b1.bnd[k] = ili_symbolic(
1185 ad2ili(IL_IADD, b1.bnd[k], ad2ili(IL_IMUL, ibound[j], TU[k][j - 1])));
1186 }
1187 }
1188 /* construct b2 */
1189 for (k = 0; k <= nfree; ++k)
1190 b2.bnd[k] = TU[k][var];
1191 b1.gcd = b2.gcd = 1;
1192 b1.mplyr = b2.mplyr = 1;
1193 if (upflag) {
1194 /* TU(*,var) <= tmp1 */
1195 return compare_one_bound(&b2, &b1, nfree + 1);
1196 } else
1197 return compare_one_bound(&b1, &b2, nfree + 1);
1198 }
1199
1200 static LOGICAL
check_bounds(void)1201 check_bounds(void)
1202 {
1203 int k;
1204 int d1, d2;
1205 BOUND *p, *q;
1206
1207 /* compare bounds from nfree down */
1208 for (k = nfree; k >= 1; --k) {
1209 for (p = Bound[k][1]; p != 0; p = p->next)
1210 for (q = Bound[k][0]; q != 0; q = q->next) {
1211 /* compare p and q , q <= p */
1212 if (compare_one_bound(q, p, k))
1213 return TRUE;
1214 }
1215 }
1216 return FALSE;
1217 }
1218
1219 static LOGICAL
bnd_le(int k,LOGICAL eqflag)1220 bnd_le(int k, LOGICAL eqflag)
1221 {
1222 int i;
1223 BOUND b1, b2;
1224
1225 for (i = 0; i <= nfree; ++i) /* ik */
1226 b1.bnd[i] = TU[i][2 * k];
1227 if (!eqflag) /* strict inequality */
1228 b1.bnd[0] = ili_symbolic(ad2ili(IL_IADD, b1.bnd[0], ad_icon(1)));
1229 b1.gcd = b1.mplyr = 1;
1230 for (i = 0; i <= nfree; ++i) /* jk */
1231 b2.bnd[i] = TU[i][2 * k + 1];
1232 b2.gcd = b2.mplyr = 1;
1233 return compare_one_bound(&b1, &b2, nfree + 1);
1234 }
1235
1236 static LOGICAL
bnd_ge(int k,LOGICAL eqflag)1237 bnd_ge(int k, LOGICAL eqflag)
1238 {
1239 int i;
1240 BOUND b1, b2;
1241
1242 for (i = 0; i <= nfree; ++i) /* ik */
1243 b1.bnd[i] = TU[i][2 * k];
1244 b1.gcd = b1.mplyr = 1;
1245 for (i = 0; i <= nfree; ++i) /* jk */
1246 b2.bnd[i] = TU[i][2 * k + 1];
1247 if (!eqflag)
1248 b2.bnd[0] = ili_symbolic(ad2ili(IL_IADD, b2.bnd[0], ad_icon(1)));
1249 b2.gcd = b2.mplyr = 1;
1250 return compare_one_bound(&b2, &b1, nfree + 1);
1251 }
1252
1253 /* check for dependence with direction vector dir */
1254 static LOGICAL
check_new_bound(DIRVEC dir,int lev)1255 check_new_bound(DIRVEC dir, int lev)
1256 {
1257 int k;
1258 DIRVEC d;
1259
1260 BCOPY(Bound, SaveBound, BOUND *, BOUND_LEN);
1261 /* create equations describing this direction vector */
1262 for (k = 0; k < lev; ++k) {
1263 d = DIRV_ENTRYG(dir, lev - k - 1);
1264 switch (d) {
1265 case DIRV_STAR:
1266 break;
1267 case DIRV_LT:
1268 /* ik < jk --> ik+1 <= jk */
1269 if (bnd_le(k, FALSE))
1270 return TRUE;
1271 break;
1272 case DIRV_EQ:
1273 /* simulate with <=, >= */
1274 /* ik >= jk and ik <= jk */
1275 if (bnd_le(k, TRUE))
1276 return TRUE;
1277 if (bnd_ge(k, TRUE))
1278 return TRUE;
1279 break;
1280 case DIRV_GT:
1281 /* ik > jk --> ik >= jk+1 */
1282 if (bnd_ge(k, FALSE))
1283 return TRUE;
1284 break;
1285 }
1286 }
1287 return check_bounds();
1288 }
1289
1290 static void
do_subscript(int nsubs)1291 do_subscript(int nsubs)
1292 {
1293 /*
1294 * this function is the heart of the data dependence analysis. subs1 and
1295 * subs2 point to subscript entries for the same array. do_subscript
1296 * determines a direction vector under which those subscripts will
1297 * intersect.
1298 */
1299 int i, j, k, k1, bnd, n1;
1300 int q, d1, d2, sgn, sc, ili;
1301 int d;
1302 int t, t1;
1303 int stride1, stride2;
1304 DIRVEC vec, vec1, vec2;
1305 int icon0, icon1, iconneg1;
1306 int sub;
1307 int invar1, invar2;
1308 int lp;
1309 int tmp[MAX_N + 1];
1310
1311 if (nsubs <= 0)
1312 goto give_up;
1313 icon0 = ad_icon(0L);
1314 icon1 = ad_icon(1L);
1315 iconneg1 = ad_icon(-1L);
1316 /* quick check for non-intersecting loop-invariant subscripts */
1317 /* also find out how many outer loops to remove */
1318 /* Set d to number of outer loops to consider based on
1319 * how far we were able to analyze both subscripts. */
1320 d = 0x7FFFFFFF;
1321 for (sub = 1; sub <= nsubs; ++sub) {
1322 k = ddinfo.n1 + ddinfo.n;
1323 for (i = 0; i < k; ++i) {
1324 if (SB_STRIDE(ddinfo.subs1 - sub)[i] == 0) {
1325 break;
1326 }
1327 if (SB_STRIDE(ddinfo.subs1 - sub)[i] < 0 ||
1328 SB_STRIDE(ddinfo.subs1 - sub)[i] >= astb.stg_avail) {
1329 fprintf(stderr, "sub=%d,ddinfo.subs1=%d,i=%d,SB_STRICE(%d)[%d]=%d\n",
1330 sub, ddinfo.subs1, i, ddinfo.subs1 - sub, i,
1331 SB_STRIDE(ddinfo.subs1 - sub)[i]);
1332 }
1333 }
1334 if (d > i - ddinfo.n1)
1335 d = i - ddinfo.n1;
1336 k = ddinfo.n2 + ddinfo.n;
1337 for (i = 0; i < k; ++i) {
1338 if (SB_STRIDE(ddinfo.subs2 - sub)[i] == 0) {
1339 break;
1340 }
1341 if (SB_STRIDE(ddinfo.subs2 - sub)[i] < 0 ||
1342 SB_STRIDE(ddinfo.subs2 - sub)[i] >= astb.stg_avail) {
1343 fprintf(stderr, "sub=%d,ddinfo.subs2=%d,i=%d,SB_STRICE(%d)[%d]=%d\n",
1344 sub, ddinfo.subs2, i, ddinfo.subs2 - sub, i,
1345 SB_STRIDE(ddinfo.subs2 - sub)[i]);
1346 }
1347 }
1348 if (d > i - ddinfo.n2)
1349 d = i - ddinfo.n2;
1350 }
1351 if (d <= 0)
1352 goto give_up;
1353 for (sub = 1; sub <= nsubs; ++sub) {
1354 invar1 = TRUE;
1355 k = ddinfo.n1 + d;
1356 for (i = 0; i < k; ++i)
1357 if (SB_STRIDE(ddinfo.subs1 - sub)[i] != icon0) {
1358 invar1 = FALSE;
1359 break;
1360 }
1361 invar2 = TRUE;
1362 k = ddinfo.n2 + d;
1363 for (i = 0; i < k; ++i)
1364 if (SB_STRIDE(ddinfo.subs2 - sub)[i] != icon0) {
1365 invar2 = FALSE;
1366 break;
1367 }
1368 if (invar1 && invar2) {
1369 d1 = SB_BASES(ddinfo.subs1 - sub)[ddinfo.n1 + d];
1370 d2 = SB_BASES(ddinfo.subs2 - sub)[ddinfo.n2 + d];
1371 if (A_TYPEG(d1) == A_TRIPLE || A_DTYPEG(d2) == A_TRIPLE) {
1372 /* get lower bound from d1, d2 */
1373 int d1low, d1high, d2low, d2high;
1374 if (A_TYPEG(d1) != A_TRIPLE) {
1375 d1low = d1high = d1;
1376 } else {
1377 d1low = A_LBDG(d1);
1378 d1high = A_UPBDG(d1);
1379 if (A_STRIDEG(d1)) {
1380 int d1str;
1381 d1str = ili_symbolic(A_STRIDEG(d1));
1382 if (!IS_CNST(d1str)) {
1383 goto give_up;
1384 } else if (CNSTG(d1str) < 0) {
1385 int t;
1386 t = d1low;
1387 d1low = d1high;
1388 d1high = t;
1389 }
1390 }
1391 }
1392 if (A_TYPEG(d2) != A_TRIPLE) {
1393 d2low = d2high = d2;
1394 } else {
1395 d2low = A_LBDG(d2);
1396 d2high = A_UPBDG(d2);
1397 if (A_STRIDEG(d2)) {
1398 int d2str;
1399 d2str = ili_symbolic(A_STRIDEG(d2));
1400 if (!IS_CNST(d2str)) {
1401 goto give_up;
1402 } else if (CNSTG(d2str) < 0) {
1403 int t;
1404 t = d2low;
1405 d2low = d2high;
1406 d2high = t;
1407 }
1408 }
1409 }
1410 if (IS_CNST(d1low) && IS_CNST(d2high) && CNSTG(d1low) > CNSTG(d2high)) {
1411 goto no_dep;
1412 }
1413 if (IS_CNST(d2low) && IS_CNST(d1high) && CNSTG(d2low) > CNSTG(d1high)) {
1414 goto no_dep;
1415 }
1416 continue;
1417 }
1418 if (RESG(d1) != RESG(d2))
1419 goto no_dep;
1420 if (IS_IRES(d1))
1421 t = ad2ili(IL_ISUB, d1, d2);
1422 else if (IS_ARES(d1))
1423 t = ad2ili(IL_ASUB, d1, d2);
1424 else
1425 interr("do_subscript: unknown base type for strides", ddinfo.common, 4);
1426 t = ili_symbolic(t);
1427 if (IS_CNST(t) && CNSTG(t) != 0L) {
1428 goto no_dep;
1429 }
1430 }
1431 }
1432
1433 DTRACE2("do_subscript: %d subs, # common loops = %d\n", nsubs, d);
1434 /* setup the equations */
1435 /* there are 2*d + ddinfo.n1 + ddinfo.n2 variables */
1436 nvars = 2 * d + ddinfo.n1 + ddinfo.n2;
1437 neqns = nsubs;
1438 for (sub = 1; sub <= nsubs; ++sub) {
1439 /* first set up coefficients for common loops */
1440 j = 0;
1441 for (i = 0; i < d; ++i) {
1442 /* subscript for A */
1443 UD[j][sub + nvars - 1] =
1444 SB_STRIDE(ddinfo.subs1 - sub)[ddinfo.n1 + d - i - 1];
1445 ++j;
1446 UD[j][sub + nvars - 1] =
1447 ad1ili(IL_INEG, SB_STRIDE(ddinfo.subs2 - sub)[ddinfo.n2 + d - i - 1]);
1448 ++j;
1449 }
1450 /* now set up coefficients for first loop */
1451 for (i = 0; i < ddinfo.n1; ++i) {
1452 UD[j][sub + nvars - 1] = SB_STRIDE(ddinfo.subs1 - sub)[ddinfo.n1 - i - 1];
1453 ++j;
1454 }
1455 /* now set up coefficients for second loop */
1456 for (i = 0; i < ddinfo.n2; ++i) {
1457 UD[j][sub + nvars - 1] =
1458 ad1ili(IL_INEG, SB_STRIDE(ddinfo.subs2 - sub)[ddinfo.n2 - i - 1]);
1459 ++j;
1460 }
1461 assert(j == nvars, "do_subscript: not enough vars", 0, 4);
1462 }
1463 /* set up identity matrix */
1464 for (i = 0; i < nvars; ++i) {
1465 for (j = 0; j < nvars; ++j) {
1466 UD[i][j] = icon0;
1467 }
1468 UD[i][i] = icon1;
1469 }
1470 /* set up rhs */
1471 for (sub = 1; sub <= nsubs; ++sub) {
1472 d1 = SB_BASES(ddinfo.subs1 - sub)[ddinfo.n1 + d];
1473 d2 = SB_BASES(ddinfo.subs2 - sub)[ddinfo.n2 + d];
1474 if (RESG(d1) != RESG(d2))
1475 goto no_dep;
1476 if (A_TYPEG(d1) == A_TRIPLE || A_DTYPEG(d2) == A_TRIPLE) {
1477 continue;
1478 }
1479 if (IS_IRES(d1))
1480 t = ad2ili(IL_ISUB, d2, d1);
1481 else if (IS_ARES(d1))
1482 t = ad2ili(IL_ASUB, d2, d1);
1483 else
1484 interr("do_subscript: unknown base type for constants", ddinfo.common, 4);
1485 C[sub - 1] = ili_symbolic(t);
1486 }
1487 #if DEBUG
1488 if (DBGBIT(36, 64)) {
1489 prmat(0, 0);
1490 fprintf(gbl.dbgfil, "RHS: ");
1491 for (i = 0; i < neqns; ++i) {
1492 fprintf(gbl.dbgfil, "\t%d: ", i);
1493 prilitree(C[i]);
1494 fprintf(gbl.dbgfil, "\n");
1495 }
1496 }
1497 #endif
1498
1499 /* gaussian elimination on the matrix */
1500 bnd = neqns;
1501 if (bnd > nvars)
1502 bnd = nvars;
1503 n1 = neqns;
1504 j = 0;
1505 k1 = 0;
1506 while (k1 < bnd) {
1507 i = nvars - 1;
1508 while (i > k1) {
1509 if (UD[i][j + nvars] == icon0) {
1510 --i;
1511 continue;
1512 }
1513 /* work on rows i, i-1 */
1514 d1 = UD[i][j + nvars];
1515 d2 = UD[i - 1][j + nvars];
1516 /* check if d1 == d2 or d1 == -d2 */
1517 t = ad2ili(IL_ISUB, d1, d2);
1518 t = ili_symbolic(t);
1519 if (t == icon0) {
1520 /* d1 == d2, so sgn == 1, so q == 1 */
1521 q = icon1;
1522 } else {
1523 t = ad2ili(IL_IADD, d1, d2);
1524 t = ili_symbolic(t);
1525 if (t == icon0) {
1526 /* d1 == -d2, so sgn == -1, so q = -1 */
1527 q = ad_icon((INT)-1);
1528 } else {
1529 /* they had better both be constants */
1530 if (!IS_CNST(d1) || !IS_CNST(d2)) {
1531 if (!symbolic_divide(d2, d1, &q))
1532 goto give_up;
1533 } else {
1534 d1 = CNSTG(d1);
1535 d2 = CNSTG(d2);
1536 sgn = d1 * d2;
1537 if (d1 < 0)
1538 d1 = -d1;
1539 if (d2 < 0)
1540 d2 = -d2;
1541 q = d2 / d1;
1542 if (sgn < 0)
1543 q = -q;
1544 q = ad_icon(q);
1545 }
1546 }
1547 }
1548
1549 if (q != icon0) {
1550 /* saxpy */
1551 for (k = 0; k < nvars + neqns; ++k)
1552 UD[i - 1][k] = ili_symbolic(
1553 ad2ili(IL_ISUB, UD[i - 1][k], symbolic_mul(q, UD[i][k])));
1554 }
1555 /* interchange */
1556 for (k = 0; k < neqns + nvars; ++k) {
1557 q = UD[i - 1][k];
1558 UD[i - 1][k] = UD[i][k];
1559 UD[i][k] = q;
1560 }
1561 }
1562 if (UD[k1][j + nvars] == icon0) {
1563 /* This column is linear combination of prev. cols */
1564 ++j;
1565 --n1;
1566 if (bnd > n1)
1567 bnd = n1;
1568 continue;
1569 }
1570 ++j;
1571 ++k1;
1572 }
1573 #if DEBUG
1574 if (DBGBIT(36, 64))
1575 prmat(j, i);
1576 #endif
1577
1578 /* eliminate linear dependent columns (0 on the diagonal) from D */
1579 bnd = neqns;
1580 j = 0;
1581 while (j < bnd) {
1582 if (UD[j][nvars + j] != icon0) {
1583 j++;
1584 continue;
1585 }
1586 /* 0 found in D[j][j] */
1587 bnd--; /* decrement # of columns of D */
1588 for (i = j; i < bnd; i++) {
1589 for (k = 0; k < nvars; k++)
1590 /* copy D[k][(i+1)..(bnd+1)] over D[k][i..bnd] */
1591 UD[k][nvars + i] = UD[k][nvars + i + 1];
1592 C[i] = C[i + 1];
1593 }
1594 }
1595 neqns = bnd;
1596
1597 /* solve tD=C and get distances */
1598 bnd = nvars;
1599 if (bnd > neqns)
1600 bnd = neqns;
1601 /* solve TD = C */
1602 for (j = 0; j < bnd; j++) {
1603 int djj;
1604 sc = icon0;
1605 /* sc = T[0..(j-1)] * D[0..(j-1)][j] */
1606 for (i = 0; i < j; i++) {
1607 ili = ad2ili(IL_IMUL, T[i], UD[i][nvars + j]);
1608 ili = ad2ili(IL_IADD, sc, ili);
1609 sc = ili_symbolic(ili);
1610 }
1611 /* t = C[j] - sc */
1612 ili = ad2ili(IL_ISUB, C[j], sc);
1613 t = ili_symbolic(ili);
1614 djj = UD[j][j + nvars];
1615 /* T[j] = t / D[j][j] */
1616 if (djj == icon1)
1617 T[j] = t;
1618 else if (djj == iconneg1)
1619 T[j] = ad1ili(IL_INEG, t);
1620 else if (t == icon0 && (XBIT(2, 0x200) || IS_CNST(djj)))
1621 /* 0 / x == 0 */
1622 T[j] = icon0;
1623 else if (t == djj && (XBIT(2, 0x200) || IS_CNST(djj)))
1624 /* x / x == 1 */
1625 T[j] = icon1;
1626 else if (IS_CNST(djj) && IS_CNST(t)) {
1627 assert(djj != icon0, "do_subscript: linear combo not removed", 0, 4);
1628 d1 = CNSTG(djj);
1629 d2 = CNSTG(t);
1630 if (d2 % d1 != 0) {
1631 DTRACE0("do_subscript: inconsistent eqns, no dep\n");
1632 goto no_dep;
1633 }
1634 T[j] = ad_icon(d2 / d1);
1635 } else
1636 goto give_up;
1637 }
1638 /* if system is overdetermined, check for consistency */
1639 for (j = nvars; j < neqns; j++) {
1640 sc = icon0;
1641 /* sc = T[0..(nvars-1)] * D[0..(nvars-1)][j] */
1642 for (i = 0; i < nvars; i++) {
1643 ili = ad2ili(IL_IMUL, T[i], UD[i][nvars + j]);
1644 ili = ad2ili(IL_IADD, sc, ili);
1645 sc = ili_symbolic(ili);
1646 }
1647 /* t = sc - C[j] */
1648 ili = ad2ili(IL_ISUB, sc, C[j]);
1649 t = ili_symbolic(ili);
1650 if (IS_CNST(t) && t != icon0) {
1651 DTRACE0("do_subscript: inconsistent overdetermined eqns, no dep\n");
1652 goto no_dep;
1653 }
1654 }
1655 if (neqns > nvars)
1656 /* system is consistent; ignore the last neqns - nvars columns of UD */
1657 neqns = nvars;
1658
1659 #if DEBUG
1660 if (DBGBIT(36, 64)) {
1661 prmat(0, 0);
1662 fprintf(gbl.dbgfil, "Solution:");
1663 for (j = 0; j < bnd; ++j) {
1664 fprintf(gbl.dbgfil, "\t%d: ", j);
1665 prilitree(T[j]);
1666 fprintf(gbl.dbgfil, "\n");
1667 }
1668 }
1669 #endif
1670
1671 /* check for constant dependence distances */
1672 /* actually check only in common loops */
1673 for (j = 0; j < ddinfo.n; ++j)
1674 Dist[j] = 0;
1675 /* actually, only check in the common loops
1676 * that we are counting, that is, up to nest 'd' */
1677 for (j = 0; j < d; ++j) {
1678 for (k = neqns; k < nvars; ++k) {
1679 if (UD[k][2 * j + 1] != UD[k][2 * j])
1680 goto skip;
1681 }
1682 /* distance is constant for this loop */
1683 sc = icon0;
1684 for (k = 0; k < neqns; ++k) {
1685 sc = ili_symbolic(ad2ili(
1686 IL_IADD, sc, ad2ili(IL_IMUL, T[k], ad2ili(IL_ISUB, UD[k][2 * j + 1],
1687 UD[k][2 * j]))));
1688 }
1689 Dist[j] = sc;
1690 skip:;
1691 }
1692 #if DEBUG
1693 if (DBGBIT(36, 16)) {
1694 fprintf(gbl.dbgfil, "Distance:\n");
1695 for (j = 0; j < ddinfo.n; ++j) {
1696 fprintf(gbl.dbgfil, "%d: ", j);
1697 if (Dist[j] == 0)
1698 fprintf(gbl.dbgfil, "<undef>\n");
1699 else {
1700 prilitree(Dist[j]);
1701 fprintf(gbl.dbgfil, "\n");
1702 }
1703 }
1704 }
1705 #endif
1706 nfree = nvars - neqns;
1707 assert(nfree >= 0, "do_subscript: negative nfree", 0, 4);
1708
1709 /* Extended GCD test is done. Now: */
1710 /* 1. Express index variables in terms of free variables by
1711 * multiplying tU. This involves creating a matrix with one
1712 * column for each index variable and one row for each free
1713 * variable, plus one extra row to hold the constant coefficient
1714 * derived from the already solved variables in T
1715 */
1716 for (i = 0; i < nvars; ++i) {
1717 /* do const part */
1718 sc = icon0;
1719 for (j = 0; j < neqns; ++j)
1720 sc = ili_symbolic(ad2ili(IL_IADD, sc, ad2ili(IL_IMUL, T[j], UD[j][i])));
1721 TU[0][i] = sc;
1722 /* do var part */
1723 for (j = neqns + 1; j <= nvars; ++j)
1724 TU[j - neqns][i] = UD[j - 1][i];
1725 }
1726 #if DEBUG
1727 if (DBGBIT(36, 64)) {
1728 fprintf(gbl.dbgfil, "Solution of index vars:\n");
1729 for (i = 0; i < nvars; ++i) {
1730 fprintf(gbl.dbgfil, "I%d= ", i);
1731 for (j = 0; j <= nfree; ++j) {
1732 fprintf(gbl.dbgfil, "\t");
1733 prilitree(TU[j][i]);
1734 if (j) {
1735 fprintf(gbl.dbgfil, "*h%d", j);
1736 }
1737 }
1738 fprintf(gbl.dbgfil, "\n");
1739 }
1740 }
1741 #endif
1742 /* 2. Derive a list of upper and lower bounds from the loop limits.
1743 * Express the loop bounds in terms of the index variables, then
1744 * impose the bounds on the appropriate column, then simplify.
1745 */
1746 /* A bound is a linear combination of index variables */
1747 for (i = 0; i <= nfree; ++i) {
1748 Bound[i][0] = Bound[i][1] = 0;
1749 }
1750 /* common loops */
1751 #define BOUND_ADD(t, l, i) \
1752 if (bound_add(t, l, i)) \
1753 goto no_dep; \
1754 else
1755 for (i = 0; i < d; i++) {
1756 /* get the loop */
1757 lp = ddinfo.lps[ddinfo.n1 + ddinfo.n2 + d - i - 1];
1758 /* this loop derives bounds for variable 2*i and 2*i+1 */
1759 /* same as column 2*i and column 2*i+1 */
1760 /* see if we have to ignore some bounds */
1761 for (j = 0; j < i; ++j)
1762 if (SB_STRIDE(VL_LBND(lp))[j] == 0) {
1763 BTRACE1("Ignoring lower bound for loop %d\n", lp);
1764 goto skipl1;
1765 }
1766
1767 /*----- get lower bound for I */
1768 for (j = 1; j <= nvars; ++j)
1769 tmp[j] = icon0;
1770 tmp[0] = SB_BASES(VL_LBND(lp))[i]; /* const part */
1771 for (j = 0; j < i; ++j) {
1772 /* express the bound in terms of the index variables */
1773 tmp[2 * j + 1] = SB_STRIDE(VL_LBND(lp))[i - j - 1];
1774 }
1775 BOUND_ADD(tmp, FALSE, 2 * i);
1776
1777 /*----- get lower bound for J */
1778 for (j = 1; j <= nvars; ++j)
1779 tmp[j] = icon0;
1780 tmp[0] = SB_BASES(VL_LBND(lp))[i]; /* const part */
1781 for (j = 0; j < i; ++j) {
1782 /* express the bound in terms of the index variables */
1783 tmp[2 * j + 2] = SB_STRIDE(VL_LBND(lp))[i - j - 1];
1784 }
1785 BOUND_ADD(tmp, FALSE, 2 * i + 1);
1786
1787 skipl1:
1788 for (j = 0; j < i; ++j)
1789 if (SB_STRIDE(VL_UBND(lp))[j] == 0) {
1790 BTRACE1("Ignoring upper bound for loop %d\n", lp);
1791 goto skipu1;
1792 }
1793 /*----- get upper bound for I */
1794 for (j = 1; j <= nvars; ++j)
1795 tmp[j] = icon0;
1796 tmp[0] = SB_BASES(VL_UBND(lp))[i]; /* const part */
1797 for (j = 0; j < i; ++j) {
1798 /* express the bound in terms of the index variables */
1799 tmp[2 * j + 1] = SB_STRIDE(VL_UBND(lp))[i - j - 1];
1800 }
1801 BOUND_ADD(tmp, TRUE, 2 * i);
1802
1803 /*----- get upper bound for J */
1804 for (j = 1; j <= nvars; ++j)
1805 tmp[j] = icon0;
1806 tmp[0] = SB_BASES(VL_UBND(lp))[i]; /* const part */
1807 for (j = 0; j < i; ++j) {
1808 /* express the bound in terms of the index variables */
1809 tmp[2 * j + 2] = SB_STRIDE(VL_UBND(lp))[i - j - 1];
1810 }
1811 BOUND_ADD(tmp, TRUE, 2 * i + 1);
1812 skipu1:;
1813 }
1814
1815 for (i = 0; i < ddinfo.n1; ++i) {
1816 /* get the loop */
1817 lp = ddinfo.lps[ddinfo.n1 - i - 1];
1818 /* this loop derives bounds for variable 2*d+i */
1819 for (j = 0; j < i + d; ++j)
1820 if (SB_STRIDE(VL_LBND(lp))[j] == 0) {
1821 BTRACE1("Ignoring lower bound for loop %d\n", lp);
1822 goto skipl2;
1823 }
1824
1825 /*----- get lower bound for I */
1826 for (j = 1; j <= nvars; ++j)
1827 tmp[j] = icon0;
1828 tmp[0] = SB_BASES(VL_LBND(lp))[d + i];
1829 for (j = 0; j < d; ++j)
1830 tmp[2 * j + 1] = SB_STRIDE(VL_LBND(lp))[d + i - j - 1];
1831 for (j = d; j < d + i; ++j)
1832 tmp[d + j + 1] = SB_STRIDE(VL_LBND(lp))[d + i - j - 1];
1833 BOUND_ADD(tmp, FALSE, 2 * d + i);
1834
1835 skipl2:
1836 for (j = 0; j < i + d; ++j)
1837 if (SB_STRIDE(VL_UBND(lp))[j] == 0) {
1838 BTRACE1("Ignoring upper bound for loop %d\n", lp);
1839 goto skipu2;
1840 }
1841 /*----- get upper bound for I */
1842 for (j = 1; j <= nvars; ++j)
1843 tmp[j] = icon0;
1844 tmp[0] = SB_BASES(VL_UBND(lp))[d + i];
1845 for (j = 0; j < d; ++j)
1846 tmp[2 * j + 1] = SB_STRIDE(VL_UBND(lp))[d + i - j - 1];
1847 for (j = d; j < d + i; ++j)
1848 tmp[d + j + 1] = SB_STRIDE(VL_UBND(lp))[d + i - j - 1];
1849 BOUND_ADD(tmp, TRUE, 2 * d + i);
1850
1851 skipu2:;
1852 }
1853 for (i = 0; i < ddinfo.n2; ++i) {
1854 /* get the loop */
1855 lp = ddinfo.lps[ddinfo.n1 + ddinfo.n2 - i - 1];
1856 /* this loop derives bounds for variable 2*d+ddinfo.n1+i */
1857
1858 for (j = 0; j < i + d; ++j)
1859 if (SB_STRIDE(VL_LBND(lp))[j] == 0) {
1860 BTRACE1("Ignoring lower bound for loop %d\n", lp);
1861 goto skipl3;
1862 }
1863 /*----- get lower bound for J */
1864 for (j = 1; j <= nvars; ++j)
1865 tmp[j] = icon0;
1866 tmp[0] = SB_BASES(VL_LBND(lp))[d + i];
1867 for (j = 0; j < d; ++j)
1868 tmp[2 * j + 2] = SB_STRIDE(VL_LBND(lp))[d + i - j - 1];
1869 for (j = d; j < d + i; ++j)
1870 tmp[d + ddinfo.n1 + j + 1] = SB_STRIDE(VL_LBND(lp))[d + i - j - 1];
1871 BOUND_ADD(tmp, FALSE, 2 * d + ddinfo.n1 + i);
1872
1873 skipl3:
1874 for (j = 0; j < i + d; ++j)
1875 if (SB_STRIDE(VL_UBND(lp))[j] == 0) {
1876 BTRACE1("Ignoring upper bound for loop %d\n", lp);
1877 goto skipu3;
1878 }
1879 /*----- get upper bound for J */
1880 for (j = 1; j <= nvars; ++j)
1881 tmp[j] = icon0;
1882 tmp[0] = SB_BASES(VL_UBND(lp))[d + i];
1883 for (j = 0; j < d; ++j)
1884 tmp[2 * j + 1] = SB_STRIDE(VL_UBND(lp))[d + i - j - 1];
1885 for (j = d; j < d + i; ++j)
1886 tmp[d + ddinfo.n1 + j + 1] = SB_STRIDE(VL_UBND(lp))[d + i - j - 1];
1887 BOUND_ADD(tmp, TRUE, 2 * d + ddinfo.n1 + i);
1888
1889 skipu3:;
1890 }
1891
1892 /* 4. Direction vector hierarchy if dependent */
1893 vec = dirv_fulldep(ddinfo.n) & ~DIRV_ALLEQ;
1894 vec1 = 0;
1895 for (i = d; i < ddinfo.n; ++i)
1896 DIRV_ENTRYP(vec1, i, DIRV_STAR);
1897 hierarchy(vec, d, vec1);
1898 return;
1899
1900 give_up:
1901 if (VP_DEPCHK(ddinfo.pragmas))
1902 add_dep(dirv_fulldep(ddinfo.n));
1903 no_dep:
1904 return;
1905 }
1906
1907 static void
hierarchy(DIRVEC dir,int lev,DIRVEC veco)1908 hierarchy(DIRVEC dir, int lev, DIRVEC veco)
1909 {
1910 int i;
1911 int dir2;
1912 LOGICAL top;
1913
1914 DTRACE2("hierarchy: dir 0x%lx -- %s\n", dir, dirv_print(dir));
1915
1916 for (i = 0; i < lev; ++i)
1917 if (DIRV_ENTRYG(dir, i) != DIRV_STAR)
1918 break;
1919 if (i == lev)
1920 /* all * */
1921 top = TRUE;
1922 else
1923 top = FALSE;
1924 for (i = 0; i < lev; ++i)
1925 if (DIRV_ENTRYG(dir, i) == DIRV_STAR)
1926 goto test;
1927 goto bottom;
1928
1929 test:
1930 /*
1931 * test dependence at this level; if no dependence, don't need to go any
1932 * further: if (no dependence with direction vector dir) return 0;
1933 */
1934 if (top) {
1935 if (check_bounds() == TRUE)
1936 return;
1937 BCOPY(SaveBound, Bound, BOUND *, BOUND_LEN);
1938 } else {
1939 if (check_new_bound(dir, lev) == TRUE)
1940 return;
1941 }
1942
1943 /* '*' entry, refine it further */
1944 dir2 = dir;
1945 DIRV_ENTRYC(dir2, i);
1946 DIRV_ENTRYP(dir2, i, DIRV_LT);
1947 hierarchy(dir2, lev, veco);
1948
1949 dir2 = dir;
1950 DIRV_ENTRYC(dir2, i);
1951 DIRV_ENTRYP(dir2, i, DIRV_EQ);
1952 hierarchy(dir2, lev, veco);
1953
1954 dir2 = dir;
1955 DIRV_ENTRYC(dir2, i);
1956 DIRV_ENTRYP(dir2, i, DIRV_GT);
1957 hierarchy(dir2, lev, veco);
1958
1959 return;
1960
1961 bottom:
1962 /*
1963 * we're at the bottom level. Test dependence with this direction vector
1964 * & return 0 or this direction vector. In addition, need to set the
1965 * all-equals bit if this direction vector is all equals
1966 */
1967 ddinfo.unknown = 0;
1968 if (check_new_bound(dir, lev) == TRUE)
1969 return;
1970 if (ddinfo.unknown && !VP_DEPCHK(ddinfo.pragmas))
1971 return;
1972 for (i = 0; i < lev; ++i)
1973 if (DIRV_ENTRYG(dir, i) != DIRV_EQ) {
1974 add_dep(veco | dir);
1975 return;
1976 }
1977 add_dep(veco | dir | DIRV_ALLEQ);
1978 }
1979
1980 /** \brief Invert a direction vector */
1981 DIRVEC
dirv_inverse(DIRVEC vec)1982 dirv_inverse(DIRVEC vec)
1983 {
1984 static DIRVEC revtab[8] = {
1985 /* >=< */ /* >=< */
1986 /* 000 */ 0, /* 000 */
1987 /* 001 */ 4, /* 100 */
1988 /* 010 */ 2, /* 010 */
1989 /* 011 */ 6, /* 110 */
1990 /* 100 */ 1, /* 001 */
1991 /* 101 */ 5, /* 101 */
1992 /* 110 */ 3, /* 011 */
1993 /* 111 */ 7, /* 111 */
1994 };
1995
1996 int i;
1997 int t;
1998 DIRVEC vec1;
1999
2000 /* count number of entrys */
2001 for (i = 0; DIRV_ENTRYG(vec, i) != 0 && i < MAX_LOOPS; ++i)
2002 ;
2003 /* extract information portion */
2004 vec1 = DIRV_INFOPART(vec);
2005 for (--i; i >= 0; --i) {
2006 t = DIRV_ENTRYG(vec, i);
2007 DIRV_ENTRYP(vec1, i, revtab[t]);
2008 }
2009 return vec1;
2010 }
2011
2012 /** \brief Generate full dependence at a given level */
2013 DIRVEC
dirv_fulldep(int level)2014 dirv_fulldep(int level)
2015 {
2016 int i;
2017 DIRVEC vec;
2018
2019 vec = 0;
2020 for (i = 0; i < level; ++i) {
2021 vec <<= DIRV_ENTSIZ;
2022 vec |= DIRV_STAR;
2023 }
2024 vec |= DIRV_ALLEQ;
2025 return vec;
2026 }
2027
2028 /** \brief Generate legal execution order direction vectors.
2029 \param level loop nesting level
2030 \param flag says whether all '=' allowed
2031 */
2032 DIRVEC
dirv_exo(int level,int flag)2033 dirv_exo(int level, int flag)
2034 {
2035 int i;
2036 DIRVEC vec;
2037
2038 /* outermost loop: legal vectors are '<='; for rest they are '*' */
2039 /* alleq is only possible if flag is set */
2040 vec = 0;
2041 DIRV_ENTRYP(vec, level - 1, DIRV_EQ | DIRV_LT);
2042 i = level - 1;
2043 while (i > 0) {
2044 DIRV_ENTRYP(vec, i - 1, DIRV_STAR);
2045 --i;
2046 }
2047 if (flag)
2048 vec |= DIRV_ALLEQ;
2049 return vec;
2050 }
2051
2052 /*
2053 * Return the direction vector that corresponds to the input
2054 * direction vector dir under the mapping m. If m is NULL, the
2055 * trivial mapping is used. A mapping is a permutation of the integers
2056 * 0..nest-1 representing the order of the loops as permuted from
2057 * their original order, from inner to outer.
2058 * The trivial mapping is thus nest-1, ..., 0
2059 */
2060 static DIRVEC
dirv_permute(DIRVEC dir,int * m,int nest)2061 dirv_permute(DIRVEC dir, int *m, int nest)
2062 {
2063 int i, j, k;
2064 DIRVEC e;
2065 int seen_lt;
2066 DIRVEC rdir;
2067 int base;
2068
2069 for (i = 0; DIRV_ENTRYG(dir, i) != 0 && i < MAX_LOOPS; ++i)
2070 ;
2071 base = nest - i;
2072 #if DEBUG
2073 assert(m == 0 || base >= 0, "dirv_permute: nest is wrong", nest, 4);
2074 #endif
2075 seen_lt = 0;
2076 rdir = 0;
2077 for (j = 0; j < i; ++j) {
2078 k = m ? m[j + base] : i - j - 1;
2079 e = DIRV_ENTRYG(dir, k);
2080 rdir <<= DIRV_ENTSIZ;
2081 if (!seen_lt)
2082 rdir |= (e & ~DIRV_GT);
2083 else
2084 rdir |= e;
2085 if (e & DIRV_LT)
2086 seen_lt = 1;
2087 }
2088 if (rdir == 0)
2089 return 0;
2090 /* check alleq case */
2091 for (j = 0; j < i; ++j) {
2092 if (DIRV_ENTRYG(rdir, j) != DIRV_EQ)
2093 goto skip;
2094 }
2095 /* all are equal */
2096 if (!(dir & DIRV_ALLEQ))
2097 return 0;
2098
2099 skip:
2100 rdir |= DIRV_INFOPART(dir);
2101 return rdir;
2102 }
2103
2104 static void
dovec(DIRVEC dir,DIRVEC dir1,int pos,int ig,void (* f)(DIRVEC),int alleq,int seenlt)2105 dovec(DIRVEC dir, DIRVEC dir1, int pos, int ig, void (*f)(DIRVEC), int alleq,
2106 int seenlt)
2107 {
2108 DIRVEC dir2;
2109 DIRVEC e;
2110
2111 if (pos < 0) {
2112 if (dir1 != 0 && !alleq)
2113 (*f)(dir1);
2114 else if (dir1 != 0 && alleq && (dir & DIRV_ALLEQ))
2115 (*f)(dir1 | DIRV_ALLEQ);
2116 return;
2117 }
2118 e = DIRV_ENTRYG(dir, pos);
2119 if (e & DIRV_LT) {
2120 dir2 = dir1 | (DIRV_LT << DIRV_ENTSIZ * pos);
2121 if (pos <= ig)
2122 dovec(dir, dir2, pos - 1, ig, f, 0, 1);
2123 }
2124 if (e & DIRV_EQ) {
2125 dir2 = dir1 | (DIRV_EQ << DIRV_ENTSIZ * pos);
2126 dovec(dir, dir2, pos - 1, ig, f, alleq, seenlt);
2127 }
2128 if (e & DIRV_RD) {
2129 dir2 = dir1 | (DIRV_RD << DIRV_ENTSIZ * pos);
2130 dovec(dir, dir2, pos - 1, ig, f, 0, seenlt);
2131 }
2132 if (e & DIRV_GT) {
2133 dir2 = dir1 | (DIRV_GT << DIRV_ENTSIZ * pos);
2134 if (seenlt)
2135 dovec(dir, dir2, pos - 1, ig, f, 0, 1);
2136 }
2137 }
2138
2139 void
dirv_gen(DIRVEC dir,int * map,int nest,int ig,void (* f)(DIRVEC))2140 dirv_gen(DIRVEC dir, int *map, int nest, int ig, void (*f)(DIRVEC))
2141 {
2142 /*
2143 * if a position to the left of ig contains '<', then all dirvecs gen'd
2144 * from that can be ignored
2145 */
2146 int i;
2147 DIRVEC dir1;
2148
2149 dir1 = dirv_permute(dir, map, nest);
2150 for (i = 0; DIRV_ENTRYG(dir, i) != 0 && i < MAX_LOOPS; ++i)
2151 ;
2152 dovec(dir1, 0, i - 1, ig, f, 1, 0);
2153 }
2154
2155 static LOGICAL is_linear(int);
2156
2157 /* Initialize subscript struct sub with information from expression
2158 * ast under multiplier astmpyr. The loop index of the ith outer loop
2159 * is the FORALL-index in the ith outer triplet within astliTriples.
2160 * astmpyr is invariant with respect to the FORALL-indices.
2161 * Return TRUE if ast is a linear expression. */
2162 static LOGICAL
mkSub(int astliTriples,int sub,int astmpyr,int ast)2163 mkSub(int astliTriples, int sub, int astmpyr, int ast)
2164 {
2165 int i;
2166 int astli;
2167 int aststride, astbase, astid, astcnst;
2168 LOGICAL bLinear;
2169
2170 switch (A_TYPEG(ast)) {
2171 case A_CONV:
2172 /*
2173 * when the convert case did not exist, mkSub() returned false
2174 * indicating the subscript expression is non-linear. The convert
2175 * could appear when mixing integer types or the use of -Mlarge_arrays
2176 * and default integer*4. For specaccel palm -Mlarge_arrays, a false
2177 * dependency was returned for some array assignment in an openacc
2178 * kernel; the assignment was not parallelized, but the acc CG
2179 * generated incorrect scalar code.
2180 * We could try pushing the convert into its operand; but all I'm
2181 * going to do is check if its operand is linear, and if so, I will
2182 * treat it as an ID (a single term)
2183 */
2184 if (!is_linear(A_LOPG(ast)))
2185 return FALSE;
2186 /***** fall thru -- treat as ID *****/
2187 case A_ID:
2188 i = 0;
2189 for (astli = astliTriples; astli; astli = ASTLI_NEXT(astli), i++)
2190 if (A_SPTRG(ast) == ASTLI_SPTR(astli)) {
2191 aststride = A_STRIDEG(ASTLI_TRIPLE(astli));
2192 if (!aststride)
2193 aststride = astb.bnd.one;
2194 aststride = mk_binop(OP_MUL, aststride, astmpyr, astb.bnd.dtype);
2195 aststride =
2196 mk_binop(OP_ADD, aststride, SB_STRIDE(sub)[i], astb.bnd.dtype);
2197 SB_STRIDE(sub)[i] = aststride;
2198 astbase = A_LBDG(ASTLI_TRIPLE(astli));
2199 if (A_DTYPEG(astbase) != astb.bnd.dtype) {
2200 astbase = mk_convert(astbase, astb.bnd.dtype);
2201 }
2202 astbase = mk_binop(OP_MUL, astbase, astmpyr, astb.bnd.dtype);
2203 astbase = mk_binop(OP_ADD, astbase, SB_BASE(sub), astb.bnd.dtype);
2204 SB_BASE(sub) = astbase;
2205 return TRUE;
2206 }
2207 astbase = mk_binop(OP_MUL, astmpyr, ast, astb.bnd.dtype);
2208 SB_BASE(sub) = mk_binop(OP_ADD, astbase, SB_BASE(sub), astb.bnd.dtype);
2209 return TRUE;
2210 case A_CNST:
2211 astbase = mk_binop(OP_MUL, astmpyr, ast, astb.bnd.dtype);
2212 SB_BASE(sub) = mk_binop(OP_ADD, astbase, SB_BASE(sub), astb.bnd.dtype);
2213 return TRUE;
2214 case A_UNOP:
2215 if (A_OPTYPEG(ast) != OP_SUB)
2216 return FALSE;
2217 astmpyr = mk_unop(OP_SUB, astmpyr, astb.bnd.dtype);
2218 bLinear = mkSub(astliTriples, sub, astmpyr, A_LOPG(ast));
2219 return bLinear;
2220 case A_BINOP:
2221 switch (A_OPTYPEG(ast)) {
2222 case OP_ADD:
2223 bLinear = (mkSub(astliTriples, sub, astmpyr, A_LOPG(ast)) &&
2224 mkSub(astliTriples, sub, astmpyr, A_ROPG(ast)));
2225 return bLinear;
2226 case OP_SUB:
2227 bLinear = mkSub(astliTriples, sub, astmpyr, A_LOPG(ast));
2228 if (!bLinear)
2229 return FALSE;
2230 astmpyr = mk_unop(OP_SUB, astmpyr, astb.bnd.dtype);
2231 bLinear = mkSub(astliTriples, sub, astmpyr, A_ROPG(ast));
2232 return bLinear;
2233 case OP_MUL:
2234 astmpyr = mk_binop(OP_MUL, A_LOPG(ast), astmpyr, astb.bnd.dtype);
2235 bLinear = mkSub(astliTriples, sub, astmpyr, A_ROPG(ast));
2236 if (!bLinear)
2237 return FALSE;
2238 astmpyr = mk_binop(OP_MUL, A_ROPG(ast), astmpyr, astb.bnd.dtype);
2239 bLinear = mkSub(astliTriples, sub, astmpyr, A_LOPG(ast));
2240 return bLinear;
2241 default:
2242 return FALSE;
2243 }
2244 default:
2245 return FALSE;
2246 }
2247 }
2248
2249 static LOGICAL
is_linear(int ast)2250 is_linear(int ast)
2251 {
2252 if (!IS_IRES(ast))
2253 return FALSE;
2254 switch (A_TYPEG(ast)) {
2255 case A_CONV:
2256 return is_linear(A_LOPG(ast));
2257 case A_ID:
2258 case A_CNST:
2259 return TRUE;
2260 case A_UNOP:
2261 if (A_OPTYPEG(ast) != OP_SUB)
2262 return FALSE;
2263 return is_linear(A_LOPG(ast));
2264 case A_BINOP:
2265 switch (A_OPTYPEG(ast)) {
2266 case OP_ADD:
2267 case OP_SUB:
2268 case OP_MUL:
2269 if (!is_linear(A_LOPG(ast)))
2270 return FALSE;
2271 return is_linear(A_ROPG(ast));
2272 default:
2273 return FALSE;
2274 }
2275 default:
2276 return FALSE;
2277 }
2278 }
2279
2280 /* Local storage for fwd_func() callback. */
2281 static struct {
2282 int nloops; /* number of loops to check */
2283 LOGICAL bFwd; /* TRUE if forward dependence on entry idv */
2284 } fwd;
2285
2286 static void
fwd_func(DIRVEC dv)2287 fwd_func(DIRVEC dv)
2288 {
2289 int i;
2290
2291 for (i = 0; i < fwd.nloops; i++)
2292 fwd.bFwd |= ((DIRV_ENTRYG(dv, i) & DIRV_LT) != 0);
2293 }
2294
2295 /*
2296 * fill SB_ data structures for any subscripts found in the reference.
2297 * return the number of subscripts, or -1 if a nonlinear subscript is found.
2298 */
2299 static int
fill_subscripts(int astRef,int mr,int subStart,int ntriples,int astliTriples)2300 fill_subscripts(int astRef, int mr, int subStart, int ntriples,
2301 int astliTriples)
2302 {
2303 int n, asd, ndim, sub, i;
2304 switch (A_TYPEG(astRef)) {
2305 case A_ID:
2306 n = 0;
2307 break;
2308 case A_MEM:
2309 n = fill_subscripts(A_PARENTG(astRef), mr, subStart, ntriples,
2310 astliTriples);
2311 break;
2312 case A_SUBSTR:
2313 n = fill_subscripts(A_LOPG(astRef), mr, subStart, ntriples, astliTriples);
2314 break;
2315 case A_SUBSCR:
2316 n = fill_subscripts(A_LOPG(astRef), mr, subStart, ntriples, astliTriples);
2317 if (n < 0)
2318 return n;
2319 asd = A_ASDG(astRef);
2320 ndim = ASD_NDIM(asd);
2321 for (sub = 0; sub < ndim; ++sub) {
2322 LOGICAL bLinear;
2323 int astSub, i;
2324 ++(MR_SUBCNT(mr));
2325 ++hlv.subavail;
2326 NEED(hlv.subavail, hlv.subbase, SUBS, hlv.subsize, hlv.subsize + 100);
2327 BZERO(&hlv.subbase[subStart + n], SUBS, 1);
2328 SB_BASE(subStart + n) = astb.bnd.zero;
2329 for (i = 0; i < ntriples; i++)
2330 SB_STRIDE(subStart + n)[i] = astb.bnd.zero;
2331 astSub = ASD_SUBS(asd, sub);
2332 astSub = ili_symbolic(astSub);
2333 bLinear = mkSub(astliTriples, subStart + n, astb.bnd.one, astSub);
2334 if (!bLinear)
2335 return -1;
2336 SB_BASES(subStart + n)[ntriples] = SB_BASE(subStart + n);
2337 ++n;
2338 }
2339 break;
2340 default:
2341 interr("fill_subscripts: unexpected AST type", astRef, 3);
2342 n = 0;
2343 }
2344 return n;
2345 } /* fill_subscripts */
2346
2347 /** \brief Return TRUE if there is a forward dependence from \p astArrSrc
2348 (the source) to \p astArrSink (the sink) within an iteration space
2349 described by a triplet list beginning at \p astliTriples.
2350
2351 \p bSinkAfterSrc should be:
2352 + 1 if the sink is lexically after the source.
2353 + 0 if the sink is lexically before the source.
2354 + -1 if this is a FORALL test
2355 */
2356 LOGICAL
dd_array_conflict(int astliTriples,int astArrSrc,int astArrSink,int bSinkAfterSrc)2357 dd_array_conflict(int astliTriples, int astArrSrc, int astArrSink,
2358 int bSinkAfterSrc)
2359 {
2360 int i, ntriples;
2361 int astli;
2362 int lp, lpOuter;
2363 int sub, nsubsSrc, nsubsSink, subStart, nsubs;
2364 int astTriple, astSub;
2365 int asdSrc, asdSink, aSrc, aSink, nSrc, nSink;
2366 int mrSink, mrSrc;
2367
2368 assert(astliTriples, "dd_array_conflict: empty triplet list", astArrSrc, 4);
2369
2370 ntriples = 0;
2371 for (astli = astliTriples; astli; astli = ASTLI_NEXT(astli))
2372 ntriples++;
2373 if (ntriples >= MAX_LOOPS)
2374 /* Assume dependence if too many triples. */
2375 return TRUE;
2376
2377 nsubsSrc = nsubsSink = 0;
2378 for (aSrc = astArrSrc; aSrc && A_TYPEG(aSrc) != A_ID;) {
2379 switch (A_TYPEG(aSrc)) {
2380 case A_MEM:
2381 aSrc = A_PARENTG(aSrc);
2382 break;
2383 case A_SUBSTR:
2384 aSrc = A_LOPG(aSrc);
2385 break;
2386 case A_SUBSCR:
2387 asdSrc = A_ASDG(aSrc);
2388 nsubsSrc += ASD_NDIM(asdSrc);
2389 aSrc = A_LOPG(aSrc);
2390 break;
2391 default:
2392 interr("dd_array_conflict: unexpected AST in source", aSrc, 3);
2393 aSrc = 0;
2394 break;
2395 }
2396 }
2397 for (aSink = astArrSink; A_TYPEG(aSink) != A_ID;) {
2398 switch (A_TYPEG(aSink)) {
2399 case A_MEM:
2400 aSink = A_PARENTG(aSink);
2401 break;
2402 case A_SUBSTR:
2403 aSink = A_LOPG(aSink);
2404 break;
2405 case A_SUBSCR:
2406 asdSink = A_ASDG(aSink);
2407 nsubsSink += ASD_NDIM(asdSink);
2408 aSink = A_LOPG(aSink);
2409 break;
2410 default:
2411 interr("dd_array_conflict: unexpected AST in sink", aSink, 3);
2412 aSrc = 0;
2413 break;
2414 }
2415 }
2416
2417 /* Initialize vectorizer's memory. */
2418 hlv.mrsize = 100;
2419 NEW(hlv.mrbase, MEMREF, hlv.mrsize);
2420 hlv.mravail = 1;
2421 hlv.lpsize = 100;
2422 NEW(hlv.lpbase, VLOOP, hlv.lpsize);
2423 hlv.lpavail = 1;
2424 hlv.subsize = 100;
2425 NEW(hlv.subbase, SUBS, hlv.subsize);
2426 hlv.subavail = 1;
2427
2428 /* Allocate ntriples vectorizer loops. */
2429 lpOuter = hlv.lpavail;
2430 hlv.lpavail += ntriples;
2431 NEED(hlv.lpavail, hlv.lpbase, VLOOP, hlv.lpsize, hlv.lpsize + 100);
2432 BZERO(&hlv.lpbase[lpOuter], VLOOP, ntriples);
2433
2434 /* Initialize the lower and upper bound subscripts of all loops. */
2435 astli = astliTriples;
2436 for (lp = hlv.lpavail - 1; lp >= lpOuter; --lp, astli = ASTLI_NEXT(astli)) {
2437 int aststride;
2438 astTriple = ASTLI_TRIPLE(astli);
2439 assert(A_TYPEG(astTriple) == A_TRIPLE,
2440 "dd_array_conflict: wrong triplet type", astliTriples, 4);
2441 sub = hlv.subavail++;
2442 NEED(hlv.subavail, hlv.subbase, SUBS, hlv.subsize, hlv.subsize + 100);
2443 BZERO(&hlv.subbase[sub], SUBS, 1);
2444 SB_BASE(sub) = astb.bnd.zero;
2445 for (i = 0; i < hlv.lpavail - 1 - lp; i++) {
2446 SB_STRIDE(sub)[i] = astb.bnd.zero;
2447 SB_BASES(sub)[i + 1] = astb.bnd.zero;
2448 }
2449 VL_LBND(lp) = sub;
2450 sub = hlv.subavail++;
2451 NEED(hlv.subavail, hlv.subbase, SUBS, hlv.subsize, hlv.subsize + 100);
2452 BZERO(&hlv.subbase[sub], SUBS, 1);
2453 SB_BASE(sub) =
2454 mk_binop(OP_SUB, A_UPBDG(astTriple), A_LBDG(astTriple), astb.bnd.dtype);
2455 aststride = A_STRIDEG(astTriple);
2456 if (aststride && IS_CNST(aststride)) {
2457 ISZ_T val;
2458 val = CNSTG(aststride);
2459 if (val < 0) {
2460 SB_BASE(sub) = mk_binop(OP_SUB, A_LBDG(astTriple), A_UPBDG(astTriple),
2461 astb.bnd.dtype);
2462 }
2463 }
2464 for (i = 0; i < hlv.lpavail - 1 - lp; i++) {
2465 SB_STRIDE(sub)[i] = astb.bnd.zero;
2466 SB_BASES(sub)[i + 1] = SB_BASE(sub);
2467 }
2468 VL_UBND(lp) = sub;
2469 }
2470
2471 /* Create memory reference structures for astArrSrc & astArrSink. */
2472 mrSink = hlv.mravail++;
2473 NEED(hlv.mravail, hlv.mrbase, MEMREF, hlv.mrsize, hlv.mrsize + 100);
2474 BZERO(&hlv.mrbase[mrSink], MEMREF, 1);
2475 MR_ILI(mrSink) = astArrSink;
2476 MR_TYPE(mrSink) = 'l';
2477
2478 mrSrc = hlv.mravail++;
2479 NEED(hlv.mravail, hlv.mrbase, MEMREF, hlv.mrsize, hlv.mrsize + 100);
2480 BZERO(&hlv.mrbase[mrSrc], MEMREF, 1);
2481 MR_ILI(mrSrc) = astArrSrc;
2482 MR_TYPE(mrSrc) = 's';
2483
2484 /* Create subscript structures for mrSink. */
2485 subStart = hlv.subavail;
2486 MR_SUBST(mrSink) = subStart;
2487 MR_SUBCNT(mrSink) = 0;
2488 nSrc = fill_subscripts(astArrSink, mrSink, subStart, ntriples, astliTriples);
2489 if (nSrc < 0) {
2490 fwd.bFwd = TRUE;
2491 goto nonlinear;
2492 }
2493
2494 /* Create subscript structures for mrSrc. */
2495 subStart = hlv.subavail;
2496 MR_SUBST(mrSrc) = subStart;
2497 MR_SUBCNT(mrSrc) = 0;
2498 nSink = fill_subscripts(astArrSrc, mrSrc, subStart, ntriples, astliTriples);
2499 if (nSink < 0) {
2500 fwd.bFwd = TRUE;
2501 goto nonlinear;
2502 }
2503
2504 /* Fill in ddinfo for dependence analysis. */
2505 ddinfo.mr1 = mrSrc;
2506 ddinfo.mr2 = mrSink;
2507 ddinfo.subs1 = MR_SUBST(mrSrc) + MR_SUBCNT(mrSrc);
2508 ddinfo.subs2 = MR_SUBST(mrSink) + MR_SUBCNT(mrSink);
2509
2510 ddinfo.n = ntriples; /* the array references are in same loop nest. */
2511 ddinfo.n1 = ddinfo.n2 = 0;
2512
2513 for (i = 0; i < ntriples; i++)
2514 ddinfo.lps[i] = (lpOuter + ntriples - 1) - i;
2515
2516 ddinfo.common = ddinfo.lps[0];
2517 ddinfo.outer_loop = lpOuter;
2518
2519 VP_DEPCHK(ddinfo.pragmas) = TRUE;
2520
2521 ddinfo.dvlist = NULL;
2522
2523 /* Do dependence analysis. */
2524 nsubs = nsubsSrc < nsubsSink ? nsubsSrc : nsubsSink;
2525 do_subscript(nsubs);
2526
2527 if (bSinkAfterSrc < 0) {
2528 DV *pdv;
2529 /* ANY loop-carried dependence is a forward dependence */
2530 fwd.bFwd = FALSE;
2531 for (pdv = ddinfo.dvlist; pdv; pdv = pdv->next) {
2532 DIRVEC dv;
2533 int i;
2534 dv = pdv->vec;
2535 for (i = 0; i < MAX_LOOPS; ++i) {
2536 int t;
2537 t = DIRV_ENTRYG(dv, i);
2538 if (t == 0)
2539 break; /* done */
2540 if (t & (DIRV_GT | DIRV_LT)) {
2541 /* found a loop-carried dependence */
2542 fwd.bFwd = TRUE;
2543 break;
2544 }
2545 }
2546 }
2547 } else {
2548 /* Generate direction vectors. */
2549 DIRVEC dvexo;
2550 DDEDGE *dd;
2551 DV *pdv;
2552 dvexo = dirv_exo(ntriples, bSinkAfterSrc);
2553 for (pdv = ddinfo.dvlist; pdv; pdv = pdv->next) {
2554 DIRVEC dv;
2555 dv = pdv->vec & dvexo;
2556 if (!dirv_chkzero(dv, ntriples))
2557 dd_edge(mrSrc, mrSink, dv);
2558 }
2559
2560 /* Determine if there is a loop-carried dependence. */
2561 fwd.bFwd = FALSE;
2562 fwd.nloops = ntriples;
2563 for (dd = MR_SUCC(mrSrc); dd; dd = DD_NEXT(dd)) {
2564 dirv_gen(dd->dirvec, 0, ntriples, ntriples, fwd_func);
2565 if (fwd.bFwd)
2566 break;
2567 }
2568 }
2569
2570 nonlinear:
2571 /* Clean up allocated memory. */
2572 cln_visit();
2573 FREE(hlv.lpbase);
2574 FREE(hlv.mrbase);
2575 FREE(hlv.subbase);
2576 freearea(HLV_AREA1);
2577
2578 return fwd.bFwd;
2579 }
2580
2581 static int
symbolic_mul(int a,int b)2582 symbolic_mul(int a, int b)
2583 {
2584 int flag;
2585 int c;
2586
2587 flag = 1;
2588 if (ILI_OPC(a) == IL_INEG) {
2589 a = ILI_OPND(a, 1);
2590 flag = -1 * flag;
2591 }
2592 if (ILI_OPC(b) == IL_INEG) {
2593 b = ILI_OPND(b, 1);
2594 flag = -1 * flag;
2595 }
2596 c = ad2ili(IL_IMUL, a, b);
2597 if (flag < 0)
2598 c = ad1ili(IL_INEG, c);
2599 return c;
2600 }
2601
2602 /* num/den = ili */
2603 /* quot = symbolic quotient */
2604 static LOGICAL
symbolic_divide(int num,int den,int * quot)2605 symbolic_divide(int num, int den, int *quot)
2606 {
2607 int sign = 1;
2608 int q = num;
2609 int icon1, iconm1, icon0;
2610
2611 if (num == (icon0 = ad_icon((INT)0))) {
2612 /* 0 / x */
2613 q = icon0;
2614 goto ret;
2615 }
2616 /* x / +-1 */
2617 if (den == (icon1 = ad_icon((INT)1)))
2618 goto ret;
2619 if (den == (iconm1 = ad_icon((INT)-1))) {
2620 sign = -1;
2621 goto ret;
2622 }
2623 /* we'll assume -1 / sym and 1 / sym is 0; this
2624 * is o.k. since we'll just interchange rows & try
2625 * again
2626 */
2627 if (num == icon1 || num == iconm1) {
2628 q = ad_icon((INT)0);
2629 goto ret;
2630 }
2631
2632 if (ILI_OPC(num) == IL_INEG) {
2633 num = ILI_OPND(num, 1);
2634 sign = -1 * sign;
2635 }
2636 if (ILI_OPC(den) == IL_INEG) {
2637 den = ILI_OPND(den, 1);
2638 sign = -1 * sign;
2639 }
2640 if (ILI_OPC(num) == IL_IMUL) {
2641 if (ILI_OPND(num, 1) == den) {
2642 q = ILI_OPND(num, 2);
2643 goto ret;
2644 }
2645 if (ILI_OPND(num, 2) == den) {
2646 q = ILI_OPND(num, 1);
2647 goto ret;
2648 }
2649 }
2650 return FALSE;
2651 ret:
2652 if (sign < 0)
2653 *quot = ad1ili(IL_INEG, q);
2654 else
2655 *quot = q;
2656 return TRUE;
2657 }
2658
2659 static LOGICAL rdc; /* TRUE if a reduction has occurred */
2660 static int visit_chain = 0; /* head of chain of ili with ILI_REPL set */
2661 static LOGICAL use_visit = TRUE; /* TRUE if ILI_REPL to be used */
2662
2663 typedef struct arith_term {
2664 long confact; /* constant multiplier */
2665 int varfact; /* variable factor */
2666 struct arith_term *next; /* next term */
2667 } ARITH_TERM, *ARITH_LIST; /* arithmetic terms */
2668
2669 /* Clear the ILI_REPL & ILI_VISIT fields of all ili in the chain beginning
2670 * at visit_chain. */
2671 static void
cln_visit(void)2672 cln_visit(void)
2673 {
2674 int il = visit_chain;
2675 int ilnext;
2676
2677 for (il = visit_chain; il; il = ilnext) {
2678 ilnext = ILI_VISIT(il);
2679 ILI_VISIT(il) = ILI_REPL(il) = 0;
2680 }
2681 visit_chain = 0;
2682 }
2683
2684 /* Append list l2 to the end of l1, and return the resulting list. */
2685 static ARITH_LIST
apnd(ARITH_LIST l1,ARITH_LIST l2)2686 apnd(ARITH_LIST l1, ARITH_LIST l2)
2687 {
2688 ARITH_LIST l;
2689
2690 if (l1 == NULL)
2691 return l2;
2692 if (l2 == NULL)
2693 return l1;
2694 /* Set l to the last cell of l1's list */
2695 for (l = l1; l->next; l = l->next)
2696 ;
2697 l->next = l2;
2698 return l1;
2699 }
2700
2701 /* Extract constant factors from the varfact member of arithmetic term atp.
2702 * Rules are:
2703 * r(<c,d>) = <c*d,1>, where d is a constant.
2704 * r(<c,x*y) = <c*d*e,x'*y'>, where:
2705 * <d,x'> = r(<1,x>)
2706 * <e,y'> = r(<1,y>).
2707 * r(<c,-x>) = <-c*d,y>, where
2708 * <d,y> = r(<1,x>).
2709 * r(<c,x>) = <c,x>, otherwise.
2710 */
2711 static void
refactor(ARITH_LIST atp)2712 refactor(ARITH_LIST atp)
2713 {
2714 ARITH_TERM at1, at2;
2715 int opc;
2716 int icon1 = ad_icon(1L);
2717
2718 at1.next = at2.next = NULL;
2719
2720 if (IS_CNST(atp->varfact)) {
2721 atp->confact *= CNSTG(atp->varfact);
2722 atp->varfact = icon1;
2723 return;
2724 }
2725
2726 opc = ILI_OPC(atp->varfact);
2727 switch (opc) {
2728 case IL_IMUL:
2729 at1.confact = at2.confact = 1L;
2730 at1.varfact = ILI_OPND(atp->varfact, 1);
2731 at2.varfact = ILI_OPND(atp->varfact, 2);
2732 refactor(&at1);
2733 refactor(&at2);
2734 atp->confact *= at1.confact * at2.confact;
2735 if (at1.varfact == icon1)
2736 atp->varfact = at2.varfact;
2737 else if (at2.varfact == icon1)
2738 atp->varfact = at1.varfact;
2739 else
2740 atp->varfact = ad2ili(IL_IMUL, at1.varfact, at2.varfact);
2741 break;
2742 case IL_INEG:
2743 at1.confact = 1L;
2744 at1.varfact = ILI_OPND(atp->varfact, 1);
2745 refactor(&at1);
2746 atp->confact *= -at1.confact;
2747 atp->varfact = at1.varfact;
2748 break;
2749 default:
2750 break;
2751 }
2752 }
2753
2754 /* Extract constant factors of each term in arithmetic term list lst. */
2755 static void
refactor_list(ARITH_LIST lst)2756 refactor_list(ARITH_LIST lst)
2757 {
2758 ARITH_LIST l;
2759
2760 for (l = lst; l; l = l->next)
2761 refactor(l);
2762 }
2763
2764 /* Create a sum of all terms in list lst. */
2765 static int
sum(ARITH_LIST lst)2766 sum(ARITH_LIST lst)
2767 {
2768 int ilcon, ilterm, ilsum;
2769 ARITH_LIST l;
2770
2771 ilsum = ad_icon(0L);
2772 for (l = lst; l; l = l->next) {
2773 ilcon = ad_icon(l->confact);
2774 ilterm = ad2ili(IL_IMUL, ilcon, l->varfact);
2775 ilsum = ad2ili(IL_IADD, ilterm, ilsum);
2776 }
2777 return ilsum;
2778 }
2779
2780 /* Return an arithmetic term list whose sum is equivalent to mpyr * ili,
2781 * where mpyr is distributed across terms in the resulting list. All confact
2782 * fields in the term list are set to 1.
2783 * Rules are:
2784 * distrib(x*y, m) = [distrib(x, yi) | yi <- distrib(y, m)].
2785 * distrib(x+y, m) = distrib(x, m) apnd distrib(y, m).
2786 * distrib(x-y, m) = distrib(x, m) apnd distrib(y, -m).
2787 * distrib(-x, m) = distrib(x, -m).
2788 * distrib(x, m + n) = distrib(m, x) apnd distrib(n, x); if x is a load.
2789 * distrib(x, m) = [x * m]; otherwise.
2790 */
2791 static ARITH_LIST
distrib(int ili,int mpyr)2792 distrib(int ili, int mpyr)
2793 {
2794 int ili1, ili2, ilitmp;
2795 int opc, opc1;
2796 ARITH_LIST l1, l2, l;
2797
2798 assert(mpyr, "distrib: invalid multiplier", ili, 4);
2799 assert(ili, "distrib: invalid ili", ili, 4);
2800
2801 opc = ILI_OPC(ili);
2802 switch (opc) {
2803 case IL_INEG:
2804 ilitmp = ad1ili(IL_INEG, mpyr);
2805 l = distrib(ILI_OPND(ili, 1), ilitmp);
2806 return l;
2807 case IL_IADD:
2808 l1 = distrib(ILI_OPND(ili, 1), mpyr);
2809 l2 = distrib(ILI_OPND(ili, 2), mpyr);
2810 l = apnd(l1, l2);
2811 return l;
2812 case IL_ISUB:
2813 l1 = distrib(ILI_OPND(ili, 1), mpyr);
2814 ilitmp = ad1ili(IL_INEG, mpyr);
2815 l2 = distrib(ILI_OPND(ili, 2), ilitmp);
2816 l = apnd(l1, l2);
2817 return l;
2818 case IL_IMUL:
2819 l2 = distrib(ILI_OPND(ili, 2), mpyr);
2820 l = NULL;
2821 for (; l2; l2 = l2->next) {
2822 assert(l2->confact == 1L, "distrib: non-unit constant", ili, 4);
2823 l1 = distrib(ILI_OPND(ili, 1), l2->varfact);
2824 l = apnd(l1, l);
2825 }
2826 return l;
2827 case IL_IDIV:
2828 case IL_MOD:
2829 ilitmp = ili_symbolic(ILI_OPND(ili, 1));
2830 if (ilitmp != ili) {
2831 ilitmp = ad2ili(opc, ilitmp, ILI_OPND(ili, 2));
2832 ilitmp = ad2ili(IL_IMUL, ilitmp, mpyr);
2833 l = (ARITH_LIST)getitem(HLV_AREA1, sizeof(ARITH_TERM));
2834 l->confact = 1;
2835 l->varfact = ilitmp;
2836 l->next = NULL;
2837 return l;
2838 }
2839 /* continue into the default case */
2840 default:
2841 opc1 = ILI_OPC(mpyr);
2842 if (opc1 == IL_IADD) {
2843 l1 = distrib(ILI_OPND(mpyr, 1), ili);
2844 l2 = distrib(ILI_OPND(mpyr, 2), ili);
2845 l = apnd(l1, l2);
2846 return l;
2847 }
2848 if (opc1 == IL_ISUB) {
2849 l1 = distrib(ILI_OPND(mpyr, 1), ili);
2850 ili2 = ad1ili(IL_INEG, ILI_OPND(mpyr, 2));
2851 l2 = distrib(ili2, ili);
2852 l = apnd(l1, l2);
2853 return l;
2854 }
2855 if (IS_IRES(ili))
2856 ilitmp = ad2ili(IL_IMUL, ili, mpyr);
2857 else
2858 ilitmp = ili;
2859 l = (ARITH_LIST)getitem(HLV_AREA1, sizeof(ARITH_TERM));
2860 l->confact = 1;
2861 l->varfact = ilitmp;
2862 l->next = NULL;
2863 return l;
2864 }
2865 }
2866
2867 /* Return TRUE if arithmetic term list lst contains a term whose varfact
2868 * member is equivalent to trm.
2869 * Rules are:
2870 * c_c_f([], x) = FALSE.
2871 * c_c_f([<c,x>|l], x) = TRUE.
2872 * c_c_f([x|l], y) = c_c_f(l, y), otherwise.
2873 */
2874 static LOGICAL
contains_common_factor(ARITH_LIST lst,int trm)2875 contains_common_factor(ARITH_LIST lst, int trm)
2876 {
2877 ARITH_LIST l;
2878
2879 assert(trm, "contains_common_factor: invalid term", trm, 4);
2880 for (l = lst; l; l = l->next)
2881 if (l->varfact == trm)
2882 return TRUE;
2883 return FALSE;
2884 }
2885
2886 /* Sum the confact members of all terms in arithmetic term list lst
2887 * whose varfact members are all equal to trm. Return the sum.
2888 * Rules are:
2889 * c_t([], x) = 0.
2890 * c_t([<c,x>| l], x) = c + c_t(l, x).
2891 * c_t([x|l], y) = c_t(l, y); otherwise.
2892 */
2893 static long
combine_terms(ARITH_LIST lst,int trm)2894 combine_terms(ARITH_LIST lst, int trm)
2895 {
2896 ARITH_LIST l;
2897 long sum = 0L;
2898
2899 for (l = lst; l; l = l->next)
2900 if (l->varfact == trm)
2901 sum += l->confact;
2902 return sum;
2903 }
2904
2905 /* Remove all arithmetic terms from arithmetic term list lst whose varfact
2906 * members are equivalent to trm.
2907 * Rules are:
2908 * r_t([], x) = [].
2909 * r_t([<c,x>|l], x) = l.
2910 * r_t([x|l], y) = [x | r_t(l,y)], otherwise
2911 */
2912 static ARITH_LIST
remove_terms(ARITH_LIST lst,int trm)2913 remove_terms(ARITH_LIST lst, int trm)
2914 {
2915 ARITH_LIST lcur, lprev, lhead;
2916
2917 assert(trm, "remove_terms: invalid term", trm, 4);
2918
2919 lhead = lst;
2920 lprev = NULL;
2921 for (lcur = lst; lcur; lcur = lcur->next)
2922 if (lcur->varfact == trm)
2923 /* remove the current term from the list */
2924 if (lprev == NULL)
2925 lhead = lcur->next;
2926 else
2927 lprev->next = lcur->next;
2928 else
2929 lprev = lcur;
2930 return lhead;
2931 }
2932
2933 /* Combine terms from arithmetic term list lst that have common factors, and
2934 * return the result.
2935 * Rules are:
2936 * elim([]) = [].
2937 * elim([<c,x>|l]) = if c_c_f(l, x) then
2938 * [<c_t(l, x)+c,x> | elim(r_t(l,x))]
2939 * else
2940 * [<c,x> | elim(l)].
2941 * When a reduction takes place, set global rdc to TRUE.
2942 */
2943 static ARITH_LIST
elim(ARITH_LIST lst)2944 elim(ARITH_LIST lst)
2945 {
2946 ARITH_LIST lprev, lcur, lhead;
2947 long sum;
2948
2949 lhead = lst;
2950 lprev = NULL;
2951 for (lcur = lst; lcur; lcur = lcur->next) {
2952 if (contains_common_factor(lcur->next, lcur->varfact)) {
2953 sum = combine_terms(lcur, lcur->varfact);
2954 lcur->next = remove_terms(lcur->next, lcur->varfact);
2955 if (sum == 0L)
2956 if (lprev)
2957 lprev->next = lcur->next;
2958 else
2959 lhead = lcur->next;
2960 else {
2961 lcur->confact = sum;
2962 lprev = lcur;
2963 }
2964 rdc = TRUE;
2965 } else
2966 lprev = lcur;
2967 }
2968 return lhead;
2969 }
2970
2971 static int
ili_symbolic(int ili)2972 ili_symbolic(int ili)
2973 {
2974 /* perform symbolic manipulation on ili to simplify its form */
2975 int opc;
2976 int icon0 = ad_icon(0L), icon1 = ad_icon(1L);
2977 ARITH_LIST lst1, lst2;
2978 int ilires;
2979 int i;
2980 LOGICAL changed;
2981
2982 if (use_visit && ILI_REPL(ili))
2983 return ILI_REPL(ili);
2984
2985 if (IS_CNST(ili))
2986 return ili;
2987 if (A_TYPEG(ili) == A_ID)
2988 return ili;
2989
2990 opc = ILI_OPC(ili);
2991 if (IS_IRES(ili)) {
2992 /* symbolic manipulation on integer expressions only */
2993 rdc = FALSE;
2994 lst1 = distrib(ili, icon1); /* create list of ili */
2995 refactor_list(lst1); /* simplify terms */
2996 lst2 = elim(lst1); /* combine terms */
2997 if (rdc) {
2998 ilires = sum(lst2); /* create sum of terms */
2999 #if DEBUG
3000 if (DBGBIT(36, 128)) {
3001 fprintf(gbl.dbgfil, "Term reduced: ");
3002 prilitree(ili);
3003 fprintf(gbl.dbgfil, " -> ");
3004 prilitree(ilires);
3005 fprintf(gbl.dbgfil, "\n");
3006 }
3007 #endif
3008 } else
3009 ilires = ili;
3010 } else
3011 ilires = ili;
3012
3013 if (!use_visit)
3014 return ilires;
3015 ILI_REPL(ili) = ilires;
3016 ILI_VISIT(ili) = visit_chain;
3017 visit_chain = ili;
3018 return ilires;
3019 }
3020
3021 /** \brief Perform symbolic algebraic simplification of term il.
3022
3023 \b NOTE: Call `freearea(HLV_AREA1)` after calling this function.
3024 */
3025 int
dd_symbolic(int il)3026 dd_symbolic(int il)
3027 {
3028 int ilres;
3029
3030 use_visit = FALSE;
3031 ilres = ili_symbolic(il);
3032 use_visit = TRUE;
3033 return ilres;
3034 }
3035
3036 int
dirv_chkzero(DIRVEC dir,int n)3037 dirv_chkzero(DIRVEC dir, int n)
3038 {
3039 int i;
3040
3041 for (i = 0; i < n; ++i)
3042 if (DIRV_ENTRYG(dir, i) == 0)
3043 return 1;
3044 return 0;
3045 }
3046
3047 /* opc = a unary arithmetic ili opcode */
3048 static int
ad1ili(int opc,int ast1)3049 ad1ili(int opc, int ast1)
3050 {
3051 int astx;
3052
3053 if (opc != IL_INEG)
3054 interr("ad1ili: unidentified opcode", opc, 4);
3055 if (A_DTYPEG(ast1) != astb.bnd.dtype)
3056 ast1 = mk_convert(ast1, astb.bnd.dtype);
3057 astx = mk_unop(OP_SUB, ast1, astb.bnd.dtype);
3058 return astx;
3059 }
3060
3061 /* opc = a binary arithmetic ili opcode */
3062 static int
ad2ili(int opc,int ast1,int ast2)3063 ad2ili(int opc, int ast1, int ast2)
3064 {
3065 int optype;
3066 int dtype;
3067 int astx;
3068
3069 switch (opc) {
3070 case IL_IADD:
3071 optype = OP_ADD;
3072 dtype = astb.bnd.dtype;
3073 break;
3074 case IL_ISUB:
3075 optype = OP_SUB;
3076 dtype = astb.bnd.dtype;
3077 break;
3078 case IL_IMUL:
3079 optype = OP_MUL;
3080 dtype = astb.bnd.dtype;
3081 break;
3082 case IL_IDIV:
3083 optype = OP_DIV;
3084 dtype = astb.bnd.dtype;
3085 break;
3086 case IL_AADD:
3087 optype = OP_ADD;
3088 dtype = DT_ADDR;
3089 break;
3090 case IL_ASUB:
3091 optype = OP_SUB;
3092 dtype = DT_ADDR;
3093 break;
3094 default:
3095 interr("ad2ili: unidentified opcode", opc, 4);
3096 }
3097
3098 if (A_DTYPEG(ast1) != dtype)
3099 ast1 = mk_convert(ast1, dtype);
3100 astx = mk_binop(optype, ast1, ast2, dtype);
3101 return astx;
3102 }
3103
3104 static int
ILI_OPC(int astx)3105 ILI_OPC(int astx)
3106 {
3107 if (A_TYPEG(astx) == A_BINOP)
3108 switch (A_OPTYPEG(astx)) {
3109 case OP_ADD:
3110 if (A_DTYPEG(astx) == DT_ADDR)
3111 return IL_AADD;
3112 if (DT_ISBASIC(A_DTYPEG(astx)))
3113 return IL_IADD;
3114 interr("ILI_OPC: unknown dtype for ADD", astx, 4);
3115 case OP_SUB:
3116 if (A_DTYPEG(astx) == DT_ADDR)
3117 return IL_ASUB;
3118 if (DT_ISBASIC(A_DTYPEG(astx)))
3119 return IL_ISUB;
3120 interr("ILI_OPC: unknown dtype for SUB", astx, 4);
3121 case OP_MUL:
3122 if (DT_ISBASIC(A_DTYPEG(astx)))
3123 return IL_IMUL;
3124 interr("ILI_OPC: unknown dtype for MUL", astx, 4);
3125 case OP_DIV:
3126 if (DT_ISBASIC(A_DTYPEG(astx)))
3127 return IL_IDIV;
3128 interr("ILI_OPC: unknown dtype for DIV", astx, 4);
3129 default:
3130 return 0; /* opcode unknown */
3131 }
3132 if (A_TYPEG(astx) == A_UNOP && A_OPTYPEG(astx) == OP_SUB &&
3133 DT_ISBASIC(A_DTYPEG(astx)))
3134 return IL_INEG;
3135 return 0; /* opcode unknown */
3136 }
3137
3138 static int
ILI_OPND(int astx,int opnd)3139 ILI_OPND(int astx, int opnd)
3140 {
3141 if (A_TYPEG(astx) == A_UNOP) {
3142 if (opnd == 1)
3143 return A_LOPG(astx);
3144 else
3145 interr("ILI_OPND: UNOP operand # out of range", astx, 4);
3146 }
3147 if (A_TYPEG(astx) == A_BINOP) {
3148 if (opnd == 1)
3149 return A_LOPG(astx);
3150 else if (opnd == 2)
3151 return A_ROPG(astx);
3152 else
3153 interr("ILI_OPND: BINOP operand # out of range", astx, 4);
3154 }
3155 interr("ILI_OPND: unknown operand type", astx, 4);
3156 return 0;
3157 }
3158
3159 #if DEBUG
3160 char *
dirv_print(DIRVEC dir)3161 dirv_print(DIRVEC dir)
3162 {
3163 static char xyz[32];
3164 char *p = xyz;
3165 DIRVEC t;
3166 int i;
3167
3168 for (i = 0; DIRV_ENTRYG(dir, i) != 0 && i < MAX_LOOPS; ++i)
3169 ;
3170 if (dir & DIRV_ALLEQ)
3171 *p++ = '#';
3172 else
3173 *p++ = ' ';
3174 for (--i; i >= 0; --i) {
3175 t = DIRV_ENTRYG(dir, i);
3176 if (t & DIRV_LT)
3177 *p++ = '<';
3178 else
3179 *p++ = '.';
3180 if (t & DIRV_EQ)
3181 *p++ = '=';
3182 else
3183 *p++ = '.';
3184 if (t & DIRV_GT)
3185 *p++ = '>';
3186 else
3187 *p++ = '.';
3188 if (t & DIRV_RD)
3189 *p++ = 'R';
3190 else
3191 *p++ = ' ';
3192 }
3193 *p++ = 0;
3194 return xyz;
3195 }
3196
3197 void
dump_dd(DDEDGE * p)3198 dump_dd(DDEDGE *p)
3199 {
3200 static char *types[] = {"flow", "anti", "outp", "????"};
3201
3202 for (; p != 0; p = DD_NEXT(p)) {
3203 fprintf(gbl.dbgfil, " %4d %4s %-32s\n", DD_SINK(p), types[DD_TYPE(p)],
3204 dirv_print(DD_DIRVEC(p)));
3205 }
3206 }
3207
3208 static void
dump_one_bound(BOUND * p,int k,LOGICAL btype)3209 dump_one_bound(BOUND *p, int k, LOGICAL btype)
3210 {
3211 int j;
3212 BOUND b;
3213
3214 b = *p;
3215 if (b.mplyr != 1)
3216 fprintf(gbl.dbgfil, "%d*T[%d]", b.mplyr, k);
3217 else
3218 fprintf(gbl.dbgfil, "T[%d]", k);
3219 fprintf(gbl.dbgfil, btype ? "<=" : ">=");
3220 /* dump 0 term */
3221 if (b.gcd == 1 || IS_CNST(b.bnd[0]))
3222 prilitree(b.bnd[0]);
3223 else {
3224 fprintf(gbl.dbgfil, "(");
3225 prilitree(b.bnd[0]);
3226 fprintf(gbl.dbgfil, ")/%d", b.gcd);
3227 }
3228 for (j = 1; j < k; ++j) {
3229 if (IS_CNST(b.bnd[j]) && CNSTG(b.bnd[j]) == 1L)
3230 fprintf(gbl.dbgfil, "+");
3231 else if (IS_CNST(b.bnd[j]) && CNSTG(b.bnd[j]) == 0L)
3232 continue;
3233 else {
3234 fprintf(gbl.dbgfil, "+");
3235 prilitree(b.bnd[j]);
3236 fprintf(gbl.dbgfil, "*");
3237 }
3238 fprintf(gbl.dbgfil, "T[%d]", j);
3239 }
3240 fprintf(gbl.dbgfil, "\n");
3241 }
3242
3243 static void
dump_two_bound(BOUND * p,BOUND * q,int k,LOGICAL btype)3244 dump_two_bound(BOUND *p, BOUND *q, int k, LOGICAL btype)
3245 {
3246 int j;
3247 if (p->mplyr != 1)
3248 fprintf(gbl.dbgfil, "(");
3249 /* dump 0 term */
3250 if (p->gcd == 1 || IS_CNST(p->bnd[0]))
3251 prilitree(p->bnd[0]);
3252 else {
3253 fprintf(gbl.dbgfil, "(");
3254 prilitree(p->bnd[0]);
3255 fprintf(gbl.dbgfil, ")");
3256 }
3257 for (j = 1; j < k; ++j) {
3258 if (IS_CNST(p->bnd[j]) && CNSTG(p->bnd[j]) == 1L)
3259 fprintf(gbl.dbgfil, "+");
3260 else if (IS_CNST(p->bnd[j]) && CNSTG(p->bnd[j]) == 0L)
3261 continue;
3262 else {
3263 fprintf(gbl.dbgfil, "+");
3264 prilitree(p->bnd[j]);
3265 fprintf(gbl.dbgfil, "*");
3266 }
3267 fprintf(gbl.dbgfil, "T[%d]", j);
3268 }
3269 if (p->mplyr != 1)
3270 fprintf(gbl.dbgfil, ")/%d", p->mplyr);
3271 fprintf(gbl.dbgfil, btype ? ">=" : "<=");
3272 if (q->mplyr != 1)
3273 fprintf(gbl.dbgfil, "(");
3274 /* dump 0 term */
3275 if (q->gcd == 1 || IS_CNST(q->bnd[0]))
3276 prilitree(q->bnd[0]);
3277 else {
3278 fprintf(gbl.dbgfil, "(");
3279 prilitree(q->bnd[0]);
3280 fprintf(gbl.dbgfil, ")");
3281 }
3282 for (j = 1; j < k; ++j) {
3283 if (IS_CNST(q->bnd[j]) && CNSTG(q->bnd[j]) == 1L)
3284 fprintf(gbl.dbgfil, "+");
3285 else if (IS_CNST(q->bnd[j]) && CNSTG(q->bnd[j]) == 0L)
3286 continue;
3287 else {
3288 fprintf(gbl.dbgfil, "+");
3289 prilitree(q->bnd[j]);
3290 fprintf(gbl.dbgfil, "*");
3291 }
3292 fprintf(gbl.dbgfil, "T[%d]", j);
3293 }
3294 if (q->mplyr != 1)
3295 fprintf(gbl.dbgfil, ")/%d", q->mplyr);
3296
3297 fprintf(gbl.dbgfil, "\n");
3298 }
3299
3300 /* Dump a list of arithmetic terms. */
3301 static void
dump_termlist(ARITH_LIST lst)3302 dump_termlist(ARITH_LIST lst)
3303 {
3304 ARITH_LIST l;
3305
3306 if (lst == NULL) {
3307 fprintf(gbl.dbgfil, "[]");
3308 return;
3309 }
3310 fprintf(gbl.dbgfil, "[");
3311 for (l = lst; l->next; l = l->next) {
3312 fprintf(gbl.dbgfil, "%ld*", l->confact);
3313 prilitree(l->varfact);
3314 fprintf(gbl.dbgfil, ", ");
3315 }
3316 fprintf(gbl.dbgfil, "%ld*", l->confact);
3317 prilitree(l->varfact);
3318 fprintf(gbl.dbgfil, "]");
3319 }
3320 #endif
3321
3322 #endif
3323