1 /*
2  * Copyright (c) 1994-2018, NVIDIA CORPORATION.  All rights reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17 
18 /** \file
19     \brief Data dependence framework.
20  */
21 
22 #include "gbldefs.h"
23 #include "error.h"
24 #ifndef NOVECTORIZE
25 #include "global.h"
26 #include "symtab.h"
27 #include "ast.h"
28 #include "nme.h"
29 #include "optimize.h"
30 #include "hlvect.h"
31 
32 #include "induc.h"
33 
34 #include "soc.h"
35 #include "direct.h"
36 #include "extern.h"
37 
38 #if DEBUG
39 #define TRACE0(s)    \
40   if (DBGBIT(36, 2)) \
41   fprintf(gbl.dbgfil, s)
42 #define TRACE1(s, a1) \
43   if (DBGBIT(36, 2))  \
44   fprintf(gbl.dbgfil, s, a1)
45 #define TRACE2(s, a1, a2) \
46   if (DBGBIT(36, 2))      \
47   fprintf(gbl.dbgfil, s, a1, a2)
48 #define TRACE3(s, a1, a2, a3) \
49   if (DBGBIT(36, 2))          \
50   fprintf(gbl.dbgfil, s, a1, a2, a3)
51 #define TRACE4(s, a1, a2, a3, a4) \
52   if (DBGBIT(36, 2))              \
53   fprintf(gbl.dbgfil, s, a1, a2, a3, a4)
54 #define TRACE5(s, a1, a2, a3, a4, a5) \
55   if (DBGBIT(36, 2))                  \
56   fprintf(gbl.dbgfil, s, a1, a2, a3, a4, a5)
57 
58 #define STRACE0(s)   \
59   if (DBGBIT(36, 4)) \
60   fprintf(gbl.dbgfil, s)
61 #define STRACE1(s, a1) \
62   if (DBGBIT(36, 4))   \
63   fprintf(gbl.dbgfil, s, a1)
64 #define STRACE2(s, a1, a2) \
65   if (DBGBIT(36, 4))       \
66   fprintf(gbl.dbgfil, s, a1, a2)
67 #define STRACE3(s, a1, a2, a3) \
68   if (DBGBIT(36, 4))           \
69   fprintf(gbl.dbgfil, s, a1, a2, a3)
70 #define STRACE4(s, a1, a2, a3, a4) \
71   if (DBGBIT(36, 4))               \
72   fprintf(gbl.dbgfil, s, a1, a2, a3, a4)
73 #define STRACE5(s, a1, a2, a3, a4, a5) \
74   if (DBGBIT(36, 4))                   \
75   fprintf(gbl.dbgfil, s, a1, a2, a3, a4, a5)
76 
77 #define DTRACE0(s)    \
78   if (DBGBIT(36, 16)) \
79   fprintf(gbl.dbgfil, s)
80 #define DTRACE1(s, a1) \
81   if (DBGBIT(36, 16))  \
82   fprintf(gbl.dbgfil, s, a1)
83 #define DTRACE2(s, a1, a2) \
84   if (DBGBIT(36, 16))      \
85   fprintf(gbl.dbgfil, s, a1, a2)
86 #define DTRACE3(s, a1, a2, a3) \
87   if (DBGBIT(36, 16))          \
88   fprintf(gbl.dbgfil, s, a1, a2, a3)
89 #define DTRACE4(s, a1, a2, a3, a4) \
90   if (DBGBIT(36, 16))              \
91   fprintf(gbl.dbgfil, s, a1, a2, a3, a4)
92 #define DTRACE5(s, a1, a2, a3, a4, a5) \
93   if (DBGBIT(36, 16))                  \
94   fprintf(gbl.dbgfil, s, a1, a2, a3, a4, a5)
95 
96 #define BTRACE0(s)    \
97   if (DBGBIT(36, 64)) \
98   fprintf(gbl.dbgfil, s)
99 #define BTRACE1(s, a1) \
100   if (DBGBIT(36, 64))  \
101   fprintf(gbl.dbgfil, s, a1)
102 #define BTRACE2(s, a1, a2) \
103   if (DBGBIT(36, 64))      \
104   fprintf(gbl.dbgfil, s, a1, a2)
105 #define BTRACE3(s, a1, a2, a3) \
106   if (DBGBIT(36, 64))          \
107   fprintf(gbl.dbgfil, s, a1, a2, a3)
108 #define BTRACE4(s, a1, a2, a3, a4) \
109   if (DBGBIT(36, 64))              \
110   fprintf(gbl.dbgfil, s, a1, a2, a3, a4)
111 #define BTRACE5(s, a1, a2, a3, a4, a5) \
112   if (DBGBIT(36, 64))                  \
113   fprintf(gbl.dbgfil, s, a1, a2, a3, a4, a5)
114 
115 #else
116 
117 #define TRACE0(s)
118 #define TRACE1(s, a1)
119 #define TRACE2(s, a1, a2)
120 #define TRACE3(s, a1, a2, a3)
121 #define TRACE4(s, a1, a2, a3, a4)
122 #define TRACE5(s, a1, a2, a3, a4, a5)
123 #define STRACE0(s)
124 #define STRACE1(s, a1)
125 #define STRACE2(s, a1, a2)
126 #define STRACE3(s, a1, a2, a3)
127 #define STRACE4(s, a1, a2, a3, a4)
128 #define STRACE5(s, a1, a2, a3, a4, a5)
129 #define DTRACE0(s)
130 #define DTRACE1(s, a1)
131 #define DTRACE2(s, a1, a2)
132 #define DTRACE3(s, a1, a2, a3)
133 #define DTRACE4(s, a1, a2, a3, a4)
134 #define DTRACE5(s, a1, a2, a3, a4, a5)
135 #define BTRACE0(s)
136 #define BTRACE1(s, a1)
137 #define BTRACE2(s, a1, a2)
138 #define BTRACE3(s, a1, a2, a3)
139 #define BTRACE4(s, a1, a2, a3, a4)
140 #define BTRACE5(s, a1, a2, a3, a4, a5)
141 #endif
142 
143 #if DEBUG
144 static ISZ_T
DBGcnst(int s)145 DBGcnst(int s)
146 {
147   ISZ_T yy;
148   yy = get_isz_cval(s);
149   printf("CNSTG: %ld\n", yy);
150   return yy;
151 }
152 #endif
153 #define ad_icon(i) (mk_isz_cval(i, astb.bnd.dtype))
154 #define prilitree(a) (printast(a))
155 #define ILT_NEXT(s) (STD_NEXT(s))
156 #define IS_CNST(a) (A_TYPEG(a) == A_CNST)
157 #define CNSTG(a) (get_isz_cval(A_SPTRG(a)))
158 #define ZZZCNSTG(a) (DBGcnst(A_SPTRG(a)))
159 #define RESG(a) (A_DTYPEG(a))
160 #define IS_IRES(a) (DT_ISINT(RESG(a)))
161 #define IS_ARES(a) (RESG(a) == DT_ADDR)
162 #define ILI_VISIT(a) (A_VISITG(a))
163 #define ILI_REPL(a) (A_REPLG(a))
164 /* arbitrary opcode numbers */
165 #define IL_INEG 1
166 #define IL_IADD 2
167 #define IL_ISUB 3
168 #define IL_IMUL 4
169 #define IL_IDIV 5
170 #define IL_MOD 6
171 #define IL_AADD 7
172 #define IL_ASUB 8
173 
174 typedef struct DV {
175   DIRVEC vec;
176   struct DV *next;
177 } DV;
178 
179 #define MAX_DD (2 * MAX_LOOPS)
180 #define MAX_N MAX_DD
181 #define MAXNMESIZ 30
182 #define MAX_S MAXNMESIZ
183 
184 typedef struct bound {
185   int bnd[MAX_N + 1];
186   int mplyr; /* multiplier of lhs */
187   int gcd;   /* gcd of lhs terms */
188   struct bound *next;
189 } BOUND;
190 
191 static void build_loop_dd(int loop);
192 static void dd_compute(void);
193 void dd_edge(int src, int sink, DIRVEC vec);
194 static void dd_succ(int loop);
195 static void dd_exact();
196 
197 static void resolve_vv(void);
198 static void resolve_pv(void);
199 static void resolve_pp(void);
200 static void resolve_uv(void);
201 static void do_subscript(int nsubs);
202 static void add_dep(DIRVEC vec);
203 static void hierarchy(DIRVEC dir, int lev, DIRVEC veco);
204 static void unpack_loops(void);
205 static void chkref(int i, int j);
206 LOGICAL dd_array_conflict(int astliTriples, int astArrSrc, int astArrSink,
207                           int bSinkAfterSrc);
208 /*static DIRVEC dirv_permute();*/
209 static int symbolic_mul(int a, int b);
210 static LOGICAL symbolic_divide(int num, int den, int *quot);
211 static void cln_visit(void);
212 static int ili_symbolic(int ili);
213 int dd_symbolic(int il);
214 #if DEBUG
215 static void dump_one_bound(BOUND *p, int k, LOGICAL btype);
216 static void dump_two_bound(BOUND *p, BOUND *q, int k, LOGICAL btype);
217 #endif
218 static int ad1ili(int opc, int ast1);
219 static int ad2ili(int opc, int ast1, int ast2);
220 static int ILI_OPC(int astx);
221 static int ILI_OPND(int astx, int opnd);
222 
223 static struct {
224   int mr1, mr2;         /* current two mem refs */
225   int subs1, subs2;     /* current two subscripts */
226   int basenm1, basenm2; /* current two base names */
227   int outer_loop;       /* outermost loop */
228   DIRVEC vec;           /* temp */
229   DV *dvlist;           /* list of direction vectors */
230   int unknown;          /* dependence not proven */
231   int n1, n2;           /* number of non-common loops */
232   int n;                /* number of common loops */
233   int common;           /* innermost common loop */
234   short lps[MAX_DD];    /* unpacked loops */
235   VPRAGMAS pragmas;
236 } ddinfo;
237 
238 /*
239  * Data dependence framework:
240  * mr1 and mr2 are enclosed in n common loops, with n1 loops enclosing mr1
241  * but not mr2, and n2 loops enclosing mr2 but not mr1.  common is the loop
242  * index of the innermost common loop; the nesting depth of common is n;
243  * the nesting depth of mr1 is n1+n; the nesting depth of mr2 is n2+n.
244  *
245  * L[1]	    DO			ddinfo.outer_loop
246  *		...			    (ddinfo.n loops)
247  * L[n]		DO			ddinfo.common
248  * L1[1]	    DO
249  *			...			(ddinfo.n1 loops)
250  * L1[n1]		DO		    MR_LOOP(ddinfo.mr1)
251  *			    mr1
252  *			ENDDO
253  *			...
254  *		    ENDDO
255  * L2[1]	    DO
256  *			...			(ddinfo.n2 loops)
257  * L2[n2]		DO		    MR_LOOP(ddinfo.mr2)
258  *			    mr2
259  *			ENDDO
260  *			...
261  *		    ENDDO
262  *		ENDDO
263  *		...
264  *	    ENDDO
265  *
266  * The loops are unpacked into lps as follows:
267  *
268  * L1[n1],...,L1[1], L2[n2],...,L2[1], L[n],...,L[1]
269  */
270 
271 /* local data */
272 typedef BV *BVP;
273 static BVP *fgsucc;
274 static BV *fgbase;
275 
276 /** \brief Build the data dependency graph. */
277 void
build_dd(int loop)278 build_dd(int loop)
279 {
280   int i;
281 
282   /* go through load/stores & build data dependency graph */
283   ddinfo.n = 0;
284   ddinfo.outer_loop = loop;
285 /*adjloops(loop);*/
286 #if DEBUG
287   if (DBGBIT(36, 1)) {
288     fprintf(gbl.dbgfil, "\n------- dump before build_loop_dd, loop %d ------\n",
289             loop);
290     dump_one_vloop(loop, 0);
291     for (i = VL_CHILD(loop); i != 0; i = VL_SIBLING(i))
292       dump_vloops(i, 1);
293   }
294 #endif
295   build_loop_dd(loop);
296   cln_visit();
297 }
298 
299 static void
build_nat_loop(int loop)300 build_nat_loop(int loop)
301 {
302   int i;
303 
304   for (i = LP_FG(loop); i != 0; i = FG_NEXT(i)) {
305     ++hlv.fgn;
306     FG_NATNXT(i) = hlv.natural_loop;
307     hlv.natural_loop = i;
308   }
309   for (i = VL_CHILD(loop); i != 0; i = VL_SIBLING(i)) {
310     build_nat_loop(i);
311   }
312 }
313 
314 static void
dd_succ(int loop)315 dd_succ(int loop)
316 {
317   /*
318    * We need to know, for two blocks b1 and b2, if b1 can execute before b2
319    * in the same iteration of the loop; if b1 cannot execute before b2,
320    * then there can be no dependence with an equals direction from a memory
321    * reference in b1 to a memory reference in b2.  To do this, we create,
322    * for each flowgraph node, a bit vector giving all the successors of
323    * that node, except that successors of the loop tail are not included.
324    * Then, a transitive closure is performed on those bit vectors.
325    */
326   int i;
327   int nbits;
328   int nnodes;
329   int ntotal;
330   int offs;
331   int change;
332   PSI_P p;
333   BV *temp_set;
334 
335   /*
336    * number of bits needed -- count the number of flowgraph nodes in the
337    * loop; for each of those, need a bit for each flowgraph node in the
338    * entire flowgraph.
339    */
340   hlv.fgn = 0;
341   hlv.natural_loop = 0;
342   build_nat_loop(loop);
343 
344   nnodes = hlv.fgn;
345   nbits = opt.num_nodes + 1;
346   nbits = (nbits + BV_BITS - 1) / BV_BITS;
347   ntotal = nbits * (nnodes + 1);
348   NEW(fgsucc, BVP, opt.num_nodes + 1);
349   NEW(fgbase, BV, ntotal);
350 
351   /* go through and initialize sets */
352   offs = 0;
353   for (i = hlv.natural_loop; i != 0; i = FG_NATNXT(i)) {
354     if (i == LP_TAIL(loop))
355       continue;
356     fgsucc[i] = fgbase + offs;
357     assert(offs < ntotal - nbits, "dd_succ: bad offs", loop, 4);
358     offs += nbits;
359     BZERO(fgsucc[i], BV, nbits);
360     /* initialize successors */
361     for (p = FG_SUCC(i); p != 0; p = PSI_NEXT(p))
362       bv_set(fgsucc[i], PSI_NODE(p));
363   }
364   /* tail has no successors */
365   fgsucc[LP_TAIL(loop)] = fgbase + offs;
366   assert(offs < ntotal - nbits, "dd_succ: bad offs", loop, 4);
367   offs += nbits;
368   BZERO(fgsucc[LP_TAIL(loop)], BV, nbits);
369 
370   /* temporary set */
371   temp_set = fgbase + offs;
372   assert(offs <= ntotal - nbits, "dd_succ: bad offs", loop, 4);
373   offs += nbits;
374 
375   /* now perform transitive closure */
376   do {
377     change = 0;
378     for (i = hlv.natural_loop; i != 0; i = FG_NATNXT(i)) {
379       if (i == LP_TAIL(loop))
380         continue;
381       bv_copy(temp_set, fgsucc[i], nbits);
382       /*
383        * union successor set of this node with successor sets of this
384        * node's successor
385        */
386       for (p = FG_SUCC(i); p != 0; p = PSI_NEXT(p)) {
387         bv_union(temp_set, fgsucc[PSI_NODE(p)], nbits);
388       }
389       if (bv_notequal(temp_set, fgsucc[i], nbits)) {
390         change = TRUE;
391         bv_copy(fgsucc[i], temp_set, nbits);
392       }
393     }
394   } while (change);
395 #if DEBUG
396   if (DBGBIT(36, 32)) {
397     fprintf(gbl.dbgfil, "Successor bit sets\n");
398     for (i = hlv.natural_loop; i != 0; i = FG_NATNXT(i)) {
399       fprintf(gbl.dbgfil, "%3d:", i);
400       bv_print(fgsucc[i], opt.num_nodes + 1);
401     }
402   }
403 #endif
404 }
405 
406 static void
build_loop_dd(int loop)407 build_loop_dd(int loop)
408 {
409   int i, j;
410   int l1, l2;
411   int end, end1;
412 
413   ++ddinfo.n;
414   TRACE2("build_loop_dd: loop %d lev %d\n", loop, ddinfo.n);
415   /* do inner loops first */
416   for (i = VL_CHILD(loop); i != 0; i = VL_SIBLING(i)) {
417     build_loop_dd(i);
418   }
419   ddinfo.common = loop;
420 
421   /* build successor bit vector */
422   dd_succ(loop);
423 
424   /* go through all memory references */
425 
426   /* use pragmas for the common loop */
427   ddinfo.pragmas = VL_PRAGMAS(loop);
428 
429   /* first, this loop */
430   end = VL_MRSTART(loop) + VL_MRCNT(loop);
431   for (i = VL_MRSTART(loop); i < end; ++i)
432     for (j = VL_MRSTART(loop); j <= i; ++j)
433       chkref(i, j);
434 
435   /* next, this loop against inner loops */
436   end1 = VL_MRSTART(loop) + VL_MRECNT(loop);
437   for (i = VL_MRSTART(loop); i < end; ++i)
438     for (j = end; j < end1; ++j)
439       chkref(i, j);
440 
441   /* finally, inner loops against other inner loops */
442   for (l1 = VL_CHILD(loop); l1 != 0; l1 = VL_SIBLING(l1))
443     for (l2 = VL_CHILD(loop); l2 != l1; l2 = VL_SIBLING(l2)) {
444       end = VL_MRSTART(l1) + VL_MRECNT(l1);
445       end1 = VL_MRSTART(l2) + VL_MRECNT(l2);
446       for (i = VL_MRSTART(l1); i < end; ++i)
447         for (j = VL_MRSTART(l2); j < end1; ++j)
448           chkref(i, j);
449     }
450 
451   FREE(fgsucc);
452   FREE(fgbase);
453   --ddinfo.n;
454 #if DEBUG
455   if (DBGBIT(36, 8)) {
456     fprintf(gbl.dbgfil, "----Mem ref after build_dd loop %d----\n", loop);
457     dump_memrefs(VL_MRSTART(loop), VL_MRECNT(loop));
458   }
459 #endif
460 }
461 
462 /* assume mr1, mr2 in same fg node; return TRUE if mr1 comes before mr2 */
463 static LOGICAL
mr_preceeds(int mr1,int mr2)464 mr_preceeds(int mr1, int mr2)
465 {
466   int ilt1, ilt2;
467 
468   if (mr1 == mr2)
469     return FALSE;
470   ilt1 = MR_ILT(mr1);
471   ilt2 = MR_ILT(mr2);
472   if (ilt1 == ilt2) {
473     /* if this is the same ilt, don't allow the store to preceed the
474      * load */
475     if (MR_TYPE(mr1) != 'l') {
476       assert(MR_TYPE(mr2) == 'l', "mr_preceeds: too may st/br", mr2, 4);
477       return FALSE;
478     }
479     return TRUE;
480   }
481   while (ilt1 != 0) {
482     if (ilt1 == ilt2)
483       return TRUE;
484     ilt1 = ILT_NEXT(ilt1);
485   }
486   return FALSE;
487 }
488 
489 static void
chkref(int i,int j)490 chkref(int i, int j)
491 {
492   DIRVEC exo_dirvec_ji;
493   DIRVEC exo_dirvec_ij;
494   int fg1, fg2;
495   DIRVEC vec;
496   DV *p;
497 
498   if ((MR_TYPE(i) != 'l' && MR_TYPE(i) != 's') ||
499       (MR_TYPE(j) != 'l' && MR_TYPE(j) != 's'))
500     return;
501   if (MR_TYPE(i) != 's' && MR_TYPE(j) != 's')
502     return;
503 
504   STRACE2("mem ref j %d i %d\n", j, i);
505 
506   /*
507    * now compute data dependence vector indicating when a
508    * dependence can exist between memory references i and j
509    */
510   /* compute all possible dependence vectors */
511   ddinfo.dvlist = NULL;
512   ddinfo.mr1 = j;
513   ddinfo.mr2 = i;
514   dd_compute();
515 
516   /** compute execution orders from j to i */
517   fg1 = MR_FG(j);
518   fg2 = MR_FG(i);
519   if (fg1 == fg2) {
520     if (mr_preceeds(j, i))
521       exo_dirvec_ji = dirv_exo(ddinfo.n, TRUE);
522     else
523       exo_dirvec_ji = dirv_exo(ddinfo.n, FALSE);
524   } else if (bv_mem(fgsucc[fg1], fg2))
525     exo_dirvec_ji = dirv_exo(ddinfo.n, TRUE);
526   else
527     exo_dirvec_ji = dirv_exo(ddinfo.n, FALSE);
528 
529   /** compute execution orders from i to j */
530   if (fg1 == fg2) {
531     if (mr_preceeds(i, j))
532       exo_dirvec_ij = dirv_exo(ddinfo.n, TRUE);
533     else
534       exo_dirvec_ij = dirv_exo(ddinfo.n, FALSE);
535   } else if (bv_mem(fgsucc[fg2], fg1))
536     exo_dirvec_ij = dirv_exo(ddinfo.n, TRUE);
537   else
538     exo_dirvec_ij = dirv_exo(ddinfo.n, FALSE);
539 
540   /*
541    * intersect the execution order direction vector with the
542    * dependence direction vectors to determine true dependence
543    * direction vector
544    */
545 
546   for (p = ddinfo.dvlist; p != 0; p = p->next) {
547     /* j preceeds i */
548     vec = p->vec & exo_dirvec_ji;
549     if (!dirv_chkzero(vec, ddinfo.n))
550       dd_edge(j, i, vec);
551 
552     /* i preceeds j; must invert dependence vector */
553     vec = dirv_inverse(p->vec) & exo_dirvec_ij;
554     if (!dirv_chkzero(vec, ddinfo.n))
555       dd_edge(i, j, vec);
556   }
557   freearea(HLV_AREA1);
558 }
559 
560 static void
edge_func(DIRVEC vec)561 edge_func(DIRVEC vec)
562 {
563   ddinfo.vec |= vec;
564 }
565 
566 void
dd_edge(int src,int sink,DIRVEC vec)567 dd_edge(int src, int sink, DIRVEC vec)
568 {
569   int type; /* type of dependence */
570   DDEDGE *p;
571 
572   /* figure out type of dependence */
573   /* dependence goes from src to sink */
574   ddinfo.vec = 0;
575   dirv_gen(vec, (int *)0, 0, DIRV_BIGPOS, edge_func);
576   vec = ddinfo.vec;
577   if (vec == 0)
578     return;
579   TRACE3("dd_edge: src %d sink %d vec %s \n", src, sink, dirv_print(vec));
580   if (MR_TYPE(src) == 's' && MR_TYPE(sink) == 's')
581     type = DIRV_FOUT;
582   else if (MR_TYPE(src) == 'l' && MR_TYPE(sink) == 's') {
583     type = DIRV_FANTI;
584     /* under certain circumstances we can ignore anti dependences.
585      * If they are from the load of a scalar to the store of the same
586      * scalar, and the store has a loop-invariant RHS.  There must
587      * also be only one assignment of the variable in the loop, and
588      * the memory refs must be in the same ilt.
589      * The theory is that it doesn't matter if we do the load before the
590      * store, since the same value is always stored.
591      */
592     if (MR_NME(src) == MR_NME(sink) && MR_INVAL(sink) &&
593         MR_ILT(src) == MR_ILT(sink)) {
594       int def;
595       int lp;
596       int inloop;
597 
598       inloop = 0;
599       for (def = NME_DEF(MR_NME(src)); def != 0; def = DEF_NEXT(def)) {
600         /* is it in the loop? */
601         for (lp = FG_LOOP(DEF_FG(def)); lp != 0; lp = LP_PARENT(lp))
602           if (lp == ddinfo.common)
603             goto in_loop;
604         continue; /* not in loop */
605       in_loop:
606         if (inloop++)
607           goto skip;
608       }
609       TRACE0("  Ignoring edge because inv scalar\n");
610       return;
611     }
612   skip:;
613   } else if (MR_TYPE(src) == 's' && MR_TYPE(sink) == 'l')
614     type = DIRV_FFLOW;
615   else {
616     interr("unknown load/load dep in dd_edge", 0, 3);
617     type = 3;
618   }
619   /* add to list for this mem ref */
620   for (p = MR_SUCC(src); p != 0; p = DD_NEXT(p)) {
621     if (DD_SINK(p) == sink && DD_TYPE(p) == type) {
622       DD_DIRVEC(p) |= vec;
623       return;
624     }
625   }
626   /* add a new one */
627   p = (DDEDGE *)getitem(HLV_AREA, sizeof(DDEDGE));
628   DD_TYPE(p) = type;
629   DD_DIRVEC(p) = vec;
630   DD_SINK(p) = sink;
631   DD_NEXT(p) = MR_SUCC(src);
632   MR_SUCC(src) = p;
633 }
634 
635 static void
add_dep(DIRVEC vec)636 add_dep(DIRVEC vec)
637 {
638   DV *p;
639 
640   p = (DV *)getitem(HLV_AREA1, sizeof(DV));
641   p->vec = vec;
642   p->next = ddinfo.dvlist;
643   ddinfo.dvlist = p;
644 }
645 
646 static void
dd_compute(void)647 dd_compute(void)
648 {
649   int nm1, nm2;
650   int ui, loop, mr;
651 
652   STRACE4("dd_compute: mr1 %d mr2 %d common %d n %d\n", ddinfo.mr1, ddinfo.mr2,
653           ddinfo.common, ddinfo.n);
654 
655   /* I believe that 'yuck' is the appropriate word here.  All this
656    * deals with is initial values that the vectorizer added for induction
657    * vars with non-invariant initial values.  Since the uses haven't
658    * yet been replaced, artificial checks must be done for a dependence
659    * that will be added.  While ugly, the code is probably correct and
660    * reasonably efficient.
661    */
662   ui = FALSE;
663   if (MR_IVUSE(ddinfo.mr1) && MR_INIT(ddinfo.mr2)) {
664     loop = MR_LOOP(ddinfo.mr1);
665     mr = ddinfo.mr2;
666     if (VL_PREBIH(loop) == FG_TO_BIH(MR_FG(ddinfo.mr2)))
667       ui = TRUE;
668   } else if (MR_IVUSE(ddinfo.mr2) && MR_INIT(ddinfo.mr1)) {
669     loop = MR_LOOP(ddinfo.mr2);
670     mr = ddinfo.mr1;
671     if (VL_PREBIH(loop) == FG_TO_BIH(MR_FG(ddinfo.mr1)))
672       ui = TRUE;
673   }
674   if (ui) {
675     /* initial value-use situation */
676     int i;
677 
678     for (i = VL_IVLIST(loop); i != 0; i = MR_NEXT(i)) {
679       if (MR_IV(i) == MR_IV(mr)) {
680         add_dep(dirv_fulldep(ddinfo.n));
681         return;
682       }
683     }
684   }
685 
686   nm1 = MR_NME(ddinfo.mr1);
687   while (NME_TYPE(nm1) == NT_ARR || NME_TYPE(nm1) == NT_MEM)
688     nm1 = NME_NM(nm1);
689   ddinfo.basenm1 = nm1;
690 
691   nm2 = MR_NME(ddinfo.mr2);
692   while (NME_TYPE(nm2) == NT_ARR || NME_TYPE(nm2) == NT_MEM)
693     nm2 = NME_NM(nm2);
694   ddinfo.basenm2 = nm2;
695 
696   if (NME_TYPE(nm1) == NT_VAR && NME_TYPE(nm2) == NT_VAR) {
697     if (nm1 != nm2) {
698       int i;
699       /*
700        * for FORTRAN equivalence stmts...  return TRUE if one symbol is
701        * in storage overlap chain of the other. Eventually we need to
702        * be more clever here.
703        */
704       int s1 = basesym_of(nm1);
705       int s2 = basesym_of(nm2);
706       if (!VP_DEPCHK(ddinfo.pragmas) || !VP_EQVCHK(ddinfo.pragmas))
707         return;
708       if (SOCPTRG(s1)) {
709         for (i = SOCPTRG(s1); i; i = SOC_NEXT(i))
710           if (SOC_SPTR(i) == s2) {
711             add_dep(dirv_fulldep(ddinfo.n));
712             return;
713           }
714       }
715       if ((POINTERG(s1) || POINTERG(s2)) &&
716           expr_dependent(MR_ILI(ddinfo.mr2), MR_ILI(ddinfo.mr1),
717                          MR_ILT(ddinfo.mr2), MR_ILT(ddinfo.mr1))) {
718         add_dep(dirv_fulldep(ddinfo.n));
719         return;
720       }
721       return;
722     }
723     resolve_vv();
724   } else if ((NME_TYPE(nm1) == NT_IND && NME_TYPE(nm2) == NT_VAR) ||
725              (NME_TYPE(nm1) == NT_VAR && NME_TYPE(nm2) == NT_IND))
726     resolve_pv();
727   else if (NME_TYPE(nm1) == NT_IND && NME_TYPE(nm2) == NT_IND)
728     resolve_pp();
729   else if ((NME_TYPE(nm1) == NT_UNK && NME_TYPE(nm2) == NT_IND) ||
730            (NME_TYPE(nm1) == NT_IND && NME_TYPE(nm2) == NT_UNK) ||
731            (NME_TYPE(nm1) == NT_UNK && NME_TYPE(nm2) == NT_UNK)) {
732     if (!VP_DEPCHK(ddinfo.pragmas))
733       return;
734     add_dep(dirv_fulldep(ddinfo.n));
735   } else if ((NME_TYPE(nm1) == NT_UNK && NME_TYPE(nm2) == NT_VAR) ||
736              (NME_TYPE(nm1) == NT_VAR && NME_TYPE(nm2) == NT_UNK))
737     resolve_uv();
738   else
739     assert(0, "dd_compute: can't happen", 0, 4);
740 }
741 
742 static void
resolve_pp(void)743 resolve_pp(void)
744 {
745   /* for pointer-pointer, best we can do is the -x switches */
746   open_pragma(BIH_LINENO(FG_TO_BIH(LP_HEAD(ddinfo.common))));
747   if (is_ptr_safe(ddinfo.basenm1))
748     return;
749   if (is_ptr_safe(ddinfo.basenm2))
750     return;
751   close_pragma();
752   if (!VP_DEPCHK(ddinfo.pragmas))
753     return;
754   if (ddinfo.basenm1 == ddinfo.basenm2 && MR_SUBST(ddinfo.mr1) &&
755       MR_SUBST(ddinfo.mr2)) {
756     /* do data dependence analysis */
757     unpack_loops();
758     do_subscript(1);
759   } else
760     add_dep(dirv_fulldep(ddinfo.n));
761 }
762 
763 static void
resolve_pv(void)764 resolve_pv(void)
765 {
766   int ptr, var;
767   int sym;
768 
769   /* use a combination of optimizer utilities & stolen optimizer code */
770   if (NME_TYPE(ddinfo.basenm1) == NT_IND) {
771     ptr = ddinfo.basenm1;
772     var = ddinfo.basenm2;
773   } else {
774     ptr = ddinfo.basenm2;
775     var = ddinfo.basenm1;
776   }
777   assert(NME_TYPE(ptr) == NT_IND && NME_TYPE(var) == NT_VAR,
778          "resolve_pv: bad ptr/var", 0, 4);
779   open_pragma(BIH_LINENO(FG_TO_BIH(LP_HEAD(ddinfo.common))));
780   if (is_ptr_safe(ptr))
781     return;
782   if (is_sym_optsafe(var, ddinfo.common))
783     return;
784   close_pragma();
785   add_dep(dirv_fulldep(ddinfo.n));
786 }
787 
788 static void
resolve_uv(void)789 resolve_uv(void)
790 {
791   add_dep(dirv_fulldep(ddinfo.n));
792 }
793 
794 static void
unpack_loops(void)795 unpack_loops(void)
796 {
797   int i, j;
798   /* unpack the loops */
799   i = 0;
800   ddinfo.n1 = 0;
801   for (j = MR_LOOP(ddinfo.mr1); j != ddinfo.common; j = LP_PARENT(j)) {
802     ddinfo.lps[i++] = j;
803     ++ddinfo.n1;
804   }
805   ddinfo.n2 = 0;
806   for (j = MR_LOOP(ddinfo.mr2); j != ddinfo.common; j = LP_PARENT(j)) {
807     ddinfo.lps[i++] = j;
808     ++ddinfo.n2;
809   }
810   j = ddinfo.common;
811   for (;;) {
812     ddinfo.lps[i++] = j;
813     if (j == ddinfo.outer_loop)
814       break;
815     j = LP_PARENT(j);
816   }
817   assert(i <= MAX_DD, "resolve_vv: bad i", ddinfo.common, 4);
818   assert(i == ddinfo.n1 + ddinfo.n2 + ddinfo.n, "resolve_vv: bad iA",
819          ddinfo.common, 4);
820   ddinfo.subs1 = MR_SUBST(ddinfo.mr1) + MR_SUBCNT(ddinfo.mr1);
821   ddinfo.subs2 = MR_SUBST(ddinfo.mr2) + MR_SUBCNT(ddinfo.mr2);
822   DTRACE2("resolve_vv: subs1 %d subs2 %d\n", ddinfo.subs1, ddinfo.subs2);
823 }
824 
825 static void
resolve_vv(void)826 resolve_vv(void)
827 {
828   int i1, i2;
829   int nm1, nm2;
830   DIRVEC dir;
831   int buf1[MAXNMESIZ], buf2[MAXNMESIZ];
832   int i, end;
833   int j;
834   DV *p, *q, *q1, *p1;
835 
836   /* check for both scalar */
837   if (MR_SCALR(ddinfo.mr1) && MR_SCALR(ddinfo.mr2)) {
838     /* complex have different nmes */
839     assert(ddinfo.basenm1 == ddinfo.basenm2, "resolve_vv: mr1 != mr2",
840            ddinfo.mr1, 4);
841     /* expanded scalars are different */
842     for (i = VL_SCLIST(ddinfo.common); i != 0; i = SCLR_NEXT(i))
843       if (MR_NME(ddinfo.mr1) == SCLR_NME(i))
844         goto found;
845     add_dep(dirv_fulldep(ddinfo.n));
846     return;
847   found:
848     /* equivalent to equals direction in last place */
849     dir = DIRV_ALLEQ;
850     for (i = 1; i < ddinfo.n; ++i)
851       DIRV_ENTRYP(dir, i, DIRV_STAR);
852     DIRV_ENTRYP(dir, 0, DIRV_EQ);
853     add_dep(dir);
854     return;
855   }
856   /* unpack names info into buf1, buf2 */
857   i1 = i2 = MAXNMESIZ;
858   nm1 = MR_NME(ddinfo.mr1);
859   while (i1 > 0) {
860     if (NME_TYPE(nm1) == NT_VAR)
861       break;
862     buf1[--i1] = nm1;
863     nm1 = NME_NM(nm1);
864   }
865   /* too big? */
866   if (NME_TYPE(nm1) != NT_VAR) {
867     add_dep(dirv_fulldep(ddinfo.n));
868     return;
869   }
870 
871   nm2 = MR_NME(ddinfo.mr2);
872   while (i2 > 0) {
873     if (NME_TYPE(nm2) == NT_VAR)
874       break;
875     buf2[--i2] = nm2;
876     nm2 = NME_NM(nm2);
877   }
878   /* too big? */
879   if (NME_TYPE(nm2) != NT_VAR) {
880     add_dep(dirv_fulldep(ddinfo.n));
881     return;
882   }
883 
884   unpack_loops();
885 
886   /* see if we can avoid any checking */
887   i = 0;
888   for (;;) {
889     /* invariant: we have a common prefix of buf1 and buf2 */
890     /* for example: a.b.c[...] */
891     if (i1 + i >= MAXNMESIZ || i2 + i >= MAXNMESIZ)
892       break;
893     if (NME_TYPE(buf1[i1 + i]) == NT_MEM && NME_TYPE(buf2[i2 + i]) == NT_MEM) {
894       if (NME_SYM(buf1[i1 + i]) != NME_SYM(buf2[i2 + i])) {
895         ddinfo.dvlist = NULL;
896         return;
897       }
898     }
899     ++i;
900   }
901   i = 0;
902   j = 0; /* count subscripts */
903   for (;;) {
904     /* all member references match */
905     if (i1 + i >= MAXNMESIZ || i2 + i >= MAXNMESIZ)
906       break;
907     if (NME_TYPE(buf1[i1 + i]) == NT_MEM && NME_TYPE(buf2[i2 + i]) == NT_MEM) {
908       assert(NME_SYM(buf1[i1 + i]) == NME_SYM(buf2[i2 + i]),
909              "resolve_vv: diff MEM symbols", 0, 4);
910       ++i;
911     } else if (NME_TYPE(buf1[i1 + i]) == NT_ARR &&
912                NME_TYPE(buf2[i2 + i]) == NT_ARR) {
913       while (i1 + i < MAXNMESIZ && NME_TYPE(buf1[i1 + i]) == NT_ARR) {
914         assert(NME_TYPE(buf2[i2 + i]) == NT_ARR, "resolve_vv: not both arrays",
915                0, 4);
916         assert(ASD_NDIM(NME_SUB(buf1[i1 + i])) ==
917                    ASD_NDIM(NME_SUB(buf1[i1 + i])),
918                "resolve_vv: ndims not equal", 0, 4);
919         j += ASD_NDIM(NME_SUB(buf1[i1 + i]));
920         ++i;
921       }
922     } else
923       assert(0, "resolve_vv: can't happen", 0, 4);
924   }
925   do_subscript(j);
926   ddinfo.subs1 -= j;
927   ddinfo.subs2 -= j;
928 }
929 
930 static int nvars;
931 static int neqns;
932 static int nfree;
933 
934 #define UD xUD
935 static int UD[MAX_N][MAX_S + MAX_N];
936 static int T[MAX_N];
937 static int C[MAX_S];
938 static int Dist[MAX_LOOPS];
939 static int TU[MAX_S][MAX_N];
940 
941 #define BOUND_LEN (2 * (MAX_N + 1))
942 static BOUND *Bound[MAX_N + 1][2];
943 static BOUND *SaveBound[MAX_N + 1][2];
944 
945 #if DEBUG
946 static void
prmat(int col,int row)947 prmat(int col, int row)
948 {
949   int i, j;
950 
951   fprintf(gbl.dbgfil, "Matrix Dump: %d, %d-----\n", col, row);
952   for (i = 0; i < nvars; ++i) {
953     for (j = 0; j < nvars + neqns; ++j) {
954       prilitree(UD[i][j]);
955       fprintf(gbl.dbgfil, "\t");
956     }
957     fprintf(gbl.dbgfil, "\n");
958   }
959 }
960 #endif
961 
962 static INT
gcd(INT a,INT b)963 gcd(INT a, INT b)
964 {
965   INT u, v;
966   INT r;
967 
968   u = a > 0 ? a : -a;
969   v = b > 0 ? b : -b;
970   while (v != 0) {
971     r = u % v;
972     u = v;
973     v = r;
974   }
975   return u;
976 }
977 
978 static int
ceilg(int t,int g)979 ceilg(int t, int g)
980 {
981   int q;
982   int a;
983 
984   a = t;
985   if (t < 0)
986     a = -t;
987   /* return ceil(t/g), g is positive */
988   q = a / g;
989   if (g * q == a) {
990     if (t < 0)
991       return -q;
992     return q;
993   }
994   /* q is floor(a/g) */
995   ddinfo.unknown = 1;
996   if (t > 0)
997     return q + 1;
998   return -q;
999 }
1000 
1001 static int
floorg(int t,int g)1002 floorg(int t, int g)
1003 {
1004   int q;
1005   int a;
1006 
1007   a = t;
1008   if (t < 0)
1009     a = -t;
1010   /* return floor(t/g), g is positive */
1011   q = a / g;
1012   if (g * q == a) {
1013     if (t < 0)
1014       return -q;
1015     return q;
1016   }
1017   /* q is floor(a/g) */
1018   ddinfo.unknown = 1;
1019   if (t > 0)
1020     return q;
1021   return -q - 1;
1022 }
1023 
1024 static LOGICAL
compare_one_bound(BOUND * low,BOUND * up,int var)1025 compare_one_bound(BOUND *low, BOUND *up, int var)
1026 {
1027   int i, j;
1028   int tmp[MAX_N + 1];
1029   LOGICAL btype;
1030   int t, g, t1;
1031   BOUND b, *p;
1032   int icon0 = ad_icon(0);
1033 
1034 #if DEBUG
1035   if (DBGBIT(36, 64)) {
1036     fprintf(gbl.dbgfil, "compare_one_bound: ");
1037     dump_two_bound(low, up, var, FALSE);
1038   }
1039 #endif
1040   /* compute difference of bounds */
1041   for (j = 1; j < var; ++j) {
1042     tmp[j] = ili_symbolic(
1043         ad2ili(IL_ISUB, ad2ili(IL_IMUL, up->bnd[j], ad_icon(low->mplyr)),
1044                ad2ili(IL_IMUL, low->bnd[j], ad_icon(up->mplyr))));
1045   }
1046   /* compute difference of lower bounds */
1047   tmp[0] = ili_symbolic(ad2ili(
1048       IL_ISUB, ad2ili(IL_IMUL, up->bnd[0], ad_icon(low->mplyr * low->gcd)),
1049       ad2ili(IL_IMUL, low->bnd[0], ad_icon(up->mplyr * up->gcd))));
1050 #if DEBUG
1051   if (DBGBIT(36, 64)) {
1052     fprintf(gbl.dbgfil, "compare_one_bound: up-low: ");
1053     for (j = 0; j < var; ++j) {
1054       prilitree(tmp[j]);
1055       fprintf(gbl.dbgfil, ", ");
1056     }
1057     fprintf(gbl.dbgfil, "\n");
1058   }
1059 #endif
1060   /* tmp[0] is divided by product of gcds */
1061   /* find highest non-zero coefficient */
1062   for (j = var - 1; j > 0; --j)
1063     if (tmp[j] != icon0)
1064       break;
1065   if (j == 0) {
1066     /* check this one immediately */
1067     if (IS_CNST(tmp[0])) {
1068       if (CNSTG(tmp[0]) < 0) {
1069         DTRACE0(
1070             "compare_one_bound finds constant inconsistency, returns nodep\n");
1071         return TRUE;
1072       }
1073       return FALSE; /* constant consistent */
1074     }
1075     /* variable inconsistent, unknown data dep */
1076     ddinfo.unknown = 1;
1077     return FALSE;
1078   }
1079   /* This coefficient must be constant or we'll ignore this bound */
1080   /* since we don't know if it is upper or lower */
1081   if (!IS_CNST(tmp[j])) {
1082     DTRACE1("compare_one_bound ignores bound for %d\n", var);
1083     ddinfo.unknown = 1;
1084     return FALSE;
1085   }
1086   /* decide whether this is a lower or upper bound */
1087   t = CNSTG(tmp[j]);
1088   if (t > 0) {
1089     btype = FALSE; /* lower */
1090     for (i = 0; i < j; ++i)
1091       tmp[i] = ili_symbolic(ad1ili(IL_INEG, tmp[i]));
1092   } else {
1093     btype = TRUE;
1094     tmp[j] = ad1ili(IL_INEG, tmp[j]);
1095     t = -t;
1096   }
1097   /* compute gcd if possible */
1098   g = 1;
1099   if (t != 1) {
1100     g = 0;
1101     for (i = 1; i <= j; ++i) {
1102       if (!IS_CNST(tmp[i]))
1103         break;
1104       g = gcd(g, CNSTG(tmp[i]));
1105     }
1106     if (g != 0) {
1107       /* divide thru */
1108       t /= g;
1109       for (i = 1; i <= j; ++i) {
1110         tmp[i] = ad_icon(CNSTG(tmp[i]) / g);
1111       }
1112       /* try to handle 0th element */
1113       if (IS_CNST(tmp[0])) {
1114         t1 = CNSTG(tmp[0]);
1115         /* compute floor/ceil ( t1 / g) */
1116         if (btype)
1117           t1 = floorg(t1, g);
1118         else
1119           t1 = ceilg(t1, g);
1120         tmp[0] = ad_icon(t1);
1121         g = 1;
1122       }
1123     } else
1124       g = 1;
1125   }
1126   b.gcd = up->gcd * low->gcd * g;
1127   b.mplyr = t;
1128   for (i = 0; i < j; ++i)
1129     b.bnd[i] = tmp[i];
1130 
1131 #if DEBUG
1132   if (DBGBIT(36, 64)) {
1133     fprintf(gbl.dbgfil, "compare_one_bound: new bound: ");
1134     dump_one_bound(&b, j, btype);
1135   }
1136 #endif
1137   /* add this one */
1138   for (p = Bound[j][btype]; p != 0; p = p->next) {
1139     if (b.mplyr == p->mplyr && b.gcd == p->gcd) {
1140       for (i = 0; i < j; ++i)
1141         if (p->bnd[i] != tmp[i])
1142           goto cont;
1143       goto found;
1144     }
1145   cont:;
1146   }
1147   p = (BOUND *)getitem(HLV_AREA1, sizeof(BOUND));
1148   *p = b;
1149   p->next = Bound[j][btype];
1150   Bound[j][btype] = p;
1151 #if DEBUG
1152   if (DBGBIT(36, 16)) {
1153     fprintf(gbl.dbgfil, "Add bound---:");
1154     dump_one_bound(p, j, btype);
1155   }
1156 #endif
1157 found:
1158   return FALSE;
1159 }
1160 
1161 /* ibound = bound in terms of index variables */
1162 /* upflag = true if upper bound */
1163 /* var = index var for which this is a bound */
1164 static int
bound_add(int * ibound,LOGICAL upflag,int var)1165 bound_add(int *ibound, LOGICAL upflag, int var)
1166 {
1167   BOUND b1;
1168   BOUND b2;
1169   int j, k;
1170   int d1, d2;
1171   int icon0 = ad_icon(0);
1172 
1173   /* express the bound in terms of the free variables */
1174   /* create a linear combination of bounds on free variables */
1175   BTRACE4("bound_add: nvars: %d, nfree: %d, upflag: %d, var: %d\n", nvars,
1176           nfree, upflag, var);
1177   for (j = 0; j <= nfree; ++j) {
1178     b1.bnd[j] = icon0;
1179   }
1180   b1.bnd[0] = ibound[0];
1181   for (j = 1; j <= nvars; ++j) {
1182     /* b1.bnd += bound[j] * TU(*,j) */
1183     for (k = 0; k <= nfree; ++k) {
1184       b1.bnd[k] = ili_symbolic(
1185           ad2ili(IL_IADD, b1.bnd[k], ad2ili(IL_IMUL, ibound[j], TU[k][j - 1])));
1186     }
1187   }
1188   /* construct b2 */
1189   for (k = 0; k <= nfree; ++k)
1190     b2.bnd[k] = TU[k][var];
1191   b1.gcd = b2.gcd = 1;
1192   b1.mplyr = b2.mplyr = 1;
1193   if (upflag) {
1194     /* TU(*,var) <= tmp1 */
1195     return compare_one_bound(&b2, &b1, nfree + 1);
1196   } else
1197     return compare_one_bound(&b1, &b2, nfree + 1);
1198 }
1199 
1200 static LOGICAL
check_bounds(void)1201 check_bounds(void)
1202 {
1203   int k;
1204   int d1, d2;
1205   BOUND *p, *q;
1206 
1207   /* compare bounds from nfree down */
1208   for (k = nfree; k >= 1; --k) {
1209     for (p = Bound[k][1]; p != 0; p = p->next)
1210       for (q = Bound[k][0]; q != 0; q = q->next) {
1211         /* compare p and q , q <= p */
1212         if (compare_one_bound(q, p, k))
1213           return TRUE;
1214       }
1215   }
1216   return FALSE;
1217 }
1218 
1219 static LOGICAL
bnd_le(int k,LOGICAL eqflag)1220 bnd_le(int k, LOGICAL eqflag)
1221 {
1222   int i;
1223   BOUND b1, b2;
1224 
1225   for (i = 0; i <= nfree; ++i) /* ik */
1226     b1.bnd[i] = TU[i][2 * k];
1227   if (!eqflag) /* strict inequality */
1228     b1.bnd[0] = ili_symbolic(ad2ili(IL_IADD, b1.bnd[0], ad_icon(1)));
1229   b1.gcd = b1.mplyr = 1;
1230   for (i = 0; i <= nfree; ++i) /* jk */
1231     b2.bnd[i] = TU[i][2 * k + 1];
1232   b2.gcd = b2.mplyr = 1;
1233   return compare_one_bound(&b1, &b2, nfree + 1);
1234 }
1235 
1236 static LOGICAL
bnd_ge(int k,LOGICAL eqflag)1237 bnd_ge(int k, LOGICAL eqflag)
1238 {
1239   int i;
1240   BOUND b1, b2;
1241 
1242   for (i = 0; i <= nfree; ++i) /* ik */
1243     b1.bnd[i] = TU[i][2 * k];
1244   b1.gcd = b1.mplyr = 1;
1245   for (i = 0; i <= nfree; ++i) /* jk */
1246     b2.bnd[i] = TU[i][2 * k + 1];
1247   if (!eqflag)
1248     b2.bnd[0] = ili_symbolic(ad2ili(IL_IADD, b2.bnd[0], ad_icon(1)));
1249   b2.gcd = b2.mplyr = 1;
1250   return compare_one_bound(&b2, &b1, nfree + 1);
1251 }
1252 
1253 /* check for dependence with direction vector dir */
1254 static LOGICAL
check_new_bound(DIRVEC dir,int lev)1255 check_new_bound(DIRVEC dir, int lev)
1256 {
1257   int k;
1258   DIRVEC d;
1259 
1260   BCOPY(Bound, SaveBound, BOUND *, BOUND_LEN);
1261   /* create equations describing this direction vector */
1262   for (k = 0; k < lev; ++k) {
1263     d = DIRV_ENTRYG(dir, lev - k - 1);
1264     switch (d) {
1265     case DIRV_STAR:
1266       break;
1267     case DIRV_LT:
1268       /* ik < jk --> ik+1 <= jk */
1269       if (bnd_le(k, FALSE))
1270         return TRUE;
1271       break;
1272     case DIRV_EQ:
1273       /* simulate with <=, >= */
1274       /* ik >= jk and ik <= jk */
1275       if (bnd_le(k, TRUE))
1276         return TRUE;
1277       if (bnd_ge(k, TRUE))
1278         return TRUE;
1279       break;
1280     case DIRV_GT:
1281       /* ik > jk --> ik >= jk+1 */
1282       if (bnd_ge(k, FALSE))
1283         return TRUE;
1284       break;
1285     }
1286   }
1287   return check_bounds();
1288 }
1289 
1290 static void
do_subscript(int nsubs)1291 do_subscript(int nsubs)
1292 {
1293   /*
1294    * this function is the heart of the data dependence analysis. subs1 and
1295    * subs2 point to subscript entries for the same array. do_subscript
1296    * determines a direction vector under which those subscripts will
1297    * intersect.
1298    */
1299   int i, j, k, k1, bnd, n1;
1300   int q, d1, d2, sgn, sc, ili;
1301   int d;
1302   int t, t1;
1303   int stride1, stride2;
1304   DIRVEC vec, vec1, vec2;
1305   int icon0, icon1, iconneg1;
1306   int sub;
1307   int invar1, invar2;
1308   int lp;
1309   int tmp[MAX_N + 1];
1310 
1311   if (nsubs <= 0)
1312     goto give_up;
1313   icon0 = ad_icon(0L);
1314   icon1 = ad_icon(1L);
1315   iconneg1 = ad_icon(-1L);
1316   /* quick check for non-intersecting loop-invariant subscripts */
1317   /* also find out how many outer loops to remove */
1318   /* Set d to number of outer loops to consider based on
1319    * how far we were able to analyze both subscripts. */
1320   d = 0x7FFFFFFF;
1321   for (sub = 1; sub <= nsubs; ++sub) {
1322     k = ddinfo.n1 + ddinfo.n;
1323     for (i = 0; i < k; ++i) {
1324       if (SB_STRIDE(ddinfo.subs1 - sub)[i] == 0) {
1325         break;
1326       }
1327       if (SB_STRIDE(ddinfo.subs1 - sub)[i] < 0 ||
1328           SB_STRIDE(ddinfo.subs1 - sub)[i] >= astb.stg_avail) {
1329         fprintf(stderr, "sub=%d,ddinfo.subs1=%d,i=%d,SB_STRICE(%d)[%d]=%d\n",
1330                 sub, ddinfo.subs1, i, ddinfo.subs1 - sub, i,
1331                 SB_STRIDE(ddinfo.subs1 - sub)[i]);
1332       }
1333     }
1334     if (d > i - ddinfo.n1)
1335       d = i - ddinfo.n1;
1336     k = ddinfo.n2 + ddinfo.n;
1337     for (i = 0; i < k; ++i) {
1338       if (SB_STRIDE(ddinfo.subs2 - sub)[i] == 0) {
1339         break;
1340       }
1341       if (SB_STRIDE(ddinfo.subs2 - sub)[i] < 0 ||
1342           SB_STRIDE(ddinfo.subs2 - sub)[i] >= astb.stg_avail) {
1343         fprintf(stderr, "sub=%d,ddinfo.subs2=%d,i=%d,SB_STRICE(%d)[%d]=%d\n",
1344                 sub, ddinfo.subs2, i, ddinfo.subs2 - sub, i,
1345                 SB_STRIDE(ddinfo.subs2 - sub)[i]);
1346       }
1347     }
1348     if (d > i - ddinfo.n2)
1349       d = i - ddinfo.n2;
1350   }
1351   if (d <= 0)
1352     goto give_up;
1353   for (sub = 1; sub <= nsubs; ++sub) {
1354     invar1 = TRUE;
1355     k = ddinfo.n1 + d;
1356     for (i = 0; i < k; ++i)
1357       if (SB_STRIDE(ddinfo.subs1 - sub)[i] != icon0) {
1358         invar1 = FALSE;
1359         break;
1360       }
1361     invar2 = TRUE;
1362     k = ddinfo.n2 + d;
1363     for (i = 0; i < k; ++i)
1364       if (SB_STRIDE(ddinfo.subs2 - sub)[i] != icon0) {
1365         invar2 = FALSE;
1366         break;
1367       }
1368     if (invar1 && invar2) {
1369       d1 = SB_BASES(ddinfo.subs1 - sub)[ddinfo.n1 + d];
1370       d2 = SB_BASES(ddinfo.subs2 - sub)[ddinfo.n2 + d];
1371       if (A_TYPEG(d1) == A_TRIPLE || A_DTYPEG(d2) == A_TRIPLE) {
1372         /* get lower bound from d1, d2 */
1373         int d1low, d1high, d2low, d2high;
1374         if (A_TYPEG(d1) != A_TRIPLE) {
1375           d1low = d1high = d1;
1376         } else {
1377           d1low = A_LBDG(d1);
1378           d1high = A_UPBDG(d1);
1379           if (A_STRIDEG(d1)) {
1380             int d1str;
1381             d1str = ili_symbolic(A_STRIDEG(d1));
1382             if (!IS_CNST(d1str)) {
1383               goto give_up;
1384             } else if (CNSTG(d1str) < 0) {
1385               int t;
1386               t = d1low;
1387               d1low = d1high;
1388               d1high = t;
1389             }
1390           }
1391         }
1392         if (A_TYPEG(d2) != A_TRIPLE) {
1393           d2low = d2high = d2;
1394         } else {
1395           d2low = A_LBDG(d2);
1396           d2high = A_UPBDG(d2);
1397           if (A_STRIDEG(d2)) {
1398             int d2str;
1399             d2str = ili_symbolic(A_STRIDEG(d2));
1400             if (!IS_CNST(d2str)) {
1401               goto give_up;
1402             } else if (CNSTG(d2str) < 0) {
1403               int t;
1404               t = d2low;
1405               d2low = d2high;
1406               d2high = t;
1407             }
1408           }
1409         }
1410         if (IS_CNST(d1low) && IS_CNST(d2high) && CNSTG(d1low) > CNSTG(d2high)) {
1411           goto no_dep;
1412         }
1413         if (IS_CNST(d2low) && IS_CNST(d1high) && CNSTG(d2low) > CNSTG(d1high)) {
1414           goto no_dep;
1415         }
1416         continue;
1417       }
1418       if (RESG(d1) != RESG(d2))
1419         goto no_dep;
1420       if (IS_IRES(d1))
1421         t = ad2ili(IL_ISUB, d1, d2);
1422       else if (IS_ARES(d1))
1423         t = ad2ili(IL_ASUB, d1, d2);
1424       else
1425         interr("do_subscript: unknown base type for strides", ddinfo.common, 4);
1426       t = ili_symbolic(t);
1427       if (IS_CNST(t) && CNSTG(t) != 0L) {
1428         goto no_dep;
1429       }
1430     }
1431   }
1432 
1433   DTRACE2("do_subscript: %d subs, # common loops = %d\n", nsubs, d);
1434   /* setup the equations */
1435   /* there are 2*d + ddinfo.n1 + ddinfo.n2 variables */
1436   nvars = 2 * d + ddinfo.n1 + ddinfo.n2;
1437   neqns = nsubs;
1438   for (sub = 1; sub <= nsubs; ++sub) {
1439     /* first set up coefficients for common loops */
1440     j = 0;
1441     for (i = 0; i < d; ++i) {
1442       /* subscript for A */
1443       UD[j][sub + nvars - 1] =
1444           SB_STRIDE(ddinfo.subs1 - sub)[ddinfo.n1 + d - i - 1];
1445       ++j;
1446       UD[j][sub + nvars - 1] =
1447           ad1ili(IL_INEG, SB_STRIDE(ddinfo.subs2 - sub)[ddinfo.n2 + d - i - 1]);
1448       ++j;
1449     }
1450     /* now set up coefficients for first loop */
1451     for (i = 0; i < ddinfo.n1; ++i) {
1452       UD[j][sub + nvars - 1] = SB_STRIDE(ddinfo.subs1 - sub)[ddinfo.n1 - i - 1];
1453       ++j;
1454     }
1455     /* now set up coefficients for second loop */
1456     for (i = 0; i < ddinfo.n2; ++i) {
1457       UD[j][sub + nvars - 1] =
1458           ad1ili(IL_INEG, SB_STRIDE(ddinfo.subs2 - sub)[ddinfo.n2 - i - 1]);
1459       ++j;
1460     }
1461     assert(j == nvars, "do_subscript: not enough vars", 0, 4);
1462   }
1463   /* set up identity matrix */
1464   for (i = 0; i < nvars; ++i) {
1465     for (j = 0; j < nvars; ++j) {
1466       UD[i][j] = icon0;
1467     }
1468     UD[i][i] = icon1;
1469   }
1470   /* set up rhs */
1471   for (sub = 1; sub <= nsubs; ++sub) {
1472     d1 = SB_BASES(ddinfo.subs1 - sub)[ddinfo.n1 + d];
1473     d2 = SB_BASES(ddinfo.subs2 - sub)[ddinfo.n2 + d];
1474     if (RESG(d1) != RESG(d2))
1475       goto no_dep;
1476     if (A_TYPEG(d1) == A_TRIPLE || A_DTYPEG(d2) == A_TRIPLE) {
1477       continue;
1478     }
1479     if (IS_IRES(d1))
1480       t = ad2ili(IL_ISUB, d2, d1);
1481     else if (IS_ARES(d1))
1482       t = ad2ili(IL_ASUB, d2, d1);
1483     else
1484       interr("do_subscript: unknown base type for constants", ddinfo.common, 4);
1485     C[sub - 1] = ili_symbolic(t);
1486   }
1487 #if DEBUG
1488   if (DBGBIT(36, 64)) {
1489     prmat(0, 0);
1490     fprintf(gbl.dbgfil, "RHS: ");
1491     for (i = 0; i < neqns; ++i) {
1492       fprintf(gbl.dbgfil, "\t%d: ", i);
1493       prilitree(C[i]);
1494       fprintf(gbl.dbgfil, "\n");
1495     }
1496   }
1497 #endif
1498 
1499   /* gaussian elimination on the matrix */
1500   bnd = neqns;
1501   if (bnd > nvars)
1502     bnd = nvars;
1503   n1 = neqns;
1504   j = 0;
1505   k1 = 0;
1506   while (k1 < bnd) {
1507     i = nvars - 1;
1508     while (i > k1) {
1509       if (UD[i][j + nvars] == icon0) {
1510         --i;
1511         continue;
1512       }
1513       /* work on rows i, i-1 */
1514       d1 = UD[i][j + nvars];
1515       d2 = UD[i - 1][j + nvars];
1516       /* check if d1 == d2 or d1 == -d2 */
1517       t = ad2ili(IL_ISUB, d1, d2);
1518       t = ili_symbolic(t);
1519       if (t == icon0) {
1520         /* d1 == d2, so sgn == 1, so q == 1 */
1521         q = icon1;
1522       } else {
1523         t = ad2ili(IL_IADD, d1, d2);
1524         t = ili_symbolic(t);
1525         if (t == icon0) {
1526           /* d1 == -d2, so sgn == -1, so q = -1 */
1527           q = ad_icon((INT)-1);
1528         } else {
1529           /* they had better both be constants */
1530           if (!IS_CNST(d1) || !IS_CNST(d2)) {
1531             if (!symbolic_divide(d2, d1, &q))
1532               goto give_up;
1533           } else {
1534             d1 = CNSTG(d1);
1535             d2 = CNSTG(d2);
1536             sgn = d1 * d2;
1537             if (d1 < 0)
1538               d1 = -d1;
1539             if (d2 < 0)
1540               d2 = -d2;
1541             q = d2 / d1;
1542             if (sgn < 0)
1543               q = -q;
1544             q = ad_icon(q);
1545           }
1546         }
1547       }
1548 
1549       if (q != icon0) {
1550         /* saxpy */
1551         for (k = 0; k < nvars + neqns; ++k)
1552           UD[i - 1][k] = ili_symbolic(
1553               ad2ili(IL_ISUB, UD[i - 1][k], symbolic_mul(q, UD[i][k])));
1554       }
1555       /* interchange */
1556       for (k = 0; k < neqns + nvars; ++k) {
1557         q = UD[i - 1][k];
1558         UD[i - 1][k] = UD[i][k];
1559         UD[i][k] = q;
1560       }
1561     }
1562     if (UD[k1][j + nvars] == icon0) {
1563       /* This column is linear combination of prev. cols */
1564       ++j;
1565       --n1;
1566       if (bnd > n1)
1567         bnd = n1;
1568       continue;
1569     }
1570     ++j;
1571     ++k1;
1572   }
1573 #if DEBUG
1574   if (DBGBIT(36, 64))
1575     prmat(j, i);
1576 #endif
1577 
1578   /* eliminate linear dependent columns (0 on the diagonal) from D */
1579   bnd = neqns;
1580   j = 0;
1581   while (j < bnd) {
1582     if (UD[j][nvars + j] != icon0) {
1583       j++;
1584       continue;
1585     }
1586     /* 0 found in D[j][j] */
1587     bnd--; /* decrement # of columns of D */
1588     for (i = j; i < bnd; i++) {
1589       for (k = 0; k < nvars; k++)
1590         /* copy D[k][(i+1)..(bnd+1)] over D[k][i..bnd] */
1591         UD[k][nvars + i] = UD[k][nvars + i + 1];
1592       C[i] = C[i + 1];
1593     }
1594   }
1595   neqns = bnd;
1596 
1597   /* solve tD=C and get distances */
1598   bnd = nvars;
1599   if (bnd > neqns)
1600     bnd = neqns;
1601   /* solve TD = C */
1602   for (j = 0; j < bnd; j++) {
1603     int djj;
1604     sc = icon0;
1605     /* sc = T[0..(j-1)] * D[0..(j-1)][j] */
1606     for (i = 0; i < j; i++) {
1607       ili = ad2ili(IL_IMUL, T[i], UD[i][nvars + j]);
1608       ili = ad2ili(IL_IADD, sc, ili);
1609       sc = ili_symbolic(ili);
1610     }
1611     /* t = C[j] - sc */
1612     ili = ad2ili(IL_ISUB, C[j], sc);
1613     t = ili_symbolic(ili);
1614     djj = UD[j][j + nvars];
1615     /* T[j] = t / D[j][j] */
1616     if (djj == icon1)
1617       T[j] = t;
1618     else if (djj == iconneg1)
1619       T[j] = ad1ili(IL_INEG, t);
1620     else if (t == icon0 && (XBIT(2, 0x200) || IS_CNST(djj)))
1621       /* 0 / x == 0 */
1622       T[j] = icon0;
1623     else if (t == djj && (XBIT(2, 0x200) || IS_CNST(djj)))
1624       /* x / x == 1 */
1625       T[j] = icon1;
1626     else if (IS_CNST(djj) && IS_CNST(t)) {
1627       assert(djj != icon0, "do_subscript: linear combo not removed", 0, 4);
1628       d1 = CNSTG(djj);
1629       d2 = CNSTG(t);
1630       if (d2 % d1 != 0) {
1631         DTRACE0("do_subscript: inconsistent eqns, no dep\n");
1632         goto no_dep;
1633       }
1634       T[j] = ad_icon(d2 / d1);
1635     } else
1636       goto give_up;
1637   }
1638   /* if system is overdetermined, check for consistency */
1639   for (j = nvars; j < neqns; j++) {
1640     sc = icon0;
1641     /* sc = T[0..(nvars-1)] * D[0..(nvars-1)][j] */
1642     for (i = 0; i < nvars; i++) {
1643       ili = ad2ili(IL_IMUL, T[i], UD[i][nvars + j]);
1644       ili = ad2ili(IL_IADD, sc, ili);
1645       sc = ili_symbolic(ili);
1646     }
1647     /* t = sc - C[j] */
1648     ili = ad2ili(IL_ISUB, sc, C[j]);
1649     t = ili_symbolic(ili);
1650     if (IS_CNST(t) && t != icon0) {
1651       DTRACE0("do_subscript: inconsistent overdetermined eqns, no dep\n");
1652       goto no_dep;
1653     }
1654   }
1655   if (neqns > nvars)
1656     /* system is consistent; ignore the last neqns - nvars columns of UD */
1657     neqns = nvars;
1658 
1659 #if DEBUG
1660   if (DBGBIT(36, 64)) {
1661     prmat(0, 0);
1662     fprintf(gbl.dbgfil, "Solution:");
1663     for (j = 0; j < bnd; ++j) {
1664       fprintf(gbl.dbgfil, "\t%d: ", j);
1665       prilitree(T[j]);
1666       fprintf(gbl.dbgfil, "\n");
1667     }
1668   }
1669 #endif
1670 
1671   /* check for constant dependence distances */
1672   /* actually check only in common loops */
1673   for (j = 0; j < ddinfo.n; ++j)
1674     Dist[j] = 0;
1675   /* actually, only check in the common loops
1676    * that we are counting, that is, up to nest 'd' */
1677   for (j = 0; j < d; ++j) {
1678     for (k = neqns; k < nvars; ++k) {
1679       if (UD[k][2 * j + 1] != UD[k][2 * j])
1680         goto skip;
1681     }
1682     /* distance is constant for this loop */
1683     sc = icon0;
1684     for (k = 0; k < neqns; ++k) {
1685       sc = ili_symbolic(ad2ili(
1686           IL_IADD, sc, ad2ili(IL_IMUL, T[k], ad2ili(IL_ISUB, UD[k][2 * j + 1],
1687                                                     UD[k][2 * j]))));
1688     }
1689     Dist[j] = sc;
1690   skip:;
1691   }
1692 #if DEBUG
1693   if (DBGBIT(36, 16)) {
1694     fprintf(gbl.dbgfil, "Distance:\n");
1695     for (j = 0; j < ddinfo.n; ++j) {
1696       fprintf(gbl.dbgfil, "%d: ", j);
1697       if (Dist[j] == 0)
1698         fprintf(gbl.dbgfil, "<undef>\n");
1699       else {
1700         prilitree(Dist[j]);
1701         fprintf(gbl.dbgfil, "\n");
1702       }
1703     }
1704   }
1705 #endif
1706   nfree = nvars - neqns;
1707   assert(nfree >= 0, "do_subscript: negative nfree", 0, 4);
1708 
1709   /* Extended GCD test is done.  Now: */
1710   /* 1. Express index variables in terms of free variables by
1711    *    multiplying tU.  This involves creating a matrix with one
1712    *    column for each index variable and one row for each free
1713    *    variable, plus one extra row to hold the constant coefficient
1714    *    derived from the already solved variables in T
1715    */
1716   for (i = 0; i < nvars; ++i) {
1717     /* do const part */
1718     sc = icon0;
1719     for (j = 0; j < neqns; ++j)
1720       sc = ili_symbolic(ad2ili(IL_IADD, sc, ad2ili(IL_IMUL, T[j], UD[j][i])));
1721     TU[0][i] = sc;
1722     /* do var part */
1723     for (j = neqns + 1; j <= nvars; ++j)
1724       TU[j - neqns][i] = UD[j - 1][i];
1725   }
1726 #if DEBUG
1727   if (DBGBIT(36, 64)) {
1728     fprintf(gbl.dbgfil, "Solution of index vars:\n");
1729     for (i = 0; i < nvars; ++i) {
1730       fprintf(gbl.dbgfil, "I%d= ", i);
1731       for (j = 0; j <= nfree; ++j) {
1732         fprintf(gbl.dbgfil, "\t");
1733         prilitree(TU[j][i]);
1734         if (j) {
1735           fprintf(gbl.dbgfil, "*h%d", j);
1736         }
1737       }
1738       fprintf(gbl.dbgfil, "\n");
1739     }
1740   }
1741 #endif
1742   /* 2. Derive a list of upper and lower bounds from the loop limits.
1743    *    Express the loop bounds in terms of the index variables, then
1744    *    impose the bounds on the appropriate column, then simplify.
1745    */
1746   /* A bound is a linear combination of index variables */
1747   for (i = 0; i <= nfree; ++i) {
1748     Bound[i][0] = Bound[i][1] = 0;
1749   }
1750 /* common loops */
1751 #define BOUND_ADD(t, l, i) \
1752   if (bound_add(t, l, i))  \
1753     goto no_dep;           \
1754   else
1755   for (i = 0; i < d; i++) {
1756     /* get the loop */
1757     lp = ddinfo.lps[ddinfo.n1 + ddinfo.n2 + d - i - 1];
1758     /* this loop derives bounds for variable 2*i and 2*i+1 */
1759     /* same as column 2*i and column 2*i+1 */
1760     /* see if we have to ignore some bounds */
1761     for (j = 0; j < i; ++j)
1762       if (SB_STRIDE(VL_LBND(lp))[j] == 0) {
1763         BTRACE1("Ignoring lower bound for loop %d\n", lp);
1764         goto skipl1;
1765       }
1766 
1767     /*----- get lower bound for I */
1768     for (j = 1; j <= nvars; ++j)
1769       tmp[j] = icon0;
1770     tmp[0] = SB_BASES(VL_LBND(lp))[i]; /* const part */
1771     for (j = 0; j < i; ++j) {
1772       /* express the bound in terms of the index variables */
1773       tmp[2 * j + 1] = SB_STRIDE(VL_LBND(lp))[i - j - 1];
1774     }
1775     BOUND_ADD(tmp, FALSE, 2 * i);
1776 
1777     /*----- get lower bound for J */
1778     for (j = 1; j <= nvars; ++j)
1779       tmp[j] = icon0;
1780     tmp[0] = SB_BASES(VL_LBND(lp))[i]; /* const part */
1781     for (j = 0; j < i; ++j) {
1782       /* express the bound in terms of the index variables */
1783       tmp[2 * j + 2] = SB_STRIDE(VL_LBND(lp))[i - j - 1];
1784     }
1785     BOUND_ADD(tmp, FALSE, 2 * i + 1);
1786 
1787   skipl1:
1788     for (j = 0; j < i; ++j)
1789       if (SB_STRIDE(VL_UBND(lp))[j] == 0) {
1790         BTRACE1("Ignoring upper bound for loop %d\n", lp);
1791         goto skipu1;
1792       }
1793     /*----- get upper bound for I */
1794     for (j = 1; j <= nvars; ++j)
1795       tmp[j] = icon0;
1796     tmp[0] = SB_BASES(VL_UBND(lp))[i]; /* const part */
1797     for (j = 0; j < i; ++j) {
1798       /* express the bound in terms of the index variables */
1799       tmp[2 * j + 1] = SB_STRIDE(VL_UBND(lp))[i - j - 1];
1800     }
1801     BOUND_ADD(tmp, TRUE, 2 * i);
1802 
1803     /*----- get upper bound for J */
1804     for (j = 1; j <= nvars; ++j)
1805       tmp[j] = icon0;
1806     tmp[0] = SB_BASES(VL_UBND(lp))[i]; /* const part */
1807     for (j = 0; j < i; ++j) {
1808       /* express the bound in terms of the index variables */
1809       tmp[2 * j + 2] = SB_STRIDE(VL_UBND(lp))[i - j - 1];
1810     }
1811     BOUND_ADD(tmp, TRUE, 2 * i + 1);
1812   skipu1:;
1813   }
1814 
1815   for (i = 0; i < ddinfo.n1; ++i) {
1816     /* get the loop */
1817     lp = ddinfo.lps[ddinfo.n1 - i - 1];
1818     /* this loop derives bounds for variable 2*d+i */
1819     for (j = 0; j < i + d; ++j)
1820       if (SB_STRIDE(VL_LBND(lp))[j] == 0) {
1821         BTRACE1("Ignoring lower bound for loop %d\n", lp);
1822         goto skipl2;
1823       }
1824 
1825     /*----- get lower bound for I */
1826     for (j = 1; j <= nvars; ++j)
1827       tmp[j] = icon0;
1828     tmp[0] = SB_BASES(VL_LBND(lp))[d + i];
1829     for (j = 0; j < d; ++j)
1830       tmp[2 * j + 1] = SB_STRIDE(VL_LBND(lp))[d + i - j - 1];
1831     for (j = d; j < d + i; ++j)
1832       tmp[d + j + 1] = SB_STRIDE(VL_LBND(lp))[d + i - j - 1];
1833     BOUND_ADD(tmp, FALSE, 2 * d + i);
1834 
1835   skipl2:
1836     for (j = 0; j < i + d; ++j)
1837       if (SB_STRIDE(VL_UBND(lp))[j] == 0) {
1838         BTRACE1("Ignoring upper bound for loop %d\n", lp);
1839         goto skipu2;
1840       }
1841     /*----- get upper bound for I */
1842     for (j = 1; j <= nvars; ++j)
1843       tmp[j] = icon0;
1844     tmp[0] = SB_BASES(VL_UBND(lp))[d + i];
1845     for (j = 0; j < d; ++j)
1846       tmp[2 * j + 1] = SB_STRIDE(VL_UBND(lp))[d + i - j - 1];
1847     for (j = d; j < d + i; ++j)
1848       tmp[d + j + 1] = SB_STRIDE(VL_UBND(lp))[d + i - j - 1];
1849     BOUND_ADD(tmp, TRUE, 2 * d + i);
1850 
1851   skipu2:;
1852   }
1853   for (i = 0; i < ddinfo.n2; ++i) {
1854     /* get the loop */
1855     lp = ddinfo.lps[ddinfo.n1 + ddinfo.n2 - i - 1];
1856     /* this loop derives bounds for variable 2*d+ddinfo.n1+i */
1857 
1858     for (j = 0; j < i + d; ++j)
1859       if (SB_STRIDE(VL_LBND(lp))[j] == 0) {
1860         BTRACE1("Ignoring lower bound for loop %d\n", lp);
1861         goto skipl3;
1862       }
1863     /*----- get lower bound for J */
1864     for (j = 1; j <= nvars; ++j)
1865       tmp[j] = icon0;
1866     tmp[0] = SB_BASES(VL_LBND(lp))[d + i];
1867     for (j = 0; j < d; ++j)
1868       tmp[2 * j + 2] = SB_STRIDE(VL_LBND(lp))[d + i - j - 1];
1869     for (j = d; j < d + i; ++j)
1870       tmp[d + ddinfo.n1 + j + 1] = SB_STRIDE(VL_LBND(lp))[d + i - j - 1];
1871     BOUND_ADD(tmp, FALSE, 2 * d + ddinfo.n1 + i);
1872 
1873   skipl3:
1874     for (j = 0; j < i + d; ++j)
1875       if (SB_STRIDE(VL_UBND(lp))[j] == 0) {
1876         BTRACE1("Ignoring upper bound for loop %d\n", lp);
1877         goto skipu3;
1878       }
1879     /*----- get upper bound for J */
1880     for (j = 1; j <= nvars; ++j)
1881       tmp[j] = icon0;
1882     tmp[0] = SB_BASES(VL_UBND(lp))[d + i];
1883     for (j = 0; j < d; ++j)
1884       tmp[2 * j + 1] = SB_STRIDE(VL_UBND(lp))[d + i - j - 1];
1885     for (j = d; j < d + i; ++j)
1886       tmp[d + ddinfo.n1 + j + 1] = SB_STRIDE(VL_UBND(lp))[d + i - j - 1];
1887     BOUND_ADD(tmp, TRUE, 2 * d + ddinfo.n1 + i);
1888 
1889   skipu3:;
1890   }
1891 
1892   /* 4. Direction vector hierarchy if dependent */
1893   vec = dirv_fulldep(ddinfo.n) & ~DIRV_ALLEQ;
1894   vec1 = 0;
1895   for (i = d; i < ddinfo.n; ++i)
1896     DIRV_ENTRYP(vec1, i, DIRV_STAR);
1897   hierarchy(vec, d, vec1);
1898   return;
1899 
1900 give_up:
1901   if (VP_DEPCHK(ddinfo.pragmas))
1902     add_dep(dirv_fulldep(ddinfo.n));
1903 no_dep:
1904   return;
1905 }
1906 
1907 static void
hierarchy(DIRVEC dir,int lev,DIRVEC veco)1908 hierarchy(DIRVEC dir, int lev, DIRVEC veco)
1909 {
1910   int i;
1911   int dir2;
1912   LOGICAL top;
1913 
1914   DTRACE2("hierarchy: dir 0x%lx -- %s\n", dir, dirv_print(dir));
1915 
1916   for (i = 0; i < lev; ++i)
1917     if (DIRV_ENTRYG(dir, i) != DIRV_STAR)
1918       break;
1919   if (i == lev)
1920     /* all * */
1921     top = TRUE;
1922   else
1923     top = FALSE;
1924   for (i = 0; i < lev; ++i)
1925     if (DIRV_ENTRYG(dir, i) == DIRV_STAR)
1926       goto test;
1927   goto bottom;
1928 
1929 test:
1930   /*
1931    * test dependence at this level; if no dependence, don't need to go any
1932    * further: if (no dependence with direction vector dir) return 0;
1933    */
1934   if (top) {
1935     if (check_bounds() == TRUE)
1936       return;
1937     BCOPY(SaveBound, Bound, BOUND *, BOUND_LEN);
1938   } else {
1939     if (check_new_bound(dir, lev) == TRUE)
1940       return;
1941   }
1942 
1943   /* '*' entry, refine it further */
1944   dir2 = dir;
1945   DIRV_ENTRYC(dir2, i);
1946   DIRV_ENTRYP(dir2, i, DIRV_LT);
1947   hierarchy(dir2, lev, veco);
1948 
1949   dir2 = dir;
1950   DIRV_ENTRYC(dir2, i);
1951   DIRV_ENTRYP(dir2, i, DIRV_EQ);
1952   hierarchy(dir2, lev, veco);
1953 
1954   dir2 = dir;
1955   DIRV_ENTRYC(dir2, i);
1956   DIRV_ENTRYP(dir2, i, DIRV_GT);
1957   hierarchy(dir2, lev, veco);
1958 
1959   return;
1960 
1961 bottom:
1962   /*
1963    * we're at the bottom level.  Test dependence with this direction vector
1964    * & return 0 or this direction vector.  In addition, need to set the
1965    * all-equals bit if this direction vector is all equals
1966    */
1967   ddinfo.unknown = 0;
1968   if (check_new_bound(dir, lev) == TRUE)
1969     return;
1970   if (ddinfo.unknown && !VP_DEPCHK(ddinfo.pragmas))
1971     return;
1972   for (i = 0; i < lev; ++i)
1973     if (DIRV_ENTRYG(dir, i) != DIRV_EQ) {
1974       add_dep(veco | dir);
1975       return;
1976     }
1977   add_dep(veco | dir | DIRV_ALLEQ);
1978 }
1979 
1980 /** \brief Invert a direction vector */
1981 DIRVEC
dirv_inverse(DIRVEC vec)1982 dirv_inverse(DIRVEC vec)
1983 {
1984   static DIRVEC revtab[8] = {
1985       /* >=< */    /* >=< */
1986       /* 000 */ 0, /* 000 */
1987       /* 001 */ 4, /* 100 */
1988       /* 010 */ 2, /* 010 */
1989       /* 011 */ 6, /* 110 */
1990       /* 100 */ 1, /* 001 */
1991       /* 101 */ 5, /* 101 */
1992       /* 110 */ 3, /* 011 */
1993       /* 111 */ 7, /* 111 */
1994   };
1995 
1996   int i;
1997   int t;
1998   DIRVEC vec1;
1999 
2000   /* count number of entrys */
2001   for (i = 0; DIRV_ENTRYG(vec, i) != 0 && i < MAX_LOOPS; ++i)
2002     ;
2003   /* extract information portion */
2004   vec1 = DIRV_INFOPART(vec);
2005   for (--i; i >= 0; --i) {
2006     t = DIRV_ENTRYG(vec, i);
2007     DIRV_ENTRYP(vec1, i, revtab[t]);
2008   }
2009   return vec1;
2010 }
2011 
2012 /** \brief Generate full dependence at a given level */
2013 DIRVEC
dirv_fulldep(int level)2014 dirv_fulldep(int level)
2015 {
2016   int i;
2017   DIRVEC vec;
2018 
2019   vec = 0;
2020   for (i = 0; i < level; ++i) {
2021     vec <<= DIRV_ENTSIZ;
2022     vec |= DIRV_STAR;
2023   }
2024   vec |= DIRV_ALLEQ;
2025   return vec;
2026 }
2027 
2028 /** \brief Generate legal execution order direction vectors.
2029     \param level loop nesting level
2030     \param flag  says whether all '=' allowed
2031  */
2032 DIRVEC
dirv_exo(int level,int flag)2033 dirv_exo(int level, int flag)
2034 {
2035   int i;
2036   DIRVEC vec;
2037 
2038   /* outermost loop: legal vectors are '<='; for rest they are '*' */
2039   /* alleq is only possible if flag is set */
2040   vec = 0;
2041   DIRV_ENTRYP(vec, level - 1, DIRV_EQ | DIRV_LT);
2042   i = level - 1;
2043   while (i > 0) {
2044     DIRV_ENTRYP(vec, i - 1, DIRV_STAR);
2045     --i;
2046   }
2047   if (flag)
2048     vec |= DIRV_ALLEQ;
2049   return vec;
2050 }
2051 
2052 /*
2053  * Return the direction vector that corresponds to the input
2054  * direction vector dir under the mapping m.  If m is NULL, the
2055  * trivial mapping is used.  A mapping is a permutation of the integers
2056  * 0..nest-1 representing the order of the loops as permuted from
2057  * their original order, from inner to outer.
2058  * The trivial mapping is thus nest-1, ..., 0
2059  */
2060 static DIRVEC
dirv_permute(DIRVEC dir,int * m,int nest)2061 dirv_permute(DIRVEC dir, int *m, int nest)
2062 {
2063   int i, j, k;
2064   DIRVEC e;
2065   int seen_lt;
2066   DIRVEC rdir;
2067   int base;
2068 
2069   for (i = 0; DIRV_ENTRYG(dir, i) != 0 && i < MAX_LOOPS; ++i)
2070     ;
2071   base = nest - i;
2072 #if DEBUG
2073   assert(m == 0 || base >= 0, "dirv_permute: nest is wrong", nest, 4);
2074 #endif
2075   seen_lt = 0;
2076   rdir = 0;
2077   for (j = 0; j < i; ++j) {
2078     k = m ? m[j + base] : i - j - 1;
2079     e = DIRV_ENTRYG(dir, k);
2080     rdir <<= DIRV_ENTSIZ;
2081     if (!seen_lt)
2082       rdir |= (e & ~DIRV_GT);
2083     else
2084       rdir |= e;
2085     if (e & DIRV_LT)
2086       seen_lt = 1;
2087   }
2088   if (rdir == 0)
2089     return 0;
2090   /* check alleq case */
2091   for (j = 0; j < i; ++j) {
2092     if (DIRV_ENTRYG(rdir, j) != DIRV_EQ)
2093       goto skip;
2094   }
2095   /* all are equal */
2096   if (!(dir & DIRV_ALLEQ))
2097     return 0;
2098 
2099 skip:
2100   rdir |= DIRV_INFOPART(dir);
2101   return rdir;
2102 }
2103 
2104 static void
dovec(DIRVEC dir,DIRVEC dir1,int pos,int ig,void (* f)(DIRVEC),int alleq,int seenlt)2105 dovec(DIRVEC dir, DIRVEC dir1, int pos, int ig, void (*f)(DIRVEC), int alleq,
2106       int seenlt)
2107 {
2108   DIRVEC dir2;
2109   DIRVEC e;
2110 
2111   if (pos < 0) {
2112     if (dir1 != 0 && !alleq)
2113       (*f)(dir1);
2114     else if (dir1 != 0 && alleq && (dir & DIRV_ALLEQ))
2115       (*f)(dir1 | DIRV_ALLEQ);
2116     return;
2117   }
2118   e = DIRV_ENTRYG(dir, pos);
2119   if (e & DIRV_LT) {
2120     dir2 = dir1 | (DIRV_LT << DIRV_ENTSIZ * pos);
2121     if (pos <= ig)
2122       dovec(dir, dir2, pos - 1, ig, f, 0, 1);
2123   }
2124   if (e & DIRV_EQ) {
2125     dir2 = dir1 | (DIRV_EQ << DIRV_ENTSIZ * pos);
2126     dovec(dir, dir2, pos - 1, ig, f, alleq, seenlt);
2127   }
2128   if (e & DIRV_RD) {
2129     dir2 = dir1 | (DIRV_RD << DIRV_ENTSIZ * pos);
2130     dovec(dir, dir2, pos - 1, ig, f, 0, seenlt);
2131   }
2132   if (e & DIRV_GT) {
2133     dir2 = dir1 | (DIRV_GT << DIRV_ENTSIZ * pos);
2134     if (seenlt)
2135       dovec(dir, dir2, pos - 1, ig, f, 0, 1);
2136   }
2137 }
2138 
2139 void
dirv_gen(DIRVEC dir,int * map,int nest,int ig,void (* f)(DIRVEC))2140 dirv_gen(DIRVEC dir, int *map, int nest, int ig, void (*f)(DIRVEC))
2141 {
2142   /*
2143    * if a position to the left of ig contains '<', then all dirvecs gen'd
2144    * from that can be ignored
2145    */
2146   int i;
2147   DIRVEC dir1;
2148 
2149   dir1 = dirv_permute(dir, map, nest);
2150   for (i = 0; DIRV_ENTRYG(dir, i) != 0 && i < MAX_LOOPS; ++i)
2151     ;
2152   dovec(dir1, 0, i - 1, ig, f, 1, 0);
2153 }
2154 
2155 static LOGICAL is_linear(int);
2156 
2157 /* Initialize subscript struct sub with information from expression
2158  * ast under multiplier astmpyr. The loop index of the ith outer loop
2159  * is the FORALL-index in the ith outer triplet within astliTriples.
2160  * astmpyr is invariant with respect to the FORALL-indices.
2161  * Return TRUE if ast is a linear expression. */
2162 static LOGICAL
mkSub(int astliTriples,int sub,int astmpyr,int ast)2163 mkSub(int astliTriples, int sub, int astmpyr, int ast)
2164 {
2165   int i;
2166   int astli;
2167   int aststride, astbase, astid, astcnst;
2168   LOGICAL bLinear;
2169 
2170   switch (A_TYPEG(ast)) {
2171   case A_CONV:
2172     /*
2173      * when the convert case did not exist, mkSub() returned false
2174      * indicating the subscript expression is non-linear. The convert
2175      * could appear when mixing integer types or the use of -Mlarge_arrays
2176      * and default integer*4. For specaccel palm -Mlarge_arrays, a false
2177      * dependency was returned for some array assignment in an openacc
2178      * kernel; the assignment was not parallelized, but the acc CG
2179      * generated incorrect scalar code.
2180      * We could try pushing the convert into its operand; but all I'm
2181      * going to do is check if its operand is linear, and if so, I will
2182      * treat it as an ID (a single term)
2183      */
2184     if (!is_linear(A_LOPG(ast)))
2185       return FALSE;
2186   /***** fall thru -- treat as ID *****/
2187   case A_ID:
2188     i = 0;
2189     for (astli = astliTriples; astli; astli = ASTLI_NEXT(astli), i++)
2190       if (A_SPTRG(ast) == ASTLI_SPTR(astli)) {
2191         aststride = A_STRIDEG(ASTLI_TRIPLE(astli));
2192         if (!aststride)
2193           aststride = astb.bnd.one;
2194         aststride = mk_binop(OP_MUL, aststride, astmpyr, astb.bnd.dtype);
2195         aststride =
2196             mk_binop(OP_ADD, aststride, SB_STRIDE(sub)[i], astb.bnd.dtype);
2197         SB_STRIDE(sub)[i] = aststride;
2198         astbase = A_LBDG(ASTLI_TRIPLE(astli));
2199         if (A_DTYPEG(astbase) != astb.bnd.dtype) {
2200           astbase = mk_convert(astbase, astb.bnd.dtype);
2201         }
2202         astbase = mk_binop(OP_MUL, astbase, astmpyr, astb.bnd.dtype);
2203         astbase = mk_binop(OP_ADD, astbase, SB_BASE(sub), astb.bnd.dtype);
2204         SB_BASE(sub) = astbase;
2205         return TRUE;
2206       }
2207     astbase = mk_binop(OP_MUL, astmpyr, ast, astb.bnd.dtype);
2208     SB_BASE(sub) = mk_binop(OP_ADD, astbase, SB_BASE(sub), astb.bnd.dtype);
2209     return TRUE;
2210   case A_CNST:
2211     astbase = mk_binop(OP_MUL, astmpyr, ast, astb.bnd.dtype);
2212     SB_BASE(sub) = mk_binop(OP_ADD, astbase, SB_BASE(sub), astb.bnd.dtype);
2213     return TRUE;
2214   case A_UNOP:
2215     if (A_OPTYPEG(ast) != OP_SUB)
2216       return FALSE;
2217     astmpyr = mk_unop(OP_SUB, astmpyr, astb.bnd.dtype);
2218     bLinear = mkSub(astliTriples, sub, astmpyr, A_LOPG(ast));
2219     return bLinear;
2220   case A_BINOP:
2221     switch (A_OPTYPEG(ast)) {
2222     case OP_ADD:
2223       bLinear = (mkSub(astliTriples, sub, astmpyr, A_LOPG(ast)) &&
2224                  mkSub(astliTriples, sub, astmpyr, A_ROPG(ast)));
2225       return bLinear;
2226     case OP_SUB:
2227       bLinear = mkSub(astliTriples, sub, astmpyr, A_LOPG(ast));
2228       if (!bLinear)
2229         return FALSE;
2230       astmpyr = mk_unop(OP_SUB, astmpyr, astb.bnd.dtype);
2231       bLinear = mkSub(astliTriples, sub, astmpyr, A_ROPG(ast));
2232       return bLinear;
2233     case OP_MUL:
2234       astmpyr = mk_binop(OP_MUL, A_LOPG(ast), astmpyr, astb.bnd.dtype);
2235       bLinear = mkSub(astliTriples, sub, astmpyr, A_ROPG(ast));
2236       if (!bLinear)
2237         return FALSE;
2238       astmpyr = mk_binop(OP_MUL, A_ROPG(ast), astmpyr, astb.bnd.dtype);
2239       bLinear = mkSub(astliTriples, sub, astmpyr, A_LOPG(ast));
2240       return bLinear;
2241     default:
2242       return FALSE;
2243     }
2244   default:
2245     return FALSE;
2246   }
2247 }
2248 
2249 static LOGICAL
is_linear(int ast)2250 is_linear(int ast)
2251 {
2252   if (!IS_IRES(ast))
2253     return FALSE;
2254   switch (A_TYPEG(ast)) {
2255   case A_CONV:
2256     return is_linear(A_LOPG(ast));
2257   case A_ID:
2258   case A_CNST:
2259     return TRUE;
2260   case A_UNOP:
2261     if (A_OPTYPEG(ast) != OP_SUB)
2262       return FALSE;
2263     return is_linear(A_LOPG(ast));
2264   case A_BINOP:
2265     switch (A_OPTYPEG(ast)) {
2266     case OP_ADD:
2267     case OP_SUB:
2268     case OP_MUL:
2269       if (!is_linear(A_LOPG(ast)))
2270         return FALSE;
2271       return is_linear(A_ROPG(ast));
2272     default:
2273       return FALSE;
2274     }
2275   default:
2276     return FALSE;
2277   }
2278 }
2279 
2280 /* Local storage for fwd_func() callback. */
2281 static struct {
2282   int nloops;   /* number of loops to check */
2283   LOGICAL bFwd; /* TRUE if forward dependence on entry idv */
2284 } fwd;
2285 
2286 static void
fwd_func(DIRVEC dv)2287 fwd_func(DIRVEC dv)
2288 {
2289   int i;
2290 
2291   for (i = 0; i < fwd.nloops; i++)
2292     fwd.bFwd |= ((DIRV_ENTRYG(dv, i) & DIRV_LT) != 0);
2293 }
2294 
2295 /*
2296  * fill SB_ data structures for any subscripts found in the reference.
2297  * return the number of subscripts, or -1 if a nonlinear subscript is found.
2298  */
2299 static int
fill_subscripts(int astRef,int mr,int subStart,int ntriples,int astliTriples)2300 fill_subscripts(int astRef, int mr, int subStart, int ntriples,
2301                 int astliTriples)
2302 {
2303   int n, asd, ndim, sub, i;
2304   switch (A_TYPEG(astRef)) {
2305   case A_ID:
2306     n = 0;
2307     break;
2308   case A_MEM:
2309     n = fill_subscripts(A_PARENTG(astRef), mr, subStart, ntriples,
2310                         astliTriples);
2311     break;
2312   case A_SUBSTR:
2313     n = fill_subscripts(A_LOPG(astRef), mr, subStart, ntriples, astliTriples);
2314     break;
2315   case A_SUBSCR:
2316     n = fill_subscripts(A_LOPG(astRef), mr, subStart, ntriples, astliTriples);
2317     if (n < 0)
2318       return n;
2319     asd = A_ASDG(astRef);
2320     ndim = ASD_NDIM(asd);
2321     for (sub = 0; sub < ndim; ++sub) {
2322       LOGICAL bLinear;
2323       int astSub, i;
2324       ++(MR_SUBCNT(mr));
2325       ++hlv.subavail;
2326       NEED(hlv.subavail, hlv.subbase, SUBS, hlv.subsize, hlv.subsize + 100);
2327       BZERO(&hlv.subbase[subStart + n], SUBS, 1);
2328       SB_BASE(subStart + n) = astb.bnd.zero;
2329       for (i = 0; i < ntriples; i++)
2330         SB_STRIDE(subStart + n)[i] = astb.bnd.zero;
2331       astSub = ASD_SUBS(asd, sub);
2332       astSub = ili_symbolic(astSub);
2333       bLinear = mkSub(astliTriples, subStart + n, astb.bnd.one, astSub);
2334       if (!bLinear)
2335         return -1;
2336       SB_BASES(subStart + n)[ntriples] = SB_BASE(subStart + n);
2337       ++n;
2338     }
2339     break;
2340   default:
2341     interr("fill_subscripts: unexpected AST type", astRef, 3);
2342     n = 0;
2343   }
2344   return n;
2345 } /* fill_subscripts */
2346 
2347 /** \brief Return TRUE if there is a forward dependence from \p astArrSrc
2348     (the source) to \p astArrSink (the sink) within an iteration space
2349     described by a triplet list beginning at \p astliTriples.
2350 
2351     \p bSinkAfterSrc should be:
2352       +  1 if the sink is lexically after the source.
2353       +  0 if the sink is lexically before the source.
2354       + -1 if this is a FORALL test
2355  */
2356 LOGICAL
dd_array_conflict(int astliTriples,int astArrSrc,int astArrSink,int bSinkAfterSrc)2357 dd_array_conflict(int astliTriples, int astArrSrc, int astArrSink,
2358                   int bSinkAfterSrc)
2359 {
2360   int i, ntriples;
2361   int astli;
2362   int lp, lpOuter;
2363   int sub, nsubsSrc, nsubsSink, subStart, nsubs;
2364   int astTriple, astSub;
2365   int asdSrc, asdSink, aSrc, aSink, nSrc, nSink;
2366   int mrSink, mrSrc;
2367 
2368   assert(astliTriples, "dd_array_conflict: empty triplet list", astArrSrc, 4);
2369 
2370   ntriples = 0;
2371   for (astli = astliTriples; astli; astli = ASTLI_NEXT(astli))
2372     ntriples++;
2373   if (ntriples >= MAX_LOOPS)
2374     /* Assume dependence if too many triples. */
2375     return TRUE;
2376 
2377   nsubsSrc = nsubsSink = 0;
2378   for (aSrc = astArrSrc; aSrc && A_TYPEG(aSrc) != A_ID;) {
2379     switch (A_TYPEG(aSrc)) {
2380     case A_MEM:
2381       aSrc = A_PARENTG(aSrc);
2382       break;
2383     case A_SUBSTR:
2384       aSrc = A_LOPG(aSrc);
2385       break;
2386     case A_SUBSCR:
2387       asdSrc = A_ASDG(aSrc);
2388       nsubsSrc += ASD_NDIM(asdSrc);
2389       aSrc = A_LOPG(aSrc);
2390       break;
2391     default:
2392       interr("dd_array_conflict: unexpected AST in source", aSrc, 3);
2393       aSrc = 0;
2394       break;
2395     }
2396   }
2397   for (aSink = astArrSink; A_TYPEG(aSink) != A_ID;) {
2398     switch (A_TYPEG(aSink)) {
2399     case A_MEM:
2400       aSink = A_PARENTG(aSink);
2401       break;
2402     case A_SUBSTR:
2403       aSink = A_LOPG(aSink);
2404       break;
2405     case A_SUBSCR:
2406       asdSink = A_ASDG(aSink);
2407       nsubsSink += ASD_NDIM(asdSink);
2408       aSink = A_LOPG(aSink);
2409       break;
2410     default:
2411       interr("dd_array_conflict: unexpected AST in sink", aSink, 3);
2412       aSrc = 0;
2413       break;
2414     }
2415   }
2416 
2417   /* Initialize vectorizer's memory. */
2418   hlv.mrsize = 100;
2419   NEW(hlv.mrbase, MEMREF, hlv.mrsize);
2420   hlv.mravail = 1;
2421   hlv.lpsize = 100;
2422   NEW(hlv.lpbase, VLOOP, hlv.lpsize);
2423   hlv.lpavail = 1;
2424   hlv.subsize = 100;
2425   NEW(hlv.subbase, SUBS, hlv.subsize);
2426   hlv.subavail = 1;
2427 
2428   /* Allocate ntriples vectorizer loops. */
2429   lpOuter = hlv.lpavail;
2430   hlv.lpavail += ntriples;
2431   NEED(hlv.lpavail, hlv.lpbase, VLOOP, hlv.lpsize, hlv.lpsize + 100);
2432   BZERO(&hlv.lpbase[lpOuter], VLOOP, ntriples);
2433 
2434   /* Initialize the lower and upper bound subscripts of all loops. */
2435   astli = astliTriples;
2436   for (lp = hlv.lpavail - 1; lp >= lpOuter; --lp, astli = ASTLI_NEXT(astli)) {
2437     int aststride;
2438     astTriple = ASTLI_TRIPLE(astli);
2439     assert(A_TYPEG(astTriple) == A_TRIPLE,
2440            "dd_array_conflict: wrong triplet type", astliTriples, 4);
2441     sub = hlv.subavail++;
2442     NEED(hlv.subavail, hlv.subbase, SUBS, hlv.subsize, hlv.subsize + 100);
2443     BZERO(&hlv.subbase[sub], SUBS, 1);
2444     SB_BASE(sub) = astb.bnd.zero;
2445     for (i = 0; i < hlv.lpavail - 1 - lp; i++) {
2446       SB_STRIDE(sub)[i] = astb.bnd.zero;
2447       SB_BASES(sub)[i + 1] = astb.bnd.zero;
2448     }
2449     VL_LBND(lp) = sub;
2450     sub = hlv.subavail++;
2451     NEED(hlv.subavail, hlv.subbase, SUBS, hlv.subsize, hlv.subsize + 100);
2452     BZERO(&hlv.subbase[sub], SUBS, 1);
2453     SB_BASE(sub) =
2454         mk_binop(OP_SUB, A_UPBDG(astTriple), A_LBDG(astTriple), astb.bnd.dtype);
2455     aststride = A_STRIDEG(astTriple);
2456     if (aststride && IS_CNST(aststride)) {
2457       ISZ_T val;
2458       val = CNSTG(aststride);
2459       if (val < 0) {
2460         SB_BASE(sub) = mk_binop(OP_SUB, A_LBDG(astTriple), A_UPBDG(astTriple),
2461                                 astb.bnd.dtype);
2462       }
2463     }
2464     for (i = 0; i < hlv.lpavail - 1 - lp; i++) {
2465       SB_STRIDE(sub)[i] = astb.bnd.zero;
2466       SB_BASES(sub)[i + 1] = SB_BASE(sub);
2467     }
2468     VL_UBND(lp) = sub;
2469   }
2470 
2471   /* Create memory reference structures for astArrSrc & astArrSink. */
2472   mrSink = hlv.mravail++;
2473   NEED(hlv.mravail, hlv.mrbase, MEMREF, hlv.mrsize, hlv.mrsize + 100);
2474   BZERO(&hlv.mrbase[mrSink], MEMREF, 1);
2475   MR_ILI(mrSink) = astArrSink;
2476   MR_TYPE(mrSink) = 'l';
2477 
2478   mrSrc = hlv.mravail++;
2479   NEED(hlv.mravail, hlv.mrbase, MEMREF, hlv.mrsize, hlv.mrsize + 100);
2480   BZERO(&hlv.mrbase[mrSrc], MEMREF, 1);
2481   MR_ILI(mrSrc) = astArrSrc;
2482   MR_TYPE(mrSrc) = 's';
2483 
2484   /* Create subscript structures for mrSink. */
2485   subStart = hlv.subavail;
2486   MR_SUBST(mrSink) = subStart;
2487   MR_SUBCNT(mrSink) = 0;
2488   nSrc = fill_subscripts(astArrSink, mrSink, subStart, ntriples, astliTriples);
2489   if (nSrc < 0) {
2490     fwd.bFwd = TRUE;
2491     goto nonlinear;
2492   }
2493 
2494   /* Create subscript structures for mrSrc. */
2495   subStart = hlv.subavail;
2496   MR_SUBST(mrSrc) = subStart;
2497   MR_SUBCNT(mrSrc) = 0;
2498   nSink = fill_subscripts(astArrSrc, mrSrc, subStart, ntriples, astliTriples);
2499   if (nSink < 0) {
2500     fwd.bFwd = TRUE;
2501     goto nonlinear;
2502   }
2503 
2504   /* Fill in ddinfo for dependence analysis. */
2505   ddinfo.mr1 = mrSrc;
2506   ddinfo.mr2 = mrSink;
2507   ddinfo.subs1 = MR_SUBST(mrSrc) + MR_SUBCNT(mrSrc);
2508   ddinfo.subs2 = MR_SUBST(mrSink) + MR_SUBCNT(mrSink);
2509 
2510   ddinfo.n = ntriples; /* the array references are in same loop nest. */
2511   ddinfo.n1 = ddinfo.n2 = 0;
2512 
2513   for (i = 0; i < ntriples; i++)
2514     ddinfo.lps[i] = (lpOuter + ntriples - 1) - i;
2515 
2516   ddinfo.common = ddinfo.lps[0];
2517   ddinfo.outer_loop = lpOuter;
2518 
2519   VP_DEPCHK(ddinfo.pragmas) = TRUE;
2520 
2521   ddinfo.dvlist = NULL;
2522 
2523   /* Do dependence analysis. */
2524   nsubs = nsubsSrc < nsubsSink ? nsubsSrc : nsubsSink;
2525   do_subscript(nsubs);
2526 
2527   if (bSinkAfterSrc < 0) {
2528     DV *pdv;
2529     /* ANY loop-carried dependence is a forward dependence */
2530     fwd.bFwd = FALSE;
2531     for (pdv = ddinfo.dvlist; pdv; pdv = pdv->next) {
2532       DIRVEC dv;
2533       int i;
2534       dv = pdv->vec;
2535       for (i = 0; i < MAX_LOOPS; ++i) {
2536         int t;
2537         t = DIRV_ENTRYG(dv, i);
2538         if (t == 0)
2539           break; /* done */
2540         if (t & (DIRV_GT | DIRV_LT)) {
2541           /* found a loop-carried dependence */
2542           fwd.bFwd = TRUE;
2543           break;
2544         }
2545       }
2546     }
2547   } else {
2548     /* Generate direction vectors. */
2549     DIRVEC dvexo;
2550     DDEDGE *dd;
2551     DV *pdv;
2552     dvexo = dirv_exo(ntriples, bSinkAfterSrc);
2553     for (pdv = ddinfo.dvlist; pdv; pdv = pdv->next) {
2554       DIRVEC dv;
2555       dv = pdv->vec & dvexo;
2556       if (!dirv_chkzero(dv, ntriples))
2557         dd_edge(mrSrc, mrSink, dv);
2558     }
2559 
2560     /* Determine if there is a loop-carried dependence. */
2561     fwd.bFwd = FALSE;
2562     fwd.nloops = ntriples;
2563     for (dd = MR_SUCC(mrSrc); dd; dd = DD_NEXT(dd)) {
2564       dirv_gen(dd->dirvec, 0, ntriples, ntriples, fwd_func);
2565       if (fwd.bFwd)
2566         break;
2567     }
2568   }
2569 
2570 nonlinear:
2571   /* Clean up allocated memory. */
2572   cln_visit();
2573   FREE(hlv.lpbase);
2574   FREE(hlv.mrbase);
2575   FREE(hlv.subbase);
2576   freearea(HLV_AREA1);
2577 
2578   return fwd.bFwd;
2579 }
2580 
2581 static int
symbolic_mul(int a,int b)2582 symbolic_mul(int a, int b)
2583 {
2584   int flag;
2585   int c;
2586 
2587   flag = 1;
2588   if (ILI_OPC(a) == IL_INEG) {
2589     a = ILI_OPND(a, 1);
2590     flag = -1 * flag;
2591   }
2592   if (ILI_OPC(b) == IL_INEG) {
2593     b = ILI_OPND(b, 1);
2594     flag = -1 * flag;
2595   }
2596   c = ad2ili(IL_IMUL, a, b);
2597   if (flag < 0)
2598     c = ad1ili(IL_INEG, c);
2599   return c;
2600 }
2601 
2602 /* num/den = ili */
2603 /* quot = symbolic quotient */
2604 static LOGICAL
symbolic_divide(int num,int den,int * quot)2605 symbolic_divide(int num, int den, int *quot)
2606 {
2607   int sign = 1;
2608   int q = num;
2609   int icon1, iconm1, icon0;
2610 
2611   if (num == (icon0 = ad_icon((INT)0))) {
2612     /* 0 / x */
2613     q = icon0;
2614     goto ret;
2615   }
2616   /* x / +-1 */
2617   if (den == (icon1 = ad_icon((INT)1)))
2618     goto ret;
2619   if (den == (iconm1 = ad_icon((INT)-1))) {
2620     sign = -1;
2621     goto ret;
2622   }
2623   /* we'll assume -1 / sym and 1 / sym is 0; this
2624    * is o.k. since we'll just interchange rows & try
2625    * again
2626    */
2627   if (num == icon1 || num == iconm1) {
2628     q = ad_icon((INT)0);
2629     goto ret;
2630   }
2631 
2632   if (ILI_OPC(num) == IL_INEG) {
2633     num = ILI_OPND(num, 1);
2634     sign = -1 * sign;
2635   }
2636   if (ILI_OPC(den) == IL_INEG) {
2637     den = ILI_OPND(den, 1);
2638     sign = -1 * sign;
2639   }
2640   if (ILI_OPC(num) == IL_IMUL) {
2641     if (ILI_OPND(num, 1) == den) {
2642       q = ILI_OPND(num, 2);
2643       goto ret;
2644     }
2645     if (ILI_OPND(num, 2) == den) {
2646       q = ILI_OPND(num, 1);
2647       goto ret;
2648     }
2649   }
2650   return FALSE;
2651 ret:
2652   if (sign < 0)
2653     *quot = ad1ili(IL_INEG, q);
2654   else
2655     *quot = q;
2656   return TRUE;
2657 }
2658 
2659 static LOGICAL rdc;              /* TRUE if a reduction has occurred */
2660 static int visit_chain = 0;      /* head of chain of ili with ILI_REPL set */
2661 static LOGICAL use_visit = TRUE; /* TRUE if ILI_REPL to be used */
2662 
2663 typedef struct arith_term {
2664   long confact;            /* constant multiplier */
2665   int varfact;             /* variable factor */
2666   struct arith_term *next; /* next term */
2667 } ARITH_TERM, *ARITH_LIST; /* arithmetic terms */
2668 
2669 /* Clear the ILI_REPL & ILI_VISIT fields of all ili in the chain beginning
2670  * at visit_chain. */
2671 static void
cln_visit(void)2672 cln_visit(void)
2673 {
2674   int il = visit_chain;
2675   int ilnext;
2676 
2677   for (il = visit_chain; il; il = ilnext) {
2678     ilnext = ILI_VISIT(il);
2679     ILI_VISIT(il) = ILI_REPL(il) = 0;
2680   }
2681   visit_chain = 0;
2682 }
2683 
2684 /* Append list l2 to the end of l1, and return the resulting list. */
2685 static ARITH_LIST
apnd(ARITH_LIST l1,ARITH_LIST l2)2686 apnd(ARITH_LIST l1, ARITH_LIST l2)
2687 {
2688   ARITH_LIST l;
2689 
2690   if (l1 == NULL)
2691     return l2;
2692   if (l2 == NULL)
2693     return l1;
2694   /* Set l to the last cell of l1's list */
2695   for (l = l1; l->next; l = l->next)
2696     ;
2697   l->next = l2;
2698   return l1;
2699 }
2700 
2701 /* Extract constant factors from the varfact member of arithmetic term atp.
2702  * Rules are:
2703  *	r(<c,d>) = <c*d,1>, where d is a constant.
2704  *	r(<c,x*y) = <c*d*e,x'*y'>, where:
2705  *			<d,x'> = r(<1,x>)
2706  *			<e,y'> = r(<1,y>).
2707  *	r(<c,-x>) = <-c*d,y>, where
2708  *			<d,y> = r(<1,x>).
2709  *	r(<c,x>) = <c,x>, otherwise.
2710  */
2711 static void
refactor(ARITH_LIST atp)2712 refactor(ARITH_LIST atp)
2713 {
2714   ARITH_TERM at1, at2;
2715   int opc;
2716   int icon1 = ad_icon(1L);
2717 
2718   at1.next = at2.next = NULL;
2719 
2720   if (IS_CNST(atp->varfact)) {
2721     atp->confact *= CNSTG(atp->varfact);
2722     atp->varfact = icon1;
2723     return;
2724   }
2725 
2726   opc = ILI_OPC(atp->varfact);
2727   switch (opc) {
2728   case IL_IMUL:
2729     at1.confact = at2.confact = 1L;
2730     at1.varfact = ILI_OPND(atp->varfact, 1);
2731     at2.varfact = ILI_OPND(atp->varfact, 2);
2732     refactor(&at1);
2733     refactor(&at2);
2734     atp->confact *= at1.confact * at2.confact;
2735     if (at1.varfact == icon1)
2736       atp->varfact = at2.varfact;
2737     else if (at2.varfact == icon1)
2738       atp->varfact = at1.varfact;
2739     else
2740       atp->varfact = ad2ili(IL_IMUL, at1.varfact, at2.varfact);
2741     break;
2742   case IL_INEG:
2743     at1.confact = 1L;
2744     at1.varfact = ILI_OPND(atp->varfact, 1);
2745     refactor(&at1);
2746     atp->confact *= -at1.confact;
2747     atp->varfact = at1.varfact;
2748     break;
2749   default:
2750     break;
2751   }
2752 }
2753 
2754 /* Extract constant factors of each term in arithmetic term list lst. */
2755 static void
refactor_list(ARITH_LIST lst)2756 refactor_list(ARITH_LIST lst)
2757 {
2758   ARITH_LIST l;
2759 
2760   for (l = lst; l; l = l->next)
2761     refactor(l);
2762 }
2763 
2764 /* Create a sum of all terms in list lst. */
2765 static int
sum(ARITH_LIST lst)2766 sum(ARITH_LIST lst)
2767 {
2768   int ilcon, ilterm, ilsum;
2769   ARITH_LIST l;
2770 
2771   ilsum = ad_icon(0L);
2772   for (l = lst; l; l = l->next) {
2773     ilcon = ad_icon(l->confact);
2774     ilterm = ad2ili(IL_IMUL, ilcon, l->varfact);
2775     ilsum = ad2ili(IL_IADD, ilterm, ilsum);
2776   }
2777   return ilsum;
2778 }
2779 
2780 /* Return an arithmetic term list whose sum is equivalent to mpyr * ili,
2781  * where mpyr is distributed across terms in the resulting list. All confact
2782  * fields in the term list are set to 1.
2783  * Rules are:
2784  *	distrib(x*y, m) = [distrib(x, yi) | yi <- distrib(y, m)].
2785  *	distrib(x+y, m) = distrib(x, m) apnd distrib(y, m).
2786  *	distrib(x-y, m) = distrib(x, m) apnd distrib(y, -m).
2787  *	distrib(-x, m) = distrib(x, -m).
2788  *	distrib(x, m + n) = distrib(m, x) apnd distrib(n, x); if x is a load.
2789  *	distrib(x, m) = [x * m]; otherwise.
2790  */
2791 static ARITH_LIST
distrib(int ili,int mpyr)2792 distrib(int ili, int mpyr)
2793 {
2794   int ili1, ili2, ilitmp;
2795   int opc, opc1;
2796   ARITH_LIST l1, l2, l;
2797 
2798   assert(mpyr, "distrib: invalid multiplier", ili, 4);
2799   assert(ili, "distrib: invalid ili", ili, 4);
2800 
2801   opc = ILI_OPC(ili);
2802   switch (opc) {
2803   case IL_INEG:
2804     ilitmp = ad1ili(IL_INEG, mpyr);
2805     l = distrib(ILI_OPND(ili, 1), ilitmp);
2806     return l;
2807   case IL_IADD:
2808     l1 = distrib(ILI_OPND(ili, 1), mpyr);
2809     l2 = distrib(ILI_OPND(ili, 2), mpyr);
2810     l = apnd(l1, l2);
2811     return l;
2812   case IL_ISUB:
2813     l1 = distrib(ILI_OPND(ili, 1), mpyr);
2814     ilitmp = ad1ili(IL_INEG, mpyr);
2815     l2 = distrib(ILI_OPND(ili, 2), ilitmp);
2816     l = apnd(l1, l2);
2817     return l;
2818   case IL_IMUL:
2819     l2 = distrib(ILI_OPND(ili, 2), mpyr);
2820     l = NULL;
2821     for (; l2; l2 = l2->next) {
2822       assert(l2->confact == 1L, "distrib: non-unit constant", ili, 4);
2823       l1 = distrib(ILI_OPND(ili, 1), l2->varfact);
2824       l = apnd(l1, l);
2825     }
2826     return l;
2827   case IL_IDIV:
2828   case IL_MOD:
2829     ilitmp = ili_symbolic(ILI_OPND(ili, 1));
2830     if (ilitmp != ili) {
2831       ilitmp = ad2ili(opc, ilitmp, ILI_OPND(ili, 2));
2832       ilitmp = ad2ili(IL_IMUL, ilitmp, mpyr);
2833       l = (ARITH_LIST)getitem(HLV_AREA1, sizeof(ARITH_TERM));
2834       l->confact = 1;
2835       l->varfact = ilitmp;
2836       l->next = NULL;
2837       return l;
2838     }
2839   /* continue into the default case */
2840   default:
2841     opc1 = ILI_OPC(mpyr);
2842     if (opc1 == IL_IADD) {
2843       l1 = distrib(ILI_OPND(mpyr, 1), ili);
2844       l2 = distrib(ILI_OPND(mpyr, 2), ili);
2845       l = apnd(l1, l2);
2846       return l;
2847     }
2848     if (opc1 == IL_ISUB) {
2849       l1 = distrib(ILI_OPND(mpyr, 1), ili);
2850       ili2 = ad1ili(IL_INEG, ILI_OPND(mpyr, 2));
2851       l2 = distrib(ili2, ili);
2852       l = apnd(l1, l2);
2853       return l;
2854     }
2855     if (IS_IRES(ili))
2856       ilitmp = ad2ili(IL_IMUL, ili, mpyr);
2857     else
2858       ilitmp = ili;
2859     l = (ARITH_LIST)getitem(HLV_AREA1, sizeof(ARITH_TERM));
2860     l->confact = 1;
2861     l->varfact = ilitmp;
2862     l->next = NULL;
2863     return l;
2864   }
2865 }
2866 
2867 /* Return TRUE if arithmetic term list lst contains a term whose varfact
2868  * member is equivalent to trm.
2869  * Rules are:
2870  *	c_c_f([], x) = FALSE.
2871  *	c_c_f([<c,x>|l], x) = TRUE.
2872  *	c_c_f([x|l], y) = c_c_f(l, y), otherwise.
2873  */
2874 static LOGICAL
contains_common_factor(ARITH_LIST lst,int trm)2875 contains_common_factor(ARITH_LIST lst, int trm)
2876 {
2877   ARITH_LIST l;
2878 
2879   assert(trm, "contains_common_factor: invalid term", trm, 4);
2880   for (l = lst; l; l = l->next)
2881     if (l->varfact == trm)
2882       return TRUE;
2883   return FALSE;
2884 }
2885 
2886 /* Sum the confact members of all terms in arithmetic term list lst
2887  * whose varfact members are all equal to trm. Return the sum.
2888  * Rules are:
2889  *	c_t([], x) = 0.
2890  *	c_t([<c,x>| l], x) = c + c_t(l, x).
2891  *	c_t([x|l], y) = c_t(l, y); otherwise.
2892  */
2893 static long
combine_terms(ARITH_LIST lst,int trm)2894 combine_terms(ARITH_LIST lst, int trm)
2895 {
2896   ARITH_LIST l;
2897   long sum = 0L;
2898 
2899   for (l = lst; l; l = l->next)
2900     if (l->varfact == trm)
2901       sum += l->confact;
2902   return sum;
2903 }
2904 
2905 /* Remove all arithmetic terms from arithmetic term list lst whose varfact
2906  * members are equivalent to trm.
2907  * Rules are:
2908  *	r_t([], x) = [].
2909  *	r_t([<c,x>|l], x) = l.
2910  *	r_t([x|l], y) = [x | r_t(l,y)], otherwise
2911  */
2912 static ARITH_LIST
remove_terms(ARITH_LIST lst,int trm)2913 remove_terms(ARITH_LIST lst, int trm)
2914 {
2915   ARITH_LIST lcur, lprev, lhead;
2916 
2917   assert(trm, "remove_terms: invalid term", trm, 4);
2918 
2919   lhead = lst;
2920   lprev = NULL;
2921   for (lcur = lst; lcur; lcur = lcur->next)
2922     if (lcur->varfact == trm)
2923       /* remove the current term from the list */
2924       if (lprev == NULL)
2925         lhead = lcur->next;
2926       else
2927         lprev->next = lcur->next;
2928     else
2929       lprev = lcur;
2930   return lhead;
2931 }
2932 
2933 /* Combine terms from arithmetic term list lst that have common factors, and
2934  * return the result.
2935  * Rules are:
2936  *	elim([]) = [].
2937  *	elim([<c,x>|l]) = if c_c_f(l, x) then
2938  *			     [<c_t(l, x)+c,x> | elim(r_t(l,x))]
2939  *			  else
2940  *			     [<c,x> | elim(l)].
2941  * When a reduction takes place, set global rdc to TRUE.
2942  */
2943 static ARITH_LIST
elim(ARITH_LIST lst)2944 elim(ARITH_LIST lst)
2945 {
2946   ARITH_LIST lprev, lcur, lhead;
2947   long sum;
2948 
2949   lhead = lst;
2950   lprev = NULL;
2951   for (lcur = lst; lcur; lcur = lcur->next) {
2952     if (contains_common_factor(lcur->next, lcur->varfact)) {
2953       sum = combine_terms(lcur, lcur->varfact);
2954       lcur->next = remove_terms(lcur->next, lcur->varfact);
2955       if (sum == 0L)
2956         if (lprev)
2957           lprev->next = lcur->next;
2958         else
2959           lhead = lcur->next;
2960       else {
2961         lcur->confact = sum;
2962         lprev = lcur;
2963       }
2964       rdc = TRUE;
2965     } else
2966       lprev = lcur;
2967   }
2968   return lhead;
2969 }
2970 
2971 static int
ili_symbolic(int ili)2972 ili_symbolic(int ili)
2973 {
2974   /* perform symbolic manipulation on ili to simplify its form */
2975   int opc;
2976   int icon0 = ad_icon(0L), icon1 = ad_icon(1L);
2977   ARITH_LIST lst1, lst2;
2978   int ilires;
2979   int i;
2980   LOGICAL changed;
2981 
2982   if (use_visit && ILI_REPL(ili))
2983     return ILI_REPL(ili);
2984 
2985   if (IS_CNST(ili))
2986     return ili;
2987   if (A_TYPEG(ili) == A_ID)
2988     return ili;
2989 
2990   opc = ILI_OPC(ili);
2991   if (IS_IRES(ili)) {
2992     /* symbolic manipulation on integer expressions only */
2993     rdc = FALSE;
2994     lst1 = distrib(ili, icon1); /* create list of ili */
2995     refactor_list(lst1);        /* simplify terms */
2996     lst2 = elim(lst1);          /* combine terms */
2997     if (rdc) {
2998       ilires = sum(lst2); /* create sum of terms */
2999 #if DEBUG
3000       if (DBGBIT(36, 128)) {
3001         fprintf(gbl.dbgfil, "Term reduced: ");
3002         prilitree(ili);
3003         fprintf(gbl.dbgfil, " -> ");
3004         prilitree(ilires);
3005         fprintf(gbl.dbgfil, "\n");
3006       }
3007 #endif
3008     } else
3009       ilires = ili;
3010   } else
3011     ilires = ili;
3012 
3013   if (!use_visit)
3014     return ilires;
3015   ILI_REPL(ili) = ilires;
3016   ILI_VISIT(ili) = visit_chain;
3017   visit_chain = ili;
3018   return ilires;
3019 }
3020 
3021 /** \brief Perform symbolic algebraic simplification of term il.
3022 
3023   \b NOTE: Call `freearea(HLV_AREA1)` after calling this function.
3024  */
3025 int
dd_symbolic(int il)3026 dd_symbolic(int il)
3027 {
3028   int ilres;
3029 
3030   use_visit = FALSE;
3031   ilres = ili_symbolic(il);
3032   use_visit = TRUE;
3033   return ilres;
3034 }
3035 
3036 int
dirv_chkzero(DIRVEC dir,int n)3037 dirv_chkzero(DIRVEC dir, int n)
3038 {
3039   int i;
3040 
3041   for (i = 0; i < n; ++i)
3042     if (DIRV_ENTRYG(dir, i) == 0)
3043       return 1;
3044   return 0;
3045 }
3046 
3047 /* opc = a unary arithmetic ili opcode */
3048 static int
ad1ili(int opc,int ast1)3049 ad1ili(int opc, int ast1)
3050 {
3051   int astx;
3052 
3053   if (opc != IL_INEG)
3054     interr("ad1ili: unidentified opcode", opc, 4);
3055   if (A_DTYPEG(ast1) != astb.bnd.dtype)
3056     ast1 = mk_convert(ast1, astb.bnd.dtype);
3057   astx = mk_unop(OP_SUB, ast1, astb.bnd.dtype);
3058   return astx;
3059 }
3060 
3061 /* opc = a binary arithmetic ili opcode */
3062 static int
ad2ili(int opc,int ast1,int ast2)3063 ad2ili(int opc, int ast1, int ast2)
3064 {
3065   int optype;
3066   int dtype;
3067   int astx;
3068 
3069   switch (opc) {
3070   case IL_IADD:
3071     optype = OP_ADD;
3072     dtype = astb.bnd.dtype;
3073     break;
3074   case IL_ISUB:
3075     optype = OP_SUB;
3076     dtype = astb.bnd.dtype;
3077     break;
3078   case IL_IMUL:
3079     optype = OP_MUL;
3080     dtype = astb.bnd.dtype;
3081     break;
3082   case IL_IDIV:
3083     optype = OP_DIV;
3084     dtype = astb.bnd.dtype;
3085     break;
3086   case IL_AADD:
3087     optype = OP_ADD;
3088     dtype = DT_ADDR;
3089     break;
3090   case IL_ASUB:
3091     optype = OP_SUB;
3092     dtype = DT_ADDR;
3093     break;
3094   default:
3095     interr("ad2ili: unidentified opcode", opc, 4);
3096   }
3097 
3098   if (A_DTYPEG(ast1) != dtype)
3099     ast1 = mk_convert(ast1, dtype);
3100   astx = mk_binop(optype, ast1, ast2, dtype);
3101   return astx;
3102 }
3103 
3104 static int
ILI_OPC(int astx)3105 ILI_OPC(int astx)
3106 {
3107   if (A_TYPEG(astx) == A_BINOP)
3108     switch (A_OPTYPEG(astx)) {
3109     case OP_ADD:
3110       if (A_DTYPEG(astx) == DT_ADDR)
3111         return IL_AADD;
3112       if (DT_ISBASIC(A_DTYPEG(astx)))
3113         return IL_IADD;
3114       interr("ILI_OPC: unknown dtype for ADD", astx, 4);
3115     case OP_SUB:
3116       if (A_DTYPEG(astx) == DT_ADDR)
3117         return IL_ASUB;
3118       if (DT_ISBASIC(A_DTYPEG(astx)))
3119         return IL_ISUB;
3120       interr("ILI_OPC: unknown dtype for SUB", astx, 4);
3121     case OP_MUL:
3122       if (DT_ISBASIC(A_DTYPEG(astx)))
3123         return IL_IMUL;
3124       interr("ILI_OPC: unknown dtype for MUL", astx, 4);
3125     case OP_DIV:
3126       if (DT_ISBASIC(A_DTYPEG(astx)))
3127         return IL_IDIV;
3128       interr("ILI_OPC: unknown dtype for DIV", astx, 4);
3129     default:
3130       return 0; /* opcode unknown */
3131     }
3132   if (A_TYPEG(astx) == A_UNOP && A_OPTYPEG(astx) == OP_SUB &&
3133       DT_ISBASIC(A_DTYPEG(astx)))
3134     return IL_INEG;
3135   return 0; /* opcode unknown */
3136 }
3137 
3138 static int
ILI_OPND(int astx,int opnd)3139 ILI_OPND(int astx, int opnd)
3140 {
3141   if (A_TYPEG(astx) == A_UNOP) {
3142     if (opnd == 1)
3143       return A_LOPG(astx);
3144     else
3145       interr("ILI_OPND: UNOP operand # out of range", astx, 4);
3146   }
3147   if (A_TYPEG(astx) == A_BINOP) {
3148     if (opnd == 1)
3149       return A_LOPG(astx);
3150     else if (opnd == 2)
3151       return A_ROPG(astx);
3152     else
3153       interr("ILI_OPND: BINOP operand # out of range", astx, 4);
3154   }
3155   interr("ILI_OPND: unknown operand type", astx, 4);
3156   return 0;
3157 }
3158 
3159 #if DEBUG
3160 char *
dirv_print(DIRVEC dir)3161 dirv_print(DIRVEC dir)
3162 {
3163   static char xyz[32];
3164   char *p = xyz;
3165   DIRVEC t;
3166   int i;
3167 
3168   for (i = 0; DIRV_ENTRYG(dir, i) != 0 && i < MAX_LOOPS; ++i)
3169     ;
3170   if (dir & DIRV_ALLEQ)
3171     *p++ = '#';
3172   else
3173     *p++ = ' ';
3174   for (--i; i >= 0; --i) {
3175     t = DIRV_ENTRYG(dir, i);
3176     if (t & DIRV_LT)
3177       *p++ = '<';
3178     else
3179       *p++ = '.';
3180     if (t & DIRV_EQ)
3181       *p++ = '=';
3182     else
3183       *p++ = '.';
3184     if (t & DIRV_GT)
3185       *p++ = '>';
3186     else
3187       *p++ = '.';
3188     if (t & DIRV_RD)
3189       *p++ = 'R';
3190     else
3191       *p++ = ' ';
3192   }
3193   *p++ = 0;
3194   return xyz;
3195 }
3196 
3197 void
dump_dd(DDEDGE * p)3198 dump_dd(DDEDGE *p)
3199 {
3200   static char *types[] = {"flow", "anti", "outp", "????"};
3201 
3202   for (; p != 0; p = DD_NEXT(p)) {
3203     fprintf(gbl.dbgfil, "     %4d %4s %-32s\n", DD_SINK(p), types[DD_TYPE(p)],
3204             dirv_print(DD_DIRVEC(p)));
3205   }
3206 }
3207 
3208 static void
dump_one_bound(BOUND * p,int k,LOGICAL btype)3209 dump_one_bound(BOUND *p, int k, LOGICAL btype)
3210 {
3211   int j;
3212   BOUND b;
3213 
3214   b = *p;
3215   if (b.mplyr != 1)
3216     fprintf(gbl.dbgfil, "%d*T[%d]", b.mplyr, k);
3217   else
3218     fprintf(gbl.dbgfil, "T[%d]", k);
3219   fprintf(gbl.dbgfil, btype ? "<=" : ">=");
3220   /* dump 0 term */
3221   if (b.gcd == 1 || IS_CNST(b.bnd[0]))
3222     prilitree(b.bnd[0]);
3223   else {
3224     fprintf(gbl.dbgfil, "(");
3225     prilitree(b.bnd[0]);
3226     fprintf(gbl.dbgfil, ")/%d", b.gcd);
3227   }
3228   for (j = 1; j < k; ++j) {
3229     if (IS_CNST(b.bnd[j]) && CNSTG(b.bnd[j]) == 1L)
3230       fprintf(gbl.dbgfil, "+");
3231     else if (IS_CNST(b.bnd[j]) && CNSTG(b.bnd[j]) == 0L)
3232       continue;
3233     else {
3234       fprintf(gbl.dbgfil, "+");
3235       prilitree(b.bnd[j]);
3236       fprintf(gbl.dbgfil, "*");
3237     }
3238     fprintf(gbl.dbgfil, "T[%d]", j);
3239   }
3240   fprintf(gbl.dbgfil, "\n");
3241 }
3242 
3243 static void
dump_two_bound(BOUND * p,BOUND * q,int k,LOGICAL btype)3244 dump_two_bound(BOUND *p, BOUND *q, int k, LOGICAL btype)
3245 {
3246   int j;
3247   if (p->mplyr != 1)
3248     fprintf(gbl.dbgfil, "(");
3249   /* dump 0 term */
3250   if (p->gcd == 1 || IS_CNST(p->bnd[0]))
3251     prilitree(p->bnd[0]);
3252   else {
3253     fprintf(gbl.dbgfil, "(");
3254     prilitree(p->bnd[0]);
3255     fprintf(gbl.dbgfil, ")");
3256   }
3257   for (j = 1; j < k; ++j) {
3258     if (IS_CNST(p->bnd[j]) && CNSTG(p->bnd[j]) == 1L)
3259       fprintf(gbl.dbgfil, "+");
3260     else if (IS_CNST(p->bnd[j]) && CNSTG(p->bnd[j]) == 0L)
3261       continue;
3262     else {
3263       fprintf(gbl.dbgfil, "+");
3264       prilitree(p->bnd[j]);
3265       fprintf(gbl.dbgfil, "*");
3266     }
3267     fprintf(gbl.dbgfil, "T[%d]", j);
3268   }
3269   if (p->mplyr != 1)
3270     fprintf(gbl.dbgfil, ")/%d", p->mplyr);
3271   fprintf(gbl.dbgfil, btype ? ">=" : "<=");
3272   if (q->mplyr != 1)
3273     fprintf(gbl.dbgfil, "(");
3274   /* dump 0 term */
3275   if (q->gcd == 1 || IS_CNST(q->bnd[0]))
3276     prilitree(q->bnd[0]);
3277   else {
3278     fprintf(gbl.dbgfil, "(");
3279     prilitree(q->bnd[0]);
3280     fprintf(gbl.dbgfil, ")");
3281   }
3282   for (j = 1; j < k; ++j) {
3283     if (IS_CNST(q->bnd[j]) && CNSTG(q->bnd[j]) == 1L)
3284       fprintf(gbl.dbgfil, "+");
3285     else if (IS_CNST(q->bnd[j]) && CNSTG(q->bnd[j]) == 0L)
3286       continue;
3287     else {
3288       fprintf(gbl.dbgfil, "+");
3289       prilitree(q->bnd[j]);
3290       fprintf(gbl.dbgfil, "*");
3291     }
3292     fprintf(gbl.dbgfil, "T[%d]", j);
3293   }
3294   if (q->mplyr != 1)
3295     fprintf(gbl.dbgfil, ")/%d", q->mplyr);
3296 
3297   fprintf(gbl.dbgfil, "\n");
3298 }
3299 
3300 /* Dump a list of arithmetic terms. */
3301 static void
dump_termlist(ARITH_LIST lst)3302 dump_termlist(ARITH_LIST lst)
3303 {
3304   ARITH_LIST l;
3305 
3306   if (lst == NULL) {
3307     fprintf(gbl.dbgfil, "[]");
3308     return;
3309   }
3310   fprintf(gbl.dbgfil, "[");
3311   for (l = lst; l->next; l = l->next) {
3312     fprintf(gbl.dbgfil, "%ld*", l->confact);
3313     prilitree(l->varfact);
3314     fprintf(gbl.dbgfil, ", ");
3315   }
3316   fprintf(gbl.dbgfil, "%ld*", l->confact);
3317   prilitree(l->varfact);
3318   fprintf(gbl.dbgfil, "]");
3319 }
3320 #endif
3321 
3322 #endif
3323