1 
2    /**-------------------------------------------------------------------**
3     **                              CLooG                                **
4     **-------------------------------------------------------------------**
5     **                             loop.c                                **
6     **-------------------------------------------------------------------**
7     **                  First version: october 26th 2001                 **
8     **-------------------------------------------------------------------**/
9 
10 
11 /******************************************************************************
12  *               CLooG : the Chunky Loop Generator (experimental)             *
13  ******************************************************************************
14  *                                                                            *
15  * Copyright (C) 2001-2005 Cedric Bastoul                                     *
16  *                                                                            *
17  * This library is free software; you can redistribute it and/or              *
18  * modify it under the terms of the GNU Lesser General Public                 *
19  * License as published by the Free Software Foundation; either               *
20  * version 2.1 of the License, or (at your option) any later version.         *
21  *                                                                            *
22  * This library is distributed in the hope that it will be useful,            *
23  * but WITHOUT ANY WARRANTY; without even the implied warranty of             *
24  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU          *
25  * Lesser General Public License for more details.                            *
26  *                                                                            *
27  * You should have received a copy of the GNU Lesser General Public           *
28  * License along with this library; if not, write to the Free Software        *
29  * Foundation, Inc., 51 Franklin Street, Fifth Floor,                         *
30  * Boston, MA  02110-1301  USA                                                *
31  *                                                                            *
32  * CLooG, the Chunky Loop Generator                                           *
33  * Written by Cedric Bastoul, Cedric.Bastoul@inria.fr                         *
34  *                                                                            *
35  ******************************************************************************/
36 /* CAUTION: the english used for comments is probably the worst you ever read,
37  *          please feel free to correct and improve it !
38  */
39 
40 # include <stdlib.h>
41 # include <stdio.h>
42 # include "../include/cloog/cloog.h"
43 
44 #define ALLOC(type) (type*)malloc(sizeof(type))
45 
46 
47 /******************************************************************************
48  *                             Memory leaks hunting                           *
49  ******************************************************************************/
50 
51 
52 /**
53  * These functions and global variables are devoted to memory leaks hunting: we
54  * want to know at each moment how many CloogLoop structures had been allocated
55  * (cloog_loop_allocated) and how many had been freed (cloog_loop_freed).
56  * Each time a CloogLoog structure is allocated, a call to the function
57  * cloog_loop_leak_up() must be carried out, and respectively
58  * cloog_loop_leak_down() when a CloogLoop structure is freed. The special
59  * variable cloog_loop_max gives the maximal number of CloogLoop structures
60  * simultaneously alive (i.e. allocated and non-freed) in memory.
61  * - July 3rd->11th 2003: first version (memory leaks hunt and correction).
62  */
63 
64 
cloog_loop_leak_up(CloogState * state)65 static void cloog_loop_leak_up(CloogState *state)
66 {
67   state->loop_allocated++;
68   if ((state->loop_allocated - state->loop_freed) > state->loop_max)
69     state->loop_max = state->loop_allocated - state->loop_freed;
70 }
71 
72 
cloog_loop_leak_down(CloogState * state)73 static void cloog_loop_leak_down(CloogState *state)
74 {
75   state->loop_freed++;
76 }
77 
78 
79 /******************************************************************************
80  *                          Structure display function                        *
81  ******************************************************************************/
82 
83 
84 /**
85  * cloog_loop_print_structure function:
86  * Displays a loop structure in a way that trends to be understandable without
87  * falling in a deep depression or, for the lucky ones, getting a headache...
88  * Written by Olivier Chorier, Luc Marchaud, Pierre Martin and Romain Tartiere.
89  * - April 24th 2005: Initial version.
90  * - May   21rd 2005: - New parameter `F' for destination file (ie stdout),
91  *                    - Minor tweaks.
92  * - May   26th 2005: Memory leak hunt.
93  * - June   2nd 2005: (Ced) Integration and minor fixes.
94  * -June  22nd 2005: (Ced) Adaptation for GMP.
95  */
cloog_loop_print_structure(FILE * file,CloogLoop * loop,int level)96 void cloog_loop_print_structure(FILE * file, CloogLoop * loop, int level)
97 { int i, j, first=1 ;
98 
99   if (loop)
100   { /* Go to the right level. */
101     for (i=0; i<level; i++)
102     fprintf(file,"|\t") ;
103 
104     fprintf(file,"+-- CloogLoop\n") ;
105   }
106 
107   /* For each loop. */
108   while (loop)
109   { if (!first)
110     { /* Go to the right level. */
111       for (i=0; i<level; i++)
112       fprintf(file,"|\t") ;
113 
114       fprintf(file,"|   CloogLoop\n") ;
115     }
116     else
117     first = 0 ;
118 
119     /* A blank line. */
120     for(j=0; j<=level+1; j++)
121     fprintf(file,"|\t") ;
122     fprintf(file,"\n") ;
123 
124     /* Print the domain. */
125     cloog_domain_print_structure(file, loop->domain, level+1, "CloogDomain");
126 
127     /* Print the stride. */
128     for(j=0; j<=level; j++)
129     fprintf(file,"|\t") ;
130     if (loop->stride) {
131 	fprintf(file, "Stride: ");
132 	cloog_int_print(file, loop->stride->stride);
133 	fprintf(file, "\n");
134 	fprintf(file, "Offset: ");
135 	cloog_int_print(file, loop->stride->offset);
136 	fprintf(file, "\n");
137     }
138 
139     /* A blank line. */
140     for(j=0; j<=level+1; j++)
141     fprintf(file,"|\t") ;
142     fprintf(file,"\n") ;
143 
144     /* Print the block. */
145     cloog_block_print_structure(file,loop->block,level+1) ;
146 
147     /* A blank line. */
148     for (i=0; i<=level+1; i++)
149     fprintf(file,"|\t") ;
150     fprintf(file,"\n") ;
151 
152     /* Print inner if any. */
153     if (loop->inner)
154     cloog_loop_print_structure(file,loop->inner,level+1) ;
155 
156     /* And let's go for the next one. */
157     loop = loop->next ;
158 
159     /* One more time something that is here only for a better look. */
160     if (!loop)
161     { /* Two blank lines if this is the end of the linked list. */
162       for (j=0; j<2; j++)
163       { for (i=0; i<=level; i++)
164         fprintf(file,"|\t") ;
165 
166         fprintf(file,"\n") ;
167       }
168     }
169     else
170     { /* A special blank line if the is a next loop. */
171       for (i=0; i<=level; i++)
172       fprintf(file,"|\t") ;
173       fprintf(file,"V\n") ;
174     }
175   }
176 }
177 
178 
179 /**
180  * cloog_loop_print function:
181  * This function prints the content of a CloogLoop structure (start) into a
182  * file (file, possibly stdout).
183  * - June 2nd 2005: Now this very old function (probably as old as CLooG) is
184  *                  only a frontend to cloog_loop_print_structure, with a quite
185  *                  better human-readable representation.
186  */
cloog_loop_print(FILE * file,CloogLoop * loop)187 void cloog_loop_print(FILE * file, CloogLoop * loop)
188 { cloog_loop_print_structure(file,loop,0) ;
189 }
190 
191 
192 /******************************************************************************
193  *                         Memory deallocation function                       *
194  ******************************************************************************/
195 
196 
197 /**
198  * cloog_loop_free function:
199  * This function frees the allocated memory for a CloogLoop structure (loop),
200  * and frees its inner loops and its next loops.
201  * - June 22nd 2005: Adaptation for GMP.
202  */
cloog_loop_free(CloogLoop * loop)203 void cloog_loop_free(CloogLoop * loop)
204 { CloogLoop * next ;
205 
206   while (loop != NULL) {
207     cloog_loop_leak_down(loop->state);
208 
209     next = loop->next ;
210     cloog_domain_free(loop->domain) ;
211     cloog_domain_free(loop->unsimplified);
212     cloog_block_free(loop->block) ;
213     if (loop->inner != NULL)
214     cloog_loop_free(loop->inner) ;
215 
216     cloog_stride_free(loop->stride);
217     free(loop) ;
218     loop = next ;
219   }
220 }
221 
222 
223 /**
224  * cloog_loop_free_parts function:
225  * This function frees the allocated memory for some parts of a CloogLoop
226  * structure (loop), each other argument is a boolean having to be set to 1 if
227  * we want to free the corresponding part, 0 otherwise. This function applies
228  * the same freeing policy to its inner ans next loops recursively.
229  * - July  3rd 2003: first version.
230  * - June 22nd 2005: Adaptation for GMP.
231  */
cloog_loop_free_parts(loop,domain,block,inner,next)232 void cloog_loop_free_parts(loop, domain, block, inner, next)
233 CloogLoop * loop ;
234 int domain, block, inner, next ;
235 { CloogLoop * follow ;
236 
237   while (loop != NULL) {
238     cloog_loop_leak_down(loop->state);
239     follow = loop->next ;
240 
241     if (domain)
242     cloog_domain_free(loop->domain) ;
243 
244     if (block)
245     cloog_block_free(loop->block) ;
246 
247     if ((inner) && (loop->inner != NULL))
248     cloog_loop_free_parts(loop->inner,domain,block,inner,1) ;
249 
250     cloog_domain_free(loop->unsimplified);
251     cloog_stride_free(loop->stride);
252     free(loop) ;
253     if (next)
254     loop = follow ;
255     else
256     loop = NULL ;
257   }
258 }
259 
260 
261 /******************************************************************************
262  *                              Reading functions                             *
263  ******************************************************************************/
264 
265 
266 /**
267  * Construct a CloogLoop structure from a given iteration domain
268  * and statement number.
269  */
cloog_loop_from_domain(CloogState * state,CloogDomain * domain,int number)270 CloogLoop *cloog_loop_from_domain(CloogState *state, CloogDomain *domain,
271 	int number)
272 {
273   int nb_iterators;
274   CloogLoop * loop ;
275   CloogStatement * statement ;
276 
277   /* Memory allocation and information reading for the first domain: */
278   loop = cloog_loop_malloc(state);
279   /* domain. */
280   loop->domain = domain;
281   if (loop->domain != NULL)
282     nb_iterators = cloog_domain_dimension(loop->domain);
283   else
284   nb_iterators = 0 ;
285   /* included statement block. */
286   statement = cloog_statement_alloc(state, number + 1);
287   loop->block = cloog_block_alloc(statement, 0, NULL, nb_iterators);
288 
289   return loop ;
290 }
291 
292 
293 /**
294  * cloog_loop_read function:
295  * This function reads loop data from a file (foo, possibly stdin) and
296  * returns a pointer to a CloogLoop structure containing the read information.
297  * This function can be used only for input file reading, when one loop is
298  * associated with one statement.
299  * - number is the statement block number carried by the loop (-1 if none).
300  * - nb_parameters is the number of parameters.
301  **
302  * - September 9th 2002: first version.
303  * - April    16th 2005: adaptation to new CloogStatement struct (with number).
304  * - June     11th 2005: adaptation to new CloogBlock structure.
305  * - June     22nd 2005: Adaptation for GMP.
306  */
cloog_loop_read(CloogState * state,FILE * foo,int number,int nb_parameters)307 CloogLoop *cloog_loop_read(CloogState *state,
308 			    FILE *foo, int number, int nb_parameters)
309 {
310   int op1, op2, op3;
311   char s[MAX_STRING];
312   CloogDomain *domain;
313 
314   domain = cloog_domain_union_read(state, foo, nb_parameters);
315 
316   /* To read that stupid "0 0 0" line. */
317   while (fgets(s,MAX_STRING,foo) == 0) ;
318   while ((*s=='#' || *s=='\n') || (sscanf(s," %d %d %d",&op1,&op2,&op3)<3))
319     if (fgets(s, MAX_STRING, foo))
320       continue;
321 
322   return cloog_loop_from_domain(state, domain, number);
323 }
324 
325 
326 /******************************************************************************
327  *                            Processing functions                            *
328  ******************************************************************************/
329 
330 
331 /**
332  * cloog_loop_malloc function:
333  * This function allocates the memory space for a CloogLoop structure and
334  * sets its fields with default values. Then it returns a pointer to the
335  * allocated space.
336  * - November 21th 2005: first version.
337  */
cloog_loop_malloc(CloogState * state)338 CloogLoop *cloog_loop_malloc(CloogState *state)
339 { CloogLoop * loop ;
340 
341   /* Memory allocation for the CloogLoop structure. */
342   loop = (CloogLoop *)malloc(sizeof(CloogLoop)) ;
343   if (loop == NULL)
344     cloog_die("memory overflow.\n");
345   cloog_loop_leak_up(state);
346 
347 
348   /* We set the various fields with default values. */
349   loop->state    = state;
350   loop->domain = NULL ;
351   loop->unsimplified = NULL;
352   loop->block  = NULL ;
353   loop->usr    = NULL;
354   loop->inner  = NULL ;
355   loop->next   = NULL ;
356   loop->otl = 0;
357   loop->stride = NULL;
358 
359   return loop ;
360 }
361 
362 
363 /**
364  * cloog_loop_alloc function:
365  * This function allocates the memory space for a CloogLoop structure and
366  * sets its fields with those given as input. Then it returns a pointer to the
367  * allocated space.
368  * - October  27th 2001: first version.
369  * - June     22nd 2005: Adaptation for GMP.
370  * - November 21th 2005: use of cloog_loop_malloc.
371  */
cloog_loop_alloc(CloogState * state,CloogDomain * domain,int otl,CloogStride * stride,CloogBlock * block,CloogLoop * inner,CloogLoop * next)372 CloogLoop *cloog_loop_alloc(CloogState *state,
373 	CloogDomain *domain, int otl, CloogStride *stride,
374 	CloogBlock *block, CloogLoop *inner, CloogLoop *next)
375 { CloogLoop * loop ;
376 
377   loop = cloog_loop_malloc(state);
378 
379   loop->domain = domain ;
380   loop->block  = block ;
381   loop->inner  = inner ;
382   loop->next   = next ;
383   loop->otl = otl;
384   loop->stride = cloog_stride_copy(stride);
385 
386   return(loop) ;
387 }
388 
389 
390 /**
391  * cloog_loop_add function:
392  * This function adds a CloogLoop structure (loop) at a given place (now) of a
393  * NULL terminated list of CloogLoop structures. The beginning of this list
394  * is (start). This function updates (now) to (loop), and updates (start) if the
395  * added element is the first one -that is when (start) is NULL-.
396  * - October 28th 2001: first version.
397  */
cloog_loop_add(CloogLoop ** start,CloogLoop ** now,CloogLoop * loop)398 void cloog_loop_add(CloogLoop ** start, CloogLoop ** now, CloogLoop * loop)
399 { if (*start == NULL)
400   { *start = loop ;
401     *now = *start ;
402   }
403   else
404   { (*now)->next = loop ;
405     *now = (*now)->next ;
406   }
407 }
408 
409 
410 /**
411  * cloog_loop_add function:
412  * This function adds a CloogLoop structure (loop) at a given place (now) of a
413  * NULL terminated list of CloogLoop structures. The beginning of this list
414  * is (start). This function updates (now) to the end of the loop list (loop),
415  * and updates (start) if the added element is the first one -that is when
416  * (start) is NULL-.
417  * - September 9th 2005: first version.
418  */
cloog_loop_add_list(CloogLoop ** start,CloogLoop ** now,CloogLoop * loop)419 void cloog_loop_add_list(CloogLoop ** start, CloogLoop ** now, CloogLoop * loop)
420 { if (*start == NULL)
421   { *start = loop ;
422     *now = *start ;
423   }
424   else
425   { (*now)->next = loop ;
426     *now = (*now)->next ;
427   }
428 
429   while ((*now)->next != NULL)
430   *now = (*now)->next ;
431 }
432 
433 
434 /**
435  * cloog_loop_copy function:
436  * This function returns a copy of the CloogLoop structure given as input. In
437  * fact, there is just new allocations for the CloogLoop structures, but their
438  * contents are the same.
439  * - October 28th 2001: first version.
440  * - July 3rd->11th 2003: memory leaks hunt and correction.
441  */
cloog_loop_copy(CloogLoop * source)442 CloogLoop * cloog_loop_copy(CloogLoop * source)
443 { CloogLoop * loop ;
444   CloogBlock * block ;
445   CloogDomain * domain ;
446 
447   loop = NULL ;
448   if (source != NULL)
449   { domain = cloog_domain_copy(source->domain) ;
450     block  = cloog_block_copy(source->block) ;
451     loop   = cloog_loop_alloc(source->state, domain, source->otl,
452 		source->stride, block, NULL,  NULL);
453     loop->usr = source->usr;
454     loop->inner = cloog_loop_copy(source->inner) ;
455     loop->next = cloog_loop_copy(source->next) ;
456   }
457   return(loop) ;
458 }
459 
460 
461 /**
462  * cloog_loop_add_disjoint function:
463  * This function adds some CloogLoop structures at a given place (now) of a
464  * NULL terminated list of CloogLoop structures. The beginning of this list
465  * is (start). (loop) can be an union of polyhedra, this function separates the
466  * union into a list of *disjoint* polyhedra then adds the list. This function
467  * updates (now) to the end of the list and updates (start) if first added
468  * element is the first of the principal list -that is when (start) is NULL-.
469  * (loop) can be freed by this function, basically when its domain is actually
470  * a union of polyhedra, but don't worry, all the useful data are now stored
471  * inside the list (start). We do not use PolyLib's Domain_Disjoint function,
472  * since the number of union components is often higher (thus code size too).
473  * - October   28th 2001: first version.
474  * - November  14th 2001: bug correction (this one was hard to find !).
475  * - July 3rd->11th 2003: memory leaks hunt and correction.
476  * - June      22nd 2005: Adaptation for GMP.
477  * - October   27th 2005: (debug) included blocks were not copied for new loops.
478  */
cloog_loop_add_disjoint(start,now,loop)479 void cloog_loop_add_disjoint(start, now, loop)
480 CloogLoop ** start, ** now, * loop ;
481 {
482   CloogLoop * sep, * inner ;
483   CloogDomain *domain, *seen, *temp, *rest;
484   CloogBlock * block ;
485 
486   if (cloog_domain_isconvex(loop->domain))
487   cloog_loop_add(start,now,loop) ;
488   else {
489     domain = cloog_domain_simplify_union(loop->domain);
490     loop->domain = NULL ;
491 
492     /* We separate the first element of the rest of the union. */
493     domain = cloog_domain_cut_first(domain, &rest);
494 
495     /* This first element is the first of the list of disjoint polyhedra. */
496     sep = cloog_loop_alloc(loop->state, domain, 0, NULL,
497 			   loop->block, loop->inner, NULL);
498     cloog_loop_add(start,now,sep) ;
499 
500     seen = cloog_domain_copy(domain);
501     while (!cloog_domain_isempty(domain = rest)) {
502       temp = cloog_domain_cut_first(domain, &rest);
503       domain = cloog_domain_difference(temp, seen);
504       cloog_domain_free(temp);
505 
506       if (cloog_domain_isempty(domain)) {
507 	cloog_domain_free(domain);
508 	continue;
509       }
510 
511       /* Each new loop will have its own life, for instance we can free its
512        * inner loop and included block. Then each one must have its own copy
513        * of both 'inner' and 'block'.
514        */
515       inner = cloog_loop_copy(loop->inner) ;
516       block = cloog_block_copy(loop->block) ;
517 
518       sep = cloog_loop_alloc(loop->state, cloog_domain_copy(domain),
519 			     0, NULL, block, inner, NULL);
520       /* domain can be an union too. If so: recursion. */
521       if (cloog_domain_isconvex(domain))
522 	cloog_loop_add(start,now,sep) ;
523       else
524 	cloog_loop_add_disjoint(start,now,sep) ;
525 
526       if (cloog_domain_isempty(rest)) {
527 	cloog_domain_free(domain);
528 	break;
529       }
530 
531       seen = cloog_domain_union(seen, domain);
532     }
533     cloog_domain_free(rest);
534     cloog_domain_free(seen);
535     cloog_loop_free_parts(loop,0,0,0,0) ;
536   }
537 }
538 
539 
540 /**
541  * cloog_loop_disjoint function:
542  * This function returns a list of loops such that each loop with non-convex
543  * domain in the input list (loop) is separated into several loops where the
544  * domains are the components of the union of *disjoint* polyhedra equivalent
545  * to the original non-convex domain. See cloog_loop_add_disjoint comments
546  * for more details.
547  * - September 16th 2005: first version.
548  */
cloog_loop_disjoint(CloogLoop * loop)549 CloogLoop * cloog_loop_disjoint(CloogLoop * loop)
550 { CloogLoop *res=NULL, * now=NULL, * next ;
551 
552   /* Because this is often the case, don't waste time ! */
553   if (loop && !loop->next && cloog_domain_isconvex(loop->domain))
554   return loop ;
555 
556   while (loop != NULL)
557   { next = loop->next ;
558     loop->next = NULL ;
559     cloog_loop_add_disjoint(&res,&now,loop) ;
560     loop = next ;
561   }
562 
563   return res ;
564 }
565 
566 
567 /**
568  * cloog_loop_restrict function:
569  * This function returns the (loop) in the context of (context): it makes the
570  * intersection between the (loop) domain and the (context), then it returns
571  * a pointer to a new loop, with this intersection as domain.
572  **
573  * - October 27th 2001: first version.
574  * - June    15th 2005: a memory leak fixed (domain was not freed when empty).
575  * - June    22nd 2005: Adaptation for GMP.
576  */
cloog_loop_restrict(CloogLoop * loop,CloogDomain * context)577 CloogLoop *cloog_loop_restrict(CloogLoop *loop, CloogDomain *context)
578 { int new_dimension ;
579   CloogDomain * domain, * extended_context, * new_domain ;
580   CloogLoop * new_loop ;
581 
582   domain = loop->domain ;
583   if (cloog_domain_dimension(domain) > cloog_domain_dimension(context))
584   {
585     new_dimension = cloog_domain_dimension(domain);
586     extended_context = cloog_domain_extend(context, new_dimension);
587     new_domain = cloog_domain_intersection(extended_context,loop->domain) ;
588     cloog_domain_free(extended_context) ;
589   }
590   else
591   new_domain = cloog_domain_intersection(context,loop->domain) ;
592 
593   if (cloog_domain_isempty(new_domain))
594   { cloog_domain_free(new_domain) ;
595     return(NULL) ;
596   }
597   else {
598     new_loop = cloog_loop_alloc(loop->state, new_domain,
599 				0, NULL, loop->block, loop->inner, NULL);
600     return(new_loop) ;
601   }
602 }
603 
604 
605 /**
606  * Call cloog_loop_restrict on each loop in the list "loop" and return
607  * the concatenated result.
608  */
cloog_loop_restrict_all(CloogLoop * loop,CloogDomain * context)609 CloogLoop *cloog_loop_restrict_all(CloogLoop *loop, CloogDomain *context)
610 {
611     CloogLoop *next;
612     CloogLoop *res = NULL;
613     CloogLoop **res_next = &res;
614 
615     for (; loop; loop = next) {
616 	next = loop->next;
617 
618 	*res_next = cloog_loop_restrict(loop, context);
619 	if (*res_next) {
620 	    res_next = &(*res_next)->next;
621 	    cloog_loop_free_parts(loop, 1, 0, 0, 0);
622 	} else {
623 	    loop->next = NULL;
624 	    cloog_loop_free(loop);
625 	}
626     }
627 
628     return res;
629 }
630 
631 
632 /**
633  * Restrict the domains of the inner loops of each loop l in the given
634  * list of loops to the domain of the loop l.  If the domains of all
635  * inner loops of a given loop l turn out to be empty, then remove l
636  * from the list.
637  */
cloog_loop_restrict_inner(CloogLoop * loop)638 CloogLoop *cloog_loop_restrict_inner(CloogLoop *loop)
639 {
640     CloogLoop *next;
641     CloogLoop *res;
642     CloogLoop **res_next = &res;
643 
644     for (; loop; loop = next) {
645 	next = loop->next;
646 
647 	loop->inner = cloog_loop_restrict_all(loop->inner, loop->domain);
648 	if (loop->inner) {
649 	    *res_next = loop;
650 	    res_next = &(*res_next)->next;
651 	} else {
652 	    loop->next = NULL;
653 	    cloog_loop_free(loop);
654 	}
655     }
656 
657     *res_next = NULL;
658 
659     return res;
660 }
661 
662 /**
663  * cloog_loop_project function:
664  * This function returns the projection of (loop) on the (level) first
665  * dimensions (outer loops). It makes the projection of the (loop) domain,
666  * then it returns a pointer to a new loop, with this projection as domain.
667  **
668  * - October   27th 2001: first version.
669  * - July 3rd->11th 2003: memory leaks hunt and correction.
670  * - June      22nd 2005: Adaptation for GMP.
671  */
cloog_loop_project(CloogLoop * loop,int level)672 CloogLoop * cloog_loop_project(CloogLoop * loop, int level)
673 {
674   CloogDomain * new_domain ;
675   CloogLoop * new_loop, * copy ;
676 
677   copy = cloog_loop_alloc(loop->state, loop->domain, loop->otl, loop->stride,
678 			  loop->block, loop->inner, NULL);
679 
680   if (cloog_domain_dimension(loop->domain) == level)
681   new_domain = cloog_domain_copy(loop->domain) ;
682   else
683     new_domain = cloog_domain_project(loop->domain, level);
684 
685   new_loop = cloog_loop_alloc(loop->state, new_domain, 0, NULL,
686 			      NULL, copy, NULL);
687 
688   return(new_loop) ;
689 }
690 
691 
692 /**
693  * Call cloog_loop_project on each loop in the list "loop" and return
694  * the concatenated result.
695  */
cloog_loop_project_all(CloogLoop * loop,int level)696 CloogLoop *cloog_loop_project_all(CloogLoop *loop, int level)
697 {
698     CloogLoop *next;
699     CloogLoop *res = NULL;
700     CloogLoop **res_next = &res;
701 
702     for (; loop; loop = next) {
703 	next = loop->next;
704 
705 	*res_next = cloog_loop_project(loop, level);
706 	res_next = &(*res_next)->next;
707 	cloog_loop_free_parts(loop, 0, 0, 0, 0);
708     }
709 
710     return res;
711 }
712 
713 
714 /**
715  * cloog_loop_concat function:
716  * This function returns a pointer to the concatenation of the
717  * CloogLoop lists given as input.
718  * - October 28th 2001: first version.
719  */
cloog_loop_concat(CloogLoop * a,CloogLoop * b)720 CloogLoop * cloog_loop_concat(CloogLoop * a, CloogLoop * b)
721 { CloogLoop * loop, * temp ;
722 
723   loop = a  ;
724   temp = loop ;
725   if (loop != NULL)
726   { while (temp->next != NULL)
727     temp = temp->next ;
728     temp->next = b ;
729   }
730   else
731   loop = b ;
732 
733   return(loop) ;
734 }
735 
736 
737 /**
738  * cloog_loop_combine:
739  * Combine consecutive loops with identical domains into
740  * a single loop with the concatenation of their inner loops
741  * as inner loop.
742  */
cloog_loop_combine(CloogLoop * loop)743 CloogLoop *cloog_loop_combine(CloogLoop *loop)
744 {
745     CloogLoop *first, *second;
746 
747     for (first = loop; first; first = first->next) {
748 	while (first->next) {
749 	    if (!cloog_domain_lazy_equal(first->domain, first->next->domain))
750 		break;
751 	    second = first->next;
752 	    first->inner = cloog_loop_concat(first->inner, second->inner);
753 	    first->next = second->next;
754 	    cloog_loop_free_parts(second, 1, 0, 0, 0);
755 	}
756     }
757 
758     return loop;
759 }
760 
761 /**
762  * Remove loops from list that have an empty domain.
763  */
cloog_loop_remove_empty_domain_loops(CloogLoop * loop)764 CloogLoop *cloog_loop_remove_empty_domain_loops(CloogLoop *loop)
765 {
766     CloogLoop *l, *res, *next, **res_next;
767 
768     res = NULL;
769     res_next = &res;
770     for (l = loop; l; l = next) {
771 	next = l->next;
772 	if (cloog_domain_isempty(l->domain))
773 	    cloog_loop_free_parts(l, 1, 1, 1, 0);
774 	else {
775 	    *res_next = l;
776 	    res_next = &(*res_next)->next;
777 	}
778     }
779     *res_next = NULL;
780 
781     return res;
782 }
783 
784 CloogLoop *cloog_loop_decompose_inner(CloogLoop *loop,
785 	int level, int scalar, int *scaldims, int nb_scattdims);
786 
787 /* For each loop with only one inner loop, replace the domain
788  * of the loop with the projection of the domain of the inner
789  * loop.  To increase the number of loops with a single inner
790  * we first decompose the inner loops into strongly connected
791  * components.
792  */
cloog_loop_specialize(CloogLoop * loop,int level,int scalar,int * scaldims,int nb_scattdims)793 CloogLoop *cloog_loop_specialize(CloogLoop *loop,
794 	int level, int scalar, int *scaldims, int nb_scattdims)
795 {
796     int dim;
797     CloogDomain *domain;
798     CloogLoop *l;
799 
800     loop = cloog_loop_decompose_inner(loop, level, scalar,
801 					scaldims, nb_scattdims);
802 
803     for (l = loop; l; l = l->next) {
804 	if (l->inner->next)
805 	    continue;
806 	if (!cloog_domain_isconvex(l->inner->domain))
807 	    continue;
808 
809 	dim = cloog_domain_dimension(l->domain);
810 	domain = cloog_domain_project(l->inner->domain, dim);
811 	if (cloog_domain_isconvex(domain)) {
812 	    cloog_domain_free(l->domain);
813 	    l->domain = domain;
814 	} else {
815 	    cloog_domain_free(domain);
816 	}
817     }
818 
819     return cloog_loop_remove_empty_domain_loops(loop);
820 }
821 
822 /* For each loop with only one inner loop, propagate the bounds from
823  * the inner loop domain to the outer loop domain.  This is especially
824  * useful if the inner loop domain has a non-trivial stride which
825  * results in an update of the lower bound.
826  */
cloog_loop_propagate_lower_bound(CloogLoop * loop,int level)827 CloogLoop *cloog_loop_propagate_lower_bound(CloogLoop *loop, int level)
828 {
829     int dim;
830     CloogDomain *domain, *t;
831     CloogLoop *l;
832 
833     for (l = loop; l; l = l->next) {
834 	if (l->inner->next)
835 	    continue;
836 	if (!cloog_domain_isconvex(l->inner->domain))
837 	    continue;
838 
839 	dim = cloog_domain_dimension(l->domain);
840 	domain = cloog_domain_project(l->inner->domain, dim);
841 	if (cloog_domain_isconvex(domain)) {
842 	    t = cloog_domain_intersection(domain, l->domain);
843 	    cloog_domain_free(l->domain);
844 	    l->domain = t;
845 	}
846 	cloog_domain_free(domain);
847     }
848 
849     return loop;
850 }
851 
852 /**
853  * cloog_loop_separate function:
854  * This function implements the Quillere algorithm for separation of multiple
855  * loops: for a given set of polyhedra (loop), it computes a set of disjoint
856  * polyhedra such that the unions of these sets are equal, and returns this set.
857  * - October   28th 2001: first version.
858  * - November  14th 2001: elimination of some unused blocks.
859  * - August    13th 2002: (debug) in the case of union of polyhedra for one
860  *                        loop, redundant constraints are fired.
861  * - July 3rd->11th 2003: memory leaks hunt and correction.
862  * - June      22nd 2005: Adaptation for GMP.
863  * - October   16th 2005: Removal of the non-shared constraint elimination when
864  *                        there is only one loop in the list (seems to work
865  *                        without now, DomainSimplify may have been improved).
866  *                        The problem was visible with test/iftest2.cloog.
867  */
cloog_loop_separate(CloogLoop * loop)868 CloogLoop * cloog_loop_separate(CloogLoop * loop)
869 { int lazy_equal=0, disjoint = 0;
870   CloogLoop * new_loop, * new_inner, * res, * now, * temp, * Q,
871             * inner, * old /*, * previous, * next*/  ;
872   CloogDomain *UQ, *domain;
873 
874   if (loop == NULL)
875   return NULL ;
876 
877   loop = cloog_loop_combine(loop);
878 
879   if (loop->next == NULL)
880   return cloog_loop_disjoint(loop) ;
881 
882   UQ     = cloog_domain_copy(loop->domain) ;
883   domain = cloog_domain_copy(loop->domain) ;
884   res    = cloog_loop_alloc(loop->state, domain, 0, NULL,
885 			    loop->block, loop->inner, NULL);
886 
887   old = loop ;
888   while((loop = loop->next) != NULL)
889   { temp = NULL ;
890 
891     /* For all Q, add Q-loop associated with the blocks of Q alone,
892      * and Q inter loop associated with the blocks of Q and loop.
893      */
894     for (Q = res; Q; Q = Q->next) {
895         /* Add (Q inter loop). */
896         if ((disjoint = cloog_domain_lazy_disjoint(Q->domain,loop->domain)))
897 	domain = NULL ;
898 	else
899 	{ if ((lazy_equal = cloog_domain_lazy_equal(Q->domain,loop->domain)))
900 	  domain = cloog_domain_copy(Q->domain) ;
901           else
902 	  domain = cloog_domain_intersection(Q->domain,loop->domain) ;
903 
904 	  if (!cloog_domain_isempty(domain))
905           { new_inner = cloog_loop_concat(cloog_loop_copy(Q->inner),
906                                           cloog_loop_copy(loop->inner)) ;
907 	    new_loop = cloog_loop_alloc(loop->state, domain, 0, NULL,
908 					NULL, new_inner, NULL);
909             cloog_loop_add_disjoint(&temp,&now,new_loop) ;
910           }
911           else {
912 	    disjoint = 1;
913 	    cloog_domain_free(domain);
914 	  }
915         }
916 
917 	/* Add (Q - loop). */
918         if (disjoint)
919 	domain = cloog_domain_copy(Q->domain) ;
920 	else
921 	{ if (lazy_equal)
922 	  domain = cloog_domain_empty(Q->domain);
923 	  else
924 	  domain = cloog_domain_difference(Q->domain,loop->domain) ;
925 	}
926 
927 	if (!cloog_domain_isempty(domain)) {
928           new_loop = cloog_loop_alloc(loop->state, domain, 0, NULL,
929 				      NULL, Q->inner, NULL);
930           cloog_loop_add_disjoint(&temp,&now,new_loop) ;
931         }
932         else
933         { cloog_domain_free(domain) ;
934 	  /* If Q->inner is no more useful, we can free it. */
935           inner = Q->inner ;
936           Q->inner = NULL ;
937           cloog_loop_free(inner) ;
938         }
939     }
940 
941     /* Add loop-UQ associated with the blocks of loop alone.*/
942     if (cloog_domain_lazy_disjoint(loop->domain,UQ))
943     domain = cloog_domain_copy(loop->domain) ;
944     else
945     { if (cloog_domain_lazy_equal(loop->domain,UQ))
946       domain = cloog_domain_empty(UQ);
947       else
948       domain = cloog_domain_difference(loop->domain,UQ) ;
949     }
950 
951     if (!cloog_domain_isempty(domain)) {
952       new_loop = cloog_loop_alloc(loop->state, domain, 0, NULL,
953 				  NULL, loop->inner, NULL);
954       cloog_loop_add_disjoint(&temp,&now,new_loop) ;
955     }
956     else
957     { cloog_domain_free(domain) ;
958       /* If loop->inner is no more useful, we can free it. */
959       cloog_loop_free(loop->inner) ;
960     }
961 
962     loop->inner = NULL ;
963 
964     if (loop->next != NULL)
965       UQ = cloog_domain_union(UQ, cloog_domain_copy(loop->domain));
966     else
967       cloog_domain_free(UQ);
968 
969     cloog_loop_free_parts(res,1,0,0,1) ;
970 
971     res = temp ;
972   }
973   cloog_loop_free_parts(old,1,0,0,1) ;
974 
975   return(res) ;
976 }
977 
978 
bounding_domain(CloogDomain * dom,CloogOptions * options)979 static CloogDomain *bounding_domain(CloogDomain *dom, CloogOptions *options)
980 {
981     if (options->sh)
982 	return cloog_domain_simple_convex(dom);
983     else
984 	return cloog_domain_convex(dom);
985 }
986 
987 
988 /**
989  * cloog_loop_merge function:
990  * This function is the 'soft' version of loop_separate if we are looking for
991  * a code much simpler (and less efficicient).  This function returns the new
992  * CloogLoop list.
993  * - October 29th 2001: first version.
994  * - July 3rd->11th 2003: memory leaks hunt and correction.
995  * - June      22nd 2005: Adaptation for GMP.
996  */
cloog_loop_merge(CloogLoop * loop,int level,CloogOptions * options)997 CloogLoop *cloog_loop_merge(CloogLoop *loop, int level, CloogOptions *options)
998 {
999     CloogLoop *res, *new_inner, *old;
1000     CloogDomain *new_domain, *temp;
1001 
1002     if (loop == NULL)
1003 	return loop;
1004 
1005     if (loop->next == NULL && cloog_domain_isconvex(loop->domain))
1006 	return loop;
1007 
1008     old = loop;
1009     temp = loop->domain;
1010     loop->domain = NULL;
1011     new_inner = loop->inner;
1012 
1013     for (loop = loop->next; loop; loop = loop->next) {
1014 	temp = cloog_domain_union(temp, loop->domain);
1015 	loop->domain = NULL;
1016 	new_inner = cloog_loop_concat(new_inner, loop->inner);
1017     }
1018 
1019     new_domain = bounding_domain(temp, options);
1020 
1021     if (level > 0 && !cloog_domain_is_bounded(new_domain, level) &&
1022 	    	     cloog_domain_is_bounded(temp, level)) {
1023 	CloogDomain *splitter, *t2;
1024 
1025 	cloog_domain_free(new_domain);
1026 	splitter = cloog_domain_bound_splitter(temp, level);
1027 
1028 	res = NULL;
1029 	while (!cloog_domain_isconvex(splitter)) {
1030 	    CloogDomain *first, *rest;
1031 	    first = cloog_domain_cut_first(splitter, &rest);
1032 	    splitter = rest;
1033 	    t2 = cloog_domain_intersection(first, temp);
1034 	    cloog_domain_free(first);
1035 
1036 	    new_domain = bounding_domain(t2, options);
1037 	    cloog_domain_free(t2);
1038 
1039 	    if (cloog_domain_isempty(new_domain)) {
1040 		cloog_domain_free(new_domain);
1041 		continue;
1042 	    }
1043 	    res = cloog_loop_alloc(old->state, new_domain, 0, NULL,
1044 				   NULL, cloog_loop_copy(new_inner), res);
1045 	}
1046 
1047 	t2 = cloog_domain_intersection(splitter, temp);
1048 	cloog_domain_free(splitter);
1049 
1050 	new_domain = bounding_domain(t2, options);
1051 	cloog_domain_free(t2);
1052 
1053 	if (cloog_domain_isempty(new_domain)) {
1054 	    cloog_domain_free(new_domain);
1055 	    cloog_loop_free(new_inner);
1056 	} else
1057 	    res = cloog_loop_alloc(old->state, new_domain, 0, NULL,
1058 				   NULL, new_inner, res);
1059     } else {
1060 	res = cloog_loop_alloc(old->state, new_domain, 0, NULL,
1061 			       NULL, new_inner, NULL);
1062     }
1063     cloog_domain_free(temp);
1064 
1065     cloog_loop_free_parts(old, 0, 0, 0, 1);
1066 
1067     return res;
1068 }
1069 
1070 
cloog_loop_count(CloogLoop * loop)1071 static int cloog_loop_count(CloogLoop *loop)
1072 {
1073     int nb_loops;
1074 
1075     for (nb_loops = 0; loop; loop = loop->next)
1076 	nb_loops++;
1077 
1078     return  nb_loops;
1079 }
1080 
1081 
1082 /**
1083  * cloog_loop_sort function:
1084  * Adaptation from LoopGen 0.4 by F. Quillere. This function sorts a list of
1085  * parameterized disjoint polyhedra, in order to not have lexicographic order
1086  * violation (see Quillere paper).
1087  * - September 16th 2005: inclusion of cloog_loop_number (October 29th 2001).
1088  */
cloog_loop_sort(CloogLoop * loop,int level)1089 CloogLoop *cloog_loop_sort(CloogLoop *loop, int level)
1090 {
1091   CloogLoop *res, *now, **loop_array;
1092   CloogDomain **doms;
1093   int i, nb_loops=0, * permut ;
1094 
1095   /* There is no need to sort the parameter domains. */
1096   if (!level)
1097     return loop;
1098 
1099   /* We will need to know how many loops are in the list. */
1100   nb_loops = cloog_loop_count(loop);
1101 
1102   /* If there is only one loop, it's the end. */
1103   if (nb_loops == 1)
1104   return(loop) ;
1105 
1106   /* We have to allocate memory for some useful components:
1107    * - loop_array: the loop array,
1108    * - doms: the array of domains to sort,
1109    * - permut: will give us a possible sort (maybe not the only one).
1110    */
1111   loop_array = (CloogLoop **)malloc(nb_loops*sizeof(CloogLoop *)) ;
1112   doms = (CloogDomain **)malloc(nb_loops*sizeof(CloogDomain *));
1113   permut = (int *)malloc(nb_loops*sizeof(int)) ;
1114 
1115   /* We fill up the loop and domain arrays. */
1116   for (i=0;i<nb_loops;i++,loop=loop->next)
1117   { loop_array[i] = loop ;
1118     doms[i] = loop_array[i]->domain;
1119   }
1120 
1121   /* cloog_domain_sort will fill up permut. */
1122   cloog_domain_sort(doms, nb_loops, level, permut);
1123 
1124   /* With permut and loop_array we build the sorted list. */
1125   res = NULL ;
1126   for (i=0;i<nb_loops;i++)
1127   { /* To avoid pointer looping... loop_add will rebuild the list. */
1128     loop_array[permut[i]-1]->next = NULL ;
1129     cloog_loop_add(&res,&now,loop_array[permut[i]-1]) ;
1130   }
1131 
1132   free(permut) ;
1133   free(doms);
1134   free(loop_array) ;
1135 
1136   return res;
1137 }
1138 
1139 
1140 /**
1141  * cloog_loop_nest function:
1142  * This function changes the loop list in such a way that we have no more than
1143  * one dimension added by level. It returns an equivalent loop list with
1144  * this property.
1145  * - October 29th 2001: first version.
1146  * - July 3rd->11th 2003: memory leaks hunt and correction.
1147  * - June      22nd 2005: Adaptation for GMP.
1148  * - November  21th 2005: (debug) now OK when cloog_loop_restrict returns NULL.
1149  */
cloog_loop_nest(CloogLoop * loop,CloogDomain * context,int level)1150 CloogLoop *cloog_loop_nest(CloogLoop *loop, CloogDomain *context, int level)
1151 { int l ;
1152   CloogLoop * p, * temp, * res, * now, * next ;
1153   CloogDomain * new_domain ;
1154 
1155   loop = cloog_loop_disjoint(loop);
1156 
1157   res = NULL ;
1158   /* Each domain is changed by its intersection with the context. */
1159   while (loop != NULL)
1160   { p = cloog_loop_restrict(loop, context);
1161     next = loop->next ;
1162 
1163     if (p != NULL)
1164     { cloog_loop_free_parts(loop,1,0,0,0) ;
1165 
1166       temp = cloog_loop_alloc(p->state, p->domain, 0, NULL,
1167 			      p->block, p->inner, NULL);
1168 
1169       /* If the intersection dimension is too big, we make projections smaller
1170        * and smaller, and each projection includes the preceding projection
1171        * (thus, in the target list, dimensions are added one by one).
1172        */
1173       if (cloog_domain_dimension(p->domain) >= level)
1174 	for (l = cloog_domain_dimension(p->domain); l >= level; l--) {
1175 	  new_domain = cloog_domain_project(p->domain, l);
1176 	  temp = cloog_loop_alloc(p->state, new_domain, 0, NULL,
1177 				  NULL, temp, NULL);
1178 	}
1179 
1180        /* p is no more useful (but its content yes !). */
1181       cloog_loop_free_parts(p,0,0,0,0) ;
1182 
1183       cloog_loop_add(&res,&now,temp) ;
1184     }
1185     else
1186     cloog_loop_free_parts(loop,1,1,1,0) ;
1187 
1188     loop = next ;
1189   }
1190 
1191   return(res) ;
1192 }
1193 
1194 
1195 /* Check if the domains of the inner loops impose a stride constraint
1196  * on the given level.
1197  * The core of the search is implemented in cloog_domain_list_stride.
1198  * Here, we simply construct a list of domains to pass to this function
1199  * and if a stride is found, we adjust the lower bounds by calling
1200  * cloog_domain_stride_lower_bound.
1201  */
cloog_loop_variable_offset_stride(CloogLoop * loop,int level)1202 static int cloog_loop_variable_offset_stride(CloogLoop *loop, int level)
1203 {
1204     CloogDomainList *list = NULL;
1205     CloogLoop *inner;
1206     CloogStride *stride;
1207 
1208     for (inner = loop->inner; inner; inner = inner->next) {
1209 	CloogDomainList *entry = ALLOC(CloogDomainList);
1210 	entry->domain = cloog_domain_copy(inner->domain);
1211 	entry->next = list;
1212 	list = entry;
1213     }
1214 
1215     stride = cloog_domain_list_stride(list, level);
1216 
1217     cloog_domain_list_free(list);
1218 
1219     if (!stride)
1220 	return 0;
1221 
1222     loop->stride = stride;
1223     loop->domain = cloog_domain_stride_lower_bound(loop->domain, level, stride);
1224 
1225     return 1;
1226 }
1227 
1228 
1229 /**
1230  * cloog_loop_stride function:
1231  * This function will find the stride of a loop for the iterator at the column
1232  * number 'level' in the constraint matrix. It will update the lower bound of
1233  * the iterator accordingly. Basically, the function will try to find in the
1234  * inner loops a common condition on this iterator for the inner loop iterators
1235  * to be integral. For instance, let us consider a loop with the iterator i,
1236  * the iteration domain -4<=i<=n, and its two inner loops with the iterator j.
1237  * The first inner loop has the constraint 3j=i, and the second one has the
1238  * constraint 6j=i. Then the common constraint on i for j to be integral is
1239  * i%3=0, the stride for i is 3. Lastly, we have to find the new lower bound
1240  * for i: the first value satisfying the common constraint: -3. At the end, the
1241  * iteration domain for i is -3<=i<=n and the stride for i is 3.
1242  *
1243  * The algorithm implemented in this function only allows for strides
1244  * on loops with a lower bound that has a constant remainder on division
1245  * by the stride.  Before initiating this procedure, we first check
1246  * if we can find a stride with a lower bound with a variable offset in
1247  * cloog_loop_variable_offset_stride.
1248  *
1249  * - loop is the loop including the iteration domain of the considered iterator,
1250  * - level is the column number of the iterator in the matrix of contraints.
1251  **
1252  * - June 29th 2003: first version (work in progress since June 26th 2003).
1253  * - July 14th 2003: simpler version.
1254  * - June 22nd 2005: Adaptation for GMP (from S. Verdoolaege's 0.12.1 version).
1255  */
cloog_loop_stride(CloogLoop * loop,int level)1256 void cloog_loop_stride(CloogLoop * loop, int level)
1257 { int first_search ;
1258   cloog_int_t stride, ref_offset, offset, potential;
1259   CloogLoop * inner ;
1260 
1261   if (!cloog_domain_can_stride(loop->domain, level))
1262     return;
1263 
1264   if (cloog_loop_variable_offset_stride(loop, level))
1265     return;
1266 
1267   cloog_int_init(stride);
1268   cloog_int_init(ref_offset);
1269   cloog_int_init(offset);
1270   cloog_int_init(potential);
1271 
1272   cloog_int_set_si(ref_offset, 0);
1273   cloog_int_set_si(offset, 0);
1274 
1275   /* Default stride. */
1276   cloog_int_set_si(stride, 1);
1277   first_search = 1 ;
1278   inner = loop->inner ;
1279 
1280   while (inner != NULL)
1281   { /* If the minimun stride has not been found yet, find the stride. */
1282     if ((first_search) || (!cloog_int_is_one(stride)))
1283     {
1284       cloog_domain_stride(inner->domain, level, &potential, &offset);
1285       if (!cloog_int_is_one(potential) && (!first_search))
1286       { /* Offsets must be the same for common stride. */
1287 	cloog_int_gcd(stride, potential, stride);
1288 	if (!cloog_int_is_zero(stride)) {
1289 	    cloog_int_fdiv_r(offset, offset, stride);
1290 	    cloog_int_fdiv_r(ref_offset, ref_offset, stride);
1291 	}
1292         if (cloog_int_ne(offset,ref_offset))
1293 	    cloog_int_set_si(stride, 1);
1294       }
1295       else {
1296         cloog_int_set(stride, potential);
1297         cloog_int_set(ref_offset, offset);
1298       }
1299 
1300       first_search = 0 ;
1301     }
1302 
1303     inner = inner->next ;
1304   }
1305 
1306   if (cloog_int_is_zero(stride))
1307     cloog_int_set_si(stride, 1);
1308 
1309   /* Update the values if necessary. */
1310   if (!cloog_int_is_one(stride))
1311   { /* Update the stride value. */
1312     if (!cloog_int_is_zero(offset))
1313       cloog_int_sub(offset, stride, offset);
1314     loop->stride = cloog_stride_alloc(stride, offset);
1315     loop->domain = cloog_domain_stride_lower_bound(loop->domain, level,
1316 						    loop->stride);
1317   }
1318 
1319   cloog_int_clear(stride);
1320   cloog_int_clear(ref_offset);
1321   cloog_int_clear(offset);
1322   cloog_int_clear(potential);
1323 }
1324 
1325 
cloog_loop_otl(CloogLoop * loop,int level)1326 void cloog_loop_otl(CloogLoop *loop, int level)
1327 {
1328     if (cloog_domain_is_otl(loop->domain, level))
1329 	loop->otl = 1;
1330 }
1331 
1332 
1333 /**
1334  * cloog_loop_stop function:
1335  * This function implements the 'stop' option : each domain of each loop
1336  * in the list 'loop' is replaced by 'context'. 'context' should be the
1337  * domain of the outer loop. By using this method, there are no more dimensions
1338  * to scan and the simplification step will automaticaly remove the domains
1339  * since they are the same as the corresponding contexts. The effect of this
1340  * function is to stop the code generation at the level this function is called,
1341  * the resulting code do not consider the next dimensions.
1342  * - January 11th 2005: first version.
1343  */
cloog_loop_stop(CloogLoop * loop,CloogDomain * context)1344 CloogLoop * cloog_loop_stop(CloogLoop * loop, CloogDomain * context)
1345 { if (loop == NULL)
1346   return NULL ;
1347   else
1348   { cloog_domain_free(loop->domain) ;
1349     loop->domain = cloog_domain_copy(context) ;
1350     loop->next = cloog_loop_stop(loop->next, context) ;
1351   }
1352 
1353   return loop ;
1354 }
1355 
1356 
level_is_constant(int level,int scalar,int * scaldims,int nb_scattdims)1357 static int level_is_constant(int level, int scalar, int *scaldims, int nb_scattdims)
1358 {
1359   return level && (level+scalar <= nb_scattdims) && (scaldims[level+scalar-1]);
1360 }
1361 
1362 
1363 /**
1364  * Compare the constant dimensions of loops 'l1' and 'l2' starting at 'scalar'
1365  * and return -1 if the vector of constant dimensions of 'l1' is smaller
1366  * than that of 'l2', 0 if they are the same and +1 if that of 'l1' is
1367  * greater than that of 'l2'.
1368  * This function should be called on the innermost loop (the loop
1369  * containing a block).
1370  * \param l1 Loop to be compared with l2.
1371  * \param l2 Loop to be compared with l1.
1372  * \param level Current non-scalar dimension.
1373  * \param scaldims Boolean array saying whether a dimension is scalar or not.
1374  * \param nb_scattdims Size of the scaldims array.
1375  * \param scalar Current scalar dimension.
1376  * \return -1 if (l1 < l2), 0 if (l1 == l2) and +1 if (l1 > l2)
1377  */
cloog_loop_constant_cmp(CloogLoop * l1,CloogLoop * l2,int level,int * scaldims,int nb_scattdims,int scalar)1378 int cloog_loop_constant_cmp(CloogLoop *l1, CloogLoop *l2, int level,
1379 	int *scaldims, int nb_scattdims, int scalar)
1380 {
1381   CloogBlock *b1, *b2;
1382   b1 = l1->block;
1383   b2 = l2->block;
1384   while (level_is_constant(level, scalar, scaldims, nb_scattdims)) {
1385     int cmp = cloog_int_cmp(b1->scaldims[scalar], b2->scaldims[scalar]);
1386     if (cmp)
1387 	return cmp;
1388     scalar++;
1389   }
1390   return 0;
1391 }
1392 
1393 
1394 /**
1395  * cloog_loop_scalar_gt function:
1396  * This function returns 1 if loop 'l1' is greater than loop 'l2' for the
1397  * scalar dimension vector that begins at dimension 'scalar', 0 otherwise. What
1398  * we want to know is whether a loop is scheduled before another one or not.
1399  * This function solves the problem when the considered dimension for scheduling
1400  * is a scalar dimension. Since there may be a succession of scalar dimensions,
1401  * this function will reason about the vector of scalar dimension that begins
1402  * at dimension 'level+scalar' and finish to the first non-scalar dimension.
1403  * \param l1 Loop to be compared with l2.
1404  * \param l2 Loop to be compared with l1.
1405  * \param level Current non-scalar dimension.
1406  * \param scaldims Boolean array saying whether a dimension is scalar or not.
1407  * \param nb_scattdims Size of the scaldims array.
1408  * \param scalar Current scalar dimension.
1409  * \return 1 if (l1 > l2), 0 otherwise.
1410  **
1411  * - September 9th 2005: first version.
1412  * - October  15nd 2007: now "greater than" instead of "greater or equal".
1413  */
cloog_loop_scalar_gt(l1,l2,level,scaldims,nb_scattdims,scalar)1414 int cloog_loop_scalar_gt(l1, l2, level, scaldims, nb_scattdims, scalar)
1415 CloogLoop * l1, * l2 ;
1416 int level, * scaldims, nb_scattdims, scalar ;
1417 {
1418   return cloog_loop_constant_cmp(l1, l2, level, scaldims, nb_scattdims, scalar) > 0;
1419 }
1420 
1421 
1422 /**
1423  * cloog_loop_scalar_eq function:
1424  * This function returns 1 if loop 'l1' is equal to loop 'l2' for the scalar
1425  * dimension vector that begins at dimension 'scalar', 0 otherwise. What we want
1426  * to know is whether two loops are scheduled for the same time or not.
1427  * This function solves the problem when the considered dimension for scheduling
1428  * is a scalar dimension. Since there may be a succession of scalar dimensions,
1429  * this function will reason about the vector of scalar dimension that begins
1430  * at dimension 'level+scalar' and finish to the first non-scalar dimension.
1431  * - l1 and l2 are the loops to compare,
1432  * - level is the current non-scalar dimension,
1433  * - scaldims is the boolean array saying whether a dimension is scalar or not,
1434  * - nb_scattdims is the size of the scaldims array,
1435  * - scalar is the current scalar dimension.
1436  **
1437  * - September 9th 2005 : first version.
1438  */
cloog_loop_scalar_eq(l1,l2,level,scaldims,nb_scattdims,scalar)1439 int cloog_loop_scalar_eq(l1, l2, level, scaldims, nb_scattdims, scalar)
1440 CloogLoop * l1, * l2 ;
1441 int level, * scaldims, nb_scattdims, scalar ;
1442 {
1443   return cloog_loop_constant_cmp(l1, l2, level, scaldims, nb_scattdims, scalar) == 0;
1444 }
1445 
1446 
1447 /**
1448  * cloog_loop_scalar_sort function:
1449  * This function sorts a linked list of loops (loop) with respect to the
1450  * scalar dimension vector that begins at dimension 'scalar'. Since there may
1451  * be a succession of scalar dimensions, this function will reason about the
1452  * vector of scalar dimension that begins at dimension 'level+scalar' and
1453  * finish to the first non-scalar dimension.
1454  * \param loop Loop list to sort.
1455  * \param level Current non-scalar dimension.
1456  * \param scaldims Boolean array saying whether a dimension is scalar or not.
1457  * \param nb_scattdims Size of the scaldims array.
1458  * \param scalar Current scalar dimension.
1459  * \return A pointer to the sorted list.
1460  **
1461  * - July      2nd 2005: first developments.
1462  * - September 2nd 2005: first version.
1463  * - October  15nd 2007: complete rewrite to remove bugs, now a bubble sort.
1464  */
cloog_loop_scalar_sort(loop,level,scaldims,nb_scattdims,scalar)1465 CloogLoop * cloog_loop_scalar_sort(loop, level, scaldims, nb_scattdims, scalar)
1466 CloogLoop * loop ;
1467 int level, * scaldims, nb_scattdims, scalar ;
1468 { int ok ;
1469   CloogLoop **current;
1470 
1471   do {
1472     ok = 1;
1473     for (current = &loop; (*current)->next; current = &(*current)->next) {
1474       CloogLoop *next = (*current)->next;
1475       if (cloog_loop_scalar_gt(*current,next,level,scaldims,nb_scattdims,scalar)) {
1476         ok = 0;
1477 	(*current)->next = next->next;
1478 	next->next = *current;
1479 	*current = next;
1480       }
1481     }
1482   } while (!ok);
1483 
1484   return loop ;
1485 }
1486 
1487 
1488 /**
1489  * cloog_loop_generate_backtrack function:
1490  * adaptation from LoopGen 0.4 by F. Quillere. This function implements the
1491  * backtrack of the Quillere et al. algorithm (see the Quillere paper).
1492  * It eliminates unused iterations of the current level for the new one. See the
1493  * example called linearity-1-1 example with and without this part for an idea.
1494  * - October 26th 2001: first version in cloog_loop_generate_general.
1495  * - July    31th 2002: (debug) no more parasite loops (REALLY hard !).
1496  * - October 30th 2005: extraction from cloog_loop_generate_general.
1497  */
cloog_loop_generate_backtrack(CloogLoop * loop,int level,CloogOptions * options)1498 CloogLoop *cloog_loop_generate_backtrack(CloogLoop *loop,
1499 	int level, CloogOptions *options)
1500 {
1501   CloogDomain * domain ;
1502   CloogLoop * now, * now2, * next, * next2, * end, * temp, * l, * inner,
1503             * new_loop ;
1504 
1505   temp = loop ;
1506   loop = NULL ;
1507 
1508   while (temp != NULL)
1509   { l = NULL ;
1510     inner = temp->inner ;
1511 
1512     while (inner != NULL)
1513     { next = inner->next ;
1514       /* This 'if' and its first part is the debug of july 31th 2002. */
1515       if (inner->block != NULL) {
1516         end = cloog_loop_alloc(temp->state, inner->domain, 0, NULL,
1517 			       inner->block, NULL, NULL);
1518         domain = cloog_domain_copy(temp->domain) ;
1519         new_loop = cloog_loop_alloc(temp->state, domain, 0, NULL,
1520 				    NULL, end, NULL);
1521       }
1522       else
1523       new_loop = cloog_loop_project(inner, level);
1524 
1525       cloog_loop_free_parts(inner,0,0,0,0) ;
1526       cloog_loop_add(&l,&now2,new_loop) ;
1527       inner = next ;
1528     }
1529 
1530     temp->inner = NULL ;
1531 
1532     if (l != NULL)
1533     { l = cloog_loop_separate(l) ;
1534       l = cloog_loop_sort(l, level);
1535       while (l != NULL) {
1536 	l->stride = cloog_stride_copy(l->stride);
1537         cloog_loop_add(&loop,&now,l) ;
1538         l = l->next ;
1539       }
1540     }
1541     next2 = temp->next ;
1542     cloog_loop_free_parts(temp,1,0,0,0) ;
1543     temp = next2 ;
1544   }
1545 
1546   return loop ;
1547 }
1548 
1549 
1550 /**
1551  * Return 1 if we need to continue recursing to the specified level.
1552  */
cloog_loop_more(CloogLoop * loop,int level,int scalar,int nb_scattdims)1553 int cloog_loop_more(CloogLoop *loop, int level, int scalar, int nb_scattdims)
1554 {
1555   return level + scalar <= nb_scattdims ||
1556 	 cloog_domain_dimension(loop->domain) >= level;
1557 }
1558 
1559 /**
1560  * Return 1 if the domains of all loops in the given linked list
1561  * have a fixed value at the given level.
1562  * In principle, there would be no need to check that the fixed value is
1563  * the same for each of these loops because this function is only
1564  * called on a component.  However, not all backends perform a proper
1565  * decomposition into components.
1566  */
cloog_loop_is_constant(CloogLoop * loop,int level)1567 int cloog_loop_is_constant(CloogLoop *loop, int level)
1568 {
1569     cloog_int_t c1, c2;
1570     int r = 1;
1571 
1572     cloog_int_init(c1);
1573     cloog_int_init(c2);
1574 
1575     if (!cloog_domain_lazy_isconstant(loop->domain, level - 1, &c1))
1576 	r = 0;
1577 
1578     for (loop = loop->next; r && loop; loop = loop->next) {
1579 	if (!cloog_domain_lazy_isconstant(loop->domain, level - 1, &c2))
1580 	    r = 0;
1581 	else if (cloog_int_ne(c1, c2))
1582 	    r = 0;
1583     }
1584 
1585     cloog_int_clear(c1);
1586     cloog_int_clear(c2);
1587 
1588     return r;
1589 }
1590 
1591 /**
1592  * Assuming all domains in the given linked list of loop
1593  * have a fixed values at level, return a single loop with
1594  * a domain corresponding to this fixed value and with as
1595  * list of inner loops the concatenation of all inner loops
1596  * in the original list.
1597  */
cloog_loop_constant(CloogLoop * loop,int level)1598 CloogLoop *cloog_loop_constant(CloogLoop *loop, int level)
1599 {
1600     CloogLoop *res, *inner, *tmp;
1601     CloogDomain *domain, *t;
1602 
1603     if (!loop)
1604 	return loop;
1605 
1606     inner = loop->inner;
1607     domain = loop->domain;
1608     for (tmp = loop->next; tmp; tmp = tmp->next) {
1609 	inner = cloog_loop_concat(inner, tmp->inner);
1610 	domain = cloog_domain_union(domain, tmp->domain);
1611     }
1612 
1613     domain = cloog_domain_simple_convex(t = domain);
1614     cloog_domain_free(t);
1615 
1616     res = cloog_loop_alloc(loop->state, domain, 0, NULL, NULL, inner, NULL);
1617 
1618     cloog_loop_free_parts(loop, 0, 0, 0, 1);
1619 
1620     return res;
1621 }
1622 
1623 
1624 /* Unroll the given loop at the given level, provided it is allowed
1625  * by cloog_domain_can_unroll.
1626  * If so, we return a list of loops, one for each iteration of the original
1627  * loop.  Otherwise, we simply return the original loop.
1628  */
loop_unroll(CloogLoop * loop,int level)1629 static CloogLoop *loop_unroll(CloogLoop *loop, int level)
1630 {
1631     int can_unroll;
1632     cloog_int_t i;
1633     cloog_int_t n;
1634     CloogConstraint *lb;
1635     CloogLoop *res = NULL;
1636     CloogLoop **next_res = &res;
1637     CloogDomain *domain;
1638     CloogLoop *inner;
1639 
1640     cloog_int_init(n);
1641     can_unroll = cloog_domain_can_unroll(loop->domain, level, &n, &lb);
1642     if (!can_unroll) {
1643 	cloog_int_clear(n);
1644 	return loop;
1645     }
1646 
1647     cloog_int_init(i);
1648 
1649     for (cloog_int_set_si(i, 0); cloog_int_lt(i, n); cloog_int_add_ui(i, i, 1)) {
1650 	domain = cloog_domain_copy(loop->domain);
1651 	domain = cloog_domain_fixed_offset(domain, level, lb, i);
1652 	inner = cloog_loop_copy(loop->inner);
1653 	inner = cloog_loop_restrict_all(inner, domain);
1654 	if (!inner) {
1655 	    cloog_domain_free(domain);
1656 	    continue;
1657 	}
1658 	*next_res = cloog_loop_alloc(loop->state, domain, 1, NULL, NULL,
1659 					inner, NULL);
1660 	next_res = &(*next_res)->next;
1661     }
1662 
1663     cloog_int_clear(i);
1664     cloog_int_clear(n);
1665     cloog_constraint_release(lb);
1666 
1667     cloog_loop_free(loop);
1668 
1669     return res;
1670 }
1671 
1672 
1673 /* Unroll all loops in the given list at the given level, provided
1674  * they can be unrolled.
1675  */
cloog_loop_unroll(CloogLoop * loop,int level)1676 CloogLoop *cloog_loop_unroll(CloogLoop *loop, int level)
1677 {
1678     CloogLoop *now, *next;
1679     CloogLoop *res = NULL;
1680     CloogLoop **next_res = &res;
1681 
1682     for (now = loop; now; now = next) {
1683 	next = now->next;
1684 	now->next = NULL;
1685 
1686 	*next_res = loop_unroll(now, level);
1687 
1688 	while (*next_res)
1689 	    next_res = &(*next_res)->next;
1690     }
1691 
1692     return res;
1693 }
1694 
1695 CloogLoop *cloog_loop_generate_restricted_or_stop(CloogLoop *loop,
1696 	CloogDomain *context,
1697 	int level, int scalar, int *scaldims, int nb_scattdims,
1698 	CloogOptions *options);
1699 
1700 CloogLoop *cloog_loop_recurse(CloogLoop *loop,
1701 	int level, int scalar, int *scaldims, int nb_scattdims,
1702 	int constant, CloogOptions *options);
1703 
1704 
1705 /**
1706  * Recurse on the inner loops of the given single loop.
1707  *
1708  * - loop is the loop for which we have to generate scanning code,
1709  * - level is the current non-scalar dimension,
1710  * - scalar is the current scalar dimension,
1711  * - scaldims is the boolean array saying whether a dimension is scalar or not,
1712  * - nb_scattdims is the size of the scaldims array,
1713  * - constant is true if the loop is known to be executed at most once
1714  * - options are the general code generation options.
1715  */
loop_recurse(CloogLoop * loop,int level,int scalar,int * scaldims,int nb_scattdims,int constant,CloogOptions * options)1716 static CloogLoop *loop_recurse(CloogLoop *loop,
1717 	int level, int scalar, int *scaldims, int nb_scattdims,
1718 	int constant, CloogOptions *options)
1719 {
1720     CloogLoop *inner, *into, *end, *next, *l, *now;
1721     CloogDomain *domain;
1722 
1723     if (level && options->strides && !constant)
1724       cloog_loop_stride(loop, level);
1725 
1726     if (!constant &&
1727 	options->first_unroll >= 0 && level + scalar >= options->first_unroll) {
1728 	loop = cloog_loop_unroll(loop, level);
1729 	if (loop->next)
1730 	    return cloog_loop_recurse(loop, level, scalar, scaldims,
1731 					nb_scattdims, 1, options);
1732     }
1733 
1734     if (level && options->otl)
1735       cloog_loop_otl(loop, level);
1736     inner = loop->inner;
1737     domain = cloog_domain_copy(loop->domain);
1738     domain = cloog_domain_add_stride_constraint(domain, loop->stride);
1739     into = NULL ;
1740     while (inner != NULL)
1741     { /* 4b. -ced- recurse for each sub-list of non terminal loops. */
1742       if (cloog_loop_more(inner, level + 1, scalar, nb_scattdims)) {
1743 	end = inner;
1744         while ((end->next != NULL) &&
1745                cloog_loop_more(end->next, level + 1, scalar, nb_scattdims))
1746         end = end->next ;
1747 
1748 	next = end->next ;
1749         end->next = NULL ;
1750 
1751         l = cloog_loop_generate_restricted_or_stop(inner, domain,
1752 			level + 1, scalar, scaldims, nb_scattdims, options);
1753 
1754 	if (l != NULL)
1755         cloog_loop_add_list(&into,&now,l) ;
1756 
1757         inner = next ;
1758       }
1759       else
1760       { cloog_loop_add(&into,&now,inner) ;
1761         inner = inner->next ;
1762       }
1763     }
1764 
1765     cloog_domain_free(domain);
1766     loop->inner = into;
1767     return loop;
1768 }
1769 
1770 
1771 /**
1772  * Recurse on the inner loops of each of the loops in the loop list.
1773  *
1774  * - loop is the loop list for which we have to generate scanning code,
1775  * - level is the current non-scalar dimension,
1776  * - scalar is the current scalar dimension,
1777  * - scaldims is the boolean array saying whether a dimension is scalar or not,
1778  * - nb_scattdims is the size of the scaldims array,
1779  * - constant is true if the loop is known to be executed at most once
1780  * - options are the general code generation options.
1781  */
cloog_loop_recurse(CloogLoop * loop,int level,int scalar,int * scaldims,int nb_scattdims,int constant,CloogOptions * options)1782 CloogLoop *cloog_loop_recurse(CloogLoop *loop,
1783 	int level, int scalar, int *scaldims, int nb_scattdims,
1784 	int constant, CloogOptions *options)
1785 {
1786     CloogLoop *now, *next;
1787     CloogLoop *res = NULL;
1788     CloogLoop **next_res = &res;
1789 
1790     for (now = loop; now; now = next) {
1791 	next = now->next;
1792 	now->next = NULL;
1793 
1794 	*next_res = loop_recurse(now, level, scalar, scaldims, nb_scattdims,
1795 				 constant, options);
1796 
1797 	while (*next_res)
1798 	    next_res = &(*next_res)->next;
1799     }
1800 
1801     return res;
1802 }
1803 
1804 
1805 /* Get the max across all 'first' depths for statements in this
1806  * stmt list, and the max across all 'last' depths */
cloog_statement_get_fl(CloogStatement * s,int * f,int * l,CloogOptions * options)1807 void cloog_statement_get_fl(CloogStatement *s, int *f, int *l,
1808         CloogOptions *options)
1809 {
1810     if (s == NULL) return;
1811 
1812     int fs, ls;
1813 
1814     if (options->fs != NULL && options->ls != NULL) {
1815         fs = options->fs[s->number-1];
1816         ls = options->ls[s->number-1];
1817         *f = (fs > *f)? fs: *f;
1818         *l = (ls > *l)? ls: *l;
1819     }else{
1820         *f = -1;
1821         *l = -1;
1822     }
1823 
1824     cloog_statement_get_fl(s->next, f, l, options);
1825 }
1826 
1827 /* Get the max across all 'first' depths for statements under
1828  * this loop, and the max across all 'last' depths */
cloog_loop_get_fl(CloogLoop * loop,int * f,int * l,CloogOptions * options)1829 void cloog_loop_get_fl(CloogLoop *loop, int *f, int *l,
1830         CloogOptions *options)
1831 {
1832     if (loop == NULL)   return;
1833 
1834     CloogBlock *block = loop->block;
1835 
1836     if (block != NULL && block->statement != NULL) {
1837         cloog_statement_get_fl(block->statement, f, l, options);
1838     }
1839 
1840     cloog_loop_get_fl(loop->inner, f, l, options);
1841     cloog_loop_get_fl(loop->next, f, l, options);
1842 }
1843 
1844 /**
1845  * cloog_loop_generate_general function:
1846  * Adaptation from LoopGen 0.4 by F. Quillere. This function implements the
1847  * Quillere algorithm for polyhedron scanning from step 3 to 5.
1848  * (see the Quillere paper).
1849  * - loop is the loop for which we have to generate a scanning code,
1850  * - level is the current non-scalar dimension,
1851  * - scalar is the current scalar dimension,
1852  * - scaldims is the boolean array saying whether a dimension is scalar or not,
1853  * - nb_scattdims is the size of the scaldims array,
1854  * - options are the general code generation options.
1855  **
1856  * - October 26th 2001: first version.
1857  * - July 3rd->11th 2003: memory leaks hunt and correction.
1858  * - June      22nd 2005: Adaptation for GMP.
1859  * - September  2nd 2005: The function have been cutted out in two pieces:
1860  *                        cloog_loop_generate and this one, in order to handle
1861  *                        the scalar dimension case more efficiently with
1862  *                        cloog_loop_generate_scalar.
1863  * - November  15th 2005: (debug) the result of the cloog_loop_generate call may
1864  *                        be a list of polyhedra (especially if stop option is
1865  *                        used): cloog_loop_add_list instead of cloog_loop_add.
1866  * - May 31, 2012: statement-wise first and last depth for loop separation
1867  *   if options->fs and option->ls arrays are set, it will override what has
1868  *   been supplied via "-f/-l"
1869  *
1870  */
cloog_loop_generate_general(CloogLoop * loop,int level,int scalar,int * scaldims,int nb_scattdims,CloogOptions * options)1871 CloogLoop *cloog_loop_generate_general(CloogLoop *loop,
1872 	int level, int scalar, int *scaldims, int nb_scattdims,
1873 	CloogOptions *options)
1874 {
1875   CloogLoop *res, *now, *temp, *l, *new_loop, *next;
1876   int separate = 0;
1877   int constant = 0;
1878 
1879     int first = -1;
1880     int last = -1;
1881 
1882     now = NULL;
1883 
1884     /* Get the -f and -l for each statement */
1885     cloog_loop_get_fl(loop, &first, &last, options);
1886 
1887     /* If stmt-wise options are not set or set inconsistently, use -f/-l ones globally */
1888     if (first <= 0 || last < first) {
1889         first = options->f;
1890         last = options->l;
1891     }
1892 
1893   /* 3. Separate all projections into disjoint polyhedra. */
1894   if (level > 0 && cloog_loop_is_constant(loop, level)) {
1895     res = cloog_loop_constant(loop, level);
1896     constant = 1;
1897     }else if ((first > level+scalar) || (first < 0)) {
1898     res = cloog_loop_merge(loop, level, options);
1899     }else{
1900     res = cloog_loop_separate(loop);
1901     separate = 1;
1902   }
1903 
1904   /* 3b. -correction- sort the loops to determine their textual order. */
1905   res = cloog_loop_sort(res, level);
1906 
1907   res = cloog_loop_restrict_inner(res);
1908 
1909   if (separate)
1910     res = cloog_loop_specialize(res, level, scalar, scaldims, nb_scattdims);
1911 
1912   /* 4. Recurse for each loop with the current domain as context. */
1913   temp = res ;
1914   res = NULL ;
1915     if (!level || (level+scalar < last) || (last < 0))
1916     res = cloog_loop_recurse(temp, level, scalar, scaldims, nb_scattdims,
1917 			     constant, options);
1918   else
1919   while (temp != NULL)
1920   { next = temp->next ;
1921     l = cloog_loop_nest(temp->inner, temp->domain, level+1);
1922     new_loop = cloog_loop_alloc(temp->state, temp->domain, 0, NULL,
1923 				NULL, l, NULL);
1924     temp->inner = NULL ;
1925     temp->next = NULL ;
1926     cloog_loop_free_parts(temp,0,0,0,0) ;
1927     cloog_loop_add(&res,&now,new_loop) ;
1928     temp = next ;
1929   }
1930 
1931   if (options->strides)
1932     res = cloog_loop_propagate_lower_bound(res, level);
1933 
1934   /* 5. eliminate unused iterations of the current level for the new one. See
1935    *    the example called linearity-1-1 example with and without this part
1936    *    for an idea.
1937    */
1938   if (options->backtrack && level &&
1939             ((level+scalar < last) || (last < 0)) &&
1940             ((first <= level+scalar) && !(first < 0)))
1941   res = cloog_loop_generate_backtrack(res, level, options);
1942 
1943   /* Pray for my new paper to be accepted somewhere since the following stuff
1944    * is really amazing :-) !
1945    * Far long later: The paper has been accepted to PACT 2004 :-))). But there
1946    * are still some bugs and I have no time to fix them. Thus now you have to
1947    * pray for me to get an academic position for that really amazing stuff :-) !
1948    * Later again: OK, I get my academic position, but still I have not enough
1949    * time to fix and clean this part... Pray again :-) !!!
1950    */
1951   /* res = cloog_loop_unisolate(res,level) ;*/
1952 
1953   return(res) ;
1954 }
1955 
1956 
1957 CloogLoop *cloog_loop_generate_restricted(CloogLoop *loop,
1958 	int level, int scalar, int *scaldims, int nb_scattdims,
1959 	CloogOptions *options);
1960 
1961 
1962 /**
1963  * cloog_loop_generate_scalar function:
1964  * This function applies the simplified code generation scheme in the trivial
1965  * case of scalar dimensions. When dealing with scalar dimensions, there is
1966  * no need of costly polyhedral operations for separation or sorting: sorting
1967  * is a question of comparing scalar vectors and separation amounts to consider
1968  * only loops with the same scalar vector for the next step of the code
1969  * generation process. This function achieves the separation/sorting process
1970  * for the vector of scalar dimension that begins at dimension 'level+scalar'
1971  * and finish to the first non-scalar dimension.
1972  * - loop is the loop for which we have to generate a scanning code,
1973  * - level is the current non-scalar dimension,
1974  * - scalar is the current scalar dimension,
1975  * - scaldims is the boolean array saying whether a dimension is scalar or not,
1976  * - nb_scattdims is the size of the scaldims array,
1977  * - options are the general code generation options.
1978  **
1979  * - September  2nd 2005: First version.
1980  */
cloog_loop_generate_scalar(CloogLoop * loop,int level,int scalar,int * scaldims,int nb_scattdims,CloogOptions * options)1981 CloogLoop *cloog_loop_generate_scalar(CloogLoop *loop,
1982 	int level, int scalar, int *scaldims, int nb_scattdims,
1983 	CloogOptions *options)
1984 { CloogLoop * res, * now, * temp, * l, * end, * next, * ref ;
1985   int scalar_new;
1986 
1987   /* We sort the loop list with respect to the current scalar vector. */
1988   res = cloog_loop_scalar_sort(loop,level,scaldims,nb_scattdims,scalar) ;
1989 
1990   scalar_new = scalar + scaldims[level + scalar - 1];
1991 
1992   temp = res ;
1993   res = NULL ;
1994   while (temp != NULL)
1995   { /* Then we will appy the general code generation process to each sub-list
1996      * of loops with the same scalar vector.
1997      */
1998     end = temp ;
1999     ref = temp ;
2000 
2001     while((end->next != NULL) &&
2002 	 cloog_loop_more(end->next, level, scalar_new, nb_scattdims) &&
2003          cloog_loop_scalar_eq(ref,end->next,level,scaldims,nb_scattdims,scalar))
2004     end = end->next ;
2005 
2006     next = end->next ;
2007     end->next = NULL ;
2008 
2009     /* For the next dimension, scalar value is updated by adding the scalar
2010      * vector size, which is stored at scaldims[level+scalar-1].
2011      */
2012     if (cloog_loop_more(temp, level, scalar_new, nb_scattdims)) {
2013       l = cloog_loop_generate_restricted(temp, level, scalar_new,
2014 				      scaldims, nb_scattdims, options);
2015 
2016       if (l != NULL)
2017 	cloog_loop_add_list(&res, &now, l);
2018     } else
2019       cloog_loop_add(&res, &now, temp);
2020 
2021     temp = next ;
2022   }
2023 
2024   return res ;
2025 }
2026 
2027 
2028 /* Compare loop with the next loop based on their constant dimensions.
2029  * The result is < 0, == 0 or > 0 depending on whether the constant
2030  * dimensions of loop are lexicographically smaller, equal or greater
2031  * than those of loop->next.
2032  * If loop is the last in the list, then it is assumed to be smaller
2033  * than the "next" one.
2034  */
cloog_loop_next_scal_cmp(CloogLoop * loop)2035 static int cloog_loop_next_scal_cmp(CloogLoop *loop)
2036 {
2037     int i;
2038     int nb_scaldims;
2039 
2040     if (!loop->next)
2041 	return -1;
2042 
2043     nb_scaldims = loop->block->nb_scaldims;
2044     if (loop->next->block->nb_scaldims < nb_scaldims)
2045 	nb_scaldims = loop->next->block->nb_scaldims;
2046 
2047     for (i = 0; i < nb_scaldims; ++i) {
2048 	int cmp = cloog_int_cmp(loop->block->scaldims[i],
2049 				loop->next->block->scaldims[i]);
2050 	if (cmp)
2051 	    return cmp;
2052     }
2053     return loop->block->nb_scaldims - loop->next->block->nb_scaldims;
2054 }
2055 
2056 
2057 /* Check whether the globally constant dimensions of a and b
2058  * have the same value for all globally constant dimensions
2059  * that are situated before any (locally) non-constant dimension.
2060  */
cloog_loop_equal_prefix(CloogLoop * a,CloogLoop * b,int * scaldims,int nb_scattdims)2061 static int cloog_loop_equal_prefix(CloogLoop *a, CloogLoop *b,
2062 				    int *scaldims, int nb_scattdims)
2063 {
2064     int i;
2065     int cst = 0;
2066     int dim = 0;
2067 
2068     for (i = 0; i < nb_scattdims; ++i) {
2069 	if (!scaldims[i]) {
2070 	    dim++;
2071 	    continue;
2072 	}
2073 	if (!cloog_int_eq(a->block->scaldims[cst], b->block->scaldims[cst]))
2074 	    break;
2075 	cst++;
2076     }
2077     for (i = i + 1; i < nb_scattdims; ++i) {
2078 	if (scaldims[i])
2079 	    continue;
2080 	if (!cloog_domain_lazy_isconstant(a->domain, dim, NULL))
2081 	    return 0;
2082 	/* No need to check that dim is also constant in b and that the
2083 	 * constant values are equal.  That will happen during the check
2084 	 * whether the two domains are equal.
2085 	 */
2086 	dim++;
2087     }
2088     return 1;
2089 }
2090 
2091 
2092 /* Try to block adjacent loops in the loop list "loop".
2093  * We only attempt blocking if the constant dimensions of the loops
2094  * in the least are (not necessarily strictly) increasing.
2095  * Then we look for a sublist such that the first (begin) has constant
2096  * dimensions strictly larger than the previous loop in the complete
2097  * list and such that the loop (end) after the last loop in the sublist
2098  * has constant dimensions strictly larger than the last loop in the sublist.
2099  * Furthermore, all loops in the sublist should have the same domain
2100  * (with globally constant dimensions removed) and the difference
2101  * (if any) in constant dimensions may only occur after all the
2102  * (locally) constant dimensions.
2103  * If we find such a sublist, then the blocks of all but the first
2104  * are merged into the block of the first.
2105  *
2106  * Note that this function can only be called before the global
2107  * blocklist has been created because it may otherwise modify and destroy
2108  * elements on that list.
2109  */
cloog_loop_block(CloogLoop * loop,int * scaldims,int nb_scattdims)2110 CloogLoop *cloog_loop_block(CloogLoop *loop, int *scaldims, int nb_scattdims)
2111 {
2112     CloogLoop *begin, *end, *l;
2113     int begin_after_previous;
2114     int end_after_previous;
2115 
2116     if (!loop->next)
2117 	return loop;
2118     for (begin = loop; begin; begin = begin->next) {
2119 	if (!begin->block || !begin->block->scaldims)
2120 	    return loop;
2121 	if (cloog_loop_next_scal_cmp(begin) > 0)
2122 	    return loop;
2123     }
2124 
2125     begin_after_previous = 1;
2126     for (begin = loop; begin; begin = begin->next) {
2127 	if (!begin_after_previous) {
2128 	    begin_after_previous = cloog_loop_next_scal_cmp(begin) < 0;
2129 	    continue;
2130 	}
2131 
2132 	end_after_previous = cloog_loop_next_scal_cmp(begin) < 0;
2133 	for (end = begin->next; end; end = end->next) {
2134 	    if (!cloog_loop_equal_prefix(begin, end, scaldims, nb_scattdims))
2135 		break;
2136 	    if (!cloog_domain_lazy_equal(begin->domain, end->domain))
2137 		break;
2138 	    end_after_previous = cloog_loop_next_scal_cmp(end) < 0;
2139 	}
2140 	if (end != begin->next && end_after_previous) {
2141 	    for (l = begin->next; l != end; l = begin->next) {
2142 		cloog_block_merge(begin->block, l->block);
2143 		begin->next = l->next;
2144 		cloog_loop_free_parts(l, 1, 0, 1, 0);
2145 	    }
2146 	}
2147 
2148 	begin_after_previous = cloog_loop_next_scal_cmp(begin) < 0;
2149     }
2150 
2151     return loop;
2152 }
2153 
2154 
2155 /**
2156  * Check whether for any fixed iteration of the outer loops,
2157  * there is an iteration of loop1 that is lexicographically greater
2158  * than an iteration of loop2.
2159  * Return 1 if there exists (or may exist) such a pair.
2160  * Return 0 if all iterations of loop1 are lexicographically smaller
2161  * than the iterations of loop2.
2162  * If no iteration is lexicographically greater, but if there are
2163  * iterations that are equal to iterations of loop2, then return "def".
2164  * This is useful for ensuring that such statements are not reordered.
2165  * Some users, including the test_run target in test, expect
2166  * the statements at a given point to be run in the original order.
2167  * Passing the value "0" for "def" would allow such statements to be reordered
2168  * and would allow for the detection of more components.
2169  */
cloog_loop_follows(CloogLoop * loop1,CloogLoop * loop2,int level,int scalar,int * scaldims,int nb_scattdims,int def)2170 int cloog_loop_follows(CloogLoop *loop1, CloogLoop *loop2,
2171 	int level, int scalar, int *scaldims, int nb_scattdims, int def)
2172 {
2173     int dim1, dim2;
2174 
2175     dim1 = cloog_domain_dimension(loop1->domain);
2176     dim2 = cloog_domain_dimension(loop2->domain);
2177     while ((level <= dim1 && level <= dim2) ||
2178 	   level_is_constant(level, scalar, scaldims, nb_scattdims)) {
2179 	if (level_is_constant(level, scalar, scaldims, nb_scattdims)) {
2180 	    int cmp = cloog_loop_constant_cmp(loop1, loop2, level, scaldims,
2181 					    nb_scattdims, scalar);
2182 	    if (cmp > 0)
2183 		return 1;
2184 	    if (cmp < 0)
2185 		return 0;
2186 	    scalar += scaldims[level + scalar - 1];
2187 	} else {
2188 	    int follows = cloog_domain_follows(loop1->domain, loop2->domain,
2189 						level);
2190 	    if (follows > 0)
2191 		return 1;
2192 	    if (follows < 0)
2193 		return 0;
2194 	    level++;
2195 	}
2196     }
2197 
2198     return def;
2199 }
2200 
2201 
2202 /* Structure for representing the nodes in the graph being traversed
2203  * using Tarjan's algorithm.
2204  * index represents the order in which nodes are visited.
2205  * min_index is the index of the root of a (sub)component.
2206  * on_stack indicates whether the node is currently on the stack.
2207  */
2208 struct cloog_loop_sort_node {
2209     int index;
2210     int min_index;
2211     int on_stack;
2212 };
2213 /* Structure for representing the graph being traversed
2214  * using Tarjan's algorithm.
2215  * len is the number of nodes
2216  * node is an array of nodes
2217  * stack contains the nodes on the path from the root to the current node
2218  * sp is the stack pointer
2219  * index is the index of the last node visited
2220  * order contains the elements of the components separated by -1
2221  * op represents the current position in order
2222  */
2223 struct cloog_loop_sort {
2224     int len;
2225     struct cloog_loop_sort_node *node;
2226     int *stack;
2227     int sp;
2228     int index;
2229     int *order;
2230     int op;
2231 };
2232 
2233 /* Allocate and initialize cloog_loop_sort structure.
2234  */
cloog_loop_sort_alloc(int len)2235 static struct cloog_loop_sort *cloog_loop_sort_alloc(int len)
2236 {
2237     struct cloog_loop_sort *s;
2238     int i;
2239 
2240     s = (struct cloog_loop_sort *)malloc(sizeof(struct cloog_loop_sort));
2241     assert(s);
2242     s->len = len;
2243     s->node = (struct cloog_loop_sort_node *)
2244 			malloc(len * sizeof(struct cloog_loop_sort_node));
2245     assert(s->node);
2246     for (i = 0; i < len; ++i)
2247 	s->node[i].index = -1;
2248     s->stack = (int *)malloc(len * sizeof(int));
2249     assert(s->stack);
2250     s->order = (int *)malloc(2 * len * sizeof(int));
2251     assert(s->order);
2252 
2253     s->sp = 0;
2254     s->index = 0;
2255     s->op = 0;
2256 
2257     return s;
2258 }
2259 
2260 /* Free cloog_loop_sort structure.
2261  */
cloog_loop_sort_free(struct cloog_loop_sort * s)2262 static void cloog_loop_sort_free(struct cloog_loop_sort *s)
2263 {
2264     free(s->node);
2265     free(s->stack);
2266     free(s->order);
2267     free(s);
2268 }
2269 
2270 
2271 /* Check whether for any fixed iteration of the outer loops,
2272  * there is an iteration of loop1 that is lexicographically greater
2273  * than an iteration of loop2, where the iteration domains are
2274  * available in the inner loops of the arguments.
2275  *
2276  * By using this functions to detect components, we ensure that
2277  * two CloogLoops appear in the same component if some iterations of
2278  * each loop should be executed before some iterations of the other loop.
2279  * Since we also want two CloogLoops that have exactly the same
2280  * iteration domain at the current level to be placed in the same component,
2281  * we first check if these domains are indeed the same.
2282  */
inner_loop_follows(CloogLoop * loop1,CloogLoop * loop2,int level,int scalar,int * scaldims,int nb_scattdims,int def)2283 static int inner_loop_follows(CloogLoop *loop1, CloogLoop *loop2,
2284 	int level, int scalar, int *scaldims, int nb_scattdims, int def)
2285 {
2286     int f;
2287 
2288     f = cloog_domain_lazy_equal(loop1->domain, loop2->domain);
2289     if (!f)
2290 	f = cloog_loop_follows(loop1->inner, loop2->inner,
2291 				level, scalar, scaldims, nb_scattdims, def);
2292 
2293     return f;
2294 }
2295 
2296 
2297 /* Perform Tarjan's algorithm for computing the strongly connected components
2298  * in the graph with the individual CloogLoops as vertices.
2299  * Two CloopLoops appear in the same component if they both (indirectly)
2300  * "follow" each other, where the following relation is determined
2301  * by the follows function.
2302  */
cloog_loop_components_tarjan(struct cloog_loop_sort * s,CloogLoop ** loop_array,int i,int level,int scalar,int * scaldims,int nb_scattdims,int (* follows)(CloogLoop * loop1,CloogLoop * loop2,int level,int scalar,int * scaldims,int nb_scattdims,int def))2303 static void cloog_loop_components_tarjan(struct cloog_loop_sort *s,
2304 	CloogLoop **loop_array, int i, int level, int scalar, int *scaldims,
2305 	int nb_scattdims,
2306 	int (*follows)(CloogLoop *loop1, CloogLoop *loop2,
2307 	    int level, int scalar, int *scaldims, int nb_scattdims, int def))
2308 {
2309     int j;
2310 
2311     s->node[i].index = s->index;
2312     s->node[i].min_index = s->index;
2313     s->node[i].on_stack = 1;
2314     s->index++;
2315     s->stack[s->sp++] = i;
2316 
2317     for (j = s->len - 1; j >= 0; --j) {
2318 	int f;
2319 
2320 	if (j == i)
2321 	    continue;
2322 	if (s->node[j].index >= 0 &&
2323 		(!s->node[j].on_stack ||
2324 		 s->node[j].index > s->node[i].min_index))
2325 	    continue;
2326 
2327 	f = follows(loop_array[i], loop_array[j],
2328 				level, scalar, scaldims, nb_scattdims, i > j);
2329 	if (!f)
2330 	    continue;
2331 
2332 	if (s->node[j].index < 0) {
2333 	    cloog_loop_components_tarjan(s, loop_array, j, level, scalar,
2334 					 scaldims, nb_scattdims, follows);
2335 	    if (s->node[j].min_index < s->node[i].min_index)
2336 		s->node[i].min_index = s->node[j].min_index;
2337 	} else if (s->node[j].index < s->node[i].min_index)
2338 		s->node[i].min_index = s->node[j].index;
2339     }
2340 
2341     if (s->node[i].index != s->node[i].min_index)
2342 	return;
2343 
2344     do {
2345 	j = s->stack[--s->sp];
2346 	s->node[j].on_stack = 0;
2347 	s->order[s->op++] = j;
2348     } while (j != i);
2349     s->order[s->op++] = -1;
2350 }
2351 
2352 
qsort_index_cmp(const void * p1,const void * p2)2353 static int qsort_index_cmp(const void *p1, const void *p2)
2354 {
2355     return *(int *)p1 - *(int *)p2;
2356 }
2357 
2358 /* Sort the elements of the component starting at list.
2359  * The list is terminated by a -1.
2360  */
sort_component(int * list)2361 static void sort_component(int *list)
2362 {
2363     int len;
2364 
2365     for (len = 0; list[len] != -1; ++len)
2366 	;
2367 
2368     qsort(list, len, sizeof(int), qsort_index_cmp);
2369 }
2370 
2371 /* Given an array of indices "list" into the "loop_array" array,
2372  * terminated by -1, construct a linked list of the corresponding
2373  * entries and put the result in *res.
2374  * The value returned is the number of CloogLoops in the (linked) list
2375  */
extract_component(CloogLoop ** loop_array,int * list,CloogLoop ** res)2376 static int extract_component(CloogLoop **loop_array, int *list, CloogLoop **res)
2377 {
2378     int i = 0;
2379 
2380     sort_component(list);
2381     while (list[i] != -1) {
2382 	*res = loop_array[list[i]];
2383 	res = &(*res)->next;
2384 	++i;
2385     }
2386     *res = NULL;
2387 
2388     return i;
2389 }
2390 
2391 
2392 /**
2393  * Call cloog_loop_generate_scalar or cloog_loop_generate_general
2394  * on each of the strongly connected components in the list of CloogLoops
2395  * pointed to by "loop".
2396  *
2397  * We use Tarjan's algorithm to find the strongly connected components.
2398  * Note that this algorithm also topologically sorts the components.
2399  *
2400  * The components are treated separately to avoid spurious separations.
2401  * The concatentation of the results may contain successive loops
2402  * with the same bounds, so we try to combine such loops.
2403  */
cloog_loop_generate_components(CloogLoop * loop,int level,int scalar,int * scaldims,int nb_scattdims,CloogOptions * options)2404 CloogLoop *cloog_loop_generate_components(CloogLoop *loop,
2405 	int level, int scalar, int *scaldims, int nb_scattdims,
2406 	CloogOptions *options)
2407 {
2408     int i, nb_loops;
2409     CloogLoop *tmp;
2410     CloogLoop *res, **res_next;
2411     CloogLoop **loop_array;
2412     struct cloog_loop_sort *s;
2413 
2414     if (level == 0 || !loop->next)
2415 	return cloog_loop_generate_general(loop, level, scalar,
2416 					     scaldims, nb_scattdims, options);
2417 
2418     nb_loops = cloog_loop_count(loop);
2419 
2420     loop_array = (CloogLoop **)malloc(nb_loops * sizeof(CloogLoop *));
2421     assert(loop_array);
2422 
2423     for (i = 0, tmp = loop; i < nb_loops; i++, tmp = tmp->next)
2424 	loop_array[i] = tmp;
2425 
2426     s = cloog_loop_sort_alloc(nb_loops);
2427     for (i = nb_loops - 1; i >= 0; --i) {
2428 	if (s->node[i].index >= 0)
2429 	    continue;
2430 	cloog_loop_components_tarjan(s, loop_array, i, level, scalar, scaldims,
2431 					nb_scattdims, &inner_loop_follows);
2432     }
2433 
2434     i = 0;
2435     res = NULL;
2436     res_next = &res;
2437     while (nb_loops) {
2438 	int n = extract_component(loop_array, &s->order[i], &tmp);
2439 	i += n + 1;
2440 	nb_loops -= n;
2441 	*res_next = cloog_loop_generate_general(tmp, level, scalar,
2442 					     scaldims, nb_scattdims, options);
2443     	while (*res_next)
2444 	    res_next = &(*res_next)->next;
2445     }
2446 
2447     cloog_loop_sort_free(s);
2448 
2449     free(loop_array);
2450 
2451     res = cloog_loop_combine(res);
2452 
2453     return res;
2454 }
2455 
2456 
2457 /* For each loop in the list "loop", decompose the list of
2458  * inner loops into strongly connected components and put
2459  * the components into separate loops at the top level.
2460  */
cloog_loop_decompose_inner(CloogLoop * loop,int level,int scalar,int * scaldims,int nb_scattdims)2461 CloogLoop *cloog_loop_decompose_inner(CloogLoop *loop,
2462 	int level, int scalar, int *scaldims, int nb_scattdims)
2463 {
2464     CloogLoop *l, *tmp;
2465     CloogLoop **loop_array;
2466     int i, n_loops, max_loops = 0;
2467     struct cloog_loop_sort *s;
2468 
2469     for (l = loop; l; l = l->next) {
2470 	n_loops = cloog_loop_count(l->inner);
2471 	if (max_loops < n_loops)
2472 	    max_loops = n_loops;
2473     }
2474 
2475     if (max_loops <= 1)
2476 	return loop;
2477 
2478     loop_array = (CloogLoop **)malloc(max_loops * sizeof(CloogLoop *));
2479     assert(loop_array);
2480 
2481     for (l = loop; l; l = l->next) {
2482 	int n;
2483 
2484 	for (i = 0, tmp = l->inner; tmp; i++, tmp = tmp->next)
2485 	    loop_array[i] = tmp;
2486 	n_loops = i;
2487 	if (n_loops <= 1)
2488 	    continue;
2489 
2490 	s = cloog_loop_sort_alloc(n_loops);
2491 	for (i = n_loops - 1; i >= 0; --i) {
2492 	    if (s->node[i].index >= 0)
2493 		continue;
2494 	    cloog_loop_components_tarjan(s, loop_array, i, level, scalar,
2495 				scaldims, nb_scattdims, &cloog_loop_follows);
2496 	}
2497 
2498 	n = extract_component(loop_array, s->order, &l->inner);
2499 	n_loops -= n;
2500 	i = n + 1;
2501 	while (n_loops) {
2502 	    CloogLoop *inner;
2503 
2504 	    n = extract_component(loop_array, &s->order[i], &inner);
2505 	    n_loops -= n;
2506 	    i += n + 1;
2507 	    tmp = cloog_loop_alloc(l->state, cloog_domain_copy(l->domain),
2508 			l->otl, l->stride, l->block, inner, l->next);
2509 	    l->next = tmp;
2510 	    l = tmp;
2511 	}
2512 
2513 	cloog_loop_sort_free(s);
2514     }
2515 
2516     free(loop_array);
2517 
2518     return loop;
2519 }
2520 
2521 
cloog_loop_generate_restricted(CloogLoop * loop,int level,int scalar,int * scaldims,int nb_scattdims,CloogOptions * options)2522 CloogLoop *cloog_loop_generate_restricted(CloogLoop *loop,
2523 	int level, int scalar, int *scaldims, int nb_scattdims,
2524 	CloogOptions *options)
2525 {
2526   /* To save both time and memory, we switch here depending on whether the
2527    * current dimension is scalar (simplified processing) or not (general
2528    * processing).
2529    */
2530   if (level_is_constant(level, scalar, scaldims, nb_scattdims))
2531     return cloog_loop_generate_scalar(loop, level, scalar,
2532                                    scaldims, nb_scattdims, options);
2533   /*
2534    * 2. Compute the projection of each polyhedron onto the outermost
2535    *    loop variable and the parameters.
2536    */
2537   loop = cloog_loop_project_all(loop, level);
2538 
2539   return cloog_loop_generate_components(loop, level, scalar, scaldims,
2540 					nb_scattdims, options);
2541 }
2542 
2543 
cloog_loop_generate_restricted_or_stop(CloogLoop * loop,CloogDomain * context,int level,int scalar,int * scaldims,int nb_scattdims,CloogOptions * options)2544 CloogLoop *cloog_loop_generate_restricted_or_stop(CloogLoop *loop,
2545 	CloogDomain *context,
2546 	int level, int scalar, int *scaldims, int nb_scattdims,
2547 	CloogOptions *options)
2548 {
2549   /* If the user asked to stop code generation at this level, let's stop. */
2550   if ((options->stop >= 0) && (level+scalar >= options->stop+1))
2551     return cloog_loop_stop(loop,context) ;
2552 
2553   return cloog_loop_generate_restricted(loop, level, scalar, scaldims,
2554   				nb_scattdims, options);
2555 }
2556 
2557 
2558 /**
2559  * cloog_loop_generate function:
2560  * Adaptation from LoopGen 0.4 by F. Quillere. This function implements the
2561  * Quillere algorithm for polyhedron scanning from step 1 to 2.
2562  * (see the Quillere paper).
2563  * - loop is the loop for which we have to generate a scanning code,
2564  * - context is the context of the current loop (constraints on parameter and/or
2565  *   on outer loop counters),
2566  * - level is the current non-scalar dimension,
2567  * - scalar is the current scalar dimension,
2568  * - scaldims is the boolean array saying whether a dimension is scalar or not,
2569  * - nb_scattdims is the size of the scaldims array,
2570  * - options are the general code generation options.
2571  **
2572  * - October 26th 2001: first version.
2573  * - July 3rd->11th 2003: memory leaks hunt and correction.
2574  * - June      15th 2005: a memory leak fixed (loop was not entirely freed when
2575  *                        the result of cloog_loop_restrict was NULL).
2576  * - June      22nd 2005: Adaptation for GMP.
2577  * - September  2nd 2005: The function have been cutted out in two pieces:
2578  *                        cloog_loop_generate and this one, in order to handle
2579  *                        the scalar dimension case more efficiently with
2580  *                        cloog_loop_generate_scalar.
2581  * - November  15th 2005: (debug) Condition for stop option no more take care of
2582  *                        further scalar dimensions.
2583  */
cloog_loop_generate(CloogLoop * loop,CloogDomain * context,int level,int scalar,int * scaldims,int nb_scattdims,CloogOptions * options)2584 CloogLoop *cloog_loop_generate(CloogLoop *loop, CloogDomain *context,
2585 	int level, int scalar, int *scaldims, int nb_scattdims,
2586 	CloogOptions *options)
2587 {
2588   /* 1. Replace each polyhedron by its intersection with the context.
2589    */
2590   loop = cloog_loop_restrict_all(loop, context);
2591   if (!loop)
2592     return NULL;
2593 
2594   return cloog_loop_generate_restricted_or_stop(loop, context,
2595 			      level, scalar, scaldims, nb_scattdims, options);
2596 }
2597 
2598 
2599 /*
2600  * Internal function for simplifying a single loop in a list of loops.
2601  * See cloog_loop_simplify.
2602  */
loop_simplify(CloogLoop * loop,CloogDomain * context,int level,int nb_scattdims,CloogOptions * options)2603 static CloogLoop *loop_simplify(CloogLoop *loop, CloogDomain *context,
2604 	int level, int nb_scattdims, CloogOptions *options)
2605 {
2606   int domain_dim;
2607   CloogBlock * new_block ;
2608   CloogLoop *simplified, *inner;
2609   CloogDomain * domain, * simp, * inter, * extended_context ;
2610 
2611   domain = loop->domain ;
2612 
2613   domain_dim = cloog_domain_dimension(domain);
2614   extended_context = cloog_domain_extend(context, domain_dim);
2615   inter = cloog_domain_intersection(domain,extended_context) ;
2616   simp = cloog_domain_simplify(domain, extended_context);
2617   cloog_domain_free(extended_context) ;
2618 
2619   /* If the constraint system is never true, go to the next one. */
2620   if (cloog_domain_never_integral(simp)) {
2621     cloog_loop_free(loop->inner);
2622     cloog_domain_free(inter);
2623     cloog_domain_free(simp);
2624     return NULL;
2625   }
2626 
2627   inner = cloog_loop_simplify(loop->inner, inter, level+1, nb_scattdims,
2628                               options);
2629 
2630   if ((inner == NULL) && (loop->block == NULL)) {
2631     cloog_domain_free(inter);
2632     cloog_domain_free(simp);
2633     return NULL;
2634   }
2635 
2636   new_block = cloog_block_copy(loop->block) ;
2637 
2638   simplified = cloog_loop_alloc(loop->state, simp, loop->otl, loop->stride,
2639 				new_block, inner, NULL);
2640 
2641   if (options->save_domains) {
2642     inter = cloog_domain_add_stride_constraint(inter, loop->stride);
2643     if (domain_dim > nb_scattdims) {
2644       CloogDomain *t;
2645       inter = cloog_domain_project(t = inter, nb_scattdims);
2646       cloog_domain_free(t);
2647     }
2648     simplified->unsimplified = inter;
2649   } else
2650     cloog_domain_free(inter);
2651 
2652   return(simplified) ;
2653 }
2654 
2655 
2656 /**
2657  * cloog_loop_simplify function:
2658  * This function implements the part 6. of the Quillere algorithm, it
2659  * recursively simplifies each loop in the context of the preceding loop domain.
2660  * It returns a pointer to the simplified loop list.
2661  * The cloog_domain_simplify (DomainSimplify) behaviour is really bad with
2662  * polyhedra union and some really awful sidesteppings were written, I plan
2663  * to solve that...
2664  * - October   31th 2001: first version.
2665  * - July 3rd->11th 2003: memory leaks hunt and correction.
2666  * - April     16th 2005: a memory leak fixed (extended_context was not freed).
2667  * - June      15th 2005: a memory leak fixed (loop was not conveniently freed
2668  *                        when the constraint system is never true).
2669  * - October   27th 2005: - this function called before cloog_loop_fast_simplify
2670  *                          is now the official cloog_loop_simplify function in
2671  *                          replacement of a slower and more complex one (after
2672  *                          deep changes in the pretty printer).
2673  *                        - we use cloog_loop_disjoint to fix the problem when
2674  *                          simplifying gives a union of polyhedra (before, it
2675  *                          was under the responsibility of the pretty printer).
2676  */
cloog_loop_simplify(CloogLoop * loop,CloogDomain * context,int level,int nb_scattdims,CloogOptions * options)2677 CloogLoop *cloog_loop_simplify(CloogLoop *loop, CloogDomain *context, int level,
2678 	                       int nb_scattdims, CloogOptions *options)
2679 {
2680   CloogLoop *now;
2681   CloogLoop *res = NULL;
2682   CloogLoop **next = &res;
2683   int need_split = 0;
2684 
2685   for (now = loop; now; now = now->next)
2686     if (!cloog_domain_isconvex(now->domain)) {
2687       now->domain = cloog_domain_simplify_union(now->domain);
2688       if (!cloog_domain_isconvex(now->domain))
2689 	need_split = 1;
2690     }
2691 
2692   /* If the input of CLooG contains any union domains, then they
2693    * may not have been split yet at this point.  Do so now as the
2694    * clast construction assumes there are no union domains.
2695    */
2696   if (need_split)
2697     loop = cloog_loop_disjoint(loop);
2698 
2699   for (now = loop; now; now = now->next) {
2700     *next = loop_simplify(now, context, level, nb_scattdims, options);
2701 
2702     now->inner = NULL; /* For loop integrity. */
2703     cloog_domain_free(now->domain);
2704     now->domain = NULL;
2705 
2706     if (*next)
2707       next = &(*next)->next;
2708   }
2709   cloog_loop_free(loop);
2710 
2711   return res;
2712 }
2713 
2714 
2715 /**
2716  * cloog_loop_scatter function:
2717  * This function add the scattering (scheduling) informations in a loop.
2718  */
cloog_loop_scatter(CloogLoop * loop,CloogScattering * scatt)2719 void cloog_loop_scatter(CloogLoop * loop, CloogScattering *scatt)
2720 {
2721   loop->domain = cloog_domain_scatter(loop->domain, scatt);
2722 }
2723 
2724