1 /* Matrix layout transformations.
2    Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3    Contributed by Razya Ladelsky <razya@il.ibm.com>
4    Originally written by Revital Eres and Mustafa Hagog.
5 
6 This file is part of GCC.
7 
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12 
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
16 for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3.  If not see
20 <http://www.gnu.org/licenses/>.  */
21 
22 /*
   Matrix flattening optimization tries to replace an N-dimensional
   matrix with an equivalent M-dimensional matrix, where M < N.
25    This first implementation focuses on global matrices defined dynamically.
26 
   When M==1, we actually flatten the whole matrix.
   For instance, consider a two-dimensional array a[dim1][dim2].
29    The code for allocating space for it usually looks like:
30 
31      a = (int **)  malloc(dim1 * sizeof(int *));
32      for (i=0; i<dim1; i++)
33         a[i] = (int *) malloc (dim2 * sizeof(int));
34 
35    If the array "a" is found suitable for this optimization,
36    its allocation is replaced by:
37 
     a = (int *) malloc (dim1 * dim2 * sizeof(int));
39 
40    and all the references to a[i][j] are replaced by a[i * dim2 + j].
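
   More generally, when all N dimensions of a matrix can be flattened,
   an access a[i1][i2]...[iN] is replaced by
   a[((i1 * dim2 + i2) * dim3 + ...) * dimN + iN].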
41 
42    The two main phases of the optimization are the analysis
43    and transformation.
44    The driver of the optimization is matrix_reorg ().
45 
46 
47 
48    Analysis phase:
49    ===============
50 
51    We'll number the dimensions outside-in, meaning the most external
52    is 0, then 1, and so on.
   The analysis part of the optimization determines K, the escape
   level of an N-dimensional matrix (K <= N), which allows flattening of
   the external dimensions 0,1,..., K-1.  Escape level 0 means that the
56    whole matrix escapes and no flattening is possible.
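
   For illustration (a hypothetical example): if a row of the
   two-dimensional matrix above is passed to a function the compiler
   knows nothing about, as in

     bar (a[i]);

   then the level-1 pointer a[i] escapes, K is 1, and only dimension
   0 could be flattened, which leaves the matrix unchanged.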
57 
58    The analysis part is implemented in analyze_matrix_allocation_site()
59    and analyze_matrix_accesses().
60 
61    Transformation phase:
62    =====================
63    In this phase we define the new flattened matrices that replace the
64    original matrices in the code.
65    Implemented in transform_allocation_sites(),
66    transform_access_sites().
67 
68    Matrix Transposing
69    ==================
70    The idea of Matrix Transposing is organizing the matrix in a different
71    layout such that the dimensions are reordered.
72    This could produce better cache behavior in some cases.
73 
   For example, let's look at the matrix accesses in the following loop:
75 
76    for (i=0; i<N; i++)
77     for (j=0; j<M; j++)
78      access to a[i][j]
79 
80    This loop can produce good cache behavior because the elements of
81    the inner dimension are accessed sequentially.
82 
83   However, if the accesses of the matrix were of the following form:
84 
85   for (i=0; i<N; i++)
86    for (j=0; j<M; j++)
87      access to a[j][i]
88 
  In this loop we iterate over the columns and not the rows.
  Therefore, swapping the rows and columns
  would give a layout with better (cache) locality.
  Swapping the dimensions of the matrix is called matrix transposing.
93 
  This example, of course, could be extended to matrices with more
  dimensions as well.
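
  For instance (continuing the two-dimensional example above), with
  dominant accesses of the form a[j][i] the transposed and flattened
  access becomes a[i * dim1 + j] instead of a[j * dim2 + i], so the
  inner j loop walks through consecutive memory locations.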
96 
  Since a program could include all kinds of accesses, there is a
  decision mechanism, implemented in analyze_transpose(), which uses
  a heuristic to determine whether to transpose the matrix or not,
  according to the form of the dominant accesses.
  This decision is passed on to the flattening mechanism; whether or
  not the matrix was transposed, the matrix is flattened (if possible).
103 
  This decision making is based on profiling information and loop
  information.  If profiling information is available, the
  decision-making mechanism is used; otherwise the matrix is only
  flattened (if possible).
107 
  Both optimizations are described in the paper "Matrix flattening and
  transposing in GCC", which was presented at the GCC Summit 2006.
  http://www.gccsummit.org/2006/2006-GCC-Summit-Proceedings.pdf.  */
111 
112 #include "config.h"
113 #include "system.h"
114 #include "coretypes.h"
115 #include "tm.h"
116 #include "tree.h"
117 #include "rtl.h"
118 #include "tree-inline.h"
119 #include "tree-flow.h"
120 #include "tree-flow-inline.h"
121 #include "langhooks.h"
122 #include "hashtab.h"
123 #include "flags.h"
124 #include "ggc.h"
125 #include "debug.h"
126 #include "target.h"
127 #include "cgraph.h"
128 #include "diagnostic-core.h"
129 #include "timevar.h"
130 #include "params.h"
131 #include "fibheap.h"
132 #include "intl.h"
133 #include "function.h"
134 #include "basic-block.h"
135 #include "cfgloop.h"
136 #include "tree-iterator.h"
137 #include "tree-pass.h"
138 #include "opts.h"
139 #include "tree-data-ref.h"
140 #include "tree-chrec.h"
141 #include "tree-scalar-evolution.h"
142 #include "tree-ssa-sccvn.h"
143 
144 /* We need to collect a lot of data from the original malloc,
145    particularly as the gimplifier has converted:
146 
147    orig_var = (struct_type *) malloc (x * sizeof (struct_type *));
148 
149    into
150 
151    T3 = <constant> ;  ** <constant> is amount to malloc; precomputed **
152    T4 = malloc (T3);
153    T5 = (struct_type *) T4;
154    orig_var = T5;
155 
156    The following struct fields allow us to collect all the necessary data from
157    the gimplified program.  The comments in the struct below are all based
158    on the gimple example above.  */
159 
160 struct malloc_call_data
161 {
162   gimple call_stmt;		/* Tree for "T4 = malloc (T3);"                     */
163   tree size_var;		/* Var decl for T3.                                 */
164   tree malloc_size;		/* Tree for "<constant>", the rhs assigned to T3.   */
165 };
166 
167 static tree can_calculate_expr_before_stmt (tree, sbitmap);
168 static tree can_calculate_stmt_before_stmt (gimple, sbitmap);
169 
170 /* The front end of the compiler, when parsing statements of the form:
171 
172    var = (type_cast) malloc (sizeof (type));
173 
174    always converts this single statement into the following statements
175    (GIMPLE form):
176 
177    T.1 = sizeof (type);
178    T.2 = malloc (T.1);
179    T.3 = (type_cast) T.2;
180    var = T.3;
181 
182    Since we need to create new malloc statements and modify the original
183    statements somewhat, we need to find all four of the above statements.
184    Currently record_call_1 (called for building cgraph edges) finds and
185    records the statements containing the actual call to malloc, but we
186    need to find the rest of the variables/statements on our own.  That
187    is what the following function does.  */
188 static void
189 collect_data_for_malloc_call (gimple stmt, struct malloc_call_data *m_data)
190 {
191   tree size_var = NULL;
192   tree malloc_fn_decl;
193   tree arg1;
194 
195   gcc_assert (is_gimple_call (stmt));
196 
197   malloc_fn_decl = gimple_call_fndecl (stmt);
198   if (malloc_fn_decl == NULL
199       || DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
200     return;
201 
202   arg1 = gimple_call_arg (stmt, 0);
203   size_var = arg1;
204 
205   m_data->call_stmt = stmt;
206   m_data->size_var = size_var;
207   if (TREE_CODE (size_var) != VAR_DECL)
208     m_data->malloc_size = size_var;
209   else
210     m_data->malloc_size = NULL_TREE;
211 }
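
/* For illustration, a sketch of how the collected data is used (see
   add_allocation_site and check_allocation_function below):

     struct malloc_call_data mcd;
     collect_data_for_malloc_call (stmt, &mcd);

   After the call, mcd.call_stmt is STMT, mcd.size_var is the argument
   passed to malloc, and mcd.malloc_size is that argument itself when
   it is not a VAR_DECL (for instance a constant), or NULL_TREE
   otherwise.  */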
212 
/* Information about a matrix access site.
214    For example: if an access site of matrix arr is arr[i][j]
215    the ACCESS_SITE_INFO structure will have the address
216    of arr as its stmt.  The INDEX_INFO will hold information about the
217    initial address and index of each dimension.  */
218 struct access_site_info
219 {
220   /* The statement (MEM_REF or POINTER_PLUS_EXPR).  */
221   gimple stmt;
222 
  /* In the case of POINTER_PLUS_EXPR, the offset.  */
224   tree offset;
225 
226   /* The index which created the offset.  */
227   tree index;
228 
229   /* The indirection level of this statement.  */
230   int level;
231 
  /* TRUE for an allocation site, FALSE for an access site.  */
233   bool is_alloc;
234 
235   /* The function containing the access site.  */
236   tree function_decl;
237 
  /* This access is iterated in the innermost loop.  */
239   bool iterated_by_inner_most_loop_p;
240 };
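
/* For illustration (a rough sketch; the temporary names are made up):
   for an access a[i][j] to the flattening candidate "a", the
   gimplified code looks roughly like

     T.1 = a;
     T.2 = T.1 + D.1;      where D.1 = i * sizeof (int *)
     T.3 = *T.2;
     T.4 = T.3 + D.2;      where D.2 = j * sizeof (int)
     T.5 = *T.4;

   Each POINTER_PLUS_EXPR statement is recorded with its OFFSET
   (D.1 or D.2), the INDEX that produced the offset (i or j) and the
   indirection LEVEL reached so far; each MEM_REF increases the
   indirection level by one.  */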
241 
242 typedef struct access_site_info *access_site_info_p;
243 DEF_VEC_P (access_site_info_p);
244 DEF_VEC_ALLOC_P (access_site_info_p, heap);
245 
246 /* Calls to free when flattening a matrix.  */
247 
248 struct free_info
249 {
250   gimple stmt;
251   tree func;
252 };
253 
254 /* Information about matrix to flatten.  */
255 struct matrix_info
256 {
257   /* Decl tree of this matrix.  */
258   tree decl;
259   /* Number of dimensions; number
260      of "*" in the type declaration.  */
261   int num_dims;
262 
263   /* Minimum indirection level that escapes, 0 means that
264      the whole matrix escapes, k means that dimensions
     0 to ACTUAL_DIM - k escape.  */
266   int min_indirect_level_escape;
267 
268   gimple min_indirect_level_escape_stmt;
269 
270   /* Hold the allocation site for each level (dimension).
271      We can use NUM_DIMS as the upper bound and allocate the array
272      once with this number of elements and no need to use realloc and
273      MAX_MALLOCED_LEVEL.  */
274   gimple *malloc_for_level;
275 
276   int max_malloced_level;
277 
278   /* Is the matrix transposed.  */
279   bool is_transposed_p;
280 
281   /* The location of the allocation sites (they must be in one
282      function).  */
283   tree allocation_function_decl;
284 
285   /* The calls to free for each level of indirection.  */
286   struct free_info *free_stmts;
287 
  /* An array which holds for each dimension its size, where
     dimension 0 is the outermost (the one that contains all the
     others).  */
291   tree *dimension_size;
292 
  /* An array which holds for each dimension its original size
294      (before transposing and flattening take place).  */
295   tree *dimension_size_orig;
296 
  /* An array which holds for each dimension the size of the type
     of the elements accessed in that level (in bytes).  */
299   HOST_WIDE_INT *dimension_type_size;
300 
301   int dimension_type_size_len;
302 
303   /* An array collecting the count of accesses for each dimension.  */
304   gcov_type *dim_hot_level;
305 
306   /* An array of the accesses to be flattened.
     Elements are of type "struct access_site_info *".  */
308   VEC (access_site_info_p, heap) * access_l;
309 
310   /* A map of how the dimensions will be organized at the end of
311      the analyses.  */
312   int *dim_map;
313 };
314 
/* In each phi node we want to record the indirection level we have
   when we get to the phi node.  Usually we will have phi nodes with
   more than two arguments, and then we must ensure that all of them
   get to the phi node with the same indirection level; otherwise it
   is not safe to do the flattening.  So we record the information
   regarding the indirection level each time we get to the phi node
   in this hash table.  */
321 
322 struct matrix_access_phi_node
323 {
324   gimple phi;
325   int indirection_level;
326 };
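
/* For illustration (a hypothetical situation): if one incoming edge
   reaches a phi node with a level-1 pointer and another with the
   level-0 matrix base, as in

     # T.7 = PHI <T.5(3), a.2(4)>

   then the recorded and incoming indirection levels disagree, and
   the escape level is lowered to their minimum (here 0).  */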
327 
328 /* We use this structure to find if the SSA variable is accessed inside the
329    tree and record the tree containing it.  */
330 
331 struct ssa_acc_in_tree
332 {
333   /* The variable whose accesses in the tree we are looking for.  */
334   tree ssa_var;
  /* The tree in which ssa_var is accessed, and its tree code;
     currently it can be a MEM_REF or CALL_EXPR.  */
337   enum tree_code t_code;
338   tree t_tree;
339   /* The place in the containing tree.  */
340   tree *tp;
341   tree second_op;
342   bool var_found;
343 };
344 
345 static void analyze_matrix_accesses (struct matrix_info *, tree, int, bool,
346 				     sbitmap, bool);
347 static int transform_allocation_sites (void **, void *);
348 static int transform_access_sites (void **, void *);
349 static int analyze_transpose (void **, void *);
350 static int dump_matrix_reorg_analysis (void **, void *);
351 
352 static bool check_transpose_p;
353 
354 /* Hash function used for the phi nodes.  */
355 
356 static hashval_t
357 mat_acc_phi_hash (const void *p)
358 {
359   const struct matrix_access_phi_node *const ma_phi =
360     (const struct matrix_access_phi_node *) p;
361 
362   return htab_hash_pointer (ma_phi->phi);
363 }
364 
365 /* Equality means phi node pointers are the same.  */
366 
367 static int
368 mat_acc_phi_eq (const void *p1, const void *p2)
369 {
370   const struct matrix_access_phi_node *const phi1 =
371     (const struct matrix_access_phi_node *) p1;
372   const struct matrix_access_phi_node *const phi2 =
373     (const struct matrix_access_phi_node *) p2;
374 
375   if (phi1->phi == phi2->phi)
376     return 1;
377 
378   return 0;
379 }
380 
381 /* Hold the PHI nodes we visit during the traversal for escaping
382    analysis.  */
383 static htab_t htab_mat_acc_phi_nodes = NULL;
384 
385 /* This hash-table holds the information about the matrices we are
386    going to handle.  */
387 static htab_t matrices_to_reorg = NULL;
388 
389 /* Return a hash for MTT, which is really a "matrix_info *".  */
390 static hashval_t
391 mtt_info_hash (const void *mtt)
392 {
393   return htab_hash_pointer (((const struct matrix_info *) mtt)->decl);
394 }
395 
396 /* Return true if MTT1 and MTT2 (which are really both of type
397    "matrix_info *") refer to the same decl.  */
398 static int
399 mtt_info_eq (const void *mtt1, const void *mtt2)
400 {
401   const struct matrix_info *const i1 = (const struct matrix_info *) mtt1;
402   const struct matrix_info *const i2 = (const struct matrix_info *) mtt2;
403 
404   if (i1->decl == i2->decl)
405     return true;
406 
407   return false;
408 }
409 
/* Return false if STMT may contain a vector expression.
   In that case, no matrices should be flattened.  */
412 static bool
413 may_flatten_matrices_1 (gimple stmt)
414 {
415   switch (gimple_code (stmt))
416     {
417     case GIMPLE_ASSIGN:
418     case GIMPLE_CALL:
419       if (!gimple_has_lhs (stmt))
420 	return true;
421       if (TREE_CODE (TREE_TYPE (gimple_get_lhs (stmt))) == VECTOR_TYPE)
422 	{
423 	  if (dump_file)
424 	    fprintf (dump_file,
425 		     "Found vector type, don't flatten matrix\n");
426 	  return false;
427 	}
428       break;
    case GIMPLE_ASM:
      /* Asm code could contain vector operations.  */
      return false;
433     default:
434       break;
435     }
436   return true;
437 }
438 
439 /* Return false if there are hand-written vectors in the program.
440    We disable the flattening in such a case.  */
441 static bool
442 may_flatten_matrices (struct cgraph_node *node)
443 {
444   tree decl;
445   struct function *func;
446   basic_block bb;
447   gimple_stmt_iterator gsi;
448 
449   decl = node->decl;
450   if (node->analyzed)
451     {
452       func = DECL_STRUCT_FUNCTION (decl);
453       FOR_EACH_BB_FN (bb, func)
454 	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
455 	if (!may_flatten_matrices_1 (gsi_stmt (gsi)))
456 	  return false;
457     }
458   return true;
459 }
460 
/* Given a VAR_DECL, check its type to determine whether it is
   a definition of a dynamically allocated matrix and therefore is
   a suitable candidate for the matrix flattening optimization.
   Return NULL if VAR_DECL is not such a decl.  Otherwise, allocate
   a MATRIX_INFO structure, fill it with the relevant information
   and return a pointer to it.
   TODO: handle also statically defined arrays.  */
468 static struct matrix_info *
469 analyze_matrix_decl (tree var_decl)
470 {
471   struct matrix_info *m_node, tmpmi, *mi;
472   tree var_type;
473   int dim_num = 0;
474 
475   gcc_assert (matrices_to_reorg);
476 
477   if (TREE_CODE (var_decl) == PARM_DECL)
478     var_type = DECL_ARG_TYPE (var_decl);
479   else if (TREE_CODE (var_decl) == VAR_DECL)
480     var_type = TREE_TYPE (var_decl);
481   else
482     return NULL;
483 
484   if (!POINTER_TYPE_P (var_type))
485     return NULL;
486 
487   while (POINTER_TYPE_P (var_type))
488     {
489       var_type = TREE_TYPE (var_type);
490       dim_num++;
491     }
492 
493   if (dim_num <= 1)
494     return NULL;
495 
496   if (!COMPLETE_TYPE_P (var_type)
497       || TREE_CODE (TYPE_SIZE_UNIT (var_type)) != INTEGER_CST)
498     return NULL;
499 
500   /* Check to see if this pointer is already in there.  */
501   tmpmi.decl = var_decl;
502   mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi);
503 
504   if (mi)
505     return NULL;
506 
507   /* Record the matrix.  */
508 
509   m_node = (struct matrix_info *) xcalloc (1, sizeof (struct matrix_info));
510   m_node->decl = var_decl;
511   m_node->num_dims = dim_num;
512   m_node->free_stmts
513     = (struct free_info *) xcalloc (dim_num, sizeof (struct free_info));
514 
515   /* Init min_indirect_level_escape to -1 to indicate that no escape
516      analysis has been done yet.  */
517   m_node->min_indirect_level_escape = -1;
518   m_node->is_transposed_p = false;
519 
520   return m_node;
521 }
522 
523 /* Free matrix E.  */
524 static void
525 mat_free (void *e)
526 {
527   struct matrix_info *mat = (struct matrix_info *) e;
528 
529   if (!mat)
530     return;
531 
532   free (mat->free_stmts);
533   free (mat->dim_hot_level);
534   free (mat->malloc_for_level);
535 }
536 
537 /* Find all potential matrices.
538    TODO: currently we handle only multidimensional
539    dynamically allocated arrays.  */
540 static void
541 find_matrices_decl (void)
542 {
543   struct matrix_info *tmp;
544   PTR *slot;
545   struct varpool_node *vnode;
546 
547   gcc_assert (matrices_to_reorg);
548 
549   /* For every global variable in the program:
550      Check to see if it's of a candidate type and record it.  */
551   for (vnode = varpool_nodes_queue; vnode; vnode = vnode->next_needed)
552     {
553       tree var_decl = vnode->decl;
554 
555       if (!var_decl || TREE_CODE (var_decl) != VAR_DECL)
556 	continue;
557 
558       if (matrices_to_reorg)
559 	if ((tmp = analyze_matrix_decl (var_decl)))
560 	  {
561 	    if (!TREE_ADDRESSABLE (var_decl))
562 	      {
563 		slot = htab_find_slot (matrices_to_reorg, tmp, INSERT);
564 		*slot = tmp;
565 	      }
566 	  }
567     }
}
570 
571 /* Mark that the matrix MI escapes at level L.  */
572 static void
573 mark_min_matrix_escape_level (struct matrix_info *mi, int l, gimple s)
574 {
575   if (mi->min_indirect_level_escape == -1
576       || (mi->min_indirect_level_escape > l))
577     {
578       mi->min_indirect_level_escape = l;
579       mi->min_indirect_level_escape_stmt = s;
580     }
581 }
582 
583 /* Find if the SSA variable is accessed inside the
584    tree and record the tree containing it.
585    The only relevant uses are the case of SSA_NAME, or SSA inside
586    MEM_REF, PLUS_EXPR, POINTER_PLUS_EXPR, MULT_EXPR.  */
587 static void
588 ssa_accessed_in_tree (tree t, struct ssa_acc_in_tree *a)
589 {
590   a->t_code = TREE_CODE (t);
591   switch (a->t_code)
592     {
593     case SSA_NAME:
594       if (t == a->ssa_var)
595 	a->var_found = true;
596       break;
597     case MEM_REF:
598       if (SSA_VAR_P (TREE_OPERAND (t, 0))
599 	  && TREE_OPERAND (t, 0) == a->ssa_var)
600 	a->var_found = true;
601       break;
602     default:
603       break;
604     }
605 }
606 
607 /* Find if the SSA variable is accessed on the right hand side of
608    gimple call STMT. */
609 
610 static void
611 ssa_accessed_in_call_rhs (gimple stmt, struct ssa_acc_in_tree *a)
612 {
613   tree decl;
614   tree arg;
615   size_t i;
616 
617   a->t_code = CALL_EXPR;
618   for (i = 0; i < gimple_call_num_args (stmt); i++)
619     {
620       arg = gimple_call_arg (stmt, i);
621       if (arg == a->ssa_var)
622 	{
623 	  a->var_found = true;
624 	  decl = gimple_call_fndecl (stmt);
625 	  a->t_tree = decl;
626 	  break;
627 	}
628     }
629 }
630 
631 /* Find if the SSA variable is accessed on the right hand side of
632    gimple assign STMT. */
633 
634 static void
635 ssa_accessed_in_assign_rhs (gimple stmt, struct ssa_acc_in_tree *a)
{
638   a->t_code = gimple_assign_rhs_code (stmt);
639   switch (a->t_code)
640     {
641       tree op1, op2;
642 
643     case SSA_NAME:
644     case MEM_REF:
645     CASE_CONVERT:
646     case VIEW_CONVERT_EXPR:
647       ssa_accessed_in_tree (gimple_assign_rhs1 (stmt), a);
648       break;
649     case POINTER_PLUS_EXPR:
650     case PLUS_EXPR:
651     case MULT_EXPR:
652       op1 = gimple_assign_rhs1 (stmt);
653       op2 = gimple_assign_rhs2 (stmt);
654 
655       if (op1 == a->ssa_var)
656 	{
657 	  a->var_found = true;
658 	  a->second_op = op2;
659 	}
660       else if (op2 == a->ssa_var)
661 	{
662 	  a->var_found = true;
663 	  a->second_op = op1;
664 	}
665       break;
666     default:
667       break;
668     }
669 }
670 
671 /* Record the access/allocation site information for matrix MI so we can
672    handle it later in transformation.  */
673 static void
674 record_access_alloc_site_info (struct matrix_info *mi, gimple stmt, tree offset,
675 			       tree index, int level, bool is_alloc)
676 {
677   struct access_site_info *acc_info;
678 
679   if (!mi->access_l)
680     mi->access_l = VEC_alloc (access_site_info_p, heap, 100);
681 
682   acc_info
683     = (struct access_site_info *)
684     xcalloc (1, sizeof (struct access_site_info));
685   acc_info->stmt = stmt;
686   acc_info->offset = offset;
687   acc_info->index = index;
688   acc_info->function_decl = current_function_decl;
689   acc_info->level = level;
690   acc_info->is_alloc = is_alloc;
691 
  VEC_safe_push (access_site_info_p, heap, mi->access_l, acc_info);
}
695 
/* Record the malloc as the allocation site of the given LEVEL.  But
   first make sure that all the size parameters passed to malloc in
   all the allocation sites can be pre-calculated before the call to
   the malloc of level 0 (the main malloc call).  */
700 static void
701 add_allocation_site (struct matrix_info *mi, gimple stmt, int level)
702 {
703   struct malloc_call_data mcd;
704 
705   /* Make sure that the allocation sites are in the same function.  */
706   if (!mi->allocation_function_decl)
707     mi->allocation_function_decl = current_function_decl;
708   else if (mi->allocation_function_decl != current_function_decl)
709     {
710       int min_malloc_level;
711 
712       gcc_assert (mi->malloc_for_level);
713 
      /* Find the minimum malloc level that has already been seen; we
         know its allocation function must be
         MI->allocation_function_decl, since it is different from
         CURRENT_FUNCTION_DECL.  The escaping level should then be
         MIN (LEVEL, MIN_MALLOC_LEVEL) - 1, and the allocation
         function must be set accordingly.  */
720       for (min_malloc_level = 0;
721 	   min_malloc_level < mi->max_malloced_level
722 	   && mi->malloc_for_level[min_malloc_level]; min_malloc_level++)
723 	;
724       if (level < min_malloc_level)
725 	{
726 	  mi->allocation_function_decl = current_function_decl;
727 	  mark_min_matrix_escape_level (mi, min_malloc_level, stmt);
728 	}
729       else
730 	{
731 	  mark_min_matrix_escape_level (mi, level, stmt);
	  /* It cannot be that LEVEL == MIN_MALLOC_LEVEL;
	     we would have returned earlier.  */
734 	  return;
735 	}
736     }
737 
738   /* Find the correct malloc information.  */
739   collect_data_for_malloc_call (stmt, &mcd);
740 
  /* We accept only calls to the malloc function; we do not accept
     other allocation calls such as calloc and realloc.  */
743   if (!mi->malloc_for_level)
744     {
745       mi->malloc_for_level = XCNEWVEC (gimple, level + 1);
746       mi->max_malloced_level = level + 1;
747     }
748   else if (mi->max_malloced_level <= level)
749     {
750       mi->malloc_for_level
751 	= XRESIZEVEC (gimple, mi->malloc_for_level, level + 1);
752 
      /* Zero the newly allocated items (the old array had
	 MAX_MALLOCED_LEVEL elements, the new one has LEVEL + 1).  */
      memset (&(mi->malloc_for_level[mi->max_malloced_level]),
	      0, (level + 1 - mi->max_malloced_level) * sizeof (gimple));
756 
757       mi->max_malloced_level = level + 1;
758     }
759   mi->malloc_for_level[level] = stmt;
760 }
761 
/* Given an assignment statement STMT whose left-hand side we know to
   be the matrix MI variable, traverse the immediate
   uses backwards until we get to a malloc site.  We make sure that
   there is one and only one malloc site that sets this variable.  When
   we are performing the flattening we generate a new variable that
   will hold the size for each dimension; each malloc that allocates a
   dimension has the size parameter; we use that parameter to
   initialize the dimension size variable so we can use it later in
   the address calculations.  LEVEL is the dimension we're inspecting.
   The allocation site (or the escape level) is recorded in MI.  */
772 
773 static void
774 analyze_matrix_allocation_site (struct matrix_info *mi, gimple stmt,
775 				int level, sbitmap visited)
776 {
777   if (gimple_assign_copy_p (stmt) || gimple_assign_cast_p (stmt))
778     {
779       tree rhs = gimple_assign_rhs1 (stmt);
780 
781       if (TREE_CODE (rhs) == SSA_NAME)
782 	{
783 	  gimple def = SSA_NAME_DEF_STMT (rhs);
784 
785 	  analyze_matrix_allocation_site (mi, def, level, visited);
786 	  return;
787 	}
      /* If we are back at the original matrix variable then we
         are sure that this is analyzed as an access site.  */
790       else if (rhs == mi->decl)
791 	return;
792     }
793   /* A result of call to malloc.  */
794   else if (is_gimple_call (stmt))
795     {
796       int call_flags = gimple_call_flags (stmt);
797 
798       if (!(call_flags & ECF_MALLOC))
799 	{
800 	  mark_min_matrix_escape_level (mi, level, stmt);
801 	  return;
802 	}
803       else
804 	{
805 	  tree malloc_fn_decl;
806 
807 	  malloc_fn_decl = gimple_call_fndecl (stmt);
808 	  if (malloc_fn_decl == NULL_TREE)
809 	    {
810 	      mark_min_matrix_escape_level (mi, level, stmt);
811 	      return;
812 	    }
813 	  if (DECL_FUNCTION_CODE (malloc_fn_decl) != BUILT_IN_MALLOC)
814 	    {
815 	      if (dump_file)
816 		fprintf (dump_file,
817 			 "Matrix %s is an argument to function %s\n",
818 			 get_name (mi->decl), get_name (malloc_fn_decl));
819 	      mark_min_matrix_escape_level (mi, level, stmt);
820 	      return;
821 	    }
822 	}
823       /* This is a call to malloc of level 'level'.
824 	 mi->max_malloced_level-1 == level  means that we've
825 	 seen a malloc statement of level 'level' before.
826 	 If the statement is not the same one that we've
827 	 seen before, then there's another malloc statement
828 	 for the same level, which means that we need to mark
829 	 it escaping.  */
830       if (mi->malloc_for_level
831 	  && mi->max_malloced_level-1 == level
832 	  && mi->malloc_for_level[level] != stmt)
833 	{
834 	  mark_min_matrix_escape_level (mi, level, stmt);
835 	  return;
836 	}
837       else
838 	add_allocation_site (mi, stmt, level);
839       return;
840     }
  /* It looks like we don't know what is happening in this
     statement, so be on the safe side and mark it as escaping.  */
843   mark_min_matrix_escape_level (mi, level, stmt);
844 }
845 
846 /* The transposing decision making.
847    In order to calculate the profitability of transposing, we collect two
848    types of information regarding the accesses:
849    1. profiling information used to express the hotness of an access, that
850    is how often the matrix is accessed by this access site (count of the
851    access site).
   2. which dimension in the access site is iterated by the innermost
      loop containing this access.
854 
855    The matrix will have a calculated value of weighted hotness for each
856    dimension.
857    Intuitively the hotness level of a dimension is a function of how
858    many times it was the most frequently accessed dimension in the
859    highly executed access sites of this matrix.
860 
   It is computed by the following equation:
862    m      n
863    __   __
864    \    \  dim_hot_level[i] +=
865    /_   /_
866    j     i
867                  acc[j]->dim[i]->iter_by_inner_loop * count(j)
868 
869   Where n is the number of dims and m is the number of the matrix
870   access sites. acc[j]->dim[i]->iter_by_inner_loop is 1 if acc[j]
871   iterates over dim[i] in innermost loop, and is 0 otherwise.
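
  As a worked example (with made-up numbers): consider a
  two-dimensional matrix with two access sites, one iterating dim[1]
  in its innermost loop with execution count 900 and one iterating
  dim[0] with count 100.  Then dim_hot_level[1] == 900 and
  dim_hot_level[0] == 100, so dimension 1 is the hottest and should
  be made the most local one.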
872 
  The organization of the new matrix should be according to the
  hotness of each dimension.  The hotness of a dimension implies
  the locality of its elements.  */
876 static int
877 analyze_transpose (void **slot, void *data ATTRIBUTE_UNUSED)
878 {
879   struct matrix_info *mi = (struct matrix_info *) *slot;
880   int min_escape_l = mi->min_indirect_level_escape;
881   struct loop *loop;
882   affine_iv iv;
883   struct access_site_info *acc_info;
884   int i;
885 
886   if (min_escape_l < 2 || !mi->access_l)
887     {
888       if (mi->access_l)
889 	{
890 	  FOR_EACH_VEC_ELT (access_site_info_p, mi->access_l, i, acc_info)
891 	    free (acc_info);
	  VEC_free (access_site_info_p, heap, mi->access_l);
	}
895       return 1;
896     }
897   if (!mi->dim_hot_level)
898     mi->dim_hot_level =
899       (gcov_type *) xcalloc (min_escape_l, sizeof (gcov_type));
900 
901 
902   for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
903        i++)
904     {
905       if (gimple_assign_rhs_code (acc_info->stmt) == POINTER_PLUS_EXPR
906 	  && acc_info->level < min_escape_l)
907 	{
908 	  loop = loop_containing_stmt (acc_info->stmt);
909 	  if (!loop || loop->inner)
910 	    {
911 	      free (acc_info);
912 	      continue;
913 	    }
914 	  if (simple_iv (loop, loop, acc_info->offset, &iv, true))
915 	    {
916 	      if (iv.step != NULL)
917 		{
918 		  HOST_WIDE_INT istep;
919 
920 		  istep = int_cst_value (iv.step);
921 		  if (istep != 0)
922 		    {
923 		      acc_info->iterated_by_inner_most_loop_p = 1;
924 		      mi->dim_hot_level[acc_info->level] +=
925 			gimple_bb (acc_info->stmt)->count;
926 		    }
927 
928 		}
929 	    }
930 	}
931       free (acc_info);
932     }
933   VEC_free (access_site_info_p, heap, mi->access_l);
934 
935   return 1;
936 }
937 
/* Find the index which defines the OFFSET from the base.
   We walk from use to def until we find how the offset was defined.  */
940 static tree
941 get_index_from_offset (tree offset, gimple def_stmt)
942 {
943   tree op1, op2, index;
944 
  if (gimple_code (def_stmt) == GIMPLE_PHI)
    return NULL_TREE;
947   if ((gimple_assign_copy_p (def_stmt) || gimple_assign_cast_p (def_stmt))
948       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
949     return get_index_from_offset (offset,
950 				  SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt)));
951   else if (is_gimple_assign (def_stmt)
952 	   && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
953     {
954       op1 = gimple_assign_rhs1 (def_stmt);
955       op2 = gimple_assign_rhs2 (def_stmt);
      if (TREE_CODE (op1) != INTEGER_CST && TREE_CODE (op2) != INTEGER_CST)
	return NULL_TREE;
958       index = (TREE_CODE (op1) == INTEGER_CST) ? op2 : op1;
959       return index;
960     }
961   else
962     return NULL_TREE;
963 }
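
/* For example (an illustrative sketch, not from a real dump): if the
   gimplified offset computation is

     D.10 = i.1 * 4;
     offset.2 = (sizetype) D.10;

   then walking backwards from OFFSET (here offset.2) through the
   cast reaches the MULT_EXPR and returns i.1 as the index.  */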
964 
/* Update MI->dimension_type_size[CURRENT_INDIRECT_LEVEL] with the size
   of the type related to the SSA_VAR, or the type related to the
   lhs of STMT, in the case that it is a MEM_REF.  */
968 static void
969 update_type_size (struct matrix_info *mi, gimple stmt, tree ssa_var,
970 		  int current_indirect_level)
971 {
972   tree lhs;
973   HOST_WIDE_INT type_size;
974 
  /* Update type according to the type of the MEM_REF expr.  */
976   if (is_gimple_assign (stmt)
977       && TREE_CODE (gimple_assign_lhs (stmt)) == MEM_REF)
978     {
979       lhs = gimple_assign_lhs (stmt);
980       gcc_assert (POINTER_TYPE_P
981 		  (TREE_TYPE (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
982       type_size =
983 	int_size_in_bytes (TREE_TYPE
984 			   (TREE_TYPE
985 			    (SSA_NAME_VAR (TREE_OPERAND (lhs, 0)))));
986     }
987   else
988     type_size = int_size_in_bytes (TREE_TYPE (ssa_var));
989 
990   /* Record the size of elements accessed (as a whole)
991      in the current indirection level (dimension).  If the size of
992      elements is not known at compile time, mark it as escaping.  */
993   if (type_size <= 0)
994     mark_min_matrix_escape_level (mi, current_indirect_level, stmt);
995   else
996     {
997       int l = current_indirect_level;
998 
999       if (!mi->dimension_type_size)
1000 	{
1001 	  mi->dimension_type_size
1002 	    = (HOST_WIDE_INT *) xcalloc (l + 1, sizeof (HOST_WIDE_INT));
1003 	  mi->dimension_type_size_len = l + 1;
1004 	}
1005       else if (mi->dimension_type_size_len < l + 1)
1006 	{
1007 	  mi->dimension_type_size
1008 	    = (HOST_WIDE_INT *) xrealloc (mi->dimension_type_size,
1009 					  (l + 1) * sizeof (HOST_WIDE_INT));
1010 	  memset (&mi->dimension_type_size[mi->dimension_type_size_len],
1011 		  0, (l + 1 - mi->dimension_type_size_len)
1012 		  * sizeof (HOST_WIDE_INT));
1013 	  mi->dimension_type_size_len = l + 1;
1014 	}
      /* Make sure all the accesses in the same level have the same
         type size.  */
1017       if (!mi->dimension_type_size[l])
1018 	mi->dimension_type_size[l] = type_size;
1019       else if (mi->dimension_type_size[l] != type_size)
1020 	mark_min_matrix_escape_level (mi, l, stmt);
1021     }
1022 }
1023 
1024 /* USE_STMT represents a GIMPLE_CALL, where one of the arguments is the
1025    ssa var that we want to check because it came from some use of matrix
1026    MI.  CURRENT_INDIRECT_LEVEL is the indirection level we reached so
1027    far.  */
1028 
1029 static int
1030 analyze_accesses_for_call_stmt (struct matrix_info *mi, tree ssa_var,
1031 				gimple use_stmt, int current_indirect_level)
1032 {
1033   tree fndecl = gimple_call_fndecl (use_stmt);
1034 
1035   if (gimple_call_lhs (use_stmt))
1036     {
1037       tree lhs = gimple_call_lhs (use_stmt);
1038       struct ssa_acc_in_tree lhs_acc, rhs_acc;
1039 
1040       memset (&lhs_acc, 0, sizeof (lhs_acc));
1041       memset (&rhs_acc, 0, sizeof (rhs_acc));
1042 
1043       lhs_acc.ssa_var = ssa_var;
1044       lhs_acc.t_code = ERROR_MARK;
1045       ssa_accessed_in_tree (lhs, &lhs_acc);
1046       rhs_acc.ssa_var = ssa_var;
1047       rhs_acc.t_code = ERROR_MARK;
1048       ssa_accessed_in_call_rhs (use_stmt, &rhs_acc);
1049 
      /* The SSA variable must appear on either the left side or the
	 right side for us to understand what is happening.
	 If the SSA_NAME is found on both sides we should be escaping
	 at this level, because in that case we cannot calculate the
	 address correctly.  */
1055       if ((lhs_acc.var_found && rhs_acc.var_found
1056 	   && lhs_acc.t_code == MEM_REF)
1057 	  || (!rhs_acc.var_found && !lhs_acc.var_found))
1058 	{
1059 	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1060 	  return current_indirect_level;
1061 	}
1062       gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1063 
1064       /* If we are storing to the matrix at some level, then mark it as
1065 	 escaping at that level.  */
1066       if (lhs_acc.var_found)
1067 	{
1068 	  int l = current_indirect_level + 1;
1069 
1070 	  gcc_assert (lhs_acc.t_code == MEM_REF);
1071 	  mark_min_matrix_escape_level (mi, l, use_stmt);
1072 	  return current_indirect_level;
1073 	}
1074     }
1075 
1076   if (fndecl)
1077     {
1078       if (DECL_FUNCTION_CODE (fndecl) != BUILT_IN_FREE)
1079 	{
1080 	  if (dump_file)
1081 	    fprintf (dump_file,
1082 		     "Matrix %s: Function call %s, level %d escapes.\n",
1083 		     get_name (mi->decl), get_name (fndecl),
1084 		     current_indirect_level);
1085 	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1086 	}
1087       else if (mi->free_stmts[current_indirect_level].stmt != NULL
1088 	       && mi->free_stmts[current_indirect_level].stmt != use_stmt)
1089 	mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1090       else
1091 	{
	  /* Record the free statements so we can delete them
	     later.  */
1094 	  int l = current_indirect_level;
1095 
1096 	  mi->free_stmts[l].stmt = use_stmt;
1097 	  mi->free_stmts[l].func = current_function_decl;
1098 	}
1099     }
1100   return current_indirect_level;
1101 }
1102 
/* USE_STMT represents a phi node of the ssa var that we want to
   check because it came from some use of matrix MI.
   We check all the escape levels that get to the PHI node
   and make sure they are all the same; if not (which is rare)
   we let the escape level be the minimum level that gets into
   that PHI, because starting from that level we cannot predict
   the behavior of the indirections.
   CURRENT_INDIRECT_LEVEL is the indirection level we reached so far.  */
1112 
1113 static void
1114 analyze_accesses_for_phi_node (struct matrix_info *mi, gimple use_stmt,
1115 			       int current_indirect_level, sbitmap visited,
1116 			       bool record_accesses)
{
  struct matrix_access_phi_node tmp_maphi, *maphi, **pmaphi;
1120 
1121   tmp_maphi.phi = use_stmt;
1122   if ((maphi = (struct matrix_access_phi_node *)
1123        htab_find (htab_mat_acc_phi_nodes, &tmp_maphi)))
1124     {
1125       if (maphi->indirection_level == current_indirect_level)
1126 	return;
1127       else
1128 	{
1129 	  int level = MIN (maphi->indirection_level,
1130 			   current_indirect_level);
1131 	  size_t j;
1132 	  gimple stmt = NULL;
1133 
1134 	  maphi->indirection_level = level;
1135 	  for (j = 0; j < gimple_phi_num_args (use_stmt); j++)
1136 	    {
1137 	      tree def = PHI_ARG_DEF (use_stmt, j);
1138 
1139 	      if (gimple_code (SSA_NAME_DEF_STMT (def)) != GIMPLE_PHI)
1140 		stmt = SSA_NAME_DEF_STMT (def);
1141 	    }
1142 	  mark_min_matrix_escape_level (mi, level, stmt);
1143 	}
1144       return;
1145     }
1146   maphi = (struct matrix_access_phi_node *)
1147     xcalloc (1, sizeof (struct matrix_access_phi_node));
1148   maphi->phi = use_stmt;
1149   maphi->indirection_level = current_indirect_level;
1150 
1151   /* Insert to hash table.  */
1152   pmaphi = (struct matrix_access_phi_node **)
1153     htab_find_slot (htab_mat_acc_phi_nodes, maphi, INSERT);
1154   gcc_assert (pmaphi);
1155   *pmaphi = maphi;
1156 
1157   if (!TEST_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt))))
1158     {
1159       SET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1160       analyze_matrix_accesses (mi, PHI_RESULT (use_stmt),
1161 			       current_indirect_level, false, visited,
1162 			       record_accesses);
1163       RESET_BIT (visited, SSA_NAME_VERSION (PHI_RESULT (use_stmt)));
1164     }
1165 }
1166 
/* USE_STMT represents an assignment statement (whose rhs or lhs
   includes the ssa var that we want to check because it came from
   some use of matrix MI).  CURRENT_INDIRECT_LEVEL is the indirection
   level we reached so far.  */
1170 
1171 static int
1172 analyze_accesses_for_assign_stmt (struct matrix_info *mi, tree ssa_var,
1173 				  gimple use_stmt, int current_indirect_level,
1174 				  bool last_op, sbitmap visited,
1175 				  bool record_accesses)
1176 {
1177   tree lhs = gimple_get_lhs (use_stmt);
1178   struct ssa_acc_in_tree lhs_acc, rhs_acc;
1179 
1180   memset (&lhs_acc, 0, sizeof (lhs_acc));
1181   memset (&rhs_acc, 0, sizeof (rhs_acc));
1182 
1183   lhs_acc.ssa_var = ssa_var;
1184   lhs_acc.t_code = ERROR_MARK;
1185   ssa_accessed_in_tree (lhs, &lhs_acc);
1186   rhs_acc.ssa_var = ssa_var;
1187   rhs_acc.t_code = ERROR_MARK;
1188   ssa_accessed_in_assign_rhs (use_stmt, &rhs_acc);
1189 
  /* The SSA variable must appear on either the left side or the
     right side for us to understand what is happening.
     If the SSA_NAME is found on both sides we should be escaping
     at this level, because in that case we cannot calculate the
     address correctly.  */
1195   if ((lhs_acc.var_found && rhs_acc.var_found
1196        && lhs_acc.t_code == MEM_REF)
1197       || (!rhs_acc.var_found && !lhs_acc.var_found))
1198     {
1199       mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1200       return current_indirect_level;
1201     }
1202   gcc_assert (!rhs_acc.var_found || !lhs_acc.var_found);
1203 
1204   /* If we are storing to the matrix at some level, then mark it as
1205      escaping at that level.  */
1206   if (lhs_acc.var_found)
1207     {
1208       int l = current_indirect_level + 1;
1209 
1210       gcc_assert (lhs_acc.t_code == MEM_REF);
1211 
1212       if (!(gimple_assign_copy_p (use_stmt)
1213 	    || gimple_assign_cast_p (use_stmt))
1214 	  || (TREE_CODE (gimple_assign_rhs1 (use_stmt)) != SSA_NAME))
1215 	mark_min_matrix_escape_level (mi, l, use_stmt);
1216       else
1217 	{
1218 	  gimple def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (use_stmt));
1219 	  analyze_matrix_allocation_site (mi, def_stmt, l, visited);
1220 	  if (record_accesses)
1221 	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1222 					   NULL_TREE, l, true);
1223 	  update_type_size (mi, use_stmt, NULL, l);
1224 	}
1225       return current_indirect_level;
1226     }
1227   /* Now, check the right-hand-side, to see how the SSA variable
1228      is used.  */
1229   if (rhs_acc.var_found)
1230     {
1231       if (rhs_acc.t_code != MEM_REF
1232 	  && rhs_acc.t_code != POINTER_PLUS_EXPR && rhs_acc.t_code != SSA_NAME)
1233 	{
1234 	  mark_min_matrix_escape_level (mi, current_indirect_level, use_stmt);
1235 	  return current_indirect_level;
1236 	}
      /* If the access in the RHS has an indirection, increase the
         indirection level.  */
1239       if (rhs_acc.t_code == MEM_REF)
1240 	{
1241 	  if (record_accesses)
1242 	    record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1243 					   NULL_TREE,
1244 					   current_indirect_level, true);
1245 	  current_indirect_level += 1;
1246 	}
1247       else if (rhs_acc.t_code == POINTER_PLUS_EXPR)
1248 	{
1249 	  gcc_assert (rhs_acc.second_op);
1250 	  if (last_op)
1251 	    /* Currently we support only one PLUS expression on the
1252 	       SSA_NAME that holds the base address of the current
1253 	       indirection level; to support more general case there
1254 	       is a need to hold a stack of expressions and regenerate
1255 	       the calculation later.  */
1256 	    mark_min_matrix_escape_level (mi, current_indirect_level,
1257 					  use_stmt);
1258 	  else
1259 	    {
1260 	      tree index;
1261 	      tree op1, op2;
1262 
1263 	      op1 = gimple_assign_rhs1 (use_stmt);
1264 	      op2 = gimple_assign_rhs2 (use_stmt);
1265 
1266 	      op2 = (op1 == ssa_var) ? op2 : op1;
1267 	      if (TREE_CODE (op2) == INTEGER_CST)
1268 		index =
1269 		  build_int_cst (TREE_TYPE (op1),
1270 				 TREE_INT_CST_LOW (op2) /
1271 				 int_size_in_bytes (TREE_TYPE (op1)));
1272 	      else
1273 		{
1274 		  index =
1275 		    get_index_from_offset (op2, SSA_NAME_DEF_STMT (op2));
1276 		  if (index == NULL_TREE)
1277 		    {
1278 		      mark_min_matrix_escape_level (mi,
1279 						    current_indirect_level,
1280 						    use_stmt);
1281 		      return current_indirect_level;
1282 		    }
1283 		}
1284 	      if (record_accesses)
1285 		record_access_alloc_site_info (mi, use_stmt, op2,
1286 					       index,
1287 					       current_indirect_level, false);
1288 	    }
1289 	}
      /* If we are storing this level of indirection, mark it as
         escaping.  */
1292       if (lhs_acc.t_code == MEM_REF || TREE_CODE (lhs) != SSA_NAME)
1293 	{
1294 	  int l = current_indirect_level;
1295 
	  /* One exception is when we are storing to the matrix
	     variable itself; this is the case of malloc.  We must
	     make sure that it is the one and only call to malloc, so
	     we call analyze_matrix_allocation_site to check
	     this out.  */
1301 	  if (TREE_CODE (lhs) != VAR_DECL || lhs != mi->decl)
1302 	    mark_min_matrix_escape_level (mi, current_indirect_level,
1303 					  use_stmt);
1304 	  else
1305 	    {
1306 	      /* Also update the escaping level.  */
1307 	      analyze_matrix_allocation_site (mi, use_stmt, l, visited);
1308 	      if (record_accesses)
1309 		record_access_alloc_site_info (mi, use_stmt, NULL_TREE,
1310 					       NULL_TREE, l, true);
1311 	    }
1312 	}
1313       else
1314 	{
1315 	  /* We are placing it in an SSA, follow that SSA.  */
1316 	  analyze_matrix_accesses (mi, lhs,
1317 				   current_indirect_level,
1318 				   rhs_acc.t_code == POINTER_PLUS_EXPR,
1319 				   visited, record_accesses);
1320 	}
1321     }
1322   return current_indirect_level;
1323 }
1324 
/* Given an SSA_VAR (coming from a use statement of the matrix MI),
1326    follow its uses and level of indirection and find out the minimum
1327    indirection level it escapes in (the highest dimension) and the maximum
1328    level it is accessed in (this will be the actual dimension of the
1329    matrix).  The information is accumulated in MI.
1330    We look at the immediate uses, if one escapes we finish; if not,
1331    we make a recursive call for each one of the immediate uses of the
1332    resulting SSA name.  */
1333 static void
1334 analyze_matrix_accesses (struct matrix_info *mi, tree ssa_var,
1335 			 int current_indirect_level, bool last_op,
1336 			 sbitmap visited, bool record_accesses)
1337 {
1338   imm_use_iterator imm_iter;
1339   use_operand_p use_p;
1340 
1341   update_type_size (mi, SSA_NAME_DEF_STMT (ssa_var), ssa_var,
1342 		    current_indirect_level);
1343 
1344   /* We don't go beyond the escaping level when we are performing the
1345      flattening.  NOTE: we keep the last indirection level that doesn't
1346      escape.  */
1347   if (mi->min_indirect_level_escape > -1
1348       && mi->min_indirect_level_escape <= current_indirect_level)
1349     return;
1350 
/* Now go over the uses of the SSA_NAME and check how it is used in
   each one of them.  We are mainly looking for the pattern MEM_REF,
   then POINTER_PLUS_EXPR, then MEM_REF etc., while in between there
   can be any number of copies and casts.  */
1355   gcc_assert (TREE_CODE (ssa_var) == SSA_NAME);
1356 
1357   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_var)
1358   {
1359     gimple use_stmt = USE_STMT (use_p);
1360     if (gimple_code (use_stmt) == GIMPLE_PHI)
      /* We check all the escape levels that get to the PHI node
         and make sure they are all the same; if not (which is rare)
         we let the escape level be the minimum level that gets into
         that PHI, because starting from that level we cannot predict
         the behavior of the indirections.  */
1366 
1367       analyze_accesses_for_phi_node (mi, use_stmt, current_indirect_level,
1368 				     visited, record_accesses);
1369 
1370     else if (is_gimple_call (use_stmt))
1371       analyze_accesses_for_call_stmt (mi, ssa_var, use_stmt,
1372 				      current_indirect_level);
1373     else if (is_gimple_assign (use_stmt))
1374       current_indirect_level =
1375 	analyze_accesses_for_assign_stmt (mi, ssa_var, use_stmt,
1376 					  current_indirect_level, last_op,
1377 					  visited, record_accesses);
1378   }
1379 }
1380 
1381 typedef struct
1382 {
1383   tree fn;
1384   gimple stmt;
1385 } check_var_data;
1386 
/* A walk_tree function to go over the VAR_DECL and PARM_DECL nodes of
   the malloc size expression and check that those aren't changed
   over the function.  */
1390 static tree
check_var_notmodified_p (tree *tp, int *walk_subtrees, void *data)
1392 {
1393   basic_block bb;
1394   tree t = *tp;
1395   check_var_data *callback_data = (check_var_data*) data;
1396   tree fn = callback_data->fn;
1397   gimple_stmt_iterator gsi;
1398   gimple stmt;
1399 
1400   if (TREE_CODE (t) != VAR_DECL && TREE_CODE (t) != PARM_DECL)
1401     return NULL_TREE;
1402 
1403   FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (fn))
1404   {
1405     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1406       {
1407 	stmt = gsi_stmt (gsi);
1408 	if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1409 	  continue;
1410 	if (gimple_get_lhs (stmt) == t)
1411 	  {
1412 	    callback_data->stmt = stmt;
1413 	    return t;
1414 	  }
1415       }
1416   }
1417   *walk_subtrees = 1;
1418   return NULL_TREE;
1419 }
1420 
1421 /* Go backwards in the use-def chains and find out the expression
1422    represented by the possible SSA name in STMT, until it is composed
   of only VAR_DECL, PARM_DECL and INTEGER_CST nodes.  In the case of
   phi nodes
1424    we make sure that all the arguments represent the same subexpression,
1425    otherwise we fail.  */
1426 
1427 static tree
1428 can_calculate_stmt_before_stmt (gimple stmt, sbitmap visited)
1429 {
1430   tree op1, op2, res;
1431   enum tree_code code;
1432 
1433   switch (gimple_code (stmt))
1434     {
1435     case GIMPLE_ASSIGN:
1436       code = gimple_assign_rhs_code (stmt);
1437       op1 = gimple_assign_rhs1 (stmt);
1438 
1439       switch (code)
1440 	{
1441 	case POINTER_PLUS_EXPR:
1442 	case PLUS_EXPR:
1443 	case MINUS_EXPR:
1444 	case MULT_EXPR:
1445 
1446 	  op2 = gimple_assign_rhs2 (stmt);
1447 	  op1 = can_calculate_expr_before_stmt (op1, visited);
1448 	  if (!op1)
1449 	    return NULL_TREE;
1450 	  op2 = can_calculate_expr_before_stmt (op2, visited);
1451 	  if (op2)
1452 	    return fold_build2 (code, gimple_expr_type (stmt), op1, op2);
1453 	  return NULL_TREE;
1454 
1455 	CASE_CONVERT:
1456 	  res = can_calculate_expr_before_stmt (op1, visited);
1457 	  if (res != NULL_TREE)
1458 	    return build1 (code, gimple_expr_type (stmt), res);
1459 	  else
1460 	    return NULL_TREE;
1461 
1462 	default:
1463 	  if (gimple_assign_single_p (stmt))
1464 	    return can_calculate_expr_before_stmt (op1, visited);
1465 	  else
1466 	    return NULL_TREE;
1467 	}
1468 
1469     case GIMPLE_PHI:
1470       {
1471 	size_t j;
1472 
1473 	res = NULL_TREE;
1474 	/* Make sure all the arguments represent the same value.  */
1475 	for (j = 0; j < gimple_phi_num_args (stmt); j++)
1476 	  {
1477 	    tree new_res;
1478 	    tree def = PHI_ARG_DEF (stmt, j);
1479 
1480 	    new_res = can_calculate_expr_before_stmt (def, visited);
1481 	    if (res == NULL_TREE)
1482 	      res = new_res;
1483 	    else if (!new_res || !expressions_equal_p (res, new_res))
1484 	      return NULL_TREE;
1485 	  }
1486 	return res;
1487       }
1488 
1489     default:
1490       return NULL_TREE;
1491     }
1492 }
1493 
1494 /* Go backwards in the use-def chains and find out the expression
1495    represented by the possible SSA name in EXPR, until it is composed
   of only VAR_DECL, PARM_DECL and INTEGER_CST nodes.  In the case of
   phi nodes
1497    we make sure that all the arguments represent the same subexpression,
1498    otherwise we fail.  */
1499 static tree
1500 can_calculate_expr_before_stmt (tree expr, sbitmap visited)
1501 {
1502   gimple def_stmt;
1503   tree res;
1504 
1505   switch (TREE_CODE (expr))
1506     {
1507     case SSA_NAME:
      /* In the case of a loop, we don't know how to represent this
	 expression.  */
1509       if (TEST_BIT (visited, SSA_NAME_VERSION (expr)))
1510 	return NULL_TREE;
1511 
1512       SET_BIT (visited, SSA_NAME_VERSION (expr));
1513       def_stmt = SSA_NAME_DEF_STMT (expr);
1514       res = can_calculate_stmt_before_stmt (def_stmt, visited);
1515       RESET_BIT (visited, SSA_NAME_VERSION (expr));
1516       return res;
1517     case VAR_DECL:
1518     case PARM_DECL:
1519     case INTEGER_CST:
1520       return expr;
1521 
1522     default:
1523       return NULL_TREE;
1524     }
1525 }
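
/* For illustration (hypothetical GIMPLE): given

     N.5 = N;
     D.11 = N.5 * 4;

   can_calculate_expr_before_stmt (D.11, visited) folds the use-def
   chain back into the expression N * 4, which is composed only of a
   VAR_DECL and an INTEGER_CST and can therefore be recomputed before
   the level-0 malloc.  A value that differs between phi arguments,
   or one defined in a cycle, yields NULL_TREE instead.  */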
1526 
1527 /* There should be only one allocation function for the dimensions
1528    that don't escape. Here we check the allocation sites in this
1529    function. We must make sure that all the dimensions are allocated
1530    using malloc and that the malloc size parameter expression could be
1531    pre-calculated before the call to the malloc of dimension 0.
1532 
1533    Given a candidate matrix for flattening -- MI -- check if it's
1534    appropriate for flattening -- we analyze the allocation
1535    sites that we recorded in the previous analysis.  The result of the
1536    analysis is a level of indirection (matrix dimension) in which the
1537    flattening is safe.  We check the following conditions:
1538    1. There is only one allocation site for each dimension.
1539    2. The allocation sites of all the dimensions are in the same
1540       function.
1541       (The above two are being taken care of during the analysis when
1542       we check the allocation site).
1543    3. All the dimensions that we flatten are allocated at once; thus
1544       the total size must be known before the allocation of the
1545       dimension 0 (top level) -- we must make sure we represent the
      size of the allocation as an expression of global parameters or
      constants, and that those don't change over the function.  */
1548 
1549 static int
1550 check_allocation_function (void **slot, void *data ATTRIBUTE_UNUSED)
1551 {
1552   int level;
1553   struct matrix_info *mi = (struct matrix_info *) *slot;
1554   sbitmap visited;
1555 
  if (!mi->malloc_for_level)
    return 1;

  /* Do nothing if the current function is not the allocation
     function of MI.  */
  if (mi->allocation_function_decl != current_function_decl
      /* We aren't in the main allocation function yet.  */
      || !mi->malloc_for_level[0])
    return 1;

  /* Allocate the bitmap only after the early returns above, so that
     it is not leaked when we bail out.  */
  visited = sbitmap_alloc (num_ssa_names);
1567 
1568   for (level = 1; level < mi->max_malloced_level; level++)
1569     if (!mi->malloc_for_level[level])
1570       break;
1571 
1572   mark_min_matrix_escape_level (mi, level, NULL);
1573 
1574   /* Check if the expression of the size passed to malloc could be
1575      pre-calculated before the malloc of level 0.  */
1576   for (level = 1; level < mi->min_indirect_level_escape; level++)
1577     {
1578       gimple call_stmt;
1579       tree size;
1580       struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
1581 
1582       call_stmt = mi->malloc_for_level[level];
1583 
1584       /* Find the correct malloc information.  */
1585       collect_data_for_malloc_call (call_stmt, &mcd);
1586 
1587       /* No need to check anticipation for constants.  */
1588       if (TREE_CODE (mcd.size_var) == INTEGER_CST)
1589 	{
1590 	  if (!mi->dimension_size)
1591 	    {
1592 	      mi->dimension_size =
1593 		(tree *) xcalloc (mi->min_indirect_level_escape,
1594 				  sizeof (tree));
1595 	      mi->dimension_size_orig =
1596 		(tree *) xcalloc (mi->min_indirect_level_escape,
1597 				  sizeof (tree));
1598 	    }
1599 	  mi->dimension_size[level] = mcd.size_var;
1600 	  mi->dimension_size_orig[level] = mcd.size_var;
1601 	  continue;
1602 	}
      /* ??? Here we should also add a way to calculate the size
         expression, not only know that it can be anticipated.  */
1605       sbitmap_zero (visited);
1606       size = can_calculate_expr_before_stmt (mcd.size_var, visited);
1607       if (size == NULL_TREE)
1608 	{
1609 	  mark_min_matrix_escape_level (mi, level, call_stmt);
1610 	  if (dump_file)
1611 	    fprintf (dump_file,
1612 		     "Matrix %s: Cannot calculate the size of allocation, escaping at level %d\n",
1613 		     get_name (mi->decl), level);
1614 	  break;
1615 	}
1616       if (!mi->dimension_size)
1617 	{
1618 	  mi->dimension_size =
1619 	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1620 	  mi->dimension_size_orig =
1621 	    (tree *) xcalloc (mi->min_indirect_level_escape, sizeof (tree));
1622 	}
1623       mi->dimension_size[level] = size;
1624       mi->dimension_size_orig[level] = size;
1625     }
1626 
  /* We don't need those anymore.  */
  for (level = mi->min_indirect_level_escape;
       level < mi->max_malloced_level; level++)
    mi->malloc_for_level[level] = NULL;

  sbitmap_free (visited);
  return 1;
1632 }
1633 
1634 /* Track all access and allocation sites.  */
1635 static void
1636 find_sites_in_func (bool record)
1637 {
1638   sbitmap visited_stmts_1;
1639 
1640   gimple_stmt_iterator gsi;
1641   gimple stmt;
1642   basic_block bb;
1643   struct matrix_info tmpmi, *mi;
1644 
1645   visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1646 
1647   FOR_EACH_BB (bb)
1648   {
1649     for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1650       {
1651 	tree lhs;
1652 
1653 	stmt = gsi_stmt (gsi);
1654 	lhs = gimple_get_lhs (stmt);
1655 	if (lhs != NULL_TREE
1656 	    && TREE_CODE (lhs) == VAR_DECL)
1657 	  {
1658 	    tmpmi.decl = lhs;
1659 	    if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1660 							&tmpmi)))
1661 	      {
1662 		sbitmap_zero (visited_stmts_1);
1663 		analyze_matrix_allocation_site (mi, stmt, 0, visited_stmts_1);
1664 	      }
1665 	  }
1666 	if (is_gimple_assign (stmt)
1667 	    && gimple_assign_single_p (stmt)
1668 	    && TREE_CODE (lhs) == SSA_NAME
1669 	    && TREE_CODE (gimple_assign_rhs1 (stmt)) == VAR_DECL)
1670 	  {
1671 	    tmpmi.decl = gimple_assign_rhs1 (stmt);
1672 	    if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg,
1673 							&tmpmi)))
1674 	      {
1675 		sbitmap_zero (visited_stmts_1);
1676 		analyze_matrix_accesses (mi, lhs, 0,
1677 					 false, visited_stmts_1, record);
1678 	      }
1679 	  }
1680       }
1681   }
1682   sbitmap_free (visited_stmts_1);
1683 }
1684 
/* Traverse the use-def chains to see if there are matrices that
   are passed through pointers such that we cannot know how they are
   accessed.  For each SSA name defined from a global variable of our
   interest, we traverse its use-def chains and follow the
   indirections, recording the level of indirection at which the use
   of the variable escapes.  A use of a pointer escapes when it is
   passed to a function, stored into memory, or assigned (except in
   malloc and free calls).  */
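
/* For instance (illustrative): given a matrix of interest declared as
   "int **a", a statement such as "foo (a[i])" passes the row pointer
   to a function, so that use escapes at indirection level 1 and only
   the dimensions below that level could be flattened.  */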
1692 
1693 static void
1694 record_all_accesses_in_func (void)
1695 {
1696   unsigned i;
1697   sbitmap visited_stmts_1;
1698 
1699   visited_stmts_1 = sbitmap_alloc (num_ssa_names);
1700 
1701   for (i = 0; i < num_ssa_names; i++)
1702     {
1703       struct matrix_info tmpmi, *mi;
1704       tree ssa_var = ssa_name (i);
1705       tree rhs, lhs;
1706 
1707       if (!ssa_var
1708 	  || !is_gimple_assign (SSA_NAME_DEF_STMT (ssa_var))
1709 	  || !gimple_assign_single_p (SSA_NAME_DEF_STMT (ssa_var)))
1710 	continue;
1711       rhs = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (ssa_var));
1712       lhs = gimple_assign_lhs (SSA_NAME_DEF_STMT (ssa_var));
1713       if (TREE_CODE (rhs) != VAR_DECL && TREE_CODE (lhs) != VAR_DECL)
1714 	continue;
1715 
1716       /* If the RHS is a matrix that we want to analyze, follow the def-use
1717          chain for this SSA_VAR and check for escapes or apply the
1718          flattening.  */
1719       tmpmi.decl = rhs;
1720       if ((mi = (struct matrix_info *) htab_find (matrices_to_reorg, &tmpmi)))
1721 	{
1722 	  /* This variable will track the visited PHI nodes, so we can limit
1723 	     its size to the maximum number of SSA names.  */
1724 	  sbitmap_zero (visited_stmts_1);
1725 	  analyze_matrix_accesses (mi, ssa_var,
1726 				   0, false, visited_stmts_1, true);
1727 
1728 	}
1729     }
1730   sbitmap_free (visited_stmts_1);
1731 }
1732 
/* Used to rescale the offset RESULT, originally computed with a
   stride of NEW_VAL bytes, so that it corresponds to a stride of
   ORIG bytes, i.e. to compute RESULT * ORIG / NEW_VAL.  If ORIG and
   NEW_VAL are both powers of 2, shift operations can be done, else
   division and multiplication.  */
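
/* For instance (illustrative): ORIG == 4 and NEW_VAL == 8 yield
   RESULT >> 1, while ORIG == 8 and NEW_VAL == 4 yield RESULT << 1;
   strides that are not powers of 2 take the division path.  */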
1737 
1738 static tree
1739 compute_offset (HOST_WIDE_INT orig, HOST_WIDE_INT new_val, tree result)
1740 {
1741 
1742   int x, y;
1743   tree result1, ratio, log, orig_tree, new_tree;
1744 
1745   x = exact_log2 (orig);
1746   y = exact_log2 (new_val);
1747 
1748   if (x != -1 && y != -1)
1749     {
1750       if (x == y)
1751         return result;
1752       else if (x > y)
1753         {
1754           log = build_int_cst (TREE_TYPE (result), x - y);
1755           result1 =
1756             fold_build2 (LSHIFT_EXPR, TREE_TYPE (result), result, log);
1757           return result1;
1758         }
1759       log = build_int_cst (TREE_TYPE (result), y - x);
1760       result1 = fold_build2 (RSHIFT_EXPR, TREE_TYPE (result), result, log);
1761 
1762       return result1;
1763     }
  orig_tree = build_int_cst (TREE_TYPE (result), orig);
  new_tree = build_int_cst (TREE_TYPE (result), new_val);
  /* Divide by NEW_VAL first, so that this path computes
     RESULT * ORIG / NEW_VAL just like the shift path above.  */
  ratio = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (result), result, new_tree);
  result1 = fold_build2 (MULT_EXPR, TREE_TYPE (result), ratio, orig_tree);
1768 
1769   return result1;
1770 }
1771 
1772 
/* We know that we are allowed to perform matrix flattening (according to the
   escape analysis), so we traverse the use-def chains of the SSA variables
   defined by the global variables pointing to the matrices of our interest.
   In each use of an SSA name we calculate the offset from the base address
   according to the following equation:

     An access a[I1][I2]...[Ik], where D1..Dk are the lengths of the
     dimensions, the escape level is m <= k, and a' is the newly
     allocated matrix, is translated to

       b[I(m+1)]...[Ik]

     where

       b = a' + (I1*D2*D3*...*Dm + I2*D3*...*Dm + ... + I(m-1)*Dm + Im).  */
1788 
1789 static int
1790 transform_access_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1791 {
1792   gimple_stmt_iterator gsi;
1793   struct matrix_info *mi = (struct matrix_info *) *slot;
1794   int min_escape_l = mi->min_indirect_level_escape;
1795   struct access_site_info *acc_info;
1796   enum tree_code code;
1797   int i;
1798 
1799   if (min_escape_l < 2 || !mi->access_l)
1800     return 1;
1801   for (i = 0; VEC_iterate (access_site_info_p, mi->access_l, i, acc_info);
1802        i++)
1803     {
1804       /* This is possible because we collect the access sites before
1805          we determine the final minimum indirection level.  */
1806       if (acc_info->level >= min_escape_l)
1807 	{
1808 	  free (acc_info);
1809 	  continue;
1810 	}
1811       if (acc_info->is_alloc)
1812 	{
1813 	  if (acc_info->level >= 0 && gimple_bb (acc_info->stmt))
1814 	    {
1815 	      ssa_op_iter iter;
1816 	      tree def;
1817 	      gimple stmt = acc_info->stmt;
1818 	      tree lhs;
1819 
1820 	      FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
1821 		mark_sym_for_renaming (SSA_NAME_VAR (def));
1822 	      gsi = gsi_for_stmt (stmt);
1823 	      gcc_assert (is_gimple_assign (acc_info->stmt));
1824 	      lhs = gimple_assign_lhs (acc_info->stmt);
1825 	      if (TREE_CODE (lhs) == SSA_NAME
1826 		  && acc_info->level < min_escape_l - 1)
1827 		{
1828 		  imm_use_iterator imm_iter;
1829 		  use_operand_p use_p;
1830 		  gimple use_stmt;
1831 
1832 		  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
1833 		    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1834 		  {
1835 		    tree rhs, tmp;
1836 		    gimple new_stmt;
1837 
1838 		    gcc_assert (gimple_assign_rhs_code (acc_info->stmt)
1839 				== MEM_REF);
1840 		    /* Emit convert statement to convert to type of use.  */
1841 		    tmp = create_tmp_var (TREE_TYPE (lhs), "new");
1842 		    add_referenced_var (tmp);
1843 		    rhs = gimple_assign_rhs1 (acc_info->stmt);
1844 		    rhs = fold_convert (TREE_TYPE (tmp),
1845 					TREE_OPERAND (rhs, 0));
1846 		    new_stmt = gimple_build_assign (tmp, rhs);
1847 		    tmp = make_ssa_name (tmp, new_stmt);
1848 		    gimple_assign_set_lhs (new_stmt, tmp);
1849 		    gsi = gsi_for_stmt (acc_info->stmt);
1850 		    gsi_insert_after (&gsi, new_stmt, GSI_SAME_STMT);
1851 		    SET_USE (use_p, tmp);
1852 		  }
1853 		}
1854 	      if (acc_info->level < min_escape_l - 1)
1855 		gsi_remove (&gsi, true);
1856 	    }
1857 	  free (acc_info);
1858 	  continue;
1859 	}
1860       code = gimple_assign_rhs_code (acc_info->stmt);
1861       if (code == MEM_REF
1862 	  && acc_info->level < min_escape_l - 1)
1863 	{
	  /* Replace the MEM_REF with a NOP (cast); usually we are
	     casting from "pointer to type" to "type".  */
1866 	  tree t =
1867 	    build1 (NOP_EXPR, TREE_TYPE (gimple_assign_rhs1 (acc_info->stmt)),
1868 		    TREE_OPERAND (gimple_assign_rhs1 (acc_info->stmt), 0));
1869 	  gimple_assign_set_rhs_code (acc_info->stmt, NOP_EXPR);
1870 	  gimple_assign_set_rhs1 (acc_info->stmt, t);
1871 	}
1872       else if (code == POINTER_PLUS_EXPR
1873 	       && acc_info->level < (min_escape_l))
1874 	{
1875 	  imm_use_iterator imm_iter;
1876 	  use_operand_p use_p;
1877 
1878 	  tree offset;
1879 	  int k = acc_info->level;
1880 	  tree num_elements, total_elements;
1881 	  tree tmp1;
1882 	  tree d_size = mi->dimension_size[k];
1883 
	  /* We already made sure in the analysis that the first operand
	     is the base and the second is the offset.  */
1886 	  offset = acc_info->offset;
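	  /* If dimension K is mapped to the innermost dimension of the
	     transposed matrix, rescale the byte offset from its original
	     stride to the element size; otherwise recompute it as
	     INDEX times the size of the sub-matrix below the new
	     position of the dimension.  */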
1887 	  if (mi->dim_map[k] == min_escape_l - 1)
1888 	    {
1889 	      if (!check_transpose_p || mi->is_transposed_p == false)
1890 		tmp1 = offset;
1891 	      else
1892 		{
1893 		  tree new_offset;
1894 
1895 		  new_offset =
1896 		    compute_offset (mi->dimension_type_size[min_escape_l],
1897 				    mi->dimension_type_size[k + 1], offset);
1898 
1899 		  total_elements = new_offset;
1900 		  if (new_offset != offset)
1901 		    {
1902 		      gsi = gsi_for_stmt (acc_info->stmt);
1903 		      tmp1 = force_gimple_operand_gsi (&gsi, total_elements,
1904 						       true, NULL,
1905 						       true, GSI_SAME_STMT);
1906 		    }
1907 		  else
1908 		    tmp1 = offset;
1909 		}
1910 	    }
1911 	  else
1912 	    {
	      d_size = mi->dimension_size[mi->dim_map[k] + 1];
	      num_elements =
		fold_build2 (MULT_EXPR, sizetype,
			     fold_convert (sizetype, acc_info->index),
			     fold_convert (sizetype, d_size));
1917 	      add_referenced_var (d_size);
1918 	      gsi = gsi_for_stmt (acc_info->stmt);
1919 	      tmp1 = force_gimple_operand_gsi (&gsi, num_elements, true,
1920 					       NULL, true, GSI_SAME_STMT);
1921 	    }
1922 	  /* Replace the offset if needed.  */
1923 	  if (tmp1 != offset)
1924 	    {
1925 	      if (TREE_CODE (offset) == SSA_NAME)
1926 		{
1927 		  gimple use_stmt;
1928 
1929 		  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, offset)
1930 		    FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1931 		      if (use_stmt == acc_info->stmt)
1932 		        SET_USE (use_p, tmp1);
1933 		}
1934 	      else
1935 		{
1936 		  gcc_assert (TREE_CODE (offset) == INTEGER_CST);
1937 		  gimple_assign_set_rhs2 (acc_info->stmt, tmp1);
1938 		  update_stmt (acc_info->stmt);
1939 		}
1940 	    }
1941 	}
      /* ??? Meanwhile this happens because we record the same access
         site more than once; we should be using a hash table to
         avoid this and insert the STMT of the access site only
         once.
	 else
	   gcc_unreachable (); */
1948       free (acc_info);
1949     }
1950   VEC_free (access_site_info_p, heap, mi->access_l);
1951 
1952   update_ssa (TODO_update_ssa);
1953 #ifdef ENABLE_CHECKING
1954   verify_ssa (true);
1955 #endif
1956   return 1;
1957 }
1958 
/* Bubble-sort the array A of N counts into ascending order, permuting
   DIM_MAP the same way so that it reflects the new order.  */
1960 
1961 static void
1962 sort_dim_hot_level (gcov_type * a, int *dim_map, int n)
1963 {
1964   int i, j, tmp1;
1965   gcov_type tmp;
1966 
1967   for (i = 0; i < n - 1; i++)
1968     {
1969       for (j = 0; j < n - 1 - i; j++)
1970 	{
1971 	  if (a[j + 1] < a[j])
1972 	    {
1973 	      tmp = a[j];	/* swap a[j] and a[j+1]      */
1974 	      a[j] = a[j + 1];
1975 	      a[j + 1] = tmp;
1976 	      tmp1 = dim_map[j];
1977 	      dim_map[j] = dim_map[j + 1];
1978 	      dim_map[j + 1] = tmp1;
1979 	    }
1980 	}
1981     }
1982 }
1983 
/* Replace the multiple mallocs (one for each dimension) with one malloc
   whose size is DIM1*DIM2*...*DIMN*size_of_element.
   Make sure that we hold the size from the malloc site in a
   new global variable; this way we ensure that the size doesn't
   change and is accessible from all the other functions that
   use the matrix.  Also, the original calls to free are deleted
   and replaced by a new call to free the flattened matrix, as
   illustrated below.  */
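
/* For instance (illustrative): for a two-level matrix the frees

     for (i = 0; i < dim1; i++)
       free (a[i]);
     free (a);

   collapse into the single free (a) of the flattened block.  */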
1991 
1992 static int
1993 transform_allocation_sites (void **slot, void *data ATTRIBUTE_UNUSED)
1994 {
1995   int i;
1996   struct matrix_info *mi;
1997   tree type, oldfn, prev_dim_size;
1998   gimple call_stmt_0, use_stmt;
1999   struct cgraph_node *c_node;
2000   struct cgraph_edge *e;
2001   gimple_stmt_iterator gsi;
2002   struct malloc_call_data mcd = {NULL, NULL_TREE, NULL_TREE};
2003   HOST_WIDE_INT element_size;
2004 
2005   imm_use_iterator imm_iter;
2006   use_operand_p use_p;
2007   tree old_size_0, tmp;
2008   int min_escape_l;
2009   int id;
2010 
2011   mi = (struct matrix_info *) *slot;
2012 
2013   min_escape_l = mi->min_indirect_level_escape;
2014 
2015   if (!mi->malloc_for_level)
2016     mi->min_indirect_level_escape = 0;
2017 
2018   if (mi->min_indirect_level_escape < 2)
2019     return 1;
2020 
2021   mi->dim_map = (int *) xcalloc (mi->min_indirect_level_escape, sizeof (int));
2022   for (i = 0; i < mi->min_indirect_level_escape; i++)
2023     mi->dim_map[i] = i;
2024   if (check_transpose_p)
2025     {
2026       int i;
2027 
2028       if (dump_file)
2029 	{
2030 	  fprintf (dump_file, "Matrix %s:\n", get_name (mi->decl));
2031 	  for (i = 0; i < min_escape_l; i++)
2032 	    {
2033 	      fprintf (dump_file, "dim %d before sort ", i);
2034 	      if (mi->dim_hot_level)
2035 		fprintf (dump_file,
2036 			 "count is  " HOST_WIDEST_INT_PRINT_DEC "  \n",
2037 			 mi->dim_hot_level[i]);
2038 	    }
2039 	}
2040       sort_dim_hot_level (mi->dim_hot_level, mi->dim_map,
2041 			  mi->min_indirect_level_escape);
2042       if (dump_file)
2043 	for (i = 0; i < min_escape_l; i++)
2044 	  {
2045 	    fprintf (dump_file, "dim %d after sort\n", i);
2046 	    if (mi->dim_hot_level)
2047 	      fprintf (dump_file, "count is  " HOST_WIDE_INT_PRINT_DEC
2048 		       "  \n", (HOST_WIDE_INT) mi->dim_hot_level[i]);
2049 	  }
2050       for (i = 0; i < mi->min_indirect_level_escape; i++)
2051 	{
2052 	  if (dump_file)
2053 	    fprintf (dump_file, "dim_map[%d] after sort %d\n", i,
2054 		     mi->dim_map[i]);
2055 	  if (mi->dim_map[i] != i)
2056 	    {
2057 	      if (dump_file)
2058 		fprintf (dump_file,
2059 			 "Transposed dimensions: dim %d is now dim %d\n",
2060 			 mi->dim_map[i], i);
2061 	      mi->is_transposed_p = true;
2062 	    }
2063 	}
2064     }
2070   /* Call statement of allocation site of level 0.  */
2071   call_stmt_0 = mi->malloc_for_level[0];
2072 
  /* Find the correct malloc information.  */
2074   collect_data_for_malloc_call (call_stmt_0, &mcd);
2075 
2076   mi->dimension_size[0] = mcd.size_var;
2077   mi->dimension_size_orig[0] = mcd.size_var;
2078   /* Make sure that the variables in the size expression for
2079      all the dimensions (above level 0) aren't modified in
2080      the allocation function.  */
2081   for (i = 1; i < mi->min_indirect_level_escape; i++)
2082     {
2083       tree t;
2084       check_var_data data;
2085 
2086       /* mi->dimension_size must contain the expression of the size calculated
2087          in check_allocation_function.  */
2088       gcc_assert (mi->dimension_size[i]);
2089 
2090       data.fn = mi->allocation_function_decl;
2091       data.stmt = NULL;
2092       t = walk_tree_without_duplicates (&(mi->dimension_size[i]),
2093 					check_var_notmodified_p,
2094 					&data);
2095       if (t != NULL_TREE)
2096 	{
2097 	  mark_min_matrix_escape_level (mi, i, data.stmt);
2098 	  break;
2099 	}
2100     }
2101 
2102   if (mi->min_indirect_level_escape < 2)
2103     return 1;
2104 
  /* The size expressions must be available before the call to the
     malloc of level 0, so insert the new statements at that point.  */
2107   gsi = gsi_for_stmt (call_stmt_0);
2108 
2109   /* Find out the size of each dimension by looking at the malloc
2110      sites and create a global variable to hold it.
2111      We add the assignment to the global before the malloc of level 0.  */
2112 
2113   /* To be able to produce gimple temporaries.  */
2114   oldfn = current_function_decl;
2115   current_function_decl = mi->allocation_function_decl;
2116   push_cfun (DECL_STRUCT_FUNCTION (mi->allocation_function_decl));
2117 
  /* Set the dimension sizes as follows:
     DIM_SIZE[i] = NUM_ELEMENTS[i] * NUM_ELEMENTS[i+1] * ...
                   * NUM_ELEMENTS[n-1] * ELEMENT_SIZE,
     where n is the minimum escape level; i.e. DIM_SIZE[i] is the size
     in bytes of the sub-matrix rooted at dimension i.  */
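  /* For instance (illustrative): for a 5 x 10 x 20 matrix of ints
     (ELEMENT_SIZE == 4) this sets DIM_SIZE[2] = 20*4 = 80,
     DIM_SIZE[1] = 10*80 = 800 and DIM_SIZE[0] = 5*800 = 4000,
     the size of the whole flattened allocation.  */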
2121   element_size = mi->dimension_type_size[mi->min_indirect_level_escape];
2122   prev_dim_size = NULL_TREE;
2123 
2124   for (i = mi->min_indirect_level_escape - 1; i >= 0; i--)
2125     {
2126       tree dim_size, dim_var;
2127       gimple stmt;
2128       tree d_type_size;
2129 
2130       /* Now put the size expression in a global variable and initialize it to
2131          the size expression before the malloc of level 0.  */
2132       dim_var =
2133 	add_new_static_var (TREE_TYPE
2134 			    (mi->dimension_size_orig[mi->dim_map[i]]));
2135       type = TREE_TYPE (mi->dimension_size_orig[mi->dim_map[i]]);
2136 
2137       /* DIM_SIZE = MALLOC_SIZE_PARAM / TYPE_SIZE.  */
2138       /* Find which dim ID becomes dim I.  */
2139       for (id = 0; id < mi->min_indirect_level_escape; id++)
2140 	if (mi->dim_map[id] == i)
2141 	  break;
      d_type_size =
	build_int_cst (type, mi->dimension_type_size[id + 1]);
2144       if (!prev_dim_size)
2145 	prev_dim_size = build_int_cst (type, element_size);
2146       if (!check_transpose_p && i == mi->min_indirect_level_escape - 1)
2147 	{
2148 	  dim_size = mi->dimension_size_orig[id];
2149 	}
2150       else
2151 	{
2152 	  dim_size =
2153 	    fold_build2 (TRUNC_DIV_EXPR, type, mi->dimension_size_orig[id],
2154 			 d_type_size);
2155 
2156 	  dim_size = fold_build2 (MULT_EXPR, type, dim_size, prev_dim_size);
2157 	}
2158       dim_size = force_gimple_operand_gsi (&gsi, dim_size, true, NULL,
2159 					   true, GSI_SAME_STMT);
2160       /* GLOBAL_HOLDING_THE_SIZE = DIM_SIZE.  */
2161       stmt = gimple_build_assign (dim_var, dim_size);
2162       mark_symbols_for_renaming (stmt);
2163       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2164 
2165       prev_dim_size = mi->dimension_size[i] = dim_var;
2166     }
2167   update_ssa (TODO_update_ssa);
2168   /* Replace the malloc size argument in the malloc of level 0 to be
2169      the size of all the dimensions.  */
2170   c_node = cgraph_get_node (mi->allocation_function_decl);
2171   gcc_checking_assert (c_node);
2172   old_size_0 = gimple_call_arg (call_stmt_0, 0);
2173   tmp = force_gimple_operand_gsi (&gsi, mi->dimension_size[0], true,
2174 				  NULL, true, GSI_SAME_STMT);
2175   if (TREE_CODE (old_size_0) == SSA_NAME)
2176     {
2177       FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, old_size_0)
2178 	FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2179 	if (use_stmt == call_stmt_0)
2180 	SET_USE (use_p, tmp);
2181     }
2182   /* When deleting the calls to malloc we need also to remove the edge from
2183      the call graph to keep it consistent.  Notice that cgraph_edge may
2184      create a new node in the call graph if there is no node for the given
2185      declaration; this shouldn't be the case but currently there is no way to
2186      check this outside of "cgraph.c".  */
2187   for (i = 1; i < mi->min_indirect_level_escape; i++)
2188     {
2189       gimple_stmt_iterator gsi;
2190 
2191       gimple call_stmt = mi->malloc_for_level[i];
2192       gcc_assert (is_gimple_call (call_stmt));
2193       e = cgraph_edge (c_node, call_stmt);
2194       gcc_assert (e);
2195       cgraph_remove_edge (e);
2196       gsi = gsi_for_stmt (call_stmt);
2197       /* Remove the call stmt.  */
2198       gsi_remove (&gsi, true);
2199       /* Remove the assignment of the allocated area.  */
2200       FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter,
2201 			     gimple_call_lhs (call_stmt))
2202       {
2203 	gsi = gsi_for_stmt (use_stmt);
2204 	gsi_remove (&gsi, true);
2205       }
2206     }
2207   update_ssa (TODO_update_ssa);
2208 #ifdef ENABLE_CHECKING
2209   verify_ssa (true);
2210 #endif
2211   /* Delete the calls to free.  */
2212   for (i = 1; i < mi->min_indirect_level_escape; i++)
2213     {
2214       gimple_stmt_iterator gsi;
2215 
      /* ??? It is not clear why this case is possible, but we have
         failed on it once.  */
2217       if (!mi->free_stmts[i].stmt)
2218 	continue;
2219 
2220       c_node = cgraph_get_node (mi->free_stmts[i].func);
2221       gcc_checking_assert (c_node);
2222       gcc_assert (is_gimple_call (mi->free_stmts[i].stmt));
2223       e = cgraph_edge (c_node, mi->free_stmts[i].stmt);
2224       gcc_assert (e);
2225       cgraph_remove_edge (e);
2226       current_function_decl = mi->free_stmts[i].func;
2227       set_cfun (DECL_STRUCT_FUNCTION (mi->free_stmts[i].func));
2228       gsi = gsi_for_stmt (mi->free_stmts[i].stmt);
2229       gsi_remove (&gsi, true);
2230     }
2231   /* Return to the previous situation.  */
2232   current_function_decl = oldfn;
2233   pop_cfun ();
  return 1;
}
2237 
2238 
2239 /* Print out the results of the escape analysis.  */
2240 static int
2241 dump_matrix_reorg_analysis (void **slot, void *data ATTRIBUTE_UNUSED)
2242 {
2243   struct matrix_info *mi = (struct matrix_info *) *slot;
2244 
2245   if (!dump_file)
2246     return 1;
2247   fprintf (dump_file, "Matrix \"%s\"; Escaping Level: %d, Num Dims: %d,",
2248 	   get_name (mi->decl), mi->min_indirect_level_escape, mi->num_dims);
2249   fprintf (dump_file, " Malloc Dims: %d, ", mi->max_malloced_level);
2250   fprintf (dump_file, "\n");
2251   if (mi->min_indirect_level_escape >= 2)
2252     fprintf (dump_file, "Flattened %d dimensions \n",
2253 	     mi->min_indirect_level_escape);
2254   return 1;
2255 }
2256 
2257 /* Perform matrix flattening.  */
2258 
2259 static unsigned int
2260 matrix_reorg (void)
2261 {
2262   struct cgraph_node *node;
2263 
  /* Checking for transposition requires profile information.  */
  check_transpose_p = profile_info != NULL;
  /* If there are hand-written vectors, we skip this optimization.  */
2269   for (node = cgraph_nodes; node; node = node->next)
2270     if (!may_flatten_matrices (node))
2271       return 0;
2272   matrices_to_reorg = htab_create (37, mtt_info_hash, mtt_info_eq, mat_free);
2273   /* Find and record all potential matrices in the program.  */
2274   find_matrices_decl ();
2275   /* Analyze the accesses of the matrices (escaping analysis).  */
2276   for (node = cgraph_nodes; node; node = node->next)
2277     if (node->analyzed)
2278       {
2279 	tree temp_fn;
2280 
2281 	temp_fn = current_function_decl;
2282 	current_function_decl = node->decl;
2283 	push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2284 	bitmap_obstack_initialize (NULL);
2285 	gimple_register_cfg_hooks ();
2286 
2287 	if (!gimple_in_ssa_p (cfun))
2288 	  {
2289 	    free_dominance_info (CDI_DOMINATORS);
2290 	    free_dominance_info (CDI_POST_DOMINATORS);
2291 	    pop_cfun ();
2292 	    current_function_decl = temp_fn;
2293 	    bitmap_obstack_release (NULL);
2294 
2295 	    return 0;
2296 	  }
2297 
2298 #ifdef ENABLE_CHECKING
2299 	verify_flow_info ();
2300 #endif
2301 
2302 	if (!matrices_to_reorg)
2303 	  {
2304 	    free_dominance_info (CDI_DOMINATORS);
2305 	    free_dominance_info (CDI_POST_DOMINATORS);
2306 	    pop_cfun ();
2307 	    current_function_decl = temp_fn;
2308 	    bitmap_obstack_release (NULL);
2309 
2310 	    return 0;
2311 	  }
2312 
	/* Create hash table for PHI nodes.  */
2314 	htab_mat_acc_phi_nodes = htab_create (37, mat_acc_phi_hash,
2315 					      mat_acc_phi_eq, free);
2316 	if (!check_transpose_p)
2317 	  find_sites_in_func (false);
2318 	else
2319 	  {
2320 	    find_sites_in_func (true);
2321 	    loop_optimizer_init (LOOPS_NORMAL);
2322 	    if (current_loops)
2323 	      scev_initialize ();
2324 	    htab_traverse (matrices_to_reorg, analyze_transpose, NULL);
2325 	    if (current_loops)
2326 	      {
2327 		scev_finalize ();
2328 		loop_optimizer_finalize ();
2329 		current_loops = NULL;
2330 	      }
2331 	  }
	/* If the current function is the allocation function of any of
	   the matrices, check its allocation sites and the escape
	   levels.  */
2334 	htab_traverse (matrices_to_reorg, check_allocation_function, NULL);
2335 	free_dominance_info (CDI_DOMINATORS);
2336 	free_dominance_info (CDI_POST_DOMINATORS);
2337 	pop_cfun ();
2338 	current_function_decl = temp_fn;
2339 	bitmap_obstack_release (NULL);
2340       }
2341   htab_traverse (matrices_to_reorg, transform_allocation_sites, NULL);
2342   /* Now transform the accesses.  */
2343   for (node = cgraph_nodes; node; node = node->next)
2344     if (node->analyzed)
2345       {
	/* Allocation sites have already been handled by the traversal
	   above.  */
2347 	tree temp_fn;
2348 
2349 	temp_fn = current_function_decl;
2350 	current_function_decl = node->decl;
2351 	push_cfun (DECL_STRUCT_FUNCTION (node->decl));
2352 	bitmap_obstack_initialize (NULL);
2353 	gimple_register_cfg_hooks ();
2354 	record_all_accesses_in_func ();
2355 	htab_traverse (matrices_to_reorg, transform_access_sites, NULL);
2356         cgraph_rebuild_references ();
2357 	free_dominance_info (CDI_DOMINATORS);
2358 	free_dominance_info (CDI_POST_DOMINATORS);
2359 	pop_cfun ();
2360 	current_function_decl = temp_fn;
2361 	bitmap_obstack_release (NULL);
2362       }
2363   htab_traverse (matrices_to_reorg, dump_matrix_reorg_analysis, NULL);
2364 
2365   current_function_decl = NULL;
2366   set_cfun (NULL);
2367   matrices_to_reorg = NULL;
2368   return 0;
2369 }
2370 
2371 
2372 /* The condition for matrix flattening to be performed.  */
2373 static bool
2374 gate_matrix_reorg (void)
2375 {
2376   return flag_ipa_matrix_reorg && flag_whole_program;
2377 }
2378 
2379 struct simple_ipa_opt_pass pass_ipa_matrix_reorg =
2380 {
2381  {
2382   SIMPLE_IPA_PASS,
2383   "matrix-reorg",		/* name */
2384   gate_matrix_reorg,		/* gate */
2385   matrix_reorg,			/* execute */
2386   NULL,				/* sub */
2387   NULL,				/* next */
2388   0,				/* static_pass_number */
2389   TV_NONE,			/* tv_id */
2390   0,				/* properties_required */
2391   0,				/* properties_provided */
2392   0,				/* properties_destroyed */
2393   0,				/* todo_flags_start */
2394   TODO_dump_cgraph      	/* todo_flags_finish */
2395  }
2396 };
2397