1 /*
2
3 Copyright (C) 2014, The University of Texas at Austin
4
5 This file is part of libflame and is available under the 3-Clause
6 BSD license, which can be found in the LICENSE file at the top-level
7 directory, or at http://opensource.org/licenses/BSD-3-Clause
8
9 */
10
11 #include "FLAME.h"
12
FLASH_LU_incpiv_create_hier_matrices(FLA_Obj A_flat,dim_t depth,dim_t * b_flash,dim_t b_alg,FLA_Obj * A,FLA_Obj * p,FLA_Obj * L)13 FLA_Error FLASH_LU_incpiv_create_hier_matrices( FLA_Obj A_flat, dim_t depth, dim_t* b_flash, dim_t b_alg, FLA_Obj* A, FLA_Obj* p, FLA_Obj* L )
14 {
15 FLA_Datatype datatype;
16 dim_t m, n;
17 dim_t one = 1;
18
19 // *** The current LU_incpiv algorithm implemented assumes that
20 // the matrix has a hierarchical depth of 1. We check for that here, because
21 // we anticipate that we'll use a more general algorithm in the future, and
22 // we don't want to forget to remove the constraint. ***
23 if ( depth != 1 )
24 {
25 FLA_Print_message( "FLASH_LU_incpiv() currently only supports matrices of depth 1",
26 __FILE__, __LINE__ );
27 FLA_Abort();
28 }
29
30 // Create hierarchical copy of matrix A_flat.
31 FLASH_Obj_create_hier_copy_of_flat( A_flat, depth, b_flash, A );
32
33 // Query the datatype of matrix A_flat.
34 datatype = FLA_Obj_datatype( A_flat );
35
36 // If the user passed in zero for b_alg, then we need to set the algorithmic
37 // (inner) blocksize to a reasonable default value.
38 if ( b_alg == 0 )
39 {
40 b_alg = FLASH_LU_incpiv_determine_alg_blocksize( *A );
41 }
42
43 // Query the element (not scalar) dimensions of the new hierarchical matrix.
44 // This is done so we can create p and L with full blocks for the bottom
45 // and right "edge cases" of A.
46 m = FLA_Obj_length( *A );
47 n = FLA_Obj_width ( *A );
48
49 // Create hierarchical matrices p and L.
50 FLASH_Obj_create_ext( FLA_INT, m * b_flash[0], n,
51 depth, b_flash, &one,
52 p );
53
54 FLASH_Obj_create_ext( datatype, m * b_flash[0], n * b_alg,
55 depth, b_flash, &b_alg,
56 L );
57
58 return FLA_SUCCESS;
59 }
60
61
FLASH_LU_incpiv_determine_alg_blocksize(FLA_Obj A)62 dim_t FLASH_LU_incpiv_determine_alg_blocksize( FLA_Obj A )
63 {
64 dim_t b_alg;
65 dim_t b_flash;
66
67 // Acquire the storage blocksize.
68 b_flash = FLA_Obj_length( *FLASH_OBJ_PTR_AT( A ) );
69
70 // Scale the storage blocksize by a pre-defined scalar to arrive at a
71 // reasonable algorithmic blocksize, but make sure it's at least 1.
72 b_alg = ( dim_t ) max( ( double ) b_flash * FLA_LU_INNER_TO_OUTER_B_RATIO, 1 );
73
74 return b_alg;
75 }
76