1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "FLAME.h"
12 
FLASH_LU_incpiv_create_hier_matrices(FLA_Obj A_flat,dim_t depth,dim_t * b_flash,dim_t b_alg,FLA_Obj * A,FLA_Obj * p,FLA_Obj * L)13 FLA_Error FLASH_LU_incpiv_create_hier_matrices( FLA_Obj A_flat, dim_t depth, dim_t* b_flash, dim_t b_alg, FLA_Obj* A, FLA_Obj* p, FLA_Obj* L )
14 {
15    FLA_Datatype datatype;
16    dim_t        m, n;
17    dim_t        one = 1;
18 
19    // *** The current LU_incpiv algorithm implemented assumes that
20    // the matrix has a hierarchical depth of 1. We check for that here, because
21    // we anticipate that we'll use a more general algorithm in the future, and
22    // we don't want to forget to remove the constraint. ***
23    if ( depth != 1 )
24    {
25       FLA_Print_message( "FLASH_LU_incpiv() currently only supports matrices of depth 1",
26                          __FILE__, __LINE__ );
27       FLA_Abort();
28    }
29 
30    // Create hierarchical copy of matrix A_flat.
31    FLASH_Obj_create_hier_copy_of_flat( A_flat, depth, b_flash, A );
32 
33    // Query the datatype of matrix A_flat.
34    datatype = FLA_Obj_datatype( A_flat );
35 
36    // If the user passed in zero for b_alg, then we need to set the algorithmic
37    // (inner) blocksize to a reasonable default value.
38    if ( b_alg == 0 )
39    {
40       b_alg = FLASH_LU_incpiv_determine_alg_blocksize( *A );
41    }
42 
43    // Query the element (not scalar) dimensions of the new hierarchical matrix.
44    // This is done so we can create p and L with full blocks for the bottom
45    // and right "edge cases" of A.
46    m = FLA_Obj_length( *A );
47    n = FLA_Obj_width ( *A );
48 
49    // Create hierarchical matrices p and L.
50    FLASH_Obj_create_ext( FLA_INT,  m * b_flash[0], n,
51                          depth, b_flash, &one,
52                          p );
53 
54    FLASH_Obj_create_ext( datatype, m * b_flash[0], n * b_alg,
55                          depth, b_flash, &b_alg,
56                          L );
57 
58    return FLA_SUCCESS;
59 }
60 
61 
FLASH_LU_incpiv_determine_alg_blocksize(FLA_Obj A)62 dim_t FLASH_LU_incpiv_determine_alg_blocksize( FLA_Obj A )
63 {
64    dim_t b_alg;
65    dim_t b_flash;
66 
67    // Acquire the storage blocksize.
68    b_flash = FLA_Obj_length( *FLASH_OBJ_PTR_AT( A ) );
69 
70    // Scale the storage blocksize by a pre-defined scalar to arrive at a
71    // reasonable algorithmic blocksize, but make sure it's at least 1.
72    b_alg = ( dim_t ) max( ( double ) b_flash * FLA_LU_INNER_TO_OUTER_B_RATIO, 1 );
73 
74    return b_alg;
75 }
76