1 /*********************************************************************/
2 /*                                                                   */
3 /*             Optimized BLAS libraries                              */
4 /*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
5 /*                                                                   */
6 /* Copyright (c) The University of Texas, 2009. All rights reserved. */
7 /* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
8 /* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
9 /* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
10 /* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
11 /* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
12 /* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
13 /* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
14 /* Under no circumstances shall University be liable for incidental, */
15 /* special, indirect, direct or consequential damages or loss of     */
16 /* profits, interruption of business, or related expenses which may  */
17 /* arise from use of Software or Documentation, including but not    */
18 /* limited to those resulting from defects in Software and/or        */
19 /* Documentation, or loss or inaccuracy of data of any kind.         */
20 /*********************************************************************/
21 
22 #include <stdio.h>
23 #include "common.h"
24 #ifdef FUNCTION_PROFILE
25 #include "functable.h"
26 #endif
27 
28 #ifdef XDOUBLE
29 #define ERROR_NAME "XPOTRI"
30 #elif defined(DOUBLE)
31 #define ERROR_NAME "ZPOTRI"
32 #else
33 #define ERROR_NAME "CPOTRI"
34 #endif
35 
36 static blasint (*trtri_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={
37   TRTRI_UN_SINGLE, TRTRI_LN_SINGLE,
38 };
39 
40 static blasint (*lauum_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={
41   LAUUM_U_SINGLE, LAUUM_L_SINGLE,
42 };
43 
#ifdef SMP
/*
 * Parallel counterparts of the kernel tables, only built when SMP is defined.
 * Both are indexed by the uplo code computed in NAME():
 * slot 0 is used for UPLO == 'U', slot 1 for UPLO == 'L'.
 */
static blasint (*trtri_parallel[2])(blas_arg_t *, BLASLONG *, BLASLONG *,
                                    FLOAT *, FLOAT *, BLASLONG) = {
  TRTRI_UN_PARALLEL,  /* uplo == 0 ('U') */
  TRTRI_LN_PARALLEL   /* uplo == 1 ('L') */
};

static blasint (*lauum_parallel[2])(blas_arg_t *, BLASLONG *, BLASLONG *,
                                    FLOAT *, FLOAT *, BLASLONG) = {
  LAUUM_U_PARALLEL,  /* uplo == 0 ('U') */
  LAUUM_L_PARALLEL   /* uplo == 1 ('L') */
};
#endif
53 
/*
 * ?POTRI interface for the complex types (ERROR_NAME is "XPOTRI", "ZPOTRI"
 * or "CPOTRI" depending on XDOUBLE / DOUBLE).
 *
 * Validates the arguments, then runs the TRTRI kernel and, if that reports
 * no error, the LAUUM kernel on the matrix described by (N, a, ldA).  The
 * upper/lower kernel variant is chosen from UPLO; in SMP builds the parallel
 * kernels are used unless num_cpu_avail(4) reports a single thread.
 *
 *   UPLO  'U' or 'L' (compared after TOUPPER); selects the kernel variant.
 *   N     order of the matrix; must be >= 0.  N == 0 returns immediately.
 *   a     matrix storage, handed to the kernels through args.a.
 *   ldA   leading dimension of a; must be >= MAX(1, N).
 *   Info  out: 0 on success, -k when argument k was rejected (after xerbla
 *         has been called), otherwise the status returned by the kernels.
 *
 * Always returns 0; the outcome is reported through *Info.
 */
int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){

  blas_arg_t args;

  blasint uplo_arg = *UPLO;
  blasint uplo;
  blasint info;
  FLOAT *buffer;
#ifdef PPC440
  extern
#endif
  FLOAT *sa, *sb;

  PRINT_DEBUG_NAME;

  args.n    = *N;
  args.a    = (void *)a;
  args.lda  = *ldA;

  TOUPPER(uplo_arg);

  /* Map UPLO onto the index used by the kernel tables; -1 means invalid. */
  uplo = -1;
  if (uplo_arg == 'U') uplo = 0;
  if (uplo_arg == 'L') uplo = 1;

  /* Checks run from the last argument to the first so that the lowest
     numbered offending argument is the one that ends up in info. */
  info  = 0;
  if (args.lda  < MAX(1,args.n)) info = 4;
  if (args.n    < 0)             info = 2;
  if (uplo < 0)                  info = 1;

  if (info) {
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    *Info = - info;
    return 0;
  }

  *Info = 0;

  /* Nothing to do for an empty matrix. */
  if (args.n == 0) return 0;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

#ifndef PPC440
  buffer = (FLOAT *)blas_memory_alloc(1);

  /* Carve the two work areas out of the buffer: sa sits GEMM_OFFSET_A past
     its start, sb follows a GEMM_P x GEMM_Q panel (size masked with
     GEMM_ALIGN) plus GEMM_OFFSET_B.  Offsets are applied to the address as
     an integer. */
  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif

#ifdef SMP
  args.nthreads = num_cpu_avail(4);

  if (args.nthreads == 1) {
#endif

  info = (trtri_single[uplo])(&args, NULL, NULL, sa, sb, 0);

  /* LAUUM only runs when TRTRI reported no error. */
  if (!info) {
    info = (lauum_single[uplo])(&args, NULL, NULL, sa, sb, 0);
  }

#ifdef SMP
  } else {
    info = (trtri_parallel[uplo])(&args, NULL, NULL, sa, sb, 0);

    if (!info) {
      info = (lauum_parallel[uplo])(&args, NULL, NULL, sa, sb, 0);
    }
  }
#endif

  /* Single store for whichever kernel path ran. */
  *Info = info;

#ifndef PPC440
  blas_memory_free(buffer);
#endif

  /* The matrix is n-by-n and args.m is never assigned in this function, so
     the profile counts are expressed in terms of args.n only. */
  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.n * args.n,  2. / 3. * args.n * args.n * args.n);

  IDEBUG_END;

  return 0;
}
141