1 /*********************************************************************/
2 /*                                                                   */
3 /*             Optimized BLAS libraries                              */
4 /*                     By Kazushige Goto <kgoto@tacc.utexas.edu>     */
5 /*                                                                   */
6 /* Copyright (c) The University of Texas, 2009. All rights reserved. */
7 /* UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING  */
8 /* THIS SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF      */
9 /* MERCHANTABILITY, FITNESS FOR ANY PARTICULAR PURPOSE,              */
10 /* NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY WARRANTY  */
11 /* THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF     */
12 /* TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO   */
13 /* THE USE OF THE SOFTWARE OR DOCUMENTATION.                         */
14 /* Under no circumstances shall University be liable for incidental, */
15 /* special, indirect, direct or consequential damages or loss of     */
16 /* profits, interruption of business, or related expenses which may  */
17 /* arise from use of Software or Documentation, including but not    */
18 /* limited to those resulting from defects in Software and/or        */
19 /* Documentation, or loss or inaccuracy of data of any kind.         */
20 /*********************************************************************/
21 
22 #include <stdio.h>
23 #include "common.h"
24 #ifdef FUNCTION_PROFILE
25 #include "functable.h"
26 #endif
27 
28 #ifdef XDOUBLE
29 #define ERROR_NAME "XGETRS"
30 #elif defined(DOUBLE)
31 #define ERROR_NAME "ZGETRS"
32 #else
33 #define ERROR_NAME "CGETRS"
34 #endif
35 
36 static blasint (*getrs_single[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={
37   GETRS_N_SINGLE, GETRS_T_SINGLE, GETRS_R_SINGLE, GETRS_C_SINGLE,
38 };
39 
40 #ifdef SMP
41 static blasint (*getrs_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) ={
42   GETRS_N_PARALLEL, GETRS_T_PARALLEL, GETRS_R_PARALLEL, GETRS_C_PARALLEL,
43 };
44 #endif
45 
/*
 * Fortran-callable GETRS interface for the complex precisions
 * (reported to xerbla as XGETRS / ZGETRS / CGETRS depending on the build).
 *
 * All arguments are passed by pointer.
 *
 *   TRANS  selects the kernel: 'N', 'T', 'R' or 'C' (case-insensitive).
 *   N      order of the matrix (stored in args.m), must be >= 0.
 *   NRHS   number of right-hand sides (stored in args.n), must be >= 0.
 *   a      matrix data handed to the kernel as args.a.
 *   ldA    leading dimension of a, must be >= max(1, N).
 *   ipiv   pivot indices, handed to the kernel as args.c.
 *   b      right-hand side / solution data, handed to the kernel as args.b.
 *   ldB    leading dimension of b, must be >= max(1, N).
 *   Info   output status: 0 on success, -i if the i-th argument is invalid.
 *
 * The function itself always returns 0; status is reported through Info.
 */
int NAME(char *TRANS, blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA,
	    blasint *ipiv, FLOAT *b, blasint *ldB, blasint *Info){

  char trans_arg = *TRANS;

  blas_arg_t args;

  blasint info;
  int trans;
  FLOAT *buffer;
#ifdef PPC440
  extern
#endif
  FLOAT *sa, *sb;

  PRINT_DEBUG_NAME;

  args.m    = *N;
  args.n    = *NRHS;
  args.a    = (void *)a;
  args.lda  = *ldA;
  args.b    = (void *)b;
  args.ldb  = *ldB;
  args.c    = (void *)ipiv;

  info = 0;

  /* Map the (upper-cased) TRANS character onto the kernel table index. */
  TOUPPER(trans_arg);
  trans = -1;

  if (trans_arg == 'N') trans = 0;
  if (trans_arg == 'T') trans = 1;
  if (trans_arg == 'R') trans = 2;
  if (trans_arg == 'C') trans = 3;

  /*
   * Argument checks run from the last argument to the first so that a later
   * assignment overrides an earlier one: the lowest-numbered offending
   * argument position is the one that ends up in `info`.
   */
  if (args.ldb  < MAX(1, args.m)) info = 8;
  if (args.lda  < MAX(1, args.m)) info = 5;
  if (args.n    < 0) info = 3;
  if (args.m    < 0) info = 2;
  if (trans     < 0) info = 1;

  if (info != 0) {
    /*
     * Report the failure to the caller as well: without this store *Info
     * would be left untouched whenever xerbla returns, and the caller would
     * read an undefined status.
     */
    *Info = -info;
    BLASFUNC(xerbla)(ERROR_NAME, &info, sizeof(ERROR_NAME));
    return 0;
  }

  args.alpha = NULL;
  args.beta  = NULL;

  *Info = info;

  /* Nothing to solve for an empty system or zero right-hand sides. */
  if (args.m == 0 || args.n == 0) return 0;

  IDEBUG_START;

  FUNCTION_PROFILE_START();

#ifndef PPC440
  /* Carve the two work areas sa and sb out of one allocated buffer. */
  buffer = (FLOAT *)blas_memory_alloc(1);

  sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
  sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif

#ifdef SMP
  args.nthreads = num_cpu_avail(4);

  if (args.nthreads == 1) {
#endif

    (getrs_single[trans])(&args, NULL, NULL, sa, sb, 0);

#ifdef SMP
  } else {

    (getrs_parallel[trans])(&args, NULL, NULL, sa, sb, 0);

  }
#endif

#ifndef PPC440
  blas_memory_free(buffer);
#endif

  FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2 * args.m * args.m * args.n);

  IDEBUG_END;

  return 0;

}
137