1 /***************************************************************************
2 * Copyright (c) 2013, The OpenBLAS Project
3 * All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in
11 * the documentation and/or other materials provided with the
12 * distribution.
13 * 3. Neither the name of the OpenBLAS project nor the names of
14 * its contributors may be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 * *****************************************************************************/
27
28 /**************************************************************************************
29 * 2014/05/22 Saar
30 * TEST double precision unblocked : OK
31 * 2014/05/23 Saar
32 * TEST double precision blocked: OK
33 * TEST single precision blocked: OK
34 **************************************************************************************/
35
36 #include <stdio.h>
37 #include "common.h"
38
39 // static FLOAT dp1 = 1.;
40 // static FLOAT dm1 = -1.;
41
42
/*
 * Kernel selection for this driver.  When UNIT is defined the TRTI2_LU,
 * TRMM_LNLU and TRSM_RNLU kernels are used; otherwise the TRTI2_LN,
 * TRMM_LNLN and TRSM_RNLN kernels are used.
 * NOTE(review): the trailing U/N presumably stands for unit / non-unit
 * diagonal -- confirm against the kernel definitions.
 */
#ifndef UNIT
#define TRTI2 TRTI2_LN
#define TRMM  TRMM_LNLN
#define TRSM  TRSM_RNLN
#else
#define TRTI2 TRTI2_LU
#define TRMM  TRMM_LNLU
#define TRSM  TRSM_RNLU
#endif
52
53
CNAME(blas_arg_t * args,BLASLONG * range_m,BLASLONG * range_n,FLOAT * sa,FLOAT * sb,BLASLONG myid)54 blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {
55
56 BLASLONG j, n, lda;
57 FLOAT *a;
58
59 // BLASLONG info=0;
60 BLASLONG jb;
61 BLASLONG NB;
62 BLASLONG start_j;
63
64 FLOAT beta_plus[2] = { ONE, ZERO};
65 FLOAT beta_minus[2] = {-ONE, ZERO};
66
67 n = args -> n;
68
69 NB = GEMM_Q;
70
71 if (n < NB) {
72 TRTI2(args, NULL, range_n, sa, sb, 0);
73 return 0;
74 }
75
76
77 lda = args -> lda;
78 a = (FLOAT *) args -> a;
79 args -> ldb = lda;
80 args -> ldc = lda;
81 args -> alpha = NULL;
82
83 start_j = 0;
84 while (start_j < n) start_j += NB;
85 start_j -= NB;
86
87
88 for (j = start_j ; j >=0 ; j-= NB)
89 {
90 jb = n - j;
91 if ( jb > NB ) jb = NB;
92
93 args -> n = jb;
94 args -> m = n-j-jb;
95
96 args -> a = &a[(j+jb+(j+jb)*lda) * COMPSIZE];
97 args -> b = &a[(j+jb+j*lda) * COMPSIZE];
98 args -> beta = beta_plus;
99
100 TRMM(args, NULL, NULL, sa, sb, 0);
101
102 args -> a = &a[(j+j*lda) * COMPSIZE];
103 args -> beta = beta_minus;
104
105 TRSM(args, NULL, NULL, sa, sb, 0);
106
107 args -> a = &a[(j+j*lda) * COMPSIZE];
108
109 TRTI2(args, NULL, range_n, sa, sb, 0);
110
111 }
112 return 0;
113 }
114