1 /*
2 
3     Copyright (C) 2014, The University of Texas at Austin
4 
5     This file is part of libflame and is available under the 3-Clause
6     BSD license, which can be found in the LICENSE file at the top-level
7     directory, or at http://opensource.org/licenses/BSD-3-Clause
8 
9 */
10 
11 #include "blis1.h"
12 
// bl1_strsvsx (single-precision real)
//
// Applies bl1_strsv() to a private, unit-stride copy of x and then folds the
// outcome into y:
//
//   y := beta * y + alpha * t
//
// where t is the copy of x after bl1_strsv() has updated it in place
// (presumably the triangular solve with op(A), going by the BLAS-style name;
// confirm against bl1_strsv). x is used only as the source of the copy, and
// y is updated in place. Returns immediately when bl1_zero_dim1( m ) holds.
//
//   uplo, trans, diag - forwarded unchanged to bl1_strsv(); uplo is also
//                       passed to bl1_screate_contigmr()
//   m                 - length used for x and y; A is handled as m-by-m
//   alpha, beta       - pointers to the scalars applied to t and to y
//   a, a_rs, a_cs     - matrix A with its row and column strides
//   x, incx           - input vector and its stride
//   y, incy           - input/output vector and its stride
void bl1_strsvsx( uplo1_t  uplo,
                  trans1_t trans,
                  diag1_t  diag,
                  int      m,
                  float*   alpha,
                  float*   a, int a_rs, int a_cs,
                  float*   x, int incx,
                  float*   beta,
                  float*   y, int incy )
{
	// Working view of A. It starts out referring to the caller's matrix;
	// the contigmr helper below is given its address so that it can
	// substitute a temporary contiguous copy when one is needed.
	float* a_work    = a;
	int    a_work_rs = a_rs;
	int    a_work_cs = a_cs;

	// Scratch vector that receives a copy of x, stored with unit stride.
	float* work;
	int    work_inc  = 1;

	// An empty problem requires no work.
	if ( bl1_zero_dim1( m ) ) return;

	// Obtain the matrix that will actually be used. The caller's
	// a/a_rs/a_cs stay untouched and act as the saved originals.
	bl1_screate_contigmr( uplo, m, m,
	                      a,       a_rs,       a_cs,
	                      &a_work, &a_work_rs, &a_work_cs );

	// work := x
	work = bl1_sallocv( m );
	bl1_scopyv( BLIS1_NO_CONJUGATE, m,
	            x,    incx,
	            work, work_inc );

	// Run trsv in place on the scratch vector, leaving x itself alone.
	bl1_strsv( uplo, trans, diag, m,
	           a_work, a_work_rs, a_work_cs,
	           work,   work_inc );

	// y := beta * y
	bl1_sscalv( BLIS1_NO_CONJUGATE, m, beta, y, incy );

	// y := y + alpha * work
	bl1_saxpyv( BLIS1_NO_CONJUGATE, m, alpha,
	            work, work_inc,
	            y,    incy );

	// Release the scratch vector, then the contiguous matrix (if any).
	bl1_sfree( work );
	bl1_sfree_contigm( a,       a_rs,       a_cs,
	                   &a_work, &a_work_rs, &a_work_cs );
}
70 
// bl1_dtrsvsx (double-precision real)
//
// Applies bl1_dtrsv() to a private, unit-stride copy of x and then folds the
// outcome into y:
//
//   y := beta * y + alpha * t
//
// where t is the copy of x after bl1_dtrsv() has updated it in place
// (presumably the triangular solve with op(A), going by the BLAS-style name;
// confirm against bl1_dtrsv). x is used only as the source of the copy, and
// y is updated in place. Returns immediately when bl1_zero_dim1( m ) holds.
//
//   uplo, trans, diag - forwarded unchanged to bl1_dtrsv(); uplo is also
//                       passed to bl1_dcreate_contigmr()
//   m                 - length used for x and y; A is handled as m-by-m
//   alpha, beta       - pointers to the scalars applied to t and to y
//   a, a_rs, a_cs     - matrix A with its row and column strides
//   x, incx           - input vector and its stride
//   y, incy           - input/output vector and its stride
void bl1_dtrsvsx( uplo1_t  uplo,
                  trans1_t trans,
                  diag1_t  diag,
                  int      m,
                  double*  alpha,
                  double*  a, int a_rs, int a_cs,
                  double*  x, int incx,
                  double*  beta,
                  double*  y, int incy )
{
	// Working view of A. It starts out referring to the caller's matrix;
	// the contigmr helper below is given its address so that it can
	// substitute a temporary contiguous copy when one is needed.
	double* a_work    = a;
	int     a_work_rs = a_rs;
	int     a_work_cs = a_cs;

	// Scratch vector that receives a copy of x, stored with unit stride.
	double* work;
	int     work_inc  = 1;

	// An empty problem requires no work.
	if ( bl1_zero_dim1( m ) ) return;

	// Obtain the matrix that will actually be used. The caller's
	// a/a_rs/a_cs stay untouched and act as the saved originals.
	bl1_dcreate_contigmr( uplo, m, m,
	                      a,       a_rs,       a_cs,
	                      &a_work, &a_work_rs, &a_work_cs );

	// work := x
	work = bl1_dallocv( m );
	bl1_dcopyv( BLIS1_NO_CONJUGATE, m,
	            x,    incx,
	            work, work_inc );

	// Run trsv in place on the scratch vector, leaving x itself alone.
	bl1_dtrsv( uplo, trans, diag, m,
	           a_work, a_work_rs, a_work_cs,
	           work,   work_inc );

	// y := beta * y
	bl1_dscalv( BLIS1_NO_CONJUGATE, m, beta, y, incy );

	// y := y + alpha * work
	bl1_daxpyv( BLIS1_NO_CONJUGATE, m, alpha,
	            work, work_inc,
	            y,    incy );

	// Release the scratch vector, then the contiguous matrix (if any).
	bl1_dfree( work );
	bl1_dfree_contigm( a,       a_rs,       a_cs,
	                   &a_work, &a_work_rs, &a_work_cs );
}
128 
// bl1_ctrsvsx (single-precision complex)
//
// Applies bl1_ctrsv() to a private, unit-stride copy of x and then folds the
// outcome into y:
//
//   y := beta * y + alpha * t
//
// where t is the copy of x after bl1_ctrsv() has updated it in place
// (presumably the triangular solve with op(A), going by the BLAS-style name;
// confirm against bl1_ctrsv). The copy, scale and axpy steps are all invoked
// with BLIS1_NO_CONJUGATE. x is used only as the source of the copy, and y
// is updated in place. Returns immediately when bl1_zero_dim1( m ) holds.
//
//   uplo, trans, diag - forwarded unchanged to bl1_ctrsv(); uplo is also
//                       passed to bl1_ccreate_contigmr()
//   m                 - length used for x and y; A is handled as m-by-m
//   alpha, beta       - pointers to the scalars applied to t and to y
//   a, a_rs, a_cs     - matrix A with its row and column strides
//   x, incx           - input vector and its stride
//   y, incy           - input/output vector and its stride
void bl1_ctrsvsx( uplo1_t   uplo,
                  trans1_t  trans,
                  diag1_t   diag,
                  int       m,
                  scomplex* alpha,
                  scomplex* a, int a_rs, int a_cs,
                  scomplex* x, int incx,
                  scomplex* beta,
                  scomplex* y, int incy )
{
	// Working view of A. It starts out referring to the caller's matrix;
	// the contigmr helper below is given its address so that it can
	// substitute a temporary contiguous copy when one is needed.
	scomplex* a_work    = a;
	int       a_work_rs = a_rs;
	int       a_work_cs = a_cs;

	// Scratch vector that receives a copy of x, stored with unit stride.
	scomplex* work;
	int       work_inc  = 1;

	// An empty problem requires no work.
	if ( bl1_zero_dim1( m ) ) return;

	// Obtain the matrix that will actually be used. The caller's
	// a/a_rs/a_cs stay untouched and act as the saved originals.
	bl1_ccreate_contigmr( uplo, m, m,
	                      a,       a_rs,       a_cs,
	                      &a_work, &a_work_rs, &a_work_cs );

	// work := x
	work = bl1_callocv( m );
	bl1_ccopyv( BLIS1_NO_CONJUGATE, m,
	            x,    incx,
	            work, work_inc );

	// Run trsv in place on the scratch vector, leaving x itself alone.
	bl1_ctrsv( uplo, trans, diag, m,
	           a_work, a_work_rs, a_work_cs,
	           work,   work_inc );

	// y := beta * y
	bl1_cscalv( BLIS1_NO_CONJUGATE, m, beta, y, incy );

	// y := y + alpha * work
	bl1_caxpyv( BLIS1_NO_CONJUGATE, m, alpha,
	            work, work_inc,
	            y,    incy );

	// Release the scratch vector, then the contiguous matrix (if any).
	bl1_cfree( work );
	bl1_cfree_contigm( a,       a_rs,       a_cs,
	                   &a_work, &a_work_rs, &a_work_cs );
}
186 
// bl1_ztrsvsx (double-precision complex)
//
// Applies bl1_ztrsv() to a private, unit-stride copy of x and then folds the
// outcome into y:
//
//   y := beta * y + alpha * t
//
// where t is the copy of x after bl1_ztrsv() has updated it in place
// (presumably the triangular solve with op(A), going by the BLAS-style name;
// confirm against bl1_ztrsv). The copy, scale and axpy steps are all invoked
// with BLIS1_NO_CONJUGATE. x is used only as the source of the copy, and y
// is updated in place. Returns immediately when bl1_zero_dim1( m ) holds.
//
//   uplo, trans, diag - forwarded unchanged to bl1_ztrsv(); uplo is also
//                       passed to bl1_zcreate_contigmr()
//   m                 - length used for x and y; A is handled as m-by-m
//   alpha, beta       - pointers to the scalars applied to t and to y
//   a, a_rs, a_cs     - matrix A with its row and column strides
//   x, incx           - input vector and its stride
//   y, incy           - input/output vector and its stride
void bl1_ztrsvsx( uplo1_t   uplo,
                  trans1_t  trans,
                  diag1_t   diag,
                  int       m,
                  dcomplex* alpha,
                  dcomplex* a, int a_rs, int a_cs,
                  dcomplex* x, int incx,
                  dcomplex* beta,
                  dcomplex* y, int incy )
{
	// Working view of A. It starts out referring to the caller's matrix;
	// the contigmr helper below is given its address so that it can
	// substitute a temporary contiguous copy when one is needed.
	dcomplex* a_work    = a;
	int       a_work_rs = a_rs;
	int       a_work_cs = a_cs;

	// Scratch vector that receives a copy of x, stored with unit stride.
	dcomplex* work;
	int       work_inc  = 1;

	// An empty problem requires no work.
	if ( bl1_zero_dim1( m ) ) return;

	// Obtain the matrix that will actually be used. The caller's
	// a/a_rs/a_cs stay untouched and act as the saved originals.
	bl1_zcreate_contigmr( uplo, m, m,
	                      a,       a_rs,       a_cs,
	                      &a_work, &a_work_rs, &a_work_cs );

	// work := x
	work = bl1_zallocv( m );
	bl1_zcopyv( BLIS1_NO_CONJUGATE, m,
	            x,    incx,
	            work, work_inc );

	// Run trsv in place on the scratch vector, leaving x itself alone.
	bl1_ztrsv( uplo, trans, diag, m,
	           a_work, a_work_rs, a_work_cs,
	           work,   work_inc );

	// y := beta * y
	bl1_zscalv( BLIS1_NO_CONJUGATE, m, beta, y, incy );

	// y := y + alpha * work
	bl1_zaxpyv( BLIS1_NO_CONJUGATE, m, alpha,
	            work, work_inc,
	            y,    incy );

	// Release the scratch vector, then the contiguous matrix (if any).
	bl1_zfree( work );
	bl1_zfree_contigm( a,       a_rs,       a_cs,
	                   &a_work, &a_work_rs, &a_work_cs );
}
244 
245