1 /*
2 * -- High Performance Computing Linpack Benchmark (HPL)
3 * HPL - 2.3 - December 2, 2018
4 * Antoine P. Petitet
5 * University of Tennessee, Knoxville
6 * Innovative Computing Laboratory
7 * (C) Copyright 2000-2008 All Rights Reserved
8 *
9 * -- Copyright notice and Licensing terms:
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 *
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 *
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions, and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * 3. All advertising materials mentioning features or use of this
23 * software must display the following acknowledgement:
24 * This product includes software developed at the University of
25 * Tennessee, Knoxville, Innovative Computing Laboratory.
26 *
27 * 4. The name of the University, the name of the Laboratory, or the
28 * names of its contributors may not be used to endorse or promote
29 * products derived from this software without specific written
30 * permission.
31 *
32 * -- Disclaimer:
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
38 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 * ---------------------------------------------------------------------
46 */
47 /*
48 * Include files
49 */
50 #include "hpl.h"
/*
 * Unrolling depth: number of columns handled for one row before moving
 * on to the next row of the index lists.  HPL_LASWP01N_DEPTH must be
 * equal to 2^HPL_LASWP01N_LOG2_DEPTH; when the default is overridden,
 * both macros have to be defined consistently.
 */
#ifndef HPL_LASWP01N_DEPTH
#define    HPL_LASWP01N_DEPTH        32
#define    HPL_LASWP01N_LOG2_DEPTH    5
#endif
/*
 * Reject inconsistent settings at compile time instead of silently
 * skipping columns at run time.
 */
#if !defined( HPL_LASWP01N_LOG2_DEPTH )
#error "HPL_LASWP01N_LOG2_DEPTH must be defined together with HPL_LASWP01N_DEPTH"
#elif ( HPL_LASWP01N_LOG2_DEPTH < 0 ) || ( HPL_LASWP01N_LOG2_DEPTH > 30 )
#error "HPL_LASWP01N_LOG2_DEPTH must lie in the range 0 .. 30"
#elif ( HPL_LASWP01N_DEPTH != ( 1 << HPL_LASWP01N_LOG2_DEPTH ) )
#error "HPL_LASWP01N_DEPTH must be equal to 1 << HPL_LASWP01N_LOG2_DEPTH"
#endif

#ifdef STDC_HEADERS
void HPL_dlaswp01N
(
   const int                        M,
   const int                        N,
   double *                         A,
   const int                        LDA,
   double *                         U,
   const int                        LDU,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp01N
( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
   const int                        M;
   const int                        N;
   double *                         A;
   const int                        LDA;
   double *                         U;
   const int                        LDU;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp01N copies scattered rows of A into itself and into an
 * array U.  The row offsets in A of the source rows are given by
 * LINDXA.  The destinations of those rows are given by LINDXAU:  a
 * non-negative value of LINDXAU[i] selects row LINDXAU[i] of U, and a
 * negative value selects row -LINDXAU[i] of A.
 *
 * The rows are processed in the order of the index lists, one block of
 * HPL_LASWP01N_DEPTH columns at a time, and every element is copied
 * directly (no intermediate buffer).  The routine returns immediately
 * when M or N is less than or equal to zero.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry, M  specifies the number of rows of A that should be
 *         moved within A or copied into U. M must be at least zero.
 *
 * N       (local input)                 const int
 *         On entry, N  specifies the length of rows of A that should be
 *         moved within A or copied into U. N must be at least zero.
 *
 * A       (local input/output)          double *
 *         On entry, A points to an array of dimension (LDA,N). The rows
 *         of this array specified by LINDXA should be moved within A or
 *         copied into U.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M).
 *
 * U       (local input/output)          double *
 *         On entry, U points to an array of dimension (LDU,N). The rows
 *         of A specified by LINDXA are copied into this array U at the
 *         positions indicated by non-negative values of LINDXAU.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *         LDU must be at least MAX(1,M).
 *
 * LINDXA  (local input)                 const int *
 *         On entry, LINDXA is an array of dimension M that contains the
 *         local row indexes of A that should be moved within A or
 *         copied into U.
 *
 * LINDXAU (local input)                 const int *
 *         On entry, LINDXAU  is an array of dimension M  that  contains
 *         the local row indexes of U where the rows of A should be
 *         copied to.  This array also contains the local row offsets in
 *         A where some of the rows of A should be moved to.  A
 *         non-negative value of LINDXAU[i] indicates that row LINDXA[i]
 *         of A is copied into U at position LINDXAU[i];  otherwise row
 *         LINDXA[i] of A is moved to position -LINDXAU[i] within A.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * src;
   double                     * dst;
/*
 * Strides are kept in size_t so that LDA * HPL_LASWP01N_DEPTH cannot
 * overflow the range of an int for large leading dimensions.
 */
   const size_t               lda  = (size_t)(LDA),
                              ldu  = (size_t)(LDU),
                              blkA = lda << HPL_LASWP01N_LOG2_DEPTH,
                              blkU = ldu << HPL_LASWP01N_LOG2_DEPTH;
   size_t                     ldd, k;
   int                        blk, i, nblk, nrem, to;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * Split the N columns into nblk full blocks of HPL_LASWP01N_DEPTH
 * columns followed by nrem left-over columns.
 */
   nblk = N >> HPL_LASWP01N_LOG2_DEPTH;
   nrem = N - (int)( (unsigned int)(nblk) << HPL_LASWP01N_LOG2_DEPTH );

   for( blk = 0; blk < nblk; blk++, A += blkA, U += blkU )
   {
      for( i = 0; i < M; i++ )
      {
         src = A + (size_t)(LINDXA[i]);
         to  = LINDXAU[i];

         if( to >= 0 ) { dst = U + (size_t)(to); ldd = ldu; }
         else
         {
/*
 * 0U - (unsigned)to is the magnitude of the negative index, computed
 * without signed overflow and without stepping outside of A.
 */
            dst = A + (size_t)( 0U - (unsigned int)(to) ); ldd = lda;
         }
/*
 * Constant trip count: left to the compiler to unroll.
 */
         for( k = 0; k < (size_t)(HPL_LASWP01N_DEPTH); k++ )
         { dst[k * ldd] = src[k * lda]; }
      }
   }
/*
 * Left-over columns; A and U already point to the first of them.
 */
   if( nrem > 0 )
   {
      for( i = 0; i < M; i++ )
      {
         src = A + (size_t)(LINDXA[i]);
         to  = LINDXAU[i];

         if( to >= 0 ) { dst = U + (size_t)(to); ldd = ldu; }
         else
         { dst = A + (size_t)( 0U - (unsigned int)(to) ); ldd = lda; }

         for( k = 0; k < (size_t)(nrem); k++ )
         { dst[k * ldd] = src[k * lda]; }
      }
   }
/*
 * End of HPL_dlaswp01N
 */
}
210