1 /*
2 * Copyright (C) 2013 Qiming Sun <osirpt.sun@gmail.com>
3 *
4 * blas-like functions
5 */
6
7 #include <string.h>
8 #include <complex.h>
9 #include "fblas.h"
10
11 #define OF_CMPLX 2
12
CINTdset0(FINT n,double * x)13 void CINTdset0(FINT n, double *x)
14 {
15 FINT i;
16 for (i = 0; i < n; i++) {
17 x[i] = 0;
18 }
19 }
20
21
22 /*
23 * v = a * x + y
24 */
CINTdaxpy2v(FINT n,double a,double * x,double * y,double * v)25 void CINTdaxpy2v(FINT n, double a, double *x, double *y, double *v)
26 {
27 //cblas_dcopy(n, y, 1, v, 1);
28 //cblas_daxpy(n, a, x, 1, v, 1);
29 FINT i;
30 for (i = 0; i < n; i++) {
31 v[i] = a * x[i] + y[i];
32 }
33 }
34
35 /*
36 * a[m,n] -> a_t[n,m]
37 */
CINTdmat_transpose(double * a_t,double * a,FINT m,FINT n)38 void CINTdmat_transpose(double *a_t, double *a, FINT m, FINT n)
39 {
40 FINT i, j, k;
41
42 for (j = 0; j < n-3; j+=4) {
43 #pragma GCC ivdep
44 for (i = 0; i < m; i++) {
45 a_t[(j+0)*m+i] = a[i*n+j+0];
46 a_t[(j+1)*m+i] = a[i*n+j+1];
47 a_t[(j+2)*m+i] = a[i*n+j+2];
48 a_t[(j+3)*m+i] = a[i*n+j+3];
49 }
50 }
51
52 switch (n-j) {
53 case 1:
54 #pragma GCC ivdep
55 for (i = 0; i < m; i++) {
56 a_t[j*m+i] = a[i*n+j];
57 }
58 break;
59 case 2:
60 #pragma GCC ivdep
61 for (i = 0; i < m; i++) {
62 a_t[(j+0)*m+i] = a[i*n+j+0];
63 a_t[(j+1)*m+i] = a[i*n+j+1];
64 }
65 break;
66 case 3:
67 #pragma GCC ivdep
68 for (i = 0; i < m; i++) {
69 a_t[(j+0)*m+i] = a[i*n+j+0];
70 a_t[(j+1)*m+i] = a[i*n+j+1];
71 a_t[(j+2)*m+i] = a[i*n+j+2];
72 }
73 break;
74 }
75 }
76
77 /*
78 * a_t[n,m] += a[m,n]
79 */
CINTdplus_transpose(double * a_t,double * a,FINT m,FINT n)80 void CINTdplus_transpose(double *a_t, double *a, FINT m, FINT n)
81 {
82 FINT i, j, k;
83
84 for (j = 0; j < n-3; j+=4) {
85 #pragma GCC ivdep
86 for (i = 0; i < m; i++) {
87 a_t[(j+0)*m+i] += a[i*n+j+0];
88 a_t[(j+1)*m+i] += a[i*n+j+1];
89 a_t[(j+2)*m+i] += a[i*n+j+2];
90 a_t[(j+3)*m+i] += a[i*n+j+3];
91 }
92 }
93
94 switch (n-j) {
95 case 1:
96 #pragma GCC ivdep
97 for (i = 0; i < m; i++) {
98 a_t[j*m+i] += a[i*n+j];
99 }
100 break;
101 case 2:
102 #pragma GCC ivdep
103 for (i = 0; i < m; i++) {
104 a_t[(j+0)*m+i] += a[i*n+j+0];
105 a_t[(j+1)*m+i] += a[i*n+j+1];
106 }
107 break;
108 case 3:
109 #pragma GCC ivdep
110 for (i = 0; i < m; i++) {
111 a_t[(j+0)*m+i] += a[i*n+j+0];
112 a_t[(j+1)*m+i] += a[i*n+j+1];
113 a_t[(j+2)*m+i] += a[i*n+j+2];
114 }
115 break;
116 }
117 }
118
119 /*
120 * a[m,n] -> a_t[n,m]
121 */
CINTzmat_transpose(double complex * a_t,double complex * a,FINT m,FINT n)122 void CINTzmat_transpose(double complex *a_t, double complex *a, FINT m, FINT n)
123 {
124 FINT i, j;
125
126 switch (n) {
127 case 2:
128 for (i = 0; i < m; i++) {
129 a_t[i ] = a[2*i+0];
130 a_t[i+m] = a[2*i+1];
131 }
132 break;
133 default:
134 switch (m) {
135 case 2: for (i = 0; i < n; i++) {
136 a_t[2*i+0] = a[i ];
137 a_t[2*i+1] = a[i+n];
138 }
139 break;
140 default:
141 for (i = 0; i < n; i++) {
142 for (j = 0; j < m; j++) {
143 a_t[i*m+j] = a[j*n+i];
144 }
145 }
146 }
147 }
148 }
149
CINTzmat_dagger(double complex * a_t,double complex * a,FINT m,FINT n)150 void CINTzmat_dagger(double complex *a_t, double complex *a, FINT m, FINT n)
151 {
152 FINT i, j;
153
154 for (i = 0; i < n; i++) {
155 for (j = 0; j < m; j++) {
156 a_t[i*m+j] = conj(a[j*n+i]);
157 }
158 }
159 }
160
CINTdgemm_NN1(FINT m,FINT n,FINT k,double * a,double * b,double * c,FINT ldc)161 void CINTdgemm_NN1(FINT m, FINT n, FINT k,
162 double *a, double *b, double *c, FINT ldc)
163 {
164 FINT i, j, kp;
165 double bi;
166 for (j = 0; j < n; j++) {
167 for (i = 0; i < m; i++) {
168 c[i+ldc*j] = 0;
169 }
170 for (kp = 0; kp < k; kp++) {
171 bi = b[kp+k*j];
172 #pragma GCC ivdep
173 for (i = 0; i < m; i++) {
174 c[i+ldc*j] += a[i+m*kp] * bi;
175 }
176 }
177 }
178 }
179
CINTdgemm_NN(FINT m,FINT n,FINT k,double * a,double * b,double * c)180 void CINTdgemm_NN(FINT m, FINT n, FINT k,
181 double *a, double *b, double *c)
182 {
183 CINTdgemm_NN1(m, n, k, a, b, c, m);
184 }
185
CINTdgemm_TN(FINT m,FINT n,FINT k,double * a,double * b,double * c)186 void CINTdgemm_TN(FINT m, FINT n, FINT k,
187 double *a, double *b, double *c)
188 {
189 FINT i, j, kp;
190 double ci;
191 for (j = 0; j < n; j++) {
192 for (i = 0; i < m; i++) {
193 ci = 0;
194 #pragma GCC ivdep
195 for (kp = 0; kp < k; kp++) {
196 ci += a[kp+k*i] * b[kp+k*j];
197 }
198 c[i+m*j] = ci;
199 }
200 }
201 }
202
CINTdgemm_NT(FINT m,FINT n,FINT k,double * a,double * b,double * c)203 void CINTdgemm_NT(FINT m, FINT n, FINT k,
204 double *a, double *b, double *c)
205 {
206 FINT i, j, kp;
207 double bi;
208 for (j = 0; j < n; j++) {
209 for (i = 0; i < m; i++) {
210 c[i+m*j] = 0;
211 }
212 for (kp = 0; kp < k; kp++) {
213 bi = b[j+n*kp];
214 #pragma GCC ivdep
215 for (i = 0; i < m; i++) {
216 c[i+m*j] += a[i+m*kp] * bi;
217 }
218 }
219 }
220 }
221