1 /* Copyright 2014-2018 The PySCF Developers. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14
15 *
16 * Author: Qiming Sun <osirpt.sun@gmail.com>
17 */
18
19 #include "stdlib.h"
20 #include <complex.h>
21 #include "config.h"
22 #include "np_helper.h"
23
NPdsymm_triu(int n,double * mat,int hermi)24 void NPdsymm_triu(int n, double *mat, int hermi)
25 {
26 size_t i, j, j0, j1;
27
28 if (hermi == HERMITIAN || hermi == SYMMETRIC) {
29 TRIU_LOOP(i, j) {
30 mat[i*n+j] = mat[j*n+i];
31 }
32 } else {
33 TRIU_LOOP(i, j) {
34 mat[i*n+j] = -mat[j*n+i];
35 }
36 }
37 }
38
NPzhermi_triu(int n,double complex * mat,int hermi)39 void NPzhermi_triu(int n, double complex *mat, int hermi)
40 {
41 size_t i, j, j0, j1;
42
43 if (hermi == HERMITIAN) {
44 TRIU_LOOP(i, j) {
45 mat[i*n+j] = conj(mat[j*n+i]);
46 }
47 } else if (hermi == SYMMETRIC) {
48 TRIU_LOOP(i, j) {
49 mat[i*n+j] = mat[j*n+i];
50 }
51 } else {
52 TRIU_LOOP(i, j) {
53 mat[i*n+j] = -conj(mat[j*n+i]);
54 }
55 }
56 }
57
58
NPdunpack_tril(int n,double * tril,double * mat,int hermi)59 void NPdunpack_tril(int n, double *tril, double *mat, int hermi)
60 {
61 size_t i, j, ij;
62 for (ij = 0, i = 0; i < n; i++) {
63 for (j = 0; j <= i; j++, ij++) {
64 mat[i*n+j] = tril[ij];
65 }
66 }
67 if (hermi) {
68 NPdsymm_triu(n, mat, hermi);
69 }
70 }
71
72 // unpack one row from the compact matrix-tril coefficients
NPdunpack_row(int ndim,int row_id,double * tril,double * row)73 void NPdunpack_row(int ndim, int row_id, double *tril, double *row)
74 {
75 int i;
76 size_t idx = ((size_t)row_id) * (row_id + 1) / 2;
77 NPdcopy(row, tril+idx, row_id);
78 for (i = row_id; i < ndim; i++) {
79 idx += i;
80 row[i] = tril[idx];
81 }
82 }
83
NPzunpack_tril(int n,double complex * tril,double complex * mat,int hermi)84 void NPzunpack_tril(int n, double complex *tril, double complex *mat,
85 int hermi)
86 {
87 size_t i, j, ij;
88 for (ij = 0, i = 0; i < n; i++) {
89 for (j = 0; j <= i; j++, ij++) {
90 mat[i*n+j] = tril[ij];
91 }
92 }
93 if (hermi) {
94 NPzhermi_triu(n, mat, hermi);
95 }
96 }
97
NPdpack_tril(int n,double * tril,double * mat)98 void NPdpack_tril(int n, double *tril, double *mat)
99 {
100 size_t i, j, ij;
101 for (ij = 0, i = 0; i < n; i++) {
102 for (j = 0; j <= i; j++, ij++) {
103 tril[ij] = mat[i*n+j];
104 }
105 }
106 }
107
NPzpack_tril(int n,double complex * tril,double complex * mat)108 void NPzpack_tril(int n, double complex *tril, double complex *mat)
109 {
110 size_t i, j, ij;
111 for (ij = 0, i = 0; i < n; i++) {
112 for (j = 0; j <= i; j++, ij++) {
113 tril[ij] = mat[i*n+j];
114 }
115 }
116 }
117
118 /* out += in[idx[:,None],idy] */
NPdtake_2d(double * out,double * in,int * idx,int * idy,int odim,int idim,int nx,int ny)119 void NPdtake_2d(double *out, double *in, int *idx, int *idy,
120 int odim, int idim, int nx, int ny)
121 {
122 #pragma omp parallel default(none) \
123 shared(out, in, idx,idy, odim, idim, nx, ny)
124 {
125 size_t i, j;
126 double *pin;
127 #pragma omp for schedule (static)
128 for (i = 0; i < nx; i++) {
129 pin = in + (size_t)idim * idx[i];
130 for (j = 0; j < ny; j++) {
131 out[i*odim+j] = pin[idy[j]];
132 }
133 }
134 }
135 }
136
NPztake_2d(double complex * out,double complex * in,int * idx,int * idy,int odim,int idim,int nx,int ny)137 void NPztake_2d(double complex *out, double complex *in, int *idx, int *idy,
138 int odim, int idim, int nx, int ny)
139 {
140 #pragma omp parallel default(none) \
141 shared(out, in, idx,idy, odim, idim, nx, ny)
142 {
143 size_t i, j;
144 double complex *pin;
145 #pragma omp for schedule (static)
146 for (i = 0; i < nx; i++) {
147 pin = in + (size_t)idim * idx[i];
148 for (j = 0; j < ny; j++) {
149 out[i*odim+j] = pin[idy[j]];
150 }
151 }
152 }
153 }
154
155 /* out[idx[:,None],idy] += in */
NPdtakebak_2d(double * out,double * in,int * idx,int * idy,int odim,int idim,int nx,int ny,int thread_safe)156 void NPdtakebak_2d(double *out, double *in, int *idx, int *idy,
157 int odim, int idim, int nx, int ny, int thread_safe)
158 {
159 if (thread_safe) {
160 #pragma omp parallel default(none) \
161 shared(out, in, idx,idy, odim, idim, nx, ny)
162 {
163 size_t i, j;
164 double *pout;
165 #pragma omp for schedule (static)
166 for (i = 0; i < nx; i++) {
167 pout = out + (size_t)odim * idx[i];
168 for (j = 0; j < ny; j++) {
169 pout[idy[j]] += in[i*idim+j];
170 }
171 }
172 }
173 } else {
174 size_t i, j;
175 double *pout;
176 for (i = 0; i < nx; i++) {
177 pout = out + (size_t)odim * idx[i];
178 for (j = 0; j < ny; j++) {
179 pout[idy[j]] += in[i*idim+j];
180 }
181 }
182 }
183 }
184
NPztakebak_2d(double complex * out,double complex * in,int * idx,int * idy,int odim,int idim,int nx,int ny,int thread_safe)185 void NPztakebak_2d(double complex *out, double complex *in, int *idx, int *idy,
186 int odim, int idim, int nx, int ny, int thread_safe)
187 {
188 if (thread_safe) {
189 #pragma omp parallel default(none) \
190 shared(out, in, idx,idy, odim, idim, nx, ny)
191 {
192 size_t i, j;
193 double complex *pout;
194 #pragma omp for schedule (static)
195 for (i = 0; i < nx; i++) {
196 pout = out + (size_t)odim * idx[i];
197 for (j = 0; j < ny; j++) {
198 pout[idy[j]] += in[i*idim+j];
199 }
200 }
201 }
202 } else {
203 size_t i, j;
204 double complex *pout;
205 for (i = 0; i < nx; i++) {
206 pout = out + (size_t)odim * idx[i];
207 for (j = 0; j < ny; j++) {
208 pout[idy[j]] += in[i*idim+j];
209 }
210 }
211 }
212 }
213
NPdunpack_tril_2d(int count,int n,double * tril,double * mat,int hermi)214 void NPdunpack_tril_2d(int count, int n, double *tril, double *mat, int hermi)
215 {
216 #pragma omp parallel default(none) \
217 shared(count, n, tril, mat, hermi)
218 {
219 int ic;
220 size_t nn = n * n;
221 size_t n2 = n*(n+1)/2;
222 #pragma omp for schedule (static)
223 for (ic = 0; ic < count; ic++) {
224 NPdunpack_tril(n, tril+n2*ic, mat+nn*ic, hermi);
225 }
226 }
227 }
228
NPzunpack_tril_2d(int count,int n,double complex * tril,double complex * mat,int hermi)229 void NPzunpack_tril_2d(int count, int n,
230 double complex *tril, double complex *mat, int hermi)
231 {
232 #pragma omp parallel default(none) \
233 shared(count, n, tril, mat, hermi)
234 {
235 int ic;
236 size_t nn = n * n;
237 size_t n2 = n*(n+1)/2;
238 #pragma omp for schedule (static)
239 for (ic = 0; ic < count; ic++) {
240 NPzunpack_tril(n, tril+n2*ic, mat+nn*ic, hermi);
241 }
242 }
243 }
244
NPdpack_tril_2d(int count,int n,double * tril,double * mat)245 void NPdpack_tril_2d(int count, int n, double *tril, double *mat)
246 {
247 #pragma omp parallel default(none) \
248 shared(count, n, tril, mat)
249 {
250 int ic;
251 size_t nn = n * n;
252 size_t n2 = n*(n+1)/2;
253 #pragma omp for schedule (static)
254 for (ic = 0; ic < count; ic++) {
255 NPdpack_tril(n, tril+n2*ic, mat+nn*ic);
256 }
257 }
258 }
259
NPzpack_tril_2d(int count,int n,double complex * tril,double complex * mat)260 void NPzpack_tril_2d(int count, int n, double complex *tril, double complex *mat)
261 {
262 #pragma omp parallel default(none) \
263 shared(count, n, tril, mat)
264 {
265 int ic;
266 size_t nn = n * n;
267 size_t n2 = n*(n+1)/2;
268 #pragma omp for schedule (static)
269 for (ic = 0; ic < count; ic++) {
270 NPzpack_tril(n, tril+n2*ic, mat+nn*ic);
271 }
272 }
273 }
274
275