1 /* Copyright 2014-2018 The PySCF Developers. All Rights Reserved.
2 
3    Licensed under the Apache License, Version 2.0 (the "License");
4     you may not use this file except in compliance with the License.
5     You may obtain a copy of the License at
6 
7         http://www.apache.org/licenses/LICENSE-2.0
8 
9     Unless required by applicable law or agreed to in writing, software
10     distributed under the License is distributed on an "AS IS" BASIS,
11     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12     See the License for the specific language governing permissions and
13     limitations under the License.
14 
15  *
16  * Author: Qiming Sun <osirpt.sun@gmail.com>
17  */
18 
19 #include "stdlib.h"
20 #include <complex.h>
21 #include "config.h"
22 #include "np_helper.h"
23 
NPdsymm_triu(int n,double * mat,int hermi)24 void NPdsymm_triu(int n, double *mat, int hermi)
25 {
26         size_t i, j, j0, j1;
27 
28         if (hermi == HERMITIAN || hermi == SYMMETRIC) {
29                 TRIU_LOOP(i, j) {
30                         mat[i*n+j] = mat[j*n+i];
31                 }
32         } else {
33                 TRIU_LOOP(i, j) {
34                         mat[i*n+j] = -mat[j*n+i];
35                 }
36         }
37 }
38 
NPzhermi_triu(int n,double complex * mat,int hermi)39 void NPzhermi_triu(int n, double complex *mat, int hermi)
40 {
41         size_t i, j, j0, j1;
42 
43         if (hermi == HERMITIAN) {
44                 TRIU_LOOP(i, j) {
45                         mat[i*n+j] = conj(mat[j*n+i]);
46                 }
47         } else if (hermi == SYMMETRIC) {
48                 TRIU_LOOP(i, j) {
49                         mat[i*n+j] = mat[j*n+i];
50                 }
51         } else {
52                 TRIU_LOOP(i, j) {
53                         mat[i*n+j] = -conj(mat[j*n+i]);
54                 }
55         }
56 }
57 
58 
NPdunpack_tril(int n,double * tril,double * mat,int hermi)59 void NPdunpack_tril(int n, double *tril, double *mat, int hermi)
60 {
61         size_t i, j, ij;
62         for (ij = 0, i = 0; i < n; i++) {
63                 for (j = 0; j <= i; j++, ij++) {
64                         mat[i*n+j] = tril[ij];
65                 }
66         }
67         if (hermi) {
68                 NPdsymm_triu(n, mat, hermi);
69         }
70 }
71 
72 // unpack one row from the compact matrix-tril coefficients
NPdunpack_row(int ndim,int row_id,double * tril,double * row)73 void NPdunpack_row(int ndim, int row_id, double *tril, double *row)
74 {
75         int i;
76         size_t idx = ((size_t)row_id) * (row_id + 1) / 2;
77         NPdcopy(row, tril+idx, row_id);
78         for (i = row_id; i < ndim; i++) {
79                 idx += i;
80                 row[i] = tril[idx];
81         }
82 }
83 
NPzunpack_tril(int n,double complex * tril,double complex * mat,int hermi)84 void NPzunpack_tril(int n, double complex *tril, double complex *mat,
85                     int hermi)
86 {
87         size_t i, j, ij;
88         for (ij = 0, i = 0; i < n; i++) {
89                 for (j = 0; j <= i; j++, ij++) {
90                         mat[i*n+j] = tril[ij];
91                 }
92         }
93         if (hermi) {
94                 NPzhermi_triu(n, mat, hermi);
95         }
96 }
97 
NPdpack_tril(int n,double * tril,double * mat)98 void NPdpack_tril(int n, double *tril, double *mat)
99 {
100         size_t i, j, ij;
101         for (ij = 0, i = 0; i < n; i++) {
102                 for (j = 0; j <= i; j++, ij++) {
103                         tril[ij] = mat[i*n+j];
104                 }
105         }
106 }
107 
NPzpack_tril(int n,double complex * tril,double complex * mat)108 void NPzpack_tril(int n, double complex *tril, double complex *mat)
109 {
110         size_t i, j, ij;
111         for (ij = 0, i = 0; i < n; i++) {
112                 for (j = 0; j <= i; j++, ij++) {
113                         tril[ij] = mat[i*n+j];
114                 }
115         }
116 }
117 
118 /* out += in[idx[:,None],idy] */
NPdtake_2d(double * out,double * in,int * idx,int * idy,int odim,int idim,int nx,int ny)119 void NPdtake_2d(double *out, double *in, int *idx, int *idy,
120                 int odim, int idim, int nx, int ny)
121 {
122 #pragma omp parallel default(none) \
123         shared(out, in, idx,idy, odim, idim, nx, ny)
124 {
125         size_t i, j;
126         double *pin;
127 #pragma omp for schedule (static)
128         for (i = 0; i < nx; i++) {
129                 pin = in + (size_t)idim * idx[i];
130                 for (j = 0; j < ny; j++) {
131                         out[i*odim+j] = pin[idy[j]];
132                 }
133         }
134 }
135 }
136 
NPztake_2d(double complex * out,double complex * in,int * idx,int * idy,int odim,int idim,int nx,int ny)137 void NPztake_2d(double complex *out, double complex *in, int *idx, int *idy,
138                 int odim, int idim, int nx, int ny)
139 {
140 #pragma omp parallel default(none) \
141         shared(out, in, idx,idy, odim, idim, nx, ny)
142 {
143         size_t i, j;
144         double complex *pin;
145 #pragma omp for schedule (static)
146         for (i = 0; i < nx; i++) {
147                 pin = in + (size_t)idim * idx[i];
148                 for (j = 0; j < ny; j++) {
149                         out[i*odim+j] = pin[idy[j]];
150                 }
151         }
152 }
153 }
154 
155 /* out[idx[:,None],idy] += in */
NPdtakebak_2d(double * out,double * in,int * idx,int * idy,int odim,int idim,int nx,int ny,int thread_safe)156 void NPdtakebak_2d(double *out, double *in, int *idx, int *idy,
157                    int odim, int idim, int nx, int ny, int thread_safe)
158 {
159         if (thread_safe) {
160 #pragma omp parallel default(none) \
161         shared(out, in, idx,idy, odim, idim, nx, ny)
162 {
163         size_t i, j;
164         double *pout;
165 #pragma omp for schedule (static)
166         for (i = 0; i < nx; i++) {
167                 pout = out + (size_t)odim * idx[i];
168                 for (j = 0; j < ny; j++) {
169                         pout[idy[j]] += in[i*idim+j];
170                 }
171         }
172 }
173         } else {
174                 size_t i, j;
175                 double *pout;
176                 for (i = 0; i < nx; i++) {
177                         pout = out + (size_t)odim * idx[i];
178                         for (j = 0; j < ny; j++) {
179                                 pout[idy[j]] += in[i*idim+j];
180                         }
181                 }
182         }
183 }
184 
NPztakebak_2d(double complex * out,double complex * in,int * idx,int * idy,int odim,int idim,int nx,int ny,int thread_safe)185 void NPztakebak_2d(double complex *out, double complex *in, int *idx, int *idy,
186                    int odim, int idim, int nx, int ny, int thread_safe)
187 {
188         if (thread_safe) {
189 #pragma omp parallel default(none) \
190         shared(out, in, idx,idy, odim, idim, nx, ny)
191 {
192         size_t i, j;
193         double complex *pout;
194 #pragma omp for schedule (static)
195         for (i = 0; i < nx; i++) {
196                 pout = out + (size_t)odim * idx[i];
197                 for (j = 0; j < ny; j++) {
198                         pout[idy[j]] += in[i*idim+j];
199                 }
200         }
201 }
202         } else {
203                 size_t i, j;
204                 double complex *pout;
205                 for (i = 0; i < nx; i++) {
206                         pout = out + (size_t)odim * idx[i];
207                         for (j = 0; j < ny; j++) {
208                                 pout[idy[j]] += in[i*idim+j];
209                         }
210                 }
211         }
212 }
213 
NPdunpack_tril_2d(int count,int n,double * tril,double * mat,int hermi)214 void NPdunpack_tril_2d(int count, int n, double *tril, double *mat, int hermi)
215 {
216 #pragma omp parallel default(none) \
217         shared(count, n, tril, mat, hermi)
218 {
219         int ic;
220         size_t nn = n * n;
221         size_t n2 = n*(n+1)/2;
222 #pragma omp for schedule (static)
223         for (ic = 0; ic < count; ic++) {
224                 NPdunpack_tril(n, tril+n2*ic, mat+nn*ic, hermi);
225         }
226 }
227 }
228 
NPzunpack_tril_2d(int count,int n,double complex * tril,double complex * mat,int hermi)229 void NPzunpack_tril_2d(int count, int n,
230                        double complex *tril, double complex *mat, int hermi)
231 {
232 #pragma omp parallel default(none) \
233         shared(count, n, tril, mat, hermi)
234 {
235         int ic;
236         size_t nn = n * n;
237         size_t n2 = n*(n+1)/2;
238 #pragma omp for schedule (static)
239         for (ic = 0; ic < count; ic++) {
240                 NPzunpack_tril(n, tril+n2*ic, mat+nn*ic, hermi);
241         }
242 }
243 }
244 
NPdpack_tril_2d(int count,int n,double * tril,double * mat)245 void NPdpack_tril_2d(int count, int n, double *tril, double *mat)
246 {
247 #pragma omp parallel default(none) \
248         shared(count, n, tril, mat)
249 {
250         int ic;
251         size_t nn = n * n;
252         size_t n2 = n*(n+1)/2;
253 #pragma omp for schedule (static)
254         for (ic = 0; ic < count; ic++) {
255                 NPdpack_tril(n, tril+n2*ic, mat+nn*ic);
256         }
257 }
258 }
259 
NPzpack_tril_2d(int count,int n,double complex * tril,double complex * mat)260 void NPzpack_tril_2d(int count, int n, double complex *tril, double complex *mat)
261 {
262 #pragma omp parallel default(none) \
263         shared(count, n, tril, mat)
264 {
265         int ic;
266         size_t nn = n * n;
267         size_t n2 = n*(n+1)/2;
268 #pragma omp for schedule (static)
269         for (ic = 0; ic < count; ic++) {
270                 NPzpack_tril(n, tril+n2*ic, mat+nn*ic);
271         }
272 }
273 }
274 
275