1 /* Copyright 2014-2018 The PySCF Developers. All Rights Reserved.
2 
3    Licensed under the Apache License, Version 2.0 (the "License");
4     you may not use this file except in compliance with the License.
5     You may obtain a copy of the License at
6 
7         http://www.apache.org/licenses/LICENSE-2.0
8 
9     Unless required by applicable law or agreed to in writing, software
10     distributed under the License is distributed on an "AS IS" BASIS,
11     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12     See the License for the specific language governing permissions and
13     limitations under the License.
14 
15  *
16  * Author: Qiming Sun <osirpt.sun@gmail.com>
17  */
18 
19 #include <stdlib.h>
20 #include <complex.h>
21 #include "np_helper.h"
22 
23 /*
24  * matrix a[n,m]
25  */
NPdtranspose(int n,int m,double * a,double * at)26 void NPdtranspose(int n, int m, double *a, double *at)
27 {
28         size_t i, j, j0, j1;
29         for (j0 = 0; j0 < n; j0+=BLOCK_DIM) {
30                 j1 = MIN(j0+BLOCK_DIM, n);
31                 for (i = 0; i < m; i++) {
32                         for (j = j0; j < j1; j++) {
33                                 at[i*n+j] = a[j*m+i];
34                         }
35                 }
36         }
37 }
38 
NPztranspose(int n,int m,double complex * a,double complex * at)39 void NPztranspose(int n, int m, double complex *a, double complex *at)
40 {
41         size_t i, j, j0, j1;
42         for (j0 = 0; j0 < n; j0+=BLOCK_DIM) {
43                 j1 = MIN(j0+BLOCK_DIM, n);
44                 for (i = 0; i < m; i++) {
45                         for (j = j0; j < j1; j++) {
46                                 at[i*n+j] = a[j*m+i];
47                         }
48                 }
49         }
50 }
51 
52 
NPdtranspose_021(int * shape,double * a,double * at)53 void NPdtranspose_021(int *shape, double *a, double *at)
54 {
55 #pragma omp parallel default(none) \
56         shared(shape, a, at)
57 {
58         int ic;
59         size_t nm = shape[1] * shape[2];
60 #pragma omp for schedule (static)
61         for (ic = 0; ic < shape[0]; ic++) {
62                 NPdtranspose(shape[1], shape[2], a+ic*nm, at+ic*nm);
63         }
64 }
65 }
66 
NPztranspose_021(int * shape,double complex * a,double complex * at)67 void NPztranspose_021(int *shape, double complex *a, double complex *at)
68 {
69 #pragma omp parallel default(none) \
70         shared(shape, a, at)
71 {
72         int ic;
73         size_t nm = shape[1] * shape[2];
74 #pragma omp for schedule (static)
75         for (ic = 0; ic < shape[0]; ic++) {
76                 NPztranspose(shape[1], shape[2], a+ic*nm, at+ic*nm);
77         }
78 }
79 }
80 
81 
NPdsymm_sum(int n,double * a,double * out,int hermi)82 void NPdsymm_sum(int n, double *a, double *out, int hermi)
83 {
84         size_t i, j, j0, j1;
85         double tmp;
86 
87         if (hermi == HERMITIAN || hermi == SYMMETRIC) {
88                 TRIU_LOOP(i, j) {
89                         tmp = a[i*n+j] + a[j*n+i];
90                         out[i*n+j] = tmp;
91                         out[j*n+i] = tmp;
92                 }
93         } else {
94                 TRIU_LOOP(i, j) {
95                         tmp = a[i*n+j] - a[j*n+i];
96                         out[i*n+j] = tmp;
97                         out[j*n+i] =-tmp;
98                 }
99         }
100 }
101 
NPzhermi_sum(int n,double complex * a,double complex * out,int hermi)102 void NPzhermi_sum(int n, double complex *a, double complex *out, int hermi)
103 {
104         size_t i, j, j0, j1;
105         double complex tmp;
106 
107         if (hermi == HERMITIAN) {
108                 TRIU_LOOP(i, j) {
109                         tmp = a[i*n+j] + conj(a[j*n+i]);
110                         out[i*n+j] = tmp;
111                         out[j*n+i] = conj(tmp);
112                 }
113         } else if (hermi == SYMMETRIC) {
114                 TRIU_LOOP(i, j) {
115                         tmp = a[i*n+j] + a[j*n+i];
116                         out[i*n+j] = tmp;
117                         out[j*n+i] = tmp;
118                 }
119         } else {
120                 TRIU_LOOP(i, j) {
121                         tmp = a[i*n+j] - conj(a[j*n+i]);
122                         out[i*n+j] = tmp;
123                         out[j*n+i] =-conj(tmp);
124                 }
125         }
126 }
127 
128 
NPdsymm_021_sum(int * shape,double * a,double * out,int hermi)129 void NPdsymm_021_sum(int *shape, double *a, double *out, int hermi)
130 {
131 #pragma omp parallel default(none) \
132         shared(shape, a, out, hermi)
133 {
134         int ic;
135         size_t nn = shape[1] * shape[1];
136 #pragma omp for schedule (static)
137         for (ic = 0; ic < shape[0]; ic++) {
138                 NPdsymm_sum(shape[1], a+ic*nn, out+ic*nn, hermi);
139         }
140 }
141 }
142 
NPzhermi_021_sum(int * shape,double complex * a,double complex * out,int hermi)143 void NPzhermi_021_sum(int *shape, double complex *a, double complex *out, int hermi)
144 {
145 #pragma omp parallel default(none) \
146         shared(shape, a, out, hermi)
147 {
148         int ic;
149         size_t nn = shape[1] * shape[1];
150 #pragma omp for schedule (static)
151         for (ic = 0; ic < shape[0]; ic++) {
152                 NPzhermi_sum(shape[1], a+ic*nn, out+ic*nn, hermi);
153         }
154 }
155 }
156