1 /* Copyright 2014-2018 The PySCF Developers. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14
15 *
16 * Author: Qiming Sun <osirpt.sun@gmail.com>
17 */
18
19 #include <stdlib.h>
20 #include <complex.h>
21 #include "np_helper.h"
22
23 /*
24 * matrix a[n,m]
25 */
NPdtranspose(int n,int m,double * a,double * at)26 void NPdtranspose(int n, int m, double *a, double *at)
27 {
28 size_t i, j, j0, j1;
29 for (j0 = 0; j0 < n; j0+=BLOCK_DIM) {
30 j1 = MIN(j0+BLOCK_DIM, n);
31 for (i = 0; i < m; i++) {
32 for (j = j0; j < j1; j++) {
33 at[i*n+j] = a[j*m+i];
34 }
35 }
36 }
37 }
38
NPztranspose(int n,int m,double complex * a,double complex * at)39 void NPztranspose(int n, int m, double complex *a, double complex *at)
40 {
41 size_t i, j, j0, j1;
42 for (j0 = 0; j0 < n; j0+=BLOCK_DIM) {
43 j1 = MIN(j0+BLOCK_DIM, n);
44 for (i = 0; i < m; i++) {
45 for (j = j0; j < j1; j++) {
46 at[i*n+j] = a[j*m+i];
47 }
48 }
49 }
50 }
51
52
NPdtranspose_021(int * shape,double * a,double * at)53 void NPdtranspose_021(int *shape, double *a, double *at)
54 {
55 #pragma omp parallel default(none) \
56 shared(shape, a, at)
57 {
58 int ic;
59 size_t nm = shape[1] * shape[2];
60 #pragma omp for schedule (static)
61 for (ic = 0; ic < shape[0]; ic++) {
62 NPdtranspose(shape[1], shape[2], a+ic*nm, at+ic*nm);
63 }
64 }
65 }
66
NPztranspose_021(int * shape,double complex * a,double complex * at)67 void NPztranspose_021(int *shape, double complex *a, double complex *at)
68 {
69 #pragma omp parallel default(none) \
70 shared(shape, a, at)
71 {
72 int ic;
73 size_t nm = shape[1] * shape[2];
74 #pragma omp for schedule (static)
75 for (ic = 0; ic < shape[0]; ic++) {
76 NPztranspose(shape[1], shape[2], a+ic*nm, at+ic*nm);
77 }
78 }
79 }
80
81
NPdsymm_sum(int n,double * a,double * out,int hermi)82 void NPdsymm_sum(int n, double *a, double *out, int hermi)
83 {
84 size_t i, j, j0, j1;
85 double tmp;
86
87 if (hermi == HERMITIAN || hermi == SYMMETRIC) {
88 TRIU_LOOP(i, j) {
89 tmp = a[i*n+j] + a[j*n+i];
90 out[i*n+j] = tmp;
91 out[j*n+i] = tmp;
92 }
93 } else {
94 TRIU_LOOP(i, j) {
95 tmp = a[i*n+j] - a[j*n+i];
96 out[i*n+j] = tmp;
97 out[j*n+i] =-tmp;
98 }
99 }
100 }
101
NPzhermi_sum(int n,double complex * a,double complex * out,int hermi)102 void NPzhermi_sum(int n, double complex *a, double complex *out, int hermi)
103 {
104 size_t i, j, j0, j1;
105 double complex tmp;
106
107 if (hermi == HERMITIAN) {
108 TRIU_LOOP(i, j) {
109 tmp = a[i*n+j] + conj(a[j*n+i]);
110 out[i*n+j] = tmp;
111 out[j*n+i] = conj(tmp);
112 }
113 } else if (hermi == SYMMETRIC) {
114 TRIU_LOOP(i, j) {
115 tmp = a[i*n+j] + a[j*n+i];
116 out[i*n+j] = tmp;
117 out[j*n+i] = tmp;
118 }
119 } else {
120 TRIU_LOOP(i, j) {
121 tmp = a[i*n+j] - conj(a[j*n+i]);
122 out[i*n+j] = tmp;
123 out[j*n+i] =-conj(tmp);
124 }
125 }
126 }
127
128
NPdsymm_021_sum(int * shape,double * a,double * out,int hermi)129 void NPdsymm_021_sum(int *shape, double *a, double *out, int hermi)
130 {
131 #pragma omp parallel default(none) \
132 shared(shape, a, out, hermi)
133 {
134 int ic;
135 size_t nn = shape[1] * shape[1];
136 #pragma omp for schedule (static)
137 for (ic = 0; ic < shape[0]; ic++) {
138 NPdsymm_sum(shape[1], a+ic*nn, out+ic*nn, hermi);
139 }
140 }
141 }
142
NPzhermi_021_sum(int * shape,double complex * a,double complex * out,int hermi)143 void NPzhermi_021_sum(int *shape, double complex *a, double complex *out, int hermi)
144 {
145 #pragma omp parallel default(none) \
146 shared(shape, a, out, hermi)
147 {
148 int ic;
149 size_t nn = shape[1] * shape[1];
150 #pragma omp for schedule (static)
151 for (ic = 0; ic < shape[0]; ic++) {
152 NPzhermi_sum(shape[1], a+ic*nn, out+ic*nn, hermi);
153 }
154 }
155 }
156