1#if 0 2! This file is part of ELPA. 3! 4! The ELPA library was originally created by the ELPA consortium, 5! consisting of the following organizations: 6! 7! - Max Planck Computing and Data Facility (MPCDF), formerly known as 8! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG), 9! - Bergische Universität Wuppertal, Lehrstuhl für angewandte 10! Informatik, 11! - Technische Universität München, Lehrstuhl für Informatik mit 12! Schwerpunkt Wissenschaftliches Rechnen , 13! - Fritz-Haber-Institut, Berlin, Abt. Theorie, 14! - Max-Plack-Institut für Mathematik in den Naturwissenschaften, 15! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition, 16! and 17! - IBM Deutschland GmbH 18! 19! 20! More information can be found here: 21! http://elpa.mpcdf.mpg.de/ 22! 23! ELPA is free software: you can redistribute it and/or modify 24! it under the terms of the version 3 of the license of the 25! GNU Lesser General Public License as published by the Free 26! Software Foundation. 27! 28! ELPA is distributed in the hope that it will be useful, 29! but WITHOUT ANY WARRANTY; without even the implied warranty of 30! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 31! GNU Lesser General Public License for more details. 32! 33! You should have received a copy of the GNU Lesser General Public License 34! along with ELPA. If not, see <http://www.gnu.org/licenses/> 35! 36! ELPA reflects a substantial effort on the part of the original 37! ELPA consortium, and we ask you to respect the spirit of the 38! license that we chose: i.e., please contribute any changes you 39! may have back to the original ELPA library distribution, and keep 40! any derivatives of ELPA under the same license that we chose for 41! the original distribution, the GNU Lesser General Public License. 42! 43! 44! -------------------------------------------------------------------------------------------------- 45! 46! This file contains the compute intensive kernels for the Householder transformations. 47! 48! This is the small and simple version (no hand unrolling of loops etc.) but for some 49! compilers this performs better than a sophisticated version with transformed and unrolled loops. 50! 51! It should be compiled with the highest possible optimization level. 52! 53! Copyright of the original code rests with the authors inside the ELPA 54! consortium. The copyright of any additional modifications shall rest 55! with their original authors, but shall adhere to the licensing terms 56! distributed along with the original code in the file "COPYING". 57! 58! -------------------------------------------------------------------------------------------------- 59#endif 60 61#if COMPLEXCASE==1 62 ! the intel compiler creates a temp copy of array q 63 ! this should be avoided without using assumed size arrays 64 65 subroutine single_hh_trafo_& 66 &MATH_DATATYPE& 67 &_generic_simple_& 68 &PRECISION& 69 & (q, hh, nb, nq, ldq) 70 71 use precision 72 use elpa_abstract_impl 73 implicit none 74 !class(elpa_abstract_impl_t), intent(inout) :: obj 75 integer(kind=ik), intent(in) :: nb, nq, ldq 76#ifdef USE_ASSUMED_SIZE 77 complex(kind=C_DATATYPE_KIND), intent(inout) :: q(ldq,*) 78 complex(kind=C_DATATYPE_KIND), intent(in) :: hh(*) 79#else 80 complex(kind=C_DATATYPE_KIND), intent(inout) :: q(1:ldq,1:nb) 81 complex(kind=C_DATATYPE_KIND), intent(in) :: hh(1:nb) 82#endif 83 integer(kind=ik) :: i 84 complex(kind=C_DATATYPE_KIND) :: tau1, x(nq) 85 86 !call obj%timer%start("kernel_& 87 !&MATH_DATATYPE& 88 !&_generic_simple: single_hh_trafo_& 89 !&MATH_DATATYPE& 90 !&_generic_simple" // & 91 !&PRECISION_SUFFIX & 92 !) 93 94 ! Just one Householder transformation 95 96 x(1:nq) = q(1:nq,1) 97 98 do i=2,nb 99 x(1:nq) = x(1:nq) + q(1:nq,i)*conjg(hh(i)) 100 enddo 101 102 tau1 = hh(1) 103 x(1:nq) = x(1:nq)*(-tau1) 104 105 q(1:nq,1) = q(1:nq,1) + x(1:nq) 106 107 do i=2,nb 108 q(1:nq,i) = q(1:nq,i) + x(1:nq)*hh(i) 109 enddo 110 111 112 !call obj%timer%stop("kernel_& 113 !&MATH_DATATYPE& 114 !&_generic_simple: single_hh_trafo_& 115 !&MATH_DATATYPE& 116 !&_generic_simple" // & 117 !&PRECISION_SUFFIX & 118 !) 119 120 end subroutine 121 122#endif /* COMPLEXCASE == 1 */ 123 ! -------------------------------------------------------------------------------------------------- 124 125 subroutine double_hh_trafo_& 126 &MATH_DATATYPE& 127 &_generic_simple_& 128 &PRECISION& 129 & (q, hh, nb, nq, ldq, ldh) 130 131 use precision 132 use elpa_abstract_impl 133 implicit none 134 135 !class(elpa_abstract_impl_t), intent(inout) :: obj 136 integer(kind=ik), intent(in) :: nb, nq, ldq, ldh 137#if REALCASE==1 138 139#ifdef USE_ASSUMED_SIZE 140 real(kind=C_DATATYPE_KIND), intent(inout) :: q(ldq,*) 141 real(kind=C_DATATYPE_KIND), intent(in) :: hh(ldh,*) 142#else 143 real(kind=C_DATATYPE_KIND), intent(inout) :: q(1:ldq,1:nb+1) 144 real(kind=C_DATATYPE_KIND), intent(in) :: hh(1:ldh,1:6) 145#endif 146 real(kind=C_DATATYPE_KIND) :: s, h1, h2, tau1, tau2, x(nq), y(nq) 147#endif /* REALCASE==1 */ 148 149#if COMPLEXCASE==1 150 151#ifdef USE_ASSUMED_SIZE 152 complex(kind=C_DATATYPE_KIND), intent(inout) :: q(ldq,*) 153 complex(kind=C_DATATYPE_KIND), intent(in) :: hh(ldh,*) 154#else 155 complex(kind=C_DATATYPE_KIND), intent(inout) :: q(1:ldq,1:nb+1) 156 complex(kind=C_DATATYPE_KIND), intent(in) :: hh(1:ldh,1:2) 157#endif 158 complex(kind=C_DATATYPE_KIND) :: s, h1, h2, tau1, tau2, x(nq), y(nq) 159#endif /* COMPLEXCASE==1 */ 160 integer(kind=ik) :: i 161 162 !call obj%timer%start("kernel_& 163 !&MATH_DATATYPE& 164 !&_generic_simple: double_hh_trafo_& 165 !&MATH_DATATYPE& 166 !&_generic_simple" // & 167 !&PRECISION_SUFFIX & 168 !) 169 170 ! Calculate dot product of the two Householder vectors 171#if REALCASE==1 172 s = hh(2,2)*1.0 173 do i=3,nb 174 s = s+hh(i,2)*hh(i-1,1) 175 enddo 176#endif 177 178#if COMPLEXCASE==1 179 s = conjg(hh(2,2))*1.0 180 do i=3,nb 181 s = s+(conjg(hh(i,2))*hh(i-1,1)) 182 enddo 183#endif 184 185 ! Do the Householder transformations 186 187 x(1:nq) = q(1:nq,2) 188#if REALCASE==1 189 y(1:nq) = q(1:nq,1) + q(1:nq,2)*hh(2,2) 190#endif 191 192#if COMPLEXCASE==1 193 y(1:nq) = q(1:nq,1) + q(1:nq,2)*conjg(hh(2,2)) 194#endif 195 196 do i=3,nb 197#if REALCASE==1 198 h1 = hh(i-1,1) 199 h2 = hh(i,2) 200#endif 201 202#if COMPLEXCASE==1 203 h1 = conjg(hh(i-1,1)) 204 h2 = conjg(hh(i,2)) 205#endif 206 x(1:nq) = x(1:nq) + q(1:nq,i)*h1 207 y(1:nq) = y(1:nq) + q(1:nq,i)*h2 208 enddo 209 210#if REALCASE==1 211 x(1:nq) = x(1:nq) + q(1:nq,nb+1)*hh(nb,1) 212#endif 213 214#if COMPLEXCASE==1 215 x(1:nq) = x(1:nq) + q(1:nq,nb+1)*conjg(hh(nb,1)) 216#endif 217 tau1 = hh(1,1) 218 tau2 = hh(1,2) 219 220 h1 = -tau1 221 x(1:nq) = x(1:nq)*h1 222 h1 = -tau2 223 h2 = -tau2*s 224 y(1:nq) = y(1:nq)*h1 + x(1:nq)*h2 225 226 q(1:nq,1) = q(1:nq,1) + y(1:nq) 227 q(1:nq,2) = q(1:nq,2) + x(1:nq) + y(1:nq)*hh(2,2) 228 229 do i=3,nb 230 h1 = hh(i-1,1) 231 h2 = hh(i,2) 232 q(1:nq,i) = q(1:nq,i) + x(1:nq)*h1 + y(1:nq)*h2 233 enddo 234 235 q(1:nq,nb+1) = q(1:nq,nb+1) + x(1:nq)*hh(nb,1) 236 237 238 !call obj%timer%stop("kernel_& 239 !&MATH_DATATYPE& 240 !&_generic_simple: double_hh_trafo_& 241 !&MATH_DATATYPE& 242 !&_generic_simple" // & 243 !&PRECISION_SUFFIX & 244 !) 245 246 end subroutine 247