1!
2! Copyright (C) 2016 - present Andrea Dal Corso
3! This file is distributed under the terms of the
4! GNU General Public License. See the file `License'
5! in the root directory of the present distribution,
6! or http://www.gnu.org/copyleft/gpl.txt .
7!
8!
9MODULE linear_solvers
10
11  USE io_global, ONLY : stdout
12  IMPLICIT NONE
13  !
14  SAVE
15  !
16  PRIVATE
17
18  PUBLIC  ccg_many_vectors, cg_many_vectors, linsolvx, linsolvx_sym, &
19          linsolvms, linsolvsvd, min_sqr_solve
20
21
22CONTAINS
23
24!----------------------------------------------------------------------
25SUBROUTINE cg_many_vectors (apply_a, precond, scal_prod, d0psi, dpsi, &
26                h_diag, ndmx, ndim, ethr, ik, kter, conv_root, anorm, nbnd)
27  !----------------------------------------------------------------------
28  !
29  !     iterative solution of the linear system:
30  !
31  !                  A  * dpsi = d0psi                      (1)
32  !
33  !     where A is an hermitean positive definite matrix, dpsi and d0psi are
34  !     complex vectors. It uses the conjugate gradient method.
35  !     If you have an general complex operator you have to apply the
36  !     slower ccg_many_vector routine.
37  !
38  !     on input:
39  !                 apply_a  EXTERNAL  name of a subroutine:
40  !                          apply_a(ndmx,ndim,psi,psip,)
41  !                          Calculates  H*psi products.
42  !                          Vectors psi and psip should be dimensined
43  !                          (ndmx,nvec). nvec=1 is used!
44  !
45  !                 cg_psi   EXTERNAL  name of a subroutine:
46  !                          g_psi(ndmx,ndim,notcnv,psi,e)
47  !                          which calculates (h-e)^-1 * psi, with
48  !                          some approximation, e.g. (diag(h)-e)
49  !
50  !                 scal_prod EXTERNAL name of a subroutine
51  !                          scal_prod(ndmx,ndim,psi1,psi2)
52  !                          which calculate the scalar product
53  !                          psi1 ^+ psi2
54  !
55  !                 dpsi     contains an estimate of the solution
56  !                          vector.
57  !
58  !                 d0psi    contains the right hand side vector
59  !                          of the system.
60  !
61  !                 ndmx     integer row dimension of dpsi, ecc.
62  !
63  !                 ndim     integer actual row dimension of dpsi
64  !
65  !                 ethr     real     convergence threshold. solution
66  !                          improvement is stopped when the error in
67  !                          eq (1), defined as l.h.s. - r.h.s., becomes
68  !                          less than ethr in norm.
69  !
70  !     on output:  dpsi     contains the refined estimate of the
71  !                          solution vector.
72  !
73  !                 d0psi    is corrupted on exit
74  !
75  !   written 29/01/2016  by A. Dal Corso modifying the old routine
76  !   contained in cgsolve_all of Quantum ESPRESSO.
77  !
78  USE kinds,          ONLY : DP
79  USE io_global,       ONLY : stdout
80
81  IMPLICIT NONE
82  !
83  !   first the I/O variables
84  !
85  INTEGER :: ndmx, & ! input: the maximum dimension of the vectors
86             ndim, & ! input: the actual dimension of the vectors
87             kter, & ! output: counter on iterations
88             nbnd, & ! input: the number of bands
89             ik      ! input: the k point
90
91  REAL(DP) ::         &
92             anorm,   & ! output: the norm of the error in the solution
93             h_diag(ndmx,nbnd), & ! input: an estimate of ( H - \epsilon )
94             ethr       ! input: the required precision
95
96  COMPLEX(DP) ::                &
97             dpsi (ndmx, nbnd), & ! output: the solution of the linear syst
98             d0psi (ndmx, nbnd)   ! input: the known term
99
100  LOGICAL :: conv_root ! output: if true the root is converged
101  EXTERNAL apply_a     ! input: the routine computing A
102  EXTERNAL precond     ! input: the routine applying the preconditioning
103  EXTERNAL scal_prod   ! input: the routine computing the scalar product
104  !
105  !  here the local variables
106  !
107  INTEGER, PARAMETER :: maxter = 200
108  ! the maximum number of iterations
109  INTEGER :: iter, ibnd, lbnd
110  ! counters on iteration, bands
111  INTEGER , ALLOCATABLE :: conv (:)
112  ! if 1 the root is converged
113
114  COMPLEX(DP), ALLOCATABLE :: g (:,:), t (:,:), h (:,:), hold (:,:)
115  !  the gradient of psi
116  !  the preconditioned gradient
117  !  the delta gradient
118  !  the conjugate gradient
119  !  work space
120  REAL(DP) ::  dcgamma, dclambda
121  !  the ratio between rho
122  !  step length
123  REAL(DP), ALLOCATABLE :: rho (:), rhoold (:), a(:), c(:)
124  ! the residue
125  ! auxiliary for h_diag
126  !
127  COMPLEX(DP) :: scal_prod
128  !
129  INTEGER, ALLOCATABLE :: indb(:)
130  !
131  REAL(DP) :: kter_eff
132  ! account the number of iterations with b
133  !
134  CALL start_clock ('cg_many_vectors')
135
136  ALLOCATE ( g(ndmx,nbnd), t(ndmx,nbnd), h(ndmx,nbnd),hold(ndmx ,nbnd) )
137  ALLOCATE (a(nbnd), c(nbnd))
138  ALLOCATE (conv (nbnd))
139  ALLOCATE (rho(nbnd),rhoold(nbnd))
140  ALLOCATE (indb(nbnd))
141  !
142  !      WRITE( stdout,*) g,t,h,hold
143  !
144  kter_eff = 0.d0
145  conv= 0
146  g=(0.d0,0.d0)
147  t=(0.d0,0.d0)
148  h=(0.d0,0.d0)
149  hold=(0.d0,0.d0)
150  DO ibnd=1,nbnd
151     indb(ibnd)=ibnd
152  END DO
153  DO iter = 1, maxter
154     !
155     !    compute the gradient. can reuse information from previous step
156     !
157     IF (iter == 1) THEN
158        CALL apply_a (ndmx, ndim, dpsi, g, ik, nbnd, indb, 1)
159        DO ibnd = 1, nbnd
160           CALL zaxpy (ndmx, (-1.d0,0.d0), d0psi(1,ibnd), 1, g(1,ibnd), 1)
161        ENDDO
162     ENDIF
163     !
164     !    compute preconditioned residual vectors and convergence check
165     !
166     lbnd = 0
167     DO ibnd = 1, nbnd
168        IF (conv (ibnd) == 0) THEN
169           lbnd = lbnd+1
170           CALL zcopy (ndmx, g (1, ibnd), 1, h (1, ibnd), 1)
171           CALL precond(ndmx, ndim, 1, h(1,ibnd), h_diag(1,ibnd) )
172           rho(lbnd) = scal_prod (ndmx, ndim, h(1,ibnd), g(1,ibnd) )
173        ENDIF
174     ENDDO
175     kter_eff = kter_eff + DBLE (lbnd) / DBLE (nbnd)
176     DO ibnd = nbnd, 1, -1
177        IF (conv(ibnd)==0) THEN
178           rho(ibnd)=rho(lbnd)
179           lbnd = lbnd -1
180           anorm = SQRT (rho (ibnd) )
181!           WRITE(stdout,*) iter, ibnd, anorm
182           IF (anorm < ethr) conv (ibnd) = 1
183        ENDIF
184     ENDDO
185!
186     conv_root = .true.
187     DO ibnd = 1, nbnd
188        conv_root = conv_root.AND. (conv (ibnd) .eq.1)
189     ENDDO
190     IF (conv_root) GOTO 100
191     !
192     !        compute the step directions h and hs.
193     !
194     lbnd = 0
195     DO ibnd = 1, nbnd
196        IF (conv (ibnd) == 0) THEN
197!
198!          change sign to h and hs
199!
200           CALL dscal (2 * ndmx, - 1.d0, h (1, ibnd), 1)
201           IF (iter /= 1) THEN
202              dcgamma = rho (ibnd) / rhoold (ibnd)
203              CALL zaxpy (ndmx, dcgamma, hold (1, ibnd), 1, h (1, ibnd), 1)
204           ENDIF
205
206!
207! here hold is used as auxiliary vector in order to efficiently compute t = A*h
208! it is later set to the current (becoming old) value of h
209!
210           lbnd = lbnd+1
211           CALL zcopy (ndmx, h (1, ibnd), 1, hold (1, lbnd), 1)
212           indb (lbnd) = ibnd
213        ENDIF
214     ENDDO
215     !
216     !        compute t = A*h
217     !
218     CALL apply_a (ndmx, ndim, hold, t, ik, lbnd, indb, 1)
219     !
220     !        compute the coefficients a and c for the line minimization
221     !        compute step length lambda
222     lbnd=0
223     DO ibnd = 1, nbnd
224        IF (conv (ibnd) == 0) THEN
225           lbnd=lbnd+1
226           a(lbnd) = scal_prod (ndmx, ndim, h(1,ibnd), g(1,ibnd))
227           c(lbnd) = scal_prod (ndmx, ndim, h(1,ibnd), t(1,lbnd))
228        END IF
229     END DO
230     lbnd=0
231     DO ibnd = 1, nbnd
232        IF (conv (ibnd) == 0) THEN
233           lbnd=lbnd+1
234           dclambda = CMPLX( - a(lbnd) / c(lbnd), 0.d0,kind=DP)
235           !
236           !    move to new position
237           !
238           CALL zaxpy (ndmx, dclambda, h(1,ibnd), 1, dpsi(1,ibnd), 1)
239           !
240           !    update to get the gradient
241           !
242           !g=g+lam
243           CALL zaxpy (ndmx, dclambda, t(1,lbnd), 1, g(1,ibnd), 1)
244           !
245           !    save current (now old) h and rho for later use
246           !
247           CALL zcopy (ndmx, h(1,ibnd), 1, hold(1,ibnd), 1)
248           rhoold (ibnd) = rho (ibnd)
249        ENDIF
250     ENDDO
251  ENDDO
252100 CONTINUE
253  kter = kter_eff
254  DEALLOCATE (indb)
255  DEALLOCATE (rho, rhoold)
256  DEALLOCATE (conv)
257  DEALLOCATE (a,c)
258  DEALLOCATE (g, t, h, hold)
259
260  CALL stop_clock ('cg_many_vectors')
261  RETURN
262END SUBROUTINE cg_many_vectors
263
264!----------------------------------------------------------------------
265SUBROUTINE ccg_many_vectors (apply_a, precond, scal_prod, d0psi, dpsi, &
266                h_diag, ndmx, ndim, ethr, ik, kter, conv_root, anorm, nbnd)
267  !----------------------------------------------------------------------
268  !
269  !     iterative solution of the linear system:
270  !
271  !                  A  * dpsi = d0psi                      (1)
272  !
273  !     where A is a complex general matrix, dpsi and d0psi are
274  !     complex vectors. It uses the biconjugate gradient method.
275  !     Note that this routine is for general matrices and can deal with
276  !     many vectors d0psi.
277  !     If you have an hermitean positive definite operator you can
278  !     apply the much faster cg_many_vector routine.
279  !     If you have only one vector d0psi you can apply the
280  !     cg_one_vector ccg_one_vector routines.
281  !
282  !     on input:
283  !                 apply_a  EXTERNAL  name of a subroutine:
284  !                          apply_a(ndmx,ndim,psi,psip,)
285  !                          Calculates  H*psi products.
286  !                          Vectors psi and psip should be dimensined
287  !                          (ndmx,nvec). nvec=1 is used!
288  !
289  !                 cg_psi   EXTERNAL  name of a subroutine:
290  !                          g_psi(ndmx,ndim,notcnv,psi,e)
291  !                          which calculates (h-e)^-1 * psi, with
292  !                          some approximation, e.g. (diag(h)-e)
293  !
294  !                 scal_prod EXTERNAL name of a subroutine
295  !                          scal_prod(ndmx,ndim,psi1,psi2)
296  !                          which calculate the scalar product
297  !                          psi1 ^+ psi2
298  !
299  !                 dpsi     contains an estimate of the solution
300  !                          vector.
301  !
302  !                 d0psi    contains the right hand side vector
303  !                          of the system.
304  !
305  !                 ndmx     integer row dimension of dpsi, ecc.
306  !
307  !                 ndim     integer actual row dimension of dpsi
308  !
309  !                 ethr     real     convergence threshold. solution
310  !                          improvement is stopped when the error in
311  !                          eq (1), defined as l.h.s. - r.h.s., becomes
312  !                          less than ethr in norm.
313  !
314  !     on output:  dpsi     contains the refined estimate of the
315  !                          solution vector.
316  !
317  !                 d0psi    is corrupted on exit
318  !
319  !   written 29/01/2016  by A. Dal Corso
320  !
321  USE kinds,          ONLY : DP
322  USE io_global,       ONLY : stdout
323
324  IMPLICIT NONE
325  !
326  !   first the I/O variables
327  !
328  INTEGER :: ndmx, & ! input: the maximum dimension of the vectors
329             ndim, & ! input: the actual dimension of the vectors
330             kter, & ! output: counter on iterations
331             nbnd, & ! input: the number of bands
332             ik      ! input: the k point
333
334  REAL(DP) :: &
335             anorm,   & ! output: the norm of the error in the solution
336             ethr       ! input: the required precision
337
338  COMPLEX(DP) :: &
339             h_diag(ndmx,nbnd), & ! input: an estimate of ( H - \epsilon- w )
340                                  ! w can be complex
341             dpsi (ndmx, nbnd), & ! output: the solution of the linear syst
342             d0psi (ndmx, nbnd)   ! input: the known term
343
344  LOGICAL :: conv_root ! output: if true the root is converged
345  EXTERNAL apply_a     ! input: the routine computing A
346  EXTERNAL precond     ! input: the routine applying the preconditioning
347  EXTERNAL scal_prod   ! input: the routine computing the scalar product
348  !
349  !  here the local variables
350  !
351  INTEGER, PARAMETER :: maxter = 2000
352  ! the maximum number of iterations
353  INTEGER :: iter, ibnd, lbnd
354  ! counters on iteration, bands
355  INTEGER , ALLOCATABLE :: conv (:)
356  ! if 1 the root is converged
357
358  COMPLEX(DP), ALLOCATABLE :: g (:,:), t (:,:), h (:,:), hold (:,:)
359  COMPLEX(DP), ALLOCATABLE :: gs (:,:), ts (:,:), hs (:,:), hsold (:,:)
360  !  the gradient of psi
361  !  the preconditioned gradient
362  !  the delta gradient
363  !  the conjugate gradient
364  !  work space
365  COMPLEX(DP) ::  dcgamma, dcgamma1, dclambda, dclambda1
366  !  the ratio between rho
367  !  step length
368  COMPLEX(DP), ALLOCATABLE :: rho (:), rhoold (:), a(:), c(:)
369  ! the residue
370  ! auxiliary for h_diag
371  !
372  COMPLEX(DP) :: scal_prod
373  !
374  INTEGER, ALLOCATABLE :: indb(:)
375  !
376  REAL(DP) :: kter_eff
377  ! account the number of iterations with b
378  !
379  CALL start_clock ('ccg_many_vectors')
380
381  ALLOCATE ( g(ndmx,nbnd), t(ndmx,nbnd), h(ndmx,nbnd),hold(ndmx ,nbnd) )
382  ALLOCATE ( gs(ndmx,nbnd), ts(ndmx,nbnd), hs(ndmx,nbnd), hsold(ndmx,nbnd) )
383  ALLOCATE (a(nbnd), c(nbnd))
384  ALLOCATE (conv (nbnd))
385  ALLOCATE (rho(nbnd),rhoold(nbnd))
386  ALLOCATE (indb(nbnd))
387  !      WRITE( stdout,*) g,t,h,hold
388
389  kter_eff = 0.d0
390  conv= 0
391  g=(0.d0,0.d0)
392  t=(0.d0,0.d0)
393  h=(0.d0,0.d0)
394  hold=(0.d0,0.d0)
395  gs=(0.d0,0.d0)
396  ts=(0.d0,0.d0)
397  hs=(0.d0,0.d0)
398  hsold=(0.d0,0.d0)
399  DO ibnd=1,nbnd
400     indb(ibnd)=ibnd
401  END DO
402  DO iter = 1, maxter
403     !
404     !    compute the gradient. can reuse information from previous step
405     !
406     IF (iter == 1) THEN
407        CALL apply_a (ndmx, ndim, dpsi, g, ik, nbnd, indb, 1)
408        DO ibnd = 1, nbnd
409           CALL zaxpy (ndmx, (-1.d0,0.d0), d0psi(1,ibnd), 1, g(1,ibnd), 1)
410        ENDDO
411        gs(:,:) = CONJG(g(:,:))
412     ENDIF
413     !
414     !    compute preconditioned residual vectors and convergence check
415     !
416     lbnd = 0
417     DO ibnd = 1, nbnd
418        IF (conv (ibnd) == 0) THEN
419           lbnd = lbnd+1
420           CALL zcopy (ndmx, g (1, ibnd), 1, h (1, ibnd), 1)
421           CALL zcopy (ndmx, gs (1, ibnd), 1, hs (1, ibnd), 1)
422           CALL precond(ndmx, ndim, 1, h(1,ibnd), h_diag(1,ibnd), 1 )
423           CALL precond(ndmx, ndim, 1, hs(1,ibnd), h_diag(1,ibnd), -1 )
424           rho(lbnd) = scal_prod (ndmx, ndim, hs(1,ibnd), g(1,ibnd))
425        ENDIF
426     ENDDO
427     kter_eff = kter_eff + DBLE (lbnd) / DBLE (nbnd)
428     DO ibnd = nbnd, 1, -1
429        IF (conv(ibnd)==0) THEN
430           rho(ibnd)=rho(lbnd)
431           lbnd = lbnd -1
432           anorm = SQRT (ABS (rho (ibnd)) )
433!           IF (MOD(iter,20)==0) WRITE(stdout,*) iter, ibnd, anorm
434           IF (anorm < ethr) conv (ibnd) = 1
435        ENDIF
436     ENDDO
437!
438     conv_root = .true.
439     DO ibnd = 1, nbnd
440        conv_root = conv_root.AND. (conv (ibnd) .eq.1)
441     ENDDO
442     IF (conv_root) GOTO 100
443     !
444     !        compute the step directions h and hs.
445     !
446     lbnd = 0
447     DO ibnd = 1, nbnd
448        IF (conv (ibnd) == 0) THEN
449!
450!          change sign to h and hs
451!
452           CALL dscal (2 * ndmx, - 1.d0, h (1, ibnd), 1)
453           CALL dscal (2 * ndmx, - 1.d0, hs (1, ibnd), 1)
454           IF (iter /= 1) THEN
455              dcgamma = rho (ibnd) / rhoold (ibnd)
456              dcgamma1 = CONJG(dcgamma)
457              CALL zaxpy (ndmx, dcgamma, hold (1, ibnd), 1, h (1, ibnd), 1)
458              CALL zaxpy (ndmx, dcgamma1, hsold (1, ibnd), 1, hs (1, ibnd), 1)
459           ENDIF
460
461!
462! here hold is used as auxiliary vector in order to efficiently compute t = A*h
463! it is later set to the current (becoming old) value of h
464!
465           lbnd = lbnd+1
466           CALL zcopy (ndmx, h (1, ibnd), 1, hold (1, lbnd), 1)
467           CALL zcopy (ndmx, hs (1, ibnd), 1, hsold (1, lbnd), 1)
468           indb (lbnd) = ibnd
469        ENDIF
470     ENDDO
471     !
472     !        compute t = A*h  ts= A^+ * h
473     !
474     CALL apply_a (ndmx, ndim, hold, t, ik, lbnd, indb, 1)
475     CALL apply_a (ndmx, ndim, hsold, ts, ik, lbnd, indb, -1)
476     !
477     !        compute the coefficients a and c for the line minimization
478     !        compute step length lambda
479     lbnd=0
480     DO ibnd = 1, nbnd
481        IF (conv (ibnd) == 0) THEN
482           lbnd=lbnd+1
483           a(lbnd) = scal_prod (ndmx, ndim, hs(1,ibnd), g(1,ibnd))
484           c(lbnd) = scal_prod (ndmx, ndim, hs(1,ibnd), t(1,lbnd))
485        END IF
486     END DO
487     lbnd=0
488     DO ibnd = 1, nbnd
489        IF (conv (ibnd) == 0) THEN
490           lbnd=lbnd+1
491           dclambda = - a(lbnd) / c(lbnd)
492           dclambda1 = CONJG(dclambda)
493           !
494           !    move to new position
495           !
496           CALL zaxpy (ndmx, dclambda, h(1,ibnd), 1, dpsi(1,ibnd), 1)
497           !
498           !    update to get the gradient
499           !
500           !g=g+lam
501           CALL zaxpy (ndmx, dclambda, t(1,lbnd), 1, g(1,ibnd), 1)
502           CALL zaxpy (ndmx, dclambda1, ts(1,lbnd), 1, gs(1,ibnd), 1)
503           !
504           !    save current (now old) h and rho for later use
505           !
506           CALL zcopy (ndmx, h(1,ibnd), 1, hold(1,ibnd), 1)
507           CALL zcopy (ndmx, hs(1,ibnd), 1, hsold(1,ibnd), 1)
508           rhoold (ibnd) = rho (ibnd)
509        ENDIF
510     ENDDO
511  ENDDO
512100 CONTINUE
513  kter = kter_eff
514  DEALLOCATE (indb)
515  DEALLOCATE (rho, rhoold)
516  DEALLOCATE (conv)
517  DEALLOCATE (a,c)
518  DEALLOCATE (gs, ts, hs, hsold)
519  DEALLOCATE (g, t, h, hold)
520
521  CALL stop_clock ('ccg_many_vectors')
522  RETURN
523END SUBROUTINE ccg_many_vectors
524!
525!-------------------------------------------------------------------
526SUBROUTINE linsolvms(hc,m,n,vc,alpha)
527!-------------------------------------------------------------------
528!
529!    This routine is a driver for the correspondent lapack routines
530!    which solve a linear system of equations with real coefficients.
531!    The system is assumed to be overdetermined, so the number of equation
532!    is larger than the number of unknown. The program gives the solution
533!    that minimize the || Ax - b ||^2.
534!    input the matrix is contained in hc, and the known part in vc, on
535!    output the solution is on alpha.
536!
537!
538USE kinds, ONLY : DP
539IMPLICIT NONE
540
541INTEGER, INTENT(IN)  :: m, n        ! input: logical dimensions of hc
542
543REAL(DP), INTENT(IN)  ::  hc(m,n),  &  ! input: the matrix to solve
544                          vc(m)        ! input: the known part of the system
545
546REAL(DP), INTENT(INOUT) ::  alpha(n)     ! output: the solution
547
548REAL(DP) :: aux(m,1)                     ! auxiliary space
549REAL(DP), ALLOCATABLE    :: work(:)
550REAL(DP) :: rwork
551INTEGER :: iwork
552INTEGER :: info
553
554aux(1:m,1)=vc(1:m)
555iwork=-1
556CALL dgels('N',m,n,1,hc,m,aux,m,rwork,iwork,info)
557CALL errore('linsolvms','error finding optimal size',abs(info))
558iwork=NINT(rwork)
559ALLOCATE(work(iwork))
560CALL dgels('N',m,n,1,hc,m,aux,m,work,iwork,info)
561CALL errore('linsolvms','error in solving',abs(info))
562alpha(1:n)=aux(1:n,1)
563
564DEALLOCATE( work )
565
566RETURN
567END SUBROUTINE linsolvms
568
569!-------------------------------------------------------------------
570SUBROUTINE linsolvsvd(hc,m,n,vc,alpha)
571!-------------------------------------------------------------------
572!
573!    This routine is a driver for the correspondent lapack routines
574!    which solve a linear system of equations with real coefficients.
575!    The system is assumed to be overdetermined, so the number of equation
576!    is larger than the number of unknown. The program gives the solution
577!    that minimize the || Ax - b ||^2 using the singular value decomposition
578!    method.
579!    input the matrix is contained in hc, and the known part in vc, on
580!    output the solution is on alpha.
581!
582!
583USE kinds, ONLY : DP
584IMPLICIT NONE
585
586INTEGER, INTENT(IN)  :: m, n        ! input: logical dimensions of hc
587
588REAL(DP), INTENT(IN)  ::  hc(m,n),  &  ! input: the matrix to solve
589                          vc(m)        ! input: the known part of the system
590
591REAL(DP), INTENT(INOUT) ::  alpha(n)     ! output: the solution
592
593REAL(DP) :: aux(m,1), w2(m)               ! auxiliary space
594REAL(DP), ALLOCATABLE    :: work(:)
595REAL(DP) :: rwork, rcond
596INTEGER :: iwork, rank
597INTEGER :: info
598
599aux(1:m,1)=vc(1:m)
600rcond=-1.0_DP
601iwork=-1
602CALL dgelss(m,n,1,hc,m,aux,m,w2,rcond,rank,rwork,iwork,info)
603CALL errore('linsolvsvd','error finding optimal size',abs(info))
604iwork=NINT(rwork)
605ALLOCATE(work(iwork))
606CALL dgelss(m,n,1,hc,m,aux,m,w2,rcond,rank,work,iwork,info)
607CALL errore('linsolvsvd','error in solving',abs(info))
608alpha(1:n)=aux(1:n,1)
609WRITE(stdout,'(/5x,"In linsolvsvd m, n, and rank are: ",3i6)') m, n, rank
610
611DEALLOCATE( work )
612
613RETURN
614END SUBROUTINE linsolvsvd
615
616!
617!-------------------------------------------------------------------
618SUBROUTINE linsolvx(hc,n,vc,alpha)
619!-------------------------------------------------------------------
620!
621!    This routine was initially in the Quantum ESPRESSO distribution.
622!
623!    This routine is a driver for the correspondent lapack routines
624!    which solve a linear system of equations with real coefficients. On
625!    input the matrix is contained in hc, and the known part in vc, on
626!    output the solution is on alpha.
627!
628!
629USE kinds, ONLY : DP
630IMPLICIT NONE
631
632INTEGER, INTENT(IN)  :: n        ! input: logical dimension of hc
633
634REAL(DP), INTENT(IN)  ::  hc(n,n),  &  ! input: the matrix to solve
635                          vc(n)        ! input: the known part of the system
636
637REAL(DP), INTENT(INOUT) ::  alpha(n)     ! output: the solution
638
639INTEGER, ALLOCATABLE    :: iwork(:)
640INTEGER :: info
641
642ALLOCATE(iwork(n))
643
644CALL dgetrf(n,n,hc,n,iwork,info)
645CALL errore('linsolvx','error in factorization',abs(info))
646alpha=vc
647CALL dgetrs('N',n,1,hc,n,iwork,alpha,n,info)
648CALL errore('linsolvx','error in solving',abs(info))
649
650DEALLOCATE( iwork )
651
652RETURN
653END SUBROUTINE linsolvx
654!
655!-------------------------------------------------------------------
656SUBROUTINE linsolvx_sym(hc,n,vc,alpha)
657!-------------------------------------------------------------------
658!
659!    This routine is a driver for the corresponding lapack routine
660!    which solves a linear system of equations with real coefficients
661!    for a symmetric matrix hc. On input the matrix is contained in the
662!    upper triangular part of hc, the right hand side in vc, on output
663!    the solution is on alpha.
664!
665!
666USE kinds, ONLY : DP
667IMPLICIT NONE
668
669INTEGER, INTENT(IN)  :: n        ! input: logical dimension of hc
670
671REAL(DP), INTENT(IN)  ::  hc(n,n),  &  ! input: the matrix to solve
672                          vc(n)        ! input: the known part of the system
673
674REAL(DP), INTENT(INOUT) ::  alpha(n)     ! output: the solution
675
676REAL(DP) :: work(n)
677INTEGER :: ipiv(n)
678INTEGER :: info
679
680alpha=vc
681CALL dsysv('U',n,1,hc,n,ipiv,alpha,n,work,n,info)
682CALL errore('linsolvx_sym','error in factorization',abs(info))
683
684RETURN
685END SUBROUTINE linsolvx_sym
686
687SUBROUTINE min_sqr_solve(ndata, ncoeff, amat, f, coeff, lsolve)
688!
689!   This routine receives a linear system where in general the number of
690!   rows ndata is larger than the number column ncoeff and finds the
691!   solution of the system that minimize the norm ||AX-f||^2.
692!   It can do it with three different algorithms according to the
693!   value of lsolve.
694!   A X = f      where A is a matrix of dimension ndata x ncoeff
695!                Using 1 a matrix ncoeff x ncoeff is calculated as
696!   A^T A X = A^T f   and the linear system provides X, the ncoeff
697!                coefficients of the polynomial.
698!                Using 2 the overdetemined linear system AX=f is solved
699!                using QR or LQ factorization.
700!                Using 3 the overdetermined linear system AX=f is solved
701!                using SVD decomposition.
702!   If lsolve is not one of these values method 2 is used.
703!
704USE kinds, ONLY : DP
705IMPLICIT NONE
706
707INTEGER, INTENT(IN) :: ndata, ncoeff, lsolve
708REAL(DP), INTENT(IN) :: amat(ndata,ncoeff), f(ndata)
709REAL(DP), INTENT(INOUT) :: coeff(ncoeff)
710
711REAL(DP), ALLOCATABLE :: aa(:,:), b(:)
712INTEGER :: idata, ivar, jvar, lsolve_
713
714ALLOCATE(aa(ncoeff,ncoeff))
715ALLOCATE(b(ncoeff))
716
717aa=0.0_DP
718b =0.0_DP
719DO ivar=1,ncoeff
720   DO jvar=1,ncoeff
721      DO idata=1,ndata
722         aa(ivar,jvar)= aa(ivar,jvar) + amat(idata,ivar) * amat(idata,jvar)
723      END DO
724   END DO
725   DO idata=1,ndata
726      b(ivar) = b(ivar) + amat(idata,ivar) * f(idata)
727   END DO
728END DO
729!
730!   solve the linear system and find the coefficients
731!
732coeff=0.0_DP
733lsolve_=lsolve
734IF (lsolve_<1.OR.lsolve_>3) lsolve_=2
735IF (lsolve_==1) THEN
736   WRITE(stdout,'(5x,"Finding the quartic polynomial using &
737                                              &ncoeff x ncoeff matrix")')
738   CALL linsolvx(aa,ncoeff,b,coeff)
739!   CALL linsolvx_sym(aa,ncoeff,b,coeff)
740
741ELSEIF(lsolve_==2) THEN
742   WRITE(stdout,'(5x,"Finding the quartic polynomial using &
743                                                   &QR factorization")')
744   CALL linsolvms(amat,ndata,ncoeff,f,coeff)
745ELSEIF(lsolve_==3) THEN
746   WRITE(stdout,'(5x,"Finding the quartic polynomial using SVD")')
747   CALL linsolvsvd(amat,ndata,ncoeff,f,coeff)
748ENDIF
749
750DEALLOCATE(aa)
751DEALLOCATE(b)
752
753RETURN
754END SUBROUTINE min_sqr_solve
755
756END MODULE linear_solvers
757