1 /*
2 XLiFE++ is an extended library of finite elements written in C++
3     Copyright (C) 2014  Lunéville, Eric; Kielbasiewicz, Nicolas; Lafranche, Yvon; Nguyen, Manh-Ha; Chambeyron, Colin
4 
5     This program is free software: you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation, either version 3 of the License, or
8     (at your option) any later version.
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13     You should have received a copy of the GNU General Public License
14     along with this program.  If not, see <http://www.gnu.org/licenses/>.
15  */
16 
17 /*!
18   \file DenseStorage.hpp
19   \authors D. Martin, E. Lunéville, NGUYEN Manh Ha, N. Kielbasiewicz
20   \since 21 jun 2011
21   \date 21 feb 2013
22 
23   \brief Definition of the xlifepp::DenseStorage class
24 
25   xlifepp::DenseStorage class is the abstract mother class of all dense storage methods for large matrices
26   \verbatim
27                          | RowDenseStorage   (store row[1],row[2] ...)
28   DenseStorage --------> | ColDenseStorage   (store col[1],col[2] ...)
29                          | DualDenseStorage
30                          | SymDenseStorage
31   \endverbatim
32 
33   Only _symm access type storage can be used for a matrix with a symmetry
34   property ( _symmetric, _skewSymmetric, _selfAdjoint or _skewAdjoint)
35 */
36 
37 #ifndef DENSE_STORAGE_HPP
38 #define DENSE_STORAGE_HPP
39 
40 #include "config.h"
41 #include "../MatrixStorage.hpp"
42 #ifdef XLIFEPP_WITH_OMP
43 #include "omp.h"
44 #endif // XLIFEPP_WITH_OMP
45 
46 namespace xlifepp
47 {
48 
49 /*!
50    \class DenseStorage
51    abstract base class of all dense storage methods
52  */
53 
54 class DenseStorage : public MatrixStorage
55 {
56   public:
57     // Constructors, Destructor
58     DenseStorage(string_t id="DenseStorage");                                 //!< default constructor
59     DenseStorage(AccessType, string_t id="DenseStorage");                     //!< constructor by access type
60     DenseStorage(AccessType, number_t, string_t id="DenseStorage");           //!< constructor by access type, number of columns and rows (same)
61     DenseStorage(AccessType, number_t, number_t, string_t id="DenseStorage"); //!< constructor by access type, number of columns and rows
~DenseStorage()62     virtual ~DenseStorage() {}                //!< virtual destructor
63 
64     MatrixStorage* toDual();                  //!< create a new dual dense storage from sym dense storage
65 
66     // utilities
pos_(number_t i,number_t j,SymType s=_noSymmetry) const67     virtual number_t pos_(number_t i, number_t j, SymType s = _noSymmetry) const   //! overloaded pos fast returns adress of entry (i,j)
68     {return pos(i,j,s); }
size() const69     virtual number_t size() const {return nbRows_ * nbCols_;} //!< storage size except for SymDenseStorage
lowerPartSize() const70     virtual number_t lowerPartSize() const {return 0;}        //!< size of lower triangular part except for Dual/SymDenseStorage
upperPartSize() const71     virtual number_t upperPartSize() const {return 0;}        //!< size of upper triangular part except for Dual/SymDenseStorage
72     std::set<number_t> getRows(number_t c, number_t r1=1, number_t r2=0) const;  //!< get row indices of col c in set [r1,r2]
73     std::set<number_t> getCols(number_t r, number_t c1=1, number_t c2=0) const;  //!< get col indices of row r in set [c1,c2]
74     void getColsV(std::vector<number_t>&, number_t&, number_t r, number_t c1=1, number_t c2=0) const;  //!< get col indices of row r in set [c1,c2]
75     void getRowsV(std::vector<number_t>&, number_t&, number_t c, number_t r1=1, number_t r2=0) const;  //!< get row indices of col c in set [r1,r2]
76     bool sameStorage(const MatrixStorage&) const;             //!< check if two storages have the same structures
77 
78   protected:
79     //@{
80     //! print utilites for child class
81     template<typename Iterator>
82     void printScalarEntries(Iterator& it, number_t nrows, number_t ncols, number_t perRow, number_t width, number_t prec,
83                             const string_t& rowOrcol, number_t vb, std::ostream& os) const;
84     template<typename Iterator>
85     void printScalarEntriesTriangularPart(Iterator& itd, Iterator& it, number_t nrows, number_t ncols, number_t perRow, number_t width, number_t prec,
86                                           const string_t& rowOrcol, number_t vb, std::ostream& os) const;
87     template<typename Iterator>
88     void printMatrixEntries(Iterator& it, number_t nrows, number_t ncols,
89                             const string_t& rowOrcol, number_t vb, std::ostream& os) const;
90     template<typename Iterator>
91     void printMatrixEntriesTriangularPart(Iterator& itd, Iterator& it, number_t nrows, number_t ncols,
92                                           const string_t& rowOrcol, number_t vb, std::ostream& os) const;
93     //@}
94 
95     void loadFromFileDense(std::istream&, std::vector<real_t>&, SymType, bool);
96     void loadFromFileDense(std::istream&, std::vector<complex_t>&, SymType, bool);
97     void loadFromFileCoo(std::istream&, std::vector<real_t>&, SymType, bool);
98     void loadFromFileCoo(std::istream&, std::vector<complex_t>&, SymType, bool);
99 
100     // product of any part of dense matrix and vector common to different dense storages (child classes)
101     template<typename MatIterator, typename VecIterator, typename ResIterator>
102     void diagonalMatrixVector(MatIterator&, VecIterator&, ResIterator&, ResIterator&) const; //!< diag matrix * vector
103     template<typename MatIterator, typename VecIterator, typename ResIterator>
104     void diagonalVectorMatrix(MatIterator&, VecIterator&, ResIterator&, ResIterator&) const; //!< vector * diag matrix
105     template<typename MatIterator, typename VecIterator, typename ResIterator>
106     void lowerMatrixVector(MatIterator&, VecIterator&, VecIterator&,
107                            ResIterator&, ResIterator&, SymType = _noSymmetry) const;         //!< Lower part * vector
108     template<typename MatIterator, typename VecIterator, typename ResIterator>
109     void lowerVectorMatrix(MatIterator&, VecIterator&, VecIterator&,
110                            ResIterator&, ResIterator&, SymType = _noSymmetry) const;         //!< vector * lower part
111     template<typename MatIterator, typename VecIterator, typename ResIterator>
112     void upperMatrixVector(MatIterator&, VecIterator&, VecIterator&,
113                            ResIterator&, ResIterator&, SymType = _noSymmetry) const;         //!< upper part * vector
114     template<typename MatIterator, typename VecIterator, typename ResIterator>
115     void upperVectorMatrix(MatIterator&, VecIterator&, VecIterator&,
116                            ResIterator&, ResIterator&, SymType = _noSymmetry) const;         //!< vector * upper part
117     template<typename MatIterator, typename VecIterator, typename ResIterator>
118     void rowMatrixVector(MatIterator&, VecIterator&, VecIterator&, ResIterator&, ResIterator&)  const;   //!< row matrix * vector
119     template<typename MatIterator, typename VecIterator, typename ResIterator>
120     void rowVectorMatrix(MatIterator&, VecIterator&, VecIterator&, ResIterator&, ResIterator&)  const;   //!< vector * row matrix
121     template<typename MatIterator, typename VecIterator, typename ResIterator>
122     void columnMatrixVector(MatIterator&, VecIterator&, VecIterator&, ResIterator&, ResIterator&)  const; //!< column matrix * vector
123     template<typename MatIterator, typename VecIterator, typename ResIterator>
124     void columnVectorMatrix(MatIterator&, VecIterator&, VecIterator&, ResIterator&, ResIterator&)  const; //!< vector * column matrix
125 
126     //omp versions
127     template<typename MatIterator, typename VecIterator, typename ResIterator>
128     void parallelRowMatrixVector(MatIterator&, VecIterator&, VecIterator&, ResIterator&, ResIterator&)  const;  //!< templated parallel row Matrix x Vector,
129     template<typename MatIterator, typename V, typename R>
130     void parallelRowMatrixVector(MatIterator&, const std::vector<V>&, std::vector<R>&)  const;     //!< templated parallel row Vector x Matrix, assuming square matrix
131     template<typename MatIterator, typename VecIterator, typename ResIterator>
132     void parallelColumnVectorMatrix(MatIterator&, VecIterator&, VecIterator&, ResIterator&, ResIterator&)  const; //!< templated parallel column Vector x Matrix
133     template<typename MatIterator, typename V, typename R>
134     void parallelLowerMatrixVector(MatrixPart, MatIterator&, const std::vector<V>&, std::vector<R>&,
135                                    SymType = _noSymmetry) const;                                   //!< templated parallel lower  Matrix x Vector,
136     template<typename MatIterator, typename V, typename R>
137     void parallelLowerVectorMatrix(MatrixPart, MatIterator&, const std::vector<V>&, std::vector<R>&,
138                                    SymType = _noSymmetry) const;                                   //!< templated parallel lower Vector x Matrix,
139     template<typename MatIterator, typename V, typename R>
140     void parallelUpperMatrixVector(MatrixPart, MatIterator&, const std::vector<V>&, std::vector<R>&,
141                                    SymType =_noSymmetry) const;                                    //!< templated parallel upper Matrix x Vector
142     template<typename MatIterator, typename V, typename R>
143     void parallelUpperVectorMatrix(MatrixPart, MatIterator&, const std::vector<V>&, std::vector<R>&,
144                                    SymType =_noSymmetry) const;                                    //!< templated parallel upper Vector x Matrix
145 
146     void extractThreadIndex(MatrixPart, number_t&, std::vector<number_t>&) const;  //!< tool to split matrix rows/cols in balanced parts (multi-thread computation)
147 
148     //! sum of any part of dense matrix and dense matrix
149     template<typename Mat1Iterator, typename Mat2Iterator, typename ResIterator>
150     void sumMatrixMatrix(Mat1Iterator& itm1, Mat2Iterator& itm2, ResIterator& itrb, ResIterator& itre) const;
151 
152     //SOR and SSOR stuff
153     template<typename MatIterator, typename VecIterator, typename ResIterator>
154     void bzSorDiagonalMatrixVector(MatIterator&, VecIterator&, ResIterator&,
155                                    ResIterator&, const real_t) const;         //!< SOR diagonal product w * D * v
156     template<typename MatIterator, typename VecIterator, typename XIterator>
157     void bzSorDiagonalSolver(MatIterator&, VecIterator&, XIterator&,
158                              XIterator&, const real_t) const;                 //!< SOR diagonal solver D/w x = v
159     template<typename MatIterator, typename VecIterator, typename XIterator>
160     void bzSorLowerSolver(const MatIterator&, const MatIterator&, VecIterator&, XIterator&,
161                           XIterator&, const real_t) const;                   //!< SOR lower solver (D/w+L) x = b
162     template<typename MatRevIterator, typename VecRevIterator, typename XRevIterator>
163     void bzSorUpperSolver(const MatRevIterator&, const MatRevIterator&, VecRevIterator&,
164                           XRevIterator&, XRevIterator&, const real_t) const;  //!< SOR upper solver (D/w+U) x = b
165 
166   public:
167     //Generic Gauss solver and LU factorization (see childs for faster implementation)
168     template<typename T>
169     void gaussSolverG(std::vector<T>&, std::vector<std::vector<T> >&) const; //!< generic Gauss solver with partial pivoting strategy  A*x1=b1, A*x2=b2, ...
170     template<typename T>
171     void gaussSolverG(std::vector<T>&, std::vector<T>&) const;               //!< generic Gauss solver with partial pivoting strategy  A*x=b
172     template<typename T>
173     void luG(std::vector<T>& A, std::vector<T>& LU, std::vector<number_t>& p) const;//!< LU factorization with row pivoting (omp)
174     template<typename T>
175     void luG(std::vector<T>& A, std::vector<T>& LU) const;                          //!< LU factorization with no pivoting (omp)
176     //@{
177     //! specializations of gaussSolver and LU factorization thats call generic functions
178     virtual void gaussSolver(std::vector<real_t>& A, std::vector<std::vector<real_t> >& bs) const;
179     virtual void gaussSolver(std::vector<complex_t>& A, std::vector<std::vector<complex_t> >& bs) const;
180     virtual void gaussSolver(std::vector<real_t>& A, std::vector<real_t>& b) const;
181     virtual void gaussSolver(std::vector<complex_t>& A, std::vector<complex_t>& b) const;
182     void lu(std::vector<real_t>& A, std::vector<real_t>& LU, std::vector<number_t>& p, const SymType sym = _noSymmetry) const;
183     void lu(std::vector<complex_t>& A, std::vector<complex_t>& LU, std::vector<number_t>& p, const SymType sym = _noSymmetry) const;
184     void lu(std::vector<real_t>& A, std::vector<real_t>& LU, const SymType sym = _noSymmetry) const;
185     void lu(std::vector<complex_t>& A, std::vector<complex_t>& LU, const SymType sym = _noSymmetry) const;
186     //@}
187 
188     // Template diagonal and triangular part solvers (after factorization), call generic triangular part solvers
189     //! Generic lower triangular with unit diagonal linear system solver : (I+L) x = b
190     template<typename M, typename V, typename X>
191     void lowerD1SolverG(const std::vector<M>& m, std::vector<V>& v, std::vector<X>& x) const;
192     //@{
193     //! Specializations of lower triangular part with unit diagonal linear solvers (I + L) x = v */
lowerD1Solver(const std::vector<real_t> & m,std::vector<real_t> & v,std::vector<real_t> & x) const194     void lowerD1Solver(const std::vector<real_t>& m, std::vector<real_t>& v, std::vector<real_t>& x) const
195     { lowerD1SolverG<>(m, v, x); }
lowerD1Solver(const std::vector<real_t> & m,std::vector<complex_t> & v,std::vector<complex_t> & x) const196     void lowerD1Solver(const std::vector<real_t>& m, std::vector<complex_t>& v, std::vector<complex_t>& x) const
197     { lowerD1SolverG<>(m, v, x); }
lowerD1Solver(const std::vector<complex_t> & m,std::vector<real_t> & v,std::vector<complex_t> & x) const198     void lowerD1Solver(const std::vector<complex_t>& m, std::vector<real_t>& v, std::vector<complex_t>& x) const
199     { lowerD1SolverG<>(m, v, x); }
lowerD1Solver(const std::vector<complex_t> & m,std::vector<complex_t> & v,std::vector<complex_t> & x) const200     void lowerD1Solver(const std::vector<complex_t>& m, std::vector<complex_t>& v, std::vector<complex_t>& x) const
201     { lowerD1SolverG<>(m, v, x); }
202     //@}
203 
204     //! Generic lower triangular with unit diagonal linear system left solver : x (I+L) = b
205     template<typename M, typename V, typename X>
206     void lowerD1LeftSolverG(const std::vector<M>& m, std::vector<V>& v, std::vector<X>& x) const;
207     //@{
208     //! Specializations of lower triangular part with unit diagonal linear solvers (I + L) x = v */
lowerD1LeftSolver(const std::vector<real_t> & m,std::vector<real_t> & v,std::vector<real_t> & x) const209     void lowerD1LeftSolver(const std::vector<real_t>& m, std::vector<real_t>& v, std::vector<real_t>& x) const
210     { lowerD1LeftSolverG<>(m, v, x); }
lowerD1LeftSolver(const std::vector<real_t> & m,std::vector<complex_t> & v,std::vector<complex_t> & x) const211     void lowerD1LeftSolver(const std::vector<real_t>& m, std::vector<complex_t>& v, std::vector<complex_t>& x) const
212     { lowerD1LeftSolverG<>(m, v, x); }
lowerD1LeftSolver(const std::vector<complex_t> & m,std::vector<real_t> & v,std::vector<complex_t> & x) const213     void lowerD1LeftSolver(const std::vector<complex_t>& m, std::vector<real_t>& v, std::vector<complex_t>& x) const
214     { lowerD1LeftSolverG<>(m, v, x); }
lowerD1LeftSolver(const std::vector<complex_t> & m,std::vector<complex_t> & v,std::vector<complex_t> & x) const215     void lowerD1LeftSolver(const std::vector<complex_t>& m, std::vector<complex_t>& v, std::vector<complex_t>& x) const
216     { lowerD1LeftSolverG<>(m, v, x); }
217     //@}
218 
219     //! Generic diagonal linear system solver : D x = b
220     template<typename M, typename V, typename X>
221     void diagonalSolverG(const std::vector<M>& m, std::vector<V>& v, std::vector<X>& x) const;
222     //@{
223     //! Specializations of diagonal linear solvers D x = v
diagonalSolver(const std::vector<real_t> & m,std::vector<real_t> & v,std::vector<real_t> & x) const224     void diagonalSolver(const std::vector<real_t>& m, std::vector<real_t>& v, std::vector<real_t>& x) const
225     { diagonalSolverG<>(m, v, x); }
diagonalSolver(const std::vector<real_t> & m,std::vector<complex_t> & v,std::vector<complex_t> & x) const226     void diagonalSolver(const std::vector<real_t>& m, std::vector<complex_t>& v, std::vector<complex_t>& x) const
227     { diagonalSolverG<>(m, v, x); }
diagonalSolver(const std::vector<complex_t> & m,std::vector<real_t> & v,std::vector<complex_t> & x) const228     void diagonalSolver(const std::vector<complex_t>& m, std::vector<real_t>& v, std::vector<complex_t>& x) const
229     { diagonalSolverG<>(m, v, x); }
diagonalSolver(const std::vector<complex_t> & m,std::vector<complex_t> & v,std::vector<complex_t> & x) const230     void diagonalSolver(const std::vector<complex_t>& m, std::vector<complex_t>& v, std::vector<complex_t>& x) const
231     { diagonalSolverG<>(m, v, x); }
232     //@}
233 
234     //! Generic upper triangular with unit diagonal linear system solver : (I+U) x = b
235     template<typename M, typename V, typename X>
236     void upperD1SolverG(const std::vector<M>& m, std::vector<V>& v, std::vector<X>& x, const SymType sym = _noSymmetry) const;
237     //@{
238     //! Specializations of upper triangular part with unit diagonal linear solvers (I + U) x = v
upperD1Solver(const std::vector<real_t> & m,std::vector<real_t> & v,std::vector<real_t> & x,const SymType sym=_noSymmetry) const239     void upperD1Solver(const std::vector<real_t>& m, std::vector<real_t>& v, std::vector<real_t>& x, const SymType sym = _noSymmetry) const
240     { upperD1SolverG<>(m, v, x, sym); }
upperD1Solver(const std::vector<real_t> & m,std::vector<complex_t> & v,std::vector<complex_t> & x,const SymType sym=_noSymmetry) const241     void upperD1Solver(const std::vector<real_t>& m, std::vector<complex_t>& v, std::vector<complex_t>& x, const SymType sym = _noSymmetry) const
242     { upperD1SolverG<>(m, v, x, sym); }
upperD1Solver(const std::vector<complex_t> & m,std::vector<real_t> & v,std::vector<complex_t> & x,const SymType sym=_noSymmetry) const243     void upperD1Solver(const std::vector<complex_t>& m, std::vector<real_t>& v, std::vector<complex_t>& x, const SymType sym = _noSymmetry) const
244     { upperD1SolverG<>(m, v, x, sym); }
upperD1Solver(const std::vector<complex_t> & m,std::vector<complex_t> & v,std::vector<complex_t> & x,const SymType sym=_noSymmetry) const245     void upperD1Solver(const std::vector<complex_t>& m, std::vector<complex_t>& v, std::vector<complex_t>& x, const SymType sym = _noSymmetry) const
246     { upperD1SolverG<>(m, v, x, sym); }
247     //@}
248 
249     //! Generic upper triangular linear system solver : (D+U) x = b
250     template<typename M, typename V, typename X>
251     void upperSolverG(const std::vector<M>& m, std::vector<V>& v, std::vector<X>& x, const SymType sym = _noSymmetry) const;
252     //@{
253     //! specializations of upper triangular part linear solvers (D + U) x = v
upperSolver(const std::vector<real_t> & m,std::vector<real_t> & v,std::vector<real_t> & x,const SymType sym=_noSymmetry) const254     void upperSolver(const std::vector<real_t>& m, std::vector<real_t>& v, std::vector<real_t>& x, const SymType sym = _noSymmetry) const
255     { upperSolverG<>(m, v, x, sym); }
upperSolver(const std::vector<real_t> & m,std::vector<complex_t> & v,std::vector<complex_t> & x,const SymType sym=_noSymmetry) const256     void upperSolver(const std::vector<real_t>& m, std::vector<complex_t>& v, std::vector<complex_t>& x, const SymType sym = _noSymmetry) const
257     { upperSolverG<>(m, v, x, sym); }
upperSolver(const std::vector<complex_t> & m,std::vector<real_t> & v,std::vector<complex_t> & x,const SymType sym=_noSymmetry) const258     void upperSolver(const std::vector<complex_t>& m, std::vector<real_t>& v, std::vector<complex_t>& x, const SymType sym = _noSymmetry) const
259     { upperSolverG<>(m, v, x, sym); }
upperSolver(const std::vector<complex_t> & m,std::vector<complex_t> & v,std::vector<complex_t> & x,const SymType sym=_noSymmetry) const260     void upperSolver(const std::vector<complex_t>& m, std::vector<complex_t>& v, std::vector<complex_t>& x, const SymType sym = _noSymmetry) const
261     { upperSolverG<>(m, v, x, sym); }
262     //@}
263 
264     //! Generic upper triangular linear system left solver : x U = b
265     template<typename M, typename V, typename X>
266     void upperLeftSolverG(const std::vector<M>& m, std::vector<V>& v, std::vector<X>& x, const SymType sym = _noSymmetry) const;
267     //@{
268     //! Specializations of upper triangular linear system left solver : x U = b
upperLeftSolver(const std::vector<real_t> & m,std::vector<real_t> & v,std::vector<real_t> & x,const SymType sym=_noSymmetry) const269     void upperLeftSolver(const std::vector<real_t>& m, std::vector<real_t>& v, std::vector<real_t>& x, const SymType sym = _noSymmetry) const
270     { upperLeftSolverG<>(m, v, x, sym); }
upperLeftSolver(const std::vector<real_t> & m,std::vector<complex_t> & v,std::vector<complex_t> & x,const SymType sym=_noSymmetry) const271     void upperLeftSolver(const std::vector<real_t>& m, std::vector<complex_t>& v, std::vector<complex_t>& x, const SymType sym = _noSymmetry) const
272     { upperLeftSolverG<>(m, v, x, sym); }
upperLeftSolver(const std::vector<complex_t> & m,std::vector<real_t> & v,std::vector<complex_t> & x,const SymType sym=_noSymmetry) const273     void upperLeftSolver(const std::vector<complex_t>& m, std::vector<real_t>& v, std::vector<complex_t>& x, const SymType sym = _noSymmetry) const
274     { upperLeftSolverG<>(m, v, x, sym); }
upperLeftSolver(const std::vector<complex_t> & m,std::vector<complex_t> & v,std::vector<complex_t> & x,const SymType sym=_noSymmetry) const275     void upperLeftSolver(const std::vector<complex_t>& m, std::vector<complex_t>& v, std::vector<complex_t>& x, const SymType sym = _noSymmetry) const
276     { upperLeftSolverG<>(m, v, x, sym); }
277     //@}
278 };
279 /* --------------------------------------------------------------------------------
280    Template functions
281   --------------------------------------------------------------------------------*/
282 // print matrix of scalars
283 template<typename Iterator>
printScalarEntries(Iterator & it,number_t nrows,number_t ncols,number_t perRow,number_t width,number_t prec,const string_t & rowOrcol,number_t vb,std::ostream & os) const284 void DenseStorage::printScalarEntries(Iterator& it,
285                                       number_t nrows, number_t ncols, number_t perRow, number_t width, number_t prec,
286                                       const string_t& rowOrcol, number_t vb, std::ostream& os) const
287 {
288   os.setf(std::ios::scientific);
289   os <<eol<< "(" << words("firste") << "s " << std::min(vb, nrows) << " " << words(rowOrcol) << "s.)";
290   for(number_t r = 0; r < std::min(vb, nrows) ; r++)
291     {
292       os << eol << "   " << rowOrcol << "   " << r + 1;
293       printRowWise(os, "   ", perRow, width, prec, it, it + ncols);
294       it += ncols;
295     }
296   os.unsetf(std::ios::scientific);
297   os << std::endl;
298 }
299 
300 template<typename Iterator>
printScalarEntriesTriangularPart(Iterator & itd,Iterator & it,number_t nrows,number_t ncols,number_t perRow,number_t width,number_t prec,const string_t & rowOrcol,number_t vb,std::ostream & os) const301 void DenseStorage::printScalarEntriesTriangularPart(Iterator& itd, Iterator& it,
302     number_t nrows, number_t ncols, number_t perRow, number_t width, number_t prec,
303     const string_t& rowOrcol, number_t vb, std::ostream& os) const
304 {
305   // itd is an iterator on matrix diagonal entries
306   // it is an iterator on matrix lower or upper triangular part entries
307   os.setf(std::ios::scientific);
308   os << " " << words("firste") << "s " << std::min(vb, nrows) << " " << words(rowOrcol) << "s.)";
309   os << eol << "   " << rowOrcol << "   " << 1 ;
310   os << "   " << std::setw(width) << std::setprecision(prec) << *itd++;
311   for(number_t r = 1; r < std::min(vb, nrows) ; r++)
312     {
313       os << eol << "   " << rowOrcol << "   " << r + 1 ;
314       if(r < ncols)
315         {
316           printRowWise(os, "   ", perRow, width, prec, it, it + r);
317           if(r % perRow == 0) { os << " ..." << eol << "..."; }
318           os << std::setw(width) << std::setprecision(prec) << *itd++;
319           it += r;
320         }
321       else
322         {
323           printRowWise(os, "   ", perRow, width, prec, it, it + ncols);
324           it += ncols;
325         }
326     }
327   os.unsetf(std::ios::scientific);
328   os << std::endl;
329 }
330 
331 // print matrix of matrices
332 template<typename Iterator>
printMatrixEntries(Iterator & it,number_t nrows,number_t ncols,const string_t & rowOrcol,number_t vb,std::ostream & os) const333 void DenseStorage::printMatrixEntries(Iterator& it,
334                                       number_t nrows, number_t ncols,
335                                       const string_t& rowOrcol, number_t vb, std::ostream& os) const
336 {
337   os.setf(std::ios::scientific);
338   os << eol<<"(" << words("firste") << "s " << std::min(vb, nrows) << " " << words(rowOrcol) << "s.)";
339   for(number_t r = 0; r < std::min(vb, nrows) ; r++)
340     {
341       os << eol << "   " << rowOrcol << "   " << r + 1;
342       for(Iterator itj = it; itj != it + ncols; itj++) { os << *itj; }
343       it += ncols;
344     }
345   os.unsetf(std::ios::scientific);
346   os << std::endl;
347 }
348 
349 template<typename Iterator>
printMatrixEntriesTriangularPart(Iterator & itd,Iterator & it,number_t nrows,number_t ncols,const string_t & rowOrcol,number_t vb,std::ostream & os) const350 void DenseStorage::printMatrixEntriesTriangularPart(Iterator& itd, Iterator& it,
351     number_t nrows, number_t ncols,
352     const string_t& rowOrcol, number_t vb, std::ostream& os) const
353 {
354   // itd is an iterator on matrix diagonal entries
355   // it is an iterator on matrix lower or upper triangular part entries
356   os.setf(std::ios::scientific);
357   os << " " << words("firste") << "s " << std::min(vb, nrows) << " " << words(rowOrcol) << "s.)";
358   os << eol << "   " << rowOrcol << "   " << 1 ;
359   os << *itd++;
360   for(number_t r = 1; r < std::min(vb, nrows) ; r++)
361     {
362       os << eol << "   " << rowOrcol << "   " << r + 1 ;
363       if(r < ncols)
364         {
365           for(Iterator itj = it; itj != it + r; itj++) { os << *itj; }
366           os << *itd++;
367           it += r;
368         }
369       else
370         {
371           for(Iterator itj = it; itj != it + ncols; itj++) { os << *itj; }
372           it += ncols;
373         }
374     }
375   os.unsetf(std::ios::scientific);
376   os << std::endl;
377 }
378 
379 /*--------------------------------------------------------------------------------
380    template Matrix x Vector partial multiplications used in child classes
381    CAUTION ! We use non-commutative * operations here as we may deal
382              with overloaded matrix-vector left or right products
383    in functions, iterators have the following meaning
384      itd  : iterator on matrix diagonal entries (begin)
385      itm  : iterator on matrix entries (lower "triangular" part)
386      itvb : iterator on given vector entries (begin)
387      itve : iterator on given vector entries (end)
388      itrb : iterator on result vector entries (begin)
389      itre : iterator on result vector entries (end)
390    Result vector res *** NEED NOT BE INITIALIZED HERE ***
391 --------------------------------------------------------------------------------*/
392 // partial multiplication by diagonal M*v, matrix need not be square
393 template<typename MatIterator, typename VecIterator, typename ResIterator>
diagonalMatrixVector(MatIterator & itd,VecIterator & itvb,ResIterator & itrb,ResIterator & itre) const394 void DenseStorage::diagonalMatrixVector(MatIterator& itd, VecIterator& itvb, ResIterator& itrb, ResIterator& itre) const
395 {
396   VecIterator itv = itvb;
397   ResIterator itr;
398   for(itr = itrb; itr != itrb + diagonalSize(); ++itr, ++itd, ++itv) { *itr = *itd** itv; }
399   for(itr = itrb + diagonalSize(); itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
400 }
401 
402 // partial multiplication by diagonal v*M, matrix need not be square
403 template<typename MatIterator, typename VecIterator, typename ResIterator>
diagonalVectorMatrix(MatIterator & itd,VecIterator & itvb,ResIterator & itrb,ResIterator & itre) const404 void DenseStorage::diagonalVectorMatrix(MatIterator& itd, VecIterator& itvb, ResIterator& itrb, ResIterator& itre) const
405 {
406   VecIterator itv = itvb;
407   ResIterator itr;
408   for(itr = itrb; itr != itrb + diagonalSize(); ++itr, ++itd, ++itv) { *itr = *itv** itd; }
409   for(itr = itrb + diagonalSize(); itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
410 }
411 
412 // partial multiplication by lower triangular part
413 // also used in multiplication of upper part of tranposed matrix by vector
414 // should be called after Diagonal multiplication
415 template<typename MatIterator, typename VecIterator, typename ResIterator>
lowerMatrixVector(MatIterator & itm,VecIterator & itvb,VecIterator & itve,ResIterator & itrb,ResIterator & itre,SymType sym) const416 void DenseStorage::lowerMatrixVector(MatIterator& itm, VecIterator& itvb, VecIterator& itve,
417                                      ResIterator& itrb, ResIterator& itre, SymType sym) const
418 {
419   number_t row = 1;
420   number_t rox = itve - itvb;
421   switch(sym)
422     {
423       case _skewSymmetric:
424         for(ResIterator itr = itrb + 1; itr != itre; ++itr, ++row)
425           for(VecIterator itv = itvb; itv != itvb + std::min(row, rox); ++itv, ++itm) { *itr -= *itm** itv; }
426         break;
427       case _selfAdjoint:
428         for(ResIterator itr = itrb + 1; itr != itre; ++itr, ++row)
429           for(VecIterator itv = itvb; itv != itvb + std::min(row, rox); ++itv, ++itm) { *itr += conj(*itm)** itv; }
430         break;
431       case _skewAdjoint:
432         for(ResIterator itr = itrb + 1; itr != itre; ++itr, ++row)
433           for(VecIterator itv = itvb; itv != itvb + std::min(row, rox); ++itv, ++itm) { *itr -= conj(*itm)** itv; }
434         break;
435       default:
436         for(ResIterator itr = itrb + 1; itr != itre; ++itr, ++row)
437           for(VecIterator itv = itvb; itv != itvb + std::min(row, rox); ++itv, ++itm) { *itr += *itm** itv; }
438     }
439 }
440 
441 // templated parallel lower Vector x Matrix, assuming square matrix
442 //  itm : iterator on the first element of the lower triangular part of the matrix, row access
443 template<typename MatIterator, typename V, typename R>
parallelLowerMatrixVector(MatrixPart lowup,MatIterator & itm,const std::vector<V> & v,std::vector<R> & r,SymType sym) const444 void DenseStorage::parallelLowerMatrixVector(MatrixPart lowup, MatIterator& itm, const std::vector<V>& v, std::vector<R>& r, SymType sym) const
445 {
446 #ifndef XLIFEPP_WITH_OMP
447   where("DenseStorage::parallelLowerMatrixVector(...)");
448   error("xlifepp_without_omp");
449 #else
450   number_t nt = 1;
451   #pragma omp parallel for lastprivate(nt)
452   for(number_t i = 0; i < 1; i++) {nt = omp_get_num_threads();}
453 
454   if(nt==1) //do not use parallel computation
455     {
456       typename std::vector<V>::const_iterator itvb = v.begin(), itve = v.end();
457       typename std::vector<R>::iterator it_rb = r.begin(), it_re = r.end();
458       lowerMatrixVector(itm, itvb, itve, it_rb, it_re, sym);
459       return;
460     }
461 
462   number_t rox = v.size();
463   typename std::vector<V>::const_iterator itvb = v.begin(), itve = v.end(), itv;
464   typename std::vector<R>::iterator itrb = r.begin()+1;
465   R rz0= 0.* *r.begin(), rz;
466   MatIterator itmr;
467   switch(sym)
468     {
469       case _skewSymmetric:
470         #pragma omp parallel for firstprivate(itmr, itv, rz) schedule(static)
471         for(number_t row=1; row<r.size(); ++row)
472           {
473             itmr= itm + (row*(row-1))/2;
474             rz= rz0;
475             for(itv = itvb; itv != itvb + std::min(row, rox); ++itv, ++itmr) { rz -= *itmr** itv;}
476             *(r.begin()+ row)+=rz;
477           }
478         break;
479       case _selfAdjoint:
480         #pragma omp parallel for firstprivate(itmr, itv, rz) schedule(static)
481         for(number_t row=1; row<r.size(); ++row)
482           {
483             itmr= itm + (row*(row-1))/2;
484             rz= rz0;
485             for(itv = itvb; itv != itvb + std::min(row, rox); ++itv, ++itmr) { rz += conj(*itmr)** itv; }
486             *(r.begin()+ row)+=rz;
487           }
488         break;
489       case _skewAdjoint:
490         #pragma omp parallel for firstprivate(itmr, itv, rz) schedule(static)
491         for(number_t row=1; row<r.size(); ++row)
492           {
493             itmr= itm + (row*(row-1))/2;
494             rz= rz0;
495             for(itv = itvb; itv != itvb + std::min(row, rox); ++itv, ++itmr) { rz -= conj(*itmr)** itv; }
496             *(r.begin()+ row)+=rz;
497           }
498         break;
499       default:
500         #pragma omp parallel for firstprivate(itmr, itv, rz) schedule(dynamic)
501         for(number_t row=1; row<r.size(); ++row)
502           {
503             itmr= itm + (row*(row-1))/2;
504             rz= rz0;
505             for(itv = itvb; itv != itvb + std::min(row, rox); ++itv, ++itmr) { rz += *itmr** itv; }
506             *(r.begin()+ row)+=rz;
507           }
508     }
509 #endif
510 }
511 
512 // not included parallel case, see parallelLowerVectorMatrix for its parallelized version
513 template<typename MatIterator, typename VecIterator, typename ResIterator>
lowerVectorMatrix(MatIterator & itm,VecIterator & itvb,VecIterator & itve,ResIterator & itrb,ResIterator & itre,SymType sym) const514 void DenseStorage::lowerVectorMatrix(MatIterator& itm, VecIterator& itvb, VecIterator& itve,
515                                      ResIterator& itrb, ResIterator& itre, SymType sym) const
516 {
517   number_t col = 1;
518   number_t cox = itre - itrb;
519   switch(sym)
520     {
521       case _skewSymmetric:
522         for(VecIterator itv = itvb + 1; itv != itve; ++itv, ++col)
523           for(ResIterator itr = itrb;  itr != itrb + std::min(col, cox); ++itr, ++itm) { *itr -= *itv** itm ; }
524         break;
525       case _selfAdjoint:
526         for(VecIterator itv = itvb + 1; itv != itve; ++itv, ++col)
527           for(ResIterator itr = itrb;  itr != itrb + std::min(col, cox); ++itr, ++itm) { *itr += *itv * conj(*itm); }
528         break;
529       case _skewAdjoint:
530         for(VecIterator itv = itvb + 1; itv != itve; ++itv, ++col)
531           for(ResIterator itr = itrb;  itr != itrb + std::min(col, cox); ++itr, ++itm) { *itr -= *itv * conj(*itm); }
532         break;
533       default:
534         for(VecIterator itv = itvb + 1; itv != itve ; itv++, col++)
535           for(ResIterator itr = itrb;  itr != itrb + std::min(col, cox); ++itr, ++itm) *itr += *itv** itm;
536     }
537 }
538 
539 // templated parallel lower Vector x Matrix, assuming square matrix
540 //  itm : iterator on the first element of the lower triangular part of the matrix, row access
541 template<typename MatIterator, typename V, typename R>
parallelLowerVectorMatrix(MatrixPart lowup,MatIterator & itm,const std::vector<V> & v,std::vector<R> & r,SymType sym) const542 void DenseStorage::parallelLowerVectorMatrix(MatrixPart lowup, MatIterator& itm, const std::vector<V>& v, std::vector<R>& r, SymType sym) const
543 {
544 #ifndef XLIFEPP_WITH_OMP
545   where("DenseStorage::parallelLowerVectorMatrix(...)");
546   error("xlifepp_without_omp");
547 #else
548   number_t nt = 1;
549   std::vector<number_t> threadIndex;
550   extractThreadIndex(lowup,nt,threadIndex);
551   if(nt==1) //do not use parallel computation
552     {
553       typename std::vector<V>::const_iterator itvb = v.begin(), itve = v.end();
554       typename std::vector<R>::iterator it_rb = r.begin(), it_re = r.end();
555       lowerVectorMatrix(itm, itvb, itve, it_rb, it_re, sym);
556       return;
557     }
558   //compute product by distributing matrix parts given by threadIndex vector on different thread
559   std::vector<std::vector<R> > res(nt,std::vector<R>(r.size(),0.*r[0])); //temporary vector to avoid data races
560   number_t cox=r.size();
561   #pragma omp parallel for
562   for(number_t t=0; t<nt; ++t)  //partial product in parallel
563     {
564       number_t r1 = threadIndex[t], r2 = threadIndex[t+1];
565       MatIterator itmt = itm + (r1*(r1+1)/2);
566       typename std::vector<V>::const_iterator itvb = v.begin()+ (r1+1), itve = v.begin() + (r2+1), itv;
567       if(t==nt-1) itve=v.end();
568       typename std::vector<R>::iterator itrb = res[t].begin(), itre = res[t].end(), itr;
569       number_t col = r1+1;
570       switch(sym)
571         {
572           case _skewSymmetric:
573             for(itv = itvb; itv != itve; ++itv, ++col)
574               for(itr = itrb;  itr != itrb + std::min(col,cox); ++itr, ++itmt) { *itr -= *itv** itmt ; }
575             break;
576           case _selfAdjoint:
577             for(itv = itvb; itv != itve; ++itv, ++col)
578               for(itr = itrb;  itr != itrb + std::min(col,cox); ++itr, ++itmt) { *itr += *itv * conj(*itmt); }
579             break;
580           case _skewAdjoint:
581             for(itv = itvb; itv != itve; ++itv, ++col)
582               for(itr = itrb;  itr != itrb + std::min(col,cox); ++itr, ++itmt) { *itr -= *itv * conj(*itmt); }
583             break;
584           default:
585             for(itv = itvb; itv != itve ; itv++, col++)
586               for(itr = itrb;  itr != itrb + std::min(col,cox); ++itr, ++itmt) { *itr += *itv** itmt; }
587         }
588     }
589   //accumulate temporary result in a serial way
590   typename std::vector<R>::iterator itrb = r.begin(), itre = r.end(), itr;
591   typename std::vector<R>::iterator itrp;
592   for(number_t t=0; t<nt; ++t)
593     {
594       itrp = res[t].begin();
595       for(itr=itrb; itr!=itre; ++itr, ++itrp) *itr+=*itrp;
596     }
597 #endif
598 }
599 
600 // partial multiplication by upper triangular part
601 // also used in multiplication of upper part of tranposed matrix by vector
602 // should be called after Diagonal multiplication
603 // see parallelUpperMatrixVector for its parallelized version
604 template<typename MatIterator, typename VecIterator, typename ResIterator>
upperMatrixVector(MatIterator & itm,VecIterator & itvb,VecIterator & itve,ResIterator & itrb,ResIterator & itre,SymType sym) const605 void DenseStorage::upperMatrixVector(MatIterator& itm, VecIterator& itvb, VecIterator& itve,
606                                      ResIterator& itrb, ResIterator& itre, SymType sym) const
607 {
608   number_t col = 1;
609   number_t cox = itre - itrb;
610   switch(sym)
611     {
612       case _skewSymmetric:
613         for(VecIterator itv = itvb + 1; itv != itve; ++itv, ++col)
614           //for(ResIterator itr = itrb;  itr != itrb + std::min(col, cox); itr++, itm++) { *itr -= *itm * *itv; }
615           for(ResIterator itr = itrb;  itr != itrb + std::min(col, cox); ++itr, ++itm) { *itr -= *itv** itm ; }
616         break;
617       case _selfAdjoint:
618         for(VecIterator itv = itvb + 1; itv != itve; itv++, col++)
619           //for(ResIterator itr = itrb;  itr != itrb + std::min(col, cox); itr++, itm++) { *itr += conj(*itm) * *itv; }
620           for(ResIterator itr = itrb;  itr != itrb + std::min(col, cox); ++itr, ++itm) { *itr += *itv * conj(*itm); }
621         break;
622       case _skewAdjoint:
623         for(VecIterator itv = itvb + 1; itv != itve; itv++, ++col)
624           //for(ResIterator itr = itrb;  itr != itrb + std::min(col, cox); itr++, itm++) { *itr -= conj(*itm) * *itv; }
625           for(ResIterator itr = itrb;  itr != itrb + std::min(col, cox); ++itr, ++itm) { *itr -= *itv * conj(*itm); }
626         break;
627       case _symmetric:
628         for(VecIterator itv = itvb + 1; itv != itve; itv++, ++col)
629           //for(ResIterator itr = itrb;  itr != itrb + std::min(col, cox); itr++, itm++) { *itr += *itm * *itv; }
630           for(ResIterator itr = itrb;  itr != itrb + std::min(col, cox); ++itr, ++itm) { *itr += *itv** itm; }
631         break;
632       default:
633         for(VecIterator itv = itvb + 1; itv != itve; itv++, col++)
634           for(ResIterator itr = itrb;  itr != itrb + std::min(col, cox); ++itr, ++itm) { *itr += *itm** itv; }
635     }
636 }
637 
638 //templated parallel upper Matrix x Vector, assuming square matrix
639 // itm : iterator on the first element of the upper triangular part of the matrix, column access
640 template<typename MatIterator, typename V, typename R>
parallelUpperMatrixVector(MatrixPart lowup,MatIterator & itm,const std::vector<V> & v,std::vector<R> & r,SymType sym) const641 void DenseStorage::parallelUpperMatrixVector(MatrixPart lowup, MatIterator& itm, const std::vector<V>& v, std::vector<R>& r, SymType sym) const
642 {
643 #ifdef XLIFEPP_WITH_OMP
644   number_t nt = 1;
645   std::vector<number_t> threadIndex;
646   extractThreadIndex(lowup,nt,threadIndex);
647   if(nt==1) //do not use parallel computation
648     {
649       typename std::vector<V>::const_iterator itvb = v.begin(), itve = v.end();
650       typename std::vector<R>::iterator it_rb = r.begin(), it_re = r.end();
651       upperMatrixVector(itm, itvb, itve, it_rb, it_re, sym);
652       return;
653     }
654   //compute product by distributing matrix parts given by threadIndex vector on different thread
655   std::vector<std::vector<R> > res(nt,std::vector<R>(r.size(),0.*r[0])); //temporary vector to avoid data races
656   number_t cox=r.size();
657   #pragma omp parallel for
658   for(number_t t=0; t<nt; ++t)  //partial product in parallel
659     {
660       number_t r1 = threadIndex[t], r2 = threadIndex[t+1];
661       MatIterator itmt = itm + (r1*(r1+1)/2);
662       typename std::vector<V>::const_iterator itvb = v.begin()+ (r1+1), itve = v.begin() + (r2+1), itv;
663       if(t==nt-1) itve=v.end();
664       typename std::vector<R>::iterator itrb = res[t].begin(), itre = res[t].end(), itr;
665       number_t col = r1+1;
666       switch(sym)
667         {
668           case _skewSymmetric:
669             for(itv = itvb; itv != itve; ++itv, ++col)
670               //for(itr = itrb;  itr != itrb + std::min(col,cox); ++itr, ++itmt) { *itr -= *itv** itmt ; }
671               for(itr = itrb;  itr != itrb + std::min(col,cox); ++itr, ++itmt) { *itr -= *itmt * *itv; }
672             break;
673           case _selfAdjoint:
674             for(itv = itvb; itv != itve; ++itv, ++col)
675               //for(itr = itrb;  itr != itrb + std::min(col,cox); ++itr, ++itmt) { *itr += *itv * conj(*itmt); }
676               for(itr = itrb;  itr != itrb + std::min(col,cox); ++itr, ++itmt) { *itr += conj(*itmt) * *itv; }
677             break;
678           case _skewAdjoint:
679             for(itv = itvb; itv != itve; ++itv, ++col)
680               //for(itr = itrb;  itr != itrb + std::min(col,cox); ++itr, ++itmt) { *itr -= *itv * conj(*itmt); }
681               for(itr = itrb;  itr != itrb + std::min(col,cox); ++itr, ++itmt) { *itr -=  conj(*itmt) ** itv; }
682             break;
683           default:
684             for(itv = itvb; itv != itve ; itv++, col++)
685               //for(itr = itrb;  itr != itrb + std::min(col,cox); ++itr, ++itmt) { *itr += *itv** itmt; }
686               for(itr = itrb;  itr != itrb + std::min(col,cox); ++itr, ++itmt) { *itr += * itmt * *itv; }
687         }
688     }
689   //accumulate temporary result in a serial way
690   typename std::vector<R>::iterator itrb = r.begin(), itre = r.end(), itr;
691   typename std::vector<R>::iterator itrp;
692   for(number_t t=0; t<nt; ++t)
693     {
694       itrp = res[t].begin();
695       for(itr=itrb; itr!=itre; ++itr, ++itrp) *itr+=*itrp;
696     }
697 #endif
698 }
699 
700 template<typename MatIterator, typename VecIterator, typename ResIterator>
upperVectorMatrix(MatIterator & itm,VecIterator & itvb,VecIterator & itve,ResIterator & itrb,ResIterator & itre,SymType sym) const701 void DenseStorage::upperVectorMatrix(MatIterator& itm, VecIterator& itvb, VecIterator& itve,
702                                      ResIterator& itrb, ResIterator& itre, SymType sym) const
703 {
704   number_t col= 1;
705   number_t cox = itve - itvb;
706   switch(sym)
707     {
708       case _skewSymmetric:
709         for(ResIterator itr = itrb + 1; itr != itre; ++itr, ++col)
710           for(VecIterator itv = itvb; itv != itvb + std::min(col, cox); ++itv, ++itm) { *itr -= *itm** itv; }
711         //for(VecIterator itv = itvb; itv != itvb + std::min(col, cox); ++itv, ++itm) { *itr -= *itv **itm;} //vector of vector
712         break;
713       case _selfAdjoint:
714         for(ResIterator itr = itrb + 1; itr != itre; ++itr, ++col)
715           for(VecIterator itv = itvb; itv != itvb + std::min(col, cox); ++itv, ++itm) { *itr += conj(*itm)** itv; }
716         //for(VecIterator itv = itvb; itv != itvb + std::min(col, cox); ++itv, ++itm) { *itr += *itv * conj(*itm);} //vector of vector
717         break;
718       case _skewAdjoint:
719         for(ResIterator itr = itrb + 1; itr != itre; ++itr, ++col)
720           for(VecIterator itv = itvb; itv != itvb + std::min(col, cox); ++itv, ++itm) { *itr -= conj(*itm)** itv; }
721         //for(VecIterator itv = itvb; itv != itvb + std::min(col, cox); ++itv, ++itm) { *itr -= *itv * conj(*itm);} //vector of vector
722         break;
723       default:
724         for(ResIterator itr = itrb + 1; itr != itre; ++itr, ++col)
725           for(VecIterator itv = itvb; itv != itvb + std::min(col, cox); ++itv, ++itm) { *itr += *itm** itv; }
726         //for(VecIterator itv = itvb; itv != itvb + std::min(col, cox); ++itv, ++itm) { *itr += *itv **itm;} //vector of vector
727     }
728 }
729 
730 // templated parallel lower Vector x Matrix, assuming square matrix
731 //  itm : iterator on the first element of the lower triangular part of the matrix, row access
732 template<typename MatIterator, typename V, typename R>
parallelUpperVectorMatrix(MatrixPart lowup,MatIterator & itm,const std::vector<V> & v,std::vector<R> & r,SymType sym) const733 void DenseStorage::parallelUpperVectorMatrix(MatrixPart lowup, MatIterator& itm, const std::vector<V>& v, std::vector<R>& r, SymType sym) const
734 {
735 #ifndef XLIFEPP_WITH_OMP
736   where("DenseStorage::parallelLowerMatrixVector(...)");
737   error("xlifepp_without_omp");
738 #else
739   number_t nt = 1;
740   std::vector<number_t> threadIndex;
741   extractThreadIndex(lowup,nt,threadIndex);
742   if(nt==1) //do not use parallel computation
743     {
744       typename std::vector<V>::const_iterator itvb = v.begin(), itve = v.end();
745       typename std::vector<R>::iterator it_rb = r.begin(), it_re = r.end();
746       lowerMatrixVector(itm, itvb, itve, it_rb, it_re, sym);
747       return;
748     }
749 
750   //multi-threads
751   number_t rox = v.size();
752   typename std::vector<V>::const_iterator itvb = v.begin(), itve = v.end(), itv;
753   typename std::vector<R>::iterator itrb = r.begin()+1;
754   R rz0 = 0.* *r.begin();
755   switch(sym)
756     {
757       case _skewSymmetric:
758         #pragma omp parallel for schedule(static)
759         for(number_t row=1; row<r.size(); ++row)
760           {
761             typename std::vector<V>::const_iterator itv;
762             MatIterator itmr= itm + (row*(row-1)/2);
763             R rz= rz0;
764             for(itv = itvb; itv != itvb + std::min(row, rox); ++itv, ++itmr) { rz -= * itv * *itmr; }
765             typename std::vector<R>::iterator itr = r.begin()+ row;
766             *itr+=rz;
767           }
768         break;
769       case _selfAdjoint:
770         #pragma omp parallel for schedule(static)
771         for(number_t row=1; row<r.size(); ++row)
772           {
773             typename std::vector<V>::const_iterator itv;
774             MatIterator itmr= itm + (row*(row-1)/2);
775             R rz= rz0;
776             for(itv = itvb; itv != itvb + std::min(row, rox); ++itv, ++itmr) { rz += * itv * conj(*itmr); }
777             typename std::vector<R>::iterator itr = r.begin()+ row;
778             *itr+=rz;
779           }
780         break;
781       case _skewAdjoint:
782         #pragma omp parallel for schedule(static)
783         for(number_t row=1; row<r.size(); ++row)
784           {
785             typename std::vector<V>::const_iterator itv;
786             MatIterator itmr= itm + (row*(row-1)/2);
787             R rz= rz0;
788             for(itv = itvb; itv != itvb + std::min(row, rox); ++itv, ++itmr) { rz -= * itv * conj(*itmr); }
789             typename std::vector<R>::iterator itr = r.begin()+ row;
790             *itr+=rz;
791           }
792         break;
793       default:
794         #pragma omp parallel for schedule(static)
795         for(number_t row=1; row<r.size(); ++row)
796           {
797             typename std::vector<V>::const_iterator itv;
798             MatIterator itmr= itm + (row*(row-1)/2);
799             R rz= rz0;
800             for(itv = itvb; itv != itvb + std::min(row, rox); ++itv, ++itmr) { rz += * itv * *itmr; }
801             typename std::vector<R>::iterator itr = r.begin()+ row;
802             *itr+=rz;
803           }
804     }
805 #endif
806 }
807 
808 // multiplication M x V of dense row-major access matrix M and vector V
809 // also used in multiplication of column-major transposed matrix by vector
810 template<typename MatIterator, typename VecIterator, typename ResIterator>
rowMatrixVector(MatIterator & itm,VecIterator & itvb,VecIterator & itve,ResIterator & itrb,ResIterator & itre) const811 void DenseStorage::rowMatrixVector(MatIterator& itm, VecIterator& itvb, VecIterator& itve, ResIterator& itrb, ResIterator& itre)  const
812 {
813   for(ResIterator itr = itrb; itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
814   for(ResIterator itr = itrb; itr != itre; ++itr)
815     for(VecIterator itv = itvb; itv != itve; ++itv, ++itm) { *itr += *itm** itv; }
816 }
817 
818 // also used in multiplication of column-major transposed matrix by vector
819 template<typename MatIterator, typename VecIterator, typename ResIterator>
parallelRowMatrixVector(MatIterator & itm,VecIterator & itvb,VecIterator & itve,ResIterator & itrb,ResIterator & itre) const820 void DenseStorage::parallelRowMatrixVector(MatIterator& itm, VecIterator& itvb, VecIterator& itve, ResIterator& itrb, ResIterator& itre)  const
821 {
822   for(ResIterator itr = itrb; itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
823 #ifndef XLIFEPP_WITH_OMP
824   for(ResIterator itr = itrb; itr != itre; ++itr)
825     for(VecIterator itv = itvb; itv != itve; ++itv, ++itm) { *itr += *itm** itv; }
826 #else
827   number_t nr =nbOfRows(), nc=nbOfColumns();
828   #pragma omp parallel for
829   for(number_t r=0; r<nr; ++r)
830     {
831       ResIterator itr = itrb + r;
832       MatIterator itmr =  itm + r*nc;
833       for(VecIterator itv = itvb; itv != itve; ++itv, ++itmr) { *itr += *itmr** itv; }
834     }
835 #endif // XLIFEPP_WITH_OMP
836 }
837 
838 // also used in multiplication of column-major transposed matrix by vector
839 template<typename MatIterator, typename V, typename R>
parallelRowMatrixVector(MatIterator & itm,const std::vector<V> & v,std::vector<R> & r) const840 void DenseStorage::parallelRowMatrixVector(MatIterator& itm, const std::vector<V>& v, std::vector<R>& r)  const
841 {
842 #ifndef XLIFEPP_WITH_OMP
843   where("DenseStorage::parallelRowMatrixVector(...)");
844   error("xlifepp_without_omp");
845 #else
846   typename std::vector<R>::iterator itrb = r.begin(), itre=r.end(), itr;
847   for(itr=itrb; itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
848   number_t nr =nbOfRows(), nc=nbOfColumns();
849   typename std::vector<V>::const_iterator itvb = v.begin(), itve=v.end(), itv;
850   MatIterator itmr ;
851   R t;
852   #pragma omp parallel for firstprivate(itr, itv, itmr, t) schedule(static)
853   for(number_t r=0; r<nr; ++r)
854     {
855       itr = itrb + r;
856       itmr =  itm + r*nc;
857       t = *itr;
858       for(itv = itvb; itv != itve; ++itv, ++itmr) { t += *itmr** itv; }
859       *itr = t;
860     }
861 #endif // XLIFEPP_WITH_OMP
862 }
863 
864 //template<typename MatIterator, typename V, typename R>
865 //void DenseStorage::parallelRowMatrixVector(MatIterator& itm, const std::vector<V>& v, std::vector<R>& r)  const
866 //{
867 //#ifndef XLIFEPP_WITH_OMP
868 //  where("DenseStorage::parallelRowMatrixVector(...)");
869 //  error("xlifepp_without_omp");
870 //#else
871 //  typename std::vector<R>::iterator itrb = r.begin(), itre=r.end(), itr;
872 //  for(itr=itrb; itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
873 //  number_t nr =nbOfRows(), nc=nbOfColumns();
874 //  typename std::vector<V>::const_iterator itvb = v.begin(), itve=v.end(), itv;
875 //  MatIterator itmr ;
876 //  R t;
877 //  number_t br=0, rm, sr=100;  //size of block
878 //  while(br<nr)
879 //  {
880 //   rm=std::min(br+sr,nr);
881 //   #pragma omp parallel for firstprivate(itr, itv, itmr, t) schedule(static)
882 //   for(number_t r=br; r < rm; ++r)
883 //    {
884 //      itr = itrb + r;
885 //      itmr =  itm + r*nc;
886 //      t = *itr;
887 //      for(itv = itvb; itv != itve; ++itv, ++itmr) { t += *itmr** itv; }
888 //      *itr = t;
889 //    }
890 //    br+=sr;
891 //  }
892 // #endif // XLIFEPP_WITH_OMP
893 //}
894 
895 //template<typename MatIterator, typename V, typename R>
896 //void DenseStorage::parallelRowMatrixVector(MatIterator& itm, const std::vector<V>& v, std::vector<R>& r)  const
897 //{
898 //#ifndef XLIFEPP_WITH_OMP
899 //  where("DenseStorage::parallelRowMatrixVector(...)");
900 //  error("xlifepp_without_omp");
901 //#else
902 //  typename std::vector<R>::iterator itrb = r.begin(), itre=r.end(), itr;
903 //  for(itr=itrb; itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
904 //  number_t nr =nbOfRows(), nc=nbOfColumns();
905 //  typename std::vector<V>::const_iterator itvb = v.begin(), itve=v.end(), itv;
906 //  MatIterator itmr ;
907 //  R t;
908 //  number_t sr=100;  //size of block
909 //  number_t nbr=nr/sr, rm;
910 //  #pragma omp parallel for firstprivate(itr, itv, itmr, t) schedule(static)
911 //  for(number_t br=0; br<=nbr; ++br)
912 //  {
913 //   number_t r0=br*sr, rm=std::min(r0+sr,nr);
914 //   for(number_t r=r0; r < rm; ++r)
915 //    {
916 //      itr = itrb + r;
917 //      itmr =  itm + r*nc;
918 //      t = *itr;
919 //      for(itv = itvb; itv != itve; ++itv, ++itmr) { t += *itmr** itv; }
920 //      *itr = t;
921 //    }
922 //  }
923 // #endif // XLIFEPP_WITH_OMP
924 //}
925 
926 // multiplication V x M of vector V and dense row-major access matrix M
927 template<typename MatIterator, typename VecIterator, typename ResIterator>
rowVectorMatrix(MatIterator & itm,VecIterator & itvb,VecIterator & itve,ResIterator & itrb,ResIterator & itre) const928 void DenseStorage::rowVectorMatrix(MatIterator& itm, VecIterator& itvb, VecIterator& itve, ResIterator& itrb, ResIterator& itre)  const
929 {
930   for(ResIterator itr = itrb; itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
931   for(VecIterator itv = itvb; itv != itve; ++itv)
932     for(ResIterator itr = itrb; itr != itre; ++itr, ++itm) { *itr += *itv** itm; }
933 }
934 
935 // multiplication M x V of dense column-major access matrix M and vector V
936 // also used in multiplication of row-major transposed matrix by vector
937 template<typename MatIterator, typename VecIterator, typename ResIterator>
columnMatrixVector(MatIterator & itm,VecIterator & itvb,VecIterator & itve,ResIterator & itrb,ResIterator & itre) const938 void DenseStorage::columnMatrixVector(MatIterator& itm, VecIterator& itvb, VecIterator& itve, ResIterator& itrb, ResIterator& itre) const
939 {
940   for(ResIterator itr = itrb; itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
941   for(VecIterator itv = itvb; itv != itve; ++itv)
942     for(ResIterator itr = itrb; itr != itre; ++itr, ++itm) { *itr += *itm** itv; }
943 }
944 
945 // multiplication V x M of vector V and dense column-major access matrix M
946 template<typename MatIterator, typename VecIterator, typename ResIterator>
columnVectorMatrix(MatIterator & itm,VecIterator & itvb,VecIterator & itve,ResIterator & itrb,ResIterator & itre) const947 void DenseStorage::columnVectorMatrix(MatIterator& itm, VecIterator& itvb, VecIterator& itve, ResIterator& itrb, ResIterator& itre) const
948 {
949   for(ResIterator itr = itrb; itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
950   for(ResIterator itr = itrb; itr != itre; ++itr)
951     for(VecIterator itv = itvb; itv != itve; ++itv, ++itm) *itr += *itv** itm;
952   // Transpose (*itm) HERE ???????????????????????
953 }
954 
955 // multiplication V x M of vector V and dense column-major access matrix M
956 template<typename MatIterator, typename VecIterator, typename ResIterator>
parallelColumnVectorMatrix(MatIterator & itm,VecIterator & itvb,VecIterator & itve,ResIterator & itrb,ResIterator & itre) const957 void DenseStorage::parallelColumnVectorMatrix(MatIterator& itm, VecIterator& itvb, VecIterator& itve, ResIterator& itrb, ResIterator& itre) const
958 {
959   for(ResIterator itr = itrb; itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
960 #ifndef XLIFEPP_WITH_OMP
961   for(ResIterator itr = itrb; itr != itre; ++itr)
962     for(VecIterator itv = itvb; itv != itve; ++itv, ++itm) *itr += *itv** itm;
963 #else
964   number_t nr=nbOfRows(), nc=nbOfColumns();
965   #pragma omp parallel for
966   for(number_t c=0; c<nc; ++c)
967     {
968       ResIterator itr = itrb + c;
969       MatIterator itmr =  itm + c*nr;
970       for(VecIterator itv = itvb; itv != itve; ++itv, ++itmr) { *itr += *itmr** itv; }
971     }
972 #endif
973   // Transpose (*itm) HERE ???????????????????????
974 }
975 
976 
977 template<typename Mat1Iterator, typename Mat2Iterator, typename ResIterator>
sumMatrixMatrix(Mat1Iterator & itm1,Mat2Iterator & itm2,ResIterator & itrb,ResIterator & itre) const978 void DenseStorage::sumMatrixMatrix(Mat1Iterator& itm1, Mat2Iterator& itm2, ResIterator& itrb, ResIterator& itre) const
979 {
980   for(ResIterator itr = itrb; itr != itre; ++itr) { *itr = *itm1 + *itm2; }
981 }
982 
983 /* -------------------------------------------------------------------------------------------------------
984                                    generic lower and upper solvers
985    -------------------------------------------------------------------------------------------------------*/
986 // solve LD1 * x = v
987 template<typename M, typename V, typename X>
lowerD1SolverG(const std::vector<M> & m,std::vector<V> & v,std::vector<X> & x) const988 void DenseStorage::lowerD1SolverG(const std::vector<M>& m, std::vector<V>& v, std::vector<X>& x) const
989 {
990     typename std::vector<V>::iterator itv=v.begin();
991     typename std::vector<X>::iterator itxb=x.begin(), itx;
992     number_t n=x.size();
993     for(number_t k=1; k<=n; ++k, ++itv)
994     {
995         X t=*itv;
996         itx=itxb;
997         for(number_t j=1; j<k; ++j,++itx)
998         {
999             t -= m[pos(k,j)] * *itx;
1000         }
1001         *itx = t;
1002     }
1003 }
1004 
1005 // solve U * x = v
1006 template<typename M, typename V, typename X>
upperSolverG(const std::vector<M> & m,std::vector<V> & v,std::vector<X> & x,const SymType sym) const1007 void DenseStorage::upperSolverG(const std::vector<M>& m, std::vector<V>& v, std::vector<X>& x, const SymType sym) const
1008 {
1009     number_t n=x.size();
1010     typename std::vector<V>::reverse_iterator itv=v.rbegin()++;
1011     typename std::vector<X>::reverse_iterator itxb=x.rbegin()++, itx;
1012     for(number_t k=n; k>0; k--, ++itv)
1013     {
1014         switch(sym)
1015         {
1016             case _skewSymmetric :
1017             {
1018                 X t=*itv; itx=itxb;
1019                 for(number_t j=n; j>k; j--, ++itx) t += m[pos(k, j, sym)] * *itx;
1020                 *itx = t / m[pos(k,k)];
1021             }
1022             break;
1023             case _selfAdjoint :
1024             {
1025                 X t=*itv; itx=itxb;
1026                 for(number_t j=n; j>k; j--, ++itx) t -= conj(m[pos(k, j, sym)]) * *itx;
1027                 *itx = t / m[pos(k,k)];
1028             }
1029             break;
1030             case _skewAdjoint :
1031             {
1032                 X t=*itv; itx=itxb;
1033                 for(number_t j=n; j>k; j--, ++itx) t += conj(m[pos(k, j, sym)]) * *itx;
1034                 *itx = t / m[pos(k,k)];
1035             }
1036             break;
1037             default : //symmetric
1038                 {
1039                 X t=*itv; itx=itxb;
1040                 for(number_t j=n; j>k; j--, ++itx) t -= m[pos(k, j, sym)] * *itx;
1041                 *itx = t / m[pos(k,k)];
1042                 }
1043         }
1044     }
1045 }
1046 
1047 // solve UD1 * x = v
1048 template<typename M, typename V, typename X>
upperD1SolverG(const std::vector<M> & m,std::vector<V> & v,std::vector<X> & x,const SymType sym) const1049 void DenseStorage::upperD1SolverG(const std::vector<M>& m, std::vector<V>& v, std::vector<X>& x, const SymType sym) const
1050 {
1051     number_t n=x.size();
1052     typename std::vector<V>::reverse_iterator itv=v.rbegin()++;
1053     typename std::vector<X>::reverse_iterator itxb=x.rbegin()++, itx;
1054     for(number_t k=n; k>0; k--, ++itv)
1055     {
1056        switch(sym)
1057         {
1058             case _skewSymmetric :
1059             {
1060                 X t=*itv; itx=itxb;
1061                 for(number_t j=n; j>k; j--, ++itx) t += m[pos(k, j, sym)] * *itx;
1062                 *itx = t;
1063             }
1064             break;
1065             case _selfAdjoint :
1066             {
1067                 X t=*itv; itx=itxb;
1068                 for(number_t j=n; j>k; j--, ++itx) t -= conj(m[pos(k, j, sym)]) * *itx;
1069                 *itx = t ;
1070             }
1071             break;
1072             case _skewAdjoint :
1073             {
1074                 X t=*itv; itx=itxb;
1075                 for(number_t j=n; j>k; j--, ++itx) t += conj(m[pos(k, j, sym)]) * *itx;
1076                 *itx = t;
1077             }
1078             break;
1079             default : //symmetric
1080                 {
1081                 X t=*itv; itx=itxb;
1082                 for(number_t j=n; j>k; j--, ++itx) t -= m[pos(k, j, sym)] * *itx;
1083                 *itx = t;
1084                 }
1085         }
1086     }
1087 }
1088 
1089 // solve x * LD1 = v
1090 template<typename M, typename V, typename X>
lowerD1LeftSolverG(const std::vector<M> & m,std::vector<V> & v,std::vector<X> & x) const1091 void DenseStorage::lowerD1LeftSolverG(const std::vector<M>& m, std::vector<V>& v, std::vector<X>& x) const
1092 {
1093     number_t n=x.size();
1094     typename std::vector<V>::reverse_iterator itv=v.rbegin()++;
1095     typename std::vector<X>::reverse_iterator itxb=x.rbegin()++, itx;
1096     for(number_t k=n; k>0; k--, ++itv)
1097     {
1098         X t=*itv; itx=itxb;
1099         for(number_t j=n; j>k; j--, ++itx) t -= *itx * m[pos(j, k)] ;
1100         *itx = t;
1101     }
1102 }
1103 
1104 // solve x * U = v
1105 template<typename M, typename V, typename X>
upperLeftSolverG(const std::vector<M> & m,std::vector<V> & v,std::vector<X> & x,const SymType sym) const1106 void DenseStorage::upperLeftSolverG(const std::vector<M>& m, std::vector<V>& v, std::vector<X>& x, const SymType sym) const
1107 {
1108     typename std::vector<V>::iterator itv=v.begin();
1109     typename std::vector<X>::iterator itxb=x.begin(), itx;
1110     number_t n=x.size();
1111     for(number_t k=1; k<=n; ++k, ++itv)
1112     {
1113         switch(sym)
1114         {
1115             case _skewSymmetric :
1116             {
1117                 X t=*itv; itx=itxb;
1118                 for(number_t j=1; j<k; ++j,++itx) t += *itx * m[pos(j, k, sym)];
1119                 *itx = t/m[pos(k,k)];
1120             }
1121             break;
1122             case _selfAdjoint :
1123             {
1124                 X t=*itv; itx=itxb;
1125                 for(number_t j=1; j<k; ++j,++itx) t -= *itx * conj(m[pos(j, k, sym)]);
1126                 *itx = t/m[pos(k,k)];
1127             }
1128             break;
1129             case _skewAdjoint :
1130             {
1131                 X t=*itv; itx=itxb;
1132                 for(number_t j=1; j<k; ++j,++itx) t += *itx * conj(m[pos(j, k, sym)]);
1133                 *itx = t/m[pos(k,k)];
1134             }
1135             break;
1136             default : //symmetric
1137                 {
1138                 X t=*itv; itx=itxb;
1139                 for(number_t j=1; j<k; ++j,++itx) t -= *itx * m[pos(j, k, sym)];
1140                 *itx = t/m[pos(k,k)];
1141                 }
1142         }
1143     }
1144 }
1145 
1146 // solve D * x = v
1147 template<typename M, typename V, typename X>
diagonalSolverG(const std::vector<M> & m,std::vector<V> & v,std::vector<X> & x) const1148 void DenseStorage::diagonalSolverG(const std::vector<M>& m, std::vector<V>& v, std::vector<X>& x) const
1149 {
1150     typename std::vector<V>::iterator itv=v.begin();
1151     typename std::vector<X>::iterator itx=x.begin();
1152     number_t n=x.size();
1153     for(number_t k=1; k<=n; k++, ++itv, ++itx)
1154     {
1155         *itx = *itv / m[pos(k,k)] ;
1156     }
1157 }
1158 
1159 /* -------------------------------------------------------------------------------------------------------
1160                                     SOR and SSOR stuff
1161    -------------------------------------------------------------------------------------------------------*/
1162 /*! SOR-like diagonal product  r = w * D * v
1163     \param itd is a (forward) iterator on matrix diagonal entries (D)
1164     \param itvb is a (forward) iterator on right hand side vector v entries
1165     \param itrb, itre are (forward) iterators on solution vector r entries
1166     \param w is the SOR parameter
1167 */
1168 template<typename MatIterator, typename VecIterator, typename ResIterator>
bzSorDiagonalMatrixVector(MatIterator & itd,VecIterator & itvb,ResIterator & itrb,ResIterator & itre,const real_t w) const1169 void DenseStorage::bzSorDiagonalMatrixVector(MatIterator& itd, VecIterator& itvb, ResIterator& itrb,
1170     ResIterator& itre, const real_t w) const
1171 {
1172   VecIterator itv = itvb;
1173   ResIterator itr;
1174   for(itr = itrb; itr != itrb + diagonalSize(); ++itr, ++itd, ++itv) { *itr = w** itv** itd; }
1175   for(itr = itrb + diagonalSize(); itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
1176 }
1177 
1178 /*! SOR-like diagonal D/w x = v solver
1179     \param itd  is a (forward) iterator on matrix diagonal entries (D)
1180     \param itvb is a (forward) iterator on right hand side vector v entries
1181     \param itxb, itxe are (forward) iterators on solution vector x entries
1182     \param w is the SOR parameter
1183 */
1184 template<typename MatIterator, typename VecIterator, typename XIterator>
bzSorDiagonalSolver(MatIterator & itd,VecIterator & itvb,XIterator & itxb,XIterator & itxe,const real_t w) const1185 void DenseStorage::bzSorDiagonalSolver(MatIterator& itd, VecIterator& itvb, XIterator& itxb,
1186                                        XIterator& itxe, const real_t w) const
1187 {
1188   VecIterator itv = itvb;
1189   XIterator itx;
1190   for(itx = itxb; itx != itxb + diagonalSize(); ++itx, ++itd, ++itv) { *itx = w** itv / *itd; }
1191   for(itx = itxb + diagonalSize(); itx != itxe; ++itx) { *itx *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
1192 }
1193 
1194 /*! SOR lower triangular part (D/w+L) x = b solver
1195     \param itdb is a (forward) iterator on matrix diagonal entries (D)
1196     \param itlb is a (forward) iterator on matrix strict lower triangular part entries (L)
1197     \param itbb is a (forward) iterator on right-hand side vector entries (b)
1198     \param itxb,itxe is a (forward) begin (resp. end) iterator on solution vector entries (x)
1199     \param w is the SOR parameter
1200 */
1201 template<typename MatIterator, typename VecIterator, typename XIterator>
bzSorLowerSolver(const MatIterator & itdb,const MatIterator & itlb,VecIterator & itbb,XIterator & itxb,XIterator & itxe,const real_t w) const1202 void DenseStorage::bzSorLowerSolver(const MatIterator& itdb, const MatIterator& itlb, VecIterator& itbb, XIterator& itxb,
1203                                     XIterator& itxe, const real_t w) const
1204 {
1205   MatIterator itd=itdb, itl=itlb;
1206   XIterator itx = itxb, itxj;
1207   number_t row=0;
1208   for(itx = itxb; itx != itxe; ++itx, ++row, ++itd)
1209     {
1210       *itx = *(itbb + row);
1211       itxj=itxb;
1212       for(number_t j=0; j<row; ++j, ++itl, ++itxj)  *itx -= *itl** itxj;
1213       *itx *= w / *itd;
1214     }
1215 }
1216 
1217 /*!  SOR upper triangular part (D/w+U) x = b solver
1218      *** LO AND BEHOLD *** we are using reverse_iterator syntax here ***
1219      *** loops are thus reversed from end to begin (or rather rbegin to rend) even though iterators are "++"
1220     \param itrdb is a (backward) iterator on matrix diagonal entries (D), starting on matrix last diagonal entry
1221     \param itrub is a (backward) iterator on matrix strict upper triangular part entries (U), starting on matrix last entry
1222     \param itrbb is a reverse iterator "b.rbegin" iterator on right hand side vector (b) entries, starting on vector last entry
1223     \param itrxb,itrxe is a reverse iterator "x.rbegin" (resp. "x.rend") iterator on solution vector (x) entries starting on vector last entry
1224     \param w is the SOR parameter
1225 */
1226 template<typename MatRevIterator, typename VecRevIterator, typename XRevIterator>
bzSorUpperSolver(const MatRevIterator & itrdb,const MatRevIterator & itrub,VecRevIterator & itrbb,XRevIterator & itrxb,XRevIterator & itrxe,const real_t w) const1227 void DenseStorage::bzSorUpperSolver(const MatRevIterator& itrdb, const MatRevIterator& itrub, VecRevIterator& itrbb,
1228                                     XRevIterator& itrxb, XRevIterator& itrxe, const real_t w) const
1229 {
1230   MatRevIterator itrd=itrdb, itru=itrub;
1231   VecRevIterator itrb = itrbb;
1232   XRevIterator itrx = itrxb, itrxi;
1233   for(itrx = itrxb; itrx != itrxe; ++itrx) { *itrx = *itrb++;}  //initialize x with b
1234 
1235   itrx = itrxb;
1236   number_t nbc=nbOfColumns();
1237   for(number_t c=nbc; c!=0; --c, ++itrd, ++itrx)
1238     {
1239       *itrx *= w / *itrd;
1240       itrxi = itrxb + (nbc -c + 1);
1241       for(number_t i=1; i< c; ++i, ++itrxi, ++itru) *itrxi -= *itru** itrx;
1242     }
1243 }
1244 
1245 } // end of namespace xlifepp
1246 
1247 #endif // DENSE_STORAGE_HPP
1248