1 /*
2 XLiFE++ is an extended library of finite elements written in C++
3     Copyright (C) 2014  Lunéville, Eric; Kielbasiewicz, Nicolas; Lafranche, Yvon; Nguyen, Manh-Ha; Chambeyron, Colin
4 
5     This program is free software: you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation, either version 3 of the License, or
8     (at your option) any later version.
9     This program is distributed in the hope that it will be useful,
10     but WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12     GNU General Public License for more details.
13     You should have received a copy of the GNU General Public License
14     along with this program.  If not, see <http://www.gnu.org/licenses/>.
15  */
16 
17 /*!
18   \file CsStorage.hpp
19   \authors D. Martin, E. Lunéville, NGUYEN Manh Ha, N. Kielbasiewicz
20   \since 21 juin 2011
21   \date 21 feb 2013
22 
23   \brief Definition of the xlifepp::CsStorage class
24 
25   xlifepp::CsStorage class is the abstract mother class of all compressed sparse storage methods for large matrices
26   \verbatim
27                       | RowCsStorage   (like CSR)
28   CsStorage --------> | ColCsStorage   (like CSC)
29                       | DualCsStorage  (like CSR/CSC)
30                       | SymCsStorage   (like CSR)
31   \endverbatim
32 
33   Only _sym access type storage can be used for a matrix with a symmetry
34   property ( _symmetric, _skewSymmetric, _selfAdjoint or _skewAdjoint)
35 */
36 #ifndef CS_STORAGE_HPP
37 #define CS_STORAGE_HPP
38 
39 #include "config.h"
40 #include "../MatrixStorage.hpp"
41 
42 #ifdef XLIFEPP_WITH_OMP
43 #include <omp.h>
44 #endif
45 
46 namespace xlifepp
47 {
48 
49 /*!
50    \class CsStorage
51    abstract base class of all compressed sparse storage classes
52  */
53 class CsStorage : public MatrixStorage
54 {
55   public:
56     // Constructors, Destructor
57     CsStorage(AccessType = _dual, string_t id = "CsStorage");               //!< constructor by access type
58     CsStorage(number_t, AccessType = _dual, string_t id = "CsStorage");       //!< constructor by access type, number of columns and rows (same)
59     CsStorage(number_t, number_t, AccessType = _dual, string_t id = "CsStorage"); //!< constructor by access type, number of columns and rows
~CsStorage()60     virtual ~CsStorage() {}                                               //!< virtual destructor
61 
62     // public utilities
63     // virtual void visual(ostream&, number_t vb = 0) const = 0;    //!< to visalize non zero values
64     virtual number_t size() const = 0;                       //!< storage size
lowerPartSize() const65     virtual number_t lowerPartSize() const {return 0;}       //!< size of lower triangular part except for Dual/SymCsStorage
upperPartSize() const66     virtual number_t upperPartSize() const {return 0;}       //!< size of upper triangular part except for Dual/SymCsStorage
67 
68   protected:
69     // build utilities for child class
70 //    void buildCsStorage(const std::vector<std::vector<number_t> >&,
71 //                        std::vector<number_t>&, std::vector<number_t>&); //!< build storage vectors of row and col storage
72     template<class L>
73     void buildCsStorage(const std::vector<L>&, std::vector<number_t>&, std::vector<number_t>&);  //!< build storage vectors of row and col storage
74     void addCsSubMatrixIndices(std::vector<number_t>&, std::vector<number_t>&,
75                                const std::vector<number_t>&,const std::vector<number_t>&,
76                                bool lower=true, bool diag=false);      //!< add in csstorage submatrix indices given by its row and column indices (>= 1)
77     void toScalarCs(const std::vector<number_t>&, const std::vector<number_t>&,
78                     dimen_t, dimen_t, std::vector<number_t>&, std::vector<number_t>&); //!< go to scalar cs storage from non scalar cs storage
79 
80     MatrixStorage* toDual();            //!< create a new dual cs storage from current sym cs storage
81     CsStorage* toColStorage();          //!< create a new col cs storage from current cs storage
82     CsStorage* toRowStorage();          //!< create a new row cs storage from current cs storage
83 
84 
85     //! remove rows from matrix
86     template<typename T>
87     void deleteRowsT(std::vector<number_t>&, std::vector<number_t>&, number_t&, number_t&, number_t, number_t, std::vector<T>&);
88     //! remove cols from matrix
89     template<typename T>
90     void deleteColsT(std::vector<number_t>&, std::vector<number_t>&, number_t&, number_t&, number_t, number_t, std::vector<T>&);
91 
92     //skyline conversion utilities
93     template<typename Iterator, typename IteratorS>
94     void fillSkylineDiagonalPart(Iterator&, IteratorS&) const;  //!< copy diagonal part
95     template<typename Iterator, typename IteratorS>
96     void fillSkylineTriangularPart(const std::vector<number_t>&, const std::vector<number_t>&,
97                                    Iterator&, IteratorS&) const; //!< copy triangular (lower or upper) part as a skyline storage
98 
99     // print utilites for child class
100 
101     template<typename Iterator>
102     void printEntriesAll(StrucType, Iterator&, const std::vector<number_t>&, const std::vector<number_t>&,
103                          number_t, number_t, number_t, const string_t&, number_t, std::ostream&) const; //!< print matrix in row/col compressed storage
104     template<typename Iterator>
105     void printEntriesTriangularPart(StrucType, Iterator&, Iterator&, const std::vector<number_t>&,
106                                     const std::vector<number_t>&, number_t, number_t, number_t,
107                                     const string_t&, number_t, std::ostream&) const;              //!< print lower/upper part of matrix in sym/dual compressed storage
108     template<typename Iterator>
109     void printCooTriangularPart(std::ostream&, Iterator&, const std::vector<number_t>&,
110                                 const std::vector<number_t>&, bool, SymType sym = _noSymmetry) const; //!< print lower/upper part in coordinate format
111     template<typename Iterator>
112     void printCooDiagonalPart(std::ostream&, Iterator&, number_t) const;                            //!< print diagonal part in coordinate format
113 
114     //! load cs matrix from dense file
115     template <typename T>
116     void loadCsFromFileDense(std::istream&, std::vector<T>&, std::vector<number_t>&,
117                              std::vector<number_t>&, SymType, bool);
118     //! load cs matrix from coordinate file
119     template <typename T>
120     void loadCsFromFileCoo(std::istream&, std::vector<T>&, std::vector<number_t>&,
121                            std::vector<number_t>&, SymType, bool);
122     //! load cs matrix from dense file
123     template <typename T>
124     void loadCsFromFileDense(std::istream&, std::vector<T>&, std::vector<number_t>&,
125                              std::vector<number_t>&, std::vector<number_t>&, std::vector<number_t>&, SymType, bool);
126     //! load cs matrix from coordinate file
127     template <typename T>
128     void loadCsFromFileCoo(std::istream&, std::vector<T>&, std::vector<number_t>&, std::vector<number_t>&,
129                            std::vector<number_t>&, std::vector<number_t>&, SymType, bool);
130 
131     // product of any part of Cs matrix and vector common to different Cs storages (child classes)
132     //! diag matrix * vector
133     template<typename MatIterator, typename VecIterator, typename ResIterator>
134     void diagonalMatrixVector(MatIterator&, VecIterator&, ResIterator&, ResIterator&) const;
135     //! vector * diag matrix
136     template<typename MatIterator, typename VecIterator, typename ResIterator>
137     void diagonalVectorMatrix(MatIterator&, VecIterator&, ResIterator&, ResIterator&) const;
138     //! lower part * vector
139     template<typename MatIterator, typename VecIterator, typename ResIterator>
140     void lowerMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
141                            MatIterator& itm, VecIterator& itvb, ResIterator& itrb, SymType sym = _noSymmetry) const;
142 
143 
144     //! vector * lower part
145     template<typename MatIterator, typename VecIterator, typename ResIterator>
146     void lowerVectorMatrix(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
147                            MatIterator& itm, VecIterator& itvb, ResIterator& itrb, SymType sym = _noSymmetry) const;
148     //! upper part * vector
149     template<typename MatIterator, typename VecIterator, typename ResIterator>
150     void upperMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
151                            MatIterator& itm, VecIterator& itvb, ResIterator& itrb, SymType sym = _noSymmetry) const;
152     //! vector * upper part
153     template<typename MatIterator, typename VecIterator, typename ResIterator>
154     void upperVectorMatrix(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
155                            MatIterator& itm, VecIterator& itvb, ResIterator& itrb, SymType sym = _noSymmetry) const;
156     //! row matrix * vector
157     template<typename MatIterator, typename VecIterator, typename ResIterator>
158     void rowMatrixVector(const std::vector<number_t>&, const std::vector<number_t>&, MatIterator&, VecIterator&, ResIterator&)  const;
159     //! vector * row matrix
160     template<typename MatIterator, typename VecIterator, typename ResIterator>
161     void rowVectorMatrix(const std::vector<number_t>&, const std::vector<number_t>&, MatIterator&, VecIterator&, ResIterator&)  const;
162     //! col matrix * vector
163     template<typename MatIterator, typename VecIterator, typename ResIterator>
164     void columnMatrixVector(const std::vector<number_t>&, const std::vector<number_t>&, MatIterator&, VecIterator&, ResIterator&)  const;
165     //! vector * col matrix
166     template<typename MatIterator, typename VecIterator, typename ResIterator>
167     void columnVectorMatrix(const std::vector<number_t>&, const std::vector<number_t>&, MatIterator&, VecIterator&, ResIterator&)  const;
168 
169     template<typename MatIterator, typename VecIterator, typename ResIterator>
170     void bzSorDiagonalMatrixVector(MatIterator& itd, const VecIterator& itvb, ResIterator& itrb, const real_t w) const;
171 
172     template<typename MatIterator, typename VecIterator, typename XIterator>
173     void bzSorLowerSolver(const MatIterator&, const MatIterator&, VecIterator&, XIterator&, XIterator&, const std::vector<number_t>&, const std::vector<number_t>&, const real_t) const;
174 
175     template<typename MatIterator, typename VecIterator, typename XIterator>
176     void bzLowerD1Solver(const MatIterator&, const VecIterator&, const XIterator&, const XIterator&, const std::vector<number_t>&, const std::vector<number_t>&) const;
177 
178     template<typename MatIterator, typename VecIterator, typename XIterator>
179     void bzSorDiagonalSolver(const MatIterator& itdb, VecIterator& itbb, XIterator& itxb, XIterator& itxe, const real_t w) const;
180 
181     template<typename MatRevIterator, typename VecRevIterator, typename XRevIterator>
182     void bzSorUpperSolver(const MatRevIterator&, const MatRevIterator&, VecRevIterator&, XRevIterator&, XRevIterator&, const std::vector<number_t>&,
183                           const std::vector<number_t>&, const real_t, SymType =_noSymmetry) const;
184 
185     template<typename MatRevIterator, typename VecRevIterator, typename XRevIterator>
186     void bzSorUpperD1Solver(const MatRevIterator&, const MatRevIterator&, VecRevIterator&, XRevIterator&, XRevIterator&, const std::vector<number_t>&,
187                             const std::vector<number_t>&, const real_t, SymType =_noSymmetry) const;
188 
189     template<typename MatRevIterator, typename VecRevIterator, typename XRevIterator>
190     void bzUpperD1Solver(const MatRevIterator&, const MatRevIterator&, VecRevIterator&, XRevIterator&, XRevIterator&, const std::vector<number_t>&,
191                          const std::vector<number_t>&, const real_t, SymType =_noSymmetry) const;
192     //! sum of any part of dense matrix and dense matrix
193     template<typename Mat1Iterator, typename Mat2Iterator, typename ResIterator>
194     void sumMatrixMatrix(Mat1Iterator&, Mat2Iterator&, ResIterator&, ResIterator&) const;
195 
196     //! extract index thread for block of column or row
197     void extractThreadIndex(const std::vector<number_t>& pointer, const std::vector<number_t>& index,
198                             number_t& numThread,
199                             std::vector<std::vector<number_t>::const_iterator>& itLowerThread,
200                             std::vector<std::vector<number_t>::const_iterator>& itUpperThread) const;
201 
202 #ifdef XLIFEPP_WITH_OMP
203     template<typename MatIterator, typename VecIterator, typename ResIterator>
204     void parallelRowMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
205                                  MatIterator& itm, VecIterator& itvb, ResIterator& itrb,
206                                  number_t numThread,
207                                  std::vector<std::vector<number_t>::const_iterator>& itThreadLower,
208                                  std::vector<std::vector<number_t>::const_iterator>& itThreadUpper) const;
209 
210     template<typename MatIterator, typename VecIterator, typename ResIterator>
211     void parallelColumnMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
212                                     MatIterator& itm, VecIterator& itvb, ResIterator& itrb,
213                                     number_t numThread,
214                                     std::vector<std::vector<number_t>::const_iterator>& itThreadLower,
215                                     std::vector<std::vector<number_t>::const_iterator>& itThreadUpper,
216                                     number_t numRes) const;
217 
218     template<typename MatIterator, typename VecIterator, typename ResIterator>
219     void parallelLowerMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
220                                    MatIterator& itm, VecIterator& itvb, ResIterator& itrb,
221                                    number_t numThread,
222                                    std::vector<std::vector<number_t>::const_iterator>& itThreadLower,
223                                    std::vector<std::vector<number_t>::const_iterator>& itThreadUpper,
224                                    SymType sym) const;
225 
226     template<typename MatIterator, typename VecIterator, typename ResIterator>
227     void parallelUpperMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
228                                    MatIterator& itm, VecIterator& itvb, ResIterator& itrb,
229                                    number_t numThread,
230                                    std::vector<std::vector<number_t>::const_iterator>& itThreadLower,
231                                    std::vector<std::vector<number_t>::const_iterator>& itThreadUpper,
232                                    number_t numRes,
233                                    SymType sym) const;
234 #endif
235 };
236 
237 
238 std::vector<number_t> skylinePointer(const std::vector<number_t>&, const std::vector<number_t>&); //!< get the skyline pointers from cs pointers
239 
240 inline
smallPivot(real_t piv)241 bool smallPivot(real_t piv)
242 {
243   return (piv<= theZeroThreshold);
244 }
245 inline
smallPivot(complex_t piv)246 bool smallPivot(complex_t piv)
247 {
248   return (std::abs(piv)<= theZeroThreshold);
249 }
250 
251 /*
252 --------------------------------------------------------------------------------
253   Template functions
254 --------------------------------------------------------------------------------
255 */
256 
257 /*--------------------------------------------------------------------------------
258  template constructor from indices list
259 --------------------------------------------------------------------------------*/
260 // construct row (resp. col) storage vectors (index and pointer)  from the indices of columns (resp rows)
261 // stored in a vector<L > (rcIndex), called by CsStorage child class
262 // L if template class supporting ::const_iterator and size() member function either vector<number_t>, set<number_t>, list<number_t>, ...
263 //
264 // note : rcIndex starts at 1 !!!
265 template<class L>
buildCsStorage(const std::vector<L> & rcIndex,std::vector<number_t> & index,std::vector<number_t> & pointer)266 void CsStorage::buildCsStorage(const std::vector<L>& rcIndex, std::vector<number_t>& index, std::vector<number_t>& pointer)
267 {
268   trace_p->push("CsStorage::buildCsStorage");
269   //update row/colPointer_
270   typename std::vector<L>::const_iterator itvs;
271   number_t nnz = 0;
272   pointer.resize(rcIndex.size() + 1,0);    //+1 to store the size of stored values (more convenient)
273   std::vector<number_t>::iterator itvn = pointer.begin();
274   for(itvs = rcIndex.begin(); itvs != rcIndex.end(); ++itvs, ++itvn)
275     {
276       *itvn = nnz;
277       nnz += itvs->size();
278     }
279   *itvn = nnz; // last of rowPointers_ is the number of non zeros
280   // update col/rowIndex_
281   index.resize(nnz);
282   itvn = index.begin();
283   typename L::const_iterator its;
284   for(itvs = rcIndex.begin(); itvs != rcIndex.end(); ++itvs)
285     for(its = itvs->begin(); its != itvs->end(); ++its, ++itvn) { *itvn = *its -1; }
286   // end of construction
287   trace_p->pop();
288 }
289 
290 /*--------------------------------------------------------------------------------
291  template delete Row/Col operations
292 --------------------------------------------------------------------------------*/
293 
294 // template delete rows/cols rc1,..., rc2, works for ColCsStorage and RowCsStorage
295 template<typename T>
deleteRowsT(std::vector<number_t> & crPointer,std::vector<number_t> & rcIndex,number_t & nbcr,number_t & nbrc,number_t rc1in,number_t rc2in,std::vector<T> & v)296 void CsStorage::deleteRowsT(std::vector<number_t>& crPointer, std::vector<number_t>& rcIndex,
297                             number_t& nbcr, number_t& nbrc, number_t rc1in, number_t rc2in, std::vector<T>& v)
298 {
299   number_t rc1=std::min(std::max(rc1in,number_t(1)),nbrc);  //to prevent oversized indices
300   number_t rc2=std::min(std::max(rc2in,number_t(1)),nbrc);
301   if(rc2<rc1) return; //nothing to remove
302 
303   //recreate all storage pointer (expansive)
304   std::vector<number_t>::iterator itr=rcIndex.begin();
305   std::vector<number_t>::iterator itp=crPointer.begin();
306   std::vector<std::vector<number_t> > rowcols(nbcr);
307   std::vector<std::vector<number_t> >::iterator itrc=rowcols.begin();
308   typename std::vector<T>::iterator itv=v.begin()+1;
309   typename std::vector<T>::iterator itnv=itv;
310   number_t s=rc2-rc1;
311   for(number_t c=0; c<nbcr; c++, itp++, itrc++)
312     {
313       number_t l=*(itp+1) -*itp;
314       std::vector<number_t>& colrow=*itrc;
315       for(number_t k=0; k<l; k++, itr++, itv++)
316         {
317           number_t rc = *itr + 1;
318           if(rc<rc1)
319             {
320               colrow.push_back(*itr + 1);
321               *itnv = *itv; itnv++;
322             }
323           if(rc>rc2)
324             {
325               colrow.push_back(*itr - s);
326               *itnv = *itv; itnv++;
327             }
328         }
329     }
330   std::vector<number_t> newrcIndex, newcrPointer;
331   buildCsStorage(rowcols, newrcIndex, newcrPointer); //construct new rowIndex and colPointer
332   //update storage
333   crPointer=newcrPointer;
334   rcIndex=newrcIndex;
335   nbrc -= rc2-rc1+1;
336   v.resize(crPointer[nbcr]+1);
337 }
338 
339 // template delete cols/rows cr1,..., cr2, works for ColCsStorage and RowCsStorage
340 template<typename T>
deleteColsT(std::vector<number_t> & crPointer,std::vector<number_t> & rcIndex,number_t & nbcr,number_t & nbrc,number_t cr1in,number_t cr2in,std::vector<T> & v)341 void CsStorage::deleteColsT(std::vector<number_t>& crPointer, std::vector<number_t>& rcIndex,
342                             number_t& nbcr, number_t& nbrc, number_t cr1in, number_t cr2in, std::vector<T>& v)
343 {
344 
345   number_t cr1=std::min(std::max(cr1in,number_t(1)),nbcr);  //to prevent oversized indices
346   number_t cr2=std::min(std::max(cr2in,number_t(1)),nbcr);
347   if(cr2<cr1) return;
348 
349   if(cr2==nbcr) //remove last columns/rows (simple case)
350     {
351       if(cr1<=1)  //remove all ?
352         {
353           warning("free_warning","CsStorage::deleteColsT : removing all columns of a matrix !?");
354           v.resize(1);
355           crPointer.resize(1,0);
356           rcIndex.clear();
357           nbcr=0;
358           return;
359         }
360       //remove last columns from c1 to nbcols
361       crPointer.resize(cr1);
362       number_t last=crPointer[cr1-1];
363       rcIndex.resize(last);
364       v.resize(last+1);
365       nbcr=cr1-1;
366       return;
367     }
368   //remove internal columns
369   number_t nbk=cr2-cr1+1; //nb of cols/rows to remove
370   number_t ad1=crPointer[cr1-1], ad2=crPointer[cr2], ade=crPointer[nbcr];
371   number_t nbt=ad2-ad1; //nb of terms to remove
372   std::vector<number_t>::iterator itr1=rcIndex.begin()+ad1, itr2=rcIndex.begin()+ad2;
373   typename std::vector<T>::iterator itv1=v.begin()+ad1+1, itv2=v.begin()+ad2+1;
374   for(number_t k=ad2; k<ade; k++, itr1++, itr2++, itv1++, itv2++)
375     {
376       *itv1 = *itv2;
377       *itr1 = *itr2;
378     }
379   std::vector<number_t>::iterator itcp=crPointer.begin()+cr2+1;
380   for(; itcp!=crPointer.end(); itcp++) *(itcp - nbk) = *itcp -nbt;
381   nbcr-=nbk;
382   crPointer.resize(nbcr+1);
383   number_t last=crPointer[nbcr];
384   rcIndex.resize(last);
385   v.resize(last+1);
386 }
387 
388 /*--------------------------------------------------------------------------------
389  template fill a skyline storage
390 --------------------------------------------------------------------------------*/
391 
392 template<typename Iterator, typename IteratorS>
fillSkylineDiagonalPart(Iterator & it,IteratorS & its) const393 void  CsStorage::fillSkylineDiagonalPart(Iterator& it, IteratorS& its) const
394 {
395   for(number_t i = 0; i < diagonalSize(); i++, its++, it++) *its = *it;
396 }
397 
398 /*! fill a skyline storage (triangular part) from a cs storage
399   it is assumed that :
400   - column/row indexes are ordered in cs storage
401   - the entries of skyline storage are initialized to 0
402   Remark : this function does not required the skyline storage, it is built by an other function
403 
404   rcPointer : row or column pointers of cs storage
405   crIndex : column or row indexes of cs storage
406   it  : iterator on cs storage entries
407   its : iterator on skyline storage entries
408 */
409 
410 template<typename Iterator, typename IteratorS>
fillSkylineTriangularPart(const std::vector<number_t> & rcPointer,const std::vector<number_t> & crIndex,Iterator & it,IteratorS & its) const411 void  CsStorage::fillSkylineTriangularPart(const std::vector<number_t>& rcPointer, const std::vector<number_t>& crIndex,
412     Iterator& it, IteratorS& its) const
413 {
414   std::vector<number_t>::const_iterator itr, itc = crIndex.begin();//, ite = crIndex.end();
415   number_t i = 0;
416   for(itr = rcPointer.begin(); itr != rcPointer.end() - 1; itr++, i++)
417     {
418       number_t l = *(itr + 1) - *itr;
419       if(l > 0)
420         {
421           number_t cmin = *itc;
422           for(number_t k = 0; k < l; k++, itc++, it++)
423             {
424               *(its + (*itc - cmin)) = *it;
425             }
426           its += i - cmin;
427         }
428     }
429 }
430 
431 /*--------------------------------------------------------------------------------
432  template print utilities
433 --------------------------------------------------------------------------------*/
434 // print matrix of scalars for row or cs strorage
435 template<typename Iterator>
printEntriesAll(StrucType st,Iterator & itm,const std::vector<number_t> & index,const std::vector<number_t> & pointer,number_t perRow,number_t width,number_t prec,const string_t & rowOrcol,number_t vb,std::ostream & os) const436 void CsStorage::printEntriesAll(StrucType st, Iterator& itm, const std::vector<number_t>& index, const std::vector<number_t>& pointer,
437                                 number_t perRow, number_t width, number_t prec, const string_t& rowOrcol, number_t vb, std::ostream& os) const
438 {
439   number_t rcmin = std::min(vb, number_t(pointer.size()) - 1); // number of rows or cols to print
440   string_t f = "firste";
441   if(rcmin > 1) { f = "firstes"; }
442   os << "(" << words(f) << " " << rcmin << " " << words(rowOrcol) << "s.)";
443   os.setf(std::ios::scientific);
444   string_t colOrrow;
445   if(rowOrcol == "row") { colOrrow = "col"; }
446   else { colOrrow = "row"; }
447   for(number_t rc = 0; rc < rcmin; rc++)
448     {
449       number_t nnz = pointer[rc + 1] - pointer[rc]; // number of non zeros on row/col rc
450       os << eol << "  " << words(rowOrcol) << " " << rc + 1 << " (" << nnz;
451       if(nnz == 0) { os << words("no entry") << " )"; }
452       if(nnz > 1) { os << " " << words("entries") << ", " << words(colOrrow) << " :"; }
453       else        { os << " " << words("entry") << ", " << words(colOrrow) << " :"; }
454       if(nnz > 0)
455         {
456           for(number_t i = pointer[rc]; i < pointer[rc + 1]; i++)  { os << " " << index[i] + 1; }
457           os << ")";
458           if(st == _scalar) { printRowWise(os, "   ", perRow, width, prec, itm, itm + nnz); }
459           else for(Iterator it = itm; it < itm + nnz; it++) { os << *it; }
460           itm += nnz;
461         }
462     }
463   os.unsetf(std::ios::scientific);
464   os << eol;
465 }
466 
467 /*! print matrix of scalars for sym or dual cs strorage
468     itd   : iterator on matrix diagonal entries
469     itlu  : iterator on matrix lower or upper triangular part entries
470     index : numbers of cols (resp. rows) on rows (resp. cols)
471     pointer : adresses of the beginning of rows (resp. cols)
472 */
473 template<typename Iterator>
printEntriesTriangularPart(StrucType st,Iterator & itd,Iterator & itlu,const std::vector<number_t> & index,const std::vector<number_t> & pointer,number_t perRow,number_t width,number_t prec,const string_t & rowOrcol,number_t vb,std::ostream & os) const474 void CsStorage::printEntriesTriangularPart(StrucType st, Iterator& itd, Iterator& itlu, const std::vector<number_t>& index, const std::vector<number_t>& pointer, number_t perRow, number_t width, number_t prec, const string_t& rowOrcol, number_t vb, std::ostream& os) const
475 {
476   number_t rcmin = std::min(vb, number_t(pointer.size()) - 1); // number of rows or cols to print
477   string_t f = "firste";
478   if(rcmin > 1) { f = "firstes"; }
479   os << "(" << words(f) << " " << rcmin << " " << words(rowOrcol) << "s.)";
480   os.setf(std::ios::scientific);
481   string_t colOrrow;
482   if(rowOrcol == "row") { colOrrow = "col"; }
483   else { colOrrow = "row"; }
484   for(number_t rc = 0; rc < rcmin; rc++)
485     {
486       number_t nnz = pointer[rc + 1] - pointer[rc]; // number of non zeros on row/col rc
487       os << eol << "  " << words(rowOrcol) << " " << rc + 1;
488       if(nnz == 0)  // only diagonal coef.
489         {
490           os << " (1 " << words("entry") << ", " << words(colOrrow) << " : " << rc + 1 << ")";
491           if(st == _scalar) { printRowWise(os, "   ", perRow - 1, width, prec, itd, itd + 1); }
492           else { os << *itd; }
493           itd++;
494         }
495       if(nnz > 0)   // non zero coefs. and diagonal coef.
496         {
497           os << " (" << nnz + 1 << " " << words("entries") << ", " << words(colOrrow) << " : ";
498           for(number_t i = pointer[rc]; i < pointer[rc + 1]; i++)  { os << " " << index[i] + 1; }
499           os << " " << (rc + 1) << ")";
500           if(st == _scalar)
501             {
502               printRowWise(os, "   ", perRow - 1, width, prec, itlu, itlu + nnz);
503               os << std::setw(width) << std::setprecision(prec) << *itd++;
504             }
505           else
506             {
507               for(Iterator it = itlu; it < itlu + nnz; it++) { os << *it; }
508               os << *itd++;
509             }
510           itlu += nnz;
511         }
512     }
513   os.unsetf(std::ios::scientific);
514   os << eol;
515 }
516 
517 /*!
518  print "triangular" part of matrix to ostream in coordinate form  i j A_ij :
519     1 1 A11
520     1 2 A12
521     ...
522  a null value or a value less than a tolerance in module is not displayed
523  The coordinates are not ordered
524 */
525 template<typename Iterator>
printCooTriangularPart(std::ostream & os,Iterator & itm,const std::vector<number_t> & index,const std::vector<number_t> & pointer,bool byRow,SymType sym) const526 void CsStorage::printCooTriangularPart(std::ostream& os, Iterator& itm, const std::vector<number_t>& index, const std::vector<number_t>& pointer, bool byRow, SymType sym) const
527 {
528   std::vector<number_t>::const_iterator itr = pointer.begin();
529   std::vector<number_t>::const_iterator itc = index.begin();
530   number_t k, l, nbr = pointer.size() - 1;
531 
532   for(number_t i = 1; i <= nbr; i++, itr++)      // loop on rows/cols
533     {
534       number_t nnz = *(itr + 1) - *itr;
535       for(number_t j = 0; j < nnz; j++, itc++, itm++) // loop on col/row indices
536         {
537           if(byRow) {k = i; l = *itc + 1;}
538           else {k = *itc + 1; l = i;}
539           switch(sym)
540             {
541               case _skewSymmetric : printCoo(os, -*itm, k, l, 0.); break;
542               case _skewAdjoint   : printCoo(os, -conj(*itm), k, l, 0.); break;
543               case _selfAdjoint   : printCoo(os, conj(*itm), k, l, 0.); break;
544               default             : printCoo(os, *itm, k, l, 0.);
545             }
546         }
547     }
548 }
549 
550 template<typename Iterator>
printCooDiagonalPart(std::ostream & os,Iterator & itm,number_t n) const551 void CsStorage::printCooDiagonalPart(std::ostream& os, Iterator& itm, number_t n) const
552 {
553   for(number_t i = 1; i <= n; i++, itm++)   { printCoo(os, *itm, i, i, 0.); }
554 }
555 
556 
557 /*--------------------------------------------------------------------------------
558  template load from file and save to file utilities
559 --------------------------------------------------------------------------------*/
560 
561 /*! load matrix from file including a dense matrix
562     A11 A12  ...                            Re(A11) Im(A11) Re(A12) IM(A12) ....
563     A21 A22  ...    or if complex values    Re(A21) Im(A21) Re(A22) IM(A22) ....
564     ...                                     ...
565  values in module less than the tolerance are considered to be outside the storage
566  note : algorithms use pos(i,j) function; they are not optimal!
567  arguments :
568    ifs       : istream for reading
569    mat       : values of matrix
570    crIndex   : col or row index of non zero coefficients
571    rcPointer : row or col pointer in crIndex
572    sym       : symmetry property of the matrix (_noSymmetry, _symmetric, ...)
573    realAsCmplx : if true, read a real and cast it to complex
574  call only for row or col compressed storage
575 */
576 
577 template <typename T>
loadCsFromFileDense(std::istream & ifs,std::vector<T> & mat,std::vector<number_t> & crIndex,std::vector<number_t> & rcPointer,SymType sym,bool realAsCmplx)578 void CsStorage::loadCsFromFileDense(std::istream& ifs, std::vector<T>& mat, std::vector<number_t>& crIndex, std::vector<number_t>& rcPointer, SymType sym, bool realAsCmplx)
579 {
580   trace_p->push("CsStorage::loadCsFromFileDense");
581   if(accessType_ != _row && accessType_ != _col) { error("storage_not_handled", words("storage type",_cs), words("access type",accessType_)); }
582 
583   // read the file and construct the colomn or row indices of "non zeros"
584   std::vector<std::vector<number_t> > index;
585   std::vector<std::vector<T> > values;
586   if(accessType_ == _row)
587     {index.resize(nbRows_); values.resize(nbRows_);}
588   else
589     {index.resize(nbCols_); values.resize(nbCols_);}
590 
591   for(number_t i = 0; i < nbRows_; i++)
592     for(number_t j = 0; j < nbCols_; j++)
593       {
594         T v; readItem(ifs, v, realAsCmplx);
595         if(std::abs(v) > theTolerance)
596           {
597             if(accessType_ == _row)
598               {index[i].push_back(j+1); values[i].push_back(v);}
599             else
600               {index[j].push_back(i+1); values[j].push_back(v);}
601           }
602       }
603   // construct compressed storage vectors
604   buildCsStorage(index, crIndex, rcPointer);
605   mat.resize(size() + 1);
606   // assign values in "matrix"
607   typename std::vector<T>::iterator itv;
608   std::vector<number_t>::iterator iti;
609   for(number_t i = 0; i < nbRows_; i++)
610     {
611       itv = values[i].begin();
612       for(iti = index[i].begin(); iti != index[i].end(); iti++, itv++)
613         if(sym == _noSymmetry || (sym != _noSymmetry && i >= *iti))
614           {
615             if(accessType_ == _row) { mat[pos(i + 1, *iti)] = *itv; }
616             else { mat[pos(*iti, i+1)] = *itv; }
617           }
618     }
619   trace_p->pop();
620 }
621 
622 /*!
623  load matrix from file including a matrix in coordinate format
624     i j Aij      or if complex values    i j Re(Aij) Im(Aij)
625     ...                                     ...
626  contrary to dense matrix file, zero values are kept in compressed storage
627  note : algorithms use pos(i,j) function; they are not optimal!
628  */
629 template <typename T>
loadCsFromFileCoo(std::istream & ifs,std::vector<T> & mat,std::vector<number_t> & crIndex,std::vector<number_t> & rcPointer,SymType sym,bool realAsCmplx)630 void CsStorage::loadCsFromFileCoo(std::istream& ifs, std::vector<T>& mat, std::vector<number_t>& crIndex, std::vector<number_t>& rcPointer, SymType sym, bool realAsCmplx)
631 {
632   trace_p->push("CsStorage::loadCsFromFileCoo");
633   if(accessType_ != _row && accessType_ != _col) { error("storage_not_handled", words("storage type",_cs), words("access type",accessType_)); }
634 
635   number_t i, j, nbRows_ = 0, nbCols_ = 0;
636   std::map<std::pair<number_t, number_t>, T> values;         // to store matrix values with coordinates access
637   while(!ifs.eof())
638     {
639       ifs >> i >> j;
640       T v; readItem(ifs, v, realAsCmplx);
641       if(i > nbRows_) { nbRows_ = i; }
642       if(j > nbCols_) { nbCols_ = j; }
643       values[std::pair<number_t, number_t>(i, j)] = v;
644     }
645   // build storage
646   std::vector < std::vector<number_t> > index; // colum/row indices by row/col
647   typename std::map<std::pair<number_t, number_t>, T>::iterator itm;
648   if(accessType_ == _row)
649     {
650       index.resize(nbRows_);
651       for(itm = values.begin(); itm != values.end(); itm++)
652         { index[itm->first.first - 1].push_back(itm->first.second); }
653     }
654   else
655     {
656       index.resize(nbCols_);
657       for(itm = values.begin(); itm != values.end(); itm++)
658         { index[itm->first.second - 1].push_back(itm->first.first); }
659     }
660   buildCsStorage(index, crIndex, rcPointer);
661   mat.resize(size() + 1);
662   // assign matrix values
663   for(itm = values.begin(); itm != values.end(); itm++)
664     {
665       i = itm->first.first;
666       j = itm->first.second;
667       if(sym == _noSymmetry || (sym != _noSymmetry && i >= j)) { mat[pos(i, j)] = itm->second; }
668     }
669   trace_p->pop();
670 }
671 
672 // call only for dual or sym compressed storage
673 template <typename T>
loadCsFromFileDense(std::istream & ifs,std::vector<T> & mat,std::vector<number_t> & colIndex,std::vector<number_t> & rowPointer,std::vector<number_t> & rowIndex,std::vector<number_t> & colPointer,SymType sym,bool realAsCmplx)674 void CsStorage::loadCsFromFileDense(std::istream& ifs, std::vector<T>& mat, std::vector<number_t>& colIndex, std::vector<number_t>& rowPointer, std::vector<number_t>& rowIndex, std::vector<number_t>& colPointer, SymType sym, bool realAsCmplx)
675 {
676   trace_p->push("CsStorage::loadCsFromFileDense");
677   if(accessType_ != _dual && accessType_ != _sym) { error("storage_not_handled", words("storage type",_cs), words("access type",accessType_)); }
678 
679   // read the file and construct the column and row indices of "non zeros"
680   std::vector<std::vector<number_t> > cIndex, rIndex;
681   std::vector<std::vector<T> > lowValues, upValues;
682   std::vector<T> diagValues;
683   cIndex.resize(nbRows_); lowValues.resize(nbRows_);
684   diagValues.resize(diagonalSize());
685   bool upper = (accessType_ == _dual) || (accessType_ == _sym && sym == _noSymmetry);
686   if(upper) {rIndex.resize(nbCols_); upValues.resize(nbCols_);}
687 
688   for(number_t i = 0; i < nbRows_; i++)
689     for(number_t j = 0; j < nbCols_; j++)
690       {
691         T v; readItem(ifs, v, realAsCmplx);
692         if(std::abs(v) > theTolerance)
693           {
694             if(i > j) {cIndex[i].push_back(j+1); lowValues[i].push_back(v);}
695             if(i == j) { diagValues[i] = v; }
696             if(i < j && upper) {rIndex[j].push_back(i+1); upValues[j].push_back(v);}
697           }
698       }
699   // construct compressed storage vectors
700   buildCsStorage(cIndex, colIndex, rowPointer);             // lower triangular part
701   if(upper) { buildCsStorage(rIndex, rowIndex, colPointer); }     // upper triangular part
702   if(upper) { mat.resize(diagonalSize() + lowerPartSize() + upperPartSize() + 1); }
703   else      { mat.resize(diagonalSize() + lowerPartSize() + 1); }
704 
705   // load values in "matrix"
706   mat[0] = T();
707   typename std::vector<T>::iterator itm = mat.begin() + 1;
708   typename std::vector<std::vector<T> >::iterator itvv;
709   typename std::vector<T>::iterator itv;
710   std::vector<std::vector<number_t> >::iterator itrc;
711   std::vector<number_t>::iterator iti;
712   for(itv = diagValues.begin(); itv != diagValues.end(); itv++, itm++) { *itm = *itv; } // diagonal part
713   itvv = lowValues.begin();
714 
715   number_t i = 1;
716   for(itrc = cIndex.begin(); itrc != cIndex.end(); itrc++, itvv++, i++)      // lower triangular part
717     {
718       itv = itvv->begin();
719       for(iti = itrc->begin(); iti != itrc->end(); iti++, itv++)  { mat[pos(i, *iti)] = *itv; }
720     }
721 
722   if(upper)  // upper triangular part
723     {
724       itvv = upValues.begin();
725       number_t i = 1;
726       for(itrc = rIndex.begin(); itrc != rIndex.end(); itrc++, itvv++, i++)
727         {
728           itv = itvv->begin();
729           for(iti = itrc->begin(); iti != itrc->end(); iti++, itv++)  { mat[pos(*iti, i)] = *itv; }
730         }
731     }
732 
733   trace_p->pop();
734 }
735 
736 /*!
737  load matrix from file including a matrix in coordinate format
738     i j Aij      or if complex values    i j Re(Aij) Im(Aij)
739     ...                                     ...
740  contrary to dense matrix file, zero values are kept in compressed storage
741  note : algorithm use pos(i,j) function; it is not optimal!
742 */
743 template <typename T>
loadCsFromFileCoo(std::istream & ifs,std::vector<T> & mat,std::vector<number_t> & colIndex,std::vector<number_t> & rowPointer,std::vector<number_t> & rowIndex,std::vector<number_t> & colPointer,SymType sym,bool realAsCmplx)744 void CsStorage::loadCsFromFileCoo(std::istream& ifs, std::vector<T>& mat, std::vector<number_t>& colIndex, std::vector<number_t>& rowPointer, std::vector<number_t>& rowIndex, std::vector<number_t>& colPointer, SymType sym, bool realAsCmplx)
745 {
746   trace_p->push("CsStorage::loadFromFileCoo");
747   if(accessType_ != _dual && accessType_ != _sym) { error("storage_not_handled", words("storage type",_cs), words("access type",accessType_)); }
748 
749   // read values and store them in a map indexed by coordinates matrix
750   number_t i, j, nbRows_ = 0, nbCols_ = 0;
751   std::map<std::pair<number_t, number_t>, T> values;         // to store matrix values with coordinates access
752   while(!ifs.eof())
753     {
754       ifs >> i >> j;
755       T v; readItem(ifs, v, realAsCmplx);
756       if(i > nbRows_) { nbRows_ = i; }
757       if(j > nbCols_) { nbCols_ = j; }
758       values[std::pair<number_t, number_t>(i, j)] = v;
759     }
760 
761   // build storage
762   bool upper = (accessType_ == _dual) || (accessType_ == _sym && sym == _noSymmetry);
763   std::vector < std::vector<number_t> > cIndex, rIndex; //colum and row indices by row/col
764   typename std::map<std::pair<number_t, number_t>, T>::iterator itmv;
765   cIndex.resize(nbRows_);
766   if(upper) { rIndex.resize(nbCols_); }
767   for(itmv = values.begin(); itmv != values.end(); itmv++)
768     {
769       number_t i = itmv->first.first, j = itmv->first.second;
770       if(i > j) { cIndex[i - 1].push_back(j); }
771       if(i < j && upper) { rIndex[j - 1].push_back(i); }
772     }
773   buildCsStorage(cIndex, colIndex, rowPointer);
774   if(accessType_ == _dual) { buildCsStorage(rIndex, rowIndex, colPointer); }
775   if(upper) { mat.resize(diagonalSize() + lowerPartSize() + upperPartSize() + 1); }
776   else      { mat.resize(diagonalSize() + lowerPartSize() + 1); }
777 
778   // assign matrix values
779   for(itmv = values.begin(); itmv != values.end(); itmv++)
780     {
781       i = itmv->first.first;
782       j = itmv->first.second;
783       if((i >= j) || (upper && j > i)) { mat[pos(i, j)] = itmv->second; }
784     }
785 
786   trace_p->pop();
787 }
788 
789 /*--------------------------------------------------------------------------------
790    template Matrix x Vector partial multiplications used in child classes
791  --------------------------------------------------------------------------------*/
792 /*!template Matrix x Vector partial multiplications used in child classes
793    CAUTION ! We use non-commutative * operations here as we may deal
794              with overloaded matrix-vector left or right products
795    in functions, iterators have the following meaning
796      itd  : iterator on matrix diagonal entries (begin)
797      itm  : iterator on matrix entries (lower "triangular" part)
798      itvb : iterator on given vector entries (begin)
799      itve : iterator on given vector entries (end)
800      itrb : iterator on result vector entries (begin)
801      itre : iterator on result vector entries (end)
802      index : col or row indices of non zeros coefs
803      pointer : adresses in index of beginning of rows or cols
804    Result vector res *** NEED NOT BE INITIALIZED HERE ***
805 */
806 
807 // partial multiplication by diagonal M*v, matrix need not be square
808 // should be called first in a product matrix*vector (result vector initialisation)
809 template<typename MatIterator, typename VecIterator, typename ResIterator>
diagonalMatrixVector(MatIterator & itd,VecIterator & itvb,ResIterator & itrb,ResIterator & itre) const810 void CsStorage::diagonalMatrixVector(MatIterator& itd, VecIterator& itvb, ResIterator& itrb, ResIterator& itre) const
811 {
812 #ifdef XLIFEPP_WITH_OMP
813   MatIterator itm = itd;
814   VecIterator itv = itvb;
815   ResIterator itr;
816   #pragma omp parallel default(none) \
817   private(itm, itv, itr) \
818   shared(itvb, itd, itrb, itre)
819   {
820     number_t distance = 0;
821     #pragma omp for nowait
822     for(itr = itrb; itr < (itrb + diagonalSize()); ++itr)
823       {
824         distance = itr - itrb;
825         itm = itd  + distance;
826         itv = itvb + distance;
827         *itr = *itm * *itv;
828       }
829 
830     #pragma omp for nowait
831     for(itr = itrb + diagonalSize(); itr < itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
832   }
833 #else
834   VecIterator itv = itvb;
835   ResIterator itr;
836   for(itr = itrb; itr != itrb + diagonalSize(); ++itr, ++itd, ++itv) { *itr = *itd * *itv; }
837   for(itr = itrb + diagonalSize(); itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
838 #endif
839 }
840 
841 // partial multiplication by diagonal v*M, matrix need not be square
842 template<typename MatIterator, typename VecIterator, typename ResIterator>
diagonalVectorMatrix(MatIterator & itd,VecIterator & itvb,ResIterator & itrb,ResIterator & itre) const843 void CsStorage::diagonalVectorMatrix(MatIterator& itd, VecIterator& itvb, ResIterator& itrb, ResIterator& itre) const
844 {
845 #ifdef XLIFEPP_WITH_OMP
846   MatIterator itm = itd;
847   VecIterator itv = itvb;
848   ResIterator itr;
849   #pragma omp parallel default(none) \
850   private(itm, itv, itr) \
851   shared(itvb, itd, itrb, itre)
852   {
853     number_t distance = 0;
854     #pragma omp for nowait
855     for(itr = itrb; itr < (itrb + diagonalSize()); ++itr)
856       {
857         distance = itr - itrb;
858         itm = itd  + distance;
859         itv = itvb + distance;
860         *itr = *itm * *itv;
861       }
862 
863     #pragma omp for nowait
864     for(itr = itrb + diagonalSize(); itr < itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
865   }
866 #else
867   VecIterator itv = itvb;
868   ResIterator itr;
869   for(itr = itrb; itr != itrb + diagonalSize(); ++itr, ++itd, ++itv) { *itr = *itv * *itd; }
870   for(itr = itrb + diagonalSize(); itr != itre; ++itr) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
871 #endif
872 }
873 
874 // partial multiplication by lower triangular part
875 // also used in multiplication of upper part of transposed matrix by vector
876 // should be called after Diagonal multiplication
877 template<typename MatIterator, typename VecIterator, typename ResIterator>
lowerMatrixVector(const std::vector<number_t> & index,const std::vector<number_t> & pointer,MatIterator & itm,VecIterator & itvb,ResIterator & itrb,SymType sym) const878 void CsStorage::lowerMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
879                                   MatIterator& itm, VecIterator& itvb, ResIterator& itrb, SymType sym) const
880 {
881 #ifdef XLIFEPP_WITH_OMP
882   // Get the number of thread to execute code
883   number_t numThread = numberOfThreads();
884   const number_t GRANULARITY = 16;
885   numThread *= GRANULARITY;
886   // Lower and upper bound position for each thread
887   std::vector<std::vector<number_t>::const_iterator> itThreadLower(numThread);
888   std::vector<std::vector<number_t>::const_iterator> itThreadUpper(numThread);
889   extractThreadIndex(pointer, index, numThread, itThreadLower, itThreadUpper);
890   parallelLowerMatrixVector(index, pointer, itm, itvb, itrb, numThread, itThreadLower, itThreadUpper, sym);
891 
892 #else
893   std::vector<number_t>::const_iterator itpb = pointer.begin(), itpe = pointer.end() - 1, itp;
894   std::vector<number_t>::const_iterator iti = index.begin(), itie;
895   ResIterator itr = itrb;
896   switch(sym)
897     {
898       case _skewSymmetric:
899         for(itp = itpb; itp != itpe; ++itp, ++itr)
900           {
901             itie = index.begin() + *(itp + 1);
902             while(iti != itie) {*itr -= *itm * *(itvb + *iti); ++iti; ++itm;}
903           }
904         break;
905       case _selfAdjoint:
906         for(itp = itpb; itp != itpe; ++itp, ++itr)
907           {
908             itie = index.begin() + *(itp + 1);
909             while(iti != itie) {*itr += conj(*itm) * *(itvb + *iti); ++iti; ++itm;}
910           }
911         break;
912       case _skewAdjoint:
913         for(itp = itpb; itp != itpe; ++itp, ++itr)
914           {
915             itie = index.begin() + *(itp + 1);
916             while(iti != itie) {*itr -= conj(*itm) * *(itvb + *iti); ++iti; ++itm;}
917           }
918         break;
919       default: // symmetric matrix
920         for(itp = itpb; itp != itpe; ++itp, ++itr)
921           {
922             itie = index.begin() + *(itp + 1);
923             while(iti != itie) {*itr += *itm * *(itvb + *iti); ++iti; ++itm;}
924           }
925     }
926 #endif
927 }
928 
929 template<typename MatIterator, typename VecIterator, typename ResIterator>
lowerVectorMatrix(const std::vector<number_t> & index,const std::vector<number_t> & pointer,MatIterator & itm,VecIterator & itvb,ResIterator & itrb,SymType sym) const930 void CsStorage::lowerVectorMatrix(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
931                                   MatIterator& itm, VecIterator& itvb, ResIterator& itrb, SymType sym) const
932 {
933 #ifdef XLIFEPP_WITH_OMP
934   // Get the number of thread to execute code
935   number_t numThread = numberOfThreads();
936   const number_t GRANULARITY = 4;
937   numThread *= GRANULARITY;
938   // Lower and upper bound position for each thread
939   std::vector<std::vector<number_t>::const_iterator> itThreadLower(numThread);
940   std::vector<std::vector<number_t>::const_iterator> itThreadUpper(numThread);
941   extractThreadIndex(pointer, index, numThread, itThreadLower, itThreadUpper);
942   parallelUpperMatrixVector(index, pointer, itm, itvb, itrb, numThread, itThreadLower, itThreadUpper, MatrixStorage::nbCols_, sym);
943 #else
944   std::vector<number_t>::const_iterator itpb = pointer.begin(), itpe = pointer.end() - 1, itp;
945   std::vector<number_t>::const_iterator iti = index.begin(), itie;
946   VecIterator itv = itvb;
947   switch(sym)
948     {
949       case _skewSymmetric:
950         for(itp = itpb; itp != itpe; ++itp, ++itv)
951           {
952             itie = index.begin() + *(itp + 1);
953             while(iti != itie) {*(itrb + *iti) -= *itv * *itm; ++iti; ++itm;}
954           }
955         break;
956       case _selfAdjoint:
957         for(itp = itpb; itp != itpe; ++itp, ++itv)
958           {
959             itie = index.begin() + *(itp + 1);
960             while(iti != itie) {*(itrb + *iti) += *itv * conj(*itm); ++iti; ++itm;}
961           }
962         break;
963       case _skewAdjoint:
964         for(itp = itpb; itp != itpe; ++itp, ++itv)
965           {
966             itie = index.begin() + *(itp + 1);
967             while(iti != itie) {*(itrb + *iti) -= *itv * conj(*itm); ++iti; ++itm;}
968           }
969         break;
970       default: //symmetric matrix
971         for(itp = itpb; itp != itpe; ++itp, ++itv)
972           {
973             itie = index.begin() + *(itp + 1);
974             while(iti != itie) {*(itrb + *iti) += *itv * *itm; ++iti; ++itm;}
975           }
976         break;
977     }
978 #endif
979 }
980 
981 // partial multiplication by upper triangular part
982 // also used in multiplication of upper part of tranposed matrix by vector
983 // should be called after Diagonal multiplication
984 // note : for block matrix, (Matrix<Matrix>) a symmetry property means a symmetry property for the full matrix not for the block matrix
985 //        thus the product of the upper part has to be "transposed" because Aji=tAij
986 template<typename MatIterator, typename VecIterator, typename ResIterator>
upperMatrixVector(const std::vector<number_t> & index,const std::vector<number_t> & pointer,MatIterator & itm,VecIterator & itvb,ResIterator & itrb,SymType sym) const987 void CsStorage::upperMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
988                                   MatIterator& itm, VecIterator& itvb, ResIterator& itrb, SymType sym) const
989 {
990 #ifdef XLIFEPP_WITH_OMP
991   // Get the number of thread to execute code
992   number_t numThread = numberOfThreads();
993   const number_t GRANULARITY = 4 ;//16;
994   numThread *= GRANULARITY;
995   // Lower and upper bound position for each thread
996   std::vector<std::vector<number_t>::const_iterator> itThreadLower(numThread);
997   std::vector<std::vector<number_t>::const_iterator> itThreadUpper(numThread);
998   extractThreadIndex(pointer, index, numThread, itThreadLower, itThreadUpper);
999   parallelUpperMatrixVector(index, pointer, itm, itvb, itrb, numThread, itThreadLower, itThreadUpper, MatrixStorage::nbRows_, sym);
1000 #else
1001   std::vector<number_t>::const_iterator itpb = pointer.begin(), itpe = pointer.end() - 1, itp;
1002   std::vector<number_t>::const_iterator iti = index.begin(), itie;
1003   VecIterator itv = itvb;
1004 
1005   switch(sym)
1006     {
1007       case _skewSymmetric:
1008         for(itp = itpb; itp != itpe; ++itp, ++itv)
1009           {
1010             itie = index.begin() + *(itp + 1);
1011             //while(iti != itie) {*(itrb + *iti) -= *itm * *itv; iti++; itm++;}
1012             while(iti != itie) {*(itrb + *iti) -= *itv * *itm ; ++iti; ++itm;} //modif to deal with block matrix which is skew-symmetric, not block skew-symmetric
1013           }
1014         break;
1015       case _selfAdjoint:
1016         for(itp = itpb; itp != itpe; ++itp, ++itv)
1017           {
1018             itie = index.begin() + *(itp + 1);
1019             //while(iti != itie) {*(itrb + *iti) += conj(*itm) * *itv; iti++; itm++;}
1020             while(iti != itie) {*(itrb + *iti) += *itv * conj(*itm) ; ++iti; ++itm;} //modif to deal with block matrix which is selfadjoint, not block selfadjoint
1021           }
1022         break;
1023       case _skewAdjoint:
1024         for(itp = itpb; itp != itpe; ++itp, ++itv)
1025           {
1026             itie = index.begin() + *(itp + 1);
1027             //while(iti != itie) {*(itrb + *iti) -= conj(*itm) * *itv; iti++; itm++;}
1028             while(iti != itie) {*(itrb + *iti) -= *itv * conj(*itm) ; ++iti; ++itm;} //modif to deal with block matrix which is skewadjoint, not block skewadjointwhile
1029 
1030           }
1031         break;
1032       case _symmetric :
1033         for(itp = itpb; itp != itpe; ++itp, ++itv)
1034           {
1035             itie = index.begin() + *(itp + 1);
1036             //while(iti != itie) { *(itrb + *iti) += *itm * *itv; iti++; itm++;}
1037             while(iti != itie) { *(itrb + *iti) +=  *itv * *itm; ++iti; ++itm;}  //modif to deal with block matrix which is symmetric, not block symmetric
1038           }
1039         break;
1040       default: // no symmetry
1041         for(itp = itpb; itp != itpe; ++itp, ++itv)
1042           {
1043             itie = index.begin() + *(itp + 1);
1044             while(iti != itie) { *(itrb + *iti) += *itm * *itv; ++iti; ++itm;}
1045 
1046           }
1047     }
1048 #endif
1049 }
1050 
1051 // note : for block matrix, (Matrix<Matrix>) a symmetry property means a symmetry property for the full matrix not for the block matrix
1052 //        thus the product of the upper part has to be "transposed" because Aji=tAij
1053 template<typename MatIterator, typename VecIterator, typename ResIterator>
upperVectorMatrix(const std::vector<number_t> & index,const std::vector<number_t> & pointer,MatIterator & itm,VecIterator & itvb,ResIterator & itrb,SymType sym) const1054 void CsStorage::upperVectorMatrix(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
1055                                   MatIterator& itm, VecIterator& itvb, ResIterator& itrb, SymType sym) const
1056 {
1057 #ifdef XLIFEPP_WITH_OMP
1058   // Get the number of thread to execute code
1059   number_t numThread = numberOfThreads();
1060   number_t GRANULARITY = 4;
1061   numThread *= GRANULARITY;
1062   // Lower and upper bound position for each thread
1063   std::vector<std::vector<number_t>::const_iterator> itThreadLower(numThread);
1064   std::vector<std::vector<number_t>::const_iterator> itThreadUpper(numThread);
1065   extractThreadIndex(pointer, index, numThread, itThreadLower, itThreadUpper);
1066   parallelLowerMatrixVector(index, pointer, itm, itvb, itrb, numThread, itThreadLower, itThreadUpper, sym);
1067 
1068 #else
1069   std::vector<number_t>::const_iterator itpb = pointer.begin(), itpe = pointer.end() - 1, itp;
1070   std::vector<number_t>::const_iterator iti = index.begin(), itie;
1071   ResIterator itr = itrb;
1072   switch(sym)
1073     {
1074       case _skewSymmetric:
1075         for(itp = itpb; itp != itpe; ++itp, ++itr)
1076           {
1077             itie = index.begin() + *(itp + 1);
1078             //while(iti != itie) {*itr -= *(itvb + *iti) * *itm ; iti++; itm++;}
1079             while(iti != itie) {*itr -= *itm * *(itvb + *iti) ; ++iti; ++itm;}
1080           }
1081         break;
1082       case _selfAdjoint:
1083         for(itp = itpb; itp != itpe; ++itp, ++itr)
1084           {
1085             itie = index.begin() + *(itp + 1);
1086             //while(iti != itie) {*itr += *(itvb + *iti) * conj(*itm); iti++; itm++;}
1087             while(iti != itie) {*itr += conj(*itm) * *(itvb + *iti); ++iti; ++itm;}
1088           }
1089         break;
1090       case _skewAdjoint:
1091         for(itp = itpb; itp != itpe; ++itp, ++itr)
1092           {
1093             itie = index.begin() + *(itp + 1);
1094             //while(iti != itie) {*itr -= *(itvb + *iti) * conj(*itm); iti++; itm++;}
1095             while(iti != itie) {*itr -= conj(*itm) * *(itvb + *iti); ++iti; ++itm;}
1096           }
1097         break;
1098       case _symmetric:
1099         for(itp = itpb; itp != itpe; ++itp, ++itr)
1100           {
1101             itie = index.begin() + *(itp + 1);
1102             //while(iti != itie) {*itr += *(itvb + *iti) * *itm; iti++; itm++;}
1103             while(iti != itie) {*itr += *itm * *(itvb + *iti); ++iti; ++itm;}
1104           }
1105         break;
1106       default: // no symmetry
1107         for(itp = itpb; itp != itpe; ++itp, ++itr)
1108           {
1109             itie = index.begin() + *(itp + 1);
1110             while(iti != itie) {*itr += *(itvb + *iti) * *itm; ++iti; ++itm;}
1111           }
1112     }
1113 #endif
1114 }
1115 
1116 // multiplication M x V of Cs row-major access matrix M and vector V
1117 // also used in multiplication of column-major transposed matrix by vector
1118 template<typename MatIterator, typename VecIterator, typename ResIterator>
rowMatrixVector(const std::vector<number_t> & index,const std::vector<number_t> & pointer,MatIterator & itm,VecIterator & itvb,ResIterator & itrb) const1119 void CsStorage::rowMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer, MatIterator& itm, VecIterator& itvb, ResIterator& itrb)  const
1120 {
1121 #ifdef XLIFEPP_WITH_OMP
1122   // Get the number of thread to execute code
1123   number_t numThread = numberOfThreads();
1124   const number_t GRANULARITY = 16;
1125   numThread *= GRANULARITY;
1126   // Lower and upper bound position for each thread
1127   std::vector<std::vector<number_t>::const_iterator> itThreadLower(numThread);
1128   std::vector<std::vector<number_t>::const_iterator> itThreadUpper(numThread);
1129   extractThreadIndex(pointer, index, numThread, itThreadLower, itThreadUpper);
1130   parallelRowMatrixVector(index, pointer, itm, itvb, itrb, numThread, itThreadLower, itThreadUpper);
1131 
1132 #else
1133   std::vector<number_t>::const_iterator itpb = pointer.begin(), itpe = pointer.end() - 1, itp;
1134   std::vector<number_t>::const_iterator iti = index.begin(), itie;
1135   ResIterator itr = itrb;
1136   for(itp = itpb; itp != itpe; ++itr, ++itp)
1137     {
1138       *itr *= 0;                      // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
1139       itie = index.begin() + *(itp + 1);
1140       while(iti != itie) {*itr += *itm * *(itvb + *iti); ++iti; ++itm;}
1141     }
1142 #endif
1143 }
1144 
1145 // multiplication V x M of vector V and Cs row-major access matrix M
1146 template<typename MatIterator, typename VecIterator, typename ResIterator>
rowVectorMatrix(const std::vector<number_t> & index,const std::vector<number_t> & pointer,MatIterator & itm,VecIterator & itvb,ResIterator & itrb) const1147 void CsStorage::rowVectorMatrix(const std::vector<number_t>& index, const std::vector<number_t>& pointer, MatIterator& itm, VecIterator& itvb, ResIterator& itrb)  const
1148 {
1149 #ifdef XLIFEPP_WITH_OMP
1150   // Get the number of thread to execute code
1151   number_t numThread = numberOfThreads();
1152   const number_t GRANULARITY = 4;
1153   numThread *= GRANULARITY;
1154 
1155   // Lower and upper bound position for each thread
1156   std::vector<std::vector<number_t>::const_iterator> itThreadLower(numThread);
1157   std::vector<std::vector<number_t>::const_iterator> itThreadUpper(numThread);
1158   extractThreadIndex(pointer, index, numThread, itThreadLower, itThreadUpper);
1159   parallelColumnMatrixVector(index, pointer, itm, itvb, itrb, numThread, itThreadLower, itThreadUpper, MatrixStorage::nbCols_);
1160 
1161 #else
1162   std::vector<number_t>::const_iterator itpb = pointer.begin(), itpe = pointer.end() - 1, itp;
1163   std::vector<number_t>::const_iterator iti = index.begin(), itie;
1164   ResIterator itr = itrb;
1165   for(itp = itpb; itp != itpe; ++itr, ++itp) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
1166   number_t r = 0;
1167   itr = itrb;
1168   for(itp = itpb; itp != itpe; ++itr, ++r, ++itp)
1169     {
1170       itie = index.begin() + *(itp + 1);
1171       while(iti != itie) {*(itrb + *iti) += *(itvb + r) * *itm ; ++iti; ++itm;}
1172     }
1173 #endif
1174 }
1175 
1176 // multiplication M x V of Cs column-major access matrix M and vector V
1177 // also used in multiplication of row-major transposed matrix by vector
1178 template<typename MatIterator, typename VecIterator, typename ResIterator>
columnMatrixVector(const std::vector<number_t> & index,const std::vector<number_t> & pointer,MatIterator & itm,VecIterator & itvb,ResIterator & itrb) const1179 void CsStorage::columnMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer, MatIterator& itm, VecIterator& itvb, ResIterator& itrb) const
1180 {
1181 #ifdef XLIFEPP_WITH_OMP
1182   // Get the number of thread to execute code
1183   number_t numThread = numberOfThreads();
1184   const number_t GRANULARITY = 4 ;//16;
1185   numThread *= GRANULARITY;
1186   // Lower and upper bound position for each thread
1187   std::vector<std::vector<number_t>::const_iterator> itThreadLower(numThread);
1188   std::vector<std::vector<number_t>::const_iterator> itThreadUpper(numThread);
1189   extractThreadIndex(pointer, index, numThread, itThreadLower, itThreadUpper);
1190   parallelColumnMatrixVector(index, pointer, itm, itvb, itrb, numThread, itThreadLower, itThreadUpper, MatrixStorage::nbRows_);
1191 
1192 #else
1193 
1194   std::vector<number_t>::const_iterator itpb = pointer.begin(), itpe = pointer.end() - 1, itp;
1195   std::vector<number_t>::const_iterator iti = index.begin(), itie;
1196   VecIterator itv = itvb;
1197   ResIterator itr = itrb;
1198   for(itp = itpb; itp != itpe; ++itr, ++itp) { *itr *= 0; }     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
1199   itr = itrb;
1200   for(itp = itpb; itp != itpe; ++itv, ++itp)
1201     {
1202       itie = index.begin() + *(itp + 1);
1203       while(iti != itie) {*(itrb + *iti) += *itm * *itv; ++iti; ++itm;}
1204     }
1205 #endif
1206 }
1207 
1208 
1209 // multiplication V x M of vector V and Cs column-major access matrix M
1210 template<typename MatIterator, typename VecIterator, typename ResIterator>
columnVectorMatrix(const std::vector<number_t> & index,const std::vector<number_t> & pointer,MatIterator & itm,VecIterator & itvb,ResIterator & itrb) const1211 void CsStorage::columnVectorMatrix(const std::vector<number_t>& index, const std::vector<number_t>& pointer, MatIterator& itm, VecIterator& itvb, ResIterator& itrb) const
1212 {
1213 #ifdef XLIFEPP_WITH_OMP
1214   // Get the number of thread to execute code
1215   number_t numThread = numberOfThreads();
1216   const number_t GRANULARITY = 16;
1217   numThread *= GRANULARITY;
1218   // Lower and upper bound position for each thread
1219   std::vector<std::vector<number_t>::const_iterator> itThreadLower(numThread);
1220   std::vector<std::vector<number_t>::const_iterator> itThreadUpper(numThread);
1221   extractThreadIndex(pointer, index, numThread, itThreadLower, itThreadUpper);
1222   parallelRowMatrixVector(index, pointer, itm, itvb, itrb, numThread, itThreadLower, itThreadUpper);
1223 
1224 #else
1225   std::vector<number_t>::const_iterator itpb = pointer.begin(), itpe = pointer.end() - 1, itp;
1226   std::vector<number_t>::const_iterator iti = index.begin(), itie;
1227   ResIterator itr = itrb;
1228   for(itp = itpb; itp != itpe; ++itr, ++itp)
1229     {
1230       *itr *= 0;                     // *itr=0 : ambiguous resolution in Vector<K>::operator = ?!
1231       itie = index.begin() + *(itp + 1);
1232       while(iti != itie) {*itr += *(itvb + *iti) * *itm ; ++iti; ++itm;}
1233     }
1234 #endif
1235 }
1236 
1237 /*!
1238    special template solvers (used for SSOR solvers for Symm & Dual Cs Storages)
1239     *** Matrix is assumed INVERTIBLE here ***, no check on division by zero is done
1240     *** SOR-like w*D v diagonal matrix x vector multiplication
1241 */
1242 template<typename MatIterator, typename VecIterator, typename ResIterator>
bzSorDiagonalMatrixVector(MatIterator & itd,const VecIterator & itvb,ResIterator & itrb,const real_t w) const1243 void CsStorage::bzSorDiagonalMatrixVector(MatIterator& itd, const VecIterator& itvb, ResIterator& itrb, const real_t w) const
1244 {
1245   VecIterator itv = itvb;
1246   for(ResIterator itr = itrb; itr != itrb + diagonalSize(); ++itr, ++itd, ++itv)
1247     { *itr = w * (*itd * *itv); }
1248 }
1249 
1250 /*!
1251    SOR lower triangular part (D/w+L) x = b solver
1252    \param itdb is a (forward) iterator on matrix diagonal entries (D)
1253    \param itlb is a (forward) iterator on matrix strict lower triangular part entries (L)
1254    \param itbb is a (forward) iterator on right-hand side vector entries (b)
1255    \param itxb,itxe is a (forward) begin (resp. end) iterator on solution vector entries (x)
1256    \param index
1257    \param pointer
1258    \param w w coefficient
1259 */
1260 template<typename MatIterator, typename VecIterator, typename XIterator>
bzSorLowerSolver(const MatIterator & itdb,const MatIterator & itlb,VecIterator & itbb,XIterator & itxb,XIterator & itxe,const std::vector<number_t> & index,const std::vector<number_t> & pointer,const real_t w) const1261 void CsStorage::bzSorLowerSolver(const MatIterator& itdb, const MatIterator& itlb, VecIterator& itbb, XIterator& itxb, XIterator& itxe, const std::vector<number_t>& index, const std::vector<number_t>& pointer, const real_t w) const
1262 {
1263   std::vector<number_t>::const_iterator itpb = pointer.begin(), itpe = pointer.end() - 1, itp;
1264   std::vector<number_t>::const_iterator iti = index.begin(), itie;
1265 
1266   MatIterator itd=itdb, itl=itlb;
1267   VecIterator itb = itbb;
1268   XIterator itx = itxb;
1269 
1270   for(itp = itpb; itp != itpe; ++itp, ++itx)
1271     {
1272       *itx = *itb++;
1273 
1274       itie = index.begin() + *(itp + 1);
1275       while(iti != itie) {*itx -= *itl * *(itxb + *iti); ++iti; ++itl; }
1276       *itx *= w / *itd++;
1277     }
1278 }
1279 
1280 /*!
1281    lower triangular part L x = b solver with diagonal of L=1
1282    \param itlb is a (forward) iterator on matrix strict lower triangular part entries (L)
1283    \param itbb is a (forward) iterator on right-hand side vector entries (b)
1284    \param itxb,itxe is a (forward) begin (resp. end) iterator on solution vector entries (x)
1285    \param index
1286    \param pointer
1287 */
1288 template<typename MatIterator, typename VecIterator, typename XIterator>
bzLowerD1Solver(const MatIterator & itlb,const VecIterator & itbb,const XIterator & itxb,const XIterator & itxe,const std::vector<number_t> & index,const std::vector<number_t> & pointer) const1289 void CsStorage::bzLowerD1Solver(const MatIterator& itlb, const VecIterator& itbb, const XIterator& itxb, const XIterator& itxe, const std::vector<number_t>& index, const std::vector<number_t>& pointer) const
1290 {
1291   std::vector<number_t>::const_iterator itpb = pointer.begin(), itpe = pointer.end() - 1, itp;
1292   std::vector<number_t>::const_iterator iti = index.begin(), itie;
1293 
1294   MatIterator itl=itlb;
1295   VecIterator itb = itbb;
1296   XIterator itx = itxb;
1297 
1298   for(itp = itpb; itp != itpe; ++itp, ++itx)
1299     {
1300       *itx = *itb++;
1301 
1302       itie = index.begin() + *(itp + 1);
1303       while(iti != itie) {*itx -= *itl * *(itxb + *iti); ++iti; ++itl; }
1304     }
1305 }
1306 
1307 /*!
1308     SOR-like diagonal D/w x = b solver
1309     \param itdb is a (forward) iterator on matrix diagonal entries (D)
1310     \param itbb is a (forward) iterator on right hand side vector b entries
1311     \param itxb, itxe is a (forward) iterator on solution vector x entries
1312     \param w w coefficient
1313 */
1314 template<typename MatIterator, typename VecIterator, typename XIterator>
bzSorDiagonalSolver(const MatIterator & itdb,VecIterator & itbb,XIterator & itxb,XIterator & itxe,const real_t w) const1315 void CsStorage::bzSorDiagonalSolver(const MatIterator& itdb, VecIterator& itbb, XIterator& itxb, XIterator& itxe, const real_t w) const
1316 {
1317   MatIterator itd=itdb;
1318   VecIterator itb=itbb;
1319   if(w!=1)
1320     {
1321       for(XIterator itx = itxb; itx != itxe; ++itx, ++itb, ++itd)
1322         *itx = w * *itb / *itd;
1323     }
1324   else
1325     {
1326       for(XIterator itx = itxb; itx != itxe; ++itx, ++itb, ++itd)
1327         *itx = *itb / *itd;
1328     }
1329 }
1330 
1331 /*!
1332    SOR upper triangular part (D/w+U) x = b solver
1333    *** LO AND BEHOLD *** we are using reverse_iterator syntax here ***
1334    *** loops are thus reversed from end to begin (or rather rbegin to rend)
1335        even though iterators are "++"ed
1336 
1337    \param itrdb is a (backward) iterator on matrix diagonal entries (D)
1338      starting on matrix last diagonal entry (m.rbegin)
1339    \param itrub is a (backward) iterator on matrix strict upper triangular part entries (U)
1340      starting on matrix last entry
1341    \param itrbb is a reverse iterator "b.rbegin" iterator on right hand side vector (b) entries
1342      starting on vector last entry
1343    \param itrxb,itrxe is a reverse iterator "x.rbegin" (resp. "x.rend") iterator on
1344      solution vector (x) entries starting on vector last entry
1345    \param index
1346    \param pointer
1347    \param w w coefficient
1348    \param sym : one of _noSymmetry, _symmetric, _skewSymmetric, _selfAdjoint, _skewAdjoint
1349 */
1350 template<typename MatRevIterator, typename VecRevIterator, typename XRevIterator>
bzSorUpperSolver(const MatRevIterator & itrdb,const MatRevIterator & itrub,VecRevIterator & itrbb,XRevIterator & itrxb,XRevIterator & itrxe,const std::vector<number_t> & index,const std::vector<number_t> & pointer,const real_t w,SymType sym) const1351 void CsStorage::bzSorUpperSolver(const MatRevIterator& itrdb, const MatRevIterator& itrub, VecRevIterator& itrbb, XRevIterator& itrxb, XRevIterator& itrxe,
1352                                  const std::vector<number_t>& index, const std::vector<number_t>& pointer, const real_t w, SymType sym) const
1353 {
1354   std::vector<number_t>::const_reverse_iterator itrpb = pointer.rbegin(), itrpe = pointer.rend() - 1, itrp;
1355   std::vector<number_t>::const_reverse_iterator itri = index.rbegin(), itrie = index.rbegin();
1356 
1357   MatRevIterator itrd=itrdb, tiru=itrub;
1358   VecRevIterator itrb = itrbb;
1359   XRevIterator itrx = itrxb;
1360 
1361   for(itrx = itrxb; itrx != itrxe; ++itrx) { *itrx = *itrb++; }
1362 
1363   itrx = itrxb;
1364   for(itrp = itrpb; itrp != itrpe; ++itrp, ++itrx)
1365     {
1366       *itrx *= w / *itrd++;
1367       itrie += *(itrp) - *(itrp + 1);
1368       switch(sym)
1369         {
1370           case _skewSymmetric :
1371             while(itri != itrie) {*(itrxe - *itri - 1) += *tiru * *(itrx); ++itri; ++tiru; }
1372             break;
1373           case _selfAdjoint :
1374             while(itri != itrie) {*(itrxe - *itri - 1) -= conj(*tiru) * *(itrx); ++itri; ++tiru; }
1375             break;
1376           case _skewAdjoint :
1377             while(itri != itrie) {*(itrxe - *itri - 1) += conj(*tiru) * *(itrx); ++itri; ++tiru; }
1378             break;
1379           default :
1380             while(itri != itrie) {*(itrxe - *itri - 1) -= *tiru * *(itrx); ++itri; ++tiru; }
1381         }
1382       itri = itrie;
1383     }
1384 }
1385 
1386 /*!
1387    SOR upper triangular part I+U) x = b solver
1388    *** LO AND BEHOLD *** we are using reverse_iterator syntax here ***
1389    *** loops are thus reversed from end to begin (or rather rbegin to rend)
1390        even though iterators are "++"ed
1391 
1392    \param itrdb is a (backward) iterator on matrix diagonal entries (D)
1393      starting on matrix last diagonal entry (m.rbegin)
1394    \param itrub is a (backward) iterator on matrix strict upper triangular part entries (U)
1395      starting on matrix last entry
1396    \param itrbb is a reverse iterator "b.rbegin" iterator on right hand side vector (b) entries
1397      starting on vector last entry
1398    \param itrxb,itrxe is a reverse iterator "x.rbegin" (resp. "x.rend") iterator on
1399      solution vector (x) entries starting on vector last entry
1400    \param index
1401    \param pointer
1402    \param w w coefficient
1403    \param sym : one of _noSymmetry, _symmetric, _skewSymmetric, _selfAdjoint, _skewAdjoint
1404 */
1405 template<typename MatRevIterator, typename VecRevIterator, typename XRevIterator>
bzUpperD1Solver(const MatRevIterator & itrdb,const MatRevIterator & itrub,VecRevIterator & itrbb,XRevIterator & itrxb,XRevIterator & itrxe,const std::vector<number_t> & index,const std::vector<number_t> & pointer,const real_t w,SymType sym) const1406 void CsStorage::bzUpperD1Solver(const MatRevIterator& itrdb, const MatRevIterator& itrub, VecRevIterator& itrbb, XRevIterator& itrxb, XRevIterator& itrxe,  const std::vector<number_t>& index, const std::vector<number_t>& pointer, const real_t w, SymType sym) const
1407 {
1408   std::vector<number_t>::const_reverse_iterator itrpb = pointer.rbegin(), itrpe = pointer.rend() - 1, itrp;
1409   std::vector<number_t>::const_reverse_iterator itri = index.rbegin(), itrie = index.rbegin();
1410 
1411   MatRevIterator tiru=itrub;
1412   VecRevIterator itrb = itrbb;
1413   XRevIterator itrx = itrxb;
1414 
1415   for(itrx = itrxb; itrx != itrxe; ++itrx) { *itrx = *itrb++; }
1416 
1417   itrx = itrxb;
1418   for(itrp = itrpb; itrp != itrpe; ++itrp, ++itrx)
1419     {
1420       *itrx *=1.;// w / *itrd++;
1421       itrie += *(itrp) - *(itrp + 1);
1422       switch(sym)
1423         {
1424           case _skewSymmetric :
1425             while(itri != itrie) {*(itrxe - *itri - 1) += *tiru * *(itrx); ++itri; ++tiru; }
1426             break;
1427           case _selfAdjoint :
1428             while(itri != itrie) {*(itrxe - *itri - 1) -= conj(*tiru) * *(itrx); ++itri; ++tiru; }
1429             break;
1430           case _skewAdjoint :
1431             while(itri != itrie) {*(itrxe - *itri - 1) += conj(*tiru) * *(itrx); ++itri; ++tiru; }
1432             break;
1433           default :
1434             while(itri != itrie) {*(itrxe - *itri - 1) -= *tiru * *(itrx); ++itri; ++tiru; }
1435         }
1436       itri = itrie;
1437     }
1438 }
1439 
1440 template<typename Mat1Iterator, typename Mat2Iterator, typename ResIterator>
sumMatrixMatrix(Mat1Iterator & itm1,Mat2Iterator & itm2,ResIterator & itrb,ResIterator & itre) const1441 void CsStorage::sumMatrixMatrix(Mat1Iterator& itm1, Mat2Iterator& itm2, ResIterator& itrb, ResIterator& itre) const
1442 {
1443   for(ResIterator itr = itrb; itr != itre; ++itr)
1444     {
1445       *itr = *itm1 + *itm2;
1446     }
1447 }
1448 
1449 #ifdef XLIFEPP_WITH_OMP
1450 /*!
1451    Parallelize column matrix-vector multiplication
1452        (as well as row vector-matrix multiplication)
1453 
1454    \param [in] index vector column index
1455    \param [in] pointer vector row index
1456    \param [in] itm iterator pointing to first non zero element in the part
1457    \param [in] itvb iterator pointing to first element of source vector
1458    \param [in,out] itrb iterator pointing to first element of vector result
1459    \param [in] numThread number of thread. It's not necessary to be the same number of OpenMp thread
1460                    it can be even more depending on the GRANULARITY factor
1461    \param [in] itThreadLower vector containing lower bound of index for each thread
1462    \param [in] itThreadUpper vector containing upper bound of index for each thread
1463 */
1464 template<typename MatIterator, typename VecIterator, typename ResIterator>
parallelRowMatrixVector(const std::vector<number_t> & index,const std::vector<number_t> & pointer,MatIterator & itm,VecIterator & itvb,ResIterator & itrb,number_t numThread,std::vector<std::vector<number_t>::const_iterator> & itThreadLower,std::vector<std::vector<number_t>::const_iterator> & itThreadUpper) const1465 void CsStorage::parallelRowMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
1466                                         MatIterator& itm, VecIterator& itvb, ResIterator& itrb,
1467                                         number_t numThread,
1468                                         std::vector<std::vector<number_t>::const_iterator>& itThreadLower,
1469                                         std::vector<std::vector<number_t>::const_iterator>& itThreadUpper) const
1470 {
1471   std::vector<number_t>::const_iterator itpb = pointer.begin(), itpe = pointer.end()-1;
1472   std::vector<number_t>::const_iterator itib = index.begin(), itie, iti = itib;
1473   std::vector<number_t>::const_iterator itp = itpb, itpLower, itpUpper;
1474   ResIterator itr = itrb;
1475   MatIterator itim = itm;
1476 
1477   std::vector<std::vector<number_t>::const_iterator>::const_iterator itbLower, itbUpper;
1478   itbLower = itThreadLower.begin();
1479   itbUpper = itThreadUpper.begin();
1480   number_t i = 0;
1481 
1482   #pragma omp parallel for default(none)\
1483   private(i, itr, iti, itie, itim, itpLower, itpUpper) \
1484   shared(numThread, itbLower, itbUpper, itpb, itib, itm, itvb, itrb) \
1485   schedule(dynamic,1)
1486   for(i = 0; i < numThread; i++)
1487     {
1488       itpLower = *(itbLower+i);
1489       itpUpper = *(itbUpper+i);
1490       for(; itpLower != itpUpper; ++itpLower)
1491         {
1492           itr = itrb + (itpLower - itpb);
1493           *itr *= 0;
1494           iti  = itib + *(itpLower);
1495           itie = itib + *(itpLower + 1);
1496           itim = itm + *(itpLower);
1497 
1498           while(iti != itie)
1499             {
1500               *itr += *(itim) * *(itvb + *iti); ++iti; ++itim;
1501             }
1502         }
1503     }
1504 }
1505 
1506 /*!
1507    Parallelize column matrix-vector multiplication
1508        (as well as row vector-matrix multiplication)
1509 
1510    \param [in] index vector column index
1511    \param [in] pointer vector row index
1512    \param [in] itm iterator pointing to first non zero element in the part
1513    \param [in] itvb iterator pointing to first element of source vector
1514    \param [in,out] itrb iterator pointing to first element of vector result
1515    \param [in] numThread number of thread. It's not necessary to be the same number of OpenMp thread
1516                    it can be even more depending on the GRANULARITY factor
1517    \param [in] itThreadLower vector containing lower bound of index for each thread
1518    \param [in] itThreadUpper vector containing upper bound of index for each thread
1519    \param [in] nResult size of vector result
1520 */
1521 template<typename MatIterator, typename VecIterator, typename ResIterator>
parallelColumnMatrixVector(const std::vector<number_t> & index,const std::vector<number_t> & pointer,MatIterator & itm,VecIterator & itvb,ResIterator & itrb,number_t numThread,std::vector<std::vector<number_t>::const_iterator> & itThreadLower,std::vector<std::vector<number_t>::const_iterator> & itThreadUpper,number_t nResult) const1522 void CsStorage::parallelColumnMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
1523     MatIterator& itm, VecIterator& itvb, ResIterator& itrb,
1524     number_t numThread,
1525     std::vector<std::vector<number_t>::const_iterator>& itThreadLower,
1526     std::vector<std::vector<number_t>::const_iterator>& itThreadUpper,
1527     number_t nResult) const
1528 {
1529   typedef typename IterationVectorTrait<ResIterator>::Type ResType;
1530   std::vector<number_t>::const_iterator itpb = pointer.begin(), itpe = pointer.end()-1;
1531   std::vector<number_t>::const_iterator itib = index.begin(), itie, iti = itib;
1532   std::vector<number_t>::const_iterator itp = itpb, itpLower, itpUpper;
1533   VecIterator itv = itvb;
1534   ResIterator itr = itrb;
1535   MatIterator itim = itm;
1536 
1537   std::vector<std::vector<number_t>::const_iterator>::const_iterator itbLower, itbUpper;
1538   itbLower = itThreadLower.begin();
1539   itbUpper = itThreadUpper.begin();
1540 
1541   number_t i = 0;
1542   #pragma omp parallel  default(none) \
1543   private(i, itr, iti, itie, itim, itpLower, itpUpper, itv) \
1544   shared(numThread, itbLower, itbUpper, itpb, itib, itm, itvb, itrb, nResult)
1545   {
1546     std::vector<ResType> resTemp(nResult,((*itm) * (*itvb)) * 0.);  //use the first product to correctly initialize resTemp in case of vector of vectors
1547     typename std::vector<ResType>::iterator itbResTemp = resTemp.begin(), iteResTemp = resTemp.end(), itResTemp = itbResTemp;
1548     #pragma omp for
1549     for(i = 0; i < nResult; ++i)
1550       {
1551         *(itrb + i) *= 0;
1552       }
1553 
1554     #pragma omp for nowait schedule(dynamic,1)
1555     for(i = 0; i < numThread; ++i)
1556       {
1557         itpLower = *(itbLower+i);
1558         itpUpper = *(itbUpper+i);
1559         for(; itpLower != itpUpper; itpLower++)
1560           {
1561             itv = itvb + (itpLower - itpb);
1562             iti  = itib + *(itpLower);
1563             itie = itib + *(itpLower + 1);
1564             itim = itm + *(itpLower);
1565             while(iti != itie)
1566               {
1567                 itResTemp   = itbResTemp + (*iti);
1568                 (*itResTemp) += (*itim) * (*itv);
1569                 ++iti; ++itim;
1570               }
1571           }
1572       }
1573     #pragma omp critical (updateResult)
1574     {
1575       for(itResTemp = itbResTemp; itResTemp != iteResTemp; ++itResTemp)
1576         {
1577           itr = (itrb + (itResTemp - itbResTemp));
1578           *itr += *itResTemp;
1579         }
1580     }
1581   }
1582 }
1583 
1584 /*!
1585    Parallelize triangular lower part of matrix-vector multiplication
1586        (as well as triangular upper part of vector-matrix multiplication)
1587        The switch-case statement inside the parallel region can decrease the performance
1588        a little bit; however, in this way, it helps avoiding redundant codes
1589 
1590    \param [in] index vector column index
1591    \param [in] pointer vector row index
1592    \param [in] itm iterator pointing to first non zero element in the part
1593    \param [in] itvb iterator pointing to first element of source vector
1594    \param [in,out] itrb iterator pointing to first element of vector result
1595    \param [in] numThread number of thread. It's not necessary to be the same number of OpenMp thread
1596                    it can be even more depending on the GRANULARITY factor
1597    \param [in] itThreadLower vector containing lower bound of index for each thread
1598    \param [in] itThreadUpper vector containing upper bound of index for each thread
1599    \param [in] sym symmetric type of matrix
1600 */
1601 template<typename MatIterator, typename VecIterator, typename ResIterator>
parallelLowerMatrixVector(const std::vector<number_t> & index,const std::vector<number_t> & pointer,MatIterator & itm,VecIterator & itvb,ResIterator & itrb,number_t numThread,std::vector<std::vector<number_t>::const_iterator> & itThreadLower,std::vector<std::vector<number_t>::const_iterator> & itThreadUpper,SymType sym) const1602 void CsStorage::parallelLowerMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
1603     MatIterator& itm, VecIterator& itvb, ResIterator& itrb,
1604     number_t numThread,
1605     std::vector<std::vector<number_t>::const_iterator>& itThreadLower,
1606     std::vector<std::vector<number_t>::const_iterator>& itThreadUpper,
1607     SymType sym) const
1608 {
1609   std::vector<number_t>::const_iterator itpb = pointer.begin();
1610   std::vector<number_t>::const_iterator itib = index.begin(), itie, iti = itib;
1611   std::vector<number_t>::const_iterator itpLower, itpUpper;
1612   ResIterator itr = itrb;
1613   MatIterator itim = itm;
1614 
1615   std::vector<std::vector<number_t>::const_iterator>::const_iterator itbLower, itbUpper;
1616   itbLower = itThreadLower.begin();
1617   itbUpper = itThreadUpper.begin();
1618 
1619   number_t i = 0;
1620   #pragma omp parallel default(none)\
1621   private(i, itr, iti, itie, itim, itpLower, itpUpper) \
1622   shared(numThread, itbLower, itbUpper, itpb, itib, itm, itvb, itrb, sym)
1623   {
1624     switch(sym)
1625       {
1626         case _skewSymmetric:
1627           #pragma omp for schedule(dynamic,1)
1628           for(i = 0; i < numThread; ++i)
1629             {
1630               itpLower = *(itbLower+i);
1631               itpUpper = *(itbUpper+i);
1632               for(; itpLower != itpUpper; itpLower++)
1633                 {
1634                   itr = itrb + (itpLower - itpb);
1635                   iti  = itib + *(itpLower);
1636                   itie = itib + *(itpLower + 1);
1637                   itim = itm + *(itpLower);
1638 
1639                   while(iti != itie)
1640                     {
1641                       *itr -= *(itim) * *(itvb + *iti); ++iti; ++itim;
1642                     }
1643                 }
1644             }
1645           break;
1646 
1647         case _selfAdjoint:
1648           #pragma omp for schedule(dynamic,1)
1649           for(i = 0; i < numThread; i++)
1650             {
1651               itpLower = *(itbLower+i);
1652               itpUpper = *(itbUpper+i);
1653               for(; itpLower != itpUpper; ++itpLower)
1654                 {
1655                   itr = itrb + (itpLower - itpb);
1656                   iti  = itib + *(itpLower);
1657                   itie = itib + *(itpLower + 1);
1658                   itim = itm + *(itpLower);
1659 
1660                   while(iti != itie)
1661                     {
1662                       *itr += conj(*itim) * *(itvb + *iti); ++iti; ++itim;
1663                     }
1664                 }
1665             }
1666           break;
1667 
1668         case _skewAdjoint:
1669           #pragma omp for schedule(dynamic,1)
1670           for(i = 0; i < numThread; ++i)
1671             {
1672               itpLower = *(itbLower+i);
1673               itpUpper = *(itbUpper+i);
1674               for(; itpLower != itpUpper; itpLower++)
1675                 {
1676                   itr = itrb + (itpLower - itpb);
1677                   iti  = itib + *(itpLower);
1678                   itie = itib + *(itpLower + 1);
1679                   itim = itm + *(itpLower);
1680 
1681                   while(iti != itie)
1682                     {
1683                       *itr -= conj(*itim) * *(itvb + *iti); ++iti; ++itim;
1684                     }
1685                 }
1686             }
1687           break;
1688 
1689         default: // symmetric matrix
1690           #pragma omp for schedule(dynamic,1)
1691           for(i = 0; i < numThread; i++)
1692             {
1693               itpLower = *(itbLower+i);
1694               itpUpper = *(itbUpper+i);
1695               for(; itpLower != itpUpper; ++itpLower)
1696                 {
1697                   itr = itrb + (itpLower - itpb);
1698                   iti  = itib + *(itpLower);
1699                   itie = itib + *(itpLower + 1);
1700                   itim = itm + *(itpLower);
1701 
1702                   while(iti != itie)
1703                     {
1704                       *itr += *(itim) * *(itvb + *iti); ++iti; ++itim;
1705                     }
1706                 }
1707             }
1708           break;
1709       }
1710   }
1711 }
1712 
1713 /*!
1714    Parallelize matrix-vector multiplication of triangular upper part
1715        (as well as vector-matrix multiplication of triangular lower part)
1716        The switch-case statement inside the parallel region can decrease the performance
1717        a little bit; however, in this way, it helps avoiding redundant codes
1718 
1719    \param [in] index vector row index
1720    \param [in] pointer vector column index
1721    \param [in] itm iterator pointing to first non zero element in the part
1722    \param [in] itvb iterator pointing to first element of source vector
1723    \param [in,out] itrb iterator pointing to first element of vector result
1724    \param [in] numThread number of thread. It's not necessary to be the same number of OpenMp thread
1725                    it can be even more depending on the GRANULARITY factor
1726    \param [in] itThreadLower vector containing lower bound of index for each thread
1727    \param [in] itThreadUpper vector containing upper bound of index for each thread
1728    \param [in] numRes size of vector result
1729    \param [in] sym symmetric type of matrix
1730 */
1731 template<typename MatIterator, typename VecIterator, typename ResIterator>
parallelUpperMatrixVector(const std::vector<number_t> & index,const std::vector<number_t> & pointer,MatIterator & itm,VecIterator & itvb,ResIterator & itrb,number_t numThread,std::vector<std::vector<number_t>::const_iterator> & itThreadLower,std::vector<std::vector<number_t>::const_iterator> & itThreadUpper,number_t numRes,SymType sym) const1732 void CsStorage::parallelUpperMatrixVector(const std::vector<number_t>& index, const std::vector<number_t>& pointer,
1733     MatIterator& itm, VecIterator& itvb, ResIterator& itrb,
1734     number_t numThread,
1735     std::vector<std::vector<number_t>::const_iterator>& itThreadLower,
1736     std::vector<std::vector<number_t>::const_iterator>& itThreadUpper,
1737     number_t numRes,
1738     SymType sym) const
1739 {
1740   typedef typename IterationVectorTrait<ResIterator>::Type ResType;
1741 
1742   std::vector<number_t>::const_iterator itpb = pointer.begin();
1743   std::vector<number_t>::const_iterator itib = index.begin(), itie, iti = itib;
1744   std::vector<number_t>::const_iterator itpLower, itpUpper;
1745   VecIterator itv = itvb;
1746   ResIterator itr = itrb;
1747   MatIterator itim = itm;
1748 
1749   std::vector<std::vector<number_t>::const_iterator>::const_iterator itbLower, itbUpper;
1750   itbLower = itThreadLower.begin();
1751   itbUpper = itThreadUpper.begin();
1752 
1753   number_t i = 0;
1754   #pragma omp parallel  default(none) \
1755   private(i, itr, iti, itie, itim, itpLower, itpUpper, itv) \
1756   shared(numThread, itbLower, itbUpper, itpb, itib, itm, itvb, itrb, numRes, sym)
1757   {
1758     std::vector<ResType> resTemp(numRes,*itrb * 0.);
1759     typename std::vector<ResType>::iterator itbResTemp = resTemp.begin(), iteResTemp = resTemp.end(), itResTemp = itbResTemp;
1760 
1761     switch(sym)
1762       {
1763         case _skewSymmetric:
1764           #pragma omp for nowait schedule(dynamic,1)
1765           for(i = 0; i < numThread; i++)
1766             {
1767               itpLower = *(itbLower+i);
1768               itpUpper = *(itbUpper+i);
1769               for(; itpLower != itpUpper; ++itpLower)
1770                 {
1771                   itv = itvb + (itpLower - itpb);
1772                   iti  = itib + *(itpLower);
1773                   itie = itib + *(itpLower + 1);
1774                   itim = itm + *(itpLower);
1775 
1776                   while(iti != itie)
1777                     {
1778                       itResTemp   = itbResTemp + (*iti);
1779                       //*itResTemp -= *itim * *(itv);
1780                       *itResTemp -= *itv * *itim;
1781                       ++iti; ++itim;
1782                     }
1783                 }
1784             }
1785           break;
1786 
1787         case _selfAdjoint:
1788           #pragma omp for nowait schedule(dynamic,1)
1789           for(i = 0; i < numThread; i++)
1790             {
1791               itpLower = *(itbLower+i);
1792               itpUpper = *(itbUpper+i);
1793               for(; itpLower != itpUpper; ++itpLower)
1794                 {
1795                   itv = itvb + (itpLower - itpb);
1796                   iti  = itib + *(itpLower);
1797                   itie = itib + *(itpLower + 1);
1798                   itim = itm + *(itpLower);
1799 
1800                   while(iti != itie)
1801                     {
1802                       itResTemp   = itbResTemp + (*iti);
1803                       //*itResTemp += conj(*itim) * *(itv);
1804                       *itResTemp += *itv * conj(*itim);
1805                       ++iti; ++itim;
1806                     }
1807                 }
1808             }
1809           break;
1810 
1811         case _skewAdjoint:
1812           #pragma omp for nowait schedule(dynamic,1)
1813           for(i = 0; i < numThread; i++)
1814             {
1815               itpLower = *(itbLower+i);
1816               itpUpper = *(itbUpper+i);
1817               for(; itpLower != itpUpper; ++itpLower)
1818                 {
1819                   itv = itvb + (itpLower - itpb);
1820                   iti  = itib + *(itpLower);
1821                   itie = itib + *(itpLower + 1);
1822                   itim = itm + *(itpLower);
1823 
1824                   while(iti != itie)
1825                     {
1826                       itResTemp   = itbResTemp + (*iti);
1827                       //*itResTemp -= conj(*itim) * *(itv);
1828                       *itResTemp -= *itv * conj(*itim);
1829                       ++iti; ++itim;
1830                     }
1831                 }
1832             }
1833           break;
1834 
1835         case _symmetric :
1836           #pragma omp for nowait schedule(dynamic,1)
1837           for(i = 0; i < numThread; i++)
1838             {
1839               itpLower = *(itbLower+i);
1840               itpUpper = *(itbUpper+i);
1841               for(; itpLower != itpUpper; ++itpLower)
1842                 {
1843                   itv = itvb + (itpLower - itpb);
1844                   iti  = itib + *(itpLower);
1845                   itie = itib + *(itpLower + 1);
1846                   itim = itm + *(itpLower);
1847 
1848                   while(iti != itie)
1849                     {
1850                       itResTemp   = itbResTemp + (*iti);
1851                       //*itResTemp += *itim * *(itv);
1852                       *itResTemp += *itv * *itim ;
1853                       ++iti; ++itim;
1854                     }
1855                 }
1856             }
1857           break;
1858 
1859         default: // no symmetry
1860           #pragma omp for nowait schedule(dynamic,1)
1861           for(i = 0; i < numThread; i++)
1862             {
1863               itpLower = *(itbLower+i);
1864               itpUpper = *(itbUpper+i);
1865               for(; itpLower != itpUpper; ++itpLower)
1866                 {
1867                   itv = itvb + (itpLower - itpb);
1868                   iti  = itib + *(itpLower);
1869                   itie = itib + *(itpLower + 1);
1870                   itim = itm + *(itpLower);
1871 
1872                   while(iti != itie)
1873                     {
1874                       itResTemp   = itbResTemp + (*iti);
1875                       *itResTemp += *itim * *(itv);
1876                       ++iti; ++itim;
1877                     }
1878                 }
1879             }
1880           break;
1881 
1882       }
1883     #pragma omp critical (updateResult)
1884     {
1885       for(itResTemp = itbResTemp; itResTemp != iteResTemp; ++itResTemp)
1886         {
1887           itr = (itrb + (itResTemp - itbResTemp));
1888           *itr += *itResTemp;
1889         }
1890     }
1891   }
1892 }
1893 #endif// end of #ifdef XLIFEPP_WITH_OMP
1894 
1895 } // end of namespace xlifepp
1896 
1897 #endif // CS_STORAGE_HPP
1898 
1899