1/*
2Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without modification,
6are permitted provided that the following conditions are met:
7
8Redistributions of source code must retain the above copyright notice, this list of
9conditions and the following disclaimer. Redistributions in binary form must reproduce
10the above copyright notice, this list of conditions and the following disclaimer
11in the documentation and/or other materials provided with the distribution.
12
13Neither the name of the Johns Hopkins University nor the names of its contributors
14may be used to endorse or promote products derived from this software without specific
15prior written permission.
16
17THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
18EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES
19OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
20SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
22TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
23BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
25ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
26DAMAGE.
27*/
28
29#include <float.h>
30#include <string.h>
31
32
33///////////////////
34//  SparseMatrix //
35///////////////////
36///////////////////////////////////////
37// SparseMatrix Methods and Memebers //
38///////////////////////////////////////
39
40template< class T >
41void SparseMatrix< T >::_init( void )
42{
43	_contiguous = false;
44	_maxEntriesPerRow = 0;
45	rows = 0;
46	rowSizes = NullPointer( int );
47	m_ppElements = NullPointer( Pointer( MatrixEntry< T > ) );
48}
49
50template< class T > SparseMatrix< T >::SparseMatrix( void ){  _init(); }
51
52template< class T > SparseMatrix< T >::SparseMatrix( int rows                        ){ _init() , Resize( rows ); }
53template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ){ _init() , Resize( rows , maxEntriesPerRow ); }
54
55template< class T >
56SparseMatrix< T >::SparseMatrix( const SparseMatrix& M )
57{
58	_init();
59	if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
60	else                Resize( M.rows );
61	for( int i=0 ; i<rows ; i++ )
62	{
63		SetRowSize( i , M.rowSizes[i] );
64		memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
65	}
66}
67template<class T>
68int SparseMatrix<T>::Entries( void ) const
69{
70	int e = 0;
71	for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
72	return e;
73}
74template<class T>
75SparseMatrix<T>& SparseMatrix<T>::operator = (const SparseMatrix<T>& M)
76{
77	if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
78	else                Resize( M.rows );
79	for( int i=0 ; i<rows ; i++ )
80	{
81		SetRowSize( i , M.rowSizes[i] );
82		memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
83	}
84	return *this;
85}
86
87template<class T>
88SparseMatrix<T>::~SparseMatrix( void ){ Resize( 0 ); }
89
90template< class T >
91bool SparseMatrix< T >::write( const char* fileName ) const
92{
93	FILE* fp = fopen( fileName , "wb" );
94	if( !fp ) return false;
95	bool ret = write( fp );
96	fclose( fp );
97	return ret;
98}
99template< class T >
100bool SparseMatrix< T >::read( const char* fileName )
101{
102	FILE* fp = fopen( fileName , "rb" );
103	if( !fp ) return false;
104	bool ret = read( fp );
105	fclose( fp );
106	return ret;
107}
108template< class T >
109bool SparseMatrix< T >::write( FILE* fp ) const
110{
111	if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
112	if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
113	for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
114	return true;
115}
116template< class T >
117bool SparseMatrix< T >::read( FILE* fp )
118{
119	int r;
120	if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
121	Resize( r );
122	if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
123	for( int i=0 ; i<rows ; i++ )
124	{
125		r = rowSizes[i];
126		rowSizes[i] = 0;
127		SetRowSize( i , r );
128		if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
129	}
130	return true;
131}
132
133
134template< class T >
135void SparseMatrix< T >::Resize( int r )
136{
137	if( rows>0 )
138	{
139		if( _contiguous ){ if( _maxEntriesPerRow ) FreePointer( m_ppElements[0] ); }
140		else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) FreePointer( m_ppElements[i] ); }
141		FreePointer( m_ppElements );
142		FreePointer( rowSizes );
143	}
144	rows = r;
145	if( r )
146	{
147		rowSizes = AllocPointer< int >( r );
148		m_ppElements = AllocPointer< Pointer( MatrixEntry< T > ) >( r );
149		memset( rowSizes , 0 , sizeof( int ) * r );
150	}
151	_contiguous = false;
152	_maxEntriesPerRow = 0;
153}
154template< class T >
155void SparseMatrix< T >::Resize( int r , int e )
156{
157	if( rows>0 )
158	{
159		if( _contiguous ){ if( _maxEntriesPerRow ) FreePointer( m_ppElements[0] ); }
160		else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) FreePointer( m_ppElements[i] ); }
161		FreePointer( m_ppElements );
162		FreePointer( rowSizes );
163	}
164	rows = r;
165	if( r )
166	{
167		rowSizes = AllocPointer< int >( r );
168		m_ppElements = AllocPointer< Pointer( MatrixEntry< T > ) >( r );
169		m_ppElements[0] = AllocPointer< MatrixEntry< T > >( r * e );
170		memset( rowSizes , 0 , sizeof( int ) * r );
171		for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
172	}
173	_contiguous = true;
174	_maxEntriesPerRow = e;
175}
176
177template<class T>
178void SparseMatrix< T >::SetRowSize( int row , int count )
179{
180	if( _contiguous )
181	{
182		if( count>_maxEntriesPerRow ) fprintf( stderr , "[ERROR] Cannot set row size on contiguous matrix: %d<=%d\n" , count , _maxEntriesPerRow ) , exit( 0 );
183		rowSizes[row] = count;
184	}
185	else if( row>=0 && row<rows )
186	{
187		if( rowSizes[row] ) FreePointer( m_ppElements[row] );
188		if( count>0 ) m_ppElements[row] = AllocPointer< MatrixEntry< T > >( count );
189		// [WARNING] Why wasn't this line here before???
190		rowSizes[row] = count;
191	}
192}
193
194
195template<class T>
196void SparseMatrix<T>::SetZero()
197{
198	Resize(this->m_N, this->m_M);
199}
200
201template<class T>
202SparseMatrix<T> SparseMatrix<T>::operator * (const T& V) const
203{
204	SparseMatrix<T> M(*this);
205	M *= V;
206	return M;
207}
208
209template<class T>
210SparseMatrix<T>& SparseMatrix<T>::operator *= (const T& V)
211{
212	for( int i=0 ; i<rows ; i++ ) for( int ii=0 ; ii<rowSizes[i] ; i++ ) m_ppElements[i][ii].Value *= V;
213	return *this;
214}
215
216template< class T >
217template< class T2 >
218void SparseMatrix< T >::Multiply( ConstPointer( T2 ) in , Pointer( T2 ) out , int threads ) const
219{
220#pragma omp parallel for num_threads( threads )
221	for( int i=0 ; i<rows ; i++ )
222	{
223		T2 _out(0);
224		ConstPointer( MatrixEntry< T > ) start = m_ppElements[i];
225		ConstPointer( MatrixEntry< T > ) end = start + rowSizes[i];
226		ConstPointer( MatrixEntry< T > ) e;
227		for( e=start ; e!=end ; e++ ) _out += in[ e->N ] * e->Value;
228		out[i] = _out;
229	}
230}
231template< class T >
232template< class T2 >
233void SparseMatrix< T >::MultiplyAndAddAverage( ConstPointer( T2 ) in , Pointer( T2 ) out , int threads ) const
234{
235	T2 average = 0;
236	for( int i=0 ; i<rows ; i++ ) average += in[i];
237	average /= rows;
238	Multiply( in , out , threads );
239#pragma omp parallel for num_threads( threads )
240	for( int i=0 ; i<rows ; i++ ) out[i] += average;
241}
242
243
244template< class T >
245template< class T2 >
246int SparseMatrix<T>::SolveJacobi( const SparseMatrix<T>& M , ConstPointer( T2 ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , Pointer( T2 ) Mx , T2 sor , int threads )
247{
248	M.Multiply( x , Mx , threads );
249#if ZERO_TESTING_JACOBI
250	for( int j=0 ; j<int(M.rows) ; j++ ) if( diagonal[j] ) x[j] += ( b[j]-Mx[j] ) * sor / diagonal[j];
251#else // !ZERO_TESTING_JACOBI
252	for( int j=0 ; j<int(M.rows) ; j++ ) x[j] += ( b[j]-Mx[j] ) * sor / diagonal[j];
253#endif // ZERO_TESTING_JACOBI
254	return M.rows;
255}
256template< class T >
257template< class T2 >
258int SparseMatrix<T>::SolveJacobi( const SparseMatrix<T>& M , ConstPointer( T2 ) b , Pointer( T2 ) x , Pointer( T2 ) Mx , T2 sor , int threads )
259{
260	M.Multiply( x , Mx , threads );
261#if ZERO_TESTING_JACOBI
262	for( int j=0 ; j<int(M.rows) ; j++ )
263	{
264		T diagonal = M[j][0].Value;
265		if( diagonal ) x[j] += ( b[j]-Mx[j] ) * sor / diagonal;
266	}
267#else // !ZERO_TESTING_JACOBI
268	for( int j=0 ; j<int(M.rows) ; j++ ) x[j] += ( b[j]-Mx[j] ) * sor / M[j][0].Value;
269#endif // ZERO_TESTING_JACOBI
270	return M.rows;
271}
272template<class T>
273template<class T2>
274int SparseMatrix<T>::SolveGS( const SparseMatrix<T>& M , ConstPointer( T2 ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward )
275{
276#define ITERATE                                                         \
277	{                                                                   \
278		ConstPointer( MatrixEntry< T > ) start = M[j];                  \
279		ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[j];   \
280		ConstPointer( MatrixEntry< T > ) e;                             \
281		T2 _b = b[j];                                                   \
282		for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value;       \
283		x[j] += _b / diagonal[j];                                       \
284	}
285
286#if ZERO_TESTING_JACOBI
287	if( forward ) for( int j=0 ; j<int(M.rows)    ; j++ ){ if( diagonal[j] ){ ITERATE; } }
288	else          for( int j=int(M.rows)-1 ; j>=0 ; j-- ){ if( diagonal[j] ){ ITERATE; } }
289#else // !ZERO_TESTING_JACOBI
290	if( forward ) for( int j=0 ; j<int(M.rows) ; j++ ){ ITERATE; }
291	else          for( int j=int(M.rows)-1 ; j>=0 ; j-- ){ ITERATE; }
292#endif // ZERO_TESTING_JACOBI
293#undef ITERATE
294	return M.rows;
295}
296template<class T>
297template<class T2>
298int SparseMatrix<T>::SolveGS( const std::vector< std::vector< int > >& mcIndices , const SparseMatrix<T>& M , ConstPointer( T2 ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward , int threads )
299{
300	int sum=0;
301#ifdef _WIN32
302#define SetOMPParallel __pragma( omp parallel for num_threads( threads ) )
303#else // !_WIN32
304#define SetOMPParallel _Pragma( "omp parallel for num_threads( threads )" )
305#endif // _WIN32
306#if ZERO_TESTING_JACOBI
307#define ITERATE( indices )                                                        \
308	{                                                                             \
309SetOMPParallel                                                                    \
310		for( int k=0 ; k<int( indices.size() ) ; k++ ) if( diagonal[indices[k]] ) \
311		{                                                                         \
312			int jj = indices[k];                                                  \
313			ConstPointer( MatrixEntry< T > ) start = M[jj];                       \
314			ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[jj];        \
315			ConstPointer( MatrixEntry< T > ) e;                                   \
316			T2 _b = b[jj];                                                        \
317			for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value;             \
318			x[jj] += _b / diagonal[jj];                                           \
319		}                                                                         \
320	}
321#else // !ZERO_TESTING_JACOBI
322#define ITERATE( indices )                                                  \
323	{                                                                       \
324SetOMPParallel                                                              \
325		for( int k=0 ; k<int( indices.size() ) ; k++ )                      \
326		{                                                                   \
327			int jj = indices[k];                                            \
328			ConstPointer( MatrixEntry< T > ) start = M[jj];                 \
329			ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[jj];  \
330			ConstPointer( MatrixEntry< T > ) e;                             \
331			T2 _b = b[jj];                                                  \
332			for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value;       \
333			x[jj] += _b / diagonal[jj];                                     \
334		}                                                                   \
335	}
336#endif // ZERO_TESTING_JACOBI
337	if( forward ) for( int j=0 ; j<mcIndices.size()  ; j++ ){ sum += int( mcIndices[j].size() ) ; ITERATE( mcIndices[j] ); }
338	else for( int j=int( mcIndices.size() )-1 ; j>=0 ; j-- ){ sum += int( mcIndices[j].size() ) ; ITERATE( mcIndices[j] ); }
339#undef ITERATE
340#undef SetOMPParallel
341	return sum;
342}
343template<class T>
344template<class T2>
345int SparseMatrix<T>::SolveGS( const SparseMatrix<T>& M , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward )
346{
347	int start = forward ? 0 : M.rows-1 , end = forward ? M.rows : -1 , dir = forward ? 1 : -1;
348	for( int j=start ; j!=end ; j+=dir )
349	{
350		T diagonal = M[j][0].Value;
351#if ZERO_TESTING_JACOBI
352		if( diagonal )
353#endif // ZERO_TESTING_JACOBI
354		{
355			ConstPointer( MatrixEntry< T > ) start = M[j];
356			ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[j];
357			ConstPointer( MatrixEntry< T > ) e;
358			start++;
359			T2 _b = b[j];
360			for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value;
361			x[j] = _b / diagonal;
362		}
363	}
364	return M.rows;
365}
366template<class T>
367template<class T2>
368int SparseMatrix<T>::SolveGS( const std::vector< std::vector< int > >& mcIndices , const SparseMatrix<T>& M , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward , int threads )
369{
370	int sum=0 , start = forward ? 0 : int( mcIndices.size() )-1 , end = forward ? int( mcIndices.size() ) : -1 , dir = forward ? 1 : -1;
371	for( int j=start ; j!=end ; j+=dir )
372	{
373		const std::vector< int >& _mcIndices = mcIndices[j];
374		sum += int( _mcIndices.size() );
375		{
376#pragma omp parallel for num_threads( threads )
377			for( int k=0 ; k<int( _mcIndices.size() ) ; k++ )
378			{
379				int jj = _mcIndices[k];
380				T diagonal = M[jj][0].Value;
381#if ZERO_TESTING_JACOBI
382				if( diagonal )
383#endif // ZERO_TESTING_JACOBI
384				{
385					ConstPointer( MatrixEntry< T > ) start = M[jj];
386					ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[jj];
387					ConstPointer( MatrixEntry< T > ) e;
388					start++;
389					T2 _b = b[jj];
390					for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value;
391					x[jj] = _b / diagonal;
392				}
393			}
394		}
395	}
396	return sum;
397}
398
399template< class T >
400template< class T2 >
401void SparseMatrix< T >::getDiagonal( Pointer( T2 ) diagonal , int threads ) const
402{
403#pragma omp parallel for num_threads( threads )
404	for( int i=0 ; i<rows ; i++ )
405	{
406		T2 d = 0.;
407		ConstPointer( MatrixEntry< T > ) start = m_ppElements[i];
408		ConstPointer( MatrixEntry< T > ) end = start + rowSizes[i];
409		ConstPointer( MatrixEntry< T > ) e;
410		for( e=start ; e!=end ; e++ ) if( e->N==i ) d += e->Value;
411		diagonal[i] = d;
412	}
413}
414template< class T >
415template< class T2 >
416int SparseMatrix< T >::SolveCG( const SparseMatrix<T>& A , ConstPointer( T2 ) b , int iters , Pointer( T2 ) x , T2 eps , int reset , bool addDCTerm , bool solveNormal , int threads )
417{
418	eps *= eps;
419	int dim = A.rows;
420	Pointer( T2 ) r = AllocPointer< T2 >( dim );
421	Pointer( T2 ) d = AllocPointer< T2 >( dim );
422	Pointer( T2 ) q = AllocPointer< T2 >( dim );
423	Pointer( T2 ) temp = NullPointer( T2 );
424	if( reset ) memset( x , 0 , sizeof(T2)* dim );
425	if( solveNormal ) temp = AllocPointer< T2 >( dim );
426
427	double delta_new = 0 , delta_0;
428	if( solveNormal )
429	{
430		if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )x , temp , threads ) , A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )temp , r , threads ) , A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )b , temp , threads );
431		else            A.Multiply( ( ConstPointer( T2 ) )x , temp , threads ) , A.Multiply( ( ConstPointer( T2 ) )temp , r , threads ) , A.Multiply( ( ConstPointer( T2 ) )b , temp , threads );
432#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
433		for( int i=0 ; i<dim ; i++ ) d[i] = r[i] = temp[i] - r[i] , delta_new += r[i] * r[i];
434	}
435	else
436	{
437		if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )x , r , threads );
438		else            A.Multiply( ( ConstPointer( T2 ) )x , r , threads );
439#pragma omp parallel for num_threads( threads )  reduction ( + : delta_new )
440		for( int i=0 ; i<dim ; i++ ) d[i] = r[i] = b[i] - r[i] , delta_new += r[i] * r[i];
441	}
442	delta_0 = delta_new;
443	if( delta_new<eps )
444	{
445//		fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
446		FreePointer( r );
447		FreePointer( d );
448		FreePointer( q );
449		FreePointer( temp );
450		return 0;
451	}
452	int ii;
453	for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
454	{
455		if( solveNormal )
456			if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )d , temp , threads ) , A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )temp , q , threads );
457			else            A.Multiply( ( ConstPointer( T2 ) )d , temp , threads ) , A.Multiply( ( ConstPointer( T2 ) )temp , q , threads );
458		else
459			if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )d , q , threads );
460			else            A.Multiply( ( ConstPointer( T2 ) )d , q , threads );
461        double dDotQ = 0;
462#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
463		for( int i=0 ; i<dim ; i++ ) dDotQ += d[i] * q[i];
464		T2 alpha = T2( delta_new / dDotQ );
465		double delta_old = delta_new;
466		delta_new = 0;
467		if( (ii%50)==(50-1) )
468		{
469#pragma omp parallel for num_threads( threads )
470			for( int i=0 ; i<dim ; i++ ) x[i] += d[i] * alpha;
471			if( solveNormal )
472				if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )x , temp , threads ) , A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )temp , r , threads );
473				else            A.Multiply( ( ConstPointer( T2 ) )x , temp , threads ) , A.Multiply( ( ConstPointer( T2 ) )temp , r , threads );
474			else
475				if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )x , r , threads );
476				else            A.Multiply( ( ConstPointer( T2 ) )x , r , threads );
477#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
478			for( int i=0 ; i<dim ; i++ ) r[i] = b[i] - r[i] , delta_new += r[i] * r[i] , x[i] += d[i] * alpha;
479		}
480		else
481#pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
482			for( int i=0 ; i<dim ; i++ ) r[i] -= q[i] * alpha , delta_new += r[i] * r[i] ,  x[i] += d[i] * alpha;
483
484		T2 beta = T2( delta_new / delta_old );
485#pragma omp parallel for num_threads( threads )
486		for( int i=0 ; i<dim ; i++ ) d[i] = r[i] + d[i] * beta;
487	}
488	FreePointer( r );
489	FreePointer( d );
490	FreePointer( q );
491	FreePointer( temp );
492	return ii;
493}
494