1/* 2Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho 3All rights reserved. 4 5Redistribution and use in source and binary forms, with or without modification, 6are permitted provided that the following conditions are met: 7 8Redistributions of source code must retain the above copyright notice, this list of 9conditions and the following disclaimer. Redistributions in binary form must reproduce 10the above copyright notice, this list of conditions and the following disclaimer 11in the documentation and/or other materials provided with the distribution. 12 13Neither the name of the Johns Hopkins University nor the names of its contributors 14may be used to endorse or promote products derived from this software without specific 15prior written permission. 16 17THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 18EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 19OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT 20SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 22TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 23BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 25ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 26DAMAGE. 27*/ 28 29#include <float.h> 30#include <string.h> 31 32 33/////////////////// 34// SparseMatrix // 35/////////////////// 36/////////////////////////////////////// 37// SparseMatrix Methods and Memebers // 38/////////////////////////////////////// 39 40template< class T > 41void SparseMatrix< T >::_init( void ) 42{ 43 _contiguous = false; 44 _maxEntriesPerRow = 0; 45 rows = 0; 46 rowSizes = NullPointer( int ); 47 m_ppElements = NullPointer( Pointer( MatrixEntry< T > ) ); 48} 49 50template< class T > SparseMatrix< T >::SparseMatrix( void ){ _init(); } 51 52template< class T > SparseMatrix< T >::SparseMatrix( int rows ){ _init() , Resize( rows ); } 53template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ){ _init() , Resize( rows , maxEntriesPerRow ); } 54 55template< class T > 56SparseMatrix< T >::SparseMatrix( const SparseMatrix& M ) 57{ 58 _init(); 59 if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow ); 60 else Resize( M.rows ); 61 for( int i=0 ; i<rows ; i++ ) 62 { 63 SetRowSize( i , M.rowSizes[i] ); 64 memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] ); 65 } 66} 67template<class T> 68int SparseMatrix<T>::Entries( void ) const 69{ 70 int e = 0; 71 for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] ); 72 return e; 73} 74template<class T> 75SparseMatrix<T>& SparseMatrix<T>::operator = (const SparseMatrix<T>& M) 76{ 77 if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow ); 78 else Resize( M.rows ); 79 for( int i=0 ; i<rows ; i++ ) 80 { 81 SetRowSize( i , M.rowSizes[i] ); 82 memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] ); 83 } 84 return *this; 85} 86 87template<class T> 88SparseMatrix<T>::~SparseMatrix( void ){ Resize( 0 ); } 89 90template< class T > 91bool SparseMatrix< T >::write( const char* fileName ) const 92{ 93 FILE* fp = fopen( fileName , "wb" ); 94 if( !fp ) return false; 95 bool ret = write( fp ); 96 fclose( fp ); 97 return ret; 98} 99template< class T > 100bool SparseMatrix< T >::read( const char* fileName ) 101{ 102 FILE* fp = fopen( fileName , "rb" ); 103 if( !fp ) return false; 104 bool ret = read( fp ); 105 fclose( fp ); 106 return ret; 107} 108template< class T > 109bool SparseMatrix< T >::write( FILE* fp ) const 110{ 111 if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false; 112 if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false; 113 for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false; 114 return true; 115} 116template< class T > 117bool SparseMatrix< T >::read( FILE* fp ) 118{ 119 int r; 120 if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false; 121 Resize( r ); 122 if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false; 123 for( int i=0 ; i<rows ; i++ ) 124 { 125 r = rowSizes[i]; 126 rowSizes[i] = 0; 127 SetRowSize( i , r ); 128 if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false; 129 } 130 return true; 131} 132 133 134template< class T > 135void SparseMatrix< T >::Resize( int r ) 136{ 137 if( rows>0 ) 138 { 139 if( _contiguous ){ if( _maxEntriesPerRow ) FreePointer( m_ppElements[0] ); } 140 else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) FreePointer( m_ppElements[i] ); } 141 FreePointer( m_ppElements ); 142 FreePointer( rowSizes ); 143 } 144 rows = r; 145 if( r ) 146 { 147 rowSizes = AllocPointer< int >( r ); 148 m_ppElements = AllocPointer< Pointer( MatrixEntry< T > ) >( r ); 149 memset( rowSizes , 0 , sizeof( int ) * r ); 150 } 151 _contiguous = false; 152 _maxEntriesPerRow = 0; 153} 154template< class T > 155void SparseMatrix< T >::Resize( int r , int e ) 156{ 157 if( rows>0 ) 158 { 159 if( _contiguous ){ if( _maxEntriesPerRow ) FreePointer( m_ppElements[0] ); } 160 else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) FreePointer( m_ppElements[i] ); } 161 FreePointer( m_ppElements ); 162 FreePointer( rowSizes ); 163 } 164 rows = r; 165 if( r ) 166 { 167 rowSizes = AllocPointer< int >( r ); 168 m_ppElements = AllocPointer< Pointer( MatrixEntry< T > ) >( r ); 169 m_ppElements[0] = AllocPointer< MatrixEntry< T > >( r * e ); 170 memset( rowSizes , 0 , sizeof( int ) * r ); 171 for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e; 172 } 173 _contiguous = true; 174 _maxEntriesPerRow = e; 175} 176 177template<class T> 178void SparseMatrix< T >::SetRowSize( int row , int count ) 179{ 180 if( _contiguous ) 181 { 182 if( count>_maxEntriesPerRow ) fprintf( stderr , "[ERROR] Cannot set row size on contiguous matrix: %d<=%d\n" , count , _maxEntriesPerRow ) , exit( 0 ); 183 rowSizes[row] = count; 184 } 185 else if( row>=0 && row<rows ) 186 { 187 if( rowSizes[row] ) FreePointer( m_ppElements[row] ); 188 if( count>0 ) m_ppElements[row] = AllocPointer< MatrixEntry< T > >( count ); 189 // [WARNING] Why wasn't this line here before??? 190 rowSizes[row] = count; 191 } 192} 193 194 195template<class T> 196void SparseMatrix<T>::SetZero() 197{ 198 Resize(this->m_N, this->m_M); 199} 200 201template<class T> 202SparseMatrix<T> SparseMatrix<T>::operator * (const T& V) const 203{ 204 SparseMatrix<T> M(*this); 205 M *= V; 206 return M; 207} 208 209template<class T> 210SparseMatrix<T>& SparseMatrix<T>::operator *= (const T& V) 211{ 212 for( int i=0 ; i<rows ; i++ ) for( int ii=0 ; ii<rowSizes[i] ; i++ ) m_ppElements[i][ii].Value *= V; 213 return *this; 214} 215 216template< class T > 217template< class T2 > 218void SparseMatrix< T >::Multiply( ConstPointer( T2 ) in , Pointer( T2 ) out , int threads ) const 219{ 220#pragma omp parallel for num_threads( threads ) 221 for( int i=0 ; i<rows ; i++ ) 222 { 223 T2 _out(0); 224 ConstPointer( MatrixEntry< T > ) start = m_ppElements[i]; 225 ConstPointer( MatrixEntry< T > ) end = start + rowSizes[i]; 226 ConstPointer( MatrixEntry< T > ) e; 227 for( e=start ; e!=end ; e++ ) _out += in[ e->N ] * e->Value; 228 out[i] = _out; 229 } 230} 231template< class T > 232template< class T2 > 233void SparseMatrix< T >::MultiplyAndAddAverage( ConstPointer( T2 ) in , Pointer( T2 ) out , int threads ) const 234{ 235 T2 average = 0; 236 for( int i=0 ; i<rows ; i++ ) average += in[i]; 237 average /= rows; 238 Multiply( in , out , threads ); 239#pragma omp parallel for num_threads( threads ) 240 for( int i=0 ; i<rows ; i++ ) out[i] += average; 241} 242 243 244template< class T > 245template< class T2 > 246int SparseMatrix<T>::SolveJacobi( const SparseMatrix<T>& M , ConstPointer( T2 ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , Pointer( T2 ) Mx , T2 sor , int threads ) 247{ 248 M.Multiply( x , Mx , threads ); 249#if ZERO_TESTING_JACOBI 250 for( int j=0 ; j<int(M.rows) ; j++ ) if( diagonal[j] ) x[j] += ( b[j]-Mx[j] ) * sor / diagonal[j]; 251#else // !ZERO_TESTING_JACOBI 252 for( int j=0 ; j<int(M.rows) ; j++ ) x[j] += ( b[j]-Mx[j] ) * sor / diagonal[j]; 253#endif // ZERO_TESTING_JACOBI 254 return M.rows; 255} 256template< class T > 257template< class T2 > 258int SparseMatrix<T>::SolveJacobi( const SparseMatrix<T>& M , ConstPointer( T2 ) b , Pointer( T2 ) x , Pointer( T2 ) Mx , T2 sor , int threads ) 259{ 260 M.Multiply( x , Mx , threads ); 261#if ZERO_TESTING_JACOBI 262 for( int j=0 ; j<int(M.rows) ; j++ ) 263 { 264 T diagonal = M[j][0].Value; 265 if( diagonal ) x[j] += ( b[j]-Mx[j] ) * sor / diagonal; 266 } 267#else // !ZERO_TESTING_JACOBI 268 for( int j=0 ; j<int(M.rows) ; j++ ) x[j] += ( b[j]-Mx[j] ) * sor / M[j][0].Value; 269#endif // ZERO_TESTING_JACOBI 270 return M.rows; 271} 272template<class T> 273template<class T2> 274int SparseMatrix<T>::SolveGS( const SparseMatrix<T>& M , ConstPointer( T2 ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward ) 275{ 276#define ITERATE \ 277 { \ 278 ConstPointer( MatrixEntry< T > ) start = M[j]; \ 279 ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[j]; \ 280 ConstPointer( MatrixEntry< T > ) e; \ 281 T2 _b = b[j]; \ 282 for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value; \ 283 x[j] += _b / diagonal[j]; \ 284 } 285 286#if ZERO_TESTING_JACOBI 287 if( forward ) for( int j=0 ; j<int(M.rows) ; j++ ){ if( diagonal[j] ){ ITERATE; } } 288 else for( int j=int(M.rows)-1 ; j>=0 ; j-- ){ if( diagonal[j] ){ ITERATE; } } 289#else // !ZERO_TESTING_JACOBI 290 if( forward ) for( int j=0 ; j<int(M.rows) ; j++ ){ ITERATE; } 291 else for( int j=int(M.rows)-1 ; j>=0 ; j-- ){ ITERATE; } 292#endif // ZERO_TESTING_JACOBI 293#undef ITERATE 294 return M.rows; 295} 296template<class T> 297template<class T2> 298int SparseMatrix<T>::SolveGS( const std::vector< std::vector< int > >& mcIndices , const SparseMatrix<T>& M , ConstPointer( T2 ) diagonal , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward , int threads ) 299{ 300 int sum=0; 301#ifdef _WIN32 302#define SetOMPParallel __pragma( omp parallel for num_threads( threads ) ) 303#else // !_WIN32 304#define SetOMPParallel _Pragma( "omp parallel for num_threads( threads )" ) 305#endif // _WIN32 306#if ZERO_TESTING_JACOBI 307#define ITERATE( indices ) \ 308 { \ 309SetOMPParallel \ 310 for( int k=0 ; k<int( indices.size() ) ; k++ ) if( diagonal[indices[k]] ) \ 311 { \ 312 int jj = indices[k]; \ 313 ConstPointer( MatrixEntry< T > ) start = M[jj]; \ 314 ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[jj]; \ 315 ConstPointer( MatrixEntry< T > ) e; \ 316 T2 _b = b[jj]; \ 317 for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value; \ 318 x[jj] += _b / diagonal[jj]; \ 319 } \ 320 } 321#else // !ZERO_TESTING_JACOBI 322#define ITERATE( indices ) \ 323 { \ 324SetOMPParallel \ 325 for( int k=0 ; k<int( indices.size() ) ; k++ ) \ 326 { \ 327 int jj = indices[k]; \ 328 ConstPointer( MatrixEntry< T > ) start = M[jj]; \ 329 ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[jj]; \ 330 ConstPointer( MatrixEntry< T > ) e; \ 331 T2 _b = b[jj]; \ 332 for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value; \ 333 x[jj] += _b / diagonal[jj]; \ 334 } \ 335 } 336#endif // ZERO_TESTING_JACOBI 337 if( forward ) for( int j=0 ; j<mcIndices.size() ; j++ ){ sum += int( mcIndices[j].size() ) ; ITERATE( mcIndices[j] ); } 338 else for( int j=int( mcIndices.size() )-1 ; j>=0 ; j-- ){ sum += int( mcIndices[j].size() ) ; ITERATE( mcIndices[j] ); } 339#undef ITERATE 340#undef SetOMPParallel 341 return sum; 342} 343template<class T> 344template<class T2> 345int SparseMatrix<T>::SolveGS( const SparseMatrix<T>& M , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward ) 346{ 347 int start = forward ? 0 : M.rows-1 , end = forward ? M.rows : -1 , dir = forward ? 1 : -1; 348 for( int j=start ; j!=end ; j+=dir ) 349 { 350 T diagonal = M[j][0].Value; 351#if ZERO_TESTING_JACOBI 352 if( diagonal ) 353#endif // ZERO_TESTING_JACOBI 354 { 355 ConstPointer( MatrixEntry< T > ) start = M[j]; 356 ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[j]; 357 ConstPointer( MatrixEntry< T > ) e; 358 start++; 359 T2 _b = b[j]; 360 for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value; 361 x[j] = _b / diagonal; 362 } 363 } 364 return M.rows; 365} 366template<class T> 367template<class T2> 368int SparseMatrix<T>::SolveGS( const std::vector< std::vector< int > >& mcIndices , const SparseMatrix<T>& M , ConstPointer( T2 ) b , Pointer( T2 ) x , bool forward , int threads ) 369{ 370 int sum=0 , start = forward ? 0 : int( mcIndices.size() )-1 , end = forward ? int( mcIndices.size() ) : -1 , dir = forward ? 1 : -1; 371 for( int j=start ; j!=end ; j+=dir ) 372 { 373 const std::vector< int >& _mcIndices = mcIndices[j]; 374 sum += int( _mcIndices.size() ); 375 { 376#pragma omp parallel for num_threads( threads ) 377 for( int k=0 ; k<int( _mcIndices.size() ) ; k++ ) 378 { 379 int jj = _mcIndices[k]; 380 T diagonal = M[jj][0].Value; 381#if ZERO_TESTING_JACOBI 382 if( diagonal ) 383#endif // ZERO_TESTING_JACOBI 384 { 385 ConstPointer( MatrixEntry< T > ) start = M[jj]; 386 ConstPointer( MatrixEntry< T > ) end = start + M.rowSizes[jj]; 387 ConstPointer( MatrixEntry< T > ) e; 388 start++; 389 T2 _b = b[jj]; 390 for( e=start ; e!=end ; e++ ) _b -= x[ e->N ] * e->Value; 391 x[jj] = _b / diagonal; 392 } 393 } 394 } 395 } 396 return sum; 397} 398 399template< class T > 400template< class T2 > 401void SparseMatrix< T >::getDiagonal( Pointer( T2 ) diagonal , int threads ) const 402{ 403#pragma omp parallel for num_threads( threads ) 404 for( int i=0 ; i<rows ; i++ ) 405 { 406 T2 d = 0.; 407 ConstPointer( MatrixEntry< T > ) start = m_ppElements[i]; 408 ConstPointer( MatrixEntry< T > ) end = start + rowSizes[i]; 409 ConstPointer( MatrixEntry< T > ) e; 410 for( e=start ; e!=end ; e++ ) if( e->N==i ) d += e->Value; 411 diagonal[i] = d; 412 } 413} 414template< class T > 415template< class T2 > 416int SparseMatrix< T >::SolveCG( const SparseMatrix<T>& A , ConstPointer( T2 ) b , int iters , Pointer( T2 ) x , T2 eps , int reset , bool addDCTerm , bool solveNormal , int threads ) 417{ 418 eps *= eps; 419 int dim = A.rows; 420 Pointer( T2 ) r = AllocPointer< T2 >( dim ); 421 Pointer( T2 ) d = AllocPointer< T2 >( dim ); 422 Pointer( T2 ) q = AllocPointer< T2 >( dim ); 423 Pointer( T2 ) temp = NullPointer( T2 ); 424 if( reset ) memset( x , 0 , sizeof(T2)* dim ); 425 if( solveNormal ) temp = AllocPointer< T2 >( dim ); 426 427 double delta_new = 0 , delta_0; 428 if( solveNormal ) 429 { 430 if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )x , temp , threads ) , A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )temp , r , threads ) , A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )b , temp , threads ); 431 else A.Multiply( ( ConstPointer( T2 ) )x , temp , threads ) , A.Multiply( ( ConstPointer( T2 ) )temp , r , threads ) , A.Multiply( ( ConstPointer( T2 ) )b , temp , threads ); 432#pragma omp parallel for num_threads( threads ) reduction( + : delta_new ) 433 for( int i=0 ; i<dim ; i++ ) d[i] = r[i] = temp[i] - r[i] , delta_new += r[i] * r[i]; 434 } 435 else 436 { 437 if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )x , r , threads ); 438 else A.Multiply( ( ConstPointer( T2 ) )x , r , threads ); 439#pragma omp parallel for num_threads( threads ) reduction ( + : delta_new ) 440 for( int i=0 ; i<dim ; i++ ) d[i] = r[i] = b[i] - r[i] , delta_new += r[i] * r[i]; 441 } 442 delta_0 = delta_new; 443 if( delta_new<eps ) 444 { 445// fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps ); 446 FreePointer( r ); 447 FreePointer( d ); 448 FreePointer( q ); 449 FreePointer( temp ); 450 return 0; 451 } 452 int ii; 453 for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ ) 454 { 455 if( solveNormal ) 456 if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )d , temp , threads ) , A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )temp , q , threads ); 457 else A.Multiply( ( ConstPointer( T2 ) )d , temp , threads ) , A.Multiply( ( ConstPointer( T2 ) )temp , q , threads ); 458 else 459 if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )d , q , threads ); 460 else A.Multiply( ( ConstPointer( T2 ) )d , q , threads ); 461 double dDotQ = 0; 462#pragma omp parallel for num_threads( threads ) reduction( + : dDotQ ) 463 for( int i=0 ; i<dim ; i++ ) dDotQ += d[i] * q[i]; 464 T2 alpha = T2( delta_new / dDotQ ); 465 double delta_old = delta_new; 466 delta_new = 0; 467 if( (ii%50)==(50-1) ) 468 { 469#pragma omp parallel for num_threads( threads ) 470 for( int i=0 ; i<dim ; i++ ) x[i] += d[i] * alpha; 471 if( solveNormal ) 472 if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )x , temp , threads ) , A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )temp , r , threads ); 473 else A.Multiply( ( ConstPointer( T2 ) )x , temp , threads ) , A.Multiply( ( ConstPointer( T2 ) )temp , r , threads ); 474 else 475 if( addDCTerm ) A.MultiplyAndAddAverage( ( ConstPointer( T2 ) )x , r , threads ); 476 else A.Multiply( ( ConstPointer( T2 ) )x , r , threads ); 477#pragma omp parallel for num_threads( threads ) reduction( + : delta_new ) 478 for( int i=0 ; i<dim ; i++ ) r[i] = b[i] - r[i] , delta_new += r[i] * r[i] , x[i] += d[i] * alpha; 479 } 480 else 481#pragma omp parallel for num_threads( threads ) reduction( + : delta_new ) 482 for( int i=0 ; i<dim ; i++ ) r[i] -= q[i] * alpha , delta_new += r[i] * r[i] , x[i] += d[i] * alpha; 483 484 T2 beta = T2( delta_new / delta_old ); 485#pragma omp parallel for num_threads( threads ) 486 for( int i=0 ; i<dim ; i++ ) d[i] = r[i] + d[i] * beta; 487 } 488 FreePointer( r ); 489 FreePointer( d ); 490 FreePointer( q ); 491 FreePointer( temp ); 492 return ii; 493} 494