1 ////////////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (c) 2008 The Regents of the University of California
4 //
5 // This file is part of Qbox
6 //
7 // Qbox is distributed under the terms of the GNU General Public License
8 // as published by the Free Software Foundation, either version 2 of
9 // the License, or (at your option) any later version.
10 // See the file COPYING in the root directory of this distribution
11 // or <http://www.gnu.org/licenses/>.
12 //
13 ////////////////////////////////////////////////////////////////////////////////
14 //
15 // jade.cpp
16 //
17 ////////////////////////////////////////////////////////////////////////////////
18
19 #include <cmath>
20 #include <cassert>
21 #include <vector>
22 #include <deque>
23 #include <algorithm>
24 #include <limits> // epsilon
25 #include <iostream>
26 #include <iomanip>
27 #ifdef USE_MPI
28 #include <mpi.h>
29 #endif
30
31 #ifdef SCALAPACK
32 #include "blacs.h"
33 #endif
34
35 #include "Context.h"
36 #include "Matrix.h"
37 #include "blas.h"
38 #include "Timer.h"
39 using namespace std;
40
jade(int maxsweep,double tol,vector<DoubleMatrix * > a,DoubleMatrix & u,vector<vector<double>> & adiag)41 int jade(int maxsweep, double tol, vector<DoubleMatrix*> a,
42 DoubleMatrix& u, vector<vector<double> >& adiag)
43 {
44 Timer tm_comm;
45 const bool debug_diag_sum = false;
46
47 const double eps = numeric_limits<double>::epsilon();
48 if (tol<eps)
49 {
50 if ( u.context().onpe0() )
51 {
52 cout << " warning: jade tol=" << tol << " < eps=" << eps << endl;
53 }
54 }
55 const Context& ctxt = u.context();
56 // The input matrices are *a[k]
57 // the orthogonal transformation is returned in u
58 // on exit, the matrices a[k] are maximally diagonal,
59 // u contains the orthogonal transformation
60 // adiag[k][i] contains the diagonal elements of the a[k]'s
61
62 for ( int k = 0; k < a.size(); k++ )
63 {
64 assert(a[k]->context() == u.context());
65 assert(a[k]->m()==a[k]->n());
66 assert(a[k]->m()==u.n());
67 assert(a[k]->m()==u.m());
68 assert(a[k]->mb()==u.mb());
69 assert(a[k]->nb()==u.nb());
70 }
71
72 int mloc = a[0]->mloc();
73 int nloc = a[0]->nloc();
74 //cout << ctxt.mype() << ": nloc: " << nloc << endl;
75
76 // identify the last active process column
77 // process columns beyond that column do not have any elements of a[k]
78 // compute num_nblocks = total number of column blocks
79 // if num_nblocks >= ctxt.npcol(), all process columns are active
80 // otherwise, the last active process column has index num_nblocks-1
81 const int num_nblocks = u.n() / u.nb() + ( u.n()%u.nb() == 0 ? 0 : 1 );
82 const int last_active_process_col = min(ctxt.npcol()-1, num_nblocks-1);
83
84 // initialize u with the identity
85 u.identity();
86
87 // eigenvalue array
88 adiag.resize(a.size());
89 for ( int k = 0; k < a.size(); k++ )
90 adiag[k].resize(a[k]->n());
91
92 // check if the local number of rows is odd
93 const bool nloc_odd = ( a[0]->nloc()%2 != 0 );
94
95 // if nloc is odd, auxiliary arrays are created to host an extra column
96 // for both a[k] and u
97 vector<vector<double> > a_aux(a.size());
98 vector<double> u_aux;
99 if ( nloc_odd )
100 {
101 for ( int k = 0; k < a.size(); k++ )
102 a_aux[k].resize(mloc);
103 u_aux.resize(mloc);
104 }
105
106 // compute local number of pairs nploc
107 const int nploc = (a[0]->nloc()+1)/2;
108 // dimension of top and bot arrays is nploc: local number of pairs
109 deque<int> top(nploc), bot(nploc);
110
111 // compute total number of pairs np
112 int np = nploc;
113 ctxt.isum('r',1,1,&np,1);
114 //cout << ctxt.mype() << ": np=" << np << endl;
115 // initialize top and bot arrays
116 // the pair i is (top[i],bot[i])
117 // top[i] is the local index of the top column of pair i
118 // bot[i] is the local index of the bottom column of pair i
119 for ( int i = 0; i < nploc; i++ )
120 top[i] = i;
121 for ( int i = 0; i < nploc; i++ )
122 bot[nploc-i-1] = nploc+i;
123 // if top[i] or bot[i] == nloc,the data resides in the array a_aux or u_aux
124
125 // jglobal: global column index
126 // jglobal[i] is the global column index of the column residing in
127 // the local vector i. If nloc_odd and i==2*nploc-1, jglobal[i] == -1
128 vector<int> jglobal(2*nploc,-1);
129 for ( int jblock = 0; jblock < a[0]->nblocks(); jblock++ )
130 for ( int y = 0; y < a[0]->nbs(jblock); y++ )
131 jglobal[y + jblock*a[0]->nb()] = a[0]->j(jblock,y);
132
133 // store addresses of columns of a and of u in acol and ucol
134 vector<vector<double*> > acol(a.size());
135 vector<double*> ucol(2*nploc);
136 for ( int k = 0; k < a.size(); k++ )
137 {
138 acol[k].resize(2*nploc);
139 for ( int i = 0; i < a[k]->nloc(); i++ )
140 acol[k][i] = a[k]->valptr(i*a[k]->mloc());
141 // if nloc is odd, store the address of vector 2*nploc-1
142 if ( nloc_odd )
143 acol[k][2*nploc-1] = &a_aux[k][0];
144 }
145 for ( int i = 0; i < u.nloc(); i++ )
146 ucol[i] = u.valptr(i*u.mloc());
147 // if nloc is odd, store the address of vector 2*nploc-1
148 if ( nloc_odd )
149 ucol[2*nploc-1] = &u_aux[0];
150
151 //for ( int i = 0; i < acol.size(); i++ )
152 // cout << ctxt.mype() << ": acol[" << i << "]=" << acol[i] << endl;
153 //for ( int i = 0; i < ucol.size(); i++ )
154 // cout << ctxt.mype() << ": ucol[" << i << "]=" << ucol[i] << endl;
155
156 // the vectors of the pair (top[i],bot[i]) are located at
157 // addresses acol[top[i]] and acol[bot[i]]
158
159 bool done = false;
160 int nsweep = 0;
161 // allocate matrix element packed array apq
162 // apq[3*ipair + k*3*nploc] = apq[k][ipair]
163 // apq[3*ipair+1 + k*3*nploc] = app[k][ipair]
164 // apq[3*ipair+2 + k*3*nploc] = aqq[k][ipair]
165 vector<double> apq(a.size()*3*nploc);
166
167 double diag_sum = 0.0;
168 #ifdef DEBUG
169 double previous_diag_sum = 0.0;
170 #endif
171 while ( !done )
172 {
173 // sweep: process local pairs and rotate 2*np-1 times
174 nsweep++;
175 double diag_change = 0.0;
176 for ( int irot = 0; irot < 2*np-1; irot++ )
177 {
178 //cout << ctxt.mype() << ": top[i]: ";
179 //for ( int i = 0; i < nploc; i++ )
180 // cout << setw(3) << top[i];
181 //cout << endl;
182
183 //cout << ctxt.mype() << ": bot[i]: ";
184 //for ( int i = 0; i < nploc; i++ )
185 // cout << setw(3) << bot[i];
186 //cout << endl;
187
188 //cout << ctxt.mype() << ": jglobal[top[i]]: ";
189 //for ( int i = 0; i < nploc; i++ )
190 // cout << setw(3) << jglobal[top[i]];
191 //cout << endl;
192
193 //cout << ctxt.mype() << ": jglobal[bot[i]]: ";
194 //for ( int i = 0; i < nploc; i++ )
195 // cout << setw(3) << jglobal[bot[i]];
196 //cout << endl;
197
198 // perform Jacobi rotations for all local pairs
199
200 // compute off-diagonal matrix elements apq for all pairs
201 // skip the pair if one or both of the vectors is a dummy vector
202 // i.e. a vector having jglobal==-1
203
204 #pragma omp parallel for
205 for ( int k = 0; k < a.size(); k++ )
206 {
207 for ( int ipair = 0; ipair < nploc; ipair++ )
208 {
209 const int iapq = 3*ipair + k*3*nploc;
210 //cout << ctxt.mype() << ": computing apq for global pair "
211 // << jglobal[top[ipair]] << " " << jglobal[bot[ipair]] << endl;
212 apq[iapq] = 0.0;
213 apq[iapq+1] = 0.0;
214 apq[iapq+2] = 0.0;
215 if ( jglobal[top[ipair]] >= 0 && jglobal[bot[ipair]] >= 0 )
216 {
217 const double *ap = acol[k][top[ipair]];
218 const double *aq = acol[k][bot[ipair]];
219 const double *up = ucol[top[ipair]];
220 const double *uq = ucol[bot[ipair]];
221 int one = 1;
222 apq[iapq] = ddot(&mloc,ap,&one,uq,&one);
223 apq[iapq+1] = ddot(&mloc,ap,&one,up,&one);
224 apq[iapq+2] = ddot(&mloc,aq,&one,uq,&one);
225 }
226 }
227 } // for k
228 // apq now contains partial sums of matrix elements
229 tm_comm.start();
230 int len = apq.size();
231 ctxt.dsum('c',len,1,&apq[0],len);
232 tm_comm.stop();
233 // apq now contains the matrix elements
234
235 #pragma omp parallel for
236 for ( int ipair = 0; ipair < nploc; ipair++ )
237 {
238 if ( jglobal[top[ipair]] >= 0 &&
239 jglobal[bot[ipair]] >= 0 )
240 {
241 // compute rotation sine and cosine
242 // Cardoso-Souloumiac expressions for the rotation angle
243
244 // compute 2x2 matrix g
245 double g11 = 0.0, g12 = 0.0, g22 = 0.0;
246
247 for ( int k = 0; k < a.size(); k++ )
248 {
249 const int iapq = 3*ipair + k*3*nploc;
250 const double app = apq[iapq+1];
251 const double aqq = apq[iapq+2];
252
253 const double h1 = app - aqq;
254 const double h2 = 2.0 * apq[iapq];
255
256 g11 += h1 * h1;
257 g12 += h1 * h2;
258 g22 += h2 * h2;
259 }
260
261 // the matrix g is real, symmetric
262 // compute Jacobi rotation diagonalizing g
263
264 double c = 1.0, s = 0.0, e1 = g11, e2 = g22;
265 if ( g12*g12 > eps * eps * fabs(g11*g22) )
266 {
267 double tau = 0.5 * ( g22 - g11 ) / g12;
268 double t = 1.0 / ( fabs(tau) + sqrt(1.0 + tau*tau));
269 if ( tau < 0.0 ) t *= -1.0;
270 c = 1.0 / sqrt(1.0 + t*t);
271 s = t * c;
272
273 // eigenvalues
274 e1 -= t * g12;
275 e2 += t * g12;
276 }
277
278 // components of eigenvector associated with the largest eigenvalue
279 double x,y;
280 if ( e1 > e2 )
281 {
282 x = c;
283 y = -s;
284 }
285 else
286 {
287 x = s;
288 y = c;
289 }
290
291 // choose eigenvector with x positive to ensure small angle
292 if ( x < 0.0 )
293 {
294 x = -x;
295 y = -y;
296 }
297
298 // compute Jacobi rotation R(p,q)
299 c = sqrt(0.5*(x+1.0));
300 s = y / sqrt(2.0*(x+1.0));
301
302 // apply the rotation R(p,q)
303 //
304 // | c s |
305 // R(p,q) = | |
306 // | -s c |
307
308 // U := U * R(p,q)^T
309 // A := A * R(p,q)^T
310
311 // apply rotation to columns of a and u
312
313 // the drot function computes
314 // c*x + s*y -> x
315 //-s*x + c*y -> y
316 // call drot with args c, -s
317
318 //cout << " p=" << jglobal[top[ipair]]
319 // << " q=" << jglobal[bot[ipair]]
320 // << " g11=" << g11 << " g22=" << g22
321 // << " g12=" << g12 << endl;
322 //cout << " c=" << c << " s=" << s << endl;
323
324 int one = 1;
325 for ( int k = 0; k < a.size(); k++ )
326 {
327 double *ap = acol[k][top[ipair]];
328 double *aq = acol[k][bot[ipair]];
329 drot(&mloc,ap,&one,aq,&one,&c,&s);
330 }
331 double *up = ucol[top[ipair]];
332 double *uq = ucol[bot[ipair]];
333 drot(&mloc,up,&one,uq,&one,&c,&s);
334
335 // new value of off-diag element apq
336 double diag_change_ipair = 0.0;
337 for ( int k = 0; k < a.size(); k++ )
338 {
339 const int iapq = 3*ipair + k*3*nploc;
340 const double app = apq[iapq+1];
341 const double aqq = apq[iapq+2];
342 const double apqnew = (c*c-s*s)*apq[iapq] - c*s*(app-aqq);
343
344 // accumulate change in sum of squares of diag elements
345 // note negative sign: decrease in offdiag is increase in diag
346 diag_change_ipair -= 2.0 * ( apqnew*apqnew - apq[iapq]*apq[iapq] );
347 }
348 #pragma omp critical
349 diag_change -= diag_change_ipair;
350 }
351 } // for ipair
352
353 // all local pairs have been processed
354
355 // rotate top and bot arrays
356 if ( nploc > 0 )
357 {
358 bot.push_back(top.back());
359 top.pop_back();
360 top.push_front(bot.front());
361 bot.pop_front();
362
363 // make rotation skip element 0 on the first process column
364 // if my process column is zero, swap top[0] and top[1]
365 if ( ctxt.mycol() == 0 )
366 {
367 if ( nploc > 1 )
368 {
369 int tmp = top[0];
370 top[0] = top[1];
371 top[1] = tmp;
372 }
373 else
374 {
375 // if there is only one local pair, exchange top[0] and bot[0]
376 int tmp = top[0];
377 top[0] = bot[0];
378 bot[0] = tmp;
379 }
380 }
381
382 // exchange columns of a[k] and u
383
384 int rbufi_left, rbufi_right, sbufi_left, sbufi_right;
385 // send buffers contain k columns of a and one of u
386 int bufsize = (a.size()+1)*a[0]->mloc();
387 vector<double> sbuf_left(bufsize), sbuf_right(bufsize);
388 vector<double> rbuf_left(bufsize), rbuf_right(bufsize);
389
390 // on each task except mycol==npcol-1
391 // send jglobal[bot[nploc-1]] to the right
392 // if jglobal != -1 send vector bot[nploc-1] to the right
393
394 // on each task except mycol==npcol-1
395 // recv jglobal from the right
396 // if jglobal != -1 recv a vector from the right into bot[nploc-1]
397 // set value of jglobal[bot[nploc-1]]
398
399 // on each task except mycol==0
400 // send jglobal[top[0]] to the left
401 // if jglobal != -1 send vector top[0] to the left
402
403 // on each task except mycol==0
404 // recv jglobal from the left
405 // if jglobal != -1 recv a vector from the left into top[0]
406 // set value of jglobal[top[0]]
407
408 // exchange jglobal values first
409 tm_comm.start();
410 if ( ctxt.mycol() < last_active_process_col )
411 {
412 sbufi_right = jglobal[bot[nploc-1]];
413 ctxt.isend(1,1,&sbufi_right,1,ctxt.myrow(),ctxt.mycol()+1);
414 ctxt.irecv(1,1,&rbufi_right,1,ctxt.myrow(),ctxt.mycol()+1);
415 jglobal[bot[nploc-1]] = rbufi_right;
416 //cout << ctxt.mype() << ": received jglobal="
417 // << jglobal[bot[nploc-1]] << " from right" << endl;
418 }
419 if ( ctxt.mycol() != 0 )
420 {
421 sbufi_left = jglobal[top[0]];
422 ctxt.isend(1,1,&sbufi_left,1,ctxt.myrow(),ctxt.mycol()-1);
423 ctxt.irecv(1,1,&rbufi_left,1,ctxt.myrow(),ctxt.mycol()-1);
424 jglobal[top[0]] = rbufi_left;
425 //cout << ctxt.mype() << ": received jglobal="
426 // << jglobal[top[0]] << " from left" << endl;
427 }
428
429 // exchange column vectors
430
431 if ( ctxt.mycol() < last_active_process_col )
432 {
433 for ( int k = 0; k < a.size(); k++ )
434 {
435 memcpy(&sbuf_right[k*mloc],acol[k][bot[nploc-1]],
436 mloc*sizeof(double));
437 }
438 memcpy(&sbuf_right[a.size()*mloc], ucol[bot[nploc-1]],
439 mloc*sizeof(double) );
440 ctxt.dsend(bufsize,1,&sbuf_right[0],bufsize,
441 ctxt.myrow(),ctxt.mycol()+1);
442 ctxt.drecv(bufsize,1,&rbuf_right[0],bufsize,
443 ctxt.myrow(),ctxt.mycol()+1);
444 for ( int k = 0; k < a.size(); k++ )
445 {
446 memcpy(acol[k][bot[nploc-1]],&rbuf_right[k*mloc],
447 mloc*sizeof(double));
448 }
449 memcpy(ucol[bot[nploc-1]], &rbuf_right[a.size()*mloc],
450 mloc*sizeof(double) );
451 //cout << ctxt.mype() << ": received vector jglobal="
452 // << jglobal[bot[nploc-1]] << " from right" << endl;
453 }
454 if ( ctxt.mycol() != 0 )
455 {
456 for ( int k = 0; k < a.size(); k++ )
457 {
458 memcpy(&sbuf_left[k*mloc],acol[k][top[0]],mloc*sizeof(double));
459 }
460 memcpy(&sbuf_left[a.size()*mloc],ucol[top[0]],mloc*sizeof(double) );
461 ctxt.dsend(bufsize,1,&sbuf_left[0],bufsize,
462 ctxt.myrow(),ctxt.mycol()-1);
463 ctxt.drecv(bufsize,1,&rbuf_left[0],bufsize,
464 ctxt.myrow(),ctxt.mycol()-1);
465 for ( int k = 0; k < a.size(); k++ )
466 {
467 memcpy(acol[k][top[0]],&rbuf_left[k*mloc],mloc*sizeof(double) );
468 }
469 memcpy(ucol[top[0]],&rbuf_left[a.size()*mloc],mloc*sizeof(double) );
470 //cout << ctxt.mype() << ": received vector jglobal="
471 // << jglobal[top[0]] << " from left" << endl;
472 }
473 tm_comm.stop();
474 } // if nploc > 0
475
476 // end of step
477
478 } // for irot
479
480 // sweep is complete
481 tm_comm.start();
482 ctxt.dsum('r',1,1,&diag_change,1);
483 tm_comm.stop();
484
485 // compute sum of squares of diagonal elements
486
487 if ( debug_diag_sum )
488 {
489 // compute sum of squares of diagonal elements using current values
490 // (after rotation)
491 #ifdef DEBUG
492 previous_diag_sum = diag_sum;
493 #endif
494 diag_sum = 0.0;
495 for ( int k = 0; k < a.size(); k++ )
496 {
497 for ( int ipair = 0; ipair < nploc; ipair++ )
498 {
499 double tmp[2] = { 0.0, 0.0 };
500 // compute the diagonal elements
501 // skip dummy vectors
502 int one = 1;
503 if ( jglobal[top[ipair]] >= 0 )
504 {
505 const double *ap = acol[k][top[ipair]];
506 const double *up = ucol[top[ipair]];
507 tmp[0] = ddot(&mloc,ap,&one,up,&one);
508 }
509 if ( jglobal[bot[ipair]] >= 0 )
510 {
511 const double *aq = acol[k][bot[ipair]];
512 const double *uq = ucol[bot[ipair]];
513 tmp[1] = ddot(&mloc,aq,&one,uq,&one);
514 }
515 // tmp now contains partial sums of app and aqq
516 ctxt.dsum('c',2,1,tmp,2);
517 // tmp now contains the diagonal elements app and aqq
518 diag_sum += tmp[0]*tmp[0] + tmp[1]*tmp[1];
519 }
520 }
521 ctxt.dsum('r',1,1,&diag_sum,1);
522 #ifdef DEBUG
523 const double diag_sum_increase = diag_sum - previous_diag_sum;
524 if ( ctxt.onpe0() )
525 cout << " jade: nsweep=" << nsweep
526 << " dsum: "
527 << setw(15) << setprecision(10) << diag_sum
528 << " dsum_inc: "
529 << setw(15) << setprecision(10) << diag_sum_increase << endl;
530 #endif
531 }
532 #ifdef DEBUG
533 if ( ctxt.onpe0() )
534 cout << " jade: nsweep=" << nsweep
535 << " dchange: "
536 << setw(15) << setprecision(10) << diag_change << endl;
537 #endif
538 done = ( ( fabs(diag_change) < tol ) || ( nsweep >= maxsweep ) );
539
540 } // while !done
541
542 #ifdef DEBUG
543 //print final values of jglobal[top[i]] and jglobal[bot[i]]
544 cout << "final jglobal[top/bot] values:" << endl;
545 cout << ctxt.mype() << ": top: ";
546 for ( int i = 0; i < nploc; i++ )
547 cout << " " << jglobal[top[i]];
548 cout << endl;
549 cout << ctxt.mype() << ": bot: ";
550 for ( int i = 0; i < nploc; i++ )
551 cout << " " << jglobal[bot[i]];
552 cout << endl;
553 //print final values of top and bot
554 cout << "final top/bot values:" << endl;
555 cout << ctxt.mype() << ": top: ";
556 for ( int i = 0; i < nploc; i++ )
557 cout << " " << top[i];
558 cout << endl;
559 cout << ctxt.mype() << ": bot: ";
560 for ( int i = 0; i < nploc; i++ )
561 cout << " " << bot[i];
562 cout << endl;
563 #endif
564
565 // rotate columns of a and u to restore original order
566 double *tmpmat = new double[nloc*mloc];
567 // rotate columns of a
568 for ( int k = 0; k < a.size(); k++ )
569 {
570 for ( int ipair = 0; ipair < nploc; ipair++ )
571 {
572 // copy columns of a[k] to temporary array tmpmat in original order
573 if ( jglobal[top[ipair]] >= 0 )
574 {
575 memcpy(&tmpmat[ipair*mloc], acol[k][top[ipair]], mloc*sizeof(double));
576 }
577 if ( jglobal[bot[nploc-ipair-1]] >= 0 )
578 {
579 memcpy(&tmpmat[(nploc+ipair)*mloc],acol[k][bot[nploc-ipair-1]],
580 mloc*sizeof(double));
581 }
582 }
583 // copy tmpmat back to a[k]
584 memcpy(acol[k][0],tmpmat,nloc*mloc*sizeof(double));
585 }
586
587 // rotate columns of u
588 for ( int ipair = 0; ipair < nploc; ipair++ )
589 {
590 // copy columns of u to temporary array tmpmat in original order
591 if ( jglobal[top[ipair]] >= 0 )
592 {
593 memcpy(&tmpmat[ipair*mloc], ucol[top[ipair]], mloc*sizeof(double));
594 }
595 if ( jglobal[bot[nploc-ipair-1]] >= 0 )
596 {
597 memcpy(&tmpmat[(nploc+ipair)*mloc],ucol[bot[nploc-ipair-1]],
598 mloc*sizeof(double));
599 }
600 }
601 // copy tmpmat back to u
602 memcpy(ucol[0],tmpmat,nloc*mloc*sizeof(double));
603 delete [] tmpmat;
604
605 // compute diagonal values
606 for ( int k = 0; k < a.size(); k++ )
607 {
608 for ( int i = 0; i < a[k]->n(); i++ )
609 adiag[k][i] = 0.0;
610 for ( int jblock = 0; jblock < a[k]->nblocks(); jblock++ )
611 for ( int y = 0; y < a[k]->nbs(jblock); y++ )
612 {
613 // j is the global column index
614 int j = a[k]->j(jblock,y);
615 int jjj = y + jblock*a[k]->nb();
616 const double *ap = a[k]->valptr(jjj*a[k]->mloc());
617 const double *up = u.valptr(jjj*u.mloc());
618 int one = 1;
619 adiag[k][j] = ddot(&mloc,ap,&one,up,&one);
620 }
621 // adiag[k][i] now contains the partial sums of the diagonal elements of a
622 tm_comm.start();
623 ctxt.dsum(a[k]->n(),1,&adiag[k][0],a[k]->n());
624 tm_comm.stop();
625 // adiag[k] contains the diagonal elements of a[k]
626 // u contains the orthogonal transformation minimizing the spread
627 }
628
629 #ifdef DEBUG
630 if ( ctxt.onpe0() )
631 cout << " jade: comm time: " << tm_comm.real() << endl;
632 #endif
633 return nsweep;
634 }
635