1#if HAVE_CONFIG_H 2# include "config.fh" 3#endif 4c $Id: nbtest.F,v 1.64.2.11 2007-04-06 22:37:35 d3g293 Exp $ 5 6#define BLOCK_DIM 200 7 8 program main 9 implicit none 10#include "mafdecls.fh" 11#include "global.fh" 12#include "testutil.fh" 13 integer heap, stack, fudge, ma_heap, me, nproc 14 integer ndim, dims(2), pdims(2), lo(2), hi(2), ld 15 integer g_a, two, chunk(2) 16 integer i, j, ii, jj, n, ibuf 17 integer buf(BLOCK_DIM,BLOCK_DIM) 18 integer nbbuf(BLOCK_DIM,BLOCK_DIM,2) 19 integer nbhdl(2) 20 GA_ACCESS_INDEX_TYPE g_ptr, ioff 21 logical status, ok, ok2 22 integer i_put, i_get, i_acc, i_nbput, i_nbget, i_nbacc 23 integer i_putw, i_getw, i_accw 24 double precision t_put, t_get, t_acc, t_nbput, t_nbget, t_nbacc 25 double precision t_wput, t_wget, t_wacc, t_putw, t_getw, t_accw 26 double precision tst 27 parameter (heap=200*200*4, fudge=100, stack=200*200) 28c 29c*** Intitialize a message passing library 30c 31#include "mp3.fh" 32c 33c*** Initialize GA 34c 35c There are 2 choices: ga_initialize or ga_initialize_ltd. 36c In the first case, there is no explicit limit on memory usage. 37c In the second, user can set limit (per processor) in bytes. 38c 39 call ga_initialize() 40 nproc = ga_nnodes() 41 me = ga_nodeid() 42c we can also use GA_set_memory_limit BEFORE first ga_create call 43c 44 ma_heap = heap + fudge 45 ma_heap = 2*ma_heap 46 if (me.eq.0) then 47 print *, 'util_mdtob(', ma_heap, ')=', util_mdtob(ma_heap) 48 print * 49 endif 50 call GA_set_memory_limit(util_mdtob(ma_heap)) 51c 52 if(ga_nodeid().eq.0)then 53 print *,'GA initialized on ',nproc,' processors' 54 call ffflush(6) 55 endif 56c 57c*** Initialize the MA package 58c MA must be initialized before any global array is allocated 59c 60 status = ma_init(MT_DCPL, stack, ma_heap) 61 if (.not. status) call ga_error('ma_init failed',-1) 62c 63c Initialize all timing variables 64c 65 i_put = 0 66 i_get = 0 67 i_acc = 0 68 i_nbput = 0 69 i_nbget = 0 70 i_nbacc = 0 71 i_putw = 0 72 i_getw = 0 73 i_accw = 0 74 t_put = 0.0d00 75 t_get = 0.0d00 76 t_acc = 0.0d00 77 t_nbput = 0.0d00 78 t_nbget = 0.0d00 79 t_nbacc = 0.0d00 80 t_wput = 0.0d00 81 t_wget = 0.0d00 82 t_wacc = 0.0d00 83 t_putw = 0.0d00 84 t_getw = 0.0d00 85 t_accw = 0.0d00 86c 87c Uncomment the below line to register external memory allocator 88c for dynamic arrays inside GA routines. 89c call register_ext_memory() 90c 91c Factor processors into a roughly square grid 92c 93 call factor(nproc,pdims) 94 if (me.eq.0) then 95 write(6,*) 96 write(6,'(a,i4,a,i4,a)') ' Processors configured on ',pdims(1), 97 + ' x ',pdims(2),' grid' 98 endif 99c 100c Create an array with each processor holding BLOCK_DIM*BLOCK_DIM 101c elements and initialize it to zero 102c 103 dims(1) = BLOCK_DIM*pdims(1) 104 dims(2) = BLOCK_DIM*pdims(2) 105 ld = BLOCK_DIM 106 g_a = ga_create_handle() 107 two = 2 108 call nga_set_data(g_a, two, dims, MT_INT) 109 chunk(1) = BLOCK_DIM 110 chunk(2) = BLOCK_DIM 111 call nga_set_chunk(g_a, chunk) 112 if (.not.ga_allocate(g_a)) then 113 n = dims(1)*dims(2) 114 write(6,'(a,i16)') 115 + 'Failure to initialize global array of size ',n 116 call ga_error('GA_Allocate failure',-1) 117 endif 118 call ga_zero(g_a) 119c 120c Initialize entire GA from process 0 using blocking puts 121c 122 if (me.eq.0) then 123c 124c Loop over all processors 125c 126 do n = 0, nproc-1 127c 128c Calculate process grid coordinates 129c 130 ii = mod(n,pdims(1)) 131 jj = (n-ii)/pdims(1) 132c 133c Calculate lower and upper indices of the block held by process n 134c 135 lo(1) = ii*BLOCK_DIM + 1 136 lo(2) = jj*BLOCK_DIM + 1 137 hi(1) = (ii+1)*BLOCK_DIM 138 hi(2) = (jj+1)*BLOCK_DIM 139c 140c Fill buffer with correct values 141c 142 do j = lo(2), hi(2) 143 jj = j-lo(2)+1 144 do i = lo(1), hi(1) 145 ii = i-lo(1)+1 146 buf(ii,jj) = (j-1)*dims(1)+(i-1) 147 end do 148 end do 149c 150c Copy buffer to GA 151c 152 tst = ga_wtime() 153 call nga_put(g_a,lo,hi,buf,ld) 154 t_put = t_put + ga_wtime() - tst 155 i_put = i_put + 1 156 end do 157 endif 158 call ga_sync 159c 160c Check values on each process for correctness 161c 162 ok = .true. 163 call nga_distribution(g_a,me,lo,hi) 164 call nga_access(g_a,lo,hi,g_ptr,ld) 165 ioff = 0 166 do j = lo(2), hi(2) 167 do i = lo(1), hi(1) 168 n = int_mb(g_ptr+ioff) 169 if (n.ne.(j-1)*dims(1)+(i-1)) ok = .false. 170 ioff = ioff+1 171 end do 172 end do 173 call nga_release(g_a, lo, hi) 174c 175 if (ok) then 176 if (me.eq.0) then 177 write(6,*) 178 write(6,'(a)') 'OK for simple PUT test' 179 write(6,*) 180 endif 181 else 182 write(6,'(a,i4)') 'Error found in PUT test on process ',me 183 endif 184c 185c Collect all values on process 0 from GA 186c 187 ok = .true. 188 if (me.eq.0) then 189 do n = 0, nproc-1 190c 191c Calculate process grid coordinates 192c 193 ii = mod(n,pdims(1)) 194 jj = (n-ii)/pdims(1) 195c 196c Calculate lower and upper indices of the block held by process n 197c 198 lo(1) = ii*BLOCK_DIM + 1 199 lo(2) = jj*BLOCK_DIM + 1 200 hi(1) = (ii+1)*BLOCK_DIM 201 hi(2) = (jj+1)*BLOCK_DIM 202c write(6,'(a,i4,a,i4,a,i4,a,i4,a,i4)') 'Accessing block (', 203c + lo(1),',',hi(1),') (',lo(2),',',hi(2),') on process ',n 204c 205c Copy contents of GA to local buffer 206c 207 tst = ga_wtime() 208 call nga_get(g_a,lo,hi,buf,ld) 209 t_get = t_get + ga_wtime() - tst 210 i_get = i_get + 1 211c 212c check buffer for correct values 213c 214 ok2 = .true. 215 do j = lo(2), hi(2) 216 jj = j-lo(2)+1 217 do i = lo(1), hi(1) 218 ii = i-lo(1)+1 219 if (buf(ii,jj).ne.(j-1)*dims(1)+(i-1)) ok2 = .false. 220 end do 221 end do 222 if (.not.ok2) then 223 write(6,'(a,i4)') 'Error found in GET test on process ',n 224 ok = .false. 225 endif 226 end do 227 if (ok) then 228 if (me.eq.0) then 229 write(6,*) 230 write(6,'(a)') 'OK for simple GET test' 231 write(6,*) 232 endif 233 endif 234 endif 235c 236c Double all values in the GA using accumulate 237c 238 if (me.eq.0) then 239c 240c Loop over all processors 241c 242 do n = 0, nproc-1 243c 244c Calculate process grid coordinates 245c 246 ii = mod(n,pdims(1)) 247 jj = (n-ii)/pdims(1) 248c 249c Calculate lower and upper indices of the block held by process n 250c 251 lo(1) = ii*BLOCK_DIM + 1 252 lo(2) = jj*BLOCK_DIM + 1 253 hi(1) = (ii+1)*BLOCK_DIM 254 hi(2) = (jj+1)*BLOCK_DIM 255c 256c Fill buffer with correct values 257c 258 do j = lo(2), hi(2) 259 jj = j-lo(2)+1 260 do i = lo(1), hi(1) 261 ii = i-lo(1)+1 262 buf(ii,jj) = (j-1)*dims(1)+(i-1) 263 end do 264 end do 265c 266c Accumulate buffer to GA 267c 268 tst = ga_wtime() 269 call nga_acc(g_a,lo,hi,buf,ld,1) 270 t_acc = t_acc + ga_wtime() - tst 271 i_acc = i_acc + 1 272 end do 273 endif 274 call ga_sync 275c 276c Check values on each process for correctness 277c 278 ok = .true. 279 call nga_distribution(g_a,me,lo,hi) 280 call nga_access(g_a,lo,hi,g_ptr,ld) 281 ioff = 0 282 do j = lo(2), hi(2) 283 do i = lo(1), hi(1) 284 n = int_mb(g_ptr+ioff) 285 if (n.ne.2*((j-1)*dims(1)+(i-1))) ok = .false. 286 ioff = ioff+1 287 end do 288 end do 289 call nga_release(g_a, lo, hi) 290c 291 if (ok) then 292 if (me.eq.0) then 293 write(6,*) 294 write(6,'(a)') 'OK for simple ACC test' 295 write(6,*) 296 endif 297 else 298 write(6,'(a,i4)') 'Error found in ACC test on process ',me 299 endif 300c 301c Redo test using non-blocking puts, gets and accumulates 302c 303 call ga_zero(g_a) 304 if (me.eq.0) then 305c 306c Initialize puts by doing calculation for process 0 307c 308 tst = ga_wtime() 309 lo(1) = 1 310 lo(2) = 1 311 hi(1) = BLOCK_DIM 312 hi(2) = BLOCK_DIM 313c 314c Fill first buffer with data 315c 316 do j = lo(2), hi(2) 317 jj = j-lo(2)+1 318 do i = lo(1), hi(1) 319 ii = i-lo(1)+1 320 nbbuf(ii,jj,1) = (j-1)*dims(1)+(i-1) 321 end do 322 end do 323 t_putw = t_putw + ga_wtime() - tst 324 i_putw = i_putw + 1 325c 326c Start the first put 327c 328 tst = ga_wtime() 329 call nga_nbput(g_a,lo,hi,nbbuf(1,1,1),ld,nbhdl(1)) 330 t_nbput = t_nbput + ga_wtime() - tst 331 i_nbput = i_nbput + 1 332c 333c Loop over all processors 334c 335 do n = 1, nproc-1 336c 337c Calculate process grid coordinates 338c 339 ii = mod(n,pdims(1)) 340 jj = (n-ii)/pdims(1) 341c 342c Calculate lower and upper indices of the block held by process n 343c 344 tst = ga_wtime() 345 lo(1) = ii*BLOCK_DIM + 1 346 lo(2) = jj*BLOCK_DIM + 1 347 hi(1) = (ii+1)*BLOCK_DIM 348 hi(2) = (jj+1)*BLOCK_DIM 349c 350c Fill buffer with correct values 351c 352 ibuf = mod(n,2)+1 353 do j = lo(2), hi(2) 354 jj = j-lo(2)+1 355 do i = lo(1), hi(1) 356 ii = i-lo(1)+1 357 nbbuf(ii,jj,ibuf) = (j-1)*dims(1)+(i-1) 358 end do 359 end do 360 t_putw = t_putw + ga_wtime() - tst 361 i_putw = i_putw + 1 362c 363c Copy buffer to GA 364c 365 tst = ga_wtime() 366 call nga_nbput(g_a,lo,hi,nbbuf(1,1,ibuf),ld,nbhdl(ibuf)) 367 t_nbput = t_nbput + ga_wtime() - tst 368 i_nbput = i_nbput + 1 369 tst = ga_wtime() 370 call nga_nbwait(nbhdl(mod(n-1,2)+1)) 371 t_wput = t_wput + ga_wtime() - tst 372 end do 373 tst = ga_wtime() 374 call nga_nbwait(nbhdl(mod(nproc-1,2)+1)) 375 t_wput = t_wput + ga_wtime() - tst 376 endif 377 call ga_sync 378c 379c Check values on each process for correctness 380c 381 ok = .true. 382 call nga_distribution(g_a,me,lo,hi) 383 call nga_access(g_a,lo,hi,g_ptr,ld) 384 ioff = 0 385 do j = lo(2), hi(2) 386 do i = lo(1), hi(1) 387 n = int_mb(g_ptr+ioff) 388 if (n.ne.(j-1)*dims(1)+(i-1)) ok = .false. 389 ioff = ioff+1 390 end do 391 end do 392 call nga_release(g_a, lo, hi) 393c 394 if (ok) then 395 if (me.eq.0) then 396 write(6,*) 397 write(6,'(a)') 'OK for Non-Blocking PUT test' 398 write(6,*) 399 endif 400 else 401 write(6,'(a,i4)') 402 + 'Error found in Non-Blocking PUT test on process ',me 403 endif 404c 405c Collect all values on process 0 from GA using non-blocking calls 406c 407 ok = .true. 408 if (me.eq.0) then 409c 410c Start with get on process 0 411c 412 lo(1) = 1 413 lo(2) = 1 414 hi(1) = BLOCK_DIM 415 hi(2) = BLOCK_DIM 416c 417c Fill first buffer with data 418c 419 do j = lo(2), hi(2) 420 jj = j-lo(2)+1 421 do i = lo(1), hi(1) 422 ii = i-lo(1)+1 423 nbbuf(ii,jj,1) = (j-1)*dims(1)+(i-1) 424 end do 425 end do 426c 427c Start the first get 428c 429 tst = ga_wtime() 430 call nga_nbget(g_a,lo,hi,nbbuf(1,1,1),ld,nbhdl(1)) 431 t_nbget = t_nbget + ga_wtime() - tst 432 i_nbget = i_nbget + 1 433 do n = 1, nproc-1 434c 435c Calculate process grid coordinates 436c 437 ii = mod(n,pdims(1)) 438 jj = (n-ii)/pdims(1) 439c 440c Calculate lower and upper indices of the block held by process n 441c 442 lo(1) = ii*BLOCK_DIM + 1 443 lo(2) = jj*BLOCK_DIM + 1 444 hi(1) = (ii+1)*BLOCK_DIM 445 hi(2) = (jj+1)*BLOCK_DIM 446c write(6,'(a,i4,a,i4,a,i4,a,i4,a,i4)') 'Accessing block (', 447c + lo(1),',',hi(1),') (',lo(2),',',hi(2),') on process ',n 448c 449c Copy contents of GA to local buffer 450c 451 ibuf = mod(n,2)+1 452 tst = ga_wtime() 453 call nga_nbget(g_a,lo,hi,nbbuf(1,1,ibuf),ld,nbhdl(ibuf)) 454 t_nbget = t_nbget + ga_wtime() - tst 455 i_nbget = i_nbget + 1 456 tst = ga_wtime() 457 call nga_nbwait(nbhdl(mod(n-1,2)+1)) 458 t_wget = t_wget + ga_wtime() - tst 459c 460c check buffer for correct values 461c 462 tst = ga_wtime() 463 ok2 = .true. 464 ibuf = mod(n-1,2)+1 465 ii = mod(n-1,pdims(1)) 466 jj = (n-1-ii)/pdims(1) 467 lo(1) = ii*BLOCK_DIM + 1 468 lo(2) = jj*BLOCK_DIM + 1 469 hi(1) = (ii+1)*BLOCK_DIM 470 hi(2) = (jj+1)*BLOCK_DIM 471 do j = lo(2), hi(2) 472 jj = j-lo(2)+1 473 do i = lo(1), hi(1) 474 ii = i-lo(1)+1 475 if (nbbuf(ii,jj,ibuf).ne.(j-1)*dims(1)+(i-1)) 476 + ok2 = .false. 477 end do 478 end do 479 if (.not.ok2) then 480 write(6,'(a,i4)') 'Error found in GET test on process ',n 481 ok = .false. 482 endif 483 t_getw = t_getw + ga_wtime() - tst 484 i_getw = i_getw + 1 485 end do 486 tst = ga_wtime() 487 call nga_nbwait(nbhdl(mod(n-1,2)+1)) 488 t_wget = t_wget + ga_wtime() - tst 489 tst = ga_wtime() 490 ok2 = .true. 491 ibuf = mod(nproc-1,2)+1 492 ii = mod(n-1,pdims(1)) 493 jj = (n-1-ii)/pdims(1) 494 lo(1) = ii*BLOCK_DIM + 1 495 lo(2) = jj*BLOCK_DIM + 1 496 hi(1) = (ii+1)*BLOCK_DIM 497 hi(2) = (jj+1)*BLOCK_DIM 498 do j = lo(2), hi(2) 499 jj = j-lo(2)+1 500 do i = lo(1), hi(1) 501 ii = i-lo(1)+1 502 if (nbbuf(ii,jj,ibuf).ne.(j-1)*dims(1)+(i-1)) ok2 = .false. 503 end do 504 end do 505 if (.not.ok2) then 506 write(6,'(a,i4)') 'Error found in GET test on process ',n 507 ok = .false. 508 endif 509 if (ok) then 510 if (me.eq.0) then 511 write(6,*) 512 write(6,'(a)') 'OK for Non-Blocking GET test' 513 write(6,*) 514 endif 515 endif 516 t_getw = t_getw + ga_wtime() - tst 517 i_getw = i_getw + 1 518 endif 519c 520c Double all values in the GA using accumulate 521c 522 if (me.eq.0) then 523c 524c Initialize puts by doing calculation for process 0 525c 526 tst = ga_wtime() 527 lo(1) = 1 528 lo(2) = 1 529 hi(1) = BLOCK_DIM 530 hi(2) = BLOCK_DIM 531c 532c Fill first buffer with data 533c 534 do j = lo(2), hi(2) 535 jj = j-lo(2)+1 536 do i = lo(1), hi(1) 537 ii = i-lo(1)+1 538 nbbuf(ii,jj,1) = (j-1)*dims(1)+(i-1) 539 end do 540 end do 541 t_accw = t_accw + ga_wtime() - tst 542 i_accw = i_accw + 1 543c 544c Start the first put 545c 546 tst = ga_wtime() 547 call nga_nbacc(g_a,lo,hi,nbbuf(1,1,1),ld,1,nbhdl(1)) 548 t_nbacc = t_nbacc + ga_wtime() - tst 549 i_nbacc = i_nbacc + 1 550c 551c Loop over all processors 552c 553 do n = 1, nproc-1 554c 555c Calculate process grid coordinates 556c 557 ii = mod(n,pdims(1)) 558 jj = (n-ii)/pdims(1) 559c 560c Calculate lower and upper indices of the block held by process n 561c 562 tst = ga_wtime() 563 lo(1) = ii*BLOCK_DIM + 1 564 lo(2) = jj*BLOCK_DIM + 1 565 hi(1) = (ii+1)*BLOCK_DIM 566 hi(2) = (jj+1)*BLOCK_DIM 567c 568c Fill buffer with correct values 569c 570 ibuf = mod(n,2)+1 571 do j = lo(2), hi(2) 572 jj = j-lo(2)+1 573 do i = lo(1), hi(1) 574 ii = i-lo(1)+1 575 nbbuf(ii,jj,ibuf) = (j-1)*dims(1)+(i-1) 576 end do 577 end do 578 t_accw = t_accw + ga_wtime() - tst 579 i_accw = i_accw + 1 580c 581c Copy buffer to GA 582c 583 tst = ga_wtime() 584 call nga_nbacc(g_a,lo,hi,nbbuf(1,1,ibuf),ld,1,nbhdl(ibuf)) 585 t_nbacc = t_nbacc + ga_wtime() - tst 586 i_nbacc = i_nbacc + 1 587 tst = ga_wtime() 588 call nga_nbwait(nbhdl(mod(n-1,2)+1)) 589 t_wacc = t_wacc + ga_wtime() - tst 590 end do 591 tst = ga_wtime() 592 call nga_nbwait(nbhdl(mod(nproc-1,2)+1)) 593 t_wacc = t_wacc + ga_wtime() - tst 594 endif 595 call ga_sync 596c 597c Check values on each process for correctness 598c 599 ok = .true. 600 call nga_distribution(g_a,me,lo,hi) 601 call nga_access(g_a,lo,hi,g_ptr,ld) 602 ioff = 0 603 do j = lo(2), hi(2) 604 do i = lo(1), hi(1) 605 n = int_mb(g_ptr+ioff) 606 if (n.ne.2*((j-1)*dims(1)+(i-1))) ok = .false. 607 ioff = ioff+1 608 end do 609 end do 610 call nga_release(g_a, lo, hi) 611c 612 if (ok) then 613 if (me.eq.0) then 614 write(6,*) 615 write(6,'(a)') 'OK for Non-Blocking ACC test' 616 write(6,*) 617 endif 618 else 619 write(6,'(a,i4)') 620 + 'Error found in Non-Blocking ACC test on process ',me 621 endif 622c 623c Print out timing numbers. Start by summing timers over all 624c processors 625c 626 call ga_igop(1,i_put,1,'+') 627 call ga_igop(2,i_get,1,'+') 628 call ga_igop(3,i_acc,1,'+') 629 call ga_igop(4,i_nbput,1,'+') 630 call ga_igop(5,i_nbget,1,'+') 631 call ga_igop(6,i_nbacc,1,'+') 632 call ga_igop(7,i_putw,1,'+') 633 call ga_igop(8,i_getw,1,'+') 634 call ga_igop(9,i_accw,1,'+') 635 call ga_dgop(1,t_put,1,'+') 636 call ga_dgop(2,t_get,1,'+') 637 call ga_dgop(3,t_acc,1,'+') 638 call ga_dgop(4,t_nbput,1,'+') 639 call ga_dgop(5,t_nbget,1,'+') 640 call ga_dgop(6,t_nbacc,1,'+') 641 call ga_dgop(7,t_wput,1,'+') 642 call ga_dgop(8,t_wget,1,'+') 643 call ga_dgop(9,t_wacc,1,'+') 644 call ga_dgop(1,t_putw,1,'+') 645 call ga_dgop(2,t_getw,1,'+') 646 call ga_dgop(3,t_accw,1,'+') 647 if (me.eq.0) then 648 write(6,'(a,f16.8)') 'Average time for blocking PUT: ', 649 + t_put/dble(i_put) 650 write(6,'(a,f16.8)') 'Average time for blocking GET: ', 651 + t_get/dble(i_get) 652 write(6,'(a,f16.8)') 'Average time for blocking ACC: ', 653 + t_acc/dble(i_acc) 654 write(6,'(a,f16.8)') 'Average time for non-blocking PUT: ', 655 + t_nbput/dble(i_nbput) 656 write(6,'(a,f16.8)') 'Average time for non-blocking GET: ', 657 + t_nbget/dble(i_nbget) 658 write(6,'(a,f16.8)') 'Average time for non-blocking ACC: ', 659 + t_nbacc/dble(i_nbacc) 660 write(6,'(a,f16.8)') 'Average time for wait on PUT: ', 661 + t_wput/dble(i_nbput) 662 write(6,'(a,f16.8)') 'Average time for wait on GET: ', 663 + t_wget/dble(i_nbget) 664 write(6,'(a,f16.8)') 'Average time for wait on ACC: ', 665 + t_wacc/dble(i_nbacc) 666 write(6,'(a,f16.8)') 'Aggregate time on non-blocking PUT: ', 667 + (t_nbput+t_wput)/dble(i_nbput) 668 write(6,'(a,f16.8)') 'Aggregate time on non-blocking GET: ', 669 + (t_nbget+t_wget)/dble(i_nbget) 670 write(6,'(a,f16.8)') 'Aggregate time on non-blocking ACC: ', 671 + (t_nbacc+t_wacc)/dble(i_nbacc) 672 write(6,'(a,f16.8)') 'Average time in PUT work: ', 673 + t_putw/dble(i_putw) 674 write(6,'(a,f16.8)') 'Average time in GET work: ', 675 + t_getw/dble(i_getw) 676 write(6,'(a,f16.8)') 'Average time in ACC work: ', 677 + t_accw/dble(i_accw) 678 endif 679 680c 681c*** Tidy up the GA package 682c 683 call ga_terminate() 684c 685c*** Tidy up after message-passing library 686c 687 call MP_FINALIZE() 688c 689 end 690c 691c Obtain a 2x2 processor grid for p processors 692c 693 subroutine factor(p,dims) 694 implicit none 695 integer i,j,p,dims(2),imin,mdim 696 integer ip,ifac,pmax,prime(1000) 697 integer fac(1000) 698c 699 i = 1 700 ip = p 701 do i = 1, 2 702 dims(i) = 1 703 end do 704c 705c factor p completely 706c first, find all prime numbers less than or equal to p 707c 708 pmax = 0 709 do i = 2, p 710 do j = 1, pmax 711 if (mod(i,prime(j)).eq.0) go to 100 712 end do 713 pmax = pmax + 1 714 prime(pmax) = i 715 100 continue 716 end do 717c 718c find all prime factors of p 719c 720 ifac = 0 721 do i = 1, pmax 722 200 if (mod(ip,prime(i)).eq.0) then 723 ifac = ifac + 1 724 fac(ifac) = prime(i) 725 ip = ip/prime(i) 726 go to 200 727 endif 728 end do 729c 730c determine dimensions of processor grid 731c 732 do i = ifac, 1, -1 733c 734c find dimension with minimum value 735c 736 imin = dims(1) 737 mdim = 1 738 if (dims(2).lt.imin) then 739 imin = dims(2) 740 mdim = 2 741 endif 742 dims(mdim) = dims(mdim)*fac(i) 743 end do 744c 745 return 746 end 747 748