1! { dg-do compile } 2! { dg-options "-fcoarray=lib" } 3! 4! PR fortran/64771 5! 6! Contributed by Alessandro Fanfarill 7! 8! Reduced version of the full NAS CG benchmark 9! 10 11!-------------------------------------------------------------------------! 12! ! 13! N A S P A R A L L E L B E N C H M A R K S 3.3 ! 14! ! 15! C G ! 16! ! 17!-------------------------------------------------------------------------! 18! ! 19! This benchmark is part of the NAS Parallel Benchmark 3.3 suite. ! 20! It is described in NAS Technical Reports 95-020 and 02-007 ! 21! ! 22! Permission to use, copy, distribute and modify this software ! 23! for any purpose with or without fee is hereby granted. We ! 24! request, however, that all derived work reference the NAS ! 25! Parallel Benchmarks 3.3. This software is provided "as is" ! 26! without express or implied warranty. ! 27! ! 28! Information on NPB 3.3, including the technical report, the ! 29! original specifications, source code, results and information ! 30! on how to submit new results, is available at: ! 31! ! 32! http://www.nas.nasa.gov/Software/NPB/ ! 33! ! 34! Send comments or suggestions to npb@nas.nasa.gov ! 35! ! 36! NAS Parallel Benchmarks Group ! 37! NASA Ames Research Center ! 38! Mail Stop: T27A-1 ! 39! Moffett Field, CA 94035-1000 ! 40! ! 41! E-mail: npb@nas.nasa.gov ! 42! Fax: (650) 604-3957 ! 43! ! 44!-------------------------------------------------------------------------! 45 46 47c--------------------------------------------------------------------- 48c 49c Authors: M. Yarrow 50c C. Kuszmaul 51c R. F. Van der Wijngaart 52c H. Jin 53c 54c--------------------------------------------------------------------- 55 56 57c--------------------------------------------------------------------- 58c--------------------------------------------------------------------- 59 program cg 60c--------------------------------------------------------------------- 61c--------------------------------------------------------------------- 62 implicit none 63 64 integer na, nonzer, niter 65 double precision shift, rcond 66 parameter( na=75000, 67 > nonzer=13, 68 > niter=75, 69 > shift=60., 70 > rcond=1.0d-1 ) 71 72 73 74 integer num_proc_rows, num_proc_cols 75 parameter( num_proc_rows = 2, num_proc_cols = 2) 76 integer num_procs 77 parameter( num_procs = num_proc_cols * num_proc_rows ) 78 79 integer nz 80 parameter( nz = na*(nonzer+1)/num_procs*(nonzer+1)+nonzer 81 > + na*(nonzer+2+num_procs/256)/num_proc_cols ) 82 83 common / partit_size / naa, nzz, 84 > npcols, nprows, 85 > proc_col, proc_row, 86 > firstrow, 87 > lastrow, 88 > firstcol, 89 > lastcol, 90 > exch_proc, 91 > exch_recv_length, 92 > send_start, 93 > send_len 94 integer naa, nzz, 95 > npcols, nprows, 96 > proc_col, proc_row, 97 > firstrow, 98 > lastrow, 99 > firstcol, 100 > lastcol, 101 > exch_proc, 102 > exch_recv_length, 103 > send_start, 104 > send_len 105 106 107 common / main_int_mem / colidx, rowstr, 108 > iv, arow, acol 109 integer colidx(nz), rowstr(na+1), 110 > iv(2*na+1), arow(nz), acol(nz) 111 112 113c--------------------------------- 114c Coarray Decalarations 115c--------------------------------- 116 double precision v(na+1)[0:*], aelt(nz)[0:*], a(nz)[0:*], 117 > x(na/num_proc_rows+2)[0:*], 118 > z(na/num_proc_rows+2)[0:*], 119 > p(na/num_proc_rows+2)[0:*], 120 > q(na/num_proc_rows+2)[0:*], 121 > r(na/num_proc_rows+2)[0:*], 122 > w(na/num_proc_rows+2)[0:*] 123 124 125 common /urando/ amult, tran 126 double precision amult, tran 127 128 129 130 integer l2npcols 131 integer reduce_exch_proc(num_proc_cols) 132 integer reduce_send_starts(num_proc_cols) 133 integer reduce_send_lengths(num_proc_cols) 134 integer reduce_recv_lengths(num_proc_cols) 135 integer reduce_rrecv_starts(num_proc_cols) 136c--------------------------------- 137c Coarray Decalarations 138c--------------------------------- 139 integer reduce_recv_starts(num_proc_cols)[0:*] 140 141 integer i, j, k, it, me, nprocs, root 142 143 double precision zeta, randlc 144 external randlc 145 double precision rnorm 146c--------------------------------- 147c Coarray Decalarations 148c--------------------------------- 149 double precision norm_temp1(2)[0:*], norm_temp2(2)[0:*] 150 151 double precision t, tmax, mflops 152 double precision u(1), umax(1) 153 external timer_read 154 double precision timer_read 155 character class 156 logical verified 157 double precision zeta_verify_value, epsilon, err 158 159c--------------------------------------------------------------------- 160c Explicit interface for conj_grad, due to coarray args 161c--------------------------------------------------------------------- 162 interface 163 164 subroutine conj_grad ( colidx, 165 > rowstr, 166 > x, 167 > z, 168 > a, 169 > p, 170 > q, 171 > r, 172 > w, 173 > rnorm, 174 > l2npcols, 175 > reduce_exch_proc, 176 > reduce_send_starts, 177 > reduce_send_lengths, 178 > reduce_recv_starts, 179 > reduce_recv_lengths, 180 > reduce_rrecv_starts ) 181 182 common / partit_size / naa, nzz, 183 > npcols, nprows, 184 > proc_col, proc_row, 185 > firstrow, 186 > lastrow, 187 > firstcol, 188 > lastcol, 189 > exch_proc, 190 > exch_recv_length, 191 > send_start, 192 > send_len 193 194 integer naa, nzz, 195 > npcols, nprows, 196 > proc_col, proc_row, 197 > firstrow, 198 > lastrow, 199 > firstcol, 200 > lastcol, 201 > exch_proc, 202 > exch_recv_length, 203 > send_start, 204 > send_len 205 206 double precision x(*), 207 > z(*), 208 > a(nzz) 209 integer colidx(nzz), rowstr(naa+1) 210 211 double precision p(*), 212 > q(*)[0:*], 213 > r(*)[0:*], 214 > w(*)[0:*] ! used as work temporary 215 216 integer l2npcols 217 integer reduce_exch_proc(l2npcols) 218 integer reduce_send_starts(l2npcols) 219 integer reduce_send_lengths(l2npcols) 220 integer reduce_recv_starts(l2npcols)[0:*] 221 integer reduce_recv_lengths(l2npcols) 222 integer reduce_rrecv_starts(l2npcols) 223 224 double precision rnorm 225 226 end subroutine 227 228 end interface 229 230c--------------------------------------------------------------------- 231c The call to the conjugate gradient routine: 232c--------------------------------------------------------------------- 233 call conj_grad ( colidx, 234 > rowstr, 235 > x, 236 > z, 237 > a, 238 > p, 239 > q, 240 > r, 241 > w, 242 > rnorm, 243 > l2npcols, 244 > reduce_exch_proc, 245 > reduce_send_starts, 246 > reduce_send_lengths, 247 > reduce_recv_starts, 248 > reduce_recv_lengths, 249 > reduce_rrecv_starts ) 250 251 252 sync all 253 254 end ! end main 255 256c--------------------------------------------------------------------- 257c--------------------------------------------------------------------- 258 subroutine conj_grad ( colidx, 259 > rowstr, 260 > x, 261 > z, 262 > a, 263 > p, 264 > q, 265 > r, 266 > w, 267 > rnorm, 268 > l2npcols, 269 > reduce_exch_proc, 270 > reduce_send_starts, 271 > reduce_send_lengths, 272 > reduce_recv_starts, 273 > reduce_recv_lengths, 274 > reduce_rrecv_starts ) 275c--------------------------------------------------------------------- 276c--------------------------------------------------------------------- 277 278c--------------------------------------------------------------------- 279c Floaging point arrays here are named as in NPB1 spec discussion of 280c CG algorithm 281c--------------------------------------------------------------------- 282 283 implicit none 284 285c include 'cafnpb.h' 286 287 common / partit_size / naa, nzz, 288 > npcols, nprows, 289 > proc_col, proc_row, 290 > firstrow, 291 > lastrow, 292 > firstcol, 293 > lastcol, 294 > exch_proc, 295 > exch_recv_length, 296 > send_start, 297 > send_len 298 integer naa, nzz, 299 > npcols, nprows, 300 > proc_col, proc_row, 301 > firstrow, 302 > lastrow, 303 > firstcol, 304 > lastcol, 305 > exch_proc, 306 > exch_recv_length, 307 > send_start, 308 > send_len 309 310 311 312 double precision x(*), 313 > z(*), 314 > a(nzz) 315 integer colidx(nzz), rowstr(naa+1) 316 317 double precision p(*), 318 > q(*)[0:*], 319 > r(*)[0:*], 320 > w(*)[0:*] ! used as work temporary 321 322 integer l2npcols 323 integer reduce_exch_proc(l2npcols) 324 integer reduce_send_starts(l2npcols) 325 integer reduce_send_lengths(l2npcols) 326 integer reduce_recv_starts(l2npcols)[0:*] 327 integer reduce_recv_lengths(l2npcols) 328 integer reduce_rrecv_starts(l2npcols) 329 330 integer recv_start_idx, recv_end_idx, send_start_idx, 331 > send_end_idx, recv_length 332 333 integer i, j, k, ierr 334 integer cgit, cgitmax 335 336 double precision, save :: d[0:*], rho[0:*] 337 double precision sum, rho0, alpha, beta, rnorm 338 339 external timer_read 340 double precision timer_read 341 342 data cgitmax / 25 / 343 344 345 return 346 end ! end of routine conj_grad 347 348