1! { dg-do compile }
2! { dg-options "-fcoarray=lib" }
3!
4! PR fortran/64771
5!
6! Contributed by Alessandro Fanfarill
7!
8! Reduced version of the full NAS CG benchmark
9!
10
11!-------------------------------------------------------------------------!
12!                                                                         !
13!        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3         !
14!                                                                         !
15!                                   C G                                   !
16!                                                                         !
17!-------------------------------------------------------------------------!
18!                                                                         !
19!    This benchmark is part of the NAS Parallel Benchmark 3.3 suite.      !
20!    It is described in NAS Technical Reports 95-020 and 02-007           !
21!                                                                         !
22!    Permission to use, copy, distribute and modify this software         !
23!    for any purpose with or without fee is hereby granted.  We           !
24!    request, however, that all derived work reference the NAS            !
25!    Parallel Benchmarks 3.3. This software is provided "as is"           !
26!    without express or implied warranty.                                 !
27!                                                                         !
28!    Information on NPB 3.3, including the technical report, the          !
29!    original specifications, source code, results and information        !
30!    on how to submit new results, is available at:                       !
31!                                                                         !
32!           http://www.nas.nasa.gov/Software/NPB/                         !
33!                                                                         !
34!    Send comments or suggestions to  npb@nas.nasa.gov                    !
35!                                                                         !
36!          NAS Parallel Benchmarks Group                                  !
37!          NASA Ames Research Center                                      !
38!          Mail Stop: T27A-1                                              !
39!          Moffett Field, CA   94035-1000                                 !
40!                                                                         !
41!          E-mail:  npb@nas.nasa.gov                                      !
42!          Fax:     (650) 604-3957                                        !
43!                                                                         !
44!-------------------------------------------------------------------------!
45
46
47c---------------------------------------------------------------------
48c
49c Authors: M. Yarrow
50c          C. Kuszmaul
51c          R. F. Van der Wijngaart
52c          H. Jin
53c
54c---------------------------------------------------------------------
55
56
57c---------------------------------------------------------------------
58c---------------------------------------------------------------------
59      program cg
60c---------------------------------------------------------------------
61c---------------------------------------------------------------------
62      implicit none
63
64      integer            na, nonzer, niter
65      double precision   shift, rcond
66      parameter(  na=75000,
67     >     nonzer=13,
68     >     niter=75,
69     >     shift=60.,
70     >     rcond=1.0d-1 )
71
72
73
74      integer num_proc_rows, num_proc_cols
75      parameter( num_proc_rows = 2, num_proc_cols = 2)
76      integer    num_procs
77      parameter( num_procs = num_proc_cols * num_proc_rows )
78
79      integer    nz
80      parameter( nz = na*(nonzer+1)/num_procs*(nonzer+1)+nonzer
81     >              + na*(nonzer+2+num_procs/256)/num_proc_cols )
82
83      common / partit_size  /  naa, nzz,
84     >                         npcols, nprows,
85     >                         proc_col, proc_row,
86     >                         firstrow,
87     >                         lastrow,
88     >                         firstcol,
89     >                         lastcol,
90     >                         exch_proc,
91     >                         exch_recv_length,
92     >                         send_start,
93     >                         send_len
94      integer                  naa, nzz,
95     >                         npcols, nprows,
96     >                         proc_col, proc_row,
97     >                         firstrow,
98     >                         lastrow,
99     >                         firstcol,
100     >                         lastcol,
101     >                         exch_proc,
102     >                         exch_recv_length,
103     >                         send_start,
104     >                         send_len
105
106
107      common / main_int_mem /  colidx,     rowstr,
108     >                         iv,         arow,     acol
109      integer                  colidx(nz), rowstr(na+1),
110     >                         iv(2*na+1), arow(nz), acol(nz)
111
112
113c---------------------------------
114c     Coarray Decalarations
115c---------------------------------
116      double precision         v(na+1)[0:*], aelt(nz)[0:*], a(nz)[0:*],
117     >                         x(na/num_proc_rows+2)[0:*],
118     >                         z(na/num_proc_rows+2)[0:*],
119     >                         p(na/num_proc_rows+2)[0:*],
120     >                         q(na/num_proc_rows+2)[0:*],
121     >                         r(na/num_proc_rows+2)[0:*],
122     >                         w(na/num_proc_rows+2)[0:*]
123
124
125      common /urando/          amult, tran
126      double precision         amult, tran
127
128
129
130      integer            l2npcols
131      integer            reduce_exch_proc(num_proc_cols)
132      integer            reduce_send_starts(num_proc_cols)
133      integer            reduce_send_lengths(num_proc_cols)
134      integer            reduce_recv_lengths(num_proc_cols)
135      integer            reduce_rrecv_starts(num_proc_cols)
136c---------------------------------
137c     Coarray Decalarations
138c---------------------------------
139      integer            reduce_recv_starts(num_proc_cols)[0:*]
140
141      integer            i, j, k, it, me, nprocs, root
142
143      double precision   zeta, randlc
144      external           randlc
145      double precision   rnorm
146c---------------------------------
147c     Coarray Decalarations
148c---------------------------------
149      double precision   norm_temp1(2)[0:*], norm_temp2(2)[0:*]
150
151      double precision   t, tmax, mflops
152      double precision   u(1), umax(1)
153      external           timer_read
154      double precision   timer_read
155      character          class
156      logical            verified
157      double precision   zeta_verify_value, epsilon, err
158
159c---------------------------------------------------------------------
160c  Explicit interface for conj_grad, due to coarray args
161c---------------------------------------------------------------------
162      interface
163
164      subroutine conj_grad ( colidx,
165     >                       rowstr,
166     >                       x,
167     >                       z,
168     >                       a,
169     >                       p,
170     >                       q,
171     >                       r,
172     >                       w,
173     >                       rnorm,
174     >                       l2npcols,
175     >                       reduce_exch_proc,
176     >                       reduce_send_starts,
177     >                       reduce_send_lengths,
178     >                       reduce_recv_starts,
179     >                       reduce_recv_lengths,
180     >                       reduce_rrecv_starts )
181
182      common / partit_size  /  naa, nzz,
183     >                         npcols, nprows,
184     >                         proc_col, proc_row,
185     >                         firstrow,
186     >                         lastrow,
187     >                         firstcol,
188     >                         lastcol,
189     >                         exch_proc,
190     >                         exch_recv_length,
191     >                         send_start,
192     >                         send_len
193
194      integer                  naa, nzz,
195     >                         npcols, nprows,
196     >                         proc_col, proc_row,
197     >                         firstrow,
198     >                         lastrow,
199     >                         firstcol,
200     >                         lastcol,
201     >                         exch_proc,
202     >                         exch_recv_length,
203     >                         send_start,
204     >                         send_len
205
206      double precision   x(*),
207     >                   z(*),
208     >                   a(nzz)
209      integer            colidx(nzz), rowstr(naa+1)
210
211      double precision   p(*),
212     >                   q(*)[0:*],
213     >                   r(*)[0:*],
214     >                   w(*)[0:*]        ! used as work temporary
215
216      integer   l2npcols
217      integer   reduce_exch_proc(l2npcols)
218      integer   reduce_send_starts(l2npcols)
219      integer   reduce_send_lengths(l2npcols)
220      integer   reduce_recv_starts(l2npcols)[0:*]
221      integer   reduce_recv_lengths(l2npcols)
222      integer   reduce_rrecv_starts(l2npcols)
223
224      double precision   rnorm
225
226      end subroutine
227
228      end interface
229
230c---------------------------------------------------------------------
231c  The call to the conjugate gradient routine:
232c---------------------------------------------------------------------
233         call conj_grad ( colidx,
234     >                    rowstr,
235     >                    x,
236     >                    z,
237     >                    a,
238     >                    p,
239     >                    q,
240     >                    r,
241     >                    w,
242     >                    rnorm,
243     >                    l2npcols,
244     >                    reduce_exch_proc,
245     >                    reduce_send_starts,
246     >                    reduce_send_lengths,
247     >                    reduce_recv_starts,
248     >                    reduce_recv_lengths,
249     >                    reduce_rrecv_starts )
250
251
252      sync all
253
254      end                              ! end main
255
256c---------------------------------------------------------------------
257c---------------------------------------------------------------------
258      subroutine conj_grad ( colidx,
259     >                       rowstr,
260     >                       x,
261     >                       z,
262     >                       a,
263     >                       p,
264     >                       q,
265     >                       r,
266     >                       w,
267     >                       rnorm,
268     >                       l2npcols,
269     >                       reduce_exch_proc,
270     >                       reduce_send_starts,
271     >                       reduce_send_lengths,
272     >                       reduce_recv_starts,
273     >                       reduce_recv_lengths,
274     >                       reduce_rrecv_starts )
275c---------------------------------------------------------------------
276c---------------------------------------------------------------------
277
278c---------------------------------------------------------------------
279c  Floaging point arrays here are named as in NPB1 spec discussion of
280c  CG algorithm
281c---------------------------------------------------------------------
282
283      implicit none
284
285c      include 'cafnpb.h'
286
287      common / partit_size  /  naa, nzz,
288     >                         npcols, nprows,
289     >                         proc_col, proc_row,
290     >                         firstrow,
291     >                         lastrow,
292     >                         firstcol,
293     >                         lastcol,
294     >                         exch_proc,
295     >                         exch_recv_length,
296     >                         send_start,
297     >                         send_len
298      integer                  naa, nzz,
299     >                         npcols, nprows,
300     >                         proc_col, proc_row,
301     >                         firstrow,
302     >                         lastrow,
303     >                         firstcol,
304     >                         lastcol,
305     >                         exch_proc,
306     >                         exch_recv_length,
307     >                         send_start,
308     >                         send_len
309
310
311
312      double precision   x(*),
313     >                   z(*),
314     >                   a(nzz)
315      integer            colidx(nzz), rowstr(naa+1)
316
317      double precision   p(*),
318     >                   q(*)[0:*],
319     >                   r(*)[0:*],
320     >                   w(*)[0:*]        ! used as work temporary
321
322      integer   l2npcols
323      integer   reduce_exch_proc(l2npcols)
324      integer   reduce_send_starts(l2npcols)
325      integer   reduce_send_lengths(l2npcols)
326      integer   reduce_recv_starts(l2npcols)[0:*]
327      integer   reduce_recv_lengths(l2npcols)
328      integer   reduce_rrecv_starts(l2npcols)
329
330      integer   recv_start_idx, recv_end_idx, send_start_idx,
331     >          send_end_idx, recv_length
332
333      integer   i, j, k, ierr
334      integer   cgit, cgitmax
335
336      double precision, save :: d[0:*], rho[0:*]
337      double precision   sum, rho0, alpha, beta, rnorm
338
339      external         timer_read
340      double precision timer_read
341
342      data      cgitmax / 25 /
343
344
345      return
346      end                       ! end of routine conj_grad
347
348