1 #ifndef KASTORS_POISSON_H
2 #define KASTORS_POISSON_H
3 #include <cassert>
4 #include <string>
5 
6 /******************************************************************************/
7 /*
8   Purpose:
9 
10   SWEEP carries out one step of the Jacobi iteration.
11 
12   Discussion:
13 
14   Assuming DX = DY, we can approximate
15 
16   - (d/dx d/dx + d/dy d/dy) U(X,Y)
17 
18   by
19 
20   (U(i-1,j) + U(i+1,j) + U(i,j-1) + U(i,j+1) - 4*U(i,j)) / dx / dy
21 
22   The discretization employed below will not be correct in the general
23   case where DX and DY are not equal.  It's only a little more complicated
24   to allow DX and DY to be different, but we're not going to worry about
25   that right now.
26 
27   Licensing:
28 
29   This code is distributed under the GNU LGPL license.
30 
31   Modified:
32 
33   14 December 2011
34 
35   Author:
36 
37   John Burkardt
38 
39   Parameters:
40 
41   Input, int NX, NY, the X and Y grid dimensions.
42 
43   Input, double DX, DY, the spacing between grid points.
44 
45   Input, double F[NX][NY], the right hand side data.
46 
47   Input, int ITOLD, the iteration index on input.
48 
49   Input, int ITNEW, the desired iteration index
50   on output.
51 
52   Input, double U[NX][NY], the solution estimate on
53   iteration ITNEW-1.
54 
55   Input/output, double UNEW[NX][NY], on input, the solution
56   estimate on iteration ITOLD.  On output, the solution estimate on
57   iteration ITNEW.
58 */
59 void sweep(int nx, int ny, double dx, double dy, double *f,
60      int itold, int itnew, double *u, double *unew, int block_size);
61 void sweep_seq(int nx, int ny, double dx, double dy, double *f,
62      int itold, int itnew, double *u, double *unew);
63 
64 
65 struct user_parameters {
66   int check;
67   int succeed;
68   int niter;
69   int titer;
70   int matrix_size;
71   int blocksize;
72 };
73 
74 double run(struct user_parameters* params, unsigned num_threads, std::string model);
75 
76 
77 //////////////////////////////////////////////////////////////////////////
78 void omp_block_for(int nx, int ny, double dx, double dy, double *f,
79      int itold, int itnew, double *u, double *unew, int block_size);
80 void omp_block_task(int nx, int ny, double dx, double dy, double *f,
81      int itold, int itnew, double *u, double *unew, int block_size);
82 void omp_block_task_dep(int nx, int ny, double dx, double dy, double *f,
83      int itold, int itnew, double *u, double *unew, int block_size);
84 void omp_task(int nx, int ny, double dx, double dy, double *f,
85      int itold, int itnew, double *u, double *unew, int block_size);
86 void omp_task_dep(int nx, int ny, double dx, double dy, double *f,
87      int itold, int itnew, double *u, double *unew, int block_size);
88 //////////////////////////////////////////////////////////////////////////
89 void taskflow(int nx, int ny, double dx, double dy, double *f,
90      int itold, int itnew, double *u, double *unew, int block_size, unsigned num_threads);
91 //////////////////////////////////////////////////////////////////////////
92 
93 
copy_block(int nx,int ny,int block_x,int block_y,double * u_,double * unew_,int block_size)94 static inline void copy_block(int nx, int ny, int block_x, int block_y, double *u_, double *unew_, int block_size) {
95   int i, j, start_i, start_j;
96   double (*u)[nx][ny] = (double (*)[nx][ny])u_;
97   double (*unew)[nx][ny] = (double (*)[nx][ny])unew_;
98   start_i = block_x * block_size;
99   start_j = block_y * block_size;
100   for (i = start_i; i < start_i + block_size; i++) {
101     for (j = start_j; j < start_j + block_size; j++) {
102       assert((i < nx) && (j < ny));
103       (*u)[i][j] = (*unew)[i][j];
104     }
105   }
106 }
107 
compute_estimate(int block_x,int block_y,double * u_,double * unew_,double * f_,double dx,double dy,int nx,int ny,int block_size)108 static inline void compute_estimate(int block_x, int block_y, double *u_,
109                                     double *unew_, double *f_, double dx,
110                                     double dy, int nx, int ny, int block_size) {
111   int i, j, start_i, start_j;
112   double (*f)[nx][ny] = (double (*)[nx][ny])f_;
113   double (*u)[nx][ny] = (double (*)[nx][ny])u_;
114   double (*unew)[nx][ny] = (double (*)[nx][ny])unew_;
115   start_i = block_x * block_size;
116   start_j = block_y * block_size;
117   for (i = start_i; i < start_i + block_size; i++) {
118     for (j = start_j; j < start_j + block_size; j++) {
119       if (i == 0 || j == 0 || i == nx - 1 || j == ny - 1) {
120         (*unew)[i][j] = (*f)[i][j];
121       } else {
122         (*unew)[i][j] = 0.25 * ((*u)[i-1][j] + (*u)[i][j+1]
123             + (*u)[i][j-1] + (*u)[i+1][j]
124             + (*f)[i][j] * dx * dy);
125       }
126     }
127   }
128 }
129 
130 
sweep_seq(int nx,int ny,double dx,double dy,double * f_,int itold,int itnew,double * u_,double * unew_)131 inline void sweep_seq(int nx, int ny, double dx, double dy, double *f_,
132         int itold, int itnew, double *u_, double *unew_)
133 {
134   int i;
135   int it;
136   int j;
137   double (*f)[nx][ny] = (double (*)[nx][ny])f_;
138   double (*u)[nx][ny] = (double (*)[nx][ny])u_;
139   double (*unew)[nx][ny] = (double (*)[nx][ny])unew_;
140 
141   for (it = itold + 1; it <= itnew; it++) {
142     for (i = 0; i < nx; i++) {
143       for (j = 0; j < ny; j++) {
144         (*u)[i][j] = (*unew)[i][j];
145       }
146     }
147     for (i = 0; i < nx; i++) {
148       for (j = 0; j < ny; j++) {
149         if (i == 0 || j == 0 || i == nx - 1 || j == ny - 1) {
150           (*unew)[i][j] = (*f)[i][j];
151         }
152         else {
153           (*unew)[i][j] = 0.25 * ((*u)[i-1][j] + (*u)[i][j+1]
154               + (*u)[i][j-1] + (*u)[i+1][j]
155               + (*f)[i][j] * dx * dy);
156         }
157       }
158     }
159   }
160 }
161 
162 #endif
163