1 #pragma once
2 
3 #include <cstdlib>
4 #include "anyode/anyode_parallel.hpp"
5 #include "gsl_odeiv2_anyode.hpp"
6 
7 namespace gsl_odeiv2_anyode_parallel {
8 
9     using gsl_odeiv2_cxx::StepType;
10     using gsl_odeiv2_anyode::simple_adaptive;
11     using gsl_odeiv2_anyode::simple_predefined;
12 
13     using sa_t = std::pair<std::vector<double>, std::vector<double> >;
14 
15     template <class OdeSys>
16     std::vector<sa_t>
multi_adaptive(std::vector<OdeSys * > odesys,const double atol,const double rtol,const StepType styp,const double * const y0,const double * t0,const double * tend,const long int mxsteps,const double * dx0,const double * dx_min,const double * dx_max,int autorestart=0,bool return_on_error=false)17     multi_adaptive(std::vector<OdeSys *> odesys, // vectorized
18                    const double atol,
19                    const double rtol,
20                    const StepType styp,
21                    const double * const y0,  // vectorized
22                    const double * t0,  // vectorized
23                    const double * tend,  // vectorized
24                    const long int mxsteps,
25                    const double * dx0,  // vectorized
26                    const double * dx_min,  // vectorized
27                    const double * dx_max,  // vectorized
28                    int autorestart=0,
29                    bool return_on_error=false
30                    ){
31         const int ny = odesys[0]->get_ny();
32         const int nsys = odesys.size();
33         auto results = std::vector<sa_t>(nsys);
34 
35         anyode_parallel::ThreadException te;
36         char * num_threads_var = std::getenv("ANYODE_NUM_THREADS");
37         int nt = (num_threads_var) ? std::atoi(num_threads_var) : 1;
38         if (nt < 0)
39             nt = 1;
40         #pragma omp parallel for num_threads(nt) // OMP_NUM_THREADS should be 1 for openblas LU (small matrices)
41         for (int idx=0; idx<nsys; ++idx){
42             sa_t local_result;
43             te.run([&]{
44                 local_result = simple_adaptive<OdeSys>(
45                     odesys[idx], atol, rtol, styp, y0 + idx*ny, t0[idx], tend[idx],
46                     mxsteps, dx0[idx], dx_min[idx], dx_max[idx], autorestart, return_on_error);
47             });
48             results[idx] = local_result;
49         }
50         te.rethrow();
51 
52         return results;
53     }
54 
55     template <class OdeSys>
56     std::vector<int>
multi_predefined(std::vector<OdeSys * > odesys,const double atol,const double rtol,const StepType styp,const double * const y0,const std::size_t nout,const double * const tout,double * const yout,const long int mxsteps,const double * dx0,const double * dx_min,const double * dx_max,int autorestart=0,bool return_on_error=false)57     multi_predefined(std::vector<OdeSys *> odesys,  // vectorized
58                      const double atol,
59                      const double rtol,
60                      const StepType styp,
61                      const double * const y0, // vectorized
62                      const std::size_t nout,
63                      const double * const tout, // vectorized
64                      double * const yout,  // vectorized
65                      const long int mxsteps,
66                      const double * dx0,  // vectorized
67                      const double * dx_min,  // vectorized
68                      const double * dx_max,
69                      int autorestart=0,
70                      bool return_on_error=false
71                      ){
72         const int ny = odesys[0]->get_ny();
73         const int nsys = odesys.size();
74         std::vector<int> result(nsys);
75 
76         anyode_parallel::ThreadException te;
77         #pragma omp parallel for
78         for (int idx=0; idx<nsys; ++idx){
79             te.run([&]{
80                 result[idx] = simple_predefined<OdeSys>(
81                     odesys[idx], atol, rtol, styp, y0 + idx*ny,
82                     nout, tout + idx*nout, yout + idx*ny*nout,
83                     mxsteps, dx0[idx], dx_min[idx], dx_max[idx],
84                     autorestart, return_on_error);
85             });
86         }
87         te.rethrow();
88         return result;
89     }
90 
91 }
92