1 // rbOOmit: An implementation of the Certified Reduced Basis method.
2 // Copyright (C) 2009, 2010 David J. Knezevic
3 
4 // This file is part of rbOOmit.
5 
6 // rbOOmit is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License, or (at your option) any later version.
10 
11 // rbOOmit is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 // Lesser General Public License for more details.
15 
16 // You should have received a copy of the GNU Lesser General Public
17 // License along with this library; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19 
20 // rbOOmit includes
21 #include "libmesh/rb_eim_evaluation.h"
22 #include "libmesh/rb_eim_theta.h"
23 #include "libmesh/rb_parametrized_function.h"
24 #include "libmesh/utility.h" // Utility::mkdir
25 
26 // libMesh includes
27 #include "libmesh/xdr_cxx.h"
28 #include "libmesh/libmesh_logging.h"
29 #include "libmesh/replicated_mesh.h"
30 #include "libmesh/elem.h"
31 #include "libmesh/system.h"
32 #include "timpi/parallel_implementation.h"
33 
34 // C++ includes
35 #include <sstream>
36 #include <fstream>
37 #include <numeric> // std::accumulate
38 #include <iterator> // std::advance
39 
40 namespace libMesh
41 {
42 
RBEIMEvaluation(const Parallel::Communicator & comm)43 RBEIMEvaluation::RBEIMEvaluation(const Parallel::Communicator & comm)
44 :
45 ParallelObject(comm),
46 evaluate_eim_error_bound(true),
47 _rb_eim_solves_N(0)
48 {
49 }
50 
51 RBEIMEvaluation::~RBEIMEvaluation() = default;
52 
clear()53 void RBEIMEvaluation::clear()
54 {
55   _interpolation_points_xyz.clear();
56   _interpolation_points_comp.clear();
57   _interpolation_points_subdomain_id.clear();
58   _interpolation_points_xyz_perturbations.clear();
59   _interpolation_points_elem_id.clear();
60   _interpolation_points_qp.clear();
61 
62   _interpolation_matrix.resize(0,0);
63 
64   // Delete any RBTheta objects that were created
65   _rb_eim_theta_objects.clear();
66 }
67 
resize_data_structures(const unsigned int Nmax)68 void RBEIMEvaluation::resize_data_structures(const unsigned int Nmax)
69 {
70   // Resize the data structures relevant to the EIM system
71   _interpolation_points_xyz.clear();
72   _interpolation_points_comp.clear();
73   _interpolation_points_subdomain_id.clear();
74   _interpolation_points_xyz_perturbations.clear();
75   _interpolation_points_elem_id.clear();
76   _interpolation_points_qp.clear();
77 
78   _interpolation_matrix.resize(Nmax,Nmax);
79 }
80 
set_parametrized_function(std::unique_ptr<RBParametrizedFunction> pf)81 void RBEIMEvaluation::set_parametrized_function(std::unique_ptr<RBParametrizedFunction> pf)
82 {
83   _parametrized_function = std::move(pf);
84 }
85 
get_parametrized_function()86 RBParametrizedFunction & RBEIMEvaluation::get_parametrized_function()
87 {
88   libmesh_error_msg_if(!_parametrized_function, "Parametrized function not initialized yet");
89 
90   return *_parametrized_function;
91 }
92 
rb_eim_solve(unsigned int N)93 Real RBEIMEvaluation::rb_eim_solve(unsigned int N)
94 {
95   LOG_SCOPE("rb_eim_solve()", "RBEIMEvaluation");
96 
97   libmesh_error_msg_if(N > get_n_basis_functions(), "Error: N cannot be larger than the number of basis functions in rb_solve");
98   libmesh_error_msg_if(N==0, "Error: N must be greater than 0 in rb_solve");
99 
100   // Get the rhs by sampling parametrized_function
101   // at the first N interpolation_points
102   DenseVector<Number> EIM_rhs(N);
103   for (unsigned int i=0; i<N; i++)
104     {
105       EIM_rhs(i) =
106         get_parametrized_function().evaluate_comp(get_parameters(),
107                                                   _interpolation_points_comp[i],
108                                                   _interpolation_points_xyz[i],
109                                                   _interpolation_points_subdomain_id[i],
110                                                   _interpolation_points_xyz_perturbations[i]);
111     }
112 
113   DenseMatrix<Number> interpolation_matrix_N;
114   _interpolation_matrix.get_principal_submatrix(N, interpolation_matrix_N);
115 
116   interpolation_matrix_N.lu_solve(EIM_rhs, _rb_eim_solution);
117 
118   // Optionally evaluate an a posteriori error bound. The EIM error estimate
119   // recommended in the literature is based on using "next" EIM point, so
120   // we skip this if N == get_n_basis_functions()
121   if (evaluate_eim_error_bound && (N != get_n_basis_functions()))
122     {
123       // Compute the a posteriori error bound
124       // First, sample the parametrized function at x_{N+1}
125       Number g_at_next_x =
126         get_parametrized_function().evaluate_comp(get_parameters(),
127                                                   _interpolation_points_comp[N],
128                                                   _interpolation_points_xyz[N],
129                                                   _interpolation_points_subdomain_id[N],
130                                                   _interpolation_points_xyz_perturbations[N]);
131 
132       // Next, evaluate the EIM approximation at x_{N+1}
133       Number EIM_approx_at_next_x = 0.;
134       for (unsigned int j=0; j<N; j++)
135         {
136           EIM_approx_at_next_x += _rb_eim_solution(j) * _interpolation_matrix(N,j);
137         }
138 
139       Real error_estimate = std::abs(g_at_next_x - EIM_approx_at_next_x);
140       return error_estimate;
141     }
142   else // Don't evaluate an error bound
143     {
144       return -1.;
145     }
146 }
147 
rb_eim_solve(DenseVector<Number> & EIM_rhs)148 void RBEIMEvaluation::rb_eim_solve(DenseVector<Number> & EIM_rhs)
149 {
150   LOG_SCOPE("rb_eim_solve()", "RBEIMEvaluation");
151 
152   libmesh_error_msg_if(EIM_rhs.size() > get_n_basis_functions(),
153                        "Error: N cannot be larger than the number of basis functions in rb_solve");
154 
155   libmesh_error_msg_if(EIM_rhs.size()==0, "Error: N must be greater than 0 in rb_solve");
156 
157   const unsigned int N = EIM_rhs.size();
158   DenseMatrix<Number> interpolation_matrix_N;
159   _interpolation_matrix.get_principal_submatrix(N, interpolation_matrix_N);
160 
161   interpolation_matrix_N.lu_solve(EIM_rhs, _rb_eim_solution);
162 }
163 
rb_eim_solves(const std::vector<RBParameters> & mus,unsigned int N)164 void RBEIMEvaluation::rb_eim_solves(const std::vector<RBParameters> & mus,
165                                     unsigned int N)
166 {
167   libmesh_error_msg_if(N > get_n_basis_functions(),
168     "Error: N cannot be larger than the number of basis functions in rb_eim_solves");
169   libmesh_error_msg_if(N==0, "Error: N must be greater than 0 in rb_eim_solves");
170 
171   // If mus and N are the same as before, then we return early
172   if ((_rb_eim_solves_mus == mus) && (_rb_eim_solves_N == N))
173     return;
174 
175   LOG_SCOPE("rb_eim_solves()", "RBEIMEvaluation");
176 
177   _rb_eim_solves_mus = mus;
178   _rb_eim_solves_N = N;
179 
180   if (get_parametrized_function().is_lookup_table)
181     {
182       _rb_eim_solutions.resize(mus.size());
183       for (auto mu_index : index_range(mus))
184         {
185           Real lookup_table_param =
186             mus[mu_index].get_value(get_parametrized_function().lookup_table_param_name);
187 
188           // Cast lookup_table_param to an unsigned integer so that we can use
189           // it as an index into the EIM rhs values obtained from the lookup table.
190           unsigned int lookup_table_index =
191             cast_int<unsigned int>(std::round(lookup_table_param));
192 
193           DenseVector<Number> values;
194           eim_solutions[lookup_table_index].get_principal_subvector(N, values);
195           _rb_eim_solutions[mu_index] = values;
196         }
197 
198       return;
199     }
200 
201   // output all comps indexing is as follows:
202   //   mu index --> interpolation point index --> component index --> value.
203   std::vector<std::vector<std::vector<Number>>> output_all_comps;
204   get_parametrized_function().vectorized_evaluate(mus,
205                                                   _interpolation_points_xyz,
206                                                   _interpolation_points_subdomain_id,
207                                                   _interpolation_points_xyz_perturbations,
208                                                   output_all_comps);
209 
210   std::vector<std::vector<Number>> evaluated_values_at_interp_points(output_all_comps.size());
211 
212   for(unsigned int mu_index : index_range(evaluated_values_at_interp_points))
213     {
214       evaluated_values_at_interp_points[mu_index].resize(N);
215       for(unsigned int interp_pt_index=0; interp_pt_index<N; interp_pt_index++)
216         {
217           unsigned int comp = _interpolation_points_comp[interp_pt_index];
218 
219           evaluated_values_at_interp_points[mu_index][interp_pt_index] =
220             output_all_comps[mu_index][interp_pt_index][comp];
221         }
222     }
223 
224   DenseMatrix<Number> interpolation_matrix_N;
225   _interpolation_matrix.get_principal_submatrix(N, interpolation_matrix_N);
226 
227   _rb_eim_solutions.resize(mus.size());
228   for(unsigned int mu_index : index_range(mus))
229     {
230       DenseVector<Number> EIM_rhs(N);
231       for (unsigned int i=0; i<N; i++)
232         {
233           EIM_rhs(i) = evaluated_values_at_interp_points[mu_index][i];
234         }
235 
236       interpolation_matrix_N.lu_solve(EIM_rhs, _rb_eim_solutions[mu_index]);
237     }
238 }
239 
get_n_basis_functions()240 unsigned int RBEIMEvaluation::get_n_basis_functions() const
241 {
242   return _local_eim_basis_functions.size();
243 }
244 
set_n_basis_functions(unsigned int n_bfs)245 void RBEIMEvaluation::set_n_basis_functions(unsigned int n_bfs)
246 {
247   _local_eim_basis_functions.resize(n_bfs);
248 }
249 
decrement_vector(QpDataMap & v,const DenseVector<Number> & coeffs)250 void RBEIMEvaluation::decrement_vector(QpDataMap & v,
251                                        const DenseVector<Number> & coeffs)
252 {
253   LOG_SCOPE("decrement_vector()", "RBEIMEvaluation");
254 
255   libmesh_error_msg_if(get_n_basis_functions() != coeffs.size(),
256                        "Error: Number of coefficients should match number of basis functions");
257 
258   for (auto & pr : v)
259     {
260       dof_id_type elem_id = pr.first;
261       auto & v_comp_and_qp = pr.second;
262 
263       for (const auto & comp : index_range(v_comp_and_qp))
264         for (unsigned int qp : index_range(v_comp_and_qp[comp]))
265           for (unsigned int i : index_range(_local_eim_basis_functions))
266             {
267               // Check that entry (elem_id,comp,qp) exists in _local_eim_basis_functions so that
268               // we get a clear error message if there is any missing data
269               const auto & basis_comp_and_qp = libmesh_map_find(_local_eim_basis_functions[i], elem_id);
270 
271               libmesh_error_msg_if(comp >= basis_comp_and_qp.size(), "Error: Invalid comp");
272               libmesh_error_msg_if(qp >= basis_comp_and_qp[comp].size(), "Error: Invalid qp");
273 
274               v_comp_and_qp[comp][qp] -= coeffs(i) * basis_comp_and_qp[comp][qp];
275             }
276     }
277 
278 }
279 
initialize_eim_theta_objects()280 void RBEIMEvaluation::initialize_eim_theta_objects()
281 {
282   // Initialize the rb_theta objects that access the solution from this rb_eim_evaluation
283   _rb_eim_theta_objects.clear();
284   for (auto i : make_range(get_n_basis_functions()))
285     _rb_eim_theta_objects.emplace_back(build_eim_theta(i));
286 }
287 
get_eim_theta_objects()288 std::vector<std::unique_ptr<RBTheta>> & RBEIMEvaluation::get_eim_theta_objects()
289 {
290   return _rb_eim_theta_objects;
291 }
292 
build_eim_theta(unsigned int index)293 std::unique_ptr<RBTheta> RBEIMEvaluation::build_eim_theta(unsigned int index)
294 {
295   return libmesh_make_unique<RBEIMTheta>(*this, index);
296 }
297 
get_parametrized_function_values_at_qps(const QpDataMap & pf,dof_id_type elem_id,unsigned int comp,std::vector<Number> & values)298 void RBEIMEvaluation::get_parametrized_function_values_at_qps(
299   const QpDataMap & pf,
300   dof_id_type elem_id,
301   unsigned int comp,
302   std::vector<Number> & values)
303 {
304   LOG_SCOPE("get_parametrized_function_values_at_qps()", "RBEIMConstruction");
305 
306   values.clear();
307 
308   const auto it = pf.find(elem_id);
309   if (it != pf.end())
310   {
311     const auto & comps_and_qps_on_elem = it->second;
312     libmesh_error_msg_if(comp >= comps_and_qps_on_elem.size(),
313                          "Invalid comp index: " << comp);
314 
315     values = comps_and_qps_on_elem[comp];
316   }
317 }
318 
get_parametrized_function_value(const Parallel::Communicator & comm,const QpDataMap & pf,dof_id_type elem_id,unsigned int comp,unsigned int qp)319 Number RBEIMEvaluation::get_parametrized_function_value(
320   const Parallel::Communicator & comm,
321   const QpDataMap & pf,
322   dof_id_type elem_id,
323   unsigned int comp,
324   unsigned int qp)
325 {
326   std::vector<Number> values;
327   get_parametrized_function_values_at_qps(pf, elem_id, comp, values);
328 
329   // In parallel, values should only be non-empty on one processor
330   Number value = 0.;
331   if (!values.empty())
332   {
333     libmesh_error_msg_if(qp >= values.size(), "Error: Invalid qp index");
334 
335     value = values[qp];
336   }
337   comm.sum(value);
338 
339   return value;
340 }
341 
get_eim_basis_function_values_at_qps(unsigned int basis_function_index,dof_id_type elem_id,unsigned int comp,std::vector<Number> & values)342 void RBEIMEvaluation::get_eim_basis_function_values_at_qps(unsigned int basis_function_index,
343                                                            dof_id_type elem_id,
344                                                            unsigned int comp,
345                                                            std::vector<Number> & values) const
346 {
347   libmesh_error_msg_if(basis_function_index >= _local_eim_basis_functions.size(),
348                        "Invalid basis function index: " << basis_function_index);
349 
350   get_parametrized_function_values_at_qps(
351     _local_eim_basis_functions[basis_function_index],
352     elem_id,
353     comp,
354     values);
355 }
356 
get_eim_basis_function_value(unsigned int basis_function_index,dof_id_type elem_id,unsigned int comp,unsigned int qp)357 Number RBEIMEvaluation::get_eim_basis_function_value(unsigned int basis_function_index,
358                                                      dof_id_type elem_id,
359                                                      unsigned int comp,
360                                                      unsigned int qp) const
361 {
362   libmesh_error_msg_if(basis_function_index >= _local_eim_basis_functions.size(),
363                        "Invalid basis function index: " << basis_function_index);
364 
365   return get_parametrized_function_value(
366     comm(),
367     _local_eim_basis_functions[basis_function_index],
368     elem_id,
369     comp,
370     qp);
371 }
372 
373 const RBEIMEvaluation::QpDataMap &
get_basis_function(unsigned int i)374 RBEIMEvaluation::get_basis_function(unsigned int i) const
375 {
376   return _local_eim_basis_functions[i];
377 }
378 
get_rb_eim_solution()379 const DenseVector<Number> & RBEIMEvaluation::get_rb_eim_solution() const
380 {
381   return _rb_eim_solution;
382 }
383 
get_rb_eim_solutions_entries(unsigned int index)384 std::vector<Number> RBEIMEvaluation::get_rb_eim_solutions_entries(unsigned int index) const
385 {
386   LOG_SCOPE("get_rb_eim_solutions_entries()", "RBEIMEvaluation");
387 
388   std::vector<Number> rb_eim_solutions_entries(_rb_eim_solutions.size());
389   for (unsigned int mu_index : index_range(_rb_eim_solutions))
390     {
391       libmesh_error_msg_if(index >= _rb_eim_solutions[mu_index].size(), "Error: Invalid index");
392       rb_eim_solutions_entries[mu_index] = _rb_eim_solutions[mu_index](index);
393     }
394 
395   return rb_eim_solutions_entries;
396 }
397 
add_interpolation_points_xyz(Point p)398 void RBEIMEvaluation::add_interpolation_points_xyz(Point p)
399 {
400   _interpolation_points_xyz.emplace_back(p);
401 }
402 
add_interpolation_points_comp(unsigned int comp)403 void RBEIMEvaluation::add_interpolation_points_comp(unsigned int comp)
404 {
405   _interpolation_points_comp.emplace_back(comp);
406 }
407 
add_interpolation_points_subdomain_id(subdomain_id_type sbd_id)408 void RBEIMEvaluation::add_interpolation_points_subdomain_id(subdomain_id_type sbd_id)
409 {
410   _interpolation_points_subdomain_id.emplace_back(sbd_id);
411 }
412 
add_interpolation_points_xyz_perturbations(const std::vector<Point> & perturbs)413 void RBEIMEvaluation::add_interpolation_points_xyz_perturbations(const std::vector<Point> & perturbs)
414 {
415   _interpolation_points_xyz_perturbations.emplace_back(perturbs);
416 }
417 
418 
add_interpolation_points_elem_id(dof_id_type elem_id)419 void RBEIMEvaluation::add_interpolation_points_elem_id(dof_id_type elem_id)
420 {
421   _interpolation_points_elem_id.emplace_back(elem_id);
422 }
423 
add_interpolation_points_qp(unsigned int qp)424 void RBEIMEvaluation::add_interpolation_points_qp(unsigned int qp)
425 {
426   _interpolation_points_qp.emplace_back(qp);
427 }
428 
get_interpolation_points_xyz(unsigned int index)429 Point RBEIMEvaluation::get_interpolation_points_xyz(unsigned int index) const
430 {
431   libmesh_error_msg_if(index >= _interpolation_points_xyz.size(), "Error: Invalid index");
432 
433   return _interpolation_points_xyz[index];
434 }
435 
get_interpolation_points_comp(unsigned int index)436 unsigned int RBEIMEvaluation::get_interpolation_points_comp(unsigned int index) const
437 {
438   libmesh_error_msg_if(index >= _interpolation_points_comp.size(), "Error: Invalid index");
439 
440   return _interpolation_points_comp[index];
441 }
442 
get_interpolation_points_subdomain_id(unsigned int index)443 subdomain_id_type RBEIMEvaluation::get_interpolation_points_subdomain_id(unsigned int index) const
444 {
445   libmesh_error_msg_if(index >= _interpolation_points_subdomain_id.size(), "Error: Invalid index");
446 
447   return _interpolation_points_subdomain_id[index];
448 }
449 
get_interpolation_points_xyz_perturbations(unsigned int index)450 const std::vector<Point> & RBEIMEvaluation::get_interpolation_points_xyz_perturbations(unsigned int index) const
451 {
452   libmesh_error_msg_if(index >= _interpolation_points_xyz_perturbations.size(), "Error: Invalid index");
453 
454   return _interpolation_points_xyz_perturbations[index];
455 }
456 
get_interpolation_points_elem_id(unsigned int index)457 dof_id_type RBEIMEvaluation::get_interpolation_points_elem_id(unsigned int index) const
458 {
459   libmesh_error_msg_if(index >= _interpolation_points_elem_id.size(), "Error: Invalid index");
460 
461   return _interpolation_points_elem_id[index];
462 }
463 
get_interpolation_points_qp(unsigned int index)464 unsigned int RBEIMEvaluation::get_interpolation_points_qp(unsigned int index) const
465 {
466   libmesh_error_msg_if(index >= _interpolation_points_qp.size(), "Error: Invalid index");
467 
468   return _interpolation_points_qp[index];
469 }
470 
set_interpolation_matrix_entry(unsigned int i,unsigned int j,Number value)471 void RBEIMEvaluation::set_interpolation_matrix_entry(unsigned int i, unsigned int j, Number value)
472 {
473   libmesh_error_msg_if((i >= _interpolation_matrix.m()) || (j >= _interpolation_matrix.n()),
474                        "Error: Invalid matrix indices");
475 
476   _interpolation_matrix(i,j) = value;
477 }
478 
get_interpolation_matrix()479 const DenseMatrix<Number> & RBEIMEvaluation::get_interpolation_matrix() const
480 {
481   return _interpolation_matrix;
482 }
483 
add_basis_function_and_interpolation_data(const QpDataMap & bf,Point p,unsigned int comp,dof_id_type elem_id,subdomain_id_type subdomain_id,unsigned int qp,const std::vector<Point> & perturbs)484 void RBEIMEvaluation::add_basis_function_and_interpolation_data(
485   const QpDataMap & bf,
486   Point p,
487   unsigned int comp,
488   dof_id_type elem_id,
489   subdomain_id_type subdomain_id,
490   unsigned int qp,
491   const std::vector<Point> & perturbs)
492 {
493   _local_eim_basis_functions.emplace_back(bf);
494 
495   _interpolation_points_xyz.emplace_back(p);
496   _interpolation_points_comp.emplace_back(comp);
497   _interpolation_points_elem_id.emplace_back(elem_id);
498   _interpolation_points_subdomain_id.emplace_back(subdomain_id);
499   _interpolation_points_qp.emplace_back(qp);
500   _interpolation_points_xyz_perturbations.emplace_back(perturbs);
501 }
502 
503 void RBEIMEvaluation::
write_out_basis_functions(const std::string & directory_name,bool write_binary_basis_functions)504 write_out_basis_functions(const std::string & directory_name,
505                           bool write_binary_basis_functions)
506 {
507   LOG_SCOPE("write_out_basis_functions()", "RBEIMEvaluation");
508 
509   // Quick return if there is no work to do. Note: make sure all procs
510   // agree there is no work to do.
511   bool is_empty = _local_eim_basis_functions.empty();
512   this->comm().verify(is_empty);
513 
514   if (is_empty)
515     return;
516 
517   // Gather basis function data from other procs, storing it in
518   // _local_eim_basis_functions, so that we can then print everything
519   // from processor 0.
520   this->gather_bfs();
521 
522   // Write values from processor 0 only.
523   if (this->processor_id() == 0)
524     {
525       // Make a directory to store all the data files
526       Utility::mkdir(directory_name.c_str());
527 
528       // Create filename
529       std::ostringstream file_name;
530       const std::string basis_function_suffix = (write_binary_basis_functions ? ".xdr" : ".dat");
531       file_name << directory_name << "/" << "bf_data" << basis_function_suffix;
532 
533       // Create XDR writer object
534       Xdr xdr(file_name.str(), write_binary_basis_functions ? ENCODE : WRITE);
535 
536       // Write number of basis functions to file. Note: the
537       // Xdr::data() function takes non-const references, so you can't
538       // pass e.g. vec.size() to that interface.
539       auto n_bf = _local_eim_basis_functions.size();
540       xdr.data(n_bf, "# Number of basis functions");
541 
542       // We assume that each basis function has data for the same
543       // number of elements as basis function 0, which is equal to the
544       // size of the map.
545       auto n_elem = _local_eim_basis_functions[0].size();
546       xdr.data(n_elem, "# Number of elements");
547 
548       // We assume that each element has the same number of variables,
549       // and we get the number of vars from the first element of the
550       // first basis function.
551       auto n_vars = _local_eim_basis_functions[0].begin()->second.size();
552       xdr.data(n_vars, "# Number of variables");
553 
554       // We assume that the list of elements for each basis function
555       // is the same as basis function 0. We also assume that all vars
556       // have the same number of qps.
557       std::vector<unsigned int> n_qp_per_elem;
558       n_qp_per_elem.reserve(n_elem);
559       dof_id_type expected_elem_id = 0;
560       for (const auto & pr : _local_eim_basis_functions[0])
561         {
562           // Note: Currently we require that the Elems are numbered
563           // contiguously from [0..n_elem).  This allows us to avoid
564           // writing the Elem ids to the Xdr file, but if we need to
565           // generalize this assumption later, we can.
566           const auto & actual_elem_id = pr.first;
567 
568           libmesh_error_msg_if(actual_elem_id != expected_elem_id++,
569                                "RBEIMEvaluation currently assumes a contiguous Elem numbering starting from 0.");
570 
571           // array[n_vars][n_qp] per Elem. We get the number of QPs
572           // for variable 0, assuming they are all the same.
573           const auto & array = pr.second;
574           n_qp_per_elem.push_back(array[0].size());
575         }
576       xdr.data(n_qp_per_elem, "# Number of QPs per Elem");
577 
578       // The total amount of qp data for each var is the sum of the
579       // entries in the "n_qp_per_elem" array.
580       auto n_qp_data =
581         std::accumulate(n_qp_per_elem.begin(),
582                         n_qp_per_elem.end(),
583                         0u);
584 
585       // Reserve space to store continguous vectors of qp data for each var
586       std::vector<std::vector<Number>> qp_data(n_vars);
587       for (auto var : index_range(qp_data))
588         qp_data[var].reserve(n_qp_data);
589 
590       // Now we construct a vector for each basis function, for each
591       // variable which is ordered according to:
592       // [ [qp vals for Elem 0], [qp vals for Elem 1], ... [qp vals for Elem N] ]
593       // and write it to file.
594       for (auto bf : index_range(_local_eim_basis_functions))
595         {
596           // Clear any data from previous bf
597           for (auto var : index_range(qp_data))
598             qp_data[var].clear();
599 
600           for (const auto & pr : _local_eim_basis_functions[bf])
601             {
602               // array[n_vars][n_qp] per Elem
603               const auto & array = pr.second;
604               for (auto var : index_range(array))
605                 {
606                   // Insert all qp values for this var
607                   qp_data[var].insert(/*insert at*/qp_data[var].end(),
608                                       /*data start*/array[var].begin(),
609                                       /*data end*/array[var].end());
610                 }
611             }
612 
613           // Write all the var values for this bf
614           for (auto var : index_range(qp_data))
615             xdr.data_stream(qp_data[var].data(), qp_data[var].size(), /*line_break=*/qp_data[var].size());
616         }
617     }
618 }
619 
620 void RBEIMEvaluation::
read_in_basis_functions(const System & sys,const std::string & directory_name,bool read_binary_basis_functions)621 read_in_basis_functions(const System & sys,
622                         const std::string & directory_name,
623                         bool read_binary_basis_functions)
624 {
625   LOG_SCOPE("read_in_basis_functions()", "RBEIMEvaluation");
626 
627   // Read values on processor 0 only.
628   if (sys.comm().rank() == 0)
629     {
630       // Create filename
631       std::ostringstream file_name;
632       const std::string basis_function_suffix = (read_binary_basis_functions ? ".xdr" : ".dat");
633       file_name << directory_name << "/" << "bf_data" << basis_function_suffix;
634 
635       // Create XDR reader object
636       Xdr xdr(file_name.str(), read_binary_basis_functions ? DECODE : READ);
637 
638       // Read in the number of basis functions. The comment parameter
639       // is ignored when reading.
640       std::size_t n_bf;
641       xdr.data(n_bf);
642 
643       // Read in the number of elements
644       std::size_t n_elem;
645       xdr.data(n_elem);
646 
647       // Read in the number of variables.
648       std::size_t n_vars;
649       xdr.data(n_vars);
650 
651       // Read in vector containing the number of QPs per elem. We can
652       // create this vector with the required size or let it be read
653       // from the file and sized for us.
654       std::vector<unsigned int> n_qp_per_elem(n_elem);
655       xdr.data(n_qp_per_elem);
656 
657       // The total amount of qp data for each var is the sum of the
658       // entries in the "n_qp_per_elem" array.
659       auto n_qp_data =
660         std::accumulate(n_qp_per_elem.begin(),
661                         n_qp_per_elem.end(),
662                         0u);
663 
664       // Allocate space to store all required basis functions,
665       // clearing any data that may have been there previously.
666       //
667       // TODO: Do we need to also write out/read in Elem ids?
668       // Or can we assume they will always be contiguously
669       // numbered (at least on proc 0)?
670       _local_eim_basis_functions.clear();
671       _local_eim_basis_functions.resize(n_bf);
672       for (auto i : index_range(_local_eim_basis_functions))
673         for (std::size_t elem_id=0; elem_id<n_elem; ++elem_id)
674           {
675             auto & array = _local_eim_basis_functions[i][elem_id];
676             array.resize(n_vars);
677           }
678 
679       // Allocate temporary storage for one var's worth of qp data.
680       std::vector<Number> qp_data;
681 
682       // Read in data for each basis function
683       for (auto i : index_range(_local_eim_basis_functions))
684         {
685           // Reference to the data map for the current basis function.
686           auto & bf_map = _local_eim_basis_functions[i];
687 
688           for (std::size_t var=0; var<n_vars; ++var)
689             {
690               qp_data.clear();
691               qp_data.resize(n_qp_data);
692 
693               // Read data using data_stream() since that is
694               // (currently) how we write it out. The "line_break"
695               // parameter of data_stream() is ignored while reading.
696               xdr.data_stream(qp_data.data(), qp_data.size());
697 
698               // Iterate over the qp_data vector, filling in the
699               // "small" vectors for each Elem.
700               auto cursor = qp_data.begin();
701               for (std::size_t elem_id=0; elem_id<n_elem; ++elem_id)
702                 {
703                   // Get reference to the [n_vars][n_qp] array for
704                   // this Elem. We assign() into the vector of
705                   // quadrature point values, which allocates space if
706                   // it doesn't already exist.
707                   auto & array = bf_map[elem_id];
708                   array[var].assign(cursor, cursor + n_qp_per_elem[elem_id]);
709                   std::advance(cursor, n_qp_per_elem[elem_id]);
710                 }
711             } // end for (var)
712         } // end for (i)
713     } // end if processor 0
714 
715   // Distribute the basis function information to the processors that require it
716   this->distribute_bfs(sys);
717 }
718 
print_local_eim_basis_functions()719 void RBEIMEvaluation::print_local_eim_basis_functions() const
720 {
721   for (auto bf : index_range(_local_eim_basis_functions))
722     {
723       libMesh::out << "Basis function " << bf << std::endl;
724       for (const auto & pr : _local_eim_basis_functions[bf])
725         {
726           libMesh::out << "Elem " << pr.first << std::endl;
727           const auto & array = pr.second;
728           for (auto var : index_range(array))
729             {
730               libMesh::out << "Variable " << var << std::endl;
731               for (auto qp : index_range(array[var]))
732                 libMesh::out << array[var][qp] << " ";
733               libMesh::out << std::endl;
734             }
735         }
736     }
737 }
738 
gather_bfs()739 void RBEIMEvaluation::gather_bfs()
740 {
741   // We need to gather _local_eim_basis_functions data from other
742   // procs for printing.
743   //
744   // Ideally, this could be accomplished by simply calling:
745   // this->comm().gather(/*root_id=*/0, _local_eim_basis_functions);
746   //
747   // but the data structure seems to be too complicated for this to
748   // work automatically. (I get some error about the function called
749   // being "private within this context".) Therefore, we have to
750   // gather the information manually.
751 
752   // So we can avoid calling this many times below
753   auto n_procs = this->n_processors();
754 
755   // In serial there's nothing to gather
756   if (n_procs == 1)
757     return;
758 
759   // Current assumption is that the number of basis functions stored on
760   // each processor is the same, the only thing that differs is the number
761   // of elements, so make sure that is the case now.
762   auto n_bf = _local_eim_basis_functions.size();
763   this->comm().verify(n_bf);
764 
765   // This function should never be called if there are no basis
766   // functions, so if it was, something went wrong.
767   libmesh_error_msg_if(!n_bf, "RBEIMEvaluation::gather_bfs() should not be called with 0 basis functions.");
768 
769   // The number of variables should be the same on all processors
770   // and we can get this from _local_eim_basis_functions. However,
771   // it may be that some processors have no local elements, so on
772   // those processors we cannot look up the size from
773   // _local_eim_basis_functions. As a result we use comm().max(n_vars)
774   // to make sure all processors agree on the final value.
775   std::size_t n_vars =
776     _local_eim_basis_functions[0].empty() ? 0 : _local_eim_basis_functions[0].begin()->second.size();
777   this->comm().max(n_vars);
778 
779   // Gather list of Elem ids stored on each processor to proc 0.  We
780   // use basis function 0 as an example and assume all the basis
781   // functions are distributed similarly.
782   std::vector<dof_id_type> elem_ids;
783   elem_ids.reserve(_local_eim_basis_functions[0].size());
784   for (const auto & pr : _local_eim_basis_functions[0])
785     elem_ids.push_back(pr.first);
786   this->comm().gather(/*root_id=*/0, elem_ids);
787 
788   // Store the number of qps per Elem on this processor. Again, use
789   // basis function 0 (and variable 0) to get this information, then
790   // apply it to all basis functions.
791   std::vector<unsigned int> n_qp_per_elem;
792   n_qp_per_elem.reserve(_local_eim_basis_functions[0].size());
793   for (const auto & pr : _local_eim_basis_functions[0])
794     {
795       // array[n_vars][n_qp] per Elem. We get the number of QPs
796       // for variable 0, assuming they are all the same.
797       const auto & array = pr.second;
798       n_qp_per_elem.push_back(array[0].size());
799     }
800 
801   // Before gathering, compute the total amount of local qp data for
802   // each var, which is the sum of the entries in the "n_qp_per_elem" array.
803   // This will be used to reserve space in a vector below.
804   auto n_local_qp_data =
805     std::accumulate(n_qp_per_elem.begin(),
806                     n_qp_per_elem.end(),
807                     0u);
808 
809   // Gather the number of qps per Elem for each processor onto processor 0.
810   this->comm().gather(/*root_id=*/0, n_qp_per_elem);
811 
812   // Sanity check: On processor 0, this checks that we have gathered the same number
813   // of elem ids and qp counts.
814   libmesh_error_msg_if(elem_ids.size() != n_qp_per_elem.size(),
815                        "Must gather same number of Elem ids as qps per Elem.");
816 
817   // Reserve space to store contiguous vectors of qp data for each var
818   std::vector<std::vector<Number>> gathered_qp_data(n_vars);
819   for (auto var : index_range(gathered_qp_data))
820     gathered_qp_data[var].reserve(n_local_qp_data);
821 
822   // Now we construct a vector for each basis function, for each
823   // variable, which is ordered according to:
824   // [ [qp vals for Elem 0], [qp vals for Elem 1], ... [qp vals for Elem N] ]
825   // and gather it to processor 0.
826   for (auto bf : index_range(_local_eim_basis_functions))
827     {
828       // Clear any data from previous bf
829       for (auto var : index_range(gathered_qp_data))
830         gathered_qp_data[var].clear();
831 
832       for (const auto & pr : _local_eim_basis_functions[bf])
833         {
834           // array[n_vars][n_qp] per Elem
835           const auto & array = pr.second;
836           for (auto var : index_range(array))
837             {
838               // Insert all qp values for this var
839               gathered_qp_data[var].insert(/*insert at*/gathered_qp_data[var].end(),
840                                            /*data start*/array[var].begin(),
841                                            /*data end*/array[var].end());
842             }
843         }
844 
845       // Reference to the data map for the current basis function.
846       auto & bf_map = _local_eim_basis_functions[bf];
847 
848       for (auto var : index_range(gathered_qp_data))
849         {
850           // For each var, gather gathered_qp_data[var] onto processor
851           // 0. There apparently is not a gather overload for
852           // vector-of-vectors...
853           this->comm().gather(/*root_id=*/0, gathered_qp_data[var]);
854 
855           // On processor 0, iterate over the gathered_qp_data[var]
856           // vector we just gathered, filling in the "small" vectors
857           // for each Elem. Note: here we ignore the fact that we
858           // already have the data on processor 0 and just overwrite
859           // it, this makes the indexing logic a bit simpler.
860           if (this->processor_id() == 0)
861             {
862               auto cursor = gathered_qp_data[var].begin();
863               for (auto i : index_range(elem_ids))
864                 {
865                   auto elem_id = elem_ids[i];
866                   auto n_qp_this_elem = n_qp_per_elem[i];
867 
868                   // Get reference to the [n_vars][n_qp] array for
869                   // this Elem. We assign() into the vector of
870                   // quadrature point values, which allocates space if
871                   // it doesn't already exist.
872                   auto & array = bf_map[elem_id];
873 
874                   // Possibly allocate space if this is data for a new
875                   // element we haven't seen before.
876                   if (array.empty())
877                     array.resize(n_vars);
878 
879                   array[var].assign(cursor, cursor + n_qp_this_elem);
880                   std::advance(cursor, n_qp_this_elem);
881                 }
882             }
883         }
884     } // end loop over basis functions
885 }
886 
887 
888 
distribute_bfs(const System & sys)889 void RBEIMEvaluation::distribute_bfs(const System & sys)
890 {
891   // So we can avoid calling these many times below
892   auto n_procs = sys.comm().size();
893   auto rank = sys.comm().rank();
894 
895   // In serial there's nothing to distribute
896   if (n_procs == 1)
897     return;
898 
899   // Broadcast the number of basis functions from proc 0. After
900   // distributing, all procs should have the same number of basis
901   // functions.
902   auto n_bf = _local_eim_basis_functions.size();
903   sys.comm().broadcast(n_bf);
904 
905   // Allocate enough space to store n_bf basis functions on non-zero ranks
906   if (rank != 0)
907     _local_eim_basis_functions.resize(n_bf);
908 
909   // Broadcast the number of variables from proc 0. After
910   // distributing, all procs should have the same number of variables.
911   auto n_vars = _local_eim_basis_functions[0].begin()->second.size();
912   sys.comm().broadcast(n_vars);
913 
914   // Construct lists of elem ids owned by different processors
915   const MeshBase & mesh = sys.get_mesh();
916 
917   std::vector<dof_id_type> gathered_local_elem_ids;
918   gathered_local_elem_ids.reserve(mesh.n_elem());
919   for (const auto & elem : mesh.active_local_element_ptr_range())
920     gathered_local_elem_ids.push_back(elem->id());
921 
922   // I _think_ the local elem ids are likely to already be sorted in
923   // ascending order, since that is how they are stored on the Mesh,
924   // but we can always just guarantee this to be on the safe side as
925   // well.
926   std::sort(gathered_local_elem_ids.begin(), gathered_local_elem_ids.end());
927 
928   // Gather the number of local elems from all procs to proc 0
929   auto n_local_elems = gathered_local_elem_ids.size();
930   std::vector<std::size_t> gathered_n_local_elems = {n_local_elems};
931   sys.comm().gather(/*root_id=*/0, gathered_n_local_elems);
932 
933   // Gather the elem ids owned by each processor onto processor 0.
934   sys.comm().gather(/*root_id=*/0, gathered_local_elem_ids);
935 
936   // Construct vectors of "start" and "one-past-the-end" indices into
937   // the gathered_local_elem_ids vector for each proc. Only valid on
938   // processor 0.
939   std::vector<std::size_t> start_elem_ids_index, end_elem_ids_index;
940 
941   if (rank == 0)
942     {
943       start_elem_ids_index.resize(n_procs);
944       start_elem_ids_index[0] = 0;
945       for (processor_id_type p=1; p<n_procs; ++p)
946         start_elem_ids_index[p] = start_elem_ids_index[p-1] + gathered_n_local_elems[p-1];
947 
948       end_elem_ids_index.resize(n_procs);
949       end_elem_ids_index[n_procs - 1] = gathered_local_elem_ids.size();
950       for (processor_id_type p=0; p<n_procs - 1; ++p)
951         end_elem_ids_index[p] = start_elem_ids_index[p+1];
952     }
953 
954   // On processor 0, using basis function 0 and variable 0, prepare a
955   // vector with the number of qps per Elem.  Then scatter this vector
956   // out to the processors that require it. The order of this vector
957   // matches the gathered_local_elem_ids ordering. The counts will be
958   // gathered_n_local_elems, since there will be one qp count per Elem.
959   std::vector<unsigned int> n_qp_per_elem_data;
960 
961   // On rank 0, the "counts" vector holds the number of floating point values that
962   // are to be scattered to each proc. It is only required on proc 0.
963   std::vector<int> counts;
964 
965   if (rank == 0)
966     {
967       n_qp_per_elem_data.reserve(gathered_local_elem_ids.size());
968       counts.resize(n_procs);
969 
970       auto & bf_map = _local_eim_basis_functions[0];
971 
972       for (processor_id_type p=0; p<n_procs; ++p)
973         {
974           for (auto e : make_range(start_elem_ids_index[p], end_elem_ids_index[p]))
975             {
976               auto elem_id = gathered_local_elem_ids[e];
977 
978               // Get reference to array[n_vars][n_qp] for current Elem.
979               // Throws an error if the required elem_id is not found.
980               const auto & array = libmesh_map_find(bf_map, elem_id);
981 
982               auto n_qps = array[0].size();
983 
984               // We use var==0 to set the number of qps for all vars
985               n_qp_per_elem_data.push_back(n_qps);
986 
987               // Accumulate the count for this proc
988               counts[p] += n_qps;
989             } // end for (e)
990         } // end for proc_id
991     } // if (rank == 0)
992 
993   // Now scatter the n_qp_per_elem_data to all procs (must call the
994   // scatter on all procs, it is a collective).
995   {
996     std::vector<unsigned int> recv;
997     std::vector<int> tmp(gathered_n_local_elems.begin(), gathered_n_local_elems.end());
998     sys.comm().scatter(n_qp_per_elem_data, tmp, recv, /*root_id=*/0);
999 
1000     // Now swap n_qp_per_elem_data and recv. All processors now have a
1001     // vector of length n_local_elems containing the number of
1002     // quadarature points per Elem.
1003     n_qp_per_elem_data.swap(recv);
1004   }
1005 
1006   // For each basis function and each variable, build a vector
1007   // of qp data in the Elem ordering given by the
1008   // gathered_local_elem_ids, then call
1009   //
1010   // sys.comm().scatter(data, counts, recv, /*root_id=*/0);
1011   std::vector<std::vector<Number>> qp_data(n_vars);
1012   if (rank == 0)
1013     {
1014       // The total amount of qp data is given by summing the entries
1015       // of the "counts" vector.
1016       auto n_qp_data =
1017         std::accumulate(counts.begin(), counts.end(), 0u);
1018 
1019       // On processor 0, reserve enough space to hold all the qp
1020       // data for a single basis function for each var.
1021       for (auto var : index_range(qp_data))
1022         qp_data[var].reserve(n_qp_data);
1023     }
1024 
1025   // The recv_qp_data vector will be used on the receiving end of all
1026   // the scatters below.
1027   std::vector<Number> recv_qp_data;
1028 
1029   // Loop from 0..n_bf on _all_ procs, since the scatters inside this
1030   // loop are collective.
1031   for (auto bf : make_range(n_bf))
1032     {
1033       // Prepare data for scattering (only on proc 0)
1034       if (rank == 0)
1035         {
1036           // Reference to the data map for the current basis function.
1037           auto & bf_map = _local_eim_basis_functions[bf];
1038 
1039           // Clear any data from previous bf
1040           for (auto var : index_range(qp_data))
1041             qp_data[var].clear();
1042 
1043           for (processor_id_type p=0; p<n_procs; ++p)
1044             {
1045               for (auto e : make_range(start_elem_ids_index[p], end_elem_ids_index[p]))
1046                 {
1047                   auto elem_id = gathered_local_elem_ids[e];
1048 
1049                   // Get reference to array[n_vars][n_qp] for current Elem.
1050                   // Throws an error if the required elem_id is not found.
1051                   const auto & array = libmesh_map_find(bf_map, elem_id);
1052 
1053                   for (auto var : index_range(array))
1054                     {
1055                       // Insert all qp values for this var
1056                       qp_data[var].insert(/*insert at*/qp_data[var].end(),
1057                                           /*data start*/array[var].begin(),
1058                                           /*data end*/array[var].end());
1059                     } // end for (var)
1060                 } // end for (e)
1061             } // end for proc_id
1062         } // end if rank==0
1063 
1064       // Perform the scatters (all procs)
1065       for (auto var : make_range(n_vars))
1066         {
1067           // Do the scatter for the current var
1068           sys.comm().scatter(qp_data[var], counts, recv_qp_data, /*root_id=*/0);
1069 
1070           if (rank != 0)
1071             {
1072               // Store the scattered data we received in _local_eim_basis_functions[bf]
1073               auto & bf_map = _local_eim_basis_functions[bf];
1074               auto cursor = recv_qp_data.begin();
1075 
1076               for (auto i : index_range(gathered_local_elem_ids))
1077                 {
1078                   auto elem_id = gathered_local_elem_ids[i];
1079                   auto n_qp_this_elem = n_qp_per_elem_data[i];
1080                   auto & array = bf_map[elem_id];
1081 
1082                   // Create space to store the data if it doesn't already exist.
1083                   if (array.empty())
1084                     array.resize(n_vars);
1085 
1086                   array[var].assign(cursor, cursor + n_qp_this_elem);
1087                   std::advance(cursor, n_qp_this_elem);
1088                 }
1089             } // if (rank != 0)
1090         } // end for (var)
1091     } // end for (bf)
1092 
1093   // Now that the scattering is done, delete non-local Elem
1094   // information from processor 0's _local_eim_basis_functions data
1095   // structure.
1096   if (rank == 0)
1097     {
1098       for (processor_id_type p=1; p<n_procs; ++p)
1099         {
1100           for (auto e : make_range(start_elem_ids_index[p], end_elem_ids_index[p]))
1101             {
1102               auto elem_id = gathered_local_elem_ids[e];
1103 
1104               // Delete this Elem's information from every basis function.
1105               for (auto & bf_map : _local_eim_basis_functions)
1106                 bf_map.erase(elem_id);
1107             } // end for (e)
1108         } // end for proc_id
1109     } // if (rank == 0)
1110 }
1111 
1112 } // namespace libMesh
1113