1 // ---------------------------------------------------------------------
2 //
3 // Copyright (C) 2019 - 2020 by the deal.II authors
4 //
5 // This file is part of the deal.II library.
6 //
7 // The deal.II library is free software; you can use it, redistribute
8 // it, and/or modify it under the terms of the GNU Lesser General
9 // Public License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 // The full text of the license can be found in the file LICENSE.md at
12 // the top level directory of deal.II.
13 //
14 // ---------------------------------------------------------------------
15 
16 
17 #ifndef dealii_matrix_free_vector_access_internal_h
18 #define dealii_matrix_free_vector_access_internal_h
19 
20 #include <deal.II/base/config.h>
21 
22 #include <deal.II/base/vectorization.h>
23 
24 #include <deal.II/matrix_free/dof_info.h>
25 #include <deal.II/matrix_free/type_traits.h>
26 
27 
28 DEAL_II_NAMESPACE_OPEN
29 
30 
31 namespace internal
32 {
33   // below we use type-traits from matrix-free/type_traits.h
34 
35   // access to generic const vectors that have operator ().
36   // FIXME: this is wrong for Trilinos/Petsc MPI vectors
37   // where we should first do Partitioner::local_to_global()
38   template <typename VectorType,
39             typename std::enable_if<!has_local_element<VectorType>::value,
40                                     VectorType>::type * = nullptr>
41   inline typename VectorType::value_type
vector_access(const VectorType & vec,const unsigned int entry)42   vector_access(const VectorType &vec, const unsigned int entry)
43   {
44     return vec(entry);
45   }
46 
47 
48 
49   // access to generic non-const vectors that have operator ().
50   // FIXME: this is wrong for Trilinos/Petsc MPI vectors
51   // where we should first do Partitioner::local_to_global()
52   template <typename VectorType,
53             typename std::enable_if<!has_local_element<VectorType>::value,
54                                     VectorType>::type * = nullptr>
55   inline typename VectorType::value_type &
vector_access(VectorType & vec,const unsigned int entry)56   vector_access(VectorType &vec, const unsigned int entry)
57   {
58     return vec(entry);
59   }
60 
61 
62 
63   // access to distributed MPI vectors that have a local_element(uint)
64   // method to access data in local index space, which is what we use in
65   // DoFInfo and hence in read_dof_values etc.
66   template <typename VectorType,
67             typename std::enable_if<has_local_element<VectorType>::value,
68                                     VectorType>::type * = nullptr>
69   inline typename VectorType::value_type &
vector_access(VectorType & vec,const unsigned int entry)70   vector_access(VectorType &vec, const unsigned int entry)
71   {
72     return vec.local_element(entry);
73   }
74 
75 
76 
77   // same for const access
78   template <typename VectorType,
79             typename std::enable_if<has_local_element<VectorType>::value,
80                                     VectorType>::type * = nullptr>
81   inline typename VectorType::value_type
vector_access(const VectorType & vec,const unsigned int entry)82   vector_access(const VectorType &vec, const unsigned int entry)
83   {
84     return vec.local_element(entry);
85   }
86 
87 
88 
89   template <typename VectorType,
90             typename std::enable_if<has_add_local_element<VectorType>::value,
91                                     VectorType>::type * = nullptr>
92   inline void
vector_access_add(VectorType & vec,const unsigned int entry,const typename VectorType::value_type & val)93   vector_access_add(VectorType &                           vec,
94                     const unsigned int                     entry,
95                     const typename VectorType::value_type &val)
96   {
97     vec.add_local_element(entry, val);
98   }
99 
100 
101 
102   template <typename VectorType,
103             typename std::enable_if<!has_add_local_element<VectorType>::value,
104                                     VectorType>::type * = nullptr>
105   inline void
vector_access_add(VectorType & vec,const unsigned int entry,const typename VectorType::value_type & val)106   vector_access_add(VectorType &                           vec,
107                     const unsigned int                     entry,
108                     const typename VectorType::value_type &val)
109   {
110     vector_access(vec, entry) += val;
111   }
112 
113 
114 
115   template <typename VectorType,
116             typename std::enable_if<has_add_local_element<VectorType>::value,
117                                     VectorType>::type * = nullptr>
118   inline void
vector_access_add_global(VectorType & vec,const types::global_dof_index entry,const typename VectorType::value_type & val)119   vector_access_add_global(VectorType &                           vec,
120                            const types::global_dof_index          entry,
121                            const typename VectorType::value_type &val)
122   {
123     vec.add(entry, val);
124   }
125 
126 
127 
128   template <typename VectorType,
129             typename std::enable_if<!has_add_local_element<VectorType>::value,
130                                     VectorType>::type * = nullptr>
131   inline void
vector_access_add_global(VectorType & vec,const types::global_dof_index entry,const typename VectorType::value_type & val)132   vector_access_add_global(VectorType &                           vec,
133                            const types::global_dof_index          entry,
134                            const typename VectorType::value_type &val)
135   {
136     vec(entry) += val;
137   }
138 
139 
140 
141   template <typename VectorType,
142             typename std::enable_if<has_set_local_element<VectorType>::value,
143                                     VectorType>::type * = nullptr>
144   inline void
vector_access_set(VectorType & vec,const unsigned int entry,const typename VectorType::value_type & val)145   vector_access_set(VectorType &                           vec,
146                     const unsigned int                     entry,
147                     const typename VectorType::value_type &val)
148   {
149     vec.set_local_element(entry, val);
150   }
151 
152 
153 
154   template <typename VectorType,
155             typename std::enable_if<!has_set_local_element<VectorType>::value,
156                                     VectorType>::type * = nullptr>
157   inline void
vector_access_set(VectorType & vec,const unsigned int entry,const typename VectorType::value_type & val)158   vector_access_set(VectorType &                           vec,
159                     const unsigned int                     entry,
160                     const typename VectorType::value_type &val)
161   {
162     vector_access(vec, entry) = val;
163   }
164 
165 
166 
167   // this is to make sure that the parallel partitioning in VectorType
168   // is really the same as stored in MatrixFree.
169   // version below is when has_partitioners_are_compatible == false
170   // FIXME: this is incorrect for PETSc/Trilinos MPI vectors
171   template <
172     typename VectorType,
173     typename std::enable_if<!has_partitioners_are_compatible<VectorType>::value,
174                             VectorType>::type * = nullptr>
175   inline void
check_vector_compatibility(const VectorType & vec,const internal::MatrixFreeFunctions::DoFInfo & dof_info)176   check_vector_compatibility(
177     const VectorType &                            vec,
178     const internal::MatrixFreeFunctions::DoFInfo &dof_info)
179   {
180     (void)vec;
181     (void)dof_info;
182 
183     AssertDimension(vec.size(), dof_info.vector_partitioner->size());
184   }
185 
186 
187 
188   // same as above for has_partitioners_are_compatible == true
189   template <
190     typename VectorType,
191     typename std::enable_if<has_partitioners_are_compatible<VectorType>::value,
192                             VectorType>::type * = nullptr>
193   inline void
check_vector_compatibility(const VectorType & vec,const internal::MatrixFreeFunctions::DoFInfo & dof_info)194   check_vector_compatibility(
195     const VectorType &                            vec,
196     const internal::MatrixFreeFunctions::DoFInfo &dof_info)
197   {
198     (void)vec;
199     (void)dof_info;
200     Assert(vec.partitioners_are_compatible(*dof_info.vector_partitioner),
201            ExcMessage(
202              "The parallel layout of the given vector is not "
203              "compatible with the parallel partitioning in MatrixFree. "
204              "Use MatrixFree::initialize_dof_vector to get a "
205              "compatible vector."));
206   }
207 
208 
209 
210   // Below, three classes (VectorReader, VectorSetter,
211   // VectorDistributorLocalToGlobal) implement the same interface and can be
212   // used to to read from vector, set elements of a vector and add to elements
213   // of the vector.
214 
215   // 1. A class to read data from vector
216   template <typename Number, typename VectorizedArrayType>
217   struct VectorReader
218   {
219     template <typename VectorType>
220     void
process_dofVectorReader221     process_dof(const unsigned int index,
222                 const VectorType & vec,
223                 Number &           res) const
224     {
225       res = vector_access(vec, index);
226     }
227 
228 
229 
230     template <typename VectorType>
231     void
process_dofs_vectorizedVectorReader232     process_dofs_vectorized(const unsigned int   dofs_per_cell,
233                             const unsigned int   dof_index,
234                             VectorType &         vec,
235                             VectorizedArrayType *dof_values,
236                             std::integral_constant<bool, true>) const
237     {
238       const Number *vec_ptr = vec.begin() + dof_index;
239       for (unsigned int i = 0; i < dofs_per_cell;
240            ++i, vec_ptr += VectorizedArrayType::size())
241         dof_values[i].load(vec_ptr);
242     }
243 
244 
245 
246     template <typename VectorType>
247     void
process_dofs_vectorizedVectorReader248     process_dofs_vectorized(const unsigned int   dofs_per_cell,
249                             const unsigned int   dof_index,
250                             const VectorType &   vec,
251                             VectorizedArrayType *dof_values,
252                             std::integral_constant<bool, false>) const
253     {
254       for (unsigned int i = 0; i < dofs_per_cell; ++i)
255         for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
256           dof_values[i][v] =
257             vector_access(vec, dof_index + v + i * VectorizedArrayType::size());
258     }
259 
260 
261 
262     template <typename VectorType>
263     void
process_dofs_vectorized_transposeVectorReader264     process_dofs_vectorized_transpose(const unsigned int   dofs_per_cell,
265                                       const unsigned int * dof_indices,
266                                       VectorType &         vec,
267                                       VectorizedArrayType *dof_values,
268                                       std::integral_constant<bool, true>) const
269     {
270       dealii::vectorized_load_and_transpose(dofs_per_cell,
271                                             vec.begin(),
272                                             dof_indices,
273                                             dof_values);
274     }
275 
276 
277 
278     template <typename VectorType>
279     void
process_dofs_vectorized_transposeVectorReader280     process_dofs_vectorized_transpose(const unsigned int   dofs_per_cell,
281                                       const unsigned int * dof_indices,
282                                       const VectorType &   vec,
283                                       VectorizedArrayType *dof_values,
284                                       std::integral_constant<bool, false>) const
285     {
286       for (unsigned int d = 0; d < dofs_per_cell; ++d)
287         for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
288           dof_values[d][v] = vector_access(vec, dof_indices[v] + d);
289     }
290 
291 
292 
293     // variant where VectorType::value_type is the same as Number -> can call
294     // gather
295     template <typename VectorType>
296     void
process_dof_gatherVectorReader297     process_dof_gather(const unsigned int * indices,
298                        VectorType &         vec,
299                        const unsigned int   constant_offset,
300                        VectorizedArrayType &res,
301                        std::integral_constant<bool, true>) const
302     {
303       res.gather(vec.begin() + constant_offset, indices);
304     }
305 
306 
307 
308     // variant where VectorType::value_type is not the same as Number -> must
309     // manually load the data
310     template <typename VectorType>
311     void
process_dof_gatherVectorReader312     process_dof_gather(const unsigned int * indices,
313                        const VectorType &   vec,
314                        const unsigned int   constant_offset,
315                        VectorizedArrayType &res,
316                        std::integral_constant<bool, false>) const
317     {
318       for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
319         res[v] = vector_access(vec, indices[v] + constant_offset);
320     }
321 
322 
323 
324     template <typename VectorType>
325     void
process_dof_globalVectorReader326     process_dof_global(const types::global_dof_index index,
327                        const VectorType &            vec,
328                        Number &                      res) const
329     {
330       res = vec(index);
331     }
332 
333 
334 
335     void
pre_constraintsVectorReader336     pre_constraints(const Number &, Number &res) const
337     {
338       res = Number();
339     }
340 
341 
342 
343     template <typename VectorType>
344     void
process_constraintVectorReader345     process_constraint(const unsigned int index,
346                        const Number       weight,
347                        const VectorType & vec,
348                        Number &           res) const
349     {
350       res += weight * vector_access(vec, index);
351     }
352 
353 
354 
355     void
post_constraintsVectorReader356     post_constraints(const Number &sum, Number &write_pos) const
357     {
358       write_pos = sum;
359     }
360 
361 
362 
363     void
process_emptyVectorReader364     process_empty(VectorizedArrayType &res) const
365     {
366       res = VectorizedArrayType();
367     }
368   };
369 
370 
371 
372   // 2. A class to add values to the vector during
373   // FEEvaluation::distribute_local_to_global() call
374   template <typename Number, typename VectorizedArrayType>
375   struct VectorDistributorLocalToGlobal
376   {
377     template <typename VectorType>
378     void
process_dofVectorDistributorLocalToGlobal379     process_dof(const unsigned int index, VectorType &vec, Number &res) const
380     {
381       vector_access_add(vec, index, res);
382     }
383 
384 
385 
386     template <typename VectorType>
387     void
process_dofs_vectorizedVectorDistributorLocalToGlobal388     process_dofs_vectorized(const unsigned int   dofs_per_cell,
389                             const unsigned int   dof_index,
390                             VectorType &         vec,
391                             VectorizedArrayType *dof_values,
392                             std::integral_constant<bool, true>) const
393     {
394       Number *vec_ptr = vec.begin() + dof_index;
395       for (unsigned int i = 0; i < dofs_per_cell;
396            ++i, vec_ptr += VectorizedArrayType::size())
397         {
398           VectorizedArrayType tmp;
399           tmp.load(vec_ptr);
400           tmp += dof_values[i];
401           tmp.store(vec_ptr);
402         }
403     }
404 
405 
406 
407     template <typename VectorType>
408     void
process_dofs_vectorizedVectorDistributorLocalToGlobal409     process_dofs_vectorized(const unsigned int   dofs_per_cell,
410                             const unsigned int   dof_index,
411                             VectorType &         vec,
412                             VectorizedArrayType *dof_values,
413                             std::integral_constant<bool, false>) const
414     {
415       for (unsigned int i = 0; i < dofs_per_cell; ++i)
416         for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
417           vector_access_add(vec,
418                             dof_index + v + i * VectorizedArrayType::size(),
419                             dof_values[i][v]);
420     }
421 
422 
423 
424     template <typename VectorType>
425     void
process_dofs_vectorized_transposeVectorDistributorLocalToGlobal426     process_dofs_vectorized_transpose(const unsigned int   dofs_per_cell,
427                                       const unsigned int * dof_indices,
428                                       VectorType &         vec,
429                                       VectorizedArrayType *dof_values,
430                                       std::integral_constant<bool, true>) const
431     {
432       vectorized_transpose_and_store(
433         true, dofs_per_cell, dof_values, dof_indices, vec.begin());
434     }
435 
436 
437 
438     template <typename VectorType>
439     void
process_dofs_vectorized_transposeVectorDistributorLocalToGlobal440     process_dofs_vectorized_transpose(const unsigned int   dofs_per_cell,
441                                       const unsigned int * dof_indices,
442                                       VectorType &         vec,
443                                       VectorizedArrayType *dof_values,
444                                       std::integral_constant<bool, false>) const
445     {
446       for (unsigned int d = 0; d < dofs_per_cell; ++d)
447         for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
448           vector_access_add(vec, dof_indices[v] + d, dof_values[d][v]);
449     }
450 
451 
452 
453     // variant where VectorType::value_type is the same as Number -> can call
454     // scatter
455     template <typename VectorType>
456     void
process_dof_gatherVectorDistributorLocalToGlobal457     process_dof_gather(const unsigned int * indices,
458                        VectorType &         vec,
459                        const unsigned int   constant_offset,
460                        VectorizedArrayType &res,
461                        std::integral_constant<bool, true>) const
462     {
463 #if DEAL_II_VECTORIZATION_WIDTH_IN_BITS < 512
464       for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
465         vector_access(vec, indices[v] + constant_offset) += res[v];
466 #else
467       // only use gather in case there is also scatter.
468       VectorizedArrayType tmp;
469       tmp.gather(vec.begin() + constant_offset, indices);
470       tmp += res;
471       tmp.scatter(indices, vec.begin() + constant_offset);
472 #endif
473     }
474 
475 
476 
477     // variant where VectorType::value_type is not the same as Number -> must
478     // manually append all data
479     template <typename VectorType>
480     void
process_dof_gatherVectorDistributorLocalToGlobal481     process_dof_gather(const unsigned int * indices,
482                        VectorType &         vec,
483                        const unsigned int   constant_offset,
484                        VectorizedArrayType &res,
485                        std::integral_constant<bool, false>) const
486     {
487       for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
488         vector_access_add(vec, indices[v] + constant_offset, res[v]);
489     }
490 
491 
492 
493     template <typename VectorType>
494     void
process_dof_globalVectorDistributorLocalToGlobal495     process_dof_global(const types::global_dof_index index,
496                        VectorType &                  vec,
497                        Number &                      res) const
498     {
499       vector_access_add_global(vec, index, res);
500     }
501 
502 
503 
504     void
pre_constraintsVectorDistributorLocalToGlobal505     pre_constraints(const Number &input, Number &res) const
506     {
507       res = input;
508     }
509 
510 
511 
512     template <typename VectorType>
513     void
process_constraintVectorDistributorLocalToGlobal514     process_constraint(const unsigned int index,
515                        const Number       weight,
516                        VectorType &       vec,
517                        Number &           res) const
518     {
519       vector_access_add(vec, index, weight * res);
520     }
521 
522 
523 
524     void
post_constraintsVectorDistributorLocalToGlobal525     post_constraints(const Number &, Number &) const
526     {}
527 
528 
529 
530     void
process_emptyVectorDistributorLocalToGlobal531     process_empty(VectorizedArrayType &) const
532     {}
533   };
534 
535 
536 
537   // 3. A class to set elements of the vector
538   template <typename Number, typename VectorizedArrayType>
539   struct VectorSetter
540   {
541     template <typename VectorType>
542     void
process_dofVectorSetter543     process_dof(const unsigned int index, VectorType &vec, Number &res) const
544     {
545       vector_access(vec, index) = res;
546     }
547 
548 
549 
550     template <typename VectorType>
551     void
process_dofs_vectorizedVectorSetter552     process_dofs_vectorized(const unsigned int   dofs_per_cell,
553                             const unsigned int   dof_index,
554                             VectorType &         vec,
555                             VectorizedArrayType *dof_values,
556                             std::integral_constant<bool, true>) const
557     {
558       Number *vec_ptr = vec.begin() + dof_index;
559       for (unsigned int i = 0; i < dofs_per_cell;
560            ++i, vec_ptr += VectorizedArrayType::size())
561         dof_values[i].store(vec_ptr);
562     }
563 
564 
565 
566     template <typename VectorType>
567     void
process_dofs_vectorizedVectorSetter568     process_dofs_vectorized(const unsigned int   dofs_per_cell,
569                             const unsigned int   dof_index,
570                             VectorType &         vec,
571                             VectorizedArrayType *dof_values,
572                             std::integral_constant<bool, false>) const
573     {
574       for (unsigned int i = 0; i < dofs_per_cell; ++i)
575         for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
576           vector_access(vec, dof_index + v + i * VectorizedArrayType::size()) =
577             dof_values[i][v];
578     }
579 
580 
581 
582     template <typename VectorType>
583     void
process_dofs_vectorized_transposeVectorSetter584     process_dofs_vectorized_transpose(const unsigned int   dofs_per_cell,
585                                       const unsigned int * dof_indices,
586                                       VectorType &         vec,
587                                       VectorizedArrayType *dof_values,
588                                       std::integral_constant<bool, true>) const
589     {
590       vectorized_transpose_and_store(
591         false, dofs_per_cell, dof_values, dof_indices, vec.begin());
592     }
593 
594 
595 
596     template <typename VectorType, bool booltype>
597     void
process_dofs_vectorized_transposeVectorSetter598     process_dofs_vectorized_transpose(const unsigned int   dofs_per_cell,
599                                       const unsigned int * dof_indices,
600                                       VectorType &         vec,
601                                       VectorizedArrayType *dof_values,
602                                       std::integral_constant<bool, false>) const
603     {
604       for (unsigned int i = 0; i < dofs_per_cell; ++i)
605         for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
606           vector_access(vec, dof_indices[v] + i) = dof_values[i][v];
607     }
608 
609 
610 
611     template <typename VectorType>
612     void
process_dof_gatherVectorSetter613     process_dof_gather(const unsigned int * indices,
614                        VectorType &         vec,
615                        const unsigned int   constant_offset,
616                        VectorizedArrayType &res,
617                        std::integral_constant<bool, true>) const
618     {
619       res.scatter(indices, vec.begin() + constant_offset);
620     }
621 
622 
623 
624     template <typename VectorType>
625     void
process_dof_gatherVectorSetter626     process_dof_gather(const unsigned int * indices,
627                        VectorType &         vec,
628                        const unsigned int   constant_offset,
629                        VectorizedArrayType &res,
630                        std::integral_constant<bool, false>) const
631     {
632       for (unsigned int v = 0; v < VectorizedArrayType::size(); ++v)
633         vector_access(vec, indices[v] + constant_offset) = res[v];
634     }
635 
636 
637 
638     template <typename VectorType>
639     void
process_dof_globalVectorSetter640     process_dof_global(const types::global_dof_index index,
641                        VectorType &                  vec,
642                        Number &                      res) const
643     {
644       vec(index) = res;
645     }
646 
647 
648 
649     void
pre_constraintsVectorSetter650     pre_constraints(const Number &, Number &) const
651     {}
652 
653 
654 
655     template <typename VectorType>
656     void
process_constraintVectorSetter657     process_constraint(const unsigned int,
658                        const Number,
659                        VectorType &,
660                        Number &) const
661     {}
662 
663 
664 
665     void
post_constraintsVectorSetter666     post_constraints(const Number &, Number &) const
667     {}
668 
669 
670 
671     void
process_emptyVectorSetter672     process_empty(VectorizedArrayType &) const
673     {}
674   };
675 } // namespace internal
676 
677 
678 DEAL_II_NAMESPACE_CLOSE
679 
680 #endif
681