1 #ifndef LINGEN_CALL_COMPANION_HPP_
2 #define LINGEN_CALL_COMPANION_HPP_
3 
4 #include <cstddef>                       // for size_t
5 #include <stdexcept>                      // for runtime_error
6 #include <string>                         // for string
7 #include <iosfwd>
8 #include <istream>
9 #include <array>
10 #include "lingen_substep_schedule.hpp"
11 #include "timing.h"     /* weighted_double */
12 #include "lingen_round_operand_size.hpp"
13 #include "lingen_mul_substeps_base.hpp"
14 
/* This object is passed as companion information to a call of
 * bw_biglingen_recursive; it is computed by the code in
 * plingen-tuning.cpp, but once tuning is over it is essentially fixed.
 */
19 
20 struct lingen_call_companion {
21     /* This is a safeguard. Only the tuning code can create complete call
22      * companions.
23      */
24     bool complete = false;
25 
26     unsigned int mesh = 0;
recurselingen_call_companion27     bool recurse() const { return mesh > 0; }
go_mpilingen_call_companion28     bool go_mpi() const { return mesh > 1; }
29 
30     double ttb = 0;
31     /* total_ncalls is a priori a power of two, but not always.
32      * It is the number of calls that correspond to identical
33      * lingen_call_companion::key keys.  In particular, since comparison
34      * of ::key types is coarse, this means that total_ncalls is the
35      * addition of the number of calls for two possibly different input
36      * lengths.
37      */
38     size_t total_ncalls = 0;
39     struct mul_or_mp_times {/*{{{*/
40         op_mul_or_mp_base::op_type_t op_type;
41         /* XXX This must be trivially copyable because we share it via
42          * MPI... ! */
43         lingen_substep_schedule S;
44         weighted_double
45             tt,         /* 1, time per call to the mul operation */
46             /* For the following, we have both the number of times the
47              * operation is done within 1 call of the mul (or mp)
48              * operation, plus the time of _each individual call_.
49              */
50             t_dft_A,    /* time per dft of the first operand, and so on */
51             t_dft_A_comm,
52             t_dft_B,
53             t_dft_B_comm,
54             t_conv,
55             t_ift_C;
56         size_t reserved_ram;
57 
mul_or_mp_timeslingen_call_companion::mul_or_mp_times58         mul_or_mp_times(op_mul_or_mp_base::op_type_t op_type) : op_type(op_type) {}
fft_namelingen_call_companion::mul_or_mp_times59         const char * fft_name() const { return S.fft_name(); }
step_namelingen_call_companion::mul_or_mp_times60         std::string step_name() const {
61             std::string s = op_mul_or_mp_base::op_name(op_type);
62             s += ';';
63             s += fft_name();
64             return s;
65         }
66         /* we store the per-transform ram here, so that we can act
67          * appropriately if we ever detect that it changes for one
68          * specific call. This is supposed to be the "peak" of the
69          * recorded sizes.
70          */
71         std::array<size_t, 3> fft_alloc_sizes;
72         std::array<std::array<unsigned int, 3>, 2> peak_ram_multipliers;
ramlingen_call_companion::mul_or_mp_times73         size_t ram(std::array<size_t, 3> fft_alloc_sizes) const {
74             size_t rpeak = 0;
75             for(auto const & M : peak_ram_multipliers) {
76                 size_t r = 0;
77                 for(unsigned int i = 0 ; i < 3 ; i++)
78                     r += M[i] * fft_alloc_sizes[i];
79                 if (r > rpeak) rpeak = r;
80             }
81             return rpeak;
82         }
ramlingen_call_companion::mul_or_mp_times83         size_t ram() const {
84             return ram(fft_alloc_sizes);
85         }
ram_totallingen_call_companion::mul_or_mp_times86         size_t ram_total() const {
87             return ram(fft_alloc_sizes) + reserved_ram;
88         }
89 
90         size_t asize, bsize, csize;
91 
92         /* This unserializes only part of the data: the schedule S/
93          * The rest is always recomputed. Even for operator== (which we
94          * chiefly use for compatibility checking), only the schedule
95          * matters.
96          */
97         std::istream& unserialize(std::istream& is);
98         std::ostream& serialize(std::ostream& os) const;
operator ==lingen_call_companion::mul_or_mp_times99         bool operator==(mul_or_mp_times const & o) const {
100             return S == o.S;
101         }
operator !=lingen_call_companion::mul_or_mp_times102         inline bool operator!=(mul_or_mp_times const & o) const { return !(*this == o); }
103     };/*}}}*/
104     mul_or_mp_times mp  { op_mul_or_mp_base::OP_MP };
105     mul_or_mp_times mul { op_mul_or_mp_base::OP_MUL };
106 
operator []lingen_call_companion107     mul_or_mp_times operator[](op_mul_or_mp_base::op_type_t op_type) const {
108         switch(op_type) {
109             case op_mul_or_mp_base::OP_MP: return mp;
110             case op_mul_or_mp_base::OP_MUL: return mul;
111             default: throw std::runtime_error("bad op");
112         }
113     }
114 
115 
116     /* This unserializes only part of the data -- recurse, go_mpi,
117      * and the schedules. The rest is always recomputed.
118      */
119     private:
120     static constexpr const char * io_token_recursive = "recursive";
121     static constexpr const char * io_token_quadratic = "quadratic";
122     static constexpr const char * io_token_collective = "collective";
123     static constexpr const char * io_token_single = "single";
124     static constexpr const char * io_token_MP = "MP";
125     static constexpr const char * io_token_MUL = "MUL";
126     static constexpr const char * io_token_ignored = "-";
127     public:
128 
129     std::istream& unserialize(std::istream& is);
130     std::ostream& serialize(std::ostream& os) const;
131     bool operator==(lingen_call_companion const & o) const;
operator !=lingen_call_companion132     inline bool operator!=(lingen_call_companion const & o) const { return !(*this == o); }
checklingen_call_companion133     bool check() const {
134         return mul.S.check() && mp.S.check();
135     }
136     struct key {
137         int depth;
138         size_t L;
unserializelingen_call_companion::key139         std::istream& unserialize(std::istream& is) {
140             return is >> depth >> L;
141         }
serializelingen_call_companion::key142         std::ostream& serialize(std::ostream& os) const {
143             return os << " " << depth << " " << L;
144         }
operator ==lingen_call_companion::key145         bool operator==(key const & o) const {
146             return depth == o.depth && L == o.L;
147         }
operator <lingen_call_companion::key148         bool operator<(key const& a) const {
149             if (depth < a.depth) return true;
150             if (depth > a.depth) return false;
151             return lingen_round_operand_size(L) > lingen_round_operand_size(a.L);
152         }
153     };
154 };
155 
operator <<(std::ostream & os,lingen_call_companion const & c)156 inline std::ostream& operator<<(std::ostream& os, lingen_call_companion const & c) {
157     return c.serialize(os);
158 }
159 
operator >>(std::istream & is,lingen_call_companion & c)160 inline std::istream& operator>>(std::istream& is, lingen_call_companion & c) {
161     return c.unserialize(is);
162 }
163 
operator <<(std::ostream & os,lingen_call_companion::key const & c)164 inline std::ostream& operator<<(std::ostream& os, lingen_call_companion::key const & c) {
165     return c.serialize(os);
166 }
167 
operator >>(std::istream & is,lingen_call_companion::key & c)168 inline std::istream& operator>>(std::istream& is, lingen_call_companion::key & c) {
169     return c.unserialize(is);
170 }
171 
172 #endif	/* LINGEN_CALL_COMPANION_HPP_ */
173