1 #ifndef MATMUL_TOP_H_
2 #define MATMUL_TOP_H_
3 
4 #include <stddef.h>
5 #include <stdint.h>
6 
7 #ifdef __cplusplus
8 #include <string>
9 #endif
10 #include <gmp.h>                 // for gmp_randstate_t
11 struct timing_data;
12 
13 #include "select_mpi.h"
14 #include "parallelizing_info.h"
15 #include "matmul.h"
16 #include "params.h"
17 #include "balancing.h"
18 #include "mpfq/mpfq_vbase.h"
19 
20 // the ``all_v'' field collects all the pointers to the per-thread vector
21 // values. There are exactly pi->wr[0]->ncores such pointers in
22 // mmt->wr[0]. In some cases, these pointers may be equal (for source
23 // vectors, never used as destination), and in some cases not.
24 
25 typedef struct mmt_vec_s * mmt_vec_ptr;
26 
27 struct mmt_vec_s {
28     mpfq_vbase_ptr abase;
29     parallelizing_info_ptr pi;
30     int d;
31     pi_datatype_ptr pitype;
32     void * v;
33     mmt_vec_ptr * siblings;     /* pi->wr[d]->ncores siblings ;
34                                    only in case all cores in the communicator
35                                    have their own data area v */
36     mmt_vec_ptr * mpals;        /* pi->m->ncores siblings, always */
37     mmt_vec_ptr * wrpals[2];    /* pi->wr[0]->ncores and pi->wr[1]->ncores
38                                    siblings, always. */
39     unsigned int n;             // total size in items
40     unsigned int i0;
41     unsigned int i1;
42     size_t rsbuf_size;          // auto-expanded on demand.
43     void * rsbuf[2];            // only for RS_CHOICE == RS_CHOICE_MINE
44     int consistency;    /* 0 == inconsistent ; 1 == partial ; 2 == full */
45 };
46 typedef struct mmt_vec_s mmt_vec[1];
47 
/*
 * Some handy macros to access portions of mmt_vec's.
 *
 *   v       expression yielding a pointer to a struct with an `abase`
 *           member (e.g. an mmt_vec_ptr)
 *   w       NAME of the member of *v handed to vec_subvec (not an
 *           expression, hence not parenthesized)
 *   offset  offset forwarded as the last argument of vec_subvec
 *
 * Every use of `v` and `offset` is parenthesized so that arguments such
 * as `p + 1` expand correctly. Note that `v` is still evaluated several
 * times: do not pass expressions with side effects.
 */
#define SUBVEC(v,w,offset) \
    ((v)->abase->vec_subvec((v)->abase, (v)->w, (offset)))
#define SUBVEC_const(v,w,offset) \
    ((v)->abase->vec_subvec_const((v)->abase, (v)->w, (offset)))
51 
/*
 * Yet another hand-rolled list structure (no STL available here).
 *
 * x points to an array of pairs of unsigned ints.
 * NOTE(review): n and alloc are presumably the number of pairs in use
 * and the number of pairs allocated -- confirm against the (private)
 * implementation.
 */
struct permutation_data_s {
    size_t n;
    size_t alloc;
    unsigned int (*x)[2];
};
typedef struct permutation_data_s *permutation_data_ptr;
typedef struct permutation_data_s permutation_data[1];
/* all methods are private */
61 
62 struct matmul_top_matrix_s {
63     // global stuff.
64     //
65     // n[0] is the number of rows, includding padding.
66     // n[1] is the number of columns, includding padding.
67     //
68     // n0[] is without padding.
69     //
70     // Note that a communicator in direction 0 handles in total a dataset
71     // of size n[1] (the number of items in a matrix row is the number of
72     // columns.
73     unsigned int n[2];
74     unsigned int n0[2]; // n0: unpadded.
75 
76     // this really ends up within the mm field. It's not a complete file
77     // name though. We lack the implementation extension, the possible
78     // transposition tag, as well as the .bin extension.
79     char * locfile;
80 
81     /* These two are global to all threads and jobs (well, each thread
82      * has its own pointer though, it's not a shared_malloc. It could be,
83      * but it isn't).
84      *
85      * For random matrices, both strings below are NULL.
86      */
87     char * mname;
88     char * bname;
89 
90     balancing bal;
91 
92     /* These are local excerpts of the balancing permutation: arrays of
93      * pairs (row index in the (sub)-matrix) ==> (row index in the
94      * original matrix), but only when both coordinates of this pair are
95      * in the current row and column range. This can be viewed as the set
96      * of non-zero positions in the permutation matrix if it were split
97      * just like the current matrix is. */
98     permutation_data_ptr perm[2];       /* rowperm, colperm */
99 
100     matmul_ptr mm;
101 };
102 
103 typedef struct matmul_top_matrix_s matmul_top_matrix[1];
104 typedef struct matmul_top_matrix_s * matmul_top_matrix_ptr;
105 typedef struct matmul_top_matrix_s const * matmul_top_matrix_srcptr;
106 
107 struct matmul_top_data_s {
108     parallelizing_info_ptr pi;
109     mpfq_vbase_ptr abase;
110     pi_datatype_ptr pitype;
111     /* These n[] and n0[] correspond to the dimensions of the product
112      *
113      * n[0] is matrices[0]->n[0]
114      * n[1] is matrices[nmatrices-1]->n[1]
115      */
116     unsigned int n[2];
117     unsigned int n0[2]; // n0: unpadded.
118     int nmatrices;
119     matmul_top_matrix * matrices;
120 };
121 
122 typedef struct matmul_top_data_s matmul_top_data[1];
123 typedef struct matmul_top_data_s * matmul_top_data_ptr;
124 typedef struct matmul_top_data_s const * matmul_top_data_srcptr;
125 
/*
 * Flags for the distributed vectors.
 * For the moment there is only one such flag.
 */
#define THREAD_SHARED_VECTOR 1
129 
130 #ifdef __cplusplus
131 extern "C" {
132 #endif
133 
134 extern void matmul_top_init(matmul_top_data_ptr mmt,
135         mpfq_vbase_ptr abase,
136         parallelizing_info_ptr pi,
137         param_list_ptr pl,
138         int optimized_direction);
139 
140 
141 extern void matmul_top_decl_usage(param_list_ptr pl);
142 extern void matmul_top_lookup_parameters(param_list_ptr pl);
143 extern void matmul_top_report(matmul_top_data_ptr mmt, double scale, int full);
144 extern void matmul_top_clear(matmul_top_data_ptr mmt);
145 extern unsigned int matmul_top_rank_upper_bound(matmul_top_data_ptr mmt);
146 #if 0
147 extern void matmul_top_fill_random_source(matmul_top_data_ptr mmt, int d);
148 #endif
149 extern size_t mmt_my_own_size_in_items(mmt_vec_ptr v);
150 extern size_t mmt_my_own_size_in_bytes(mmt_vec_ptr v);
151 extern size_t mmt_my_own_offset_in_items(mmt_vec_ptr v);
152 extern size_t mmt_my_own_offset_in_bytes(mmt_vec_ptr v);
153 extern void * mmt_my_own_subvec(mmt_vec_ptr v);
154 
155 extern void mmt_vec_set_random_through_file(mmt_vec_ptr v, const char * name, unsigned int itemsondisk, gmp_randstate_t rstate, unsigned int block_position);
156 
157 /* do not use this function if you want consistency when the splitting
158  * changes ! */
159 extern void mmt_vec_set_random_inconsistent(mmt_vec_ptr v, gmp_randstate_t rstate);
160 extern unsigned long mmt_vec_hamming_weight(mmt_vec_ptr y);
161 extern void mmt_vec_truncate(matmul_top_data_ptr mmt, mmt_vec_ptr v);
162 extern void mmt_vec_truncate_above_index(matmul_top_data_ptr mmt, mmt_vec_ptr v, unsigned int idx);
163 extern void mmt_vec_truncate_below_index(matmul_top_data_ptr mmt, mmt_vec_ptr v, unsigned int idx);
164 extern void mmt_vec_set_x_indices(mmt_vec_ptr y, uint32_t * gxvecs, int m, unsigned int nx);
165 extern void mmt_vec_set_expanded_copy_of_local_data(mmt_vec_ptr y, const void * v, unsigned int n);
166 
167 extern void matmul_top_mul_cpu(matmul_top_data_ptr mmt, int midx, int d, mmt_vec_ptr w, mmt_vec_ptr v);
168 extern void matmul_top_comm_bench(matmul_top_data_ptr mmt, int d);
169 extern void matmul_top_mul_comm(mmt_vec_ptr w, mmt_vec_ptr v);
170 
171 /* v is both input and output. w is temporary */
172 extern void matmul_top_mul(matmul_top_data_ptr mmt, mmt_vec *w, struct timing_data * tt);
173 
174 extern void mmt_vec_init(matmul_top_data_ptr mmt, mpfq_vbase_ptr abase, pi_datatype_ptr pitype, mmt_vec_ptr v, int d, int flags, unsigned int n);
175 extern void mmt_vec_clear(matmul_top_data_ptr mmt, mmt_vec_ptr v);
176 extern void mmt_own_vec_set(mmt_vec_ptr w, mmt_vec_ptr v);
177 extern void mmt_own_vec_set2(mmt_vec_ptr z, mmt_vec_ptr w, mmt_vec_ptr v);
178 extern void mmt_vec_swap(mmt_vec_ptr w, mmt_vec_ptr v);
179 extern void mmt_full_vec_set(mmt_vec_ptr w, mmt_vec_ptr v);
180 extern void mmt_full_vec_set_zero(mmt_vec_ptr v);
181 extern void mmt_vec_set_basis_vector_at(mmt_vec_ptr v, int k, unsigned int j);
182 extern void mmt_vec_set_basis_vector(mmt_vec_ptr v, unsigned int j);
183 extern void mmt_vec_add_basis_vector_at(mmt_vec_ptr v, int k, unsigned int j);
184 extern void mmt_vec_add_basis_vector(mmt_vec_ptr v, unsigned int j);
185 #if 0
186 extern void matmul_top_fill_random_source_generic(matmul_top_data_ptr mmt, size_t stride, mmt_vec_ptr v, int d);
187 #endif
188 extern int mmt_vec_load(mmt_vec_ptr v, const char * name, unsigned int itemsondisk, unsigned int block_position) ATTRIBUTE_WARN_UNUSED_RESULT;
189 extern int mmt_vec_save(mmt_vec_ptr v, const char * name, unsigned int itemsondisk, unsigned int block_position);
190 extern void mmt_vec_reduce_mod_p(mmt_vec_ptr v);
191 extern void mmt_vec_clear_padding(mmt_vec_ptr v, size_t unpadded, size_t padded);
192 
193 extern void mmt_vec_broadcast(mmt_vec_ptr v);
194 extern void mmt_vec_reduce(mmt_vec_ptr w, mmt_vec_ptr v);
195 extern void mmt_vec_reduce_sameside(mmt_vec_ptr v);
196 extern void mmt_vec_allreduce(mmt_vec_ptr v);
197 extern void mmt_vec_twist(matmul_top_data_ptr mmt, mmt_vec_ptr y);
198 extern void mmt_vec_untwist(matmul_top_data_ptr mmt, mmt_vec_ptr y);
199 extern void mmt_vec_apply_T(matmul_top_data_ptr mmt, mmt_vec_ptr y);
200 extern void mmt_vec_unapply_T(matmul_top_data_ptr mmt, mmt_vec_ptr y);
201 extern void mmt_vec_apply_S(matmul_top_data_ptr mmt, int midx, mmt_vec_ptr y);
202 extern void mmt_vec_unapply_S(matmul_top_data_ptr mmt, int midx, mmt_vec_ptr y);
203 extern void mmt_vec_apply_P(matmul_top_data_ptr mmt, mmt_vec_ptr y);
204 extern void mmt_vec_unapply_P(matmul_top_data_ptr mmt, mmt_vec_ptr y);
205 extern void mmt_apply_identity(mmt_vec_ptr w, mmt_vec_ptr v);
206 extern void indices_twist(matmul_top_data_ptr mmt, uint32_t * xs, unsigned int n, int d);
207 
208 #ifdef __cplusplus
209 }
210 #endif
211 
212 #ifdef __cplusplus
213 /* proxies to use std::string instead of const char * ; this must not be
214  * bracketed by extern "C" ...
215  */
mmt_vec_set_random_through_file(mmt_vec_ptr v,std::string const & name,unsigned int itemsondisk,gmp_randstate_t rstate,unsigned int block_position)216 static inline void mmt_vec_set_random_through_file(mmt_vec_ptr v, std::string const & name, unsigned int itemsondisk, gmp_randstate_t rstate, unsigned int block_position) {
217     mmt_vec_set_random_through_file(v, name.c_str(), itemsondisk, rstate, block_position);
218 }
219 static inline int mmt_vec_load(mmt_vec_ptr v, std::string const & name, unsigned int itemsondisk, unsigned int block_position) ATTRIBUTE_WARN_UNUSED_RESULT;
mmt_vec_load(mmt_vec_ptr v,std::string const & name,unsigned int itemsondisk,unsigned int block_position)220 static inline int mmt_vec_load(mmt_vec_ptr v, std::string const & name, unsigned int itemsondisk, unsigned int block_position)
221 {
222     return mmt_vec_load(v, name.c_str(), itemsondisk, block_position);
223 }
mmt_vec_save(mmt_vec_ptr v,std::string const & name,unsigned int itemsondisk,unsigned int block_position)224 static inline int mmt_vec_save(mmt_vec_ptr v, std::string const & name, unsigned int itemsondisk, unsigned int block_position)
225 {
226     return mmt_vec_save(v, name.c_str(), itemsondisk, block_position);
227 }
228 #endif
229 
230 #endif	/* MATMUL_TOP_H_ */
231