1 #ifndef MATMUL_TOP_H_
2 #define MATMUL_TOP_H_
3
4 #include <stddef.h>
5 #include <stdint.h>
6
7 #ifdef __cplusplus
8 #include <string>
9 #endif
10 #include <gmp.h> // for gmp_randstate_t
11 struct timing_data;
12
13 #include "select_mpi.h"
14 #include "parallelizing_info.h"
15 #include "matmul.h"
16 #include "params.h"
17 #include "balancing.h"
18 #include "mpfq/mpfq_vbase.h"
19
// The "all_v" field collects all the pointers to the per-thread vector
// values. There are exactly pi->wr[0]->ncores such pointers in
// mmt->wr[0]. In some cases, these pointers may be equal (for source
// vectors, never used as destination), and in some cases not.
24
25 typedef struct mmt_vec_s * mmt_vec_ptr;
26
27 struct mmt_vec_s {
28 mpfq_vbase_ptr abase;
29 parallelizing_info_ptr pi;
30 int d;
31 pi_datatype_ptr pitype;
32 void * v;
33 mmt_vec_ptr * siblings; /* pi->wr[d]->ncores siblings ;
34 only in case all cores in the communicator
35 have their own data area v */
36 mmt_vec_ptr * mpals; /* pi->m->ncores siblings, always */
37 mmt_vec_ptr * wrpals[2]; /* pi->wr[0]->ncores and pi->wr[1]->ncores
38 siblings, always. */
39 unsigned int n; // total size in items
40 unsigned int i0;
41 unsigned int i1;
42 size_t rsbuf_size; // auto-expanded on demand.
43 void * rsbuf[2]; // only for RS_CHOICE == RS_CHOICE_MINE
44 int consistency; /* 0 == inconsistent ; 1 == partial ; 2 == full */
45 };
46 typedef struct mmt_vec_s mmt_vec[1];
47
/* some handy macros to access portions of mmt_vec's
 *
 * SUBVEC(v, w, offset) expands to a call of v's abase->vec_subvec method
 * on the member `w` of v (e.g. SUBVEC(y, v, k) uses y->v), at the given
 * offset. SUBVEC_const does the same through vec_subvec_const.
 *
 * `w` is a member name, so it is deliberately not parenthesized. `v` and
 * `offset` are fully parenthesized so that expressions such as `&x` or
 * `p + 1` can be passed safely. Beware that `v` is evaluated more than
 * once: do not pass an expression with side effects.
 */
#define SUBVEC(v,w,offset) ((v)->abase->vec_subvec((v)->abase, (v)->w, (offset)))
#define SUBVEC_const(v,w,offset) ((v)->abase->vec_subvec_const((v)->abase, (v)->w, (offset)))
51
/* yikes. yet another list structure. Wish we had the STL. */

/* A list of pairs of unsigned ints.
 *
 * x points to rows of two unsigned ints each, so entry k is the pair
 * (x[k][0], x[k][1]).
 * NOTE(review): n and alloc are presumably the number of pairs in use and
 * the number of pairs allocated -- confirm against the private methods.
 */
struct permutation_data_s {
    size_t n;
    size_t alloc;
    unsigned int (*x)[2];
};
typedef struct permutation_data_s permutation_data[1];
typedef struct permutation_data_s * permutation_data_ptr;
/* all methods are private */
61
62 struct matmul_top_matrix_s {
63 // global stuff.
64 //
65 // n[0] is the number of rows, includding padding.
66 // n[1] is the number of columns, includding padding.
67 //
68 // n0[] is without padding.
69 //
70 // Note that a communicator in direction 0 handles in total a dataset
71 // of size n[1] (the number of items in a matrix row is the number of
72 // columns.
73 unsigned int n[2];
74 unsigned int n0[2]; // n0: unpadded.
75
76 // this really ends up within the mm field. It's not a complete file
77 // name though. We lack the implementation extension, the possible
78 // transposition tag, as well as the .bin extension.
79 char * locfile;
80
81 /* These two are global to all threads and jobs (well, each thread
82 * has its own pointer though, it's not a shared_malloc. It could be,
83 * but it isn't).
84 *
85 * For random matrices, both strings below are NULL.
86 */
87 char * mname;
88 char * bname;
89
90 balancing bal;
91
92 /* These are local excerpts of the balancing permutation: arrays of
93 * pairs (row index in the (sub)-matrix) ==> (row index in the
94 * original matrix), but only when both coordinates of this pair are
95 * in the current row and column range. This can be viewed as the set
96 * of non-zero positions in the permutation matrix if it were split
97 * just like the current matrix is. */
98 permutation_data_ptr perm[2]; /* rowperm, colperm */
99
100 matmul_ptr mm;
101 };
102
103 typedef struct matmul_top_matrix_s matmul_top_matrix[1];
104 typedef struct matmul_top_matrix_s * matmul_top_matrix_ptr;
105 typedef struct matmul_top_matrix_s const * matmul_top_matrix_srcptr;
106
107 struct matmul_top_data_s {
108 parallelizing_info_ptr pi;
109 mpfq_vbase_ptr abase;
110 pi_datatype_ptr pitype;
111 /* These n[] and n0[] correspond to the dimensions of the product
112 *
113 * n[0] is matrices[0]->n[0]
114 * n[1] is matrices[nmatrices-1]->n[1]
115 */
116 unsigned int n[2];
117 unsigned int n0[2]; // n0: unpadded.
118 int nmatrices;
119 matmul_top_matrix * matrices;
120 };
121
122 typedef struct matmul_top_data_s matmul_top_data[1];
123 typedef struct matmul_top_data_s * matmul_top_data_ptr;
124 typedef struct matmul_top_data_s const * matmul_top_data_srcptr;
125
/* These are flags for the distributed vectors. For the moment we have
 * only one flag.
 * NOTE(review): presumably meant for the `flags` argument of
 * mmt_vec_init() -- confirm against the implementation. */
#define THREAD_SHARED_VECTOR 1
129
130 #ifdef __cplusplus
131 extern "C" {
132 #endif
133
134 extern void matmul_top_init(matmul_top_data_ptr mmt,
135 mpfq_vbase_ptr abase,
136 parallelizing_info_ptr pi,
137 param_list_ptr pl,
138 int optimized_direction);
139
140
141 extern void matmul_top_decl_usage(param_list_ptr pl);
142 extern void matmul_top_lookup_parameters(param_list_ptr pl);
143 extern void matmul_top_report(matmul_top_data_ptr mmt, double scale, int full);
144 extern void matmul_top_clear(matmul_top_data_ptr mmt);
145 extern unsigned int matmul_top_rank_upper_bound(matmul_top_data_ptr mmt);
146 #if 0
147 extern void matmul_top_fill_random_source(matmul_top_data_ptr mmt, int d);
148 #endif
149 extern size_t mmt_my_own_size_in_items(mmt_vec_ptr v);
150 extern size_t mmt_my_own_size_in_bytes(mmt_vec_ptr v);
151 extern size_t mmt_my_own_offset_in_items(mmt_vec_ptr v);
152 extern size_t mmt_my_own_offset_in_bytes(mmt_vec_ptr v);
153 extern void * mmt_my_own_subvec(mmt_vec_ptr v);
154
155 extern void mmt_vec_set_random_through_file(mmt_vec_ptr v, const char * name, unsigned int itemsondisk, gmp_randstate_t rstate, unsigned int block_position);
156
157 /* do not use this function if you want consistency when the splitting
158 * changes ! */
159 extern void mmt_vec_set_random_inconsistent(mmt_vec_ptr v, gmp_randstate_t rstate);
160 extern unsigned long mmt_vec_hamming_weight(mmt_vec_ptr y);
161 extern void mmt_vec_truncate(matmul_top_data_ptr mmt, mmt_vec_ptr v);
162 extern void mmt_vec_truncate_above_index(matmul_top_data_ptr mmt, mmt_vec_ptr v, unsigned int idx);
163 extern void mmt_vec_truncate_below_index(matmul_top_data_ptr mmt, mmt_vec_ptr v, unsigned int idx);
164 extern void mmt_vec_set_x_indices(mmt_vec_ptr y, uint32_t * gxvecs, int m, unsigned int nx);
165 extern void mmt_vec_set_expanded_copy_of_local_data(mmt_vec_ptr y, const void * v, unsigned int n);
166
167 extern void matmul_top_mul_cpu(matmul_top_data_ptr mmt, int midx, int d, mmt_vec_ptr w, mmt_vec_ptr v);
168 extern void matmul_top_comm_bench(matmul_top_data_ptr mmt, int d);
169 extern void matmul_top_mul_comm(mmt_vec_ptr w, mmt_vec_ptr v);
170
171 /* v is both input and output. w is temporary */
172 extern void matmul_top_mul(matmul_top_data_ptr mmt, mmt_vec *w, struct timing_data * tt);
173
174 extern void mmt_vec_init(matmul_top_data_ptr mmt, mpfq_vbase_ptr abase, pi_datatype_ptr pitype, mmt_vec_ptr v, int d, int flags, unsigned int n);
175 extern void mmt_vec_clear(matmul_top_data_ptr mmt, mmt_vec_ptr v);
176 extern void mmt_own_vec_set(mmt_vec_ptr w, mmt_vec_ptr v);
177 extern void mmt_own_vec_set2(mmt_vec_ptr z, mmt_vec_ptr w, mmt_vec_ptr v);
178 extern void mmt_vec_swap(mmt_vec_ptr w, mmt_vec_ptr v);
179 extern void mmt_full_vec_set(mmt_vec_ptr w, mmt_vec_ptr v);
180 extern void mmt_full_vec_set_zero(mmt_vec_ptr v);
181 extern void mmt_vec_set_basis_vector_at(mmt_vec_ptr v, int k, unsigned int j);
182 extern void mmt_vec_set_basis_vector(mmt_vec_ptr v, unsigned int j);
183 extern void mmt_vec_add_basis_vector_at(mmt_vec_ptr v, int k, unsigned int j);
184 extern void mmt_vec_add_basis_vector(mmt_vec_ptr v, unsigned int j);
185 #if 0
186 extern void matmul_top_fill_random_source_generic(matmul_top_data_ptr mmt, size_t stride, mmt_vec_ptr v, int d);
187 #endif
188 extern int mmt_vec_load(mmt_vec_ptr v, const char * name, unsigned int itemsondisk, unsigned int block_position) ATTRIBUTE_WARN_UNUSED_RESULT;
189 extern int mmt_vec_save(mmt_vec_ptr v, const char * name, unsigned int itemsondisk, unsigned int block_position);
190 extern void mmt_vec_reduce_mod_p(mmt_vec_ptr v);
191 extern void mmt_vec_clear_padding(mmt_vec_ptr v, size_t unpadded, size_t padded);
192
193 extern void mmt_vec_broadcast(mmt_vec_ptr v);
194 extern void mmt_vec_reduce(mmt_vec_ptr w, mmt_vec_ptr v);
195 extern void mmt_vec_reduce_sameside(mmt_vec_ptr v);
196 extern void mmt_vec_allreduce(mmt_vec_ptr v);
197 extern void mmt_vec_twist(matmul_top_data_ptr mmt, mmt_vec_ptr y);
198 extern void mmt_vec_untwist(matmul_top_data_ptr mmt, mmt_vec_ptr y);
199 extern void mmt_vec_apply_T(matmul_top_data_ptr mmt, mmt_vec_ptr y);
200 extern void mmt_vec_unapply_T(matmul_top_data_ptr mmt, mmt_vec_ptr y);
201 extern void mmt_vec_apply_S(matmul_top_data_ptr mmt, int midx, mmt_vec_ptr y);
202 extern void mmt_vec_unapply_S(matmul_top_data_ptr mmt, int midx, mmt_vec_ptr y);
203 extern void mmt_vec_apply_P(matmul_top_data_ptr mmt, mmt_vec_ptr y);
204 extern void mmt_vec_unapply_P(matmul_top_data_ptr mmt, mmt_vec_ptr y);
205 extern void mmt_apply_identity(mmt_vec_ptr w, mmt_vec_ptr v);
206 extern void indices_twist(matmul_top_data_ptr mmt, uint32_t * xs, unsigned int n, int d);
207
208 #ifdef __cplusplus
209 }
210 #endif
211
212 #ifdef __cplusplus
213 /* proxies to use std::string instead of const char * ; this must not be
214 * bracketed by extern "C" ...
215 */
mmt_vec_set_random_through_file(mmt_vec_ptr v,std::string const & name,unsigned int itemsondisk,gmp_randstate_t rstate,unsigned int block_position)216 static inline void mmt_vec_set_random_through_file(mmt_vec_ptr v, std::string const & name, unsigned int itemsondisk, gmp_randstate_t rstate, unsigned int block_position) {
217 mmt_vec_set_random_through_file(v, name.c_str(), itemsondisk, rstate, block_position);
218 }
219 static inline int mmt_vec_load(mmt_vec_ptr v, std::string const & name, unsigned int itemsondisk, unsigned int block_position) ATTRIBUTE_WARN_UNUSED_RESULT;
mmt_vec_load(mmt_vec_ptr v,std::string const & name,unsigned int itemsondisk,unsigned int block_position)220 static inline int mmt_vec_load(mmt_vec_ptr v, std::string const & name, unsigned int itemsondisk, unsigned int block_position)
221 {
222 return mmt_vec_load(v, name.c_str(), itemsondisk, block_position);
223 }
mmt_vec_save(mmt_vec_ptr v,std::string const & name,unsigned int itemsondisk,unsigned int block_position)224 static inline int mmt_vec_save(mmt_vec_ptr v, std::string const & name, unsigned int itemsondisk, unsigned int block_position)
225 {
226 return mmt_vec_save(v, name.c_str(), itemsondisk, block_position);
227 }
228 #endif
229
230 #endif /* MATMUL_TOP_H_ */
231