1 /*
2   The MIT License
3 
4   Copyright (c) 2018-2019 Dana-Farber Cancer Institute
5                 2016-2018 Broad Institute
6 
7   Permission is hereby granted, free of charge, to any person obtaining
8   a copy of this software and associated documentation files (the
9   "Software"), to deal in the Software without restriction, including
10   without limitation the rights to use, copy, modify, merge, publish,
11   distribute, sublicense, and/or sell copies of the Software, and to
12   permit persons to whom the Software is furnished to do so, subject to
13   the following conditions:
14 
15   The above copyright notice and this permission notice shall be
16   included in all copies or substantial portions of the Software.
17 
18   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
22   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
23   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25   SOFTWARE.
26 */
27 
28 #ifndef KANN_AUTODIFF_H
29 #define KANN_AUTODIFF_H
30 
31 #define KAD_VERSION "r544"
32 
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>   /* bool, used by kad_ssyev_simple() below */
35 
36 #ifdef __STRICT_ANSI__
37 #define inline
38 #endif
39 
40 #define KAD_MAX_DIM 4     /* max dimension */
41 #define KAD_MAX_OP  64    /* max number of operators */
42 
43 /* A computational graph is a directed acyclic graph. In the graph, an external
44  * node represents a variable, a constant or a feed; an internal node
45  * represents an operator; an edge from node v to w indicates v is an operand
46  * of w.
47  */
48 
/* Node-type flags stored in kad_node_t::flag */
#define KAD_VAR        0x1  /* differentiable variable; gradients are computed */
#define KAD_CONST      0x2  /* constant; no gradients */
#define KAD_POOL       0x4  /* pooling node */
#define KAD_SHARE_RNG  0x10 /* with this flag on, different time steps share the same RNG status after unroll */

/* Node-classification helpers; p is a kad_node_t* */
#define kad_is_back(p)  ((p)->flag & KAD_VAR)  /* gradient flows back through p */
#define kad_is_ext(p)   ((p)->n_child == 0)    /* external node: a variable, constant or feed */
#define kad_is_var(p)   (kad_is_ext(p) && kad_is_back(p))
#define kad_is_const(p) (kad_is_ext(p) && ((p)->flag & KAD_CONST))
#define kad_is_feed(p)  (kad_is_ext(p) && !kad_is_back(p) && !((p)->flag & KAD_CONST))
#define kad_is_pivot(p) ((p)->n_child == 1 && ((p)->flag & KAD_POOL))
#define kad_is_switch(p) ((p)->op == 12 && !((p)->flag & KAD_POOL)) /* NOTE(review): 12 is a magic op code -- presumably kad_switch; confirm against kad_op_list */
#define kad_use_rng(p)  ((p)->op == 15 || (p)->op == 24)            /* ops that consume the RNG -- presumably dropout/sample_normal; confirm against kad_op_list */

/* Mark a node for (or exclude it from) evaluation by kad_eval_marked() */
#define kad_eval_enable(p) ((p)->tmp = 1)
#define kad_eval_disable(p) ((p)->tmp = -1)
65 
66 /* a node in the computational graph */
/* a node in the computational graph */
typedef struct kad_node_t {
	uint8_t     n_d;            /* number of dimensions; no larger than KAD_MAX_DIM */
	uint8_t     flag;           /* type of the node; see the KAD_VAR/KAD_CONST/KAD_POOL/KAD_SHARE_RNG flags above */
	uint16_t    op;             /* operator; kad_op_list[op] is the actual function */
	int32_t     n_child;        /* number of operands/child nodes; 0 for an external node */
	int32_t     tmp;            /* temporary field; MUST BE zero before calling kad_compile() */
	int32_t     ptr_size;       /* size of ptr below */
	int32_t     d[KAD_MAX_DIM]; /* dimensions; only the first n_d entries are meaningful */
	int32_t     ext_label;      /* labels for external uses (not modified by the kad_* APIs) */
	uint32_t    ext_flag;       /* flags for external uses (not modified by the kad_* APIs) */
	float      *x;              /* value; allocated for internal nodes */
	float      *g;              /* gradient; allocated for internal nodes */
	void       *ptr;            /* for special operators that need additional parameters (e.g. conv2d) */
	void       *gtmp;           /* temporary data generated at the forward pass but used at the backward pass */
	struct kad_node_t **child;  /* operands/child nodes */
	struct kad_node_t  *pre;    /* usually NULL; only used for RNN */
} kad_node_t, *kad_node_p;
84 
85 #ifdef __cplusplus
86 extern "C" {
87 #endif
88 
89 /**
90  * Compile/linearize a computational graph
91  *
92  * @param n_node   number of nodes (out)
93  * @param n_roots  number of nodes without predecessors
94  * @param roots    list of nodes without predecessors
95  *
96  * @return list of nodes, of size *n_node
97  */
98 kad_node_t **kad_compile_array(int *n_node, int n_roots, kad_node_t **roots);
99 
100 kad_node_t **kad_compile(int *n_node, int n_roots, ...); /* an alternative API to above */
101 void kad_delete(int n, kad_node_t **a); /* deallocate a compiled/linearized graph */
102 
103 /**
104  * Compute the value at a node
105  *
106  * @param n       number of nodes
107  * @param a       list of nodes
108  * @param from    compute the value at this node, 0<=from<n
109  *
110  * @return a pointer to the value (pointing to kad_node_t::x, so don't call
111  *         free() on it!)
112  */
113 const float *kad_eval_at(int n, kad_node_t **a, int from);
114 
115 void kad_eval_marked(int n, kad_node_t **a);
116 int kad_sync_dim(int n, kad_node_t **v, int batch_size);
117 
118 /**
119  * Compute gradient
120  *
121  * @param n       number of nodes
122  * @param a       list of nodes
123  * @param from    the function node; must be a scalar (compute \nabla a[from])
124  */
125 void kad_grad(int n, kad_node_t **a, int from);
126 
127 /**
128  * Unroll a recurrent computation graph
129  *
130  * @param n_v     number of nodes
131  * @param v       list of nodes
132  * @param new_n   number of nodes in the unrolled graph (out)
133  * @param len     how many times to unroll, one for each pivot
134  *
135  * @return list of nodes in the unrolled graph
136  */
137 kad_node_t **kad_unroll(int n_v, kad_node_t **v, int *new_n, int *len);
138 int kad_n_pivots(int n_v, kad_node_t **v);
139 
140 kad_node_t **kad_clone(int n, kad_node_t **v, int batch_size);
141 
142 /* define a variable, a constant or a feed (placeholder in TensorFlow) */
143 kad_node_t *kad_var(float *x, float *g, int n_d, ...); /* a variable; gradients to be computed; not unrolled */
144 kad_node_t *kad_const(float *x, int n_d, ...);         /* a constant; no gradients computed; not unrolled */
145 kad_node_t *kad_feed(int n_d, ...);                    /* an input/output; no gradients computed; unrolled */
146 
147 /* operators taking two operands */
148 kad_node_t *kad_add(kad_node_t *x, kad_node_t *y); /* f(x,y) = x + y (generalized element-wise addition; f[i*n+j]=x[i*n+j]+y[j], n=kad_len(y), 0<j<n, 0<i<kad_len(x)/n) */
149 kad_node_t *kad_sub(kad_node_t *x, kad_node_t *y); /* f(x,y) = x - y (generalized element-wise subtraction) */
150 kad_node_t *kad_mul(kad_node_t *x, kad_node_t *y); /* f(x,y) = x * y (generalized element-wise product) */
151 
152 kad_node_t *kad_matmul(kad_node_t *x, kad_node_t *y);     /* f(x,y) = x * y   (general matrix product) */
153 kad_node_t *kad_cmul(kad_node_t *x, kad_node_t *y);       /* f(x,y) = x * y^T (column-wise matrix product; i.e. y is transposed) */
154 
155 /* loss functions; output scalar */
156 kad_node_t *kad_mse(kad_node_t *x, kad_node_t *y);        /* mean square error */
kad_node_t *kad_ce_multi(kad_node_t *x, kad_node_t *y);   /* multi-class cross-entropy; x is the prediction and y is the truth */
158 kad_node_t *kad_ce_bin(kad_node_t *x, kad_node_t *y);     /* binary cross-entropy for (0,1) */
159 kad_node_t *kad_ce_bin_neg(kad_node_t *x, kad_node_t *y); /* binary cross-entropy for (-1,1) */
160 kad_node_t *kad_ce_multi_weighted(kad_node_t *pred, kad_node_t *truth, kad_node_t *weight);
161 
162 #define KAD_PAD_NONE  0      /* use the smallest zero-padding */
163 #define KAD_PAD_SAME  (-2)   /* output to have the same dimension as input */
164 
165 kad_node_t *kad_conv2d(kad_node_t *x, kad_node_t *w, int r_stride, int c_stride, int r_pad, int c_pad);             /* 2D convolution with weight matrix flipped */
166 kad_node_t *kad_max2d(kad_node_t *x, int kernel_h, int kernel_w, int r_stride, int c_stride, int r_pad, int c_pad); /* 2D max pooling */
167 kad_node_t *kad_conv1d(kad_node_t *x, kad_node_t *w, int stride, int pad);  /* 1D convolution with weight flipped */
168 kad_node_t *kad_max1d(kad_node_t *x, int kernel_size, int stride, int pad); /* 1D max pooling */
169 kad_node_t *kad_avg1d(kad_node_t *x, int kernel_size, int stride, int pad); /* 1D average pooling */
170 
171 kad_node_t *kad_dropout(kad_node_t *x, kad_node_t *r);                      /* dropout at rate r */
172 kad_node_t *kad_sample_normal(kad_node_t *x);                               /* f(x) = x * r, where r is drawn from a standard normal distribution */
173 
174 /* operators taking one operand */
175 kad_node_t *kad_square(kad_node_t *x); /* f(x) = x^2                         (element-wise square) */
176 kad_node_t *kad_sigm(kad_node_t *x);   /* f(x) = 1/(1+exp(-x))               (element-wise sigmoid) */
177 kad_node_t *kad_tanh(kad_node_t *x);   /* f(x) = (1-exp(-2x)) / (1+exp(-2x)) (element-wise tanh) */
178 kad_node_t *kad_relu(kad_node_t *x);   /* f(x) = max{0,x}                    (element-wise rectifier, aka ReLU) */
179 kad_node_t *kad_softmax(kad_node_t *x);/* f_i(x_1,...,x_n) = exp(x_i) / \sum_j exp(x_j) (softmax: tf.nn.softmax(x,dim=-1)) */
180 kad_node_t *kad_1minus(kad_node_t *x); /* f(x) = 1 - x */
181 kad_node_t *kad_exp(kad_node_t *x);    /* f(x) = exp(x) */
182 kad_node_t *kad_log(kad_node_t *x);    /* f(x) = log(x) */
183 kad_node_t *kad_sin(kad_node_t *x);    /* f(x) = sin(x) */
184 
185 kad_node_t *kad_stdnorm(kad_node_t *x); /* layer normalization; applied to the last dimension */
186 
187 /* operators taking an indefinite number of operands (e.g. pooling) */
188 kad_node_t *kad_avg(int n, kad_node_t **x);   /* f(x_1,...,x_n) = \sum_i x_i/n      (mean pooling) */
189 kad_node_t *kad_max(int n, kad_node_t **x);   /* f(x_1,...,x_n) = max{x_1,...,x_n}  (max pooling) */
190 kad_node_t *kad_stack(int n, kad_node_t **x); /* f(x_1,...,x_n) = [x_1,...,x_n]     (stack pooling) */
191 kad_node_t *kad_select(int n, kad_node_t **x, int which); /* f(x_1,...,x_n;i) = x_i (select pooling; -1 for the last) */
192 
193 /* dimension reduction */
194 kad_node_t *kad_reduce_sum(kad_node_t *x, int axis);  /* tf.reduce_sum(x, axis) */
195 kad_node_t *kad_reduce_mean(kad_node_t *x, int axis); /* tf.reduce_mean(x, axis) */
196 
197 /* special operators */
198 kad_node_t *kad_slice(kad_node_t *x, int axis, int start, int end); /* take a slice on the axis-th dimension */
199 kad_node_t *kad_concat(int axis, int n, ...);                       /* concatenate on the axis-th dimension */
200 kad_node_t *kad_concat_array(int axis, int n, kad_node_t **p);      /* the array version of concat */
201 kad_node_t *kad_reshape(kad_node_t *x, int n_d, int *d);            /* reshape; similar behavior to TensorFlow's reshape() */
202 kad_node_t *kad_reverse(kad_node_t *x, int axis);
203 kad_node_t *kad_switch(int n, kad_node_t **p);                      /* manually (as a hyperparameter) choose one input, default to 0 */
204 
205 /* miscellaneous operations on a compiled graph */
206 int kad_size_var(int n, kad_node_t *const* v);   /* total size of all variables */
207 int kad_size_const(int n, kad_node_t *const* v); /* total size of all constants */
208 
209 /* graph I/O */
210 int kad_save(FILE *fp, int n_node, kad_node_t **node);
211 kad_node_t **kad_load(FILE *fp, int *_n_node);
212 
213 /* random number generator */
214 void *kad_rng(void);
215 void kad_srand(void *d, uint64_t seed);
216 uint64_t kad_rand(void *d);
217 double kad_drand(void *d);
218 double kad_drand_normal(void *d);
219 void kad_saxpy(int n, float a, const float *x, float *y);
220 
221 /* debugging routines */
222 void kad_trap_fe(void); /* abort on divide-by-zero and NaN */
223 void kad_print_graph(FILE *fp, int n, kad_node_t **v);
224 void kad_check_grad(int n, kad_node_t **a, int from);
225 
226 #ifdef __cplusplus
227 }
228 #endif
229 
230 #define KAD_ALLOC      1
231 #define KAD_FORWARD    2
232 #define KAD_BACKWARD   3
233 #define KAD_SYNC_DIM   4
234 
235 typedef int (*kad_op_f)(kad_node_t*, int);
236 extern kad_op_f kad_op_list[KAD_MAX_OP];
237 extern char *kad_op_name[KAD_MAX_OP];
238 
kad_len(const kad_node_t * p)239 static inline int kad_len(const kad_node_t *p) /* calculate the size of p->x */
240 {
241 	int n = 1, i;
242 	for (i = 0; i < p->n_d; ++i) n *= p->d[i];
243 	return n;
244 }
245 
246 /* Additions by Rspamd */
247 void kad_sgemm_simple (int trans_A, int trans_B, int M, int N, int K, const float *A, const float *B, float *C);
248 /**
249  * Calculate eigenvectors and eigenvalues
250  * @param N dimensions of A (must be NxN)
 * @param A input matrix (part of it will be destroyed, so copy if needed); on return its leading columns hold the eigenvectors
252  * @param eigenvals eigenvalues, must be N elements vector
253  */
254 bool kad_ssyev_simple (int N, float *A, float *eigenvals);
255 
256 #endif
257