Lines Matching refs:l

13 static void increment_layer(layer *l, int steps)  in increment_layer()  argument
15 int num = l->outputs*l->batch*steps; in increment_layer()
16 l->output += num; in increment_layer()
17 l->delta += num; in increment_layer()
18 l->x += num; in increment_layer()
19 l->x_norm += num; in increment_layer()
22 l->output_gpu += num; in increment_layer()
23 l->delta_gpu += num; in increment_layer()
24 l->x_gpu += num; in increment_layer()
25 l->x_norm_gpu += num; in increment_layer()
33 layer l = { (LAYER_TYPE)0 }; in make_lstm_layer() local
34 l.batch = batch; in make_lstm_layer()
35 l.type = LSTM; in make_lstm_layer()
36 l.steps = steps; in make_lstm_layer()
37 l.inputs = inputs; in make_lstm_layer()
38 l.out_w = 1; in make_lstm_layer()
39 l.out_h = 1; in make_lstm_layer()
40 l.out_c = outputs; in make_lstm_layer()
42 l.uf = (layer*)xcalloc(1, sizeof(layer)); in make_lstm_layer()
44 *(l.uf) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); in make_lstm_layer()
45 l.uf->batch = batch; in make_lstm_layer()
46 if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size; in make_lstm_layer()
48 l.ui = (layer*)xcalloc(1, sizeof(layer)); in make_lstm_layer()
50 *(l.ui) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); in make_lstm_layer()
51 l.ui->batch = batch; in make_lstm_layer()
52 if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size; in make_lstm_layer()
54 l.ug = (layer*)xcalloc(1, sizeof(layer)); in make_lstm_layer()
56 *(l.ug) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); in make_lstm_layer()
57 l.ug->batch = batch; in make_lstm_layer()
58 if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size; in make_lstm_layer()
60 l.uo = (layer*)xcalloc(1, sizeof(layer)); in make_lstm_layer()
62 *(l.uo) = make_connected_layer(batch, steps, inputs, outputs, LINEAR, batch_normalize); in make_lstm_layer()
63 l.uo->batch = batch; in make_lstm_layer()
64 if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size; in make_lstm_layer()
66 l.wf = (layer*)xcalloc(1, sizeof(layer)); in make_lstm_layer()
68 *(l.wf) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); in make_lstm_layer()
69 l.wf->batch = batch; in make_lstm_layer()
70 if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size; in make_lstm_layer()
72 l.wi = (layer*)xcalloc(1, sizeof(layer)); in make_lstm_layer()
74 *(l.wi) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); in make_lstm_layer()
75 l.wi->batch = batch; in make_lstm_layer()
76 if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size; in make_lstm_layer()
78 l.wg = (layer*)xcalloc(1, sizeof(layer)); in make_lstm_layer()
80 *(l.wg) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); in make_lstm_layer()
81 l.wg->batch = batch; in make_lstm_layer()
82 if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size; in make_lstm_layer()
84 l.wo = (layer*)xcalloc(1, sizeof(layer)); in make_lstm_layer()
86 *(l.wo) = make_connected_layer(batch, steps, outputs, outputs, LINEAR, batch_normalize); in make_lstm_layer()
87 l.wo->batch = batch; in make_lstm_layer()
88 if (l.workspace_size < l.wo->workspace_size) l.workspace_size = l.wo->workspace_size; in make_lstm_layer()
90 l.batch_normalize = batch_normalize; in make_lstm_layer()
91 l.outputs = outputs; in make_lstm_layer()
93 l.output = (float*)xcalloc(outputs * batch * steps, sizeof(float)); in make_lstm_layer()
94 l.state = (float*)xcalloc(outputs * batch, sizeof(float)); in make_lstm_layer()
96 l.forward = forward_lstm_layer; in make_lstm_layer()
97 l.update = update_lstm_layer; in make_lstm_layer()
98 l.backward = backward_lstm_layer; in make_lstm_layer()
100 l.prev_state_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_lstm_layer()
101 l.prev_cell_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_lstm_layer()
102 l.cell_cpu = (float*)xcalloc(batch*outputs*steps, sizeof(float)); in make_lstm_layer()
104 l.f_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_lstm_layer()
105 l.i_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_lstm_layer()
106 l.g_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_lstm_layer()
107 l.o_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_lstm_layer()
108 l.c_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_lstm_layer()
109 l.h_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_lstm_layer()
110 l.temp_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_lstm_layer()
111 l.temp2_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_lstm_layer()
112 l.temp3_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_lstm_layer()
113 l.dc_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_lstm_layer()
114 l.dh_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_lstm_layer()
117 l.forward_gpu = forward_lstm_layer_gpu; in make_lstm_layer()
118 l.backward_gpu = backward_lstm_layer_gpu; in make_lstm_layer()
119 l.update_gpu = update_lstm_layer_gpu; in make_lstm_layer()
123 l.output_gpu = cuda_make_array(0, batch*outputs*steps); in make_lstm_layer()
124 l.delta_gpu = cuda_make_array(0, batch*l.outputs*steps); in make_lstm_layer()
126 l.prev_state_gpu = cuda_make_array(0, batch*outputs); in make_lstm_layer()
127 l.prev_cell_gpu = cuda_make_array(0, batch*outputs); in make_lstm_layer()
128 l.cell_gpu = cuda_make_array(0, batch*outputs*steps); in make_lstm_layer()
130 l.f_gpu = cuda_make_array(0, batch*outputs); in make_lstm_layer()
131 l.i_gpu = cuda_make_array(0, batch*outputs); in make_lstm_layer()
132 l.g_gpu = cuda_make_array(0, batch*outputs); in make_lstm_layer()
133 l.o_gpu = cuda_make_array(0, batch*outputs); in make_lstm_layer()
134 l.c_gpu = cuda_make_array(0, batch*outputs); in make_lstm_layer()
135 l.h_gpu = cuda_make_array(0, batch*outputs); in make_lstm_layer()
136 l.temp_gpu = cuda_make_array(0, batch*outputs); in make_lstm_layer()
137 l.temp2_gpu = cuda_make_array(0, batch*outputs); in make_lstm_layer()
138 l.temp3_gpu = cuda_make_array(0, batch*outputs); in make_lstm_layer()
139 l.dc_gpu = cuda_make_array(0, batch*outputs); in make_lstm_layer()
140 l.dh_gpu = cuda_make_array(0, batch*outputs); in make_lstm_layer()
157 return l; in make_lstm_layer()
160 void update_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay) in update_lstm_layer() argument
162 update_connected_layer(*(l.wf), batch, learning_rate, momentum, decay); in update_lstm_layer()
163 update_connected_layer(*(l.wi), batch, learning_rate, momentum, decay); in update_lstm_layer()
164 update_connected_layer(*(l.wg), batch, learning_rate, momentum, decay); in update_lstm_layer()
165 update_connected_layer(*(l.wo), batch, learning_rate, momentum, decay); in update_lstm_layer()
166 update_connected_layer(*(l.uf), batch, learning_rate, momentum, decay); in update_lstm_layer()
167 update_connected_layer(*(l.ui), batch, learning_rate, momentum, decay); in update_lstm_layer()
168 update_connected_layer(*(l.ug), batch, learning_rate, momentum, decay); in update_lstm_layer()
169 update_connected_layer(*(l.uo), batch, learning_rate, momentum, decay); in update_lstm_layer()
172 void forward_lstm_layer(layer l, network_state state) in forward_lstm_layer() argument
178 layer wf = *(l.wf); in forward_lstm_layer()
179 layer wi = *(l.wi); in forward_lstm_layer()
180 layer wg = *(l.wg); in forward_lstm_layer()
181 layer wo = *(l.wo); in forward_lstm_layer()
183 layer uf = *(l.uf); in forward_lstm_layer()
184 layer ui = *(l.ui); in forward_lstm_layer()
185 layer ug = *(l.ug); in forward_lstm_layer()
186 layer uo = *(l.uo); in forward_lstm_layer()
188 fill_cpu(l.outputs * l.batch * l.steps, 0, wf.delta, 1); in forward_lstm_layer()
189 fill_cpu(l.outputs * l.batch * l.steps, 0, wi.delta, 1); in forward_lstm_layer()
190 fill_cpu(l.outputs * l.batch * l.steps, 0, wg.delta, 1); in forward_lstm_layer()
191 fill_cpu(l.outputs * l.batch * l.steps, 0, wo.delta, 1); in forward_lstm_layer()
193 fill_cpu(l.outputs * l.batch * l.steps, 0, uf.delta, 1); in forward_lstm_layer()
194 fill_cpu(l.outputs * l.batch * l.steps, 0, ui.delta, 1); in forward_lstm_layer()
195 fill_cpu(l.outputs * l.batch * l.steps, 0, ug.delta, 1); in forward_lstm_layer()
196 fill_cpu(l.outputs * l.batch * l.steps, 0, uo.delta, 1); in forward_lstm_layer()
198 fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); in forward_lstm_layer()
201 for (i = 0; i < l.steps; ++i) { in forward_lstm_layer()
202 s.input = l.h_cpu; in forward_lstm_layer()
214 copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); in forward_lstm_layer()
215 axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); in forward_lstm_layer()
217 copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); in forward_lstm_layer()
218 axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); in forward_lstm_layer()
220 copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); in forward_lstm_layer()
221 axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); in forward_lstm_layer()
223 copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); in forward_lstm_layer()
224 axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); in forward_lstm_layer()
226 activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); in forward_lstm_layer()
227 activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); in forward_lstm_layer()
228 activate_array(l.g_cpu, l.outputs*l.batch, TANH); in forward_lstm_layer()
229 activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); in forward_lstm_layer()
231 copy_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); in forward_lstm_layer()
232 mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); in forward_lstm_layer()
233 mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.c_cpu, 1); in forward_lstm_layer()
234 axpy_cpu(l.outputs*l.batch, 1, l.temp_cpu, 1, l.c_cpu, 1); in forward_lstm_layer()
236 copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.h_cpu, 1); in forward_lstm_layer()
237 activate_array(l.h_cpu, l.outputs*l.batch, TANH); in forward_lstm_layer()
238 mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.h_cpu, 1); in forward_lstm_layer()
240 copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.cell_cpu, 1); in forward_lstm_layer()
241 copy_cpu(l.outputs*l.batch, l.h_cpu, 1, l.output, 1); in forward_lstm_layer()
243 state.input += l.inputs*l.batch; in forward_lstm_layer()
244 l.output += l.outputs*l.batch; in forward_lstm_layer()
245 l.cell_cpu += l.outputs*l.batch; in forward_lstm_layer()
259 void backward_lstm_layer(layer l, network_state state) in backward_lstm_layer() argument
265 layer wf = *(l.wf); in backward_lstm_layer()
266 layer wi = *(l.wi); in backward_lstm_layer()
267 layer wg = *(l.wg); in backward_lstm_layer()
268 layer wo = *(l.wo); in backward_lstm_layer()
270 layer uf = *(l.uf); in backward_lstm_layer()
271 layer ui = *(l.ui); in backward_lstm_layer()
272 layer ug = *(l.ug); in backward_lstm_layer()
273 layer uo = *(l.uo); in backward_lstm_layer()
275 increment_layer(&wf, l.steps - 1); in backward_lstm_layer()
276 increment_layer(&wi, l.steps - 1); in backward_lstm_layer()
277 increment_layer(&wg, l.steps - 1); in backward_lstm_layer()
278 increment_layer(&wo, l.steps - 1); in backward_lstm_layer()
280 increment_layer(&uf, l.steps - 1); in backward_lstm_layer()
281 increment_layer(&ui, l.steps - 1); in backward_lstm_layer()
282 increment_layer(&ug, l.steps - 1); in backward_lstm_layer()
283 increment_layer(&uo, l.steps - 1); in backward_lstm_layer()
285 state.input += l.inputs*l.batch*(l.steps - 1); in backward_lstm_layer()
286 if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1); in backward_lstm_layer()
288 l.output += l.outputs*l.batch*(l.steps - 1); in backward_lstm_layer()
289 l.cell_cpu += l.outputs*l.batch*(l.steps - 1); in backward_lstm_layer()
290 l.delta += l.outputs*l.batch*(l.steps - 1); in backward_lstm_layer()
292 for (i = l.steps - 1; i >= 0; --i) { in backward_lstm_layer()
293 … if (i != 0) copy_cpu(l.outputs*l.batch, l.cell_cpu - l.outputs*l.batch, 1, l.prev_cell_cpu, 1); in backward_lstm_layer()
294 copy_cpu(l.outputs*l.batch, l.cell_cpu, 1, l.c_cpu, 1); in backward_lstm_layer()
295 … if (i != 0) copy_cpu(l.outputs*l.batch, l.output - l.outputs*l.batch, 1, l.prev_state_cpu, 1); in backward_lstm_layer()
296 copy_cpu(l.outputs*l.batch, l.output, 1, l.h_cpu, 1); in backward_lstm_layer()
298 l.dh_cpu = (i == 0) ? 0 : l.delta - l.outputs*l.batch; in backward_lstm_layer()
300 copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); in backward_lstm_layer()
301 axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); in backward_lstm_layer()
303 copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); in backward_lstm_layer()
304 axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); in backward_lstm_layer()
306 copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); in backward_lstm_layer()
307 axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); in backward_lstm_layer()
309 copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); in backward_lstm_layer()
310 axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); in backward_lstm_layer()
312 activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); in backward_lstm_layer()
313 activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); in backward_lstm_layer()
314 activate_array(l.g_cpu, l.outputs*l.batch, TANH); in backward_lstm_layer()
315 activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); in backward_lstm_layer()
317 copy_cpu(l.outputs*l.batch, l.delta, 1, l.temp3_cpu, 1); in backward_lstm_layer()
319 copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); in backward_lstm_layer()
320 activate_array(l.temp_cpu, l.outputs*l.batch, TANH); in backward_lstm_layer()
322 copy_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp2_cpu, 1); in backward_lstm_layer()
323 mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.temp2_cpu, 1); in backward_lstm_layer()
325 gradient_array(l.temp_cpu, l.outputs*l.batch, TANH, l.temp2_cpu); in backward_lstm_layer()
326 axpy_cpu(l.outputs*l.batch, 1, l.dc_cpu, 1, l.temp2_cpu, 1); in backward_lstm_layer()
328 copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); in backward_lstm_layer()
329 activate_array(l.temp_cpu, l.outputs*l.batch, TANH); in backward_lstm_layer()
330 mul_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp_cpu, 1); in backward_lstm_layer()
331 gradient_array(l.o_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); in backward_lstm_layer()
332 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wo.delta, 1); in backward_lstm_layer()
333 s.input = l.prev_state_cpu; in backward_lstm_layer()
334 s.delta = l.dh_cpu; in backward_lstm_layer()
337 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uo.delta, 1); in backward_lstm_layer()
342 copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); in backward_lstm_layer()
343 mul_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); in backward_lstm_layer()
344 gradient_array(l.g_cpu, l.outputs*l.batch, TANH, l.temp_cpu); in backward_lstm_layer()
345 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wg.delta, 1); in backward_lstm_layer()
346 s.input = l.prev_state_cpu; in backward_lstm_layer()
347 s.delta = l.dh_cpu; in backward_lstm_layer()
350 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ug.delta, 1); in backward_lstm_layer()
355 copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); in backward_lstm_layer()
356 mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); in backward_lstm_layer()
357 gradient_array(l.i_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); in backward_lstm_layer()
358 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wi.delta, 1); in backward_lstm_layer()
359 s.input = l.prev_state_cpu; in backward_lstm_layer()
360 s.delta = l.dh_cpu; in backward_lstm_layer()
363 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ui.delta, 1); in backward_lstm_layer()
368 copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); in backward_lstm_layer()
369 mul_cpu(l.outputs*l.batch, l.prev_cell_cpu, 1, l.temp_cpu, 1); in backward_lstm_layer()
370 gradient_array(l.f_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); in backward_lstm_layer()
371 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wf.delta, 1); in backward_lstm_layer()
372 s.input = l.prev_state_cpu; in backward_lstm_layer()
373 s.delta = l.dh_cpu; in backward_lstm_layer()
376 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uf.delta, 1); in backward_lstm_layer()
381 copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); in backward_lstm_layer()
382 mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.temp_cpu, 1); in backward_lstm_layer()
383 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, l.dc_cpu, 1); in backward_lstm_layer()
385 state.input -= l.inputs*l.batch; in backward_lstm_layer()
386 if (state.delta) state.delta -= l.inputs*l.batch; in backward_lstm_layer()
387 l.output -= l.outputs*l.batch; in backward_lstm_layer()
388 l.cell_cpu -= l.outputs*l.batch; in backward_lstm_layer()
389 l.delta -= l.outputs*l.batch; in backward_lstm_layer()
404 void update_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay, fl… in update_lstm_layer_gpu() argument
406 update_connected_layer_gpu(*(l.wf), batch, learning_rate, momentum, decay, loss_scale); in update_lstm_layer_gpu()
407 update_connected_layer_gpu(*(l.wi), batch, learning_rate, momentum, decay, loss_scale); in update_lstm_layer_gpu()
408 update_connected_layer_gpu(*(l.wg), batch, learning_rate, momentum, decay, loss_scale); in update_lstm_layer_gpu()
409 update_connected_layer_gpu(*(l.wo), batch, learning_rate, momentum, decay, loss_scale); in update_lstm_layer_gpu()
410 update_connected_layer_gpu(*(l.uf), batch, learning_rate, momentum, decay, loss_scale); in update_lstm_layer_gpu()
411 update_connected_layer_gpu(*(l.ui), batch, learning_rate, momentum, decay, loss_scale); in update_lstm_layer_gpu()
412 update_connected_layer_gpu(*(l.ug), batch, learning_rate, momentum, decay, loss_scale); in update_lstm_layer_gpu()
413 update_connected_layer_gpu(*(l.uo), batch, learning_rate, momentum, decay, loss_scale); in update_lstm_layer_gpu()
416 void forward_lstm_layer_gpu(layer l, network_state state) in forward_lstm_layer_gpu() argument
422 layer wf = *(l.wf); in forward_lstm_layer_gpu()
423 layer wi = *(l.wi); in forward_lstm_layer_gpu()
424 layer wg = *(l.wg); in forward_lstm_layer_gpu()
425 layer wo = *(l.wo); in forward_lstm_layer_gpu()
427 layer uf = *(l.uf); in forward_lstm_layer_gpu()
428 layer ui = *(l.ui); in forward_lstm_layer_gpu()
429 layer ug = *(l.ug); in forward_lstm_layer_gpu()
430 layer uo = *(l.uo); in forward_lstm_layer_gpu()
432 fill_ongpu(l.outputs * l.batch * l.steps, 0, wf.delta_gpu, 1); in forward_lstm_layer_gpu()
433 fill_ongpu(l.outputs * l.batch * l.steps, 0, wi.delta_gpu, 1); in forward_lstm_layer_gpu()
434 fill_ongpu(l.outputs * l.batch * l.steps, 0, wg.delta_gpu, 1); in forward_lstm_layer_gpu()
435 fill_ongpu(l.outputs * l.batch * l.steps, 0, wo.delta_gpu, 1); in forward_lstm_layer_gpu()
437 fill_ongpu(l.outputs * l.batch * l.steps, 0, uf.delta_gpu, 1); in forward_lstm_layer_gpu()
438 fill_ongpu(l.outputs * l.batch * l.steps, 0, ui.delta_gpu, 1); in forward_lstm_layer_gpu()
439 fill_ongpu(l.outputs * l.batch * l.steps, 0, ug.delta_gpu, 1); in forward_lstm_layer_gpu()
440 fill_ongpu(l.outputs * l.batch * l.steps, 0, uo.delta_gpu, 1); in forward_lstm_layer_gpu()
442 fill_ongpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); in forward_lstm_layer_gpu()
445 for (i = 0; i < l.steps; ++i) { in forward_lstm_layer_gpu()
446 s.input = l.h_gpu; in forward_lstm_layer_gpu()
458 copy_ongpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); in forward_lstm_layer_gpu()
459 axpy_ongpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); in forward_lstm_layer_gpu()
461 copy_ongpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); in forward_lstm_layer_gpu()
462 axpy_ongpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); in forward_lstm_layer_gpu()
464 copy_ongpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); in forward_lstm_layer_gpu()
465 axpy_ongpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); in forward_lstm_layer_gpu()
467 copy_ongpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1); in forward_lstm_layer_gpu()
468 axpy_ongpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); in forward_lstm_layer_gpu()
470 activate_array_ongpu(l.f_gpu, l.outputs*l.batch, LOGISTIC); in forward_lstm_layer_gpu()
471 activate_array_ongpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); in forward_lstm_layer_gpu()
472 activate_array_ongpu(l.g_gpu, l.outputs*l.batch, TANH); in forward_lstm_layer_gpu()
473 activate_array_ongpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); in forward_lstm_layer_gpu()
475 copy_ongpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); in forward_lstm_layer_gpu()
476 mul_ongpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); in forward_lstm_layer_gpu()
477 mul_ongpu(l.outputs*l.batch, l.f_gpu, 1, l.c_gpu, 1); in forward_lstm_layer_gpu()
478 axpy_ongpu(l.outputs*l.batch, 1, l.temp_gpu, 1, l.c_gpu, 1); in forward_lstm_layer_gpu()
480 copy_ongpu(l.outputs*l.batch, l.c_gpu, 1, l.h_gpu, 1); in forward_lstm_layer_gpu()
481 activate_array_ongpu(l.h_gpu, l.outputs*l.batch, TANH); in forward_lstm_layer_gpu()
482 mul_ongpu(l.outputs*l.batch, l.o_gpu, 1, l.h_gpu, 1); in forward_lstm_layer_gpu()
484 copy_ongpu(l.outputs*l.batch, l.c_gpu, 1, l.cell_gpu, 1); in forward_lstm_layer_gpu()
485 copy_ongpu(l.outputs*l.batch, l.h_gpu, 1, l.output_gpu, 1); in forward_lstm_layer_gpu()
487 state.input += l.inputs*l.batch; in forward_lstm_layer_gpu()
488 l.output_gpu += l.outputs*l.batch; in forward_lstm_layer_gpu()
489 l.cell_gpu += l.outputs*l.batch; in forward_lstm_layer_gpu()
503 void backward_lstm_layer_gpu(layer l, network_state state) in backward_lstm_layer_gpu() argument
509 layer wf = *(l.wf); in backward_lstm_layer_gpu()
510 layer wi = *(l.wi); in backward_lstm_layer_gpu()
511 layer wg = *(l.wg); in backward_lstm_layer_gpu()
512 layer wo = *(l.wo); in backward_lstm_layer_gpu()
514 layer uf = *(l.uf); in backward_lstm_layer_gpu()
515 layer ui = *(l.ui); in backward_lstm_layer_gpu()
516 layer ug = *(l.ug); in backward_lstm_layer_gpu()
517 layer uo = *(l.uo); in backward_lstm_layer_gpu()
519 increment_layer(&wf, l.steps - 1); in backward_lstm_layer_gpu()
520 increment_layer(&wi, l.steps - 1); in backward_lstm_layer_gpu()
521 increment_layer(&wg, l.steps - 1); in backward_lstm_layer_gpu()
522 increment_layer(&wo, l.steps - 1); in backward_lstm_layer_gpu()
524 increment_layer(&uf, l.steps - 1); in backward_lstm_layer_gpu()
525 increment_layer(&ui, l.steps - 1); in backward_lstm_layer_gpu()
526 increment_layer(&ug, l.steps - 1); in backward_lstm_layer_gpu()
527 increment_layer(&uo, l.steps - 1); in backward_lstm_layer_gpu()
529 state.input += l.inputs*l.batch*(l.steps - 1); in backward_lstm_layer_gpu()
530 if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1); in backward_lstm_layer_gpu()
532 l.output_gpu += l.outputs*l.batch*(l.steps - 1); in backward_lstm_layer_gpu()
533 l.cell_gpu += l.outputs*l.batch*(l.steps - 1); in backward_lstm_layer_gpu()
534 l.delta_gpu += l.outputs*l.batch*(l.steps - 1); in backward_lstm_layer_gpu()
536 for (i = l.steps - 1; i >= 0; --i) { in backward_lstm_layer_gpu()
537 … if (i != 0) copy_ongpu(l.outputs*l.batch, l.cell_gpu - l.outputs*l.batch, 1, l.prev_cell_gpu, 1); in backward_lstm_layer_gpu()
538 copy_ongpu(l.outputs*l.batch, l.cell_gpu, 1, l.c_gpu, 1); in backward_lstm_layer_gpu()
539 …if (i != 0) copy_ongpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.prev_state_gpu, 1… in backward_lstm_layer_gpu()
540 copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.h_gpu, 1); in backward_lstm_layer_gpu()
542 l.dh_gpu = (i == 0) ? 0 : l.delta_gpu - l.outputs*l.batch; in backward_lstm_layer_gpu()
544 copy_ongpu(l.outputs*l.batch, wf.output_gpu, 1, l.f_gpu, 1); in backward_lstm_layer_gpu()
545 axpy_ongpu(l.outputs*l.batch, 1, uf.output_gpu, 1, l.f_gpu, 1); in backward_lstm_layer_gpu()
547 copy_ongpu(l.outputs*l.batch, wi.output_gpu, 1, l.i_gpu, 1); in backward_lstm_layer_gpu()
548 axpy_ongpu(l.outputs*l.batch, 1, ui.output_gpu, 1, l.i_gpu, 1); in backward_lstm_layer_gpu()
550 copy_ongpu(l.outputs*l.batch, wg.output_gpu, 1, l.g_gpu, 1); in backward_lstm_layer_gpu()
551 axpy_ongpu(l.outputs*l.batch, 1, ug.output_gpu, 1, l.g_gpu, 1); in backward_lstm_layer_gpu()
553 copy_ongpu(l.outputs*l.batch, wo.output_gpu, 1, l.o_gpu, 1); in backward_lstm_layer_gpu()
554 axpy_ongpu(l.outputs*l.batch, 1, uo.output_gpu, 1, l.o_gpu, 1); in backward_lstm_layer_gpu()
556 activate_array_ongpu(l.f_gpu, l.outputs*l.batch, LOGISTIC); in backward_lstm_layer_gpu()
557 activate_array_ongpu(l.i_gpu, l.outputs*l.batch, LOGISTIC); in backward_lstm_layer_gpu()
558 activate_array_ongpu(l.g_gpu, l.outputs*l.batch, TANH); in backward_lstm_layer_gpu()
559 activate_array_ongpu(l.o_gpu, l.outputs*l.batch, LOGISTIC); in backward_lstm_layer_gpu()
561 copy_ongpu(l.outputs*l.batch, l.delta_gpu, 1, l.temp3_gpu, 1); in backward_lstm_layer_gpu()
563 copy_ongpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1); in backward_lstm_layer_gpu()
564 activate_array_ongpu(l.temp_gpu, l.outputs*l.batch, TANH); in backward_lstm_layer_gpu()
566 copy_ongpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp2_gpu, 1); in backward_lstm_layer_gpu()
567 mul_ongpu(l.outputs*l.batch, l.o_gpu, 1, l.temp2_gpu, 1); in backward_lstm_layer_gpu()
569 gradient_array_ongpu(l.temp_gpu, l.outputs*l.batch, TANH, l.temp2_gpu); in backward_lstm_layer_gpu()
570 axpy_ongpu(l.outputs*l.batch, 1, l.dc_gpu, 1, l.temp2_gpu, 1); in backward_lstm_layer_gpu()
572 copy_ongpu(l.outputs*l.batch, l.c_gpu, 1, l.temp_gpu, 1); in backward_lstm_layer_gpu()
573 activate_array_ongpu(l.temp_gpu, l.outputs*l.batch, TANH); in backward_lstm_layer_gpu()
574 mul_ongpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp_gpu, 1); in backward_lstm_layer_gpu()
575 gradient_array_ongpu(l.o_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); in backward_lstm_layer_gpu()
576 copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, wo.delta_gpu, 1); in backward_lstm_layer_gpu()
577 s.input = l.prev_state_gpu; in backward_lstm_layer_gpu()
578 s.delta = l.dh_gpu; in backward_lstm_layer_gpu()
581 copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, uo.delta_gpu, 1); in backward_lstm_layer_gpu()
586 copy_ongpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); in backward_lstm_layer_gpu()
587 mul_ongpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); in backward_lstm_layer_gpu()
588 gradient_array_ongpu(l.g_gpu, l.outputs*l.batch, TANH, l.temp_gpu); in backward_lstm_layer_gpu()
589 copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, wg.delta_gpu, 1); in backward_lstm_layer_gpu()
590 s.input = l.prev_state_gpu; in backward_lstm_layer_gpu()
591 s.delta = l.dh_gpu; in backward_lstm_layer_gpu()
594 copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, ug.delta_gpu, 1); in backward_lstm_layer_gpu()
599 copy_ongpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); in backward_lstm_layer_gpu()
600 mul_ongpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); in backward_lstm_layer_gpu()
601 gradient_array_ongpu(l.i_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); in backward_lstm_layer_gpu()
602 copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, wi.delta_gpu, 1); in backward_lstm_layer_gpu()
603 s.input = l.prev_state_gpu; in backward_lstm_layer_gpu()
604 s.delta = l.dh_gpu; in backward_lstm_layer_gpu()
607 copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, ui.delta_gpu, 1); in backward_lstm_layer_gpu()
612 copy_ongpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); in backward_lstm_layer_gpu()
613 mul_ongpu(l.outputs*l.batch, l.prev_cell_gpu, 1, l.temp_gpu, 1); in backward_lstm_layer_gpu()
614 gradient_array_ongpu(l.f_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); in backward_lstm_layer_gpu()
615 copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, wf.delta_gpu, 1); in backward_lstm_layer_gpu()
616 s.input = l.prev_state_gpu; in backward_lstm_layer_gpu()
617 s.delta = l.dh_gpu; in backward_lstm_layer_gpu()
620 copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, uf.delta_gpu, 1); in backward_lstm_layer_gpu()
625 copy_ongpu(l.outputs*l.batch, l.temp2_gpu, 1, l.temp_gpu, 1); in backward_lstm_layer_gpu()
626 mul_ongpu(l.outputs*l.batch, l.f_gpu, 1, l.temp_gpu, 1); in backward_lstm_layer_gpu()
627 copy_ongpu(l.outputs*l.batch, l.temp_gpu, 1, l.dc_gpu, 1); in backward_lstm_layer_gpu()
629 state.input -= l.inputs*l.batch; in backward_lstm_layer_gpu()
630 if (state.delta) state.delta -= l.inputs*l.batch; in backward_lstm_layer_gpu()
631 l.output_gpu -= l.outputs*l.batch; in backward_lstm_layer_gpu()
632 l.cell_gpu -= l.outputs*l.batch; in backward_lstm_layer_gpu()
633 l.delta_gpu -= l.outputs*l.batch; in backward_lstm_layer_gpu()