Lines Matching refs:l

18 static void increment_layer(layer *l, int steps)  in increment_layer()  argument
20 int num = l->outputs*l->batch*steps; in increment_layer()
21 l->output += num; in increment_layer()
22 l->delta += num; in increment_layer()
23 l->x += num; in increment_layer()
24 l->x_norm += num; in increment_layer()
27 l->output_gpu += num; in increment_layer()
28 l->delta_gpu += num; in increment_layer()
29 l->x_gpu += num; in increment_layer()
30 l->x_norm_gpu += num; in increment_layer()
50 layer l = { (LAYER_TYPE)0 }; in make_conv_lstm_layer() local
51 l.train = train; in make_conv_lstm_layer()
52 l.batch = batch; in make_conv_lstm_layer()
53 l.type = CONV_LSTM; in make_conv_lstm_layer()
54 l.steps = steps; in make_conv_lstm_layer()
55 l.size = size; in make_conv_lstm_layer()
56 l.stride = stride; in make_conv_lstm_layer()
57 l.dilation = dilation; in make_conv_lstm_layer()
58 l.pad = pad; in make_conv_lstm_layer()
59 l.h = h; in make_conv_lstm_layer()
60 l.w = w; in make_conv_lstm_layer()
61 l.c = c; in make_conv_lstm_layer()
62 l.groups = groups; in make_conv_lstm_layer()
63 l.out_c = output_filters; in make_conv_lstm_layer()
64 l.inputs = h * w * c; in make_conv_lstm_layer()
65 l.xnor = xnor; in make_conv_lstm_layer()
66 l.peephole = peephole; in make_conv_lstm_layer()
69 l.uf = (layer*)xcalloc(1, sizeof(layer)); in make_conv_lstm_layer()
70 …*(l.uf) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, st… in make_conv_lstm_layer()
71 l.uf->batch = batch; in make_conv_lstm_layer()
72 if (l.workspace_size < l.uf->workspace_size) l.workspace_size = l.uf->workspace_size; in make_conv_lstm_layer()
74 l.ui = (layer*)xcalloc(1, sizeof(layer)); in make_conv_lstm_layer()
75 …*(l.ui) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, st… in make_conv_lstm_layer()
76 l.ui->batch = batch; in make_conv_lstm_layer()
77 if (l.workspace_size < l.ui->workspace_size) l.workspace_size = l.ui->workspace_size; in make_conv_lstm_layer()
79 l.ug = (layer*)xcalloc(1, sizeof(layer)); in make_conv_lstm_layer()
80 …*(l.ug) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, st… in make_conv_lstm_layer()
81 l.ug->batch = batch; in make_conv_lstm_layer()
82 if (l.workspace_size < l.ug->workspace_size) l.workspace_size = l.ug->workspace_size; in make_conv_lstm_layer()
84 l.uo = (layer*)xcalloc(1, sizeof(layer)); in make_conv_lstm_layer()
85 …*(l.uo) = make_convolutional_layer(batch, steps, h, w, c, output_filters, groups, size, stride, st… in make_conv_lstm_layer()
86 l.uo->batch = batch; in make_conv_lstm_layer()
87 if (l.workspace_size < l.uo->workspace_size) l.workspace_size = l.uo->workspace_size; in make_conv_lstm_layer()
91 l.wf = (layer*)xcalloc(1, sizeof(layer)); in make_conv_lstm_layer()
92 …*(l.wf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, siz… in make_conv_lstm_layer()
93 l.wf->batch = batch; in make_conv_lstm_layer()
94 if (l.workspace_size < l.wf->workspace_size) l.workspace_size = l.wf->workspace_size; in make_conv_lstm_layer()
96 l.wi = (layer*)xcalloc(1, sizeof(layer)); in make_conv_lstm_layer()
97 …*(l.wi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, siz… in make_conv_lstm_layer()
98 l.wi->batch = batch; in make_conv_lstm_layer()
99 if (l.workspace_size < l.wi->workspace_size) l.workspace_size = l.wi->workspace_size; in make_conv_lstm_layer()
101 l.wg = (layer*)xcalloc(1, sizeof(layer)); in make_conv_lstm_layer()
102 …*(l.wg) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, siz… in make_conv_lstm_layer()
103 l.wg->batch = batch; in make_conv_lstm_layer()
104 if (l.workspace_size < l.wg->workspace_size) l.workspace_size = l.wg->workspace_size; in make_conv_lstm_layer()
106 l.wo = (layer*)xcalloc(1, sizeof(layer)); in make_conv_lstm_layer()
107 …*(l.wo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, siz… in make_conv_lstm_layer()
108 l.wo->batch = batch; in make_conv_lstm_layer()
109 if (l.workspace_size < l.wo->workspace_size) l.workspace_size = l.wo->workspace_size; in make_conv_lstm_layer()
113 l.vf = (layer*)xcalloc(1, sizeof(layer)); in make_conv_lstm_layer()
114 if (l.peephole) { in make_conv_lstm_layer()
115 …*(l.vf) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, siz… in make_conv_lstm_layer()
116 l.vf->batch = batch; in make_conv_lstm_layer()
117 if (l.workspace_size < l.vf->workspace_size) l.workspace_size = l.vf->workspace_size; in make_conv_lstm_layer()
120 l.vi = (layer*)xcalloc(1, sizeof(layer)); in make_conv_lstm_layer()
121 if (l.peephole) { in make_conv_lstm_layer()
122 …*(l.vi) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, siz… in make_conv_lstm_layer()
123 l.vi->batch = batch; in make_conv_lstm_layer()
124 if (l.workspace_size < l.vi->workspace_size) l.workspace_size = l.vi->workspace_size; in make_conv_lstm_layer()
127 l.vo = (layer*)xcalloc(1, sizeof(layer)); in make_conv_lstm_layer()
128 if (l.peephole) { in make_conv_lstm_layer()
129 …*(l.vo) = make_convolutional_layer(batch, steps, h, w, output_filters, output_filters, groups, siz… in make_conv_lstm_layer()
130 l.vo->batch = batch; in make_conv_lstm_layer()
131 if (l.workspace_size < l.vo->workspace_size) l.workspace_size = l.vo->workspace_size; in make_conv_lstm_layer()
135 l.batch_normalize = batch_normalize; in make_conv_lstm_layer()
137 l.out_h = l.wo->out_h; in make_conv_lstm_layer()
138 l.out_w = l.wo->out_w; in make_conv_lstm_layer()
139 l.outputs = l.wo->outputs; in make_conv_lstm_layer()
140 int outputs = l.outputs; in make_conv_lstm_layer()
141 l.inputs = w*h*c; in make_conv_lstm_layer()
143 assert(l.wo->outputs == l.uo->outputs); in make_conv_lstm_layer()
145 l.output = (float*)xcalloc(outputs * batch * steps, sizeof(float)); in make_conv_lstm_layer()
148 l.forward = forward_conv_lstm_layer; in make_conv_lstm_layer()
149 l.update = update_conv_lstm_layer; in make_conv_lstm_layer()
150 l.backward = backward_conv_lstm_layer; in make_conv_lstm_layer()
152 l.prev_state_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
153 l.prev_cell_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
154 l.cell_cpu = (float*)xcalloc(batch*outputs*steps, sizeof(float)); in make_conv_lstm_layer()
156 l.f_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
157 l.i_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
158 l.g_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
159 l.o_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
160 l.c_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
161 l.stored_c_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
162 l.h_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
163 l.stored_h_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
164 l.temp_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
165 l.temp2_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
166 l.temp3_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
167 l.dc_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
168 l.dh_cpu = (float*)xcalloc(batch*outputs, sizeof(float)); in make_conv_lstm_layer()
171 l.forward_gpu = forward_conv_lstm_layer_gpu; in make_conv_lstm_layer()
172 l.backward_gpu = backward_conv_lstm_layer_gpu; in make_conv_lstm_layer()
173 l.update_gpu = update_conv_lstm_layer_gpu; in make_conv_lstm_layer()
177 l.output_gpu = cuda_make_array(0, batch*outputs*steps); in make_conv_lstm_layer()
178 l.delta_gpu = cuda_make_array(0, batch*l.outputs*steps); in make_conv_lstm_layer()
180 l.prev_state_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
181 l.prev_cell_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
182 l.cell_gpu = cuda_make_array(0, batch*outputs*steps); in make_conv_lstm_layer()
184 l.f_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
185 l.i_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
186 l.g_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
187 l.o_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
188 l.c_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
189 l.h_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
190 l.stored_c_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
191 l.stored_h_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
192 l.temp_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
193 l.temp2_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
194 l.temp3_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
195 l.dc_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
196 l.dh_gpu = cuda_make_array(0, batch*outputs); in make_conv_lstm_layer()
197 l.last_prev_state_gpu = cuda_make_array(0, l.batch*l.outputs); in make_conv_lstm_layer()
198 l.last_prev_cell_gpu = cuda_make_array(0, l.batch*l.outputs); in make_conv_lstm_layer()
202 l.bflops = l.uf->bflops + l.ui->bflops + l.ug->bflops + l.uo->bflops + in make_conv_lstm_layer()
203 l.wf->bflops + l.wi->bflops + l.wg->bflops + l.wo->bflops + in make_conv_lstm_layer()
204 l.vf->bflops + l.vi->bflops + l.vo->bflops; in make_conv_lstm_layer()
206 if(l.peephole) l.bflops += 12 * l.outputs*l.batch / 1000000000.; in make_conv_lstm_layer()
207 else l.bflops += 9 * l.outputs*l.batch / 1000000000.; in make_conv_lstm_layer()
209 return l; in make_conv_lstm_layer()
212 void update_conv_lstm_layer(layer l, int batch, float learning_rate, float momentum, float decay) in update_conv_lstm_layer() argument
214 if (l.peephole) { in update_conv_lstm_layer()
215 update_convolutional_layer(*(l.vf), batch, learning_rate, momentum, decay); in update_conv_lstm_layer()
216 update_convolutional_layer(*(l.vi), batch, learning_rate, momentum, decay); in update_conv_lstm_layer()
217 update_convolutional_layer(*(l.vo), batch, learning_rate, momentum, decay); in update_conv_lstm_layer()
219 update_convolutional_layer(*(l.wf), batch, learning_rate, momentum, decay); in update_conv_lstm_layer()
220 update_convolutional_layer(*(l.wi), batch, learning_rate, momentum, decay); in update_conv_lstm_layer()
221 update_convolutional_layer(*(l.wg), batch, learning_rate, momentum, decay); in update_conv_lstm_layer()
222 update_convolutional_layer(*(l.wo), batch, learning_rate, momentum, decay); in update_conv_lstm_layer()
223 update_convolutional_layer(*(l.uf), batch, learning_rate, momentum, decay); in update_conv_lstm_layer()
224 update_convolutional_layer(*(l.ui), batch, learning_rate, momentum, decay); in update_conv_lstm_layer()
225 update_convolutional_layer(*(l.ug), batch, learning_rate, momentum, decay); in update_conv_lstm_layer()
226 update_convolutional_layer(*(l.uo), batch, learning_rate, momentum, decay); in update_conv_lstm_layer()
229 void resize_conv_lstm_layer(layer *l, int w, int h) in resize_conv_lstm_layer() argument
231 if (l->peephole) { in resize_conv_lstm_layer()
232 resize_convolutional_layer(l->vf, w, h); in resize_conv_lstm_layer()
233 if (l->workspace_size < l->vf->workspace_size) l->workspace_size = l->vf->workspace_size; in resize_conv_lstm_layer()
235 resize_convolutional_layer(l->vi, w, h); in resize_conv_lstm_layer()
236 if (l->workspace_size < l->vi->workspace_size) l->workspace_size = l->vi->workspace_size; in resize_conv_lstm_layer()
238 resize_convolutional_layer(l->vo, w, h); in resize_conv_lstm_layer()
239 if (l->workspace_size < l->vo->workspace_size) l->workspace_size = l->vo->workspace_size; in resize_conv_lstm_layer()
242 resize_convolutional_layer(l->wf, w, h); in resize_conv_lstm_layer()
243 if (l->workspace_size < l->wf->workspace_size) l->workspace_size = l->wf->workspace_size; in resize_conv_lstm_layer()
245 resize_convolutional_layer(l->wi, w, h); in resize_conv_lstm_layer()
246 if (l->workspace_size < l->wi->workspace_size) l->workspace_size = l->wi->workspace_size; in resize_conv_lstm_layer()
248 resize_convolutional_layer(l->wg, w, h); in resize_conv_lstm_layer()
249 if (l->workspace_size < l->wg->workspace_size) l->workspace_size = l->wg->workspace_size; in resize_conv_lstm_layer()
251 resize_convolutional_layer(l->wo, w, h); in resize_conv_lstm_layer()
252 if (l->workspace_size < l->wo->workspace_size) l->workspace_size = l->wo->workspace_size; in resize_conv_lstm_layer()
255 resize_convolutional_layer(l->uf, w, h); in resize_conv_lstm_layer()
256 if (l->workspace_size < l->uf->workspace_size) l->workspace_size = l->uf->workspace_size; in resize_conv_lstm_layer()
258 resize_convolutional_layer(l->ui, w, h); in resize_conv_lstm_layer()
259 if (l->workspace_size < l->ui->workspace_size) l->workspace_size = l->ui->workspace_size; in resize_conv_lstm_layer()
261 resize_convolutional_layer(l->ug, w, h); in resize_conv_lstm_layer()
262 if (l->workspace_size < l->ug->workspace_size) l->workspace_size = l->ug->workspace_size; in resize_conv_lstm_layer()
264 resize_convolutional_layer(l->uo, w, h); in resize_conv_lstm_layer()
265 if (l->workspace_size < l->uo->workspace_size) l->workspace_size = l->uo->workspace_size; in resize_conv_lstm_layer()
267 l->w = w; in resize_conv_lstm_layer()
268 l->h = h; in resize_conv_lstm_layer()
269 l->out_h = l->wo->out_h; in resize_conv_lstm_layer()
270 l->out_w = l->wo->out_w; in resize_conv_lstm_layer()
271 l->outputs = l->wo->outputs; in resize_conv_lstm_layer()
272 int outputs = l->outputs; in resize_conv_lstm_layer()
273 l->inputs = w*h*l->c; in resize_conv_lstm_layer()
274 int steps = l->steps; in resize_conv_lstm_layer()
275 int batch = l->batch; in resize_conv_lstm_layer()
277 assert(l->wo->outputs == l->uo->outputs); in resize_conv_lstm_layer()
279 l->output = (float*)xrealloc(l->output, outputs * batch * steps * sizeof(float)); in resize_conv_lstm_layer()
282 l->prev_state_cpu = (float*)xrealloc(l->prev_state_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
283 l->prev_cell_cpu = (float*)xrealloc(l->prev_cell_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
284 l->cell_cpu = (float*)xrealloc(l->cell_cpu, batch*outputs*steps * sizeof(float)); in resize_conv_lstm_layer()
286 l->f_cpu = (float*)xrealloc(l->f_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
287 l->i_cpu = (float*)xrealloc(l->i_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
288 l->g_cpu = (float*)xrealloc(l->g_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
289 l->o_cpu = (float*)xrealloc(l->o_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
290 l->c_cpu = (float*)xrealloc(l->c_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
291 l->h_cpu = (float*)xrealloc(l->h_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
292 l->temp_cpu = (float*)xrealloc(l->temp_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
293 l->temp2_cpu = (float*)xrealloc(l->temp2_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
294 l->temp3_cpu = (float*)xrealloc(l->temp3_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
295 l->dc_cpu = (float*)xrealloc(l->dc_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
296 l->dh_cpu = (float*)xrealloc(l->dh_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
297 l->stored_c_cpu = (float*)xrealloc(l->stored_c_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
298 l->stored_h_cpu = (float*)xrealloc(l->stored_h_cpu, batch*outputs * sizeof(float)); in resize_conv_lstm_layer()
304 if (l->output_gpu) cudaFree(l->output_gpu); in resize_conv_lstm_layer()
305 l->output_gpu = cuda_make_array(0, batch*outputs*steps); in resize_conv_lstm_layer()
307 if (l->delta_gpu) cudaFree(l->delta_gpu); in resize_conv_lstm_layer()
308 l->delta_gpu = cuda_make_array(0, batch*outputs*steps); in resize_conv_lstm_layer()
310 if (l->prev_state_gpu) cudaFree(l->prev_state_gpu); in resize_conv_lstm_layer()
311 l->prev_state_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
313 if (l->prev_cell_gpu) cudaFree(l->prev_cell_gpu); in resize_conv_lstm_layer()
314 l->prev_cell_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
316 if (l->cell_gpu) cudaFree(l->cell_gpu); in resize_conv_lstm_layer()
317 l->cell_gpu = cuda_make_array(0, batch*outputs*steps); in resize_conv_lstm_layer()
319 if (l->f_gpu) cudaFree(l->f_gpu); in resize_conv_lstm_layer()
320 l->f_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
322 if (l->i_gpu) cudaFree(l->i_gpu); in resize_conv_lstm_layer()
323 l->i_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
325 if (l->g_gpu) cudaFree(l->g_gpu); in resize_conv_lstm_layer()
326 l->g_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
328 if (l->o_gpu) cudaFree(l->o_gpu); in resize_conv_lstm_layer()
329 l->o_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
331 if (l->c_gpu) cudaFree(l->c_gpu); in resize_conv_lstm_layer()
332 l->c_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
334 if (l->h_gpu) cudaFree(l->h_gpu); in resize_conv_lstm_layer()
335 l->h_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
337 if (l->temp_gpu) cudaFree(l->temp_gpu); in resize_conv_lstm_layer()
338 l->temp_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
340 if (l->temp2_gpu) cudaFree(l->temp2_gpu); in resize_conv_lstm_layer()
341 l->temp2_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
343 if (l->temp3_gpu) cudaFree(l->temp3_gpu); in resize_conv_lstm_layer()
344 l->temp3_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
346 if (l->dc_gpu) cudaFree(l->dc_gpu); in resize_conv_lstm_layer()
347 l->dc_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
349 if (l->dh_gpu) cudaFree(l->dh_gpu); in resize_conv_lstm_layer()
350 l->dh_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
352 if (l->stored_c_gpu) cudaFree(l->stored_c_gpu); in resize_conv_lstm_layer()
353 l->stored_c_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
355 if (l->stored_h_gpu) cudaFree(l->stored_h_gpu); in resize_conv_lstm_layer()
356 l->stored_h_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
358 if (l->last_prev_state_gpu) cudaFree(l->last_prev_state_gpu); in resize_conv_lstm_layer()
359 l->last_prev_state_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
361 if (l->last_prev_cell_gpu) cudaFree(l->last_prev_cell_gpu); in resize_conv_lstm_layer()
362 l->last_prev_cell_gpu = cuda_make_array(0, batch*outputs); in resize_conv_lstm_layer()
366 void free_state_conv_lstm(layer l) in free_state_conv_lstm() argument
369 for (i = 0; i < l.outputs * l.batch; ++i) l.h_cpu[i] = 0; in free_state_conv_lstm()
370 for (i = 0; i < l.outputs * l.batch; ++i) l.c_cpu[i] = 0; in free_state_conv_lstm()
373 cuda_push_array(l.h_gpu, l.h_cpu, l.outputs * l.batch); in free_state_conv_lstm()
374 cuda_push_array(l.c_gpu, l.c_cpu, l.outputs * l.batch); in free_state_conv_lstm()
381 void randomize_state_conv_lstm(layer l) in randomize_state_conv_lstm() argument
384 for (i = 0; i < l.outputs * l.batch; ++i) l.h_cpu[i] = rand_uniform(-1, 1); in randomize_state_conv_lstm()
385 for (i = 0; i < l.outputs * l.batch; ++i) l.c_cpu[i] = rand_uniform(-1, 1); in randomize_state_conv_lstm()
388 cuda_push_array(l.h_gpu, l.h_cpu, l.outputs * l.batch); in randomize_state_conv_lstm()
389 cuda_push_array(l.c_gpu, l.c_cpu, l.outputs * l.batch); in randomize_state_conv_lstm()
394 void remember_state_conv_lstm(layer l) in remember_state_conv_lstm() argument
396 memcpy(l.stored_c_cpu, l.c_cpu, l.outputs * l.batch * sizeof(float)); in remember_state_conv_lstm()
397 memcpy(l.stored_h_cpu, l.h_cpu, l.outputs * l.batch * sizeof(float)); in remember_state_conv_lstm()
400 copy_ongpu(l.outputs*l.batch, l.c_gpu, 1, l.stored_c_gpu, 1); in remember_state_conv_lstm()
401 copy_ongpu(l.outputs*l.batch, l.h_gpu, 1, l.stored_h_gpu, 1); in remember_state_conv_lstm()
405 void restore_state_conv_lstm(layer l) in restore_state_conv_lstm() argument
407 memcpy(l.c_cpu, l.stored_c_cpu, l.outputs * l.batch * sizeof(float)); in restore_state_conv_lstm()
408 memcpy(l.h_cpu, l.stored_h_cpu, l.outputs * l.batch * sizeof(float)); in restore_state_conv_lstm()
411 copy_ongpu(l.outputs*l.batch, l.stored_c_gpu, 1, l.c_gpu, 1); in restore_state_conv_lstm()
412 copy_ongpu(l.outputs*l.batch, l.stored_h_gpu, 1, l.h_gpu, 1); in restore_state_conv_lstm()
416 void forward_conv_lstm_layer(layer l, network_state state) in forward_conv_lstm_layer() argument
423 layer vf = *(l.vf); in forward_conv_lstm_layer()
424 layer vi = *(l.vi); in forward_conv_lstm_layer()
425 layer vo = *(l.vo); in forward_conv_lstm_layer()
427 layer wf = *(l.wf); in forward_conv_lstm_layer()
428 layer wi = *(l.wi); in forward_conv_lstm_layer()
429 layer wg = *(l.wg); in forward_conv_lstm_layer()
430 layer wo = *(l.wo); in forward_conv_lstm_layer()
432 layer uf = *(l.uf); in forward_conv_lstm_layer()
433 layer ui = *(l.ui); in forward_conv_lstm_layer()
434 layer ug = *(l.ug); in forward_conv_lstm_layer()
435 layer uo = *(l.uo); in forward_conv_lstm_layer()
438 if (l.peephole) { in forward_conv_lstm_layer()
439 fill_cpu(l.outputs * l.batch * l.steps, 0, vf.delta, 1); in forward_conv_lstm_layer()
440 fill_cpu(l.outputs * l.batch * l.steps, 0, vi.delta, 1); in forward_conv_lstm_layer()
441 fill_cpu(l.outputs * l.batch * l.steps, 0, vo.delta, 1); in forward_conv_lstm_layer()
444 fill_cpu(l.outputs * l.batch * l.steps, 0, wf.delta, 1); in forward_conv_lstm_layer()
445 fill_cpu(l.outputs * l.batch * l.steps, 0, wi.delta, 1); in forward_conv_lstm_layer()
446 fill_cpu(l.outputs * l.batch * l.steps, 0, wg.delta, 1); in forward_conv_lstm_layer()
447 fill_cpu(l.outputs * l.batch * l.steps, 0, wo.delta, 1); in forward_conv_lstm_layer()
449 fill_cpu(l.outputs * l.batch * l.steps, 0, uf.delta, 1); in forward_conv_lstm_layer()
450 fill_cpu(l.outputs * l.batch * l.steps, 0, ui.delta, 1); in forward_conv_lstm_layer()
451 fill_cpu(l.outputs * l.batch * l.steps, 0, ug.delta, 1); in forward_conv_lstm_layer()
452 fill_cpu(l.outputs * l.batch * l.steps, 0, uo.delta, 1); in forward_conv_lstm_layer()
454 fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1); in forward_conv_lstm_layer()
457 for (i = 0; i < l.steps; ++i) in forward_conv_lstm_layer()
459 if (l.peephole) { in forward_conv_lstm_layer()
460 assert(l.outputs == vf.out_w * vf.out_h * vf.out_c); in forward_conv_lstm_layer()
461 s.input = l.c_cpu; in forward_conv_lstm_layer()
467 assert(l.outputs == wf.out_w * wf.out_h * wf.out_c); in forward_conv_lstm_layer()
468 assert(wf.c == l.out_c && wi.c == l.out_c && wg.c == l.out_c && wo.c == l.out_c); in forward_conv_lstm_layer()
470 s.input = l.h_cpu; in forward_conv_lstm_layer()
476 assert(l.inputs == uf.w * uf.h * uf.c); in forward_conv_lstm_layer()
477 assert(uf.c == l.c && ui.c == l.c && ug.c == l.c && uo.c == l.c); in forward_conv_lstm_layer()
486 copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); in forward_conv_lstm_layer()
487 axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); in forward_conv_lstm_layer()
488 if (l.peephole) axpy_cpu(l.outputs*l.batch, 1, vf.output, 1, l.f_cpu, 1); in forward_conv_lstm_layer()
491 copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); in forward_conv_lstm_layer()
492 axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); in forward_conv_lstm_layer()
493 if (l.peephole) axpy_cpu(l.outputs*l.batch, 1, vi.output, 1, l.i_cpu, 1); in forward_conv_lstm_layer()
496 copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); in forward_conv_lstm_layer()
497 axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); in forward_conv_lstm_layer()
499 activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); in forward_conv_lstm_layer()
500 activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); in forward_conv_lstm_layer()
501 activate_array(l.g_cpu, l.outputs*l.batch, TANH); in forward_conv_lstm_layer()
504 copy_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); in forward_conv_lstm_layer()
505 mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); in forward_conv_lstm_layer()
506 mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.c_cpu, 1); in forward_conv_lstm_layer()
507 axpy_cpu(l.outputs*l.batch, 1, l.temp_cpu, 1, l.c_cpu, 1); in forward_conv_lstm_layer()
510 if (l.peephole) { in forward_conv_lstm_layer()
511 s.input = l.c_cpu; in forward_conv_lstm_layer()
514 copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); in forward_conv_lstm_layer()
515 axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); in forward_conv_lstm_layer()
516 if (l.peephole) axpy_cpu(l.outputs*l.batch, 1, vo.output, 1, l.o_cpu, 1); in forward_conv_lstm_layer()
517 activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); in forward_conv_lstm_layer()
520 copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.h_cpu, 1); in forward_conv_lstm_layer()
521 activate_array(l.h_cpu, l.outputs*l.batch, TANH); in forward_conv_lstm_layer()
522 mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.h_cpu, 1); in forward_conv_lstm_layer()
524 if (l.state_constrain) constrain_cpu(l.outputs*l.batch, l.state_constrain, l.c_cpu); in forward_conv_lstm_layer()
525 fix_nan_and_inf_cpu(l.c_cpu, l.outputs*l.batch); in forward_conv_lstm_layer()
526 fix_nan_and_inf_cpu(l.h_cpu, l.outputs*l.batch); in forward_conv_lstm_layer()
528 copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.cell_cpu, 1); in forward_conv_lstm_layer()
529 copy_cpu(l.outputs*l.batch, l.h_cpu, 1, l.output, 1); in forward_conv_lstm_layer()
531 state.input += l.inputs*l.batch; in forward_conv_lstm_layer()
532 l.output += l.outputs*l.batch; in forward_conv_lstm_layer()
533 l.cell_cpu += l.outputs*l.batch; in forward_conv_lstm_layer()
535 if (l.peephole) { in forward_conv_lstm_layer()
553 void backward_conv_lstm_layer(layer l, network_state state) in backward_conv_lstm_layer() argument
559 layer vf = *(l.vf); in backward_conv_lstm_layer()
560 layer vi = *(l.vi); in backward_conv_lstm_layer()
561 layer vo = *(l.vo); in backward_conv_lstm_layer()
563 layer wf = *(l.wf); in backward_conv_lstm_layer()
564 layer wi = *(l.wi); in backward_conv_lstm_layer()
565 layer wg = *(l.wg); in backward_conv_lstm_layer()
566 layer wo = *(l.wo); in backward_conv_lstm_layer()
568 layer uf = *(l.uf); in backward_conv_lstm_layer()
569 layer ui = *(l.ui); in backward_conv_lstm_layer()
570 layer ug = *(l.ug); in backward_conv_lstm_layer()
571 layer uo = *(l.uo); in backward_conv_lstm_layer()
573 if (l.peephole) { in backward_conv_lstm_layer()
574 increment_layer(&vf, l.steps - 1); in backward_conv_lstm_layer()
575 increment_layer(&vi, l.steps - 1); in backward_conv_lstm_layer()
576 increment_layer(&vo, l.steps - 1); in backward_conv_lstm_layer()
579 increment_layer(&wf, l.steps - 1); in backward_conv_lstm_layer()
580 increment_layer(&wi, l.steps - 1); in backward_conv_lstm_layer()
581 increment_layer(&wg, l.steps - 1); in backward_conv_lstm_layer()
582 increment_layer(&wo, l.steps - 1); in backward_conv_lstm_layer()
584 increment_layer(&uf, l.steps - 1); in backward_conv_lstm_layer()
585 increment_layer(&ui, l.steps - 1); in backward_conv_lstm_layer()
586 increment_layer(&ug, l.steps - 1); in backward_conv_lstm_layer()
587 increment_layer(&uo, l.steps - 1); in backward_conv_lstm_layer()
589 state.input += l.inputs*l.batch*(l.steps - 1); in backward_conv_lstm_layer()
590 if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1); in backward_conv_lstm_layer()
592 l.output += l.outputs*l.batch*(l.steps - 1); in backward_conv_lstm_layer()
593 l.cell_cpu += l.outputs*l.batch*(l.steps - 1); in backward_conv_lstm_layer()
594 l.delta += l.outputs*l.batch*(l.steps - 1); in backward_conv_lstm_layer()
596 for (i = l.steps - 1; i >= 0; --i) { in backward_conv_lstm_layer()
597 … if (i != 0) copy_cpu(l.outputs*l.batch, l.cell_cpu - l.outputs*l.batch, 1, l.prev_cell_cpu, 1); in backward_conv_lstm_layer()
598 copy_cpu(l.outputs*l.batch, l.cell_cpu, 1, l.c_cpu, 1); in backward_conv_lstm_layer()
599 … if (i != 0) copy_cpu(l.outputs*l.batch, l.output - l.outputs*l.batch, 1, l.prev_state_cpu, 1); in backward_conv_lstm_layer()
600 copy_cpu(l.outputs*l.batch, l.output, 1, l.h_cpu, 1); in backward_conv_lstm_layer()
602 l.dh_cpu = (i == 0) ? 0 : l.delta - l.outputs*l.batch; in backward_conv_lstm_layer()
605 copy_cpu(l.outputs*l.batch, wf.output, 1, l.f_cpu, 1); in backward_conv_lstm_layer()
606 axpy_cpu(l.outputs*l.batch, 1, uf.output, 1, l.f_cpu, 1); in backward_conv_lstm_layer()
607 if (l.peephole) axpy_cpu(l.outputs*l.batch, 1, vf.output, 1, l.f_cpu, 1); in backward_conv_lstm_layer()
610 copy_cpu(l.outputs*l.batch, wi.output, 1, l.i_cpu, 1); in backward_conv_lstm_layer()
611 axpy_cpu(l.outputs*l.batch, 1, ui.output, 1, l.i_cpu, 1); in backward_conv_lstm_layer()
612 if (l.peephole) axpy_cpu(l.outputs*l.batch, 1, vi.output, 1, l.i_cpu, 1); in backward_conv_lstm_layer()
615 copy_cpu(l.outputs*l.batch, wg.output, 1, l.g_cpu, 1); in backward_conv_lstm_layer()
616 axpy_cpu(l.outputs*l.batch, 1, ug.output, 1, l.g_cpu, 1); in backward_conv_lstm_layer()
619 copy_cpu(l.outputs*l.batch, wo.output, 1, l.o_cpu, 1); in backward_conv_lstm_layer()
620 axpy_cpu(l.outputs*l.batch, 1, uo.output, 1, l.o_cpu, 1); in backward_conv_lstm_layer()
621 if (l.peephole) axpy_cpu(l.outputs*l.batch, 1, vo.output, 1, l.o_cpu, 1); in backward_conv_lstm_layer()
623 activate_array(l.f_cpu, l.outputs*l.batch, LOGISTIC); in backward_conv_lstm_layer()
624 activate_array(l.i_cpu, l.outputs*l.batch, LOGISTIC); in backward_conv_lstm_layer()
625 activate_array(l.g_cpu, l.outputs*l.batch, TANH); in backward_conv_lstm_layer()
626 activate_array(l.o_cpu, l.outputs*l.batch, LOGISTIC); in backward_conv_lstm_layer()
628 copy_cpu(l.outputs*l.batch, l.delta, 1, l.temp3_cpu, 1); in backward_conv_lstm_layer()
630 copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); in backward_conv_lstm_layer()
631 activate_array(l.temp_cpu, l.outputs*l.batch, TANH); in backward_conv_lstm_layer()
633 copy_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp2_cpu, 1); in backward_conv_lstm_layer()
634 mul_cpu(l.outputs*l.batch, l.o_cpu, 1, l.temp2_cpu, 1); in backward_conv_lstm_layer()
636 gradient_array(l.temp_cpu, l.outputs*l.batch, TANH, l.temp2_cpu); in backward_conv_lstm_layer()
637 axpy_cpu(l.outputs*l.batch, 1, l.dc_cpu, 1, l.temp2_cpu, 1); in backward_conv_lstm_layer()
642 copy_cpu(l.outputs*l.batch, l.c_cpu, 1, l.temp_cpu, 1); in backward_conv_lstm_layer()
643 activate_array(l.temp_cpu, l.outputs*l.batch, TANH); in backward_conv_lstm_layer()
644 mul_cpu(l.outputs*l.batch, l.temp3_cpu, 1, l.temp_cpu, 1); in backward_conv_lstm_layer()
645 gradient_array(l.o_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); in backward_conv_lstm_layer()
652 if (l.peephole) { in backward_conv_lstm_layer()
653 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, vo.delta, 1); in backward_conv_lstm_layer()
654 s.input = l.cell_cpu; in backward_conv_lstm_layer()
659 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wo.delta, 1); in backward_conv_lstm_layer()
660 s.input = l.prev_state_cpu; in backward_conv_lstm_layer()
664 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uo.delta, 1); in backward_conv_lstm_layer()
670 copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); in backward_conv_lstm_layer()
671 mul_cpu(l.outputs*l.batch, l.i_cpu, 1, l.temp_cpu, 1); in backward_conv_lstm_layer()
672 gradient_array(l.g_cpu, l.outputs*l.batch, TANH, l.temp_cpu); in backward_conv_lstm_layer()
675 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wg.delta, 1); in backward_conv_lstm_layer()
676 s.input = l.prev_state_cpu; in backward_conv_lstm_layer()
680 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ug.delta, 1); in backward_conv_lstm_layer()
686 copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); in backward_conv_lstm_layer()
687 mul_cpu(l.outputs*l.batch, l.g_cpu, 1, l.temp_cpu, 1); in backward_conv_lstm_layer()
688 gradient_array(l.i_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); in backward_conv_lstm_layer()
691 if (l.peephole) { in backward_conv_lstm_layer()
692 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, vi.delta, 1); in backward_conv_lstm_layer()
693 s.input = l.prev_cell_cpu; in backward_conv_lstm_layer()
698 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wi.delta, 1); in backward_conv_lstm_layer()
699 s.input = l.prev_state_cpu; in backward_conv_lstm_layer()
703 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, ui.delta, 1); in backward_conv_lstm_layer()
709 copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); in backward_conv_lstm_layer()
710 mul_cpu(l.outputs*l.batch, l.prev_cell_cpu, 1, l.temp_cpu, 1); in backward_conv_lstm_layer()
711 gradient_array(l.f_cpu, l.outputs*l.batch, LOGISTIC, l.temp_cpu); in backward_conv_lstm_layer()
714 if (l.peephole) { in backward_conv_lstm_layer()
715 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, vf.delta, 1); in backward_conv_lstm_layer()
716 s.input = l.prev_cell_cpu; in backward_conv_lstm_layer()
721 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, wf.delta, 1); in backward_conv_lstm_layer()
722 s.input = l.prev_state_cpu; in backward_conv_lstm_layer()
726 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, uf.delta, 1); in backward_conv_lstm_layer()
731 copy_cpu(l.outputs*l.batch, l.temp2_cpu, 1, l.temp_cpu, 1); in backward_conv_lstm_layer()
732 mul_cpu(l.outputs*l.batch, l.f_cpu, 1, l.temp_cpu, 1); in backward_conv_lstm_layer()
733 copy_cpu(l.outputs*l.batch, l.temp_cpu, 1, l.dc_cpu, 1); in backward_conv_lstm_layer()
735 state.input -= l.inputs*l.batch; in backward_conv_lstm_layer()
736 if (state.delta) state.delta -= l.inputs*l.batch; in backward_conv_lstm_layer()
737 l.output -= l.outputs*l.batch; in backward_conv_lstm_layer()
738 l.cell_cpu -= l.outputs*l.batch; in backward_conv_lstm_layer()
739 l.delta -= l.outputs*l.batch; in backward_conv_lstm_layer()
741 if (l.peephole) { in backward_conv_lstm_layer()
760 void pull_conv_lstm_layer(layer l) in pull_conv_lstm_layer() argument
762 if (l.peephole) { in pull_conv_lstm_layer()
763 pull_convolutional_layer(*(l.vf)); in pull_conv_lstm_layer()
764 pull_convolutional_layer(*(l.vi)); in pull_conv_lstm_layer()
765 pull_convolutional_layer(*(l.vo)); in pull_conv_lstm_layer()
767 pull_convolutional_layer(*(l.wf)); in pull_conv_lstm_layer()
768 pull_convolutional_layer(*(l.wi)); in pull_conv_lstm_layer()
769 pull_convolutional_layer(*(l.wg)); in pull_conv_lstm_layer()
770 pull_convolutional_layer(*(l.wo)); in pull_conv_lstm_layer()
771 pull_convolutional_layer(*(l.uf)); in pull_conv_lstm_layer()
772 pull_convolutional_layer(*(l.ui)); in pull_conv_lstm_layer()
773 pull_convolutional_layer(*(l.ug)); in pull_conv_lstm_layer()
774 pull_convolutional_layer(*(l.uo)); in pull_conv_lstm_layer()
777 void push_conv_lstm_layer(layer l) in push_conv_lstm_layer() argument
779 if (l.peephole) { in push_conv_lstm_layer()
780 push_convolutional_layer(*(l.vf)); in push_conv_lstm_layer()
781 push_convolutional_layer(*(l.vi)); in push_conv_lstm_layer()
782 push_convolutional_layer(*(l.vo)); in push_conv_lstm_layer()
784 push_convolutional_layer(*(l.wf)); in push_conv_lstm_layer()
785 push_convolutional_layer(*(l.wi)); in push_conv_lstm_layer()
786 push_convolutional_layer(*(l.wg)); in push_conv_lstm_layer()
787 push_convolutional_layer(*(l.wo)); in push_conv_lstm_layer()
788 push_convolutional_layer(*(l.uf)); in push_conv_lstm_layer()
789 push_convolutional_layer(*(l.ui)); in push_conv_lstm_layer()
790 push_convolutional_layer(*(l.ug)); in push_conv_lstm_layer()
791 push_convolutional_layer(*(l.uo)); in push_conv_lstm_layer()
794 void update_conv_lstm_layer_gpu(layer l, int batch, float learning_rate, float momentum, float deca… in update_conv_lstm_layer_gpu() argument
796 if (l.peephole) { in update_conv_lstm_layer_gpu()
797 update_convolutional_layer_gpu(*(l.vf), batch, learning_rate, momentum, decay, loss_scale); in update_conv_lstm_layer_gpu()
798 update_convolutional_layer_gpu(*(l.vi), batch, learning_rate, momentum, decay, loss_scale); in update_conv_lstm_layer_gpu()
799 update_convolutional_layer_gpu(*(l.vo), batch, learning_rate, momentum, decay, loss_scale); in update_conv_lstm_layer_gpu()
801 update_convolutional_layer_gpu(*(l.wf), batch, learning_rate, momentum, decay, loss_scale); in update_conv_lstm_layer_gpu()
802 update_convolutional_layer_gpu(*(l.wi), batch, learning_rate, momentum, decay, loss_scale); in update_conv_lstm_layer_gpu()
803 update_convolutional_layer_gpu(*(l.wg), batch, learning_rate, momentum, decay, loss_scale); in update_conv_lstm_layer_gpu()
804 update_convolutional_layer_gpu(*(l.wo), batch, learning_rate, momentum, decay, loss_scale); in update_conv_lstm_layer_gpu()
805 update_convolutional_layer_gpu(*(l.uf), batch, learning_rate, momentum, decay, loss_scale); in update_conv_lstm_layer_gpu()
806 update_convolutional_layer_gpu(*(l.ui), batch, learning_rate, momentum, decay, loss_scale); in update_conv_lstm_layer_gpu()
807 update_convolutional_layer_gpu(*(l.ug), batch, learning_rate, momentum, decay, loss_scale); in update_conv_lstm_layer_gpu()
808 update_convolutional_layer_gpu(*(l.uo), batch, learning_rate, momentum, decay, loss_scale); in update_conv_lstm_layer_gpu()
811 void forward_conv_lstm_layer_gpu(layer l, network_state state) in forward_conv_lstm_layer_gpu() argument
819 layer vf = *(l.vf); in forward_conv_lstm_layer_gpu()
820 layer vi = *(l.vi); in forward_conv_lstm_layer_gpu()
821 layer vo = *(l.vo); in forward_conv_lstm_layer_gpu()
823 layer wf = *(l.wf); in forward_conv_lstm_layer_gpu()
824 layer wi = *(l.wi); in forward_conv_lstm_layer_gpu()
825 layer wg = *(l.wg); in forward_conv_lstm_layer_gpu()
826 layer wo = *(l.wo); in forward_conv_lstm_layer_gpu()
828 layer uf = *(l.uf); in forward_conv_lstm_layer_gpu()
829 layer ui = *(l.ui); in forward_conv_lstm_layer_gpu()
830 layer ug = *(l.ug); in forward_conv_lstm_layer_gpu()
831 layer uo = *(l.uo); in forward_conv_lstm_layer_gpu()
834 if (l.peephole) { in forward_conv_lstm_layer_gpu()
835 fill_ongpu(l.outputs * l.batch * l.steps, 0, vf.delta_gpu, 1); in forward_conv_lstm_layer_gpu()
836 fill_ongpu(l.outputs * l.batch * l.steps, 0, vi.delta_gpu, 1); in forward_conv_lstm_layer_gpu()
837 fill_ongpu(l.outputs * l.batch * l.steps, 0, vo.delta_gpu, 1); in forward_conv_lstm_layer_gpu()
840 fill_ongpu(l.outputs * l.batch * l.steps, 0, wf.delta_gpu, 1); in forward_conv_lstm_layer_gpu()
841 fill_ongpu(l.outputs * l.batch * l.steps, 0, wi.delta_gpu, 1); in forward_conv_lstm_layer_gpu()
842 fill_ongpu(l.outputs * l.batch * l.steps, 0, wg.delta_gpu, 1); in forward_conv_lstm_layer_gpu()
843 fill_ongpu(l.outputs * l.batch * l.steps, 0, wo.delta_gpu, 1); in forward_conv_lstm_layer_gpu()
845 fill_ongpu(l.outputs * l.batch * l.steps, 0, uf.delta_gpu, 1); in forward_conv_lstm_layer_gpu()
846 fill_ongpu(l.outputs * l.batch * l.steps, 0, ui.delta_gpu, 1); in forward_conv_lstm_layer_gpu()
847 fill_ongpu(l.outputs * l.batch * l.steps, 0, ug.delta_gpu, 1); in forward_conv_lstm_layer_gpu()
848 fill_ongpu(l.outputs * l.batch * l.steps, 0, uo.delta_gpu, 1); in forward_conv_lstm_layer_gpu()
850 fill_ongpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1); in forward_conv_lstm_layer_gpu()
853 for (i = 0; i < l.steps; ++i) in forward_conv_lstm_layer_gpu()
855 if (l.peephole) { in forward_conv_lstm_layer_gpu()
856 assert(l.outputs == vf.out_w * vf.out_h * vf.out_c); in forward_conv_lstm_layer_gpu()
857 s.input = l.c_gpu; in forward_conv_lstm_layer_gpu()
863 assert(l.outputs == wf.out_w * wf.out_h * wf.out_c); in forward_conv_lstm_layer_gpu()
864 assert(wf.c == l.out_c && wi.c == l.out_c && wg.c == l.out_c && wo.c == l.out_c); in forward_conv_lstm_layer_gpu()
866 s.input = l.h_gpu; in forward_conv_lstm_layer_gpu()
872 assert(l.inputs == uf.w * uf.h * uf.c); in forward_conv_lstm_layer_gpu()
873 assert(uf.c == l.c && ui.c == l.c && ug.c == l.c && uo.c == l.c); in forward_conv_lstm_layer_gpu()
882 …rays_activate(wf.output_gpu, uf.output_gpu, (l.peephole)?vf.output_gpu:NULL, l.outputs*l.batch, LO… in forward_conv_lstm_layer_gpu()
889 …ys_activate(wi.output_gpu, ui.output_gpu, (l.peephole) ? vi.output_gpu : NULL, l.outputs*l.batch, … in forward_conv_lstm_layer_gpu()
896 add_3_arrays_activate(wg.output_gpu, ug.output_gpu, NULL, l.outputs*l.batch, TANH, l.g_gpu); in forward_conv_lstm_layer_gpu()
902 …sum_of_mults(l.f_gpu, l.c_gpu, l.i_gpu, l.g_gpu, l.outputs*l.batch, l.c_gpu); // decreases mAP??? in forward_conv_lstm_layer_gpu()
909 if (l.peephole) { in forward_conv_lstm_layer_gpu()
910 s.input = l.c_gpu; in forward_conv_lstm_layer_gpu()
913 …ys_activate(wo.output_gpu, uo.output_gpu, (l.peephole) ? vo.output_gpu : NULL, l.outputs*l.batch, … in forward_conv_lstm_layer_gpu()
920 activate_and_mult(l.c_gpu, l.o_gpu, l.outputs*l.batch, TANH, l.h_gpu); in forward_conv_lstm_layer_gpu()
925 fix_nan_and_inf(l.c_gpu, l.outputs*l.batch); in forward_conv_lstm_layer_gpu()
926 fix_nan_and_inf(l.h_gpu, l.outputs*l.batch); in forward_conv_lstm_layer_gpu()
927 if (l.state_constrain) constrain_ongpu(l.outputs*l.batch, l.state_constrain, l.c_gpu, 1); in forward_conv_lstm_layer_gpu()
929 if(state.train) simple_copy_ongpu(l.outputs*l.batch, l.c_gpu, l.cell_gpu); in forward_conv_lstm_layer_gpu()
930 …simple_copy_ongpu(l.outputs*l.batch, l.h_gpu, l.output_gpu); // is required for both Detection and… in forward_conv_lstm_layer_gpu()
932 state.input += l.inputs*l.batch; in forward_conv_lstm_layer_gpu()
933 l.output_gpu += l.outputs*l.batch; in forward_conv_lstm_layer_gpu()
934 l.cell_gpu += l.outputs*l.batch; in forward_conv_lstm_layer_gpu()
936 if (l.peephole) { in forward_conv_lstm_layer_gpu()
954 void backward_conv_lstm_layer_gpu(layer l, network_state state) in backward_conv_lstm_layer_gpu() argument
956 float *last_output = l.output_gpu + l.outputs*l.batch*(l.steps - 1); in backward_conv_lstm_layer_gpu()
957 float *last_cell = l.cell_gpu + l.outputs*l.batch*(l.steps - 1); in backward_conv_lstm_layer_gpu()
964 layer vf = *(l.vf); in backward_conv_lstm_layer_gpu()
965 layer vi = *(l.vi); in backward_conv_lstm_layer_gpu()
966 layer vo = *(l.vo); in backward_conv_lstm_layer_gpu()
968 layer wf = *(l.wf); in backward_conv_lstm_layer_gpu()
969 layer wi = *(l.wi); in backward_conv_lstm_layer_gpu()
970 layer wg = *(l.wg); in backward_conv_lstm_layer_gpu()
971 layer wo = *(l.wo); in backward_conv_lstm_layer_gpu()
973 layer uf = *(l.uf); in backward_conv_lstm_layer_gpu()
974 layer ui = *(l.ui); in backward_conv_lstm_layer_gpu()
975 layer ug = *(l.ug); in backward_conv_lstm_layer_gpu()
976 layer uo = *(l.uo); in backward_conv_lstm_layer_gpu()
978 if (l.peephole) { in backward_conv_lstm_layer_gpu()
979 increment_layer(&vf, l.steps - 1); in backward_conv_lstm_layer_gpu()
980 increment_layer(&vi, l.steps - 1); in backward_conv_lstm_layer_gpu()
981 increment_layer(&vo, l.steps - 1); in backward_conv_lstm_layer_gpu()
984 increment_layer(&wf, l.steps - 1); in backward_conv_lstm_layer_gpu()
985 increment_layer(&wi, l.steps - 1); in backward_conv_lstm_layer_gpu()
986 increment_layer(&wg, l.steps - 1); in backward_conv_lstm_layer_gpu()
987 increment_layer(&wo, l.steps - 1); in backward_conv_lstm_layer_gpu()
989 increment_layer(&uf, l.steps - 1); in backward_conv_lstm_layer_gpu()
990 increment_layer(&ui, l.steps - 1); in backward_conv_lstm_layer_gpu()
991 increment_layer(&ug, l.steps - 1); in backward_conv_lstm_layer_gpu()
992 increment_layer(&uo, l.steps - 1); in backward_conv_lstm_layer_gpu()
994 state.input += l.inputs*l.batch*(l.steps - 1); in backward_conv_lstm_layer_gpu()
995 if (state.delta) state.delta += l.inputs*l.batch*(l.steps - 1); in backward_conv_lstm_layer_gpu()
997 l.output_gpu += l.outputs*l.batch*(l.steps - 1); in backward_conv_lstm_layer_gpu()
998 l.cell_gpu += l.outputs*l.batch*(l.steps - 1); in backward_conv_lstm_layer_gpu()
999 l.delta_gpu += l.outputs*l.batch*(l.steps - 1); in backward_conv_lstm_layer_gpu()
1004 for (i = l.steps - 1; i >= 0; --i) { in backward_conv_lstm_layer_gpu()
1005 … if (i != 0) simple_copy_ongpu(l.outputs*l.batch, l.cell_gpu - l.outputs*l.batch, l.prev_cell_gpu); in backward_conv_lstm_layer_gpu()
1007 …current_subdivision % sequence != 0) simple_copy_ongpu(l.outputs*l.batch, l.last_prev_cell_gpu, l.… in backward_conv_lstm_layer_gpu()
1009 simple_copy_ongpu(l.outputs*l.batch, l.cell_gpu, l.c_gpu); in backward_conv_lstm_layer_gpu()
1011 …if (i != 0) simple_copy_ongpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, l.prev_state_gp… in backward_conv_lstm_layer_gpu()
1013 …current_subdivision % sequence != 0) simple_copy_ongpu(l.outputs*l.batch, l.last_prev_state_gpu, l in backward_conv_lstm_layer_gpu()
1015 simple_copy_ongpu(l.outputs*l.batch, l.output_gpu, l.h_gpu); in backward_conv_lstm_layer_gpu()
1017 l.dh_gpu = (i == 0) ? 0 : l.delta_gpu - l.outputs*l.batch; in backward_conv_lstm_layer_gpu()
1020 …ys_activate(wf.output_gpu, uf.output_gpu, (l.peephole) ? vf.output_gpu : NULL, l.outputs*l.batch, … in backward_conv_lstm_layer_gpu()
1027 …ys_activate(wi.output_gpu, ui.output_gpu, (l.peephole) ? vi.output_gpu : NULL, l.outputs*l.batch, … in backward_conv_lstm_layer_gpu()
1034 add_3_arrays_activate(wg.output_gpu, ug.output_gpu, NULL, l.outputs*l.batch, TANH, l.g_gpu); in backward_conv_lstm_layer_gpu()
1040 …ys_activate(wo.output_gpu, uo.output_gpu, (l.peephole) ? vo.output_gpu : NULL, l.outputs*l.batch, … in backward_conv_lstm_layer_gpu()
1047 simple_copy_ongpu(l.outputs*l.batch, l.delta_gpu, l.temp3_gpu); // temp3 = delta in backward_conv_lstm_layer_gpu()
1049 simple_copy_ongpu(l.outputs*l.batch, l.c_gpu, l.temp_gpu); in backward_conv_lstm_layer_gpu()
1050 activate_array_ongpu(l.temp_gpu, l.outputs*l.batch, TANH); // temp = tanh(c) in backward_conv_lstm_layer_gpu()
1052 simple_copy_ongpu(l.outputs*l.batch, l.temp3_gpu, l.temp2_gpu); in backward_conv_lstm_layer_gpu()
1053 mul_ongpu(l.outputs*l.batch, l.o_gpu, 1, l.temp2_gpu, 1); // temp2 = delta * o in backward_conv_lstm_layer_gpu()
1055 …gradient_array_ongpu(l.temp_gpu, l.outputs*l.batch, TANH, l.temp2_gpu); // temp2 = delta * o * gra… in backward_conv_lstm_layer_gpu()
1057 …axpy_ongpu(l.outputs*l.batch, 1, l.dc_gpu, 1, l.temp2_gpu, 1); // temp2 = delta * o * gra… in backward_conv_lstm_layer_gpu()
1062 simple_copy_ongpu(l.outputs*l.batch, l.c_gpu, l.temp_gpu); in backward_conv_lstm_layer_gpu()
1063 activate_array_ongpu(l.temp_gpu, l.outputs*l.batch, TANH); // temp = tanh(c) in backward_conv_lstm_layer_gpu()
1065 mul_ongpu(l.outputs*l.batch, l.temp3_gpu, 1, l.temp_gpu, 1); // temp = delta * tanh(c) in backward_conv_lstm_layer_gpu()
1066 …gradient_array_ongpu(l.o_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); // temp = delta * tanh(c… in backward_conv_lstm_layer_gpu()
1073 if (l.peephole) { in backward_conv_lstm_layer_gpu()
1074 simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, vo.delta_gpu); in backward_conv_lstm_layer_gpu()
1075 s.input = l.cell_gpu; in backward_conv_lstm_layer_gpu()
1080 simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, wo.delta_gpu); in backward_conv_lstm_layer_gpu()
1081 s.input = l.prev_state_gpu; in backward_conv_lstm_layer_gpu()
1085 simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, uo.delta_gpu); in backward_conv_lstm_layer_gpu()
1091 simple_copy_ongpu(l.outputs*l.batch, l.temp2_gpu, l.temp_gpu); in backward_conv_lstm_layer_gpu()
1092 mul_ongpu(l.outputs*l.batch, l.i_gpu, 1, l.temp_gpu, 1); in backward_conv_lstm_layer_gpu()
1093 gradient_array_ongpu(l.g_gpu, l.outputs*l.batch, TANH, l.temp_gpu); in backward_conv_lstm_layer_gpu()
1096 simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, wg.delta_gpu); in backward_conv_lstm_layer_gpu()
1097 s.input = l.prev_state_gpu; in backward_conv_lstm_layer_gpu()
1101 simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, ug.delta_gpu); in backward_conv_lstm_layer_gpu()
1107 simple_copy_ongpu(l.outputs*l.batch, l.temp2_gpu, l.temp_gpu); in backward_conv_lstm_layer_gpu()
1108 mul_ongpu(l.outputs*l.batch, l.g_gpu, 1, l.temp_gpu, 1); in backward_conv_lstm_layer_gpu()
1109 gradient_array_ongpu(l.i_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); in backward_conv_lstm_layer_gpu()
1112 if (l.peephole) { in backward_conv_lstm_layer_gpu()
1113 simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, vi.delta_gpu); in backward_conv_lstm_layer_gpu()
1114 s.input = l.prev_cell_gpu; in backward_conv_lstm_layer_gpu()
1119 simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, wi.delta_gpu); in backward_conv_lstm_layer_gpu()
1120 s.input = l.prev_state_gpu; in backward_conv_lstm_layer_gpu()
1124 simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, ui.delta_gpu); in backward_conv_lstm_layer_gpu()
1130 simple_copy_ongpu(l.outputs*l.batch, l.temp2_gpu, l.temp_gpu); in backward_conv_lstm_layer_gpu()
1131 mul_ongpu(l.outputs*l.batch, l.prev_cell_gpu, 1, l.temp_gpu, 1); in backward_conv_lstm_layer_gpu()
1132 gradient_array_ongpu(l.f_gpu, l.outputs*l.batch, LOGISTIC, l.temp_gpu); in backward_conv_lstm_layer_gpu()
1135 if (l.peephole) { in backward_conv_lstm_layer_gpu()
1136 simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, vf.delta_gpu); in backward_conv_lstm_layer_gpu()
1137 s.input = l.prev_cell_gpu; in backward_conv_lstm_layer_gpu()
1142 simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, wf.delta_gpu); in backward_conv_lstm_layer_gpu()
1143 s.input = l.prev_state_gpu; in backward_conv_lstm_layer_gpu()
1147 simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, uf.delta_gpu); in backward_conv_lstm_layer_gpu()
1153 simple_copy_ongpu(l.outputs*l.batch, l.temp2_gpu, l.temp_gpu); in backward_conv_lstm_layer_gpu()
1154 mul_ongpu(l.outputs*l.batch, l.f_gpu, 1, l.temp_gpu, 1); in backward_conv_lstm_layer_gpu()
1155 simple_copy_ongpu(l.outputs*l.batch, l.temp_gpu, l.dc_gpu); in backward_conv_lstm_layer_gpu()
1156 fix_nan_and_inf(l.dc_gpu, l.outputs*l.batch); in backward_conv_lstm_layer_gpu()
1159 state.input -= l.inputs*l.batch; in backward_conv_lstm_layer_gpu()
1160 …if (state.delta) state.delta -= l.inputs*l.batch; // new delta: state.delta = prev_layer.delta_g… in backward_conv_lstm_layer_gpu()
1161 l.output_gpu -= l.outputs*l.batch; in backward_conv_lstm_layer_gpu()
1162 l.cell_gpu -= l.outputs*l.batch; in backward_conv_lstm_layer_gpu()
1163 l.delta_gpu -= l.outputs*l.batch; in backward_conv_lstm_layer_gpu()
1165 if (l.peephole) { in backward_conv_lstm_layer_gpu()
1182 simple_copy_ongpu(l.outputs*l.batch, last_output, l.last_prev_state_gpu); in backward_conv_lstm_layer_gpu()
1183 simple_copy_ongpu(l.outputs*l.batch, last_cell, l.last_prev_cell_gpu); in backward_conv_lstm_layer_gpu()