1 #include "cost_layer.h"
2 #include "utils.h"
3 #include "dark_cuda.h"
4 #include "blas.h"
5 #include <math.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #include <stdio.h>
9
get_cost_type(char * s)10 COST_TYPE get_cost_type(char *s)
11 {
12 if (strcmp(s, "sse")==0) return SSE;
13 if (strcmp(s, "masked")==0) return MASKED;
14 if (strcmp(s, "smooth")==0) return SMOOTH;
15 fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s);
16 return SSE;
17 }
18
/*
** Inverse of get_cost_type(): map a COST_TYPE enum value back to its
** config-file name. Unknown values map to "sse", mirroring the parser's
** fallback. Returns a pointer to a string literal (do not free).
*/
char *get_cost_string(COST_TYPE a)
{
    if (a == MASKED) return "masked";
    if (a == SMOOTH) return "smooth";
    return "sse";  /* SSE and any unrecognized value */
}
32
make_cost_layer(int batch,int inputs,COST_TYPE cost_type,float scale)33 cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale)
34 {
35 fprintf(stderr, "cost %4d\n", inputs);
36 cost_layer l = { (LAYER_TYPE)0 };
37 l.type = COST;
38
39 l.scale = scale;
40 l.batch = batch;
41 l.inputs = inputs;
42 l.outputs = inputs;
43 l.cost_type = cost_type;
44 l.delta = (float*)xcalloc(inputs * batch, sizeof(float));
45 l.output = (float*)xcalloc(inputs * batch, sizeof(float));
46 l.cost = (float*)xcalloc(1, sizeof(float));
47
48 l.forward = forward_cost_layer;
49 l.backward = backward_cost_layer;
50 #ifdef GPU
51 l.forward_gpu = forward_cost_layer_gpu;
52 l.backward_gpu = backward_cost_layer_gpu;
53
54 l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
55 l.output_gpu = cuda_make_array(l.output, inputs*batch);
56 #endif
57 return l;
58 }
59
/*
** Resize an existing cost layer to a new input count (e.g. after a network
** resize). Host buffers are reallocated in place; GPU mirrors are freed and
** re-created from the (possibly stale) host contents. Batch size is unchanged.
*/
void resize_cost_layer(cost_layer *l, int inputs)
{
    const int total = inputs * l->batch;

    l->inputs = inputs;
    l->outputs = inputs;
    l->delta = (float*)xrealloc(l->delta, total * sizeof(float));
    l->output = (float*)xrealloc(l->output, total * sizeof(float));
#ifdef GPU
    cuda_free(l->delta_gpu);
    cuda_free(l->output_gpu);
    l->delta_gpu = cuda_make_array(l->delta, total);
    l->output_gpu = cuda_make_array(l->output, total);
#endif
}
73
/*
** CPU forward pass: compute the per-element loss and its gradient.
**
** No-op unless ground truth is present (inference runs pass truth == NULL).
** MASKED: elements whose truth equals SECRET_NUM are copied into the input
** first, so prediction == truth there and they contribute zero loss/gradient.
** SMOOTH uses smooth-L1; everything else (SSE, MASKED) uses L2. The scalar
** cost is the sum of the per-element loss values.
*/
void forward_cost_layer(cost_layer l, network_state state)
{
    int j;
    const int total = l.batch * l.inputs;

    if (!state.truth) return;

    if (l.cost_type == MASKED) {
        for (j = 0; j < total; ++j) {
            if (state.truth[j] == SECRET_NUM) state.input[j] = SECRET_NUM;
        }
    }

    if (l.cost_type == SMOOTH) {
        smooth_l1_cpu(total, state.input, state.truth, l.delta, l.output);
    } else {
        l2_cpu(total, state.input, state.truth, l.delta, l.output);
    }

    l.cost[0] = sum_array(l.output, total);
}
90
/*
** CPU backward pass: accumulate l.scale * l.delta into the previous layer's
** gradient buffer (state.delta) via saxpy.
*/
void backward_cost_layer(const cost_layer l, network_state state)
{
    const int total = l.batch * l.inputs;
    axpy_cpu(total, l.scale, l.delta, 1, state.delta, 1);
}
95
96 #ifdef GPU
97
/*
** Copy the layer's gradient from device to host (delta_gpu -> delta).
*/
void pull_cost_layer(cost_layer l)
{
    const int total = l.batch * l.inputs;
    cuda_pull_array(l.delta_gpu, l.delta, total);
}
102
/*
** Copy the layer's gradient from host to device (delta -> delta_gpu).
*/
void push_cost_layer(cost_layer l)
{
    const int total = l.batch * l.inputs;
    cuda_push_array(l.delta_gpu, l.delta, total);
}
107
/*
** qsort() comparator ordering floats by ascending absolute value.
** Uses fabsf (math.h, already included by this file) instead of a
** hand-rolled sign flip; the (>) - (<) idiom yields -1/0/1 without
** the overflow risk of subtraction.
*/
int float_abs_compare (const void * a, const void * b)
{
    const float fa = fabsf(*(const float*)a);
    const float fb = fabsf(*(const float*)b);
    return (fa > fb) - (fa < fb);
}
116
/*
** GPU forward pass: same contract as forward_cost_layer(), operating on the
** device buffers. No-op unless ground truth is present. After the loss
** kernels run, the per-element losses are pulled back to the host so the
** scalar cost can be summed on the CPU.
**
** When l.ratio is set, the gradient is pulled to the host and sorted by
** magnitude to find a percentile threshold for hard-example suppression.
*/
void forward_cost_layer_gpu(cost_layer l, network_state state)
{
    const int total = l.batch * l.inputs;

    if (!state.truth) return;

    if (l.cost_type == MASKED) {
        mask_ongpu(total, state.input, SECRET_NUM, state.truth);
    }

    if (l.cost_type == SMOOTH) {
        smooth_l1_gpu(total, state.input, state.truth, l.delta_gpu, l.output_gpu);
    } else {
        l2_gpu(total, state.input, state.truth, l.delta_gpu, l.output_gpu);
    }

    if (l.ratio) {
        cuda_pull_array(l.delta_gpu, l.delta, total);
        qsort(l.delta, total, sizeof(float), float_abs_compare);
        int n = (1-l.ratio) * l.batch*l.inputs;
        float thresh = l.delta[n];
        /* NOTE(review): the percentile threshold computed above is immediately
           discarded here, so the pull+qsort is dead work and supp_ongpu always
           runs with thresh == 0 (suppressing nothing). The printf is leftover
           debug output. This matches upstream darknet; confirm intent before
           changing, since using the real threshold would alter training. */
        thresh = 0;
        printf("%f\n", thresh);
        supp_ongpu(total, thresh, l.delta_gpu, 1);
    }

    cuda_pull_array(l.output_gpu, l.output, total);
    l.cost[0] = sum_array(l.output, total);
}
143
/*
** GPU backward pass: accumulate l.scale * delta_gpu into the previous
** layer's device gradient buffer (state.delta) via the GPU saxpy.
*/
void backward_cost_layer_gpu(const cost_layer l, network_state state)
{
    const int total = l.batch * l.inputs;
    axpy_ongpu(total, l.scale, l.delta_gpu, 1, state.delta, 1);
}
148 #endif
149