1 #include "cost_layer.h"
2 #include "utils.h"
3 #include "dark_cuda.h"
4 #include "blas.h"
5 #include <math.h>
6 #include <string.h>
7 #include <stdlib.h>
8 #include <stdio.h>
9 
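// Map a cost-type string to its COST_TYPE enum value; unknown names
// fall back to SSE with a warning on stderr.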
COST_TYPE get_cost_type(char *s)
{
    if (strcmp(s, "sse")==0) return SSE;
    if (strcmp(s, "masked")==0) return MASKED;
    if (strcmp(s, "smooth")==0) return SMOOTH;
    fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s);
    return SSE;
}

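// Inverse of get_cost_type: return the canonical string for a COST_TYPE.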
char *get_cost_string(COST_TYPE a)
{
    switch(a){
        case SSE:
            return "sse";
        case MASKED:
            return "masked";
        case SMOOTH:
            return "smooth";
        default:
            return "sse";
    }
}

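// Allocate and initialize a cost layer: per-element delta and output buffers
// sized inputs*batch, a single-element cost accumulator, and the forward /
// backward callbacks (plus GPU mirrors of the buffers when built with GPU).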
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale)
{
    fprintf(stderr, "cost                                           %4d\n",  inputs);
    cost_layer l = { (LAYER_TYPE)0 };
    l.type = COST;

    l.scale = scale;
    l.batch = batch;
    l.inputs = inputs;
    l.outputs = inputs;
    l.cost_type = cost_type;
    l.delta = (float*)xcalloc(inputs * batch, sizeof(float));
    l.output = (float*)xcalloc(inputs * batch, sizeof(float));
    l.cost = (float*)xcalloc(1, sizeof(float));

    l.forward = forward_cost_layer;
    l.backward = backward_cost_layer;
    #ifdef GPU
    l.forward_gpu = forward_cost_layer_gpu;
    l.backward_gpu = backward_cost_layer_gpu;

    l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
    l.output_gpu = cuda_make_array(l.output, inputs*batch);
    #endif
    return l;
}

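// Resize the layer's CPU buffers in place and rebuild the GPU copies to
// match the new input count.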
void resize_cost_layer(cost_layer *l, int inputs)
{
    l->inputs = inputs;
    l->outputs = inputs;
    l->delta = (float*)xrealloc(l->delta, inputs * l->batch * sizeof(float));
    l->output = (float*)xrealloc(l->output, inputs * l->batch * sizeof(float));
#ifdef GPU
    cuda_free(l->delta_gpu);
    cuda_free(l->output_gpu);
    l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch);
    l->output_gpu = cuda_make_array(l->output, inputs*l->batch);
#endif
}

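// CPU forward pass. For MASKED cost, inputs whose truth is SECRET_NUM are
// overwritten with SECRET_NUM so those elements contribute zero error; then
// smooth-L1 or L2 fills delta/output element-wise, and the summed output
// becomes the scalar cost.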
void forward_cost_layer(cost_layer l, network_state state)
{
    if (!state.truth) return;
    if(l.cost_type == MASKED){
        int i;
        for(i = 0; i < l.batch*l.inputs; ++i){
            if(state.truth[i] == SECRET_NUM) state.input[i] = SECRET_NUM;
        }
    }
    if(l.cost_type == SMOOTH){
        smooth_l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output);
    } else {
        l2_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output);
    }
    l.cost[0] = sum_array(l.output, l.batch*l.inputs);
}

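// CPU backward pass: accumulate scale * delta into the network's delta.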
void backward_cost_layer(const cost_layer l, network_state state)
{
    axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, state.delta, 1);
}

#ifdef GPU

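// Copy the layer's delta from GPU to host memory.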
void pull_cost_layer(cost_layer l)
{
    cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
}

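// Copy the layer's delta from host to GPU memory.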
void push_cost_layer(cost_layer l)
{
    cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs);
}

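// qsort comparator ordering floats by ascending absolute value.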
int float_abs_compare (const void * a, const void * b)
{
    float fa = *(const float*) a;
    if(fa < 0) fa = -fa;
    float fb = *(const float*) b;
    if(fb < 0) fb = -fb;
    return (fa > fb) - (fa < fb);
}

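// GPU forward pass: mirrors the CPU path (masking, then smooth-L1 or L2),
// optionally applies a ratio-based suppression pass to the deltas, and pulls
// the per-element losses back to the host to compute the scalar cost.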
void forward_cost_layer_gpu(cost_layer l, network_state state)
{
    if (!state.truth) return;
    if (l.cost_type == MASKED) {
        mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth);
    }

    if(l.cost_type == SMOOTH){
        smooth_l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
    } else {
        l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
    }

    if(l.ratio){
        // Sort deltas by absolute value and take the (1-ratio) percentile as a
        // threshold for suppressing small-magnitude errors.
        cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
        qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare);
        int n = (1-l.ratio) * l.batch*l.inputs;
        float thresh = l.delta[n];
        // Note: the computed threshold is immediately overridden with 0 before
        // use, apparently disabling the suppression; looks like leftover debug code.
        thresh = 0;
        printf("%f\n", thresh);
        supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1);
    }

    cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs);
    l.cost[0] = sum_array(l.output, l.batch*l.inputs);
}

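// GPU backward pass: accumulate scale * delta into the network's delta on the device.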
void backward_cost_layer_gpu(const cost_layer l, network_state state)
{
    axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1);
}
#endif