1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved. Released under a BSD (revised)
4 license as described in the file LICENSE.
5 */
6 #include <stdint.h>
7 #include "gd.h"
8
compare_feature(const void * p1,const void * p2)9 int compare_feature(const void* p1, const void* p2) {
10 feature* f1 = (feature*) p1;
11 feature* f2 = (feature*) p2;
12 if(f1->weight_index < f2->weight_index) return -1;
13 else if(f1->weight_index > f2->weight_index) return 1;
14 else return 0;
15 }
16
collision_cleanup(feature * feature_map,size_t & len)17 float collision_cleanup(feature* feature_map, size_t& len) {
18
19 int pos = 0;
20 float sum_sq = 0.;
21
22 for(uint32_t i = 1;i < len;i++) {
23 if(feature_map[i].weight_index == feature_map[pos].weight_index)
24 feature_map[pos].x += feature_map[i].x;
25 else {
26 sum_sq += feature_map[pos].x*feature_map[pos].x;
27 feature_map[++pos] = feature_map[i];
28 }
29 }
30 sum_sq += feature_map[pos].x*feature_map[pos].x;
31 len = pos+1;
32 return sum_sq;
33 }
34
copy_audit_data(audit_data & src)35 audit_data copy_audit_data(audit_data &src) {
36 audit_data dst;
37 dst.space = calloc_or_die<char>(strlen(src.space)+1);
38 strcpy(dst.space, src.space);
39 dst.feature = calloc_or_die<char>(strlen(src.feature)+1);
40 strcpy(dst.feature, src.feature);
41 dst.weight_index = src.weight_index;
42 dst.x = src.x;
43 dst.alloced = src.alloced;
44 return dst;
45 }
46
47 namespace VW {
copy_example_label(example * dst,example * src,size_t label_size,void (* copy_label)(void *,void *))48 void copy_example_label(example* dst, example* src, size_t label_size, void(*copy_label)(void*,void*)) {
49 if (copy_label)
50 copy_label(&dst->l, &src->l); // TODO: we really need to delete_label on dst :(
51 else
52 dst->l = src->l;
53 }
54
copy_example_data(bool audit,example * dst,example * src)55 void copy_example_data(bool audit, example* dst, example* src)
56 {
57 //std::cerr << "copy_example_data dst = " << dst << std::endl;
58 copy_array(dst->tag, src->tag);
59 dst->example_counter = src->example_counter;
60
61 copy_array(dst->indices, src->indices);
62 for (size_t i=0; i<256; i++)
63 copy_array(dst->atomics[i], src->atomics[i]);
64 dst->ft_offset = src->ft_offset;
65
66 if (audit)
67 for (size_t i=0; i<256; i++) {
68 for (size_t j=0; j<dst->audit_features[i].size(); j++)
69 if (dst->audit_features[i][j].alloced) {
70 free(dst->audit_features[i][j].space);
71 free(dst->audit_features[i][j].feature);
72 }
73 copy_array(dst->audit_features[i], src->audit_features[i], copy_audit_data);
74 }
75
76 dst->num_features = src->num_features;
77 dst->partial_prediction = src->partial_prediction;
78 copy_array(dst->topic_predictions, src->topic_predictions);
79 dst->loss = src->loss;
80 dst->example_t = src->example_t;
81 memcpy(dst->sum_feat_sq, src->sum_feat_sq, 256 * sizeof(float));
82 dst->total_sum_feat_sq = src->total_sum_feat_sq;
83 dst->revert_weight = src->revert_weight;
84 dst->test_only = src->test_only;
85 dst->end_pass = src->end_pass;
86 dst->sorted = src->sorted;
87 dst->in_use = src->in_use;}
88
copy_example_data(bool audit,example * dst,example * src,size_t label_size,void (* copy_label)(void *,void *))89 void copy_example_data(bool audit, example* dst, example* src, size_t label_size, void(*copy_label)(void*,void*)) {
90 copy_example_data(audit, dst, src);
91 copy_example_label(dst, src, label_size, copy_label);
92 }
93 }
94
95 struct features_and_source
96 {
97 v_array<feature> feature_map; //map to store sparse feature vectors
98 uint32_t stride_shift;
99 uint32_t mask;
100 weight* base;
101 vw* all;
102 };
103
vec_store(features_and_source & p,float fx,uint32_t fi)104 void vec_store(features_and_source& p, float fx, uint32_t fi) {
105 feature f = {fx, (uint32_t)(fi >> p.stride_shift) & p.mask};
106 p.feature_map.push_back(f);
107 }
108
109 namespace VW {
get_features(vw & all,example * ec,size_t & feature_map_len)110 feature* get_features(vw& all, example* ec, size_t& feature_map_len)
111 {
112 features_and_source fs;
113 fs.stride_shift = all.reg.stride_shift;
114 fs.mask = (uint32_t)all.reg.weight_mask >> all.reg.stride_shift;
115 fs.base = all.reg.weight_vector;
116 fs.all = &all;
117 fs.feature_map = v_init<feature>();
118 GD::foreach_feature<features_and_source, uint32_t, vec_store>(all, *ec, fs);
119 feature_map_len = fs.feature_map.size();
120 return fs.feature_map.begin;
121 }
122
return_features(feature * f)123 void return_features(feature* f)
124 {
125 if (f != NULL)
126 free(f);
127 }
128 }
129
flatten_example(vw & all,example * ec)130 flat_example* flatten_example(vw& all, example *ec)
131 {
132 flat_example& fec = calloc_or_die<flat_example>();
133 fec.l = ec->l;
134
135 fec.tag_len = ec->tag.size();
136 if (fec.tag_len >0)
137 {
138 fec.tag = calloc_or_die<char>(fec.tag_len+1);
139 memcpy(fec.tag,ec->tag.begin, fec.tag_len);
140 }
141
142 fec.example_counter = ec->example_counter;
143 fec.ft_offset = ec->ft_offset;
144 fec.num_features = ec->num_features;
145
146 fec.feature_map = VW::get_features(all, ec, fec.feature_map_len);
147
148 return &fec;
149 }
150
flatten_sort_example(vw & all,example * ec)151 flat_example* flatten_sort_example(vw& all, example *ec)
152 {
153 flat_example* fec = flatten_example(all, ec);
154 qsort(fec->feature_map, fec->feature_map_len, sizeof(feature), compare_feature);
155 fec->total_sum_feat_sq = collision_cleanup(fec->feature_map, fec->feature_map_len);
156 return fec;
157 }
158
free_flatten_example(flat_example * fec)159 void free_flatten_example(flat_example* fec)
160 { //note: The label memory should be freed by by freeing the original example.
161 if (fec)
162 {
163 if (fec->feature_map_len > 0)
164 free(fec->feature_map);
165 if (fec->tag_len > 0)
166 free(fec->tag);
167 free(fec);
168 }
169 }
170
alloc_examples(size_t label_size,size_t count=1)171 example *alloc_examples(size_t label_size, size_t count=1)
172 {
173 example* ec = calloc_or_die<example>(count);
174 if (ec == NULL) return NULL;
175 for (size_t i=0; i<count; i++) {
176 ec[i].in_use = true;
177 ec[i].ft_offset = 0;
178 // std::cerr << " alloc_example.indices.begin=" << ec->indices.begin << " end=" << ec->indices.end << " // ld = " << ec->ld << "\t|| me = " << ec << std::endl;
179 }
180 return ec;
181 }
182
dealloc_example(void (* delete_label)(void *),example & ec)183 void dealloc_example(void(*delete_label)(void*), example&ec)
184 {
185 if (delete_label)
186 delete_label(&ec.l);
187
188 ec.tag.delete_v();
189
190 ec.topic_predictions.delete_v();
191
192 for (size_t j = 0; j < 256; j++)
193 {
194 ec.atomics[j].delete_v();
195
196 if (ec.audit_features[j].begin != ec.audit_features[j].end_array)
197 {
198 for (audit_data* temp = ec.audit_features[j].begin;
199 temp != ec.audit_features[j].end; temp++)
200 if (temp->alloced) {
201 free(temp->space);
202 free(temp->feature);
203 temp->alloced = false;
204 }
205 ec.audit_features[j].delete_v();
206 }
207 }
208 ec.indices.delete_v();
209 }
210
211