1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved.  Released under a BSD (revised)
4 license as described in the file LICENSE.
5  */
6 #include <stdint.h>
7 #include "gd.h"
8 
compare_feature(const void * p1,const void * p2)9 int compare_feature(const void* p1, const void* p2) {
10   feature* f1 = (feature*) p1;
11   feature* f2 = (feature*) p2;
12   if(f1->weight_index < f2->weight_index) return -1;
13   else if(f1->weight_index > f2->weight_index) return 1;
14   else return 0;
15 }
16 
collision_cleanup(feature * feature_map,size_t & len)17 float collision_cleanup(feature* feature_map, size_t& len) {
18 
19  int pos = 0;
20  float sum_sq = 0.;
21 
22  for(uint32_t i = 1;i < len;i++) {
23     if(feature_map[i].weight_index == feature_map[pos].weight_index)
24       feature_map[pos].x += feature_map[i].x;
25     else {
26       sum_sq += feature_map[pos].x*feature_map[pos].x;
27       feature_map[++pos] = feature_map[i];
28     }
29   }
30   sum_sq += feature_map[pos].x*feature_map[pos].x;
31   len = pos+1;
32   return sum_sq;
33 }
34 
copy_audit_data(audit_data & src)35 audit_data copy_audit_data(audit_data &src) {
36   audit_data dst;
37   dst.space = calloc_or_die<char>(strlen(src.space)+1);
38   strcpy(dst.space, src.space);
39   dst.feature = calloc_or_die<char>(strlen(src.feature)+1);
40   strcpy(dst.feature, src.feature);
41   dst.weight_index = src.weight_index;
42   dst.x = src.x;
43   dst.alloced = src.alloced;
44   return dst;
45 }
46 
47 namespace VW {
copy_example_label(example * dst,example * src,size_t label_size,void (* copy_label)(void *,void *))48 void copy_example_label(example* dst, example* src, size_t label_size, void(*copy_label)(void*,void*)) {
49   if (copy_label)
50     copy_label(&dst->l, &src->l);   // TODO: we really need to delete_label on dst :(
51   else
52     dst->l = src->l;
53 }
54 
copy_example_data(bool audit,example * dst,example * src)55 void copy_example_data(bool audit, example* dst, example* src)
56 {
57   //std::cerr << "copy_example_data dst = " << dst << std::endl;
58   copy_array(dst->tag, src->tag);
59   dst->example_counter = src->example_counter;
60 
61   copy_array(dst->indices, src->indices);
62   for (size_t i=0; i<256; i++)
63     copy_array(dst->atomics[i], src->atomics[i]);
64   dst->ft_offset = src->ft_offset;
65 
66   if (audit)
67     for (size_t i=0; i<256; i++) {
68       for (size_t j=0; j<dst->audit_features[i].size(); j++)
69         if (dst->audit_features[i][j].alloced) {
70           free(dst->audit_features[i][j].space);
71           free(dst->audit_features[i][j].feature);
72         }
73       copy_array(dst->audit_features[i], src->audit_features[i], copy_audit_data);
74     }
75 
76   dst->num_features = src->num_features;
77   dst->partial_prediction = src->partial_prediction;
78   copy_array(dst->topic_predictions, src->topic_predictions);
79   dst->loss = src->loss;
80   dst->example_t = src->example_t;
81   memcpy(dst->sum_feat_sq, src->sum_feat_sq, 256 * sizeof(float));
82   dst->total_sum_feat_sq = src->total_sum_feat_sq;
83   dst->revert_weight = src->revert_weight;
84   dst->test_only = src->test_only;
85   dst->end_pass = src->end_pass;
86   dst->sorted = src->sorted;
87   dst->in_use = src->in_use;}
88 
copy_example_data(bool audit,example * dst,example * src,size_t label_size,void (* copy_label)(void *,void *))89 void copy_example_data(bool audit, example* dst, example* src, size_t label_size, void(*copy_label)(void*,void*)) {
90   copy_example_data(audit, dst, src);
91   copy_example_label(dst, src, label_size, copy_label);
92 }
93 }
94 
95 struct features_and_source
96 {
97   v_array<feature> feature_map; //map to store sparse feature vectors
98   uint32_t stride_shift;
99   uint32_t mask;
100   weight* base;
101   vw* all;
102 };
103 
vec_store(features_and_source & p,float fx,uint32_t fi)104 void vec_store(features_and_source& p, float fx, uint32_t fi) {
105   feature f = {fx, (uint32_t)(fi >> p.stride_shift) & p.mask};
106   p.feature_map.push_back(f);
107 }
108 
109 namespace VW {
get_features(vw & all,example * ec,size_t & feature_map_len)110   feature* get_features(vw& all, example* ec, size_t& feature_map_len)
111 {
112 	features_and_source fs;
113 	fs.stride_shift = all.reg.stride_shift;
114 	fs.mask = (uint32_t)all.reg.weight_mask >> all.reg.stride_shift;
115 	fs.base = all.reg.weight_vector;
116 	fs.all = &all;
117 	fs.feature_map = v_init<feature>();
118 	GD::foreach_feature<features_and_source, uint32_t, vec_store>(all, *ec, fs);
119 	feature_map_len = fs.feature_map.size();
120 	return fs.feature_map.begin;
121 }
122 
return_features(feature * f)123 void return_features(feature* f)
124 {
125 	if (f != NULL)
126 		free(f);
127 }
128 }
129 
flatten_example(vw & all,example * ec)130 flat_example* flatten_example(vw& all, example *ec)
131 {
132   flat_example& fec = calloc_or_die<flat_example>();
133 	fec.l = ec->l;
134 
135 	fec.tag_len = ec->tag.size();
136 	if (fec.tag_len >0)
137 	  {
138 	    fec.tag = calloc_or_die<char>(fec.tag_len+1);
139 	    memcpy(fec.tag,ec->tag.begin, fec.tag_len);
140 	  }
141 
142 	fec.example_counter = ec->example_counter;
143 	fec.ft_offset = ec->ft_offset;
144 	fec.num_features = ec->num_features;
145 
146 	fec.feature_map = VW::get_features(all, ec, fec.feature_map_len);
147 
148 	return &fec;
149 }
150 
flatten_sort_example(vw & all,example * ec)151 flat_example* flatten_sort_example(vw& all, example *ec)
152 {
153   flat_example* fec = flatten_example(all, ec);
154   qsort(fec->feature_map, fec->feature_map_len, sizeof(feature), compare_feature);
155   fec->total_sum_feat_sq = collision_cleanup(fec->feature_map, fec->feature_map_len);
156   return fec;
157 }
158 
free_flatten_example(flat_example * fec)159 void free_flatten_example(flat_example* fec)
160 {  //note: The label memory should be freed by by freeing the original example.
161   if (fec)
162     {
163       if (fec->feature_map_len > 0)
164 	free(fec->feature_map);
165       if (fec->tag_len > 0)
166 	free(fec->tag);
167       free(fec);
168     }
169 }
170 
alloc_examples(size_t label_size,size_t count=1)171 example *alloc_examples(size_t label_size, size_t count=1)
172 {
173   example* ec = calloc_or_die<example>(count);
174   if (ec == NULL) return NULL;
175   for (size_t i=0; i<count; i++) {
176     ec[i].in_use = true;
177     ec[i].ft_offset = 0;
178     //  std::cerr << "  alloc_example.indices.begin=" << ec->indices.begin << " end=" << ec->indices.end << " // ld = " << ec->ld << "\t|| me = " << ec << std::endl;
179   }
180   return ec;
181 }
182 
dealloc_example(void (* delete_label)(void *),example & ec)183 void dealloc_example(void(*delete_label)(void*), example&ec)
184 {
185   if (delete_label)
186     delete_label(&ec.l);
187 
188   ec.tag.delete_v();
189 
190   ec.topic_predictions.delete_v();
191 
192   for (size_t j = 0; j < 256; j++)
193     {
194       ec.atomics[j].delete_v();
195 
196       if (ec.audit_features[j].begin != ec.audit_features[j].end_array)
197         {
198           for (audit_data* temp = ec.audit_features[j].begin;
199                temp != ec.audit_features[j].end; temp++)
200             if (temp->alloced) {
201               free(temp->space);
202               free(temp->feature);
203               temp->alloced = false;
204             }
205 	  ec.audit_features[j].delete_v();
206         }
207     }
208   ec.indices.delete_v();
209 }
210 
211