1 #include <string.h>
2 #include <float.h>
3 #include "reductions.h"
4 #include "rand48.h"
5
6 using namespace LEARNER;
7
// Per-instance state for the low rank quadratic (lrq) reduction.
struct LRQstate {
  vw* all; // feature creation, audit, hash_inv
  bool lrindices[256];               // true for each namespace appearing in some --lrq pair
  size_t orig_size[256];             // per-namespace feature counts captured before lrq expansion
  std::vector<std::string> lrpairs;  // raw "abk" specs from the command line (two namespaces + rank)
  bool dropout;                      // true iff --lrqdropout was given
  uint64_t seed;                     // current dropout PRNG state (advanced by cheesyrbit)
  uint64_t initial_seed;             // snapshot of seed; restored at end of pass when running bfgs
};
17
// Returns true iff s is a non-empty string that parses completely as an
// integer.  strtoul with base 0 accepts decimal, octal ("0...") and hex
// ("0x...") forms; trailing garbage makes the string invalid.
bool valid_int (const char* s)
{
  if (*s == '\0')
    return false;

  char* end = NULL;
  (void) strtoul (s, &end, 0);

  return *end == '\0';
}
27
28 inline bool
cheesyrbit(uint64_t & seed)29 cheesyrbit (uint64_t& seed)
30 {
31 return merand48 (seed) > 0.5;
32 }
33
34 inline float
cheesyrand(uint32_t x)35 cheesyrand (uint32_t x)
36 {
37 uint64_t seed = x;
38
39 return merand48 (seed);
40 }
41
42 inline bool
example_is_test(example & ec)43 example_is_test (example& ec)
44 {
45 return ec.l.simple.label == FLT_MAX;
46 }
47
48 void
reset_seed(LRQstate & lrq)49 reset_seed (LRQstate& lrq)
50 {
51 if (lrq.all->bfgs)
52 lrq.seed = lrq.initial_seed;
53 }
54
55 template <bool is_learn>
predict_or_learn(LRQstate & lrq,base_learner & base,example & ec)56 void predict_or_learn(LRQstate& lrq, base_learner& base, example& ec)
57 {
58 vw& all = *lrq.all;
59
60 // Remember original features
61
62 memset (lrq.orig_size, 0, sizeof (lrq.orig_size));
63 for (unsigned char* i = ec.indices.begin; i != ec.indices.end; ++i)
64 {
65 if (lrq.lrindices[*i])
66 lrq.orig_size[*i] = ec.atomics[*i].size ();
67 }
68
69 size_t which = ec.example_counter;
70 float first_prediction = 0;
71 float first_loss = 0;
72 unsigned int maxiter = (is_learn && ! example_is_test (ec)) ? 2 : 1;
73
74 bool do_dropout = lrq.dropout && is_learn && ! example_is_test (ec);
75 float scale = (! lrq.dropout || do_dropout) ? 1.f : 0.5f;
76
77 for (unsigned int iter = 0; iter < maxiter; ++iter, ++which)
78 {
79 // Add left LRQ features, holding right LRQ features fixed
80 // and vice versa
81 // TODO: what happens with --lrq ab2 --lrq ac2
82 // i.e. namespace occurs multiple times (?)
83
84 for (vector<string>::iterator i = lrq.lrpairs.begin ();
85 i != lrq.lrpairs.end ();
86 ++i)
87 {
88 unsigned char left = (*i)[which%2];
89 unsigned char right = (*i)[(which+1)%2];
90 unsigned int k = atoi (i->c_str () + 2);
91
92 for (unsigned int lfn = 0; lfn < lrq.orig_size[left]; ++lfn)
93 {
94 feature* lf = ec.atomics[left].begin + lfn;
95 float lfx = lf->x;
96 size_t lindex = lf->weight_index + ec.ft_offset;
97
98 for (unsigned int n = 1; n <= k; ++n)
99 {
100 if (! do_dropout || cheesyrbit (lrq.seed))
101 {
102 uint32_t lwindex = (uint32_t)(lindex + (n << all.reg.stride_shift));
103
104 float* lw = &all.reg.weight_vector[lwindex & all.reg.weight_mask];
105
106 // perturb away from saddle point at (0, 0)
107 if (is_learn && ! example_is_test (ec) && *lw == 0)
108 *lw = cheesyrand (lwindex);
109
110 for (unsigned int rfn = 0;
111 rfn < lrq.orig_size[right];
112 ++rfn)
113 {
114 feature* rf = ec.atomics[right].begin + rfn;
115 audit_data* ra = ec.audit_features[right].begin + rfn;
116
117 // NB: ec.ft_offset added by base learner
118 float rfx = rf->x;
119 size_t rindex = rf->weight_index;
120 uint32_t rwindex = (uint32_t)(rindex + (n << all.reg.stride_shift));
121
122 feature lrq;
123 lrq.x = scale * *lw * lfx * rfx;
124 lrq.weight_index = rwindex;
125
126 ec.atomics[right].push_back (lrq);
127
128 if (all.audit || all.hash_inv)
129 {
130 std::stringstream new_feature_buffer;
131
132 new_feature_buffer << right << '^'
133 << ra->feature << '^'
134 << n;
135
136 #ifdef _WIN32
137 char* new_space = _strdup("lrq");
138 char* new_feature = _strdup(new_feature_buffer.str().c_str());
139 #else
140 char* new_space = strdup("lrq");
141 char* new_feature = strdup(new_feature_buffer.str().c_str());
142 #endif
143
144 audit_data ad = { new_space, new_feature, lrq.weight_index, lrq.x, true };
145 ec.audit_features[right].push_back (ad);
146 }
147 }
148 }
149 }
150 }
151 }
152
153 if (is_learn)
154 base.learn(ec);
155 else
156 base.predict(ec);
157
158 // Restore example
159 if (iter == 0)
160 {
161 first_prediction = ec.pred.scalar;
162 first_loss = ec.loss;
163 }
164 else
165 {
166 ec.pred.scalar = first_prediction;
167 ec.loss = first_loss;
168 }
169
170 for (vector<string>::iterator i = lrq.lrpairs.begin ();
171 i != lrq.lrpairs.end ();
172 ++i)
173 {
174 unsigned char right = (*i)[(which+1)%2];
175
176 ec.atomics[right].end =
177 ec.atomics[right].begin + lrq.orig_size[right];
178
179 if (all.audit || all.hash_inv)
180 {
181 for (audit_data* a = ec.audit_features[right].begin + lrq.orig_size[right];
182 a < ec.audit_features[right].end;
183 ++a)
184 {
185 free (a->space);
186 free (a->feature);
187 }
188
189 ec.audit_features[right].end =
190 ec.audit_features[right].begin + lrq.orig_size[right];
191 }
192 }
193 }
194 }
195
lrq_setup(vw & all)196 base_learner* lrq_setup(vw& all)
197 {//parse and set arguments
198 if (missing_option<vector<string>>(all, "lrq", "use low rank quadratic features"))
199 return NULL;
200 new_options(all, "Lrq options")
201 ("lrqdropout", "use dropout training for low rank quadratic features");
202 add_options(all);
203
204 if(!all.vm.count("lrq"))
205 return NULL;
206
207 LRQstate& lrq = calloc_or_die<LRQstate>();
208 size_t maxk = 0;
209 lrq.all = &all;
210
211 size_t random_seed = 0;
212 if (all.vm.count("random_seed")) random_seed = all.vm["random_seed"].as<size_t> ();
213
214 lrq.initial_seed = lrq.seed = random_seed | 8675309;
215 if (all.vm.count("lrqdropout"))
216 {
217 lrq.dropout = true;
218 *all.file_options << " --lrqdropout ";
219 }
220 else
221 lrq.dropout = false;
222
223 lrq.lrpairs = all.vm["lrq"].as<vector<string> > ();
224
225 for (vector<string>::iterator i = lrq.lrpairs.begin ();
226 i != lrq.lrpairs.end ();
227 ++i)
228 *all.file_options << " --lrq " << *i;
229
230 if (! all.quiet)
231 {
232 cerr << "creating low rank quadratic features for pairs: ";
233 if (lrq.dropout)
234 cerr << "(using dropout) ";
235 }
236
237 for (vector<string>::iterator i = lrq.lrpairs.begin ();
238 i != lrq.lrpairs.end ();
239 ++i)
240 {
241 if(!all.quiet){
242 if (( i->length() < 3 ) || ! valid_int (i->c_str () + 2)) {
243 cerr << endl << "error, low-rank quadratic features must involve two sets and a rank.\n";
244 throw exception();
245 }
246 cerr << *i << " ";
247 }
248 // TODO: colon-syntax
249
250 unsigned int k = atoi (i->c_str () + 2);
251
252 lrq.lrindices[(int) (*i)[0]] = 1;
253 lrq.lrindices[(int) (*i)[1]] = 1;
254
255 maxk = max (maxk, k);
256 }
257
258 if(!all.quiet)
259 cerr<<endl;
260
261 all.wpp = all.wpp * (uint32_t)(1 + maxk);
262 learner<LRQstate>& l = init_learner(&lrq, setup_base(all), predict_or_learn<true>,
263 predict_or_learn<false>, 1 + maxk);
264 l.set_end_pass(reset_seed);
265
266 // TODO: leaks memory ?
267 return make_base(l);
268 }
269