1 #include <string.h>
2 #include <float.h>
3 #include "reductions.h"
4 #include "rand48.h"
5 
6 using namespace LEARNER;
7 
8 struct LRQstate {
9   vw* all; // feature creation, audit, hash_inv
10   bool lrindices[256];
11   size_t orig_size[256];
12   std::vector<std::string> lrpairs;
13   bool dropout;
14   uint64_t seed;
15   uint64_t initial_seed;
16 };
17 
valid_int(const char * s)18 bool valid_int (const char* s)
19 {
20   char* endptr;
21 
22   int v = strtoul (s, &endptr, 0);
23   (void) v;
24 
25   return (*s != '\0' && *endptr == '\0');
26 }
27 
28 inline bool
cheesyrbit(uint64_t & seed)29 cheesyrbit (uint64_t& seed)
30 {
31   return merand48 (seed) > 0.5;
32 }
33 
34 inline float
cheesyrand(uint32_t x)35 cheesyrand (uint32_t x)
36 {
37   uint64_t seed = x;
38 
39   return merand48 (seed);
40 }
41 
42 inline bool
example_is_test(example & ec)43 example_is_test (example& ec)
44 {
45   return ec.l.simple.label == FLT_MAX;
46 }
47 
48 void
reset_seed(LRQstate & lrq)49 reset_seed (LRQstate& lrq)
50 {
51   if (lrq.all->bfgs)
52     lrq.seed = lrq.initial_seed;
53 }
54 
55 template <bool is_learn>
predict_or_learn(LRQstate & lrq,base_learner & base,example & ec)56 void predict_or_learn(LRQstate& lrq, base_learner& base, example& ec)
57 {
58   vw& all = *lrq.all;
59 
60   // Remember original features
61 
62   memset (lrq.orig_size, 0, sizeof (lrq.orig_size));
63   for (unsigned char* i = ec.indices.begin; i != ec.indices.end; ++i)
64     {
65       if (lrq.lrindices[*i])
66 	lrq.orig_size[*i] = ec.atomics[*i].size ();
67     }
68 
69   size_t which = ec.example_counter;
70   float first_prediction = 0;
71   float first_loss = 0;
72   unsigned int maxiter = (is_learn && ! example_is_test (ec)) ? 2 : 1;
73 
74   bool do_dropout = lrq.dropout && is_learn && ! example_is_test (ec);
75   float scale = (! lrq.dropout || do_dropout) ? 1.f : 0.5f;
76 
77   for (unsigned int iter = 0; iter < maxiter; ++iter, ++which)
78     {
79       // Add left LRQ features, holding right LRQ features fixed
80       //     and vice versa
81       // TODO: what happens with --lrq ab2 --lrq ac2
82       //       i.e. namespace occurs multiple times (?)
83 
84       for (vector<string>::iterator i = lrq.lrpairs.begin ();
85              i != lrq.lrpairs.end ();
86              ++i)
87           {
88             unsigned char left = (*i)[which%2];
89             unsigned char right = (*i)[(which+1)%2];
90             unsigned int k = atoi (i->c_str () + 2);
91 
92             for (unsigned int lfn = 0; lfn < lrq.orig_size[left]; ++lfn)
93               {
94                 feature* lf = ec.atomics[left].begin + lfn;
95                 float lfx = lf->x;
96                 size_t lindex = lf->weight_index + ec.ft_offset;
97 
98                 for (unsigned int n = 1; n <= k; ++n)
99                   {
100                     if (! do_dropout || cheesyrbit (lrq.seed))
101                       {
102                         uint32_t lwindex = (uint32_t)(lindex + (n << all.reg.stride_shift));
103 
104                         float* lw = &all.reg.weight_vector[lwindex & all.reg.weight_mask];
105 
106                         // perturb away from saddle point at (0, 0)
107                         if (is_learn && ! example_is_test (ec) && *lw == 0)
108                           *lw = cheesyrand (lwindex);
109 
110                         for (unsigned int rfn = 0;
111                              rfn < lrq.orig_size[right];
112                              ++rfn)
113                           {
114                             feature* rf = ec.atomics[right].begin + rfn;
115                             audit_data* ra = ec.audit_features[right].begin + rfn;
116 
117                             // NB: ec.ft_offset added by base learner
118                             float rfx = rf->x;
119                             size_t rindex = rf->weight_index;
120                             uint32_t rwindex = (uint32_t)(rindex + (n << all.reg.stride_shift));
121 
122                             feature lrq;
123                             lrq.x = scale * *lw * lfx * rfx;
124                             lrq.weight_index = rwindex;
125 
126                             ec.atomics[right].push_back (lrq);
127 
128                             if (all.audit || all.hash_inv)
129                               {
130                                 std::stringstream new_feature_buffer;
131 
132                                 new_feature_buffer << right << '^'
133                                                    << ra->feature << '^'
134                                                    << n;
135 
136 #ifdef _WIN32
137 								char* new_space = _strdup("lrq");
138 								char* new_feature =	_strdup(new_feature_buffer.str().c_str());
139 #else
140 								char* new_space = strdup("lrq");
141 								char* new_feature = strdup(new_feature_buffer.str().c_str());
142 #endif
143 
144                                 audit_data ad = { new_space, new_feature, lrq.weight_index, lrq.x, true };
145                                 ec.audit_features[right].push_back (ad);
146                               }
147                           }
148                       }
149                   }
150               }
151           }
152 
153 	if (is_learn)
154 	  base.learn(ec);
155 	else
156 	  base.predict(ec);
157 
158         // Restore example
159         if (iter == 0)
160           {
161             first_prediction = ec.pred.scalar;
162             first_loss = ec.loss;
163           }
164         else
165           {
166             ec.pred.scalar = first_prediction;
167             ec.loss = first_loss;
168           }
169 
170         for (vector<string>::iterator i = lrq.lrpairs.begin ();
171              i != lrq.lrpairs.end ();
172              ++i)
173           {
174             unsigned char right = (*i)[(which+1)%2];
175 
176             ec.atomics[right].end =
177               ec.atomics[right].begin + lrq.orig_size[right];
178 
179             if (all.audit || all.hash_inv)
180               {
181                 for (audit_data* a = ec.audit_features[right].begin + lrq.orig_size[right];
182                      a < ec.audit_features[right].end;
183                      ++a)
184                   {
185                     free (a->space);
186                     free (a->feature);
187                   }
188 
189                 ec.audit_features[right].end =
190                   ec.audit_features[right].begin + lrq.orig_size[right];
191               }
192           }
193       }
194   }
195 
lrq_setup(vw & all)196   base_learner* lrq_setup(vw& all)
197   {//parse and set arguments
198     if (missing_option<vector<string>>(all, "lrq", "use low rank quadratic features"))
199       return NULL;
200     new_options(all, "Lrq options")
201       ("lrqdropout", "use dropout training for low rank quadratic features");
202     add_options(all);
203 
204     if(!all.vm.count("lrq"))
205       return NULL;
206 
207     LRQstate& lrq = calloc_or_die<LRQstate>();
208     size_t maxk = 0;
209     lrq.all = &all;
210 
211     size_t random_seed = 0;
212     if (all.vm.count("random_seed")) random_seed = all.vm["random_seed"].as<size_t> ();
213 
214     lrq.initial_seed = lrq.seed = random_seed | 8675309;
215     if (all.vm.count("lrqdropout"))
216       {
217         lrq.dropout = true;
218         *all.file_options << " --lrqdropout ";
219       }
220     else
221       lrq.dropout = false;
222 
223     lrq.lrpairs = all.vm["lrq"].as<vector<string> > ();
224 
225     for (vector<string>::iterator i = lrq.lrpairs.begin ();
226 	 i != lrq.lrpairs.end ();
227 	 ++i)
228       *all.file_options << " --lrq " << *i;
229 
230     if (! all.quiet)
231       {
232         cerr << "creating low rank quadratic features for pairs: ";
233         if (lrq.dropout)
234           cerr << "(using dropout) ";
235       }
236 
237     for (vector<string>::iterator i = lrq.lrpairs.begin ();
238          i != lrq.lrpairs.end ();
239          ++i)
240       {
241         if(!all.quiet){
242           if (( i->length() < 3 ) || ! valid_int (i->c_str () + 2)) {
243             cerr << endl << "error, low-rank quadratic features must involve two sets and a rank.\n";
244             throw exception();
245           }
246           cerr << *i << " ";
247         }
248         // TODO: colon-syntax
249 
250         unsigned int k = atoi (i->c_str () + 2);
251 
252         lrq.lrindices[(int) (*i)[0]] = 1;
253         lrq.lrindices[(int) (*i)[1]] = 1;
254 
255         maxk = max (maxk, k);
256       }
257 
258     if(!all.quiet)
259       cerr<<endl;
260 
261 	all.wpp = all.wpp * (uint32_t)(1 + maxk);
262     learner<LRQstate>& l = init_learner(&lrq, setup_base(all), predict_or_learn<true>,
263 					predict_or_learn<false>, 1 + maxk);
264     l.set_end_pass(reset_seed);
265 
266     // TODO: leaks memory ?
267     return make_base(l);
268   }
269