1 /*
2  Copyright (c) by respective owners including Yahoo!, Microsoft, and
3  individual contributors. All rights reserved.  Released under a BSD (revised)
4  license as described in the file LICENSE.
5  */
6 #ifdef _WIN32
7 #include <winsock2.h>
8 #else
9 #include <netdb.h>
10 #endif
11 #include "reductions.h"
12 #include "gd.h"
13 
14 using namespace std;
15 
16 using namespace LEARNER;
17 
// Per-reduction state for reduction-based matrix factorization.
// One instance is allocated in mf_setup and freed via finish().
struct mf {
  // interacting namespace pairs (each string holds two namespace chars);
  // moved here from the global vw::pairs in mf_setup so the base learner
  // never sees the quadratic interactions, and restored in finish()
  vector<string> pairs;

  // number of latent factors k per interacting namespace pair
  size_t rank;

  // NOTE(review): not referenced anywhere in this file — presumably a
  // weight-offset stride set elsewhere; confirm before removing
  uint32_t increment;

  // array to cache w*x, (l^k * x_l) and (r^k * x_r)
  // [ w*(1,x_l,x_r) , l^1*x_l, r^1*x_r, l^2*x_l, r^2*x_2, ... ]
  v_array<float> sub_predictions;

  // array for temp storage of indices during prediction
  v_array<unsigned char> predict_indices;

  // array for temp storage of indices
  v_array<unsigned char> indices;

  // array for temp storage of features
  v_array<feature> temp_features;

  vw* all; // for pairs? and finalize
};
40 
41 template <bool cache_sub_predictions>
predict(mf & data,base_learner & base,example & ec)42 void predict(mf& data, base_learner& base, example& ec) {
43   float prediction = 0;
44   if (cache_sub_predictions)
45     data.sub_predictions.resize(2*data.rank+1, true);
46 
47   // predict from linear terms
48   base.predict(ec);
49 
50   // store linear prediction
51   if (cache_sub_predictions)
52     data.sub_predictions[0] = ec.partial_prediction;
53   prediction += ec.partial_prediction;
54 
55   // store namespace indices
56   copy_array(data.predict_indices, ec.indices);
57 
58   // erase indices
59   ec.indices.erase();
60   ec.indices.push_back(0);
61 
62   // add interaction terms to prediction
63   for (vector<string>::iterator i = data.pairs.begin(); i != data.pairs.end(); i++) {
64 
65     int left_ns = (int) (*i)[0];
66     int right_ns = (int) (*i)[1];
67 
68     if (ec.atomics[left_ns].size() > 0 && ec.atomics[right_ns].size() > 0) {
69       for (size_t k = 1; k <= data.rank; k++) {
70 
71 	ec.indices[0] = left_ns;
72 
73 	// compute l^k * x_l using base learner
74 	base.predict(ec, k);
75 	float x_dot_l = ec.partial_prediction;
76 	if (cache_sub_predictions)
77 	  data.sub_predictions[2*k-1] = x_dot_l;
78 
79 	// set example to right namespace only
80 	ec.indices[0] = right_ns;
81 
82 	// compute r^k * x_r using base learner
83 	base.predict(ec, k + data.rank);
84 	float x_dot_r = ec.partial_prediction;
85 	if (cache_sub_predictions)
86 	  data.sub_predictions[2*k] = x_dot_r;
87 
88 	// accumulate prediction
89 	prediction += (x_dot_l * x_dot_r);
90       }
91     }
92   }
93   // restore namespace indices and label
94   copy_array(ec.indices, data.predict_indices);
95 
96   // finalize prediction
97   ec.partial_prediction = prediction;
98   ec.pred.scalar = GD::finalize_prediction(data.all->sd, ec.partial_prediction);
99 }
100 
learn(mf & data,base_learner & base,example & ec)101 void learn(mf& data, base_learner& base, example& ec) {
102   // predict with current weights
103   predict<true>(data, base, ec);
104   float predicted = ec.pred.scalar;
105 
106   // update linear weights
107   base.update(ec);
108   ec.pred.scalar = ec.updated_prediction;
109 
110   // store namespace indices
111   copy_array(data.indices, ec.indices);
112 
113   // erase indices
114   ec.indices.erase();
115   ec.indices.push_back(0);
116 
117   // update interaction terms
118   // looping over all pairs of non-empty namespaces
119   for (vector<string>::iterator i = data.pairs.begin(); i != data.pairs.end(); i++) {
120 
121     int left_ns = (int) (*i)[0];
122     int right_ns = (int) (*i)[1];
123 
124     if (ec.atomics[left_ns].size() > 0 && ec.atomics[right_ns].size() > 0) {
125 
126       // set example to left namespace only
127       ec.indices[0] = left_ns;
128 
129       // store feature values in left namespace
130       copy_array(data.temp_features, ec.atomics[left_ns]);
131 
132       for (size_t k = 1; k <= data.rank; k++) {
133 
134 	// multiply features in left namespace by r^k * x_r
135 	for (feature* f = ec.atomics[left_ns].begin; f != ec.atomics[left_ns].end; f++)
136 	  f->x *= data.sub_predictions[2*k];
137 
138 	// update l^k using base learner
139 	base.update(ec, k);
140 
141 	// restore left namespace features (undoing multiply)
142 	copy_array(ec.atomics[left_ns], data.temp_features);
143 
144 	// compute new l_k * x_l scaling factors
145 	// base.predict(ec, k);
146 	// data.sub_predictions[2*k-1] = ec.partial_prediction;
147 	// ec.pred.scalar = ec.updated_prediction;
148       }
149 
150       // set example to right namespace only
151       ec.indices[0] = right_ns;
152 
153       // store feature values for right namespace
154       copy_array(data.temp_features, ec.atomics[right_ns]);
155 
156       for (size_t k = 1; k <= data.rank; k++) {
157 
158 	// multiply features in right namespace by l^k * x_l
159 	for (feature* f = ec.atomics[right_ns].begin; f != ec.atomics[right_ns].end; f++)
160 	  f->x *= data.sub_predictions[2*k-1];
161 
162 	// update r^k using base learner
163 	base.update(ec, k + data.rank);
164 	ec.pred.scalar = ec.updated_prediction;
165 
166 	// restore right namespace features
167 	copy_array(ec.atomics[right_ns], data.temp_features);
168       }
169     }
170   }
171   // restore namespace indices
172   copy_array(ec.indices, data.indices);
173 
174   // restore original prediction
175   ec.pred.scalar = predicted;
176 }
177 
finish(mf & o)178 void finish(mf& o) {
179   // restore global pairs
180   o.all->pairs = o.pairs;
181 
182   // clean up local v_arrays
183   o.indices.delete_v();
184   o.sub_predictions.delete_v();
185 }
186 
mf_setup(vw & all)187   base_learner* mf_setup(vw& all) {
188     if (missing_option<size_t, true>(all, "new_mf", "rank for reduction-based matrix factorization"))
189       return NULL;
190 
191     mf& data = calloc_or_die<mf>();
192     data.all = &all;
193     data.rank = (uint32_t)all.vm["new_mf"].as<size_t>();
194 
195     // store global pairs in local data structure and clear global pairs
196     // for eventual calls to base learner
197     data.pairs = all.pairs;
198     all.pairs.clear();
199 
200     all.random_positive_weights = true;
201 
202     learner<mf>& l = init_learner(&data, setup_base(all), learn, predict<false>, 2*data.rank+1);
203     l.set_finish(finish);
204     return make_base(l);
205   }
206