1 /*
2 Copyright (c) by respective owners including Yahoo!, Microsoft, and
3 individual contributors. All rights reserved. Released under a BSD (revised)
4 license as described in the file LICENSE.
5 */
6 #ifdef _WIN32
7 #include <winsock2.h>
8 #else
9 #include <netdb.h>
10 #endif
11 #include "reductions.h"
12 #include "gd.h"
13
14 using namespace std;
15
16 using namespace LEARNER;
17
// Per-reduction state for the reduction-based matrix factorization learner.
// Allocated with calloc_or_die in mf_setup and torn down in finish.
struct mf {
  // local copy of the global namespace-pair interactions; the global list is
  // cleared in mf_setup so the base learner only ever sees linear terms
  vector<string> pairs;

  // rank k of the factorization (number of latent dimensions per pair side)
  size_t rank;

  // NOTE(review): appears unused within this file — possibly managed by the
  // learner framework; confirm before removing
  uint32_t increment;

  // array to cache w*x, (l^k * x_l) and (r^k * x_r)
  // [ w*(1,x_l,x_r) , l^1*x_l, r^1*x_r, l^2*x_l, r^2*x_2, ... ]
  v_array<float> sub_predictions;

  // array for temp storage of indices during prediction
  v_array<unsigned char> predict_indices;

  // array for temp storage of indices (used during learn)
  v_array<unsigned char> indices;

  // array for temp storage of features (saved/restored around the
  // in-place feature scaling done in learn)
  v_array<feature> temp_features;

  vw* all; // for pairs? and finalize
};
40
41 template <bool cache_sub_predictions>
predict(mf & data,base_learner & base,example & ec)42 void predict(mf& data, base_learner& base, example& ec) {
43 float prediction = 0;
44 if (cache_sub_predictions)
45 data.sub_predictions.resize(2*data.rank+1, true);
46
47 // predict from linear terms
48 base.predict(ec);
49
50 // store linear prediction
51 if (cache_sub_predictions)
52 data.sub_predictions[0] = ec.partial_prediction;
53 prediction += ec.partial_prediction;
54
55 // store namespace indices
56 copy_array(data.predict_indices, ec.indices);
57
58 // erase indices
59 ec.indices.erase();
60 ec.indices.push_back(0);
61
62 // add interaction terms to prediction
63 for (vector<string>::iterator i = data.pairs.begin(); i != data.pairs.end(); i++) {
64
65 int left_ns = (int) (*i)[0];
66 int right_ns = (int) (*i)[1];
67
68 if (ec.atomics[left_ns].size() > 0 && ec.atomics[right_ns].size() > 0) {
69 for (size_t k = 1; k <= data.rank; k++) {
70
71 ec.indices[0] = left_ns;
72
73 // compute l^k * x_l using base learner
74 base.predict(ec, k);
75 float x_dot_l = ec.partial_prediction;
76 if (cache_sub_predictions)
77 data.sub_predictions[2*k-1] = x_dot_l;
78
79 // set example to right namespace only
80 ec.indices[0] = right_ns;
81
82 // compute r^k * x_r using base learner
83 base.predict(ec, k + data.rank);
84 float x_dot_r = ec.partial_prediction;
85 if (cache_sub_predictions)
86 data.sub_predictions[2*k] = x_dot_r;
87
88 // accumulate prediction
89 prediction += (x_dot_l * x_dot_r);
90 }
91 }
92 }
93 // restore namespace indices and label
94 copy_array(ec.indices, data.predict_indices);
95
96 // finalize prediction
97 ec.partial_prediction = prediction;
98 ec.pred.scalar = GD::finalize_prediction(data.all->sd, ec.partial_prediction);
99 }
100
learn(mf & data,base_learner & base,example & ec)101 void learn(mf& data, base_learner& base, example& ec) {
102 // predict with current weights
103 predict<true>(data, base, ec);
104 float predicted = ec.pred.scalar;
105
106 // update linear weights
107 base.update(ec);
108 ec.pred.scalar = ec.updated_prediction;
109
110 // store namespace indices
111 copy_array(data.indices, ec.indices);
112
113 // erase indices
114 ec.indices.erase();
115 ec.indices.push_back(0);
116
117 // update interaction terms
118 // looping over all pairs of non-empty namespaces
119 for (vector<string>::iterator i = data.pairs.begin(); i != data.pairs.end(); i++) {
120
121 int left_ns = (int) (*i)[0];
122 int right_ns = (int) (*i)[1];
123
124 if (ec.atomics[left_ns].size() > 0 && ec.atomics[right_ns].size() > 0) {
125
126 // set example to left namespace only
127 ec.indices[0] = left_ns;
128
129 // store feature values in left namespace
130 copy_array(data.temp_features, ec.atomics[left_ns]);
131
132 for (size_t k = 1; k <= data.rank; k++) {
133
134 // multiply features in left namespace by r^k * x_r
135 for (feature* f = ec.atomics[left_ns].begin; f != ec.atomics[left_ns].end; f++)
136 f->x *= data.sub_predictions[2*k];
137
138 // update l^k using base learner
139 base.update(ec, k);
140
141 // restore left namespace features (undoing multiply)
142 copy_array(ec.atomics[left_ns], data.temp_features);
143
144 // compute new l_k * x_l scaling factors
145 // base.predict(ec, k);
146 // data.sub_predictions[2*k-1] = ec.partial_prediction;
147 // ec.pred.scalar = ec.updated_prediction;
148 }
149
150 // set example to right namespace only
151 ec.indices[0] = right_ns;
152
153 // store feature values for right namespace
154 copy_array(data.temp_features, ec.atomics[right_ns]);
155
156 for (size_t k = 1; k <= data.rank; k++) {
157
158 // multiply features in right namespace by l^k * x_l
159 for (feature* f = ec.atomics[right_ns].begin; f != ec.atomics[right_ns].end; f++)
160 f->x *= data.sub_predictions[2*k-1];
161
162 // update r^k using base learner
163 base.update(ec, k + data.rank);
164 ec.pred.scalar = ec.updated_prediction;
165
166 // restore right namespace features
167 copy_array(ec.atomics[right_ns], data.temp_features);
168 }
169 }
170 }
171 // restore namespace indices
172 copy_array(ec.indices, data.indices);
173
174 // restore original prediction
175 ec.pred.scalar = predicted;
176 }
177
finish(mf & o)178 void finish(mf& o) {
179 // restore global pairs
180 o.all->pairs = o.pairs;
181
182 // clean up local v_arrays
183 o.indices.delete_v();
184 o.sub_predictions.delete_v();
185 }
186
mf_setup(vw & all)187 base_learner* mf_setup(vw& all) {
188 if (missing_option<size_t, true>(all, "new_mf", "rank for reduction-based matrix factorization"))
189 return NULL;
190
191 mf& data = calloc_or_die<mf>();
192 data.all = &all;
193 data.rank = (uint32_t)all.vm["new_mf"].as<size_t>();
194
195 // store global pairs in local data structure and clear global pairs
196 // for eventual calls to base learner
197 data.pairs = all.pairs;
198 all.pairs.clear();
199
200 all.random_positive_weights = true;
201
202 learner<mf>& l = init_learner(&data, setup_base(all), learn, predict<false>, 2*data.rank+1);
203 l.set_finish(finish);
204 return make_base(l);
205 }
206