/* Copyright 2016 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "embedding_network.h"

#include "base.h"
#include "embedding_network_params.h"
#include "float16.h"
#include "simple_adder.h"

namespace chrome_lang_id {
namespace {

using VectorWrapper = EmbeddingNetwork::VectorWrapper;

void CheckNoQuantization(const EmbeddingNetworkParams::Matrix matrix) {
  // Quantization not allowed here.
  CLD3_DCHECK(static_cast<int>(QuantizationType::NONE) ==
              static_cast<int>(matrix.quant_type));
}

// Fills a Matrix object with the parameters in the given MatrixParams.  This
// function is used to initialize weight matrices that are *not* embedding
// matrices.
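// Note: the rows created here alias the model's memory through VectorWrapper;
// no weights are copied, so the underlying EmbeddingNetworkParams must outlive
// the network.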
void FillMatrixParams(const EmbeddingNetworkParams::Matrix source_matrix,
                      EmbeddingNetwork::Matrix *mat) {
  mat->resize(source_matrix.rows);
  CheckNoQuantization(source_matrix);
  const float *weights =
      reinterpret_cast<const float *>(source_matrix.elements);
  for (int r = 0; r < source_matrix.rows; ++r) {
    (*mat)[r] = EmbeddingNetwork::VectorWrapper(weights, source_matrix.cols);
    weights += source_matrix.cols;
  }
}

// Computes y = weights * Relu(x) + b where Relu is optionally applied.
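// Since weights[i] is the weight-matrix row associated with input i, the
// product is accumulated one scaled row at a time; with apply_relu set, rows
// whose input is non-positive are skipped entirely, which is what makes the
// traversal sparse.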
template <typename ScaleAdderClass>
void SparseReluProductPlusBias(bool apply_relu,
                               const EmbeddingNetwork::Matrix &weights,
                               const EmbeddingNetwork::VectorWrapper &b,
                               const EmbeddingNetwork::Vector &x,
                               EmbeddingNetwork::Vector *y) {
  y->assign(b.data(), b.data() + b.size());
  ScaleAdderClass adder(y->data(), y->size());

  const int x_size = x.size();
  for (int i = 0; i < x_size; ++i) {
    const float &scale = x[i];
    if (apply_relu) {
      if (scale > 0) {
        adder.LazyScaleAdd(weights[i].data(), scale);
      }
    } else {
      adder.LazyScaleAdd(weights[i].data(), scale);
    }
  }
  adder.Finalize();
}
}  // namespace

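// Concatenates the embeddings of all feature values.  The concatenation is a
// sequence of contiguous blocks, one per (embedding space, feature slot) pair:
// the block for slot "base" of space "es_index" starts at
// concat_offset(es_index) + base * embedding_dim(es_index) and holds
// embedding_dim(es_index) floats.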
void EmbeddingNetwork::ConcatEmbeddings(
    const std::vector<FeatureVector> &feature_vectors, Vector *concat) const {
  concat->resize(model_->concat_layer_size());

  // "es_index" stands for "embedding space index".
  for (size_t es_index = 0; es_index < feature_vectors.size(); ++es_index) {
    const int concat_offset = model_->concat_offset(es_index);
    const int embedding_dim = model_->embedding_dim(es_index);

    const EmbeddingMatrix &embedding_matrix = embedding_matrices_[es_index];
    CLD3_DCHECK(embedding_matrix.dim() == embedding_dim);

    const bool is_quantized =
        embedding_matrix.quant_type() != QuantizationType::NONE;

    const FeatureVector &feature_vector = feature_vectors[es_index];
    const int num_features = feature_vector.size();
    for (int fi = 0; fi < num_features; ++fi) {
      const FeatureType *feature_type = feature_vector.type(fi);
      int feature_offset = concat_offset + feature_type->base() * embedding_dim;
      CLD3_DCHECK(feature_offset + embedding_dim <=
                  static_cast<int>(concat->size()));

      // Weighted embeddings will be added starting from this address.
      float *concat_ptr = concat->data() + feature_offset;

      // Pointer to float / uint8 weights for relevant embedding.
      const void *embedding_data;

      // Multiplier for each embedding weight.
      float multiplier;
      const FeatureValue feature_value = feature_vector.value(fi);
      if (feature_type->is_continuous()) {
        // Continuous features (encoded as FloatFeatureValue).
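        // FloatFeatureValue reinterprets the feature value's bits as a packed
        // (id, weight) pair: the id selects the embedding row, and the weight
        // scales it below.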
        FloatFeatureValue float_feature_value(feature_value);
        const int id = float_feature_value.value.id;
        embedding_matrix.get_embedding(id, &embedding_data, &multiplier);
        multiplier *= float_feature_value.value.weight;
      } else {
        // Discrete features: every present feature has implicit value 1.0.
        embedding_matrix.get_embedding(feature_value, &embedding_data,
                                       &multiplier);
      }

      if (is_quantized) {
        const uint8 *quant_weights =
            reinterpret_cast<const uint8 *>(embedding_data);
        for (int i = 0; i < embedding_dim; ++i, ++quant_weights, ++concat_ptr) {
          // Dequantize on the fly: 128 is the bias for UINT8 quantization,
          // the only quantization type we currently support.
          *concat_ptr += (static_cast<int>(*quant_weights) - 128) * multiplier;
        }
      } else {
        const float *weights = reinterpret_cast<const float *>(embedding_data);
        for (int i = 0; i < embedding_dim; ++i, ++weights, ++concat_ptr) {
          *concat_ptr += *weights * multiplier;
        }
      }
    }
  }
}

template <typename ScaleAdderClass>
void EmbeddingNetwork::FinishComputeFinalScores(const Vector &concat,
                                                Vector *scores) const {
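  // The input to the first hidden layer is the raw concatenation, so no ReLU
  // is applied to it; every later layer applies ReLU to its input.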
  Vector h0(hidden_bias_[0].size());
  SparseReluProductPlusBias<ScaleAdderClass>(false, hidden_weights_[0],
                                             hidden_bias_[0], concat, &h0);

  CLD3_DCHECK((hidden_weights_.size() == 1) || (hidden_weights_.size() == 2));
  if (hidden_weights_.size() == 1) {  // 1 hidden layer
    SparseReluProductPlusBias<ScaleAdderClass>(true, softmax_weights_,
                                               softmax_bias_, h0, scores);
  } else if (hidden_weights_.size() == 2) {  // 2 hidden layers
    Vector h1(hidden_bias_[1].size());
    SparseReluProductPlusBias<ScaleAdderClass>(true, hidden_weights_[1],
                                               hidden_bias_[1], h0, &h1);
    SparseReluProductPlusBias<ScaleAdderClass>(true, softmax_weights_,
                                               softmax_bias_, h1, scores);
  }
}

void EmbeddingNetwork::ComputeFinalScores(
    const std::vector<FeatureVector> &features, Vector *scores) const {
  Vector concat;
  ConcatEmbeddings(features, &concat);

  scores->resize(softmax_bias_.size());
  FinishComputeFinalScores<SimpleAdder>(concat, scores);
}
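
// A minimal usage sketch (illustrative only; "params" stands for any concrete
// EmbeddingNetworkParams implementation, and "features" for the FeatureVectors
// produced by the caller's feature extraction):
//
//   EmbeddingNetwork network(&params);
//   EmbeddingNetwork::Vector scores;
//   network.ComputeFinalScores(features, &scores);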

EmbeddingNetwork::EmbeddingNetwork(const EmbeddingNetworkParams *model)
    : model_(model) {
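  // Check that the model's precomputed concatenation offsets are consistent
  // with the embedding dimensions and per-space feature counts, and wrap each
  // embedding matrix.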
  int offset_sum = 0;
  for (int i = 0; i < model_->embedding_dim_size(); ++i) {
    CLD3_DCHECK(offset_sum == model_->concat_offset(i));
    offset_sum += model_->embedding_dim(i) * model_->embedding_num_features(i);
    embedding_matrices_.emplace_back(model_->GetEmbeddingMatrix(i));
  }

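  // Wrap the weights and bias of each hidden layer.  Biases are stored as
  // single-column, unquantized float matrices.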
  CLD3_DCHECK(model_->hidden_size() == model_->hidden_bias_size());
  hidden_weights_.resize(model_->hidden_size());
  hidden_bias_.resize(model_->hidden_size());
  for (int i = 0; i < model_->hidden_size(); ++i) {
    FillMatrixParams(model_->GetHiddenLayerMatrix(i), &hidden_weights_[i]);
    EmbeddingNetworkParams::Matrix bias = model_->GetHiddenLayerBias(i);
    CLD3_DCHECK(1 == bias.cols);
    CheckNoQuantization(bias);
    hidden_bias_[i] = VectorWrapper(
        reinterpret_cast<const float *>(bias.elements), bias.rows);
  }

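  // The softmax (output) layer is wrapped the same way.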
  CLD3_DCHECK(model_->HasSoftmax());
  FillMatrixParams(model_->GetSoftmaxMatrix(), &softmax_weights_);

  EmbeddingNetworkParams::Matrix softmax_bias = model_->GetSoftmaxBias();
  CLD3_DCHECK(1 == softmax_bias.cols);
  CheckNoQuantization(softmax_bias);
  softmax_bias_ =
      VectorWrapper(reinterpret_cast<const float *>(softmax_bias.elements),
                    softmax_bias.rows);
}

}  // namespace chrome_lang_id