1 // Copyright 2010 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 //
14 ///////////////////////////////////////////////////////////////////////
15 
16 #ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H_
17 #define TESSERACT_TRAINING_TRAININGSAMPLE_H_
18 
19 #include "elst.h"
20 #include "featdefs.h"
21 #include "intfx.h"
22 #include "intmatcher.h"
23 #include "matrix.h"
24 #include "mf.h"
25 #include "mfdefs.h"
26 #include "picofeat.h"
27 #include "shapetable.h"
28 #include "unicharset.h"
29 
30 struct Pix;
31 
32 namespace tesseract {
33 
34 class IntFeatureMap;
35 class IntFeatureSpace;
36 class ShapeTable;
37 
// Number of elements of cn_feature_.
static const int kNumCNParams = 4;
// Number of ways to shift the features when randomizing.
static const int kSampleYShiftSize = 5;
// Number of ways to scale the features when randomizing.
static const int kSampleScaleSize = 3;
// Total number of different ways to manipulate the features when randomizing.
// The first and last combinations are removed to avoid an excessive
// top movement (first) and an identity transformation (last).
// WARNING: To avoid patterned duplication of samples, be sure to keep
// kSampleRandomSize prime!
// Eg with current values (kSampleYShiftSize = 5 and kSampleScaleSize = 3)
// kSampleRandomSize is 13, which is prime.
static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2;

// Compile-time primality test by trial division, written as a single return
// expression so that it is a valid C++11 constexpr function.
// n is the candidate; divisor is the next trial divisor (callers should leave
// it at its default of 2). Returns true iff n is prime.
constexpr bool IsSmallPrime(int n, int divisor = 2) {
  return (n < 2) ? false
                 : (divisor * divisor > n)
                       ? true
                       : (n % divisor == 0) ? false : IsSmallPrime(n, divisor + 1);
}
// Enforces the WARNING above at compile time instead of relying on a comment.
static_assert(IsSmallPrime(kSampleRandomSize),
              "kSampleRandomSize must be prime to avoid patterned duplication of samples");
53 
54 class TESS_API TrainingSample : public ELIST_LINK {
55 public:
TrainingSample()56   TrainingSample()
57       : class_id_(INVALID_UNICHAR_ID)
58       , font_id_(0)
59       , page_num_(0)
60       , num_features_(0)
61       , num_micro_features_(0)
62       , outline_length_(0)
63       , features_(nullptr)
64       , micro_features_(nullptr)
65       , weight_(1.0)
66       , max_dist_(0.0)
67       , sample_index_(0)
68       , features_are_indexed_(false)
69       , features_are_mapped_(false)
70       , is_error_(false) {}
71   ~TrainingSample();
72 
73   // Saves the given features into a TrainingSample. The features are copied,
74   // so may be deleted afterwards. Delete the return value after use.
75   static TrainingSample *CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info,
76                                           const TBOX &bounding_box,
77                                           const INT_FEATURE_STRUCT *features, int num_features);
78   // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining.
79   FEATURE_STRUCT *GetCNFeature() const;
80   // Constructs and returns a copy "randomized" by the method given by
81   // the randomizer index. If index is out of [0, kSampleRandomSize) then
82   // an exact copy is returned.
83   TrainingSample *RandomizedCopy(int index) const;
84   // Constructs and returns an exact copy.
85   TrainingSample *Copy() const;
86 
87   // WARNING! Serialize/DeSerialize do not save/restore the "cache" data
88   // members, which is mostly the mapped features, and the weight.
89   // It is assumed these can all be reconstructed from what is saved.
90   // Writes to the given file. Returns false in case of error.
91   bool Serialize(FILE *fp) const;
92   // Creates from the given file. Returns nullptr in case of error.
93   // If swap is true, assumes a big/little-endian swap is needed.
94   static TrainingSample *DeSerializeCreate(bool swap, FILE *fp);
95   // Reads from the given file. Returns false in case of error.
96   // If swap is true, assumes a big/little-endian swap is needed.
97   bool DeSerialize(bool swap, FILE *fp);
98 
99   // Extracts the needed information from the CHAR_DESC_STRUCT.
100   void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type,
101                        CHAR_DESC_STRUCT *char_desc);
102 
103   // Sets the mapped_features_ from the features_ using the provided
104   // feature_space to the indexed versions of the features.
105   void IndexFeatures(const IntFeatureSpace &feature_space);
106 
107   // Returns a pix representing the sample. (Int features only.)
108   Image RenderToPix(const UNICHARSET *unicharset) const;
109   // Displays the features in the given window with the given color.
110   void DisplayFeatures(ScrollView::Color color, ScrollView *window) const;
111 
112   // Returns a pix of the original sample image. The pix is padded all round
113   // by padding wherever possible.
114   // The returned Pix must be pixDestroyed after use.
115   // If the input page_pix is nullptr, nullptr is returned.
116   Image GetSamplePix(int padding, Image page_pix) const;
117 
118   // Accessors.
class_id()119   UNICHAR_ID class_id() const {
120     return class_id_;
121   }
set_class_id(int id)122   void set_class_id(int id) {
123     class_id_ = id;
124   }
font_id()125   int font_id() const {
126     return font_id_;
127   }
set_font_id(int id)128   void set_font_id(int id) {
129     font_id_ = id;
130   }
page_num()131   int page_num() const {
132     return page_num_;
133   }
set_page_num(int page)134   void set_page_num(int page) {
135     page_num_ = page;
136   }
bounding_box()137   const TBOX &bounding_box() const {
138     return bounding_box_;
139   }
set_bounding_box(const TBOX & box)140   void set_bounding_box(const TBOX &box) {
141     bounding_box_ = box;
142   }
num_features()143   uint32_t num_features() const {
144     return num_features_;
145   }
features()146   const INT_FEATURE_STRUCT *features() const {
147     return features_;
148   }
num_micro_features()149   uint32_t num_micro_features() const {
150     return num_micro_features_;
151   }
micro_features()152   const MicroFeature *micro_features() const {
153     return micro_features_;
154   }
outline_length()155   int outline_length() const {
156     return outline_length_;
157   }
cn_feature(int index)158   float cn_feature(int index) const {
159     return cn_feature_[index];
160   }
geo_feature(int index)161   int geo_feature(int index) const {
162     return geo_feature_[index];
163   }
weight()164   double weight() const {
165     return weight_;
166   }
set_weight(double value)167   void set_weight(double value) {
168     weight_ = value;
169   }
max_dist()170   double max_dist() const {
171     return max_dist_;
172   }
set_max_dist(double value)173   void set_max_dist(double value) {
174     max_dist_ = value;
175   }
sample_index()176   int sample_index() const {
177     return sample_index_;
178   }
set_sample_index(int value)179   void set_sample_index(int value) {
180     sample_index_ = value;
181   }
features_are_mapped()182   bool features_are_mapped() const {
183     return features_are_mapped_;
184   }
mapped_features()185   const std::vector<int> &mapped_features() const {
186     ASSERT_HOST(features_are_mapped_);
187     return mapped_features_;
188   }
indexed_features()189   const std::vector<int> &indexed_features() const {
190     ASSERT_HOST(features_are_indexed_);
191     return mapped_features_;
192   }
is_error()193   bool is_error() const {
194     return is_error_;
195   }
set_is_error(bool value)196   void set_is_error(bool value) {
197     is_error_ = value;
198   }
199 
200 private:
201   // Unichar id that this sample represents. There obviously must be a
202   // reference UNICHARSET somewhere. Usually in TrainingSampleSet.
203   UNICHAR_ID class_id_;
204   // Font id in which this sample was printed. Refers to a fontinfo_table_ in
205   // MasterTrainer.
206   int font_id_;
207   // Number of page that the sample came from.
208   int page_num_;
209   // Bounding box of sample in original image.
210   TBOX bounding_box_;
211   // Number of INT_FEATURE_STRUCT in features_ array.
212   uint32_t num_features_;
213   // Number of MicroFeature in micro_features_ array.
214   uint32_t num_micro_features_;
215   // Total length of outline in the baseline normalized coordinate space.
216   // See comment in WERD_RES class definition for a discussion of coordinate
217   // spaces.
218   int outline_length_;
219   // Array of features.
220   INT_FEATURE_STRUCT *features_;
221   // Array of features.
222   MicroFeature *micro_features_;
223   // The one and only CN feature. Indexed by NORM_PARAM_NAME enum.
224   float cn_feature_[kNumCNParams];
225   // The one and only geometric feature. (Aims at replacing cn_feature_).
226   // Indexed by GeoParams enum in picofeat.h
227   int geo_feature_[GeoCount];
228 
229   // Non-serialized cache data.
230   // Weight used for boosting training.
231   double weight_;
232   // Maximum distance to other samples of same class/font used in computing
233   // the canonical sample.
234   double max_dist_;
235   // Global index of this sample.
236   int sample_index_;
237 
238 public:
239   // both are used in training tools
240   // hide after refactoring
241 
242   // Indexed/mapped features, as indicated by the bools below.
243   std::vector<int> mapped_features_;
244   bool features_are_indexed_;
245   bool features_are_mapped_;
246 
247 private:
248   // True if the last classification was an error by the current definition.
249   bool is_error_;
250 
251   // Randomizing factors.
252   static const int kYShiftValues[kSampleYShiftSize];
253   static const double kScaleValues[kSampleScaleSize];
254 };
255 
256 ELISTIZEH(TrainingSample)
257 
258 } // namespace tesseract
259 
260 #endif // TESSERACT_TRAINING_TRAININGSAMPLE_H_
261