1 // Copyright 2010 Google Inc. All Rights Reserved. 2 // Author: rays@google.com (Ray Smith) 3 // 4 // Licensed under the Apache License, Version 2.0 (the "License"); 5 // you may not use this file except in compliance with the License. 6 // You may obtain a copy of the License at 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // Unless required by applicable law or agreed to in writing, software 9 // distributed under the License is distributed on an "AS IS" BASIS, 10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 // 14 /////////////////////////////////////////////////////////////////////// 15 16 #ifndef TESSERACT_TRAINING_TRAININGSAMPLE_H_ 17 #define TESSERACT_TRAINING_TRAININGSAMPLE_H_ 18 19 #include "elst.h" 20 #include "featdefs.h" 21 #include "intfx.h" 22 #include "intmatcher.h" 23 #include "matrix.h" 24 #include "mf.h" 25 #include "mfdefs.h" 26 #include "picofeat.h" 27 #include "shapetable.h" 28 #include "unicharset.h" 29 30 struct Pix; 31 32 namespace tesseract { 33 34 class IntFeatureMap; 35 class IntFeatureSpace; 36 class ShapeTable; 37 38 // Number of elements of cn_feature_. 39 static const int kNumCNParams = 4; 40 // Number of ways to shift the features when randomizing. 41 static const int kSampleYShiftSize = 5; 42 // Number of ways to scale the features when randomizing. 43 static const int kSampleScaleSize = 3; 44 // Total number of different ways to manipulate the features when randomizing. 45 // The first and last combinations are removed to avoid an excessive 46 // top movement (first) and an identity transformation (last). 47 // WARNING: To avoid patterned duplication of samples, be sure to keep 48 // kSampleRandomSize prime! 49 // Eg with current values (kSampleYShiftSize = 5 and TkSampleScaleSize = 3) 50 // kSampleRandomSize is 13, which is prime. 51 static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2; 52 // ASSERT_IS_PRIME(kSampleRandomSize) !! 53 54 class TESS_API TrainingSample : public ELIST_LINK { 55 public: TrainingSample()56 TrainingSample() 57 : class_id_(INVALID_UNICHAR_ID) 58 , font_id_(0) 59 , page_num_(0) 60 , num_features_(0) 61 , num_micro_features_(0) 62 , outline_length_(0) 63 , features_(nullptr) 64 , micro_features_(nullptr) 65 , weight_(1.0) 66 , max_dist_(0.0) 67 , sample_index_(0) 68 , features_are_indexed_(false) 69 , features_are_mapped_(false) 70 , is_error_(false) {} 71 ~TrainingSample(); 72 73 // Saves the given features into a TrainingSample. The features are copied, 74 // so may be deleted afterwards. Delete the return value after use. 75 static TrainingSample *CopyFromFeatures(const INT_FX_RESULT_STRUCT &fx_info, 76 const TBOX &bounding_box, 77 const INT_FEATURE_STRUCT *features, int num_features); 78 // Returns the cn_feature as a FEATURE_STRUCT* needed by cntraining. 79 FEATURE_STRUCT *GetCNFeature() const; 80 // Constructs and returns a copy "randomized" by the method given by 81 // the randomizer index. If index is out of [0, kSampleRandomSize) then 82 // an exact copy is returned. 83 TrainingSample *RandomizedCopy(int index) const; 84 // Constructs and returns an exact copy. 85 TrainingSample *Copy() const; 86 87 // WARNING! Serialize/DeSerialize do not save/restore the "cache" data 88 // members, which is mostly the mapped features, and the weight. 89 // It is assumed these can all be reconstructed from what is saved. 90 // Writes to the given file. Returns false in case of error. 91 bool Serialize(FILE *fp) const; 92 // Creates from the given file. Returns nullptr in case of error. 93 // If swap is true, assumes a big/little-endian swap is needed. 94 static TrainingSample *DeSerializeCreate(bool swap, FILE *fp); 95 // Reads from the given file. Returns false in case of error. 96 // If swap is true, assumes a big/little-endian swap is needed. 97 bool DeSerialize(bool swap, FILE *fp); 98 99 // Extracts the needed information from the CHAR_DESC_STRUCT. 100 void ExtractCharDesc(int feature_type, int micro_type, int cn_type, int geo_type, 101 CHAR_DESC_STRUCT *char_desc); 102 103 // Sets the mapped_features_ from the features_ using the provided 104 // feature_space to the indexed versions of the features. 105 void IndexFeatures(const IntFeatureSpace &feature_space); 106 107 // Returns a pix representing the sample. (Int features only.) 108 Image RenderToPix(const UNICHARSET *unicharset) const; 109 // Displays the features in the given window with the given color. 110 void DisplayFeatures(ScrollView::Color color, ScrollView *window) const; 111 112 // Returns a pix of the original sample image. The pix is padded all round 113 // by padding wherever possible. 114 // The returned Pix must be pixDestroyed after use. 115 // If the input page_pix is nullptr, nullptr is returned. 116 Image GetSamplePix(int padding, Image page_pix) const; 117 118 // Accessors. class_id()119 UNICHAR_ID class_id() const { 120 return class_id_; 121 } set_class_id(int id)122 void set_class_id(int id) { 123 class_id_ = id; 124 } font_id()125 int font_id() const { 126 return font_id_; 127 } set_font_id(int id)128 void set_font_id(int id) { 129 font_id_ = id; 130 } page_num()131 int page_num() const { 132 return page_num_; 133 } set_page_num(int page)134 void set_page_num(int page) { 135 page_num_ = page; 136 } bounding_box()137 const TBOX &bounding_box() const { 138 return bounding_box_; 139 } set_bounding_box(const TBOX & box)140 void set_bounding_box(const TBOX &box) { 141 bounding_box_ = box; 142 } num_features()143 uint32_t num_features() const { 144 return num_features_; 145 } features()146 const INT_FEATURE_STRUCT *features() const { 147 return features_; 148 } num_micro_features()149 uint32_t num_micro_features() const { 150 return num_micro_features_; 151 } micro_features()152 const MicroFeature *micro_features() const { 153 return micro_features_; 154 } outline_length()155 int outline_length() const { 156 return outline_length_; 157 } cn_feature(int index)158 float cn_feature(int index) const { 159 return cn_feature_[index]; 160 } geo_feature(int index)161 int geo_feature(int index) const { 162 return geo_feature_[index]; 163 } weight()164 double weight() const { 165 return weight_; 166 } set_weight(double value)167 void set_weight(double value) { 168 weight_ = value; 169 } max_dist()170 double max_dist() const { 171 return max_dist_; 172 } set_max_dist(double value)173 void set_max_dist(double value) { 174 max_dist_ = value; 175 } sample_index()176 int sample_index() const { 177 return sample_index_; 178 } set_sample_index(int value)179 void set_sample_index(int value) { 180 sample_index_ = value; 181 } features_are_mapped()182 bool features_are_mapped() const { 183 return features_are_mapped_; 184 } mapped_features()185 const std::vector<int> &mapped_features() const { 186 ASSERT_HOST(features_are_mapped_); 187 return mapped_features_; 188 } indexed_features()189 const std::vector<int> &indexed_features() const { 190 ASSERT_HOST(features_are_indexed_); 191 return mapped_features_; 192 } is_error()193 bool is_error() const { 194 return is_error_; 195 } set_is_error(bool value)196 void set_is_error(bool value) { 197 is_error_ = value; 198 } 199 200 private: 201 // Unichar id that this sample represents. There obviously must be a 202 // reference UNICHARSET somewhere. Usually in TrainingSampleSet. 203 UNICHAR_ID class_id_; 204 // Font id in which this sample was printed. Refers to a fontinfo_table_ in 205 // MasterTrainer. 206 int font_id_; 207 // Number of page that the sample came from. 208 int page_num_; 209 // Bounding box of sample in original image. 210 TBOX bounding_box_; 211 // Number of INT_FEATURE_STRUCT in features_ array. 212 uint32_t num_features_; 213 // Number of MicroFeature in micro_features_ array. 214 uint32_t num_micro_features_; 215 // Total length of outline in the baseline normalized coordinate space. 216 // See comment in WERD_RES class definition for a discussion of coordinate 217 // spaces. 218 int outline_length_; 219 // Array of features. 220 INT_FEATURE_STRUCT *features_; 221 // Array of features. 222 MicroFeature *micro_features_; 223 // The one and only CN feature. Indexed by NORM_PARAM_NAME enum. 224 float cn_feature_[kNumCNParams]; 225 // The one and only geometric feature. (Aims at replacing cn_feature_). 226 // Indexed by GeoParams enum in picofeat.h 227 int geo_feature_[GeoCount]; 228 229 // Non-serialized cache data. 230 // Weight used for boosting training. 231 double weight_; 232 // Maximum distance to other samples of same class/font used in computing 233 // the canonical sample. 234 double max_dist_; 235 // Global index of this sample. 236 int sample_index_; 237 238 public: 239 // both are used in training tools 240 // hide after refactoring 241 242 // Indexed/mapped features, as indicated by the bools below. 243 std::vector<int> mapped_features_; 244 bool features_are_indexed_; 245 bool features_are_mapped_; 246 247 private: 248 // True if the last classification was an error by the current definition. 249 bool is_error_; 250 251 // Randomizing factors. 252 static const int kYShiftValues[kSampleYShiftSize]; 253 static const double kScaleValues[kSampleScaleSize]; 254 }; 255 256 ELISTIZEH(TrainingSample) 257 258 } // namespace tesseract 259 260 #endif // TESSERACT_TRAINING_TRAININGSAMPLE_H_ 261