1 /////////////////////////////////////////////////////////////////////// 2 // File: fontinfo.h 3 // Description: Font information classes abstracted from intproto.h/cpp. 4 // Author: rays@google.com (Ray Smith) 5 // 6 // (C) Copyright 2011, Google Inc. 7 // Licensed under the Apache License, Version 2.0 (the "License"); 8 // you may not use this file except in compliance with the License. 9 // You may obtain a copy of the License at 10 // http://www.apache.org/licenses/LICENSE-2.0 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 // 17 /////////////////////////////////////////////////////////////////////// 18 19 #ifndef TESSERACT_CCSTRUCT_FONTINFO_H_ 20 #define TESSERACT_CCSTRUCT_FONTINFO_H_ 21 22 #include "errcode.h" 23 24 #include <tesseract/unichar.h> 25 #include "genericvector.h" 26 27 #include <cstdint> // for uint16_t, uint32_t 28 #include <cstdio> // for FILE 29 #include <vector> 30 31 namespace tesseract { 32 33 template <typename T> 34 class UnicityTable; 35 36 // Simple struct to hold a font and a score. The scores come from the low-level 37 // integer matcher, so they are in the uint16_t range. Fonts are an index to 38 // fontinfo_table. 39 // These get copied around a lot, so best to keep them small. 40 struct ScoredFont { ScoredFontScoredFont41 ScoredFont() : fontinfo_id(-1), score(0) {} ScoredFontScoredFont42 ScoredFont(int font_id, uint16_t classifier_score) 43 : fontinfo_id(font_id), score(classifier_score) {} 44 45 // Index into fontinfo table, but inside the classifier, may be a shapetable 46 // index. 47 int32_t fontinfo_id; 48 // Raw score from the low-level classifier. 49 uint16_t score; 50 }; 51 52 // Struct for information about spacing between characters in a particular font. 53 struct FontSpacingInfo { 54 int16_t x_gap_before; 55 int16_t x_gap_after; 56 std::vector<UNICHAR_ID> kerned_unichar_ids; 57 std::vector<int16_t> kerned_x_gaps; 58 }; 59 60 /* 61 * font_properties contains properties about boldness, italicness, fixed pitch, 62 * serif, fraktur 63 */ 64 struct FontInfo { FontInfoFontInfo65 FontInfo() : name(nullptr), properties(0), universal_id(0), spacing_vec(nullptr) {} 66 ~FontInfo() = default; 67 68 bool operator==(const FontInfo &rhs) const { 69 return strcmp(name, rhs.name) == 0; 70 } 71 72 // Writes to the given file. Returns false in case of error. 73 bool Serialize(FILE *fp) const; 74 // Reads from the given file. Returns false in case of error. 75 // If swap is true, assumes a big/little-endian swap is needed. 76 bool DeSerialize(TFile *fp); 77 78 // Reserves unicharset_size spots in spacing_vec. init_spacingFontInfo79 void init_spacing(int unicharset_size) { 80 spacing_vec = new std::vector<FontSpacingInfo *>(unicharset_size); 81 } 82 // Adds the given pointer to FontSpacingInfo to spacing_vec member 83 // (FontInfo class takes ownership of the pointer). 84 // Note: init_spacing should be called before calling this function. add_spacingFontInfo85 void add_spacing(UNICHAR_ID uch_id, FontSpacingInfo *spacing_info) { 86 ASSERT_HOST(static_cast<size_t>(uch_id) < spacing_vec->size()); 87 (*spacing_vec)[uch_id] = spacing_info; 88 } 89 90 // Returns the pointer to FontSpacingInfo for the given UNICHAR_ID. get_spacingFontInfo91 const FontSpacingInfo *get_spacing(UNICHAR_ID uch_id) const { 92 return (spacing_vec == nullptr || spacing_vec->size() <= static_cast<size_t>(uch_id)) ? nullptr 93 : (*spacing_vec)[uch_id]; 94 } 95 96 // Fills spacing with the value of the x gap expected between the two given 97 // UNICHAR_IDs. Returns true on success. get_spacingFontInfo98 bool get_spacing(UNICHAR_ID prev_uch_id, UNICHAR_ID uch_id, int *spacing) const { 99 const FontSpacingInfo *prev_fsi = this->get_spacing(prev_uch_id); 100 const FontSpacingInfo *fsi = this->get_spacing(uch_id); 101 if (prev_fsi == nullptr || fsi == nullptr) { 102 return false; 103 } 104 size_t i = 0; 105 for (; i < prev_fsi->kerned_unichar_ids.size(); ++i) { 106 if (prev_fsi->kerned_unichar_ids[i] == uch_id) { 107 break; 108 } 109 } 110 if (i < prev_fsi->kerned_unichar_ids.size()) { 111 *spacing = prev_fsi->kerned_x_gaps[i]; 112 } else { 113 *spacing = prev_fsi->x_gap_after + fsi->x_gap_before; 114 } 115 return true; 116 } 117 is_italicFontInfo118 bool is_italic() const { 119 return properties & 1; 120 } is_boldFontInfo121 bool is_bold() const { 122 return (properties & 2) != 0; 123 } is_fixed_pitchFontInfo124 bool is_fixed_pitch() const { 125 return (properties & 4) != 0; 126 } is_serifFontInfo127 bool is_serif() const { 128 return (properties & 8) != 0; 129 } is_frakturFontInfo130 bool is_fraktur() const { 131 return (properties & 16) != 0; 132 } 133 134 char *name; 135 uint32_t properties; 136 // The universal_id is a field reserved for the initialization process 137 // to assign a unique id number to all fonts loaded for the current 138 // combination of languages. This id will then be returned by 139 // ResultIterator::WordFontAttributes. 140 int32_t universal_id; 141 // Horizontal spacing between characters (indexed by UNICHAR_ID). 142 std::vector<FontSpacingInfo *> *spacing_vec; 143 }; 144 145 // Every class (character) owns a FontSet that represents all the fonts that can 146 // render this character. 147 // Since almost all the characters from the same script share the same set of 148 // fonts, the sets are shared over multiple classes (see 149 // Classify::fontset_table_). Thus, a class only store an id to a set. 150 // Because some fonts cannot render just one character of a set, there are a 151 // lot of FontSet that differ only by one font. Rather than storing directly 152 // the FontInfo in the FontSet structure, it's better to share FontInfos among 153 // FontSets (Classify::fontinfo_table_). 154 using FontSet = std::vector<int>; 155 156 // Class that adds a bit of functionality on top of GenericVector to 157 // implement a table of FontInfo that replaces UniCityTable<FontInfo>. 158 // TODO(rays) change all references once all existing traineddata files 159 // are replaced. 160 class FontInfoTable : public GenericVector<FontInfo> { 161 public: 162 TESS_API // when you remove inheritance from GenericVector, move this on 163 // class level 164 FontInfoTable(); 165 TESS_API 166 ~FontInfoTable(); 167 168 // Writes to the given file. Returns false in case of error. 169 TESS_API 170 bool Serialize(FILE *fp) const; 171 // Reads from the given file. Returns false in case of error. 172 // If swap is true, assumes a big/little-endian swap is needed. 173 TESS_API 174 bool DeSerialize(TFile *fp); 175 176 // Returns true if the given set of fonts includes one with the same 177 // properties as font_id. 178 TESS_API 179 bool SetContainsFontProperties(int font_id, const std::vector<ScoredFont> &font_set) const; 180 // Returns true if the given set of fonts includes multiple properties. 181 TESS_API 182 bool SetContainsMultipleFontProperties(const std::vector<ScoredFont> &font_set) const; 183 184 // Moves any non-empty FontSpacingInfo entries from other to this. 185 TESS_API 186 void MoveSpacingInfoFrom(FontInfoTable *other); 187 // Moves this to the target unicity table. 188 TESS_API 189 void MoveTo(UnicityTable<FontInfo> *target); 190 }; 191 192 // Deletion callbacks for GenericVector. 193 void FontInfoDeleteCallback(FontInfo f); 194 195 // Callbacks used by UnicityTable to read/write FontInfo/FontSet structures. 196 bool read_info(TFile *f, FontInfo *fi); 197 bool write_info(FILE *f, const FontInfo &fi); 198 bool read_spacing_info(TFile *f, FontInfo *fi); 199 bool write_spacing_info(FILE *f, const FontInfo &fi); 200 bool write_set(FILE *f, const FontSet &fs); 201 202 } // namespace tesseract. 203 204 #endif /* THIRD_PARTY_TESSERACT_CCSTRUCT_FONTINFO_H_ */ 205