1 /******************************************************************************
2 ** Filename: featdefs.cpp
3 ** Purpose: Definitions of currently defined feature types.
4 ** Author: Dan Johnson
5 **
6 ** (c) Copyright Hewlett-Packard Company, 1988.
7 ** Licensed under the Apache License, Version 2.0 (the "License");
8 ** you may not use this file except in compliance with the License.
9 ** You may obtain a copy of the License at
10 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
16 ******************************************************************************/
17
18 #include "featdefs.h"
19
20 #include "picofeat.h" // for PicoFeatureLength
21 #include "scanutils.h"
22
23 #include <cstdio>
24 #include <cstring>
25
26 namespace tesseract {
27
// Default pico-feature length; used further down in this file as the initial
// value of the PicoFeatureLength knob.
#define PICO_FEATURE_LENGTH 0.05
29
30 /*-----------------------------------------------------------------------------
31 Global Data Definitions and Declarations
32 -----------------------------------------------------------------------------*/
// Short textual tags for the four feature types registered in DescDefs.
// Each one is handed to DefineFeature below as that type's short name
// (presumably stored in FEATURE_DESC_STRUCT::ShortName, which is what
// WriteCharDescription emits and ShortNameToFeatureType compares against --
// confirm against the DefineFeature macro).
const char *const kMicroFeatureType = "mf";
const char *const kCNFeatureType = "cn";
const char *const kIntFeatureType = "if";
const char *const kGeoFeatureType = "tb";
37
38 // Define all of the parameters for the MicroFeature type.
39 StartParamDesc(MicroFeatureParams) DefineParam(0, 0, -0.5, 0.5) DefineParam(0, 0, -0.25, 0.75)
40 DefineParam(0, 1, 0.0, 1.0) DefineParam(1, 0, 0.0, 1.0) DefineParam(0, 1, -0.5, 0.5)
41 DefineParam(0, 1, -0.5, 0.5) EndParamDesc
42 // Now define the feature type itself (see features.h for parameters).
43 DefineFeature(MicroFeatureDesc, 5, 1, kMicroFeatureType, MicroFeatureParams)
44
45 // Define all of the parameters for the NormFeat type.
46 StartParamDesc(CharNormParams) DefineParam(0, 0, -0.25, 0.75) DefineParam(0, 1, 0.0, 1.0)
47 DefineParam(0, 0, 0.0, 1.0) DefineParam(0, 0, 0.0, 1.0) EndParamDesc
48 // Now define the feature type itself (see features.h for parameters).
49 DefineFeature(CharNormDesc, 4, 0, kCNFeatureType, CharNormParams)
50
51 // Define all of the parameters for the IntFeature type
52 StartParamDesc(IntFeatParams) DefineParam(0, 0, 0.0, 255.0) DefineParam(0, 0, 0.0, 255.0)
53 DefineParam(1, 0, 0.0, 255.0) EndParamDesc
54 // Now define the feature type itself (see features.h for parameters).
55 DefineFeature(IntFeatDesc, 2, 1, kIntFeatureType, IntFeatParams)
56
57 // Define all of the parameters for the GeoFeature type
58 StartParamDesc(GeoFeatParams) DefineParam(0, 0, 0.0, 255.0) DefineParam(0, 0, 0.0, 255.0)
59 DefineParam(0, 0, 0.0, 255.0) EndParamDesc
60 // Now define the feature type itself (see features.h for parameters).
61 DefineFeature(GeoFeatDesc, 3, 0, kGeoFeatureType, GeoFeatParams)
62
63 // Other features used for training the adaptive classifier, but not used
64 // during normal training, therefore not in the DescDefs array.
65
66 // Define all of the parameters for the PicoFeature type
67 // define knob that can be used to adjust pico-feature length.
68 float PicoFeatureLength = PICO_FEATURE_LENGTH;
69 StartParamDesc(PicoFeatParams) DefineParam(0, 0, -0.25, 0.75) DefineParam(1, 0, 0.0, 1.0)
70 DefineParam(0, 0, -0.5, 0.5) EndParamDesc
71 // Now define the feature type itself (see features.h for parameters).
72 DefineFeature(PicoFeatDesc, 2, 1, "pf", PicoFeatParams)
73
74 // Define all of the parameters for the OutlineFeature type.
75 StartParamDesc(OutlineFeatParams) DefineParam(0, 0, -0.5, 0.5) DefineParam(0, 0, -0.25, 0.75)
76 DefineParam(0, 0, 0.0, 1.0) DefineParam(1, 0, 0.0, 1.0) EndParamDesc
77 // Now define the feature type itself (see features.h for parameters).
78 DefineFeature(OutlineFeatDesc, 3, 1, "of", OutlineFeatParams)
79
// Table of the feature descriptors that InitFeatureDefs copies, slot by slot,
// into a FEATURE_DEFS_STRUCT. The position of each entry is the index under
// which that descriptor is stored in FEATURE_DEFS_STRUCT::FeatureDesc.
// MUST be kept in-sync with ExtractorDefs in fxdefs.cpp.
static const FEATURE_DESC_STRUCT *DescDefs[NUM_FEATURE_TYPES] = {
    &MicroFeatureDesc, &CharNormDesc, &IntFeatDesc, &GeoFeatDesc};
83
84 /*-----------------------------------------------------------------------------
85 Public Code
86 -----------------------------------------------------------------------------*/
InitFeatureDefs(FEATURE_DEFS_STRUCT * featuredefs)87 void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs) {
88 featuredefs->NumFeatureTypes = NUM_FEATURE_TYPES;
89 for (int i = 0; i < NUM_FEATURE_TYPES; ++i) {
90 featuredefs->FeatureDesc[i] = DescDefs[i];
91 }
92 }
93
94 /*---------------------------------------------------------------------------*/
95 /**
96 * Appends a textual representation of CharDesc to str.
97 * The format used is to write out the number of feature
98 * sets which will be written followed by a representation of
99 * each feature set.
100 *
101 * Each set starts with the short name for that feature followed
102 * by a description of the feature set. Feature sets which are
103 * not present are not written.
104 *
105 * @param FeatureDefs definitions of feature types/extractors
106 * @param str string to append CharDesc to
107 * @param CharDesc character description to write to File
108 */
WriteCharDescription(const FEATURE_DEFS_STRUCT & FeatureDefs,CHAR_DESC_STRUCT * CharDesc,std::string & str)109 void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc, std::string &str) {
110 int NumSetsToWrite = 0;
111
112 for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
113 if (CharDesc->FeatureSets[Type]) {
114 NumSetsToWrite++;
115 }
116 }
117
118 str += " " + std::to_string(NumSetsToWrite);
119 str += "\n";
120 for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
121 if (CharDesc->FeatureSets[Type]) {
122 str += FeatureDefs.FeatureDesc[Type]->ShortName;
123 str += " ";
124 WriteFeatureSet(CharDesc->FeatureSets[Type], str);
125 }
126 }
127 } /* WriteCharDescription */
128
129 // Return whether all of the fields of the given feature set
130 // are well defined (not inf or nan).
ValidCharDescription(const FEATURE_DEFS_STRUCT & FeatureDefs,CHAR_DESC_STRUCT * CharDesc)131 bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc) {
132 bool anything_written = false;
133 bool well_formed = true;
134 for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
135 if (CharDesc->FeatureSets[Type]) {
136 for (int i = 0; i < CharDesc->FeatureSets[Type]->NumFeatures; i++) {
137 FEATURE feat = CharDesc->FeatureSets[Type]->Features[i];
138 for (int p = 0; p < feat->Type->NumParams; p++) {
139 if (std::isnan(feat->Params[p]) || std::isinf(feat->Params[p])) {
140 well_formed = false;
141 } else {
142 anything_written = true;
143 }
144 }
145 }
146 } else {
147 return false;
148 }
149 }
150 return anything_written && well_formed;
151 } /* ValidCharDescription */
152
153 /*---------------------------------------------------------------------------*/
154 /**
155 * Read a character description from File, and return
156 * a data structure containing this information. The data
157 * is formatted as follows:
158 * @verbatim
159 NumberOfSets
160 ShortNameForSet1 Set1
161 ShortNameForSet2 Set2
162 ...
163 @endverbatim
164 *
165 * Globals:
166 * - none
167 *
168 * @param FeatureDefs definitions of feature types/extractors
169 * @param File open text file to read character description from
170 * @return Character description read from File.
171 */
ReadCharDescription(const FEATURE_DEFS_STRUCT & FeatureDefs,FILE * File)172 CHAR_DESC_STRUCT *ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File) {
173 int NumSetsToRead;
174 char ShortName[FEAT_NAME_SIZE];
175 int Type;
176
177 ASSERT_HOST(tfscanf(File, "%d", &NumSetsToRead) == 1);
178 ASSERT_HOST(NumSetsToRead >= 0);
179 ASSERT_HOST(NumSetsToRead <= FeatureDefs.NumFeatureTypes);
180
181 auto CharDesc = new CHAR_DESC_STRUCT(FeatureDefs);
182 for (; NumSetsToRead > 0; NumSetsToRead--) {
183 tfscanf(File, "%s", ShortName);
184 Type = ShortNameToFeatureType(FeatureDefs, ShortName);
185 CharDesc->FeatureSets[Type] = ReadFeatureSet(File, FeatureDefs.FeatureDesc[Type]);
186 }
187 return CharDesc;
188 }
189
190 /*---------------------------------------------------------------------------*/
191 /**
192 * Search through all features currently defined and return
193 * the feature type for the feature with the specified short
194 * name. Trap an error if the specified name is not found.
195 *
196 * Globals:
197 * - none
198 *
199 * @param FeatureDefs definitions of feature types/extractors
200 * @param ShortName short name of a feature type
201 * @return Feature type which corresponds to ShortName.
202 */
ShortNameToFeatureType(const FEATURE_DEFS_STRUCT & FeatureDefs,const char * ShortName)203 uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName) {
204 for (int i = 0; i < FeatureDefs.NumFeatureTypes; i++) {
205 if (!strcmp((FeatureDefs.FeatureDesc[i]->ShortName), ShortName)) {
206 return static_cast<uint32_t>(i);
207 }
208 }
209 ASSERT_HOST(!"Illegal short name for a feature");
210 return 0;
211 }
212
213 } // namespace tesseract
214