1 /******************************************************************************
2  ** Filename:    featdefs.cpp
3  ** Purpose:     Definitions of currently defined feature types.
4  ** Author:      Dan Johnson
5  **
6  ** (c) Copyright Hewlett-Packard Company, 1988.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  ******************************************************************************/
17 
18 #include "featdefs.h"
19 
20 #include "picofeat.h" // for PicoFeatureLength
21 #include "scanutils.h"
22 
23 #include <cstdio>
24 #include <cstring>
25 
26 namespace tesseract {
27 
// Initial value assigned to the PicoFeatureLength knob defined further down
// in this file.
#define PICO_FEATURE_LENGTH 0.05

/*-----------------------------------------------------------------------------
        Global Data Definitions and Declarations
-----------------------------------------------------------------------------*/
// Short names of the feature types that appear in DescDefs.  Each constant is
// passed as the short-name argument of the DefineFeature() call noted beside
// it.
const char *const kMicroFeatureType = "mf"; // used by MicroFeatureDesc
const char *const kCNFeatureType = "cn";    // used by CharNormDesc
const char *const kIntFeatureType = "if";   // used by IntFeatDesc
const char *const kGeoFeatureType = "tb";   // used by GeoFeatDesc
37 
38 // Define all of the parameters for the MicroFeature type.
39 StartParamDesc(MicroFeatureParams) DefineParam(0, 0, -0.5, 0.5) DefineParam(0, 0, -0.25, 0.75)
40     DefineParam(0, 1, 0.0, 1.0) DefineParam(1, 0, 0.0, 1.0) DefineParam(0, 1, -0.5, 0.5)
41         DefineParam(0, 1, -0.5, 0.5) EndParamDesc
42     // Now define the feature type itself (see features.h for parameters).
43     DefineFeature(MicroFeatureDesc, 5, 1, kMicroFeatureType, MicroFeatureParams)
44 
45     // Define all of the parameters for the NormFeat type.
46     StartParamDesc(CharNormParams) DefineParam(0, 0, -0.25, 0.75) DefineParam(0, 1, 0.0, 1.0)
47         DefineParam(0, 0, 0.0, 1.0) DefineParam(0, 0, 0.0, 1.0) EndParamDesc
48     // Now define the feature type itself (see features.h for parameters).
49     DefineFeature(CharNormDesc, 4, 0, kCNFeatureType, CharNormParams)
50 
51     // Define all of the parameters for the IntFeature type
52     StartParamDesc(IntFeatParams) DefineParam(0, 0, 0.0, 255.0) DefineParam(0, 0, 0.0, 255.0)
53         DefineParam(1, 0, 0.0, 255.0) EndParamDesc
54     // Now define the feature type itself (see features.h for parameters).
55     DefineFeature(IntFeatDesc, 2, 1, kIntFeatureType, IntFeatParams)
56 
57     // Define all of the parameters for the GeoFeature type
58     StartParamDesc(GeoFeatParams) DefineParam(0, 0, 0.0, 255.0) DefineParam(0, 0, 0.0, 255.0)
59         DefineParam(0, 0, 0.0, 255.0) EndParamDesc
60     // Now define the feature type itself (see features.h for parameters).
61     DefineFeature(GeoFeatDesc, 3, 0, kGeoFeatureType, GeoFeatParams)
62 
63     // Other features used for training the adaptive classifier, but not used
64     // during normal training, therefore not in the DescDefs array.
65 
66     // Define all of the parameters for the PicoFeature type
67     // define knob that can be used to adjust pico-feature length.
68     float PicoFeatureLength = PICO_FEATURE_LENGTH;
69 StartParamDesc(PicoFeatParams) DefineParam(0, 0, -0.25, 0.75) DefineParam(1, 0, 0.0, 1.0)
70     DefineParam(0, 0, -0.5, 0.5) EndParamDesc
71     // Now define the feature type itself (see features.h for parameters).
72     DefineFeature(PicoFeatDesc, 2, 1, "pf", PicoFeatParams)
73 
74     // Define all of the parameters for the OutlineFeature type.
75     StartParamDesc(OutlineFeatParams) DefineParam(0, 0, -0.5, 0.5) DefineParam(0, 0, -0.25, 0.75)
76         DefineParam(0, 0, 0.0, 1.0) DefineParam(1, 0, 0.0, 1.0) EndParamDesc
77     // Now define the feature type itself (see features.h for parameters).
78     DefineFeature(OutlineFeatDesc, 3, 1, "of", OutlineFeatParams)
79 
80     // MUST be kept in-sync with ExtractorDefs in fxdefs.cpp.
81     static const FEATURE_DESC_STRUCT *DescDefs[NUM_FEATURE_TYPES] = {
82         &MicroFeatureDesc, &CharNormDesc, &IntFeatDesc, &GeoFeatDesc};
83 
84 /*-----------------------------------------------------------------------------
85               Public Code
86 -----------------------------------------------------------------------------*/
InitFeatureDefs(FEATURE_DEFS_STRUCT * featuredefs)87 void InitFeatureDefs(FEATURE_DEFS_STRUCT *featuredefs) {
88   featuredefs->NumFeatureTypes = NUM_FEATURE_TYPES;
89   for (int i = 0; i < NUM_FEATURE_TYPES; ++i) {
90     featuredefs->FeatureDesc[i] = DescDefs[i];
91   }
92 }
93 
94 /*---------------------------------------------------------------------------*/
95 /**
96  * Appends a textual representation of CharDesc to str.
97  * The format used is to write out the number of feature
98  * sets which will be written followed by a representation of
99  * each feature set.
100  *
101  * Each set starts with the short name for that feature followed
102  * by a description of the feature set.  Feature sets which are
103  * not present are not written.
104  *
105  * @param FeatureDefs    definitions of feature types/extractors
106  * @param str            string to append CharDesc to
107  * @param CharDesc       character description to write to File
108  */
WriteCharDescription(const FEATURE_DEFS_STRUCT & FeatureDefs,CHAR_DESC_STRUCT * CharDesc,std::string & str)109 void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc, std::string &str) {
110   int NumSetsToWrite = 0;
111 
112   for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
113     if (CharDesc->FeatureSets[Type]) {
114       NumSetsToWrite++;
115     }
116   }
117 
118   str += " " + std::to_string(NumSetsToWrite);
119   str += "\n";
120   for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
121     if (CharDesc->FeatureSets[Type]) {
122       str += FeatureDefs.FeatureDesc[Type]->ShortName;
123       str += " ";
124       WriteFeatureSet(CharDesc->FeatureSets[Type], str);
125     }
126   }
127 } /* WriteCharDescription */
128 
129 // Return whether all of the fields of the given feature set
130 // are well defined (not inf or nan).
ValidCharDescription(const FEATURE_DEFS_STRUCT & FeatureDefs,CHAR_DESC_STRUCT * CharDesc)131 bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC_STRUCT *CharDesc) {
132   bool anything_written = false;
133   bool well_formed = true;
134   for (size_t Type = 0; Type < CharDesc->NumFeatureSets; Type++) {
135     if (CharDesc->FeatureSets[Type]) {
136       for (int i = 0; i < CharDesc->FeatureSets[Type]->NumFeatures; i++) {
137         FEATURE feat = CharDesc->FeatureSets[Type]->Features[i];
138         for (int p = 0; p < feat->Type->NumParams; p++) {
139           if (std::isnan(feat->Params[p]) || std::isinf(feat->Params[p])) {
140             well_formed = false;
141           } else {
142             anything_written = true;
143           }
144         }
145       }
146     } else {
147       return false;
148     }
149   }
150   return anything_written && well_formed;
151 } /* ValidCharDescription */
152 
153 /*---------------------------------------------------------------------------*/
154 /**
155  * Read a character description from File, and return
156  * a data structure containing this information.  The data
157  * is formatted as follows:
158  * @verbatim
159      NumberOfSets
160              ShortNameForSet1 Set1
161              ShortNameForSet2 Set2
162              ...
163    @endverbatim
164  *
165  * Globals:
166  * - none
167  *
168  * @param FeatureDefs    definitions of feature types/extractors
169  * @param File open text file to read character description from
170  * @return Character description read from File.
171  */
ReadCharDescription(const FEATURE_DEFS_STRUCT & FeatureDefs,FILE * File)172 CHAR_DESC_STRUCT *ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File) {
173   int NumSetsToRead;
174   char ShortName[FEAT_NAME_SIZE];
175   int Type;
176 
177   ASSERT_HOST(tfscanf(File, "%d", &NumSetsToRead) == 1);
178   ASSERT_HOST(NumSetsToRead >= 0);
179   ASSERT_HOST(NumSetsToRead <= FeatureDefs.NumFeatureTypes);
180 
181   auto CharDesc = new CHAR_DESC_STRUCT(FeatureDefs);
182   for (; NumSetsToRead > 0; NumSetsToRead--) {
183     tfscanf(File, "%s", ShortName);
184     Type = ShortNameToFeatureType(FeatureDefs, ShortName);
185     CharDesc->FeatureSets[Type] = ReadFeatureSet(File, FeatureDefs.FeatureDesc[Type]);
186   }
187   return CharDesc;
188 }
189 
190 /*---------------------------------------------------------------------------*/
191 /**
192  * Search through all features currently defined and return
193  * the feature type for the feature with the specified short
194  * name.  Trap an error if the specified name is not found.
195  *
196  * Globals:
197  * - none
198  *
199  * @param FeatureDefs    definitions of feature types/extractors
200  * @param ShortName short name of a feature type
201  * @return Feature type which corresponds to ShortName.
202  */
ShortNameToFeatureType(const FEATURE_DEFS_STRUCT & FeatureDefs,const char * ShortName)203 uint32_t ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName) {
204   for (int i = 0; i < FeatureDefs.NumFeatureTypes; i++) {
205     if (!strcmp((FeatureDefs.FeatureDesc[i]->ShortName), ShortName)) {
206       return static_cast<uint32_t>(i);
207     }
208   }
209   ASSERT_HOST(!"Illegal short name for a feature");
210   return 0;
211 }
212 
213 } // namespace tesseract
214