/******************************************************************************
 ** Filename:    intmatcher.h
 ** Purpose:     Interface to high level generic classifier routines.
 ** Author:      Robert Moss
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
17 #ifndef INTMATCHER_H
18 #define INTMATCHER_H
19 
20 #include "intproto.h"
21 #include "params.h"
22 
23 namespace tesseract {
24 
25 // Character fragments could be present in the trained templaes
26 // but turned on/off on the language-by-language basis or depending
27 // on particular properties of the corpus (e.g. when we expect the
28 // images to have low exposure).
29 extern BOOL_VAR_H(disable_character_fragments);
30 
31 extern INT_VAR_H(classify_integer_matcher_multiplier);
32 
33 struct UnicharRating;
34 
35 struct CP_RESULT_STRUCT {
CP_RESULT_STRUCTCP_RESULT_STRUCT36   CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}
37 
38   float Rating;
39   CLASS_ID Class;
40 };
41 
/**----------------------------------------------------------------------------
          Public Function Prototypes
----------------------------------------------------------------------------**/

// SE_TABLE_SIZE is the element count of
// IntegerMatcher::similarity_evidence_table_. It is derived from
// SE_TABLE_BITS (2^9 == 512) so the two values cannot drift apart.
#define SE_TABLE_BITS 9
#define SE_TABLE_SIZE (1 << SE_TABLE_BITS)
48 
49 struct ScratchEvidence {
50   uint8_t feature_evidence_[MAX_NUM_CONFIGS];
51   int sum_feature_evidence_[MAX_NUM_CONFIGS];
52   uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX];
53 
54   void Clear(const INT_CLASS_STRUCT *class_template);
55   void ClearFeatureEvidence(const INT_CLASS_STRUCT *class_template);
56   void NormalizeSums(INT_CLASS_STRUCT *ClassTemplate, int16_t NumFeatures);
57   void UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask);
58 };
59 
60 class IntegerMatcher {
61 public:
62   // Integer Matcher Theta Fudge (0-255).
63   static const int kIntThetaFudge = 128;
64   // Bits in Similarity to Evidence Lookup (8-9).
65   static const int kEvidenceTableBits = 9;
66   // Integer Evidence Truncation Bits (8-14).
67   static const int kIntEvidenceTruncBits = 14;
68   // Similarity to Evidence Table Exponential Multiplier.
69   static const float kSEExponentialMultiplier;
70   // Center of Similarity Curve.
71   static const float kSimilarityCenter;
72 
73   IntegerMatcher(tesseract::IntParam *classify_debug_level);
74 
75   void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
76              int16_t NumFeatures, const INT_FEATURE_STRUCT *Features,
77              tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug,
78              bool SeparateDebugWindows);
79 
80   // Applies the CN normalization factor to the given rating and returns
81   // the modified rating.
82   float ApplyCNCorrection(float rating, int blob_length, int normalization_factor,
83                           int matcher_multiplier);
84 
85   int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
86                      int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray,
87                      int AdaptProtoThreshold, int Debug);
88 
89   int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
90                       int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray,
91                       int AdaptFeatureThreshold, int Debug);
92 
93 private:
94   int UpdateTablesForFeature(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
95                              int FeatureNum, const INT_FEATURE_STRUCT *Feature,
96                              ScratchEvidence *evidence, int Debug);
97 
98   int FindBestMatch(INT_CLASS_STRUCT *ClassTemplate, const ScratchEvidence &tables,
99                     tesseract::UnicharRating *Result);
100 
101 #ifndef GRAPHICS_DISABLED
102   void DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
103                               const ScratchEvidence &tables, int16_t NumFeatures, int Debug);
104 
105   void DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask,
106                              const ScratchEvidence &tables, bool SeparateDebugWindows);
107 
108   void DisplayFeatureDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask,
109                                int16_t NumFeatures, const INT_FEATURE_STRUCT *Features,
110                                int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows);
111 #endif
112 
113 private:
114   tesseract::IntParam *classify_debug_level_;
115   uint8_t similarity_evidence_table_[SE_TABLE_SIZE];
116   uint32_t evidence_table_mask_;
117   uint32_t mult_trunc_shift_bits_;
118   uint32_t table_trunc_shift_bits_;
119   uint32_t evidence_mult_mask_;
120 };
121 
122 } // namespace tesseract
123 
124 #endif
125