/******************************************************************************
 ** Filename:    intmatcher.h
 ** Purpose:     Interface to high level generic classifier routines.
 ** Author:      Robert Moss
 **
 ** (c) Copyright Hewlett-Packard Company, 1988.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 ******************************************************************************/
#ifndef INTMATCHER_H
#define INTMATCHER_H

#include "intproto.h"
#include "params.h"

namespace tesseract {

// Character fragments could be present in the trained templates
// but turned on/off on a language-by-language basis or depending
// on particular properties of the corpus (e.g. when we expect the
// images to have low exposure).
29 extern BOOL_VAR_H(disable_character_fragments); 30 31 extern INT_VAR_H(classify_integer_matcher_multiplier); 32 33 struct UnicharRating; 34 35 struct CP_RESULT_STRUCT { CP_RESULT_STRUCTCP_RESULT_STRUCT36 CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {} 37 38 float Rating; 39 CLASS_ID Class; 40 }; 41 42 /**---------------------------------------------------------------------------- 43 Public Function Prototypes 44 ----------------------------------------------------------------------------**/ 45 46 #define SE_TABLE_BITS 9 47 #define SE_TABLE_SIZE 512 48 49 struct ScratchEvidence { 50 uint8_t feature_evidence_[MAX_NUM_CONFIGS]; 51 int sum_feature_evidence_[MAX_NUM_CONFIGS]; 52 uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]; 53 54 void Clear(const INT_CLASS_STRUCT *class_template); 55 void ClearFeatureEvidence(const INT_CLASS_STRUCT *class_template); 56 void NormalizeSums(INT_CLASS_STRUCT *ClassTemplate, int16_t NumFeatures); 57 void UpdateSumOfProtoEvidences(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask); 58 }; 59 60 class IntegerMatcher { 61 public: 62 // Integer Matcher Theta Fudge (0-255). 63 static const int kIntThetaFudge = 128; 64 // Bits in Similarity to Evidence Lookup (8-9). 65 static const int kEvidenceTableBits = 9; 66 // Integer Evidence Truncation Bits (8-14). 67 static const int kIntEvidenceTruncBits = 14; 68 // Similarity to Evidence Table Exponential Multiplier. 69 static const float kSEExponentialMultiplier; 70 // Center of Similarity Curve. 71 static const float kSimilarityCenter; 72 73 IntegerMatcher(tesseract::IntParam *classify_debug_level); 74 75 void Match(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, 76 int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, 77 tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, 78 bool SeparateDebugWindows); 79 80 // Applies the CN normalization factor to the given rating and returns 81 // the modified rating. 
82 float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, 83 int matcher_multiplier); 84 85 int FindGoodProtos(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, 86 int16_t NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, 87 int AdaptProtoThreshold, int Debug); 88 89 int FindBadFeatures(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, 90 int16_t NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, 91 int AdaptFeatureThreshold, int Debug); 92 93 private: 94 int UpdateTablesForFeature(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, 95 int FeatureNum, const INT_FEATURE_STRUCT *Feature, 96 ScratchEvidence *evidence, int Debug); 97 98 int FindBestMatch(INT_CLASS_STRUCT *ClassTemplate, const ScratchEvidence &tables, 99 tesseract::UnicharRating *Result); 100 101 #ifndef GRAPHICS_DISABLED 102 void DebugFeatureProtoError(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, 103 const ScratchEvidence &tables, int16_t NumFeatures, int Debug); 104 105 void DisplayProtoDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ConfigMask, 106 const ScratchEvidence &tables, bool SeparateDebugWindows); 107 108 void DisplayFeatureDebugInfo(INT_CLASS_STRUCT *ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, 109 int16_t NumFeatures, const INT_FEATURE_STRUCT *Features, 110 int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows); 111 #endif 112 113 private: 114 tesseract::IntParam *classify_debug_level_; 115 uint8_t similarity_evidence_table_[SE_TABLE_SIZE]; 116 uint32_t evidence_table_mask_; 117 uint32_t mult_trunc_shift_bits_; 118 uint32_t table_trunc_shift_bits_; 119 uint32_t evidence_mult_mask_; 120 }; 121 122 } // namespace tesseract 123 124 #endif 125