tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
intmatcher.h
1 /******************************************************************************
2  ** Filename: intmatcher.h
3  ** Purpose: Interface to high level generic classifier routines.
4  ** Author: Robert Moss
5  ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 #ifndef INTMATCHER_H
19 #define INTMATCHER_H
20 
21 #include "params.h"
22 
23 // Character fragments could be present in the trained templates
24 // but turned on/off on a language-by-language basis or depending
25 // on particular properties of the corpus (e.g. when we expect the
26 // images to have low exposure).
// This is a declaration only (extern); the parameter is defined elsewhere.
// NOTE(review): FALSE is presumably the default value - confirm against
// BOOL_VAR_H in params.h.
27 extern BOOL_VAR_H(disable_character_fragments, FALSE,
28  "Do not include character fragments in the"
29  " results of the classifier");
30 
// Declaration only (extern) of the classify_integer_matcher_multiplier
// parameter; its help string gives the intended range as 0-255.
// NOTE(review): 10 is presumably the default value - confirm against
// INT_VAR_H in params.h.
31 extern INT_VAR_H(classify_integer_matcher_multiplier, 10,
32  "Integer Matcher Multiplier 0-255: ");
33 
34 
38 #include "intproto.h"
39 #include "cutoffs.h"
40 
// Forward declaration: this header only uses tesseract::UnicharRating
// through pointers (see Match and FindBestMatch), so the incomplete type
// is sufficient here.
41 namespace tesseract {
42 struct UnicharRating;
43 }
44 
// Default constructor: starts with Rating = 0.0f and Class = 0.
46  CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}
47 
// Both members are zero-initialized by the default constructor.
// NOTE(review): presumably the rating assigned to a candidate and the id of
// the class it belongs to - confirm with the code that fills this struct.
48  float Rating;
49  CLASS_ID Class;
50 };
51 
52 /*----------------------------------------------------------------------------
53  Variables
54 -----------------------------------------------------------------------------*/
55 
// Declaration only (extern); help string gives the range as 0-255.
// NOTE(review): 230 is presumably the default, and this is presumably the
// value callers pass as AdaptProtoThreshold to FindGoodProtos - confirm.
56 extern INT_VAR_H(classify_adapt_proto_thresh, 230,
57  "Threshold for good protos during adaptive 0-255: ");
58 
// Declaration only (extern); help string gives the range as 0-255.
// NOTE(review): 230 is presumably the default, and this is presumably the
// value callers pass as AdaptFeatureThreshold to Match / FindBadFeatures -
// confirm.
59 extern INT_VAR_H(classify_adapt_feature_thresh, 230,
60  "Threshold for good features during adaptive 0-255: ");
61 
// Dimensions of the similarity-to-evidence lookup table.
// SE_TABLE_SIZE (512) equals 1 << SE_TABLE_BITS (2^9), so the two macros
// should be changed together. SE_TABLE_SIZE is the length of the
// similarity_evidence_table_ array declared further down in this header.
66 #define SE_TABLE_BITS 9
67 #define SE_TABLE_SIZE 512
68 
// Evidence buffers: two arrays of MAX_NUM_CONFIGS entries (8-bit evidence
// and an int sum) and a MAX_NUM_PROTOS x MAX_PROTO_INDEX table of 8-bit
// values.
// NOTE(review): presumably indexed by config id and by (proto id, proto
// index) respectively - confirm in the implementation.
70  uint8_t feature_evidence_[MAX_NUM_CONFIGS];
71  int sum_feature_evidence_[MAX_NUM_CONFIGS];
72  uint8_t proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX];
73 
// NOTE(review): presumably resets the evidence buffers for the given class
// template; the implementation is not visible in this header - confirm.
74  void Clear(const INT_CLASS class_template);
// NOTE(review): presumably resets only feature_evidence_ for the given
// class template (as opposed to Clear) - confirm in the implementation.
75  void ClearFeatureEvidence(const INT_CLASS class_template);
// NOTE(review): presumably rescales sum_feature_evidence_ using NumFeatures
// and used_features; the exact formula is not visible here - confirm.
76  void NormalizeSums(INT_CLASS ClassTemplate, int16_t NumFeatures,
77  int32_t used_features);
// NOTE(review): presumably folds proto_evidence_ into
// sum_feature_evidence_ for the configs selected by ConfigMask - confirm in
// the implementation.
78  void UpdateSumOfProtoEvidences(
79  INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, int16_t NumFeatures);
80 };
81 
82 
84  public:
85  // Integer Matcher Theta Fudge (0-255).
86  static const int kIntThetaFudge = 128;
87  // Bits in Similarity to Evidence Lookup (8-9). Same value as SE_TABLE_BITS.
88  static const int kEvidenceTableBits = 9;
89  // Integer Evidence Truncation Bits (8-14).
90  static const int kIntEvidenceTruncBits = 14;
91  // Similarity to Evidence Table Exponential Multiplier.
// Declared without an initializer: the value comes from the out-of-class
// definition, which is not part of this header.
92  static const float kSEExponentialMultiplier;
93  // Center of Similarity Curve.
// Declared without an initializer, like kSEExponentialMultiplier.
94  static const float kSimilarityCenter;
95 
// Constructs a matcher from a pointer to the classify_debug_level parameter.
// NOTE(review): the pointer is presumably kept in classify_debug_level_ and
// must outlive the matcher - confirm. The constructor is not marked
// explicit, so an IntParam* converts implicitly to IntegerMatcher.
96  IntegerMatcher(tesseract::IntParam *classify_debug_level);
97 
// NOTE(review): presumably matches the NumFeatures entries of Features
// (read-only) against ClassTemplate and reports the outcome through Result
// (non-const pointer). The roles of ProtoMask, ConfigMask,
// AdaptFeatureThreshold and the debug arguments are inferred from their
// names only - confirm against the implementation.
98  void Match(INT_CLASS ClassTemplate,
99  BIT_VECTOR ProtoMask,
100  BIT_VECTOR ConfigMask,
101  int16_t NumFeatures,
102  const INT_FEATURE_STRUCT* Features,
103  tesseract::UnicharRating* Result,
104  int AdaptFeatureThreshold,
105  int Debug,
106  bool SeparateDebugWindows);
107 
108  // Applies the CN normalization factor to the given rating and returns
109  // the modified rating.
// NOTE(review): how blob_length, normalization_factor and
// matcher_multiplier are combined is not visible in this header;
// matcher_multiplier is presumably classify_integer_matcher_multiplier -
// confirm at the call sites.
110  float ApplyCNCorrection(float rating, int blob_length,
111  int normalization_factor, int matcher_multiplier);
112 
// NOTE(review): presumably writes the ids of protos whose evidence reaches
// AdaptProtoThreshold into ProtoArray and returns how many were written.
// The required capacity of ProtoArray is not stated here - confirm against
// the implementation before calling.
113  int FindGoodProtos(INT_CLASS ClassTemplate,
114  BIT_VECTOR ProtoMask,
115  BIT_VECTOR ConfigMask,
116  uint16_t BlobLength,
117  int16_t NumFeatures,
118  INT_FEATURE_ARRAY Features,
119  PROTO_ID *ProtoArray,
120  int AdaptProtoThreshold,
121  int Debug);
122 
// NOTE(review): presumably writes the ids of features whose evidence falls
// below AdaptFeatureThreshold into FeatureArray and returns how many were
// written. The required capacity of FeatureArray is not stated here -
// confirm against the implementation before calling.
123  int FindBadFeatures(INT_CLASS ClassTemplate,
124  BIT_VECTOR ProtoMask,
125  BIT_VECTOR ConfigMask,
126  uint16_t BlobLength,
127  int16_t NumFeatures,
128  INT_FEATURE_ARRAY Features,
129  FEATURE_ID *FeatureArray,
130  int AdaptFeatureThreshold,
131  int Debug);
132 
133  private:
// Private helper operating on a single feature (FeatureNum / Feature).
// evidence is a non-const pointer, so it is presumably updated in place.
// NOTE(review): the meaning of the int return value is not visible in this
// header - confirm in the implementation.
134  int UpdateTablesForFeature(
135  INT_CLASS ClassTemplate,
136  BIT_VECTOR ProtoMask,
137  BIT_VECTOR ConfigMask,
138  int FeatureNum,
139  const INT_FEATURE_STRUCT* Feature,
140  ScratchEvidence *evidence,
141  int Debug);
142 
// Private helper: reads the evidence tables (const reference) and has a
// non-const Result pointer, presumably its output.
// NOTE(review): presumably selects the best-scoring config and returns its
// score or index - confirm in the implementation.
143  int FindBestMatch(INT_CLASS ClassTemplate,
144  const ScratchEvidence &tables,
145  tesseract::UnicharRating* Result);
146 
147 #ifndef GRAPHICS_DISABLED
// Debug helper; declared only when GRAPHICS_DISABLED is not defined.
// Takes the evidence tables by const reference, so it does not modify them.
// NOTE(review): what is reported is not visible in this header.
148  void DebugFeatureProtoError(
149  INT_CLASS ClassTemplate,
150  BIT_VECTOR ProtoMask,
151  BIT_VECTOR ConfigMask,
152  const ScratchEvidence &tables,
153  int16_t NumFeatures,
154  int Debug);
155 
// Debug display helper for protos; declared only when GRAPHICS_DISABLED is
// not defined. Takes the evidence tables by const reference.
// NOTE(review): SeparateDebugWindows presumably selects one window per
// class versus a shared window - confirm in the implementation.
156  void DisplayProtoDebugInfo(
157  INT_CLASS ClassTemplate,
158  BIT_VECTOR ProtoMask,
159  BIT_VECTOR ConfigMask,
160  const ScratchEvidence &tables,
161  bool SeparateDebugWindows);
162 
// Debug display helper for features; declared only when GRAPHICS_DISABLED
// is not defined. Unlike the two helpers above it receives the raw feature
// array rather than a ScratchEvidence.
// NOTE(review): presumably recomputes per-feature evidence internally -
// confirm in the implementation.
163  void DisplayFeatureDebugInfo(
164  INT_CLASS ClassTemplate,
165  BIT_VECTOR ProtoMask,
166  BIT_VECTOR ConfigMask,
167  int16_t NumFeatures,
168  const INT_FEATURE_STRUCT* Features,
169  int AdaptFeatureThreshold,
170  int Debug,
171  bool SeparateDebugWindows);
172 #endif
173 
174  private:
// Lookup table of SE_TABLE_SIZE (512) 8-bit entries.
// NOTE(review): presumably maps a similarity value to an evidence value and
// is filled by the constructor - confirm in the implementation.
176  uint8_t similarity_evidence_table_[SE_TABLE_SIZE];
181 };
182 
// Free debug helper (not a class member).
// NOTE(review): FeatureNum is typed INT_FEATURE here, whereas
// UpdateTablesForFeature in this header declares FeatureNum as int -
// confirm that this type is intended. What the function prints is not
// visible in this header.
186 void IMDebugConfiguration(INT_FEATURE FeatureNum,
187  uint16_t ActualProtoNum,
188  uint8_t Evidence,
189  BIT_VECTOR ConfigMask,
190  uint32_t ConfigWord);
191 
// Free debug helper (not a class member).
// NOTE(review): FeatureEvidence presumably points at ConfigCount evidence
// values; FeatureNum is typed INT_FEATURE as in IMDebugConfiguration -
// confirm both against the implementation.
192 void IMDebugConfigurationSum(INT_FEATURE FeatureNum,
193  uint8_t *FeatureEvidence,
194  int32_t ConfigCount);
195 
// NOTE(review): presumably heap-sorts the n keys in ra in place while
// applying the same permutation to rb. Sort direction and whether the
// arrays are treated as 0- or 1-based are not visible in this header -
// confirm in the implementation before calling.
196 void HeapSort (int n, int ra[], int rb[]);
197 
201 #endif
uint32_t mult_trunc_shift_bits_
Definition: intmatcher.h:178
uint32_t evidence_mult_mask_
Definition: intmatcher.h:180
Definition: intmatcher.h:83
uint32_t evidence_table_mask_
Definition: intmatcher.h:177
Definition: baseapi.cpp:94
uint32_t table_trunc_shift_bits_
Definition: intmatcher.h:179
Definition: intproto.h:105
Definition: params.h:142
CLASS_ID Class
Definition: intmatcher.h:49
Definition: intmatcher.h:69
tesseract::IntParam * classify_debug_level_
Definition: intmatcher.h:175
float Rating
Definition: intmatcher.h:48
static const float kSimilarityCenter
Definition: intmatcher.h:94
CP_RESULT_STRUCT()
Definition: intmatcher.h:46
Definition: intproto.h:132
Definition: shapetable.h:41
Definition: intmatcher.h:45
static const float kSEExponentialMultiplier
Definition: intmatcher.h:92