tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
lm_consistency.h
1 // File: lm_consistency.h
3 // Description: Struct for recording consistency of the paths representing
4 // OCR hypotheses.
5 // Author: Rika Antonova
6 // Created: Mon Jun 20 11:26:43 PST 2012
7 //
8 // (C) Copyright 2012, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #ifndef TESSERACT_WORDREC_LM_CONSISTENCY_H_
22 #define TESSERACT_WORDREC_LM_CONSISTENCY_H_
23 
24 #include <cstdint> // for INT16_MAX
25 #include "dawg.h" // for EDGE_REF, NO_EDGE
26 #include "dict.h" // for XH_GOOD, XH_INCONSISTENT, XHeightConsi...
27 
28 class BLOB_CHOICE;
29 
30 namespace tesseract {
31 
32 static const char * const XHeightConsistencyEnumName[] = {
33  "XH_GOOD",
34  "XH_SUBNORMAL",
35  "XH_INCONSISTENT",
36 };
37 
38 // Struct for keeping track of the consistency of the path.
39 struct LMConsistencyInfo {
40  enum ChartypeEnum { CT_NONE, CT_ALPHA, CT_DIGIT, CT_OTHER};
41 
42  // How much do characters have to be shifted away from normal parameters
43  // before we say they're not normal?
44  static const int kShiftThresh = 1;
45 
46  // How much shifting from subscript to superscript and back
47  // before we declare shenanigans?
48  static const int kMaxEntropy = 1;
49 
50  // Script positions - order important for entropy calculation.
51  static const int kSUB = 0, kNORM = 1, kSUP = 2;
52  static const int kNumPos = 3;
53 
54  explicit LMConsistencyInfo(const LMConsistencyInfo* parent_info) {
55  if (parent_info == nullptr) {
56  // Initialize from scratch.
57  num_alphas = 0;
58  num_digits = 0;
59  num_punc = 0;
60  num_other = 0;
61  chartype = CT_NONE;
62  punc_ref = NO_EDGE;
63  invalid_punc = false;
64  num_non_first_upper = 0;
65  num_lower = 0;
66  script_id = 0;
67  inconsistent_script = false;
68  num_inconsistent_spaces = 0;
69  inconsistent_font = false;
70  // Initialize XHeight stats.
71  for (int i = 0; i < kNumPos; i++) {
72  xht_count[i] = 0;
73  xht_count_punc[i] = 0;
74  xht_lo[i] = 0;
75  xht_hi[i] = 256; // kBlnCellHeight
76  }
77  xht_sp = -1; // This invalid value indicates that there was no parent.
78  xpos_entropy = 0;
79  xht_decision = XH_GOOD;
80  } else {
81  // Copy parent info
82  *this = *parent_info;
83  }
84  }
85  inline int NumInconsistentPunc() const {
86  return invalid_punc ? num_punc : 0;
87  }
88  inline int NumInconsistentCase() const {
89  return (num_non_first_upper > num_lower) ? num_lower : num_non_first_upper;
90  }
91  inline int NumInconsistentChartype() const {
92  return (NumInconsistentPunc() + num_other +
93  ((num_alphas > num_digits) ? num_digits : num_alphas));
94  }
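95  inline bool Consistent() const {
96  return (NumInconsistentPunc() == 0 && NumInconsistentCase() == 0 &&
97  NumInconsistentChartype() == 0 && !inconsistent_script &&
98  !inconsistent_font && !InconsistentXHeight());
99  }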
100  inline int NumInconsistentSpaces() const {
101  return num_inconsistent_spaces;
102  }
103  inline int InconsistentXHeight() const {
104  return xht_decision == XH_INCONSISTENT;
105  }
106  void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc);
107  float BodyMinXHeight() const {
108  if (InconsistentXHeight())
109  return 0.0f;
110  return xht_lo[kNORM];
111  }
112  float BodyMaxXHeight() const {
113  if (InconsistentXHeight())
114  return static_cast<float>(INT16_MAX);
115  return xht_hi[kNORM];
116  }
117 
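118  int num_alphas;
119  int num_digits;
120  int num_punc;
121  int num_other;
122  ChartypeEnum chartype;
123  EDGE_REF punc_ref;
124  bool invalid_punc;
125  int num_non_first_upper;
126  int num_lower;
127  int script_id;
128  bool inconsistent_script;
129  int num_inconsistent_spaces;
130  bool inconsistent_font;
131  // Metrics clumped by position.
132  float xht_lo[kNumPos];
133  float xht_hi[kNumPos];
134  int16_t xht_count[kNumPos];
135  int16_t xht_count_punc[kNumPos];
136  int16_t xht_sp;
137  int16_t xpos_entropy;
138  XHeightConsistencyEnum xht_decision;
139 };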
140 
141 } // namespace tesseract
142 
143 #endif // TESSERACT_WORDREC_LM_CONSISTENCY_H_
void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc)
Definition: lm_consistency.cpp:29
float BodyMaxXHeight() const
Definition: lm_consistency.h:112
float xht_lo[kNumPos]
Definition: lm_consistency.h:132
Definition: lm_consistency.h:40
int16_t xht_sp
Definition: lm_consistency.h:136
static const int kMaxEntropy
Definition: lm_consistency.h:48
bool Consistent() const
Definition: lm_consistency.h:95
float BodyMinXHeight() const
Definition: lm_consistency.h:107
Definition: lm_consistency.h:40
int NumInconsistentChartype() const
Definition: lm_consistency.h:91
int num_inconsistent_spaces
Definition: lm_consistency.h:129
ChartypeEnum
Definition: lm_consistency.h:40
Definition: dict.h:75
bool inconsistent_font
Definition: lm_consistency.h:130
static const int kSUP
Definition: lm_consistency.h:51
Definition: lm_consistency.h:40
static const int kNumPos
Definition: lm_consistency.h:52
int num_punc
Definition: lm_consistency.h:120
XHeightConsistencyEnum xht_decision
Definition: lm_consistency.h:138
Definition: lm_consistency.h:39
Definition: baseapi.cpp:94
static const int kNORM
Definition: lm_consistency.h:51
int num_other
Definition: lm_consistency.h:121
bool inconsistent_script
Definition: lm_consistency.h:128
bool invalid_punc
Definition: lm_consistency.h:124
int16_t xpos_entropy
Definition: lm_consistency.h:137
int num_non_first_upper
Definition: lm_consistency.h:125
static const int kSUB
Definition: lm_consistency.h:51
int NumInconsistentPunc() const
Definition: lm_consistency.h:85
int16_t xht_count_punc[kNumPos]
Definition: lm_consistency.h:135
EDGE_REF punc_ref
Definition: lm_consistency.h:123
int NumInconsistentCase() const
Definition: lm_consistency.h:88
int num_digits
Definition: lm_consistency.h:119
static const int kShiftThresh
Definition: lm_consistency.h:44
int num_alphas
Definition: lm_consistency.h:118
float xht_hi[kNumPos]
Definition: lm_consistency.h:133
int num_lower
Definition: lm_consistency.h:126
int16_t xht_count[kNumPos]
Definition: lm_consistency.h:134
static const char *const XHeightConsistencyEnumName[]
Definition: lm_consistency.h:32
LMConsistencyInfo(const LMConsistencyInfo *parent_info)
Definition: lm_consistency.h:54
int script_id
Definition: lm_consistency.h:127
XHeightConsistencyEnum
Definition: dict.h:75
Definition: lm_consistency.h:40
ChartypeEnum chartype
Definition: lm_consistency.h:122
int NumInconsistentSpaces() const
Definition: lm_consistency.h:100
Definition: dict.h:75
int InconsistentXHeight() const
Definition: lm_consistency.h:103
Definition: ratngs.h:49