tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
ratngs.h
1 /**********************************************************************
2  * File: ratngs.h (Formerly ratings.h)
3  * Description: Definition of the WERD_CHOICE and BLOB_CHOICE classes.
4  * Author: Ray Smith
5  * Created: Thu Apr 23 11:40:38 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef RATNGS_H
21 #define RATNGS_H
22 
23 #include <cassert>
24 #include <cfloat> // for FLT_MAX
25 
26 #include "clst.h"
27 #include "elst.h"
28 #include "fontinfo.h"
29 #include "genericvector.h"
30 #include "matrix.h"
31 #include "unichar.h"
32 #include "unicharset.h"
33 #include "werd.h"
34 
35 class MATRIX;
36 struct TBLOB;
37 struct TWERD;
38 
39 // Identifies which stage produced a BLOB_CHOICE, making it possible to determine
40 // whether a blob has been classified by inspecting its BLOB_CHOICEs.
41 enum BlobChoiceClassifier {
42  BCC_STATIC_CLASSIFIER, // From the char_norm classifier; IsClassified() is true.
43  BCC_ADAPTED_CLASSIFIER, // From the adaptive classifier; IsClassified() and IsAdapted() are true.
44  BCC_SPECKLE_CLASSIFIER, // Backup for failed classification; IsClassified() is still true.
45  BCC_AMBIG, // Generated by ambiguity detection; IsClassified() is false.
46  BCC_FAKE, // From some other process; the value a default-constructed BLOB_CHOICE gets.
47 };
48 
49 class BLOB_CHOICE: public ELIST_LINK
50 {
51  public:
53  unichar_id_ = UNICHAR_SPACE;
54  fontinfo_id_ = -1;
55  fontinfo_id2_ = -1;
56  rating_ = 10.0;
57  certainty_ = -1.0;
58  script_id_ = -1;
59  xgap_before_ = 0;
60  xgap_after_ = 0;
61  min_xheight_ = 0.0f;
62  max_xheight_ = 0.0f;
63  yshift_ = 0.0f;
64  classifier_ = BCC_FAKE;
65  }
66  BLOB_CHOICE(UNICHAR_ID src_unichar_id, // character id
67  float src_rating, // rating
68  float src_cert, // certainty
69  int script_id, // script
70  float min_xheight, // min xheight in image pixel units
71  float max_xheight, // max xheight allowed by this char
72  float yshift, // the larger of y shift (top or bottom)
73  BlobChoiceClassifier c); // adapted match or other
74  BLOB_CHOICE(const BLOB_CHOICE &other);
75  ~BLOB_CHOICE() = default;
76 
77  UNICHAR_ID unichar_id() const {
78  return unichar_id_;
79  }
80  float rating() const {
81  return rating_;
82  }
83  float certainty() const {
84  return certainty_;
85  }
86  int16_t fontinfo_id() const {
87  return fontinfo_id_;
88  }
89  int16_t fontinfo_id2() const {
90  return fontinfo_id2_;
91  }
93  return fonts_;
94  }
96  fonts_ = fonts;
97  int score1 = 0, score2 = 0;
98  fontinfo_id_ = -1;
99  fontinfo_id2_ = -1;
100  for (int f = 0; f < fonts_.size(); ++f) {
101  if (fonts_[f].score > score1) {
102  score2 = score1;
104  score1 = fonts_[f].score;
105  fontinfo_id_ = fonts_[f].fontinfo_id;
106  } else if (fonts_[f].score > score2) {
107  score2 = fonts_[f].score;
108  fontinfo_id2_ = fonts_[f].fontinfo_id;
109  }
110  }
111  }
112  int script_id() const {
113  return script_id_;
114  }
116  return matrix_cell_;
117  }
118  int16_t xgap_before() const {
119  return xgap_before_;
120  }
121  int16_t xgap_after() const {
122  return xgap_after_;
123  }
124  float min_xheight() const {
125  return min_xheight_;
126  }
127  float max_xheight() const {
128  return max_xheight_;
129  }
130  float yshift() const {
131  return yshift_;
132  }
133  BlobChoiceClassifier classifier() const {
134  return classifier_;
135  }
136  bool IsAdapted() const {
137  return classifier_ == BCC_ADAPTED_CLASSIFIER;
138  }
139  bool IsClassified() const {
     // True when this choice came from one of the three classifier sources
     // (static, adapted or speckle); BCC_AMBIG and BCC_FAKE yield false.
140  return classifier_ == BCC_STATIC_CLASSIFIER ||
141  classifier_ == BCC_ADAPTED_CLASSIFIER ||
142  classifier_ == BCC_SPECKLE_CLASSIFIER;
143  }
144 
145  void set_unichar_id(UNICHAR_ID newunichar_id) {
146  unichar_id_ = newunichar_id;
147  }
148  void set_rating(float newrat) {
149  rating_ = newrat;
150  }
151  void set_certainty(float newrat) {
152  certainty_ = newrat;
153  }
154  void set_script(int newscript_id) {
155  script_id_ = newscript_id;
156  }
157  void set_matrix_cell(int col, int row) {
158  matrix_cell_.col = col;
159  matrix_cell_.row = row;
160  }
161  void set_xgap_before(int16_t gap) {
162  xgap_before_ = gap;
163  }
164  void set_xgap_after(int16_t gap) {
165  xgap_after_ = gap;
166  }
167  void set_classifier(BlobChoiceClassifier classifier) {
169  }
170  static BLOB_CHOICE* deep_copy(const BLOB_CHOICE* src) {
     // Returns a newly allocated BLOB_CHOICE assigned from *src via the private
     // copy assignment operator. src is dereferenced unconditionally, so it
     // must not be null. The result comes from `new`; presumably the caller
     // (or the list it is inserted into) is responsible for deleting it.
171  BLOB_CHOICE* choice = new BLOB_CHOICE;
172  *choice = *src;
173  return choice;
174  }
175  // Returns true if *this and other agree on the baseline and x-height
176  // to within some tolerance based on a given estimate of the x-height.
177  bool PosAndSizeAgree(const BLOB_CHOICE& other, float x_height,
178  bool debug) const;
179 
180  void print(const UNICHARSET *unicharset) const {
181  tprintf("r%.2f c%.2f x[%g,%g]: %d %s",
184  (unicharset == nullptr) ? "" :
185  unicharset->debug_str(unichar_id_).string());
186  }
187  void print_full() const {
188  print(nullptr);
189  tprintf(" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n",
191  }
192  // Sort function for sorting BLOB_CHOICEs in increasing order of rating.
     // qsort-style comparator: p1 and p2 each point to a `const BLOB_CHOICE*`.
     // Returns a negative value if *p1 rates lower, a positive value if it
     // rates higher, and 0 when the ratings are equal (or unordered, e.g. NaN).
     // Returning 0 for ties keeps the comparator consistent: cmp(a, b) and
     // cmp(b, a) can no longer both claim "greater".
193  static int SortByRating(const void *p1, const void *p2) {
194  const BLOB_CHOICE *bc1 = *static_cast<const BLOB_CHOICE *const *>(p1);
195  const BLOB_CHOICE *bc2 = *static_cast<const BLOB_CHOICE *const *>(p2);
196  return (bc1->rating_ > bc2->rating_) - (bc1->rating_ < bc2->rating_);
197  }
198 
199  private:
200  // Copy assignment operator.
201  BLOB_CHOICE& operator=(const BLOB_CHOICE& other);
202 
203  UNICHAR_ID unichar_id_; // unichar id
204  // Fonts and scores. Allowed to be empty.
206  int16_t fontinfo_id_; // char font information
207  int16_t fontinfo_id2_; // 2nd choice font information
208  // Rating is the classifier distance weighted by the length of the outline
209  // in the blob. In terms of probability, classifier distance is -klog p such
210  // that the resulting distance is in the range [0, 1] and then
211  // rating = w (-k log p) where w is the weight for the length of the outline.
212  // Sums of ratings may be compared meaningfully for words of different
213  // segmentation.
214  float rating_; // size related
215  // Certainty is a number in [-20, 0] indicating the classifier certainty
216  // of the choice. In terms of probability, certainty = 20 (k log p) where
217  // k is defined as above to normalize -klog p to the range [0, 1].
218  float certainty_; // absolute
220  // Holds the position of this choice in the ratings matrix.
221  // Used to locate the position in the matrix during path backtracking.
223  int16_t xgap_before_;
224  int16_t xgap_after_;
225  // X-height range (in image pixels) that this classification supports.
228  // yshift_ - The vertical distance (in image pixels) the character is
229  // shifted (up or down) from an acceptable y position.
230  float yshift_;
231  BlobChoiceClassifier classifier_; // What generated *this.
232 };
233 
234 // Make BLOB_CHOICE listable.
235 ELISTIZEH(BLOB_CHOICE)
236 
237 // Return the BLOB_CHOICE in bc_list matching a given unichar_id,
238 // or nullptr if there is no match.
239 BLOB_CHOICE *FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list);
240 
241 // Permuter codes used in WERD_CHOICEs. The trailing comments give each code's numeric value.
    // WERD_CHOICE stores the code in a uint8_t and WERD_CHOICE::init() resets it to NO_PERM.
242 enum PermuterType {
243  NO_PERM, // 0
244  PUNC_PERM, // 1
245  TOP_CHOICE_PERM, // 2
246  LOWER_CASE_PERM, // 3
247  UPPER_CASE_PERM, // 4
248  NGRAM_PERM, // 5
249  NUMBER_PERM, // 6
250  USER_PATTERN_PERM, // 7
251  SYSTEM_DAWG_PERM, // 8
252  DOC_DAWG_PERM, // 9
253  USER_DAWG_PERM, // 10
254  FREQ_DAWG_PERM, // 11
255  COMPOUND_PERM, // 12
256 
257  NUM_PERMUTER_TYPES // 13: number of codes above, not a permuter itself.
258 };
259 
260 namespace tesseract {
261 // ScriptPos tells whether a character is subscript, superscript or normal.
262 enum ScriptPos {
267 };
268 
269 const char *ScriptPosToString(tesseract::ScriptPos script_pos);
270 
271 } // namespace tesseract.
272 
273 class WERD_CHOICE : public ELIST_LINK {
274  public:
275  static const float kBadRating;
276  static const char *permuter_name(uint8_t permuter);
277 
278  WERD_CHOICE(const UNICHARSET *unicharset)
279  : unicharset_(unicharset) { this->init(8); }
280  WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
281  : unicharset_(unicharset) { this->init(reserved); }
282  WERD_CHOICE(const char *src_string,
283  const char *src_lengths,
284  float src_rating,
285  float src_certainty,
286  uint8_t src_permuter,
287  const UNICHARSET &unicharset)
288  : unicharset_(&unicharset) {
289  this->init(src_string, src_lengths, src_rating,
290  src_certainty, src_permuter);
291  }
292  WERD_CHOICE(const char *src_string, const UNICHARSET &unicharset);
294  : ELIST_LINK(word), unicharset_(word.unicharset_) {
295  this->init(word.length());
296  this->operator=(word);
297  }
298  ~WERD_CHOICE();
299 
300  const UNICHARSET *unicharset() const {
301  return unicharset_;
302  }
303  inline int length() const {
304  return length_;
305  }
306  float adjust_factor() const {
307  return adjust_factor_;
308  }
309  void set_adjust_factor(float factor) {
310  adjust_factor_ = factor;
311  }
312  inline const UNICHAR_ID *unichar_ids() const {
313  return unichar_ids_;
314  }
315  inline UNICHAR_ID unichar_id(int index) const {
     // Returns the unichar id stored at position index of the word.
     // index must lie in [0, length_); both bounds are asserted (the lower
     // bound was previously unchecked), matching the range BlobPosition() tests.
     // The assert is compiled out when NDEBUG is defined.
316  assert(index >= 0 && index < length_);
317  return unichar_ids_[index];
318  }
319  inline int state(int index) const {
320  return state_[index];
321  }
323  if (index < 0 || index >= length_)
324  return tesseract::SP_NORMAL;
325  return script_pos_[index];
326  }
327  inline float rating() const {
328  return rating_;
329  }
330  inline float certainty() const {
331  return certainty_;
332  }
333  inline float certainty(int index) const {
334  return certainties_[index];
335  }
336  inline float min_x_height() const {
337  return min_x_height_;
338  }
339  inline float max_x_height() const {
340  return max_x_height_;
341  }
342  inline void set_x_heights(float min_height, float max_height) {
343  min_x_height_ = min_height;
344  max_x_height_ = max_height;
345  }
346  inline uint8_t permuter() const {
347  return permuter_;
348  }
349  const char *permuter_name() const;
350  // Returns the BLOB_CHOICE_LIST corresponding to the given index in the word,
351  // taken from the appropriate cell in the ratings MATRIX.
352  // Borrowed pointer, so do not delete.
353  BLOB_CHOICE_LIST* blob_choices(int index, MATRIX* ratings) const;
354 
355  // Returns the MATRIX_COORD corresponding to the location in the ratings
356  // MATRIX for the given index into the word.
357  MATRIX_COORD MatrixCoord(int index) const;
358 
359  inline void set_unichar_id(UNICHAR_ID unichar_id, int index) {
     // Overwrites only the unichar id at position index; state_, certainties_,
     // script_pos_ and the word rating/certainty are left untouched.
     // index must lie in [0, length_); both bounds are asserted so that a
     // negative index cannot slip through to an out-of-bounds write in debug builds.
360  assert(index >= 0 && index < length_);
361  unichar_ids_[index] = unichar_id;
362  }
363  bool dangerous_ambig_found() const {
364  return dangerous_ambig_found_;
365  }
366  void set_dangerous_ambig_found_(bool value) {
367  dangerous_ambig_found_ = value;
368  }
369  inline void set_rating(float new_val) {
370  rating_ = new_val;
371  }
372  inline void set_certainty(float new_val) {
373  certainty_ = new_val;
374  }
375  inline void set_permuter(uint8_t perm) {
376  permuter_ = perm;
377  }
378  // Note: this function should only be used if all the fields
379  // are populated manually with set_* functions (rather than
380  // (copy)constructors and append_* functions).
     // Only length_ changes: no array is resized and no element is initialized.
381  inline void set_length(int len) {
382  ASSERT_HOST(reserved_ >= len); // The new length must fit the allocated capacity.
383  length_ = len;
384  }
385 
387  inline void double_the_size() {
388  if (reserved_ > 0) {
390  reserved_, unichar_ids_);
392  reserved_, script_pos_);
394  reserved_, state_);
396  reserved_, certainties_);
397  reserved_ *= 2;
398  } else {
399  unichar_ids_ = new UNICHAR_ID[1];
400  script_pos_ = new tesseract::ScriptPos[1];
401  state_ = new int[1];
402  certainties_ = new float[1];
403  reserved_ = 1;
404  }
405  }
406 
409  inline void init(int reserved) {
     // Allocates the four parallel per-unichar arrays with room for `reserved`
     // entries (or sets them to nullptr when reserved <= 0) and resets every
     // scalar field to its empty-word default. Pointers already held are
     // overwritten without being deleted, so this is only suitable for a
     // freshly constructed object.
410  reserved_ = reserved;
411  if (reserved > 0) {
412  unichar_ids_ = new UNICHAR_ID[reserved];
413  script_pos_ = new tesseract::ScriptPos[reserved];
414  state_ = new int[reserved];
415  certainties_ = new float[reserved];
416  } else {
417  unichar_ids_ = nullptr;
418  script_pos_ = nullptr;
419  state_ = nullptr;
420  certainties_ = nullptr;
421  }
422  length_ = 0; // The word starts empty regardless of capacity.
423  adjust_factor_ = 1.0f;
424  rating_ = 0.0; // Ratings are accumulated with += as unichars are set.
425  certainty_ = FLT_MAX; // Word certainty is the minimum; start above any real value.
426  min_x_height_ = 0.0f;
427  max_x_height_ = FLT_MAX;
428  permuter_ = NO_PERM;
429  unichars_in_script_order_ = false; // Tesseract is strict left-to-right.
430  dangerous_ambig_found_ = false;
431  }
432 
438  void init(const char *src_string, const char *src_lengths,
439  float src_rating, float src_certainty,
440  uint8_t src_permuter);
441 
443  inline void make_bad() {
     // Set the fields in this choice to be default (bad) values: an empty word
     // with rating kBadRating and the lowest possible certainty. The allocated
     // arrays and reserved_ are left as they are.
444  length_ = 0;
445  rating_ = kBadRating;
446  certainty_ = -FLT_MAX;
447  }
448 
453  UNICHAR_ID unichar_id, int blob_count,
454  float rating, float certainty) {
455  assert(reserved_ > length_);
456  length_++;
457  this->set_unichar_id(unichar_id, blob_count,
458  rating, certainty, length_-1);
459  }
460 
461  void append_unichar_id(UNICHAR_ID unichar_id, int blob_count,
462  float rating, float certainty);
463 
464  inline void set_unichar_id(UNICHAR_ID unichar_id, int blob_count,
465  float rating, float certainty, int index) {
466  assert(index < length_);
467  unichar_ids_[index] = unichar_id;
468  state_[index] = blob_count;
469  certainties_[index] = certainty;
470  script_pos_[index] = tesseract::SP_NORMAL;
471  rating_ += rating;
472  if (certainty < certainty_) {
474  }
475  }
476  // Sets the entries for the given index from the BLOB_CHOICE, assuming
477  // unit fragment lengths, but setting the state for this index to blob_count.
478  void set_blob_choice(int index, int blob_count,
479  const BLOB_CHOICE* blob_choice);
480 
481  bool contains_unichar_id(UNICHAR_ID unichar_id) const;
482  void remove_unichar_ids(int index, int num);
483  inline void remove_last_unichar_id() { --length_; }
484  inline void remove_unichar_id(int index) {
485  this->remove_unichar_ids(index, 1);
486  }
487  bool has_rtl_unichar_id() const;
488  void reverse_and_mirror_unichar_ids();
489 
490  // Returns the half-open interval of unichar_id indices [start, end) which
491  // enclose the core portion of this word -- the part after stripping
492  // punctuation from the left and right.
493  void punct_stripped(int *start_core, int *end_core) const;
494 
495  // Returns the indices [start, end) containing the core of the word, stripped
496  // of any superscript digits on either side. (i.e., the non-footnote part
497  // of the word). There is no guarantee that the output range is non-empty.
498  void GetNonSuperscriptSpan(int *start, int *end) const;
499 
500  // Return a copy of this WERD_CHOICE with the choices [start, end).
501  // The result is useful only for checking against a dictionary.
502  WERD_CHOICE shallow_copy(int start, int end) const;
503 
504  void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const;
505  const STRING debug_string() const {
     // Builds a debug representation of the word by concatenating
     // unicharset_->debug_str() of every unichar id, each followed by a single
     // space (so a non-empty result ends with a trailing space).
506  STRING word_str;
507  for (int i = 0; i < length_; ++i) {
508  word_str += unicharset_->debug_str(unichar_ids_[i]);
509  word_str += " ";
510  }
511  return word_str;
512  }
513  // Returns true if any unichar_id in the word is a non-space-delimited char.
515  for (int i = 0; i < length_; ++i) {
516  if (!unicharset_->IsSpaceDelimited(unichar_ids_[i])) return true;
517  }
518  return false;
519  }
520  // Returns true if every unichar id in the word is UNICHAR_SPACE.
     // An empty word (length_ == 0) also returns true, since no id differs.
521  bool IsAllSpaces() const {
522  for (int i = 0; i < length_; ++i) {
523  if (unichar_ids_[i] != UNICHAR_SPACE) return false;
524  }
525  return true;
526  }
527 
528  // Call this to override the default (strict left-to-right graphemes)
529  // when some engine has produced the graphemes of this word in
530  // "reading order" instead. Returns the value that was just stored.
531  bool set_unichars_in_script_order(bool in_script_order) {
532  return unichars_in_script_order_ = in_script_order; // Assignment, not comparison.
533  }
534 
536  return unichars_in_script_order_;
537  }
538 
539  // Returns a UTF-8 string equivalent to the current choice
540  // of UNICHAR IDs.
541  const STRING &unichar_string() const {
542  this->string_and_lengths(&unichar_string_, &unichar_lengths_);
543  return unichar_string_;
544  }
545 
546  // Returns the lengths, one byte each, representing the number of bytes
547  // required in the unichar_string for each UNICHAR_ID.
548  const STRING &unichar_lengths() const {
549  this->string_and_lengths(&unichar_string_, &unichar_lengths_);
550  return unichar_lengths_;
551  }
552 
553  // Sets up the script_pos_ member using the blobs_list to get the bln
554  // bounding boxes, *this to get the unichars, and this->unicharset
555  // to get the target positions. If small_caps is true, sub/super are not
556  // considered, but dropcaps are.
557  // NOTE: blobs_list should be the chopped_word blobs. (Fully segmented.)
558  void SetScriptPositions(bool small_caps, TWERD* word, int debug = 0);
559  // Sets the script_pos_ member from some source positions with a given length.
560  void SetScriptPositions(const tesseract::ScriptPos* positions, int length);
561  // Sets all the script_pos_ positions to the given position.
562  void SetAllScriptPositions(tesseract::ScriptPos position);
563 
564  static tesseract::ScriptPos ScriptPositionOf(bool print_debug,
565  const UNICHARSET& unicharset,
566  const TBOX& blob_box,
567  UNICHAR_ID unichar_id);
568 
569  // Returns the "dominant" script ID for the word. By "dominant", the script
570  // must account for at least half the characters. Otherwise, it returns 0.
571  // Note that for Japanese, Hiragana and Katakana are simply treated as Han.
572  int GetTopScriptID() const;
573 
574  // Fixes the state_ for a chop at the given blob_position.
575  void UpdateStateForSplit(int blob_position);
576 
577  // Returns the sum of all the state elements, being the total number of blobs.
578  int TotalOfStates() const;
579 
580  void print() const { this->print(""); }
581  void print(const char *msg) const;
582  // Prints the segmentation state with an introductory message.
583  void print_state(const char *msg) const;
584 
585  // Displays the segmentation state of *this (if not the same as the last
586  // one displayed) and waits for a click in the window.
587  void DisplaySegmentation(TWERD* word);
588 
589  WERD_CHOICE& operator+= ( // concatenate
590  const WERD_CHOICE & second);// second onto the end of *this
591 
592  WERD_CHOICE& operator= (const WERD_CHOICE& source);
593 
594  private:
596  // TODO(rays) Perhaps replace the multiple arrays with an array of structs?
597  // unichar_ids_ is an array of classifier "results" that make up a word.
598  // For each unichar_ids_[i], script_pos_[i] has the sub/super/normal position
599  // of each unichar_id.
600  // state_[i] indicates the number of blobs in WERD_RES::chopped_word that
601  // were put together to make the classification results in the ith position
602  // in unichar_ids_, and certainties_[i] is the certainty of the choice that
603  // was used in this word.
604  // == Change from before ==
605  // Previously there was fragment_lengths_ that allowed a word to be
606  // artificially composed of multiple fragment results. Since the new
607  // segmentation search doesn't do fragments, treatment of fragments has
608  // been moved to a lower level, augmenting the ratings matrix with the
609  // combined fragments, and allowing the language-model/segmentation-search
610  // to deal with only the combined unichar_ids.
611  UNICHAR_ID *unichar_ids_; // unichar ids that represent the text of the word
612  tesseract::ScriptPos* script_pos_; // Normal/Sub/Superscript of each unichar.
613  int* state_; // Number of blobs in each unichar.
614  float* certainties_; // Certainty of each unichar.
615  int reserved_; // size of the above arrays
616  int length_; // word length
617  // Factor that was used to adjust the rating.
619  // Rating is the sum of the ratings of the individual blobs in the word.
620  float rating_; // size related
621  // certainty is the min (worst) certainty of the individual blobs in the word.
622  float certainty_; // absolute
623  // xheight computed from the result, or 0 if inconsistent.
626  uint8_t permuter_; // permuter code
627 
628  // Normally, the ratings_ matrix represents the recognition results in order
629  // from left-to-right. However, some engines (say Cube) may return
630  // recognition results in the order of the script's major reading direction
631  // (for Arabic, that is right-to-left).
633  // True if NoDangerousAmbig found an ambiguity.
635 
636  // The following variables are populated and passed by reference any
637  // time unichar_string() or unichar_lengths() are called.
640 };
641 
642 // Make WERD_CHOICE listable.
643 ELISTIZEH(WERD_CHOICE)
645 
646 // Utilities for comparing WERD_CHOICEs
647 
648 bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1,
649  const WERD_CHOICE &word2);
650 
651 // Utilities for debug printing.
652 void print_ratings_list(
653  const char *msg, // intro message
654  BLOB_CHOICE_LIST *ratings, // list of results
655  const UNICHARSET &current_unicharset // unicharset that can be used
656  // for id-to-unichar conversion
657  );
658 
659 #endif
BlobChoiceClassifier classifier() const
Definition: ratngs.h:133
float rating_
Definition: ratngs.h:620
void set_xgap_after(int16_t gap)
Definition: ratngs.h:164
float yshift_
Definition: ratngs.h:230
bool unichars_in_script_order() const
Definition: ratngs.h:535
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
Definition: ratngs.h:95
float rating_
Definition: ratngs.h:214
UNICHAR_ID * unichar_ids_
Definition: ratngs.h:611
STRING unichar_lengths_
Definition: ratngs.h:639
const MATRIX_COORD & matrix_cell()
Definition: ratngs.h:115
int length_
Definition: ratngs.h:616
float yshift() const
Definition: ratngs.h:130
float certainty() const
Definition: ratngs.h:83
void set_rating(float newrat)
Definition: ratngs.h:148
int col
Definition: matrix.h:633
float min_xheight() const
Definition: ratngs.h:124
Definition: rect.h:34
int16_t xgap_before_
Definition: ratngs.h:223
Definition: unicharset.h:146
const UNICHAR_ID * unichar_ids() const
Definition: ratngs.h:312
static T * double_the_size_memcpy(int current_size, T *data)
Definition: genericvector.h:207
bool IsClassified() const
Definition: ratngs.h:139
BlobChoiceClassifier classifier_
Definition: ratngs.h:231
const UNICHARSET * unicharset() const
Definition: ratngs.h:300
bool IsAllSpaces() const
Definition: ratngs.h:521
GenericVector< tesseract::ScoredFont > fonts_
Definition: ratngs.h:205
Definition: matrix.h:575
float * certainties_
Definition: ratngs.h:614
Definition: ratngs.h:266
int script_id() const
Definition: ratngs.h:112
void set_dangerous_ambig_found_(bool value)
Definition: ratngs.h:366
tesseract::ScriptPos * script_pos_
Definition: ratngs.h:612
static BLOB_CHOICE * deep_copy(const BLOB_CHOICE *src)
Definition: ratngs.h:170
Definition: ratngs.h:265
float min_x_height() const
Definition: ratngs.h:336
Definition: baseapi.cpp:94
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
Definition: ratngs.h:452
bool IsAdapted() const
Definition: ratngs.h:136
Definition: ratngs.h:273
STRING unichar_string_
Definition: ratngs.h:638
float certainty(int index) const
Definition: ratngs.h:333
int script_id_
Definition: ratngs.h:219
UNICHAR_ID unichar_id_
Definition: ratngs.h:203
UNICHAR_ID unichar_id() const
Definition: ratngs.h:77
const STRING & unichar_lengths() const
Definition: ratngs.h:548
float rating() const
Definition: ratngs.h:80
const char * ScriptPosToString(enum ScriptPos script_pos)
Definition: ratngs.cpp:200
WERD_CHOICE(const UNICHARSET *unicharset)
Definition: ratngs.h:278
uint8_t permuter() const
Definition: ratngs.h:346
float adjust_factor_
Definition: ratngs.h:618
int length() const
Definition: ratngs.h:303
const char * string() const
Definition: strngs.cpp:196
ScriptPos
Definition: ratngs.h:262
Definition: blobs.h:402
void set_classifier(BlobChoiceClassifier classifier)
Definition: ratngs.h:167
WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
Definition: ratngs.h:280
bool dangerous_ambig_found() const
Definition: ratngs.h:363
bool ContainsAnyNonSpaceDelimited() const
Definition: ratngs.h:514
float certainty_
Definition: ratngs.h:218
void set_certainty(float new_val)
Definition: ratngs.h:372
int16_t fontinfo_id_
Definition: ratngs.h:206
void remove_last_unichar_id()
Definition: ratngs.h:483
tesseract::ScriptPos BlobPosition(int index) const
Definition: ratngs.h:322
void set_xgap_before(int16_t gap)
Definition: ratngs.h:161
float certainty_
Definition: ratngs.h:622
Definition: ratngs.h:264
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:145
Definition: strngs.h:45
const UNICHARSET * unicharset_
Definition: ratngs.h:595
void print_full() const
Definition: ratngs.h:187
BLOB_CHOICE()
Definition: ratngs.h:52
void remove_unichar_id(int index)
Definition: ratngs.h:484
WERD_CHOICE(const WERD_CHOICE &word)
Definition: ratngs.h:293
const GenericVector< tesseract::ScoredFont > & fonts() const
Definition: ratngs.h:92
int row
Definition: matrix.h:634
void set_x_heights(float min_height, float max_height)
Definition: ratngs.h:342
void print(const UNICHARSET *unicharset) const
Definition: ratngs.h:180
const STRING debug_string() const
Definition: ratngs.h:505
int16_t fontinfo_id() const
Definition: ratngs.h:86
int size() const
Definition: genericvector.h:71
int16_t xgap_after() const
Definition: ratngs.h:121
float min_x_height_
Definition: ratngs.h:624
void set_script(int newscript_id)
Definition: ratngs.h:154
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:342
int16_t xgap_after_
Definition: ratngs.h:224
float max_xheight() const
Definition: ratngs.h:127
int * state_
Definition: ratngs.h:613
float max_xheight_
Definition: ratngs.h:227
void set_rating(float new_val)
Definition: ratngs.h:369
void init(int reserved)
Definition: ratngs.h:409
~BLOB_CHOICE()=default
int16_t fontinfo_id2_
Definition: ratngs.h:207
void set_matrix_cell(int col, int row)
Definition: ratngs.h:157
void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, int index)
Definition: ratngs.h:464
void double_the_size()
Make more space in the unichar_ids_, script_pos_, state_ and certainties_ arrays.
Definition: ratngs.h:387
Definition: ratngs.h:263
bool unichars_in_script_order_
Definition: ratngs.h:632
int16_t fontinfo_id2() const
Definition: ratngs.h:89
Definition: matrix.h:605
BLOB_CHOICE & operator=(const BLOB_CHOICE &other)
Definition: ratngs.cpp:133
Definition: blobs.h:268
void set_certainty(float newrat)
Definition: ratngs.h:151
void print() const
Definition: ratngs.h:580
void make_bad()
Set the fields in this choice to be default (bad) values.
Definition: ratngs.h:443
bool dangerous_ambig_found_
Definition: ratngs.h:634
void set_length(int len)
Definition: ratngs.h:381
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:315
const STRING & unichar_string() const
Definition: ratngs.h:541
WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uint8_t src_permuter, const UNICHARSET &unicharset)
Definition: ratngs.h:282
int16_t xgap_before() const
Definition: ratngs.h:118
float adjust_factor() const
Definition: ratngs.h:306
float certainty() const
Definition: ratngs.h:330
int state(int index) const
Definition: ratngs.h:319
float max_x_height() const
Definition: ratngs.h:339
void set_adjust_factor(float factor)
Definition: ratngs.h:309
void set_permuter(uint8_t perm)
Definition: ratngs.h:375
float max_x_height_
Definition: ratngs.h:625
MATRIX_COORD matrix_cell_
Definition: ratngs.h:222
static const float kBadRating
Definition: ratngs.h:275
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:359
float rating() const
Definition: ratngs.h:327
float min_xheight_
Definition: ratngs.h:226
uint8_t permuter_
Definition: ratngs.h:626
static int SortByRating(const void *p1, const void *p2)
Definition: ratngs.h:193
bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const
Definition: ratngs.cpp:152
Definition: ratngs.h:49
bool set_unichars_in_script_order(bool in_script_order)
Definition: ratngs.h:531
int reserved_
Definition: ratngs.h:615