tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
wordrec.h
1 // File: wordrec.h
3 // Description: wordrec class.
4 // Author: Samuel Charron
5 //
6 // (C) Copyright 2006, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifndef TESSERACT_WORDREC_WORDREC_H_
20 #define TESSERACT_WORDREC_WORDREC_H_
21 
22 #ifdef DISABLED_LEGACY_ENGINE
23 
24 #include "config_auto.h"
25 
26 #include <cstdint> // for int16_t, int32_t
27 #include "callcpp.h" // for C_COL
28 #include "chop.h" // for PointHeap, MAX_NUM_POINTS
29 #include "classify.h" // for Classify
30 #include "elst.h" // for ELIST_ITERATOR, ELISTIZEH, ELIST_LINK
31 #include "findseam.h" // for SeamQueue, SeamPile
32 #include "genericvector.h" // for GenericVector
33 #include "oldlist.h" // for LIST
34 #include "params.h" // for INT_VAR_H, IntParam, BOOL_VAR_H, BoolP...
35 #include "points.h" // for ICOORD
36 #include "ratngs.h" // for BLOB_CHOICE_LIST (ptr only), BLOB_CHOI...
37 #include "seam.h" // for SEAM (ptr only), PRIORITY
38 #include "stopper.h" // for DANGERR
39 
40 class EDGEPT_CLIST;
41 class MATRIX;
42 class STRING;
43 class TBOX;
44 class UNICHARSET;
45 class WERD_RES;
46 
47 namespace tesseract { class LMPainPoints; }
48 namespace tesseract { class TessdataManager; }
49 namespace tesseract { struct BestChoiceBundle; }
50 
51 struct BlamerBundle;
52 struct EDGEPT;
53 struct MATRIX_COORD;
54 struct SPLIT;
55 struct TBLOB;
56 struct TESSLINE;
57 struct TWERD;
58 
59 namespace tesseract {
60 
61 /* ccmain/tstruct.cpp */
62 
// Reduced Wordrec declaration. It is the one compiled when
// DISABLED_LEGACY_ENGINE is defined (see the enclosing #ifdef / #else): only
// the two blamer parameters and a handful of tface.cpp entry points remain.
63 class Wordrec : public Classify {
64  public:
65  // config parameters
66 
67  BOOL_VAR_H(wordrec_debug_blamer, false, "Print blamer debug messages");
68 
69  BOOL_VAR_H(wordrec_run_blamer, false, "Try to set the blame for errors");
70 
71  // methods
72  Wordrec();
73  virtual ~Wordrec() = default;
74 
75  // tface.cpp
76  void program_editup(const char *textbase, TessdataManager *init_classifier,
77  TessdataManager *init_dict);
78  void program_editdown(int32_t elasped_time);
79  int end_recog();
80  int dict_word(const WERD_CHOICE &word);
81 
82  // Member variables
// NOTE(review): listing line 83 is absent from this extract; a member
// declaration presumably followed the heading above - confirm against the
// upstream header before relying on this branch.
84 };
85 
86 } // namespace tesseract
87 
88 #else // DISABLED_LEGACY_ENGINE not defined
89 
90 #include "associate.h"
91 #include "classify.h"
92 #include "dict.h"
93 #include "language_model.h"
94 #include "ratngs.h"
95 #include "matrix.h"
96 #include "seam.h"
97 #include "findseam.h"
98 #include "callcpp.h"
99 
100 #include <memory>
101 
102 class WERD_RES;
103 
104 namespace tesseract {
105 
// A class for storing which nodes are to be processed by the segmentation
// search. There is a single SegSearchPending for each column in the ratings
// matrix, and it indicates whether the segsearch should combine all
// BLOB_CHOICES in the column, or just the given row with the parents
// corresponding to *this SegSearchPending, and whether only updated parent
// ViterbiStateEntries should be combined, or all, with the BLOB_CHOICEs.
class SegSearchPending {
 public:
  // Starts out with no work to do: no classified row and both flags cleared.
  SegSearchPending()
      : classified_row_(-1),
        revisit_whole_column_(false),
        column_classified_(false) {}

  // Marks the whole column as just classified. Used to start a search on
  // a newly initialized ratings matrix.
  void SetColumnClassified() {
    column_classified_ = true;
  }
  // Marks the matrix entry at the given row as just classified.
  // Used after classifying a new matrix cell.
  // Additional to, not overriding a previous RevisitWholeColumn.
  void SetBlobClassified(int row) {
    classified_row_ = row;
  }
  // Marks the whole column as needing work, but not just classified.
  // Used when the parent vse list is updated.
  // Additional to, not overriding a previous SetBlobClassified.
  void RevisitWholeColumn() {
    revisit_whole_column_ = true;
  }

  // Clears *this to indicate no work to do.
  void Clear() {
    classified_row_ = -1;
    revisit_whole_column_ = false;
    column_classified_ = false;
  }

  // Returns true if there are updates to do in the column that *this
  // represents.
  bool WorkToDo() const {
    return revisit_whole_column_ || column_classified_ || classified_row_ >= 0;
  }
  // Returns true if the given row was just classified.
  bool IsRowJustClassified(int row) const {
    return row == classified_row_ || column_classified_;
  }
  // Returns the single row to process if there is only one, otherwise -1.
  int SingleRow() const {
    return (revisit_whole_column_ || column_classified_) ? -1 : classified_row_;
  }

 private:
  // If non-negative, indicates the single row in the ratings matrix that has
  // just been classified, and so should be combined with all the parents in the
  // column that this SegSearchPending represents.
  // Operates independently of revisit_whole_column.
  int classified_row_;
  // If revisit_whole_column is true, then all BLOB_CHOICEs in this column will
  // be processed, but classified_row can indicate a row that is newly
  // classified. Overridden if column_classified is true.
  bool revisit_whole_column_;
  // If column_classified is true, parent vses are processed with all rows
  // regardless of whether they are just updated, overriding
  // revisit_whole_column and classified_row.
  bool column_classified_;
};
173 
174 
175 /* ccmain/tstruct.cpp *********************************************************/
176 class FRAGMENT:public ELIST_LINK
177 {
178  public:
179  FRAGMENT() { //constructor
180  }
181  FRAGMENT(EDGEPT *head_pt, //start
182  EDGEPT *tail_pt); //end
183 
184  ICOORD head; //coords of start
185  ICOORD tail; //coords of end
186  EDGEPT *headpt; //start point
187  EDGEPT *tailpt; //end point
188 };
189 ELISTIZEH(FRAGMENT)
190 
191 
192 class Wordrec : public Classify {
193  public:
194  // config parameters *******************************************************
195  BOOL_VAR_H(merge_fragments_in_matrix, TRUE,
196  "Merge the fragments in the ratings matrix and delete them "
197  "after merging");
198  BOOL_VAR_H(wordrec_no_block, FALSE, "Don't output block information");
199  BOOL_VAR_H(wordrec_enable_assoc, TRUE, "Associator Enable");
200  BOOL_VAR_H(force_word_assoc, FALSE,
201  "force associator to run regardless of what enable_assoc is."
202  "This is used for CJK where component grouping is necessary.");
203  double_VAR_H(wordrec_worst_state, 1, "Worst segmentation state");
204  BOOL_VAR_H(fragments_guide_chopper, FALSE,
205  "Use information from fragments to guide chopping process");
206  INT_VAR_H(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped");
207  double_VAR_H(tessedit_certainty_threshold, -2.25, "Good blob limit");
208  INT_VAR_H(chop_debug, 0, "Chop debug");
209  BOOL_VAR_H(chop_enable, 1, "Chop enable");
210  BOOL_VAR_H(chop_vertical_creep, 0, "Vertical creep");
211  INT_VAR_H(chop_split_length, 10000, "Split Length");
212  INT_VAR_H(chop_same_distance, 2, "Same distance");
213  INT_VAR_H(chop_min_outline_points, 6, "Min Number of Points on Outline");
214  INT_VAR_H(chop_seam_pile_size, 150, "Max number of seams in seam_pile");
215  BOOL_VAR_H(chop_new_seam_pile, 1, "Use new seam_pile");
216  INT_VAR_H(chop_inside_angle, -50, "Min Inside Angle Bend");
217  INT_VAR_H(chop_min_outline_area, 2000, "Min Outline Area");
218  double_VAR_H(chop_split_dist_knob, 0.5, "Split length adjustment");
219  double_VAR_H(chop_overlap_knob, 0.9, "Split overlap adjustment");
220  double_VAR_H(chop_center_knob, 0.15, "Split center adjustment");
221  INT_VAR_H(chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs "
222  "above which we don't care that a chop is not near the center.");
223  double_VAR_H(chop_sharpness_knob, 0.06, "Split sharpness adjustment");
224  double_VAR_H(chop_width_change_knob, 5.0, "Width change adjustment");
225  double_VAR_H(chop_ok_split, 100.0, "OK split limit");
226  double_VAR_H(chop_good_split, 50.0, "Good split limit");
227  INT_VAR_H(chop_x_y_weight, 3, "X / Y length weight");
228  INT_VAR_H(segment_adjust_debug, 0, "Segmentation adjustment debug");
229  BOOL_VAR_H(assume_fixed_pitch_char_segment, FALSE,
230  "include fixed-pitch heuristics in char segmentation");
231  INT_VAR_H(wordrec_debug_level, 0, "Debug level for wordrec");
232  INT_VAR_H(wordrec_max_join_chunks, 4,
233  "Max number of broken pieces to associate");
234  BOOL_VAR_H(wordrec_skip_no_truth_words, false,
235  "Only run OCR for words that had truth recorded in BlamerBundle");
236  BOOL_VAR_H(wordrec_debug_blamer, false, "Print blamer debug messages");
237  BOOL_VAR_H(wordrec_run_blamer, false, "Try to set the blame for errors");
238  INT_VAR_H(segsearch_debug_level, 0, "SegSearch debug level");
239  INT_VAR_H(segsearch_max_pain_points, 2000,
240  "Maximum number of pain points stored in the queue");
241  INT_VAR_H(segsearch_max_futile_classifications, 10,
242  "Maximum number of pain point classifications per word.");
243  double_VAR_H(segsearch_max_char_wh_ratio, 2.0,
244  "Maximum character width-to-height ratio");
245  BOOL_VAR_H(save_alt_choices, true,
246  "Save alternative paths found during chopping "
247  "and segmentation search");
248 
249  // methods from wordrec/*.cpp ***********************************************
250  Wordrec();
251  virtual ~Wordrec() = default;
252 
253  // Fills word->alt_choices with alternative paths found during
254  // chopping/segmentation search that are kept in best_choices.
255  void SaveAltChoices(const LIST &best_choices, WERD_RES *word);
256 
257  // Fills character choice lattice in the given BlamerBundle
258  // using the given ratings matrix and best choice list.
259  void FillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices,
260  const UNICHARSET &unicharset, BlamerBundle *blamer_bundle);
261 
262  // Calls fill_lattice_ member function
263  // (assumes that fill_lattice_ is not nullptr).
264  void CallFillLattice(const MATRIX &ratings,
265  const WERD_CHOICE_LIST &best_choices,
266  const UNICHARSET &unicharset,
267  BlamerBundle *blamer_bundle) {
268  (this->*fill_lattice_)(ratings, best_choices, unicharset, blamer_bundle);
269  }
270 
271  // tface.cpp
272  void program_editup(const char *textbase, TessdataManager *init_classifier,
273  TessdataManager *init_dict);
274  void cc_recog(WERD_RES *word);
275  void program_editdown(int32_t elasped_time);
276  void set_pass1();
277  void set_pass2();
278  int end_recog();
279  BLOB_CHOICE_LIST *call_matcher(TBLOB* blob);
280  int dict_word(const WERD_CHOICE &word);
281  // wordclass.cpp
282  BLOB_CHOICE_LIST *classify_blob(TBLOB *blob,
283  const char *string,
284  C_COL color,
285  BlamerBundle *blamer_bundle);
286 
287  // segsearch.cpp
288  // SegSearch works on the lower diagonal matrix of BLOB_CHOICE_LISTs.
289  // Each entry in the matrix represents the classification choice
290  // for a chunk, i.e. an entry in row 2, column 1 represents the list
291  // of ratings for the chunks 1 and 2 classified as a single blob.
292  // The entries on the diagonal of the matrix are classifier choice lists
293  // for a single chunk from the maximal segmentation.
294  //
295  // The ratings matrix given to SegSearch represents the segmentation
296  // graph / trellis for the current word. The nodes in the graph are the
297  // individual BLOB_CHOICEs in each of the BLOB_CHOICE_LISTs in the ratings
298  // matrix. The children of each node (nodes connected by outgoing links)
299  // are the entries in the column that is equal to node's row+1. The parents
300  // (nodes connected by the incoming links) are the entries in the row that
301  // is equal to the node's column-1. Here is an example ratings matrix:
302  //
303  // 0 1 2 3 4
304  // -------------------------
305  // 0| c,( |
306  // 1| d l,1 |
307  // 2| o |
308  // 3| c,( |
309  // 4| g,y l,1 |
310  // -------------------------
311  //
312  // In the example above node "o" has children (outgoing connection to nodes)
313  // "c","(","g","y" and parents (incoming connections from nodes) "l","1","d".
314  //
315  // The objective of the search is to find the least cost path, where the cost
316  // is determined by the language model components and the properties of the
317  // cut between the blobs on the path. SegSearch starts by populating the
318  // matrix with the all the entries that were classified by the chopper and
319  // finding the initial best path. Based on the classifier ratings, language
320  // model scores and the properties of each cut, a list of "pain points" is
321  // constructed - those are the points on the path where the choices do not
322  // look consistent with the neighboring choices, the cuts look particularly
323  // problematic, or the certainties of the blobs are low. The most troublesome
324  // "pain point" is picked from the list and the new entry in the ratings
325  // matrix corresponding to this "pain point" is filled in. Then the language
326  // model state is updated to reflect the new classification and the new
327  // "pain points" are added to the list and the next most troublesome
328  // "pain point" is determined. This continues until either the word choice
329  // composed from the best paths in the segmentation graph is "good enough"
330  // (e.g. above a certain certainty threshold, is an unambiguous dictionary
331  // word, etc) or there are no more "pain points" to explore.
332  //
333  // If associate_blobs is set to false no new classifications will be done
334  // to combine blobs. Segmentation search will run only one "iteration"
335  // on the classifications already recorded in chunks_record.ratings.
336  //
337  // Note: this function assumes that word_res, best_choice_bundle arguments
338  // are not nullptr.
339  void SegSearch(WERD_RES* word_res,
340  BestChoiceBundle* best_choice_bundle,
341  BlamerBundle* blamer_bundle);
342 
343  // Setup and run just the initial segsearch on an established matrix,
344  // without doing any additional chopping or joining.
345  // (Internal factored version that can be used as part of the main SegSearch.)
346  void InitialSegSearch(WERD_RES* word_res, LMPainPoints* pain_points,
348  BestChoiceBundle* best_choice_bundle,
349  BlamerBundle* blamer_bundle);
350 
351  // Runs SegSearch() function (above) without needing a best_choice_bundle
352  // or blamer_bundle. Used for testing.
353  void DoSegSearch(WERD_RES* word_res);
354 
355  // chop.cpp
356  PRIORITY point_priority(EDGEPT *point);
357  void add_point_to_list(PointHeap* point_heap, EDGEPT *point);
358  // Returns true if the edgept supplied as input is an inside angle. This
359  // is determined by the angular change of the vectors from point to point.
360  bool is_inside_angle(EDGEPT *pt);
361  int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3);
362  EDGEPT *pick_close_point(EDGEPT *critical_point,
363  EDGEPT *vertical_point,
364  int *best_dist);
365  void prioritize_points(TESSLINE *outline, PointHeap* points);
366  void new_min_point(EDGEPT *local_min, PointHeap* points);
367  void new_max_point(EDGEPT *local_max, PointHeap* points);
368  void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point,
369  EDGEPT** best_point,
370  EDGEPT_CLIST *new_points);
371 
372  // chopper.cpp
373  SEAM *attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number,
374  bool italic_blob, const GenericVector<SEAM*>& seams);
375  SEAM *chop_numbered_blob(TWERD *word, int32_t blob_number,
376  bool italic_blob, const GenericVector<SEAM*>& seams);
378  bool italic_blob,
379  WERD_RES *word_res, int *blob_number);
381  DANGERR *fixpt,
382  bool split_next_to_fragment,
383  bool italic_blob,
384  WERD_RES *word,
385  int *blob_number);
386  SEAM *chop_one_blob(const GenericVector<TBOX> &boxes,
387  const GenericVector<BLOB_CHOICE*> &blob_choices,
388  WERD_RES *word_res,
389  int *blob_number);
390  void chop_word_main(WERD_RES *word);
391  void improve_by_chopping(float rating_cert_scale,
392  WERD_RES *word,
393  BestChoiceBundle *best_choice_bundle,
394  BlamerBundle *blamer_bundle,
395  LMPainPoints *pain_points,
397  int select_blob_to_split(const GenericVector<BLOB_CHOICE*> &blob_choices,
398  float rating_ceiling,
399  bool split_next_to_fragment);
401 
402  // findseam.cpp
403  void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue* seams);
404  void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split,
405  PRIORITY priority, SEAM **seam_result, TBLOB *blob,
406  SeamPile *seam_pile);
407  void combine_seam(const SeamPile& seam_pile,
408  const SEAM* seam, SeamQueue* seam_queue);
409  SEAM *pick_good_seam(TBLOB *blob);
410  void try_point_pairs (EDGEPT * points[MAX_NUM_POINTS],
411  int16_t num_points,
412  SeamQueue* seam_queue,
413  SeamPile* seam_pile,
414  SEAM ** seam, TBLOB * blob);
415  void try_vertical_splits(EDGEPT * points[MAX_NUM_POINTS],
416  int16_t num_points,
417  EDGEPT_CLIST *new_points,
418  SeamQueue* seam_queue,
419  SeamPile* seam_pile,
420  SEAM ** seam, TBLOB * blob);
421 
422  // gradechop.cpp
423  PRIORITY grade_split_length(SPLIT *split);
424  PRIORITY grade_sharpness(SPLIT *split);
425 
426  // outlines.cpp
427  bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1,
428  EDGEPT **near_pt);
429 
430  // pieces.cpp
431  virtual BLOB_CHOICE_LIST *classify_piece(const GenericVector<SEAM*>& seams,
432  int16_t start,
433  int16_t end,
434  const char* description,
435  TWERD *word,
436  BlamerBundle *blamer_bundle);
437  // Try to merge fragments in the ratings matrix and put the result in
438  // the corresponding row and column
439  void merge_fragments(MATRIX *ratings,
440  int16_t num_blobs);
441  // Recursively go through the ratings matrix to find lists of fragments
442  // to be merged in the function merge_and_put_fragment_lists.
443  // current_frag is the position of the piece we are looking for.
444  // current_row is the row in the rating matrix we are currently at.
445  // start is the row we started initially, so that we can know where
446  // to append the results to the matrix. num_frag_parts is the total
447  // number of pieces we are looking for and num_blobs is the size of the
448  // ratings matrix.
449  void get_fragment_lists(int16_t current_frag,
450  int16_t current_row,
451  int16_t start,
452  int16_t num_frag_parts,
453  int16_t num_blobs,
454  MATRIX *ratings,
455  BLOB_CHOICE_LIST *choice_lists);
456  // Merge the fragment lists in choice_lists and append it to the
457  // ratings matrix
458  void merge_and_put_fragment_lists(int16_t row,
459  int16_t column,
460  int16_t num_frag_parts,
461  BLOB_CHOICE_LIST *choice_lists,
462  MATRIX *ratings);
463  // Filter the fragment list so that the filtered_choices only contain
464  // fragments that are in the correct position. choices is the list
465  // that we are going to filter. fragment_pos is the position in the
466  // fragment that we are looking for and num_frag_parts is the the
467  // total number of pieces. The result will be appended to
468  // filtered_choices.
469  void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices,
470  int fragment_pos,
471  int num_frag_parts,
472  BLOB_CHOICE_LIST *filtered_choices);
473 
474  // Member variables.
475 
476  std::unique_ptr<LanguageModel> language_model_;
477  PRIORITY pass2_ok_split;
478  // Stores the best choice for the previous word in the paragraph.
479  // This variable is modified by PAGE_RES_IT when iterating over
480  // words to OCR on the page.
482  // Sums of blame reasons computed by the blamer.
484  // Function used to fill char choice lattices.
485  void (Wordrec::*fill_lattice_)(const MATRIX &ratings,
486  const WERD_CHOICE_LIST &best_choices,
487  const UNICHARSET &unicharset,
488  BlamerBundle *blamer_bundle);
489 
490  protected:
491  inline bool SegSearchDone(int num_futile_classifications) {
492  return (language_model_->AcceptableChoiceFound() ||
493  num_futile_classifications >=
494  segsearch_max_futile_classifications);
495  }
496 
497  // Updates the language model state recorded for the child entries specified
498  // in pending[starting_col]. Enqueues the children of the updated entries
499  // into pending and proceeds to update (and remove from pending) all the
500  // remaining entries in pending[col] (col >= starting_col). Upon termination
501  // of this function all the pending[col] lists will be empty.
502  //
503  // The arguments:
504  //
505  // starting_col: index of the column in chunks_record->ratings from
506  // which the update should be started
507  //
508  // pending: list of entries listing chunks_record->ratings entries
509  // that should be updated
510  //
511  // pain_points: priority heap listing the pain points generated by
512  // the language model
513  //
514  // temp_pain_points: temporary storage for tentative pain points generated
515  // by the language model after a single call to LanguageModel::UpdateState()
516  // (the argument is passed in rather than created before each
517  // LanguageModel::UpdateState() call to avoid dynamic memory re-allocation)
518  //
519  // best_choice_bundle: a collection of variables that should be updated
520  // if a new best choice is found
521  //
523  float rating_cert_scale,
524  int starting_col,
526  WERD_RES *word_res,
527  LMPainPoints *pain_points,
528  BestChoiceBundle *best_choice_bundle,
529  BlamerBundle *blamer_bundle);
530 
531  // Process the given pain point: classify the corresponding blob, enqueue
532  // new pain points to join the newly classified blob with its neighbors.
533  void ProcessSegSearchPainPoint(float pain_point_priority,
534  const MATRIX_COORD &pain_point,
535  const char* pain_point_type,
537  WERD_RES *word_res,
538  LMPainPoints *pain_points,
539  BlamerBundle *blamer_bundle);
540  // Resets enough of the results so that the Viterbi search is re-run.
541  // Needed when the n-gram model is enabled, as the multi-length comparison
542  // implementation will re-value existing paths to worse values.
543  void ResetNGramSearch(WERD_RES* word_res,
544  BestChoiceBundle* best_choice_bundle,
546 
547  // Add pain points for classifying blobs on the correct segmentation path
548  // (so that we can evaluate correct segmentation path and discover the reason
549  // for incorrect result).
550  void InitBlamerForSegSearch(WERD_RES *word_res,
551  LMPainPoints *pain_points,
552  BlamerBundle *blamer_bundle,
553  STRING *blamer_debug);
554 };
555 
556 } // namespace tesseract
557 
558 #endif // DISABLED_LEGACY_ENGINE
559 
560 #endif // TESSERACT_WORDREC_WORDREC_H_
Definition: genericheap.h:58
void set_pass2()
Definition: tface.cpp:99
BLOB_CHOICE_LIST * classify_blob(TBLOB *blob, const char *string, C_COL color, BlamerBundle *blamer_bundle)
Definition: wordclass.cpp:54
bool SegSearchDone(int num_futile_classifications)
Definition: wordrec.h:491
Definition: lm_pain_points.h:57
void(Wordrec::* fill_lattice_)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:485
int SingleRow() const
Definition: wordrec.h:154
void SetColumnClassified()
Definition: wordrec.h:121
void Clear()
Definition: wordrec.h:138
void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams)
Definition: findseam.cpp:73
void FillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
PRIORITY point_priority(EDGEPT *point)
Definition: chop.cpp:53
Definition: split.h:37
Definition: wordrec.h:192
ICOORD tail
Definition: wordrec.h:185
Definition: blobs.h:83
void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
Definition: findseam.cpp:112
void DoSegSearch(WERD_RES *word_res)
Definition: segsearch.cpp:37
void try_point_pairs(EDGEPT *points[50], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:305
double_VAR_H(wordrec_worst_state, 1, "Worst segmentation state")
PRIORITY pass2_ok_split
Definition: wordrec.h:477
void ProcessSegSearchPainPoint(float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:249
void try_vertical_splits(EDGEPT *points[50], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:343
virtual ~Wordrec()=default
Definition: classify.h:103
int end_recog()
Definition: tface.cpp:60
void get_fragment_lists(int16_t current_frag, int16_t current_row, int16_t start, int16_t num_frag_parts, int16_t num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists)
Definition: pieces.cpp:280
Definition: rect.h:34
int angle_change(EDGEPT *point1, EDGEPT *point2, EDGEPT *point3)
Definition: chop.cpp:87
void InitialSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:137
Definition: unicharset.h:146
bool WorkToDo() const
Definition: wordrec.h:146
SEAM * chop_numbered_blob(TWERD *word, int32_t blob_number, bool italic_blob, const GenericVector< SEAM *> &seams)
Definition: chopper.cpp:266
Definition: matrix.h:575
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:224
bool near_point(EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt)
Definition: outlines.cpp:45
Definition: baseapi.cpp:94
void SaveAltChoices(const LIST &best_choices, WERD_RES *word)
virtual BLOB_CHOICE_LIST * classify_piece(const GenericVector< SEAM *> &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
Definition: pieces.cpp:55
BLOB_CHOICE_LIST * call_matcher(TBLOB *blob)
Definition: tface.cpp:139
Definition: ratngs.h:273
void ResetNGramSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending)
Definition: segsearch.cpp:312
WERD_CHOICE * prev_word_best_choice_
Definition: wordrec.h:481
INT_VAR_H(repair_unchopped_blobs, 1, "Fix blobs that aren't chopped")
Definition: seam.h:44
Wordrec()
Definition: wordrec.cpp:47
void fill_filtered_fragment_list(BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices)
Definition: pieces.cpp:104
Definition: blobs.h:187
int select_blob_to_split_from_fixpt(DANGERR *fixpt)
Definition: chopper.cpp:627
Bundle together all the things pertaining to the best choice/state.
Definition: lm_state.h:217
int select_blob_to_split(const GenericVector< BLOB_CHOICE *> &blob_choices, float rating_ceiling, bool split_next_to_fragment)
Definition: chopper.cpp:539
void cc_recog(WERD_RES *word)
Definition: tface.cpp:111
Definition: blobs.h:402
void set_pass1()
Definition: tface.cpp:87
SEAM * improve_one_blob(const GenericVector< BLOB_CHOICE *> &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number)
Definition: chopper.cpp:328
Definition: blamer.h:100
int dict_word(const WERD_CHOICE &word)
Definition: tface.cpp:127
int classified_row_
Definition: wordrec.h:163
SEAM * chop_overlapping_blob(const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number)
Definition: chopper.cpp:274
Definition: tessdatamanager.h:126
Definition: baseapi.h:37
void merge_and_put_fragment_lists(int16_t row, int16_t column, int16_t num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings)
Definition: pieces.cpp:137
void RevisitWholeColumn()
Definition: wordrec.h:133
ICOORD head
Definition: wordrec.h:184
void SetBlobClassified(int row)
Definition: wordrec.h:127
void new_max_point(EDGEPT *local_max, PointHeap *points)
Definition: chop.cpp:243
FRAGMENT()
Definition: wordrec.h:179
Definition: strngs.h:45
Definition: wordrec.h:112
EDGEPT * pick_close_point(EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist)
Definition: chop.cpp:122
EDGEPT * headpt
Definition: wordrec.h:186
integer coordinate
Definition: points.h:32
void SegSearch(WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:43
void program_editup(const char *textbase, TessdataManager *init_classifier, TessdataManager *init_dict)
Definition: tface.cpp:38
void improve_by_chopping(float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending)
Definition: chopper.cpp:455
void new_min_point(EDGEPT *local_min, PointHeap *points)
Definition: chop.cpp:219
void program_editdown(int32_t elasped_time)
Definition: tface.cpp:73
Definition: pageres.h:169
std::unique_ptr< LanguageModel > language_model_
Definition: wordrec.h:476
void UpdateSegSearchNodes(float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
Definition: segsearch.cpp:181
bool is_inside_angle(EDGEPT *pt)
Definition: chop.cpp:77
void chop_word_main(WERD_RES *word)
Definition: chopper.cpp:392
void InitBlamerForSegSearch(WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug)
Definition: segsearch.cpp:329
bool column_classified_
Definition: wordrec.h:171
Definition: wordrec.h:176
SEAM * chop_one_blob(const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE *> &blob_choices, WERD_RES *word_res, int *blob_number)
Definition: chopper.cpp:372
Definition: oldlist.h:124
SegSearchPending()
Definition: wordrec.h:114
Definition: matrix.h:605
void combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
Definition: findseam.cpp:205
bool revisit_whole_column_
Definition: wordrec.h:167
Definition: blobs.h:268
void prioritize_points(TESSLINE *outline, PointHeap *points)
Definition: chop.cpp:160
BOOL_VAR_H(merge_fragments_in_matrix, TRUE, "Merge the fragments in the ratings matrix and delete them " "after merging")
UNICHARSET unicharset
Definition: ccutil.h:68
SEAM * attempt_blob_chop(TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const GenericVector< SEAM *> &seams)
Definition: chopper.cpp:212
PRIORITY grade_sharpness(SPLIT *split)
Definition: gradechop.cpp:74
void merge_fragments(MATRIX *ratings, int16_t num_blobs)
Definition: pieces.cpp:312
bool IsRowJustClassified(int row) const
Definition: wordrec.h:150
GenericVector< int > blame_reasons_
Definition: wordrec.h:483
void add_point_to_list(PointHeap *point_heap, EDGEPT *point)
Definition: chop.cpp:63
EDGEPT * tailpt
Definition: wordrec.h:187
PRIORITY grade_split_length(SPLIT *split)
Definition: gradechop.cpp:51
void CallFillLattice(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: wordrec.h:264
void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
Definition: chop.cpp:272