tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
tesseract::Wordrec Class Reference

#include <wordrec.h>

Inheritance diagram for tesseract::Wordrec:
Collaboration diagram for tesseract::Wordrec:

Public Member Functions

 BOOL_VAR_H (merge_fragments_in_matrix, TRUE, "Merge the fragments in the ratings matrix and delete them " "after merging")
 
 BOOL_VAR_H (wordrec_no_block, FALSE, "Don't output block information")
 
 BOOL_VAR_H (wordrec_enable_assoc, TRUE, "Associator Enable")
 
 BOOL_VAR_H (force_word_assoc, FALSE, "force associator to run regardless of what enable_assoc is." "This is used for CJK where component grouping is necessary.")
 
 double_VAR_H (wordrec_worst_state, 1, "Worst segmentation state")
 
 BOOL_VAR_H (fragments_guide_chopper, FALSE, "Use information from fragments to guide chopping process")
 
 INT_VAR_H (repair_unchopped_blobs, 1, "Fix blobs that aren't chopped")
 
 double_VAR_H (tessedit_certainty_threshold, -2.25, "Good blob limit")
 
 INT_VAR_H (chop_debug, 0, "Chop debug")
 
 BOOL_VAR_H (chop_enable, 1, "Chop enable")
 
 BOOL_VAR_H (chop_vertical_creep, 0, "Vertical creep")
 
 INT_VAR_H (chop_split_length, 10000, "Split Length")
 
 INT_VAR_H (chop_same_distance, 2, "Same distance")
 
 INT_VAR_H (chop_min_outline_points, 6, "Min Number of Points on Outline")
 
 INT_VAR_H (chop_seam_pile_size, 150, "Max number of seams in seam_pile")
 
 BOOL_VAR_H (chop_new_seam_pile, 1, "Use new seam_pile")
 
 INT_VAR_H (chop_inside_angle, -50, "Min Inside Angle Bend")
 
 INT_VAR_H (chop_min_outline_area, 2000, "Min Outline Area")
 
 double_VAR_H (chop_split_dist_knob, 0.5, "Split length adjustment")
 
 double_VAR_H (chop_overlap_knob, 0.9, "Split overlap adjustment")
 
 double_VAR_H (chop_center_knob, 0.15, "Split center adjustment")
 
 INT_VAR_H (chop_centered_maxwidth, 90, "Width of (smaller) chopped blobs " "above which we don't care that a chop is not near the center.")
 
 double_VAR_H (chop_sharpness_knob, 0.06, "Split sharpness adjustment")
 
 double_VAR_H (chop_width_change_knob, 5.0, "Width change adjustment")
 
 double_VAR_H (chop_ok_split, 100.0, "OK split limit")
 
 double_VAR_H (chop_good_split, 50.0, "Good split limit")
 
 INT_VAR_H (chop_x_y_weight, 3, "X / Y length weight")
 
 INT_VAR_H (segment_adjust_debug, 0, "Segmentation adjustment debug")
 
 BOOL_VAR_H (assume_fixed_pitch_char_segment, FALSE, "include fixed-pitch heuristics in char segmentation")
 
 INT_VAR_H (wordrec_debug_level, 0, "Debug level for wordrec")
 
 INT_VAR_H (wordrec_max_join_chunks, 4, "Max number of broken pieces to associate")
 
 BOOL_VAR_H (wordrec_skip_no_truth_words, false, "Only run OCR for words that had truth recorded in BlamerBundle")
 
 BOOL_VAR_H (wordrec_debug_blamer, false, "Print blamer debug messages")
 
 BOOL_VAR_H (wordrec_run_blamer, false, "Try to set the blame for errors")
 
 INT_VAR_H (segsearch_debug_level, 0, "SegSearch debug level")
 
 INT_VAR_H (segsearch_max_pain_points, 2000, "Maximum number of pain points stored in the queue")
 
 INT_VAR_H (segsearch_max_futile_classifications, 10, "Maximum number of pain point classifications per word.")
 
 double_VAR_H (segsearch_max_char_wh_ratio, 2.0, "Maximum character width-to-height ratio")
 
 BOOL_VAR_H (save_alt_choices, true, "Save alternative paths found during chopping " "and segmentation search")
 
 Wordrec ()
 
virtual ~Wordrec ()=default
 
void SaveAltChoices (const LIST &best_choices, WERD_RES *word)
 
void FillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
 
void CallFillLattice (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
 
void SegSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
void InitialSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
void DoSegSearch (WERD_RES *word_res)
 
void add_seam_to_queue (float new_priority, SEAM *new_seam, SeamQueue *seams)
 
void choose_best_seam (SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
 
void combine_seam (const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
 
SEAM * pick_good_seam (TBLOB *blob)
 
void try_point_pairs (EDGEPT *points[50], int16_t num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
 
void try_vertical_splits (EDGEPT *points[50], int16_t num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
 
PRIORITY grade_split_length (SPLIT *split)
 
PRIORITY grade_sharpness (SPLIT *split)
 
bool near_point (EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt)
 
virtual BLOB_CHOICE_LIST * classify_piece (const GenericVector< SEAM *> &seams, int16_t start, int16_t end, const char *description, TWERD *word, BlamerBundle *blamer_bundle)
 
void merge_fragments (MATRIX *ratings, int16_t num_blobs)
 
void get_fragment_lists (int16_t current_frag, int16_t current_row, int16_t start, int16_t num_frag_parts, int16_t num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists)
 
void merge_and_put_fragment_lists (int16_t row, int16_t column, int16_t num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings)
 
void fill_filtered_fragment_list (BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices)
 
program_editup

Initialize all the things in the program that need to be initialized. init_permute determines whether to initialize the permute functions and Dawg models.

void program_editup (const char *textbase, TessdataManager *init_classifier, TessdataManager *init_dict)
 
cc_recog

Recognize a word.

void cc_recog (WERD_RES *word)
 
program_editdown

This function holds any necessary post processing for the Wise Owl program.

void program_editdown (int32_t elasped_time)
 
set_pass1

Get ready to do some pass 1 stuff.

void set_pass1 ()
 
set_pass2

Get ready to do some pass 2 stuff.

void set_pass2 ()
 
end_recog

Cleanup and exit the recog program.

int end_recog ()
 
call_matcher

Called from Tess with a blob in tess form. The blob may need rotating to the correct orientation for classification.

BLOB_CHOICE_LIST * call_matcher (TBLOB *blob)
 
dict_word()

Test the dictionaries, returning NO_PERM (0) if not found, or one of the PermuterType values if found, according to the dictionary.

int dict_word (const WERD_CHOICE &word)
 
classify_blob

Classify this blob if it is not already recorded in the match table. Attempt to recognize this blob as a character. The recognition rating for this blob will be stored as a part of the blob. This value will also be returned to the caller.

Parameters
blob: Current blob
string: The string to display in ScrollView
color: The colour to use when displayed with ScrollView
BLOB_CHOICE_LIST * classify_blob (TBLOB *blob, const char *string, C_COL color, BlamerBundle *blamer_bundle)
 
point_priority

Assign a priority to an edge point that might be used as part of a split. The argument should be of type EDGEPT.

PRIORITY point_priority (EDGEPT *point)
 
add_point_to_list

Add an edge point to a POINT_GROUP containing a list of other points.

void add_point_to_list (PointHeap *point_heap, EDGEPT *point)
 
bool is_inside_angle (EDGEPT *pt)
 
angle_change

Return the change in angle (degrees) of the line segments between points one and two, and two and three.

int angle_change (EDGEPT *point1, EDGEPT *point2, EDGEPT *point3)
 
pick_close_point

Choose the edge point that is closest to the critical point. This point may not be exactly vertical from the critical point.

EDGEPT * pick_close_point (EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist)
 
prioritize_points

Find a list of edge points from the outer outline of this blob. For each of these points assign a priority. Sort these points using a heap structure so that they can be visited in order.

void prioritize_points (TESSLINE *outline, PointHeap *points)
 
new_min_point

Found a new minimum point; try to decide whether to save it or not. Return the new value for the local minimum. If a point is saved then the local minimum is reset to nullptr.

void new_min_point (EDGEPT *local_min, PointHeap *points)
 
new_max_point

Found a new maximum point; try to decide whether to save it or not. Return the new value for the local maximum. If a point is saved then the local maximum is reset to nullptr.

void new_max_point (EDGEPT *local_max, PointHeap *points)
 
vertical_projection_point

For one point on the outline, find the corresponding point on the other side of the outline that is a likely projection for a split point. This is done by iterating through the edge points until the X value of the point being looked at is greater than the X value of the split point. Ensure that the point being returned is not right next to the split point. Return the edge point in *best_point as a result, and any points that were newly created are also saved on the new_points list.

void vertical_projection_point (EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
 
attempt_blob_chop

Try to split this blob after this one. Check to make sure that it was successful.

SEAM * attempt_blob_chop (TWERD *word, TBLOB *blob, int32_t blob_number, bool italic_blob, const GenericVector< SEAM *> &seams)
 
SEAM * chop_numbered_blob (TWERD *word, int32_t blob_number, bool italic_blob, const GenericVector< SEAM *> &seams)
 
SEAM * chop_overlapping_blob (const GenericVector< TBOX > &boxes, bool italic_blob, WERD_RES *word_res, int *blob_number)
 
improve_one_blob

Finds the best place to chop, based on the worst blob, fixpt, or next to a fragment, according to the input. Returns the SEAM corresponding to the chop point, if any is found, and the index in the ratings_matrix of the chopped blob. Note that blob_choices is just a copy of the pointers in the leading diagonal of the ratings MATRIX. Although the blob is chopped, the returned SEAM is yet to be inserted into word->seam_array and the resulting blobs are unclassified, so this function can be used by ApplyBox as well as during recognition.

SEAM * improve_one_blob (const GenericVector< BLOB_CHOICE *> &blob_choices, DANGERR *fixpt, bool split_next_to_fragment, bool italic_blob, WERD_RES *word, int *blob_number)
 
chop_one_blob

Start with the current one-blob word and its classification. Find the worst blob and try to divide it up to improve the ratings. Used for testing chopper.

SEAM * chop_one_blob (const GenericVector< TBOX > &boxes, const GenericVector< BLOB_CHOICE *> &blob_choices, WERD_RES *word_res, int *blob_number)
 
chop_word_main

Classify the blobs in this word and permute the results. Find the worst blob in the word and chop it up. Continue this process until a good answer has been found or all the blobs have been chopped up enough. The results are returned in the WERD_RES.

void chop_word_main (WERD_RES *word)
 
improve_by_chopping

Repeatedly chops the worst blob, classifying the new blobs fixing up all the data, and incrementally runs the segmentation search until a good word is found, or no more chops can be found.

void improve_by_chopping (float rating_cert_scale, WERD_RES *word, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle, LMPainPoints *pain_points, GenericVector< SegSearchPending > *pending)
 
int select_blob_to_split (const GenericVector< BLOB_CHOICE *> &blob_choices, float rating_ceiling, bool split_next_to_fragment)
 
int select_blob_to_split_from_fixpt (DANGERR *fixpt)
 
- Public Member Functions inherited from tesseract::Classify
 Classify ()
 
virtual ~Classify ()
 
virtual Dict & getDict ()
 
const ShapeTable * shape_table () const
 
void SetStaticClassifier (ShapeClassifier *static_classifier)
 
void AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices)
 
bool LargeSpeckle (const TBLOB &blob)
 
ADAPT_TEMPLATES NewAdaptedTemplates (bool InitFromUnicharset)
 
int GetFontinfoId (ADAPT_CLASS Class, uint8_t ConfigId)
 
int PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
 
void ReadNewCutoffs (TFile *fp, CLASS_CUTOFF_ARRAY Cutoffs)
 
void PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
void WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
ADAPT_TEMPLATES ReadAdaptedTemplates (TFile *File)
 
float ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch)
 
void FreeNormProtos ()
 
NORM_PROTOS * ReadNormProtos (TFile *fp)
 
void ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class)
 
INT_TEMPLATES CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset)
 
void LearnWord (const char *fontname, WERD_RES *word)
 
void LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
void InitAdaptiveClassifier (TessdataManager *mgr)
 
void InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
 
void AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
 
void MasterMatcher (INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
 
void ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
 
double ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)
 
void ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
void AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results)
 
int GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results)
 
PROTO_ID MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
 
int MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
void MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
 
void PrintAdaptiveMatchResults (const ADAPT_RESULTS &results)
 
void RemoveExtraPuncs (ADAPT_RESULTS *Results)
 
void RemoveBadMatches (ADAPT_RESULTS *Results)
 
void SetAdaptiveThreshold (float Threshold)
 
void ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
 
STRING ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
 
int ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const
 
int ShapeIDToClassID (int shape_id) const
 
UNICHAR_ID * BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
 
int CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
 
int CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results)
 
UNICHAR_ID * GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass)
 
void DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results)
 
void AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
 
void DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class)
 
bool AdaptableWord (WERD_RES *word)
 
void EndAdaptiveClassifier ()
 
void SettupPass1 ()
 
void SettupPass2 ()
 
void AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
 
void ClassifyAsNoise (ADAPT_RESULTS *Results)
 
void ResetAdaptiveClassifierInternal ()
 
void SwitchAdaptiveClassifier ()
 
void StartBackupAdaptiveClassifier ()
 
int GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
 
void ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)
 
bool TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config)
 
void UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob)
 
bool AdaptiveClassifierIsFull () const
 
bool AdaptiveClassifierIsEmpty () const
 
bool LooksLikeGarbage (TBLOB *blob)
 
void RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
void ClearCharNormArray (uint8_t *char_norm_array)
 
void ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
 
void ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
INT_TEMPLATES ReadIntTemplates (TFile *fp)
 
void WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
 
CLASS_ID GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
 
void ShowMatchDisplay ()
 
UnicityTable< FontInfo > & get_fontinfo_table ()
 
const UnicityTable< FontInfo > & get_fontinfo_table () const
 
UnicityTable< FontSet > & get_fontset_table ()
 
void NormalizeOutlines (LIST Outlines, float *XScale, float *YScale)
 
FEATURE_SET ExtractOutlineFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractPicoFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
FEATURE_SET ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
void LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
 
bool WriteTRFile (const STRING &filename)
 
 BOOL_VAR_H (allow_blob_division, true, "Use divisible blobs chopping")
 
 BOOL_VAR_H (prioritize_division, FALSE, "Prioritize blob division over chopping")
 
 INT_VAR_H (tessedit_single_match, FALSE, "Top choice only from CP")
 
 BOOL_VAR_H (classify_enable_learning, true, "Enable adaptive classifier")
 
 INT_VAR_H (classify_debug_level, 0, "Classify debug level")
 
 INT_VAR_H (classify_norm_method, character, "Normalization Method ...")
 
 double_VAR_H (classify_char_norm_range, 0.2, "Character Normalization Range ...")
 
 double_VAR_H (classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...")
 
 double_VAR_H (classify_max_norm_scale_x, 0.325, "Max char x-norm scale ...")
 
 double_VAR_H (classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...")
 
 double_VAR_H (classify_max_norm_scale_y, 0.325, "Max char y-norm scale ...")
 
 double_VAR_H (classify_max_rating_ratio, 1.5, "Veto ratio between classifier ratings")
 
 double_VAR_H (classify_max_certainty_margin, 5.5, "Veto difference between classifier certainties")
 
 BOOL_VAR_H (tess_cn_matching, 0, "Character Normalized Matching")
 
 BOOL_VAR_H (tess_bn_matching, 0, "Baseline Normalized Matching")
 
 BOOL_VAR_H (classify_enable_adaptive_matcher, 1, "Enable adaptive classifier")
 
 BOOL_VAR_H (classify_use_pre_adapted_templates, 0, "Use pre-adapted classifier templates")
 
 BOOL_VAR_H (classify_save_adapted_templates, 0, "Save adapted templates to a file")
 
 BOOL_VAR_H (classify_enable_adaptive_debugger, 0, "Enable match debugger")
 
 BOOL_VAR_H (classify_nonlinear_norm, 0, "Non-linear stroke-density normalization")
 
 INT_VAR_H (matcher_debug_level, 0, "Matcher Debug Level")
 
 INT_VAR_H (matcher_debug_flags, 0, "Matcher Debug Flags")
 
 INT_VAR_H (classify_learning_debug_level, 0, "Learning Debug Level: ")
 
 double_VAR_H (matcher_good_threshold, 0.125, "Good Match (0-1)")
 
 double_VAR_H (matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)")
 
 double_VAR_H (matcher_perfect_threshold, 0.02, "Perfect Match (0-1)")
 
 double_VAR_H (matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)")
 
 double_VAR_H (matcher_rating_margin, 0.1, "New template margin (0-1)")
 
 double_VAR_H (matcher_avg_noise_size, 12.0, "Avg. noise blob length: ")
 
 INT_VAR_H (matcher_permanent_classes_min, 1, "Min # of permanent classes")
 
 INT_VAR_H (matcher_min_examples_for_prototyping, 3, "Reliable Config Threshold")
 
 INT_VAR_H (matcher_sufficient_examples_for_prototyping, 5, "Enable adaption even if the ambiguities have not been seen")
 
 double_VAR_H (matcher_clustering_max_angle_delta, 0.015, "Maximum angle delta for prototype clustering")
 
 double_VAR_H (classify_misfit_junk_penalty, 0.0, "Penalty to apply when a non-alnum is vertically out of " "its expected textline position")
 
 double_VAR_H (rating_scale, 1.5, "Rating scaling factor")
 
 double_VAR_H (certainty_scale, 20.0, "Certainty scaling factor")
 
 double_VAR_H (tessedit_class_miss_scale, 0.00390625, "Scale factor for features not used")
 
 double_VAR_H (classify_adapted_pruning_factor, 2.5, "Prune poor adapted results this much worse than best result")
 
 double_VAR_H (classify_adapted_pruning_threshold, -1.0, "Threshold at which classify_adapted_pruning_factor starts")
 
 INT_VAR_H (classify_adapt_proto_threshold, 230, "Threshold for good protos during adaptive 0-255")
 
 INT_VAR_H (classify_adapt_feature_threshold, 230, "Threshold for good features during adaptive 0-255")
 
 BOOL_VAR_H (disable_character_fragments, TRUE, "Do not include character fragments in the" " results of the classifier")
 
 double_VAR_H (classify_character_fragments_garbage_certainty_threshold, -3.0, "Exclude fragments that do not match any whole character" " with at least this certainty")
 
 BOOL_VAR_H (classify_debug_character_fragments, FALSE, "Bring up graphical debugging windows for fragments training")
 
 BOOL_VAR_H (matcher_debug_separate_windows, FALSE, "Use two different windows for debugging the matching: " "One for the protos and one for the features.")
 
 STRING_VAR_H (classify_learn_debug_str, "", "Class str to debug learning")
 
 INT_VAR_H (classify_class_pruner_threshold, 229, "Class Pruner Threshold 0-255")
 
 INT_VAR_H (classify_class_pruner_multiplier, 15, "Class Pruner Multiplier 0-255: ")
 
 INT_VAR_H (classify_cp_cutoff_strength, 7, "Class Pruner CutoffStrength: ")
 
 INT_VAR_H (classify_integer_matcher_multiplier, 10, "Integer Matcher Multiplier 0-255: ")
 
 INT_VAR_H (il1_adaption_test, 0, "Don't adapt to i/I at beginning of word")
 
 BOOL_VAR_H (classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9].")
 
 double_VAR_H (speckle_large_max_size, 0.30, "Max large speckle size")
 
 double_VAR_H (speckle_rating_penalty, 10.0, "Penalty to add to worst rating for noise")
 
- Public Member Functions inherited from tesseract::CCStruct
 CCStruct ()=default
 
virtual ~CCStruct ()
 
- Public Member Functions inherited from tesseract::CUtil
 CUtil ()=default
 
virtual ~CUtil ()
 
void read_variables (const char *filename, bool global_only)
 
- Public Member Functions inherited from tesseract::CCUtil
 CCUtil ()
 
virtual ~CCUtil ()
 
void main_setup (const char *argv0, const char *basename)
 CCUtil::main_setup - set location of tessdata and name of image. More...
 
ParamsVectors * params ()
 

Public Attributes

std::unique_ptr< LanguageModel > language_model_
 
PRIORITY pass2_ok_split
 
WERD_CHOICE * prev_word_best_choice_
 
GenericVector< int > blame_reasons_
 
void(Wordrec::* fill_lattice_ )(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
 
- Public Attributes inherited from tesseract::Classify
INT_TEMPLATES PreTrainedTemplates
 
ADAPT_TEMPLATES AdaptedTemplates
 
ADAPT_TEMPLATES BackupAdaptedTemplates
 
BIT_VECTOR AllProtosOn
 
BIT_VECTOR AllConfigsOn
 
BIT_VECTOR AllConfigsOff
 
BIT_VECTOR TempProtoMask
 
bool EnableLearning
 
NORM_PROTOS * NormProtos
 
UnicityTable< FontInfo > fontinfo_table_
 
UnicityTable< FontSet > fontset_table_
 
- Public Attributes inherited from tesseract::CCUtil
STRING datadir
 
STRING imagebasename
 
STRING lang
 
STRING language_data_path_prefix
 
UNICHARSET unicharset
 
UnicharAmbigs unichar_ambigs
 
STRING imagefile
 
STRING directory
 
tesseract::IntParam ambigs_debug_level
 
tesseract::BoolParam use_definite_ambigs_for_classifier
 
tesseract::BoolParam use_ambigs_for_adaption
 

Protected Member Functions

bool SegSearchDone (int num_futile_classifications)
 
void UpdateSegSearchNodes (float rating_cert_scale, int starting_col, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle)
 
void ProcessSegSearchPainPoint (float pain_point_priority, const MATRIX_COORD &pain_point, const char *pain_point_type, GenericVector< SegSearchPending > *pending, WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle)
 
void ResetNGramSearch (WERD_RES *word_res, BestChoiceBundle *best_choice_bundle, GenericVector< SegSearchPending > *pending)
 
void InitBlamerForSegSearch (WERD_RES *word_res, LMPainPoints *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug)
 

Additional Inherited Members

- Static Public Member Functions inherited from tesseract::Classify
static void SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
 
static void ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
 
- Static Public Attributes inherited from tesseract::CCStruct
static const double kDescenderFraction = 0.25
 
static const double kXHeightFraction = 0.5
 
static const double kAscenderFraction = 0.25
 
static const double kXHeightCapRatio
 
- Protected Attributes inherited from tesseract::Classify
IntegerMatcher im_
 
FEATURE_DEFS_STRUCT feature_defs_
 
ShapeTable * shape_table_
 

Constructor & Destructor Documentation

◆ Wordrec()

tesseract::Wordrec::Wordrec ( )

◆ ~Wordrec()

virtual tesseract::Wordrec::~Wordrec ( )
virtual default

Member Function Documentation

◆ add_point_to_list()

void tesseract::Wordrec::add_point_to_list ( PointHeap * point_heap,
EDGEPT * point 
)

◆ add_seam_to_queue()

void tesseract::Wordrec::add_seam_to_queue ( float  new_priority,
SEAM * new_seam,
SeamQueue * seams 
)

◆ angle_change()

int tesseract::Wordrec::angle_change ( EDGEPT * point1,
EDGEPT * point2,
EDGEPT * point3 
)

◆ attempt_blob_chop()

SEAM * tesseract::Wordrec::attempt_blob_chop ( TWERD * word,
TBLOB * blob,
int32_t  blob_number,
bool  italic_blob,
const GenericVector< SEAM *> &  seams 
)

◆ BOOL_VAR_H() [1/13]

tesseract::Wordrec::BOOL_VAR_H ( merge_fragments_in_matrix  ,
TRUE  ,
"Merge the fragments in the ratings matrix and delete them " "after merging"   
)

◆ BOOL_VAR_H() [2/13]

tesseract::Wordrec::BOOL_VAR_H ( wordrec_no_block  ,
FALSE  ,
"Don't output block information"   
)

◆ BOOL_VAR_H() [3/13]

tesseract::Wordrec::BOOL_VAR_H ( wordrec_enable_assoc  ,
TRUE  ,
"Associator Enable"   
)

◆ BOOL_VAR_H() [4/13]

tesseract::Wordrec::BOOL_VAR_H ( force_word_assoc  ,
FALSE  ,
"force associator to run regardless of what enable_assoc is." "This is used for CJK where component grouping is necessary."   
)

◆ BOOL_VAR_H() [5/13]

tesseract::Wordrec::BOOL_VAR_H ( fragments_guide_chopper  ,
FALSE  ,
"Use information from fragments to guide chopping process"   
)

◆ BOOL_VAR_H() [6/13]

tesseract::Wordrec::BOOL_VAR_H ( chop_enable  ,
1  ,
"Chop enable"   
)

◆ BOOL_VAR_H() [7/13]

tesseract::Wordrec::BOOL_VAR_H ( chop_vertical_creep  ,
0  ,
"Vertical creep"   
)

◆ BOOL_VAR_H() [8/13]

tesseract::Wordrec::BOOL_VAR_H ( chop_new_seam_pile  ,
1  ,
"Use new seam_pile"   
)

◆ BOOL_VAR_H() [9/13]

tesseract::Wordrec::BOOL_VAR_H ( assume_fixed_pitch_char_segment  ,
FALSE  ,
"include fixed-pitch heuristics in char segmentation"   
)

◆ BOOL_VAR_H() [10/13]

tesseract::Wordrec::BOOL_VAR_H ( wordrec_skip_no_truth_words  ,
false  ,
"Only run OCR for words that had truth recorded in BlamerBundle"   
)

◆ BOOL_VAR_H() [11/13]

tesseract::Wordrec::BOOL_VAR_H ( wordrec_debug_blamer  ,
false  ,
"Print blamer debug messages"   
)

◆ BOOL_VAR_H() [12/13]

tesseract::Wordrec::BOOL_VAR_H ( wordrec_run_blamer  ,
false  ,
"Try to set the blame for errors"   
)

◆ BOOL_VAR_H() [13/13]

tesseract::Wordrec::BOOL_VAR_H ( save_alt_choices  ,
true  ,
"Save alternative paths found during chopping " "and segmentation search"   
)

◆ call_matcher()

BLOB_CHOICE_LIST * tesseract::Wordrec::call_matcher ( TBLOB * blob)

◆ CallFillLattice()

void tesseract::Wordrec::CallFillLattice ( const MATRIX & ratings,
const WERD_CHOICE_LIST &  best_choices,
const UNICHARSET & unicharset,
BlamerBundle * blamer_bundle 
)
inline

◆ cc_recog()

void tesseract::Wordrec::cc_recog ( WERD_RES * word)

◆ choose_best_seam()

void tesseract::Wordrec::choose_best_seam ( SeamQueue * seam_queue,
const SPLIT * split,
PRIORITY  priority,
SEAM **  seam_result,
TBLOB * blob,
SeamPile * seam_pile 
)

◆ chop_numbered_blob()

SEAM * tesseract::Wordrec::chop_numbered_blob ( TWERD * word,
int32_t  blob_number,
bool  italic_blob,
const GenericVector< SEAM *> &  seams 
)

◆ chop_one_blob()

SEAM * tesseract::Wordrec::chop_one_blob ( const GenericVector< TBOX > &  boxes,
const GenericVector< BLOB_CHOICE *> &  blob_choices,
WERD_RES * word_res,
int *  blob_number 
)

◆ chop_overlapping_blob()

SEAM * tesseract::Wordrec::chop_overlapping_blob ( const GenericVector< TBOX > &  boxes,
bool  italic_blob,
WERD_RES * word_res,
int *  blob_number 
)

◆ chop_word_main()

void tesseract::Wordrec::chop_word_main ( WERD_RES * word)

◆ classify_blob()

BLOB_CHOICE_LIST * tesseract::Wordrec::classify_blob ( TBLOB * blob,
const char *  string,
C_COL  color,
BlamerBundle * blamer_bundle 
)

◆ classify_piece()

BLOB_CHOICE_LIST * tesseract::Wordrec::classify_piece ( const GenericVector< SEAM *> &  seams,
int16_t  start,
int16_t  end,
const char *  description,
TWERD * word,
BlamerBundle * blamer_bundle 
)
virtual

◆ combine_seam()

void tesseract::Wordrec::combine_seam ( const SeamPile & seam_pile,
const SEAM * seam,
SeamQueue * seam_queue 
)

◆ dict_word()

int tesseract::Wordrec::dict_word ( const WERD_CHOICE & word)

◆ DoSegSearch()

void tesseract::Wordrec::DoSegSearch ( WERD_RES * word_res)

◆ double_VAR_H() [1/10]

tesseract::Wordrec::double_VAR_H ( wordrec_worst_state  ,
1  ,
"Worst segmentation state"   
)

◆ double_VAR_H() [2/10]

tesseract::Wordrec::double_VAR_H ( tessedit_certainty_threshold  ,
-2.25  ,
"Good blob limit"   
)

◆ double_VAR_H() [3/10]

tesseract::Wordrec::double_VAR_H ( chop_split_dist_knob  ,
0.5  ,
"Split length adjustment"   
)

◆ double_VAR_H() [4/10]

tesseract::Wordrec::double_VAR_H ( chop_overlap_knob  ,
0.9  ,
"Split overlap adjustment"   
)

◆ double_VAR_H() [5/10]

tesseract::Wordrec::double_VAR_H ( chop_center_knob  ,
0.15  ,
"Split center adjustment"   
)

◆ double_VAR_H() [6/10]

tesseract::Wordrec::double_VAR_H ( chop_sharpness_knob  ,
0.06  ,
"Split sharpness adjustment"   
)

◆ double_VAR_H() [7/10]

tesseract::Wordrec::double_VAR_H ( chop_width_change_knob  ,
5.0  ,
"Width change adjustment"   
)

◆ double_VAR_H() [8/10]

tesseract::Wordrec::double_VAR_H ( chop_ok_split  ,
100.0  ,
"OK split limit"   
)

◆ double_VAR_H() [9/10]

tesseract::Wordrec::double_VAR_H ( chop_good_split  ,
50.0  ,
"Good split limit"   
)

◆ double_VAR_H() [10/10]

tesseract::Wordrec::double_VAR_H ( segsearch_max_char_wh_ratio  ,
2.0  ,
"Maximum character width-to-height ratio"   
)

◆ end_recog()

int tesseract::Wordrec::end_recog ( )

◆ fill_filtered_fragment_list()

void tesseract::Wordrec::fill_filtered_fragment_list ( BLOB_CHOICE_LIST *  choices,
int  fragment_pos,
int  num_frag_parts,
BLOB_CHOICE_LIST *  filtered_choices 
)

◆ FillLattice()

void tesseract::Wordrec::FillLattice ( const MATRIX &  ratings,
const WERD_CHOICE_LIST &  best_choices,
const UNICHARSET &  unicharset,
BlamerBundle *  blamer_bundle 
)

◆ get_fragment_lists()

void tesseract::Wordrec::get_fragment_lists ( int16_t  current_frag,
int16_t  current_row,
int16_t  start,
int16_t  num_frag_parts,
int16_t  num_blobs,
MATRIX ratings,
BLOB_CHOICE_LIST *  choice_lists 
)

◆ grade_sharpness()

PRIORITY tesseract::Wordrec::grade_sharpness ( SPLIT split)

◆ grade_split_length()

PRIORITY tesseract::Wordrec::grade_split_length ( SPLIT split)

◆ improve_by_chopping()

void tesseract::Wordrec::improve_by_chopping ( float  rating_cert_scale,
WERD_RES word,
BestChoiceBundle best_choice_bundle,
BlamerBundle blamer_bundle,
LMPainPoints pain_points,
GenericVector< SegSearchPending > *  pending 
)

◆ improve_one_blob()

SEAM * tesseract::Wordrec::improve_one_blob ( const GenericVector< BLOB_CHOICE *> &  blob_choices,
DANGERR fixpt,
bool  split_next_to_fragment,
bool  italic_blob,
WERD_RES word,
int *  blob_number 
)

◆ InitBlamerForSegSearch()

void tesseract::Wordrec::InitBlamerForSegSearch ( WERD_RES word_res,
LMPainPoints pain_points,
BlamerBundle blamer_bundle,
STRING blamer_debug 
)
protected

◆ InitialSegSearch()

void tesseract::Wordrec::InitialSegSearch ( WERD_RES word_res,
LMPainPoints pain_points,
GenericVector< SegSearchPending > *  pending,
BestChoiceBundle best_choice_bundle,
BlamerBundle blamer_bundle 
)

◆ INT_VAR_H() [1/16]

tesseract::Wordrec::INT_VAR_H ( repair_unchopped_blobs  ,
1  ,
"Fix blobs that aren't chopped"   
)

◆ INT_VAR_H() [2/16]

tesseract::Wordrec::INT_VAR_H ( chop_debug  ,
0  ,
"Chop debug"   
)

◆ INT_VAR_H() [3/16]

tesseract::Wordrec::INT_VAR_H ( chop_split_length  ,
10000  ,
"Split Length"   
)

◆ INT_VAR_H() [4/16]

tesseract::Wordrec::INT_VAR_H ( chop_same_distance  ,
2  ,
"Same distance"   
)

◆ INT_VAR_H() [5/16]

tesseract::Wordrec::INT_VAR_H ( chop_min_outline_points  ,
6  ,
"Min Number of Points on Outline"   
)

◆ INT_VAR_H() [6/16]

tesseract::Wordrec::INT_VAR_H ( chop_seam_pile_size  ,
150  ,
"Max number of seams in seam_pile"   
)

◆ INT_VAR_H() [7/16]

tesseract::Wordrec::INT_VAR_H ( chop_inside_angle  ,
-50  ,
"Min Inside Angle Bend"   
)

◆ INT_VAR_H() [8/16]

tesseract::Wordrec::INT_VAR_H ( chop_min_outline_area  ,
2000  ,
"Min Outline Area"   
)

◆ INT_VAR_H() [9/16]

tesseract::Wordrec::INT_VAR_H ( chop_centered_maxwidth  ,
90  ,
"Width of (smaller) chopped blobs " "above which we don't care that a chop is not near the center."   
)

◆ INT_VAR_H() [10/16]

tesseract::Wordrec::INT_VAR_H ( chop_x_y_weight  ,
3  ,
"X / Y length weight"   
)

◆ INT_VAR_H() [11/16]

tesseract::Wordrec::INT_VAR_H ( segment_adjust_debug  ,
0  ,
"Segmentation adjustment debug"   
)

◆ INT_VAR_H() [12/16]

tesseract::Wordrec::INT_VAR_H ( wordrec_debug_level  ,
0  ,
"Debug level for wordrec"   
)

◆ INT_VAR_H() [13/16]

tesseract::Wordrec::INT_VAR_H ( wordrec_max_join_chunks  ,
4  ,
"Max number of broken pieces to associate"   
)

◆ INT_VAR_H() [14/16]

tesseract::Wordrec::INT_VAR_H ( segsearch_debug_level  ,
0  ,
"SegSearch debug level"   
)

◆ INT_VAR_H() [15/16]

tesseract::Wordrec::INT_VAR_H ( segsearch_max_pain_points  ,
2000  ,
"Maximum number of pain points stored in the queue"   
)

◆ INT_VAR_H() [16/16]

tesseract::Wordrec::INT_VAR_H ( segsearch_max_futile_classifications  ,
10  ,
"Maximum number of pain point classifications per word."   
)

◆ is_inside_angle()

bool tesseract::Wordrec::is_inside_angle ( EDGEPT pt)

◆ merge_and_put_fragment_lists()

void tesseract::Wordrec::merge_and_put_fragment_lists ( int16_t  row,
int16_t  column,
int16_t  num_frag_parts,
BLOB_CHOICE_LIST *  choice_lists,
MATRIX ratings 
)

◆ merge_fragments()

void tesseract::Wordrec::merge_fragments ( MATRIX ratings,
int16_t  num_blobs 
)

◆ near_point()

bool tesseract::Wordrec::near_point ( EDGEPT point,
EDGEPT line_pt_0,
EDGEPT line_pt_1,
EDGEPT **  near_pt 
)

◆ new_max_point()

void tesseract::Wordrec::new_max_point ( EDGEPT local_max,
PointHeap points 
)

◆ new_min_point()

void tesseract::Wordrec::new_min_point ( EDGEPT local_min,
PointHeap points 
)

◆ pick_close_point()

EDGEPT * tesseract::Wordrec::pick_close_point ( EDGEPT critical_point,
EDGEPT vertical_point,
int *  best_dist 
)

◆ pick_good_seam()

SEAM * tesseract::Wordrec::pick_good_seam ( TBLOB blob)

◆ point_priority()

PRIORITY tesseract::Wordrec::point_priority ( EDGEPT point)

◆ prioritize_points()

void tesseract::Wordrec::prioritize_points ( TESSLINE outline,
PointHeap points 
)

◆ ProcessSegSearchPainPoint()

void tesseract::Wordrec::ProcessSegSearchPainPoint ( float  pain_point_priority,
const MATRIX_COORD pain_point,
const char *  pain_point_type,
GenericVector< SegSearchPending > *  pending,
WERD_RES word_res,
LMPainPoints pain_points,
BlamerBundle blamer_bundle 
)
protected

◆ program_editdown()

void tesseract::Wordrec::program_editdown ( int32_t  elasped_time)

◆ program_editup()

void tesseract::Wordrec::program_editup ( const char *  textbase,
TessdataManager init_classifier,
TessdataManager init_dict 
)

◆ ResetNGramSearch()

void tesseract::Wordrec::ResetNGramSearch ( WERD_RES word_res,
BestChoiceBundle best_choice_bundle,
GenericVector< SegSearchPending > *  pending 
)
protected

◆ SaveAltChoices()

void tesseract::Wordrec::SaveAltChoices ( const LIST best_choices,
WERD_RES word 
)

◆ SegSearch()

void tesseract::Wordrec::SegSearch ( WERD_RES word_res,
BestChoiceBundle best_choice_bundle,
BlamerBundle blamer_bundle 
)

◆ SegSearchDone()

bool tesseract::Wordrec::SegSearchDone ( int  num_futile_classifications)
inline protected

◆ select_blob_to_split()

int tesseract::Wordrec::select_blob_to_split ( const GenericVector< BLOB_CHOICE *> &  blob_choices,
float  rating_ceiling,
bool  split_next_to_fragment 
)

◆ select_blob_to_split_from_fixpt()

int tesseract::Wordrec::select_blob_to_split_from_fixpt ( DANGERR fixpt)

◆ set_pass1()

void tesseract::Wordrec::set_pass1 ( )

◆ set_pass2()

void tesseract::Wordrec::set_pass2 ( )

◆ try_point_pairs()

void tesseract::Wordrec::try_point_pairs ( EDGEPT points[50],
int16_t  num_points,
SeamQueue seam_queue,
SeamPile seam_pile,
SEAM **  seam,
TBLOB blob 
)

◆ try_vertical_splits()

void tesseract::Wordrec::try_vertical_splits ( EDGEPT points[50],
int16_t  num_points,
EDGEPT_CLIST *  new_points,
SeamQueue seam_queue,
SeamPile seam_pile,
SEAM **  seam,
TBLOB blob 
)

◆ UpdateSegSearchNodes()

void tesseract::Wordrec::UpdateSegSearchNodes ( float  rating_cert_scale,
int  starting_col,
GenericVector< SegSearchPending > *  pending,
WERD_RES word_res,
LMPainPoints pain_points,
BestChoiceBundle best_choice_bundle,
BlamerBundle blamer_bundle 
)
protected

◆ vertical_projection_point()

void tesseract::Wordrec::vertical_projection_point ( EDGEPT split_point,
EDGEPT target_point,
EDGEPT **  best_point,
EDGEPT_CLIST *  new_points 
)

Member Data Documentation

◆ blame_reasons_

GenericVector<int> tesseract::Wordrec::blame_reasons_

◆ fill_lattice_

void(Wordrec::* tesseract::Wordrec::fill_lattice_) (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)

◆ language_model_

std::unique_ptr<LanguageModel> tesseract::Wordrec::language_model_

◆ pass2_ok_split

PRIORITY tesseract::Wordrec::pass2_ok_split

◆ prev_word_best_choice_

WERD_CHOICE* tesseract::Wordrec::prev_word_best_choice_

The documentation for this class was generated from the following files: