tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
tesseract::Classify Class Reference

#include <classify.h>

Inheritance diagram for tesseract::Classify:
Collaboration diagram for tesseract::Classify:

Public Member Functions

 Classify ()
 
virtual ~Classify ()
 
virtual Dict & getDict ()
 
const ShapeTable * shape_table () const
 
void SetStaticClassifier (ShapeClassifier *static_classifier)
 
void AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices)
 
bool LargeSpeckle (const TBLOB &blob)
 
ADAPT_TEMPLATES NewAdaptedTemplates (bool InitFromUnicharset)
 
int GetFontinfoId (ADAPT_CLASS Class, uint8_t ConfigId)
 
int PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uint8_t *normalization_factors, const uint16_t *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
 
void ReadNewCutoffs (TFile *fp, CLASS_CUTOFF_ARRAY Cutoffs)
 
void PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
void WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
ADAPT_TEMPLATES ReadAdaptedTemplates (TFile *File)
 
float ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, bool DebugMatch)
 
void FreeNormProtos ()
 
NORM_PROTOS * ReadNormProtos (TFile *fp)
 
void ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class)
 
INT_TEMPLATES CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset)
 
void LearnWord (const char *fontname, WERD_RES *word)
 
void LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
void InitAdaptiveClassifier (TessdataManager *mgr)
 
void InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
 
void AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
 
void MasterMatcher (INT_TEMPLATES templates, int16_t num_features, const INT_FEATURE_STRUCT *features, const uint8_t *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
 
void ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uint8_t *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
 
double ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uint8_t *cn_factors)
 
void ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
void AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results)
 
int GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results)
 
PROTO_ID MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
 
int MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
void MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
 
void PrintAdaptiveMatchResults (const ADAPT_RESULTS &results)
 
void RemoveExtraPuncs (ADAPT_RESULTS *Results)
 
void RemoveBadMatches (ADAPT_RESULTS *Results)
 
void SetAdaptiveThreshold (float Threshold)
 
void ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
 
STRING ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
 
int ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const
 
int ShapeIDToClassID (int shape_id) const
 
UNICHAR_ID * BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
 
int CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
 
int CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results)
 
UNICHAR_ID * GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass)
 
void DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results)
 
void AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, float Threshold, ADAPT_TEMPLATES adaptive_templates)
 
void DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class)
 
bool AdaptableWord (WERD_RES *word)
 
void EndAdaptiveClassifier ()
 
void SettupPass1 ()
 
void SettupPass2 ()
 
void AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
 
void ClassifyAsNoise (ADAPT_RESULTS *Results)
 
void ResetAdaptiveClassifierInternal ()
 
void SwitchAdaptiveClassifier ()
 
void StartBackupAdaptiveClassifier ()
 
int GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uint8_t *pruner_norm_array, uint8_t *char_norm_array)
 
void ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uint8_t *char_norm_array, uint8_t *pruner_array)
 
bool TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config)
 
void UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob)
 
bool AdaptiveClassifierIsFull () const
 
bool AdaptiveClassifierIsEmpty () const
 
bool LooksLikeGarbage (TBLOB *blob)
 
void RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
void ClearCharNormArray (uint8_t *char_norm_array)
 
void ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uint8_t *char_norm_array)
 
void ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
INT_TEMPLATES ReadIntTemplates (TFile *fp)
 
void WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
 
CLASS_ID GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
 
void ShowMatchDisplay ()
 
UnicityTable< FontInfo > & get_fontinfo_table ()
 
const UnicityTable< FontInfo > & get_fontinfo_table () const
 
UnicityTable< FontSet > & get_fontset_table ()
 
void NormalizeOutlines (LIST Outlines, float *XScale, float *YScale)
 
FEATURE_SET ExtractOutlineFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractPicoFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
FEATURE_SET ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
void LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
 
bool WriteTRFile (const STRING &filename)
 
 BOOL_VAR_H (allow_blob_division, true, "Use divisible blobs chopping")
 
 BOOL_VAR_H (prioritize_division, FALSE, "Prioritize blob division over chopping")
 
 INT_VAR_H (tessedit_single_match, FALSE, "Top choice only from CP")
 
 BOOL_VAR_H (classify_enable_learning, true, "Enable adaptive classifier")
 
 INT_VAR_H (classify_debug_level, 0, "Classify debug level")
 
 INT_VAR_H (classify_norm_method, character, "Normalization Method ...")
 
 double_VAR_H (classify_char_norm_range, 0.2, "Character Normalization Range ...")
 
 double_VAR_H (classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...")
 
 double_VAR_H (classify_max_norm_scale_x, 0.325, "Max char x-norm scale ...")
 
 double_VAR_H (classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...")
 
 double_VAR_H (classify_max_norm_scale_y, 0.325, "Max char y-norm scale ...")
 
 double_VAR_H (classify_max_rating_ratio, 1.5, "Veto ratio between classifier ratings")
 
 double_VAR_H (classify_max_certainty_margin, 5.5, "Veto difference between classifier certainties")
 
 BOOL_VAR_H (tess_cn_matching, 0, "Character Normalized Matching")
 
 BOOL_VAR_H (tess_bn_matching, 0, "Baseline Normalized Matching")
 
 BOOL_VAR_H (classify_enable_adaptive_matcher, 1, "Enable adaptive classifier")
 
 BOOL_VAR_H (classify_use_pre_adapted_templates, 0, "Use pre-adapted classifier templates")
 
 BOOL_VAR_H (classify_save_adapted_templates, 0, "Save adapted templates to a file")
 
 BOOL_VAR_H (classify_enable_adaptive_debugger, 0, "Enable match debugger")
 
 BOOL_VAR_H (classify_nonlinear_norm, 0, "Non-linear stroke-density normalization")
 
 INT_VAR_H (matcher_debug_level, 0, "Matcher Debug Level")
 
 INT_VAR_H (matcher_debug_flags, 0, "Matcher Debug Flags")
 
 INT_VAR_H (classify_learning_debug_level, 0, "Learning Debug Level: ")
 
 double_VAR_H (matcher_good_threshold, 0.125, "Good Match (0-1)")
 
 double_VAR_H (matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)")
 
 double_VAR_H (matcher_perfect_threshold, 0.02, "Perfect Match (0-1)")
 
 double_VAR_H (matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)")
 
 double_VAR_H (matcher_rating_margin, 0.1, "New template margin (0-1)")
 
 double_VAR_H (matcher_avg_noise_size, 12.0, "Avg. noise blob length: ")
 
 INT_VAR_H (matcher_permanent_classes_min, 1, "Min # of permanent classes")
 
 INT_VAR_H (matcher_min_examples_for_prototyping, 3, "Reliable Config Threshold")
 
 INT_VAR_H (matcher_sufficient_examples_for_prototyping, 5, "Enable adaption even if the ambiguities have not been seen")
 
 double_VAR_H (matcher_clustering_max_angle_delta, 0.015, "Maximum angle delta for prototype clustering")
 
 double_VAR_H (classify_misfit_junk_penalty, 0.0, "Penalty to apply when a non-alnum is vertically out of " "its expected textline position")
 
 double_VAR_H (rating_scale, 1.5, "Rating scaling factor")
 
 double_VAR_H (certainty_scale, 20.0, "Certainty scaling factor")
 
 double_VAR_H (tessedit_class_miss_scale, 0.00390625, "Scale factor for features not used")
 
 double_VAR_H (classify_adapted_pruning_factor, 2.5, "Prune poor adapted results this much worse than best result")
 
 double_VAR_H (classify_adapted_pruning_threshold, -1.0, "Threshold at which classify_adapted_pruning_factor starts")
 
 INT_VAR_H (classify_adapt_proto_threshold, 230, "Threshold for good protos during adaptive 0-255")
 
 INT_VAR_H (classify_adapt_feature_threshold, 230, "Threshold for good features during adaptive 0-255")
 
 BOOL_VAR_H (disable_character_fragments, TRUE, "Do not include character fragments in the" " results of the classifier")
 
 double_VAR_H (classify_character_fragments_garbage_certainty_threshold, -3.0, "Exclude fragments that do not match any whole character" " with at least this certainty")
 
 BOOL_VAR_H (classify_debug_character_fragments, FALSE, "Bring up graphical debugging windows for fragments training")
 
 BOOL_VAR_H (matcher_debug_separate_windows, FALSE, "Use two different windows for debugging the matching: " "One for the protos and one for the features.")
 
 STRING_VAR_H (classify_learn_debug_str, "", "Class str to debug learning")
 
 INT_VAR_H (classify_class_pruner_threshold, 229, "Class Pruner Threshold 0-255")
 
 INT_VAR_H (classify_class_pruner_multiplier, 15, "Class Pruner Multiplier 0-255: ")
 
 INT_VAR_H (classify_cp_cutoff_strength, 7, "Class Pruner CutoffStrength: ")
 
 INT_VAR_H (classify_integer_matcher_multiplier, 10, "Integer Matcher Multiplier 0-255: ")
 
 INT_VAR_H (il1_adaption_test, 0, "Don't adapt to i/I at beginning of word")
 
 BOOL_VAR_H (classify_bln_numeric_mode, 0, "Assume the input is numbers [0-9].")
 
 double_VAR_H (speckle_large_max_size, 0.30, "Max large speckle size")
 
 double_VAR_H (speckle_rating_penalty, 10.0, "Penalty to add to worst rating for noise")
 
- Public Member Functions inherited from tesseract::CCStruct
 CCStruct ()=default
 
virtual ~CCStruct ()
 
- Public Member Functions inherited from tesseract::CUtil
 CUtil ()=default
 
virtual ~CUtil ()
 
void read_variables (const char *filename, bool global_only)
 
- Public Member Functions inherited from tesseract::CCUtil
 CCUtil ()
 
virtual ~CCUtil ()
 
void main_setup (const char *argv0, const char *basename)
 CCUtil::main_setup - set location of tessdata and name of image. More...
 
ParamsVectors * params ()
 

Static Public Member Functions

static void SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
 
static void ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
 

Public Attributes

INT_TEMPLATES PreTrainedTemplates
 
ADAPT_TEMPLATES AdaptedTemplates
 
ADAPT_TEMPLATES BackupAdaptedTemplates
 
BIT_VECTOR AllProtosOn
 
BIT_VECTOR AllConfigsOn
 
BIT_VECTOR AllConfigsOff
 
BIT_VECTOR TempProtoMask
 
bool EnableLearning
 
NORM_PROTOS * NormProtos
 
UnicityTable< FontInfo > fontinfo_table_
 
UnicityTable< FontSet > fontset_table_
 
- Public Attributes inherited from tesseract::CCUtil
STRING datadir
 
STRING imagebasename
 
STRING lang
 
STRING language_data_path_prefix
 
UNICHARSET unicharset
 
UnicharAmbigs unichar_ambigs
 
STRING imagefile
 
STRING directory
 
tesseract::IntParam ambigs_debug_level
 
tesseract::BoolParam use_definite_ambigs_for_classifier
 
tesseract::BoolParam use_ambigs_for_adaption
 

Protected Attributes

IntegerMatcher im_
 
FEATURE_DEFS_STRUCT feature_defs_
 
ShapeTable * shape_table_
 

Private Attributes

Dict dict_
 
ShapeClassifier * static_classifier_
 
int NumAdaptationsFailed
 
STRING tr_file_data_
 
uint16_t CharNormCutoffs [MAX_NUM_CLASSES]
 
uint16_t BaselineCutoffs [MAX_NUM_CLASSES]
 
GenericVector< uint16_t > shapetable_cutoffs_
 
ScrollView * learn_debug_win_
 
ScrollView * learn_fragmented_word_debug_win_
 
ScrollView * learn_fragments_debug_win_
 

Additional Inherited Members

- Static Public Attributes inherited from tesseract::CCStruct
static const double kDescenderFraction = 0.25
 
static const double kXHeightFraction = 0.5
 
static const double kAscenderFraction = 0.25
 
static const double kXHeightCapRatio
 

Constructor & Destructor Documentation

◆ Classify()

tesseract::Classify::Classify ( )

◆ ~Classify()

tesseract::Classify::~Classify ( )
virtual

Member Function Documentation

◆ AdaptableWord()

bool tesseract::Classify::AdaptableWord ( WERD_RES *  word)

Return TRUE if the specified word is acceptable for adaptation.

Globals: none

Parameters
word  current word
Returns
true or false

◆ AdaptiveClassifier()

void tesseract::Classify::AdaptiveClassifier ( TBLOB *  Blob,
BLOB_CHOICE_LIST *  Choices 
)

This routine calls the adaptive matcher which returns (in an array) the class id of each class matched.

It also returns the number of classes matched. For each class matched it places the best rating found for that class into the Ratings array.

Bad matches are then removed so that they don't need to be sorted. The remaining good matches are then sorted and converted to choices.

This routine also performs some simple speckle filtering.

Parameters
Blob  blob to be classified
[out]  Choices  List of choices found by adaptive matcher. Filled on return with the choices found by the class pruner and the ratings therefrom. Also contains the detailed results of the integer matcher.

◆ AdaptiveClassifierIsEmpty()

bool tesseract::Classify::AdaptiveClassifierIsEmpty ( ) const
inline

◆ AdaptiveClassifierIsFull()

bool tesseract::Classify::AdaptiveClassifierIsFull ( ) const
inline

◆ AdaptToChar()

void tesseract::Classify::AdaptToChar ( TBLOB *  Blob,
CLASS_ID  ClassId,
int  FontinfoId,
float  Threshold,
ADAPT_TEMPLATES  adaptive_templates 
)
Parameters
Blob  blob to add to templates for ClassId
ClassId  class to add blob to
FontinfoId  font information from pre-trained templates
Threshold  minimum match rating to existing template
adaptive_templates  current set of adapted templates

Globals:

  • AllProtosOn dummy mask to match against all protos
  • AllConfigsOn dummy mask to match against all configs
Returns
none

◆ AddLargeSpeckleTo()

void tesseract::Classify::AddLargeSpeckleTo ( int  blob_length,
BLOB_CHOICE_LIST *  choices 
)

◆ AddNewResult()

void tesseract::Classify::AddNewResult ( const UnicharRating &  new_result,
ADAPT_RESULTS *  results
)

This routine adds the result of a classification into Results. If the new rating is much worse than the current best rating, it is not entered into results because it would end up being stripped later anyway. If the new rating is better than the old rating for the class, it replaces the old rating. If this is the first rating for the class, the class is added to the list of matched classes in Results. If the new rating is better than the best so far, it becomes the best so far.

Globals:

  • #matcher_bad_match_pad defines limits of an acceptable match
Parameters
new_result  new result to add
[out]  results  results to add new result to

◆ AmbigClassifier()

void tesseract::Classify::AmbigClassifier ( const GenericVector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT &  fx_info,
const TBLOB *  blob,
INT_TEMPLATES  templates,
ADAPT_CLASS *  classes,
UNICHAR_ID *  ambiguities,
ADAPT_RESULTS *  results
)

This routine is identical to CharNormClassifier() except that it does no class pruning. It simply matches the unknown blob against the classes listed in Ambiguities.

Globals:

Parameters
blob  blob to be classified
templates  built-in templates to classify against
classes  adapted class templates
ambiguities  array of unichar id's to match against
[out]  results  place to put match results
int_features
fx_info

◆ BaselineClassifier()

UNICHAR_ID * tesseract::Classify::BaselineClassifier ( TBLOB *  Blob,
const GenericVector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT &  fx_info,
ADAPT_TEMPLATES  Templates,
ADAPT_RESULTS *  Results
)

This routine extracts baseline normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Globals:

  • BaselineCutoffs expected num features for each class
Parameters
Blob  blob to be classified
Templates  current set of adapted templates
Results  place to put match results
int_features
fx_info
Returns
Array of possible ambiguous chars that should be checked.

◆ BOOL_VAR_H() [1/14]

tesseract::Classify::BOOL_VAR_H ( allow_blob_division  ,
true  ,
"Use divisible blobs chopping"   
)

◆ BOOL_VAR_H() [2/14]

tesseract::Classify::BOOL_VAR_H ( prioritize_division  ,
FALSE  ,
"Prioritize blob division over chopping"   
)

◆ BOOL_VAR_H() [3/14]

tesseract::Classify::BOOL_VAR_H ( classify_enable_learning  ,
true  ,
"Enable adaptive classifier"   
)

◆ BOOL_VAR_H() [4/14]

tesseract::Classify::BOOL_VAR_H ( tess_cn_matching  ,
0  ,
"Character Normalized Matching"   
)

◆ BOOL_VAR_H() [5/14]

tesseract::Classify::BOOL_VAR_H ( tess_bn_matching  ,
0  ,
"Baseline Normalized Matching"   
)

◆ BOOL_VAR_H() [6/14]

tesseract::Classify::BOOL_VAR_H ( classify_enable_adaptive_matcher  ,
1  ,
"Enable adaptive classifier"   
)

◆ BOOL_VAR_H() [7/14]

tesseract::Classify::BOOL_VAR_H ( classify_use_pre_adapted_templates  ,
0  ,
"Use pre-adapted classifier templates"   
)

◆ BOOL_VAR_H() [8/14]

tesseract::Classify::BOOL_VAR_H ( classify_save_adapted_templates  ,
0  ,
"Save adapted templates to a file"   
)

◆ BOOL_VAR_H() [9/14]

tesseract::Classify::BOOL_VAR_H ( classify_enable_adaptive_debugger  ,
0  ,
"Enable match debugger"   
)

◆ BOOL_VAR_H() [10/14]

tesseract::Classify::BOOL_VAR_H ( classify_nonlinear_norm  ,
0  ,
"Non-linear stroke-density normalization"   
)

◆ BOOL_VAR_H() [11/14]

tesseract::Classify::BOOL_VAR_H ( disable_character_fragments  ,
TRUE  ,
"Do not include character fragments in the" " results of the classifier"   
)

◆ BOOL_VAR_H() [12/14]

tesseract::Classify::BOOL_VAR_H ( classify_debug_character_fragments  ,
FALSE  ,
"Bring up graphical debugging windows for fragments training"   
)

◆ BOOL_VAR_H() [13/14]

tesseract::Classify::BOOL_VAR_H ( matcher_debug_separate_windows  ,
FALSE  ,
"Use two different windows for debugging the matching: " "One for the protos and one for the features."   
)

◆ BOOL_VAR_H() [14/14]

tesseract::Classify::BOOL_VAR_H ( classify_bln_numeric_mode  ,
0  ,
"Assume the input is numbers [0-9]."
)

◆ CharNormClassifier()

int tesseract::Classify::CharNormClassifier ( TBLOB *  blob,
const TrainingSample &  sample,
ADAPT_RESULTS *  adapt_results
)

This routine extracts character normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Parameters
blob  blob to be classified
sample  templates to classify unknown against
adapt_results  place to put match results

Globals:

  • CharNormCutoffs expected num features for each class
  • AllProtosOn mask that enables all protos
  • AllConfigsOn mask that enables all configs

◆ CharNormTrainingSample()

int tesseract::Classify::CharNormTrainingSample ( bool  pruner_only,
int  keep_this,
const TrainingSample &  sample,
GenericVector< UnicharRating > *  results 
)

◆ ClassAndConfigIDToFontOrShapeID()

int tesseract::Classify::ClassAndConfigIDToFontOrShapeID ( int  class_id,
int  int_result_config 
) const

◆ ClassIDToDebugStr()

STRING tesseract::Classify::ClassIDToDebugStr ( const INT_TEMPLATES_STRUCT *  templates,
int  class_id,
int  config_id 
) const

◆ ClassifyAsNoise()

void tesseract::Classify::ClassifyAsNoise ( ADAPT_RESULTS *  results)

This routine computes a rating which reflects the likelihood that the blob being classified is a noise blob. NOTE: assumes that the blob length has already been computed and placed into Results.

Parameters
results  results to add noise classification to

Globals:

  • matcher_avg_noise_size avg. length of a noise blob

◆ ClearCharNormArray()

void tesseract::Classify::ClearCharNormArray ( uint8_t *  char_norm_array)

For each class in the unicharset, clears the corresponding entry in char_norm_array. char_norm_array is indexed by unichar_id.

Globals:

  • none
Parameters
char_norm_array  array to be cleared

◆ ComputeCharNormArrays()

void tesseract::Classify::ComputeCharNormArrays ( FEATURE_STRUCT *  norm_feature,
INT_TEMPLATES_STRUCT *  templates,
uint8_t *  char_norm_array,
uint8_t *  pruner_array 
)

◆ ComputeCorrectedRating()

double tesseract::Classify::ComputeCorrectedRating ( bool  debug,
int  unichar_id,
double  cp_rating,
double  im_rating,
int  feature_misses,
int  bottom,
int  top,
int  blob_length,
int  matcher_multiplier,
const uint8_t *  cn_factors 
)

◆ ComputeIntCharNormArray()

void tesseract::Classify::ComputeIntCharNormArray ( const FEATURE_STRUCT &  norm_feature,
uint8_t *  char_norm_array 
)

For each class in unicharset, computes the match between norm_feature and the normalization protos for that class. Converts this number to the range from 0 - 255 and stores it into char_norm_array. CharNormArray is indexed by unichar_id.

Globals:

  • PreTrainedTemplates current set of built-in templates
Parameters
norm_feature  character normalization feature
[out]  char_norm_array  place to put results of size unicharset.size()

◆ ComputeIntFeatures()

void tesseract::Classify::ComputeIntFeatures ( FEATURE_SET  Features,
INT_FEATURE_ARRAY  IntFeatures 
)

This routine converts each floating point pico-feature in Features into integer format and saves it into IntFeatures.

Globals:

  • none
Parameters
Features  floating point pico-features to be converted
[out]  IntFeatures  array to put converted features into

◆ ComputeNormMatch()

float tesseract::Classify::ComputeNormMatch ( CLASS_ID  ClassId,
const FEATURE_STRUCT &  feature,
bool  DebugMatch 
)

This routine compares Features against each character normalization proto for ClassId and returns the match rating of the best match.

Parameters
ClassId  id of class to match against
feature  character normalization feature
DebugMatch  controls dump of debug info

Globals: NormProtos character normalization prototypes

Returns
Best match rating for Feature against protos of ClassId.

◆ ConvertMatchesToChoices()

void tesseract::Classify::ConvertMatchesToChoices ( const DENORM &  denorm,
const TBOX &  box,
ADAPT_RESULTS *  Results,
BLOB_CHOICE_LIST *  Choices 
)

The function converts the given match ratings to the list of blob choices with ratings and certainties (used by the context checkers). If character fragments are present in the results, this function also makes sure that there is at least one non-fragmented classification included. For each classification result check the unicharset for "definite" ambiguities and modify the resulting Choices accordingly.

◆ ConvertProto()

void tesseract::Classify::ConvertProto ( PROTO  Proto,
int  ProtoId,
INT_CLASS  Class 
)

This routine converts Proto to integer format and installs it as ProtoId in Class.

Parameters
Proto  floating-pt proto to be converted to integer format
ProtoId  id of proto
Class  integer class to add converted proto to
Returns
none
Note
Globals: none

◆ CreateIntTemplates()

INT_TEMPLATES tesseract::Classify::CreateIntTemplates ( CLASSES  FloatProtos,
const UNICHARSET &  target_unicharset
)

This routine converts from the old floating point format to the new integer format.

Parameters
FloatProtos  prototypes in old floating pt format
target_unicharset  the UNICHARSET to use
Returns
New set of training templates in integer format.
Note
Globals: none

◆ DebugAdaptiveClassifier()

void tesseract::Classify::DebugAdaptiveClassifier ( TBLOB *  blob,
ADAPT_RESULTS *  Results
)
Parameters
blob  blob whose classification is being debugged
Results  results of match being debugged

Globals: none

◆ DisplayAdaptedChar()

void tesseract::Classify::DisplayAdaptedChar ( TBLOB *  blob,
INT_CLASS_STRUCT *  int_class
)

◆ DoAdaptiveMatch()

void tesseract::Classify::DoAdaptiveMatch ( TBLOB *  Blob,
ADAPT_RESULTS *  Results
)

This routine performs an adaptive classification. If we have not yet adapted to enough classes, a simple classification to the pre-trained templates is performed. Otherwise, we match the blob against the adapted templates. If the adapted templates do not match well, we try a match against the pre-trained templates. If an adapted template match is found, we do a match to any pre-trained templates which could be ambiguous. The results from all of these classifications are merged together into Results.

Parameters
Blob  blob to be classified
Results  place to put match results

Globals:

  • PreTrainedTemplates built-in training templates
  • AdaptedTemplates templates adapted for this page
  • matcher_reliable_adaptive_result rating limit for a great match

◆ double_VAR_H() [1/23]

tesseract::Classify::double_VAR_H ( classify_char_norm_range  ,
0.2  ,
"Character Normalization Range ..."   
)

◆ double_VAR_H() [2/23]

tesseract::Classify::double_VAR_H ( classify_min_norm_scale_x  ,
0.0  ,
"Min char x-norm scale ..."   
)

◆ double_VAR_H() [3/23]

tesseract::Classify::double_VAR_H ( classify_max_norm_scale_x  ,
0.325  ,
"Max char x-norm scale ..."   
)

◆ double_VAR_H() [4/23]

tesseract::Classify::double_VAR_H ( classify_min_norm_scale_y  ,
0.0  ,
"Min char y-norm scale ..."   
)

◆ double_VAR_H() [5/23]

tesseract::Classify::double_VAR_H ( classify_max_norm_scale_y  ,
0.325  ,
"Max char y-norm scale ..."   
)

◆ double_VAR_H() [6/23]

tesseract::Classify::double_VAR_H ( classify_max_rating_ratio  ,
1.5  ,
"Veto ratio between classifier ratings"   
)

◆ double_VAR_H() [7/23]

tesseract::Classify::double_VAR_H ( classify_max_certainty_margin  ,
5.5  ,
"Veto difference between classifier certainties"   
)

◆ double_VAR_H() [8/23]

tesseract::Classify::double_VAR_H ( matcher_good_threshold  ,
0.125  ,
"Good Match (0-1)"   
)

◆ double_VAR_H() [9/23]

tesseract::Classify::double_VAR_H ( matcher_reliable_adaptive_result  ,
0.0  ,
"Great Match (0-1)"   
)

◆ double_VAR_H() [10/23]

tesseract::Classify::double_VAR_H ( matcher_perfect_threshold  ,
0.02  ,
"Perfect Match (0-1)"   
)

◆ double_VAR_H() [11/23]

tesseract::Classify::double_VAR_H ( matcher_bad_match_pad  ,
0.15  ,
"Bad Match Pad (0-1)"   
)

◆ double_VAR_H() [12/23]

tesseract::Classify::double_VAR_H ( matcher_rating_margin  ,
0.1  ,
"New template margin (0-1)"   
)

◆ double_VAR_H() [13/23]

tesseract::Classify::double_VAR_H ( matcher_avg_noise_size  ,
12.0  ,
"Avg. noise blob length: "   
)

◆ double_VAR_H() [14/23]

tesseract::Classify::double_VAR_H ( matcher_clustering_max_angle_delta  ,
0.015  ,
"Maximum angle delta for prototype clustering"   
)

◆ double_VAR_H() [15/23]

tesseract::Classify::double_VAR_H ( classify_misfit_junk_penalty  ,
0.0  ,
"Penalty to apply when a non-alnum is vertically out of " "its expected textline position"   
)

◆ double_VAR_H() [16/23]

tesseract::Classify::double_VAR_H ( rating_scale  ,
1.5  ,
"Rating scaling factor"   
)

◆ double_VAR_H() [17/23]

tesseract::Classify::double_VAR_H ( certainty_scale  ,
20.0  ,
"Certainty scaling factor"   
)

◆ double_VAR_H() [18/23]

tesseract::Classify::double_VAR_H ( tessedit_class_miss_scale  ,
0.00390625  ,
"Scale factor for features not used"   
)

◆ double_VAR_H() [19/23]

tesseract::Classify::double_VAR_H ( classify_adapted_pruning_factor  ,
2.5  ,
"Prune poor adapted results this much worse than best result"   
)

◆ double_VAR_H() [20/23]

tesseract::Classify::double_VAR_H ( classify_adapted_pruning_threshold  ,
-1.0  ,
"Threshold at which classify_adapted_pruning_factor starts"   
)

◆ double_VAR_H() [21/23]

tesseract::Classify::double_VAR_H ( classify_character_fragments_garbage_certainty_threshold  ,
-3.0  ,
"Exclude fragments that do not match any whole character" " with at least this certainty"   
)

◆ double_VAR_H() [22/23]

tesseract::Classify::double_VAR_H ( speckle_large_max_size  ,
0.30  ,
"Max large speckle size"   
)

◆ double_VAR_H() [23/23]

tesseract::Classify::double_VAR_H ( speckle_rating_penalty  ,
10.0  ,
"Penalty to add to worst rating for noise"   
)

◆ EndAdaptiveClassifier()

void tesseract::Classify::EndAdaptiveClassifier ( )

This routine performs cleanup operations on the adaptive classifier. It should be called before the program is terminated. Its main function is to save the adapted templates to a file.

Globals:

  • AdaptedTemplates current set of adapted templates
  • #classify_save_adapted_templates TRUE if templates should be saved
  • #classify_enable_adaptive_matcher TRUE if adaptive matcher is enabled

◆ ExpandShapesAndApplyCorrections()

void tesseract::Classify::ExpandShapesAndApplyCorrections ( ADAPT_CLASS *  classes,
bool  debug,
int  class_id,
int  bottom,
int  top,
float  cp_rating,
int  blob_length,
int  matcher_multiplier,
const uint8_t *  cn_factors,
UnicharRating *  int_result,
ADAPT_RESULTS *  final_results
)

◆ ExtractFeatures()

void tesseract::Classify::ExtractFeatures ( const TBLOB &  blob,
bool  nonlinear_norm,
GenericVector< INT_FEATURE_STRUCT > *  bl_features,
GenericVector< INT_FEATURE_STRUCT > *  cn_features,
INT_FX_RESULT_STRUCT *  results,
GenericVector< int > *  outline_cn_counts 
)
static

◆ ExtractIntCNFeatures()

FEATURE_SET tesseract::Classify::ExtractIntCNFeatures ( const TBLOB &  blob,
const INT_FX_RESULT_STRUCT &  fx_info
)
Parameters
blob  blob to extract features from
fx_info
Returns
Integer character-normalized features for blob.

◆ ExtractIntGeoFeatures()

FEATURE_SET tesseract::Classify::ExtractIntGeoFeatures ( const TBLOB &  blob,
const INT_FX_RESULT_STRUCT &  fx_info
)
Parameters
blob  blob to extract features from
fx_info
Returns
Geometric (top/bottom/width) features for blob.

◆ ExtractOutlineFeatures()

FEATURE_SET tesseract::Classify::ExtractOutlineFeatures ( TBLOB *  Blob)

Convert each segment in the outline to a feature and return the features.

Parameters
Blob  blob to extract pico-features from
Returns
Outline-features for Blob.
Note
Globals: none

◆ ExtractPicoFeatures()

FEATURE_SET tesseract::Classify::ExtractPicoFeatures ( TBLOB *  Blob)

Operation: Dummy for now.

Globals:

  • classify_norm_method normalization method currently specified
Parameters
Blob  blob to extract pico-features from
Returns
Pico-features for Blob.

◆ FreeNormProtos()

void tesseract::Classify::FreeNormProtos ( )

◆ get_fontinfo_table() [1/2]

UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( )
inline

◆ get_fontinfo_table() [2/2]

const UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( ) const
inline

◆ get_fontset_table()

UnicityTable<FontSet>& tesseract::Classify::get_fontset_table ( )
inline

◆ GetAdaptiveFeatures()

int tesseract::Classify::GetAdaptiveFeatures ( TBLOB Blob,
INT_FEATURE_ARRAY  IntFeatures,
FEATURE_SET FloatFeatures 
)

This routine sets up the feature extractor to extract baseline normalized pico-features.

The extracted pico-features are converted to integer form and placed in IntFeatures. The original floating-pt. features are returned in FloatFeatures.

Globals: none

Parameters
Blobblob to extract features from
[out]IntFeaturesarray to fill with integer features
[out]FloatFeaturesplace to return actual floating-pt features
Returns
Number of pico-features returned (0 if an error occurred)

◆ GetAmbiguities()

UNICHAR_ID * tesseract::Classify::GetAmbiguities ( TBLOB Blob,
CLASS_ID  CorrectClass 
)

This routine matches blob to the built-in templates to find out if there are any classes other than the correct class which are potential ambiguities.

Parameters
Blobblob to get classification ambiguities for
CorrectClasscorrect class for Blob

Globals:

  • CurrentRatings used by qsort compare routine
  • PreTrainedTemplates built-in templates
Returns
String containing all possible ambiguous classes.

◆ GetCharNormFeature()

int tesseract::Classify::GetCharNormFeature ( const INT_FX_RESULT_STRUCT fx_info,
INT_TEMPLATES  templates,
uint8_t *  pruner_norm_array,
uint8_t *  char_norm_array 
)

This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob.

The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.

It then copies the char norm features into the IntFeatures array provided by the caller.

Parameters
templatesused to compute char norm adjustments
pruner_norm_arrayArray of factors from blob normalization process
char_norm_arrayarray to fill with dummy char norm adjustments
fx_info

Globals:
Returns
Number of features extracted or 0 if an error occurred.

◆ GetClassToDebug()

CLASS_ID tesseract::Classify::GetClassToDebug ( const char *  Prompt,
bool *  adaptive_on,
bool *  pretrained_on,
int *  shape_id 
)

This routine prompts the user with Prompt and waits for the user to enter something in the debug window.

Parameters
Promptprompt to print while waiting for input from window
adaptive_on
pretrained_on
shape_id
Returns
Character entered in the debug window.
Note
Globals: none

◆ getDict()

virtual Dict& tesseract::Classify::getDict ( )
inline virtual

Reimplemented in tesseract::Tesseract.

◆ GetFontinfoId()

int tesseract::Classify::GetFontinfoId ( ADAPT_CLASS  Class,
uint8_t  ConfigId 
)

◆ InitAdaptedClass()

void tesseract::Classify::InitAdaptedClass ( TBLOB * Blob,
CLASS_ID  ClassId,
int  FontinfoId,
ADAPT_CLASS  Class,
ADAPT_TEMPLATES  Templates 
)

This routine creates a new adapted class and uses Blob as the model for the first config in that class.

Parameters
Blobblob to model new class after
ClassIdid of the class to be initialized
FontinfoIdfont information inferred from pre-trained templates
Classadapted class to be initialized
Templatesadapted templates to add new class to

Globals:

◆ InitAdaptiveClassifier()

void tesseract::Classify::InitAdaptiveClassifier ( TessdataManager * mgr)

This routine reads in the training information needed by the adaptive classifier and saves it into global variables.

Parameters:

  • load_pre_trained_templates Indicates whether the pre-trained templates (inttemp, normproto and pffmtable components) should be loaded. Should only be set to true if the necessary classifier components are present in the [lang].traineddata file.

Globals:

  • BuiltInTemplatesFile file to get built-in temps from
  • BuiltInCutoffsFile file to get avg. feat per class from
  • classify_use_pre_adapted_templates enables use of pre-adapted templates

◆ INT_VAR_H() [1/16]

tesseract::Classify::INT_VAR_H ( tessedit_single_match  ,
FALSE  ,
"Top choice only from CP"   
)

◆ INT_VAR_H() [2/16]

tesseract::Classify::INT_VAR_H ( classify_debug_level  ,
,
"Classify debug level"   
)

◆ INT_VAR_H() [3/16]

tesseract::Classify::INT_VAR_H ( classify_norm_method  ,
character  ,
"Normalization Method ..."   
)

◆ INT_VAR_H() [4/16]

tesseract::Classify::INT_VAR_H ( matcher_debug_level  ,
,
"Matcher Debug Level"   
)

◆ INT_VAR_H() [5/16]

tesseract::Classify::INT_VAR_H ( matcher_debug_flags  ,
,
"Matcher Debug Flags"   
)

◆ INT_VAR_H() [6/16]

tesseract::Classify::INT_VAR_H ( classify_learning_debug_level  ,
,
"Learning Debug Level: "   
)

◆ INT_VAR_H() [7/16]

tesseract::Classify::INT_VAR_H ( matcher_permanent_classes_min  ,
,
"Min # of permanent classes"   
)

◆ INT_VAR_H() [8/16]

tesseract::Classify::INT_VAR_H ( matcher_min_examples_for_prototyping  ,
,
"Reliable Config Threshold"   
)

◆ INT_VAR_H() [9/16]

tesseract::Classify::INT_VAR_H ( matcher_sufficient_examples_for_prototyping  ,
,
"Enable adaption even if the ambiguities have not been seen"   
)

◆ INT_VAR_H() [10/16]

tesseract::Classify::INT_VAR_H ( classify_adapt_proto_threshold  ,
230  ,
"Threshold for good protos during adaptive 0-255"   
)

◆ INT_VAR_H() [11/16]

tesseract::Classify::INT_VAR_H ( classify_adapt_feature_threshold  ,
230  ,
"Threshold for good features during adaptive 0-255"   
)

◆ INT_VAR_H() [12/16]

tesseract::Classify::INT_VAR_H ( classify_class_pruner_threshold  ,
229  ,
"Class Pruner Threshold 0-255"   
)

◆ INT_VAR_H() [13/16]

tesseract::Classify::INT_VAR_H ( classify_class_pruner_multiplier  ,
15  ,
"Class Pruner Multiplier 0-255: "   
)

◆ INT_VAR_H() [14/16]

tesseract::Classify::INT_VAR_H ( classify_cp_cutoff_strength  ,
,
"Class Pruner CutoffStrength: "   
)

◆ INT_VAR_H() [15/16]

tesseract::Classify::INT_VAR_H ( classify_integer_matcher_multiplier  ,
10  ,
"Integer Matcher Multiplier 0-255: "   
)

◆ INT_VAR_H() [16/16]

tesseract::Classify::INT_VAR_H ( il1_adaption_test  ,
,
"Don't adapt to i/I at beginning of word"   
)

◆ LargeSpeckle()

bool tesseract::Classify::LargeSpeckle ( const TBLOB & blob)

◆ LearnBlob()

void tesseract::Classify::LearnBlob ( const STRING fontname,
TBLOB Blob,
const DENORM cn_denorm,
const INT_FX_RESULT_STRUCT fx_info,
const char *  blob_text 
)

◆ LearnPieces()

void tesseract::Classify::LearnPieces ( const char *  fontname,
int  start,
int  length,
float  threshold,
CharSegmentationType  segmentation,
const char *  correct_text,
WERD_RES word 
)

◆ LearnWord()

void tesseract::Classify::LearnWord ( const char *  fontname,
WERD_RES word 
)

◆ LooksLikeGarbage()

bool tesseract::Classify::LooksLikeGarbage ( TBLOB blob)

◆ MakeNewTemporaryConfig()

int tesseract::Classify::MakeNewTemporaryConfig ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  FontinfoId,
int  NumFeatures,
INT_FEATURE_ARRAY  Features,
FEATURE_SET  FloatFeatures 
)
Parameters
Templatesadapted templates to add new config to
ClassIdclass id to associate with new config
FontinfoIdfont information inferred from pre-trained templates
NumFeaturesnumber of features in IntFeatures
Featuresfeatures describing model for new config
FloatFeaturesfloating-pt representation of features
Returns
The id of the new config created, a negative integer in case of error.

◆ MakeNewTempProtos()

PROTO_ID tesseract::Classify::MakeNewTempProtos ( FEATURE_SET  Features,
int  NumBadFeat,
FEATURE_ID  BadFeat[],
INT_CLASS  IClass,
ADAPT_CLASS  Class,
BIT_VECTOR  TempProtoMask 
)

This routine finds sets of sequential bad features that all have the same angle and converts each set into a new temporary proto. The temp proto is added to the proto pruner for IClass, pushed onto the list of temp protos in Class, and added to TempProtoMask.

Parameters
Featuresfloating-pt features describing new character
NumBadFeatnumber of bad features to turn into protos
BadFeatfeature id's of bad features
IClassinteger class templates to add new protos to
Classadapted class templates to add new protos to
TempProtoMaskproto mask to add new protos to

Globals: none

Returns
Max proto id in class after all protos have been added.

◆ MakePermanent()

void tesseract::Classify::MakePermanent ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  ConfigId,
TBLOB Blob 
)
Parameters
Templatescurrent set of adaptive templates
ClassIdclass containing config to be made permanent
ConfigIdconfig to be made permanent
Blobcurrent blob being adapted to

Globals: none

◆ MasterMatcher()

void tesseract::Classify::MasterMatcher ( INT_TEMPLATES  templates,
int16_t  num_features,
const INT_FEATURE_STRUCT features,
const uint8_t *  norm_factors,
ADAPT_CLASS classes,
int  debug,
int  matcher_multiplier,
const TBOX blob_box,
const GenericVector< CP_RESULT_STRUCT > &  results,
ADAPT_RESULTS final_results 
)

Factored-out calls to IntegerMatcher based on class pruner results. Returns integer matcher results inside CLASS_PRUNER_RESULTS structure.

◆ NewAdaptedTemplates()

ADAPT_TEMPLATES tesseract::Classify::NewAdaptedTemplates ( bool  InitFromUnicharset)

Allocates memory for adapted templates.

Parameters
InitFromUnicharsetif true, add an empty class for each char in unicharset to the newly created templates
Returns
Ptr to new adapted templates.
Note
Globals: none

◆ NormalizeOutlines()

void tesseract::Classify::NormalizeOutlines ( LIST  Outlines,
float *  XScale,
float *  YScale 
)

This routine normalizes every outline in Outlines according to the currently selected normalization method. It also returns the scale factors that it used to do this scaling. The scale factors returned represent the x and y sizes in the normalized coordinate system that correspond to 1 pixel in the original coordinate system.

Globals:

  • classify_norm_method method being used for normalization
  • classify_char_norm_range map radius of gyration to this value

Parameters
Outlineslist of outlines to be normalized
XScalex-direction scale factor used by routine
YScaley-direction scale factor used by routine
Returns
none (Outlines are changed and XScale and YScale are updated)

◆ PrintAdaptedTemplates()

void tesseract::Classify::PrintAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine prints a summary of the adapted templates in Templates to File.

Parameters
Fileopen text file to print Templates to
Templatesadapted templates to print to File
Note
Globals: none

◆ PrintAdaptiveMatchResults()

void tesseract::Classify::PrintAdaptiveMatchResults ( const ADAPT_RESULTS results)

This routine writes the matches in Results to File.

Parameters
resultsmatch results to write to File

Globals: none

◆ PruneClasses()

int tesseract::Classify::PruneClasses ( const INT_TEMPLATES_STRUCT int_templates,
int  num_features,
int  keep_this,
const INT_FEATURE_STRUCT features,
const uint8_t *  normalization_factors,
const uint16_t *  expected_num_features,
GenericVector< CP_RESULT_STRUCT > *  results 
)

Runs the class pruner from int_templates on the given features, returning the number of classes output in results.

Parameters
int_templatesClass pruner tables
num_featuresNumber of features in blob
featuresArray of features
normalization_factorsArray of fudge factors from blob normalization process (by CLASS_INDEX)
expected_num_featuresArray of expected number of features for each class (by CLASS_INDEX)
resultsSorted Array of pruned classes. Must be an array of size at least int_templates->NumClasses.
keep_this

◆ ReadAdaptedTemplates()

ADAPT_TEMPLATES tesseract::Classify::ReadAdaptedTemplates ( TFile fp)

Read a set of adapted templates from file and return a ptr to the templates.

Parameters
fpopen text file to read adapted templates from
Returns
Ptr to adapted templates read from file.
Note
Globals: none

◆ ReadIntTemplates()

INT_TEMPLATES tesseract::Classify::ReadIntTemplates ( TFile fp)

This routine reads a set of integer templates from fp. The file must already be open and must be in the correct binary format.

Parameters
fpopen file to read templates from
Returns
Pointer to integer templates read from fp.
Note
Globals: none

◆ ReadNewCutoffs()

void tesseract::Classify::ReadNewCutoffs ( TFile * fp,
CLASS_CUTOFF_ARRAY  Cutoffs 
)

Open file, read in all of the class-id/cutoff pairs and insert them into the Cutoffs array. Cutoffs are indexed in the array by class id. Unused entries in the array are set to an arbitrarily high cutoff value.

Parameters
fpfile containing cutoff definitions
Cutoffsarray to put cutoffs into
Returns
none
Note
Globals: none

◆ ReadNormProtos()

NORM_PROTOS * tesseract::Classify::ReadNormProtos ( TFile * fp)

This routine allocates a new data structure to hold a set of character normalization protos. It then fills in the data structure by reading from the specified File.

Parameters
fpopen text file to read normalization protos from

Globals: none
Returns
Character normalization protos.

◆ RefreshDebugWindow()

void tesseract::Classify::RefreshDebugWindow ( ScrollView **  win,
const char *  msg,
int  y_offset,
const TBOX wbox 
)

◆ RemoveBadMatches()

void tesseract::Classify::RemoveBadMatches ( ADAPT_RESULTS Results)

This routine steps through each matching class in Results and removes it from the match list if its rating is worse than the BestRating plus a pad. In other words, all good matches get moved to the front of the classes array.

Parameters
Resultscontains matches to be filtered

Globals:

  • matcher_bad_match_pad defines a "bad match"

◆ RemoveExtraPuncs()

void tesseract::Classify::RemoveExtraPuncs ( ADAPT_RESULTS Results)

This routine discards extra digits or punctuation from the results. We keep only the top 2 punctuation answers and the top 1 digit answer if present.

Parameters
Resultscontains matches to be filtered

◆ ResetAdaptiveClassifierInternal()

void tesseract::Classify::ResetAdaptiveClassifierInternal ( )

◆ SetAdaptiveThreshold()

void tesseract::Classify::SetAdaptiveThreshold ( float  Threshold)

This routine resets the internal thresholds inside the integer matcher to correspond to the specified threshold.

Parameters
Thresholdthreshold for creating new templates

Globals:

  • matcher_good_threshold default good match rating

◆ SetStaticClassifier()

void tesseract::Classify::SetStaticClassifier ( ShapeClassifier * static_classifier)

◆ SettupPass1()

void tesseract::Classify::SettupPass1 ( )

This routine prepares the adaptive matcher for the start of the first pass. Learning is enabled (unless it is disabled for the whole program).

Note
This is somewhat redundant: it simply says that if learning is enabled then it will remain enabled on the first pass, and if it is disabled then it will remain disabled. This is only put here to make it very clear that learning is controlled directly by the global setting of EnableLearning.

Globals:

◆ SettupPass2()

void tesseract::Classify::SettupPass2 ( )

This routine prepares the adaptive matcher for the start of the second pass. Further learning is disabled.

Globals:

◆ SetupBLCNDenorms()

void tesseract::Classify::SetupBLCNDenorms ( const TBLOB blob,
bool  nonlinear_norm,
DENORM bl_denorm,
DENORM cn_denorm,
INT_FX_RESULT_STRUCT fx_info 
)
static

◆ shape_table()

const ShapeTable* tesseract::Classify::shape_table ( ) const
inline

◆ ShapeIDToClassID()

int tesseract::Classify::ShapeIDToClassID ( int  shape_id) const

◆ ShowBestMatchFor()

void tesseract::Classify::ShowBestMatchFor ( int  shape_id,
const INT_FEATURE_STRUCT features,
int  num_features 
)

This routine displays debug information for the best config of the given shape_id for the given set of features.

Parameters
shape_idclassifier id to work with
featuresfeatures of the unknown character
num_featuresNumber of features in the features array.

◆ ShowMatchDisplay()

void tesseract::Classify::ShowMatchDisplay ( )

This routine sends the shapes in the global display lists to the match debugger window.

Globals:

  • FeatureShapes display list containing feature matches
  • ProtoShapes display list containing proto matches

Returns
none

◆ StartBackupAdaptiveClassifier()

void tesseract::Classify::StartBackupAdaptiveClassifier ( )

◆ STRING_VAR_H()

tesseract::Classify::STRING_VAR_H ( classify_learn_debug_str  ,
""  ,
"Class str to debug learning"   
)

◆ SwitchAdaptiveClassifier()

void tesseract::Classify::SwitchAdaptiveClassifier ( )

◆ TempConfigReliable()

bool tesseract::Classify::TempConfigReliable ( CLASS_ID  class_id,
const TEMP_CONFIG config 
)

◆ UpdateAmbigsGroup()

void tesseract::Classify::UpdateAmbigsGroup ( CLASS_ID  class_id,
TBLOB Blob 
)

◆ WriteAdaptedTemplates()

void tesseract::Classify::WriteAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine saves Templates to File in a binary format.

Parameters
Fileopen file to write Templates to (written in a binary format)
Templatesset of adapted templates to write to File
Note
Globals: none

◆ WriteIntTemplates()

void tesseract::Classify::WriteIntTemplates ( FILE *  File,
INT_TEMPLATES  Templates,
const UNICHARSET target_unicharset 
)

This routine writes Templates to File. The format is an efficient binary format. File must already be open for writing.

Parameters
Fileopen file to write templates to
Templatestemplates to save into File
target_unicharsetthe UNICHARSET to use
Returns
none
Note
Globals: none

◆ WriteTRFile()

bool tesseract::Classify::WriteTRFile ( const STRING filename)

Member Data Documentation

◆ AdaptedTemplates

ADAPT_TEMPLATES tesseract::Classify::AdaptedTemplates

◆ AllConfigsOff

BIT_VECTOR tesseract::Classify::AllConfigsOff

◆ AllConfigsOn

BIT_VECTOR tesseract::Classify::AllConfigsOn

◆ AllProtosOn

BIT_VECTOR tesseract::Classify::AllProtosOn

◆ BackupAdaptedTemplates

ADAPT_TEMPLATES tesseract::Classify::BackupAdaptedTemplates

◆ BaselineCutoffs

uint16_t tesseract::Classify::BaselineCutoffs[MAX_NUM_CLASSES]
private

◆ CharNormCutoffs

uint16_t tesseract::Classify::CharNormCutoffs[MAX_NUM_CLASSES]
private

◆ dict_

Dict tesseract::Classify::dict_
private

◆ EnableLearning

bool tesseract::Classify::EnableLearning

◆ feature_defs_

FEATURE_DEFS_STRUCT tesseract::Classify::feature_defs_
protected

◆ fontinfo_table_

UnicityTable<FontInfo> tesseract::Classify::fontinfo_table_

◆ fontset_table_

UnicityTable<FontSet> tesseract::Classify::fontset_table_

◆ im_

IntegerMatcher tesseract::Classify::im_
protected

◆ learn_debug_win_

ScrollView* tesseract::Classify::learn_debug_win_
private

◆ learn_fragmented_word_debug_win_

ScrollView* tesseract::Classify::learn_fragmented_word_debug_win_
private

◆ learn_fragments_debug_win_

ScrollView* tesseract::Classify::learn_fragments_debug_win_
private

◆ NormProtos

NORM_PROTOS* tesseract::Classify::NormProtos

◆ NumAdaptationsFailed

int tesseract::Classify::NumAdaptationsFailed
private

◆ PreTrainedTemplates

INT_TEMPLATES tesseract::Classify::PreTrainedTemplates

◆ shape_table_

ShapeTable* tesseract::Classify::shape_table_
protected

◆ shapetable_cutoffs_

GenericVector<uint16_t> tesseract::Classify::shapetable_cutoffs_
private

◆ static_classifier_

ShapeClassifier* tesseract::Classify::static_classifier_
private

◆ TempProtoMask

BIT_VECTOR tesseract::Classify::TempProtoMask

◆ tr_file_data_

STRING tesseract::Classify::tr_file_data_
private

The documentation for this class was generated from the following files: