tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
tesseract::ClassPruner Class Reference

Public Member Functions

 ClassPruner (int max_classes)
 
 ~ClassPruner ()
 
void ComputeScores (const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features)
 
void AdjustForExpectedNumFeatures (const uint16_t *expected_num_features, int cutoff_strength)
 
void DisableDisabledClasses (const UNICHARSET &unicharset)
 
void DisableFragments (const UNICHARSET &unicharset)
 
void NormalizeForXheight (int norm_multiplier, const uint8_t *normalization_factors)
 
void NoNormalization ()
 
void PruneAndSort (int pruning_factor, int keep_this, bool max_of_non_fragments, const UNICHARSET &unicharset)
 
void DebugMatch (const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const INT_FEATURE_STRUCT *features) const
 
void SummarizeResult (const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const uint16_t *expected_num_features, int norm_multiplier, const uint8_t *normalization_factors) const
 
int SetupResults (GenericVector< CP_RESULT_STRUCT > *results) const
 

Private Attributes

int * class_count_
 
int * norm_count_
 
int * sort_key_
 
int * sort_index_
 
int max_classes_
 
int rounded_classes_
 
int pruning_threshold_
 
int num_features_
 
int num_classes_
 

Constructor & Destructor Documentation

◆ ClassPruner()

tesseract::ClassPruner::ClassPruner ( int  max_classes)
inline

◆ ~ClassPruner()

tesseract::ClassPruner::~ClassPruner ( )
inline

Member Function Documentation

◆ AdjustForExpectedNumFeatures()

void tesseract::ClassPruner::AdjustForExpectedNumFeatures ( const uint16_t *  expected_num_features,
int  cutoff_strength 
)
inline

Adjusts the scores according to the number of expected features. Used in lieu of a constant bias, this penalizes classes that expect more features than there are present. Thus an actual c will score higher for c than e, even though almost all the features match e as well as c, because e expects more features to be present.

◆ ComputeScores()

void tesseract::ClassPruner::ComputeScores ( const INT_TEMPLATES_STRUCT *  int_templates,
int  num_features,
const INT_FEATURE_STRUCT *  features 
)
inline

Computes the scores for every class in the character set, by summing the weights for each feature and stores the sums internally in class_count_.

◆ DebugMatch()

void tesseract::ClassPruner::DebugMatch ( const Classify &  classify,
const INT_TEMPLATES_STRUCT *  int_templates,
const INT_FEATURE_STRUCT *  features 
) const
inline

Prints debug info on the class pruner matches for the pruned classes only.

◆ DisableDisabledClasses()

void tesseract::ClassPruner::DisableDisabledClasses ( const UNICHARSET &  unicharset)
inline

Zeros the scores for classes disabled in the unicharset. Implements the black-list to recognize a subset of the character set.

◆ DisableFragments()

void tesseract::ClassPruner::DisableFragments ( const UNICHARSET &  unicharset)
inline

Zeros the scores of fragments.

◆ NoNormalization()

void tesseract::ClassPruner::NoNormalization ( )
inline

The no-op normalization copies the class_count_ array to norm_count_ unchanged.

◆ NormalizeForXheight()

void tesseract::ClassPruner::NormalizeForXheight ( int  norm_multiplier,
const uint8_t *  normalization_factors 
)
inline

Normalizes the counts for xheight, putting the normalized result in norm_count_. Applies a simple subtractive penalty for incorrect vertical position provided by the normalization_factors array, indexed by character class, and scaled by the norm_multiplier.

◆ PruneAndSort()

void tesseract::ClassPruner::PruneAndSort ( int  pruning_factor,
int  keep_this,
bool  max_of_non_fragments,
const UNICHARSET &  unicharset 
)
inline

Prunes the classes using <the maximum count> * pruning_factor/256 as a threshold for keeping classes. If max_of_non_fragments, then ignore fragments in computing the maximum count.

◆ SetupResults()

int tesseract::ClassPruner::SetupResults ( GenericVector< CP_RESULT_STRUCT > *  results) const
inline

Copies the pruned, sorted classes into the output results and returns the number of classes.

◆ SummarizeResult()

void tesseract::ClassPruner::SummarizeResult ( const Classify &  classify,
const INT_TEMPLATES_STRUCT *  int_templates,
const uint16_t *  expected_num_features,
int  norm_multiplier,
const uint8_t *  normalization_factors 
) const
inline

Prints a summary of the pruner result.

Member Data Documentation

◆ class_count_

int* tesseract::ClassPruner::class_count_
private

Array[rounded_classes_] of initial counts for each class.

◆ max_classes_

int tesseract::ClassPruner::max_classes_
private

Number of classes in this class pruner.

◆ norm_count_

int* tesseract::ClassPruner::norm_count_
private

Array[rounded_classes_] of modified counts for each class after normalizing for expected number of features, disabled classes, fragments, and xheights.

◆ num_classes_

int tesseract::ClassPruner::num_classes_
private

Final number of pruned classes.

◆ num_features_

int tesseract::ClassPruner::num_features_
private

The number of features used to compute the scores.

◆ pruning_threshold_

int tesseract::ClassPruner::pruning_threshold_
private

Threshold count applied to prune classes.

◆ rounded_classes_

int tesseract::ClassPruner::rounded_classes_
private

Rounded up number of classes used for array sizes.

◆ sort_index_

int* tesseract::ClassPruner::sort_index_
private

Array[rounded_classes_ +1] of classes corresponding to sort_key_.

◆ sort_key_

int* tesseract::ClassPruner::sort_key_
private

Array[rounded_classes_ +1] of pruned counts that gets sorted.


The documentation for this class was generated from the following file: