tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
tesseract::MasterTrainer Class Reference

#include <mastertrainer.h>

Collaboration diagram for tesseract::MasterTrainer:

Public Member Functions

 MasterTrainer (NormalizationMode norm_mode, bool shape_analysis, bool replicate_samples, int debug_level)
 
 ~MasterTrainer ()
 
bool Serialize (FILE *fp) const
 
void LoadUnicharset (const char *filename)
 
void SetFeatureSpace (const IntFeatureSpace &fs)
 
void ReadTrainingSamples (const char *page_name, const FEATURE_DEFS_STRUCT &feature_defs, bool verification)
 
void AddSample (bool verification, const char *unichar_str, TrainingSample *sample)
 
void LoadPageImages (const char *filename)
 
void PostLoadCleanup ()
 
void PreTrainingSetup ()
 
void SetupMasterShapes ()
 
void IncludeJunk ()
 
void ReplicateAndRandomizeSamplesIfRequired ()
 
bool LoadFontInfo (const char *filename)
 
bool LoadXHeights (const char *filename)
 
bool AddSpacingInfo (const char *filename)
 
int GetFontInfoId (const char *font_name)
 
int GetBestMatchingFontInfoId (const char *filename)
 
const STRING & GetTRFileName (int index) const
 
void SetupFlatShapeTable (ShapeTable *shape_table)
 
CLUSTERER * SetupForClustering (const ShapeTable &shape_table, const FEATURE_DEFS_STRUCT &feature_defs, int shape_id, int *num_samples)
 
void WriteInttempAndPFFMTable (const UNICHARSET &unicharset, const UNICHARSET &shape_set, const ShapeTable &shape_table, CLASS_STRUCT *float_classes, const char *inttemp_file, const char *pffmtable_file)
 
const UNICHARSET & unicharset () const
 
TrainingSampleSet * GetSamples ()
 
const ShapeTable & master_shapes () const
 
void DebugCanonical (const char *unichar_str1, const char *unichar_str2)
 
void DisplaySamples (const char *unichar_str1, int cloud_font, const char *unichar_str2, int canonical_font)
 
void TestClassifierVOld (bool replicate_samples, ShapeClassifier *test_classifier, ShapeClassifier *old_classifier)
 
void TestClassifierOnSamples (CountTypes error_mode, int report_level, bool replicate_samples, ShapeClassifier *test_classifier, STRING *report_string)
 
double TestClassifier (CountTypes error_mode, int report_level, bool replicate_samples, TrainingSampleSet *samples, ShapeClassifier *test_classifier, STRING *report_string)
 
float ShapeDistance (const ShapeTable &shapes, int s1, int s2)
 

Private Member Functions

void ReplaceFragmentedSamples ()
 
void ClusterShapes (int min_shapes, int max_shape_unichars, float max_dist, ShapeTable *shape_table)
 

Private Attributes

NormalizationMode norm_mode_
 
UNICHARSET unicharset_
 
IntFeatureSpace feature_space_
 
TrainingSampleSet samples_
 
TrainingSampleSet junk_samples_
 
TrainingSampleSet verify_samples_
 
ShapeTable master_shapes_
 
ShapeTable flat_shapes_
 
FontInfoTable fontinfo_table_
 
GenericVector< int32_t > xheights_
 
int charsetsize_
 
bool enable_shape_analysis_
 
bool enable_replication_
 
int * fragments_
 
int prev_unichar_id_
 
int debug_level_
 
IntFeatureMap feature_map_
 
GenericVector< Pix * > page_images_
 
GenericVector< STRING > tr_filenames_
 

Constructor & Destructor Documentation

◆ MasterTrainer()

tesseract::MasterTrainer::MasterTrainer ( NormalizationMode  norm_mode,
bool  shape_analysis,
bool  replicate_samples,
int  debug_level 
)

◆ ~MasterTrainer()

tesseract::MasterTrainer::~MasterTrainer ( )

Member Function Documentation

◆ AddSample()

void tesseract::MasterTrainer::AddSample ( bool  verification,
const char *  unichar_str,
TrainingSample *  sample 
)

◆ AddSpacingInfo()

bool tesseract::MasterTrainer::AddSpacingInfo ( const char *  filename)

◆ ClusterShapes()

void tesseract::MasterTrainer::ClusterShapes ( int  min_shapes,
int  max_shape_unichars,
float  max_dist,
ShapeTable *  shape_table 
)
private

◆ DebugCanonical()

void tesseract::MasterTrainer::DebugCanonical ( const char *  unichar_str1,
const char *  unichar_str2 
)

◆ DisplaySamples()

void tesseract::MasterTrainer::DisplaySamples ( const char *  unichar_str1,
int  cloud_font,
const char *  unichar_str2,
int  canonical_font 
)

◆ GetBestMatchingFontInfoId()

int tesseract::MasterTrainer::GetBestMatchingFontInfoId ( const char *  filename)

◆ GetFontInfoId()

int tesseract::MasterTrainer::GetFontInfoId ( const char *  font_name)

◆ GetSamples()

TrainingSampleSet* tesseract::MasterTrainer::GetSamples ( )
inline

◆ GetTRFileName()

const STRING& tesseract::MasterTrainer::GetTRFileName ( int  index) const
inline

◆ IncludeJunk()

void tesseract::MasterTrainer::IncludeJunk ( )

◆ LoadFontInfo()

bool tesseract::MasterTrainer::LoadFontInfo ( const char *  filename)

◆ LoadPageImages()

void tesseract::MasterTrainer::LoadPageImages ( const char *  filename)

◆ LoadUnicharset()

void tesseract::MasterTrainer::LoadUnicharset ( const char *  filename)

◆ LoadXHeights()

bool tesseract::MasterTrainer::LoadXHeights ( const char *  filename)

◆ master_shapes()

const ShapeTable& tesseract::MasterTrainer::master_shapes ( ) const
inline

◆ PostLoadCleanup()

void tesseract::MasterTrainer::PostLoadCleanup ( )

◆ PreTrainingSetup()

void tesseract::MasterTrainer::PreTrainingSetup ( )

◆ ReadTrainingSamples()

void tesseract::MasterTrainer::ReadTrainingSamples ( const char *  page_name,
const FEATURE_DEFS_STRUCT &  feature_defs,
bool  verification 
)

◆ ReplaceFragmentedSamples()

void tesseract::MasterTrainer::ReplaceFragmentedSamples ( )
private

◆ ReplicateAndRandomizeSamplesIfRequired()

void tesseract::MasterTrainer::ReplicateAndRandomizeSamplesIfRequired ( )

◆ Serialize()

bool tesseract::MasterTrainer::Serialize ( FILE *  fp) const

◆ SetFeatureSpace()

void tesseract::MasterTrainer::SetFeatureSpace ( const IntFeatureSpace &  fs)
inline

◆ SetupFlatShapeTable()

void tesseract::MasterTrainer::SetupFlatShapeTable ( ShapeTable *  shape_table)

◆ SetupForClustering()

CLUSTERER * tesseract::MasterTrainer::SetupForClustering ( const ShapeTable &  shape_table,
const FEATURE_DEFS_STRUCT &  feature_defs,
int  shape_id,
int *  num_samples 
)

◆ SetupMasterShapes()

void tesseract::MasterTrainer::SetupMasterShapes ( )

◆ ShapeDistance()

float tesseract::MasterTrainer::ShapeDistance ( const ShapeTable &  shapes,
int  s1,
int  s2 
)

◆ TestClassifier()

double tesseract::MasterTrainer::TestClassifier ( CountTypes  error_mode,
int  report_level,
bool  replicate_samples,
TrainingSampleSet *  samples,
ShapeClassifier *  test_classifier,
STRING *  report_string 
)

◆ TestClassifierOnSamples()

void tesseract::MasterTrainer::TestClassifierOnSamples ( CountTypes  error_mode,
int  report_level,
bool  replicate_samples,
ShapeClassifier *  test_classifier,
STRING *  report_string 
)

◆ TestClassifierVOld()

void tesseract::MasterTrainer::TestClassifierVOld ( bool  replicate_samples,
ShapeClassifier *  test_classifier,
ShapeClassifier *  old_classifier 
)

◆ unicharset()

const UNICHARSET& tesseract::MasterTrainer::unicharset ( ) const
inline

◆ WriteInttempAndPFFMTable()

void tesseract::MasterTrainer::WriteInttempAndPFFMTable ( const UNICHARSET &  unicharset,
const UNICHARSET &  shape_set,
const ShapeTable &  shape_table,
CLASS_STRUCT *  float_classes,
const char *  inttemp_file,
const char *  pffmtable_file 
)

Member Data Documentation

◆ charsetsize_

int tesseract::MasterTrainer::charsetsize_
private

◆ debug_level_

int tesseract::MasterTrainer::debug_level_
private

◆ enable_replication_

bool tesseract::MasterTrainer::enable_replication_
private

◆ enable_shape_analysis_

bool tesseract::MasterTrainer::enable_shape_analysis_
private

◆ feature_map_

IntFeatureMap tesseract::MasterTrainer::feature_map_
private

◆ feature_space_

IntFeatureSpace tesseract::MasterTrainer::feature_space_
private

◆ flat_shapes_

ShapeTable tesseract::MasterTrainer::flat_shapes_
private

◆ fontinfo_table_

FontInfoTable tesseract::MasterTrainer::fontinfo_table_
private

◆ fragments_

int* tesseract::MasterTrainer::fragments_
private

◆ junk_samples_

TrainingSampleSet tesseract::MasterTrainer::junk_samples_
private

◆ master_shapes_

ShapeTable tesseract::MasterTrainer::master_shapes_
private

◆ norm_mode_

NormalizationMode tesseract::MasterTrainer::norm_mode_
private

◆ page_images_

GenericVector<Pix*> tesseract::MasterTrainer::page_images_
private

◆ prev_unichar_id_

int tesseract::MasterTrainer::prev_unichar_id_
private

◆ samples_

TrainingSampleSet tesseract::MasterTrainer::samples_
private

◆ tr_filenames_

GenericVector<STRING> tesseract::MasterTrainer::tr_filenames_
private

◆ unicharset_

UNICHARSET tesseract::MasterTrainer::unicharset_
private

◆ verify_samples_

TrainingSampleSet tesseract::MasterTrainer::verify_samples_
private

◆ xheights_

GenericVector<int32_t> tesseract::MasterTrainer::xheights_
private

The documentation for this class was generated from the following files: