tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
tesseract::Textord Class Reference

#include <textord.h>

Collaboration diagram for tesseract::Textord:

Public Member Functions

 Textord (CCStruct *ccstruct)
 
 ~Textord ()=default
 
void TextordPage (PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void CleanupSingleRowResult (PageSegMode pageseg_mode, PAGE_RES *page_res)
 
bool use_cjk_fp_model () const
 
void set_use_cjk_fp_model (bool flag)
 
void to_spacing (ICOORD page_tr, TO_BLOCK_LIST *blocks)
 
ROW * make_prop_words (TO_ROW *row, FCOORD rotation)
 
ROW * make_blob_words (TO_ROW *row, FCOORD rotation)
 
void find_components (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
 
void filter_blobs (ICOORD page_tr, TO_BLOCK_LIST *blocks, bool testing_on)
 
void compute_block_xheight (TO_BLOCK *block, float gradient)
 
void make_spline_rows (TO_BLOCK *block, float gradient, bool testing_on)
 
 BOOL_VAR_H (textord_single_height_mode, false, "Script has no xheight, so use a single mode for horizontal text")
 
 BOOL_VAR_H (tosp_old_to_method, false, "Space stats use prechopping?")
 
 BOOL_VAR_H (tosp_old_to_constrain_sp_kn, false, "Constrain relative values of inter and intra-word gaps for " "old_to_method.")
 
 BOOL_VAR_H (tosp_only_use_prop_rows, true, "Block stats to use fixed pitch rows?")
 
 BOOL_VAR_H (tosp_force_wordbreak_on_punct, false, "Force word breaks on punct to break long lines in non-space " "delimited langs")
 
 BOOL_VAR_H (tosp_use_pre_chopping, false, "Space stats use prechopping?")
 
 BOOL_VAR_H (tosp_old_to_bug_fix, false, "Fix suspected bug in old code")
 
 BOOL_VAR_H (tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces")
 
 BOOL_VAR_H (tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces")
 
 BOOL_VAR_H (tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces")
 
 BOOL_VAR_H (tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces")
 
 BOOL_VAR_H (tosp_recovery_isolated_row_stats, true, "Use row alone when inadequate cert spaces")
 
 BOOL_VAR_H (tosp_only_small_gaps_for_kern, false, "Better guess")
 
 BOOL_VAR_H (tosp_all_flips_fuzzy, false, "Pass ANY flip to context?")
 
 BOOL_VAR_H (tosp_fuzzy_limit_all, true, "Don't restrict kn->sp fuzzy limit to tables")
 
 BOOL_VAR_H (tosp_stats_use_xht_gaps, true, "Use within xht gap for wd breaks")
 
 BOOL_VAR_H (tosp_use_xht_gaps, true, "Use within xht gap for wd breaks")
 
 BOOL_VAR_H (tosp_only_use_xht_gaps, false, "Only use within xht gap for wd breaks")
 
 BOOL_VAR_H (tosp_rule_9_test_punct, false, "Don't chng kn to space next to punct")
 
 BOOL_VAR_H (tosp_flip_fuzz_kn_to_sp, true, "Default flip")
 
 BOOL_VAR_H (tosp_flip_fuzz_sp_to_kn, true, "Default flip")
 
 BOOL_VAR_H (tosp_improve_thresh, false, "Enable improvement heuristic")
 
 INT_VAR_H (tosp_debug_level, 0, "Debug data")
 
 INT_VAR_H (tosp_enough_space_samples_for_median, 3, "or should we use mean")
 
 INT_VAR_H (tosp_redo_kern_limit, 10, "No.samples reqd to reestimate for row")
 
 INT_VAR_H (tosp_few_samples, 40, "No.gaps reqd with 1 large gap to treat as a table")
 
 INT_VAR_H (tosp_short_row, 20, "No.gaps reqd with few cert spaces to use certs")
 
 INT_VAR_H (tosp_sanity_method, 1, "How to avoid being silly")
 
 double_VAR_H (tosp_old_sp_kn_th_factor, 2.0, "Factor for defining space threshold in terms of space and " "kern sizes")
 
 double_VAR_H (tosp_threshold_bias1, 0, "how far between kern and space?")
 
 double_VAR_H (tosp_threshold_bias2, 0, "how far between kern and space?")
 
 double_VAR_H (tosp_narrow_fraction, 0.3, "Fract of xheight for narrow")
 
 double_VAR_H (tosp_narrow_aspect_ratio, 0.48, "narrow if w/h less than this")
 
 double_VAR_H (tosp_wide_fraction, 0.52, "Fract of xheight for wide")
 
 double_VAR_H (tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this")
 
 double_VAR_H (tosp_fuzzy_space_factor, 0.6, "Fract of xheight for fuzz sp")
 
 double_VAR_H (tosp_fuzzy_space_factor1, 0.5, "Fract of xheight for fuzz sp")
 
 double_VAR_H (tosp_fuzzy_space_factor2, 0.72, "Fract of xheight for fuzz sp")
 
 double_VAR_H (tosp_gap_factor, 0.83, "gap ratio to flip sp->kern")
 
 double_VAR_H (tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp")
 
 double_VAR_H (tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp")
 
 double_VAR_H (tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp")
 
 double_VAR_H (tosp_ignore_big_gaps, -1, "xht multiplier")
 
 double_VAR_H (tosp_ignore_very_big_gaps, 3.5, "xht multiplier")
 
 double_VAR_H (tosp_rep_space, 1.6, "rep gap multiplier for space")
 
 double_VAR_H (tosp_enough_small_gaps, 0.65, "Fract of kerns reqd for isolated row stats")
 
 double_VAR_H (tosp_table_kn_sp_ratio, 2.25, "Min difference of kn & sp in table")
 
 double_VAR_H (tosp_table_xht_sp_ratio, 0.33, "Expect spaces bigger than this")
 
 double_VAR_H (tosp_table_fuzzy_kn_sp_ratio, 3.0, "Fuzzy if less than this")
 
 double_VAR_H (tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg")
 
 double_VAR_H (tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg")
 
 double_VAR_H (tosp_min_sane_kn_sp, 1.5, "Don't trust spaces less than this time kn")
 
 double_VAR_H (tosp_init_guess_kn_mult, 2.2, "Thresh guess - mult kn by this")
 
 double_VAR_H (tosp_init_guess_xht_mult, 0.28, "Thresh guess - mult xht by this")
 
 double_VAR_H (tosp_max_sane_kn_thresh, 5.0, "Multiplier on kn to limit thresh")
 
 double_VAR_H (tosp_flip_caution, 0.0, "Don't autoflip kn to sp when large separation")
 
 double_VAR_H (tosp_large_kerning, 0.19, "Limit use of xht gap with large kns")
 
 double_VAR_H (tosp_dont_fool_with_small_kerns, -1, "Limit use of xht gap with odd small kns")
 
 double_VAR_H (tosp_near_lh_edge, 0, "Don't reduce box if the top left is non blank")
 
 double_VAR_H (tosp_silly_kn_sp_gap, 0.2, "Don't let sp minus kn get too small")
 
 double_VAR_H (tosp_pass_wide_fuzz_sp_to_context, 0.75, "How wide fuzzies need context")
 
 BOOL_VAR_H (textord_no_rejects, false, "Don't remove noise blobs")
 
 BOOL_VAR_H (textord_show_blobs, false, "Display unsorted blobs")
 
 BOOL_VAR_H (textord_show_boxes, false, "Display boxes")
 
 INT_VAR_H (textord_max_noise_size, 7, "Pixel size of noise")
 
 INT_VAR_H (textord_baseline_debug, 0, "Baseline debug level")
 
 double_VAR_H (textord_blob_size_bigile, 95, "Percentile for large blobs")
 
 double_VAR_H (textord_noise_area_ratio, 0.7, "Fraction of bounding box for noise")
 
 double_VAR_H (textord_blob_size_smallile, 20, "Percentile for small blobs")
 
 double_VAR_H (textord_initialx_ile, 0.75, "Ile of sizes for xheight guess")
 
 double_VAR_H (textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess")
 
 INT_VAR_H (textord_noise_sizefraction, 10, "Fraction of size for maxima")
 
 double_VAR_H (textord_noise_sizelimit, 0.5, "Fraction of x for big t count")
 
 INT_VAR_H (textord_noise_translimit, 16, "Transitions for normal blob")
 
 double_VAR_H (textord_noise_normratio, 2.0, "Dot to norm ratio for deletion")
 
 BOOL_VAR_H (textord_noise_rejwords, true, "Reject noise-like words")
 
 BOOL_VAR_H (textord_noise_rejrows, true, "Reject noise-like rows")
 
 double_VAR_H (textord_noise_syfract, 0.2, "xh fract error for norm blobs")
 
 double_VAR_H (textord_noise_sxfract, 0.4, "xh fract width error for norm blobs")
 
 double_VAR_H (textord_noise_hfract, 1.0/64, "Height fraction to discard outlines as speckle noise")
 
 INT_VAR_H (textord_noise_sncount, 1, "super norm blobs to save row")
 
 double_VAR_H (textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion")
 
 BOOL_VAR_H (textord_noise_debug, FALSE, "Debug row garbage detector")
 
 double_VAR_H (textord_blshift_maxshift, 0.00, "Max baseline shift")
 
 double_VAR_H (textord_blshift_xfraction, 9.99, "Min size of baseline shift")
 
compute_row_xheight

Estimate the xheight of this row. Compute the ascender rise and descender drop at the same time. Set xheight_evidence to the number of blobs with the chosen xheight that appear in this row.

void compute_row_xheight (TO_ROW *row, const FCOORD &rotation, float gradient, int block_line_size)
 

Private Member Functions

void MakeRows (PageSegMode pageseg_mode, const FCOORD &skew, int width, int height, TO_BLOCK_LIST *to_blocks)
 
void MakeBlockRows (int min_spacing, int max_spacing, const FCOORD &skew, TO_BLOCK *block, ScrollView *win)
 
void make_old_baselines (TO_BLOCK *block, bool testing_on, float gradient)
 
void correlate_lines (TO_BLOCK *block, float gradient)
 
void correlate_neighbours (TO_BLOCK *block, TO_ROW **rows, int rowcount)
 
int correlate_with_stats (TO_ROW **rows, int rowcount, TO_BLOCK *block)
 
void find_textlines (TO_BLOCK *block, TO_ROW *row, int degree, QSPLINE *spline)
 
void block_spacing_stats (TO_BLOCK *block, GAPMAP *gapmap, bool &old_text_ord_proportional, int16_t &block_space_gap_width, int16_t &block_non_space_gap_width)
 
void row_spacing_stats (TO_ROW *row, GAPMAP *gapmap, int16_t block_idx, int16_t row_idx, int16_t block_space_gap_width, int16_t block_non_space_gap_width)
 
void old_to_method (TO_ROW *row, STATS *all_gap_stats, STATS *space_gap_stats, STATS *small_gap_stats, int16_t block_space_gap_width, int16_t block_non_space_gap_width)
 
bool isolated_row_stats (TO_ROW *row, GAPMAP *gapmap, STATS *all_gap_stats, bool suspected_table, int16_t block_idx, int16_t row_idx)
 
int16_t stats_count_under (STATS *stats, int16_t threshold)
 
void improve_row_threshold (TO_ROW *row, STATS *all_gap_stats)
 
bool make_a_word_break (TO_ROW *row, TBOX blob_box, int16_t prev_gap, TBOX prev_blob_box, int16_t real_current_gap, int16_t within_xht_current_gap, TBOX next_blob_box, int16_t next_gap, uint8_t &blanks, bool &fuzzy_sp, bool &fuzzy_non, bool &prev_gap_was_a_space, bool &break_at_next_gap)
 
bool narrow_blob (TO_ROW *row, TBOX blob_box)
 
bool wide_blob (TO_ROW *row, TBOX blob_box)
 
bool suspected_punct_blob (TO_ROW *row, TBOX box)
 
void peek_at_next_gap (TO_ROW *row, BLOBNBOX_IT box_it, TBOX &next_blob_box, int16_t &next_gap, int16_t &next_within_xht_gap)
 
void mark_gap (TBOX blob, int16_t rule, int16_t prev_gap, int16_t prev_blob_width, int16_t current_gap, int16_t next_blob_width, int16_t next_gap)
 
float find_mean_blob_spacing (WERD *word)
 
bool ignore_big_gap (TO_ROW *row, int32_t row_length, GAPMAP *gapmap, int16_t left, int16_t right)
 
TBOX reduced_box_next (TO_ROW *row, BLOBNBOX_IT *it)
 
TBOX reduced_box_for_blob (BLOBNBOX *blob, TO_ROW *row, int16_t *left_above_xht)
 
float filter_noise_blobs (BLOBNBOX_LIST *src_list, BLOBNBOX_LIST *noise_list, BLOBNBOX_LIST *small_list, BLOBNBOX_LIST *large_list)
 
void cleanup_nontext_block (BLOCK *block)
 
void cleanup_blocks (bool clean_noise, BLOCK_LIST *blocks)
 
bool clean_noise_from_row (ROW *row)
 
void clean_noise_from_words (ROW *row)
 
void clean_small_noise_from_words (ROW *row)
 
void TransferDiacriticsToBlockGroups (BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks)
 
void TransferDiacriticsToWords (BLOBNBOX_LIST *diacritic_blobs, const FCOORD &rotation, WordGrid *word_grid)
 

Private Attributes

CCStruct * ccstruct_
 
ICOORD page_tr_
 
bool use_cjk_fp_model_
 

Constructor & Destructor Documentation

◆ Textord()

tesseract::Textord::Textord ( CCStruct *  ccstruct)
explicit

◆ ~Textord()

tesseract::Textord::~Textord ( )
default

Member Function Documentation

◆ block_spacing_stats()

void tesseract::Textord::block_spacing_stats ( TO_BLOCK *  block,
GAPMAP *  gapmap,
bool &  old_text_ord_proportional,
int16_t &  block_space_gap_width,
int16_t &  block_non_space_gap_width 
)
private

◆ BOOL_VAR_H() [1/28]

tesseract::Textord::BOOL_VAR_H ( textord_single_height_mode  ,
false  ,
"Script has no xheight, so use a single mode for horizontal text"   
)

◆ BOOL_VAR_H() [2/28]

tesseract::Textord::BOOL_VAR_H ( tosp_old_to_method  ,
false  ,
"Space stats use prechopping?"   
)

◆ BOOL_VAR_H() [3/28]

tesseract::Textord::BOOL_VAR_H ( tosp_old_to_constrain_sp_kn  ,
false  ,
"Constrain relative values of inter and intra-word gaps for " "old_to_method."   
)

◆ BOOL_VAR_H() [4/28]

tesseract::Textord::BOOL_VAR_H ( tosp_only_use_prop_rows  ,
true  ,
"Block stats to use fixed pitch rows?"   
)

◆ BOOL_VAR_H() [5/28]

tesseract::Textord::BOOL_VAR_H ( tosp_force_wordbreak_on_punct  ,
false  ,
"Force word breaks on punct to break long lines in non-space " "delimited langs"   
)

◆ BOOL_VAR_H() [6/28]

tesseract::Textord::BOOL_VAR_H ( tosp_use_pre_chopping  ,
false  ,
"Space stats use prechopping?"   
)

◆ BOOL_VAR_H() [7/28]

tesseract::Textord::BOOL_VAR_H ( tosp_old_to_bug_fix  ,
false  ,
"Fix suspected bug in old code"   
)

◆ BOOL_VAR_H() [8/28]

tesseract::Textord::BOOL_VAR_H ( tosp_block_use_cert_spaces  ,
true  ,
"Only stat OBVIOUS spaces"   
)

◆ BOOL_VAR_H() [9/28]

tesseract::Textord::BOOL_VAR_H ( tosp_row_use_cert_spaces  ,
true  ,
"Only stat OBVIOUS spaces"   
)

◆ BOOL_VAR_H() [10/28]

tesseract::Textord::BOOL_VAR_H ( tosp_narrow_blobs_not_cert  ,
true  ,
"Only stat OBVIOUS spaces"   
)

◆ BOOL_VAR_H() [11/28]

tesseract::Textord::BOOL_VAR_H ( tosp_row_use_cert_spaces1  ,
true  ,
"Only stat OBVIOUS spaces"   
)

◆ BOOL_VAR_H() [12/28]

tesseract::Textord::BOOL_VAR_H ( tosp_recovery_isolated_row_stats  ,
true  ,
"Use row alone when inadequate cert spaces"   
)

◆ BOOL_VAR_H() [13/28]

tesseract::Textord::BOOL_VAR_H ( tosp_only_small_gaps_for_kern  ,
false  ,
"Better guess"   
)

◆ BOOL_VAR_H() [14/28]

tesseract::Textord::BOOL_VAR_H ( tosp_all_flips_fuzzy  ,
false  ,
"Pass ANY flip to context?"   
)

◆ BOOL_VAR_H() [15/28]

tesseract::Textord::BOOL_VAR_H ( tosp_fuzzy_limit_all  ,
true  ,
"Don't restrict kn->sp fuzzy limit to tables"   
)

◆ BOOL_VAR_H() [16/28]

tesseract::Textord::BOOL_VAR_H ( tosp_stats_use_xht_gaps  ,
true  ,
"Use within xht gap for wd breaks"   
)

◆ BOOL_VAR_H() [17/28]

tesseract::Textord::BOOL_VAR_H ( tosp_use_xht_gaps  ,
true  ,
"Use within xht gap for wd breaks"   
)

◆ BOOL_VAR_H() [18/28]

tesseract::Textord::BOOL_VAR_H ( tosp_only_use_xht_gaps  ,
false  ,
"Only use within xht gap for wd breaks"   
)

◆ BOOL_VAR_H() [19/28]

tesseract::Textord::BOOL_VAR_H ( tosp_rule_9_test_punct  ,
false  ,
"Don't chng kn to space next to punct"   
)

◆ BOOL_VAR_H() [20/28]

tesseract::Textord::BOOL_VAR_H ( tosp_flip_fuzz_kn_to_sp  ,
true  ,
"Default flip"   
)

◆ BOOL_VAR_H() [21/28]

tesseract::Textord::BOOL_VAR_H ( tosp_flip_fuzz_sp_to_kn  ,
true  ,
"Default flip"   
)

◆ BOOL_VAR_H() [22/28]

tesseract::Textord::BOOL_VAR_H ( tosp_improve_thresh  ,
false  ,
"Enable improvement heuristic"   
)

◆ BOOL_VAR_H() [23/28]

tesseract::Textord::BOOL_VAR_H ( textord_no_rejects  ,
false  ,
"Don't remove noise blobs"   
)

◆ BOOL_VAR_H() [24/28]

tesseract::Textord::BOOL_VAR_H ( textord_show_blobs  ,
false  ,
"Display unsorted blobs"   
)

◆ BOOL_VAR_H() [25/28]

tesseract::Textord::BOOL_VAR_H ( textord_show_boxes  ,
false  ,
"Display boxes"   
)

◆ BOOL_VAR_H() [26/28]

tesseract::Textord::BOOL_VAR_H ( textord_noise_rejwords  ,
true  ,
"Reject noise-like words"   
)

◆ BOOL_VAR_H() [27/28]

tesseract::Textord::BOOL_VAR_H ( textord_noise_rejrows  ,
true  ,
"Reject noise-like rows"   
)

◆ BOOL_VAR_H() [28/28]

tesseract::Textord::BOOL_VAR_H ( textord_noise_debug  ,
FALSE  ,
"Debug row garbage detector"   
)

◆ clean_noise_from_row()

bool tesseract::Textord::clean_noise_from_row ( ROW *  row)
private

◆ clean_noise_from_words()

void tesseract::Textord::clean_noise_from_words ( ROW *  row)
private

◆ clean_small_noise_from_words()

void tesseract::Textord::clean_small_noise_from_words ( ROW *  row)
private

◆ cleanup_blocks()

void tesseract::Textord::cleanup_blocks ( bool  clean_noise,
BLOCK_LIST *  blocks 
)
private

◆ cleanup_nontext_block()

void tesseract::Textord::cleanup_nontext_block ( BLOCK *  block)
private

◆ CleanupSingleRowResult()

void tesseract::Textord::CleanupSingleRowResult ( PageSegMode  pageseg_mode,
PAGE_RES *  page_res 
)

◆ compute_block_xheight()

void tesseract::Textord::compute_block_xheight ( TO_BLOCK *  block,
float  gradient 
)

◆ compute_row_xheight()

void tesseract::Textord::compute_row_xheight ( TO_ROW *  row,
const FCOORD &  rotation,
float  gradient,
int  block_line_size 
)

◆ correlate_lines()

void tesseract::Textord::correlate_lines ( TO_BLOCK *  block,
float  gradient 
)
private

◆ correlate_neighbours()

void tesseract::Textord::correlate_neighbours ( TO_BLOCK *  block,
TO_ROW **  rows,
int  rowcount 
)
private

◆ correlate_with_stats()

int tesseract::Textord::correlate_with_stats ( TO_ROW **  rows,
int  rowcount,
TO_BLOCK *  block 
)
private

◆ double_VAR_H() [1/46]

tesseract::Textord::double_VAR_H ( tosp_old_sp_kn_th_factor  ,
2.0  ,
"Factor for defining space threshold in terms of space and " "kern sizes"   
)

◆ double_VAR_H() [2/46]

tesseract::Textord::double_VAR_H ( tosp_threshold_bias1  ,
0  ,
"how far between kern and space?"   
)

◆ double_VAR_H() [3/46]

tesseract::Textord::double_VAR_H ( tosp_threshold_bias2  ,
0  ,
"how far between kern and space?"   
)

◆ double_VAR_H() [4/46]

tesseract::Textord::double_VAR_H ( tosp_narrow_fraction  ,
0.3  ,
"Fract of xheight for narrow"   
)

◆ double_VAR_H() [5/46]

tesseract::Textord::double_VAR_H ( tosp_narrow_aspect_ratio  ,
0.48  ,
"narrow if w/h less than this"   
)

◆ double_VAR_H() [6/46]

tesseract::Textord::double_VAR_H ( tosp_wide_fraction  ,
0.52  ,
"Fract of xheight for wide"   
)

◆ double_VAR_H() [7/46]

tesseract::Textord::double_VAR_H ( tosp_wide_aspect_ratio  ,
0.0  ,
"wide if w/h less than this"   
)

◆ double_VAR_H() [8/46]

tesseract::Textord::double_VAR_H ( tosp_fuzzy_space_factor  ,
0.6  ,
"Fract of xheight for fuzz sp"   
)

◆ double_VAR_H() [9/46]

tesseract::Textord::double_VAR_H ( tosp_fuzzy_space_factor1  ,
0.5  ,
"Fract of xheight for fuzz sp"   
)

◆ double_VAR_H() [10/46]

tesseract::Textord::double_VAR_H ( tosp_fuzzy_space_factor2  ,
0.72  ,
"Fract of xheight for fuzz sp"   
)

◆ double_VAR_H() [11/46]

tesseract::Textord::double_VAR_H ( tosp_gap_factor  ,
0.83  ,
"gap ratio to flip sp->kern"   
)

◆ double_VAR_H() [12/46]

tesseract::Textord::double_VAR_H ( tosp_kern_gap_factor1  ,
2.0  ,
"gap ratio to flip kern->sp"   
)

◆ double_VAR_H() [13/46]

tesseract::Textord::double_VAR_H ( tosp_kern_gap_factor2  ,
1.3  ,
"gap ratio to flip kern->sp"   
)

◆ double_VAR_H() [14/46]

tesseract::Textord::double_VAR_H ( tosp_kern_gap_factor3  ,
2.5  ,
"gap ratio to flip kern->sp"   
)

◆ double_VAR_H() [15/46]

tesseract::Textord::double_VAR_H ( tosp_ignore_big_gaps  ,
-1  ,
"xht multiplier"   
)

◆ double_VAR_H() [16/46]

tesseract::Textord::double_VAR_H ( tosp_ignore_very_big_gaps  ,
3.5  ,
"xht multiplier"   
)

◆ double_VAR_H() [17/46]

tesseract::Textord::double_VAR_H ( tosp_rep_space  ,
1.6  ,
"rep gap multiplier for space"   
)

◆ double_VAR_H() [18/46]

tesseract::Textord::double_VAR_H ( tosp_enough_small_gaps  ,
0.65  ,
"Fract of kerns reqd for isolated row stats"   
)

◆ double_VAR_H() [19/46]

tesseract::Textord::double_VAR_H ( tosp_table_kn_sp_ratio  ,
2.25  ,
"Min difference of kn & sp in table"   
)

◆ double_VAR_H() [20/46]

tesseract::Textord::double_VAR_H ( tosp_table_xht_sp_ratio  ,
0.33  ,
"Expect spaces bigger than this"   
)

◆ double_VAR_H() [21/46]

tesseract::Textord::double_VAR_H ( tosp_table_fuzzy_kn_sp_ratio  ,
3.0  ,
"Fuzzy if less than this"   
)

◆ double_VAR_H() [22/46]

tesseract::Textord::double_VAR_H ( tosp_fuzzy_kn_fraction  ,
0.5  ,
"New fuzzy kn alg"   
)

◆ double_VAR_H() [23/46]

tesseract::Textord::double_VAR_H ( tosp_fuzzy_sp_fraction  ,
0.5  ,
"New fuzzy sp alg"   
)

◆ double_VAR_H() [24/46]

tesseract::Textord::double_VAR_H ( tosp_min_sane_kn_sp  ,
1.5  ,
"Don't trust spaces less than this time kn"   
)

◆ double_VAR_H() [25/46]

tesseract::Textord::double_VAR_H ( tosp_init_guess_kn_mult  ,
2.2  ,
"Thresh guess - mult kn by this"   
)

◆ double_VAR_H() [26/46]

tesseract::Textord::double_VAR_H ( tosp_init_guess_xht_mult  ,
0.28  ,
"Thresh guess - mult xht by this"   
)

◆ double_VAR_H() [27/46]

tesseract::Textord::double_VAR_H ( tosp_max_sane_kn_thresh  ,
5.0  ,
"Multiplier on kn to limit thresh"   
)

◆ double_VAR_H() [28/46]

tesseract::Textord::double_VAR_H ( tosp_flip_caution  ,
0.0  ,
"Don't autoflip kn to sp when large separation"   
)

◆ double_VAR_H() [29/46]

tesseract::Textord::double_VAR_H ( tosp_large_kerning  ,
0.19  ,
"Limit use of xht gap with large kns"   
)

◆ double_VAR_H() [30/46]

tesseract::Textord::double_VAR_H ( tosp_dont_fool_with_small_kerns  ,
-1  ,
"Limit use of xht gap with odd small kns"   
)

◆ double_VAR_H() [31/46]

tesseract::Textord::double_VAR_H ( tosp_near_lh_edge  ,
0  ,
"Don't reduce box if the top left is non blank"   
)

◆ double_VAR_H() [32/46]

tesseract::Textord::double_VAR_H ( tosp_silly_kn_sp_gap  ,
0.2  ,
"Don't let sp minus kn get too small"   
)

◆ double_VAR_H() [33/46]

tesseract::Textord::double_VAR_H ( tosp_pass_wide_fuzz_sp_to_context  ,
0.75  ,
"How wide fuzzies need context"   
)

◆ double_VAR_H() [34/46]

tesseract::Textord::double_VAR_H ( textord_blob_size_bigile  ,
95  ,
"Percentile for large blobs"   
)

◆ double_VAR_H() [35/46]

tesseract::Textord::double_VAR_H ( textord_noise_area_ratio  ,
0.7  ,
"Fraction of bounding box for noise"   
)

◆ double_VAR_H() [36/46]

tesseract::Textord::double_VAR_H ( textord_blob_size_smallile  ,
20  ,
"Percentile for small blobs"   
)

◆ double_VAR_H() [37/46]

tesseract::Textord::double_VAR_H ( textord_initialx_ile  ,
0.75  ,
"Ile of sizes for xheight guess"   
)

◆ double_VAR_H() [38/46]

tesseract::Textord::double_VAR_H ( textord_initialasc_ile  ,
0.90  ,
"Ile of sizes for xheight guess"   
)

◆ double_VAR_H() [39/46]

tesseract::Textord::double_VAR_H ( textord_noise_sizelimit  ,
0.5  ,
"Fraction of x for big t count"   
)

◆ double_VAR_H() [40/46]

tesseract::Textord::double_VAR_H ( textord_noise_normratio  ,
2.0  ,
"Dot to norm ratio for deletion"   
)

◆ double_VAR_H() [41/46]

tesseract::Textord::double_VAR_H ( textord_noise_syfract  ,
0.2  ,
"xh fract error for norm blobs"   
)

◆ double_VAR_H() [42/46]

tesseract::Textord::double_VAR_H ( textord_noise_sxfract  ,
0.4  ,
"xh fract width error for norm blobs"   
)

◆ double_VAR_H() [43/46]

tesseract::Textord::double_VAR_H ( textord_noise_hfract  ,
1.0/64  ,
"Height fraction to discard outlines as speckle noise"   
)

◆ double_VAR_H() [44/46]

tesseract::Textord::double_VAR_H ( textord_noise_rowratio  ,
6.0  ,
"Dot to norm ratio for deletion"   
)

◆ double_VAR_H() [45/46]

tesseract::Textord::double_VAR_H ( textord_blshift_maxshift  ,
0.00  ,
"Max baseline shift"   
)

◆ double_VAR_H() [46/46]

tesseract::Textord::double_VAR_H ( textord_blshift_xfraction  ,
9.99  ,
"Min size of baseline shift"   
)

◆ filter_blobs()

void tesseract::Textord::filter_blobs ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks,
bool  testing_on 
)

◆ filter_noise_blobs()

float tesseract::Textord::filter_noise_blobs ( BLOBNBOX_LIST *  src_list,
BLOBNBOX_LIST *  noise_list,
BLOBNBOX_LIST *  small_list,
BLOBNBOX_LIST *  large_list 
)
private

◆ find_components()

void tesseract::Textord::find_components ( Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

◆ find_mean_blob_spacing()

float tesseract::Textord::find_mean_blob_spacing ( WERD *  word)
private

◆ find_textlines()

void tesseract::Textord::find_textlines ( TO_BLOCK *  block,
TO_ROW *  row,
int  degree,
QSPLINE *  spline 
)
private

◆ ignore_big_gap()

bool tesseract::Textord::ignore_big_gap ( TO_ROW *  row,
int32_t  row_length,
GAPMAP *  gapmap,
int16_t  left,
int16_t  right 
)
private

◆ improve_row_threshold()

void tesseract::Textord::improve_row_threshold ( TO_ROW *  row,
STATS *  all_gap_stats 
)
private

◆ INT_VAR_H() [1/11]

tesseract::Textord::INT_VAR_H ( tosp_debug_level  ,
0  ,
"Debug data"   
)

◆ INT_VAR_H() [2/11]

tesseract::Textord::INT_VAR_H ( tosp_enough_space_samples_for_median  ,
3  ,
"or should we use mean"   
)

◆ INT_VAR_H() [3/11]

tesseract::Textord::INT_VAR_H ( tosp_redo_kern_limit  ,
10  ,
"No.samples reqd to reestimate for row"   
)

◆ INT_VAR_H() [4/11]

tesseract::Textord::INT_VAR_H ( tosp_few_samples  ,
40  ,
"No.gaps reqd with 1 large gap to treat as a table"   
)

◆ INT_VAR_H() [5/11]

tesseract::Textord::INT_VAR_H ( tosp_short_row  ,
20  ,
"No.gaps reqd with few cert spaces to use certs"   
)

◆ INT_VAR_H() [6/11]

tesseract::Textord::INT_VAR_H ( tosp_sanity_method  ,
1  ,
"How to avoid being silly"   
)

◆ INT_VAR_H() [7/11]

tesseract::Textord::INT_VAR_H ( textord_max_noise_size  ,
7  ,
"Pixel size of noise"   
)

◆ INT_VAR_H() [8/11]

tesseract::Textord::INT_VAR_H ( textord_baseline_debug  ,
0  ,
"Baseline debug level"   
)

◆ INT_VAR_H() [9/11]

tesseract::Textord::INT_VAR_H ( textord_noise_sizefraction  ,
10  ,
"Fraction of size for maxima"   
)

◆ INT_VAR_H() [10/11]

tesseract::Textord::INT_VAR_H ( textord_noise_translimit  ,
16  ,
"Transitions for normal blob"   
)

◆ INT_VAR_H() [11/11]

tesseract::Textord::INT_VAR_H ( textord_noise_sncount  ,
1  ,
"super norm blobs to save row"   
)

◆ isolated_row_stats()

bool tesseract::Textord::isolated_row_stats ( TO_ROW *  row,
GAPMAP *  gapmap,
STATS *  all_gap_stats,
bool  suspected_table,
int16_t  block_idx,
int16_t  row_idx 
)
private

◆ make_a_word_break()

bool tesseract::Textord::make_a_word_break ( TO_ROW *  row,
TBOX  blob_box,
int16_t  prev_gap,
TBOX  prev_blob_box,
int16_t  real_current_gap,
int16_t  within_xht_current_gap,
TBOX  next_blob_box,
int16_t  next_gap,
uint8_t &  blanks,
bool &  fuzzy_sp,
bool &  fuzzy_non,
bool &  prev_gap_was_a_space,
bool &  break_at_next_gap 
)
private

◆ make_blob_words()

ROW * tesseract::Textord::make_blob_words ( TO_ROW *  row,
FCOORD  rotation 
)

◆ make_old_baselines()

void tesseract::Textord::make_old_baselines ( TO_BLOCK *  block,
bool  testing_on,
float  gradient 
)
private

◆ make_prop_words()

ROW * tesseract::Textord::make_prop_words ( TO_ROW *  row,
FCOORD  rotation 
)

◆ make_spline_rows()

void tesseract::Textord::make_spline_rows ( TO_BLOCK *  block,
float  gradient,
bool  testing_on 
)

◆ MakeBlockRows()

void tesseract::Textord::MakeBlockRows ( int  min_spacing,
int  max_spacing,
const FCOORD &  skew,
TO_BLOCK *  block,
ScrollView *  win 
)
private

◆ MakeRows()

void tesseract::Textord::MakeRows ( PageSegMode  pageseg_mode,
const FCOORD &  skew,
int  width,
int  height,
TO_BLOCK_LIST *  to_blocks 
)
private

◆ mark_gap()

void tesseract::Textord::mark_gap ( TBOX  blob,
int16_t  rule,
int16_t  prev_gap,
int16_t  prev_blob_width,
int16_t  current_gap,
int16_t  next_blob_width,
int16_t  next_gap 
)
private

◆ narrow_blob()

bool tesseract::Textord::narrow_blob ( TO_ROW *  row,
TBOX  blob_box 
)
private

◆ old_to_method()

void tesseract::Textord::old_to_method ( TO_ROW *  row,
STATS *  all_gap_stats,
STATS *  space_gap_stats,
STATS *  small_gap_stats,
int16_t  block_space_gap_width,
int16_t  block_non_space_gap_width 
)
private

◆ peek_at_next_gap()

void tesseract::Textord::peek_at_next_gap ( TO_ROW *  row,
BLOBNBOX_IT  box_it,
TBOX &  next_blob_box,
int16_t &  next_gap,
int16_t &  next_within_xht_gap 
)
private

◆ reduced_box_for_blob()

TBOX tesseract::Textord::reduced_box_for_blob ( BLOBNBOX *  blob,
TO_ROW *  row,
int16_t *  left_above_xht 
)
private

◆ reduced_box_next()

TBOX tesseract::Textord::reduced_box_next ( TO_ROW *  row,
BLOBNBOX_IT *  it 
)
private

◆ row_spacing_stats()

void tesseract::Textord::row_spacing_stats ( TO_ROW *  row,
GAPMAP *  gapmap,
int16_t  block_idx,
int16_t  row_idx,
int16_t  block_space_gap_width,
int16_t  block_non_space_gap_width 
)
private

◆ set_use_cjk_fp_model()

void tesseract::Textord::set_use_cjk_fp_model ( bool  flag)
inline

◆ stats_count_under()

int16_t tesseract::Textord::stats_count_under ( STATS *  stats,
int16_t  threshold 
)
private

◆ suspected_punct_blob()

bool tesseract::Textord::suspected_punct_blob ( TO_ROW *  row,
TBOX  box 
)
private

◆ TextordPage()

void tesseract::Textord::TextordPage ( PageSegMode  pageseg_mode,
const FCOORD &  reskew,
int  width,
int  height,
Pix *  binary_pix,
Pix *  thresholds_pix,
Pix *  grey_pix,
bool  use_box_bottoms,
BLOBNBOX_LIST *  diacritic_blobs,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  to_blocks 
)

◆ to_spacing()

void tesseract::Textord::to_spacing ( ICOORD  page_tr,
TO_BLOCK_LIST *  blocks 
)

◆ TransferDiacriticsToBlockGroups()

void tesseract::Textord::TransferDiacriticsToBlockGroups ( BLOBNBOX_LIST *  diacritic_blobs,
BLOCK_LIST *  blocks 
)
private

◆ TransferDiacriticsToWords()

void tesseract::Textord::TransferDiacriticsToWords ( BLOBNBOX_LIST *  diacritic_blobs,
const FCOORD &  rotation,
WordGrid *  word_grid 
)
private

◆ use_cjk_fp_model()

bool tesseract::Textord::use_cjk_fp_model ( ) const
inline

◆ wide_blob()

bool tesseract::Textord::wide_blob ( TO_ROW *  row,
TBOX  blob_box 
)
private

Member Data Documentation

◆ ccstruct_

CCStruct* tesseract::Textord::ccstruct_
private

◆ page_tr_

ICOORD tesseract::Textord::page_tr_
private

◆ use_cjk_fp_model_

bool tesseract::Textord::use_cjk_fp_model_
private

The documentation for this class was generated from the following files: