tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
tesseract Namespace Reference

Classes

class  AlignedBlob
 
struct  AlignedBlobParams
 
class  AmbigSpec
 
class  AmbigSpec_IT
 
class  AmbigSpec_LIST
 
struct  AssociateStats
 
class  AssociateUtils
 
class  BaselineBlock
 
class  BaselineDetect
 
class  BaselineRow
 
class  BBGrid
 
struct  BestChoiceBundle
 Bundle together all the things pertaining to the best choice/state. More...
 
class  BitVector
 
struct  BlobData
 
class  BlobGrid
 
struct  BlockGroup
 
class  BoolParam
 
class  BoxChar
 
struct  BoxCharPtrSort
 
class  BoxWord
 
class  CCNonTextDetect
 
class  CCStruct
 
class  CCUtil
 
class  CCUtilMutex
 
class  ChoiceIterator
 
class  Classify
 
class  ClassPruner
 
struct  ClipFFunc
 
struct  ClipFPrime
 
struct  ClipGFunc
 
struct  ClipGPrime
 
struct  Cluster
 
class  ColPartition
 
class  ColPartitionGrid
 
class  ColPartitionSet
 
class  ColSegment
 
class  ColumnFinder
 
class  Convolve
 
class  CTC
 
class  CUtil
 
class  Dawg
 
struct  DawgArgs
 
class  DawgCache
 
struct  DawgLoader
 
struct  DawgPosition
 
class  DawgPositionVector
 
class  DebugPixa
 
class  DetLineFit
 
class  Dict
 
struct  DocQualCallbacks
 
class  DocumentCache
 
class  DocumentData
 
class  DoubleParam
 
class  DoublePtr
 
class  DPPoint
 
class  EquationDetect
 
class  EquationDetectBase
 
class  ErrorCounter
 
struct  FFunc
 
class  File
 
struct  FloatWordFeature
 
struct  FontInfo
 
class  FontInfoTable
 
struct  FontSet
 
struct  FontSpacingInfo
 
class  FontUtils
 
struct  FPrime
 
class  FRAGMENT
 
class  FullyConnected
 
class  GenericHeap
 
struct  GeometricClassifierState
 
struct  GFunc
 
struct  GPrime
 
class  GridBase
 
class  GridSearch
 
struct  HFunc
 
struct  HPrime
 
class  IcuErrorCode
 
struct  IdentityFunc
 
class  ImageData
 
class  ImageFind
 
class  ImageThresholder
 
class  IndexMap
 
class  IndexMapBiDi
 
class  Input
 
class  InputBuffer
 
struct  Interval
 
class  IntFeatureDist
 
class  IntFeatureMap
 
class  IntFeatureSpace
 
class  IntGrid
 
class  IntParam
 
class  IntSimdMatrix
 
class  IntSimdMatrixAVX2
 
class  IntSimdMatrixSSE
 
struct  KDPair
 
struct  KDPairDec
 
struct  KDPairInc
 
class  KDPtrPair
 
struct  KDPtrPairDec
 
struct  KDPtrPairInc
 
class  KDVector
 
class  LanguageModel
 
struct  LanguageModelDawgInfo
 
struct  LanguageModelNgramInfo
 
struct  LanguageModelState
 Struct to store information maintained by various language model components. More...
 
class  LigatureTable
 
class  LineFinder
 
struct  LineHypothesis
 
struct  LMConsistencyInfo
 
class  LMPainPoints
 
class  LSTM
 
class  LSTMRecognizer
 
class  LSTMTester
 
class  LSTMTrainer
 
class  LTRResultIterator
 
class  MasterTrainer
 
class  Maxpool
 
class  MutableIterator
 
class  Network
 
class  NetworkBuilder
 
class  NetworkIO
 
class  NetworkScratch
 
struct  NodeChild
 
class  ObjectCache
 
class  OutputBuffer
 
class  PageIterator
 
class  PangoFontInfo
 
class  ParagraphModelSmearer
 
class  ParagraphTheory
 
class  Parallel
 
class  Param
 
class  ParamsModel
 
class  ParamsTrainingBundle
 
struct  ParamsTrainingHypothesis
 
struct  ParamsVectors
 
class  ParamUtils
 
class  PixelHistogram
 
class  Plumbing
 
class  PointerVector
 
struct  PtrHash
 
class  RecodeBeamSearch
 
class  RecodedCharID
 
struct  RecodeNode
 
class  Reconfig
 
struct  Relu
 
struct  ReluPrime
 
class  ResultIterator
 
class  Reversed
 
class  RowInfo
 
class  RowScratchRegisters
 
class  SampleIterator
 
struct  ScoredFont
 
class  SegSearchPending
 
class  Series
 
class  Shape
 
class  ShapeClassifier
 
struct  ShapeDist
 
struct  ShapeQueueEntry
 
struct  ShapeRating
 
class  ShapeTable
 
class  ShiroRekhaSplitter
 
class  SimpleClusterer
 
struct  SpacingProperties
 
class  SquishedDawg
 
class  StaticShape
 
class  StrideMap
 
class  StringParam
 
class  StringRenderer
 
class  StrokeWidth
 
class  StructuredTable
 
class  TabConstraint
 
class  TabEventHandler
 
class  TabFind
 
class  TableFinder
 
class  TableRecognizer
 
class  TabVector
 
struct  TESS_CHAR
 
class  TessBaseAPI
 
class  TessBoxTextRenderer
 
class  TessClassifier
 
class  TessdataManager
 
class  Tesseract
 
struct  TesseractStats
 
class  TessHOcrRenderer
 
class  TessOsdRenderer
 
class  TessPDFRenderer
 
class  TessResultRenderer
 
class  TessTextRenderer
 
class  TessTsvRenderer
 
class  TessUnlvRenderer
 
class  TextlineProjection
 
class  Textord
 
class  TFile
 
class  TrainingSample
 
class  TrainingSampleSet
 
class  TRand
 
class  TransposedArray
 
class  Trie
 
class  UNICHAR
 
class  UnicharAmbigs
 
struct  UnicharAndFonts
 
class  UnicharCompress
 
class  UnicharIdArrayUtils
 
struct  UnicharRating
 
class  UnicodeSpanSkipper
 
struct  UnityFunc
 
class  ValidateGrapheme
 
class  ValidateIndic
 
class  ValidateJavanese
 
class  ValidateKhmer
 
class  ValidateMyanmar
 
class  Validator
 
struct  ViterbiStateEntry
 
class  WeightMatrix
 
struct  WordData
 
class  WordFeature
 
class  Wordrec
 
class  WordWithBox
 
class  WorkingPartSet
 

Typedefs

typedef int(Dict::* DictFunc) (void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
 
typedef double(Dict::* ProbabilityInContextFunc) (const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
 
typedef float(Dict::* ParamsModelClassifyFunc) (const char *lang, void *path)
 
typedef void(Wordrec::* FillLatticeFunc) (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
 
typedef TessCallback4< const UNICHARSET &, int, PageIterator *, Pix * > TruthCallback
 
using SetOfModels = GenericVectorEqEq< const ParagraphModel * >
 
typedef void(Tesseract::* WordRecognizer) (const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words)
 
using ParamsTrainingHypothesisList = GenericVector< ParamsTrainingHypothesis >
 
using UnicharIdVector = GenericVector< UNICHAR_ID >
 
using UnicharAmbigsVector = GenericVector< AmbigSpec_LIST * >
 
typedef bool(* FileReader) (const STRING &filename, GenericVector< char > *data)
 
typedef bool(* FileWriter) (const GenericVector< char > &data, const STRING &filename)
 
using IntKDPair = KDPairInc< int, int >
 
using char32 = signed int
 
using RSMap = std::unordered_map< int, std::unique_ptr< std::vector< int > >>
 
using RSCounts = std::unordered_map< int, int >
 
using ShapeQueue = GenericHeap< ShapeQueueEntry >
 
using NodeChildVector = GenericVector< NodeChild >
 
using SuccessorList = GenericVector< int >
 
using SuccessorListsVector = GenericVector< SuccessorList * >
 
using DawgVector = GenericVector< Dawg * >
 
typedef TessResultCallback2< bool, const GenericVector< char > &, LSTMTrainer * > * CheckPointReader
 
typedef TessResultCallback3< bool, SerializeAmount, const LSTMTrainer *, GenericVector< char > * > * CheckPointWriter
 
typedef TessResultCallback4< STRING, int, const double *, const TessdataManager &, int > * TestCallback
 
using RecodePair = KDPairInc< double, RecodeNode >
 
using RecodeHeap = GenericHeap< RecodePair >
 
using BlobGridSearch = GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 
using ColPartitionGridSearch = GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT >
 
using PartSetVector = GenericVector< ColPartitionSet * >
 
using WidthCallback = TessResultCallback1< bool, int >
 
using ColSegmentGrid = BBGrid< ColSegment, ColSegment_CLIST, ColSegment_C_IT >
 
using ColSegmentGridSearch = GridSearch< ColSegment, ColSegment_CLIST, ColSegment_C_IT >
 
using WordGrid = BBGrid< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >
 
using WordSearch = GridSearch< WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT >
 
using LigHash = std::unordered_map< std::string, std::string, StringHash >
 
using PainPointHeap = GenericHeap< MatrixCoordPair >
 
using LanguageModelFlagsType = unsigned char
 Used for expressing various language model flags. More...
 

Enumerations

enum  LineType { LT_START = 'S', LT_BODY = 'C', LT_UNKNOWN = 'U', LT_MULTIPLE = 'M' }
 
enum  CMD_EVENTS { ACTION_1_CMD_EVENT, RECOG_WERDS, RECOG_PSEUDO, ACTION_2_CMD_EVENT }
 
enum  CachingStrategy { CS_SEQUENTIAL, CS_ROUND_ROBIN }
 
enum  NormalizationMode { NM_BASELINE = -3, NM_CHAR_ISOTROPIC = -2, NM_CHAR_ANISOTROPIC = -1 }
 
enum  kParamsTrainingFeatureType {
  PTRAIN_DIGITS_SHORT, PTRAIN_DIGITS_MED, PTRAIN_DIGITS_LONG, PTRAIN_NUM_SHORT,
  PTRAIN_NUM_MED, PTRAIN_NUM_LONG, PTRAIN_DOC_SHORT, PTRAIN_DOC_MED,
  PTRAIN_DOC_LONG, PTRAIN_DICT_SHORT, PTRAIN_DICT_MED, PTRAIN_DICT_LONG,
  PTRAIN_FREQ_SHORT, PTRAIN_FREQ_MED, PTRAIN_FREQ_LONG, PTRAIN_SHAPE_COST_PER_CHAR,
  PTRAIN_NGRAM_COST_PER_CHAR, PTRAIN_NUM_BAD_PUNC, PTRAIN_NUM_BAD_CASE, PTRAIN_XHEIGHT_CONSISTENCY,
  PTRAIN_NUM_BAD_CHAR_TYPE, PTRAIN_NUM_BAD_SPACING, PTRAIN_NUM_BAD_FONT, PTRAIN_RATING_PER_CHAR,
  PTRAIN_NUM_FEATURE_TYPES
}
 
enum  Orientation { ORIENTATION_PAGE_UP = 0, ORIENTATION_PAGE_RIGHT = 1, ORIENTATION_PAGE_DOWN = 2, ORIENTATION_PAGE_LEFT = 3 }
 
enum  WritingDirection { WRITING_DIRECTION_LEFT_TO_RIGHT = 0, WRITING_DIRECTION_RIGHT_TO_LEFT = 1, WRITING_DIRECTION_TOP_TO_BOTTOM = 2 }
 
enum  TextlineOrder { TEXTLINE_ORDER_LEFT_TO_RIGHT = 0, TEXTLINE_ORDER_RIGHT_TO_LEFT = 1, TEXTLINE_ORDER_TOP_TO_BOTTOM = 2 }
 
enum  PageSegMode {
  PSM_OSD_ONLY, PSM_AUTO_OSD, PSM_AUTO_ONLY, PSM_AUTO,
  PSM_SINGLE_COLUMN, PSM_SINGLE_BLOCK_VERT_TEXT, PSM_SINGLE_BLOCK, PSM_SINGLE_LINE,
  PSM_SINGLE_WORD, PSM_CIRCLE_WORD, PSM_SINGLE_CHAR, PSM_SPARSE_TEXT,
  PSM_SPARSE_TEXT_OSD, PSM_RAW_LINE, PSM_COUNT
}
 
enum  PageIteratorLevel {
  RIL_BLOCK, RIL_PARA, RIL_TEXTLINE, RIL_WORD,
  RIL_SYMBOL
}
 
enum  ParagraphJustification { JUSTIFICATION_UNKNOWN, JUSTIFICATION_LEFT, JUSTIFICATION_CENTER, JUSTIFICATION_RIGHT }
 
enum  OcrEngineMode {
  OEM_TESSERACT_ONLY, OEM_LSTM_ONLY, OEM_TESSERACT_LSTM_COMBINED, OEM_DEFAULT,
  OEM_COUNT
}
 
enum  ScriptPos { SP_NORMAL, SP_SUBSCRIPT, SP_SUPERSCRIPT, SP_DROPCAP }
 
enum  AmbigType {
  NOT_AMBIG, REPLACE_AMBIG, DEFINITE_AMBIG, SIMILAR_AMBIG,
  CASE_AMBIG, AMBIG_TYPE_COUNT
}
 
enum  SetParamConstraint { SET_PARAM_CONSTRAINT_NONE, SET_PARAM_CONSTRAINT_DEBUG_ONLY, SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY, SET_PARAM_CONSTRAINT_NON_INIT_ONLY }
 
enum  TessdataType {
  TESSDATA_LANG_CONFIG, TESSDATA_UNICHARSET, TESSDATA_AMBIGS, TESSDATA_INTTEMP,
  TESSDATA_PFFMTABLE, TESSDATA_NORMPROTO, TESSDATA_PUNC_DAWG, TESSDATA_SYSTEM_DAWG,
  TESSDATA_NUMBER_DAWG, TESSDATA_FREQ_DAWG, TESSDATA_FIXED_LENGTH_DAWGS, TESSDATA_CUBE_UNICHARSET,
  TESSDATA_CUBE_SYSTEM_DAWG, TESSDATA_SHAPE_TABLE, TESSDATA_BIGRAM_DAWG, TESSDATA_UNAMBIG_DAWG,
  TESSDATA_PARAMS_MODEL, TESSDATA_LSTM, TESSDATA_LSTM_PUNC_DAWG, TESSDATA_LSTM_SYSTEM_DAWG,
  TESSDATA_LSTM_NUMBER_DAWG, TESSDATA_LSTM_UNICHARSET, TESSDATA_LSTM_RECODER, TESSDATA_VERSION,
  TESSDATA_NUM_ENTRIES
}
 
enum  CharSegmentationType { CST_FRAGMENT, CST_WHOLE, CST_IMPROPER, CST_NGRAM }
 
enum  CountTypes {
  CT_UNICHAR_TOP_OK, CT_UNICHAR_TOP1_ERR, CT_UNICHAR_TOP2_ERR, CT_UNICHAR_TOPN_ERR,
  CT_UNICHAR_TOPTOP_ERR, CT_OK_MULTI_UNICHAR, CT_OK_JOINED, CT_OK_BROKEN,
  CT_REJECT, CT_FONT_ATTR_ERR, CT_OK_MULTI_FONT, CT_NUM_RESULTS,
  CT_RANK, CT_REJECTED_JUNK, CT_ACCEPTED_JUNK, CT_SIZE
}
 
enum  DawgType {
  DAWG_TYPE_PUNCTUATION, DAWG_TYPE_WORD, DAWG_TYPE_NUMBER, DAWG_TYPE_PATTERN,
  DAWG_TYPE_COUNT
}
 
enum  XHeightConsistencyEnum { XH_GOOD, XH_SUBNORMAL, XH_INCONSISTENT }
 
enum  TrainingFlags { TF_INT_MODE = 1, TF_COMPRESS_UNICHARSET = 64 }
 
enum  ErrorTypes {
  ET_RMS, ET_DELTA, ET_WORD_RECERR, ET_CHAR_ERROR,
  ET_SKIP_RATIO, ET_COUNT
}
 
enum  Trainability {
  TRAINABLE, PERFECT, UNENCODABLE, HI_PRECISION_ERR,
  NOT_BOXED
}
 
enum  SerializeAmount { LIGHT, NO_BEST_TRAINER, FULL }
 
enum  SubTrainerResult { STR_NONE, STR_UPDATED, STR_REPLACED }
 
enum  NetworkType {
  NT_NONE, NT_INPUT, NT_CONVOLVE, NT_MAXPOOL,
  NT_PARALLEL, NT_REPLICATED, NT_PAR_RL_LSTM, NT_PAR_UD_LSTM,
  NT_PAR_2D_LSTM, NT_SERIES, NT_RECONFIG, NT_XREVERSED,
  NT_YREVERSED, NT_XYTRANSPOSE, NT_LSTM, NT_LSTM_SUMMARY,
  NT_LOGISTIC, NT_POSCLIP, NT_SYMCLIP, NT_TANH,
  NT_RELU, NT_LINEAR, NT_SOFTMAX, NT_SOFTMAX_NO_CTC,
  NT_LSTM_SOFTMAX, NT_LSTM_SOFTMAX_ENCODED, NT_TENSORFLOW, NT_COUNT
}
 
enum  NetworkFlags { NF_LAYER_SPECIFIC_LR = 64, NF_ADAM = 128 }
 
enum  TrainingState { TS_DISABLED, TS_ENABLED, TS_TEMP_DISABLE, TS_RE_ENABLE }
 
enum  NodeContinuation { NC_ANYTHING, NC_ONLY_DUP, NC_NO_DUP, NC_COUNT }
 
enum  TopNState { TN_TOP2, TN_TOPN, TN_ALSO_RAN, TN_COUNT }
 
enum  LossType { LT_NONE, LT_CTC, LT_SOFTMAX, LT_LOGISTIC }
 
enum  FlexDimensions { FD_BATCH, FD_HEIGHT, FD_WIDTH, FD_DIMSIZE }
 
enum  ColumnSpanningType {
  CST_NOISE, CST_FLOWING, CST_HEADING, CST_PULLOUT,
  CST_COUNT
}
 
enum  NeighbourPartitionType {
  NPT_HTEXT, NPT_VTEXT, NPT_WEAK_HTEXT, NPT_WEAK_VTEXT,
  NPT_IMAGE, NPT_COUNT
}
 
enum  LeftOrRight { LR_LEFT, LR_RIGHT }
 
enum  PartitionFindResult { PFR_OK, PFR_SKEW, PFR_NOISE }
 
enum  ColSegType {
  COL_UNKNOWN, COL_TEXT, COL_TABLE, COL_MIXED,
  COL_COUNT
}
 
enum  TabAlignment {
  TA_LEFT_ALIGNED, TA_LEFT_RAGGED, TA_CENTER_JUSTIFIED, TA_RIGHT_ALIGNED,
  TA_RIGHT_RAGGED, TA_SEPARATOR, TA_COUNT
}
 
enum  FactorNames {
  FN_INCOLOR, FN_Y0, FN_Y1, FN_Y2,
  FN_Y3, FN_X0, FN_X1, FN_SHEAR,
  FN_NUM_FACTORS
}
 
enum  UnicodeNormMode { UnicodeNormMode::kNFD, UnicodeNormMode::kNFC, UnicodeNormMode::kNFKD, UnicodeNormMode::kNFKC }
 
enum  OCRNorm { OCRNorm::kNone, OCRNorm::kNormalize }
 
enum  GraphemeNorm { GraphemeNorm::kNone, GraphemeNorm::kNormalize }
 
enum  GraphemeNormMode { GraphemeNormMode::kSingleString, GraphemeNormMode::kCombined, GraphemeNormMode::kGlyphSplit, GraphemeNormMode::kIndividualUnicodes }
 
enum  ViramaScript : char32 {
  ViramaScript::kNonVirama = 0, ViramaScript::kDevanagari = 0x900, ViramaScript::kBengali = 0x980, ViramaScript::kGurmukhi = 0xa00,
  ViramaScript::kGujarati = 0xa80, ViramaScript::kOriya = 0xb00, ViramaScript::kTamil = 0xb80, ViramaScript::kTelugu = 0xc00,
  ViramaScript::kKannada = 0xc80, ViramaScript::kMalayalam = 0xd00, ViramaScript::kSinhala = 0xd80, ViramaScript::kMyanmar = 0x1000,
  ViramaScript::kKhmer = 0x1780, ViramaScript::kJavanese = 0xa980
}
 
enum  LMPainPointsType {
  LM_PPTYPE_BLAMER, LM_PPTYPE_AMBIG, LM_PPTYPE_PATH, LM_PPTYPE_SHAPE,
  LM_PPTYPE_NUM
}
 

Functions

static void addAvailableLanguages (const STRING &datadir, const STRING &base, GenericVector< STRING > *langs)
 
static int CompareSTRING (const void *p1, const void *p2)
 
static tesseract::Orientation GetBlockTextOrientation (const PageIterator *it)
 
static void AddBaselineCoordsTohOCR (const PageIterator *it, PageIteratorLevel level, STRING *hocr_str)
 
static void AddIdTohOCR (STRING *hocr_str, const std::string base, int num1, int num2)
 
static void AddIdTohOCR (STRING *hocr_str, const std::string base, int num1, int num2, int num3)
 
static void AddBoxTohOCR (const ResultIterator *it, PageIteratorLevel level, STRING *hocr_str)
 
static void AddBoxToTSV (const PageIterator *it, PageIteratorLevel level, STRING *hocr_str)
 
STRING HOcrEscape (const char *text)
 
static TBLOB * make_tesseract_blob (float baseline, float xheight, float descender, float ascender, bool numeric_mode, Pix *pix)
 
static void add_space (TESS_CHAR_IT *it)
 
static float rating_to_cost (float rating)
 
static void extract_result (TESS_CHAR_IT *out, PAGE_RES *page_res)
 
static double prec (double x)
 
static long dist2 (int x1, int y1, int x2, int y2)
 
static void GetWordBaseline (int writing_direction, int ppi, int height, int word_x1, int word_y1, int word_x2, int word_y2, int line_x1, int line_y1, int line_x2, int line_y2, double *x0, double *y0, double *length)
 
static void AffineMatrix (int writing_direction, int line_x1, int line_y1, int line_x2, int line_y2, double *a, double *b, double *c, double *d)
 
static void ClipBaseline (int ppi, int x1, int y1, int x2, int y2, int *line_x1, int *line_y1, int *line_x2, int *line_y2)
 
static bool CodepointToUtf16be (int code, char utf16[kMaxBytesPerCodepoint])
 
double DotProductAVX (const double *u, const double *v, int n)
 
double DotProductSSE (const double *u, const double *v, int n)
 
int32_t IntDotProductSSE (const int8_t *u, const int8_t *v, int n)
 
static void clear_any_old_text (BLOCK_LIST *block_list)
 
static double MedianXHeight (BLOCK_LIST *block_list)
 
static double BoxMissMetric (const TBOX &box1, const TBOX &box2)
 
static void WordGap (const PointerVector< WERD_RES > &words, int index, int *right, int *next_left)
 
static void EvaluateWordSpan (const PointerVector< WERD_RES > &words, int first_index, int end_index, float *rating, float *certainty, bool *bad, bool *valid_permuter)
 
static int SelectBestWords (double rating_ratio, double certainty_margin, bool debug, PointerVector< WERD_RES > *new_words, PointerVector< WERD_RES > *best_words)
 
static bool WordsAcceptable (const PointerVector< WERD_RES > &words)
 
static BLOB_CHOICE * FindBestMatchingChoice (UNICHAR_ID char_id, WERD_RES *word_res)
 
static void CorrectRepcharChoices (BLOB_CHOICE *blob_choice, WERD_RES *word_res)
 
static void find_modal_font (STATS *fonts, int16_t *font_out, int8_t *font_count)
 
static int SortCPByTopReverse (const void *p1, const void *p2)
 
static int SortCPByBottom (const void *p1, const void *p2)
 
static int SortCPByHeight (const void *p1, const void *p2)
 
bool IsTextOrEquationType (PolyBlockType type)
 
bool IsLeftIndented (const EquationDetect::IndentType type)
 
bool IsRightIndented (const EquationDetect::IndentType type)
 
static int c_blob_comparator (const void *blob1p, const void *blob2p)
 
static Pix * RemoveEnclosingCircle (Pix *pixs)
 
static void AddAllScriptsConverted (const UNICHARSET &sid_set, const UNICHARSET &osd_set, GenericVector< int > *allowed_ids)
 
static bool LikelyParagraphStart (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification j)
 
static int Epsilon (int space_pix)
 
static bool AcceptableRowArgs (int debug_level, int min_num_rows, const char *function_name, const GenericVector< RowScratchRegisters > *rows, int row_start, int row_end)
 
static STRING StrOf (int num)
 
static void PrintTable (const GenericVector< GenericVector< STRING > > &rows, const STRING &colsep)
 
static STRING RtlEmbed (const STRING &word, bool rtlify)
 
static void PrintDetectorState (const ParagraphTheory &theory, const GenericVector< RowScratchRegisters > &rows)
 
static void DebugDump (bool should_print, const STRING &phase, const ParagraphTheory &theory, const GenericVector< RowScratchRegisters > &rows)
 
static void PrintRowRange (const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end)
 
static bool IsLatinLetter (int ch)
 
static bool IsDigitLike (int ch)
 
static bool IsOpeningPunct (int ch)
 
static bool IsTerminalPunct (int ch)
 
static const char * SkipChars (const char *str, const char *toskip)
 
static const char * SkipChars (const char *str, bool(*skip)(int))
 
static const char * SkipOne (const char *str, const char *toskip)
 
static bool LikelyListNumeral (const STRING &word)
 
static bool LikelyListMark (const STRING &word)
 
bool AsciiLikelyListItem (const STRING &word)
 
int UnicodeFor (const UNICHARSET *u, const WERD_CHOICE *werd, int pos)
 
static bool LikelyListMarkUnicode (int ch)
 
static bool UniLikelyListItem (const UNICHARSET *u, const WERD_CHOICE *werd)
 
void LeftWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
 
void RightWordAttributes (const UNICHARSET *unicharset, const WERD_CHOICE *werd, const STRING &utf8, bool *is_list, bool *starts_idea, bool *ends_idea)
 
static int ClosestCluster (const GenericVector< Cluster > &clusters, int value)
 
static void CalculateTabStops (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, int tolerance, GenericVector< Cluster > *left_tabs, GenericVector< Cluster > *right_tabs)
 
static void MarkRowsWithModel (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, const ParagraphModel *model, bool ltr, int eop_threshold)
 
static void GeometricClassifyThreeTabStopTextBlock (int debug_level, GeometricClassifierState &s, ParagraphTheory *theory)
 
static void GeometricClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
 
bool ValidFirstLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
 
bool ValidBodyLine (const GenericVector< RowScratchRegisters > *rows, int row, const ParagraphModel *model)
 
bool CrownCompatible (const GenericVector< RowScratchRegisters > *rows, int a, int b, const ParagraphModel *model)
 
static void DiscardUnusedModels (const GenericVector< RowScratchRegisters > &rows, ParagraphTheory *theory)
 
static void DowngradeWeakestToCrowns (int debug_level, ParagraphTheory *theory, GenericVector< RowScratchRegisters > *rows)
 
void RecomputeMarginsAndClearHypotheses (GenericVector< RowScratchRegisters > *rows, int start, int end, int percentile)
 
int InterwordSpace (const GenericVector< RowScratchRegisters > &rows, int row_start, int row_end)
 
bool FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after, tesseract::ParagraphJustification justification)
 
bool FirstWordWouldHaveFit (const RowScratchRegisters &before, const RowScratchRegisters &after)
 
static bool TextSupportsBreak (const RowScratchRegisters &before, const RowScratchRegisters &after)
 
static ParagraphModel InternalParagraphModelByOutline (const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance, bool *consistent)
 
static ParagraphModel ParagraphModelByOutline (int debug_level, const GenericVector< RowScratchRegisters > *rows, int start, int end, int tolerance)
 
bool RowsFitModel (const GenericVector< RowScratchRegisters > *rows, int start, int end, const ParagraphModel *model)
 
static void MarkStrongEvidence (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end)
 
static void ModelStrongEvidence (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, bool allow_flush_models, ParagraphTheory *theory)
 
static void StrongEvidenceClassify (int debug_level, GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
 
static void SeparateSimpleLeaderLines (GenericVector< RowScratchRegisters > *rows, int row_start, int row_end, ParagraphTheory *theory)
 
static void ConvertHypothesizedModelRunsToParagraphs (int debug_level, const GenericVector< RowScratchRegisters > &rows, GenericVector< PARA *> *row_owners, ParagraphTheory *theory)
 
static bool RowIsStranded (const GenericVector< RowScratchRegisters > &rows, int row)
 
static void LeftoverSegments (const GenericVector< RowScratchRegisters > &rows, GenericVector< Interval > *to_fix, int row_start, int row_end)
 
void CanonicalizeDetectionResults (GenericVector< PARA *> *row_owners, PARA_LIST *paragraphs)
 
void DetectParagraphs (int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA *> *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel *> *models)
 
static void InitializeTextAndBoxesPreRecognition (const MutableIterator &it, RowInfo *info)
 
static void InitializeRowInfo (bool after_recognition, const MutableIterator &it, RowInfo *info)
 
void DetectParagraphs (int debug_level, bool after_text_recognition, const MutableIterator *block_start, GenericVector< ParagraphModel *> *models)
 
bool StrongModel (const ParagraphModel *model)
 
static bool read_t (PAGE_RES_IT *page_res_it, TBOX *tbox)
 
static void PrintPath (int length, const BLOB_CHOICE **blob_choices, const UNICHARSET &unicharset, const char *label, FILE *output_file)
 
static void PrintMatrixPaths (int col, int dim, const MATRIX &ratings, int length, const BLOB_CHOICE **blob_choices, const UNICHARSET &unicharset, const char *label, FILE *output_file)
 
static void PrintScriptDirs (const GenericVector< StrongScriptDirection > &dirs)
 
static void YOutlierPieces (WERD_RES *word, int rebuilt_blob_index, int super_y_bottom, int sub_y_top, ScriptPos *leading_pos, int *num_leading_outliers, ScriptPos *trailing_pos, int *num_trailing_outliers)
 
static bool IsStrInList (const STRING &str, const GenericVector< STRING > &str_list)
 
static void CollectFonts (const UnicityTable< FontInfo > &new_fonts, UnicityTable< FontInfo > *all_fonts)
 
static void AssignIds (const UnicityTable< FontInfo > &all_fonts, UnicityTable< FontInfo > *lang_fonts)
 
bool CompareFontInfo (const FontInfo &fi1, const FontInfo &fi2)
 
bool CompareFontSet (const FontSet &fs1, const FontSet &fs2)
 
void FontInfoDeleteCallback (FontInfo f)
 
void FontSetDeleteCallback (FontSet fs)
 
bool read_info (TFile *f, FontInfo *fi)
 
bool write_info (FILE *f, const FontInfo &fi)
 
bool read_spacing_info (TFile *f, FontInfo *fi)
 
bool write_spacing_info (FILE *f, const FontInfo &fi)
 
bool read_set (TFile *f, FontSet *fs)
 
bool write_set (FILE *f, const FontSet &fs)
 
void * ReCachePagesFunc (void *data)
 
int OtsuThreshold (Pix *src_pix, int left, int top, int width, int height, int **thresholds, int **hi_values)
 
void HistogramRect (Pix *src_pix, int channel, int left, int top, int width, int height, int *histogram)
 
int OtsuStats (const int *histogram, int *H_out, int *omega0_out)
 
int ParamsTrainingFeatureByName (const char *name)
 
bool PSM_OSD_ENABLED (int pageseg_mode)
 
bool PSM_ORIENTATION_ENABLED (int pageseg_mode)
 
bool PSM_COL_FIND_ENABLED (int pageseg_mode)
 
bool PSM_SPARSE (int pageseg_mode)
 
bool PSM_BLOCK_FIND_ENABLED (int pageseg_mode)
 
bool PSM_LINE_FIND_ENABLED (int pageseg_mode)
 
bool PSM_WORD_FIND_ENABLED (int pageseg_mode)
 
const char * ScriptPosToString (enum ScriptPos script_pos)
 
void AmbigSpec_zapper (ELIST_LINK *link)
 
bool LoadDataFromFile (const char *filename, GenericVector< char > *data)
 
bool LoadDataFromFile (const STRING &filename, GenericVector< char > *data)
 
bool SaveDataToFile (const GenericVector< char > &data, const STRING &filename)
 
bool LoadFileLinesToStrings (const STRING &filename, GenericVector< STRING > *lines)
 
template<typename T >
bool cmp_eq (T const &t1, T const &t2)
 
template<typename T >
int sort_cmp (const void *t1, const void *t2)
 
template<typename T >
int sort_ptr_cmp (const void *t1, const void *t2)
 
bool DeSerialize (FILE *fp, char *data, size_t n)
 
bool DeSerialize (FILE *fp, float *data, size_t n)
 
bool DeSerialize (FILE *fp, int8_t *data, size_t n)
 
bool DeSerialize (FILE *fp, int16_t *data, size_t n)
 
bool DeSerialize (FILE *fp, int32_t *data, size_t n)
 
bool DeSerialize (FILE *fp, uint8_t *data, size_t n)
 
bool DeSerialize (FILE *fp, uint16_t *data, size_t n)
 
bool DeSerialize (FILE *fp, uint32_t *data, size_t n)
 
bool Serialize (FILE *fp, const char *data, size_t n)
 
bool Serialize (FILE *fp, const float *data, size_t n)
 
bool Serialize (FILE *fp, const int8_t *data, size_t n)
 
bool Serialize (FILE *fp, const int16_t *data, size_t n)
 
bool Serialize (FILE *fp, const int32_t *data, size_t n)
 
bool Serialize (FILE *fp, const uint8_t *data, size_t n)
 
bool Serialize (FILE *fp, const uint16_t *data, size_t n)
 
bool Serialize (FILE *fp, const uint32_t *data, size_t n)
 
template<typename T , size_t N>
constexpr size_t countof (T const (&)[N]) noexcept
 
static int RadicalPreHash (const std::vector< int > &rs)
 
static bool DecodeRadicalLine (STRING *radical_data_line, RSMap *radical_map)
 
static bool DecodeRadicalTable (STRING *radical_data, RSMap *radical_map)
 
void ExtractFontName (const STRING &filename, STRING *fontname)
 
TrainingSample * BlobToTrainingSample (const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
 
static uint8_t NormalizeDirection (uint8_t dir, const FCOORD &unnormed_pos, const DENORM &denorm, const DENORM *root_denorm)
 
static FCOORD MeanDirectionVector (const LLSQ &point_diffs, const LLSQ &dirs, const FCOORD &start_pt, const FCOORD &end_pt)
 
static int ComputeFeatures (const FCOORD &start_pt, const FCOORD &end_pt, double feature_length, GenericVector< INT_FEATURE_STRUCT > *features)
 
static int GatherPoints (const C_OUTLINE *outline, double feature_length, const DENORM &denorm, const DENORM *root_denorm, int start_index, int end_index, ICOORD *pos, FCOORD *pos_normed, LLSQ *points, LLSQ *dirs)
 
static void ExtractFeaturesFromRun (const EDGEPT *startpt, const EDGEPT *lastpt, const DENORM &denorm, double feature_length, bool force_poly, GenericVector< INT_FEATURE_STRUCT > *features)
 
void ClearFeatureSpaceWindow (NORM_METHOD norm_method, ScrollView *window)
 
static void AddNearFeatures (const IntFeatureMap &feature_map, int f, int levels, GenericVector< int > *good_features)
 
static void CallWithUTF8 (TessCallback1< const char *> *cb, const WERD_CHOICE *wc)
 
static int sort_strings_by_dec_length (const void *v1, const void *v2)
 
static int BestLabel (const GENERIC_2D_ARRAY< float > &outputs, int t)
 
static double LogSumExp (double ln_x, double ln_y)
 
double Tanh (double x)
 
double Logistic (double x)
 
template<class Func >
void FuncInplace (int n, double *inout)
 
template<class Func >
void FuncMultiply (const double *u, const double *v, int n, double *out)
 
template<typename T >
void SoftmaxInPlace (int n, T *inout)
 
void CopyVector (int n, const double *src, double *dest)
 
void AccumulateVector (int n, const double *src, double *dest)
 
void MultiplyVectorsInPlace (int n, const double *src, double *inout)
 
void MultiplyAccumulate (int n, const double *u, const double *v, double *out)
 
void SumVectors (int n, const double *v1, const double *v2, const double *v3, const double *v4, const double *v5, double *sum)
 
template<typename T >
void ZeroVector (int n, T *vec)
 
template<typename T >
void ClipVector (int n, T lower, T upper, T *vec)
 
void CodeInBinary (int n, int nf, double *vec)
 
static uint32_t ceil_log2 (uint32_t n)
 
static void SkipWhitespace (char **str)
 
static NetworkType NonLinearity (char func)
 
static Network * BuildFullyConnected (const StaticShape &input_shape, NetworkType type, const STRING &name, int depth)
 
static void ComputeBlackWhite (Pix *pix, float *black, float *white)
 
static void HistogramWeight (double weight, STATS *histogram)
 
static bool AtLeast2LineCrossings (BLOBNBOX_CLIST *blobs)
 
static Pix * GridReducedPix (const TBOX &box, int gridsize, ICOORD bleft, int *left, int *bottom)
 
Pix * TraceOutlineOnReducedPix (C_OUTLINE *outline, int gridsize, ICOORD bleft, int *left, int *bottom)
 
Pix * TraceBlockOnReducedPix (BLOCK *block, int gridsize, ICOORD bleft, int *left, int *bottom)
 
template<class BBC >
int SortByBoxLeft (const void *void1, const void *void2)
 
template<class BBC >
int SortRightToLeft (const void *void1, const void *void2)
 
template<class BBC >
int SortByBoxBottom (const void *void1, const void *void2)
 
static TBOX AttemptBoxExpansion (const TBOX &box, const IntGrid &noise_density, int pad)
 
 BOOL_VAR (textord_tabfind_show_initial_partitions, false, "Show partition bounds")
 
 BOOL_VAR (textord_tabfind_show_reject_blobs, false, "Show blobs rejected as noise")
 
 INT_VAR (textord_tabfind_show_partitions, 0, "Show partition bounds, waiting if >1")
 
 BOOL_VAR (textord_tabfind_show_columns, false, "Show column bounds")
 
 BOOL_VAR (textord_tabfind_show_blocks, false, "Show final block bounds")
 
 BOOL_VAR (textord_tabfind_find_tables, true, "run table detection")
 
static void ReleaseAllBlobsAndDeleteUnused (BLOBNBOX_LIST *blobs)
 
static TBOX BoxFromHLine (const TabVector *hline)
 
static void ReflectBlobList (BLOBNBOX_LIST *bblobs)
 
static void RotateAndExplodeBlobList (const FCOORD &blob_rotation, BLOBNBOX_LIST *bblobs, STATS *widths, STATS *heights)
 
 BOOL_VAR_H (textord_tabfind_find_tables, false, "run table detection")
 
static void ClipCoord (const ICOORD &bleft, const ICOORD &tright, ICOORD *pos)
 
static TO_BLOCK * MoveBlobsToBlock (bool vertical_text, int line_spacing, BLOCK *block, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static int MedianSpacing (int page_height, ColPartition_IT it)
 
static bool UpdateLeftMargin (const ColPartition &part, int *margin_left, int *margin_right)
 
static bool UpdateRightMargin (const ColPartition &part, int *margin_left, int *margin_right)
 
 BOOL_VAR (textord_tabfind_show_color_fit, false, "Show stroke widths")
 
static bool OKMergeCandidate (const ColPartition *part, const ColPartition *candidate, bool debug)
 
static int IncreaseInOverlap (const ColPartition *merge1, const ColPartition *merge2, int ok_overlap, ColPartition_CLIST *parts)
 
static bool TestCompatibleCandidates (const ColPartition &part, bool debug, ColPartition_CLIST *candidates)
 
static void RemoveBadBox (BLOBNBOX *box, ColPartition *part, ColPartition_LIST *part_list)
 
static void ComputeSearchBoxAndScaling (BlobNeighbourDir direction, const TBOX &part_box, int min_padding, TBOX *search_box, ICOORD *dist_scaling)
 
static bool HScanForEdge (uint32_t *data, int wpl, int x_start, int x_end, int min_count, int mid_width, int max_count, int y_end, int y_step, int *y_start)
 
static bool VScanForEdge (uint32_t *data, int wpl, int y_start, int y_end, int min_count, int mid_width, int max_count, int x_end, int x_step, int *x_start)
 
static void AttemptToShrinkBox (const FCOORD &rotation, const FCOORD &rerotation, const TBOX &im_box, Pix *pix, TBOX *slice)
 
static void CutChunkFromParts (const TBOX &box, const TBOX &im_box, const FCOORD &rotation, const FCOORD &rerotation, Pix *pix, ColPartition_LIST *part_list)
 
static void DivideImageIntoParts (const TBOX &im_box, const FCOORD &rotation, const FCOORD &rerotation, Pix *pix, ColPartitionGridSearch *rectsearch, ColPartition_LIST *part_list)
 
static int ExpandImageLeft (const TBOX &box, int left_limit, ColPartitionGrid *part_grid)
 
static int ExpandImageRight (const TBOX &box, int right_limit, ColPartitionGrid *part_grid)
 
static int ExpandImageBottom (const TBOX &box, int bottom_limit, ColPartitionGrid *part_grid)
 
static int ExpandImageTop (const TBOX &box, int top_limit, ColPartitionGrid *part_grid)
 
static int ExpandImageDir (BlobNeighbourDir dir, const TBOX &im_box, const TBOX &limit_box, ColPartitionGrid *part_grid, TBOX *expanded_box)
 
static void MaximalImageBoundingBox (ColPartitionGrid *part_grid, TBOX *im_box)
 
static void DeletePartition (ColPartition *part)
 
static bool ExpandImageIntoParts (const TBOX &max_image_box, ColPartitionGridSearch *rectsearch, ColPartitionGrid *part_grid, ColPartition **part_ptr)
 
static int IntersectArea (const TBOX &box, ColPartition_LIST *part_list)
 
static bool TestWeakIntersectedPart (const TBOX &im_box, ColPartition_LIST *part_list, ColPartition *part)
 
static void EliminateWeakParts (const TBOX &im_box, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts, ColPartition_LIST *part_list)
 
static bool ScanForOverlappingText (ColPartitionGrid *part_grid, TBOX *box)
 
static void MarkAndDeleteImageParts (const FCOORD &rerotate, ColPartitionGrid *part_grid, ColPartition_LIST *image_parts, Pix *image_pix)
 
static void DeleteSmallImages (ColPartitionGrid *part_grid)
 
static void RemoveUnusedLineSegments (bool horizontal_lines, BLOBNBOX_LIST *line_bblobs, Pix *line_pix)
 
static void SubtractLinesAndResidue (Pix *line_pix, Pix *non_line_pix, int resolution, Pix *src_pix)
 
static int MaxStrokeWidth (Pix *pix)
 
static int NumTouchingIntersections (Box *line_box, Pix *intersection_pix)
 
static int CountPixelsAdjacentToLine (int line_width, Box *line_box, Pix *nonline_pix)
 
static int FilterFalsePositives (int resolution, Pix *nonline_pix, Pix *intersection_pix, Pix *line_pix)
 
static Pix * FilterMusic (int resolution, Pix *pix_closed, Pix *pix_vline, Pix *pix_hline, l_int32 *v_empty, l_int32 *h_empty)
 
 INT_VAR (textord_tabfind_show_strokewidths, 0, "Show stroke widths")
 
 BOOL_VAR (textord_tabfind_only_strokewidths, false, "Only run stroke widths")
 
static void CollectHorizVertBlobs (BLOBNBOX_LIST *input_blobs, int *num_vertical_blobs, int *num_horizontal_blobs, BLOBNBOX_CLIST *vertical_blobs, BLOBNBOX_CLIST *horizontal_blobs, BLOBNBOX_CLIST *nondescript_blobs)
 
static void PrintBoxWidths (BLOBNBOX *neighbour)
 
static int UpperQuartileCJKSize (int gridsize, BLOBNBOX_LIST *blobs)
 
static bool AcceptableCJKMerge (const TBOX &bbox, const TBOX &nbox, bool debug, int max_size, int max_dist, int *x_gap, int *y_gap)
 
static void ListNeighbours (const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours)
 
static void List2ndNeighbours (const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours)
 
static void List3rdNeighbours (const BLOBNBOX *blob, BLOBNBOX_CLIST *neighbours)
 
static void CountNeighbourGaps (bool debug, BLOBNBOX_CLIST *neighbours, int *pure_h_count, int *pure_v_count)
 
static void CountNeighbourTypes (BLOBNBOX_CLIST *neighbours, int *pure_h_count, int *pure_v_count)
 
static BLOBNBOX * MutualUnusedVNeighbour (const BLOBNBOX *blob, BlobNeighbourDir dir)
 
static BLOBNBOX * MutualUnusedHNeighbour (const BLOBNBOX *blob, BlobNeighbourDir dir)
 
static void DrawDiacriticJoiner (const BLOBNBOX *blob, ScrollView *window)
 
 BOOL_VAR (textord_tabfind_show_initialtabs, false, "Show tab candidates")
 
 BOOL_VAR (textord_tabfind_show_finaltabs, false, "Show tab vectors")
 
static void DisplayBoxVector (const GenericVector< BLOBNBOX *> &boxes, ScrollView *win)
 
 BOOL_VAR (textord_show_tables, false, "Show table regions")
 
 BOOL_VAR (textord_tablefind_show_mark, false, "Debug table marking steps in detail")
 
 BOOL_VAR (textord_tablefind_show_stats, false, "Show page stats used in table finding")
 
 BOOL_VAR (textord_tablefind_recognize_tables, false, "Enables the table recognizer for table layout and filtering.")
 
template<typename T >
void DeleteObject (T *object)
 
 double_VAR (textord_tabvector_vertical_gap_fraction, 0.5, "max fraction of mean blob width allowed for vertical gaps in vertical text")
 
 double_VAR (textord_tabvector_vertical_box_ratio, 0.5, "Fraction of box matches required to declare a line vertical")
 
 double_VAR_H (textord_tabvector_vertical_gap_fraction, 0.5, "Max fraction of mean blob width allowed for vertical gaps in vertical text")
 
 double_VAR_H (textord_tabvector_vertical_box_ratio, 0.5, "Fraction of box matches required to declare a line vertical")
 
static TBOX BoundsWithinBox (Pix *pix, const TBOX &box)
 
static void TruncateBoxToMissNonText (int x_middle, int y_middle, bool split_on_x, Pix *nontext_map, TBOX *bbox)
 
void SetBlobStrokeWidth (Pix *pix, BLOBNBOX *blob)
 
void assign_blobs_to_blocks2 (Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
 
static bool IntFlagExists (const char *flag_name, int32_t *value)
 
static bool DoubleFlagExists (const char *flag_name, double *value)
 
static bool BoolFlagExists (const char *flag_name, bool *value)
 
static bool StringFlagExists (const char *flag_name, const char **value)
 
static void SetIntFlagValue (const char *flag_name, const int32_t new_val)
 
static void SetDoubleFlagValue (const char *flag_name, const double new_val)
 
static void SetBoolFlagValue (const char *flag_name, const bool new_val)
 
static void SetStringFlagValue (const char *flag_name, const char *new_val)
 
static bool SafeAtoi (const char *str, int *val)
 
static bool SafeAtod (const char *str, double *val)
 
static void PrintCommandLineFlags ()
 
void ParseCommandLineFlags (const char *usage, int *argc, char ***argv, const bool remove_flags)
 
ShapeTable * LoadShapeTable (const STRING &file_prefix)
 
void WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table)
 
MasterTrainer * LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)
 
static void CheckSharedLibraryVersion ()
 
Pix * DegradeImage (Pix *input, int exposure, TRand *randomizer, float *rotation)
 
Pix * PrepareDistortedPix (const Pix *pix, bool perspective, bool invert, bool white_noise, bool smooth_noise, bool blur, int box_reduction, TRand *randomizer, GenericVector< TBOX > *boxes)
 
void GeneratePerspectiveDistortion (int width, int height, TRand *randomizer, Pix **pix, GenericVector< TBOX > *boxes)
 
int ProjectiveCoeffs (int width, int height, TRand *randomizer, float **im_coeffs, float **box_coeffs)
 
bool WriteFile (const std::string &output_dir, const std::string &lang, const std::string &suffix, const GenericVector< char > &data, FileWriter writer)
 
STRING ReadFile (const std::string &filename, FileReader reader)
 
bool WriteUnicharset (const UNICHARSET &unicharset, const std::string &output_dir, const std::string &lang, FileWriter writer, TessdataManager *traineddata)
 
bool WriteRecoder (const UNICHARSET &unicharset, bool pass_through, const std::string &output_dir, const std::string &lang, FileWriter writer, STRING *radical_table_data, TessdataManager *traineddata)
 
static bool WriteDawg (const GenericVector< STRING > &words, const UNICHARSET &unicharset, Trie::RTLReversePolicy reverse_policy, TessdataType file_type, TessdataManager *traineddata)
 
static bool WriteDawgs (const GenericVector< STRING > &words, const GenericVector< STRING > &puncs, const GenericVector< STRING > &numbers, bool lang_is_rtl, const UNICHARSET &unicharset, TessdataManager *traineddata)
 
int CombineLangModel (const UNICHARSET &unicharset, const std::string &script_dir, const std::string &version_str, const std::string &output_dir, const std::string &lang, bool pass_through_recoder, const GenericVector< STRING > &words, const GenericVector< STRING > &puncs, const GenericVector< STRING > &numbers, bool lang_is_rtl, FileReader reader, FileWriter writer)
 
static std::string EncodeAsUTF8 (const char32 ch32)
 
static bool is_hyphen_punc (const char32 ch)
 
static bool is_single_quote (const char32 ch)
 
static bool is_double_quote (const char32 ch)
 
static void NormalizeUTF8ToUTF32 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, const char *str8, std::vector< char32 > *normed32)
 
static void StripJoiners (std::vector< char32 > *str32)
 
bool NormalizeUTF8String (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNorm grapheme_normalize, const char *str8, std::string *normalized)
 
bool NormalizeCleanAndSegmentUTF8 (UnicodeNormMode u_mode, OCRNorm ocr_normalize, GraphemeNormMode g_mode, bool report_errors, const char *str8, std::vector< std::string > *graphemes)
 
char32 OCRNormalize (char32 ch)
 
bool IsOCREquivalent (char32 ch1, char32 ch2)
 
bool IsValidCodepoint (const char32 ch)
 
bool IsWhitespace (const char32 ch)
 
bool IsUTF8Whitespace (const char *text)
 
unsigned int SpanUTF8Whitespace (const char *text)
 
unsigned int SpanUTF8NotWhitespace (const char *text)
 
bool IsInterchangeValid (const char32 ch)
 
bool IsInterchangeValid7BitAscii (const char32 ch)
 
char32 FullwidthToHalfwidth (const char32 ch)
 
static void ListFontFamilies (PangoFontFamily ***families, int *n_families)
 
static char * my_strnmove (char *dest, const char *src, size_t n)
 
static bool ShouldIgnoreFontFamilyName (const char *query)
 
static void CharCoverageMapToBitmap (PangoCoverage *coverage, std::vector< bool > *unichar_bitmap)
 
static bool IsCombiner (int ch)
 
static std::string EncodeAsUTF8 (const char32 ch32)
 
static bool RandBool (const double prob, TRand *rand)
 
static Pix * CairoARGB32ToPixFormat (cairo_surface_t *surface)
 
static void MergeBoxCharsToWords (std::vector< BoxChar *> *boxchars)
 
static bool IsWhitespaceBox (const BoxChar *boxchar)
 
static std::string StringReplace (const std::string &in, const std::string &oldsub, const std::string &newsub)
 
static void ExtractFontProperties (const std::string &utf8_text, StringRenderer *render, const std::string &output_base)
 
static bool MakeIndividualGlyphs (Pix *pix, const std::vector< BoxChar *> &vbox, const int input_tiff_page)
 
static void AddStringsToUnicharset (const GenericVector< STRING > &strings, int norm_mode, UNICHARSET *unicharset)
 
static int Main (int argc, char **argv)
 
void SetupBasicProperties (bool report_errors, bool decompose, UNICHARSET *unicharset)
 
void SetScriptProperties (const std::string &script_dir, UNICHARSET *unicharset)
 
std::string GetXheightString (const std::string &script_dir, const UNICHARSET &unicharset)
 
void SetPropertiesForInputFile (const std::string &script_dir, const std::string &input_unicharset_file, const std::string &output_unicharset_file, const std::string &output_xheights_file)
 
void SetupBasicProperties (bool report_errors, UNICHARSET *unicharset)
 
static bool IsThaiConsonant (char32 ch)
 
static bool IsThaiBeforeConsonantVowel (char32 ch)
 
static bool IsThaiToneMark (char32 ch)
 
static bool IsThaiTonableVowel (char32 ch)
 
static bool CmpPairSecond (const std::pair< int, int > &p1, const std::pair< int, int > &p2)
 
static void ScanParentsForCaseMix (const UNICHARSET &unicharset, LanguageModelState *parent_node)
 
static bool HasBetterCaseVariant (const UNICHARSET &unicharset, const BLOB_CHOICE *choice, BLOB_CHOICE_LIST *choices)
 
template<class BLOB_CHOICE >
int SortByUnicharID (const void *void1, const void *void2)
 
template<class BLOB_CHOICE >
int SortByRating (const void *void1, const void *void2)
 

Variables

const int kMinRectSize = 10
 
const char kTesseractReject = '~'
 
const char kUNLVReject = '~'
 
const char kUNLVSuspect = '^'
 
const char * kInputFile = "noname.tif"
 
const char * kOldVarsFile = "failed_vars.txt"
 
const int kMaxIntSize = 22
 
const int kNumbersPerBlob = 5
 
const int kBytesPerNumber = 5
 
const int kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1
 
const int kBytesPer64BitNumber = 20
 
const int kMaxBytesPerLine
 
const int kUniChs []
 
const int kLatinChs []
 
static const int kBasicBufSize = 2048
 
static const int kCharWidth = 2
 
static const int kMaxBytesPerCodepoint = 20
 
const float kMathDigitDensityTh1 = 0.25
 
const float kMathDigitDensityTh2 = 0.1
 
const float kMathItalicDensityTh = 0.5
 
const float kUnclearDensityTh = 0.25
 
const int kSeedBlobsCountTh = 10
 
const int kLeftIndentAlignmentCountTh = 1
 
const int kMaxCharTopRange = 48
 
const float kCertaintyScale = 7.0f
 
const float kWorstDictCertainty = -25.0f
 
const int kMaxCircleErosions = 8
 
const ParagraphModel * kCrownLeft = reinterpret_cast<ParagraphModel *>(0xDEAD111F)
 
const ParagraphModel * kCrownRight = reinterpret_cast<ParagraphModel *>(0xDEAD888F)
 
const int16_t kMaxBoxEdgeDiff = 2
 
const int kBoxClipTolerance = 2
 
const int kNumEndPoints = 3
 
const int kMinPointsForErrorCount = 16
 
const int kMaxRealDistance = 2.0
 
const int kFeaturePadding = 2
 
const int kImagePadding = 4
 
const int kHistogramSize = 256
 
static const int kMaxSmallWordUnichars = 3
 
static const int kMaxMediumWordUnichars = 6
 
static const char *const kParamsTrainingFeatureTypeName []
 
const int kMaxAmbigStringSize = 30 * ( 10 + 1)
 
static const int kUnigramAmbigsBufferSize = 1000
 
static const char kAmbigNgramSeparator [] = { ' ', '\0' }
 
static const char kAmbigDelimiters [] = "\t "
 
static const char kIllegalMsg []
 
static const char kIllegalUnicharMsg []
 
CCUtilMutex tprintfMutex
 
static const char *const kTessdataFileSuffixes []
 
static const int kMaxNumTessdataEntries = 1000
 
const char * kNullChar = "<nul>"
 
const int kRadicalRadix = 29
 
const char * kUTF8LineSeparator = "\u2028"
 
const char * kUTF8ParagraphSeparator = "\u2029"
 
const char * kLRM = "\u200E"
 
const char * kRLM = "\u200F"
 
const char * kRLE = "\u202A"
 
const char * kPDF = "\u202C"
 
const char * kHyphenLikeUTF8 []
 
const char * kApostropheLikeUTF8 []
 
const char kUniversalAmbigsFile []
 
const int ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)
 
const double kRatingEpsilon = 1.0 / 32
 
const int kMaxOffsetDist = 32
 
static const int kNumOffsetMaps = 2
 
const int kMinClusteredShapes = 1
 
const int kMaxUnicharsPerCluster = 2000
 
const float kFontMergeDistance = 0.025
 
const float kInfiniteDist = 999.0f
 
const int kRandomizingCenter = 128
 
static const int kNumCNParams = 4
 
static const int kSampleYShiftSize = 5
 
static const int kSampleScaleSize = 3
 
static const int kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2
 
const int kTestChar = -1
 
const int kSquareLimit = 25
 
const int kPrime1 = 17
 
const int kPrime2 = 13
 
static const int kMinAbsoluteGarbageWordLength = 10
 
static const float kMinAbsoluteGarbageAlphanumFrac = 0.5f
 
const int case_state_table [6][4]
 
static const bool kDawgSuccessors [DAWG_TYPE_COUNT][DAWG_TYPE_COUNT]
 
static const char kWildcard [] = "*"
 
static const int kRatingPad = 4
 
static const char kDictWildcard [] = "\u2606"
 
static const int kDictMaxWildcards = 2
 
static const char kHyphenSymbol [] = "-"
 
static const char kSlashSymbol [] = "/"
 
static const char kQuestionSymbol [] = "?"
 
static const char kApostropheSymbol [] = "'"
 
static const float kSimCertaintyScale = -10.0
 
static const float kSimCertaintyOffset = -10.0
 
static const float kSimilarityFloor = 100.0
 
static const int kDocDictMaxRepChars = 4
 
const char kDoNotReverse [] = "RRP_DO_NO_REVERSE"
 
const char kReverseIfHasRTL [] = "RRP_REVERSE_IF_HAS_RTL"
 
const char kForceReverse [] = "RRP_FORCE_REVERSE"
 
const char *const RTLReversePolicyNames []
 
double TanhTable [kTableSize]
 
double LogisticTable [kTableSize]
 
const int kTableSize = 4096
 
const double kScaleFactor = 256.0
 
const int kMaxInputHeight = 48
 
const double kStateClip = 100.0
 
const double kErrClip = 1.0f
 
const double kDictRatio = 2.25
 
const double kCertOffset = -0.085
 
const double kMinDivergenceRate = 50.0
 
const int kMinStallIterations = 10000
 
const double kSubTrainerMarginFraction = 3.0 / 128
 
const double kLearningRateDecay = sqrt(0.5)
 
const int kNumAdjustmentIterations = 100
 
const int kErrorGraphInterval = 1000
 
const int kNumPagesPerBatch = 100
 
const int kMinStartedErrorRate = 75
 
const double kStageTransitionThreshold = 10.0
 
const double kHighConfidence = 0.9375
 
const double kImprovementFraction = 15.0 / 16.0
 
const double kBestCheckpointFraction = 31.0 / 32.0
 
const int kTargetXScale = 5
 
const int kTargetYScale = 100
 
const int kMinWinSize = 500
 
const int kMaxWinSize = 2000
 
const int kXWinFrameSize = 30
 
const int kYWinFrameSize = 80
 
const float kMinCertainty = -20.0f
 
const float kMinProb = exp(kMinCertainty)
 
const char * kNodeContNames [] = {"Anything", "OnlyDup", "NoDup"}
 
const int kAdamCorrectionIterations = 200000
 
const double kAdamEpsilon = 1e-8
 
const int kInt8Flag = 1
 
const int kAdamFlag = 4
 
const int kDoubleFlag = 128
 
const int kHistogramBuckets = 16
 
const double kAlignedFraction = 0.03125
 
const double kRaggedFraction = 2.5
 
const double kAlignedGapFraction = 0.75
 
const double kRaggedGapFraction = 1.0
 
const int kVLineAlignment = 3
 
const int kVLineGutter = 1
 
const int kVLineSearchSize = 150
 
const int kMinRaggedTabs = 5
 
const int kMinAlignedTabs = 4
 
const int kVLineMinLength = 500
 
const double kMinTabGradient = 4.0
 
const int kMaxSkewFactor = 15
 
const double kMaxSmallNeighboursPerPix = 1.0 / 32
 
const int kMaxLargeOverlapsWithSmall = 3
 
const int kMaxMediumOverlapsWithSmall = 12
 
const int kMaxLargeOverlapsWithMedium = 12
 
const int kOriginalNoiseMultiple = 8
 
const int kNoisePadding = 4
 
const double kPhotoOffsetFraction = 0.375
 
const double kMinGoodTextPARatio = 1.5
 
const int kMaxIncompatibleColumnCount = 2
 
const double kHorizontalGapMergeFraction = 0.5
 
const double kMinGutterWidthGrid = 0.5
 
const double kMaxDistToPartSizeRatio = 1.5
 
const double kMaxSpacingDrift = 1.0 / 72
 
const double kMaxTopSpacingFraction = 0.25
 
const double kMaxSameBlockLineSpacing = 3
 
const double kMaxSizeRatio = 1.5
 
const double kMaxLeaderGapFractionOfMax = 0.25
 
const double kMaxLeaderGapFractionOfMin = 0.5
 
const int kMinLeaderCount = 5
 
const int kMinStrongTextValue = 6
 
const int kMinChainTextValue = 3
 
const int kHorzStrongTextlineCount = 8
 
const int kHorzStrongTextlineHeight = 10
 
const int kHorzStrongTextlineAspect = 5
 
const double kMaxBaselineError = 0.4375
 
const double kMinBaselineCoverage = 0.5
 
const int kMaxRMSColorNoise = 128
 
const int kMaxColorDistance = 900
 
static char kBlobTypes [BRT_COUNT+1] = "NHSRIUVT"
 
const int kRGBRMSColors = 4
 
const int kMaxPadFactor = 6
 
const int kMaxNeighbourDistFactor = 4
 
const int kMaxCaptionLines = 7
 
const double kMinCaptionGapRatio = 2.0
 
const double kMinCaptionGapHeightRatio = 0.5
 
const double kMarginOverlapFraction = 0.25
 
const double kBigPartSizeRatio = 1.75
 
const double kTinyEnoughTextlineOverlapFraction = 0.25
 
const double kMaxPartitionSpacing = 1.75
 
const int kSmoothDecisionMargin = 4
 
const double kMinColumnWidth = 2.0 / 3
 
const double kMinRectangularFraction = 0.125
 
const double kMaxRectangularFraction = 0.75
 
const double kMaxRectangularGradient = 0.1
 
const int kMinImageFindSize = 100
 
const double kRMSFitScaling = 8.0
 
const int kMinColorDifference = 16
 
const int kThinLineFraction = 20
 Denominator of resolution makes max pixel width to allow thin lines. More...
 
const int kMinLineLengthFraction = 4
 Denominator of resolution makes min pixels to demand line lengths to be. More...
 
const int kCrackSpacing = 100
 Spacing of cracks across the page to break up tall vertical lines. More...
 
const int kLineFindGridSize = 50
 Grid size used by line finder. Not very critical. More...
 
const int kMinThickLineWidth = 12
 
const int kMaxLineResidue = 6
 
const double kThickLengthMultiple = 0.75
 
const double kMaxNonLineDensity = 0.25
 
const double kMaxStaveHeight = 1.0
 
const double kMinMusicPixelFraction = 0.75
 
const double kStrokeWidthFractionTolerance = 0.125
 
const double kStrokeWidthTolerance = 1.5
 
const double kStrokeWidthFractionCJK = 0.25
 
const double kStrokeWidthCJK = 2.0
 
const int kCJKRadius = 2
 
const double kCJKBrokenDistanceFraction = 0.25
 
const int kCJKMaxComponents = 8
 
const double kCJKAspectRatio = 1.25
 
const double kCJKAspectRatioIncrease = 1.0625
 
const int kMaxCJKSizeRatio = 5
 
const double kBrokenCJKIterationFraction = 0.125
 
const double kDiacriticXPadRatio = 7.0
 
const double kDiacriticYPadRatio = 1.75
 
const double kMinDiacriticSizeRatio = 1.0625
 
const double kMaxDiacriticDistanceRatio = 1.25
 
const double kMaxDiacriticGapToBaseCharHeight = 1.0
 
const int kLineTrapLongest = 4
 
const int kLineTrapShortest = 2
 
const int kMostlyOneDirRatio = 3
 
const double kLineResidueAspectRatio = 8.0
 
const int kLineResiduePadRatio = 3
 
const double kLineResidueSizeRatio = 1.75
 
const float kSizeRatioToReject = 2.0
 
const double kNeighbourSearchFactor = 2.5
 
const double kNoiseOverlapGrowthFactor = 4.0
 
const double kNoiseOverlapAreaFactor = 1.0 / 512
 
const int kTabRadiusFactor = 5
 
const int kMinVerticalSearch = 3
 
const int kMaxVerticalSearch = 12
 
const int kMaxRaggedSearch = 25
 
const int kMinLinesInColumn = 10
 
const double kMinFractionalLinesInColumn = 0.125
 
const double kMaxGutterWidthAbsolute = 2.00
 
const int kRaggedGutterMultiple = 5
 
const double kLineFragmentAspectRatio = 10.0
 
const int kMinEvaluatedTabs = 3
 
const double kCosMaxSkewAngle = 0.866025
 
const int kColumnWidthFactor = 20
 
const int kMaxVerticalSpacing = 500
 
const int kMaxBlobWidth = 500
 
const double kSplitPartitionSize = 2.0
 
const double kAllowTextHeight = 0.5
 
const double kAllowTextWidth = 0.6
 
const double kAllowTextArea = 0.8
 
const double kAllowBlobHeight = 0.3
 
const double kAllowBlobWidth = 0.4
 
const double kAllowBlobArea = 0.05
 
const int kMinBoxesInTextPartition = 10
 
const int kMaxBoxesInDataPartition = 20
 
const double kMaxGapInTextPartition = 4.0
 
const double kMinMaxGapInTextPartition = 0.5
 
const double kMaxBlobOverlapFactor = 4.0
 
const double kMaxTableCellXheight = 2.0
 
const int kMaxColumnHeaderDistance = 4
 
const double kTableColumnThreshold = 3.0
 
const int kRulingVerticalMargin = 3
 
const double kMinOverlapWithTable = 0.6
 
const int kSideSpaceMargin = 10
 
const double kSmallTableProjectionThreshold = 0.35
 
const double kLargeTableProjectionThreshold = 0.45
 
const int kLargeTableRowCount = 6
 
const int kMinRowsInTable = 3
 
const int kAdjacentLeaderSearchPadding = 2
 
const double kParagraphEndingPreviousLineRatio = 1.3
 
const double kMaxParagraphEndingLeftSpaceMultiple = 3.0
 
const double kMinParagraphEndingTextToWhitespaceRatio = 3.0
 
const double kMaxXProjectionGapFactor = 2.0
 
const double kStrokeWidthFractionalTolerance = 0.25
 
const double kStrokeWidthConstantTolerance = 2.0
 
const double kHorizontalSpacing = 0.30
 
const double kVerticalSpacing = -0.2
 
const int kCellSplitRowThreshold = 0
 
const int kCellSplitColumnThreshold = 0
 
const int kLinedTableMinVerticalLines = 3
 
const int kLinedTableMinHorizontalLines = 3
 
const double kRequiredColumns = 0.7
 
const double kMarginFactor = 1.1
 
const double kMaxRowSize = 2.5
 
const double kGoodRowNumberOfColumnsSmall [] = { 2, 2, 2, 2, 2, 3, 3 }
 
const int kGoodRowNumberOfColumnsSmallSize
 
const double kGoodRowNumberOfColumnsLarge = 0.7
 
const double kMinFilledArea = 0.35
 
const int kGutterMultiple = 4
 
const int kGutterToNeighbourRatio = 3
 
const int kSimilarVectorDist = 10
 
const int kSimilarRaggedDist = 50
 
const int kMaxFillinMultiple = 11
 
const double kMinGutterFraction = 0.5
 
const double kLineCountReciprocal = 4.0
 
const double kMinAlignedGutter = 0.25
 
const double kMinRaggedGutter = 1.5
 
const char * kAlignmentNames []
 
const int kMaxLineLength = 1024
 
const float kRotationRange = 0.02f
 
const int kExposureFactor = 16
 
const int kSaltnPepper = 5
 
const int kMinRampSize = 1000
 
const int kMinLigature = 0xfb00
 
const int kMaxLigature = 0xfb17
 
const int kDefaultResolution = 300
 
static const int kDefaultOutputResolution = 300
 
static const char * kWordJoinerUTF8 = "\u2060"
 
static const char *const XHeightConsistencyEnumName []
 
static const char *const LMPainPointsTypeName []
 
static const float kScoreScaleFactor = 100.0f
 
static const float kMinFinalCost = 0.001f
 
static const float kMaxFinalCost = 100.0f
 

Detailed Description

The box file is assumed to contain box definitions, one per line, of the following format for blob-level boxes:

*   <UTF8 str> <left> <bottom> <right> <top> <page id>
* 

and for word/line-level boxes:

*   WordStr <left> <bottom> <right> <top> <page id> #<space-delimited word str>
* 

NOTES: The boxes use tesseract coordinates, i.e. 0,0 is at BOTTOM-LEFT.

<page id> is 0-based, and the page number is used for multipage input (tiff).

In the blob-level form, each line represents a recognizable unit, which may be several UTF-8 bytes, but there is a bounding box around each recognizable unit, and no classifier is needed to train in this mode (bootstrapping.)

In the word/line-level form, the line begins with the literal "WordStr", and the bounding box bounds either a whole line or a whole word. The recognizable units in the word/line are listed after the # at the end of the line and are space delimited, ignoring any original spaces on the line. Eg.

* word -> #w o r d
* multi word line -> #m u l t i w o r d l i n e
* 

The recognizable units must be space-delimited in order to allow multiple unicodes to be used for a single recognizable unit, eg Hindi.

In this mode, the classifier must have been pre-trained with the desired character set, or it will not be able to find the character segmentations.

Make a word from the selected blobs and run Tess on them.

Parameters
page_res: recognise blobs
selection_box: within this box

fp_eval_word_spacing() Evaluation function for fixed pitch word lists.

Basically, count the number of "nice" characters - those which are in tess acceptable words or in dict words and are not rejected. Penalise any potential noise chars.

build_menu()

Construct the menu tree used by the command window

process_cmd_win_event()

Process a command returned from the command window (Just call the appropriate command handler)

word_blank_and_set_display() Word processor

Blank display of word then redisplay word according to current display mode settings


Public Function Prototypes


Include Files and Type Defines

Typedef Documentation

◆ BlobGridSearch

using tesseract::BlobGridSearch = typedef GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>

◆ char32

using tesseract::char32 = typedef signed int

◆ CheckPointReader

◆ CheckPointWriter

◆ ColPartitionGridSearch

using tesseract::ColPartitionGridSearch = typedef GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>

◆ ColSegmentGrid

using tesseract::ColSegmentGrid = typedef BBGrid<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

◆ ColSegmentGridSearch

using tesseract::ColSegmentGridSearch = typedef GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>

◆ DawgVector

◆ DictFunc

typedef int(Dict::* tesseract::DictFunc) (void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const

◆ FileReader

typedef bool(* tesseract::FileReader)(const STRING &filename, GenericVector< char > *data)

◆ FileWriter

typedef bool(* tesseract::FileWriter)(const GenericVector< char > &data, const STRING &filename)

◆ FillLatticeFunc

typedef void(Wordrec::* tesseract::FillLatticeFunc) (const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)

◆ IntKDPair

using tesseract::IntKDPair = typedef KDPairInc<int, int>

◆ LanguageModelFlagsType

using tesseract::LanguageModelFlagsType = typedef unsigned char

Used for expressing various language model flags.

◆ LigHash

using tesseract::LigHash = typedef std::unordered_map<std::string, std::string, StringHash>

◆ NodeChildVector

◆ PainPointHeap

◆ ParamsModelClassifyFunc

typedef float(Dict::* tesseract::ParamsModelClassifyFunc) (const char *lang, void *path)

◆ ParamsTrainingHypothesisList

◆ PartSetVector

◆ ProbabilityInContextFunc

typedef double(Dict::* tesseract::ProbabilityInContextFunc) (const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)

◆ RecodeHeap

◆ RecodePair

using tesseract::RecodePair = typedef KDPairInc<double, RecodeNode>

◆ RSCounts

using tesseract::RSCounts = typedef std::unordered_map<int, int>

◆ RSMap

using tesseract::RSMap = typedef std::unordered_map<int, std::unique_ptr<std::vector<int> >>

◆ SetOfModels

◆ ShapeQueue

◆ SuccessorList

◆ SuccessorListsVector

◆ TestCallback

typedef TessResultCallback4<STRING, int, const double*, const TessdataManager&, int>* tesseract::TestCallback

◆ TruthCallback

◆ UnicharAmbigsVector

◆ UnicharIdVector

using tesseract::UnicharIdVector = typedef GenericVector<UNICHAR_ID>

◆ WidthCallback

using tesseract::WidthCallback = typedef TessResultCallback1<bool, int>

◆ WordGrid

using tesseract::WordGrid = typedef BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

◆ WordRecognizer

typedef void(Tesseract::* tesseract::WordRecognizer) (const WordData &word_data, WERD_RES **in_word, PointerVector< WERD_RES > *out_words)

◆ WordSearch

using tesseract::WordSearch = typedef GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>

Enumeration Type Documentation

◆ AmbigType

Enumerator
NOT_AMBIG 
REPLACE_AMBIG 
DEFINITE_AMBIG 
SIMILAR_AMBIG 
CASE_AMBIG 
AMBIG_TYPE_COUNT 

◆ CachingStrategy

Enumerator
CS_SEQUENTIAL 
CS_ROUND_ROBIN 

◆ CharSegmentationType

Enumerator
CST_FRAGMENT 
CST_WHOLE 
CST_IMPROPER 
CST_NGRAM 

◆ CMD_EVENTS

Enumerator
ACTION_1_CMD_EVENT 
RECOG_WERDS 
RECOG_PSEUDO 
ACTION_2_CMD_EVENT 

◆ ColSegType

Enumerator
COL_UNKNOWN 
COL_TEXT 
COL_TABLE 
COL_MIXED 
COL_COUNT 

◆ ColumnSpanningType

Enumerator
CST_NOISE 
CST_FLOWING 
CST_HEADING 
CST_PULLOUT 
CST_COUNT 

◆ CountTypes

Enumerator
CT_UNICHAR_TOP_OK 
CT_UNICHAR_TOP1_ERR 
CT_UNICHAR_TOP2_ERR 
CT_UNICHAR_TOPN_ERR 
CT_UNICHAR_TOPTOP_ERR 
CT_OK_MULTI_UNICHAR 
CT_OK_JOINED 
CT_OK_BROKEN 
CT_REJECT 
CT_FONT_ATTR_ERR 
CT_OK_MULTI_FONT 
CT_NUM_RESULTS 
CT_RANK 
CT_REJECTED_JUNK 
CT_ACCEPTED_JUNK 
CT_SIZE 

◆ DawgType

Enumerator
DAWG_TYPE_PUNCTUATION 
DAWG_TYPE_WORD 
DAWG_TYPE_NUMBER 
DAWG_TYPE_PATTERN 
DAWG_TYPE_COUNT 

◆ ErrorTypes

Enumerator
ET_RMS 
ET_DELTA 
ET_WORD_RECERR 
ET_CHAR_ERROR 
ET_SKIP_RATIO 
ET_COUNT 

◆ FactorNames

Enumerator
FN_INCOLOR 
FN_Y0 
FN_Y1 
FN_Y2 
FN_Y3 
FN_X0 
FN_X1 
FN_SHEAR 
FN_NUM_FACTORS 

◆ FlexDimensions

Enumerator
FD_BATCH 
FD_HEIGHT 
FD_WIDTH 
FD_DIMSIZE 

◆ GraphemeNorm

Enumerator
kNone 
kNormalize 

◆ GraphemeNormMode

Enumerator
kSingleString 
kCombined 
kGlyphSplit 
kIndividualUnicodes 

◆ kParamsTrainingFeatureType

Enumerator
PTRAIN_DIGITS_SHORT 
PTRAIN_DIGITS_MED 
PTRAIN_DIGITS_LONG 
PTRAIN_NUM_SHORT 
PTRAIN_NUM_MED 
PTRAIN_NUM_LONG 
PTRAIN_DOC_SHORT 
PTRAIN_DOC_MED 
PTRAIN_DOC_LONG 
PTRAIN_DICT_SHORT 
PTRAIN_DICT_MED 
PTRAIN_DICT_LONG 
PTRAIN_FREQ_SHORT 
PTRAIN_FREQ_MED 
PTRAIN_FREQ_LONG 
PTRAIN_SHAPE_COST_PER_CHAR 
PTRAIN_NGRAM_COST_PER_CHAR 
PTRAIN_NUM_BAD_PUNC 
PTRAIN_NUM_BAD_CASE 
PTRAIN_XHEIGHT_CONSISTENCY 
PTRAIN_NUM_BAD_CHAR_TYPE 
PTRAIN_NUM_BAD_SPACING 
PTRAIN_NUM_BAD_FONT 
PTRAIN_RATING_PER_CHAR 
PTRAIN_NUM_FEATURE_TYPES 

◆ LeftOrRight

Enumerator
LR_LEFT 
LR_RIGHT 

◆ LineType

Enumerator
LT_START 
LT_BODY 
LT_UNKNOWN 
LT_MULTIPLE 

◆ LMPainPointsType

Enumerator
LM_PPTYPE_BLAMER 
LM_PPTYPE_AMBIG 
LM_PPTYPE_PATH 
LM_PPTYPE_SHAPE 
LM_PPTYPE_NUM 

◆ LossType

Enumerator
LT_NONE 
LT_CTC 
LT_SOFTMAX 
LT_LOGISTIC 

◆ NeighbourPartitionType

Enumerator
NPT_HTEXT 
NPT_VTEXT 
NPT_WEAK_HTEXT 
NPT_WEAK_VTEXT 
NPT_IMAGE 
NPT_COUNT 

◆ NetworkFlags

Enumerator
NF_LAYER_SPECIFIC_LR 
NF_ADAM 

◆ NetworkType

Enumerator
NT_NONE 
NT_INPUT 
NT_CONVOLVE 
NT_MAXPOOL 
NT_PARALLEL 
NT_REPLICATED 
NT_PAR_RL_LSTM 
NT_PAR_UD_LSTM 
NT_PAR_2D_LSTM 
NT_SERIES 
NT_RECONFIG 
NT_XREVERSED 
NT_YREVERSED 
NT_XYTRANSPOSE 
NT_LSTM 
NT_LSTM_SUMMARY 
NT_LOGISTIC 
NT_POSCLIP 
NT_SYMCLIP 
NT_TANH 
NT_RELU 
NT_LINEAR 
NT_SOFTMAX 
NT_SOFTMAX_NO_CTC 
NT_LSTM_SOFTMAX 
NT_LSTM_SOFTMAX_ENCODED 
NT_TENSORFLOW 
NT_COUNT 

◆ NodeContinuation

Enumerator
NC_ANYTHING 
NC_ONLY_DUP 
NC_NO_DUP 
NC_COUNT 

◆ NormalizationMode

Enumerator
NM_BASELINE 
NM_CHAR_ISOTROPIC 
NM_CHAR_ANISOTROPIC 

◆ OcrEngineMode

When Tesseract/Cube is initialized we can choose to instantiate/load/run only the Tesseract part, only the Cube part or both along with the combiner. The preference of which engine to use is stored in tessedit_ocr_engine_mode.

ATTENTION: When modifying this enum, please make sure to make the appropriate changes to all the enums mirroring it (e.g. OCREngine in cityblock/workflow/detection/detection_storage.proto). Such enums will mention the connection to OcrEngineMode in the comments.

Enumerator
OEM_TESSERACT_ONLY 
OEM_LSTM_ONLY 
OEM_TESSERACT_LSTM_COMBINED 
OEM_DEFAULT 
OEM_COUNT 

◆ OCRNorm

enum tesseract::OCRNorm
strong
Enumerator
kNone 
kNormalize 

◆ Orientation

+------------------+  Orientation Example:
| 1 Aaaa Aaaa Aaaa |  ====================
| Aaa aa aaa aa    |  To left is a diagram of some (1) English and
| aaaaaa A aa aaa. |  (2) Chinese text and a (3) photo credit.
|                2 |
|   #######  c c C |  Upright Latin characters are represented as A and a.
|   #######  c c c |  '<' represents a Latin character rotated
| < #######  c c c |      anti-clockwise 90 degrees.
| < #######  c   c |
| < #######  .   c |  Upright Chinese characters are represented as C and c.
| 3 #######      c |
+------------------+  NOTA BENE: enum values here should match goodoc.proto

If you orient your head so that "up" aligns with Orientation, then the characters will appear "right side up" and readable.

In the example above, both the English and Chinese paragraphs are oriented so their "up" is the top of the page (page up). The photo credit is read with one's head turned leftward ("up" is to page left).

The values of this enum match the convention of Tesseract's osdetect.h

Enumerator
ORIENTATION_PAGE_UP 
ORIENTATION_PAGE_RIGHT 
ORIENTATION_PAGE_DOWN 
ORIENTATION_PAGE_LEFT 

◆ PageIteratorLevel

enum of the elements of the page hierarchy, used in ResultIterator to provide functions that operate on each level without having to have 5x as many functions.

Enumerator
RIL_BLOCK 
RIL_PARA 
RIL_TEXTLINE 
RIL_WORD 
RIL_SYMBOL 

◆ PageSegMode

Possible modes for page layout analysis. These must be kept in order of decreasing amount of layout analysis to be done, except for OSD_ONLY, so that the inequality test macros below work.

Enumerator
PSM_OSD_ONLY 

Orientation and script detection only.

PSM_AUTO_OSD 

Automatic page segmentation with orientation and script detection (OSD).

PSM_AUTO_ONLY 

Automatic page segmentation, but no OSD or OCR.

PSM_AUTO 

Fully automatic page segmentation, but no OSD.

PSM_SINGLE_COLUMN 

Assume a single column of text of variable sizes.

PSM_SINGLE_BLOCK_VERT_TEXT 

Assume a single uniform block of vertically aligned text.

PSM_SINGLE_BLOCK 

Assume a single uniform block of text. (Default.)

PSM_SINGLE_LINE 

Treat the image as a single text line.

PSM_SINGLE_WORD 

Treat the image as a single word.

PSM_CIRCLE_WORD 

Treat the image as a single word in a circle.

PSM_SINGLE_CHAR 

Treat the image as a single character.

PSM_SPARSE_TEXT 

Find as much text as possible in no particular order.

PSM_SPARSE_TEXT_OSD 

Sparse text with orientation and script detection.

PSM_RAW_LINE 

Treat the image as a single text line, bypassing hacks that are Tesseract-specific.

PSM_COUNT 

Number of enum entries.

◆ ParagraphJustification

JUSTIFICATION_UNKNOWN The alignment is not clearly one of the other options. This could happen for example if there are only one or two lines of text or the text looks like source code or poetry.

NOTA BENE: Fully justified paragraphs (text aligned to both left and right margins) are marked by Tesseract with JUSTIFICATION_LEFT if their text is written with a left-to-right script and with JUSTIFICATION_RIGHT if their text is written in a right-to-left script.

Interpretation for text read in vertical lines: "Left" is wherever the starting reading position is.

JUSTIFICATION_LEFT Each line, except possibly the first, is flush to the same left tab stop.

JUSTIFICATION_CENTER The text lines of the paragraph are centered about a line going down through the middle of the text lines.

JUSTIFICATION_RIGHT Each line, except possibly the first, is flush to the same right tab stop.

Enumerator
JUSTIFICATION_UNKNOWN 
JUSTIFICATION_LEFT 
JUSTIFICATION_CENTER 
JUSTIFICATION_RIGHT 

◆ PartitionFindResult

Enumerator
PFR_OK 
PFR_SKEW 
PFR_NOISE 

◆ ScriptPos

Enumerator
SP_NORMAL 
SP_SUBSCRIPT 
SP_SUPERSCRIPT 
SP_DROPCAP 

◆ SerializeAmount

Enumerator
LIGHT 
NO_BEST_TRAINER 
FULL 

◆ SetParamConstraint

Enumerator
SET_PARAM_CONSTRAINT_NONE 
SET_PARAM_CONSTRAINT_DEBUG_ONLY 
SET_PARAM_CONSTRAINT_NON_DEBUG_ONLY 
SET_PARAM_CONSTRAINT_NON_INIT_ONLY 

◆ SubTrainerResult

Enumerator
STR_NONE 
STR_UPDATED 
STR_REPLACED 

◆ TabAlignment

Enumerator
TA_LEFT_ALIGNED 
TA_LEFT_RAGGED 
TA_CENTER_JUSTIFIED 
TA_RIGHT_ALIGNED 
TA_RIGHT_RAGGED 
TA_SEPARATOR 
TA_COUNT 

◆ TessdataType

Enumerator
TESSDATA_LANG_CONFIG 
TESSDATA_UNICHARSET 
TESSDATA_AMBIGS 
TESSDATA_INTTEMP 
TESSDATA_PFFMTABLE 
TESSDATA_NORMPROTO 
TESSDATA_PUNC_DAWG 
TESSDATA_SYSTEM_DAWG 
TESSDATA_NUMBER_DAWG 
TESSDATA_FREQ_DAWG 
TESSDATA_FIXED_LENGTH_DAWGS 
TESSDATA_CUBE_UNICHARSET 
TESSDATA_CUBE_SYSTEM_DAWG 
TESSDATA_SHAPE_TABLE 
TESSDATA_BIGRAM_DAWG 
TESSDATA_UNAMBIG_DAWG 
TESSDATA_PARAMS_MODEL 
TESSDATA_LSTM 
TESSDATA_LSTM_PUNC_DAWG 
TESSDATA_LSTM_SYSTEM_DAWG 
TESSDATA_LSTM_NUMBER_DAWG 
TESSDATA_LSTM_UNICHARSET 
TESSDATA_LSTM_RECODER 
TESSDATA_VERSION 
TESSDATA_NUM_ENTRIES 

◆ TextlineOrder

The text lines are read in the given sequence.

In English, the order is top-to-bottom. In Chinese, vertical text lines are read right-to-left. Mongolian is written in vertical columns top to bottom like Chinese, but the lines are ordered left-to-right.

Note that only some combinations make sense. For example, WRITING_DIRECTION_LEFT_TO_RIGHT implies TEXTLINE_ORDER_TOP_TO_BOTTOM

Enumerator
TEXTLINE_ORDER_LEFT_TO_RIGHT 
TEXTLINE_ORDER_RIGHT_TO_LEFT 
TEXTLINE_ORDER_TOP_TO_BOTTOM 

◆ TopNState

Enumerator
TN_TOP2 
TN_TOPN 
TN_ALSO_RAN 
TN_COUNT 

◆ Trainability

Enumerator
TRAINABLE 
PERFECT 
UNENCODABLE 
HI_PRECISION_ERR 
NOT_BOXED 

◆ TrainingFlags

Enumerator
TF_INT_MODE 
TF_COMPRESS_UNICHARSET 

◆ TrainingState

Enumerator
TS_DISABLED 
TS_ENABLED 
TS_TEMP_DISABLE 
TS_RE_ENABLE 

◆ UnicodeNormMode

Enumerator
kNFD 
kNFC 
kNFKD 
kNFKC 

◆ ViramaScript

Enumerator
kNonVirama 
kDevanagari 
kBengali 
kGurmukhi 
kGujarati 
kOriya 
kTamil 
kTelugu 
kKannada 
kMalayalam 
kSinhala 
kMyanmar 
kKhmer 
kJavanese 

◆ WritingDirection

The grapheme clusters within a line of text are laid out logically in this direction, judged when looking at the text line rotated so that its Orientation is "page up".

For English text, the writing direction is left-to-right. For the Chinese text in the above example, the writing direction is top-to-bottom.

Enumerator
WRITING_DIRECTION_LEFT_TO_RIGHT 
WRITING_DIRECTION_RIGHT_TO_LEFT 
WRITING_DIRECTION_TOP_TO_BOTTOM 

◆ XHeightConsistencyEnum

Enumerator
XH_GOOD 
XH_SUBNORMAL 
XH_INCONSISTENT 

Function Documentation

◆ AcceptableCJKMerge()

static bool tesseract::AcceptableCJKMerge ( const TBOX bbox,
const TBOX nbox,
bool  debug,
int  max_size,
int  max_dist,
int *  x_gap,
int *  y_gap 
)
static

◆ AcceptableRowArgs()

static bool tesseract::AcceptableRowArgs ( int  debug_level,
int  min_num_rows,
const char *  function_name,
const GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end 
)
static

◆ AccumulateVector()

void tesseract::AccumulateVector ( int  n,
const double *  src,
double *  dest 
)
inline

◆ add_space()

static void tesseract::add_space ( TESS_CHAR_IT *  it)
static

◆ AddAllScriptsConverted()

static void tesseract::AddAllScriptsConverted ( const UNICHARSET sid_set,
const UNICHARSET osd_set,
GenericVector< int > *  allowed_ids 
)
static

◆ addAvailableLanguages()

static void tesseract::addAvailableLanguages ( const STRING datadir,
const STRING base,
GenericVector< STRING > *  langs 
)
static

◆ AddBaselineCoordsTohOCR()

static void tesseract::AddBaselineCoordsTohOCR ( const PageIterator it,
PageIteratorLevel  level,
STRING hocr_str 
)
static

Fits a line to the baseline at the given level, and appends its coefficients to the hOCR string. NOTE: The hOCR spec is unclear on how to specify baseline coefficients for rotated textlines. For this reason, on textlines that are not upright, this method currently only inserts a 'textangle' property to indicate the rotation direction and does not add any baseline information to the hocr string.

◆ AddBoxTohOCR()

static void tesseract::AddBoxTohOCR ( const ResultIterator it,
PageIteratorLevel  level,
STRING hocr_str 
)
static

◆ AddBoxToTSV()

static void tesseract::AddBoxToTSV ( const PageIterator it,
PageIteratorLevel  level,
STRING hocr_str 
)
static

◆ AddIdTohOCR() [1/2]

static void tesseract::AddIdTohOCR ( STRING hocr_str,
const std::string  base,
int  num1,
int  num2 
)
static

◆ AddIdTohOCR() [2/2]

static void tesseract::AddIdTohOCR ( STRING hocr_str,
const std::string  base,
int  num1,
int  num2,
int  num3 
)
static

◆ AddNearFeatures()

static void tesseract::AddNearFeatures ( const IntFeatureMap feature_map,
int  f,
int  levels,
GenericVector< int > *  good_features 
)
static

◆ AddStringsToUnicharset()

static void tesseract::AddStringsToUnicharset ( const GenericVector< STRING > &  strings,
int  norm_mode,
UNICHARSET unicharset 
)
static

◆ AffineMatrix()

static void tesseract::AffineMatrix ( int  writing_direction,
int  line_x1,
int  line_y1,
int  line_x2,
int  line_y2,
double *  a,
double *  b,
double *  c,
double *  d 
)
static

◆ AmbigSpec_zapper()

void tesseract::AmbigSpec_zapper ( ELIST_LINK link)

◆ AsciiLikelyListItem()

bool tesseract::AsciiLikelyListItem ( const STRING word)

◆ assign_blobs_to_blocks2()

void tesseract::assign_blobs_to_blocks2 ( Pix *  pix,
BLOCK_LIST *  blocks,
TO_BLOCK_LIST *  port_blocks 
)

◆ AssignIds()

static void tesseract::AssignIds ( const UnicityTable< FontInfo > &  all_fonts,
UnicityTable< FontInfo > *  lang_fonts 
)
static

◆ AtLeast2LineCrossings()

static bool tesseract::AtLeast2LineCrossings ( BLOBNBOX_CLIST *  blobs)
static

◆ AttemptBoxExpansion()

static TBOX tesseract::AttemptBoxExpansion ( const TBOX box,
const IntGrid noise_density,
int  pad 
)
static

◆ AttemptToShrinkBox()

static void tesseract::AttemptToShrinkBox ( const FCOORD rotation,
const FCOORD rerotation,
const TBOX im_box,
Pix *  pix,
TBOX slice 
)
static

◆ BestLabel()

static int tesseract::BestLabel ( const GENERIC_2D_ARRAY< float > &  outputs,
int  t 
)
static

◆ BlobToTrainingSample()

TrainingSample * tesseract::BlobToTrainingSample ( const TBLOB blob,
bool  nonlinear_norm,
INT_FX_RESULT_STRUCT fx_info,
GenericVector< INT_FEATURE_STRUCT > *  bl_features 
)

◆ BOOL_VAR() [1/13]

tesseract::BOOL_VAR ( textord_tabfind_show_color_fit  ,
false  ,
"Show stroke widths"   
)

◆ BOOL_VAR() [2/13]

tesseract::BOOL_VAR ( textord_tabfind_only_strokewidths  ,
false  ,
"Only run stroke widths"   
)

◆ BOOL_VAR() [3/13]

tesseract::BOOL_VAR ( textord_tabfind_show_initial_partitions  ,
false  ,
"Show partition bounds"   
)

◆ BOOL_VAR() [4/13]

tesseract::BOOL_VAR ( textord_tabfind_show_reject_blobs  ,
false  ,
"Show blobs rejected as noise"   
)

◆ BOOL_VAR() [5/13]

tesseract::BOOL_VAR ( textord_tabfind_show_initialtabs  ,
false  ,
"Show tab candidates"   
)

◆ BOOL_VAR() [6/13]

tesseract::BOOL_VAR ( textord_tabfind_show_columns  ,
false  ,
"Show column bounds"   
)

◆ BOOL_VAR() [7/13]

tesseract::BOOL_VAR ( textord_tabfind_show_finaltabs  ,
false  ,
"Show tab vectors"   
)

◆ BOOL_VAR() [8/13]

tesseract::BOOL_VAR ( textord_tabfind_show_blocks  ,
false  ,
"Show final block bounds"   
)

◆ BOOL_VAR() [9/13]

tesseract::BOOL_VAR ( textord_tabfind_find_tables  ,
true  ,
"run table detection"   
)

◆ BOOL_VAR() [10/13]

tesseract::BOOL_VAR ( textord_show_tables  ,
false  ,
"Show table regions"   
)

◆ BOOL_VAR() [11/13]

tesseract::BOOL_VAR ( textord_tablefind_show_mark  ,
false  ,
"Debug table marking steps in detail"   
)

◆ BOOL_VAR() [12/13]

tesseract::BOOL_VAR ( textord_tablefind_show_stats  ,
false  ,
"Show page stats used in table finding"   
)

◆ BOOL_VAR() [13/13]

tesseract::BOOL_VAR ( textord_tablefind_recognize_tables  ,
false  ,
"Enables the table recognizer for table layout and filtering."   
)

◆ BOOL_VAR_H()

tesseract::BOOL_VAR_H ( textord_tabfind_find_tables  ,
false  ,
"run table detection"   
)

◆ BoolFlagExists()

static bool tesseract::BoolFlagExists ( const char *  flag_name,
bool *  value 
)
static

◆ BoundsWithinBox()

static TBOX tesseract::BoundsWithinBox ( Pix *  pix,
const TBOX box 
)
static

◆ BoxFromHLine()

static TBOX tesseract::BoxFromHLine ( const TabVector hline)
static

◆ BoxMissMetric()

static double tesseract::BoxMissMetric ( const TBOX box1,
const TBOX box2 
)
static

Helper to compute the dispute resolution metric. Disputed blob resolution. The aim is to give the blob to the most appropriate boxfile box. Most of the time it is obvious, but if two boxfile boxes overlap significantly it is not. If a small boxfile box takes most of the blob, and a large boxfile box does too, then we want the small boxfile box to get it, but if the small box is much smaller than the blob, we don't want it to get it. Details of the disputed blob resolution: Given a box with area A, and a blob with area B, with overlap area C, then the miss metric is (A-C)(B-C)/(AB) and the box with minimum miss metric gets the blob.

◆ BuildFullyConnected()

static Network* tesseract::BuildFullyConnected ( const StaticShape input_shape,
NetworkType  type,
const STRING name,
int  depth 
)
static

◆ c_blob_comparator()

static int tesseract::c_blob_comparator ( const void *  blob1p,
const void *  blob2p 
)
static

◆ CairoARGB32ToPixFormat()

static Pix* tesseract::CairoARGB32ToPixFormat ( cairo_surface_t *  surface)
static

◆ CalculateTabStops()

static void tesseract::CalculateTabStops ( GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
int  tolerance,
GenericVector< Cluster > *  left_tabs,
GenericVector< Cluster > *  right_tabs 
)
static

◆ CallWithUTF8()

static void tesseract::CallWithUTF8 ( TessCallback1< const char *> *  cb,
const WERD_CHOICE wc 
)
static

◆ CanonicalizeDetectionResults()

void tesseract::CanonicalizeDetectionResults ( GenericVector< PARA *> *  row_owners,
PARA_LIST *  paragraphs 
)

◆ ceil_log2()

static uint32_t tesseract::ceil_log2 ( uint32_t  n)
inline static

◆ CharCoverageMapToBitmap()

static void tesseract::CharCoverageMapToBitmap ( PangoCoverage *  coverage,
std::vector< bool > *  unichar_bitmap 
)
static

◆ CheckSharedLibraryVersion()

static void tesseract::CheckSharedLibraryVersion ( )
inline static

◆ clear_any_old_text()

static void tesseract::clear_any_old_text ( BLOCK_LIST *  block_list)
static

◆ ClearFeatureSpaceWindow()

void tesseract::ClearFeatureSpaceWindow ( NORM_METHOD  norm_method,
ScrollView window 
)

Clears the given window and draws the featurespace guides for the appropriate normalization method.

◆ ClipBaseline()

static void tesseract::ClipBaseline ( int  ppi,
int  x1,
int  y1,
int  x2,
int  y2,
int *  line_x1,
int *  line_y1,
int *  line_x2,
int *  line_y2 
)
static

◆ ClipCoord()

static void tesseract::ClipCoord ( const ICOORD bleft,
const ICOORD tright,
ICOORD pos 
)
static

◆ ClipVector()

template<typename T >
void tesseract::ClipVector ( int  n,
T  lower,
T  upper,
T *  vec 
)
inline

◆ ClosestCluster()

static int tesseract::ClosestCluster ( const GenericVector< Cluster > &  clusters,
int  value 
)
static

◆ cmp_eq()

template<typename T >
bool tesseract::cmp_eq ( T const &  t1,
T const &  t2 
)

◆ CmpPairSecond()

static bool tesseract::CmpPairSecond ( const std::pair< int, int > &  p1,
const std::pair< int, int > &  p2 
)
static

◆ CodeInBinary()

void tesseract::CodeInBinary ( int  n,
int  nf,
double *  vec 
)
inline

◆ CodepointToUtf16be()

static bool tesseract::CodepointToUtf16be ( int  code,
char  utf16[kMaxBytesPerCodepoint] 
)
static

◆ CollectFonts()

static void tesseract::CollectFonts ( const UnicityTable< FontInfo > &  new_fonts,
UnicityTable< FontInfo > *  all_fonts 
)
static

◆ CollectHorizVertBlobs()

static void tesseract::CollectHorizVertBlobs ( BLOBNBOX_LIST *  input_blobs,
int *  num_vertical_blobs,
int *  num_horizontal_blobs,
BLOBNBOX_CLIST *  vertical_blobs,
BLOBNBOX_CLIST *  horizontal_blobs,
BLOBNBOX_CLIST *  nondescript_blobs 
)
static

◆ CombineLangModel()

int tesseract::CombineLangModel ( const UNICHARSET unicharset,
const std::string &  script_dir,
const std::string &  version_str,
const std::string &  output_dir,
const std::string &  lang,
bool  pass_through_recoder,
const GenericVector< STRING > &  words,
const GenericVector< STRING > &  puncs,
const GenericVector< STRING > &  numbers,
bool  lang_is_rtl,
FileReader  reader,
FileWriter  writer 
)

◆ CompareFontInfo()

bool tesseract::CompareFontInfo ( const FontInfo fi1,
const FontInfo fi2 
)

◆ CompareFontSet()

bool tesseract::CompareFontSet ( const FontSet fs1,
const FontSet fs2 
)

◆ CompareSTRING()

static int tesseract::CompareSTRING ( const void *  p1,
const void *  p2 
)
static

◆ ComputeBlackWhite()

static void tesseract::ComputeBlackWhite ( Pix *  pix,
float *  black,
float *  white 
)
static

◆ ComputeFeatures()

static int tesseract::ComputeFeatures ( const FCOORD start_pt,
const FCOORD end_pt,
double  feature_length,
GenericVector< INT_FEATURE_STRUCT > *  features 
)
static

◆ ComputeSearchBoxAndScaling()

static void tesseract::ComputeSearchBoxAndScaling ( BlobNeighbourDir  direction,
const TBOX part_box,
int  min_padding,
TBOX search_box,
ICOORD dist_scaling 
)
static

◆ ConvertHypothesizedModelRunsToParagraphs()

static void tesseract::ConvertHypothesizedModelRunsToParagraphs ( int  debug_level,
const GenericVector< RowScratchRegisters > &  rows,
GenericVector< PARA *> *  row_owners,
ParagraphTheory theory 
)
static

◆ CopyVector()

void tesseract::CopyVector ( int  n,
const double *  src,
double *  dest 
)
inline

◆ CorrectRepcharChoices()

static void tesseract::CorrectRepcharChoices ( BLOB_CHOICE blob_choice,
WERD_RES word_res 
)
static

◆ CountNeighbourGaps()

static void tesseract::CountNeighbourGaps ( bool  debug,
BLOBNBOX_CLIST *  neighbours,
int *  pure_h_count,
int *  pure_v_count 
)
static

◆ CountNeighbourTypes()

static void tesseract::CountNeighbourTypes ( BLOBNBOX_CLIST *  neighbours,
int *  pure_h_count,
int *  pure_v_count 
)
static

◆ countof()

template<typename T , size_t N>
constexpr size_t tesseract::countof ( T   const(&)[N])
noexcept

◆ CountPixelsAdjacentToLine()

static int tesseract::CountPixelsAdjacentToLine ( int  line_width,
Box *  line_box,
Pix *  nonline_pix 
)
static

◆ CrownCompatible()

bool tesseract::CrownCompatible ( const GenericVector< RowScratchRegisters > *  rows,
int  a,
int  b,
const ParagraphModel model 
)

◆ CutChunkFromParts()

static void tesseract::CutChunkFromParts ( const TBOX box,
const TBOX im_box,
const FCOORD rotation,
const FCOORD rerotation,
Pix *  pix,
ColPartition_LIST *  part_list 
)
static

◆ DebugDump()

static void tesseract::DebugDump ( bool  should_print,
const STRING phase,
const ParagraphTheory theory,
const GenericVector< RowScratchRegisters > &  rows 
)
static

◆ DecodeRadicalLine()

static bool tesseract::DecodeRadicalLine ( STRING radical_data_line,
RSMap radical_map 
)
static

◆ DecodeRadicalTable()

static bool tesseract::DecodeRadicalTable ( STRING radical_data,
RSMap radical_map 
)
static

◆ DegradeImage()

struct Pix * tesseract::DegradeImage ( Pix *  input,
int  exposure,
TRand randomizer,
float *  rotation 
)

◆ DeleteObject()

template<typename T >
void tesseract::DeleteObject ( T *  object)

◆ DeletePartition()

static void tesseract::DeletePartition ( ColPartition part)
static

◆ DeleteSmallImages()

static void tesseract::DeleteSmallImages ( ColPartitionGrid part_grid)
static

◆ DeSerialize() [1/8]

bool tesseract::DeSerialize ( FILE *  fp,
char *  data,
size_t  n 
)

◆ DeSerialize() [2/8]

bool tesseract::DeSerialize ( FILE *  fp,
float *  data,
size_t  n 
)

◆ DeSerialize() [3/8]

bool tesseract::DeSerialize ( FILE *  fp,
int8_t *  data,
size_t  n 
)

◆ DeSerialize() [4/8]

bool tesseract::DeSerialize ( FILE *  fp,
int16_t *  data,
size_t  n 
)

◆ DeSerialize() [5/8]

bool tesseract::DeSerialize ( FILE *  fp,
int32_t *  data,
size_t  n 
)

◆ DeSerialize() [6/8]

bool tesseract::DeSerialize ( FILE *  fp,
uint8_t *  data,
size_t  n 
)

◆ DeSerialize() [7/8]

bool tesseract::DeSerialize ( FILE *  fp,
uint16_t *  data,
size_t  n 
)

◆ DeSerialize() [8/8]

bool tesseract::DeSerialize ( FILE *  fp,
uint32_t *  data,
size_t  n 
)

◆ DetectParagraphs() [1/2]

void tesseract::DetectParagraphs ( int  debug_level,
GenericVector< RowInfo > *  row_infos,
GenericVector< PARA *> *  row_owners,
PARA_LIST *  paragraphs,
GenericVector< ParagraphModel *> *  models 
)

◆ DetectParagraphs() [2/2]

void tesseract::DetectParagraphs ( int  debug_level,
bool  after_text_recognition,
const MutableIterator block_start,
GenericVector< ParagraphModel *> *  models 
)

◆ DiscardUnusedModels()

static void tesseract::DiscardUnusedModels ( const GenericVector< RowScratchRegisters > &  rows,
ParagraphTheory theory 
)
static

◆ DisplayBoxVector()

static void tesseract::DisplayBoxVector ( const GenericVector< BLOBNBOX *> &  boxes,
ScrollView win 
)
static

◆ dist2()

static long tesseract::dist2 ( int  x1,
int  y1,
int  x2,
int  y2 
)
static

◆ DivideImageIntoParts()

static void tesseract::DivideImageIntoParts ( const TBOX im_box,
const FCOORD rotation,
const FCOORD rerotation,
Pix *  pix,
ColPartitionGridSearch rectsearch,
ColPartition_LIST *  part_list 
)
static

◆ DotProductAVX()

double tesseract::DotProductAVX ( const double *  u,
const double *  v,
int  n 
)

◆ DotProductSSE()

double tesseract::DotProductSSE ( const double *  u,
const double *  v,
int  n 
)

◆ double_VAR() [1/2]

tesseract::double_VAR ( textord_tabvector_vertical_gap_fraction  ,
0.5  ,
"max fraction of mean blob width allowed for vertical gaps in vertical text"   
)

◆ double_VAR() [2/2]

tesseract::double_VAR ( textord_tabvector_vertical_box_ratio  ,
0.5  ,
"Fraction of box matches required to declare a line vertical"   
)

◆ double_VAR_H() [1/2]

tesseract::double_VAR_H ( textord_tabvector_vertical_gap_fraction  ,
0.5  ,
"Max fraction of mean blob width allowed for vertical gaps in vertical text"   
)

◆ double_VAR_H() [2/2]

tesseract::double_VAR_H ( textord_tabvector_vertical_box_ratio  ,
0.5  ,
"Fraction of box matches required to declare a line vertical"   
)

◆ DoubleFlagExists()

static bool tesseract::DoubleFlagExists ( const char *  flag_name,
double *  value 
)
static

◆ DowngradeWeakestToCrowns()

static void tesseract::DowngradeWeakestToCrowns ( int  debug_level,
ParagraphTheory theory,
GenericVector< RowScratchRegisters > *  rows 
)
static

◆ DrawDiacriticJoiner()

static void tesseract::DrawDiacriticJoiner ( const BLOBNBOX blob,
ScrollView window 
)
static

◆ EliminateWeakParts()

static void tesseract::EliminateWeakParts ( const TBOX im_box,
ColPartitionGrid part_grid,
ColPartition_LIST *  big_parts,
ColPartition_LIST *  part_list 
)
static

◆ EncodeAsUTF8() [1/2]

static std::string tesseract::EncodeAsUTF8 ( const char32  ch32)
static

◆ EncodeAsUTF8() [2/2]

static std::string tesseract::EncodeAsUTF8 ( const char32  ch32)
static

◆ Epsilon()

static int tesseract::Epsilon ( int  space_pix)
static

◆ EvaluateWordSpan()

static void tesseract::EvaluateWordSpan ( const PointerVector< WERD_RES > &  words,
int  first_index,
int  end_index,
float *  rating,
float *  certainty,
bool *  bad,
bool *  valid_permuter 
)
static

◆ ExpandImageBottom()

static int tesseract::ExpandImageBottom ( const TBOX box,
int  bottom_limit,
ColPartitionGrid part_grid 
)
static

◆ ExpandImageDir()

static int tesseract::ExpandImageDir ( BlobNeighbourDir  dir,
const TBOX im_box,
const TBOX limit_box,
ColPartitionGrid part_grid,
TBOX expanded_box 
)
static

◆ ExpandImageIntoParts()

static bool tesseract::ExpandImageIntoParts ( const TBOX max_image_box,
ColPartitionGridSearch rectsearch,
ColPartitionGrid part_grid,
ColPartition **  part_ptr 
)
static

◆ ExpandImageLeft()

static int tesseract::ExpandImageLeft ( const TBOX box,
int  left_limit,
ColPartitionGrid part_grid 
)
static

◆ ExpandImageRight()

static int tesseract::ExpandImageRight ( const TBOX box,
int  right_limit,
ColPartitionGrid part_grid 
)
static

◆ ExpandImageTop()

static int tesseract::ExpandImageTop ( const TBOX box,
int  top_limit,
ColPartitionGrid part_grid 
)
static

◆ extract_result()

static void tesseract::extract_result ( TESS_CHAR_IT *  out,
PAGE_RES page_res 
)
static

Extract the OCR results, costs (penalty points for uncertainty), and the bounding boxes of the characters.

◆ ExtractFeaturesFromRun()

static void tesseract::ExtractFeaturesFromRun ( const EDGEPT startpt,
const EDGEPT lastpt,
const DENORM denorm,
double  feature_length,
bool  force_poly,
GenericVector< INT_FEATURE_STRUCT > *  features 
)
static

◆ ExtractFontName()

void tesseract::ExtractFontName ( const STRING filename,
STRING fontname 
)

Public Code

◆ ExtractFontProperties()

static void tesseract::ExtractFontProperties ( const std::string &  utf8_text,
StringRenderer render,
const std::string &  output_base 
)
static

◆ FilterFalsePositives()

static int tesseract::FilterFalsePositives ( int  resolution,
Pix *  nonline_pix,
Pix *  intersection_pix,
Pix *  line_pix 
)
static

◆ FilterMusic()

static Pix* tesseract::FilterMusic ( int  resolution,
Pix *  pix_closed,
Pix *  pix_vline,
Pix *  pix_hline,
l_int32 *  v_empty,
l_int32 *  h_empty 
)
static

◆ find_modal_font()

static void tesseract::find_modal_font ( STATS fonts,
int16_t *  font_out,
int8_t *  font_count 
)
static

find_modal_font

Find the modal font and remove from the stats.

◆ FindBestMatchingChoice()

static BLOB_CHOICE* tesseract::FindBestMatchingChoice ( UNICHAR_ID  char_id,
WERD_RES word_res 
)
static

◆ FirstWordWouldHaveFit() [1/2]

bool tesseract::FirstWordWouldHaveFit ( const RowScratchRegisters before,
const RowScratchRegisters after,
tesseract::ParagraphJustification  justification 
)

◆ FirstWordWouldHaveFit() [2/2]

bool tesseract::FirstWordWouldHaveFit ( const RowScratchRegisters before,
const RowScratchRegisters after 
)

◆ FontInfoDeleteCallback()

void tesseract::FontInfoDeleteCallback ( FontInfo  f)

◆ FontSetDeleteCallback()

void tesseract::FontSetDeleteCallback ( FontSet  fs)

◆ FullwidthToHalfwidth()

char32 tesseract::FullwidthToHalfwidth ( const char32  ch)

◆ FuncInplace()

template<class Func >
void tesseract::FuncInplace ( int  n,
double *  inout 
)
inline

◆ FuncMultiply()

template<class Func >
void tesseract::FuncMultiply ( const double *  u,
const double *  v,
int  n,
double *  out 
)
inline

◆ GatherPoints()

static int tesseract::GatherPoints ( const C_OUTLINE outline,
double  feature_length,
const DENORM denorm,
const DENORM root_denorm,
int  start_index,
int  end_index,
ICOORD pos,
FCOORD pos_normed,
LLSQ points,
LLSQ dirs 
)
static

◆ GeneratePerspectiveDistortion()

void tesseract::GeneratePerspectiveDistortion ( int  width,
int  height,
TRand randomizer,
Pix **  pix,
GenericVector< TBOX > *  boxes 
)

◆ GeometricClassify()

static void tesseract::GeometricClassify ( int  debug_level,
GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
ParagraphTheory theory 
)
static

◆ GeometricClassifyThreeTabStopTextBlock()

static void tesseract::GeometricClassifyThreeTabStopTextBlock ( int  debug_level,
GeometricClassifierState s,
ParagraphTheory theory 
)
static

◆ GetBlockTextOrientation()

static tesseract::Orientation tesseract::GetBlockTextOrientation ( const PageIterator it)
static

Gets the block orientation at the current iterator position.

◆ GetWordBaseline()

static void tesseract::GetWordBaseline ( int  writing_direction,
int  ppi,
int  height,
int  word_x1,
int  word_y1,
int  word_x2,
int  word_y2,
int  line_x1,
int  line_y1,
int  line_x2,
int  line_y2,
double *  x0,
double *  y0,
double *  length 
)
static

◆ GetXheightString()

std::string tesseract::GetXheightString ( const std::string &  script_dir,
const UNICHARSET unicharset 
)

◆ GridReducedPix()

static Pix* tesseract::GridReducedPix ( const TBOX box,
int  gridsize,
ICOORD  bleft,
int *  left,
int *  bottom 
)
static

◆ HasBetterCaseVariant()

static bool tesseract::HasBetterCaseVariant ( const UNICHARSET unicharset,
const BLOB_CHOICE choice,
BLOB_CHOICE_LIST *  choices 
)
static

Helper returns true if the given choice has a better case variant before it in the choice_list that is not distinguishable by size.

◆ HistogramRect()

void tesseract::HistogramRect ( Pix *  src_pix,
int  channel,
int  left,
int  top,
int  width,
int  height,
int *  histogram 
)

◆ HistogramWeight()

static void tesseract::HistogramWeight ( double  weight,
STATS histogram 
)
static

◆ HOcrEscape()

STRING tesseract::HOcrEscape ( const char *  text)

Escape a char string - replace <>&"' with HTML codes.

Escape a char string - replace &<>"' with HTML codes.

◆ HScanForEdge()

static bool tesseract::HScanForEdge ( uint32_t *  data,
int  wpl,
int  x_start,
int  x_end,
int  min_count,
int  mid_width,
int  max_count,
int  y_end,
int  y_step,
int *  y_start 
)
static

◆ IncreaseInOverlap()

static int tesseract::IncreaseInOverlap ( const ColPartition merge1,
const ColPartition merge2,
int  ok_overlap,
ColPartition_CLIST *  parts 
)
static

◆ InitializeRowInfo()

static void tesseract::InitializeRowInfo ( bool  after_recognition,
const MutableIterator it,
RowInfo info 
)
static

◆ InitializeTextAndBoxesPreRecognition()

static void tesseract::InitializeTextAndBoxesPreRecognition ( const MutableIterator it,
RowInfo info 
)
static

◆ INT_VAR() [1/2]

tesseract::INT_VAR ( textord_tabfind_show_strokewidths  ,
0  ,
"Show stroke widths"   
)

◆ INT_VAR() [2/2]

tesseract::INT_VAR ( textord_tabfind_show_partitions  ,
0  ,
"Show partition bounds, waiting if >1"   
)

◆ IntDotProductSSE()

int32_t tesseract::IntDotProductSSE ( const int8_t *  u,
const int8_t *  v,
int  n 
)

◆ InternalParagraphModelByOutline()

static ParagraphModel tesseract::InternalParagraphModelByOutline ( const GenericVector< RowScratchRegisters > *  rows,
int  start,
int  end,
int  tolerance,
bool *  consistent 
)
static

◆ IntersectArea()

static int tesseract::IntersectArea ( const TBOX box,
ColPartition_LIST *  part_list 
)
static

◆ InterwordSpace()

int tesseract::InterwordSpace ( const GenericVector< RowScratchRegisters > &  rows,
int  row_start,
int  row_end 
)

◆ IntFlagExists()

static bool tesseract::IntFlagExists ( const char *  flag_name,
int32_t *  value 
)
static

◆ is_double_quote()

static bool tesseract::is_double_quote ( const char32  ch)
static

◆ is_hyphen_punc()

static bool tesseract::is_hyphen_punc ( const char32  ch)
static

◆ is_single_quote()

static bool tesseract::is_single_quote ( const char32  ch)
static

◆ IsCombiner()

static bool tesseract::IsCombiner ( int  ch)
static

◆ IsDigitLike()

static bool tesseract::IsDigitLike ( int  ch)
static

◆ IsInterchangeValid()

bool tesseract::IsInterchangeValid ( const char32  ch)

◆ IsInterchangeValid7BitAscii()

bool tesseract::IsInterchangeValid7BitAscii ( const char32  ch)

◆ IsLatinLetter()

static bool tesseract::IsLatinLetter ( int  ch)
static

◆ IsLeftIndented()

bool tesseract::IsLeftIndented ( const EquationDetect::IndentType  type)
inline

◆ IsOCREquivalent()

bool tesseract::IsOCREquivalent ( char32  ch1,
char32  ch2 
)

◆ IsOpeningPunct()

static bool tesseract::IsOpeningPunct ( int  ch)
static

◆ IsRightIndented()

bool tesseract::IsRightIndented ( const EquationDetect::IndentType  type)
inline

◆ IsStrInList()

static bool tesseract::IsStrInList ( const STRING str,
const GenericVector< STRING > &  str_list 
)
static

◆ IsTerminalPunct()

static bool tesseract::IsTerminalPunct ( int  ch)
static

◆ IsTextOrEquationType()

bool tesseract::IsTextOrEquationType ( PolyBlockType  type)
inline

◆ IsThaiBeforeConsonantVowel()

static bool tesseract::IsThaiBeforeConsonantVowel ( char32  ch)
static

◆ IsThaiConsonant()

static bool tesseract::IsThaiConsonant ( char32  ch)
static

◆ IsThaiTonableVowel()

static bool tesseract::IsThaiTonableVowel ( char32  ch)
static

◆ IsThaiToneMark()

static bool tesseract::IsThaiToneMark ( char32  ch)
static

◆ IsUTF8Whitespace()

bool tesseract::IsUTF8Whitespace ( const char *  text)

◆ IsValidCodepoint()

bool tesseract::IsValidCodepoint ( const char32  ch)

◆ IsWhitespace()

bool tesseract::IsWhitespace ( const char32  ch)

◆ IsWhitespaceBox()

static bool tesseract::IsWhitespaceBox ( const BoxChar boxchar)
static

◆ LeftoverSegments()

static void tesseract::LeftoverSegments ( const GenericVector< RowScratchRegisters > &  rows,
GenericVector< Interval > *  to_fix,
int  row_start,
int  row_end 
)
static

◆ LeftWordAttributes()

void tesseract::LeftWordAttributes ( const UNICHARSET unicharset,
const WERD_CHOICE werd,
const STRING utf8,
bool *  is_list,
bool *  starts_idea,
bool *  ends_idea 
)

◆ LikelyListMark()

static bool tesseract::LikelyListMark ( const STRING word)
static

◆ LikelyListMarkUnicode()

static bool tesseract::LikelyListMarkUnicode ( int  ch)
static

◆ LikelyListNumeral()

static bool tesseract::LikelyListNumeral ( const STRING word)
static

◆ LikelyParagraphStart()

static bool tesseract::LikelyParagraphStart ( const RowScratchRegisters before,
const RowScratchRegisters after,
tesseract::ParagraphJustification  j 
)
static

◆ List2ndNeighbours()

static void tesseract::List2ndNeighbours ( const BLOBNBOX blob,
BLOBNBOX_CLIST *  neighbours 
)
static

◆ List3rdNeighbours()

static void tesseract::List3rdNeighbours ( const BLOBNBOX blob,
BLOBNBOX_CLIST *  neighbours 
)
static

◆ ListFontFamilies()

static void tesseract::ListFontFamilies ( PangoFontFamily ***  families,
int *  n_families 
)
static

◆ ListNeighbours()

static void tesseract::ListNeighbours ( const BLOBNBOX blob,
BLOBNBOX_CLIST *  neighbours 
)
static

◆ LoadDataFromFile() [1/2]

bool tesseract::LoadDataFromFile ( const char *  filename,
GenericVector< char > *  data 
)
inline

◆ LoadDataFromFile() [2/2]

bool tesseract::LoadDataFromFile ( const STRING filename,
GenericVector< char > *  data 
)
inline

◆ LoadFileLinesToStrings()

bool tesseract::LoadFileLinesToStrings ( const STRING filename,
GenericVector< STRING > *  lines 
)
inline

◆ LoadShapeTable()

ShapeTable * tesseract::LoadShapeTable ( const STRING file_prefix)

◆ LoadTrainingData()

MasterTrainer * tesseract::LoadTrainingData ( int  argc,
const char *const *  argv,
bool  replication,
ShapeTable **  shape_table,
STRING file_prefix 
)

Creates a MasterTrainer and loads the training data into it: Initializes feature_defs and IntegerFX. Loads the shape_table if shape_table != nullptr. Loads initial unicharset from -U command-line option. If FLAGS_T is set, loads the majority of data from there, else:

  • Loads font info from -F option.
  • Loads xheights from -X option.
  • Loads samples from .tr files in remaining command-line args.
  • Deletes outliers and computes canonical samples.
  • If FLAGS_output_trainer is set, saves the trainer for future use. TODO: Who uses that? There is currently no code which reads it.

Computes canonical and cloud features. If shape_table is not nullptr, but failed to load, make a fake flat one, as shape clustering was not run.

◆ Logistic()

double tesseract::Logistic ( double  x)
inline

◆ LogSumExp()

static double tesseract::LogSumExp ( double  ln_x,
double  ln_y 
)
static

◆ Main()

static int tesseract::Main ( int  argc,
char **  argv 
)
static

◆ make_tesseract_blob()

static TBLOB* tesseract::make_tesseract_blob ( float  baseline,
float  xheight,
float  descender,
float  ascender,
bool  numeric_mode,
Pix *  pix 
)
static

Return a TBLOB * from the whole pix. To be freed later with delete.

◆ MakeIndividualGlyphs()

static bool tesseract::MakeIndividualGlyphs ( Pix *  pix,
const std::vector< BoxChar *> &  vbox,
const int  input_tiff_page 
)
static

◆ MarkAndDeleteImageParts()

static void tesseract::MarkAndDeleteImageParts ( const FCOORD rerotate,
ColPartitionGrid part_grid,
ColPartition_LIST *  image_parts,
Pix *  image_pix 
)
static

◆ MarkRowsWithModel()

static void tesseract::MarkRowsWithModel ( GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
const ParagraphModel model,
bool  ltr,
int  eop_threshold 
)
static

◆ MarkStrongEvidence()

static void tesseract::MarkStrongEvidence ( GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end 
)
static

◆ MaximalImageBoundingBox()

static void tesseract::MaximalImageBoundingBox ( ColPartitionGrid part_grid,
TBOX im_box 
)
static

◆ MaxStrokeWidth()

static int tesseract::MaxStrokeWidth ( Pix *  pix)
static

◆ MeanDirectionVector()

static FCOORD tesseract::MeanDirectionVector ( const LLSQ point_diffs,
const LLSQ dirs,
const FCOORD start_pt,
const FCOORD end_pt 
)
static

◆ MedianSpacing()

static int tesseract::MedianSpacing ( int  page_height,
ColPartition_IT  it 
)
static

◆ MedianXHeight()

static double tesseract::MedianXHeight ( BLOCK_LIST *  block_list)
static

◆ MergeBoxCharsToWords()

static void tesseract::MergeBoxCharsToWords ( std::vector< BoxChar *> *  boxchars)
static

◆ ModelStrongEvidence()

static void tesseract::ModelStrongEvidence ( int  debug_level,
GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
bool  allow_flush_models,
ParagraphTheory theory 
)
static

◆ MoveBlobsToBlock()

static TO_BLOCK* tesseract::MoveBlobsToBlock ( bool  vertical_text,
int  line_spacing,
BLOCK block,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

◆ MultiplyAccumulate()

void tesseract::MultiplyAccumulate ( int  n,
const double *  u,
const double *  v,
double *  out 
)
inline

◆ MultiplyVectorsInPlace()

void tesseract::MultiplyVectorsInPlace ( int  n,
const double *  src,
double *  inout 
)
inline

◆ MutualUnusedHNeighbour()

static BLOBNBOX* tesseract::MutualUnusedHNeighbour ( const BLOBNBOX blob,
BlobNeighbourDir  dir 
)
static

◆ MutualUnusedVNeighbour()

static BLOBNBOX* tesseract::MutualUnusedVNeighbour ( const BLOBNBOX blob,
BlobNeighbourDir  dir 
)
static

◆ my_strnmove()

static char* tesseract::my_strnmove ( char *  dest,
const char *  src,
size_t  n 
)
static

◆ NonLinearity()

static NetworkType tesseract::NonLinearity ( char  func)
static

◆ NormalizeCleanAndSegmentUTF8()

bool tesseract::NormalizeCleanAndSegmentUTF8 ( UnicodeNormMode  u_mode,
OCRNorm  ocr_normalize,
GraphemeNormMode  g_mode,
bool  report_errors,
const char *  str8,
std::vector< std::string > *  graphemes 
)

◆ NormalizeDirection()

static uint8_t tesseract::NormalizeDirection ( uint8_t  dir,
const FCOORD unnormed_pos,
const DENORM denorm,
const DENORM root_denorm 
)
static

◆ NormalizeUTF8String()

bool tesseract::NormalizeUTF8String ( UnicodeNormMode  u_mode,
OCRNorm  ocr_normalize,
GraphemeNorm  grapheme_normalize,
const char *  str8,
std::string *  normalized 
)

◆ NormalizeUTF8ToUTF32()

static void tesseract::NormalizeUTF8ToUTF32 ( UnicodeNormMode  u_mode,
OCRNorm  ocr_normalize,
const char *  str8,
std::vector< char32 > *  normed32 
)
static

◆ NumTouchingIntersections()

static int tesseract::NumTouchingIntersections ( Box *  line_box,
Pix *  intersection_pix 
)
static

◆ OCRNormalize()

char32 tesseract::OCRNormalize ( char32  ch)

◆ OKMergeCandidate()

static bool tesseract::OKMergeCandidate ( const ColPartition part,
const ColPartition candidate,
bool  debug 
)
static

◆ OtsuStats()

int tesseract::OtsuStats ( const int *  histogram,
int *  H_out,
int *  omega0_out 
)

◆ OtsuThreshold()

int tesseract::OtsuThreshold ( Pix *  src_pix,
int  left,
int  top,
int  width,
int  height,
int **  thresholds,
int **  hi_values 
)

◆ ParagraphModelByOutline()

static ParagraphModel tesseract::ParagraphModelByOutline ( int  debug_level,
const GenericVector< RowScratchRegisters > *  rows,
int  start,
int  end,
int  tolerance 
)
static

◆ ParamsTrainingFeatureByName()

int tesseract::ParamsTrainingFeatureByName ( const char *  name)

◆ ParseCommandLineFlags()

void tesseract::ParseCommandLineFlags ( const char *  usage,
int *  argc,
char ***  argv,
const bool  remove_flags 
)

◆ prec()

static double tesseract::prec ( double  x)
static

◆ PrepareDistortedPix()

Pix * tesseract::PrepareDistortedPix ( const Pix *  pix,
bool  perspective,
bool  invert,
bool  white_noise,
bool  smooth_noise,
bool  blur,
int  box_reduction,
TRand randomizer,
GenericVector< TBOX > *  boxes 
)

◆ PrintBoxWidths()

static void tesseract::PrintBoxWidths ( BLOBNBOX neighbour)
static

◆ PrintCommandLineFlags()

static void tesseract::PrintCommandLineFlags ( )
static

◆ PrintDetectorState()

static void tesseract::PrintDetectorState ( const ParagraphTheory theory,
const GenericVector< RowScratchRegisters > &  rows 
)
static

◆ PrintMatrixPaths()

static void tesseract::PrintMatrixPaths ( int  col,
int  dim,
const MATRIX ratings,
int  length,
const BLOB_CHOICE **  blob_choices,
const UNICHARSET unicharset,
const char *  label,
FILE *  output_file 
)
static

◆ PrintPath()

static void tesseract::PrintPath ( int  length,
const BLOB_CHOICE **  blob_choices,
const UNICHARSET unicharset,
const char *  label,
FILE *  output_file 
)
static

◆ PrintRowRange()

static void tesseract::PrintRowRange ( const GenericVector< RowScratchRegisters > &  rows,
int  row_start,
int  row_end 
)
static

◆ PrintScriptDirs()

static void tesseract::PrintScriptDirs ( const GenericVector< StrongScriptDirection > &  dirs)
static

◆ PrintTable()

static void tesseract::PrintTable ( const GenericVector< GenericVector< STRING > > &  rows,
const STRING colsep 
)
static

◆ ProjectiveCoeffs()

int tesseract::ProjectiveCoeffs ( int  width,
int  height,
TRand randomizer,
float **  im_coeffs,
float **  box_coeffs 
)

◆ PSM_BLOCK_FIND_ENABLED()

bool tesseract::PSM_BLOCK_FIND_ENABLED ( int  pageseg_mode)
inline

◆ PSM_COL_FIND_ENABLED()

bool tesseract::PSM_COL_FIND_ENABLED ( int  pageseg_mode)
inline

◆ PSM_LINE_FIND_ENABLED()

bool tesseract::PSM_LINE_FIND_ENABLED ( int  pageseg_mode)
inline

◆ PSM_ORIENTATION_ENABLED()

bool tesseract::PSM_ORIENTATION_ENABLED ( int  pageseg_mode)
inline

◆ PSM_OSD_ENABLED()

bool tesseract::PSM_OSD_ENABLED ( int  pageseg_mode)
inline

Inline functions that act on a PageSegMode to determine whether components of layout analysis are enabled. Depend critically on the order of elements of PageSegMode. NOTE that arg is an int for compatibility with INT_PARAM.

◆ PSM_SPARSE()

bool tesseract::PSM_SPARSE ( int  pageseg_mode)
inline

◆ PSM_WORD_FIND_ENABLED()

bool tesseract::PSM_WORD_FIND_ENABLED ( int  pageseg_mode)
inline

◆ RadicalPreHash()

static int tesseract::RadicalPreHash ( const std::vector< int > &  rs)
static

◆ RandBool()

static bool tesseract::RandBool ( const double  prob,
TRand rand 
)
static

◆ rating_to_cost()

static float tesseract::rating_to_cost ( float  rating)
static

◆ read_info()

bool tesseract::read_info ( TFile f,
FontInfo fi 
)

◆ read_set()

bool tesseract::read_set ( TFile f,
FontSet fs 
)

◆ read_spacing_info()

bool tesseract::read_spacing_info ( TFile f,
FontInfo fi 
)

◆ read_t()

static bool tesseract::read_t ( PAGE_RES_IT page_res_it,
TBOX tbox 
)
static

◆ ReadFile()

STRING tesseract::ReadFile ( const std::string &  filename,
FileReader  reader 
)

◆ ReCachePagesFunc()

void* tesseract::ReCachePagesFunc ( void *  data)

◆ RecomputeMarginsAndClearHypotheses()

void tesseract::RecomputeMarginsAndClearHypotheses ( GenericVector< RowScratchRegisters > *  rows,
int  start,
int  end,
int  percentile 
)

◆ ReflectBlobList()

static void tesseract::ReflectBlobList ( BLOBNBOX_LIST *  bblobs)
static

◆ ReleaseAllBlobsAndDeleteUnused()

static void tesseract::ReleaseAllBlobsAndDeleteUnused ( BLOBNBOX_LIST *  blobs)
static

◆ RemoveBadBox()

static void tesseract::RemoveBadBox ( BLOBNBOX box,
ColPartition part,
ColPartition_LIST *  part_list 
)
static

◆ RemoveEnclosingCircle()

static Pix* tesseract::RemoveEnclosingCircle ( Pix *  pixs)
static

◆ RemoveUnusedLineSegments()

static void tesseract::RemoveUnusedLineSegments ( bool  horizontal_lines,
BLOBNBOX_LIST *  line_bblobs,
Pix *  line_pix 
)
static

◆ RightWordAttributes()

void tesseract::RightWordAttributes ( const UNICHARSET unicharset,
const WERD_CHOICE werd,
const STRING utf8,
bool *  is_list,
bool *  starts_idea,
bool *  ends_idea 
)

◆ RotateAndExplodeBlobList()

static void tesseract::RotateAndExplodeBlobList ( const FCOORD blob_rotation,
BLOBNBOX_LIST *  bblobs,
STATS widths,
STATS heights 
)
static

◆ RowIsStranded()

static bool tesseract::RowIsStranded ( const GenericVector< RowScratchRegisters > &  rows,
int  row 
)
static

◆ RowsFitModel()

bool tesseract::RowsFitModel ( const GenericVector< RowScratchRegisters > *  rows,
int  start,
int  end,
const ParagraphModel model 
)

◆ RtlEmbed()

static STRING tesseract::RtlEmbed ( const STRING word,
bool  rtlify 
)
static

◆ SafeAtod()

static bool tesseract::SafeAtod ( const char *  str,
double *  val 
)
static

◆ SafeAtoi()

static bool tesseract::SafeAtoi ( const char *  str,
int *  val 
)
static

◆ SaveDataToFile()

bool tesseract::SaveDataToFile ( const GenericVector< char > &  data,
const STRING filename 
)
inline

◆ ScanForOverlappingText()

static bool tesseract::ScanForOverlappingText ( ColPartitionGrid part_grid,
TBOX box 
)
static

◆ ScanParentsForCaseMix()

static void tesseract::ScanParentsForCaseMix ( const UNICHARSET unicharset,
LanguageModelState parent_node 
)
static

Helper scans the collection of predecessors for competing siblings that have the same letter with the opposite case, setting competing_vse.

◆ ScriptPosToString()

const char * tesseract::ScriptPosToString ( enum ScriptPos  script_pos)

◆ SelectBestWords()

static int tesseract::SelectBestWords ( double  rating_ratio,
double  certainty_margin,
bool  debug,
PointerVector< WERD_RES > *  new_words,
PointerVector< WERD_RES > *  best_words 
)
static

◆ SeparateSimpleLeaderLines()

static void tesseract::SeparateSimpleLeaderLines ( GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
ParagraphTheory theory 
)
static

◆ Serialize() [1/8]

bool tesseract::Serialize ( FILE *  fp,
const char *  data,
size_t  n 
)

◆ Serialize() [2/8]

bool tesseract::Serialize ( FILE *  fp,
const float *  data,
size_t  n 
)

◆ Serialize() [3/8]

bool tesseract::Serialize ( FILE *  fp,
const int8_t *  data,
size_t  n 
)

◆ Serialize() [4/8]

bool tesseract::Serialize ( FILE *  fp,
const int16_t *  data,
size_t  n 
)

◆ Serialize() [5/8]

bool tesseract::Serialize ( FILE *  fp,
const int32_t *  data,
size_t  n 
)

◆ Serialize() [6/8]

bool tesseract::Serialize ( FILE *  fp,
const uint8_t *  data,
size_t  n 
)

◆ Serialize() [7/8]

bool tesseract::Serialize ( FILE *  fp,
const uint16_t *  data,
size_t  n 
)

◆ Serialize() [8/8]

bool tesseract::Serialize ( FILE *  fp,
const uint32_t *  data,
size_t  n 
)

◆ SetBlobStrokeWidth()

void tesseract::SetBlobStrokeWidth ( Pix *  pix,
BLOBNBOX blob 
)

◆ SetBoolFlagValue()

static void tesseract::SetBoolFlagValue ( const char *  flag_name,
const bool  new_val 
)
static

◆ SetDoubleFlagValue()

static void tesseract::SetDoubleFlagValue ( const char *  flag_name,
const double  new_val 
)
static

◆ SetIntFlagValue()

static void tesseract::SetIntFlagValue ( const char *  flag_name,
const int32_t  new_val 
)
static

◆ SetPropertiesForInputFile()

void tesseract::SetPropertiesForInputFile ( const std::string &  script_dir,
const std::string &  input_unicharset_file,
const std::string &  output_unicharset_file,
const std::string &  output_xheights_file 
)

◆ SetScriptProperties()

void tesseract::SetScriptProperties ( const std::string &  script_dir,
UNICHARSET unicharset 
)

◆ SetStringFlagValue()

static void tesseract::SetStringFlagValue ( const char *  flag_name,
const char *  new_val 
)
static

◆ SetupBasicProperties() [1/2]

void tesseract::SetupBasicProperties ( bool  report_errors,
UNICHARSET unicharset 
)
inline

◆ SetupBasicProperties() [2/2]

void tesseract::SetupBasicProperties ( bool  report_errors,
bool  decompose,
UNICHARSET unicharset 
)

◆ ShouldIgnoreFontFamilyName()

static bool tesseract::ShouldIgnoreFontFamilyName ( const char *  query)
static

◆ SkipChars() [1/2]

static const char* tesseract::SkipChars ( const char *  str,
const char *  toskip 
)
static

◆ SkipChars() [2/2]

static const char* tesseract::SkipChars ( const char *  str,
bool(*)(int)  skip 
)
static

◆ SkipOne()

static const char* tesseract::SkipOne ( const char *  str,
const char *  toskip 
)
static

◆ SkipWhitespace()

static void tesseract::SkipWhitespace ( char **  str)
static

◆ SoftmaxInPlace()

template<typename T >
void tesseract::SoftmaxInPlace ( int  n,
T *  inout 
)
inline

◆ sort_cmp()

template<typename T >
int tesseract::sort_cmp ( const void *  t1,
const void *  t2 
)

◆ sort_ptr_cmp()

template<typename T >
int tesseract::sort_ptr_cmp ( const void *  t1,
const void *  t2 
)

◆ sort_strings_by_dec_length()

static int tesseract::sort_strings_by_dec_length ( const void *  v1,
const void *  v2 
)
static

◆ SortByBoxBottom()

template<class BBC >
int tesseract::SortByBoxBottom ( const void *  void1,
const void *  void2 
)

◆ SortByBoxLeft()

template<class BBC >
int tesseract::SortByBoxLeft ( const void *  void1,
const void *  void2 
)

◆ SortByRating()

template<class BLOB_CHOICE >
int tesseract::SortByRating ( const void *  void1,
const void *  void2 
)

◆ SortByUnicharID()

template<class BLOB_CHOICE >
int tesseract::SortByUnicharID ( const void *  void1,
const void *  void2 
)

◆ SortCPByBottom()

static int tesseract::SortCPByBottom ( const void *  p1,
const void *  p2 
)
static

◆ SortCPByHeight()

static int tesseract::SortCPByHeight ( const void *  p1,
const void *  p2 
)
static

◆ SortCPByTopReverse()

static int tesseract::SortCPByTopReverse ( const void *  p1,
const void *  p2 
)
static

◆ SortRightToLeft()

template<class BBC >
int tesseract::SortRightToLeft ( const void *  void1,
const void *  void2 
)

◆ SpanUTF8NotWhitespace()

unsigned int tesseract::SpanUTF8NotWhitespace ( const char *  text)

◆ SpanUTF8Whitespace()

unsigned int tesseract::SpanUTF8Whitespace ( const char *  text)

◆ StringFlagExists()

static bool tesseract::StringFlagExists ( const char *  flag_name,
const char **  value 
)
static

◆ StringReplace()

static std::string tesseract::StringReplace ( const std::string &  in,
const std::string &  oldsub,
const std::string &  newsub 
)
static

◆ StripJoiners()

static void tesseract::StripJoiners ( std::vector< char32 > *  str32)
static

◆ StrOf()

static STRING tesseract::StrOf ( int  num)
static

◆ StrongEvidenceClassify()

static void tesseract::StrongEvidenceClassify ( int  debug_level,
GenericVector< RowScratchRegisters > *  rows,
int  row_start,
int  row_end,
ParagraphTheory theory 
)
static

◆ StrongModel()

bool tesseract::StrongModel ( const ParagraphModel model)
inline

◆ SubtractLinesAndResidue()

static void tesseract::SubtractLinesAndResidue ( Pix *  line_pix,
Pix *  non_line_pix,
int  resolution,
Pix *  src_pix 
)
static

◆ SumVectors()

void tesseract::SumVectors ( int  n,
const double *  v1,
const double *  v2,
const double *  v3,
const double *  v4,
const double *  v5,
double *  sum 
)
inline

◆ Tanh()

double tesseract::Tanh ( double  x)
inline

◆ TestCompatibleCandidates()

static bool tesseract::TestCompatibleCandidates ( const ColPartition part,
bool  debug,
ColPartition_CLIST *  candidates 
)
static

◆ TestWeakIntersectedPart()

static bool tesseract::TestWeakIntersectedPart ( const TBOX im_box,
ColPartition_LIST *  part_list,
ColPartition part 
)
static

◆ TextSupportsBreak()

static bool tesseract::TextSupportsBreak ( const RowScratchRegisters before,
const RowScratchRegisters after 
)
static

◆ TraceBlockOnReducedPix()

Pix * tesseract::TraceBlockOnReducedPix ( BLOCK block,
int  gridsize,
ICOORD  bleft,
int *  left,
int *  bottom 
)

◆ TraceOutlineOnReducedPix()

Pix * tesseract::TraceOutlineOnReducedPix ( C_OUTLINE outline,
int  gridsize,
ICOORD  bleft,
int *  left,
int *  bottom 
)

◆ TruncateBoxToMissNonText()

static void tesseract::TruncateBoxToMissNonText ( int  x_middle,
int  y_middle,
bool  split_on_x,
Pix *  nontext_map,
TBOX bbox 
)
static

◆ UnicodeFor()

int tesseract::UnicodeFor ( const UNICHARSET u,
const WERD_CHOICE werd,
int  pos 
)

◆ UniLikelyListItem()

static bool tesseract::UniLikelyListItem ( const UNICHARSET u,
const WERD_CHOICE werd 
)
static

◆ UpdateLeftMargin()

static bool tesseract::UpdateLeftMargin ( const ColPartition part,
int *  margin_left,
int *  margin_right 
)
static

◆ UpdateRightMargin()

static bool tesseract::UpdateRightMargin ( const ColPartition part,
int *  margin_left,
int *  margin_right 
)
static

◆ UpperQuartileCJKSize()

static int tesseract::UpperQuartileCJKSize ( int  gridsize,
BLOBNBOX_LIST *  blobs 
)
static

◆ ValidBodyLine()

bool tesseract::ValidBodyLine ( const GenericVector< RowScratchRegisters > *  rows,
int  row,
const ParagraphModel model 
)

◆ ValidFirstLine()

bool tesseract::ValidFirstLine ( const GenericVector< RowScratchRegisters > *  rows,
int  row,
const ParagraphModel model 
)

◆ VScanForEdge()

static bool tesseract::VScanForEdge ( uint32_t *  data,
int  wpl,
int  y_start,
int  y_end,
int  min_count,
int  mid_width,
int  max_count,
int  x_end,
int  x_step,
int *  x_start 
)
static

◆ WordGap()

static void tesseract::WordGap ( const PointerVector< WERD_RES > &  words,
int  index,
int *  right,
int *  next_left 
)
static

◆ WordsAcceptable()

static bool tesseract::WordsAcceptable ( const PointerVector< WERD_RES > &  words)
static

◆ write_info()

bool tesseract::write_info ( FILE *  f,
const FontInfo fi 
)

◆ write_set()

bool tesseract::write_set ( FILE *  f,
const FontSet fs 
)

◆ write_spacing_info()

bool tesseract::write_spacing_info ( FILE *  f,
const FontInfo fi 
)

◆ WriteDawg()

static bool tesseract::WriteDawg ( const GenericVector< STRING > &  words,
const UNICHARSET unicharset,
Trie::RTLReversePolicy  reverse_policy,
TessdataType  file_type,
TessdataManager traineddata 
)
static

◆ WriteDawgs()

static bool tesseract::WriteDawgs ( const GenericVector< STRING > &  words,
const GenericVector< STRING > &  puncs,
const GenericVector< STRING > &  numbers,
bool  lang_is_rtl,
const UNICHARSET unicharset,
TessdataManager traineddata 
)
static

◆ WriteFile()

bool tesseract::WriteFile ( const std::string &  output_dir,
const std::string &  lang,
const std::string &  suffix,
const GenericVector< char > &  data,
FileWriter  writer 
)

◆ WriteRecoder()

bool tesseract::WriteRecoder ( const UNICHARSET unicharset,
bool  pass_through,
const std::string &  output_dir,
const std::string &  lang,
FileWriter  writer,
STRING radical_table_data,
TessdataManager traineddata 
)

◆ WriteShapeTable()

void tesseract::WriteShapeTable ( const STRING file_prefix,
const ShapeTable shape_table 
)

◆ WriteUnicharset()

bool tesseract::WriteUnicharset ( const UNICHARSET unicharset,
const std::string &  output_dir,
const std::string &  lang,
FileWriter  writer,
TessdataManager traineddata 
)

◆ YOutlierPieces()

static void tesseract::YOutlierPieces ( WERD_RES word,
int  rebuilt_blob_index,
int  super_y_bottom,
int  sub_y_top,
ScriptPos leading_pos,
int *  num_leading_outliers,
ScriptPos trailing_pos,
int *  num_trailing_outliers 
)
static

Given a recognized blob, see if a contiguous collection of sub-pieces (chopped blobs) starting at its left might qualify as being a subscript or superscript letter based only on y position. Also do this for the right side.

◆ ZeroVector()

template<typename T >
void tesseract::ZeroVector ( int  n,
T *  vec 
)
inline

Variable Documentation

◆ case_state_table

const int tesseract::case_state_table[6][4]
Initial value:
= {
{
0, 1, 5, 4},
{
0, 3, 2, 4},
{
0, -1, 2, -1},
{
0, 3, -1, 4},
{
0, -1, -1, 4},
{
5, -1, 2, -1},
}

◆ kAdamCorrectionIterations

const int tesseract::kAdamCorrectionIterations = 200000

◆ kAdamEpsilon

const double tesseract::kAdamEpsilon = 1e-8

◆ kAdamFlag

const int tesseract::kAdamFlag = 4

◆ kAdjacentLeaderSearchPadding

const int tesseract::kAdjacentLeaderSearchPadding = 2

◆ kAlignedFraction

const double tesseract::kAlignedFraction = 0.03125

◆ kAlignedGapFraction

const double tesseract::kAlignedGapFraction = 0.75

◆ kAlignmentNames

const char* tesseract::kAlignmentNames[]
Initial value:
= {
"Left Aligned",
"Left Ragged",
"Center",
"Right Aligned",
"Right Ragged",
"Separator"
}

◆ kAllowBlobArea

const double tesseract::kAllowBlobArea = 0.05

◆ kAllowBlobHeight

const double tesseract::kAllowBlobHeight = 0.3

◆ kAllowBlobWidth

const double tesseract::kAllowBlobWidth = 0.4

◆ kAllowTextArea

const double tesseract::kAllowTextArea = 0.8

◆ kAllowTextHeight

const double tesseract::kAllowTextHeight = 0.5

◆ kAllowTextWidth

const double tesseract::kAllowTextWidth = 0.6

◆ kAmbigDelimiters

const char tesseract::kAmbigDelimiters[] = "\t "
static

◆ kAmbigNgramSeparator

const char tesseract::kAmbigNgramSeparator[] = { ' ', '\0' }
static

◆ kApostropheLikeUTF8

const char * tesseract::kApostropheLikeUTF8[]
Initial value:
= {
"'",
"`",
"\u2018",
"\u2019",
"\u2032",
nullptr,
}

◆ kApostropheSymbol

const char tesseract::kApostropheSymbol[] = "'"
static

◆ kBasicBufSize

const int tesseract::kBasicBufSize = 2048
static

◆ kBestCheckpointFraction

const double tesseract::kBestCheckpointFraction = 31.0 / 32.0

◆ kBigPartSizeRatio

const double tesseract::kBigPartSizeRatio = 1.75

◆ kBlobTypes

char tesseract::kBlobTypes[BRT_COUNT+1] = "NHSRIUVT"
static

◆ kBoxClipTolerance

const int tesseract::kBoxClipTolerance = 2

◆ kBrokenCJKIterationFraction

const double tesseract::kBrokenCJKIterationFraction = 0.125

◆ kBytesPer64BitNumber

const int tesseract::kBytesPer64BitNumber = 20

Max bytes in the decimal representation of int64_t.

◆ kBytesPerBoxFileLine

const int tesseract::kBytesPerBoxFileLine = (kBytesPerNumber + 1) * kNumbersPerBlob + 1

Multiplier for max expected textlength assumes (kBytesPerNumber + space) * kNumbersPerBlob plus the newline. Add to this the original UTF8 characters, and one kMaxBytesPerLine for safety.

◆ kBytesPerNumber

const int tesseract::kBytesPerNumber = 5

The number of bytes taken by each number. Since we use int16_t for ICOORD, assume only 5 digits max.

◆ kCellSplitColumnThreshold

const int tesseract::kCellSplitColumnThreshold = 0

◆ kCellSplitRowThreshold

const int tesseract::kCellSplitRowThreshold = 0

◆ kCertaintyScale

const float tesseract::kCertaintyScale = 7.0f

◆ kCertOffset

const double tesseract::kCertOffset = -0.085

◆ kCharWidth

const int tesseract::kCharWidth = 2
static

◆ kCJKAspectRatio

const double tesseract::kCJKAspectRatio = 1.25

◆ kCJKAspectRatioIncrease

const double tesseract::kCJKAspectRatioIncrease = 1.0625

◆ kCJKBrokenDistanceFraction

const double tesseract::kCJKBrokenDistanceFraction = 0.25

◆ kCJKMaxComponents

const int tesseract::kCJKMaxComponents = 8

◆ kCJKRadius

const int tesseract::kCJKRadius = 2

◆ kColumnWidthFactor

const int tesseract::kColumnWidthFactor = 20

Pixel resolution of column width estimates.

◆ kCosMaxSkewAngle

const double tesseract::kCosMaxSkewAngle = 0.866025

◆ kCrackSpacing

const int tesseract::kCrackSpacing = 100

Spacing of cracks across the page to break up tall vertical lines.

◆ kCrownLeft

const ParagraphModel * tesseract::kCrownLeft = reinterpret_cast<ParagraphModel *>(0xDEAD111F)

◆ kCrownRight

const ParagraphModel * tesseract::kCrownRight = reinterpret_cast<ParagraphModel *>(0xDEAD888F)

◆ kDawgSuccessors

const bool tesseract::kDawgSuccessors[DAWG_TYPE_COUNT][DAWG_TYPE_COUNT]
static
Initial value:
= {
{ 0, 1, 1, 0 },
{ 1, 0, 0, 0 },
{ 1, 0, 0, 0 },
{ 0, 0, 0, 0 },
}

◆ kDefaultOutputResolution

const int tesseract::kDefaultOutputResolution = 300
static

◆ kDefaultResolution

const int tesseract::kDefaultResolution = 300

◆ kDiacriticXPadRatio

const double tesseract::kDiacriticXPadRatio = 7.0

◆ kDiacriticYPadRatio

const double tesseract::kDiacriticYPadRatio = 1.75

◆ kDictMaxWildcards

const int tesseract::kDictMaxWildcards = 2
static

◆ kDictRatio

const double tesseract::kDictRatio = 2.25

◆ kDictWildcard

const char tesseract::kDictWildcard[] = "\u2606"
static

◆ kDocDictMaxRepChars

const int tesseract::kDocDictMaxRepChars = 4
static

◆ kDoNotReverse

const char tesseract::kDoNotReverse[] = "RRP_DO_NO_REVERSE"

◆ kDoubleFlag

const int tesseract::kDoubleFlag = 128

◆ kErrClip

const double tesseract::kErrClip = 1.0f

◆ kErrorGraphInterval

const int tesseract::kErrorGraphInterval = 1000

◆ kExposureFactor

const int tesseract::kExposureFactor = 16

◆ kFeaturePadding

const int tesseract::kFeaturePadding = 2

◆ kFontMergeDistance

const float tesseract::kFontMergeDistance = 0.025

◆ kForceReverse

const char tesseract::kForceReverse[] = "RRP_FORCE_REVERSE"

◆ kGoodRowNumberOfColumnsLarge

const double tesseract::kGoodRowNumberOfColumnsLarge = 0.7

◆ kGoodRowNumberOfColumnsSmall

const double tesseract::kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 }

◆ kGoodRowNumberOfColumnsSmallSize

const int tesseract::kGoodRowNumberOfColumnsSmallSize
Initial value:
=
sizeof(kGoodRowNumberOfColumnsSmall) / sizeof(double) - 1
const double kGoodRowNumberOfColumnsSmall[]
Definition: tablerecog.cpp:56

◆ kGutterMultiple

const int tesseract::kGutterMultiple = 4

◆ kGutterToNeighbourRatio

const int tesseract::kGutterToNeighbourRatio = 3

◆ kHighConfidence

const double tesseract::kHighConfidence = 0.9375

◆ kHistogramBuckets

const int tesseract::kHistogramBuckets = 16

◆ kHistogramSize

const int tesseract::kHistogramSize = 256

◆ kHorizontalGapMergeFraction

const double tesseract::kHorizontalGapMergeFraction = 0.5

◆ kHorizontalSpacing

const double tesseract::kHorizontalSpacing = 0.30

◆ kHorzStrongTextlineAspect

const int tesseract::kHorzStrongTextlineAspect = 5

◆ kHorzStrongTextlineCount

const int tesseract::kHorzStrongTextlineCount = 8

◆ kHorzStrongTextlineHeight

const int tesseract::kHorzStrongTextlineHeight = 10

◆ kHyphenLikeUTF8

const char * tesseract::kHyphenLikeUTF8
Initial value:
= {
"-",
"\u05BE",
"\u2010",
"\u2011",
"\u2012",
"\u2013",
"\u2014",
"\u2015",
"\u2212",
"\uFE58",
"\uFE63",
"\uFF0D",
nullptr,
}

The following are confusable internal word punctuation symbols which we normalize to the first variant when matching in dawgs.

◆ kHyphenSymbol

const char tesseract::kHyphenSymbol[] = "-"
static

◆ kIllegalMsg

const char tesseract::kIllegalMsg[]
static
Initial value:
=
"Illegal ambiguity specification on line %d\n"

◆ kIllegalUnicharMsg

const char tesseract::kIllegalUnicharMsg[]
static
Initial value:
=
"Illegal unichar %s in ambiguity specification\n"

◆ kImagePadding

const int tesseract::kImagePadding = 4

◆ kImprovementFraction

const double tesseract::kImprovementFraction = 15.0 / 16.0

◆ kInfiniteDist

const float tesseract::kInfiniteDist = 999.0f

◆ kInputFile

const char* tesseract::kInputFile = "noname.tif"

Filename used for input image file, from which to derive a name to search for a possible UNLV zone file, if none is specified by SetInputName.

◆ kInt8Flag

const int tesseract::kInt8Flag = 1

◆ kLargeTableProjectionThreshold

const double tesseract::kLargeTableProjectionThreshold = 0.45

◆ kLargeTableRowCount

const int tesseract::kLargeTableRowCount = 6

◆ kLatinChs

const int tesseract::kLatinChs[]
Initial value:
= {
0x00a2, 0x0022, 0x0022, 0x0027, 0x0027, 0x00b7, 0x002d, 0
}

Latin chars corresponding to the unicode chars above.

◆ kLearningRateDecay

const double tesseract::kLearningRateDecay = sqrt(0.5)

◆ kLeftIndentAlignmentCountTh

const int tesseract::kLeftIndentAlignmentCountTh = 1

◆ kLineCountReciprocal

const double tesseract::kLineCountReciprocal = 4.0

◆ kLinedTableMinHorizontalLines

const int tesseract::kLinedTableMinHorizontalLines = 3

◆ kLinedTableMinVerticalLines

const int tesseract::kLinedTableMinVerticalLines = 3

◆ kLineFindGridSize

const int tesseract::kLineFindGridSize = 50

Grid size used by line finder. Not very critical.

◆ kLineFragmentAspectRatio

const double tesseract::kLineFragmentAspectRatio = 10.0

◆ kLineResidueAspectRatio

const double tesseract::kLineResidueAspectRatio = 8.0

◆ kLineResiduePadRatio

const int tesseract::kLineResiduePadRatio = 3

◆ kLineResidueSizeRatio

const double tesseract::kLineResidueSizeRatio = 1.75

◆ kLineTrapLongest

const int tesseract::kLineTrapLongest = 4

◆ kLineTrapShortest

const int tesseract::kLineTrapShortest = 2

◆ kLRM

const char * tesseract::kLRM = "\u200E"

◆ kMarginFactor

const double tesseract::kMarginFactor = 1.1

◆ kMarginOverlapFraction

const double tesseract::kMarginOverlapFraction = 0.25

◆ kMathDigitDensityTh1

const float tesseract::kMathDigitDensityTh1 = 0.25

◆ kMathDigitDensityTh2

const float tesseract::kMathDigitDensityTh2 = 0.1

◆ kMathItalicDensityTh

const float tesseract::kMathItalicDensityTh = 0.5

◆ kMaxAmbigStringSize

const int tesseract::kMaxAmbigStringSize = 30 * ( 10 + 1)

◆ kMaxBaselineError

const double tesseract::kMaxBaselineError = 0.4375

◆ kMaxBlobOverlapFactor

const double tesseract::kMaxBlobOverlapFactor = 4.0

◆ kMaxBlobWidth

const int tesseract::kMaxBlobWidth = 500

◆ kMaxBoxEdgeDiff

const int16_t tesseract::kMaxBoxEdgeDiff = 2

◆ kMaxBoxesInDataPartition

const int tesseract::kMaxBoxesInDataPartition = 20

◆ kMaxBytesPerCodepoint

const int tesseract::kMaxBytesPerCodepoint = 20
static

◆ kMaxBytesPerLine

const int tesseract::kMaxBytesPerLine
Initial value:
= kNumbersPerBlob * (kBytesPer64BitNumber + 1) + 1 +
UNICHAR_LEN
const int kNumbersPerBlob
Definition: baseapi.cpp:1815
const int kBytesPer64BitNumber
Definition: baseapi.cpp:1828

A maximal single box could occupy kNumbersPerBlob numbers at kBytesPer64BitNumber digits (if someone sneaks in a 64 bit value) and a space plus the newline and the maximum length of a UNICHAR. Test against this on each iteration for safety.

◆ kMaxCaptionLines

const int tesseract::kMaxCaptionLines = 7

◆ kMaxCharTopRange

const int tesseract::kMaxCharTopRange = 48

◆ kMaxCircleErosions

const int tesseract::kMaxCircleErosions = 8

◆ kMaxCJKSizeRatio

const int tesseract::kMaxCJKSizeRatio = 5

◆ kMaxColorDistance

const int tesseract::kMaxColorDistance = 900

◆ kMaxColumnHeaderDistance

const int tesseract::kMaxColumnHeaderDistance = 4

◆ kMaxDiacriticDistanceRatio

const double tesseract::kMaxDiacriticDistanceRatio = 1.25

◆ kMaxDiacriticGapToBaseCharHeight

const double tesseract::kMaxDiacriticGapToBaseCharHeight = 1.0

◆ kMaxDistToPartSizeRatio

const double tesseract::kMaxDistToPartSizeRatio = 1.5

◆ kMaxFillinMultiple

const int tesseract::kMaxFillinMultiple = 11

◆ kMaxFinalCost

const float tesseract::kMaxFinalCost = 100.0f
static

◆ kMaxGapInTextPartition

const double tesseract::kMaxGapInTextPartition = 4.0

◆ kMaxGutterWidthAbsolute

const double tesseract::kMaxGutterWidthAbsolute = 2.00

◆ kMaxIncompatibleColumnCount

const int tesseract::kMaxIncompatibleColumnCount = 2

◆ kMaxInputHeight

const int tesseract::kMaxInputHeight = 48

◆ kMaxIntSize

const int tesseract::kMaxIntSize = 22

Max string length of an int.

◆ kMaxLargeOverlapsWithMedium

const int tesseract::kMaxLargeOverlapsWithMedium = 12

◆ kMaxLargeOverlapsWithSmall

const int tesseract::kMaxLargeOverlapsWithSmall = 3

◆ kMaxLeaderGapFractionOfMax

const double tesseract::kMaxLeaderGapFractionOfMax = 0.25

◆ kMaxLeaderGapFractionOfMin

const double tesseract::kMaxLeaderGapFractionOfMin = 0.5

◆ kMaxLigature

const int tesseract::kMaxLigature = 0xfb17

◆ kMaxLineLength

const int tesseract::kMaxLineLength = 1024

◆ kMaxLineResidue

const int tesseract::kMaxLineResidue = 6

◆ kMaxMediumOverlapsWithSmall

const int tesseract::kMaxMediumOverlapsWithSmall = 12

◆ kMaxMediumWordUnichars

const int tesseract::kMaxMediumWordUnichars = 6
static

◆ kMaxNeighbourDistFactor

const int tesseract::kMaxNeighbourDistFactor = 4

◆ kMaxNonLineDensity

const double tesseract::kMaxNonLineDensity = 0.25

◆ kMaxNumTessdataEntries

const int tesseract::kMaxNumTessdataEntries = 1000
static

TessdataType could be updated to contain more entries, however we do not expect that number to be astronomically high. In order to automatically detect endianness TessdataManager will flip the bits if actual_tessdata_num_entries_ is larger than kMaxNumTessdataEntries.

◆ kMaxOffsetDist

const int tesseract::kMaxOffsetDist = 32

◆ kMaxPadFactor

const int tesseract::kMaxPadFactor = 6

◆ kMaxParagraphEndingLeftSpaceMultiple

const double tesseract::kMaxParagraphEndingLeftSpaceMultiple = 3.0

◆ kMaxPartitionSpacing

const double tesseract::kMaxPartitionSpacing = 1.75

◆ kMaxRaggedSearch

const int tesseract::kMaxRaggedSearch = 25

◆ kMaxRealDistance

const int tesseract::kMaxRealDistance = 2.0

◆ kMaxRectangularFraction

const double tesseract::kMaxRectangularFraction = 0.75

◆ kMaxRectangularGradient

const double tesseract::kMaxRectangularGradient = 0.1

◆ kMaxRMSColorNoise

const int tesseract::kMaxRMSColorNoise = 128

◆ kMaxRowSize

const double tesseract::kMaxRowSize = 2.5

◆ kMaxSameBlockLineSpacing

const double tesseract::kMaxSameBlockLineSpacing = 3

◆ kMaxSizeRatio

const double tesseract::kMaxSizeRatio = 1.5

◆ kMaxSkewFactor

const int tesseract::kMaxSkewFactor = 15

◆ kMaxSmallNeighboursPerPix

const double tesseract::kMaxSmallNeighboursPerPix = 1.0 / 32

◆ kMaxSmallWordUnichars

const int tesseract::kMaxSmallWordUnichars = 3
static

◆ kMaxSpacingDrift

const double tesseract::kMaxSpacingDrift = 1.0 / 72

◆ kMaxStaveHeight

const double tesseract::kMaxStaveHeight = 1.0

◆ kMaxTableCellXheight

const double tesseract::kMaxTableCellXheight = 2.0

◆ kMaxTopSpacingFraction

const double tesseract::kMaxTopSpacingFraction = 0.25

◆ kMaxUnicharsPerCluster

const int tesseract::kMaxUnicharsPerCluster = 2000

◆ kMaxVerticalSearch

const int tesseract::kMaxVerticalSearch = 12

◆ kMaxVerticalSpacing

const int tesseract::kMaxVerticalSpacing = 500

◆ kMaxWinSize

const int tesseract::kMaxWinSize = 2000

◆ kMaxXProjectionGapFactor

const double tesseract::kMaxXProjectionGapFactor = 2.0

◆ kMinAbsoluteGarbageAlphanumFrac

const float tesseract::kMinAbsoluteGarbageAlphanumFrac = 0.5f
static

◆ kMinAbsoluteGarbageWordLength

const int tesseract::kMinAbsoluteGarbageWordLength = 10
static

◆ kMinAlignedGutter

const double tesseract::kMinAlignedGutter = 0.25

◆ kMinAlignedTabs

const int tesseract::kMinAlignedTabs = 4

◆ kMinBaselineCoverage

const double tesseract::kMinBaselineCoverage = 0.5

◆ kMinBoxesInTextPartition

const int tesseract::kMinBoxesInTextPartition = 10

◆ kMinCaptionGapHeightRatio

const double tesseract::kMinCaptionGapHeightRatio = 0.5

◆ kMinCaptionGapRatio

const double tesseract::kMinCaptionGapRatio = 2.0

◆ kMinCertainty

const float tesseract::kMinCertainty = -20.0f

◆ kMinChainTextValue

const int tesseract::kMinChainTextValue = 3

◆ kMinClusteredShapes

const int tesseract::kMinClusteredShapes = 1

◆ kMinColorDifference

const int tesseract::kMinColorDifference = 16

◆ kMinColumnWidth

const int tesseract::kMinColumnWidth = 2.0 / 3

◆ kMinDiacriticSizeRatio

const double tesseract::kMinDiacriticSizeRatio = 1.0625

◆ kMinDivergenceRate

const double tesseract::kMinDivergenceRate = 50.0

◆ kMinEvaluatedTabs

const int tesseract::kMinEvaluatedTabs = 3

◆ kMinFilledArea

const double tesseract::kMinFilledArea = 0.35

◆ kMinFinalCost

const float tesseract::kMinFinalCost = 0.001f
static

◆ kMinFractionalLinesInColumn

const double tesseract::kMinFractionalLinesInColumn = 0.125

◆ kMinGoodTextPARatio

const double tesseract::kMinGoodTextPARatio = 1.5

◆ kMinGutterFraction

const double tesseract::kMinGutterFraction = 0.5

◆ kMinGutterWidthGrid

const double tesseract::kMinGutterWidthGrid = 0.5

◆ kMinImageFindSize

const int tesseract::kMinImageFindSize = 100

◆ kMinLeaderCount

const int tesseract::kMinLeaderCount = 5

◆ kMinLigature

const int tesseract::kMinLigature = 0xfb00

◆ kMinLineLengthFraction

const int tesseract::kMinLineLengthFraction = 4

Denominator by which the resolution is divided to give the minimum length, in pixels, required of a line.

◆ kMinLinesInColumn

const int tesseract::kMinLinesInColumn = 10

◆ kMinMaxGapInTextPartition

const double tesseract::kMinMaxGapInTextPartition = 0.5

◆ kMinMusicPixelFraction

const double tesseract::kMinMusicPixelFraction = 0.75

◆ kMinOverlapWithTable

const double tesseract::kMinOverlapWithTable = 0.6

◆ kMinParagraphEndingTextToWhitespaceRatio

const double tesseract::kMinParagraphEndingTextToWhitespaceRatio = 3.0

◆ kMinPointsForErrorCount

const int tesseract::kMinPointsForErrorCount = 16

◆ kMinProb

const float tesseract::kMinProb = exp(kMinCertainty)

◆ kMinRaggedGutter

const double tesseract::kMinRaggedGutter = 1.5

◆ kMinRaggedTabs

const int tesseract::kMinRaggedTabs = 5

◆ kMinRampSize

const int tesseract::kMinRampSize = 1000

◆ kMinRectangularFraction

const double tesseract::kMinRectangularFraction = 0.125

◆ kMinRectSize

const int tesseract::kMinRectSize = 10

Minimum sensible image size to be worth running tesseract.

◆ kMinRowsInTable

const int tesseract::kMinRowsInTable = 3

◆ kMinStallIterations

const int tesseract::kMinStallIterations = 10000

◆ kMinStartedErrorRate

const int tesseract::kMinStartedErrorRate = 75

◆ kMinStrongTextValue

const int tesseract::kMinStrongTextValue = 6

◆ kMinTabGradient

const double tesseract::kMinTabGradient = 4.0

◆ kMinThickLineWidth

const int tesseract::kMinThickLineWidth = 12

◆ kMinVerticalSearch

const int tesseract::kMinVerticalSearch = 3

◆ kMinWinSize

const int tesseract::kMinWinSize = 500

◆ kMostlyOneDirRatio

const int tesseract::kMostlyOneDirRatio = 3

◆ kNeighbourSearchFactor

const double tesseract::kNeighbourSearchFactor = 2.5

◆ kNodeContNames

const char* tesseract::kNodeContNames[] = {"Anything", "OnlyDup", "NoDup"}

◆ kNoiseOverlapAreaFactor

const double tesseract::kNoiseOverlapAreaFactor = 1.0 / 512

◆ kNoiseOverlapGrowthFactor

const double tesseract::kNoiseOverlapGrowthFactor = 4.0

◆ kNoisePadding

const int tesseract::kNoisePadding = 4

◆ kNullChar

const char* tesseract::kNullChar = "<nul>"

◆ kNumAdjustmentIterations

const int tesseract::kNumAdjustmentIterations = 100

◆ kNumbersPerBlob

const int tesseract::kNumbersPerBlob = 5

The 5 numbers output for each box (the usual 4 and a page number.)

◆ kNumCNParams

const int tesseract::kNumCNParams = 4
static

◆ kNumEndPoints

const int tesseract::kNumEndPoints = 3

◆ kNumOffsetMaps

const int tesseract::kNumOffsetMaps = 2
static

◆ kNumPagesPerBatch

const int tesseract::kNumPagesPerBatch = 100

◆ kOldVarsFile

const char* tesseract::kOldVarsFile = "failed_vars.txt"

Temp file used for storing current parameters before applying retry values.

◆ kOriginalNoiseMultiple

const int tesseract::kOriginalNoiseMultiple = 8

◆ kParagraphEndingPreviousLineRatio

const double tesseract::kParagraphEndingPreviousLineRatio = 1.3

◆ kParamsTrainingFeatureTypeName

const char* const tesseract::kParamsTrainingFeatureTypeName[]
static
Initial value:
= {
"PTRAIN_DIGITS_SHORT",
"PTRAIN_DIGITS_MED",
"PTRAIN_DIGITS_LONG",
"PTRAIN_NUM_SHORT",
"PTRAIN_NUM_MED",
"PTRAIN_NUM_LONG",
"PTRAIN_DOC_SHORT",
"PTRAIN_DOC_MED",
"PTRAIN_DOC_LONG",
"PTRAIN_DICT_SHORT",
"PTRAIN_DICT_MED",
"PTRAIN_DICT_LONG",
"PTRAIN_FREQ_SHORT",
"PTRAIN_FREQ_MED",
"PTRAIN_FREQ_LONG",
"PTRAIN_SHAPE_COST_PER_CHAR",
"PTRAIN_NGRAM_COST_PER_CHAR",
"PTRAIN_NUM_BAD_PUNC",
"PTRAIN_NUM_BAD_CASE",
"PTRAIN_XHEIGHT_CONSISTENCY",
"PTRAIN_NUM_BAD_CHAR_TYPE",
"PTRAIN_NUM_BAD_SPACING",
"PTRAIN_NUM_BAD_FONT",
"PTRAIN_RATING_PER_CHAR",
}

◆ kPDF

const char * tesseract::kPDF = "\u202C"

◆ kPhotoOffsetFraction

const double tesseract::kPhotoOffsetFraction = 0.375

◆ kPrime1

const int tesseract::kPrime1 = 17

◆ kPrime2

const int tesseract::kPrime2 = 13

◆ kQuestionSymbol

const char tesseract::kQuestionSymbol[] = "?"
static

◆ kRadicalRadix

const int tesseract::kRadicalRadix = 29

◆ kRaggedFraction

const double tesseract::kRaggedFraction = 2.5

◆ kRaggedGapFraction

const double tesseract::kRaggedGapFraction = 1.0

◆ kRaggedGutterMultiple

const int tesseract::kRaggedGutterMultiple = 5

◆ kRandomizingCenter

const int tesseract::kRandomizingCenter = 128

◆ kRatingEpsilon

const double tesseract::kRatingEpsilon = 1.0 / 32

◆ kRatingPad

const int tesseract::kRatingPad = 4
static

◆ kRequiredColumns

const double tesseract::kRequiredColumns = 0.7

◆ kReverseIfHasRTL

const char tesseract::kReverseIfHasRTL[] = "RRP_REVERSE_IF_HAS_RTL"

◆ kRGBRMSColors

const int tesseract::kRGBRMSColors = 4

◆ kRLE

const char * tesseract::kRLE = "\u202A"

◆ kRLM

const char * tesseract::kRLM = "\u200F"

◆ kRMSFitScaling

const double tesseract::kRMSFitScaling = 8.0

◆ kRotationRange

const float tesseract::kRotationRange = 0.02f

◆ kRulingVerticalMargin

const int tesseract::kRulingVerticalMargin = 3

◆ kSaltnPepper

const int tesseract::kSaltnPepper = 5

◆ kSampleRandomSize

const int tesseract::kSampleRandomSize = kSampleYShiftSize * kSampleScaleSize - 2
static

◆ kSampleScaleSize

const int tesseract::kSampleScaleSize = 3
static

◆ kSampleYShiftSize

const int tesseract::kSampleYShiftSize = 5
static

◆ kScaleFactor

const double tesseract::kScaleFactor = 256.0

◆ kScoreScaleFactor

const float tesseract::kScoreScaleFactor = 100.0f
static

◆ kSeedBlobsCountTh

const int tesseract::kSeedBlobsCountTh = 10

◆ kSideSpaceMargin

const int tesseract::kSideSpaceMargin = 10

◆ kSimCertaintyOffset

const float tesseract::kSimCertaintyOffset = -10.0
static

◆ kSimCertaintyScale

const float tesseract::kSimCertaintyScale = -10.0
static

◆ kSimilarityFloor

const float tesseract::kSimilarityFloor = 100.0
static

◆ kSimilarRaggedDist

const int tesseract::kSimilarRaggedDist = 50

◆ kSimilarVectorDist

const int tesseract::kSimilarVectorDist = 10

◆ ksizeofUniversalAmbigsFile

const int tesseract::ksizeofUniversalAmbigsFile = sizeof(kUniversalAmbigsFile)

◆ kSizeRatioToReject

const float tesseract::kSizeRatioToReject = 2.0

◆ kSlashSymbol

const char tesseract::kSlashSymbol[] = "/"
static

◆ kSmallTableProjectionThreshold

const double tesseract::kSmallTableProjectionThreshold = 0.35

◆ kSmoothDecisionMargin

const int tesseract::kSmoothDecisionMargin = 4

◆ kSplitPartitionSize

const double tesseract::kSplitPartitionSize = 2.0

◆ kSquareLimit

const int tesseract::kSquareLimit = 25

◆ kStageTransitionThreshold

const double tesseract::kStageTransitionThreshold = 10.0

◆ kStateClip

const double tesseract::kStateClip = 100.0

◆ kStrokeWidthCJK

const double tesseract::kStrokeWidthCJK = 2.0

◆ kStrokeWidthConstantTolerance

const double tesseract::kStrokeWidthConstantTolerance = 2.0

◆ kStrokeWidthFractionalTolerance

const double tesseract::kStrokeWidthFractionalTolerance = 0.25

◆ kStrokeWidthFractionCJK

const double tesseract::kStrokeWidthFractionCJK = 0.25

◆ kStrokeWidthFractionTolerance

const double tesseract::kStrokeWidthFractionTolerance = 0.125

Allowed proportional change in stroke width to be the same font.

◆ kStrokeWidthTolerance

const double tesseract::kStrokeWidthTolerance = 1.5

Allowed constant change in stroke width to be the same font. Really 1.5 pixels.

◆ kSubTrainerMarginFraction

const double tesseract::kSubTrainerMarginFraction = 3.0 / 128

◆ kTableColumnThreshold

const double tesseract::kTableColumnThreshold = 3.0

◆ kTableSize

const int tesseract::kTableSize = 4096

◆ kTabRadiusFactor

const int tesseract::kTabRadiusFactor = 5

◆ kTargetXScale

const int tesseract::kTargetXScale = 5

◆ kTargetYScale

const int tesseract::kTargetYScale = 100

◆ kTessdataFileSuffixes

const char* const tesseract::kTessdataFileSuffixes[]
static
Initial value:
= {
kLangConfigFileSuffix,
kUnicharsetFileSuffix,
kAmbigsFileSuffix,
kBuiltInTemplatesFileSuffix,
kBuiltInCutoffsFileSuffix,
kNormProtoFileSuffix,
kPuncDawgFileSuffix,
kSystemDawgFileSuffix,
kNumberDawgFileSuffix,
kFreqDawgFileSuffix,
kFixedLengthDawgsFileSuffix,
kCubeUnicharsetFileSuffix,
kCubeSystemDawgFileSuffix,
kShapeTableFileSuffix,
kBigramDawgFileSuffix,
kUnambigDawgFileSuffix,
kParamsModelFileSuffix,
kLSTMModelFileSuffix,
kLSTMPuncDawgFileSuffix,
kLSTMSystemDawgFileSuffix,
kLSTMNumberDawgFileSuffix,
kLSTMUnicharsetFileSuffix,
kLSTMRecoderFileSuffix,
kVersionFileSuffix,
}

kTessdataFileSuffixes[i] indicates the file suffix for tessdata of type i (from TessdataType enum).

◆ kTesseractReject

const char tesseract::kTesseractReject = '~'

Character returned when Tesseract could not recognize the input as anything.

◆ kTestChar

const int tesseract::kTestChar = -1

◆ kThickLengthMultiple

const double tesseract::kThickLengthMultiple = 0.75

◆ kThinLineFraction

const int tesseract::kThinLineFraction = 20

Denominator by which the resolution is divided to give the maximum width, in pixels, allowed for a thin line.

◆ kTinyEnoughTextlineOverlapFraction

const double tesseract::kTinyEnoughTextlineOverlapFraction = 0.25

◆ kUnclearDensityTh

const float tesseract::kUnclearDensityTh = 0.25

◆ kUniChs

const int tesseract::kUniChs[]
Initial value:
= {
0x20ac, 0x201c, 0x201d, 0x2018, 0x2019, 0x2022, 0x2014, 0
}

Conversion table for non-latin characters. Maps characters out of the latin set into the latin set. TODO(rays) incorporate this translation into unicharset.

◆ kUnigramAmbigsBufferSize

const int tesseract::kUnigramAmbigsBufferSize = 1000
static

◆ kUniversalAmbigsFile

const char tesseract::kUniversalAmbigsFile

◆ kUNLVReject

const char tesseract::kUNLVReject = '~'

Character used by UNLV error counter as a reject.

◆ kUNLVSuspect

const char tesseract::kUNLVSuspect = '^'

Character used by UNLV as a suspect marker.

◆ kUTF8LineSeparator

const char * tesseract::kUTF8LineSeparator = "\u2028"

◆ kUTF8ParagraphSeparator

const char * tesseract::kUTF8ParagraphSeparator = "\u2029"

◆ kVerticalSpacing

const double tesseract::kVerticalSpacing = -0.2

◆ kVLineAlignment

const int tesseract::kVLineAlignment = 3

◆ kVLineGutter

const int tesseract::kVLineGutter = 1

◆ kVLineMinLength

const int tesseract::kVLineMinLength = 500

◆ kVLineSearchSize

const int tesseract::kVLineSearchSize = 150

◆ kWildcard

const char tesseract::kWildcard[] = "*"
static

◆ kWordJoinerUTF8

const char* tesseract::kWordJoinerUTF8 = "\u2060"
static

◆ kWorstDictCertainty

const float tesseract::kWorstDictCertainty = -25.0f

◆ kXWinFrameSize

const int tesseract::kXWinFrameSize = 30

◆ kYWinFrameSize

const int tesseract::kYWinFrameSize = 80

◆ LMPainPointsTypeName

const char* const tesseract::LMPainPointsTypeName[]
static
Initial value:
= {
"LM_PPTYPE_BLAMER",
"LM_PPTYPE_AMBIGS",
"LM_PPTYPE_PATH",
"LM_PPTYPE_SHAPE",
}

◆ LogisticTable

double tesseract::LogisticTable

◆ RTLReversePolicyNames

const char* const tesseract::RTLReversePolicyNames[]
Initial value:
= {
kDoNotReverse,
kReverseIfHasRTL,
kForceReverse
}
const char kForceReverse[]
Definition: trie.cpp:42
const char kReverseIfHasRTL[]
Definition: trie.cpp:41
const char kDoNotReverse[]
Definition: trie.cpp:40

◆ TanhTable

double tesseract::TanhTable

◆ tprintfMutex

CCUtilMutex tesseract::tprintfMutex

◆ XHeightConsistencyEnumName

const char* const tesseract::XHeightConsistencyEnumName[]
static
Initial value:
= {
"XH_GOOD",
"XH_SUBNORMAL",
"XH_INCONSISTENT",
}