tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
tesseract::UnicharCompress Class Reference

#include <unicharcompress.h>

Collaboration diagram for tesseract::UnicharCompress:

Public Member Functions

 UnicharCompress ()
 
 UnicharCompress (const UnicharCompress &src)
 
 ~UnicharCompress ()
 
UnicharCompress & operator= (const UnicharCompress &src)
 
bool ComputeEncoding (const UNICHARSET &unicharset, int null_id, STRING *radical_stroke_table)
 
void SetupPassThrough (const UNICHARSET &unicharset)
 
void SetupDirect (const GenericVector< RecodedCharID > &codes)
 
int code_range () const
 
int EncodeUnichar (int unichar_id, RecodedCharID *code) const
 
int DecodeUnichar (const RecodedCharID &code) const
 
bool IsValidFirstCode (int code) const
 
const GenericVector< int > * GetNextCodes (const RecodedCharID &code) const
 
const GenericVector< int > * GetFinalCodes (const RecodedCharID &code) const
 
bool Serialize (TFile *fp) const
 
bool DeSerialize (TFile *fp)
 
STRING GetEncodingAsString (const UNICHARSET &unicharset) const
 

Static Public Member Functions

static bool DecomposeHangul (int unicode, int *leading, int *vowel, int *trailing)
 

Static Public Attributes

static const int kFirstHangul = 0xac00
 
static const int kNumHangul = 11172
 
static const int kLCount = 19
 
static const int kVCount = 21
 
static const int kTCount = 28
 

Private Member Functions

void DefragmentCodeValues (int encoded_null)
 
void ComputeCodeRange ()
 
void SetupDecoder ()
 
void Cleanup ()
 

Private Attributes

GenericVector< RecodedCharID > encoder_
 
std::unordered_map< RecodedCharID, int, RecodedCharID::RecodedCharIDHash > decoder_
 
GenericVector< bool > is_valid_start_
 
std::unordered_map< RecodedCharID, GenericVectorEqEq< int > *, RecodedCharID::RecodedCharIDHash > next_codes_
 
std::unordered_map< RecodedCharID, GenericVectorEqEq< int > *, RecodedCharID::RecodedCharIDHash > final_codes_
 
int code_range_
 

Constructor & Destructor Documentation

◆ UnicharCompress() [1/2]

tesseract::UnicharCompress::UnicharCompress ( )

◆ UnicharCompress() [2/2]

tesseract::UnicharCompress::UnicharCompress ( const UnicharCompress &  src)

◆ ~UnicharCompress()

tesseract::UnicharCompress::~UnicharCompress ( )

Member Function Documentation

◆ Cleanup()

void tesseract::UnicharCompress::Cleanup ( )
private

◆ code_range()

int tesseract::UnicharCompress::code_range ( ) const
inline

◆ ComputeCodeRange()

void tesseract::UnicharCompress::ComputeCodeRange ( )
private

◆ ComputeEncoding()

bool tesseract::UnicharCompress::ComputeEncoding ( const UNICHARSET &  unicharset,
int  null_id,
STRING *  radical_stroke_table 
)

◆ DecodeUnichar()

int tesseract::UnicharCompress::DecodeUnichar ( const RecodedCharID &  code) const

◆ DecomposeHangul()

bool tesseract::UnicharCompress::DecomposeHangul ( int  unicode,
int *  leading,
int *  vowel,
int *  trailing 
)
static

◆ DefragmentCodeValues()

void tesseract::UnicharCompress::DefragmentCodeValues ( int  encoded_null)
private

◆ DeSerialize()

bool tesseract::UnicharCompress::DeSerialize ( TFile *  fp)

◆ EncodeUnichar()

int tesseract::UnicharCompress::EncodeUnichar ( int  unichar_id,
RecodedCharID *  code 
) const

◆ GetEncodingAsString()

STRING tesseract::UnicharCompress::GetEncodingAsString ( const UNICHARSET &  unicharset) const

◆ GetFinalCodes()

const GenericVector<int>* tesseract::UnicharCompress::GetFinalCodes ( const RecodedCharID &  code) const
inline

◆ GetNextCodes()

const GenericVector<int>* tesseract::UnicharCompress::GetNextCodes ( const RecodedCharID &  code) const
inline

◆ IsValidFirstCode()

bool tesseract::UnicharCompress::IsValidFirstCode ( int  code) const
inline

◆ operator=()

UnicharCompress & tesseract::UnicharCompress::operator= ( const UnicharCompress &  src)

◆ Serialize()

bool tesseract::UnicharCompress::Serialize ( TFile *  fp) const

◆ SetupDecoder()

void tesseract::UnicharCompress::SetupDecoder ( )
private

◆ SetupDirect()

void tesseract::UnicharCompress::SetupDirect ( const GenericVector< RecodedCharID > &  codes)

◆ SetupPassThrough()

void tesseract::UnicharCompress::SetupPassThrough ( const UNICHARSET &  unicharset)

Member Data Documentation

◆ code_range_

int tesseract::UnicharCompress::code_range_
private

◆ decoder_

std::unordered_map<RecodedCharID, int, RecodedCharID::RecodedCharIDHash> tesseract::UnicharCompress::decoder_
private

◆ encoder_

GenericVector<RecodedCharID> tesseract::UnicharCompress::encoder_
private

◆ final_codes_

std::unordered_map<RecodedCharID, GenericVectorEqEq<int>*, RecodedCharID::RecodedCharIDHash> tesseract::UnicharCompress::final_codes_
private

◆ is_valid_start_

GenericVector<bool> tesseract::UnicharCompress::is_valid_start_
private

◆ kFirstHangul

const int tesseract::UnicharCompress::kFirstHangul = 0xac00
static

◆ kLCount

const int tesseract::UnicharCompress::kLCount = 19
static

◆ kNumHangul

const int tesseract::UnicharCompress::kNumHangul = 11172
static

◆ kTCount

const int tesseract::UnicharCompress::kTCount = 28
static

◆ kVCount

const int tesseract::UnicharCompress::kVCount = 21
static

◆ next_codes_

std::unordered_map<RecodedCharID, GenericVectorEqEq<int>*, RecodedCharID::RecodedCharIDHash> tesseract::UnicharCompress::next_codes_
private

The documentation for this class was generated from the following files: