tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
UNICHARSET Class Reference

#include <unicharset.h>

Collaboration diagram for UNICHARSET:

Classes

struct  UNICHAR_PROPERTIES
 
struct  UNICHAR_SLOT
 

Public Types

enum  Direction {
  U_LEFT_TO_RIGHT = 0, U_RIGHT_TO_LEFT = 1, U_EUROPEAN_NUMBER = 2, U_EUROPEAN_NUMBER_SEPARATOR = 3,
  U_EUROPEAN_NUMBER_TERMINATOR = 4, U_ARABIC_NUMBER = 5, U_COMMON_NUMBER_SEPARATOR = 6, U_BLOCK_SEPARATOR = 7,
  U_SEGMENT_SEPARATOR = 8, U_WHITE_SPACE_NEUTRAL = 9, U_OTHER_NEUTRAL = 10, U_LEFT_TO_RIGHT_EMBEDDING = 11,
  U_LEFT_TO_RIGHT_OVERRIDE = 12, U_RIGHT_TO_LEFT_ARABIC = 13, U_RIGHT_TO_LEFT_EMBEDDING = 14, U_RIGHT_TO_LEFT_OVERRIDE = 15,
  U_POP_DIRECTIONAL_FORMAT = 16, U_DIR_NON_SPACING_MARK = 17, U_BOUNDARY_NEUTRAL = 18, U_CHAR_DIRECTION_COUNT
}
 

Public Member Functions

 UNICHARSET ()
 
 ~UNICHARSET ()
 
UNICHAR_ID unichar_to_id (const char *const unichar_repr) const
 
UNICHAR_ID unichar_to_id (const char *const unichar_repr, int length) const
 
int step (const char *str) const
 
bool encodable_string (const char *str, int *first_bad_position) const
 
bool encode_string (const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const
 
const char * id_to_unichar (UNICHAR_ID id) const
 
const char * id_to_unichar_ext (UNICHAR_ID id) const
 
STRING debug_str (UNICHAR_ID id) const
 
STRING debug_str (const char *unichar_repr) const
 
void unichar_insert (const char *const unichar_repr, OldUncleanUnichars old_style)
 
void unichar_insert (const char *const unichar_repr)
 
void unichar_insert_backwards_compatible (const char *const unichar_repr)
 
bool contains_unichar_id (UNICHAR_ID unichar_id) const
 
bool contains_unichar (const char *const unichar_repr) const
 
bool contains_unichar (const char *const unichar_repr, int length) const
 
bool eq (UNICHAR_ID unichar_id, const char *const unichar_repr) const
 
void delete_pointers_in_unichars ()
 
void clear ()
 
int size () const
 
void reserve (int unichars_number)
 
bool save_to_file (const char *const filename) const
 
bool save_to_file (FILE *file) const
 
bool save_to_file (tesseract::TFile *file) const
 
bool save_to_string (STRING *str) const
 
bool load_from_inmemory_file (const char *const memory, int mem_size, bool skip_fragments)
 
bool load_from_inmemory_file (const char *const memory, int mem_size)
 
bool load_from_file (const char *const filename, bool skip_fragments)
 
bool load_from_file (const char *const filename)
 
bool load_from_file (FILE *file, bool skip_fragments)
 
bool load_from_file (FILE *file)
 
bool load_from_file (tesseract::TFile *file, bool skip_fragments)
 
void post_load_setup ()
 
bool major_right_to_left () const
 
void set_black_and_whitelist (const char *blacklist, const char *whitelist, const char *unblacklist)
 
void set_isalpha (UNICHAR_ID unichar_id, bool value)
 
void set_islower (UNICHAR_ID unichar_id, bool value)
 
void set_isupper (UNICHAR_ID unichar_id, bool value)
 
void set_isdigit (UNICHAR_ID unichar_id, bool value)
 
void set_ispunctuation (UNICHAR_ID unichar_id, bool value)
 
void set_isngram (UNICHAR_ID unichar_id, bool value)
 
void set_script (UNICHAR_ID unichar_id, const char *value)
 
void set_other_case (UNICHAR_ID unichar_id, UNICHAR_ID other_case)
 
void set_direction (UNICHAR_ID unichar_id, UNICHARSET::Direction value)
 
void set_mirror (UNICHAR_ID unichar_id, UNICHAR_ID mirror)
 
void set_normed (UNICHAR_ID unichar_id, const char *normed)
 
void set_normed_ids (UNICHAR_ID unichar_id)
 
bool get_isalpha (UNICHAR_ID unichar_id) const
 
bool get_islower (UNICHAR_ID unichar_id) const
 
bool get_isupper (UNICHAR_ID unichar_id) const
 
bool get_isdigit (UNICHAR_ID unichar_id) const
 
bool get_ispunctuation (UNICHAR_ID unichar_id) const
 
bool get_isngram (UNICHAR_ID unichar_id) const
 
bool get_isprivate (UNICHAR_ID unichar_id) const
 
bool top_bottom_useful () const
 
void set_ranges_empty ()
 
void SetPropertiesFromOther (const UNICHARSET &src)
 
void PartialSetPropertiesFromOther (int start_index, const UNICHARSET &src)
 
void ExpandRangesFromOther (const UNICHARSET &src)
 
void CopyFrom (const UNICHARSET &src)
 
void AppendOtherUnicharset (const UNICHARSET &src)
 
bool SizesDistinct (UNICHAR_ID id1, UNICHAR_ID id2) const
 
void get_top_bottom (UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
 
void set_top_bottom (UNICHAR_ID unichar_id, int min_bottom, int max_bottom, int min_top, int max_top)
 
void get_width_stats (UNICHAR_ID unichar_id, float *width, float *width_sd) const
 
void set_width_stats (UNICHAR_ID unichar_id, float width, float width_sd)
 
void get_bearing_stats (UNICHAR_ID unichar_id, float *bearing, float *bearing_sd) const
 
void set_bearing_stats (UNICHAR_ID unichar_id, float bearing, float bearing_sd)
 
void get_advance_stats (UNICHAR_ID unichar_id, float *advance, float *advance_sd) const
 
void set_advance_stats (UNICHAR_ID unichar_id, float advance, float advance_sd)
 
bool PropertiesIncomplete (UNICHAR_ID unichar_id) const
 
bool IsSpaceDelimited (UNICHAR_ID unichar_id) const
 
int get_script (UNICHAR_ID unichar_id) const
 
unsigned int get_properties (UNICHAR_ID unichar_id) const
 
char get_chartype (UNICHAR_ID unichar_id) const
 
UNICHAR_ID get_other_case (UNICHAR_ID unichar_id) const
 
Direction get_direction (UNICHAR_ID unichar_id) const
 
UNICHAR_ID get_mirror (UNICHAR_ID unichar_id) const
 
UNICHAR_ID to_lower (UNICHAR_ID unichar_id) const
 
UNICHAR_ID to_upper (UNICHAR_ID unichar_id) const
 
bool has_special_codes () const
 
bool AnyRepeatedUnicodes () const
 
const CHAR_FRAGMENT * get_fragment (UNICHAR_ID unichar_id) const
 
bool get_isalpha (const char *const unichar_repr) const
 
bool get_islower (const char *const unichar_repr) const
 
bool get_isupper (const char *const unichar_repr) const
 
bool get_isdigit (const char *const unichar_repr) const
 
bool get_ispunctuation (const char *const unichar_repr) const
 
unsigned int get_properties (const char *const unichar_repr) const
 
char get_chartype (const char *const unichar_repr) const
 
int get_script (const char *const unichar_repr) const
 
const CHAR_FRAGMENT * get_fragment (const char *const unichar_repr) const
 
bool get_isalpha (const char *const unichar_repr, int length) const
 
bool get_islower (const char *const unichar_repr, int length) const
 
bool get_isupper (const char *const unichar_repr, int length) const
 
bool get_isdigit (const char *const unichar_repr, int length) const
 
bool get_ispunctuation (const char *const unichar_repr, int length) const
 
const char * get_normed_unichar (UNICHAR_ID unichar_id) const
 
const GenericVector< UNICHAR_ID > & normed_ids (UNICHAR_ID unichar_id) const
 
int get_script (const char *const unichar_repr, int length) const
 
int get_script_table_size () const
 
const char * get_script_from_script_id (int id) const
 
int get_script_id_from_name (const char *script_name) const
 
bool is_null_script (const char *script) const
 
int add_script (const char *script)
 
bool get_enabled (UNICHAR_ID unichar_id) const
 
int null_sid () const
 
int common_sid () const
 
int latin_sid () const
 
int cyrillic_sid () const
 
int greek_sid () const
 
int han_sid () const
 
int hiragana_sid () const
 
int katakana_sid () const
 
int thai_sid () const
 
int hangul_sid () const
 
int default_sid () const
 
bool script_has_upper_lower () const
 
bool script_has_xheight () const
 

Static Public Member Functions

static STRING debug_utf8_str (const char *str)
 
static std::string CleanupString (const char *utf8_str)
 
static std::string CleanupString (const char *utf8_str, size_t length)
 

Static Public Attributes

static const char * kCustomLigatures [][2]
 
static const char * kSpecialUnicharCodes [SPECIAL_UNICHAR_CODES_COUNT]
 

Private Member Functions

void encode_string (const char *str, int str_index, int str_length, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *best_total_length, GenericVector< UNICHAR_ID > *best_encoding, GenericVector< char > *best_lengths) const
 
bool GetStrProperties (const char *utf8_str, UNICHAR_PROPERTIES *props) const
 
bool load_via_fgets (TessResultCallback2< char *, char *, int > *fgets_cb, bool skip_fragments)
 

Private Attributes

UNICHAR_SLOT * unichars
 
UNICHARMAP ids
 
int size_used
 
int size_reserved
 
char ** script_table
 
int script_table_size_used
 
int script_table_size_reserved
 
bool top_bottom_set_
 
bool script_has_upper_lower_
 
bool script_has_xheight_
 
bool old_style_included_
 
int null_sid_
 
int common_sid_
 
int latin_sid_
 
int cyrillic_sid_
 
int greek_sid_
 
int han_sid_
 
int hiragana_sid_
 
int katakana_sid_
 
int thai_sid_
 
int hangul_sid_
 
int default_sid_
 

Static Private Attributes

static const char * kCleanupMaps [][2]
 
static const char * null_script = "NULL"
 

Member Enumeration Documentation

◆ Direction

Enumerator
U_LEFT_TO_RIGHT 
U_RIGHT_TO_LEFT 
U_EUROPEAN_NUMBER 
U_EUROPEAN_NUMBER_SEPARATOR 
U_EUROPEAN_NUMBER_TERMINATOR 
U_ARABIC_NUMBER 
U_COMMON_NUMBER_SEPARATOR 
U_BLOCK_SEPARATOR 
U_SEGMENT_SEPARATOR 
U_WHITE_SPACE_NEUTRAL 
U_OTHER_NEUTRAL 
U_LEFT_TO_RIGHT_EMBEDDING 
U_LEFT_TO_RIGHT_OVERRIDE 
U_RIGHT_TO_LEFT_ARABIC 
U_RIGHT_TO_LEFT_EMBEDDING 
U_RIGHT_TO_LEFT_OVERRIDE 
U_POP_DIRECTIONAL_FORMAT 
U_DIR_NON_SPACING_MARK 
U_BOUNDARY_NEUTRAL 
U_CHAR_DIRECTION_COUNT 

Constructor & Destructor Documentation

◆ UNICHARSET()

UNICHARSET::UNICHARSET ( )

◆ ~UNICHARSET()

UNICHARSET::~UNICHARSET ( )

Member Function Documentation

◆ add_script()

int UNICHARSET::add_script ( const char *  script)

◆ AnyRepeatedUnicodes()

bool UNICHARSET::AnyRepeatedUnicodes ( ) const

◆ AppendOtherUnicharset()

void UNICHARSET::AppendOtherUnicharset ( const UNICHARSET &  src)

◆ CleanupString() [1/2]

static std::string UNICHARSET::CleanupString ( const char *  utf8_str)
inline static

◆ CleanupString() [2/2]

std::string UNICHARSET::CleanupString ( const char *  utf8_str,
size_t  length 
)
static

◆ clear()

void UNICHARSET::clear ( )
inline

◆ common_sid()

int UNICHARSET::common_sid ( ) const
inline

◆ contains_unichar() [1/2]

bool UNICHARSET::contains_unichar ( const char *const  unichar_repr) const

◆ contains_unichar() [2/2]

bool UNICHARSET::contains_unichar ( const char *const  unichar_repr,
int  length 
) const

◆ contains_unichar_id()

bool UNICHARSET::contains_unichar_id ( UNICHAR_ID  unichar_id) const
inline

◆ CopyFrom()

void UNICHARSET::CopyFrom ( const UNICHARSET &  src)

◆ cyrillic_sid()

int UNICHARSET::cyrillic_sid ( ) const
inline

◆ debug_str() [1/2]

STRING UNICHARSET::debug_str ( UNICHAR_ID  id) const

◆ debug_str() [2/2]

STRING UNICHARSET::debug_str ( const char *  unichar_repr) const
inline

◆ debug_utf8_str()

STRING UNICHARSET::debug_utf8_str ( const char *  str)
static

◆ default_sid()

int UNICHARSET::default_sid ( ) const
inline

◆ delete_pointers_in_unichars()

void UNICHARSET::delete_pointers_in_unichars ( )
inline

◆ encodable_string()

bool UNICHARSET::encodable_string ( const char *  str,
int *  first_bad_position 
) const

◆ encode_string() [1/2]

bool UNICHARSET::encode_string ( const char *  str,
bool  give_up_on_failure,
GenericVector< UNICHAR_ID > *  encoding,
GenericVector< char > *  lengths,
int *  encoded_length 
) const

◆ encode_string() [2/2]

void UNICHARSET::encode_string ( const char *  str,
int  str_index,
int  str_length,
GenericVector< UNICHAR_ID > *  encoding,
GenericVector< char > *  lengths,
int *  best_total_length,
GenericVector< UNICHAR_ID > *  best_encoding,
GenericVector< char > *  best_lengths 
) const
private

◆ eq()

bool UNICHARSET::eq ( UNICHAR_ID  unichar_id,
const char *const  unichar_repr 
) const

◆ ExpandRangesFromOther()

void UNICHARSET::ExpandRangesFromOther ( const UNICHARSET &  src)

◆ get_advance_stats()

void UNICHARSET::get_advance_stats ( UNICHAR_ID  unichar_id,
float *  advance,
float *  advance_sd 
) const
inline

◆ get_bearing_stats()

void UNICHARSET::get_bearing_stats ( UNICHAR_ID  unichar_id,
float *  bearing,
float *  bearing_sd 
) const
inline

◆ get_chartype() [1/2]

char UNICHARSET::get_chartype ( UNICHAR_ID  unichar_id) const

◆ get_chartype() [2/2]

char UNICHARSET::get_chartype ( const char *const  unichar_repr) const
inline

◆ get_direction()

Direction UNICHARSET::get_direction ( UNICHAR_ID  unichar_id) const
inline

◆ get_enabled()

bool UNICHARSET::get_enabled ( UNICHAR_ID  unichar_id) const
inline

◆ get_fragment() [1/2]

const CHAR_FRAGMENT* UNICHARSET::get_fragment ( UNICHAR_ID  unichar_id) const
inline

◆ get_fragment() [2/2]

const CHAR_FRAGMENT* UNICHARSET::get_fragment ( const char *const  unichar_repr) const
inline

◆ get_isalpha() [1/3]

bool UNICHARSET::get_isalpha ( UNICHAR_ID  unichar_id) const
inline

◆ get_isalpha() [2/3]

bool UNICHARSET::get_isalpha ( const char *const  unichar_repr) const
inline

◆ get_isalpha() [3/3]

bool UNICHARSET::get_isalpha ( const char *const  unichar_repr,
int  length 
) const
inline

◆ get_isdigit() [1/3]

bool UNICHARSET::get_isdigit ( UNICHAR_ID  unichar_id) const
inline

◆ get_isdigit() [2/3]

bool UNICHARSET::get_isdigit ( const char *const  unichar_repr) const
inline

◆ get_isdigit() [3/3]

bool UNICHARSET::get_isdigit ( const char *const  unichar_repr,
int  length 
) const
inline

◆ get_islower() [1/3]

bool UNICHARSET::get_islower ( UNICHAR_ID  unichar_id) const
inline

◆ get_islower() [2/3]

bool UNICHARSET::get_islower ( const char *const  unichar_repr) const
inline

◆ get_islower() [3/3]

bool UNICHARSET::get_islower ( const char *const  unichar_repr,
int  length 
) const
inline

◆ get_isngram()

bool UNICHARSET::get_isngram ( UNICHAR_ID  unichar_id) const
inline

◆ get_isprivate()

bool UNICHARSET::get_isprivate ( UNICHAR_ID  unichar_id) const

◆ get_ispunctuation() [1/3]

bool UNICHARSET::get_ispunctuation ( UNICHAR_ID  unichar_id) const
inline

◆ get_ispunctuation() [2/3]

bool UNICHARSET::get_ispunctuation ( const char *const  unichar_repr) const
inline

◆ get_ispunctuation() [3/3]

bool UNICHARSET::get_ispunctuation ( const char *const  unichar_repr,
int  length 
) const
inline

◆ get_isupper() [1/3]

bool UNICHARSET::get_isupper ( UNICHAR_ID  unichar_id) const
inline

◆ get_isupper() [2/3]

bool UNICHARSET::get_isupper ( const char *const  unichar_repr) const
inline

◆ get_isupper() [3/3]

bool UNICHARSET::get_isupper ( const char *const  unichar_repr,
int  length 
) const
inline

◆ get_mirror()

UNICHAR_ID UNICHARSET::get_mirror ( UNICHAR_ID  unichar_id) const
inline

◆ get_normed_unichar()

const char* UNICHARSET::get_normed_unichar ( UNICHAR_ID  unichar_id) const
inline

◆ get_other_case()

UNICHAR_ID UNICHARSET::get_other_case ( UNICHAR_ID  unichar_id) const
inline

◆ get_properties() [1/2]

unsigned int UNICHARSET::get_properties ( UNICHAR_ID  unichar_id) const

◆ get_properties() [2/2]

unsigned int UNICHARSET::get_properties ( const char *const  unichar_repr) const
inline

◆ get_script() [1/3]

int UNICHARSET::get_script ( UNICHAR_ID  unichar_id) const
inline

◆ get_script() [2/3]

int UNICHARSET::get_script ( const char *const  unichar_repr) const
inline

◆ get_script() [3/3]

int UNICHARSET::get_script ( const char *const  unichar_repr,
int  length 
) const
inline

◆ get_script_from_script_id()

const char* UNICHARSET::get_script_from_script_id ( int  id) const
inline

◆ get_script_id_from_name()

int UNICHARSET::get_script_id_from_name ( const char *  script_name) const

◆ get_script_table_size()

int UNICHARSET::get_script_table_size ( ) const
inline

◆ get_top_bottom()

void UNICHARSET::get_top_bottom ( UNICHAR_ID  unichar_id,
int *  min_bottom,
int *  max_bottom,
int *  min_top,
int *  max_top 
) const
inline

◆ get_width_stats()

void UNICHARSET::get_width_stats ( UNICHAR_ID  unichar_id,
float *  width,
float *  width_sd 
) const
inline

◆ GetStrProperties()

bool UNICHARSET::GetStrProperties ( const char *  utf8_str,
UNICHAR_PROPERTIES *  props 
) const
private

◆ greek_sid()

int UNICHARSET::greek_sid ( ) const
inline

◆ han_sid()

int UNICHARSET::han_sid ( ) const
inline

◆ hangul_sid()

int UNICHARSET::hangul_sid ( ) const
inline

◆ has_special_codes()

bool UNICHARSET::has_special_codes ( ) const
inline

◆ hiragana_sid()

int UNICHARSET::hiragana_sid ( ) const
inline

◆ id_to_unichar()

const char * UNICHARSET::id_to_unichar ( UNICHAR_ID  id) const

◆ id_to_unichar_ext()

const char * UNICHARSET::id_to_unichar_ext ( UNICHAR_ID  id) const

◆ is_null_script()

bool UNICHARSET::is_null_script ( const char *  script) const
inline

◆ IsSpaceDelimited()

bool UNICHARSET::IsSpaceDelimited ( UNICHAR_ID  unichar_id) const
inline

◆ katakana_sid()

int UNICHARSET::katakana_sid ( ) const
inline

◆ latin_sid()

int UNICHARSET::latin_sid ( ) const
inline

◆ load_from_file() [1/5]

bool UNICHARSET::load_from_file ( const char *const  filename,
bool  skip_fragments 
)
inline

◆ load_from_file() [2/5]

bool UNICHARSET::load_from_file ( const char *const  filename)
inline

◆ load_from_file() [3/5]

bool UNICHARSET::load_from_file ( FILE *  file,
bool  skip_fragments 
)

◆ load_from_file() [4/5]

bool UNICHARSET::load_from_file ( FILE *  file)
inline

◆ load_from_file() [5/5]

bool UNICHARSET::load_from_file ( tesseract::TFile *  file,
bool  skip_fragments 
)

◆ load_from_inmemory_file() [1/2]

bool UNICHARSET::load_from_inmemory_file ( const char *const  memory,
int  mem_size,
bool  skip_fragments 
)

◆ load_from_inmemory_file() [2/2]

bool UNICHARSET::load_from_inmemory_file ( const char *const  memory,
int  mem_size 
)
inline

◆ load_via_fgets()

bool UNICHARSET::load_via_fgets ( TessResultCallback2< char *, char *, int > *  fgets_cb,
bool  skip_fragments 
)
private

◆ major_right_to_left()

bool UNICHARSET::major_right_to_left ( ) const

◆ normed_ids()

const GenericVector<UNICHAR_ID>& UNICHARSET::normed_ids ( UNICHAR_ID  unichar_id) const
inline

◆ null_sid()

int UNICHARSET::null_sid ( ) const
inline

◆ PartialSetPropertiesFromOther()

void UNICHARSET::PartialSetPropertiesFromOther ( int  start_index,
const UNICHARSET &  src 
)

◆ post_load_setup()

void UNICHARSET::post_load_setup ( )

◆ PropertiesIncomplete()

bool UNICHARSET::PropertiesIncomplete ( UNICHAR_ID  unichar_id) const
inline

◆ reserve()

void UNICHARSET::reserve ( int  unichars_number)

◆ save_to_file() [1/3]

bool UNICHARSET::save_to_file ( const char *const  filename) const
inline

◆ save_to_file() [2/3]

bool UNICHARSET::save_to_file ( FILE *  file) const
inline

◆ save_to_file() [3/3]

bool UNICHARSET::save_to_file ( tesseract::TFile *  file) const
inline

◆ save_to_string()

bool UNICHARSET::save_to_string ( STRING *  str) const

◆ script_has_upper_lower()

bool UNICHARSET::script_has_upper_lower ( ) const
inline

◆ script_has_xheight()

bool UNICHARSET::script_has_xheight ( ) const
inline

◆ set_advance_stats()

void UNICHARSET::set_advance_stats ( UNICHAR_ID  unichar_id,
float  advance,
float  advance_sd 
)
inline

◆ set_bearing_stats()

void UNICHARSET::set_bearing_stats ( UNICHAR_ID  unichar_id,
float  bearing,
float  bearing_sd 
)
inline

◆ set_black_and_whitelist()

void UNICHARSET::set_black_and_whitelist ( const char *  blacklist,
const char *  whitelist,
const char *  unblacklist 
)

◆ set_direction()

void UNICHARSET::set_direction ( UNICHAR_ID  unichar_id,
UNICHARSET::Direction  value 
)
inline

◆ set_isalpha()

void UNICHARSET::set_isalpha ( UNICHAR_ID  unichar_id,
bool  value 
)
inline

◆ set_isdigit()

void UNICHARSET::set_isdigit ( UNICHAR_ID  unichar_id,
bool  value 
)
inline

◆ set_islower()

void UNICHARSET::set_islower ( UNICHAR_ID  unichar_id,
bool  value 
)
inline

◆ set_isngram()

void UNICHARSET::set_isngram ( UNICHAR_ID  unichar_id,
bool  value 
)
inline

◆ set_ispunctuation()

void UNICHARSET::set_ispunctuation ( UNICHAR_ID  unichar_id,
bool  value 
)
inline

◆ set_isupper()

void UNICHARSET::set_isupper ( UNICHAR_ID  unichar_id,
bool  value 
)
inline

◆ set_mirror()

void UNICHARSET::set_mirror ( UNICHAR_ID  unichar_id,
UNICHAR_ID  mirror 
)
inline

◆ set_normed()

void UNICHARSET::set_normed ( UNICHAR_ID  unichar_id,
const char *  normed 
)
inline

◆ set_normed_ids()

void UNICHARSET::set_normed_ids ( UNICHAR_ID  unichar_id)

◆ set_other_case()

void UNICHARSET::set_other_case ( UNICHAR_ID  unichar_id,
UNICHAR_ID  other_case 
)
inline

◆ set_ranges_empty()

void UNICHARSET::set_ranges_empty ( )

◆ set_script()

void UNICHARSET::set_script ( UNICHAR_ID  unichar_id,
const char *  value 
)
inline

◆ set_top_bottom()

void UNICHARSET::set_top_bottom ( UNICHAR_ID  unichar_id,
int  min_bottom,
int  max_bottom,
int  min_top,
int  max_top 
)
inline

◆ set_width_stats()

void UNICHARSET::set_width_stats ( UNICHAR_ID  unichar_id,
float  width,
float  width_sd 
)
inline

◆ SetPropertiesFromOther()

void UNICHARSET::SetPropertiesFromOther ( const UNICHARSET &  src)
inline

◆ size()

int UNICHARSET::size ( ) const
inline

◆ SizesDistinct()

bool UNICHARSET::SizesDistinct ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
) const

◆ step()

int UNICHARSET::step ( const char *  str) const

◆ thai_sid()

int UNICHARSET::thai_sid ( ) const
inline

◆ to_lower()

UNICHAR_ID UNICHARSET::to_lower ( UNICHAR_ID  unichar_id) const
inline

◆ to_upper()

UNICHAR_ID UNICHARSET::to_upper ( UNICHAR_ID  unichar_id) const
inline

◆ top_bottom_useful()

bool UNICHARSET::top_bottom_useful ( ) const
inline

◆ unichar_insert() [1/2]

void UNICHARSET::unichar_insert ( const char *const  unichar_repr,
OldUncleanUnichars  old_style 
)

◆ unichar_insert() [2/2]

void UNICHARSET::unichar_insert ( const char *const  unichar_repr)
inline

◆ unichar_insert_backwards_compatible()

void UNICHARSET::unichar_insert_backwards_compatible ( const char *const  unichar_repr)
inline

◆ unichar_to_id() [1/2]

UNICHAR_ID UNICHARSET::unichar_to_id ( const char *const  unichar_repr) const

◆ unichar_to_id() [2/2]

UNICHAR_ID UNICHARSET::unichar_to_id ( const char *const  unichar_repr,
int  length 
) const

Member Data Documentation

◆ common_sid_

int UNICHARSET::common_sid_
private

◆ cyrillic_sid_

int UNICHARSET::cyrillic_sid_
private

◆ default_sid_

int UNICHARSET::default_sid_
private

◆ greek_sid_

int UNICHARSET::greek_sid_
private

◆ han_sid_

int UNICHARSET::han_sid_
private

◆ hangul_sid_

int UNICHARSET::hangul_sid_
private

◆ hiragana_sid_

int UNICHARSET::hiragana_sid_
private

◆ ids

UNICHARMAP UNICHARSET::ids
private

◆ katakana_sid_

int UNICHARSET::katakana_sid_
private

◆ kCleanupMaps

const char * UNICHARSET::kCleanupMaps[][2]
static private
Initial value:
= {
{"\u0640", ""},
{"\ufb01", "fi"},
{"\ufb02", "fl"},
{nullptr, nullptr}}

◆ kCustomLigatures

const char * UNICHARSET::kCustomLigatures[][2]
static
Initial value:
= {
{"ct", "\uE003"},
{"ſh", "\uE006"},
{"ſi", "\uE007"},
{"ſl", "\uE008"},
{"ſſ", "\uE009"},
{nullptr, nullptr}
}

◆ kSpecialUnicharCodes

const char * UNICHARSET::kSpecialUnicharCodes[SPECIAL_UNICHAR_CODES_COUNT]
static
Initial value:
= {
" ",
"Joined",
"|Broken|0|1"
}

◆ latin_sid_

int UNICHARSET::latin_sid_
private

◆ null_script

const char * UNICHARSET::null_script = "NULL"
static private

◆ null_sid_

int UNICHARSET::null_sid_
private

◆ old_style_included_

bool UNICHARSET::old_style_included_
private

◆ script_has_upper_lower_

bool UNICHARSET::script_has_upper_lower_
private

◆ script_has_xheight_

bool UNICHARSET::script_has_xheight_
private

◆ script_table

char** UNICHARSET::script_table
private

◆ script_table_size_reserved

int UNICHARSET::script_table_size_reserved
private

◆ script_table_size_used

int UNICHARSET::script_table_size_used
private

◆ size_reserved

int UNICHARSET::size_reserved
private

◆ size_used

int UNICHARSET::size_used
private

◆ thai_sid_

int UNICHARSET::thai_sid_
private

◆ top_bottom_set_

bool UNICHARSET::top_bottom_set_
private

◆ unichars

UNICHAR_SLOT* UNICHARSET::unichars
private

The documentation for this class was generated from the following files: