#include <validate_grapheme.h>
|
| static bool | ValidateCleanAndSegment (GraphemeNormMode g_mode, bool report_errors, const std::vector< char32 > &src, std::vector< std::vector< char32 >> *dest) |
| |
| static bool | IsZeroWidthMark (char32 ch) |
| |
| static const char32 | kZeroWidthSpace = 0x200B |
| |
| static const char32 | kZeroWidthNonJoiner = 0x200C |
| |
| static const char32 | kZeroWidthJoiner = 0x200D |
| |
| static const char32 | kLeftToRightMark = 0x200E |
| |
| static const char32 | kRightToLeftMark = 0x200F |
| |
| static const char32 | kInvalid = 0xfffd |
| |
| enum | CharClass {
CharClass::kConsonant = 'C',
CharClass::kVowel = 'V',
CharClass::kVirama = 'H',
CharClass::kMatra = 'M',
CharClass::kMatraPiece = 'P',
CharClass::kVowelModifier = 'D',
CharClass::kZeroWidthNonJoiner = 'z',
CharClass::kZeroWidthJoiner = 'Z',
CharClass::kVedicMark = 'v',
CharClass::kNukta = 'N',
CharClass::kRobat = 'R',
CharClass::kOther = 'O',
CharClass::kWhitespace = ' ',
CharClass::kCombiner = 'c'
} |
| |
| using | IndicPair = std::pair< CharClass, char32 > |
| |
| static std::unique_ptr< Validator > | ScriptValidator (ViramaScript script, bool report_errors) |
| |
| static ViramaScript | MostFrequentViramaScript (const std::vector< char32 > &utf32) |
| |
| static bool | IsVirama (char32 unicode) |
| |
| static bool | IsVedicAccent (char32 unicode) |
| |
| ViramaScript | script_ |
| |
| std::vector< IndicPair > | codes_ |
| |
| std::vector< std::vector< char32 > > | parts_ |
| |
| std::vector< char32 > | output_ |
| |
| int | codes_used_ |
| |
| int | output_used_ |
| |
| bool | report_errors_ |
| |
| static const int | kIndicCodePageSize = 128 |
| |
| static const char32 | kMinIndicUnicode = 0x900 |
| |
| static const char32 | kMaxSinhalaUnicode = 0xdff |
| |
| static const char32 | kMaxViramaScriptUnicode = 0x17ff |
| |
| static const char32 | kSinhalaVirama = 0xdca |
| |
| static const char32 | kMyanmarVirama = 0x1039 |
| |
| static const char32 | kKhmerVirama = 0x17d2 |
| |
| static const char32 | kJavaneseVirama = 0xa9c0 |
| |
| static const char32 | kMaxJavaneseUnicode = 0xa9df |
| |
◆ ValidateGrapheme()
| tesseract::ValidateGrapheme::ValidateGrapheme (ViramaScript script, bool report_errors) | inline |
◆ ~ValidateGrapheme()
| tesseract::ValidateGrapheme::~ValidateGrapheme () | inline |
◆ ConsumeGraphemeIfValid()
| bool tesseract::ValidateGrapheme::ConsumeGraphemeIfValid () | override protected virtual |
◆ IsBadlyFormed()
| bool tesseract::ValidateGrapheme::IsBadlyFormed (char32 prev_ch, char32 ch) | private |
◆ IsBadlyFormedIndicVowel()
| bool tesseract::ValidateGrapheme::IsBadlyFormedIndicVowel (char32 prev_ch, char32 ch) | static private |
◆ IsBadlyFormedThai()
| bool tesseract::ValidateGrapheme::IsBadlyFormedThai (char32 prev_ch, char32 ch) | static private |
◆ UnicodeToCharClass()
The documentation for this class was generated from the following files:
- /home/stephane/src/tesseract/src/training/validate_grapheme.h
- /home/stephane/src/tesseract/src/training/validate_grapheme.cpp