tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
tesseract::ValidateJavanese Class Reference

#include <validate_javanese.h>

Inheritance diagram for tesseract::ValidateJavanese:
Collaboration diagram for tesseract::ValidateJavanese:

Public Member Functions

 ValidateJavanese (ViramaScript script, bool report_errors)
 
 ~ValidateJavanese ()
 
- Public Member Functions inherited from tesseract::Validator
virtual ~Validator ()
 

Protected Member Functions

bool ConsumeGraphemeIfValid () override
 
Validator::CharClass UnicodeToCharClass (char32 ch) const override
 
- Protected Member Functions inherited from tesseract::Validator
 Validator (ViramaScript script, bool report_errors)
 
bool ValidateCleanAndSegmentInternal (GraphemeNormMode g_mode, const std::vector< char32 > &src, std::vector< std::vector< char32 >> *dest)
 
void MoveResultsToDest (GraphemeNormMode g_mode, std::vector< std::vector< char32 >> *dest)
 
bool IsSubscriptScript () const
 
bool CodeOnlyToOutput ()
 
void MultiCodePart (int length)
 
bool UseMultiCode (int length)
 
void ComputeClassCodes (const std::vector< char32 > &text)
 
void Clear ()
 

Private Member Functions

bool ConsumeViramaIfValid (IndicPair joiner, bool post_matra)
 
bool ConsumeConsonantHeadIfValid ()
 
bool ConsumeConsonantTailIfValid ()
 
bool ConsumeVowelIfValid ()
 

Static Private Attributes

static const char32 kPengkal = 0xa9be
 
static const char32 kCakra = 0xa9bf
 

Additional Inherited Members

- Static Public Member Functions inherited from tesseract::Validator
static bool ValidateCleanAndSegment (GraphemeNormMode g_mode, bool report_errors, const std::vector< char32 > &src, std::vector< std::vector< char32 >> *dest)
 
static bool IsZeroWidthMark (char32 ch)
 
- Static Public Attributes inherited from tesseract::Validator
static const char32 kZeroWidthSpace = 0x200B
 
static const char32 kZeroWidthNonJoiner = 0x200C
 
static const char32 kZeroWidthJoiner = 0x200D
 
static const char32 kLeftToRightMark = 0x200E
 
static const char32 kRightToLeftMark = 0x200F
 
static const char32 kInvalid = 0xfffd
 
- Protected Types inherited from tesseract::Validator
enum  CharClass {
  CharClass::kConsonant = 'C', CharClass::kVowel = 'V', CharClass::kVirama = 'H', CharClass::kMatra = 'M',
  CharClass::kMatraPiece = 'P', CharClass::kVowelModifier = 'D', CharClass::kZeroWidthNonJoiner = 'z', CharClass::kZeroWidthJoiner = 'Z',
  CharClass::kVedicMark = 'v', CharClass::kNukta = 'N', CharClass::kRobat = 'R', CharClass::kOther = 'O',
  CharClass::kWhitespace = ' ', CharClass::kCombiner = 'c'
}
 
using IndicPair = std::pair< CharClass, char32 >
 
- Static Protected Member Functions inherited from tesseract::Validator
static std::unique_ptr< Validator > ScriptValidator (ViramaScript script, bool report_errors)
 
static ViramaScript MostFrequentViramaScript (const std::vector< char32 > &utf32)
 
static bool IsVirama (char32 unicode)
 
static bool IsVedicAccent (char32 unicode)
 
- Protected Attributes inherited from tesseract::Validator
ViramaScript script_
 
std::vector< IndicPair > codes_
 
std::vector< std::vector< char32 > > parts_
 
std::vector< char32 > output_
 
int codes_used_
 
int output_used_
 
bool report_errors_
 
- Static Protected Attributes inherited from tesseract::Validator
static const int kIndicCodePageSize = 128
 
static const char32 kMinIndicUnicode = 0x900
 
static const char32 kMaxSinhalaUnicode = 0xdff
 
static const char32 kMaxViramaScriptUnicode = 0x17ff
 
static const char32 kSinhalaVirama = 0xdca
 
static const char32 kMyanmarVirama = 0x1039
 
static const char32 kKhmerVirama = 0x17d2
 
static const char32 kJavaneseVirama = 0xa9c0
 
static const char32 kMaxJavaneseUnicode = 0xa9df
 

Constructor & Destructor Documentation

◆ ValidateJavanese()

tesseract::ValidateJavanese::ValidateJavanese ( ViramaScript  script,
bool  report_errors 
)
inline

◆ ~ValidateJavanese()

tesseract::ValidateJavanese::~ValidateJavanese ( )
inline

Member Function Documentation

◆ ConsumeConsonantHeadIfValid()

bool tesseract::ValidateJavanese::ConsumeConsonantHeadIfValid ( )
private

◆ ConsumeConsonantTailIfValid()

bool tesseract::ValidateJavanese::ConsumeConsonantTailIfValid ( )
private

◆ ConsumeGraphemeIfValid()

bool tesseract::ValidateJavanese::ConsumeGraphemeIfValid ( )
override protected virtual

Implements tesseract::Validator.

◆ ConsumeViramaIfValid()

bool tesseract::ValidateJavanese::ConsumeViramaIfValid ( IndicPair  joiner,
bool  post_matra 
)
private

◆ ConsumeVowelIfValid()

bool tesseract::ValidateJavanese::ConsumeVowelIfValid ( )
private

◆ UnicodeToCharClass()

Validator::CharClass tesseract::ValidateJavanese::UnicodeToCharClass ( char32  ch) const
override protected virtual

Implements tesseract::Validator.

Member Data Documentation

◆ kCakra

const char32 tesseract::ValidateJavanese::kCakra = 0xa9bf
static private

◆ kPengkal

const char32 tesseract::ValidateJavanese::kPengkal = 0xa9be
static private

The documentation for this class was generated from the following files: