tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
tesseract::SquishedDawg Class Reference

#include <dawg.h>

Inheritance diagram for tesseract::SquishedDawg:
Collaboration diagram for tesseract::SquishedDawg:

Public Member Functions

 SquishedDawg (DawgType type, const STRING &lang, PermuterType perm, int debug_level)
 
 SquishedDawg (const char *filename, DawgType type, const STRING &lang, PermuterType perm, int debug_level)
 
 SquishedDawg (EDGE_ARRAY edges, int num_edges, DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level)
 
virtual ~SquishedDawg ()
 
bool Load (TFile *fp)
 
int NumEdges ()
 
EDGE_REF edge_char_of (NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const
 Returns the edge that corresponds to the letter out of this node. More...
 
void unichar_ids_of (NODE_REF node, NodeChildVector *vec, bool word_end) const
 
NODE_REF next_node (EDGE_REF edge) const
 
bool end_of_word (EDGE_REF edge_ref) const
 
UNICHAR_ID edge_letter (EDGE_REF edge_ref) const
 Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF. More...
 
void print_node (NODE_REF node, int max_num_edges) const
 
bool write_squished_dawg (TFile *file)
 Writes the squished/reduced Dawg to a file. More...
 
bool write_squished_dawg (const char *filename)
 
- Public Member Functions inherited from tesseract::Dawg
DawgType type () const
 
const STRING & lang () const
 
PermuterType permuter () const
 
virtual ~Dawg ()
 
bool word_in_dawg (const WERD_CHOICE &word) const
 Returns true if the given word is in the Dawg. More...
 
bool prefix_in_dawg (const WERD_CHOICE &prefix, bool requires_complete) const
 
int check_for_words (const char *filename, const UNICHARSET &unicharset, bool enable_wildcard) const
 
void iterate_words (const UNICHARSET &unicharset, TessCallback1< const WERD_CHOICE *> *cb) const
 
void iterate_words (const UNICHARSET &unicharset, TessCallback1< const char *> *cb) const
 
virtual void unichar_id_to_patterns (UNICHAR_ID unichar_id, const UNICHARSET &unicharset, GenericVector< UNICHAR_ID > *vec) const
 
virtual EDGE_REF pattern_loop_edge (EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const
 

Private Member Functions

void set_next_node (EDGE_REF edge_ref, EDGE_REF value)
 Sets the next node link for this edge. More...
 
void set_empty_edge (EDGE_REF edge_ref)
 Sets the edge to be empty. More...
 
void clear_all_edges ()
 Goes through all the edges and clears each one out. More...
 
void clear_marker_flag (EDGE_REF edge_ref)
 Clears the last flag of this edge. More...
 
bool forward_edge (EDGE_REF edge_ref) const
 Returns true if this edge is in the forward direction. More...
 
bool backward_edge (EDGE_REF edge_ref) const
 Returns true if this edge is in the backward direction. More...
 
bool edge_occupied (EDGE_REF edge_ref) const
 Returns true if the edge spot in this location is occupied. More...
 
bool last_edge (EDGE_REF edge_ref) const
 Returns true if this edge is the last edge in a sequence. More...
 
int32_t num_forward_edges (NODE_REF node) const
 Counts and returns the number of forward edges in this node. More...
 
bool read_squished_dawg (TFile *file)
 Reads SquishedDawg from a file. More...
 
void print_edge (EDGE_REF edge) const
 Prints the contents of an edge indicated by the given EDGE_REF. More...
 
void print_all (const char *msg)
 Prints the contents of the SquishedDawg. More...
 
std::unique_ptr< EDGE_REF[]> build_node_map (int32_t *num_nodes) const
 Constructs a mapping from the memory node indices to disk node indices. More...
 

Private Attributes

EDGE_ARRAY edges_
 
int32_t num_edges_
 
int num_forward_edges_in_node0
 

Additional Inherited Members

- Static Public Attributes inherited from tesseract::Dawg
static const int16_t kDawgMagicNumber = 42
 Magic number to determine endianness when reading the Dawg from file. More...
 
static const UNICHAR_ID kPatternUnicharID = 0
 
- Protected Member Functions inherited from tesseract::Dawg
 Dawg (DawgType type, const STRING &lang, PermuterType perm, int debug_level)
 
NODE_REF next_node_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns the next node visited by following this edge. More...
 
bool marker_flag_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns the marker flag of this edge. More...
 
int direction_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns the direction flag of this edge. More...
 
bool end_of_word_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns true if this edge marks the end of a word. More...
 
UNICHAR_ID unichar_id_from_edge_rec (const EDGE_RECORD &edge_rec) const
 Returns UNICHAR_ID recorded in this edge. More...
 
void set_next_node_in_edge_rec (EDGE_RECORD *edge_rec, EDGE_REF value)
 Sets the next node link for this edge in the Dawg. More...
 
void set_marker_flag_in_edge_rec (EDGE_RECORD *edge_rec)
 Sets this edge record to be the last one in a sequence of edges. More...
 
int given_greater_than_edge_rec (NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, const EDGE_RECORD &edge_rec) const
 
bool edge_rec_match (NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const
 
void init (int unicharset_size)
 
bool match_words (WERD_CHOICE *word, int32_t index, NODE_REF node, UNICHAR_ID wildcard) const
 
void iterate_words_rec (const WERD_CHOICE &word_so_far, NODE_REF to_explore, TessCallback1< const WERD_CHOICE *> *cb) const
 
- Protected Attributes inherited from tesseract::Dawg
DawgType type_
 
STRING lang_
 
PermuterType perm_
 Permuter code that should be used if the word is found in this Dawg. More...
 
int unicharset_size_
 
int flag_start_bit_
 
int next_node_start_bit_
 
uint64_t next_node_mask_
 
uint64_t flags_mask_
 
uint64_t letter_mask_
 
int debug_level_
 

Detailed Description

Concrete class that can operate on a compacted (squished) Dawg (read, search and write to file). This class is read-only in the sense that new words cannot be added to an instance of SquishedDawg. The underlying representation of the nodes and edges in SquishedDawg is stored as a contiguous EDGE_ARRAY (read from file or given as an argument to the constructor).

Constructor & Destructor Documentation

◆ SquishedDawg() [1/3]

tesseract::SquishedDawg::SquishedDawg ( DawgType  type,
const STRING &  lang,
PermuterType  perm,
int  debug_level 
)
inline

◆ SquishedDawg() [2/3]

tesseract::SquishedDawg::SquishedDawg ( const char *  filename,
DawgType  type,
const STRING &  lang,
PermuterType  perm,
int  debug_level 
)
inline

◆ SquishedDawg() [3/3]

tesseract::SquishedDawg::SquishedDawg ( EDGE_ARRAY  edges,
int  num_edges,
DawgType  type,
const STRING &  lang,
PermuterType  perm,
int  unicharset_size,
int  debug_level 
)
inline

◆ ~SquishedDawg()

tesseract::SquishedDawg::~SquishedDawg ( )
virtual

Member Function Documentation

◆ backward_edge()

bool tesseract::SquishedDawg::backward_edge ( EDGE_REF  edge_ref) const
inline private

Returns true if this edge is in the backward direction.

◆ build_node_map()

std::unique_ptr< EDGE_REF[]> tesseract::SquishedDawg::build_node_map ( int32_t *  num_nodes) const
private

Constructs a mapping from the memory node indices to disk node indices.

◆ clear_all_edges()

void tesseract::SquishedDawg::clear_all_edges ( )
inline private

Goes through all the edges and clears each one out.

◆ clear_marker_flag()

void tesseract::SquishedDawg::clear_marker_flag ( EDGE_REF  edge_ref)
inline private

Clears the last flag of this edge.

◆ edge_char_of()

EDGE_REF tesseract::SquishedDawg::edge_char_of ( NODE_REF  node,
UNICHAR_ID  unichar_id,
bool  word_end 
) const
virtual

Returns the edge that corresponds to the letter out of this node.

Implements tesseract::Dawg.

◆ edge_letter()

UNICHAR_ID tesseract::SquishedDawg::edge_letter ( EDGE_REF  edge_ref) const
inline virtual

Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.

Implements tesseract::Dawg.

◆ edge_occupied()

bool tesseract::SquishedDawg::edge_occupied ( EDGE_REF  edge_ref) const
inline private

Returns true if the edge spot in this location is occupied.

◆ end_of_word()

bool tesseract::SquishedDawg::end_of_word ( EDGE_REF  edge_ref) const
inline virtual

Returns true if the edge indicated by the given EDGE_REF marks the end of a word.

Implements tesseract::Dawg.

◆ forward_edge()

bool tesseract::SquishedDawg::forward_edge ( EDGE_REF  edge_ref) const
inline private

Returns true if this edge is in the forward direction.

◆ last_edge()

bool tesseract::SquishedDawg::last_edge ( EDGE_REF  edge_ref) const
inline private

Returns true if this edge is the last edge in a sequence.

◆ Load()

bool tesseract::SquishedDawg::Load ( TFile *  fp)
inline

◆ next_node()

NODE_REF tesseract::SquishedDawg::next_node ( EDGE_REF  edge) const
inline virtual

Returns the next node visited by following the edge indicated by the given EDGE_REF.

Implements tesseract::Dawg.

◆ num_forward_edges()

int32_t tesseract::SquishedDawg::num_forward_edges ( NODE_REF  node) const
private

Counts and returns the number of forward edges in this node.

◆ NumEdges()

int tesseract::SquishedDawg::NumEdges ( )
inline

◆ print_all()

void tesseract::SquishedDawg::print_all ( const char *  msg)
inline private

Prints the contents of the SquishedDawg.

◆ print_edge()

void tesseract::SquishedDawg::print_edge ( EDGE_REF  edge) const
private

Prints the contents of an edge indicated by the given EDGE_REF.

◆ print_node()

void tesseract::SquishedDawg::print_node ( NODE_REF  node,
int  max_num_edges 
) const
virtual

Prints the contents of the node indicated by the given NODE_REF. At most max_num_edges will be printed.

Implements tesseract::Dawg.

◆ read_squished_dawg()

bool tesseract::SquishedDawg::read_squished_dawg ( TFile *  file)
private

Reads SquishedDawg from a file.

◆ set_empty_edge()

void tesseract::SquishedDawg::set_empty_edge ( EDGE_REF  edge_ref)
inline private

Sets the edge to be empty.

◆ set_next_node()

void tesseract::SquishedDawg::set_next_node ( EDGE_REF  edge_ref,
EDGE_REF  value 
)
inline private

Sets the next node link for this edge.

◆ unichar_ids_of()

void tesseract::SquishedDawg::unichar_ids_of ( NODE_REF  node,
NodeChildVector *  vec,
bool  word_end 
) const
inline virtual

Fills the given NodeChildVector with all the unichar ids (and the corresponding EDGE_REFs) for which there is an edge out of this node.

Implements tesseract::Dawg.

◆ write_squished_dawg() [1/2]

bool tesseract::SquishedDawg::write_squished_dawg ( TFile *  file)

Writes the squished/reduced Dawg to a file.

◆ write_squished_dawg() [2/2]

bool tesseract::SquishedDawg::write_squished_dawg ( const char *  filename)
inline

Opens the file with the given filename and writes the squished/reduced Dawg to the file.

Member Data Documentation

◆ edges_

EDGE_ARRAY tesseract::SquishedDawg::edges_
private

◆ num_edges_

int32_t tesseract::SquishedDawg::num_edges_
private

◆ num_forward_edges_in_node0

int tesseract::SquishedDawg::num_forward_edges_in_node0
private

The documentation for this class was generated from the following files: