tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
dawg.h
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: dawg.h
5  * Description: Definition of a class that represents Directed Acyclic Word
6  * Graph (DAWG), functions to build and manipulate the DAWG.
7  * Author: Mark Seaman, SW Productivity
8  * Created: Fri Oct 16 14:37:00 1987
9  * Modified: Wed Jun 19 16:50:24 1991 (Mark Seaman) marks@hpgrlt
10  * Language: C
11  * Package: N/A
12  * Status: Reusable Software Component
13  *
14  * (c) Copyright 1987, Hewlett-Packard Company.
15  ** Licensed under the Apache License, Version 2.0 (the "License");
16  ** you may not use this file except in compliance with the License.
17  ** You may obtain a copy of the License at
18  ** http://www.apache.org/licenses/LICENSE-2.0
19  ** Unless required by applicable law or agreed to in writing, software
20  ** distributed under the License is distributed on an "AS IS" BASIS,
21  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22  ** See the License for the specific language governing permissions and
23  ** limitations under the License.
24  *
25  *********************************************************************************/
26 
27 #ifndef DICT_DAWG_H_
28 #define DICT_DAWG_H_
29 
30 /*----------------------------------------------------------------------
31  I n c l u d e s
32 ----------------------------------------------------------------------*/
33 
34 #include <memory>
35 #include "elst.h"
36 #include "params.h"
37 #include "ratngs.h"
38 #include "tesscallback.h"
39 
40 #ifndef __GNUC__
41 #ifdef _WIN32
42 #define NO_EDGE (int64_t) 0xffffffffffffffffi64
43 #endif /*_WIN32*/
44 #else
45 #define NO_EDGE (int64_t) 0xffffffffffffffffll
46 #endif /*__GNUC__*/
47 
48 /*----------------------------------------------------------------------
49  T y p e s
50 ----------------------------------------------------------------------*/
51 class UNICHARSET;
52 
53 using EDGE_RECORD = uint64_t;
54 using EDGE_ARRAY = EDGE_RECORD *;
55 using EDGE_REF = int64_t;
56 using NODE_REF = int64_t;
57 using NODE_MAP = EDGE_REF *;
58 
59 namespace tesseract {
60 
// A (unichar id, edge reference) pair. The two-argument constructor stores
// the values as given; the default constructor produces the "nothing" pair
// (INVALID_UNICHAR_ID, NO_EDGE).
61 struct NodeChild {
62  UNICHAR_ID unichar_id;
63  EDGE_REF edge_ref;
64  NodeChild(UNICHAR_ID id, EDGE_REF ref): unichar_id(id), edge_ref(ref) {}
65  NodeChild(): unichar_id(INVALID_UNICHAR_ID), edge_ref(NO_EDGE) {}
66 };
67 
71 
72 enum DawgType {
77 
78  DAWG_TYPE_COUNT // number of enum entries
79 };
80 
81 /*----------------------------------------------------------------------
82  C o n s t a n t s
83 ----------------------------------------------------------------------*/
84 
85 #define FORWARD_EDGE (int32_t) 0
86 #define BACKWARD_EDGE (int32_t) 1
87 #define MAX_NODE_EDGES_DISPLAY (int64_t) 100
88 #define MARKER_FLAG (int64_t) 1
89 #define DIRECTION_FLAG (int64_t) 2
90 #define WERD_END_FLAG (int64_t) 4
91 #define LETTER_START_BIT 0
92 #define NUM_FLAG_BITS 3
93 #define REFFORMAT "%" PRId64
94 
96  { 0, 1, 1, 0 }, // for DAWG_TYPE_PUNCTUATION
97  { 1, 0, 0, 0 }, // for DAWG_TYPE_WORD
98  { 1, 0, 0, 0 }, // for DAWG_TYPE_NUMBER
99  { 0, 0, 0, 0 }, // for DAWG_TYPE_PATTERN
100 };
101 
102 static const char kWildcard[] = "*";
103 
104 
105 /*----------------------------------------------------------------------
106  C l a s s e s a n d S t r u c t s
107 ----------------------------------------------------------------------*/
108 //
118 //
119 class Dawg {
120  public:
122  static const int16_t kDawgMagicNumber = 42;
126  static const UNICHAR_ID kPatternUnicharID = 0;
127 
// Read-only accessors for the dawg type, language string and permuter code
// that were handed to the constructor.
128  inline DawgType type() const { return type_; }
129  inline const STRING &lang() const { return lang_; }
130  inline PermuterType permuter() const { return perm_; }
131 
132  virtual ~Dawg();
133 
135  bool word_in_dawg(const WERD_CHOICE &word) const;
136 
137  // Returns true if the given word prefix is not contraindicated by the dawg.
138  // If requires_complete is true, then the exact complete word must be present.
139  bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const;
140 
143  int check_for_words(const char *filename,
144  const UNICHARSET &unicharset,
145  bool enable_wildcard) const;
146 
147  // For each word in the Dawg, call the given (permanent) callback with the
148  // text (UTF-8) version of the word.
149  void iterate_words(const UNICHARSET &unicharset,
151 
152  // For each word in the Dawg, call the given (permanent) callback with the
153  // text (UTF-8) version of the word.
154  void iterate_words(const UNICHARSET &unicharset,
155  TessCallback1<const char *> *cb) const;
156 
157  // Pure virtual function that should be implemented by the derived classes.
158 
160  virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
161  bool word_end) const = 0;
162 
165  virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec,
166  bool word_end) const = 0;
167 
170  virtual NODE_REF next_node(EDGE_REF edge_ref) const = 0;
171 
174  virtual bool end_of_word(EDGE_REF edge_ref) const = 0;
175 
177  virtual UNICHAR_ID edge_letter(EDGE_REF edge_ref) const = 0;
178 
181  virtual void print_node(NODE_REF node, int max_num_edges) const = 0;
182 
// Default implementation: does nothing and leaves *vec untouched. The
// (void) casts only silence unused-parameter warnings. Being virtual, a
// derived class can replace it with a real implementation.
185  virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id,
186  const UNICHARSET &unicharset,
187  GenericVector<UNICHAR_ID> *vec) const {
188  (void)unichar_id;
189  (void)unicharset;
190  (void)vec;
191  }
192 
// Default implementation for dawgs without pattern loops: ignores its
// arguments and always reports "no such edge".
// Returns NO_EDGE rather than false: false converts to EDGE_REF 0, which is
// a usable edge index and therefore indistinguishable from a real hit for a
// caller that compares the result against NO_EDGE.
196  virtual EDGE_REF pattern_loop_edge(
197  EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const {
198  (void)edge_ref;
199  (void)unichar_id;
200  (void)word_end;
201  return NO_EDGE;
202  }
203 
204  protected:
// Protected constructor: records the type, language, permuter and debug
// level, and sets unicharset_size_ to 0. The bit positions and masks used by
// the *_from_edge_rec helpers are not set in this initializer list.
// NOTE(review): presumably init(unicharset_size) fills them in - confirm in
// the implementation file.
205  Dawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level)
206  : type_(type),
207  lang_(lang),
208  perm_(perm),
209  unicharset_size_(0),
210  debug_level_(debug_level) {}
211 
// Returns the next node visited by following this edge: the bits selected by
// next_node_mask_, shifted down by next_node_start_bit_.
213  inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const {
214  return ((edge_rec & next_node_mask_) >> next_node_start_bit_);
215  }
// Returns the marker flag of this edge (MARKER_FLAG at flag_start_bit_).
217  inline bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const {
218  return (edge_rec & (MARKER_FLAG << flag_start_bit_)) != 0;
219  }
// Returns the direction flag of this edge: BACKWARD_EDGE when the
// DIRECTION_FLAG bit is set, FORWARD_EDGE otherwise.
221  inline int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const {
222  return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ?
223  BACKWARD_EDGE : FORWARD_EDGE;
224  }
// Returns true if this edge marks the end of a word (WERD_END_FLAG set).
226  inline bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const {
227  return (edge_rec & (WERD_END_FLAG << flag_start_bit_)) != 0;
228  }
// Returns the UNICHAR_ID recorded in this edge: the bits selected by
// letter_mask_, which start at LETTER_START_BIT (0).
230  inline UNICHAR_ID unichar_id_from_edge_rec(
231  const EDGE_RECORD &edge_rec) const {
232  return ((edge_rec & letter_mask_) >> LETTER_START_BIT);
233  }
236  EDGE_RECORD *edge_rec, EDGE_REF value) {
237  *edge_rec &= (~next_node_mask_);
238  *edge_rec |= ((value << next_node_start_bit_) & next_node_mask_);
239  }
// Sets this edge record to be the last one in a sequence of edges by OR-ing
// in MARKER_FLAG at flag_start_bit_; all other bits are left as they were.
241  inline void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec) {
242  *edge_rec |= (MARKER_FLAG << flag_start_bit_);
243  }
// Three-way comparison of the given (unichar_id, next_node, word_end) tuple
// against the values decoded from edge_rec.
// Returns 0 when edge_rec_match() accepts the pair, 1 when the given tuple
// is greater (unichar_id is compared first, then next_node, then word_end),
// and -1 in every other case.
251  inline int given_greater_than_edge_rec(NODE_REF next_node,
252  bool word_end,
253  UNICHAR_ID unichar_id,
254  const EDGE_RECORD &edge_rec) const {
255  UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(edge_rec);
256  NODE_REF curr_next_node = next_node_from_edge_rec(edge_rec);
257  bool curr_word_end = end_of_word_from_edge_rec(edge_rec);
// The match test runs first, so its don't-care rules take precedence over
// the strict ordering below.
258  if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node,
259  curr_word_end, curr_unichar_id)) return 0;
260  if (unichar_id > curr_unichar_id) return 1;
261  if (unichar_id == curr_unichar_id) {
262  if (next_node > curr_next_node) return 1;
263  if (next_node == curr_next_node) {
264  if (word_end > curr_word_end) return 1;
265  }
266  }
267  return -1;
268  }
// Returns true when the first tuple matches the second ("other") one:
// the unichar ids must be equal, next_node must be NO_EDGE or equal to
// other_next_node, and word_end must be false or equal to other_word_end.
// NO_EDGE and word_end == false therefore act as don't-care values for the
// first tuple only; the test is not symmetric.
272  inline bool edge_rec_match(NODE_REF next_node,
273  bool word_end,
274  UNICHAR_ID unichar_id,
275  NODE_REF other_next_node,
276  bool other_word_end,
277  UNICHAR_ID other_unichar_id) const {
278  return ((unichar_id == other_unichar_id) &&
279  (next_node == NO_EDGE || next_node == other_next_node) &&
280  (!word_end || (word_end == other_word_end)));
281  }
282 
285  void init(int unicharset_size);
286 
292  bool match_words(WERD_CHOICE *word, int32_t index,
293  NODE_REF node, UNICHAR_ID wildcard) const;
294 
295  // Recursively iterate over all words in a dawg (see public iterate_words).
296  void iterate_words_rec(const WERD_CHOICE &word_so_far,
297  NODE_REF to_explore,
299 
300  // Member Variables.
304  PermuterType perm_;
305  // Variables to construct various edge masks. Formerly:
306  // #define NEXT_EDGE_MASK (int64_t) 0xfffffff800000000i64
307  // #define FLAGS_MASK (int64_t) 0x0000000700000000i64
308  // #define LETTER_MASK (int64_t) 0x00000000ffffffffi64
312  uint64_t next_node_mask_;
313  uint64_t flags_mask_;
314  uint64_t letter_mask_;
315  // Level of debug statements to print to stdout.
317 };
318 
319 //
320 // DawgPosition keeps track of where we are in the primary dawg we're searching
321 // as well as where we may be in the "punctuation dawg" which may provide
322 // surrounding context.
323 //
324 // Example:
325 // punctuation dawg -- space is the "pattern character"
326 // " " // no punctuation
327 // "' '" // leading and trailing apostrophes
328 // " '" // trailing apostrophe
329 // word dawg:
330 // "cat"
331 // "cab"
332 // "cat's"
333 //
334 // DawgPosition(dawg_index, dawg_ref, punc_index, punc_ref, rtp)
335 //
336 // DawgPosition(-1, NO_EDGE, p, pe, false)
337 // We're in the punctuation dawg, no other dawg has been started.
338 // (1) If there's a pattern edge as a punc dawg child of us,
339 // for each punc-following dawg starting with ch, produce:
340 // Result: DawgPosition(k, w, p', false)
341 // (2) If there's a valid continuation in the punc dawg, produce:
342 // Result: DawgPosition(-k, NO_EDGE, p', false)
343 //
344 // DawgPosition(k, w, -1, NO_EDGE, false)
345 // We're in dawg k. Going back to punctuation dawg is not an option.
346 // Follow ch in dawg k.
347 //
348 // DawgPosition(k, w, p, pe, false)
349 // We're in dawg k. Continue in dawg k and/or go back to the punc dawg.
350 // If ending, check that the punctuation dawg is also ok to end here.
351 //
352 // DawgPosition(k, w, p, pe, true)
353 // We're back in the punctuation dawg. Continuing there is the only option.
354 struct DawgPosition {
// Default state: no dawg and no punctuation dawg selected. punc_index is
// initialized to -1 as well (the "no punc dawg" value used for dawg_index),
// because operator== reads it and it would otherwise be indeterminate.
356  : dawg_index(-1), dawg_ref(NO_EDGE), punc_index(-1), punc_ref(NO_EDGE),
357  back_to_punc(false) {}
// Builds a position from explicit values. dawg_idx and punc_idx are taken as
// int but stored in int8_t members, so they are narrowed on construction.
358  DawgPosition(int dawg_idx, EDGE_REF dawgref,
359  int punc_idx, EDGE_REF puncref,
360  bool backtopunc)
361  : dawg_index(dawg_idx), dawg_ref(dawgref),
362  punc_index(punc_idx), punc_ref(puncref),
363  back_to_punc(backtopunc) {
364  }
// Member-wise equality over all five fields. Declared const so that it can
// also be used when the left-hand operand is a const DawgPosition; existing
// callers with a non-const left-hand side are unaffected.
365  bool operator==(const DawgPosition &other) const {
366  return dawg_index == other.dawg_index &&
367  dawg_ref == other.dawg_ref &&
368  punc_index == other.punc_index &&
369  punc_ref == other.punc_ref &&
370  back_to_punc == other.back_to_punc;
371  }
372 
373  int8_t dawg_index;
374  EDGE_REF dawg_ref;
375  int8_t punc_index;
376  EDGE_REF punc_ref;
377  // Have we returned to the punc dawg at the end of the word?
379 };
380 
381 class DawgPositionVector : public GenericVector<DawgPosition> {
382  public:
// Empties the vector by resetting size_used_ to 0; nothing else is touched
// here.
385  void clear() { size_used_ = 0; }
// Appends new_pos unless an equal element (per DawgPosition::operator==) is
// already stored. The search is a linear scan over the used elements.
// Returns false if new_pos was already present, true if it was added.
// When debug is true, the added position is printed with tprintf, prefixed
// by debug_msg.
389  inline bool add_unique(const DawgPosition &new_pos,
390  bool debug,
391  const char *debug_msg) {
392  for (int i = 0; i < size_used_; ++i) {
393  if (data_[i] == new_pos) return false;
394  }
395  push_back(new_pos);
396  if (debug) {
397  tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n",
398  debug_msg, new_pos.dawg_index, new_pos.dawg_ref,
399  new_pos.punc_ref, new_pos.back_to_punc ? " returned" : "");
400  }
401  return true;
402  }
403 };
404 
405 //
412 //
413 class SquishedDawg : public Dawg {
414  public:
// Creates an empty SquishedDawg that holds no edges yet (see Load()).
// edges_, num_edges_ and num_forward_edges_in_node0 are given defined empty
// values so that NumEdges() or the destructor never see indeterminate
// members when the object is used or destroyed before a successful Load().
415  SquishedDawg(DawgType type, const STRING &lang, PermuterType perm,
416  int debug_level)
417  : Dawg(type, lang, perm, debug_level), edges_(nullptr), num_edges_(0), num_forward_edges_in_node0(0) {}
// Builds the dawg from the file at filename. Both opening the file and
// reading the dawg are wrapped in ASSERT_HOST, so a failure is treated as a
// host assertion failure rather than reported to the caller. After reading,
// the forward-edge count of node 0 is cached in num_forward_edges_in_node0.
418  SquishedDawg(const char *filename, DawgType type, const STRING &lang,
419  PermuterType perm, int debug_level)
420  : Dawg(type, lang, perm, debug_level) {
421  TFile file;
422  ASSERT_HOST(file.Open(filename, nullptr));
423  ASSERT_HOST(read_squished_dawg(&file));
424  num_forward_edges_in_node0 = num_forward_edges(0);
425  }
// Wraps an existing edge array of num_edges records. The pointer is stored
// as-is (no copy is made here).
// NOTE(review): ownership of edges presumably passes to this object -
// confirm against the destructor.
// init(unicharset_size) is called before num_forward_edges(0) is cached.
// With debug_level > 3 the whole dawg is printed.
426  SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type,
427  const STRING &lang, PermuterType perm, int unicharset_size,
428  int debug_level)
429  : Dawg(type, lang, perm, debug_level),
430  edges_(edges),
431  num_edges_(num_edges) {
432  init(unicharset_size);
433  num_forward_edges_in_node0 = num_forward_edges(0);
434  if (debug_level > 3) print_all("SquishedDawg:");
435  }
436  virtual ~SquishedDawg();
437 
438  // Loads using the given TFile. Returns false on failure.
// Reads the dawg from fp via read_squished_dawg(). On success the number of
// forward edges of node 0 is cached and true is returned; on failure false
// is returned and the cached count is left unchanged.
439  bool Load(TFile *fp) {
440  if (!read_squished_dawg(fp)) return false;
441  num_forward_edges_in_node0 = num_forward_edges(0);
442  return true;
443  }
444 
// Returns the number of edge records in edges_. Declared const because it
// only reads num_edges_, so it can also be called on a const SquishedDawg.
445  int NumEdges() const { return num_edges_; }
446 
448  EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
449  bool word_end) const;
450 
// Appends to *vec one NodeChild (unichar id, edge ref) for each edge in the
// run of edges that starts at node and ends with the edge for which
// last_edge() is true. When word_end is true, only edges flagged as end of
// word are appended.
// Nothing is appended if node is NO_EDGE or the first edge slot is empty.
// NO_EDGE is tested first: edge_occupied() indexes edges_[edge], so testing
// it before the sentinel check would read edges_[-1].
453  void unichar_ids_of(NODE_REF node, NodeChildVector *vec,
454  bool word_end) const {
455  EDGE_REF edge = node;
456  if (edge == NO_EDGE || !edge_occupied(edge)) return;
457  assert(forward_edge(edge)); // we don't expect any backward edges to
458  do { // be present when this function is called
459  if (!word_end || end_of_word_from_edge_rec(edges_[edge])) {
460  vec->push_back(NodeChild(unichar_id_from_edge_rec(edges_[edge]), edge));
461  }
462  } while (!last_edge(edge++));
463  }
464 
// Returns the next-node field of the edge record at index edge. The index
// is used directly on edges_ without a bounds or NO_EDGE check.
467  NODE_REF next_node(EDGE_REF edge) const {
468  return next_node_from_edge_rec((edges_[edge]));
469  }

// Returns true if the edge record at index edge_ref has its end-of-word
// flag set. No bounds or NO_EDGE check is done on edge_ref.
473  bool end_of_word(EDGE_REF edge_ref) const {
474  return end_of_word_from_edge_rec((edges_[edge_ref]));
475  }

// Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
// No bounds or NO_EDGE check is done on edge_ref.
478  UNICHAR_ID edge_letter(EDGE_REF edge_ref) const {
479  return unichar_id_from_edge_rec((edges_[edge_ref]));
480  }
481 
484  void print_node(NODE_REF node, int max_num_edges) const;
485 
487  bool write_squished_dawg(TFile *file);
488 
// Convenience overload: serializes the dawg through a local TFile and then
// writes it out under filename. Returns true on success; if either the
// serialization or the final CloseWrite fails, a message is printed with
// tprintf and false is returned.
// NOTE(review): the nullptr arguments presumably select TFile's own buffer
// and default writer - confirm in serialis.cpp.
491  bool write_squished_dawg(const char *filename) {
492  TFile file;
493  file.OpenWrite(nullptr);
494  if (!this->write_squished_dawg(&file)) {
495  tprintf("Error serializing %s\n", filename);
496  return false;
497  }
498  if (!file.CloseWrite(filename, nullptr)) {
499  tprintf("Error writing file %s\n", filename);
500  return false;
501  }
502  return true;
503  }
504 
505  private:
// Sets the next node link for this edge.
507  inline void set_next_node(EDGE_REF edge_ref, EDGE_REF value) {
508  set_next_node_in_edge_rec(&(edges_[edge_ref]), value);
509  }
// Sets the edge to be empty: the whole record is overwritten with
// next_node_mask_, the value edge_occupied() tests against.
511  inline void set_empty_edge(EDGE_REF edge_ref) {
512  (edges_[edge_ref] = next_node_mask_);
513  }
// Goes through all the edges and clears each one out.
515  inline void clear_all_edges() {
516  for (int edge = 0; edge < num_edges_; edge++) set_empty_edge(edge);
517  }
// Clears the last flag (MARKER_FLAG) of this edge.
519  inline void clear_marker_flag(EDGE_REF edge_ref) {
520  (edges_[edge_ref] &= ~(MARKER_FLAG << flag_start_bit_));
521  }
// Returns true if this edge is in the forward direction. An unoccupied
// edge slot yields false.
523  inline bool forward_edge(EDGE_REF edge_ref) const {
524  return (edge_occupied(edge_ref) &&
525  (FORWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
526  }
// Returns true if this edge is in the backward direction. An unoccupied
// edge slot yields false.
528  inline bool backward_edge(EDGE_REF edge_ref) const {
529  return (edge_occupied(edge_ref) &&
530  (BACKWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
531  }
// Returns true if the edge spot in this location is occupied, i.e. the
// record differs from the empty value next_node_mask_. edge_ref is used as
// an index without any range check.
533  inline bool edge_occupied(EDGE_REF edge_ref) const {
534  return (edges_[edge_ref] != next_node_mask_);
535  }
// Returns true if this edge is the last edge in a sequence (MARKER_FLAG is
// set in its record).
537  inline bool last_edge(EDGE_REF edge_ref) const {
538  return (edges_[edge_ref] & (MARKER_FLAG << flag_start_bit_)) != 0;
539  }
540 
542  int32_t num_forward_edges(NODE_REF node) const;
543 
545  bool read_squished_dawg(TFile *file);
546 
548  void print_edge(EDGE_REF edge) const;
549 
// Prints the contents of the SquishedDawg: a separator line, msg, then every
// edge slot from 0 to num_edges_ - 1 via print_edge(), then a closing
// separator. All output goes through tprintf.
551  void print_all(const char* msg) {
552  tprintf("\n__________________________\n%s\n", msg);
553  for (int i = 0; i < num_edges_; ++i) print_edge(i);
554  tprintf("__________________________\n");
555  }
557  std::unique_ptr<EDGE_REF[]> build_node_map(int32_t *num_nodes) const;
558 
559  // Member variables.
560  EDGE_ARRAY edges_;
561  int32_t num_edges_;
563 };
564 
565 } // namespace tesseract
566 
567 #endif // DICT_DAWG_H_
DawgPosition()
Definition: dawg.h:355
DawgType
Definition: dawg.h:72
bool forward_edge(EDGE_REF edge_ref) const
Returns true if this edge is in the forward direction.
Definition: dawg.h:523
Definition: dawg.h:61
int32_t num_edges_
Definition: dawg.h:561
bool Open(const STRING &filename, FileReader reader)
Definition: serialis.cpp:196
void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec)
Sets this edge record to be the last one in a sequence of edges.
Definition: dawg.h:241
EDGE_REF edge_ref
Definition: dawg.h:63
Definition: dawg.h:119
Definition: dawg.h:76
void set_next_node(EDGE_REF edge_ref, EDGE_REF value)
Sets the next node link for this edge.
Definition: dawg.h:507
int unicharset_size_
Definition: dawg.h:309
SquishedDawg(const char *filename, DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:418
uint64_t flags_mask_
Definition: dawg.h:313
NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the next node visited by following this edge.
Definition: dawg.h:213
int push_back(T object)
Definition: genericvector.h:799
Definition: unicharset.h:146
static const bool kDawgSuccessors[DAWG_TYPE_COUNT][DAWG_TYPE_COUNT]
Definition: dawg.h:95
NodeChild(UNICHAR_ID id, EDGE_REF ref)
Definition: dawg.h:64
uint64_t next_node_mask_
Definition: dawg.h:312
void print_all(const char *msg)
Prints the contents of the SquishedDawg.
Definition: dawg.h:551
Definition: boxword.h:31
bool backward_edge(EDGE_REF edge_ref) const
Returns true if this edge is in the backward direction.
Definition: dawg.h:528
Definition: serialis.h:77
DawgType type() const
Definition: dawg.h:128
UNICHAR_ID unichar_id
Definition: dawg.h:62
bool edge_rec_match(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const
Definition: dawg.h:272
bool write_squished_dawg(const char *filename)
Definition: dawg.h:491
int given_greater_than_edge_rec(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, const EDGE_RECORD &edge_rec) const
Definition: dawg.h:251
PermuterType permuter() const
Definition: dawg.h:130
int8_t punc_index
Definition: dawg.h:375
Definition: baseapi.cpp:94
UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns UNICHAR_ID recorded in this edge.
Definition: dawg.h:230
virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset, GenericVector< UNICHAR_ID > *vec) const
Definition: dawg.h:185
Definition: ratngs.h:273
bool edge_occupied(EDGE_REF edge_ref) const
Returns true if the edge spot in this location is occupied.
Definition: dawg.h:533
const STRING & lang() const
Definition: dawg.h:129
Definition: dawg.h:75
bool end_of_word(EDGE_REF edge_ref) const
Definition: dawg.h:473
UNICHAR_ID edge_letter(EDGE_REF edge_ref) const
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
Definition: dawg.h:478
EDGE_ARRAY edges_
Definition: dawg.h:560
EDGE_REF dawg_ref
Definition: dawg.h:374
int debug_level_
Definition: dawg.h:316
bool operator==(const DawgPosition &other)
Definition: dawg.h:365
void clear()
Definition: dawg.h:385
Definition: dawg.h:381
STRING lang_
Definition: dawg.h:302
virtual EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const
Definition: dawg.h:196
Definition: dawg.h:78
Definition: baseapi.h:37
int NumEdges()
Definition: dawg.h:445
uint64_t letter_mask_
Definition: dawg.h:314
Definition: strngs.h:45
bool CloseWrite(const STRING &filename, FileWriter writer)
Definition: serialis.cpp:310
int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the direction flag of this edge.
Definition: dawg.h:221
Dawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:205
void clear_all_edges()
Goes through all the edges and clears each one out.
Definition: dawg.h:515
Definition: dawg.h:413
void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const
Definition: dawg.h:453
int num_forward_edges_in_node0
Definition: dawg.h:562
NodeChild()
Definition: dawg.h:65
int8_t dawg_index
Definition: dawg.h:373
DawgType type_
Definition: dawg.h:301
int next_node_start_bit_
Definition: dawg.h:311
Definition: dawg.h:354
static const char kWildcard[]
Definition: dawg.h:102
Definition: dawg.h:74
bool back_to_punc
Definition: dawg.h:378
void set_empty_edge(EDGE_REF edge_ref)
Sets the edge to be empty.
Definition: dawg.h:511
bool last_edge(EDGE_REF edge_ref) const
Returns true if this edge is the last edge in a sequence.
Definition: dawg.h:537
bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns true if this edge marks the end of a word.
Definition: dawg.h:226
SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level)
Definition: dawg.h:426
NODE_REF next_node(EDGE_REF edge) const
Definition: dawg.h:467
SquishedDawg(DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:415
EDGE_REF punc_ref
Definition: dawg.h:376
bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the marker flag of this edge.
Definition: dawg.h:217
void clear_marker_flag(EDGE_REF edge_ref)
Clears the last flag of this edge.
Definition: dawg.h:519
PermuterType perm_
Permuter code that should be used if the word is found in this Dawg.
Definition: dawg.h:304
void set_next_node_in_edge_rec(EDGE_RECORD *edge_rec, EDGE_REF value)
Sets the next node link for this edge in the Dawg.
Definition: dawg.h:235
bool add_unique(const DawgPosition &new_pos, bool debug, const char *debug_msg)
Definition: dawg.h:389
bool Load(TFile *fp)
Definition: dawg.h:439
DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref, bool backtopunc)
Definition: dawg.h:358
int flag_start_bit_
Definition: dawg.h:310
void OpenWrite(GenericVector< char > *data)
Definition: serialis.cpp:295