tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
indexmapbidi.h
1 // File: indexmapbidi.h
3 // Description: Bi-directional mapping between a sparse and compact space.
4 // Author: rays@google.com (Ray Smith)
5 // Created: Tue Apr 06 11:33:59 PDT 2010
6 //
7 // (C) Copyright 2010, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_CCUTIL_INDEXMAPBIDI_H_
21 #define TESSERACT_CCUTIL_INDEXMAPBIDI_H_
22 
23 #include <cstdio>
24 #include "genericvector.h"
25 
26 namespace tesseract {
27 
28 class IndexMapBiDi;
29 
30 // Bidirectional one-to-one mapping between a sparse and a compact discrete
31 // space. Many entries in the sparse space are unmapped, but those that are
32 // mapped have a 1-1 mapping to (and from) the compact space, where all
33 // values are used. This is useful for forming subsets of larger collections,
34 // such as subsets of character sets, or subsets of binary feature spaces.
35 //
36 // This base class provides basic functionality with binary search for the
37 // SparseToCompact mapping to save memory.
38 // For a faster inverse mapping, or to allow a many-to-one mapping, use
39 // IndexMapBiDi below.
40 // NOTE: there are currently no methods to setup an IndexMap on its own!
41 // It must be initialized by copying from an IndexMapBiDi or by DeSerialize.
42 class IndexMap {
43  public:
44  virtual ~IndexMap();
45 
46  // SparseToCompact takes a sparse index to an index in the compact space.
47  // Uses a binary search to find the result. For faster speed use
48  // IndexMapBiDi, but that takes more memory.
49  virtual int SparseToCompact(int sparse_index) const;
50 
51  // CompactToSparse takes a compact index to the corresponding index in the
52  // sparse space.
53  int CompactToSparse(int compact_index) const {
54  return compact_map_[compact_index];
55  }
56  // The size of the sparse space.
57  virtual int SparseSize() const {
58  return sparse_size_;
59  }
60  // The size of the compact space.
61  int CompactSize() const {
62  return compact_map_.size();
63  }
64 
65  // Copy from the input.
66  void CopyFrom(const IndexMap& src);
67  void CopyFrom(const IndexMapBiDi& src);
68 
69  // Writes to the given file. Returns false in case of error.
70  bool Serialize(FILE* fp) const;
71  // Reads from the given file. Returns false in case of error.
72  // If swap is true, assumes a big/little-endian swap is needed.
73  bool DeSerialize(bool swap, FILE* fp);
74 
75  protected:
76  // The sparse space covers integers in the range [0, sparse_size_-1].
77  int32_t sparse_size_;
78  // The compact space covers integers in the range [0, compact_map_.size()-1].
79  // Each element contains the corresponding sparse index.
81 };
82 
83 // Bidirectional many-to-one mapping between a sparse and a compact discrete
84 // space. As with IndexMap, many entries may be unmapped, but unlike IndexMap,
85 // of those that are, many may be mapped to the same compact index.
86 // If the map is many-to-one, it is not possible to directly obtain all the
87 // sparse indices that map to a single compact index.
88 // This map is time- rather than space-efficient. It stores the entire sparse
89 // space.
90 // IndexMapBiDi may be initialized in one of 3 ways:
91 // 1. Init(size, true);
92 // Setup();
93 // Sets a complete 1:1 mapping with no unmapped elements.
94 // 2. Init(size, false);
95 // for ... SetMap(index, true);
96 // Setup();
97 // Specifies precisely which sparse indices are mapped. The mapping is 1:1.
98 // 3. Either of the above, followed by:
99 // for ... Merge(index1, index2);
100 // CompleteMerges();
101 // Allows a many-to-one mapping by merging compact space indices.
102 class IndexMapBiDi : public IndexMap {
103  public:
104  virtual ~IndexMapBiDi();
105 
106  // Top-level init function in a single call to initialize a map to select
107  // a single contiguous subrange [start, end) of the sparse space to be mapped
108  // 1 to 1 to the compact space, with all other elements of the sparse space
109  // left unmapped.
110  // No need to call Setup after this.
111  void InitAndSetupRange(int sparse_size, int start, int end);
112 
113  // Initializes just the sparse_map_ to the given size with either all
114  // forward indices mapped (all_mapped = true) or none (all_mapped = false).
115  // Call Setup immediately after, or make calls to SetMap first to adjust the
116  // mapping and then call Setup before using the map.
117  void Init(int size, bool all_mapped);
118  // Sets a given index in the sparse_map_ to be mapped or not.
119  void SetMap(int sparse_index, bool mapped);
120  // Sets up the sparse_map_ and compact_map_ properly after Init and
121  // some calls to SetMap. Assumes an ordered 1-1 map from set indices
122  // in the sparse space to the compact space.
123  void Setup();
124 
125  // Merges the two compact space indices. May be called many times, but
126  // the merges must be concluded by a call to CompleteMerges.
127  // Returns true if a merge was actually performed.
128  bool Merge(int compact_index1, int compact_index2);
129  // Returns true if the given compact index has been deleted.
130  bool IsCompactDeleted(int index) const {
131  return MasterCompactIndex(index) < 0;
132  }
133  // Completes one or more Merge operations by further compacting the
134  // compact space.
135  void CompleteMerges();
136 
137  // SparseToCompact takes a sparse index to an index in the compact space.
138  virtual int SparseToCompact(int sparse_index) const {
139  return sparse_map_[sparse_index];
140  }
141  // The size of the sparse space.
142  virtual int SparseSize() const {
143  return sparse_map_.size();
144  }
145 
146  // Copy from the input.
147  void CopyFrom(const IndexMapBiDi& src);
148 
149  // Writes to the given file. Returns false in case of error.
150  bool Serialize(FILE* fp) const;
151  // Reads from the given file. Returns false in case of error.
152  // If swap is true, assumes a big/little-endian swap is needed.
153  bool DeSerialize(bool swap, FILE* fp);
154 
155  // Bulk calls to SparseToCompact.
156  // Maps the given array of sparse indices to an array of compact indices.
157  // Assumes the input is sorted. The output indices are sorted and uniqued.
158  // Return value is the number of "missed" features, being features that
159  // don't map to the compact feature space.
160  int MapFeatures(const GenericVector<int>& sparse,
161  GenericVector<int>* compact) const;
162 
163  private:
164  // Returns the master compact index for a given compact index.
165  // During a multiple merge operation, several compact indices may be
166  // combined, so we need to be able to find the master of all.
167  int MasterCompactIndex(int compact_index) const {
168  while (compact_index >= 0 &&
169  sparse_map_[compact_map_[compact_index]] != compact_index)
170  compact_index = sparse_map_[compact_map_[compact_index]];
171  return compact_index;
172  }
173 
174  // Direct look-up of the compact index for each element in sparse space.
176 };
177 
178 } // namespace tesseract.
179 
180 #endif // TESSERACT_CCUTIL_INDEXMAPBIDI_H_
virtual int SparseSize() const
Definition: indexmapbidi.h:142
bool DeSerialize(bool swap, FILE *fp)
Definition: indexmapbidi.cpp:54
int CompactToSparse(int compact_index) const
Definition: indexmapbidi.h:53
bool Serialize(FILE *fp) const
Definition: indexmapbidi.cpp:48
void CopyFrom(const IndexMap &src)
Definition: indexmapbidi.cpp:38
bool IsCompactDeleted(int index) const
Definition: indexmapbidi.h:130
virtual int SparseSize() const
Definition: indexmapbidi.h:57
int MasterCompactIndex(int compact_index) const
Definition: indexmapbidi.h:167
Definition: baseapi.cpp:94
Definition: indexmapbidi.h:42
GenericVector< int32_t > sparse_map_
Definition: indexmapbidi.h:175
Definition: indexmapbidi.h:102
int size() const
Definition: genericvector.h:71
GenericVector< int32_t > compact_map_
Definition: indexmapbidi.h:80
virtual int SparseToCompact(int sparse_index) const
Definition: indexmapbidi.h:138
int CompactSize() const
Definition: indexmapbidi.h:61
int32_t sparse_size_
Definition: indexmapbidi.h:77
virtual int SparseToCompact(int sparse_index) const
Definition: indexmapbidi.cpp:32