tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
intfeaturemap.h
1 // Copyright 2010 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
4 // File: intfeaturemap.h
5 // Description: Encapsulation of IntFeatureSpace with IndexMapBiDi
6 // to provide a subspace mapping and fast feature lookup.
7 // Created: Tue Oct 26 08:58:30 PDT 2010
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #ifndef TESSERACT_CLASSIFY_INTFEATUREMAP_H_
22 #define TESSERACT_CLASSIFY_INTFEATUREMAP_H_
23 
24 #include "intfeaturespace.h"
25 #include "indexmapbidi.h"
26 #include "intproto.h"
27 
28 namespace tesseract {
29 
30 class SampleIterator;
31 
32 // Number of positive and negative offset maps.
33 static const int kNumOffsetMaps = 2;
34 
35 // Class to map a feature space defined by INT_FEATURE_STRUCT to a compact
36 // down-sampled subspace of actually used features.
37 // The IntFeatureMap copes with 2 stages of transformation:
38 // The first step is down-sampling (re-quantization) and converting to a
39 // single index value from the 3-D input:
40 // INT_FEATURE_STRUCT <-> index feature (via IntFeatureSpace) and
41 // the second is a feature-space compaction to map only the feature indices
42 // that are actually used. This saves space in classifiers that are built
43 // using the mapped feature space.
44 // index (sparse) feature <-> map (compact) feature via IndexMapBiDi.
45 // Although the transformations are reversible, the inverses are lossy and do
46 // not return the exact input INT_FEATURE_STRUCT, due to the many->one nature
47 // of both transformations.
49  public:
50  IntFeatureMap();
52 
53  // Accessors.
54  int sparse_size() const {
55  return feature_space_.Size();
56  }
57  int compact_size() const {
58  return compact_size_;
59  }
60  const IntFeatureSpace& feature_space() const {
61  return feature_space_;
62  }
63  const IndexMapBiDi& feature_map() const {
64  return feature_map_;
65  }
66 
67  // Pseudo-accessors.
68  int IndexFeature(const INT_FEATURE_STRUCT& f) const;
69  int MapFeature(const INT_FEATURE_STRUCT& f) const;
70  int MapIndexFeature(int index_feature) const;
71  INT_FEATURE_STRUCT InverseIndexFeature(int index_feature) const;
72  INT_FEATURE_STRUCT InverseMapFeature(int map_feature) const;
73  void DeleteMapFeature(int map_feature);
74  bool IsMapFeatureDeleted(int map_feature) const;
75 
76  // Copies the given feature_space and uses it as the index feature map
77  // from INT_FEATURE_STRUCT.
78  void Init(const IntFeatureSpace& feature_space);
79 
80  // Helper to return an offset index feature. In this context an offset
81  // feature with a dir of +/-1 is a feature of a similar direction,
82  // but shifted perpendicular to the direction of the feature. An offset
83  // feature with a dir of +/-2 is a feature at the same position, but rotated
84  // by +/- one [compact] quantum. Returns the index of the generated offset
85  // feature, or -1 if it doesn't exist. Dir should be in
86  // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction.
87  // A dir of 0 is an identity transformation.
88  // Both input and output are from the index(sparse) feature space, not
89  // the mapped/compact feature space, but the offset feature is the minimum
90  // distance moved from the input to guarantee that it maps to the next
91  // available quantum in the mapped/compact space.
92  int OffsetFeature(int index_feature, int dir) const;
93 
94  // Computes the features used by the subset of samples defined by
95  // the iterator and sets up the feature mapping.
96  // Returns the size of the compacted feature space.
98 
99  // After deleting some features, finish setting up the mapping, and map
100  // all the samples. Returns the size of the compacted feature space.
102 
103  // Indexes the given array of features to a vector of sorted indices.
105  int num_features,
106  GenericVector<int>* sorted_features) const {
107  feature_space_.IndexAndSortFeatures(features, num_features,
108  sorted_features);
109  }
110  // Maps the given array of index/sparse features to an array of map/compact
111  // features.
112  // Assumes the input is sorted. The output indices are sorted and uniqued.
113  // Returns the number of "missed" features, being features that
114  // don't map to the compact feature space.
115  int MapIndexedFeatures(const GenericVector<int>& index_features,
116  GenericVector<int>* map_features) const {
117  return feature_map_.MapFeatures(index_features, map_features);
118  }
119 
120  // Prints the map features from the set in human-readable form.
121  void DebugMapFeatures(const GenericVector<int>& map_features) const;
122 
123  private:
124  void Clear();
125 
126  // Helper to compute an offset index feature. In this context an offset
127  // feature with a dir of +/-1 is a feature of a similar direction,
128  // but shifted perpendicular to the direction of the feature. An offset
129  // feature with a dir of +/-2 is a feature at the same position, but rotated
130  // by +/- one [compact] quantum. Returns the index of the generated offset
131  // feature, or -1 if it doesn't exist. Dir should be in
132  // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction.
133  // A dir of 0 is an identity transformation.
134  // Both input and output are from the index(sparse) feature space, not
135  // the mapped/compact feature space, but the offset feature is the minimum
136  // distance moved from the input to guarantee that it maps to the next
137  // available quantum in the mapped/compact space.
138  int ComputeOffsetFeature(int index_feature, int dir) const;
139 
140  // True if the mapping has changed since it was last finalized.
142  // Size of the compacted feature space, after unused features are removed.
144  // Feature space quantization definition and indexing from INT_FEATURE_STRUCT.
146  // Mapping from indexed feature space to the compacted space with unused
147  // features mapping to -1.
149  // Index tables to map a feature index to the corresponding feature after a
150  // shift perpendicular to the feature direction, or a rotation in place.
151  // An entry of -1 indicates that there is no corresponding feature.
152  // Array of arrays of size feature_space_.Size() owned by this class.
155 
156  // Don't use default copy and assign!
158  void operator=(const IntFeatureMap&);
159 };
160 
161 } // namespace tesseract.
162 
163 #endif // TESSERACT_CLASSIFY_INTFEATUREMAP_H_
Definition: intfeaturespace.h:38
int sparse_size() const
Definition: intfeaturemap.h:54
void DebugMapFeatures(const GenericVector< int > &map_features) const
Definition: intfeaturemap.cpp:172
IntFeatureMap()
Definition: intfeaturemap.cpp:34
int MapIndexedFeatures(const GenericVector< int > &index_features, GenericVector< int > *map_features) const
Definition: intfeaturemap.h:115
static const int kNumOffsetMaps
Definition: intfeaturemap.h:33
int compact_size_
Definition: intfeaturemap.h:143
int OffsetFeature(int index_feature, int dir) const
Definition: intfeaturemap.cpp:113
Definition: intfeaturemap.h:48
int * offset_minus_[kNumOffsetMaps]
Definition: intfeaturemap.h:154
Definition: baseapi.cpp:94
void operator=(const IntFeatureMap &)
void DeleteMapFeature(int map_feature)
Definition: intfeaturemap.cpp:63
int FindNZFeatureMapping(SampleIterator *it)
IndexMapBiDi feature_map_
Definition: intfeaturemap.h:148
int ComputeOffsetFeature(int index_feature, int dir) const
Definition: intfeaturemap.cpp:201
INT_FEATURE_STRUCT InverseMapFeature(int map_feature) const
Definition: intfeaturemap.cpp:59
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features, GenericVector< int > *sorted_features) const
Definition: intfeaturespace.cpp:67
const IntFeatureSpace & feature_space() const
Definition: intfeaturemap.h:60
int IndexFeature(const INT_FEATURE_STRUCT &f) const
Definition: intfeaturemap.cpp:47
int compact_size() const
Definition: intfeaturemap.h:57
int * offset_plus_[kNumOffsetMaps]
Definition: intfeaturemap.h:153
const IndexMapBiDi & feature_map() const
Definition: intfeaturemap.h:63
Definition: indexmapbidi.h:102
void IndexAndSortFeatures(const INT_FEATURE_STRUCT *features, int num_features, GenericVector< int > *sorted_features) const
Definition: intfeaturemap.h:104
int Size() const
Definition: intfeaturespace.h:51
int MapFeature(const INT_FEATURE_STRUCT &f) const
Definition: intfeaturemap.cpp:50
int MapIndexFeature(int index_feature) const
Definition: intfeaturemap.cpp:53
int MapFeatures(const GenericVector< int > &sparse, GenericVector< int > *compact) const
Definition: indexmapbidi.cpp:236
IntFeatureSpace feature_space_
Definition: intfeaturemap.h:145
Definition: intproto.h:132
bool mapping_changed_
Definition: intfeaturemap.h:141
int FinalizeMapping(SampleIterator *it)
Definition: intfeaturemap.cpp:159
Definition: sampleiterator.h:92
void Clear()
Definition: intfeaturemap.cpp:180
INT_FEATURE_STRUCT InverseIndexFeature(int index_feature) const
Definition: intfeaturemap.cpp:56
void Init(const IntFeatureSpace &feature_space)
Definition: intfeaturemap.cpp:73
~IntFeatureMap()
Definition: intfeaturemap.cpp:42
bool IsMapFeatureDeleted(int map_feature) const
Definition: intfeaturemap.cpp:67