tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
normalis.h
1 /**********************************************************************
2  * File: normalis.h (Formerly denorm.h)
3  * Description: Code for the DENORM class.
4  * Author: Ray Smith
5  * Created: Thu Apr 23 09:22:43 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef NORMALIS_H
21 #define NORMALIS_H
22 
// Dimensions of the baseline-normalized (BLN) coordinate system.
// Full height of the cell used for baseline normalization.
constexpr int kBlnCellHeight = 256;
// The x-height used for baseline normalization.
constexpr int kBlnXHeight = 128;
// Offset used for baseline normalization.
constexpr int kBlnBaselineOffset = 64;
26 
27 class BLOCK;
28 class FCOORD;
29 class TBOX;
30 class UNICHARSET;
31 
32 struct Pix;
33 struct TBLOB;
34 struct TPOINT;
35 
36 template <typename T> class GenericVector;
37 
namespace tesseract {

// Possible normalization methods. Use NEGATIVE values as these also
// double up as markers for the last sub-classifier.
enum NormalizationMode {
  NM_BASELINE = -3,         // The original BL normalization mode.
  NM_CHAR_ISOTROPIC = -2,   // Character normalization but isotropic.
  NM_CHAR_ANISOTROPIC = -1  // The original CN normalization mode.
};

}  // namespace tesseract.
49 
50 class DENORM {
51  public:
52  DENORM();
53 
54  // Copying a DENORM is allowed.
55  DENORM(const DENORM &);
56  DENORM& operator=(const DENORM&);
57  ~DENORM();
58 
59  // Setup the normalization transformation parameters.
60  // The normalizations applied to a blob are as follows:
61  // 1. An optional block layout rotation that was applied during layout
62  // analysis to make the textlines horizontal.
63  // 2. A normalization transformation (LocalNormTransform):
64  // Subtract the "origin"
65  // Apply an x,y scaling.
66  // Apply an optional rotation.
67  // Add back a final translation.
68  // The origin is in the block-rotated space, and is usually something like
69  // the x-middle of the word at the baseline.
70  // 3. Zero or more further normalization transformations that are applied
71  // in sequence, with a similar pattern to the first normalization transform.
72  //
73  // A DENORM holds the parameters of a single normalization, and can execute
74  // both the LocalNormTransform (a forwards normalization), and the
75  // LocalDenormTransform which is an inverse transform or de-normalization.
76  // A DENORM may point to a predecessor DENORM, which is actually the earlier
77  // normalization, so the full normalization sequence involves executing all
78  // predecessors first and then the transform in "this".
79  // Let x be image co-ordinates and that we have normalization classes A, B, C
80  // where we first apply A then B then C to get normalized x':
81  // x' = CBAx
82  // Then the backwards (to original coordinates) would be:
83  // x = A^-1 B^-1 C^-1 x'
84  // and A = B->predecessor_ and B = C->predecessor_
85  // NormTransform executes all predecessors recursively, and then this.
86  // NormTransform would be used to transform an image-based feature to
87  // normalized space for use in a classifier
88  // DenormTransform inverts this and then all predecessors. It can be
89  // used to get back to the original image coordinates from normalized space.
90  // The LocalNormTransform member executes just the transformation
91  // in "this" without the layout rotation or any predecessors. It would be
92  // used to run each successive normalization, eg the word normalization,
93  // and later the character normalization.
94 
95  // Arguments:
96  // block: if not nullptr, then this is the first transformation, and
97  // block->re_rotation() needs to be used after the Denorm
98  // transformation to get back to the image coords.
99  // rotation: if not nullptr, apply this rotation after translation to the
100  // origin and scaling. (Usually a classify rotation.)
101  // predecessor: if not nullptr, then predecessor has been applied to the
102  // input space and needs to be undone to complete the inverse.
103  // The above pointers are not owned by this DENORM and are assumed to live
104  // longer than this denorm, except rotation, which is deep copied on input.
105  //
106  // x_origin: The x origin which will be mapped to final_xshift in the result.
107  // y_origin: The y origin which will be mapped to final_yshift in the result.
108  // Added to result of row->baseline(x) if not nullptr.
109  //
110  // x_scale: scale factor for the x-coordinate.
111  // y_scale: scale factor for the y-coordinate. Ignored if segs is given.
112  // Note that these scale factors apply to the same x and y system as the
113  // x-origin and y-origin apply, ie after any block rotation, but before
114  // the rotation argument is applied.
115  //
116  // final_xshift: The x component of the final translation.
117  // final_yshift: The y component of the final translation.
118  //
119  // In theory, any of the commonly used normalizations can be setup here:
120  // * Traditional baseline normalization on a word:
121  // SetupNormalization(block, nullptr, nullptr,
122  // box.x_middle(), baseline,
123  // kBlnXHeight / x_height, kBlnXHeight / x_height,
124  // 0, kBlnBaselineOffset);
125  // * "Numeric mode" baseline normalization on a word, in which the blobs
126  // are positioned with the bottom as the baseline is achieved by making
127  // a separate DENORM for each blob.
128  // SetupNormalization(block, nullptr, nullptr,
129  // box.x_middle(), box.bottom(),
130  // kBlnXHeight / x_height, kBlnXHeight / x_height,
131  // 0, kBlnBaselineOffset);
132  // * Anisotropic character normalization used by IntFx.
133  // SetupNormalization(nullptr, nullptr, denorm,
134  // centroid_x, centroid_y,
135  // 51.2 / ry, 51.2 / rx, 128, 128);
136  // * Normalize blob height to x-height (current OSD):
137  // SetupNormalization(nullptr, &rotation, nullptr,
138  // box.rotational_x_middle(rotation),
139  // box.rotational_y_middle(rotation),
140  // kBlnXHeight / box.rotational_height(rotation),
141  // kBlnXHeight / box.rotational_height(rotation),
142  // 0, kBlnBaselineOffset);
143  // * Secondary normalization for classification rotation (current):
144  // FCOORD rotation = block->classify_rotation();
145  // float target_height = kBlnXHeight / CCStruct::kXHeightCapRatio;
146  // SetupNormalization(nullptr, &rotation, denorm,
147  // box.rotational_x_middle(rotation),
148  // box.rotational_y_middle(rotation),
149  // target_height / box.rotational_height(rotation),
150  // target_height / box.rotational_height(rotation),
151  // 0, kBlnBaselineOffset);
152  // * Proposed new normalizations for CJK: Between them there is then
153  // no need for further normalization at all, and the character fills the cell.
154  // ** Replacement for baseline normalization on a word:
155  // Scales height and width independently so that modal height and pitch
156  // fill the cell respectively.
157  // float cap_height = x_height / CCStruct::kXHeightCapRatio;
158  // SetupNormalization(block, nullptr, nullptr,
159  // box.x_middle(), cap_height / 2.0f,
160  // kBlnCellHeight / fixed_pitch,
161  // kBlnCellHeight / cap_height,
162  // 0, 0);
163  // ** Secondary normalization for classification (with rotation) (proposed):
164  // Requires a simple translation to the center of the appropriate character
165  // cell, no further scaling and a simple rotation (or nothing) about the
166  // cell center.
167  // FCOORD rotation = block->classify_rotation();
168  // SetupNormalization(nullptr, &rotation, denorm,
169  // fixed_pitch_cell_center,
170  // 0.0f,
171  // 1.0f,
172  // 1.0f,
173  // 0, 0);
174  void SetupNormalization(const BLOCK* block,
175  const FCOORD* rotation,
176  const DENORM* predecessor,
177  float x_origin, float y_origin,
178  float x_scale, float y_scale,
179  float final_xshift, float final_yshift);
180 
181  // Sets up the DENORM to execute a non-linear transformation based on
182  // preserving an even distribution of stroke edges. The transformation
183  // operates only within the given box, scaling input coords within the box
184  // non-linearly to a box of target_width by target_height, with all other
185  // coords being clipped to the box edge. As with SetupNormalization above,
186  // final_xshift and final_yshift are applied after scaling, and the bottom-
187  // left of box is used as a pre-scaling origin.
188  // x_coords is a collection of the x-coords of vertical edges for each
189  // y-coord starting at box.bottom().
190  // y_coords is a collection of the y-coords of horizontal edges for each
191  // x-coord starting at box.left().
192  // Eg x_coords[0] is a collection of the x-coords of edges at y=bottom.
193  // Eg x_coords[1] is a collection of the x-coords of edges at y=bottom + 1.
194  // The second-level vectors must all be sorted in ascending order.
195  void SetupNonLinear(const DENORM* predecessor, const TBOX& box,
196  float target_width, float target_height,
197  float final_xshift, float final_yshift,
198  const GenericVector<GenericVector<int> >& x_coords,
199  const GenericVector<GenericVector<int> >& y_coords);
200 
201  // Transforms the given coords one step forward to normalized space, without
202  // using any block rotation or predecessor.
203  void LocalNormTransform(const TPOINT& pt, TPOINT* transformed) const;
204  void LocalNormTransform(const FCOORD& pt, FCOORD* transformed) const;
205  // Transforms the given coords forward to normalized space using the
206  // full transformation sequence defined by the block rotation, the
207  // predecessors, deepest first, and finally this. If first_norm is not nullptr,
208  // then the first and deepest transformation used is first_norm, ending
209  // with this, and the block rotation will not be applied.
210  void NormTransform(const DENORM* first_norm, const TPOINT& pt,
211  TPOINT* transformed) const;
212  void NormTransform(const DENORM* first_norm, const FCOORD& pt,
213  FCOORD* transformed) const;
214  // Transforms the given coords one step back to source space, without
215  // using to any block rotation or predecessor.
216  void LocalDenormTransform(const TPOINT& pt, TPOINT* original) const;
217  void LocalDenormTransform(const FCOORD& pt, FCOORD* original) const;
218  // Transforms the given coords all the way back to source image space using
219  // the full transformation sequence defined by this and its predecessors
220  // recursively, shallowest first, and finally any block re_rotation.
221  // If last_denorm is not nullptr, then the last transformation used will
222  // be last_denorm, and the block re_rotation will never be executed.
223  void DenormTransform(const DENORM* last_denorm, const TPOINT& pt,
224  TPOINT* original) const;
225  void DenormTransform(const DENORM* last_denorm, const FCOORD& pt,
226  FCOORD* original) const;
227 
228  // Normalize a blob using blob transformations. Less accurate, but
229  // more accurately copies the old way.
230  void LocalNormBlob(TBLOB* blob) const;
231 
232  // Fills in the x-height range accepted by the given unichar_id in blob
233  // coordinates, given its bounding box in the usual baseline-normalized
234  // coordinates, with some initial crude x-height estimate (such as word
235  // size) and this denoting the transformation that was used.
236  // Also returns the amount the character must have shifted up or down.
237  void XHeightRange(int unichar_id, const UNICHARSET& unicharset,
238  const TBOX& bbox,
239  float* min_xht,
240  float* max_xht,
241  float* yshift) const;
242 
243  // Prints the content of the DENORM for debug purposes.
244  void Print() const;
245 
246  Pix* pix() const {
247  return pix_;
248  }
249  void set_pix(Pix* pix) {
250  pix_ = pix;
251  }
252  bool inverse() const {
253  return inverse_;
254  }
255  void set_inverse(bool value) {
256  inverse_ = value;
257  }
258  const DENORM* RootDenorm() const {
259  if (predecessor_ != nullptr)
260  return predecessor_->RootDenorm();
261  return this;
262  }
263  const DENORM* predecessor() const {
264  return predecessor_;
265  }
266  // Accessors - perhaps should not be needed.
267  float x_scale() const {
268  return x_scale_;
269  }
270  float y_scale() const {
271  return y_scale_;
272  }
273  const BLOCK* block() const {
274  return block_;
275  }
276  void set_block(const BLOCK* block) {
277  block_ = block;
278  }
279 
280  private:
281  // Free allocated memory and clear pointers.
282  void Clear();
283  // Setup default values.
284  void Init();
285 
286  // Best available image.
287  Pix* pix_;
288  // True if the source image is white-on-black.
289  bool inverse_;
290  // Block the word came from. If not null, block->re_rotation() takes the
291  // "untransformed" coordinates even further back to the original image.
292  // Used only on the first DENORM in a chain.
293  const BLOCK* block_;
294  // Rotation to apply between translation to the origin and scaling.
296  // Previous transformation in a chain.
298  // Non-linear transformation maps directly from each integer offset from the
299  // origin to the corresponding x-coord. Owned by the DENORM.
301  // Non-linear transformation maps directly from each integer offset from the
302  // origin to the corresponding y-coord. Owned by the DENORM.
304  // x-coordinate to be mapped to final_xshift_ in the result.
305  float x_origin_;
306  // y-coordinate to be mapped to final_yshift_ in the result.
307  float y_origin_;
308  // Scale factors for x and y coords. Applied to pre-rotation system.
309  float x_scale_;
310  float y_scale_;
311  // Destination coords of the x_origin_ and y_origin_.
314 };
315 
316 #endif
Definition: normalis.h:43
float final_yshift_
Definition: normalis.h:313
float x_origin_
Definition: normalis.h:305
float y_origin_
Definition: normalis.h:307
Pix * pix() const
Definition: normalis.h:246
Definition: rect.h:34
Definition: unicharset.h:146
float y_scale_
Definition: normalis.h:310
GenericVector< float > * x_map_
Definition: normalis.h:300
const DENORM * predecessor() const
Definition: normalis.h:263
Definition: baseapi.cpp:94
float x_scale_
Definition: normalis.h:309
Definition: normalis.h:45
const DENORM * RootDenorm() const
Definition: normalis.h:258
float final_xshift_
Definition: normalis.h:312
void set_pix(Pix *pix)
Definition: normalis.h:249
Definition: ocrblock.h:30
float y_scale() const
Definition: normalis.h:270
GenericVector< float > * y_map_
Definition: normalis.h:303
Pix * pix_
Definition: normalis.h:287
Definition: baseapi.h:37
const BLOCK * block() const
Definition: normalis.h:273
void set_inverse(bool value)
Definition: normalis.h:255
bool inverse() const
Definition: normalis.h:252
Definition: normalis.h:50
NormalizationMode
Definition: normalis.h:42
const DENORM * predecessor_
Definition: normalis.h:297
void set_block(const BLOCK *block)
Definition: normalis.h:276
float x_scale() const
Definition: normalis.h:267
Definition: normalis.h:44
Definition: blobs.h:268
const FCOORD * rotation_
Definition: normalis.h:295
bool inverse_
Definition: normalis.h:289
Definition: blobs.h:57
Definition: points.h:189
const BLOCK * block_
Definition: normalis.h:293