tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
tabvector.h
1 // File: tabvector.h
3 // Description: Class to hold a near-vertical vector representing a tab-stop.
4 // Author: Ray Smith
5 // Created: Thu Apr 10 16:25:01 PST 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_TEXTORD_TABVECTOR_H_
21 #define TESSERACT_TEXTORD_TABVECTOR_H_
22 
23 #include "blobgrid.h"
24 #include "clst.h"
25 #include "elst.h"
26 #include "elst2.h"
27 #include "rect.h"
28 #include "bbgrid.h"
29 
30 #include <algorithm>
31 
32 class BLOBNBOX;
33 class ScrollView;
34 
35 namespace tesseract {
36 
37 
38 extern double_VAR_H(textord_tabvector_vertical_gap_fraction, 0.5,
39  "Max fraction of mean blob width allowed for vertical gaps in vertical text");
40 extern double_VAR_H(textord_tabvector_vertical_box_ratio, 0.5,
41  "Fraction of box matches required to declare a line vertical");
42 
43 // The alignment type that a tab vector represents.
44 // Keep this enum synced with kAlignmentNames in tabvector.cpp.
53 };
54 
55 // Forward declarations. The classes use their own list types, so we
56 // need to make the list types first.
57 class TabFind;
58 class TabVector;
59 class TabConstraint;
60 
61 ELIST2IZEH(TabVector)
62 CLISTIZEH(TabVector)
63 ELISTIZEH(TabConstraint)
64 
65 // TabConstraint is a totally self-contained class to maintain
66 // a list of [min,max] constraints, each referring to a TabVector.
67 // The constraints are manipulated through static methods that act
68 // on a list of constraints. The list itself is cooperatively owned
69 // by the TabVectors of the constraints on the list and managed
70 // by implicit reference counting via the elements of the list.
71 class TabConstraint : public ELIST_LINK {
72  public:
73  // This empty constructor is here only so that the class can be ELISTIZED.
74  // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
75  // and eliminate CLASSNAME##_copier.
76  TabConstraint() = default;
77 
78  // Create a constraint for the top or bottom of this TabVector.
79  static void CreateConstraint(TabVector* vector, bool is_top);
80 
81  // Test to see if the constraints are compatible enough to merge.
82  static bool CompatibleConstraints(TabConstraint_LIST* list1,
83  TabConstraint_LIST* list2);
84 
85  // Merge the lists of constraints and update the TabVector pointers.
86  // The second list is deleted.
87  static void MergeConstraints(TabConstraint_LIST* list1,
88  TabConstraint_LIST* list2);
89 
90  // Set all the tops and bottoms as appropriate to a mean of the
91  // constrained range. Delete all the constraints and list.
92  static void ApplyConstraints(TabConstraint_LIST* constraints);
93 
94  private:
95  TabConstraint(TabVector* vector, bool is_top);
96 
97  // Get the max of the mins and the min of the maxes.
98  static void GetConstraints(TabConstraint_LIST* constraints,
99  int* y_min, int* y_max);
100 
101  // The TabVector this constraint applies to.
103  // If true then we refer to the top of the vector_.
104  bool is_top_;
105  // The allowed range of this vector_.
106  int y_min_;
107  int y_max_;
108 };
109 
110 // Class to hold information about a single vector
111 // that represents a tab stop or a rule line.
112 class TabVector : public ELIST2_LINK {
113  public:
114  // TODO(rays) fix this in elst.h line 1076, where it should use the
115  // copy constructor instead of operator=.
116  TabVector() = default;
117  ~TabVector() = default;
118 
119  // Public factory to build a TabVector from a list of boxes.
120  // The TabVector will be of the given alignment type.
121  // The input vertical vector is used in fitting, and the output
122  // vertical_x, vertical_y have the resulting line vector added to them
123  // if the alignment is not ragged.
124  // The extended_start_y and extended_end_y are the maximum possible
125  // extension to the line segment that can be used to align with others.
126  // The input CLIST of BLOBNBOX good_points is consumed and taken over.
127  static TabVector* FitVector(TabAlignment alignment, ICOORD vertical,
128  int extended_start_y, int extended_end_y,
129  BLOBNBOX_CLIST* good_points,
130  int* vertical_x, int* vertical_y);
131 
132  // Build a ragged TabVector by copying another's direction, shifting it
133  // to match the given blob, and making its initial extent the height
134  // of the blob, but its extended bounds from the bounds of the original.
135  TabVector(const TabVector& src, TabAlignment alignment,
136  const ICOORD& vertical_skew, BLOBNBOX* blob);
137 
138  // Copies basic attributes of a tab vector for simple operations.
139  // Copies things such startpt, endpt, range, width.
140  // Does not copy things such as partners, boxes, or constraints.
141  // This is useful if you only need vector information for processing, such
142  // as in the table detection code.
143  TabVector* ShallowCopy() const;
144 
145  // Simple accessors.
146  const ICOORD& startpt() const {
147  return startpt_;
148  }
149  const ICOORD& endpt() const {
150  return endpt_;
151  }
152  int extended_ymax() const {
153  return extended_ymax_;
154  }
155  int extended_ymin() const {
156  return extended_ymin_;
157  }
158  int sort_key() const {
159  return sort_key_;
160  }
161  int mean_width() const {
162  return mean_width_;
163  }
164  void set_top_constraints(TabConstraint_LIST* constraints) {
165  top_constraints_ = constraints;
166  }
167  void set_bottom_constraints(TabConstraint_LIST* constraints) {
168  bottom_constraints_ = constraints;
169  }
170  TabVector_CLIST* partners() {
171  return &partners_;
172  }
173  void set_startpt(const ICOORD& start) {
174  startpt_ = start;
175  }
176  void set_endpt(const ICOORD& end) {
177  endpt_ = end;
178  }
179  bool intersects_other_lines() const {
181  }
182  void set_intersects_other_lines(bool value) {
183  intersects_other_lines_ = value;
184  }
185 
186  // Inline quasi-accessors that require some computation.
187 
188  // Compute the x coordinate at the given y coordinate.
189  int XAtY(int y) const {
190  int height = endpt_.y() - startpt_.y();
191  if (height != 0)
192  return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height +
193  startpt_.x();
194  else
195  return startpt_.x();
196  }
197 
198  // Compute the vertical overlap with the other TabVector.
199  int VOverlap(const TabVector& other) const {
200  return std::min(other.endpt_.y(), endpt_.y()) -
201  std::max(other.startpt_.y(), startpt_.y());
202  }
203  // Compute the vertical overlap with the given y bounds.
204  int VOverlap(int top_y, int bottom_y) const {
205  return std::min(top_y, static_cast<int>(endpt_.y())) - std::max(bottom_y, static_cast<int>(startpt_.y()));
206  }
207  // Compute the extended vertical overlap with the given y bounds.
208  int ExtendedOverlap(int top_y, int bottom_y) const {
209  return std::min(top_y, extended_ymax_) - std::max(bottom_y, extended_ymin_);
210  }
211 
212  // Return true if this is a left tab stop, either aligned, or ragged.
213  bool IsLeftTab() const {
215  }
216  // Return true if this is a right tab stop, either aligned, or ragged.
217  bool IsRightTab() const {
219  }
220  // Return true if this is a separator.
221  bool IsSeparator() const {
222  return alignment_ == TA_SEPARATOR;
223  }
224  // Return true if this is a center aligned tab stop.
225  bool IsCenterTab() const {
227  }
228  // Return true if this is a ragged tab top, either left or right.
229  bool IsRagged() const {
231  }
232 
233  // Return true if this vector is to the left of the other in terms
234  // of sort_key_.
235  bool IsLeftOf(const TabVector& other) const {
236  return sort_key_ < other.sort_key_;
237  }
238 
239  // Return true if the vector has no partners.
240  bool Partnerless() {
241  return partners_.empty();
242  }
243 
244  // Return the number of tab boxes in this vector.
245  int BoxCount() {
246  return boxes_.length();
247  }
248 
249  // Lock the vector from refits by clearing the boxes_ list.
250  void Freeze() {
251  boxes_.shallow_clear();
252  }
253 
254  // Flip x and y on the ends so a vector can be created from flipped input.
255  void XYFlip() {
256  int x = startpt_.y();
258  startpt_.set_x(x);
259  x = endpt_.y();
260  endpt_.set_y(endpt_.x());
261  endpt_.set_x(x);
262  }
263 
264  // Reflect the tab vector in the y-axis.
265  void ReflectInYAxis() {
266  startpt_.set_x(-startpt_.x());
267  endpt_.set_x(-endpt_.x());
268  sort_key_ = -sort_key_;
271  else if (alignment_ == TA_RIGHT_ALIGNED)
273  if (alignment_ == TA_LEFT_RAGGED)
275  else if (alignment_ == TA_RIGHT_RAGGED)
277  }
278 
279  // Separate function to compute the sort key for a given coordinate pair.
280  static int SortKey(const ICOORD& vertical, int x, int y) {
281  ICOORD pt(x, y);
282  return pt * vertical;
283  }
284 
285  // Return the x at the given y for the given sort key.
286  static int XAtY(const ICOORD& vertical, int sort_key, int y) {
287  if (vertical.y() != 0)
288  return (vertical.x() * y + sort_key) / vertical.y();
289  else
290  return sort_key;
291  }
292 
293  // Sort function for E2LIST::sort to sort by sort_key_.
294  static int SortVectorsByKey(const void* v1, const void* v2) {
295  const TabVector* tv1 = *static_cast<const TabVector* const*>(v1);
296  const TabVector* tv2 = *static_cast<const TabVector* const*>(v2);
297  return tv1->sort_key_ - tv2->sort_key_;
298  }
299 
300  // More complex members.
301 
302  // Extend this vector to include the supplied blob if it doesn't
303  // already have it.
304  void ExtendToBox(BLOBNBOX* blob);
305 
306  // Set the ycoord of the start and move the xcoord to match.
307  void SetYStart(int start_y);
308  // Set the ycoord of the end and move the xcoord to match.
309  void SetYEnd(int end_y);
310 
311  // Rotate the ends by the given vector.
312  void Rotate(const FCOORD& rotation);
313 
314  // Setup the initial constraints, being the limits of
315  // the vector and the extended ends.
316  void SetupConstraints();
317 
318  // Setup the constraints between the partners of this TabVector.
320 
321  // Setup the constraints between this and its partner.
322  void SetupPartnerConstraints(TabVector* partner);
323 
324  // Use the constraints to modify the top and bottom.
325  void ApplyConstraints();
326 
327  // Merge close tab vectors of the same side that overlap.
328  static void MergeSimilarTabVectors(const ICOORD& vertical,
329  TabVector_LIST* vectors, BlobGrid* grid);
330 
331  // Return true if this vector is the same side, overlaps, and close
332  // enough to the other to be merged.
333  bool SimilarTo(const ICOORD& vertical,
334  const TabVector& other, BlobGrid* grid) const;
335 
336  // Eat the other TabVector into this and delete it.
337  void MergeWith(const ICOORD& vertical, TabVector* other);
338 
339  // Add a new element to the list of partner TabVectors.
340  // Partners must be added in order of increasing y coordinate of the text line
341  // that makes them partners.
342  // Groups of identical partners are merged into one.
343  void AddPartner(TabVector* partner);
344 
345  // Return true if other is a partner of this.
346  bool IsAPartner(const TabVector* other);
347 
348  // Print basic information about this tab vector.
349  void Print(const char* prefix);
350 
351  // Print basic information about this tab vector and every box in it.
352  void Debug(const char* prefix);
353 
354  // Draw this tabvector in place in the given window.
355  void Display(ScrollView* tab_win);
356 
357  // Refit the line and/or re-evaluate the vector if the dirty flags are set.
358  void FitAndEvaluateIfNeeded(const ICOORD& vertical, TabFind* finder);
359 
360  // Evaluate the vector in terms of coverage of its length by good-looking
361  // box edges. A good looking box is one where its nearest neighbour on the
362  // inside is nearer than half the distance its nearest neighbour on the
363  // outside of the putative column. Bad boxes are removed from the line.
364  // A second pass then further filters boxes by requiring that the gutter
365  // width be a minimum fraction of the mean gutter along the line.
366  void Evaluate(const ICOORD& vertical, TabFind* finder);
367 
368  // (Re)Fit a line to the stored points. Returns false if the line
369  // is degenerate. Althougth the TabVector code mostly doesn't care about the
370  // direction of lines, XAtY would give silly results for a horizontal line.
371  // The class is mostly aimed at use for vertical lines representing
372  // horizontal tab stops.
373  bool Fit(ICOORD vertical, bool force_parallel);
374 
375  // Return the partner of this TabVector if the vector qualifies as
376  // being a vertical text line, otherwise nullptr.
378 
379  // Return the matching tabvector if there is exactly one partner, or
380  // nullptr otherwise. This can be used after matching is done, eg. by
381  // VerticalTextlinePartner(), without checking if the line is vertical.
383 
384  private:
385  // Constructor is private as the static factory is the external way
386  // to build a TabVector.
388  TabAlignment alignment, BLOBNBOX_CLIST* boxes);
389 
390  // Delete this, but first, repoint all the partners to point to
391  // replacement. If replacement is nullptr, then partner relationships
392  // are removed.
393  void Delete(TabVector* replacement);
394 
395  private:
396  // The bottom of the tab line.
398  // The top of the tab line.
400  // The lowest y that the vector might extend to.
402  // The highest y that the vector might extend to.
404  // Perpendicular distance of vector from a given vertical for sorting.
406  // Result of Evaluate 0-100. Coverage of line with good boxes.
408  // The mean width of the blobs. Meaningful only for separator lines.
410  // True if the boxes_ list has been modified, so a refit is needed.
412  // True if a fit has been done, so re-evaluation is needed.
414  // True if a separator line intersects at least 2 other lines.
416  // The type of this TabVector.
418  // The list of boxes whose edges are aligned at this TabVector.
419  BLOBNBOX_CLIST boxes_;
420  // List of TabVectors that have a connection with this via a text line.
421  TabVector_CLIST partners_;
422  // Constraints used to resolve the exact location of the top and bottom
423  // of the tab line.
424  TabConstraint_LIST* top_constraints_;
425  TabConstraint_LIST* bottom_constraints_;
426 };
427 
428 } // namespace tesseract.
429 
430 #endif // TESSERACT_TEXTORD_TABVECTOR_H_
static int SortVectorsByKey(const void *v1, const void *v2)
Definition: tabvector.h:294
int sort_key_
Definition: tabvector.h:405
Definition: tabvector.h:51
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:356
bool Fit(ICOORD vertical, bool force_parallel)
Definition: tabvector.cpp:783
int mean_width() const
Definition: tabvector.h:161
const ICOORD & endpt() const
Definition: tabvector.h:149
int BoxCount()
Definition: tabvector.h:245
BLOBNBOX_CLIST boxes_
Definition: tabvector.h:419
void set_x(int16_t xin)
rewrite function
Definition: points.h:62
TabAlignment
Definition: tabvector.h:45
int16_t x() const
access function
Definition: points.h:53
int VOverlap(const TabVector &other) const
Definition: tabvector.h:199
bool IsLeftOf(const TabVector &other) const
Definition: tabvector.h:235
bool IsCenterTab() const
Definition: tabvector.h:225
bool Partnerless()
Definition: tabvector.h:240
int percent_score_
Definition: tabvector.h:407
Definition: tabvector.h:112
bool IsRagged() const
Definition: tabvector.h:229
int XAtY(int y) const
Definition: tabvector.h:189
Definition: tabvector.h:52
void Debug(const char *prefix)
Definition: tabvector.cpp:530
void Display(ScrollView *tab_win)
Definition: tabvector.cpp:542
void ReflectInYAxis()
Definition: tabvector.h:265
bool SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const
Definition: tabvector.cpp:389
void XYFlip()
Definition: tabvector.h:255
Definition: tabvector.h:46
bool intersects_other_lines() const
Definition: tabvector.h:179
int mean_width_
Definition: tabvector.h:409
Definition: baseapi.cpp:94
int y_max_
Definition: tabvector.h:107
void ExtendToBox(BLOBNBOX *blob)
Definition: tabvector.cpp:241
TabAlignment alignment_
Definition: tabvector.h:417
void set_bottom_constraints(TabConstraint_LIST *constraints)
Definition: tabvector.h:167
bool intersects_other_lines_
Definition: tabvector.h:415
int sort_key() const
Definition: tabvector.h:158
int extended_ymin() const
Definition: tabvector.h:155
TabVector * ShallowCopy() const
Definition: tabvector.cpp:228
double_VAR_H(textord_tabvector_vertical_gap_fraction, 0.5, "Max fraction of mean blob width allowed for vertical gaps in vertical text")
void Print(const char *prefix)
Definition: tabvector.cpp:520
void AddPartner(TabVector *partner)
Definition: tabvector.cpp:487
Definition: blobbox.h:144
TabVector_CLIST * partners()
Definition: tabvector.h:170
Definition: scrollview.h:102
bool IsLeftTab() const
Definition: tabvector.h:213
TabVector_CLIST partners_
Definition: tabvector.h:421
void SetupConstraints()
Definition: tabvector.cpp:291
static int XAtY(const ICOORD &vertical, int sort_key, int y)
Definition: tabvector.h:286
int16_t y() const
access_function
Definition: points.h:57
TabVector * GetSinglePartner()
Definition: tabvector.cpp:869
void ApplyConstraints()
Definition: tabvector.cpp:348
void set_endpt(const ICOORD &end)
Definition: tabvector.h:176
TabVector * vector_
Definition: tabvector.h:102
int extended_ymax() const
Definition: tabvector.h:152
Definition: blobgrid.h:33
void set_intersects_other_lines(bool value)
Definition: tabvector.h:182
Definition: tabvector.h:47
int VOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:204
const ICOORD & startpt() const
Definition: tabvector.h:146
void set_startpt(const ICOORD &start)
Definition: tabvector.h:173
ICOORD endpt_
Definition: tabvector.h:399
Definition: tabvector.h:48
void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:568
void set_y(int16_t yin)
rewrite function
Definition: points.h:66
Definition: tabvector.h:49
bool needs_evaluation_
Definition: tabvector.h:413
int y_min_
Definition: tabvector.h:106
integer coordinate
Definition: points.h:32
Definition: tabfind.h:53
bool IsAPartner(const TabVector *other)
Definition: tabvector.cpp:500
void Delete(TabVector *replacement)
Definition: tabvector.cpp:951
void Freeze()
Definition: tabvector.h:250
bool is_top_
Definition: tabvector.h:104
TabConstraint_LIST * top_constraints_
Definition: tabvector.h:424
Definition: tabvector.h:50
bool IsSeparator() const
Definition: tabvector.h:221
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280
Definition: tabvector.h:71
void SetupPartnerConstraints()
Definition: tabvector.cpp:297
void Evaluate(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:582
void Rotate(const FCOORD &rotation)
Definition: tabvector.cpp:276
bool needs_refit_
Definition: tabvector.h:411
ICOORD startpt_
Definition: tabvector.h:397
static TabVector * FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y, int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x, int *vertical_y)
Definition: tabvector.cpp:177
int extended_ymin_
Definition: tabvector.h:401
void MergeWith(const ICOORD &vertical, TabVector *other)
Definition: tabvector.cpp:453
void SetYStart(int start_y)
Definition: tabvector.cpp:265
Definition: points.h:189
TabConstraint_LIST * bottom_constraints_
Definition: tabvector.h:425
void SetYEnd(int end_y)
Definition: tabvector.cpp:270
bool IsRightTab() const
Definition: tabvector.h:217
int ExtendedOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:208
int extended_ymax_
Definition: tabvector.h:403
void set_top_constraints(TabConstraint_LIST *constraints)
Definition: tabvector.h:164
TabVector * VerticalTextlinePartner()
Definition: tabvector.cpp:879