tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
colpartition.h
1 // File: colpartition.h
3 // Description: Class to hold partitions of the page that correspond
4 // roughly to text lines.
5 // Author: Ray Smith
6 // Created: Thu Aug 14 10:50:01 PDT 2008
7 //
8 // (C) Copyright 2008, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #ifndef TESSERACT_TEXTORD_COLPARTITION_H_
22 #define TESSERACT_TEXTORD_COLPARTITION_H_
23 
24 #include "bbgrid.h"
25 #include "blobbox.h" // For BlobRegionType.
26 #include "ocrblock.h"
27 #include "rect.h" // For TBOX.
28 #include "scrollview.h"
29 #include "tabfind.h" // For WidthCallback.
30 #include "tabvector.h" // For BLOBNBOX_CLIST.
31 
32 #include <algorithm>
33 
34 namespace tesseract {
35 
36 // Number of colors in the color1, color2 arrays.
37 const int kRGBRMSColors = 4;
38 
39 class ColPartition;
40 class ColPartitionSet;
41 class ColPartitionGrid;
42 class WorkingPartSet;
43 class WorkingPartSet_LIST;
44 
45 // An enum to indicate how a partition sits on the columns.
46 // The order of flowing/heading/pullout must be kept consistent with
47 // PolyBlockType.
49  CST_NOISE, // Strictly between columns.
50  CST_FLOWING, // Strictly within a single column.
51  CST_HEADING, // Spans multiple columns.
52  CST_PULLOUT, // Touches multiple columns, but doesn't span them.
53  CST_COUNT // Number of entries.
54 };
55 
56 ELIST2IZEH(ColPartition)
57 CLISTIZEH(ColPartition)
58 
59 
68 class ColPartition : public ELIST2_LINK {
69  public:
70  // This empty constructor is here only so that the class can be ELISTIZED.
71  // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
72  // and eliminate CLASSNAME##_copier.
73  ColPartition() = default;
74 
79  ColPartition(BlobRegionType blob_type, const ICOORD& vertical);
84  static ColPartition* MakeLinePartition(BlobRegionType blob_type,
85  const ICOORD& vertical,
86  int left, int bottom,
87  int right, int top);
88 
89  // Constructs and returns a fake ColPartition with a single fake BLOBNBOX,
90  // all made from a single TBOX.
91  // WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and
92  // the ColPartition owns the BLOBNBOX!!!
93  // Call DeleteBoxes before deleting the ColPartition.
94  static ColPartition* FakePartition(const TBOX& box,
95  PolyBlockType block_type,
96  BlobRegionType blob_type,
97  BlobTextFlowType flow);
98 
99  // Constructs and returns a ColPartition with the given real BLOBNBOX,
100  // and sets it up to be a "big" partition (single-blob partition bigger
101  // than the surrounding text that may be a dropcap, two or more vertically
102 // touching characters, or some graphic element).
103  // If the given list is not nullptr, the partition is also added to the list.
104  static ColPartition* MakeBigPartition(BLOBNBOX* box,
105  ColPartition_LIST* big_part_list);
106 
107  ~ColPartition();
108 
109  // Simple accessors.
110  const TBOX& bounding_box() const {
111  return bounding_box_;
112  }
113  int left_margin() const {
114  return left_margin_;
115  }
116  void set_left_margin(int margin) {
117  left_margin_ = margin;
118  }
119  int right_margin() const {
120  return right_margin_;
121  }
122  void set_right_margin(int margin) {
123  right_margin_ = margin;
124  }
125  int median_top() const {
126  return median_top_;
127  }
128  int median_bottom() const {
129  return median_bottom_;
130  }
131  int median_left() const {
132  return median_left_;
133  }
134  int median_right() const {
135  return median_right_;
136  }
137  int median_height() const {
138  return median_height_;
139  }
140  void set_median_height(int height) {
141  median_height_ = height;
142  }
143  int median_width() const {
144  return median_width_;
145  }
146  void set_median_width(int width) {
147  median_width_ = width;
148  }
149  BlobRegionType blob_type() const {
150  return blob_type_;
151  }
152  void set_blob_type(BlobRegionType t) {
153  blob_type_ = t;
154  }
155  BlobTextFlowType flow() const {
156  return flow_;
157  }
158  void set_flow(BlobTextFlowType f) {
159  flow_ = f;
160  }
161  int good_blob_score() const {
162  return good_blob_score_;
163  }
164  bool good_width() const {
165  return good_width_;
166  }
167  bool good_column() const {
168  return good_column_;
169  }
170  bool left_key_tab() const {
171  return left_key_tab_;
172  }
173  int left_key() const {
174  return left_key_;
175  }
176  bool right_key_tab() const {
177  return right_key_tab_;
178  }
179  int right_key() const {
180  return right_key_;
181  }
182  PolyBlockType type() const {
183  return type_;
184  }
185  void set_type(PolyBlockType t) {
186  type_ = t;
187  }
188  BLOBNBOX_CLIST* boxes() {
189  return &boxes_;
190  }
191  int boxes_count() const {
192  return boxes_.length();
193  }
194  void set_vertical(const ICOORD& v) {
195  vertical_ = v;
196  }
197  ColPartition_CLIST* upper_partners() {
198  return &upper_partners_;
199  }
200  ColPartition_CLIST* lower_partners() {
201  return &lower_partners_;
202  }
203  void set_working_set(WorkingPartSet* working_set) {
204  working_set_ = working_set;
205  }
206  bool block_owned() const {
207  return block_owned_;
208  }
209  void set_block_owned(bool owned) {
210  block_owned_ = owned;
211  }
212  bool desperately_merged() const {
213  return desperately_merged_;
214  }
216  return column_set_;
217  }
218  void set_side_step(int step) {
219  side_step_ = step;
220  }
221  int bottom_spacing() const {
222  return bottom_spacing_;
223  }
224  void set_bottom_spacing(int spacing) {
225  bottom_spacing_ = spacing;
226  }
227  int top_spacing() const {
228  return top_spacing_;
229  }
230  void set_top_spacing(int spacing) {
231  top_spacing_ = spacing;
232  }
233 
234  void set_table_type() {
235  if (type_ != PT_TABLE) {
236  type_before_table_ = type_;
237  type_ = PT_TABLE;
238  }
239  }
241  if (type_ == PT_TABLE)
242  type_ = type_before_table_;
243  }
245  return inside_table_column_;
246  }
247  void set_inside_table_column(bool val) {
248  inside_table_column_ = val;
249  }
251  return nearest_neighbor_above_;
252  }
254  nearest_neighbor_above_ = part;
255  }
257  return nearest_neighbor_below_;
258  }
260  nearest_neighbor_below_ = part;
261  }
262  int space_above() const {
263  return space_above_;
264  }
265  void set_space_above(int space) {
266  space_above_ = space;
267  }
268  int space_below() const {
269  return space_below_;
270  }
271  void set_space_below(int space) {
272  space_below_ = space;
273  }
274  int space_to_left() const {
275  return space_to_left_;
276  }
277  void set_space_to_left(int space) {
278  space_to_left_ = space;
279  }
280  int space_to_right() const {
281  return space_to_right_;
282  }
283  void set_space_to_right(int space) {
284  space_to_right_ = space;
285  }
286  uint8_t* color1() {
287  return color1_;
288  }
289  uint8_t* color2() {
290  return color2_;
291  }
292  bool owns_blobs() const {
293  return owns_blobs_;
294  }
295  void set_owns_blobs(bool owns_blobs) {
296  // Do NOT change ownership flag when there are blobs in the list.
297  // Immediately set the ownership flag when creating copies.
298  ASSERT_HOST(boxes_.empty());
299  owns_blobs_ = owns_blobs;
300  }
301 
302  // Inline quasi-accessors that require some computation.
303 
304  // Returns the middle y-coord of the bounding box.
305  int MidY() const {
306  return (bounding_box_.top() + bounding_box_.bottom()) / 2;
307  }
308  // Returns the middle y-coord of the median top and bottom.
309  int MedianY() const {
310  return (median_top_ + median_bottom_) / 2;
311  }
312  // Returns the middle x-coord of the bounding box.
313  int MidX() const {
314  return (bounding_box_.left() + bounding_box_.right()) / 2;
315  }
316  // Returns the sort key at any given x,y.
317  int SortKey(int x, int y) const {
318  return TabVector::SortKey(vertical_, x, y);
319  }
320  // Returns the x corresponding to the sortkey, y pair.
321  int XAtY(int sort_key, int y) const {
322  return TabVector::XAtY(vertical_, sort_key, y);
323  }
324  // Returns the x difference between the two sort keys.
325  int KeyWidth(int left_key, int right_key) const {
326  return (right_key - left_key) / vertical_.y();
327  }
328  // Returns the column width between the left and right keys.
329  int ColumnWidth() const {
330  return KeyWidth(left_key_, right_key_);
331  }
332  // Returns the sort key of the box left edge.
333  int BoxLeftKey() const {
334  return SortKey(bounding_box_.left(), MidY());
335  }
336  // Returns the sort key of the box right edge.
337  int BoxRightKey() const {
338  return SortKey(bounding_box_.right(), MidY());
339  }
340  // Returns the left edge at the given y, using the sort key.
341  int LeftAtY(int y) const {
342  return XAtY(left_key_, y);
343  }
344  // Returns the right edge at the given y, using the sort key.
345  int RightAtY(int y) const {
346  return XAtY(right_key_, y);
347  }
348  // Returns true if the right edge of this is to the left of the right
349  // edge of other.
350  bool IsLeftOf(const ColPartition& other) const {
351  return bounding_box_.right() < other.bounding_box_.right();
352  }
353  // Returns true if the partition contains the given x coordinate at the y.
354  bool ColumnContains(int x, int y) const {
355  return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
356  }
357  // Returns true if there are no blobs in the list.
358  bool IsEmpty() const {
359  return boxes_.empty();
360  }
361  // Returns true if there is a single blob in the list.
362  bool IsSingleton() const {
363  return boxes_.singleton();
364  }
365  // Returns true if this and other overlap horizontally by bounding box.
366  bool HOverlaps(const ColPartition& other) const {
367  return bounding_box_.x_overlap(other.bounding_box_);
368  }
369  // Returns true if this and other's bounding boxes overlap vertically.
370  // TODO(rays) Make HOverlaps and VOverlaps truly symmetric.
371  bool VOverlaps(const ColPartition& other) const {
372  return bounding_box_.y_gap(other.bounding_box_) < 0;
373  }
374  // Returns the vertical overlap (by median) of this and other.
375  // WARNING! Only makes sense on horizontal partitions!
376  int VCoreOverlap(const ColPartition& other) const {
377  if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
378  return 0;
379  }
380  return std::min(median_top_, other.median_top_) -
381  std::max(median_bottom_, other.median_bottom_);
382  }
383  // Returns the horizontal overlap (by median) of this and other.
384  // WARNING! Only makes sense on vertical partitions!
385  int HCoreOverlap(const ColPartition& other) const {
386  return std::min(median_right_, other.median_right_) -
387  std::max(median_left_, other.median_left_);
388  }
389  // Returns true if this and other overlap significantly vertically.
390  // WARNING! Only makes sense on horizontal partitions!
391  bool VSignificantCoreOverlap(const ColPartition& other) const {
392  if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) {
393  return false;
394  }
395  int overlap = VCoreOverlap(other);
396  int height = std::min(median_top_ - median_bottom_,
397  other.median_top_ - other.median_bottom_);
398  return overlap * 3 > height;
399  }
400  // Returns true if this and other can be combined without putting a
401  // horizontal step in either left or right edge of the resulting block.
402  bool WithinSameMargins(const ColPartition& other) const {
403  return left_margin_ <= other.bounding_box_.left() &&
404  bounding_box_.left() >= other.left_margin_ &&
405  bounding_box_.right() <= other.right_margin_ &&
406  right_margin_ >= other.bounding_box_.right();
407  }
408  // Returns true if the region types (aligned_text_) match.
409  // Lines never match anything, as they should never be merged or chained.
410  bool TypesMatch(const ColPartition& other) const {
411  return TypesMatch(blob_type_, other.blob_type_);
412  }
413  static bool TypesMatch(BlobRegionType type1, BlobRegionType type2) {
414  return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) &&
415  !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
416  }
417 
418  // Returns true if the types are similar to each other.
419  static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2) {
420  return (type1 == type2 ||
421  (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
422  (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
423  }
424 
425  // Returns true if partitions is of horizontal line type
426  bool IsLineType() const {
427  return PTIsLineType(type_);
428  }
429  // Returns true if partitions is of image type
430  bool IsImageType() const {
431  return PTIsImageType(type_);
432  }
433  // Returns true if partitions is of text type
434  bool IsTextType() const {
435  return PTIsTextType(type_);
436  }
437  // Returns true if partitions is of pullout(inter-column) type
438  bool IsPulloutType() const {
439  return PTIsPulloutType(type_);
440  }
441  // Returns true if the partition is of an exclusively vertical type.
442  bool IsVerticalType() const {
443  return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
444  }
445  // Returns true if the partition is of a definite horizontal type.
446  bool IsHorizontalType() const {
447  return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE;
448  }
449  // Returns true is the partition is of a type that cannot be merged.
450  bool IsUnMergeableType() const {
451  return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
452  }
453  // Returns true if this partition is a vertical line
454  // TODO(nbeato): Use PartitionType enum when Ray's code is submitted.
455  bool IsVerticalLine() const {
456  return IsVerticalType() && IsLineType();
457  }
458  // Returns true if this partition is a horizontal line
459  // TODO(nbeato): Use PartitionType enum when Ray's code is submitted.
460  bool IsHorizontalLine() const {
461  return IsHorizontalType() && IsLineType();
462  }
463 
464  // Adds the given box to the partition, updating the partition bounds.
465  // The list of boxes in the partition is updated, ensuring that no box is
466  // recorded twice, and the boxes are kept in increasing left position.
467  void AddBox(BLOBNBOX* box);
468 
469  // Removes the given box from the partition, updating the bounds.
470  void RemoveBox(BLOBNBOX* box);
471 
472  // Returns the tallest box in the partition, as measured perpendicular to the
473  // presumed flow of text.
474  BLOBNBOX* BiggestBox();
475 
476  // Returns the bounding box excluding the given box.
477  TBOX BoundsWithoutBox(BLOBNBOX* box);
478 
479 // Claims the boxes in the boxes_ list by setting the owner pointer of each
480 // one to this partition.
481  void ClaimBoxes();
482 
483 // Sets the owner of the blobs in this partition to nullptr, so they can be
484 // deleted independently of the ColPartition.
485  void DisownBoxes();
486  // nullptr the owner of the blobs in this partition that are owned by this
487  // partition, so they can be deleted independently of the ColPartition.
488  // Any blobs that are not owned by this partition get to keep their owner
489  // without an assert failure.
490  void DisownBoxesNoAssert();
491  // Nulls the owner of the blobs in this partition that are owned by this
492  // partition and not leader blobs, removing them from the boxes_ list, thus
493  // turning this partition back to a leader partition if it contains a leader,
494  // or otherwise leaving it empty. Returns true if any boxes remain.
495  bool ReleaseNonLeaderBoxes();
496 
497  // Delete the boxes that this partition owns.
498  void DeleteBoxes();
499 
500  // Reflects the partition in the y-axis, assuming that its blobs have
501  // already been done. Corrects only a limited part of the members, since
502  // this function is assumed to be used shortly after initial creation, which
503  // is before a lot of the members are used.
504  void ReflectInYAxis();
505 
506  // Returns true if this is a legal partition - meaning that the conditions
507  // left_margin <= bounding_box left
508  // left_key <= bounding box left key
509  // bounding box left <= bounding box right
510  // and likewise for right margin and key
511  // are all met.
512  bool IsLegal();
513 
514  // Returns true if the left and right edges are approximately equal.
515  bool MatchingColumns(const ColPartition& other) const;
516 
517  // Returns true if the colors match for two text partitions.
518  bool MatchingTextColor(const ColPartition& other) const;
519 
520  // Returns true if the sizes match for two text partitions,
521  // taking orientation into account
522  bool MatchingSizes(const ColPartition& other) const;
523 
524  // Returns true if there is no tabstop violation in merging this and other.
525  bool ConfirmNoTabViolation(const ColPartition& other) const;
526 
527  // Returns true if other has a similar stroke width to this.
528  bool MatchingStrokeWidth(const ColPartition& other,
529  double fractional_tolerance,
530  double constant_tolerance) const;
531  // Returns true if candidate is an acceptable diacritic base char merge
532  // with this as the diacritic.
533  bool OKDiacriticMerge(const ColPartition& candidate, bool debug) const;
534 
535  // Sets the sort key using either the tab vector, or the bounding box if
536  // the tab vector is nullptr. If the tab_vector lies inside the bounding_box,
537  // use the edge of the box as a key any way.
538  void SetLeftTab(const TabVector* tab_vector);
539  void SetRightTab(const TabVector* tab_vector);
540 
541  // Copies the left/right tab from the src partition, but if take_box is
542  // true, copies the box instead and uses that as a key.
543  void CopyLeftTab(const ColPartition& src, bool take_box);
544  void CopyRightTab(const ColPartition& src, bool take_box);
545 
546  // Returns the left rule line x coord of the leftmost blob.
547  int LeftBlobRule() const;
548  // Returns the right rule line x coord of the rightmost blob.
549  int RightBlobRule() const;
550 
551  // Returns the density value for a particular BlobSpecialTextType.
552  float SpecialBlobsDensity(const BlobSpecialTextType type) const;
553  // Returns the number of blobs for a particular BlobSpecialTextType.
554  int SpecialBlobsCount(const BlobSpecialTextType type);
555  // Set the density value for a particular BlobSpecialTextType, should ONLY be
556  // used for debugging or testing. In production code, use
557  // ComputeSpecialBlobsDensity instead.
558  void SetSpecialBlobsDensity(
559  const BlobSpecialTextType type, const float density);
560  // Compute the SpecialTextType density of blobs, where we assume
561  // that the SpecialTextType in the boxes_ has been set.
562  void ComputeSpecialBlobsDensity();
563 
564  // Add a partner above if upper, otherwise below.
565  // Add them uniquely and keep the list sorted by box left.
566  // Partnerships are added symmetrically to partner and this.
567  void AddPartner(bool upper, ColPartition* partner);
568  // Removes the partner from this, but does not remove this from partner.
569  // This asymmetric removal is so as not to mess up the iterator that is
570  // working on partner's partner list.
571  void RemovePartner(bool upper, ColPartition* partner);
572  // Returns the partner if the given partner is a singleton, otherwise nullptr.
573  ColPartition* SingletonPartner(bool upper);
574 
575  // Merge with the other partition and delete it.
576  void Absorb(ColPartition* other, WidthCallback* cb);
577 
578  // Returns true if the overlap between this and the merged pair of
579  // merge candidates is sufficiently trivial to be allowed.
580  // The merged box can graze the edge of this by the ok_box_overlap
581  // if that exceeds the margin to the median top and bottom.
582  bool OKMergeOverlap(const ColPartition& merge1, const ColPartition& merge2,
583  int ok_box_overlap, bool debug);
584 
585  // Find the blob at which to split this to minimize the overlap with the
586  // given box. Returns the first blob to go in the second partition.
587  BLOBNBOX* OverlapSplitBlob(const TBOX& box);
588 
589  // Split this partition keeping the first half in this and returning
590  // the second half.
591  // Splits by putting the split_blob and the blobs that follow
592  // in the second half, and the rest in the first half.
593  ColPartition* SplitAtBlob(BLOBNBOX* split_blob);
594 
595  // Splits this partition at the given x coordinate, returning the right
596  // half and keeping the left half in this.
597  ColPartition* SplitAt(int split_x);
598 
599  // Recalculates all the coordinate limits of the partition.
600  void ComputeLimits();
601 
602  // Returns the number of boxes that overlap the given box.
603  int CountOverlappingBoxes(const TBOX& box);
604 
605  // Computes and sets the type_, first_column_, last_column_ and column_set_.
606  // resolution refers to the ppi resolution of the image.
607  void SetPartitionType(int resolution, ColPartitionSet* columns);
608 
609  // Returns the PartitionType from the current BlobRegionType and a column
610  // flow spanning type ColumnSpanningType, generated by
611  // ColPartitionSet::SpanningType, that indicates how the partition sits
612  // in the columns.
613  PolyBlockType PartitionType(ColumnSpanningType flow) const;
614 
615  // Returns the first and last column touched by this partition.
616  // resolution refers to the ppi resolution of the image.
617  void ColumnRange(int resolution, ColPartitionSet* columns,
618  int* first_col, int* last_col);
619 
620  // Sets the internal flags good_width_ and good_column_.
621  void SetColumnGoodness(WidthCallback* cb);
622 
623  // Determines whether the blobs in this partition mostly represent
624  // a leader (fixed pitch sequence) and sets the member blobs accordingly.
625  // Note that height is assumed to have been tested elsewhere, and that this
626  // function will find most fixed-pitch text as leader without a height filter.
627  // Leader detection is limited to sequences of identical width objects,
628  // such as .... or ----, so patterns, such as .-.-.-.-. will not be found.
629  bool MarkAsLeaderIfMonospaced();
630  // Given the result of TextlineProjection::EvaluateColPartition, (positive for
631  // horizontal text, negative for vertical text, and near zero for non-text),
632  // sets the blob_type_ and flow_ for this partition to indicate whether it
633  // is strongly or weakly vertical or horizontal text, or non-text.
634  void SetRegionAndFlowTypesFromProjectionValue(int value);
635 
636  // Sets all blobs with the partition blob type and flow, but never overwrite
637  // leader blobs, as we need to be able to identify them later.
638  void SetBlobTypes();
639 
640  // Returns true if a decent baseline can be fitted through the blobs.
641  // Works for both horizontal and vertical text.
642  bool HasGoodBaseline();
643 
644  // Adds this ColPartition to a matching WorkingPartSet if one can be found,
645  // otherwise starts a new one in the appropriate column, ending the previous.
646  void AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright,
647  int resolution, ColPartition_LIST* used_parts,
648  WorkingPartSet_LIST* working_set);
649 
650  // From the given block_parts list, builds one or more BLOCKs and
651  // corresponding TO_BLOCKs, such that the line spacing is uniform in each.
652  // Created blocks are appended to the end of completed_blocks and to_blocks.
653  // The used partitions are put onto used_parts, as they may still be referred
654  // to in the partition grid. bleft, tright and resolution are the bounds
655  // and resolution of the original image.
656  static void LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
657  int resolution,
658  ColPartition_LIST* block_parts,
659  ColPartition_LIST* used_parts,
660  BLOCK_LIST* completed_blocks,
661  TO_BLOCK_LIST* to_blocks);
662  // Constructs a block from the given list of partitions.
663  // Arguments are as LineSpacingBlocks above.
664  static TO_BLOCK* MakeBlock(const ICOORD& bleft, const ICOORD& tright,
665  ColPartition_LIST* block_parts,
666  ColPartition_LIST* used_parts);
667 
668  // Constructs a block from the given list of vertical text partitions.
669  // Currently only creates rectangular blocks.
670  static TO_BLOCK* MakeVerticalTextBlock(const ICOORD& bleft,
671  const ICOORD& tright,
672  ColPartition_LIST* block_parts,
673  ColPartition_LIST* used_parts);
674 
675  // Makes a TO_ROW matching this and moves all the blobs to it, transferring
676 // ownership to the returned TO_ROW.
677  TO_ROW* MakeToRow();
678 
679 
680  // Returns a copy of everything except the list of boxes. The resulting
681  // ColPartition is only suitable for keeping in a column candidate list.
682  ColPartition* ShallowCopy() const;
683  // Returns a copy of everything with a shallow copy of the blobs.
684  // The blobs are still owned by their original parent, so they are
685  // treated as read-only.
686  ColPartition* CopyButDontOwnBlobs();
687 
688  #ifndef GRAPHICS_DISABLED
689  // Provides a color for BBGrid to draw the rectangle.
690  ScrollView::Color BoxColor() const;
691  #endif // GRAPHICS_DISABLED
692 
693  // Prints debug information on this.
694  void Print() const;
695  // Prints debug information on the colors.
696  void PrintColors();
697 
698  // Sets the types of all partitions in the run to be the max of the types.
699  void SmoothPartnerRun(int working_set_count);
700 
701  // Cleans up the partners of the given type so that there is at most
702  // one partner. This makes block creation simpler.
703  // If get_desperate is true, goes to more desperate merge methods
704  // to merge flowing text before breaking partnerships.
705  void RefinePartners(PolyBlockType type, bool get_desperate,
706  ColPartitionGrid* grid);
707 
708  // Returns true if this column partition is in the same column as
709  // part. This function will only work after the SetPartitionType function
710  // has been called on both column partitions. This is useful for
711  // doing a SideSearch when you want things in the same page column.
712  bool IsInSameColumnAs(const ColPartition& part) const;
713 
714  // Sort function to sort by bounding box.
715  static int SortByBBox(const void* p1, const void* p2) {
716  const ColPartition* part1 = *static_cast<const ColPartition* const*>(p1);
717  const ColPartition* part2 = *static_cast<const ColPartition* const*>(p2);
718  int mid_y1 = part1->bounding_box_.y_middle();
719  int mid_y2 = part2->bounding_box_.y_middle();
720  if ((part2->bounding_box_.bottom() <= mid_y1 &&
721  mid_y1 <= part2->bounding_box_.top()) ||
722  (part1->bounding_box_.bottom() <= mid_y2 &&
723  mid_y2 <= part1->bounding_box_.top())) {
724  // Sort by increasing x.
725  return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle();
726  }
727  // Sort by decreasing y.
728  return mid_y2 - mid_y1;
729  }
730 
731  // Sets the column bounds. Primarily used in testing.
732  void set_first_column(int column) {
733  first_column_ = column;
734  }
735  void set_last_column(int column) {
736  last_column_ = column;
737  }
738 
739  private:
740  // enum to refer to the entries in a neighbourhood of lines.
741  // Used by SmoothSpacings to test for blips with OKSpacingBlip.
749  PN_COUNT
750  };
751 
752  // Cleans up the partners above if upper is true, else below.
753  // If get_desperate is true, goes to more desperate merge methods
754  // to merge flowing text before breaking partnerships.
755  void RefinePartnersInternal(bool upper, bool get_desperate,
756  ColPartitionGrid* grid);
757  // Restricts the partners to only desirable types. For text and BRT_HLINE this
758  // means the same type_ , and for image types it means any image type.
759  void RefinePartnersByType(bool upper, ColPartition_CLIST* partners);
760  // Remove transitive partnerships: this<->a, and a<->b and this<->b.
761  // Gets rid of this<->b, leaving a clean chain.
762  // Also if we have this<->a and a<->this, then gets rid of this<->a, as
763  // this has multiple partners.
764  void RefinePartnerShortcuts(bool upper, ColPartition_CLIST* partners);
765  // If multiple text partners can be merged, then do so.
766  // If desperate is true, then an increase in overlap with the merge is
767  // allowed. If the overlap increases, then the desperately_merged_ flag
768  // is set, indicating that the textlines probably need to be regenerated
769  // by aggressive line fitting/splitting, as there are probably vertically
770  // joined blobs that cross textlines.
771  void RefineTextPartnersByMerge(bool upper, bool desperate,
772  ColPartition_CLIST* partners,
773  ColPartitionGrid* grid);
774  // Keep the partner with the biggest overlap.
775  void RefinePartnersByOverlap(bool upper, ColPartition_CLIST* partners);
776 
777  // Return true if bbox belongs better in this than other.
778  bool ThisPartitionBetter(BLOBNBOX* bbox, const ColPartition& other);
779 
780  // Smoothes the spacings in the list into groups of equal linespacing.
781  // resolution is the resolution of the original image, used as a basis
782  // for thresholds in change of spacing. page_height is in pixels.
783  static void SmoothSpacings(int resolution, int page_height,
784  ColPartition_LIST* parts);
785 
786  // Returns true if the parts array of pointers to partitions matches the
787  // condition for a spacing blip. See SmoothSpacings for what this means
788  // and how it is used.
789  static bool OKSpacingBlip(int resolution, int median_spacing,
790  ColPartition** parts);
791 
792  // Returns true if both the top and bottom spacings of this match the given
793  // spacing to within suitable margins dictated by the image resolution.
794  bool SpacingEqual(int spacing, int resolution) const;
795 
796  // Returns true if both the top and bottom spacings of this and other
797  // match to within suitable margins dictated by the image resolution.
798  bool SpacingsEqual(const ColPartition& other, int resolution) const;
799 
800  // Returns true if the sum spacing of this and other match the given
801  // spacing (or twice the given spacing) to within a suitable margin dictated
802  // by the image resolution.
803  bool SummedSpacingOK(const ColPartition& other,
804  int spacing, int resolution) const;
805 
806  // Returns a suitable spacing margin that can be applied to bottoms of
807  // text lines, based on the resolution and the stored side_step_.
808  int BottomSpacingMargin(int resolution) const;
809 
810  // Returns a suitable spacing margin that can be applied to tops of
811  // text lines, based on the resolution and the stored side_step_.
812  int TopSpacingMargin(int resolution) const;
813 
814  // Returns true if the median text sizes of this and other agree to within
815  // a reasonable multiplicative factor.
816  bool SizesSimilar(const ColPartition& other) const;
817 
818  // Computes and returns in start, end a line segment formed from a
819  // forwards-iterated group of left edges of partitions that satisfy the
820  // condition that the rightmost left margin is to the left of the
821  // leftmost left bounding box edge.
822  // TODO(rays) Not good enough. Needs improving to tightly wrap text in both
823  // directions, and to loosely wrap images.
824  static void LeftEdgeRun(ColPartition_IT* part_it,
825  ICOORD* start, ICOORD* end);
826  // Computes a line segment, returned through start and end, formed from a
827  // backwards-iterated group of right edges of partitions that satisfy the
828  // condition that the leftmost right margin is to the right of the
829  // rightmost right bounding box edge.
830  // TODO(rays): Not good enough. Needs improving to tightly wrap text in both
831  // directions, and to loosely wrap images.
832  static void RightEdgeRun(ColPartition_IT* part_it,
833  ICOORD* start, ICOORD* end);
834 
835  // The margins are determined by the position of the nearest vertically
836  // overlapping neighbour to the side. They indicate the maximum extent
837  // that the block/column may be extended without touching something else.
838  // Leftmost coordinate that the region may occupy over the y limits.
840  // Rightmost coordinate that the region may occupy over the y limits.
842  // Bounding box of all blobs in the partition.
844  // Median top and bottom of blobs in this partition.
847  // Median height of blobs in this partition.
849  // Median left and right of blobs in this partition.
852  // Median width of blobs in this partition.
854  // blob_region_type_ for the blobs in this partition.
855  BlobRegionType blob_type_;
856  BlobTextFlowType flow_; // Quality of text flow.
857  // Total of GoodTextBlob results for all blobs in the partition.
859  // True if this partition has a common width.
861  // True if this is a good column candidate.
863  // True if the left_key_ is from a tab vector.
865  // True if the right_key_ is from a tab vector.
867  // Left and right sort keys for the edges of the partition.
868  // If the respective *_key_tab_ is true then this key came from a tab vector.
869  // If not, then the class promises to keep the key equal to the sort key
870  // for the respective edge of the bounding box at the MidY, so that
871  // LeftAtY and RightAtY always return an x coordinate on the line parallel
872  // to vertical_ through the bounding box edge at MidY.
875  // Type of this partition after looking at its relation to the columns.
876  PolyBlockType type_;
877  // All boxes in the partition stored in increasing left edge coordinate.
878  BLOBNBOX_CLIST boxes_;
879  // The global vertical skew direction.
881  // The partitions above that matched this.
882  ColPartition_CLIST upper_partners_;
883  // The partitions below that matched this.
884  ColPartition_CLIST lower_partners_;
885  // The WorkingPartSet it lives in while blocks are being made.
887  // Flag is true when AddBox is sorting vertically, false otherwise.
889  // True when the partition's ownership has been taken from the grid and
890  // placed in a working set, or, after that, in the good_parts_ list.
892  // Flag to indicate that this partition was subjected to a desperate merge,
893  // and therefore the textlines need rebuilding.
895  // The first and last column that this partition applies to.
896  // Flowing partitions (see type_) will have an equal first and last value
897  // of the form 2n + 1, where n is the zero-based index into the partitions
898  // in column_set_. (See ColPartitionSet::GetColumnByIndex).
899  // Heading partitions will have unequal values of the same form.
900  // Pullout partitions will have equal values, but may have even values,
901  // indicating placement between columns.
904  // column_set_ is the column layout applicable to this ColPartition.
906  // Linespacing data.
907  int side_step_; // Median y-shift to next blob on same line.
908  int top_spacing_; // Line spacing from median_top_.
909  int bottom_spacing_; // Line spacing from median_bottom_.
910 
911  // Type of this partition before considering it as a table cell. This is
912  // used to revert the type if a partition is first marked as a table cell, but
913  // later filtering steps decide it does not belong to a table.
914  PolyBlockType type_before_table_;
915  bool inside_table_column_; // Whether the current partition has been
916  // assigned to a table column.
917  // Nearest neighbor above with major x-overlap.
919  // Nearest neighbor below with major x-overlap.
921  int space_above_; // Distance from nearest_neighbor_above_.
922  int space_below_; // Distance from nearest_neighbor_below_.
923  int space_to_left_; // Distance from the left edge of the column.
924  int space_to_right_; // Distance from the right edge of the column.
925  // Color foreground/background data.
926  uint8_t color1_[kRGBRMSColors];
927  uint8_t color2_[kRGBRMSColors];
928  bool owns_blobs_; // Does the partition own its blobs?
929  // The density of special blobs.
930  float special_blobs_densities_[BSTT_COUNT];
931 };
932 
933 // Typedef it now in case it becomes a class later.
935  ColPartition_CLIST,
936  ColPartition_C_IT> ;
937 
938 } // namespace tesseract.
939 
940 #endif // TESSERACT_TEXTORD_COLPARTITION_H_
bool desperately_merged_
Definition: colpartition.h:894
bool left_key_tab() const
Definition: colpartition.h:170
void set_space_to_right(int space)
Definition: colpartition.h:283
Definition: colpartitiongrid.h:33
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:431
bool IsUnMergeableType() const
Definition: colpartition.h:450
Definition: workingpartset.h:32
void set_nearest_neighbor_below(ColPartition *part)
Definition: colpartition.h:259
void set_owns_blobs(bool owns_blobs)
Definition: colpartition.h:295
int top_spacing_
Definition: colpartition.h:908
int median_bottom_
Definition: colpartition.h:845
bool good_width_
Definition: colpartition.h:860
ColPartition * nearest_neighbor_below_
Definition: colpartition.h:920
int median_width_
Definition: colpartition.h:853
void set_type(PolyBlockType t)
Definition: colpartition.h:185
int right_key() const
Definition: colpartition.h:179
int right_margin_
Definition: colpartition.h:841
void set_space_to_left(int space)
Definition: colpartition.h:277
int16_t right() const
Definition: rect.h:79
BlobRegionType blob_type_
Definition: colpartition.h:855
int median_height() const
Definition: colpartition.h:137
int median_right() const
Definition: colpartition.h:134
Definition: colpartition.h:52
bool VOverlaps(const ColPartition &other) const
Definition: colpartition.h:371
int x_middle() const
Definition: rect.h:85
int median_width() const
Definition: colpartition.h:143
ColPartition_CLIST * lower_partners()
Definition: colpartition.h:200
bool IsHorizontalLine() const
Definition: colpartition.h:460
static int SortByBBox(const void *p1, const void *p2)
Definition: colpartition.h:715
Definition: colpartitionset.h:40
Definition: bbgrid.h:49
void set_median_height(int height)
Definition: colpartition.h:140
int space_to_right() const
Definition: colpartition.h:280
int LeftAtY(int y) const
Definition: colpartition.h:341
bool IsImageType() const
Definition: colpartition.h:430
int median_top() const
Definition: colpartition.h:125
int ColumnWidth() const
Definition: colpartition.h:329
Definition: tabvector.h:112
int XAtY(int y) const
Definition: tabvector.h:189
bool owns_blobs_
Definition: colpartition.h:928
ColPartitionSet * column_set_
Definition: colpartition.h:905
bool right_key_tab_
Definition: colpartition.h:866
Color
Definition: scrollview.h:105
Definition: rect.h:34
uint8_t * color1()
Definition: colpartition.h:286
int space_above_
Definition: colpartition.h:921
void set_right_margin(int margin)
Definition: colpartition.h:122
void set_nearest_neighbor_above(ColPartition *part)
Definition: colpartition.h:253
PolyBlockType type_before_table_
Definition: colpartition.h:914
bool right_key_tab() const
Definition: colpartition.h:176
PolyBlockType type_
Definition: colpartition.h:876
ColPartition_CLIST * upper_partners()
Definition: colpartition.h:197
int median_height_
Definition: colpartition.h:848
void set_blob_type(BlobRegionType t)
Definition: colpartition.h:152
ColPartition_CLIST upper_partners_
Definition: colpartition.h:882
int first_column_
Definition: colpartition.h:902
int left_margin_
Definition: colpartition.h:839
bool IsVerticalType() const
Definition: colpartition.h:442
int BoxLeftKey() const
Definition: colpartition.h:333
Definition: baseapi.cpp:94
int good_blob_score_
Definition: colpartition.h:858
int boxes_count() const
Definition: colpartition.h:191
bool good_column_
Definition: colpartition.h:862
bool HOverlaps(const ColPartition &other) const
Definition: colpartition.h:366
int space_below_
Definition: colpartition.h:922
int space_to_left() const
Definition: colpartition.h:274
void set_vertical(const ICOORD &v)
Definition: colpartition.h:194
void set_block_owned(bool owned)
Definition: colpartition.h:209
int16_t bottom() const
Definition: rect.h:65
bool left_key_tab_
Definition: colpartition.h:864
bool IsEmpty() const
Definition: colpartition.h:358
void set_bottom_spacing(int spacing)
Definition: colpartition.h:224
const TBOX & bounding_box() const
Definition: colpartition.h:110
int median_bottom() const
Definition: colpartition.h:128
ColPartition * nearest_neighbor_below() const
Definition: colpartition.h:256
bool good_width() const
Definition: colpartition.h:164
bool ColumnContains(int x, int y) const
Definition: colpartition.h:354
int KeyWidth(int left_key, int right_key) const
Definition: colpartition.h:325
bool TypesMatch(const ColPartition &other) const
Definition: colpartition.h:410
bool IsSingleton() const
Definition: colpartition.h:362
int RightAtY(int y) const
Definition: colpartition.h:345
ColPartition * nearest_neighbor_above_
Definition: colpartition.h:918
Definition: colpartition.h:743
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:427
void set_working_set(WorkingPartSet *working_set)
Definition: colpartition.h:203
int side_step_
Definition: colpartition.h:907
bool VSignificantCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:391
Definition: blobbox.h:144
int left_margin() const
Definition: colpartition.h:113
Definition: colpartition.h:745
void set_inside_table_column(bool val)
Definition: colpartition.h:247
BLOBNBOX_CLIST boxes_
Definition: colpartition.h:878
void set_median_width(int width)
Definition: colpartition.h:146
void set_first_column(int column)
Definition: colpartition.h:732
int bottom_spacing_
Definition: colpartition.h:909
ICOORD vertical_
Definition: colpartition.h:880
int space_above() const
Definition: colpartition.h:262
bool IsPulloutType() const
Definition: colpartition.h:438
bool WithinSameMargins(const ColPartition &other) const
Definition: colpartition.h:402
uint8_t * color2()
Definition: colpartition.h:289
bool desperately_merged() const
Definition: colpartition.h:212
int right_margin() const
Definition: colpartition.h:119
BlobTextFlowType flow() const
Definition: colpartition.h:155
int SortKey(int x, int y) const
Definition: colpartition.h:317
WorkingPartSet * working_set_
Definition: colpartition.h:886
Definition: tesscallback.h:1673
int median_left_
Definition: colpartition.h:850
Definition: colpartition.h:746
ColPartition * nearest_neighbor_above() const
Definition: colpartition.h:250
int XAtY(int sort_key, int y) const
Definition: colpartition.h:321
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
Definition: colpartition.h:419
int y_middle() const
Definition: rect.h:88
void clear_table_type()
Definition: colpartition.h:240
void set_flow(BlobTextFlowType f)
Definition: colpartition.h:158
int median_top_
Definition: colpartition.h:846
bool good_column() const
Definition: colpartition.h:167
int bottom_spacing() const
Definition: colpartition.h:221
ColPartitionSet * column_set() const
Definition: colpartition.h:215
Definition: blobbox.h:556
bool block_owned() const
Definition: colpartition.h:206
int left_key_
Definition: colpartition.h:873
integer coordinate
Definition: points.h:32
bool inside_table_column()
Definition: colpartition.h:244
int16_t top() const
Definition: rect.h:58
void set_table_type()
Definition: colpartition.h:234
bool IsTextType() const
Definition: colpartition.h:434
Definition: colpartition.h:53
void set_space_above(int space)
Definition: colpartition.h:265
int space_below() const
Definition: colpartition.h:268
bool last_add_was_vertical_
Definition: colpartition.h:888
int MidX() const
Definition: colpartition.h:313
int median_right_
Definition: colpartition.h:851
Definition: colpartition.h:51
void set_top_spacing(int spacing)
Definition: colpartition.h:230
bool block_owned_
Definition: colpartition.h:891
int top_spacing() const
Definition: colpartition.h:227
int HCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:385
void set_last_column(int column)
Definition: colpartition.h:735
int16_t left() const
Definition: rect.h:72
bool IsLineType() const
Definition: colpartition.h:426
int VCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:376
Definition: colpartition.h:744
void set_space_below(int space)
Definition: colpartition.h:271
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280
Definition: colpartition.h:49
int MidY() const
Definition: colpartition.h:305
SpacingNeighbourhood
Definition: colpartition.h:742
bool IsVerticalLine() const
Definition: colpartition.h:455
int BoxRightKey() const
Definition: colpartition.h:337
void set_side_step(int step)
Definition: colpartition.h:218
Definition: blobbox.h:705
const int kRGBRMSColors
Definition: colpartition.h:37
int MedianY() const
Definition: colpartition.h:309
int space_to_right_
Definition: colpartition.h:924
BlobTextFlowType flow_
Definition: colpartition.h:856
ColPartition_CLIST lower_partners_
Definition: colpartition.h:884
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:188
PolyBlockType type() const
Definition: colpartition.h:182
bool IsLeftOf(const ColPartition &other) const
Definition: colpartition.h:350
bool inside_table_column_
Definition: colpartition.h:915
TBOX bounding_box_
Definition: colpartition.h:843
BlobRegionType blob_type() const
Definition: colpartition.h:149
void set_left_margin(int margin)
Definition: colpartition.h:116
ColumnSpanningType
Definition: colpartition.h:48
int median_left() const
Definition: colpartition.h:131
Definition: colpartition.h:50
bool owns_blobs() const
Definition: colpartition.h:292
int good_blob_score() const
Definition: colpartition.h:161
Definition: colpartition.h:747
bool IsHorizontalType() const
Definition: colpartition.h:446
int last_column_
Definition: colpartition.h:903
Definition: colpartition.h:68
Definition: colpartition.h:748
int right_key_
Definition: colpartition.h:874
static bool TypesMatch(BlobRegionType type1, BlobRegionType type2)
Definition: colpartition.h:413
int left_key() const
Definition: colpartition.h:173
int space_to_left_
Definition: colpartition.h:923