tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
split.h
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: split.h
5  * Description:
6  * Author: Mark Seaman, SW Productivity
7  * Status: Reusable Software Component
8  *
9  * (c) Copyright 1987, Hewlett-Packard Company.
10  ** Licensed under the Apache License, Version 2.0 (the "License");
11  ** you may not use this file except in compliance with the License.
12  ** You may obtain a copy of the License at
13  ** http://www.apache.org/licenses/LICENSE-2.0
14  ** Unless required by applicable law or agreed to in writing, software
15  ** distributed under the License is distributed on an "AS IS" BASIS,
16  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  ** See the License for the specific language governing permissions and
18  ** limitations under the License.
19  *
20  *****************************************************************************/
21 #ifndef SPLIT_H
22 #define SPLIT_H
23 
24 /*----------------------------------------------------------------------
25  I n c l u d e s
26 ----------------------------------------------------------------------*/
27 
28 #include "blobs.h" // for EDGEPT, TBLOB, TESSLINE
29 #include "params.h" // for BOOL_VAR_H, BoolParam
30 #include "rect.h" // for TBOX
31 
32 class ScrollView;
33 
34 /*----------------------------------------------------------------------
35  T y p e s
36 ----------------------------------------------------------------------*/
37 struct SPLIT {
38  SPLIT() : point1(nullptr), point2(nullptr) {}
39  SPLIT(EDGEPT* pt1, EDGEPT* pt2) : point1(pt1), point2(pt2) {}
40 
41  // Returns the bounding box of all the points in the split.
42  TBOX bounding_box() const;
43  // Returns the bounding box of the outline from point1 to point2.
44  TBOX Box12() const { return point1->SegmentBox(point2); }
45  // Returns the bounding box of the outline from point1 to point1.
46  TBOX Box21() const { return point2->SegmentBox(point1); }
47  // Returns the bounding box of the out
48 
49  // Hides the SPLIT so the outlines appear not to be cut by it.
50  void Hide() const;
51  // Undoes hide, so the outlines are cut by the SPLIT.
52  void Reveal() const;
53 
54  // Returns true if the given EDGEPT is used by this SPLIT, checking only
55  // the EDGEPT pointer, not the coordinates.
56  bool UsesPoint(const EDGEPT* point) const {
57  return point1 == point || point2 == point;
58  }
59  // Returns true if the other SPLIT has any position shared with *this.
60  bool SharesPosition(const SPLIT& other) const {
61  return point1->EqualPos(*other.point1) || point1->EqualPos(*other.point2) ||
62  point2->EqualPos(*other.point1) || point2->EqualPos(*other.point2);
63  }
64  // Returns true if both points are contained within the blob.
65  bool ContainedByBlob(const TBLOB& blob) const {
66  return blob.Contains(point1->pos) && blob.Contains(point2->pos);
67  }
68  // Returns true if both points are contained within the outline.
69  bool ContainedByOutline(const TESSLINE& outline) const {
70  return outline.Contains(point1->pos) && outline.Contains(point2->pos);
71  }
72  // Compute a split priority based on the bounding boxes of the parts.
73  // The arguments here are config parameters defined in Wordrec. Add chop_
74  // to the beginning of the name.
75  float FullPriority(int xmin, int xmax, double overlap_knob,
76  int centered_maxwidth, double center_knob,
77  double width_change_knob) const;
78  // Returns true if *this SPLIT appears OK in the sense that it does not cross
79  // any outlines and does not chop off any ridiculously small pieces.
80  bool IsHealthy(const TBLOB& blob, int min_points, int min_area) const;
81  // Returns true if the split generates a small chunk in terms of either area
82  // or number of points.
83  bool IsLittleChunk(int min_points, int min_area) const;
84 
85  void Print() const;
86 #ifndef GRAPHICS_DISABLED
87  // Draws the split in the given window.
88  void Mark(ScrollView* window) const;
89 #endif
90 
91  // Creates two outlines out of one by splitting the original one in half.
92  // Inserts the resulting outlines into the given list.
93  void SplitOutlineList(TESSLINE* outlines) const;
94  // Makes a split between these two edge points, but does not affect the
95  // outlines to which they belong.
96  void SplitOutline() const;
97  // Undoes the effect of SplitOutlineList, correcting the outlines for undoing
98  // the split, but possibly leaving some duplicate outlines.
99  void UnsplitOutlineList(TBLOB* blob) const;
100  // Removes the split that was put between these two points.
101  void UnsplitOutlines() const;
102 
105 };
106 
107 /*----------------------------------------------------------------------
108  V a r i a b l e s
109 ----------------------------------------------------------------------*/
110 
// Declaration of the wordrec_display_splits flag via the BOOL_VAR_H macro from
// params.h; the macro arguments give a default of 0 and the help text
// "Display splits". NOTE(review): presumably expands to a BoolParam
// declaration whose definition lives in a .cpp file — confirm in params.h.
111 extern BOOL_VAR_H(wordrec_display_splits, 0, "Display splits");
112 
113 /*----------------------------------------------------------------------
114  F u n c t i o n s
115 ----------------------------------------------------------------------*/
// Declared here, defined elsewhere. NOTE(review): judging by the signature,
// this presumably creates a new EDGEPT at (x, y) linked between prev and next
// and returns it — confirm against the definition, including who owns the
// returned pointer.
116 EDGEPT *make_edgept(int x, int y, EDGEPT *next, EDGEPT *prev);
117 
// Declared here, defined elsewhere. NOTE(review): presumably unlinks the given
// point from its outline and releases it — confirm against the definition
// before relying on the pointer afterwards.
118 void remove_edgept(EDGEPT *point);
119 
120 #endif
bool Contains(const TPOINT &pt) const
Definition: blobs.h:331
EDGEPT * point1
Definition: split.h:103
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: split.cpp:87
Definition: split.h:37
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: split.cpp:121
Definition: blobs.h:83
bool SharesPosition(const SPLIT &other) const
Definition: split.h:60
void SplitOutline() const
Definition: split.cpp:260
bool EqualPos(const EDGEPT &other) const
Definition: blobs.h:112
Definition: rect.h:34
bool Contains(const TPOINT &pt) const
Definition: blobs.h:241
bool UsesPoint(const EDGEPT *point) const
Definition: split.h:56
TBOX Box21() const
Definition: split.h:46
TBOX Box12() const
Definition: split.h:44
bool IsLittleChunk(int min_points, int min_area) const
Definition: split.cpp:128
void UnsplitOutlineList(TBLOB *blob) const
Definition: split.cpp:285
bool ContainedByBlob(const TBLOB &blob) const
Definition: split.h:65
Definition: blobs.h:187
void UnsplitOutlines() const
Definition: split.cpp:301
Definition: scrollview.h:102
SPLIT(EDGEPT *pt1, EDGEPT *pt2)
Definition: split.h:39
bool ContainedByOutline(const TESSLINE &outline) const
Definition: split.h:69
void SplitOutlineList(TESSLINE *outlines) const
Definition: split.cpp:241
TBOX SegmentBox(const EDGEPT *end) const
Definition: blobs.h:115
EDGEPT * point2
Definition: split.h:104
void Reveal() const
Definition: split.cpp:71
TBOX bounding_box() const
Definition: split.cpp:50
SPLIT()
Definition: split.h:38
void Print() const
Definition: split.cpp:225
TPOINT pos
Definition: blobs.h:170
Definition: blobs.h:268
void Mark(ScrollView *window) const
Definition: split.cpp:232
void Hide() const
Definition: split.cpp:57