tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
devanagari_processing.h
1 // Copyright 2008 Google Inc. All Rights Reserved.
2 // Author: shobhitsaxena@google.com (Shobhit Saxena)
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 // http://www.apache.org/licenses/LICENSE-2.0
7 // Unless required by applicable law or agreed to in writing, software
8 // distributed under the License is distributed on an "AS IS" BASIS,
9 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 // See the License for the specific language governing permissions and
11 // limitations under the License.
12 
13 #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
14 #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
15 
16 #include "allheaders.h"
17 #include "debugpixa.h"
18 #include "ocrblock.h"
19 #include "params.h"
20 
21 struct Pix;
22 struct Box;
23 struct Boxa;
24 
25 extern
26 INT_VAR_H(devanagari_split_debuglevel, 0,
27  "Debug level for split shiro-rekha process.");
28 
29 extern
30 BOOL_VAR_H(devanagari_split_debugimage, 0,
31  "Whether to create a debug image for split shiro-rekha process.");
32 
33 class TBOX;
34 
35 namespace tesseract {
36 
// Owns a heap-allocated integer histogram (hist_, length_ entries) that is
// built from an image by one of the Construct*CountHist() methods, which are
// implemented outside this header.
//
// NOTE(review): the class header and the constructor/destructor signature
// lines were missing from the listing this file was recovered from; they are
// restored here from the definition index that accompanies the listing.
// NOTE(review): the class owns a raw buffer but keeps the implicit copy
// operations; copying an instance would make two objects delete[] the same
// buffer. Left as is to keep the interface unchanged.
class PixelHistogram {
 public:
  // Starts in the empty state: no buffer, zero length.
  PixelHistogram() : hist_(nullptr), length_(0) {}

  // Releases the histogram buffer, if any.
  ~PixelHistogram() { Clear(); }

  // Frees the histogram buffer and returns the object to the empty state.
  // Safe to call any number of times.
  void Clear() {
    delete[] hist_;
    // Null the pointer so that a later Clear() -- the destructor always
    // issues one -- cannot delete[] the same buffer a second time.
    hist_ = nullptr;
    length_ = 0;
  }

  // Returns the raw histogram array (nullptr when empty). The array remains
  // owned by this object.
  int* hist() const { return hist_; }

  // Returns the stored histogram length (0 when empty).
  int length() const { return length_; }

  // Methods to construct histograms from images. These clear any existing
  // data. `struct Pix` names the same type as the file-level forward
  // declaration; the elaborated form keeps this class self-contained.
  void ConstructVerticalCountHist(struct Pix* pix);
  void ConstructHorizontalCountHist(struct Pix* pix);

  // Returns the global maximum of the histogram. The frequency of that
  // maximum is written to *count, if count is specified.
  int GetHistogramMaximum(int* count) const;

 private:
  int* hist_;   // Histogram buffer, released with delete[]; nullptr if empty.
  int length_;  // Stored length of the histogram; 0 if empty.
};
71 
73  public:
75  NO_SPLIT = 0, // No splitting is performed for the phase.
76  MINIMAL_SPLIT, // Blobs are split minimally.
77  MAXIMAL_SPLIT // Blobs are split maximally.
78  };
79 
81  virtual ~ShiroRekhaSplitter();
82 
83  // Top-level method to perform splitting based on current settings.
84  // Returns true if a split was actually performed.
85  // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
86  // splitting. If false, the ocr_split_strategy_ is used.
87  bool Split(bool split_for_pageseg, DebugPixa* pixa_debug);
88 
89  // Clears the memory held by this object.
90  void Clear();
91 
92  // Refreshes the words in the segmentation block list by using blobs in the
93  // input blob list.
94  // The segmentation block list must be set.
95  void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs);
96 
97  // Returns true if the split strategies for pageseg and ocr are different.
99  return pageseg_split_strategy_ != ocr_split_strategy_;
100  }
101 
102  // This only keeps a copy of the block list pointer. At split call, the list
103  // object should still be alive. This block list is used as a golden
104  // segmentation when performing splitting.
105  void set_segmentation_block_list(BLOCK_LIST* block_list) {
106  segmentation_block_list_ = block_list;
107  }
108 
109  static const int kUnspecifiedXheight = -1;
110 
111  void set_global_xheight(int xheight) {
112  global_xheight_ = xheight;
113  }
114 
115  void set_perform_close(bool perform) {
116  perform_close_ = perform;
117  }
118 
119  // Returns the image obtained from shiro-rekha splitting. The returned object
120  // is owned by this class. Callers may want to clone the returned pix to keep
121  // it alive beyond the life of ShiroRekhaSplitter object.
122  Pix* splitted_image() {
123  return splitted_image_;
124  }
125 
126  // On setting the input image, a clone of it is owned by this class.
127  void set_orig_pix(Pix* pix);
128 
129  // Returns the input image provided to the object. This object is owned by
130  // this class. Callers may want to clone the returned pix to work with it.
131  Pix* orig_pix() {
132  return orig_pix_;
133  }
134 
136  return ocr_split_strategy_;
137  }
138 
140  ocr_split_strategy_ = strategy;
141  }
142 
144  return pageseg_split_strategy_;
145  }
146 
148  pageseg_split_strategy_ = strategy;
149  }
150 
151  BLOCK_LIST* segmentation_block_list() {
152  return segmentation_block_list_;
153  }
154 
155  // This method returns the computed mode-height of blobs in the pix.
156  // It also prunes very small blobs from calculation. Could be used to provide
157  // a global xheight estimate for images which have the same point-size text.
158  static int GetModeHeight(Pix* pix);
159 
160  private:
161  // Method to perform a close operation on the input image. The xheight
162  // estimate decides the size of sel used.
163  static void PerformClose(Pix* pix, int xheight_estimate);
164 
165  // This method resolves the cc bbox to a particular row and returns the row's
166  // xheight. This uses block_list_ if available, else just returns the
167  // global_xheight_ estimate currently set in the object.
168  int GetXheightForCC(Box* cc_bbox);
169 
170  // Returns a list of regions (boxes) which should be cleared in the original
171  // image so as to perform shiro-rekha splitting. Pix is assumed to carry one
172  // (or less) word only. Xheight measure could be the global estimate, the row
173  // estimate, or unspecified. If unspecified, over splitting may occur, since a
174  // conservative estimate of stroke width along with an associated multiplier
175  // is used in its place. It is advisable to have a specified xheight when
176  // splitting for classification/training.
177  void SplitWordShiroRekha(SplitStrategy split_strategy,
178  Pix* pix,
179  int xheight,
180  int word_left,
181  int word_top,
182  Boxa* regions_to_clear);
183 
184  // Returns a new box object for the corresponding TBOX, based on the original
185  // image's coordinate system.
186  Box* GetBoxForTBOX(const TBOX& tbox) const;
187 
188  // This method returns y-extents of the shiro-rekha computed from the input
189  // word image.
190  static void GetShiroRekhaYExtents(Pix* word_pix,
191  int* shirorekha_top,
192  int* shirorekha_bottom,
193  int* shirorekha_ylevel);
194 
195  Pix* orig_pix_; // Just a clone of the input image passed.
196  Pix* splitted_image_; // Image produced after the last splitting round. The
197  // object is owned by this class.
201  // This block list is used as a golden segmentation when performing splitting.
204  bool perform_close_; // Whether a morphological close operation should be
205  // performed before CCs are run through splitting.
206 };
207 
208 } // namespace tesseract.
209 
210 #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
Pix * splitted_image()
Definition: devanagari_processing.h:122
SplitStrategy ocr_split_strategy_
Definition: devanagari_processing.h:199
MINIMAL_SPLIT
Definition: devanagari_processing.h:76
SplitStrategy
Definition: devanagari_processing.h:74
void set_segmentation_block_list(BLOCK_LIST *block_list)
Definition: devanagari_processing.h:105
~PixelHistogram()
Definition: devanagari_processing.h:44
int GetHistogramMaximum(int *count) const
Definition: devanagari_processing.cpp:455
Definition: rect.h:34
void ConstructVerticalCountHist(Pix *pix)
Definition: devanagari_processing.cpp:469
Pix * debug_image_
Definition: devanagari_processing.h:200
Definition: baseapi.cpp:94
int * hist_
Definition: devanagari_processing.h:68
bool HasDifferentSplitStrategies() const
Definition: devanagari_processing.h:98
PixelHistogram
Definition: devanagari_processing.h:37
Pix * orig_pix()
Definition: devanagari_processing.h:131
void set_perform_close(bool perform)
Definition: devanagari_processing.h:115
BLOCK_LIST * segmentation_block_list()
Definition: devanagari_processing.h:151
PixelHistogram()
Definition: devanagari_processing.h:39
SplitStrategy ocr_split_strategy() const
Definition: devanagari_processing.h:135
Definition: debugpixa.h:10
int length_
Definition: devanagari_processing.h:69
bool perform_close_
Definition: devanagari_processing.h:204
Pix * orig_pix_
Definition: devanagari_processing.h:195
void Clear()
Definition: devanagari_processing.h:48
SplitStrategy pageseg_split_strategy() const
Definition: devanagari_processing.h:143
void set_ocr_split_strategy(SplitStrategy strategy)
Definition: devanagari_processing.h:139
void ConstructHorizontalCountHist(Pix *pix)
Definition: devanagari_processing.cpp:487
void set_pageseg_split_strategy(SplitStrategy strategy)
Definition: devanagari_processing.h:147
Pix * splitted_image_
Definition: devanagari_processing.h:196
BLOCK_LIST * segmentation_block_list_
Definition: devanagari_processing.h:202
ShiroRekhaSplitter
Definition: devanagari_processing.h:72
int * hist() const
Definition: devanagari_processing.h:53
int global_xheight_
Definition: devanagari_processing.h:203
SplitStrategy pageseg_split_strategy_
Definition: devanagari_processing.h:198
void set_global_xheight(int xheight)
Definition: devanagari_processing.h:111
int length() const
Definition: devanagari_processing.h:55