tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
tesseract::TabFind Class Reference

#include <tabfind.h>

Inheritance diagram for tesseract::TabFind:
Collaboration diagram for tesseract::TabFind:

Public Member Functions

 TabFind (int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
 
virtual ~TabFind ()
 
void InsertBlobsToGrid (bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
bool InsertBlob (bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
void SetBlockRuleEdges (TO_BLOCK *block)
 
void SetBlobRuleEdges (BLOBNBOX_LIST *blobs)
 
int GutterWidth (int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
 
void GutterWidthAndNeighbourGap (int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
 
int RightEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
int LeftEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * RightTabForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * LeftTabForBox (const TBOX &box, bool crossing, bool extended)
 
bool CommonWidth (int width)
 
WidthCallback * WidthCB ()
 
const ICOORD & image_origin () const
 
- Public Member Functions inherited from tesseract::AlignedBlob
 AlignedBlob (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~AlignedBlob ()
 
ScrollView * DisplayTabs (const char *window_name, ScrollView *tab_win)
 
TabVector * FindVerticalAlignment (AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~BlobGrid ()
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~BBGrid ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Pix *pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()=default
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Static Public Member Functions

static bool DifferentSizes (int size1, int size2)
 
static bool VeryDifferentSizes (int size1, int size2)
 
- Static Public Member Functions inherited from tesseract::AlignedBlob
static bool WithinTestRegion (int detail_level, int x, int y)
 

Protected Member Functions

TabVector_LIST * vectors ()
 
TabVector_LIST * dead_vectors ()
 
bool FindTabVectors (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
 
void DontFindTabVectors (BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
 
void TidyBlobs (TO_BLOCK *block)
 
void SetupTabSearch (int x, int y, int *min_key, int *max_key)
 
ScrollView * DisplayTabVectors (ScrollView *tab_win)
 
ScrollView * FindInitialTabVectors (BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
 
void ResetForVerticalText (const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
 
void Reset ()
 
void ReflectInYAxis ()
 

Static Protected Member Functions

static void RotateBlobList (const FCOORD &rotation, BLOBNBOX_LIST *blobs)
 

Protected Attributes

ICOORD vertical_skew_
 
int resolution_
 
- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Private Member Functions

ScrollView * FindTabBoxes (int min_gutter_width, double tabfind_aligned_gap_fraction)
 
bool TestBoxForTabs (BLOBNBOX *bbox, int min_gutter_width, double tabfind_aligned_gap_fraction)
 
bool ConfirmRaggedLeft (BLOBNBOX *bbox, int min_gutter)
 
bool ConfirmRaggedRight (BLOBNBOX *bbox, int min_gutter)
 
bool NothingYOverlapsInBox (const TBOX &search_box, const TBOX &target_box)
 
void FindAllTabVectors (int min_gutter_width)
 
int FindTabVectors (int search_size_multiple, TabAlignment alignment, int min_gutter_width, TabVector_LIST *vectors, int *vertical_x, int *vertical_y)
 
TabVector * FindTabVector (int search_size_multiple, int min_gutter_width, TabAlignment alignment, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
 
void SetVerticalSkewAndParallelize (int vertical_x, int vertical_y)
 
void SortVectors ()
 
void EvaluateTabs ()
 
void ComputeColumnWidths (ScrollView *tab_win, ColPartitionGrid *part_grid)
 
void ApplyPartitionsToColumnWidths (ColPartitionGrid *part_grid, STATS *col_widths)
 
void MakeColumnWidths (int col_widths_size, STATS *col_widths)
 
void MarkVerticalText ()
 
int FindMedianGutterWidth (TabVector_LIST *tab_vectors)
 
BLOBNBOX * AdjacentBlob (const BLOBNBOX *bbox, bool look_left, bool ignore_images, double min_overlap_fraction, int gap_limit, int top_y, int bottom_y)
 
void AddPartnerVector (BLOBNBOX *left_blob, BLOBNBOX *right_blob, TabVector *left, TabVector *right)
 
void CleanupTabs ()
 
bool Deskew (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
 
void ComputeDeskewVectors (FCOORD *deskew, FCOORD *reskew)
 
void ApplyTabConstraints ()
 

Private Attributes

ICOORD image_origin_
 
TabVector_LIST vectors_
 
TabVector_IT v_it_
 
TabVector_LIST dead_vectors_
 
ICOORDELT_LIST column_widths_
 
WidthCallback * width_cb_
 
GenericVector< BLOBNBOX * > left_tab_boxes_
 
GenericVector< BLOBNBOX * > right_tab_boxes_
 

Detailed Description

The TabFind class contains code to find tab-stops and maintain the vectors_ list of tab vectors. Also provides an interface to find neighbouring blobs in the grid of BLOBNBOXes that is used by multiple subclasses. Searching is a complex operation because of the need to enforce rule/separator lines, and tabstop boundaries, (when available), so as the holder of the list of TabVectors this class provides the functions.

Constructor & Destructor Documentation

◆ TabFind()

tesseract::TabFind::TabFind ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright,
TabVector_LIST *  vlines,
int  vertical_x,
int  vertical_y,
int  resolution 
)

◆ ~TabFind()

tesseract::TabFind::~TabFind ( )
virtual

Member Function Documentation

◆ AddPartnerVector()

void tesseract::TabFind::AddPartnerVector ( BLOBNBOX *  left_blob,
BLOBNBOX *  right_blob,
TabVector *  left,
TabVector *  right 
)
private

◆ AdjacentBlob()

BLOBNBOX * tesseract::TabFind::AdjacentBlob ( const BLOBNBOX *  bbox,
bool  look_left,
bool  ignore_images,
double  min_overlap_fraction,
int  gap_limit,
int  top_y,
int  bottom_y 
)
private

◆ ApplyPartitionsToColumnWidths()

void tesseract::TabFind::ApplyPartitionsToColumnWidths ( ColPartitionGrid *  part_grid,
STATS *  col_widths 
)
private

◆ ApplyTabConstraints()

void tesseract::TabFind::ApplyTabConstraints ( )
private

Compute and apply constraints to the end positions of TabVectors so that where possible partners end at the same y coordinate.

◆ CleanupTabs()

void tesseract::TabFind::CleanupTabs ( )
private

Remove separators and unused tabs from the main vectors_ list to the dead_vectors_ list.

◆ CommonWidth()

bool tesseract::TabFind::CommonWidth ( int  width)

Return true if the given width is close to one of the common widths in column_widths_.

◆ ComputeColumnWidths()

void tesseract::TabFind::ComputeColumnWidths ( ScrollView *  tab_win,
ColPartitionGrid *  part_grid 
)
private

◆ ComputeDeskewVectors()

void tesseract::TabFind::ComputeDeskewVectors ( FCOORD *  deskew,
FCOORD *  reskew 
)
private

◆ ConfirmRaggedLeft()

bool tesseract::TabFind::ConfirmRaggedLeft ( BLOBNBOX *  bbox,
int  min_gutter 
)
private

◆ ConfirmRaggedRight()

bool tesseract::TabFind::ConfirmRaggedRight ( BLOBNBOX *  bbox,
int  min_gutter 
)
private

◆ dead_vectors()

TabVector_LIST* tesseract::TabFind::dead_vectors ( )
inline protected

◆ Deskew()

bool tesseract::TabFind::Deskew ( TabVector_LIST *  hlines,
BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
FCOORD *  deskew,
FCOORD *  reskew 
)
private

Deskew the tab vectors and blobs, computing the rotation and resetting the stored vertical_skew_. The deskew inverse is returned in reskew. Returns false if the detected skew angle is impossible.

◆ DifferentSizes()

bool tesseract::TabFind::DifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 2 different.

◆ DisplayTabVectors()

ScrollView * tesseract::TabFind::DisplayTabVectors ( ScrollView *  tab_win)
protected

Display the tab vectors found in this grid.

◆ DontFindTabVectors()

void tesseract::TabFind::DontFindTabVectors ( BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
FCOORD *  deskew,
FCOORD *  reskew 
)
protected

◆ EvaluateTabs()

void tesseract::TabFind::EvaluateTabs ( )
private

◆ FindAllTabVectors()

void tesseract::TabFind::FindAllTabVectors ( int  min_gutter_width)
private

◆ FindInitialTabVectors()

ScrollView * tesseract::TabFind::FindInitialTabVectors ( BLOBNBOX_LIST *  image_blobs,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
TO_BLOCK *  block 
)
protected

◆ FindMedianGutterWidth()

int tesseract::TabFind::FindMedianGutterWidth ( TabVector_LIST *  tab_vectors)
private

◆ FindTabBoxes()

ScrollView * tesseract::TabFind::FindTabBoxes ( int  min_gutter_width,
double  tabfind_aligned_gap_fraction 
)
private

◆ FindTabVector()

TabVector * tesseract::TabFind::FindTabVector ( int  search_size_multiple,
int  min_gutter_width,
TabAlignment  alignment,
BLOBNBOX *  bbox,
int *  vertical_x,
int *  vertical_y 
)
private

◆ FindTabVectors() [1/2]

bool tesseract::TabFind::FindTabVectors ( TabVector_LIST *  hlines,
BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
ColPartitionGrid *  part_grid,
FCOORD *  deskew,
FCOORD *  reskew 
)
protected

Top-level function to find TabVectors in an input page block. Returns false if the detected skew angle is impossible. Applies the detected skew angle to deskew the tabs, blobs and part_grid. tabfind_aligned_gap_fraction should be the value of parameter textord_tabfind_aligned_gap_fraction.

◆ FindTabVectors() [2/2]

int tesseract::TabFind::FindTabVectors ( int  search_size_multiple,
TabAlignment  alignment,
int  min_gutter_width,
TabVector_LIST *  vectors,
int *  vertical_x,
int *  vertical_y 
)
private

◆ GutterWidth()

int tesseract::TabFind::GutterWidth ( int  bottom_y,
int  top_y,
const TabVector &  v,
bool  ignore_unmergeables,
int  max_gutter_width,
int *  required_shift 
)

◆ GutterWidthAndNeighbourGap()

void tesseract::TabFind::GutterWidthAndNeighbourGap ( int  tab_x,
int  mean_height,
int  max_gutter,
bool  left,
BLOBNBOX *  bbox,
int *  gutter_width,
int *  neighbour_gap 
)

Find the gutter width and distance to inner neighbour for the given blob.

◆ image_origin()

const ICOORD& tesseract::TabFind::image_origin ( ) const
inline

Return the coords at which to draw the image backdrop.

◆ InsertBlob()

bool tesseract::TabFind::InsertBlob ( bool  h_spread,
bool  v_spread,
BLOBNBOX *  blob,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a single blob into the given grid (not necessarily this). If h_spread, then all cells covered horizontally by the box are used, otherwise, just the bottom-left. Similarly for v_spread. A side effect is that the left and right rule edges of the blob are set according to the tab vectors in this (not grid).

◆ InsertBlobsToGrid()

void tesseract::TabFind::InsertBlobsToGrid ( bool  h_spread,
bool  v_spread,
BLOBNBOX_LIST *  blobs,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a list of blobs into the given grid (not necessarily this). See InsertBlob for the other arguments. It would seem to make more sense to swap this and grid, but this way around allows grid to not be derived from TabFind, eg a ColPartitionGrid, while the grid that provides the tab stops(this) has to be derived from TabFind.

◆ LeftEdgeForBox()

int tesseract::TabFind::LeftEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightEdgeForBox, but finds the left Edge instead.

◆ LeftTabForBox()

TabVector * tesseract::TabFind::LeftTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightTabForBox, but finds the left TabVector instead.

◆ MakeColumnWidths()

void tesseract::TabFind::MakeColumnWidths ( int  col_widths_size,
STATS *  col_widths 
)
private

◆ MarkVerticalText()

void tesseract::TabFind::MarkVerticalText ( )
private

◆ NothingYOverlapsInBox()

bool tesseract::TabFind::NothingYOverlapsInBox ( const TBOX &  search_box,
const TBOX &  target_box 
)
private

◆ ReflectInYAxis()

void tesseract::TabFind::ReflectInYAxis ( )
protected

◆ Reset()

void tesseract::TabFind::Reset ( )
protected

◆ ResetForVerticalText()

void tesseract::TabFind::ResetForVerticalText ( const FCOORD &  rotate,
const FCOORD &  rerotate,
TabVector_LIST *  horizontal_lines,
int *  min_gutter_width 
)
protected

◆ RightEdgeForBox()

int tesseract::TabFind::RightEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the x-coord that corresponds to the right edge for the given box. If there is a rule line to the right that vertically overlaps it, then return the x-coord of the rule line, otherwise return the right edge of the page. For details see RightTabForBox below.

◆ RightTabForBox()

TabVector * tesseract::TabFind::RightTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the TabVector that corresponds to the right edge for the given box. If there is a TabVector to the right that vertically overlaps it, then return it, otherwise return nullptr. Note that Right and Left refer to the position of the TabVector, not its type, ie RightTabForBox returns the nearest TabVector to the right of the box, regardless of its type. If a TabVector crosses right through the box (as opposed to grazing one edge or missing entirely), then crossing false will ignore such a line. Crossing true will return the line for BOTH left and right edges. If extended is true, then TabVectors are considered to extend to their extended_start/end_y, otherwise, just the startpt_ and endpt_. These functions make use of an internal iterator to the vectors_ list for speed when used repeatedly on neighbouring boxes. The caveat is that the iterator must be updated whenever the list is modified.

◆ RotateBlobList()

void tesseract::TabFind::RotateBlobList ( const FCOORD &  rotation,
BLOBNBOX_LIST *  blobs 
)
static protected

◆ SetBlobRuleEdges()

void tesseract::TabFind::SetBlobRuleEdges ( BLOBNBOX_LIST *  blobs)

◆ SetBlockRuleEdges()

void tesseract::TabFind::SetBlockRuleEdges ( TO_BLOCK *  block)

◆ SetupTabSearch()

void tesseract::TabFind::SetupTabSearch ( int  x,
int  y,
int *  min_key,
int *  max_key 
)
protected

◆ SetVerticalSkewAndParallelize()

void tesseract::TabFind::SetVerticalSkewAndParallelize ( int  vertical_x,
int  vertical_y 
)
private

◆ SortVectors()

void tesseract::TabFind::SortVectors ( )
private

◆ TestBoxForTabs()

bool tesseract::TabFind::TestBoxForTabs ( BLOBNBOX *  bbox,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction 
)
private

◆ TidyBlobs()

void tesseract::TabFind::TidyBlobs ( TO_BLOCK *  block)
protected

◆ vectors()

TabVector_LIST* tesseract::TabFind::vectors ( )
inline protected

Accessors

◆ VeryDifferentSizes()

bool tesseract::TabFind::VeryDifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 5 different.

◆ WidthCB()

WidthCallback* tesseract::TabFind::WidthCB ( )
inline

Return a callback for testing CommonWidth.

Member Data Documentation

◆ column_widths_

ICOORDELT_LIST tesseract::TabFind::column_widths_
private

◆ dead_vectors_

TabVector_LIST tesseract::TabFind::dead_vectors_
private

◆ image_origin_

ICOORD tesseract::TabFind::image_origin_
private

◆ left_tab_boxes_

GenericVector<BLOBNBOX*> tesseract::TabFind::left_tab_boxes_
private

◆ resolution_

int tesseract::TabFind::resolution_
protected

◆ right_tab_boxes_

GenericVector<BLOBNBOX*> tesseract::TabFind::right_tab_boxes_
private

◆ v_it_

TabVector_IT tesseract::TabFind::v_it_
private

◆ vectors_

TabVector_LIST tesseract::TabFind::vectors_
private

◆ vertical_skew_

ICOORD tesseract::TabFind::vertical_skew_
protected

◆ width_cb_

WidthCallback* tesseract::TabFind::width_cb_
private

Callback to test an int for being a common width.


The documentation for this class was generated from the following files: