tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
ocrclass.h
1 /**********************************************************************
2  * File: ocrclass.h
3  * Description: Class definitions and constants for the OCR API.
4  * Author: Hewlett-Packard Co
5  *
6  * (C) Copyright 1996, Hewlett-Packard Co.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 /**********************************************************************
20  * This file contains typedefs for all the structures used by
21  * the HP OCR interface.
22  * The code is designed to be used with either a C or C++ compiler.
23  * The structures are designed to allow them to be used with any
24  * structure alignment up to 8.
25  **********************************************************************/
26 
27 #ifndef CCUTIL_OCRCLASS_H_
28 #define CCUTIL_OCRCLASS_H_
29 
30 #ifndef __GNUC__
31 #ifdef _WIN32
32 #include "gettimeofday.h"
33 #endif
34 #else
35 #include <sys/time.h>
36 #endif
37 #include <ctime>
38 #include "host.h"
39 
40 /* Maximum lengths of various strings. */
// NOTE(review): whether these limits include a terminating NUL is not visible
// here - confirm against the code that sizes buffers with them.
41 #define MAX_FONT_NAME 34 /* name of font */
42 #define MAX_OCR_NAME 32 /* name of engine */
43 #define MAX_OCR_VERSION 17 /* version code of engine */
44 
45 /* Pitch set definitions; the values are identical to those used by RTF. */
46 #define PITCH_DEF 0 /* default pitch */
47 #define PITCH_FIXED 1 /* fixed pitch */
48 #define PITCH_VAR 2 /* variable pitch */
49 
50 /**********************************************************************
51  * EANYCODE_CHAR
52  * Description of a single character. The character code is defined by
53  * the character set of the current font.
54  * Output text is sent as an array of these structures.
55  * Spaces and line endings in the output are represented in the
56  * structures of the surrounding characters. They are not directly
57  * represented as characters.
58  * The first character in a word has a positive value of blanks.
59  * Missing information should be set to the defaults in the comments.
60  * If word bounds are known, but not character bounds, then the top and
61  * bottom of each character should be those of the word. The left of the
62  * first and right of the last char in each word should be set. All other
63  * lefts and rights should be set to -1.
64  * If set, the values of right and bottom are left+width and top+height.
65  * Most of the members come directly from the parameters to ocr_append_char.
66  * The formatting member uses the enhancement parameter and combines the
67  * line direction stuff into the top 3 bits.
68  * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
69  * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
70  * the coding is, only that it is backwards compatible with the previous
71  * version.
72  **********************************************************************/
73 
74 typedef struct { /* single character */
75 // It should be noted that the format for char_code for version 2.0 and beyond
76 // is UTF8, which means that ASCII characters will come out as one structure but
77 // other characters will be returned in two or more instances of this structure
78 // with a single byte of the UTF8 code in each, but each will have the same
79 // bounding box. Programs which want to handle languages with different
80 // character sets will need to handle extended characters appropriately, but
81 // *all* code needs to be prepared to receive UTF8 coded characters for
82 // characters such as bullet and fancy quotes.
83  uint16_t char_code; /* character itself */
84  int16_t left; /* of char (-1) */
85  int16_t right; /* of char (-1) */
86  int16_t top; /* of char (-1) */
87  int16_t bottom; /* of char (-1) */
88  int16_t font_index; /* what font (0) */
89  uint8_t confidence; /* 0=perfect, 100=reject (0/100) */
90  uint8_t point_size; /* of char, 72=1 inch, (10) */
91  int8_t blanks; /* no of spaces before this char (1) */
92  uint8_t formatting; /* char formatting (0) */
93 } EANYCODE_CHAR; /* single character */
94 
95 /**********************************************************************
96  * ETEXT_DESC
97  * Description of the output of the OCR engine.
98  * This structure is used as both a progress monitor and the final
99  * output header, since it needs to be a valid progress monitor while
100  * the OCR engine is storing its output to shared memory.
101  * During progress, all the buffer info is -1.
102  * Progress starts at 0 and increases to 100 during OCR. No other constraint.
103  * Additionally the progress callback contains the bounding box of the word that
104  * is currently being processed.
105  * Every progress callback, the OCR engine must set ocr_alive to 1.
106  * The HP side will set ocr_alive to 0. Repeated failure to reset
107  * to 1 indicates that the OCR engine is dead.
108  * If the cancel function is not null then it is called with the number of
109  * user words found. If it returns true then operation is cancelled.
110  **********************************************************************/
// Forward declaration: PROGRESS_FUNC2 takes an ETEXT_DESC* and is declared
// before the class itself is defined.
111 class ETEXT_DESC;
112 
// Cancel callback. Called with the number of user words found; returning true
// cancels the operation. The first argument is presumably the monitor's
// cancel_this pointer - confirm against the caller.
113 typedef bool (*CANCEL_FUNC)(void* cancel_this, int words);
// Progress callback. Receives the current progress value and the bounding box
// of the word that is currently being processed.
114 typedef bool (*PROGRESS_FUNC)(int progress, int left, int right, int top,
115  int bottom);
// Progress callback variant that receives the ETEXT_DESC monitor itself
// instead of a bare progress value, plus the same bounding box.
116 typedef bool (*PROGRESS_FUNC2)(ETEXT_DESC* ths, int left, int right, int top,
117  int bottom);
118 
119 class ETEXT_DESC { // output header
120  public:
121  int16_t count;
122  int16_t progress;
123 
126  int8_t more_to_come;
127  volatile int8_t ocr_alive;
128  int8_t err_code;
129  CANCEL_FUNC cancel;
130  PROGRESS_FUNC progress_callback;
131  PROGRESS_FUNC2 progress_callback2;
132  void* cancel_this;
133  struct timeval end_time;
134  EANYCODE_CHAR text[1];
136 
138  : count(0),
139  progress(0),
140  more_to_come(0),
141  ocr_alive(0),
142  err_code(0),
143  cancel(nullptr),
144  progress_callback(nullptr),
145  progress_callback2(&default_progress_func),
146  cancel_this(nullptr) {
147  end_time.tv_sec = 0;
148  end_time.tv_usec = 0;
149  }
150 
151  // Sets the end time to be deadline_msecs milliseconds from now.
152  void set_deadline_msecs(int32_t deadline_msecs) {
153  gettimeofday(&end_time, nullptr);
154  int32_t deadline_secs = deadline_msecs / 1000;
155  end_time.tv_sec += deadline_secs;
156  end_time.tv_usec += (deadline_msecs - deadline_secs * 1000) * 1000;
157  if (end_time.tv_usec > 1000000) {
158  end_time.tv_usec -= 1000000;
159  ++end_time.tv_sec;
160  }
161  }
162 
163  // Returns false if we've not passed the end_time, or have not set a deadline.
164  bool deadline_exceeded() const {
165  if (end_time.tv_sec == 0 && end_time.tv_usec == 0) return false;
166  struct timeval now;
167  gettimeofday(&now, nullptr);
168  return (now.tv_sec > end_time.tv_sec || (now.tv_sec == end_time.tv_sec &&
169  now.tv_usec > end_time.tv_usec));
170  }
171 
172 private:
173  static bool default_progress_func(ETEXT_DESC* ths, int left, int right, int top,
174  int bottom)
175  {
176  if (ths->progress_callback) {
177  return (*(ths->progress_callback))(ths->progress, left, right, top, bottom);
178  }
179  return true;
180  }
181 
182 };
183 
184 #endif // CCUTIL_OCRCLASS_H_
int16_t top
Definition: ocrclass.h:86
uint8_t point_size
Definition: ocrclass.h:90
int16_t font_index
Definition: ocrclass.h:88
ETEXT_DESC()
character data
Definition: ocrclass.h:137
uint8_t confidence
Definition: ocrclass.h:89
int16_t count
Definition: ocrclass.h:121
int8_t err_code
ocr sets to 1, HP 0
Definition: ocrclass.h:128
int8_t blanks
Definition: ocrclass.h:91
Definition: ocrclass.h:74
static bool default_progress_func(ETEXT_DESC *ths, int left, int right, int top, int bottom)
Definition: ocrclass.h:173
uint16_t char_code
Definition: ocrclass.h:83
CANCEL_FUNC cancel
for errcode use
Definition: ocrclass.h:129
PROGRESS_FUNC progress_callback
returns true to cancel
Definition: ocrclass.h:130
volatile int8_t ocr_alive
true if not last
Definition: ocrclass.h:127
int16_t bottom
Definition: ocrclass.h:87
struct timeval end_time
this or other data for cancel
Definition: ocrclass.h:133
int8_t more_to_come
percent complete increasing (0-100)
Definition: ocrclass.h:126
PROGRESS_FUNC2 progress_callback2
called whenever progress increases
Definition: ocrclass.h:131
void set_deadline_msecs(int32_t deadline_msecs)
Definition: ocrclass.h:152
bool deadline_exceeded() const
Definition: ocrclass.h:164
uint8_t formatting
Definition: ocrclass.h:92
int16_t right
Definition: ocrclass.h:85
Definition: ocrclass.h:119
void * cancel_this
monitor-aware progress callback
Definition: ocrclass.h:132
int16_t left
Definition: ocrclass.h:84
EANYCODE_CHAR text[1]
Definition: ocrclass.h:135
int16_t progress
chars in this buffer(0)
Definition: ocrclass.h:122