tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
baseapi.h
1 // File: baseapi.h
3 // Description: Simple API for calling tesseract.
4 // Author: Ray Smith
5 //
6 // (C) Copyright 2006, Google Inc.
7 // Licensed under the Apache License, Version 2.0 (the "License");
8 // you may not use this file except in compliance with the License.
9 // You may obtain a copy of the License at
10 // http://www.apache.org/licenses/LICENSE-2.0
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS,
13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 // See the License for the specific language governing permissions and
15 // limitations under the License.
16 //
18 
19 #ifndef TESSERACT_API_BASEAPI_H_
20 #define TESSERACT_API_BASEAPI_H_
21 
22 #include <cstdio>
23 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
24 // complexity of includes here. Use forward declarations wherever possible
25 // and hide includes of complex types in baseapi.cpp.
26 #include "tess_version.h"
27 #include "apitypes.h"
28 #include "pageiterator.h"
29 #include "platform.h"
30 #include "publictypes.h"
31 #include "resultiterator.h"
32 #include "serialis.h"
33 #include "tesscallback.h"
34 #include "thresholder.h"
35 #include "unichar.h"
36 
37 template <typename T> class GenericVector;
38 class PAGE_RES;
39 class PAGE_RES_IT;
40 class ParagraphModel;
41 struct BlamerBundle;
42 class BLOCK_LIST;
43 class DENORM;
44 class MATRIX;
45 class ROW;
46 class STRING;
47 class WERD;
48 struct Pix;
49 struct Box;
50 struct Pixa;
51 struct Boxa;
52 class ETEXT_DESC;
53 struct OSResults;
54 class TBOX;
55 class UNICHARSET;
56 class WERD_CHOICE_LIST;
57 
58 struct INT_FEATURE_STRUCT;
60 struct TBLOB;
61 
62 namespace tesseract {
63 
64 class Dawg;
65 class Dict;
66 class EquationDetect;
67 class PageIterator;
68 class LTRResultIterator;
69 class ResultIterator;
70 class MutableIterator;
71 class TessResultRenderer;
72 class Tesseract;
73 class Trie;
74 class Wordrec;
75 
76 typedef int (Dict::*DictFunc)(void* void_dawg_args,  // Pointer to a const Dict member function returning int.
77  const UNICHARSET& unicharset,
78  UNICHAR_ID unichar_id, bool word_end) const;
79 typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,  // Pointer to a Dict member function returning double.
80  const char* context,
81  int context_bytes,
82  const char* character,
83  int character_bytes);
84 typedef float (Dict::*ParamsModelClassifyFunc)(  // Pointer to a Dict member function returning float.
85  const char *lang, void *path);
86 typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,  // Pointer to a Wordrec member function returning void.
87  const WERD_CHOICE_LIST &best_choices,
88  const UNICHARSET &unicharset,
89  BlamerBundle *blamer_bundle);
91 typedef TessCallback4<const UNICHARSET &, int, PageIterator *, Pix *> TruthCallback;  // Callback type taken by TessBaseAPI::InitTruthCallback.
92 
101 class TESS_API TessBaseAPI {
102  public:
103  TessBaseAPI();
104  virtual ~TessBaseAPI();
105 
109  static const char* Version();
110 
118  static size_t getOpenCLDevice(void **device);
119 
124  static void CatchSignals();
125 
130  void SetInputName(const char* name);
138  const char* GetInputName();
139  // Takes ownership of the input pix.
140  void SetInputImage(Pix *pix);
141  Pix* GetInputImage();
142  int GetSourceYResolution();
143  const char* GetDatapath();
144 
146  void SetOutputName(const char* name);
147 
161  bool SetVariable(const char* name, const char* value);
162  bool SetDebugVariable(const char* name, const char* value);
163 
168  bool GetIntVariable(const char *name, int *value) const;
169  bool GetBoolVariable(const char *name, bool *value) const;
170  bool GetDoubleVariable(const char *name, double *value) const;
171 
176  const char *GetStringVariable(const char *name) const;
177 
181  void PrintVariables(FILE *fp) const;
182 
186  bool GetVariableAsString(const char *name, STRING *val);
187 
226  int Init(const char* datapath, const char* language, OcrEngineMode mode,
227  char **configs, int configs_size,
228  const GenericVector<STRING> *vars_vec,
229  const GenericVector<STRING> *vars_values,
230  bool set_only_non_debug_params);
231  int Init(const char* datapath, const char* language, OcrEngineMode oem) {  // Convenience overload of the 8-argument Init.
232  return Init(datapath, language, oem, nullptr, 0, nullptr, nullptr, false);  // configs = nullptr, configs_size = 0, vars_vec/vars_values = nullptr, set_only_non_debug_params = false.
233  }
234  int Init(const char* datapath, const char* language) {  // Convenience overload of the 8-argument Init that also fixes the engine mode.
235  return Init(datapath, language, OEM_DEFAULT, nullptr, 0, nullptr, nullptr, false);  // mode = OEM_DEFAULT; no configs, no vars_vec/vars_values, set_only_non_debug_params = false.
236  }
237  // In-memory version reads the traineddata file directly from the given
238  // data[data_size] array, and/or reads data via a FileReader.
239  int Init(const char* data, int data_size, const char* language,
240  OcrEngineMode mode, char** configs, int configs_size,
241  const GenericVector<STRING>* vars_vec,
242  const GenericVector<STRING>* vars_values,
243  bool set_only_non_debug_params, FileReader reader);
244 
253  const char* GetInitLanguagesAsString() const;
254 
260  void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
261 
265  void GetAvailableLanguagesAsVector(GenericVector<STRING>* langs) const;
266 
273  int InitLangMod(const char* datapath, const char* language);
274 
279  void InitForAnalysePage();
280 
287  void ReadConfigFile(const char* filename);
289  void ReadDebugConfigFile(const char* filename);
290 
296  void SetPageSegMode(PageSegMode mode);
297 
299  PageSegMode GetPageSegMode() const;
300 
318  char* TesseractRect(const unsigned char* imagedata,
319  int bytes_per_pixel, int bytes_per_line,
320  int left, int top, int width, int height);
321 
326  void ClearAdaptiveClassifier();
327 
334  /* @{ */
335 
343  void SetImage(const unsigned char* imagedata, int width, int height,
344  int bytes_per_pixel, int bytes_per_line);
345 
354  void SetImage(Pix* pix);
355 
360  void SetSourceResolution(int ppi);
361 
367  void SetRectangle(int left, int top, int width, int height);
368 
376  void SetThresholder(ImageThresholder* thresholder) {  // Replaces the current thresholder_ with the given one.
377  delete thresholder_;  // The previously stored thresholder is destroyed before the new pointer is taken.
378  thresholder_ = thresholder;  // Stored as a raw pointer and deleted on the next call, so this object presumably owns it - confirm against the destructor.
379  ClearResults();  // Runs after every swap; presumably discards results tied to the old thresholder - confirm in baseapi.cpp.
380  }
381 
387  Pix* GetThresholdedImage();
388 
394  Boxa* GetRegions(Pixa** pixa);
395 
407  Boxa* GetTextlines(const bool raw_image, const int raw_padding,
408  Pixa** pixa, int** blockids, int** paraids);
409  /*
410  Helper overload for the most common usage: extracts from the thresholded image by forwarding to the five-argument GetTextlines.
411  */
412  Boxa* GetTextlines(Pixa** pixa, int** blockids) {
413  return GetTextlines(false, 0, pixa, blockids, nullptr);  // raw_image = false, raw_padding = 0, paraids = nullptr.
414  }
415 
424  Boxa* GetStrips(Pixa** pixa, int** blockids);
425 
431  Boxa* GetWords(Pixa** pixa);
432 
441  Boxa* GetConnectedComponents(Pixa** cc);
442 
456  Boxa* GetComponentImages(const PageIteratorLevel level,
457  const bool text_only, const bool raw_image,
458  const int raw_padding,
459  Pixa** pixa, int** blockids, int** paraids);
460  // Helper function to get binary images with no padding (most common usage).
461  Boxa* GetComponentImages(const PageIteratorLevel level,  // Four-argument convenience overload of the seven-argument GetComponentImages.
462  const bool text_only,
463  Pixa** pixa, int** blockids) {
464  return GetComponentImages(level, text_only, false, 0, pixa, blockids, nullptr);  // raw_image = false, raw_padding = 0, paraids = nullptr.
465  }
466 
473  int GetThresholdedImageScaleFactor() const;
474 
490  PageIterator* AnalyseLayout();
491  PageIterator* AnalyseLayout(bool merge_similar_words);
492 
499  int Recognize(ETEXT_DESC* monitor);
500 
506  #ifndef DISABLED_LEGACY_ENGINE
507 
508  int RecognizeForChopTest(ETEXT_DESC* monitor);
509  #endif
510 
533  bool ProcessPages(const char* filename, const char* retry_config,
534  int timeout_millisec, TessResultRenderer* renderer);
535  // Does the real work of ProcessPages.
536  bool ProcessPagesInternal(const char* filename, const char* retry_config,
537  int timeout_millisec, TessResultRenderer* renderer);
538 
548  bool ProcessPage(Pix* pix, int page_index, const char* filename,
549  const char* retry_config, int timeout_millisec,
550  TessResultRenderer* renderer);
551 
560  ResultIterator* GetIterator();
561 
570  MutableIterator* GetMutableIterator();
571 
576  char* GetUTF8Text();
577 
587  char* GetHOCRText(ETEXT_DESC* monitor, int page_number);
588 
595  char* GetHOCRText(int page_number);
596 
602  char* GetTSVText(int page_number);
603 
611  char* GetBoxText(int page_number);
612 
618  char* GetUNLVText();
619 
629  bool DetectOrientationScript(int* orient_deg, float* orient_conf,
630  const char** script_name, float* script_conf);
631 
637  char* GetOsdText(int page_number);
638 
640  int MeanTextConf();
647  int* AllWordConfidences();
648 
649 #ifndef DISABLED_LEGACY_ENGINE
650 
660  bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
661 #endif // ndef DISABLED_LEGACY_ENGINE
662 
669  void Clear();
670 
677  void End();
678 
686  static void ClearPersistentCache();
687 
694  int IsValidWord(const char *word);
695  // Returns true if utf8_character is defined in the UniCharset.
696  bool IsValidCharacter(const char *utf8_character);
697 
698 
699  bool GetTextDirection(int* out_offset, float* out_slope);
700 
702  void SetDictFunc(DictFunc f);
703 
707  void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
708 
713  bool DetectOS(OSResults*);
714 
719  void GetBlockTextOrientations(int** block_orientation,
720  bool** vertical_writing);
721 
722 
723  #ifndef DISABLED_LEGACY_ENGINE
724 
726  void SetFillLatticeFunc(FillLatticeFunc f);
727 
729  BLOCK_LIST* FindLinesCreateBlockList();
730 
736  static void DeleteBlockList(BLOCK_LIST* block_list);
737 
739  static ROW *MakeTessOCRRow(float baseline, float xheight,
740  float descender, float ascender);
741 
743  static TBLOB *MakeTBLOB(Pix *pix);
744 
750  static void NormalizeTBLOB(TBLOB *tblob, ROW *row, bool numeric_mode);
751 
753  void GetFeaturesForBlob(TBLOB* blob, INT_FEATURE_STRUCT* int_features,
754  int* num_features, int* feature_outline_index);
755 
760  static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
761  int right, int bottom);
762 
767  void RunAdaptiveClassifier(TBLOB* blob,
768  int num_max_matches,
769  int* unichar_ids,
770  float* ratings,
771  int* num_matches_returned);
772 #endif // ndef DISABLED_LEGACY_ENGINE
773 
775  const char* GetUnichar(int unichar_id);
776 
778  const Dawg *GetDawg(int i) const;
779 
781  int NumDawgs() const;
782 
783  Tesseract* tesseract() const { return tesseract_; }  // Accessor: returns the stored tesseract_ pointer.
784 
785  OcrEngineMode oem() const { return last_oem_requested_; }  // Accessor: returns last_oem_requested_.
786 
787  void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }  // Records cb in truth_cb_; the callback is not invoked here.
788 
789  void set_min_orientation_margin(double margin);
790  /* @} */
791 
792  protected:
793 
795  TESS_LOCAL bool InternalSetImage();
796 
801  TESS_LOCAL virtual bool Threshold(Pix** pix);
802 
807  TESS_LOCAL int FindLines();
808 
810  void ClearResults();
811 
817  TESS_LOCAL LTRResultIterator* GetLTRIterator();
818 
825  TESS_LOCAL int TextLength(int* blob_count);
826 
828  TESS_LOCAL void DetectParagraphs(bool after_text_recognition);
829 
830  #ifndef DISABLED_LEGACY_ENGINE
831 
833  /* @{ */
834 
839  TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
840  int length,
841  float baseline,
842  float xheight,
843  float descender,
844  float ascender);
845 
847  TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
848 
849  TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list,
850  PAGE_RES* pass1_result);
851 
856  TESS_LOCAL static int TesseractExtractResult(char** text,
857  int** lengths,
858  float** costs,
859  int** x0,
860  int** y0,
861  int** x1,
862  int** y1,
863  PAGE_RES* page_res);
864 
865  TESS_LOCAL const PAGE_RES* GetPageRes() const { return page_res_; }  // Read-only accessor for page_res_; compiled only when DISABLED_LEGACY_ENGINE is not defined.
866  /* @} */
867 #endif // ndef DISABLED_LEGACY_ENGINE
868 
869  protected:
870  Tesseract* tesseract_;  ///< The underlying data object.
871  Tesseract* osd_tesseract_;  ///< For orientation & script detection.
872  EquationDetect* equ_detect_;  ///< The equation detector.
873  FileReader reader_;  ///< Reads files from any filesystem.
874  ImageThresholder* thresholder_;  ///< Image thresholding module.
875  GenericVector<ParagraphModel *>* paragraph_models_;
876  BLOCK_LIST* block_list_;  ///< The page layout.
877  PAGE_RES* page_res_;  ///< The page-level data.
878  STRING* input_file_;  ///< Name used by training code.
879  STRING* output_file_;  ///< Name used by debug code.
880  STRING* datapath_;  ///< Current location of tessdata.
881  STRING* language_;  ///< Last initialized language.
882  OcrEngineMode last_oem_requested_;  ///< Last ocr language mode requested.
883  bool recognition_done_;  ///< page_res_ contains recognition data.
884  TruthCallback *truth_cb_;  ///< Assigned by InitTruthCallback.
885 
890  /* @{ */  // NOTE(review): presumably the rectangle given to SetRectangle and the input image size - confirm in baseapi.cpp.
891  int rect_left_;
892  int rect_top_;
893  int rect_width_;
894  int rect_height_;
895  int image_width_;
896  int image_height_;
897  /* @} */
898 
899  private:
900  // A list of image filenames gets special consideration
901  bool ProcessPagesFileList(FILE *fp,
902  STRING *buf,
903  const char* retry_config, int timeout_millisec,
904  TessResultRenderer* renderer,
905  int tessedit_page_number);
906  // TIFF supports multipage so gets special consideration.
907  bool ProcessPagesMultipageTiff(const unsigned char *data,
908  size_t size,
909  const char* filename,
910  const char* retry_config,
911  int timeout_millisec,
912  TessResultRenderer* renderer,
913  int tessedit_page_number);
914  // There's currently no way to pass a document title from the
915  // Tesseract command line, and we have multiple places that choose
916  // to set the title to an empty string. Using a single named
917  // variable will hopefully reduce confusion if the situation changes
918  // in the future.
919  const char *unknown_title_ = "";
920 }; // class TessBaseAPI.
921 
923 STRING HOcrEscape(const char* text);
924 } // namespace tesseract.
925 
926 #endif // TESSERACT_API_BASEAPI_H_
void DetectParagraphs(int debug_level, GenericVector< RowInfo > *row_infos, GenericVector< PARA *> *row_owners, PARA_LIST *paragraphs, GenericVector< ParagraphModel *> *models)
Definition: paragraphs.cpp:2271
ImageThresholder * thresholder_
Image thresholding module.
Definition: baseapi.h:874
Definition: werd.h:59
int Init(const char *datapath, const char *language)
Definition: baseapi.h:234
Tesseract * tesseract_
The underlying data object.
Definition: baseapi.h:870
int rect_top_
Definition: baseapi.h:892
Tesseract * tesseract() const
Definition: baseapi.h:783
Definition: ocrpara.h:114
Definition: wordrec.h:192
int rect_left_
Definition: baseapi.h:891
Definition: dawg.h:119
Definition: resultiterator.h:41
Definition: rect.h:34
Definition: unicharset.h:146
Definition: osdetect.h:49
STRING HOcrEscape(const char *text)
Definition: baseapi.cpp:2632
STRING * output_file_
Name used by debug code.
Definition: baseapi.h:879
Definition: matrix.h:575
bool(* FileReader)(const STRING &filename, GenericVector< char > *data)
Definition: genericvector.h:360
Definition: baseapi.cpp:94
STRING * language_
Last initialized language.
Definition: baseapi.h:881
int rect_width_
Definition: baseapi.h:893
TruthCallback * truth_cb_
Definition: baseapi.h:884
EquationDetect * equ_detect_
The equation detector.
Definition: baseapi.h:872
STRING * datapath_
Current location of tessdata.
Definition: baseapi.h:880
PAGE_RES * page_res_
The page-level data.
Definition: baseapi.h:877
Definition: equationdetect.h:39
int Init(const char *datapath, const char *language, OcrEngineMode oem)
Definition: baseapi.h:231
PageIteratorLevel
Definition: publictypes.h:219
bool recognition_done_
page_res_ contains recognition data.
Definition: baseapi.h:883
Tesseract * osd_tesseract_
For orientation & script detection.
Definition: baseapi.h:871
void InitTruthCallback(TruthCallback *cb)
Definition: baseapi.h:787
float(Dict::* ParamsModelClassifyFunc)(const char *lang, void *path)
Definition: baseapi.h:84
BLOCK_LIST * block_list_
The page layout.
Definition: baseapi.h:876
void(Wordrec::* FillLatticeFunc)(const MATRIX &ratings, const WERD_CHOICE_LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle)
Definition: baseapi.h:86
Definition: renderer.h:45
Boxa * GetComponentImages(const PageIteratorLevel level, const bool text_only, Pixa **pixa, int **blockids)
Definition: baseapi.h:461
Definition: pageres.h:675
Definition: dict.h:88
double(Dict::* ProbabilityInContextFunc)(const char *lang, const char *context, int context_bytes, const char *character, int character_bytes)
Definition: baseapi.h:79
Definition: blamer.h:100
TESS_LOCAL const PAGE_RES * GetPageRes() const
Definition: baseapi.h:865
STRING * input_file_
Name used by training code.
Definition: baseapi.h:878
OcrEngineMode oem() const
Definition: baseapi.h:785
void SetThresholder(ImageThresholder *thresholder)
Definition: baseapi.h:376
Definition: baseapi.h:37
Definition: strngs.h:45
Definition: ltrresultiterator.h:48
Definition: mutableiterator.h:44
OcrEngineMode last_oem_requested_
Last ocr language mode requested.
Definition: baseapi.h:882
Definition: ocrrow.h:36
Definition: normalis.h:50
int(Dict::* DictFunc)(void *void_dawg_args, const UNICHARSET &unicharset, UNICHAR_ID unichar_id, bool word_end) const
Definition: baseapi.h:76
Definition: tesseractclass.h:173
TessCallback4< const UNICHARSET &, int, PageIterator *, Pix * > TruthCallback
Definition: baseapi.h:91
Definition: pageres.h:77
int image_width_
Definition: baseapi.h:895
Definition: intproto.h:132
Definition: blobs.h:268
Definition: ocrclass.h:119
GenericVector< ParagraphModel * > * paragraph_models_
Definition: baseapi.h:875
Boxa * GetTextlines(Pixa **pixa, int **blockids)
Definition: baseapi.h:412
Definition: pageiterator.h:52
OcrEngineMode
Definition: publictypes.h:268
int image_height_
Definition: baseapi.h:896
Definition: publictypes.h:274
Definition: tesscallback.h:1709
FileReader reader_
Reads files from any filesystem.
Definition: baseapi.h:873
Definition: baseapi.h:101
Definition: thresholder.h:36
PageSegMode
Definition: publictypes.h:163
int rect_height_
Definition: baseapi.h:894