tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
lstmtester.h
1 // File: lstmtester.h
3 // Description: Top-level line evaluation class for LSTM-based networks.
4 // Author: Ray Smith
5 // Created: Wed Nov 23 11:05:06 PST 2016
6 //
7 // (C) Copyright 2016, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
18 
19 #ifndef TESSERACT_TRAINING_LSTMTESTER_H_
20 #define TESSERACT_TRAINING_LSTMTESTER_H_
21 
22 #include "genericvector.h"
23 #include "lstmtrainer.h"
24 #include "strngs.h"
25 #include "svutil.h"
26 
27 namespace tesseract {
28 
29 class LSTMTester {
30  public:
31  LSTMTester(int64_t max_memory);
32 
33  // Loads a set of lstmf files that were created using the lstm.train config to
34  // tesseract into memory ready for testing. Returns false if nothing was
35  // loaded. The arg is a filename of a file that lists the filenames, with one
36  // name per line. Conveniently, tesstrain.sh generates such a file, along
37  // with the files themselves.
38  bool LoadAllEvalData(const STRING& filenames_file);
39  // Loads a set of lstmf files that were created using the lstm.train config to
40  // tesseract into memory ready for testing. Returns false if nothing was
41  // loaded.
42  bool LoadAllEvalData(const GenericVector<STRING>& filenames);
43 
44  // Runs an evaluation asynchronously on the stored eval data and returns a
45  // string describing the results of the previous test. Args match TestCallback
46  // declared in lstmtrainer.h:
47  // iteration: Current learning iteration number.
48  // training_errors: If not null, is an array of size ET_COUNT, indexed by
49  // the ErrorTypes enum and indicates the current errors measured by the
50  // trainer, and this is a serious request to run an evaluation. If null,
51  // then the caller is just polling for the results of the previous eval.
52  // model_mgr: holds the model to evaluate, which should be a serialized
53  // LSTMTrainer.
54  // training_stage: an arbitrary number on the progress of training.
55  STRING RunEvalAsync(int iteration, const double* training_errors,
56  const TessdataManager& model_mgr, int training_stage);
57  // Runs an evaluation synchronously on the stored eval data and returns a
58  // string describing the results. Args are as for RunEvalAsync, plus
59  // verbosity, which outputs errors if 1, or all results if 2.
60  STRING RunEvalSync(int iteration, const double* training_errors,
61  const TessdataManager& model_mgr, int training_stage,
62  int verbosity);
63 
64  private:
65  // Static helper thread function for RunEvalAsync, with a specific signature
66  // required by SVSync::StartThread. Actually a member function pretending to
67  // be static, its arg is a this pointer that it will cast back to LSTMTester*
68  // to call RunEvalSync using the stored args that RunEvalAsync saves in *this.
69  // LockIfNotRunning must have returned true before calling ThreadFunc, and
70  // it will call UnlockRunning to release the lock after RunEvalSync completes.
71  static void* ThreadFunc(void* lstmtester_void);
72  // Returns true if there is currently nothing running, and takes the lock
73  // if there is nothing running.
74  bool LockIfNotRunning();
75  // Releases the running lock.
76  void UnlockRunning();
77 
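78  // The data to test with.
79  DocumentCache test_data_;
80  int total_pages_;
81  // Flag that indicates an asynchronous test is currently running.
82  // Protected by running_mutex_.
83  bool async_running_;
84  SVMutex running_mutex_;
85  // Stored copies of the args for use while running asynchronously.
86  int test_iteration_;
87  const double* test_training_errors_;
88  TessdataManager test_model_mgr_;
89  int test_training_stage_;
90  STRING test_result_;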
91 };
92 
93 } // namespace tesseract
94 
95 #endif // TESSERACT_TRAINING_LSTMTESTER_H_
const double * test_training_errors_
Definition: lstmtester.h:87
static void * ThreadFunc(void *lstmtester_void)
Definition: lstmtester.cpp:133
int test_iteration_
Definition: lstmtester.h:86
Definition: imagedata.h:314
STRING RunEvalAsync(int iteration, const double *training_errors, const TessdataManager &model_mgr, int training_stage)
Definition: lstmtester.cpp:52
Definition: baseapi.cpp:94
SVMutex running_mutex_
Definition: lstmtester.h:84
Definition: lstmtester.h:29
STRING RunEvalSync(int iteration, const double *training_errors, const TessdataManager &model_mgr, int training_stage, int verbosity)
Definition: lstmtester.cpp:82
bool LockIfNotRunning()
Definition: lstmtester.cpp:145
Definition: tessdatamanager.h:126
LSTMTester(int64_t max_memory)
Definition: lstmtester.cpp:24
Definition: svutil.h:78
STRING test_result_
Definition: lstmtester.h:90
int total_pages_
Definition: lstmtester.h:80
TessdataManager test_model_mgr_
Definition: lstmtester.h:88
int test_training_stage_
Definition: lstmtester.h:89
bool async_running_
Definition: lstmtester.h:83
Definition: strngs.h:45
bool LoadAllEvalData(const STRING &filenames_file)
Definition: lstmtester.cpp:30
void UnlockRunning()
Definition: lstmtester.cpp:153
DocumentCache test_data_
Definition: lstmtester.h:79