tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
genericvector.h
1 // File: genericvector.h
3 // Description: Generic vector class
4 // Author: Daria Antonova
5 // Created: Mon Jun 23 11:26:43 PDT 2008
6 //
7 // (C) Copyright 2007, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 //
20 #ifndef TESSERACT_CCUTIL_GENERICVECTOR_H_
21 #define TESSERACT_CCUTIL_GENERICVECTOR_H_
22 
23 #include <algorithm>
24 #include <cassert>
25 #include <cstdio>
26 #include <cstdlib>
27 
28 #include "tesscallback.h"
29 #include "helpers.h"
30 #include "serialis.h"
31 #include "strngs.h"
32 
33 // Use PointerVector<T> below in preference to GenericVector<T*>, as that
34 // provides automatic deletion of pointers, [De]Serialize that works, and
35 // sort that works.
36 template <typename T>
37 class GenericVector {
38  public:
41  }
42  GenericVector(int size, const T& init_val) {
43  init(size);
44  init_to_size(size, init_val);
45  }
46 
47  // Copy
48  GenericVector(const GenericVector& other) {
49  this->init(other.size());
50  this->operator+=(other);
51  }
54 
56 
57  // Reserve some memory.
58  void reserve(int size);
59  // Double the size of the internal array.
60  void double_the_size();
61 
62  // Resizes to size and sets all values to t.
63  void init_to_size(int size, const T& t);
64  // Resizes to size without any initialization.
65  void resize_no_init(int size) {
66  reserve(size);
67  size_used_ = size;
68  }
69 
70  // Return the size used.
71  int size() const {
72  return size_used_;
73  }
74  // Workaround to avoid g++ -Wsign-compare warnings.
75  size_t unsigned_size() const {
76  static_assert(sizeof(size_used_) <= sizeof(size_t),
77  "Wow! sizeof(size_t) < sizeof(int32_t)!!");
78  assert(0 <= size_used_);
79  return static_cast<size_t>(size_used_);
80  }
81  int size_reserved() const {
82  return size_reserved_;
83  }
84 
85  int length() const {
86  return size_used_;
87  }
88 
89  // Return true if empty.
90  bool empty() const {
91  return size_used_ == 0;
92  }
93 
94  // Return the object from an index.
95  T &get(int index) const;
96  T &back() const;
97  T &operator[](int index) const;
98  // Returns the last object and removes it.
99  T pop_back();
100 
101  // Return the index of the T object.
102  // This method NEEDS a compare_callback to be passed to
103  // set_compare_callback.
104  int get_index(const T& object) const;
105 
106  // Return true if T is in the array
107  bool contains(const T& object) const;
108 
109  // Return true if the index is valid
110  T contains_index(int index) const;
111 
112  // Push an element in the end of the array
113  int push_back(T object);
114  void operator+=(const T& t);
115 
116  // Push an element in the end of the array if the same
117  // element is not already contained in the array.
118  int push_back_new(const T& object);
119 
120  // Push an element in the front of the array
121  // Note: This function is O(n)
122  int push_front(const T& object);
123 
124  // Set the value at the given index
125  void set(const T& t, int index);
126 
127  // Insert t at the given index, push other elements to the right.
128  void insert(const T& t, int index);
129 
130  // Removes an element at the given index and
131  // shifts the remaining elements to the left.
132  void remove(int index);
133 
134  // Truncates the array to the given size by removing the end.
135  // If the current size is less, the array is not expanded.
136  void truncate(int size) {
137  if (size < size_used_)
138  size_used_ = size;
139  }
140 
141  // Add a callback to be called to delete the elements when the array took
142  // their ownership.
144 
145  // Add a callback to be called to compare the elements when needed (contains,
146  // get_id, ...)
148 
149  // Clear the array, calling the clear callback function if any.
150  // All the owned callbacks are also deleted.
151  // If you don't want the callbacks to be deleted, before calling clear, set
152  // the callback to nullptr.
153  void clear();
154 
155  // Delete objects pointed to by data_[i]
156  void delete_data_pointers();
157 
158  // This method clears the current object, then, does a shallow copy of
159  // its argument, and finally invalidates its argument.
160  // Callbacks are moved to the current object;
161  void move(GenericVector<T>* from);
162 
163  // Read/Write the array to a file. This does _NOT_ read/write the callbacks.
164  // The callback given must be permanent since they will be called more than
165  // once. The given callback will be deleted at the end.
166  // If the callbacks are nullptr, then the data is simply read/written using
167  // fread (and swapping)/fwrite.
168  // Returns false on error or if the callback returns false.
169  // DEPRECATED. Use [De]Serialize[Classes] instead.
170  bool write(FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const;
171  bool read(tesseract::TFile* f,
173  // Writes a vector of simple types to the given file. Assumes that bitwise
174  // read/write of T will work. Returns false in case of error.
175  // TODO(rays) Change all callers to use TFile and remove deprecated methods.
176  bool Serialize(FILE* fp) const;
177  bool Serialize(tesseract::TFile* fp) const;
178  // Reads a vector of simple types from the given file. Assumes that bitwise
179  // read/write will work with ReverseN according to sizeof(T).
180  // Returns false in case of error.
181  // If swap is true, assumes a big/little-endian swap is needed.
182  // TFile is assumed to know about swapping.
183  bool DeSerialize(bool swap, FILE* fp);
184  bool DeSerialize(tesseract::TFile* fp);
185  // Skips the deserialization of the vector.
186  static bool SkipDeSerialize(tesseract::TFile* fp);
187  // Writes a vector of classes to the given file. Assumes the existence of
188  // bool T::Serialize(FILE* fp) const that returns false in case of error.
189  // Returns false in case of error.
190  bool SerializeClasses(FILE* fp) const;
191  bool SerializeClasses(tesseract::TFile* fp) const;
192  // Reads a vector of classes from the given file. Assumes the existence of
193  // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
194  // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
195  // this function. Returns false in case of error.
196  // If swap is true, assumes a big/little-endian swap is needed.
197  bool DeSerializeClasses(bool swap, FILE* fp);
199  // Calls SkipDeSerialize on the elements of the vector.
200  static bool SkipDeSerializeClasses(tesseract::TFile* fp);
201 
202  // Allocates a new array of double the current_size, copies over the
203  // information from data to the new location, deletes data and returns
204  // the pointed to the new larger array.
205  // This function uses memcpy to copy the data, instead of invoking
206  // operator=() for each element like double_the_size() does.
207  static T *double_the_size_memcpy(int current_size, T *data) {
208  T *data_new = new T[current_size * 2];
209  memcpy(data_new, data, sizeof(T) * current_size);
210  delete[] data;
211  return data_new;
212  }
213 
214  // Reverses the elements of the vector.
215  void reverse() {
216  for (int i = 0; i < size_used_ / 2; ++i)
217  Swap(&data_[i], &data_[size_used_ - 1 - i]);
218  }
219 
220  // Sorts the members of this vector using the less than comparator (cmp_lt),
221  // which compares the values. Useful for GenericVectors to primitive types.
222  // Will not work so great for pointers (unless you just want to sort some
223  // pointers). You need to provide a specialization to sort_cmp to use
224  // your type.
225  void sort();
226 
227  // Sort the array into the order defined by the qsort function comparator.
228  // The comparator function is as defined by qsort, ie. it receives pointers
229  // to two Ts and returns negative if the first element is to appear earlier
230  // in the result and positive if it is to appear later, with 0 for equal.
231  void sort(int (*comparator)(const void*, const void*)) {
232  qsort(data_, size_used_, sizeof(*data_), comparator);
233  }
234 
235  // Searches the array (assuming sorted in ascending order, using sort()) for
236  // an element equal to target and returns true if it is present.
237  // Use binary_search to get the index of target, or its nearest candidate.
238  bool bool_binary_search(const T& target) const {
239  int index = binary_search(target);
240  if (index >= size_used_)
241  return false;
242  return data_[index] == target;
243  }
244  // Searches the array (assuming sorted in ascending order, using sort()) for
245  // an element equal to target and returns the index of the best candidate.
246  // The return value is conceptually the largest index i such that
247  // data_[i] <= target or 0 if target < the whole vector.
248  // NOTE that this function uses operator> so really the return value is
249  // the largest index i such that data_[i] > target is false.
250  int binary_search(const T& target) const {
251  int bottom = 0;
252  int top = size_used_;
253  while (top - bottom > 1) {
254  int middle = (bottom + top) / 2;
255  if (data_[middle] > target)
256  top = middle;
257  else
258  bottom = middle;
259  }
260  return bottom;
261  }
262 
263  // Compact the vector by deleting elements using operator!= on basic types.
264  // The vector must be sorted.
265  void compact_sorted() {
266  if (size_used_ == 0)
267  return;
268 
269  // First element is in no matter what, hence the i = 1.
270  int last_write = 0;
271  for (int i = 1; i < size_used_; ++i) {
272  // Finds next unique item and writes it.
273  if (data_[last_write] != data_[i])
274  data_[++last_write] = data_[i];
275  }
276  // last_write is the index of a valid data cell, so add 1.
277  size_used_ = last_write + 1;
278  }
279 
280  // Compact the vector by deleting elements for which delete_cb returns
281  // true. delete_cb is a permanent callback and will be deleted.
283  int new_size = 0;
284  int old_index = 0;
285  // Until the callback returns true, the elements stay the same.
286  while (old_index < size_used_ && !delete_cb->Run(old_index++))
287  ++new_size;
288  // Now just copy anything else that gets false from delete_cb.
289  for (; old_index < size_used_; ++old_index) {
290  if (!delete_cb->Run(old_index)) {
291  data_[new_size++] = data_[old_index];
292  }
293  }
294  size_used_ = new_size;
295  delete delete_cb;
296  }
297 
298  T dot_product(const GenericVector<T>& other) const {
299  T result = static_cast<T>(0);
300  for (int i = std::min(size_used_, other.size_used_) - 1; i >= 0; --i)
301  result += data_[i] * other.data_[i];
302  return result;
303  }
304 
305  // Returns the index of what would be the target_index_th item in the array
306  // if the members were sorted, without actually sorting. Members are
307  // shuffled around, but it takes O(n) time.
308  // NOTE: uses operator< and operator== on the members.
309  int choose_nth_item(int target_index) {
310  // Make sure target_index is legal.
311  if (target_index < 0)
312  target_index = 0; // ensure legal
313  else if (target_index >= size_used_)
314  target_index = size_used_ - 1;
315  unsigned int seed = 1;
316  return choose_nth_item(target_index, 0, size_used_, &seed);
317  }
318 
319  // Swaps the elements with the given indices.
320  void swap(int index1, int index2) {
321  if (index1 != index2) {
322  T tmp = data_[index1];
323  data_[index1] = data_[index2];
324  data_[index2] = tmp;
325  }
326  }
327  // Returns true if all elements of *this are within the given range.
328  // Only uses operator<
329  bool WithinBounds(const T& rangemin, const T& rangemax) const {
330  for (int i = 0; i < size_used_; ++i) {
331  if (data_[i] < rangemin || rangemax < data_[i])
332  return false;
333  }
334  return true;
335  }
336 
337  protected:
338  // Internal recursive version of choose_nth_item.
339  int choose_nth_item(int target_index, int start, int end, unsigned int* seed);
340 
341  // Init the object, allocating size memory.
342  void init(int size);
343 
344  // We are assuming that the object generally placed in the
345  // vector are small enough that for efficiency it makes sense
346  // to start with a larger initial size.
347  static const int kDefaultVectorSize = 4;
348  int32_t size_used_;
349  int32_t size_reserved_;
350  T* data_;
352  // Mutable because Run method is not const
354 };
355 
356 namespace tesseract {
357 
358 // Function to read a GenericVector<char> from a whole file.
359 // Returns false on failure.
360 typedef bool (*FileReader)(const STRING& filename, GenericVector<char>* data);
361 // Function to write a GenericVector<char> to a whole file.
362 // Returns false on failure.
363 typedef bool (*FileWriter)(const GenericVector<char>& data,
364  const STRING& filename);
365 // The default FileReader loads the whole file into the vector of char,
366 // returning false on error.
367 inline bool LoadDataFromFile(const char* filename, GenericVector<char>* data) {
368  bool result = false;
369  FILE* fp = fopen(filename, "rb");
370  if (fp != nullptr) {
371  fseek(fp, 0, SEEK_END);
372  long size = ftell(fp);
373  fseek(fp, 0, SEEK_SET);
374  // Trying to open a directory on Linux sets size to LONG_MAX. Catch it here.
375  if (size > 0 && size < LONG_MAX) {
376  // reserve an extra byte in case caller wants to append a '\0' character
377  data->reserve(size + 1);
378  data->resize_no_init(size);
379  result = static_cast<long>(fread(&(*data)[0], 1, size, fp)) == size;
380  }
381  fclose(fp);
382  }
383  return result;
384 }
385 
386 inline bool LoadDataFromFile(const STRING& filename,
387  GenericVector<char>* data) {
388  return LoadDataFromFile(filename.string(), data);
389 }
390 
391 // The default FileWriter writes the vector of char to the filename file,
392 // returning false on error.
393 inline bool SaveDataToFile(const GenericVector<char>& data,
394  const STRING& filename) {
395  FILE* fp = fopen(filename.string(), "wb");
396  if (fp == nullptr) return false;
397  bool result =
398  static_cast<int>(fwrite(&data[0], 1, data.size(), fp)) == data.size();
399  fclose(fp);
400  return result;
401 }
402 // Reads a file as a vector of STRING.
403 inline bool LoadFileLinesToStrings(const STRING& filename,
404  GenericVector<STRING>* lines) {
405  GenericVector<char> data;
406  if (!LoadDataFromFile(filename.string(), &data)) {
407  return false;
408  }
409  STRING lines_str(&data[0], data.size());
410  lines_str.split('\n', lines);
411  return true;
412 }
413 
414 template <typename T>
415 bool cmp_eq(T const & t1, T const & t2) {
416  return t1 == t2;
417 }
418 
419 // Used by sort()
420 // return < 0 if t1 < t2
421 // return 0 if t1 == t2
422 // return > 0 if t1 > t2
423 template <typename T>
424 int sort_cmp(const void* t1, const void* t2) {
425  const T* a = static_cast<const T *> (t1);
426  const T* b = static_cast<const T *> (t2);
427  if (*a < *b) {
428  return -1;
429  } else if (*b < *a) {
430  return 1;
431  } else {
432  return 0;
433  }
434 }
435 
436 // Used by PointerVector::sort()
437 // return < 0 if t1 < t2
438 // return 0 if t1 == t2
439 // return > 0 if t1 > t2
440 template <typename T>
441 int sort_ptr_cmp(const void* t1, const void* t2) {
442  const T* a = *static_cast<T* const*>(t1);
443  const T* b = *static_cast<T* const*>(t2);
444  if (*a < *b) {
445  return -1;
446  } else if (*b < *a) {
447  return 1;
448  } else {
449  return 0;
450  }
451 }
452 
453 // Subclass for a vector of pointers. Use in preference to GenericVector<T*>
454 // as it provides automatic deletion and correct serialization, with the
455 // corollary that all copy operations are deep copies of the pointed-to objects.
456 template<typename T>
457 class PointerVector : public GenericVector<T*> {
458  public:
460  explicit PointerVector(int size) : GenericVector<T*>(size) { }
462  // Clear must be called here, even though it is called again by the base,
463  // as the base will call the wrong clear.
464  clear();
465  }
466  // Copy must be deep, as the pointers will be automatically deleted on
467  // destruction.
468  PointerVector(const PointerVector& other) : GenericVector<T*>(other) {
469  this->init(other.size());
470  this->operator+=(other);
471  }
473  this->reserve(this->size_used_ + other.size_used_);
474  for (int i = 0; i < other.size(); ++i) {
475  this->push_back(new T(*other.data_[i]));
476  }
477  return *this;
478  }
479 
481  if (&other != this) {
482  this->truncate(0);
483  this->operator+=(other);
484  }
485  return *this;
486  }
487 
488  // Removes an element at the given index and
489  // shifts the remaining elements to the left.
490  void remove(int index) {
491  delete GenericVector<T*>::data_[index];
493  }
494 
495  // Truncates the array to the given size by removing the end.
496  // If the current size is less, the array is not expanded.
497  void truncate(int size) {
498  for (int i = size; i < GenericVector<T*>::size_used_; ++i)
499  delete GenericVector<T*>::data_[i];
501  }
502 
503  // Compact the vector by deleting elements for which delete_cb returns
504  // true. delete_cb is a permanent callback and will be deleted.
506  int new_size = 0;
507  int old_index = 0;
508  // Until the callback returns true, the elements stay the same.
509  while (old_index < GenericVector<T*>::size_used_ &&
510  !delete_cb->Run(GenericVector<T*>::data_[old_index++]))
511  ++new_size;
512  // Now just copy anything else that gets false from delete_cb.
513  for (; old_index < GenericVector<T*>::size_used_; ++old_index) {
514  if (!delete_cb->Run(GenericVector<T*>::data_[old_index])) {
515  GenericVector<T*>::data_[new_size++] =
516  GenericVector<T*>::data_[old_index];
517  } else {
518  delete GenericVector<T*>::data_[old_index];
519  }
520  }
522  delete delete_cb;
523  }
524 
525  // Clear the array, calling the clear callback function if any.
526  // All the owned callbacks are also deleted.
527  // If you don't want the callbacks to be deleted, before calling clear, set
528  // the callback to nullptr.
529  void clear() {
532  }
533 
534  // Writes a vector of (pointers to) classes to the given file. Assumes the
535  // existence of bool T::Serialize(FILE*) const that returns false in case of
536  // error. There is no Serialize for simple types, as you would have a
537  // normal GenericVector of those.
538  // Returns false in case of error.
539  bool Serialize(FILE* fp) const {
540  int32_t used = GenericVector<T*>::size_used_;
541  if (fwrite(&used, sizeof(used), 1, fp) != 1) return false;
542  for (int i = 0; i < used; ++i) {
543  int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
544  if (fwrite(&non_null, sizeof(non_null), 1, fp) != 1) return false;
545  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
546  }
547  return true;
548  }
549  bool Serialize(TFile* fp) const {
550  int32_t used = GenericVector<T*>::size_used_;
551  if (fp->FWrite(&used, sizeof(used), 1) != 1) return false;
552  for (int i = 0; i < used; ++i) {
553  int8_t non_null = GenericVector<T*>::data_[i] != nullptr;
554  if (fp->FWrite(&non_null, sizeof(non_null), 1) != 1) return false;
555  if (non_null && !GenericVector<T*>::data_[i]->Serialize(fp)) return false;
556  }
557  return true;
558  }
559  // Reads a vector of (pointers to) classes to the given file. Assumes the
560  // existence of bool T::DeSerialize(bool, Tfile*) const that returns false in
561  // case of error. There is no Serialize for simple types, as you would have a
562  // normal GenericVector of those.
563  // If swap is true, assumes a big/little-endian swap is needed.
564  // Also needs T::T(), as new T is used in this function.
565  // Returns false in case of error.
566  bool DeSerialize(bool swap, FILE* fp) {
567  uint32_t reserved;
568  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
569  if (swap) Reverse32(&reserved);
570  // Arbitrarily limit the number of elements to protect against bad data.
571  assert(reserved <= UINT16_MAX);
572  if (reserved > UINT16_MAX) {
573  return false;
574  }
575  GenericVector<T*>::reserve(reserved);
576  truncate(0);
577  for (uint32_t i = 0; i < reserved; ++i) {
578  int8_t non_null;
579  if (fread(&non_null, sizeof(non_null), 1, fp) != 1) return false;
580  T* item = nullptr;
581  if (non_null) {
582  item = new T;
583  if (!item->DeSerialize(swap, fp)) {
584  delete item;
585  return false;
586  }
587  this->push_back(item);
588  } else {
589  // Null elements should keep their place in the vector.
590  this->push_back(nullptr);
591  }
592  }
593  return true;
594  }
595  bool DeSerialize(TFile* fp) {
596  int32_t reserved;
597  if (!DeSerializeSize(fp, &reserved)) return false;
598  GenericVector<T*>::reserve(reserved);
599  truncate(0);
600  for (int i = 0; i < reserved; ++i) {
601  if (!DeSerializeElement(fp)) return false;
602  }
603  return true;
604  }
605  // Enables deserialization of a selection of elements. Note that in order to
606  // retain the integrity of the stream, the caller must call some combination
607  // of DeSerializeElement and DeSerializeSkip of the exact number returned in
608  // *size, assuming a true return.
609  static bool DeSerializeSize(TFile* fp, int32_t* size) {
610  return fp->FReadEndian(size, sizeof(*size), 1) == 1;
611  }
612  // Reads and appends to the vector the next element of the serialization.
614  int8_t non_null;
615  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
616  T* item = nullptr;
617  if (non_null) {
618  item = new T;
619  if (!item->DeSerialize(fp)) {
620  delete item;
621  return false;
622  }
623  this->push_back(item);
624  } else {
625  // Null elements should keep their place in the vector.
626  this->push_back(nullptr);
627  }
628  return true;
629  }
630  // Skips the next element of the serialization.
631  static bool DeSerializeSkip(TFile* fp) {
632  int8_t non_null;
633  if (fp->FRead(&non_null, sizeof(non_null), 1) != 1) return false;
634  if (non_null) {
635  if (!T::SkipDeSerialize(fp)) return false;
636  }
637  return true;
638  }
639 
640  // Sorts the items pointed to by the members of this vector using
641  // t::operator<().
642  void sort() { this->GenericVector<T*>::sort(&sort_ptr_cmp<T>); }
643 };
644 
645 } // namespace tesseract
646 
647 // A useful vector that uses operator== to do comparisons.
648 template <typename T>
649 class GenericVectorEqEq : public GenericVector<T> {
650  public:
653  NewPermanentTessCallback(tesseract::cmp_eq<T>));
654  }
655  GenericVectorEqEq(int size) : GenericVector<T>(size) {
657  NewPermanentTessCallback(tesseract::cmp_eq<T>));
658  }
659 };
660 
661 template <typename T>
662 void GenericVector<T>::init(int size) {
663  size_used_ = 0;
664  if (size <= 0) {
665  data_ = nullptr;
666  size_reserved_ = 0;
667  } else {
668  if (size < kDefaultVectorSize) size = kDefaultVectorSize;
669  data_ = new T[size];
671  }
672  clear_cb_ = nullptr;
673  compare_cb_ = nullptr;
674 }
675 
676 template <typename T>
678  clear();
679 }
680 
681 // Reserve some memory. If the internal array contains elements, they are
682 // copied.
683 template <typename T>
685  if (size_reserved_ >= size || size <= 0)
686  return;
687  if (size < kDefaultVectorSize) size = kDefaultVectorSize;
688  T* new_array = new T[size];
689  for (int i = 0; i < size_used_; ++i)
690  new_array[i] = data_[i];
691  delete[] data_;
692  data_ = new_array;
694 }
695 
696 template <typename T>
698  if (size_reserved_ == 0) {
700  }
701  else {
702  reserve(2 * size_reserved_);
703  }
704 }
705 
706 // Resizes to size and sets all values to t.
707 template <typename T>
708 void GenericVector<T>::init_to_size(int size, const T& t) {
709  reserve(size);
710  size_used_ = size;
711  for (int i = 0; i < size; ++i)
712  data_[i] = t;
713 }
714 
715 
716 // Return the object from an index.
717 template <typename T>
718 T &GenericVector<T>::get(int index) const {
719  assert(index >= 0 && index < size_used_);
720  return data_[index];
721 }
722 
723 template <typename T>
724 T &GenericVector<T>::operator[](int index) const {
725  assert(index >= 0 && index < size_used_);
726  return data_[index];
727 }
728 
729 template <typename T>
731  assert(size_used_ > 0);
732  return data_[size_used_ - 1];
733 }
734 // Returns the last object and removes it.
735 template <typename T>
737  assert(size_used_ > 0);
738  return data_[--size_used_];
739 }
740 
741 // Return the object from an index.
742 template <typename T>
743 void GenericVector<T>::set(const T& t, int index) {
744  assert(index >= 0 && index < size_used_);
745  data_[index] = t;
746 }
747 
748 // Shifts the rest of the elements to the right to make
749 // space for the new elements and inserts the given element
750 // at the specified index.
751 template <typename T>
752 void GenericVector<T>::insert(const T& t, int index) {
753  assert(index >= 0 && index <= size_used_);
754  if (size_reserved_ == size_used_)
755  double_the_size();
756  for (int i = size_used_; i > index; --i) {
757  data_[i] = data_[i-1];
758  }
759  data_[index] = t;
760  size_used_++;
761 }
762 
763 // Removes an element at the given index and
764 // shifts the remaining elements to the left.
765 template <typename T>
766 void GenericVector<T>::remove(int index) {
767  assert(index >= 0 && index < size_used_);
768  for (int i = index; i < size_used_ - 1; ++i) {
769  data_[i] = data_[i+1];
770  }
771  size_used_--;
772 }
773 
774 // Return true if the index is valindex
775 template <typename T>
777  return index >= 0 && index < size_used_;
778 }
779 
780 // Return the index of the T object.
781 template <typename T>
782 int GenericVector<T>::get_index(const T& object) const {
783  for (int i = 0; i < size_used_; ++i) {
784  assert(compare_cb_ != nullptr);
785  if (compare_cb_->Run(object, data_[i]))
786  return i;
787  }
788  return -1;
789 }
790 
791 // Return true if T is in the array
792 template <typename T>
793 bool GenericVector<T>::contains(const T& object) const {
794  return get_index(object) != -1;
795 }
796 
797 // Add an element in the array
798 template <typename T>
800  int index = 0;
801  if (size_used_ == size_reserved_)
802  double_the_size();
803  index = size_used_++;
804  data_[index] = object;
805  return index;
806 }
807 
808 template <typename T>
809 int GenericVector<T>::push_back_new(const T& object) {
810  int index = get_index(object);
811  if (index >= 0)
812  return index;
813  return push_back(object);
814 }
815 
816 // Add an element in the array (front)
817 template <typename T>
818 int GenericVector<T>::push_front(const T& object) {
819  if (size_used_ == size_reserved_)
820  double_the_size();
821  for (int i = size_used_; i > 0; --i)
822  data_[i] = data_[i-1];
823  data_[0] = object;
824  ++size_used_;
825  return 0;
826 }
827 
828 template <typename T>
830  push_back(t);
831 }
832 
833 template <typename T>
835  this->reserve(size_used_ + other.size_used_);
836  for (int i = 0; i < other.size(); ++i) {
837  this->operator+=(other.data_[i]);
838  }
839  return *this;
840 }
841 
842 template <typename T>
844  if (&other != this) {
845  this->truncate(0);
846  this->operator+=(other);
847  }
848  return *this;
849 }
850 
851 // Add a callback to be called to delete the elements when the array took
852 // their ownership.
853 template <typename T>
855  clear_cb_ = cb;
856 }
857 
858 // Add a callback to be called to delete the elements when the array took
859 // their ownership.
860 template <typename T>
863  compare_cb_ = cb;
864 }
865 
866 // Clear the array, calling the callback function if any.
867 template <typename T>
869  if (size_reserved_ > 0 && clear_cb_ != nullptr) {
870  for (int i = 0; i < size_used_; ++i)
871  clear_cb_->Run(data_[i]);
872  }
873  delete[] data_;
874  data_ = nullptr;
875  size_used_ = 0;
876  size_reserved_ = 0;
877  delete clear_cb_;
878  clear_cb_ = nullptr;
879  delete compare_cb_;
880  compare_cb_ = nullptr;
881 }
882 
883 template <typename T>
885  for (int i = 0; i < size_used_; ++i) {
886  delete data_[i];
887  }
888 }
889 
890 
891 template <typename T>
893  FILE* f, TessResultCallback2<bool, FILE*, T const &>* cb) const {
894  if (fwrite(&size_reserved_, sizeof(size_reserved_), 1, f) != 1) return false;
895  if (fwrite(&size_used_, sizeof(size_used_), 1, f) != 1) return false;
896  if (cb != nullptr) {
897  for (int i = 0; i < size_used_; ++i) {
898  if (!cb->Run(f, data_[i])) {
899  delete cb;
900  return false;
901  }
902  }
903  delete cb;
904  } else {
905  if (fwrite(data_, sizeof(T), size_used_, f) != unsigned_size())
906  return false;
907  }
908  return true;
909 }
910 
911 template <typename T>
914  int32_t reserved;
915  if (f->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
916  reserve(reserved);
917  if (f->FReadEndian(&size_used_, sizeof(size_used_), 1) != 1) return false;
918  if (cb != nullptr) {
919  for (int i = 0; i < size_used_; ++i) {
920  if (!cb->Run(f, data_ + i)) {
921  delete cb;
922  return false;
923  }
924  }
925  delete cb;
926  } else {
927  if (f->FReadEndian(data_, sizeof(T), size_used_) != size_used_)
928  return false;
929  }
930  return true;
931 }
932 
933 // Writes a vector of simple types to the given file. Assumes that bitwise
934 // read/write of T will work. Returns false in case of error.
935 template <typename T>
936 bool GenericVector<T>::Serialize(FILE* fp) const {
937  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
938  if (fwrite(data_, sizeof(*data_), size_used_, fp) != unsigned_size())
939  return false;
940  return true;
941 }
942 template <typename T>
944  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
945  if (fp->FWrite(data_, sizeof(*data_), size_used_) != size_used_) return false;
946  return true;
947 }
948 
949 // Reads a vector of simple types from the given file. Assumes that bitwise
950 // read/write will work with ReverseN according to sizeof(T).
951 // Returns false in case of error.
952 // If swap is true, assumes a big/little-endian swap is needed.
953 template <typename T>
954 bool GenericVector<T>::DeSerialize(bool swap, FILE* fp) {
955  uint32_t reserved;
956  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
957  if (swap) Reverse32(&reserved);
958  // Arbitrarily limit the number of elements to protect against bad data.
959  assert(reserved <= UINT16_MAX);
960  if (reserved > UINT16_MAX) return false;
961  reserve(reserved);
962  size_used_ = reserved;
963  if (fread(data_, sizeof(T), size_used_, fp) != unsigned_size()) return false;
964  if (swap) {
965  for (int i = 0; i < size_used_; ++i)
966  ReverseN(&data_[i], sizeof(data_[i]));
967  }
968  return true;
969 }
970 template <typename T>
972  uint32_t reserved;
973  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
974  // Arbitrarily limit the number of elements to protect against bad data.
975  const uint32_t limit = 50000000;
976  assert(reserved <= limit);
977  if (reserved > limit) return false;
978  reserve(reserved);
979  size_used_ = reserved;
980  return fp->FReadEndian(data_, sizeof(T), size_used_) == size_used_;
981 }
982 template <typename T>
984  uint32_t reserved;
985  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
986  return (uint32_t)fp->FRead(nullptr, sizeof(T), reserved) == reserved;
987 }
988 
989 // Writes a vector of classes to the given file. Assumes the existence of
990 // bool T::Serialize(FILE* fp) const that returns false in case of error.
991 // Returns false in case of error.
992 template <typename T>
994  if (fwrite(&size_used_, sizeof(size_used_), 1, fp) != 1) return false;
995  for (int i = 0; i < size_used_; ++i) {
996  if (!data_[i].Serialize(fp)) return false;
997  }
998  return true;
999 }
1000 template <typename T>
1002  if (fp->FWrite(&size_used_, sizeof(size_used_), 1) != 1) return false;
1003  for (int i = 0; i < size_used_; ++i) {
1004  if (!data_[i].Serialize(fp)) return false;
1005  }
1006  return true;
1007 }
1008 
1009 // Reads a vector of classes from the given file. Assumes the existence of
1010 // bool T::Deserialize(bool swap, FILE* fp) that returns false in case of
1011 // error. Also needs T::T() and T::T(constT&), as init_to_size is used in
1012 // this function. Returns false in case of error.
1013 // If swap is true, assumes a big/little-endian swap is needed.
1014 template <typename T>
1015 bool GenericVector<T>::DeSerializeClasses(bool swap, FILE* fp) {
1016  int32_t reserved;
1017  if (fread(&reserved, sizeof(reserved), 1, fp) != 1) return false;
1018  if (swap) Reverse32(&reserved);
1019  T empty;
1020  init_to_size(reserved, empty);
1021  for (int i = 0; i < reserved; ++i) {
1022  if (!data_[i].DeSerialize(swap, fp)) return false;
1023  }
1024  return true;
1025 }
1026 template <typename T>
1028  int32_t reserved;
1029  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
1030  T empty;
1031  init_to_size(reserved, empty);
1032  for (int i = 0; i < reserved; ++i) {
1033  if (!data_[i].DeSerialize(fp)) return false;
1034  }
1035  return true;
1036 }
1037 template <typename T>
1039  int32_t reserved;
1040  if (fp->FReadEndian(&reserved, sizeof(reserved), 1) != 1) return false;
1041  for (int i = 0; i < reserved; ++i) {
1042  if (!T::SkipDeSerialize(fp)) return false;
1043  }
1044  return true;
1045 }
1046 
1047 // This method clear the current object, then, does a shallow copy of
1048 // its argument, and finally invalidates its argument.
1049 template <typename T>
1051  this->clear();
1052  this->data_ = from->data_;
1053  this->size_reserved_ = from->size_reserved_;
1054  this->size_used_ = from->size_used_;
1055  this->compare_cb_ = from->compare_cb_;
1056  this->clear_cb_ = from->clear_cb_;
1057  from->data_ = nullptr;
1058  from->clear_cb_ = nullptr;
1059  from->compare_cb_ = nullptr;
1060  from->size_used_ = 0;
1061  from->size_reserved_ = 0;
1062 }
1063 
1064 template <typename T>
1066  sort(&tesseract::sort_cmp<T>);
1067 }
1068 
1069 // Internal recursive version of choose_nth_item.
1070 // The algorithm used comes from "Algorithms" by Sedgewick:
1071 // http://books.google.com/books/about/Algorithms.html?id=idUdqdDXqnAC
1072 // The principle is to choose a random pivot, and move everything less than
1073 // the pivot to its left, and everything greater than the pivot to the end
1074 // of the array, then recurse on the part that contains the desired index, or
1075 // just return the answer if it is in the equal section in the middle.
1076 // The random pivot guarantees average linear time for the same reason that
1077 // n times vector::push_back takes linear time on average.
1078 // target_index, start and and end are all indices into the full array.
1079 // Seed is a seed for rand_r for thread safety purposes. Its value is
1080 // unimportant as the random numbers do not affect the result except
1081 // between equal answers.
1082 template <typename T>
1083 int GenericVector<T>::choose_nth_item(int target_index, int start, int end,
1084  unsigned int* seed) {
1085  // Number of elements to process.
1086  int num_elements = end - start;
1087  // Trivial cases.
1088  if (num_elements <= 1)
1089  return start;
1090  if (num_elements == 2) {
1091  if (data_[start] < data_[start + 1]) {
1092  return target_index > start ? start + 1 : start;
1093  } else {
1094  return target_index > start ? start : start + 1;
1095  }
1096  }
1097  // Place the pivot at start.
1098  #ifndef rand_r // _MSC_VER, ANDROID
1099  srand(*seed);
1100  #define rand_r(seed) rand()
1101  #endif // _MSC_VER
1102  int pivot = rand_r(seed) % num_elements + start;
1103  swap(pivot, start);
1104  // The invariant condition here is that items [start, next_lesser) are less
1105  // than the pivot (which is at index next_lesser) and items
1106  // [prev_greater, end) are greater than the pivot, with items
1107  // [next_lesser, prev_greater) being equal to the pivot.
1108  int next_lesser = start;
1109  int prev_greater = end;
1110  for (int next_sample = start + 1; next_sample < prev_greater;) {
1111  if (data_[next_sample] < data_[next_lesser]) {
1112  swap(next_lesser++, next_sample++);
1113  } else if (data_[next_sample] == data_[next_lesser]) {
1114  ++next_sample;
1115  } else {
1116  swap(--prev_greater, next_sample);
1117  }
1118  }
1119  // Now the invariant is set up, we recurse on just the section that contains
1120  // the desired index.
1121  if (target_index < next_lesser)
1122  return choose_nth_item(target_index, start, next_lesser, seed);
1123  else if (target_index < prev_greater)
1124  return next_lesser; // In equal bracket.
1125  else
1126  return choose_nth_item(target_index, prev_greater, end, seed);
1127 }
1128 
1129 
1130 #endif // TESSERACT_CCUTIL_GENERICVECTOR_H_
void sort()
Definition: genericvector.h:1065
Definition: resultiterator.h:33
GenericVector< T > & operator=(const GenericVector &other)
Definition: genericvector.h:843
void remove(int index)
Definition: genericvector.h:766
~PointerVector()
Definition: genericvector.h:461
bool LoadFileLinesToStrings(const STRING &filename, GenericVector< STRING > *lines)
Definition: genericvector.h:403
int sort_ptr_cmp(const void *t1, const void *t2)
Definition: genericvector.h:441
GenericVector(int size, const T &init_val)
Definition: genericvector.h:42
virtual R Run(A1, A2)=0
static const int kDefaultVectorSize
Definition: genericvector.h:347
GenericVectorEqEq()
Definition: genericvector.h:651
bool Serialize(TFile *fp) const
Definition: genericvector.h:549
int push_back_new(const T &object)
Definition: genericvector.h:809
bool DeSerialize(bool swap, FILE *fp)
Definition: genericvector.h:566
void set_compare_callback(TessResultCallback2< bool, T const &, T const &> *cb)
Definition: genericvector.h:861
void sort(int(*comparator)(const void *, const void *))
Definition: genericvector.h:231
void compact_sorted()
Definition: genericvector.h:265
bool SaveDataToFile(const GenericVector< char > &data, const STRING &filename)
Definition: genericvector.h:393
PointerVector< T > & operator+=(const PointerVector &other)
Definition: genericvector.h:472
static bool SkipDeSerialize(tesseract::TFile *fp)
Definition: genericvector.h:983
int push_back(T object)
Definition: genericvector.h:799
static T * double_the_size_memcpy(int current_size, T *data)
Definition: genericvector.h:207
void init(int size)
Definition: genericvector.h:662
void clear()
Definition: genericvector.h:868
Definition: serialis.h:77
T & get(int index) const
Definition: genericvector.h:718
int32_t size_reserved_
Definition: genericvector.h:349
void truncate(int size)
Definition: genericvector.h:136
int binary_search(const T &target) const
Definition: genericvector.h:250
bool SerializeClasses(FILE *fp) const
Definition: genericvector.h:993
Definition: baseapi.cpp:94
int FRead(void *buffer, size_t size, int count)
Definition: serialis.cpp:270
void swap(int index1, int index2)
Definition: genericvector.h:320
int choose_nth_item(int target_index)
Definition: genericvector.h:309
virtual R Run(A1)=0
bool write(FILE *f, TessResultCallback2< bool, FILE *, T const &> *cb) const
Definition: genericvector.h:892
T contains_index(int index) const
Definition: genericvector.h:776
PointerVector< T > & operator=(const PointerVector &other)
Definition: genericvector.h:480
void set(const T &t, int index)
Definition: genericvector.h:743
int push_front(const T &object)
Definition: genericvector.h:818
void move(GenericVector< T > *from)
Definition: genericvector.h:1050
const char * string() const
Definition: strngs.cpp:196
void split(const char c, GenericVector< STRING > *splited)
Definition: strngs.cpp:284
int sort_cmp(const void *t1, const void *t2)
Definition: genericvector.h:424
GenericVector< T > & operator+=(const GenericVector &other)
Definition: genericvector.h:834
void insert(const T &t, int index)
Definition: genericvector.h:752
int FReadEndian(void *buffer, size_t size, int count)
Definition: serialis.cpp:259
static bool SkipDeSerializeClasses(tesseract::TFile *fp)
Definition: genericvector.h:1038
bool bool_binary_search(const T &target) const
Definition: genericvector.h:238
void clear()
Definition: genericvector.h:529
int get_index(const T &object) const
Definition: genericvector.h:782
void delete_data_pointers()
Definition: genericvector.h:884
T & operator[](int index) const
Definition: genericvector.h:724
bool WithinBounds(const T &rangemin, const T &rangemax) const
Definition: genericvector.h:329
Definition: baseapi.h:37
bool Serialize(FILE *fp) const
Definition: genericvector.h:539
void sort()
Definition: genericvector.h:642
bool DeSerializeElement(TFile *fp)
Definition: genericvector.h:613
bool read(tesseract::TFile *f, TessResultCallback2< bool, tesseract::TFile *, T *> *cb)
Definition: genericvector.h:912
void compact(TessResultCallback1< bool, int > *delete_cb)
Definition: genericvector.h:282
Definition: tesscallback.h:1673
void compact(TessResultCallback1< bool, const T *> *delete_cb)
Definition: genericvector.h:505
bool DeSerialize(TFile *fp)
Definition: genericvector.h:595
T & back() const
Definition: genericvector.h:730
Definition: strngs.h:45
bool Serialize(FILE *fp) const
Definition: genericvector.h:936
void truncate(int size)
Definition: genericvector.h:497
size_t unsigned_size() const
Definition: genericvector.h:75
T * data_
Definition: genericvector.h:350
int FWrite(const void *buffer, size_t size, int count)
Definition: serialis.cpp:318
bool DeSerialize(bool swap, FILE *fp)
Definition: genericvector.h:954
int size() const
Definition: genericvector.h:71
Definition: genericvector.h:457
void reverse()
Definition: genericvector.h:215
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
Definition: genericvector.h:367
int32_t size_used_
Definition: genericvector.h:348
void init_to_size(int size, const T &t)
Definition: genericvector.h:708
static bool DeSerializeSize(TFile *fp, int32_t *size)
Definition: genericvector.h:609
static bool DeSerializeSkip(TFile *fp)
Definition: genericvector.h:631
PointerVector()
Definition: genericvector.h:459
PointerVector(const PointerVector &other)
Definition: genericvector.h:468
virtual void Run(A1)=0
Definition: blamer.h:43
T dot_product(const GenericVector< T > &other) const
Definition: genericvector.h:298
void reserve(int size)
Definition: genericvector.h:684
int size_reserved() const
Definition: genericvector.h:81
~GenericVector()
Definition: genericvector.h:677
int length() const
Definition: genericvector.h:85
bool cmp_eq(T const &t1, T const &t2)
Definition: genericvector.h:415
GenericVector()
Definition: genericvector.h:39
void set_clear_callback(TessCallback1< T > *cb)
Definition: genericvector.h:854
bool empty() const
Definition: genericvector.h:90
TessResultCallback2< bool, T const &, T const & > * compare_cb_
Definition: genericvector.h:353
GenericVector(const GenericVector &other)
Definition: genericvector.h:48
TessCallback1< T > * clear_cb_
Definition: genericvector.h:351
void resize_no_init(int size)
Definition: genericvector.h:65
T pop_back()
Definition: genericvector.h:736
bool contains(const T &object) const
Definition: genericvector.h:793
PointerVector(int size)
Definition: genericvector.h:460
void double_the_size()
Definition: genericvector.h:697
bool DeSerializeClasses(bool swap, FILE *fp)
Definition: genericvector.h:1015
GenericVectorEqEq(int size)
Definition: genericvector.h:655