tesseract  v4.0.0-17-g361f3264
Open Source OCR Engine
strngs.h
1 /**********************************************************************
2  * File: strngs.h (Formerly strings.h)
3  * Description: STRING class definition.
4  * Author: Ray Smith
5  * Created: Fri Feb 15 09:15:01 GMT 1991
6  *
7  * (C) Copyright 1991, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifndef STRNGS_H
21 #define STRNGS_H
22 
23 #include <cassert> // for assert
24 #include <cstdint> // for uint32_t
25 #include <cstdio> // for FILE
26 #include <cstring> // for strncpy
27 #include "platform.h" // for TESS_API
28 
29 namespace tesseract {
30 class TFile;
31 } // namespace tesseract.
32 
33 // STRING_IS_PROTECTED means that string[index] = X is invalid
34 // because you have to go through the STRING interface to modify it.
35 // This allows the string to ensure internal integrity and maintain
36 // its own string length. Unfortunately this is not possible because
37 // STRINGS are used as direct-manipulation data buffers for things
38 // like length arrays and many places cast away the const on string()
39 // to mutate the string. Turning this off means that internally we
40 // cannot assume we know the strlen.
41 #define STRING_IS_PROTECTED 0
42 
43 template <typename T> class GenericVector;
44 
45 class TESS_API STRING
46 {
47  public:
48  STRING();
49  STRING(const STRING &string);
50  STRING(const char *string);
51  STRING(const char *data, int length);
52  ~STRING();
53 
54  // Writes to the given file. Returns false in case of error.
55  bool Serialize(FILE* fp) const;
56  // Reads from the given file. Returns false in case of error.
57  // If swap is true, assumes a big/little-endian swap is needed.
58  bool DeSerialize(bool swap, FILE* fp);
59  // Writes to the given file. Returns false in case of error.
60  bool Serialize(tesseract::TFile* fp) const;
61  // Reads from the given file. Returns false in case of error.
62  // If swap is true, assumes a big/little-endian swap is needed.
63  bool DeSerialize(tesseract::TFile* fp);
64  // As DeSerialize, but only seeks past the data - hence a static method.
65  static bool SkipDeSerialize(tesseract::TFile* fp);
66 
67  bool contains(const char c) const;
68  int32_t length() const;
69  int32_t size() const { return length(); }
70  // Workaround to avoid g++ -Wsign-compare warnings.
71  uint32_t unsigned_size() const {
72  const int32_t len = length();
73  assert(0 <= len);
74  return static_cast<uint32_t>(len);
75  }
76  const char *string() const;
77  const char *c_str() const;
78 
79  inline char* strdup() const {
80  int32_t len = length() + 1;
81  return strncpy(new char[len], GetCStr(), len);
82  }
83 
84 #if STRING_IS_PROTECTED
85  const char &operator[] (int32_t index) const;
86  // len is number of chars in s to insert starting at index in this string
87  void insert_range(int32_t index, const char*s, int len);
88  void erase_range(int32_t index, int len);
89 #else
90  char &operator[] (int32_t index) const;
91 #endif
92  void split(const char c, GenericVector<STRING> *splited);
93  void truncate_at(int32_t index);
94 
95  bool operator== (const STRING & string) const;
96  bool operator!= (const STRING & string) const;
97  bool operator!= (const char *string) const;
98 
99  STRING & operator= (const char *string);
100  STRING & operator= (const STRING & string);
101 
102  STRING operator+ (const STRING & string) const;
103  STRING operator+ (const char ch) const;
104 
105  STRING & operator+= (const char *string);
106  STRING & operator+= (const STRING & string);
107  STRING & operator+= (const char ch);
108 
109  // Assignment for strings which are not null-terminated.
110  void assign(const char *cstr, int len);
111 
112  // Appends the given string and int (as a %d) to this.
113  // += cannot be used for ints as there as a char += operator that would
114  // be ambiguous, and ints usually need a string before or between them
115  // anyway.
116  void add_str_int(const char* str, int number);
117  // Appends the given string and double (as a %.8g) to this.
118  void add_str_double(const char* str, double number);
119 
120  // ensure capacity but keep pointer encapsulated
121  inline void ensure(int32_t min_capacity) { ensure_cstr(min_capacity); }
122 
123  private:
124  typedef struct STRING_HEADER {
125  // How much space was allocated in the string buffer for char data.
127 
128  // used_ is how much of the capacity is currently being used,
129  // including a '\0' terminator.
130  //
131  // If used_ is 0 then string is nullptr (not even the '\0')
132  // else if used_ > 0 then it is strlen() + 1 (because it includes '\0')
133  // else strlen is >= 0 (not nullptr) but needs to be computed.
134  // this condition is set when encapsulation is violated because
135  // an API returned a mutable string.
136  //
137  // capacity_ - used_ = excess capacity that the string can grow
138  // without reallocating
139  mutable int used_;
140  } STRING_HEADER;
141 
142  // To preserve the behavior of the old serialization, we only have space
143  // for one pointer in this structure. So we are embedding a data structure
144  // at the start of the storage that will hold additional state variables,
145  // then storing the actual string contents immediately after.
147 
148  // returns the header part of the storage
150  return data_;
151  }
152  inline const STRING_HEADER* GetHeader() const {
153  return data_;
154  }
155 
156  // returns the string data part of storage
157  inline char* GetCStr() { return ((char*)data_) + sizeof(STRING_HEADER); }
158 
159  inline const char* GetCStr() const {
160  return ((const char *)data_) + sizeof(STRING_HEADER);
161  }
162  inline bool InvariantOk() const {
163 #if STRING_IS_PROTECTED
164  return (GetHeader()->used_ == 0) ?
165  (string() == nullptr) : (GetHeader()->used_ == (strlen(string()) + 1));
166 #else
167  return true;
168 #endif
169  }
170 
171  // Ensure string has requested capacity as optimization
172  // to avoid unnecessary reallocations.
173  // The return value is a cstr buffer with at least requested capacity
174  char* ensure_cstr(int32_t min_capacity);
175 
176  void FixHeader() const; // make used_ non-negative, even if const
177 
178  char* AllocData(int used, int capacity);
179  void DiscardData();
180 };
181 #endif
char * GetCStr()
Definition: strngs.h:157
int capacity_
Definition: strngs.h:126
void ensure(int32_t min_capacity)
Definition: strngs.h:121
Definition: serialis.h:77
Definition: baseapi.cpp:94
int used_
Definition: strngs.h:139
Definition: strngs.h:124
STRING_HEADER * data_
Definition: strngs.h:146
int32_t size() const
Definition: strngs.h:69
uint32_t unsigned_size() const
Definition: strngs.h:71
bool Serialize(FILE *fp, const char *data, size_t n)
Definition: serialis.cpp:59
const STRING_HEADER * GetHeader() const
Definition: strngs.h:152
const char * GetCStr() const
Definition: strngs.h:159
Definition: baseapi.h:37
bool InvariantOk() const
Definition: strngs.h:162
Definition: strngs.h:45
char * strdup() const
Definition: strngs.h:79
bool DeSerialize(FILE *fp, char *data, size_t n)
Definition: serialis.cpp:27
STRING_HEADER * GetHeader()
Definition: strngs.h:149