1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 // Copyright (C) 2009-2012, International Business Machines
4 // Corporation and others. All Rights Reserved.
5 //
6 // Copyright 2007 Google Inc. All Rights Reserved.
7 // Author: sanjay@google.com (Sanjay Ghemawat)
8 //
9 // Abstract interface that consumes a sequence of bytes (ByteSink).
10 //
11 // Used so that we can write a single piece of code that can operate
12 // on a variety of output string types.
13 //
14 // Various implementations of this interface are provided:
15 //   ByteSink:
16 //      CheckedArrayByteSink    Write to a flat array, with bounds checking
17 //      StringByteSink          Write to an STL string
18 
19 // This code is a contribution of Google code, and the style used here is
20 // a compromise between the original Google code and the ICU coding guidelines.
21 // For example, data types are ICU-ified (size_t,int->int32_t),
22 // and API comments doxygen-ified, but function names and behavior are
23 // as in the original, if possible.
24 // Assertion-style error handling, not available in ICU, was changed to
25 // parameter "pinning" similar to UnicodeString.
26 //
27 // In addition, this is only a partial port of the original Google code,
28 // limited to what was needed so far. The (nearly) complete original code
29 // is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
30 // (see ICU ticket 6765, r25517).
31 
32 #ifndef __BYTESTREAM_H__
33 #define __BYTESTREAM_H__
34 
35 /**
36  * \file
37  * \brief C++ API: Interface for writing bytes, and implementation classes.
38  */
39 
40 #include "unicode/utypes.h"
41 
42 #if U_SHOW_CPLUSPLUS_API
43 
44 #include "unicode/uobject.h"
45 #include "unicode/std_string.h"
46 
47 U_NAMESPACE_BEGIN
48 
49 /**
50  * A ByteSink can be filled with bytes.
51  * @stable ICU 4.2
52  */
53 class U_COMMON_API ByteSink : public UMemory {
54 public:
55   /**
56    * Default constructor.
57    * @stable ICU 4.2
58    */
ByteSink()59   ByteSink() { }
60   /**
61    * Virtual destructor.
62    * @stable ICU 4.2
63    */
64   virtual ~ByteSink();
65 
66   /**
67    * Append "bytes[0,n-1]" to this.
68    * @param bytes the pointer to the bytes
69    * @param n the number of bytes; must be non-negative
70    * @stable ICU 4.2
71    */
72   virtual void Append(const char* bytes, int32_t n) = 0;
73 
74 #ifndef U_HIDE_DRAFT_API
75   /**
76    * Appends n bytes to this. Same as Append().
77    * Call AppendU8() with u8"string literals" which are const char * in C++11
78    * but const char8_t * in C++20.
79    * If the compiler does support char8_t as a distinct type,
80    * then an AppendU8() overload for that is defined and will be chosen.
81    *
82    * @param bytes the pointer to the bytes
83    * @param n the number of bytes; must be non-negative
84    * @draft ICU 67
85    */
AppendU8(const char * bytes,int32_t n)86   inline void AppendU8(const char* bytes, int32_t n) {
87     Append(bytes, n);
88   }
89 
90 #if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
91   /**
92    * Appends n bytes to this. Same as Append() but for a const char8_t * pointer.
93    * Call AppendU8() with u8"string literals" which are const char * in C++11
94    * but const char8_t * in C++20.
95    * If the compiler does support char8_t as a distinct type,
96    * then this AppendU8() overload for that is defined and will be chosen.
97    *
98    * @param bytes the pointer to the bytes
99    * @param n the number of bytes; must be non-negative
100    * @draft ICU 67
101    */
AppendU8(const char8_t * bytes,int32_t n)102   inline void AppendU8(const char8_t* bytes, int32_t n) {
103     Append(reinterpret_cast<const char*>(bytes), n);
104   }
105 #endif
106 #endif  // U_HIDE_DRAFT_API
107 
108   /**
109    * Returns a writable buffer for appending and writes the buffer's capacity to
110    * *result_capacity. Guarantees *result_capacity>=min_capacity.
111    * May return a pointer to the caller-owned scratch buffer which must have
112    * scratch_capacity>=min_capacity.
113    * The returned buffer is only valid until the next operation
114    * on this ByteSink.
115    *
116    * After writing at most *result_capacity bytes, call Append() with the
117    * pointer returned from this function and the number of bytes written.
118    * Many Append() implementations will avoid copying bytes if this function
119    * returned an internal buffer.
120    *
121    * Partial usage example:
122    *  int32_t capacity;
123    *  char* buffer = sink->GetAppendBuffer(..., &capacity);
124    *  ... Write n bytes into buffer, with n <= capacity.
125    *  sink->Append(buffer, n);
126    * In many implementations, that call to Append will avoid copying bytes.
127    *
128    * If the ByteSink allocates or reallocates an internal buffer, it should use
129    * the desired_capacity_hint if appropriate.
130    * If a caller cannot provide a reasonable guess at the desired capacity,
131    * it should pass desired_capacity_hint=0.
132    *
133    * If a non-scratch buffer is returned, the caller may only pass
134    * a prefix to it to Append().
135    * That is, it is not correct to pass an interior pointer to Append().
136    *
137    * The default implementation always returns the scratch buffer.
138    *
139    * @param min_capacity required minimum capacity of the returned buffer;
140    *                     must be non-negative
141    * @param desired_capacity_hint desired capacity of the returned buffer;
142    *                              must be non-negative
143    * @param scratch default caller-owned buffer
144    * @param scratch_capacity capacity of the scratch buffer
145    * @param result_capacity pointer to an integer which will be set to the
146    *                        capacity of the returned buffer
147    * @return a buffer with *result_capacity>=min_capacity
148    * @stable ICU 4.2
149    */
150   virtual char* GetAppendBuffer(int32_t min_capacity,
151                                 int32_t desired_capacity_hint,
152                                 char* scratch, int32_t scratch_capacity,
153                                 int32_t* result_capacity);
154 
155   /**
156    * Flush internal buffers.
157    * Some byte sinks use internal buffers or provide buffering
158    * and require calling Flush() at the end of the stream.
159    * The ByteSink should be ready for further Append() calls after Flush().
160    * The default implementation of Flush() does nothing.
161    * @stable ICU 4.2
162    */
163   virtual void Flush();
164 
165 private:
166   ByteSink(const ByteSink &) = delete;
167   ByteSink &operator=(const ByteSink &) = delete;
168 };
169 
170 // -------------------------------------------------------------
171 // Some standard implementations
172 
173 /**
174  * Implementation of ByteSink that writes to a flat byte array,
175  * with bounds-checking:
176  * This sink will not write more than capacity bytes to outbuf.
177  * If more than capacity bytes are Append()ed, then excess bytes are ignored,
178  * and Overflowed() will return true.
179  * Overflow does not cause a runtime error.
180  * @stable ICU 4.2
181  */
182 class U_COMMON_API CheckedArrayByteSink : public ByteSink {
183 public:
184   /**
185    * Constructs a ByteSink that will write to outbuf[0..capacity-1].
186    * @param outbuf buffer to write to
187    * @param capacity size of the buffer
188    * @stable ICU 4.2
189    */
190   CheckedArrayByteSink(char* outbuf, int32_t capacity);
191   /**
192    * Destructor.
193    * @stable ICU 4.2
194    */
195   virtual ~CheckedArrayByteSink();
196   /**
197    * Returns the sink to its original state, without modifying the buffer.
198    * Useful for reusing both the buffer and the sink for multiple streams.
199    * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0
200    * and Overflowed()=FALSE.
201    * @return *this
202    * @stable ICU 4.6
203    */
204   virtual CheckedArrayByteSink& Reset();
205   /**
206    * Append "bytes[0,n-1]" to this.
207    * @param bytes the pointer to the bytes
208    * @param n the number of bytes; must be non-negative
209    * @stable ICU 4.2
210    */
211   virtual void Append(const char* bytes, int32_t n);
212   /**
213    * Returns a writable buffer for appending and writes the buffer's capacity to
214    * *result_capacity. For details see the base class documentation.
215    * @param min_capacity required minimum capacity of the returned buffer;
216    *                     must be non-negative
217    * @param desired_capacity_hint desired capacity of the returned buffer;
218    *                              must be non-negative
219    * @param scratch default caller-owned buffer
220    * @param scratch_capacity capacity of the scratch buffer
221    * @param result_capacity pointer to an integer which will be set to the
222    *                        capacity of the returned buffer
223    * @return a buffer with *result_capacity>=min_capacity
224    * @stable ICU 4.2
225    */
226   virtual char* GetAppendBuffer(int32_t min_capacity,
227                                 int32_t desired_capacity_hint,
228                                 char* scratch, int32_t scratch_capacity,
229                                 int32_t* result_capacity);
230   /**
231    * Returns the number of bytes actually written to the sink.
232    * @return number of bytes written to the buffer
233    * @stable ICU 4.2
234    */
NumberOfBytesWritten()235   int32_t NumberOfBytesWritten() const { return size_; }
236   /**
237    * Returns true if any bytes were discarded, i.e., if there was an
238    * attempt to write more than 'capacity' bytes.
239    * @return TRUE if more than 'capacity' bytes were Append()ed
240    * @stable ICU 4.2
241    */
Overflowed()242   UBool Overflowed() const { return overflowed_; }
243   /**
244    * Returns the number of bytes appended to the sink.
245    * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten()
246    * else they return the same number.
247    * @return number of bytes written to the buffer
248    * @stable ICU 4.6
249    */
NumberOfBytesAppended()250   int32_t NumberOfBytesAppended() const { return appended_; }
251 private:
252   char* outbuf_;
253   const int32_t capacity_;
254   int32_t size_;
255   int32_t appended_;
256   UBool overflowed_;
257 
258   CheckedArrayByteSink() = delete;
259   CheckedArrayByteSink(const CheckedArrayByteSink &) = delete;
260   CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
261 };
262 
263 /**
264  * Implementation of ByteSink that writes to a "string".
265  * The StringClass is usually instantiated with a std::string.
266  * @stable ICU 4.2
267  */
268 template<typename StringClass>
269 class StringByteSink : public ByteSink {
270  public:
271   /**
272    * Constructs a ByteSink that will append bytes to the dest string.
273    * @param dest pointer to string object to append to
274    * @stable ICU 4.2
275    */
StringByteSink(StringClass * dest)276   StringByteSink(StringClass* dest) : dest_(dest) { }
277   /**
278    * Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
279    *
280    * @param dest pointer to string object to append to
281    * @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
282    * @stable ICU 60
283    */
StringByteSink(StringClass * dest,int32_t initialAppendCapacity)284   StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
285     if (initialAppendCapacity > 0 &&
286         (uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) {
287       dest->reserve(dest->length() + initialAppendCapacity);
288     }
289   }
290   /**
291    * Append "bytes[0,n-1]" to this.
292    * @param data the pointer to the bytes
293    * @param n the number of bytes; must be non-negative
294    * @stable ICU 4.2
295    */
Append(const char * data,int32_t n)296   virtual void Append(const char* data, int32_t n) { dest_->append(data, n); }
297  private:
298   StringClass* dest_;
299 
300   StringByteSink() = delete;
301   StringByteSink(const StringByteSink &) = delete;
302   StringByteSink &operator=(const StringByteSink &) = delete;
303 };
304 
305 U_NAMESPACE_END
306 
307 #endif /* U_SHOW_CPLUSPLUS_API */
308 
309 #endif  // __BYTESTREAM_H__
310