1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "nsUnicharInputStream.h"
8 #include "nsIInputStream.h"
9 #include "nsIServiceManager.h"
10 #include "nsString.h"
11 #include "nsTArray.h"
12 #include "nsAutoPtr.h"
13 #include "nsCRT.h"
14 #include "nsStreamUtils.h"
15 #include "nsUTF8Utils.h"
16 #include "mozilla/Attributes.h"
17 #include <fcntl.h>
18 #if defined(XP_WIN)
19 #include <io.h>
20 #else
21 #include <unistd.h>
22 #endif
23
// Capacity, in elements, requested by UTF8InputStream::Init for both its raw
// byte buffer (mByteData) and its UTF-16 buffer (mUnicharData).
#define STRING_BUFFER_SIZE 8192
25
26 class StringUnicharInputStream final : public nsIUnicharInputStream {
27 public:
StringUnicharInputStream(const nsAString & aString)28 explicit StringUnicharInputStream(const nsAString& aString)
29 : mString(aString), mPos(0), mLen(aString.Length()) {}
30
31 NS_DECL_ISUPPORTS
32 NS_DECL_NSIUNICHARINPUTSTREAM
33
34 nsString mString;
35 uint32_t mPos;
36 uint32_t mLen;
37
38 private:
~StringUnicharInputStream()39 ~StringUnicharInputStream() {}
40 };
41
42 NS_IMETHODIMP
Read(char16_t * aBuf,uint32_t aCount,uint32_t * aReadCount)43 StringUnicharInputStream::Read(char16_t* aBuf, uint32_t aCount,
44 uint32_t* aReadCount) {
45 if (mPos >= mLen) {
46 *aReadCount = 0;
47 return NS_OK;
48 }
49 nsAString::const_iterator iter;
50 mString.BeginReading(iter);
51 const char16_t* us = iter.get();
52 uint32_t amount = mLen - mPos;
53 if (amount > aCount) {
54 amount = aCount;
55 }
56 memcpy(aBuf, us + mPos, sizeof(char16_t) * amount);
57 mPos += amount;
58 *aReadCount = amount;
59 return NS_OK;
60 }
61
62 NS_IMETHODIMP
ReadSegments(nsWriteUnicharSegmentFun aWriter,void * aClosure,uint32_t aCount,uint32_t * aReadCount)63 StringUnicharInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
64 void* aClosure, uint32_t aCount,
65 uint32_t* aReadCount) {
66 uint32_t bytesWritten;
67 uint32_t totalBytesWritten = 0;
68
69 nsresult rv;
70 aCount = XPCOM_MIN(mString.Length() - mPos, aCount);
71
72 nsAString::const_iterator iter;
73 mString.BeginReading(iter);
74
75 while (aCount) {
76 rv = aWriter(this, aClosure, iter.get() + mPos, totalBytesWritten, aCount,
77 &bytesWritten);
78
79 if (NS_FAILED(rv)) {
80 // don't propagate errors to the caller
81 break;
82 }
83
84 aCount -= bytesWritten;
85 totalBytesWritten += bytesWritten;
86 mPos += bytesWritten;
87 }
88
89 *aReadCount = totalBytesWritten;
90
91 return NS_OK;
92 }
93
94 NS_IMETHODIMP
ReadString(uint32_t aCount,nsAString & aString,uint32_t * aReadCount)95 StringUnicharInputStream::ReadString(uint32_t aCount, nsAString& aString,
96 uint32_t* aReadCount) {
97 if (mPos >= mLen) {
98 *aReadCount = 0;
99 return NS_OK;
100 }
101 uint32_t amount = mLen - mPos;
102 if (amount > aCount) {
103 amount = aCount;
104 }
105 aString = Substring(mString, mPos, amount);
106 mPos += amount;
107 *aReadCount = amount;
108 return NS_OK;
109 }
110
// Marks the stream as fully consumed by moving the read position to the
// recorded length, so later reads return no data. mString itself is kept.
// Always returns NS_OK.
nsresult StringUnicharInputStream::Close() {
  mPos = mLen;
  return NS_OK;
}
115
// Supplies the nsISupports implementation for StringUnicharInputStream
// (declared in the class via NS_DECL_ISUPPORTS), listing
// nsIUnicharInputStream as its interface.
NS_IMPL_ISUPPORTS(StringUnicharInputStream, nsIUnicharInputStream)
117
118 //----------------------------------------------------------------------
119
120 class UTF8InputStream final : public nsIUnicharInputStream {
121 public:
122 UTF8InputStream();
123 nsresult Init(nsIInputStream* aStream);
124
125 NS_DECL_ISUPPORTS
126 NS_DECL_NSIUNICHARINPUTSTREAM
127
128 private:
129 ~UTF8InputStream();
130
131 protected:
132 int32_t Fill(nsresult* aErrorCode);
133
134 static void CountValidUTF8Bytes(const char* aBuf, uint32_t aMaxBytes,
135 uint32_t& aValidUTF8bytes,
136 uint32_t& aValidUTF16CodeUnits);
137
138 nsCOMPtr<nsIInputStream> mInput;
139 FallibleTArray<char> mByteData;
140 FallibleTArray<char16_t> mUnicharData;
141
142 uint32_t mByteDataOffset;
143 uint32_t mUnicharDataOffset;
144 uint32_t mUnicharDataLength;
145 };
146
// Starts with every offset and length at zero. No buffer capacity is
// requested and no input stream is attached here; Init() does both.
UTF8InputStream::UTF8InputStream()
    : mByteDataOffset(0), mUnicharDataOffset(0), mUnicharDataLength(0) {}
149
Init(nsIInputStream * aStream)150 nsresult UTF8InputStream::Init(nsIInputStream* aStream) {
151 if (!mByteData.SetCapacity(STRING_BUFFER_SIZE, mozilla::fallible) ||
152 !mUnicharData.SetCapacity(STRING_BUFFER_SIZE, mozilla::fallible)) {
153 return NS_ERROR_OUT_OF_MEMORY;
154 }
155 mInput = aStream;
156
157 return NS_OK;
158 }
159
// Supplies the nsISupports implementation for UTF8InputStream (declared in
// the class via NS_DECL_ISUPPORTS), listing nsIUnicharInputStream as its
// interface.
NS_IMPL_ISUPPORTS(UTF8InputStream, nsIUnicharInputStream)
161
// Destruction goes through Close(), which drops the wrapped stream and
// clears both buffers.
UTF8InputStream::~UTF8InputStream() { Close(); }
163
Close()164 nsresult UTF8InputStream::Close() {
165 mInput = nullptr;
166 mByteData.Clear();
167 mUnicharData.Clear();
168 return NS_OK;
169 }
170
Read(char16_t * aBuf,uint32_t aCount,uint32_t * aReadCount)171 nsresult UTF8InputStream::Read(char16_t* aBuf, uint32_t aCount,
172 uint32_t* aReadCount) {
173 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
174 uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
175 nsresult errorCode;
176 if (0 == readCount) {
177 // Fill the unichar buffer
178 int32_t bytesRead = Fill(&errorCode);
179 if (bytesRead <= 0) {
180 *aReadCount = 0;
181 return errorCode;
182 }
183 readCount = bytesRead;
184 }
185 if (readCount > aCount) {
186 readCount = aCount;
187 }
188 memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset,
189 readCount * sizeof(char16_t));
190 mUnicharDataOffset += readCount;
191 *aReadCount = readCount;
192 return NS_OK;
193 }
194
195 NS_IMETHODIMP
ReadSegments(nsWriteUnicharSegmentFun aWriter,void * aClosure,uint32_t aCount,uint32_t * aReadCount)196 UTF8InputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, void* aClosure,
197 uint32_t aCount, uint32_t* aReadCount) {
198 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
199 uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
200 nsresult rv = NS_OK;
201 if (0 == bytesToWrite) {
202 // Fill the unichar buffer
203 int32_t bytesRead = Fill(&rv);
204 if (bytesRead <= 0) {
205 *aReadCount = 0;
206 return rv;
207 }
208 bytesToWrite = bytesRead;
209 }
210
211 if (bytesToWrite > aCount) {
212 bytesToWrite = aCount;
213 }
214
215 uint32_t bytesWritten;
216 uint32_t totalBytesWritten = 0;
217
218 while (bytesToWrite) {
219 rv = aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset,
220 totalBytesWritten, bytesToWrite, &bytesWritten);
221
222 if (NS_FAILED(rv)) {
223 // don't propagate errors to the caller
224 break;
225 }
226
227 bytesToWrite -= bytesWritten;
228 totalBytesWritten += bytesWritten;
229 mUnicharDataOffset += bytesWritten;
230 }
231
232 *aReadCount = totalBytesWritten;
233
234 return NS_OK;
235 }
236
237 NS_IMETHODIMP
ReadString(uint32_t aCount,nsAString & aString,uint32_t * aReadCount)238 UTF8InputStream::ReadString(uint32_t aCount, nsAString& aString,
239 uint32_t* aReadCount) {
240 NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
241 uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
242 nsresult errorCode;
243 if (0 == readCount) {
244 // Fill the unichar buffer
245 int32_t bytesRead = Fill(&errorCode);
246 if (bytesRead <= 0) {
247 *aReadCount = 0;
248 return errorCode;
249 }
250 readCount = bytesRead;
251 }
252 if (readCount > aCount) {
253 readCount = aCount;
254 }
255 const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset;
256 aString.Assign(buf, readCount);
257
258 mUnicharDataOffset += readCount;
259 *aReadCount = readCount;
260 return NS_OK;
261 }
262
// Refills mUnicharData with the next chunk of converted input.
//
// Asks NS_FillArray for more bytes from mInput, has CountValidUTF8Bytes
// measure the usable prefix of the byte buffer, and converts exactly that
// prefix to UTF-16.
//
// @param aErrorCode  receives NS_BASE_STREAM_CLOSED if the stream was closed,
//                    NS_BASE_STREAM_BAD_CONVERSION if the converter produced
//                    a different number of code units than was counted, and
//                    otherwise whatever NS_FillArray returned.
// @return the number of UTF-16 code units now available in mUnicharData;
//         0 if no new bytes arrived or the counted output would not fit in
//         mUnicharData; -1 for the two error cases above.
int32_t UTF8InputStream::Fill(nsresult* aErrorCode) {
  if (!mInput) {
    // We already closed the stream!
    *aErrorCode = NS_BASE_STREAM_CLOSED;
    return -1;
  }

  NS_ASSERTION(mByteData.Length() >= mByteDataOffset, "unsigned madness");
  // Bytes left unconverted by the previous call.
  uint32_t remainder = mByteData.Length() - mByteDataOffset;
  mByteDataOffset = remainder;
  uint32_t nb;
  // NOTE(review): `remainder` is presumably the number of trailing bytes
  // NS_FillArray keeps at the front of the buffer, and `nb` the number of
  // newly read bytes -- confirm against NS_FillArray in nsStreamUtils.
  *aErrorCode = NS_FillArray(mByteData, mInput, remainder, &nb);
  if (nb == 0) {
    // Because we assume a many to one conversion, the lingering data
    // in the byte buffer must be a partial conversion
    // fragment. Because we know that we have received no more new
    // data to add to it, we can't convert it. Therefore, we discard
    // it.
    return nb;
  }
  NS_ASSERTION(remainder + nb == mByteData.Length(), "bad nb");

  // Now convert as much of the byte buffer to unicode as possible
  uint32_t srcLen, dstLen;
  CountValidUTF8Bytes(mByteData.Elements(), remainder + nb, srcLen, dstLen);

  // the number of UCS2 characters should always be <= the number of
  // UTF8 chars
  NS_ASSERTION(remainder + nb >= srcLen, "cannot be longer than out buffer");
  NS_ASSERTION(dstLen <= mUnicharData.Capacity(),
               "Ouch. I would overflow my buffer if I wasn't so careful.");
  if (dstLen > mUnicharData.Capacity()) {
    return 0;
  }

  // The converter writes straight into mUnicharData's storage.
  ConvertUTF8toUTF16 converter(mUnicharData.Elements());

  nsACString::const_char_iterator start = mByteData.Elements();
  nsACString::const_char_iterator end = mByteData.Elements() + srcLen;

  copy_string(start, end, converter);
  if (converter.Length() != dstLen) {
    // The converter disagreed with the count computed above.
    *aErrorCode = NS_BASE_STREAM_BAD_CONVERSION;
    return -1;
  }

  // Publish the freshly converted data and remember how many bytes it used.
  mUnicharDataOffset = 0;
  mUnicharDataLength = dstLen;
  mByteDataOffset = srcLen;

  return dstLen;
}
315
CountValidUTF8Bytes(const char * aBuffer,uint32_t aMaxBytes,uint32_t & aValidUTF8bytes,uint32_t & aValidUTF16CodeUnits)316 void UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer,
317 uint32_t aMaxBytes,
318 uint32_t& aValidUTF8bytes,
319 uint32_t& aValidUTF16CodeUnits) {
320 const char* c = aBuffer;
321 const char* end = aBuffer + aMaxBytes;
322 const char* lastchar = c; // pre-initialize in case of 0-length buffer
323 uint32_t utf16length = 0;
324 while (c < end && *c) {
325 lastchar = c;
326 utf16length++;
327
328 if (UTF8traits::isASCII(*c)) {
329 c++;
330 } else if (UTF8traits::is2byte(*c)) {
331 c += 2;
332 } else if (UTF8traits::is3byte(*c)) {
333 c += 3;
334 } else if (UTF8traits::is4byte(*c)) {
335 c += 4;
336 utf16length++; // add 1 more because this will be converted to a
337 // surrogate pair.
338 } else if (UTF8traits::is5byte(*c)) {
339 c += 5;
340 } else if (UTF8traits::is6byte(*c)) {
341 c += 6;
342 } else {
343 NS_WARNING(
344 "Unrecognized UTF8 string in UTF8InputStream::CountValidUTF8Bytes()");
345 break; // Otherwise we go into an infinite loop. But what happens now?
346 }
347 }
348 if (c > end) {
349 c = lastchar;
350 utf16length--;
351 }
352
353 aValidUTF8bytes = c - aBuffer;
354 aValidUTF16CodeUnits = utf16length;
355 }
356
NS_NewUnicharInputStream(nsIInputStream * aStreamToWrap,nsIUnicharInputStream ** aResult)357 nsresult NS_NewUnicharInputStream(nsIInputStream* aStreamToWrap,
358 nsIUnicharInputStream** aResult) {
359 *aResult = nullptr;
360
361 // Create converter input stream
362 RefPtr<UTF8InputStream> it = new UTF8InputStream();
363 nsresult rv = it->Init(aStreamToWrap);
364 if (NS_FAILED(rv)) {
365 return rv;
366 }
367
368 it.forget(aResult);
369 return NS_OK;
370 }
371