1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "nsUnicharInputStream.h"
8 #include "nsIInputStream.h"
9 #include "nsIServiceManager.h"
10 #include "nsString.h"
11 #include "nsTArray.h"
12 #include "nsAutoPtr.h"
13 #include "nsCRT.h"
14 #include "nsStreamUtils.h"
15 #include "nsUTF8Utils.h"
16 #include "mozilla/Attributes.h"
17 #include <fcntl.h>
18 #if defined(XP_WIN)
19 #include <io.h>
20 #else
21 #include <unistd.h>
22 #endif
23 
24 #define STRING_BUFFER_SIZE 8192
25 
26 class StringUnicharInputStream final : public nsIUnicharInputStream {
27  public:
StringUnicharInputStream(const nsAString & aString)28   explicit StringUnicharInputStream(const nsAString& aString)
29       : mString(aString), mPos(0), mLen(aString.Length()) {}
30 
31   NS_DECL_ISUPPORTS
32   NS_DECL_NSIUNICHARINPUTSTREAM
33 
34   nsString mString;
35   uint32_t mPos;
36   uint32_t mLen;
37 
38  private:
~StringUnicharInputStream()39   ~StringUnicharInputStream() {}
40 };
41 
42 NS_IMETHODIMP
Read(char16_t * aBuf,uint32_t aCount,uint32_t * aReadCount)43 StringUnicharInputStream::Read(char16_t* aBuf, uint32_t aCount,
44                                uint32_t* aReadCount) {
45   if (mPos >= mLen) {
46     *aReadCount = 0;
47     return NS_OK;
48   }
49   nsAString::const_iterator iter;
50   mString.BeginReading(iter);
51   const char16_t* us = iter.get();
52   uint32_t amount = mLen - mPos;
53   if (amount > aCount) {
54     amount = aCount;
55   }
56   memcpy(aBuf, us + mPos, sizeof(char16_t) * amount);
57   mPos += amount;
58   *aReadCount = amount;
59   return NS_OK;
60 }
61 
62 NS_IMETHODIMP
ReadSegments(nsWriteUnicharSegmentFun aWriter,void * aClosure,uint32_t aCount,uint32_t * aReadCount)63 StringUnicharInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
64                                        void* aClosure, uint32_t aCount,
65                                        uint32_t* aReadCount) {
66   uint32_t bytesWritten;
67   uint32_t totalBytesWritten = 0;
68 
69   nsresult rv;
70   aCount = XPCOM_MIN(mString.Length() - mPos, aCount);
71 
72   nsAString::const_iterator iter;
73   mString.BeginReading(iter);
74 
75   while (aCount) {
76     rv = aWriter(this, aClosure, iter.get() + mPos, totalBytesWritten, aCount,
77                  &bytesWritten);
78 
79     if (NS_FAILED(rv)) {
80       // don't propagate errors to the caller
81       break;
82     }
83 
84     aCount -= bytesWritten;
85     totalBytesWritten += bytesWritten;
86     mPos += bytesWritten;
87   }
88 
89   *aReadCount = totalBytesWritten;
90 
91   return NS_OK;
92 }
93 
94 NS_IMETHODIMP
ReadString(uint32_t aCount,nsAString & aString,uint32_t * aReadCount)95 StringUnicharInputStream::ReadString(uint32_t aCount, nsAString& aString,
96                                      uint32_t* aReadCount) {
97   if (mPos >= mLen) {
98     *aReadCount = 0;
99     return NS_OK;
100   }
101   uint32_t amount = mLen - mPos;
102   if (amount > aCount) {
103     amount = aCount;
104   }
105   aString = Substring(mString, mPos, amount);
106   mPos += amount;
107   *aReadCount = amount;
108   return NS_OK;
109 }
110 
Close()111 nsresult StringUnicharInputStream::Close() {
112   mPos = mLen;
113   return NS_OK;
114 }
115 
116 NS_IMPL_ISUPPORTS(StringUnicharInputStream, nsIUnicharInputStream)
117 
118 //----------------------------------------------------------------------
119 
120 class UTF8InputStream final : public nsIUnicharInputStream {
121  public:
122   UTF8InputStream();
123   nsresult Init(nsIInputStream* aStream);
124 
125   NS_DECL_ISUPPORTS
126   NS_DECL_NSIUNICHARINPUTSTREAM
127 
128  private:
129   ~UTF8InputStream();
130 
131  protected:
132   int32_t Fill(nsresult* aErrorCode);
133 
134   static void CountValidUTF8Bytes(const char* aBuf, uint32_t aMaxBytes,
135                                   uint32_t& aValidUTF8bytes,
136                                   uint32_t& aValidUTF16CodeUnits);
137 
138   nsCOMPtr<nsIInputStream> mInput;
139   FallibleTArray<char> mByteData;
140   FallibleTArray<char16_t> mUnicharData;
141 
142   uint32_t mByteDataOffset;
143   uint32_t mUnicharDataOffset;
144   uint32_t mUnicharDataLength;
145 };
146 
UTF8InputStream()147 UTF8InputStream::UTF8InputStream()
148     : mByteDataOffset(0), mUnicharDataOffset(0), mUnicharDataLength(0) {}
149 
Init(nsIInputStream * aStream)150 nsresult UTF8InputStream::Init(nsIInputStream* aStream) {
151   if (!mByteData.SetCapacity(STRING_BUFFER_SIZE, mozilla::fallible) ||
152       !mUnicharData.SetCapacity(STRING_BUFFER_SIZE, mozilla::fallible)) {
153     return NS_ERROR_OUT_OF_MEMORY;
154   }
155   mInput = aStream;
156 
157   return NS_OK;
158 }
159 
// nsISupports implementation for UTF8InputStream, exposing the
// nsIUnicharInputStream interface.
NS_IMPL_ISUPPORTS(UTF8InputStream, nsIUnicharInputStream)

// Destruction goes through Close() so the wrapped stream reference and both
// buffers are released the same way as on an explicit close.
UTF8InputStream::~UTF8InputStream() {
  Close();
}
163 
Close()164 nsresult UTF8InputStream::Close() {
165   mInput = nullptr;
166   mByteData.Clear();
167   mUnicharData.Clear();
168   return NS_OK;
169 }
170 
Read(char16_t * aBuf,uint32_t aCount,uint32_t * aReadCount)171 nsresult UTF8InputStream::Read(char16_t* aBuf, uint32_t aCount,
172                                uint32_t* aReadCount) {
173   NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
174   uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
175   nsresult errorCode;
176   if (0 == readCount) {
177     // Fill the unichar buffer
178     int32_t bytesRead = Fill(&errorCode);
179     if (bytesRead <= 0) {
180       *aReadCount = 0;
181       return errorCode;
182     }
183     readCount = bytesRead;
184   }
185   if (readCount > aCount) {
186     readCount = aCount;
187   }
188   memcpy(aBuf, mUnicharData.Elements() + mUnicharDataOffset,
189          readCount * sizeof(char16_t));
190   mUnicharDataOffset += readCount;
191   *aReadCount = readCount;
192   return NS_OK;
193 }
194 
195 NS_IMETHODIMP
ReadSegments(nsWriteUnicharSegmentFun aWriter,void * aClosure,uint32_t aCount,uint32_t * aReadCount)196 UTF8InputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, void* aClosure,
197                               uint32_t aCount, uint32_t* aReadCount) {
198   NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
199   uint32_t bytesToWrite = mUnicharDataLength - mUnicharDataOffset;
200   nsresult rv = NS_OK;
201   if (0 == bytesToWrite) {
202     // Fill the unichar buffer
203     int32_t bytesRead = Fill(&rv);
204     if (bytesRead <= 0) {
205       *aReadCount = 0;
206       return rv;
207     }
208     bytesToWrite = bytesRead;
209   }
210 
211   if (bytesToWrite > aCount) {
212     bytesToWrite = aCount;
213   }
214 
215   uint32_t bytesWritten;
216   uint32_t totalBytesWritten = 0;
217 
218   while (bytesToWrite) {
219     rv = aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset,
220                  totalBytesWritten, bytesToWrite, &bytesWritten);
221 
222     if (NS_FAILED(rv)) {
223       // don't propagate errors to the caller
224       break;
225     }
226 
227     bytesToWrite -= bytesWritten;
228     totalBytesWritten += bytesWritten;
229     mUnicharDataOffset += bytesWritten;
230   }
231 
232   *aReadCount = totalBytesWritten;
233 
234   return NS_OK;
235 }
236 
237 NS_IMETHODIMP
ReadString(uint32_t aCount,nsAString & aString,uint32_t * aReadCount)238 UTF8InputStream::ReadString(uint32_t aCount, nsAString& aString,
239                             uint32_t* aReadCount) {
240   NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
241   uint32_t readCount = mUnicharDataLength - mUnicharDataOffset;
242   nsresult errorCode;
243   if (0 == readCount) {
244     // Fill the unichar buffer
245     int32_t bytesRead = Fill(&errorCode);
246     if (bytesRead <= 0) {
247       *aReadCount = 0;
248       return errorCode;
249     }
250     readCount = bytesRead;
251   }
252   if (readCount > aCount) {
253     readCount = aCount;
254   }
255   const char16_t* buf = mUnicharData.Elements() + mUnicharDataOffset;
256   aString.Assign(buf, readCount);
257 
258   mUnicharDataOffset += readCount;
259   *aReadCount = readCount;
260   return NS_OK;
261 }
262 
Fill(nsresult * aErrorCode)263 int32_t UTF8InputStream::Fill(nsresult* aErrorCode) {
264   if (!mInput) {
265     // We already closed the stream!
266     *aErrorCode = NS_BASE_STREAM_CLOSED;
267     return -1;
268   }
269 
270   NS_ASSERTION(mByteData.Length() >= mByteDataOffset, "unsigned madness");
271   uint32_t remainder = mByteData.Length() - mByteDataOffset;
272   mByteDataOffset = remainder;
273   uint32_t nb;
274   *aErrorCode = NS_FillArray(mByteData, mInput, remainder, &nb);
275   if (nb == 0) {
276     // Because we assume a many to one conversion, the lingering data
277     // in the byte buffer must be a partial conversion
278     // fragment. Because we know that we have received no more new
279     // data to add to it, we can't convert it. Therefore, we discard
280     // it.
281     return nb;
282   }
283   NS_ASSERTION(remainder + nb == mByteData.Length(), "bad nb");
284 
285   // Now convert as much of the byte buffer to unicode as possible
286   uint32_t srcLen, dstLen;
287   CountValidUTF8Bytes(mByteData.Elements(), remainder + nb, srcLen, dstLen);
288 
289   // the number of UCS2 characters should always be <= the number of
290   // UTF8 chars
291   NS_ASSERTION(remainder + nb >= srcLen, "cannot be longer than out buffer");
292   NS_ASSERTION(dstLen <= mUnicharData.Capacity(),
293                "Ouch. I would overflow my buffer if I wasn't so careful.");
294   if (dstLen > mUnicharData.Capacity()) {
295     return 0;
296   }
297 
298   ConvertUTF8toUTF16 converter(mUnicharData.Elements());
299 
300   nsACString::const_char_iterator start = mByteData.Elements();
301   nsACString::const_char_iterator end = mByteData.Elements() + srcLen;
302 
303   copy_string(start, end, converter);
304   if (converter.Length() != dstLen) {
305     *aErrorCode = NS_BASE_STREAM_BAD_CONVERSION;
306     return -1;
307   }
308 
309   mUnicharDataOffset = 0;
310   mUnicharDataLength = dstLen;
311   mByteDataOffset = srcLen;
312 
313   return dstLen;
314 }
315 
CountValidUTF8Bytes(const char * aBuffer,uint32_t aMaxBytes,uint32_t & aValidUTF8bytes,uint32_t & aValidUTF16CodeUnits)316 void UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer,
317                                           uint32_t aMaxBytes,
318                                           uint32_t& aValidUTF8bytes,
319                                           uint32_t& aValidUTF16CodeUnits) {
320   const char* c = aBuffer;
321   const char* end = aBuffer + aMaxBytes;
322   const char* lastchar = c;  // pre-initialize in case of 0-length buffer
323   uint32_t utf16length = 0;
324   while (c < end && *c) {
325     lastchar = c;
326     utf16length++;
327 
328     if (UTF8traits::isASCII(*c)) {
329       c++;
330     } else if (UTF8traits::is2byte(*c)) {
331       c += 2;
332     } else if (UTF8traits::is3byte(*c)) {
333       c += 3;
334     } else if (UTF8traits::is4byte(*c)) {
335       c += 4;
336       utf16length++;  // add 1 more because this will be converted to a
337       // surrogate pair.
338     } else if (UTF8traits::is5byte(*c)) {
339       c += 5;
340     } else if (UTF8traits::is6byte(*c)) {
341       c += 6;
342     } else {
343       NS_WARNING(
344           "Unrecognized UTF8 string in UTF8InputStream::CountValidUTF8Bytes()");
345       break;  // Otherwise we go into an infinite loop.  But what happens now?
346     }
347   }
348   if (c > end) {
349     c = lastchar;
350     utf16length--;
351   }
352 
353   aValidUTF8bytes = c - aBuffer;
354   aValidUTF16CodeUnits = utf16length;
355 }
356 
NS_NewUnicharInputStream(nsIInputStream * aStreamToWrap,nsIUnicharInputStream ** aResult)357 nsresult NS_NewUnicharInputStream(nsIInputStream* aStreamToWrap,
358                                   nsIUnicharInputStream** aResult) {
359   *aResult = nullptr;
360 
361   // Create converter input stream
362   RefPtr<UTF8InputStream> it = new UTF8InputStream();
363   nsresult rv = it->Init(aStreamToWrap);
364   if (NS_FAILED(rv)) {
365     return rv;
366   }
367 
368   it.forget(aResult);
369   return NS_OK;
370 }
371