xpcom/io/nsUnicharInputStream.cpp

/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "nsUnicharInputStream.h"
#include "nsIInputStream.h"
#include "nsIServiceManager.h"
#include "nsString.h"
#include "nsTArray.h"
#include "nsAutoPtr.h"
#include "nsCRT.h"
#include "nsStreamUtils.h"
#include "nsUTF8Utils.h"
#include "mozilla/Attributes.h"
#include <fcntl.h>
#if defined(XP_WIN)
#include <io.h>
#else
#include <unistd.h>
#endif

// Capacity, in elements, that UTF8InputStream::Init reserves for both its raw
// byte buffer and its UTF-16 output buffer.
#define STRING_BUFFER_SIZE 8192

// nsIUnicharInputStream implementation that serves UTF-16 code units out of a
// string held in memory.
class StringUnicharInputStream final : public nsIUnicharInputStream {
 public:
  // Stores aString in mString and remembers its length; the read cursor
  // starts at the beginning of the text.
  explicit StringUnicharInputStream(const nsAString& aString)
      : mString(aString), mLen(aString.Length()) {}

  NS_DECL_ISUPPORTS
  NS_DECL_NSIUNICHARINPUTSTREAM

  // The text being served.
  nsString mString;
  // Index of the next code unit that has not been handed out yet.
  uint32_t mPos = 0;
  // Length of the source string as captured at construction time.
  uint32_t mLen;

 private:
  // Private so the object can only be destroyed from inside the class.
  ~StringUnicharInputStream() = default;
};

// Copies up to aCount UTF-16 code units from the unread part of mString into
// aBuf and advances the read cursor by the number copied.
//
// @param aBuf       destination buffer; must hold at least aCount code units.
// @param aCount     maximum number of code units to copy.
// @param aReadCount receives the number of code units copied; 0 once the
//                   cursor has reached mLen.
// @return always NS_OK.
NS_IMETHODIMP
StringUnicharInputStream::Read(char16_t* aBuf, uint32_t aCount,
                               uint32_t* aReadCount) {
  uint32_t copied = 0;

  if (mPos < mLen) {
    const uint32_t remaining = mLen - mPos;
    copied = (remaining > aCount) ? aCount : remaining;

    nsAString::const_iterator begin;
    mString.BeginReading(begin);
    memcpy(aBuf, begin.get() + mPos, sizeof(char16_t) * copied);
    mPos += copied;
  }

  *aReadCount = copied;
  return NS_OK;
}

// Hands the unread part of mString to aWriter, without copying, until aCount
// code units have been consumed, the string is exhausted, or the writer stops
// accepting data.
//
// @param aWriter    callback that consumes a segment; it reports how many
//                   code units it took through its last argument.
// @param aClosure   opaque pointer forwarded to aWriter.
// @param aCount     maximum number of code units to offer.
// @param aReadCount receives the total number of code units consumed.
// @return always NS_OK; a failure from aWriter only ends the loop.
NS_IMETHODIMP
StringUnicharInputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter,
                                       void* aClosure, uint32_t aCount,
                                       uint32_t* aReadCount) {
  uint32_t totalUnitsWritten = 0;

  // Never offer more than what is left in the string.
  aCount = XPCOM_MIN(mString.Length() - mPos, aCount);

  nsAString::const_iterator iter;
  mString.BeginReading(iter);

  while (aCount) {
    // Reset every iteration so a writer that forgets to set its out-param is
    // seen as "consumed nothing" instead of leaving an indeterminate value.
    uint32_t unitsWritten = 0;
    nsresult rv = aWriter(this, aClosure, iter.get() + mPos, totalUnitsWritten,
                          aCount, &unitsWritten);

    // Errors are not propagated to the caller. A successful call that
    // consumed nothing must also end the loop; otherwise neither aCount nor
    // mPos would change and the loop would spin forever.
    if (NS_FAILED(rv) || unitsWritten == 0) {
      break;
    }

    aCount -= unitsWritten;
    totalUnitsWritten += unitsWritten;
    mPos += unitsWritten;
  }

  *aReadCount = totalUnitsWritten;

  return NS_OK;
}

// Assigns the next chunk of unread text (at most aCount code units) to aString
// and advances the read cursor past it.
//
// @param aCount     maximum number of code units to return.
// @param aString    receives the chunk; left untouched when nothing is left.
// @param aReadCount receives the number of code units returned (0 at the end).
// @return always NS_OK.
NS_IMETHODIMP
StringUnicharInputStream::ReadString(uint32_t aCount, nsAString& aString,
                                     uint32_t* aReadCount) {
  if (mPos >= mLen) {
    // Everything has already been handed out.
    *aReadCount = 0;
    return NS_OK;
  }

  const uint32_t remaining = mLen - mPos;
  const uint32_t chunk = (aCount < remaining) ? aCount : remaining;

  aString = Substring(mString, mPos, chunk);
  *aReadCount = chunk;
  mPos += chunk;
  return NS_OK;
}

// Marks the stream as fully consumed by moving the read cursor to the recorded
// end of the text; the string itself is kept. Always returns NS_OK.
nsresult StringUnicharInputStream::Close() {
  // With mPos == mLen, Read and ReadString report zero code units.
  mPos = mLen;
  return NS_OK;
}

// XPCOM boilerplate: provides the nsISupports methods declared by
// NS_DECL_ISUPPORTS in the class, with nsIUnicharInputStream as the exposed
// interface.
NS_IMPL_ISUPPORTS(StringUnicharInputStream, nsIUnicharInputStream)

//----------------------------------------------------------------------

// nsIUnicharInputStream implementation that wraps a byte-oriented
// nsIInputStream, treats its content as UTF-8 and serves it as UTF-16 code
// units. Bytes are pulled from the wrapped stream in chunks and converted
// whenever the UTF-16 buffer has been fully consumed.
class UTF8InputStream final : public nsIUnicharInputStream {
 public:
  UTF8InputStream();
  // Reserves both internal buffers and remembers aStream as the byte source.
  // Returns NS_ERROR_OUT_OF_MEMORY if either buffer cannot be reserved.
  nsresult Init(nsIInputStream* aStream);

  NS_DECL_ISUPPORTS
  NS_DECL_NSIUNICHARINPUTSTREAM

 private:
  // Private so the object can only be destroyed from inside the class.
  ~UTF8InputStream();

 protected:
  // Reads more bytes from mInput and converts them into mUnicharData.
  // Returns the number of UTF-16 code units produced, 0 when nothing new could
  // be produced, or -1 on failure; *aErrorCode is always written.
  int32_t Fill(nsresult* aErrorCode);

  // Scans at most aMaxBytes bytes of aBuf and reports how many leading bytes
  // form complete UTF-8 sequences (aValidUTF8bytes) and how many UTF-16 code
  // units they are counted as (aValidUTF16CodeUnits).
  static void CountValidUTF8Bytes(const char* aBuf, uint32_t aMaxBytes,
                                  uint32_t& aValidUTF8bytes,
                                  uint32_t& aValidUTF16CodeUnits);

  // Wrapped byte stream; null once the stream has been closed.
  nsCOMPtr<nsIInputStream> mInput;
  // Raw bytes read from mInput that are waiting to be converted.
  FallibleTArray<char> mByteData;
  // Storage for the UTF-16 code units produced by the latest conversion.
  FallibleTArray<char16_t> mUnicharData;

  // Number of bytes at the front of mByteData consumed by the latest
  // conversion; bytes beyond it are carried over to the next Fill().
  uint32_t mByteDataOffset;
  // Index in mUnicharData of the next code unit to hand out.
  uint32_t mUnicharDataOffset;
  // Number of converted code units currently held in mUnicharData.
  uint32_t mUnicharDataLength;
};

UTF8InputStream::UTF8InputStream()
    : mByteDataOffset(0), mUnicharDataOffset(0), mUnicharDataLength(0) {}

// Reserves STRING_BUFFER_SIZE elements for the byte buffer and for the UTF-16
// buffer, then adopts aStream as the byte source.
//
// @param aStream stream to read UTF-8 bytes from.
// @return NS_OK on success, NS_ERROR_OUT_OF_MEMORY if either reservation
//         fails (in which case mInput is left unchanged).
nsresult UTF8InputStream::Init(nsIInputStream* aStream) {
  // The second reservation is only attempted when the first one succeeded.
  const bool buffersReady =
      mByteData.SetCapacity(STRING_BUFFER_SIZE, mozilla::fallible) &&
      mUnicharData.SetCapacity(STRING_BUFFER_SIZE, mozilla::fallible);
  if (!buffersReady) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  mInput = aStream;
  return NS_OK;
}

// XPCOM boilerplate: provides the nsISupports methods declared by
// NS_DECL_ISUPPORTS in the class, with nsIUnicharInputStream as the exposed
// interface.
NS_IMPL_ISUPPORTS(UTF8InputStream, nsIUnicharInputStream)

// Tears the stream down through Close(), which drops the wrapped stream and
// clears both buffers.
UTF8InputStream::~UTF8InputStream() { Close(); }

// Closes the stream: drops the wrapped input stream, clears both buffers and
// resets every buffer cursor. Afterwards the read methods find nothing
// buffered, call Fill(), and get NS_BASE_STREAM_CLOSED. Always returns NS_OK.
nsresult UTF8InputStream::Close() {
  mInput = nullptr;
  mByteData.Clear();
  mUnicharData.Clear();

  // The cursors describe the contents of the buffers that were just cleared.
  // If they were left as they are, a later read could still see
  // mUnicharDataLength > mUnicharDataOffset and copy "buffered" code units out
  // of an array that no longer holds them.
  mByteDataOffset = 0;
  mUnicharDataOffset = 0;
  mUnicharDataLength = 0;
  return NS_OK;
}

// Copies up to aCount converted UTF-16 code units into aBuf. When the UTF-16
// buffer is empty it is refilled once through Fill() before copying.
//
// @param aBuf       destination buffer; must hold at least aCount code units.
// @param aCount     maximum number of code units to copy.
// @param aReadCount receives the number of code units copied.
// @return NS_OK when data was copied; otherwise the status reported by
//         Fill(), with *aReadCount set to 0.
nsresult UTF8InputStream::Read(char16_t* aBuf, uint32_t aCount,
                               uint32_t* aReadCount) {
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
  uint32_t available = mUnicharDataLength - mUnicharDataOffset;

  if (available == 0) {
    // Nothing buffered: pull and convert another chunk.
    nsresult fillStatus;
    const int32_t produced = Fill(&fillStatus);
    if (produced <= 0) {
      *aReadCount = 0;
      return fillStatus;
    }
    available = produced;
  }

  const uint32_t toCopy = (available > aCount) ? aCount : available;
  const char16_t* src = mUnicharData.Elements() + mUnicharDataOffset;
  memcpy(aBuf, src, toCopy * sizeof(char16_t));

  mUnicharDataOffset += toCopy;
  *aReadCount = toCopy;
  return NS_OK;
}

// Hands already-converted UTF-16 code units to aWriter without copying them.
// When the UTF-16 buffer is empty it is refilled once through Fill() first.
// At most one buffer's worth of data is offered per call.
//
// @param aWriter    callback that consumes a segment; it reports how many
//                   code units it took through its last argument.
// @param aClosure   opaque pointer forwarded to aWriter.
// @param aCount     maximum number of code units to offer.
// @param aReadCount receives the total number of code units consumed.
// @return the status from Fill() when no data could be produced; otherwise
//         NS_OK (a failure from aWriter only ends the loop).
NS_IMETHODIMP
UTF8InputStream::ReadSegments(nsWriteUnicharSegmentFun aWriter, void* aClosure,
                              uint32_t aCount, uint32_t* aReadCount) {
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
  uint32_t unitsToWrite = mUnicharDataLength - mUnicharDataOffset;
  if (0 == unitsToWrite) {
    // Fill the unichar buffer
    nsresult fillStatus = NS_OK;
    int32_t unitsFilled = Fill(&fillStatus);
    if (unitsFilled <= 0) {
      *aReadCount = 0;
      return fillStatus;
    }
    unitsToWrite = unitsFilled;
  }

  if (unitsToWrite > aCount) {
    unitsToWrite = aCount;
  }

  uint32_t totalUnitsWritten = 0;

  while (unitsToWrite) {
    // Reset every iteration so a writer that forgets to set its out-param is
    // seen as "consumed nothing" instead of leaving an indeterminate value.
    uint32_t unitsWritten = 0;
    nsresult rv =
        aWriter(this, aClosure, mUnicharData.Elements() + mUnicharDataOffset,
                totalUnitsWritten, unitsToWrite, &unitsWritten);

    // Errors are not propagated to the caller. A successful call that
    // consumed nothing must also end the loop; otherwise no counter would
    // change and the loop would spin forever.
    if (NS_FAILED(rv) || unitsWritten == 0) {
      break;
    }

    unitsToWrite -= unitsWritten;
    totalUnitsWritten += unitsWritten;
    mUnicharDataOffset += unitsWritten;
  }

  *aReadCount = totalUnitsWritten;

  return NS_OK;
}

// Assigns up to aCount converted UTF-16 code units to aString. When the UTF-16
// buffer is empty it is refilled once through Fill() first.
//
// @param aCount     maximum number of code units to return.
// @param aString    receives the code units; untouched when Fill() yields
//                   nothing.
// @param aReadCount receives the number of code units returned.
// @return NS_OK when data was returned; otherwise the status reported by
//         Fill(), with *aReadCount set to 0.
NS_IMETHODIMP
UTF8InputStream::ReadString(uint32_t aCount, nsAString& aString,
                            uint32_t* aReadCount) {
  NS_ASSERTION(mUnicharDataLength >= mUnicharDataOffset, "unsigned madness");
  uint32_t available = mUnicharDataLength - mUnicharDataOffset;

  if (available == 0) {
    // Nothing buffered: pull and convert another chunk.
    nsresult fillStatus;
    const int32_t produced = Fill(&fillStatus);
    if (produced <= 0) {
      *aReadCount = 0;
      return fillStatus;
    }
    available = produced;
  }

  const uint32_t chunk = (available > aCount) ? aCount : available;
  aString.Assign(mUnicharData.Elements() + mUnicharDataOffset, chunk);

  *aReadCount = chunk;
  mUnicharDataOffset += chunk;
  return NS_OK;
}

// Refills the UTF-16 buffer: reads more bytes from mInput into mByteData and
// converts the leading run of complete UTF-8 sequences into mUnicharData.
//
// @param aErrorCode always written: NS_BASE_STREAM_CLOSED when there is no
//                   input stream, NS_BASE_STREAM_BAD_CONVERSION when the
//                   converter's output length differs from the precomputed
//                   one, otherwise the status returned by NS_FillArray.
// @return the number of UTF-16 code units now available in mUnicharData,
//         0 when no new bytes were read (or the converted data would not fit),
//         -1 on the two error cases above.
int32_t UTF8InputStream::Fill(nsresult* aErrorCode) {
  if (!mInput) {
    // We already closed the stream!
    *aErrorCode = NS_BASE_STREAM_CLOSED;
    return -1;
  }

  NS_ASSERTION(mByteData.Length() >= mByteDataOffset, "unsigned madness");
  // Bytes left unconverted by the previous call (for example a multi-byte
  // sequence cut off at the end of the buffer).
  uint32_t remainder = mByteData.Length() - mByteDataOffset;
  mByteDataOffset = remainder;
  uint32_t nb;
  // NOTE(review): NS_FillArray is presumably expected to keep the trailing
  // `remainder` bytes at the front of mByteData and append newly read bytes
  // after them, storing the number of new bytes in nb -- confirm against
  // nsStreamUtils.
  *aErrorCode = NS_FillArray(mByteData, mInput, remainder, &nb);
  if (nb == 0) {
    // Because we assume a many to one conversion, the lingering data
    // in the byte buffer must be a partial conversion
    // fragment. Because we know that we have received no more new
    // data to add to it, we can't convert it. Therefore, we discard
    // it.
    return nb;
  }
  NS_ASSERTION(remainder + nb == mByteData.Length(), "bad nb");

  // Now convert as much of the byte buffer to unicode as possible
  // srcLen: bytes forming complete sequences; dstLen: UTF-16 units expected.
  uint32_t srcLen, dstLen;
  CountValidUTF8Bytes(mByteData.Elements(), remainder + nb, srcLen, dstLen);

  // the number of UCS2 characters should always be <= the number of
  // UTF8 chars
  NS_ASSERTION(remainder + nb >= srcLen, "cannot be longer than out buffer");
  NS_ASSERTION(dstLen <= mUnicharData.Capacity(),
               "Ouch. I would overflow my buffer if I wasn't so careful.");
  // Release-build guard for the assertion above: refuse to convert rather
  // than write past the reserved capacity.
  if (dstLen > mUnicharData.Capacity()) {
    return 0;
  }

  // The converter writes straight into the storage reserved for mUnicharData.
  ConvertUTF8toUTF16 converter(mUnicharData.Elements());

  nsACString::const_char_iterator start = mByteData.Elements();
  nsACString::const_char_iterator end = mByteData.Elements() + srcLen;

  copy_string(start, end, converter);
  // A mismatch between the produced and the precomputed length is reported as
  // a conversion failure.
  if (converter.Length() != dstLen) {
    *aErrorCode = NS_BASE_STREAM_BAD_CONVERSION;
    return -1;
  }

  // Publish the new buffer state: nothing consumed yet, dstLen units
  // available, and srcLen bytes of mByteData used up.
  mUnicharDataOffset = 0;
  mUnicharDataLength = dstLen;
  mByteDataOffset = srcLen;

  return dstLen;
}

// Scans aBuffer and determines how much of it can be converted right now.
//
// Walks the buffer one UTF-8 sequence at a time, using only the lead byte to
// decide the sequence length, and stops at the end of the buffer, at a NUL
// byte, or at a byte that is not a recognized lead byte. A final sequence
// that extends past the end of the buffer is excluded from both results so
// that it can be completed by a later read.
//
// @param aBuffer              bytes to scan.
// @param aMaxBytes            number of bytes available in aBuffer.
// @param aValidUTF8bytes      receives the number of leading bytes that form
//                             complete sequences.
// @param aValidUTF16CodeUnits receives the number of UTF-16 code units those
//                             sequences are counted as (two for a 4-byte
//                             sequence, one for every other length).
void UTF8InputStream::CountValidUTF8Bytes(const char* aBuffer,
                                          uint32_t aMaxBytes,
                                          uint32_t& aValidUTF8bytes,
                                          uint32_t& aValidUTF16CodeUnits) {
  const char* c = aBuffer;
  const char* end = aBuffer + aMaxBytes;
  const char* lastchar = c;  // pre-initialize in case of 0-length buffer
  uint32_t utf16length = 0;
  // Value of utf16length before the sequence starting at lastchar was
  // counted; used to undo that sequence if it turns out to be truncated.
  uint32_t utf16lengthBeforeLast = 0;
  while (c < end && *c) {
    lastchar = c;
    utf16lengthBeforeLast = utf16length;
    utf16length++;

    if (UTF8traits::isASCII(*c)) {
      c++;
    } else if (UTF8traits::is2byte(*c)) {
      c += 2;
    } else if (UTF8traits::is3byte(*c)) {
      c += 3;
    } else if (UTF8traits::is4byte(*c)) {
      c += 4;
      utf16length++;  // add 1 more because this will be converted to a
      // surrogate pair.
    } else if (UTF8traits::is5byte(*c)) {
      c += 5;
    } else if (UTF8traits::is6byte(*c)) {
      c += 6;
    } else {
      NS_WARNING(
          "Unrecognized UTF8 string in UTF8InputStream::CountValidUTF8Bytes()");
      // Stop here to avoid looping forever on the same byte. The count keeps
      // the unit already added for this byte, exactly as before this fix.
      break;
    }
  }
  if (c > end) {
    // The last sequence runs past the end of the buffer. Drop it entirely:
    // rewind to its lead byte and restore the count from before it. A plain
    // decrement is not enough, because a truncated 4-byte sequence was
    // counted as two code units and would leave the result one too high.
    c = lastchar;
    utf16length = utf16lengthBeforeLast;
  }

  aValidUTF8bytes = c - aBuffer;
  aValidUTF16CodeUnits = utf16length;
}

// Creates a UTF8InputStream that wraps aStreamToWrap and returns it through
// aResult as an nsIUnicharInputStream.
//
// @param aStreamToWrap byte stream to wrap.
// @param aResult       receives the new stream; set to nullptr on failure.
// @return NS_OK on success, otherwise the failure code from
//         UTF8InputStream::Init.
nsresult NS_NewUnicharInputStream(nsIInputStream* aStreamToWrap,
                                  nsIUnicharInputStream** aResult) {
  // Make sure the out-param is well defined on every path.
  *aResult = nullptr;

  RefPtr<UTF8InputStream> stream = new UTF8InputStream();
  const nsresult initStatus = stream->Init(aStreamToWrap);
  if (!NS_FAILED(initStatus)) {
    // Transfer the reference to the caller.
    stream.forget(aResult);
    return NS_OK;
  }

  // `stream` releases the half-initialized object when it goes out of scope.
  return initStatus;
}