// src/v8/src/unicode-decoder.h - cobalt - Git at Google

 // Copyright 2014 the V8 project authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #ifndef V8_UNICODE_DECODER_H_
 #define V8_UNICODE_DECODER_H_

 #include <sys/types.h>
 #include "src/globals.h"
 #include "src/utils.h"

 namespace unibrow {

 // Non-templated part of the UTF-8 decoder. It keeps the decoding state; the
 // cache for decoded UTF-16 code units is supplied by the caller.
 class V8_EXPORT_PRIVATE Utf8DecoderBase {
  public:
   // Sets every field to its empty value (nullptr / 0 / false); a subclass is
   // expected to provide the actual input.
   inline Utf8DecoderBase();
   // Forwards all four arguments to Reset().
   inline Utf8DecoderBase(uint16_t* buffer, size_t buffer_length,
                          const uint8_t* stream, size_t stream_length);

   // Returns utf16_length_, the length recorded by Reset().
   size_t Utf16Length() const { return utf16_length_; }

  protected:
   // Reads all characters of |stream| and sets utf16_length_. The first
   // |buffer_length| UTF-16 code units are cached in |buffer|.
   void Reset(uint16_t* buffer, size_t buffer_length, const uint8_t* stream,
              size_t stream_length);
   // Defined out of line. Utf8Decoder::WriteUtf16() calls it for the output
   // that lies beyond the cached code units.
   static void WriteUtf16Slow(const uint8_t* stream, size_t stream_length,
                              uint16_t* data, size_t length);

   // Input handed to WriteUtf16Slow() by Utf8Decoder::WriteUtf16().
   const uint8_t* unbuffered_start_;
   size_t unbuffered_length_;
   // Value reported by Utf16Length().
   size_t utf16_length_;
   // When true, Utf8Decoder::WriteUtf16() treats the cache as one entry
   // shorter than its capacity.
   bool last_byte_of_buffer_unused_;

  private:
   DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);
 };

 // UTF-8 decoder that embeds its own cache of kBufferSize UTF-16 code units.
 template <size_t kBufferSize>
 class Utf8Decoder : public Utf8DecoderBase {
  public:
   // Leaves the decoder in the base class's empty state; buffer_ is not
   // initialised.
   Utf8Decoder() {}
   // Decodes |length| bytes starting at |stream| during construction.
   inline Utf8Decoder(const char* stream, size_t length);
   // Restarts decoding on a new input, reusing the embedded cache.
   inline void Reset(const char* stream, size_t length);
   // Copies at most |length| decoded UTF-16 code units into |data| and
   // returns how many were copied.
   inline size_t WriteUtf16(uint16_t* data, size_t length) const;

  private:
   // Cache passed to Utf8DecoderBase as its |buffer| argument.
   uint16_t buffer_[kBufferSize];
 };

 Utf8DecoderBase::Utf8DecoderBase()
     : unbuffered_start_(nullptr),
       unbuffered_length_(0),
       utf16_length_(0),
       last_byte_of_buffer_unused_(false) {}

 // Constructs the decoder by forwarding every argument unchanged to Reset().
 // NOTE(review): no member is initialised before that call, so this relies on
 // Reset() assigning all fields - confirm against its out-of-line definition.
 Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, size_t buffer_length,
                                  const uint8_t* stream, size_t stream_length) {
   this->Reset(buffer, buffer_length, stream, stream_length);
 }


 // Decodes |stream| on construction, with the embedded buffer_ as the cache.
 // The base class is constructed before this class's own members, so the
 // base constructor receives a pointer to storage that has not been
 // initialised yet.
 template <size_t kBufferSize>
 Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, size_t length)
     : Utf8DecoderBase(&buffer_[0], kBufferSize,
                       reinterpret_cast<const uint8_t*>(stream), length) {}


 // Restarts decoding on a new input: |stream| is reinterpreted as unsigned
 // bytes and passed, together with the embedded cache, to the base Reset().
 template <size_t kBufferSize>
 void Utf8Decoder<kBufferSize>::Reset(const char* stream, size_t length) {
   const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(stream);
   Utf8DecoderBase::Reset(buffer_, kBufferSize, bytes, length);
 }


 // Copies up to |length| decoded UTF-16 code units into |data| and returns
 // the number copied, i.e. |length| clamped to utf16_length_. Code units held
 // in buffer_ are copied with MemCopy; anything beyond them is written by
 // WriteUtf16Slow() from unbuffered_start_ / unbuffered_length_.
 template <size_t kBufferSize>
 size_t Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
                                             size_t length) const {
   DCHECK_GT(length, 0);
   // Never hand out more than the decoded stream contains.
   if (utf16_length_ < length) length = utf16_length_;

   // Number of entries of buffer_ that count as cached output; the final
   // slot is excluded when last_byte_of_buffer_unused_ is set.
   size_t cached = kBufferSize;
   if (last_byte_of_buffer_unused_) cached = kBufferSize - 1;

   // Fast path: bulk-copy whatever the cache can serve.
   const bool fits_in_cache = length <= cached;
   const size_t from_cache = fits_in_cache ? length : cached;
   v8::internal::MemCopy(data, buffer_, from_cache * sizeof(uint16_t));
   if (fits_in_cache) return length;

   // Slow path: the remainder is written after the cached prefix.
   DCHECK_NOT_NULL(unbuffered_start_);
   WriteUtf16Slow(unbuffered_start_, unbuffered_length_, data + cached,
                  length - cached);
   return length;
 }

 // Constants and helpers for the Latin-1 range.
 class Latin1 {
  public:
   // Largest character value that belongs to Latin-1.
   static const unsigned kMaxChar = 0xff;
   // Returns 0 if |c| does not convert to a single Latin-1 character, or if
   // it does not convert back to Latin-1 via the inverse operation (upper to
   // lower, etc.).
   static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t c);
 };


 // Maps |c|, which must be above Latin1::kMaxChar, to its Latin-1
 // counterpart. Returns 0 for every character not listed below.
 uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) {
   DCHECK_GT(c, Latin1::kMaxChar);
   // These are equivalent characters in Unicode: Greek capital and small mu
   // both map to the micro sign.
   if (c == 0x39c || c == 0x3bc) return 0xb5;
   // This is the uppercase of a Latin-1 character (0xff) that itself lies
   // outside of Latin-1.
   if (c == 0x178) return 0xff;
   return 0;
 }


 }  // namespace unibrow

 #endif  // V8_UNICODE_DECODER_H_
	// Copyright 2014 the V8 project authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#ifndef V8_UNICODE_DECODER_H_
	#define V8_UNICODE_DECODER_H_

	#include <sys/types.h>
	#include "src/globals.h"
	#include "src/utils.h"

	namespace unibrow {

	// Non-templated part of the UTF-8 decoder. It keeps the decoding state; the
	// cache for decoded UTF-16 code units is supplied by the caller.
	class V8_EXPORT_PRIVATE Utf8DecoderBase {
	 public:
	  // Sets every field to its empty value (nullptr / 0 / false); a subclass is
	  // expected to provide the actual input.
	  inline Utf8DecoderBase();
	  // Forwards all four arguments to Reset().
	  inline Utf8DecoderBase(uint16_t* buffer, size_t buffer_length,
	                         const uint8_t* stream, size_t stream_length);

	  // Returns utf16_length_, the length recorded by Reset().
	  size_t Utf16Length() const { return utf16_length_; }

	 protected:
	  // Reads all characters of |stream| and sets utf16_length_. The first
	  // |buffer_length| UTF-16 code units are cached in |buffer|.
	  void Reset(uint16_t* buffer, size_t buffer_length, const uint8_t* stream,
	             size_t stream_length);
	  // Defined out of line. Utf8Decoder::WriteUtf16() calls it for the output
	  // that lies beyond the cached code units.
	  static void WriteUtf16Slow(const uint8_t* stream, size_t stream_length,
	                             uint16_t* data, size_t length);

	  // Input handed to WriteUtf16Slow() by Utf8Decoder::WriteUtf16().
	  const uint8_t* unbuffered_start_;
	  size_t unbuffered_length_;
	  // Value reported by Utf16Length().
	  size_t utf16_length_;
	  // When true, Utf8Decoder::WriteUtf16() treats the cache as one entry
	  // shorter than its capacity.
	  bool last_byte_of_buffer_unused_;

	 private:
	  DISALLOW_COPY_AND_ASSIGN(Utf8DecoderBase);
	};

	// UTF-8 decoder that embeds its own cache of kBufferSize UTF-16 code units.
	template <size_t kBufferSize>
	class Utf8Decoder : public Utf8DecoderBase {
	 public:
	  // Leaves the decoder in the base class's empty state; buffer_ is not
	  // initialised.
	  Utf8Decoder() {}
	  // Decodes |length| bytes starting at |stream| during construction.
	  inline Utf8Decoder(const char* stream, size_t length);
	  // Restarts decoding on a new input, reusing the embedded cache.
	  inline void Reset(const char* stream, size_t length);
	  // Copies at most |length| decoded UTF-16 code units into |data| and
	  // returns how many were copied.
	  inline size_t WriteUtf16(uint16_t* data, size_t length) const;

	 private:
	  // Cache passed to Utf8DecoderBase as its |buffer| argument.
	  uint16_t buffer_[kBufferSize];
	};

	Utf8DecoderBase::Utf8DecoderBase()
	: unbuffered_start_(nullptr),
	unbuffered_length_(0),
	utf16_length_(0),
	last_byte_of_buffer_unused_(false) {}

	// Constructs the decoder by forwarding every argument unchanged to Reset().
	// NOTE(review): no member is initialised before that call, so this relies on
	// Reset() assigning all fields - confirm against its out-of-line definition.
	Utf8DecoderBase::Utf8DecoderBase(uint16_t* buffer, size_t buffer_length,
	                                 const uint8_t* stream, size_t stream_length) {
	  this->Reset(buffer, buffer_length, stream, stream_length);
	}


	// Decodes |stream| on construction, with the embedded buffer_ as the cache.
	// The base class is constructed before this class's own members, so the
	// base constructor receives a pointer to storage that has not been
	// initialised yet.
	template <size_t kBufferSize>
	Utf8Decoder<kBufferSize>::Utf8Decoder(const char* stream, size_t length)
	    : Utf8DecoderBase(&buffer_[0], kBufferSize,
	                      reinterpret_cast<const uint8_t*>(stream), length) {}


	// Restarts decoding on a new input: |stream| is reinterpreted as unsigned
	// bytes and passed, together with the embedded cache, to the base Reset().
	template <size_t kBufferSize>
	void Utf8Decoder<kBufferSize>::Reset(const char* stream, size_t length) {
	  const uint8_t* const bytes = reinterpret_cast<const uint8_t*>(stream);
	  Utf8DecoderBase::Reset(buffer_, kBufferSize, bytes, length);
	}


	// Copies up to |length| decoded UTF-16 code units into |data| and returns
	// the number copied, i.e. |length| clamped to utf16_length_. Code units held
	// in buffer_ are copied with MemCopy; anything beyond them is written by
	// WriteUtf16Slow() from unbuffered_start_ / unbuffered_length_.
	template <size_t kBufferSize>
	size_t Utf8Decoder<kBufferSize>::WriteUtf16(uint16_t* data,
	                                            size_t length) const {
	  DCHECK_GT(length, 0);
	  // Never hand out more than the decoded stream contains.
	  if (utf16_length_ < length) length = utf16_length_;

	  // Number of entries of buffer_ that count as cached output; the final
	  // slot is excluded when last_byte_of_buffer_unused_ is set.
	  size_t cached = kBufferSize;
	  if (last_byte_of_buffer_unused_) cached = kBufferSize - 1;

	  // Fast path: bulk-copy whatever the cache can serve.
	  const bool fits_in_cache = length <= cached;
	  const size_t from_cache = fits_in_cache ? length : cached;
	  v8::internal::MemCopy(data, buffer_, from_cache * sizeof(uint16_t));
	  if (fits_in_cache) return length;

	  // Slow path: the remainder is written after the cached prefix.
	  DCHECK_NOT_NULL(unbuffered_start_);
	  WriteUtf16Slow(unbuffered_start_, unbuffered_length_, data + cached,
	                 length - cached);
	  return length;
	}

	// Constants and helpers for the Latin-1 range.
	class Latin1 {
	 public:
	  // Largest character value that belongs to Latin-1.
	  static const unsigned kMaxChar = 0xff;
	  // Returns 0 if |c| does not convert to a single Latin-1 character, or if
	  // it does not convert back to Latin-1 via the inverse operation (upper to
	  // lower, etc.).
	  static inline uint16_t ConvertNonLatin1ToLatin1(uint16_t c);
	};


	// Maps |c|, which must be above Latin1::kMaxChar, to its Latin-1
	// counterpart. Returns 0 for every character not listed below.
	uint16_t Latin1::ConvertNonLatin1ToLatin1(uint16_t c) {
	  DCHECK_GT(c, Latin1::kMaxChar);
	  // These are equivalent characters in Unicode: Greek capital and small mu
	  // both map to the micro sign.
	  if (c == 0x39c || c == 0x3bc) return 0xb5;
	  // This is the uppercase of a Latin-1 character (0xff) that itself lies
	  // outside of Latin-1.
	  if (c == 0x178) return 0xff;
	  return 0;
	}


	} // namespace unibrow

	#endif // V8_UNICODE_DECODER_H_