
// Copyright 2021 The Cobalt Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "cobalt/encoding/text_decoder.h"

#include <string>
#include <utility>
#include <vector>

#include "cobalt/base/polymorphic_downcast.h"
#include "cobalt/dom/testing/stub_window.h"
#include "cobalt/script/array_buffer_view.h"
#include "cobalt/script/environment_settings.h"
#include "cobalt/script/testing/mock_exception_state.h"
#include "cobalt/script/typed_arrays.h"

#include "testing/gtest/include/gtest/gtest.h"

using ::testing::_;

namespace cobalt {
namespace encoding {
namespace {

//////////////////////////////////////////////////////////////////////////
// TextDecoderTest
//////////////////////////////////////////////////////////////////////////

class TextDecoderTest : public ::testing::Test {
 protected:
  TextDecoderTest();
  ~TextDecoderTest();

  cobalt::dom::testing::StubWindow stub_window_;
  script::testing::MockExceptionState exception_state_;
};

TextDecoderTest::TextDecoderTest() {}
TextDecoderTest::~TextDecoderTest() {}

//////////////////////////////////////////////////////////////////////////
// Test cases
//////////////////////////////////////////////////////////////////////////

// Checks the encoding name reported by a TextDecoder built without a label,
// with recognised labels, and with an unrecognised label.
TEST_F(TextDecoderTest, Constructors) {
  {
    // No label given: the decoder reports "utf-8" and no RangeError is set.
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    scoped_refptr<TextDecoder> decoder(new TextDecoder(&exception_state_));
    EXPECT_EQ("utf-8", decoder->encoding());
  }

  {
    // The plain "utf-16" label is reported back as the little endian variant.
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    scoped_refptr<TextDecoder> decoder(
        new TextDecoder("utf-16", &exception_state_));
    EXPECT_EQ("utf-16le", decoder->encoding());
  }

  {
    // An explicit big endian label is reported back unchanged.
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    scoped_refptr<TextDecoder> decoder(
        new TextDecoder("utf-16be", &exception_state_));
    EXPECT_EQ("utf-16be", decoder->encoding());
  }

  {
    // An unknown label must report exactly one RangeError; the resulting
    // decoder then exposes an empty encoding name.
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _));
    scoped_refptr<TextDecoder> decoder(
        new TextDecoder("foo-encoding", &exception_state_));
    EXPECT_EQ("", decoder->encoding());
  }
}

// Decodes well-formed UTF-8 byte sequences with a default-constructed decoder
// and checks that the output matches the expected text without any RangeError
// being reported.
TEST_F(TextDecoderTest, DecodeUTF8) {
  EXPECT_CALL(exception_state_, SetSimpleExceptionVA(script::kRangeError, _, _))
      .Times(0);
  scoped_refptr<TextDecoder> decoder(new TextDecoder(&exception_state_));

  // Each entry pairs the raw input bytes (written in decimal) with the text
  // the decoder is expected to produce.
  const std::vector<std::pair<std::vector<uint8>, std::string>> test_cases = {
      {{}, ""},
      {{72, 101, 108, 108, 111, 32, 119, 111, 114, 108, 100, 33},
       "Hello world!"},
      {{72,  101, 106, 33, 32, 208, 159, 209, 128, 208, 184, 208, 178, 208,
        181, 209, 130, 33, 32, 228, 189, 160, 229, 165, 189, 239, 188, 129},
       "Hej! Привет! 你好！"},
      // The trailing '!' is 33 in decimal. It used to be spelled as the octal
      // literal 041, which has the same value but reads like decimal 41.
      {{208, 148, 208, 176, 33}, "Да!"},
  };

  for (const auto& test_case : test_cases) {
    // Mutable copy, so that the pointer type handed to ArrayBuffer::New is
    // the same non-const one the test has always used.
    std::vector<uint8> bytes(test_case.first);
    const std::string& expected = test_case.second;

    script::Handle<script::ArrayBuffer> array_buffer = script::ArrayBuffer::New(
        stub_window_.global_environment(), bytes.data(), bytes.size());
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    const std::string decoded = decoder->Decode(
        dom::BufferSource(array_buffer), &exception_state_);
    EXPECT_EQ(decoded, expected);
  }
}

// Decodes UTF-8 input that contains U+FFFD (0xEF 0xBF 0xBD) in various
// positions, plus one four-byte sequence, and checks that the bytes come back
// unchanged. The case labels ("Surrogate half", "Wrong order", ...) are kept
// from the original table.
TEST_F(TextDecoderTest, DecodeUTF8Surrogates) {
  EXPECT_CALL(exception_state_, SetSimpleExceptionVA(script::kRangeError, _, _))
      .Times(0);
  scoped_refptr<TextDecoder> decoder(new TextDecoder(&exception_state_));

  const std::vector<std::pair<std::vector<uint8>, std::string>> test_cases = {
      // Sanity check
      {{0x61, 0x62, 0x63, 0x31, 0x32, 0x33}, "abc123"},
      // Surrogate half (low)
      {{0xef, 0xbf, 0xbd}, "\xEF\xBF\xBD"},
      // Surrogate half (high). The literal is split so that "123" is not
      // swallowed by the preceding hex escape.
      {{0x61, 0x62, 0x63, 0xef, 0xbf, 0xbd, 0x31, 0x32, 0x33},
       "abc\xEF\xBF\xBD"
       "123"},
      // Wrong order
      {{0xef, 0xbf, 0xbd, 0xef, 0xbf, 0xbd}, "\xEF\xBF\xBD\xEF\xBF\xBD"},
      // Right order
      {{0xf0, 0x90, 0x80, 0x80}, "𐀀"},
  };

  for (const auto& test_case : test_cases) {
    std::vector<uint8> bytes(test_case.first);
    const std::string& expected = test_case.second;

    script::Handle<script::ArrayBuffer> array_buffer = script::ArrayBuffer::New(
        stub_window_.global_environment(), bytes.data(), bytes.size());
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    const std::string decoded = decoder->Decode(
        dom::BufferSource(array_buffer), &exception_state_);
    EXPECT_EQ(decoded, expected);
  }
}

// Feeds malformed input to decoders created with the |fatal| option set. Each
// case must report exactly one TypeError from Decode() and yield an empty
// string; constructing the decoder itself must not report a RangeError.
TEST_F(TextDecoderTest, DecodeUTF8Fatal) {
  // Each entry pairs an encoding label with a malformed byte sequence.
  const std::vector<std::pair<std::string, std::vector<uint8>>> test_cases = {
      // Invalid code
      {"utf-8", {0xFF}},
      // Ends early
      {"utf-8", {0xC0}},
      // Ends early (2)
      {"utf-8", {0xE0}},
      // Invalid trail
      {"utf-8", {0xC0, 0x00}},
      // Invalid trail (2)
      {"utf-8", {0xC0, 0xC0}},
      // Invalid trail (3)
      {"utf-8", {0xE0, 0x00}},
      // Invalid trail (4)
      {"utf-8", {0xE0, 0xC0}},
      // Invalid trail (5)
      {"utf-8", {0xE0, 0x80, 0x00}},
      // Invalid trail (6)
      {"utf-8", {0xE0, 0x80, 0xC0}},
      // > 0x10FFFF
      {"utf-8", {0xFC, 0x80, 0x80, 0x80, 0x80, 0x80}},
      // Obsolete lead byte
      {"utf-8", {0xFE, 0x80, 0x80, 0x80, 0x80, 0x80}},

      // Overlong U+0000 - 2 to 6 bytes
      {"utf-8", {0xC0, 0x80}},
      {"utf-8", {0xE0, 0x80, 0x80}},
      {"utf-8", {0xF0, 0x80, 0x80, 0x80}},
      {"utf-8", {0xF8, 0x80, 0x80, 0x80, 0x80}},
      {"utf-8", {0xFC, 0x80, 0x80, 0x80, 0x80, 0x80}},

      // Overlong U+007F - 2 to 6 bytes
      {"utf-8", {0xC1, 0xBF}},
      {"utf-8", {0xE0, 0x81, 0xBF}},
      {"utf-8", {0xF0, 0x80, 0x81, 0xBF}},
      {"utf-8", {0xF8, 0x80, 0x80, 0x81, 0xBF}},
      {"utf-8", {0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF}},

      // Overlong U+07FF - 3 to 6 bytes
      {"utf-8", {0xE0, 0x9F, 0xBF}},
      {"utf-8", {0xF0, 0x80, 0x9F, 0xBF}},
      {"utf-8", {0xF8, 0x80, 0x80, 0x9F, 0xBF}},
      {"utf-8", {0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF}},

      // Overlong U+FFFF - 4 to 6 bytes
      {"utf-8", {0xF0, 0x8F, 0xBF, 0xBF}},
      {"utf-8", {0xF8, 0x80, 0x8F, 0xBF, 0xBF}},
      {"utf-8", {0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF}},

      // Overlong U+10FFFF - 5 and 6 bytes
      {"utf-8", {0xF8, 0x84, 0x8F, 0xBF, 0xBF}},
      {"utf-8", {0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF}},

      // Lead surrogate
      {"utf-8", {0xED, 0xA0, 0x80}},
      // Trail surrogate
      {"utf-8", {0xED, 0xB0, 0x80}},
      // Surrogate pair
      {"utf-8", {0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80}},

      // Truncated code unit
      {"utf-16le", {0x00}},
  };

  TextDecoderOptions fatal_options;
  fatal_options.set_fatal(true);

  // A failed decode is expected to produce no output at all.
  const std::string expected = "";

  for (const auto& test_case : test_cases) {
    const std::string& label = test_case.first;
    std::vector<uint8> bytes(test_case.second);

    // No errors expected while constructing the object.
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    scoped_refptr<TextDecoder> decoder(
        new TextDecoder(label, fatal_options, &exception_state_));

    script::Handle<script::ArrayBuffer> array_buffer = script::ArrayBuffer::New(
        stub_window_.global_environment(), bytes.data(), bytes.size());

    // Malformed input must raise an exception when fatal is set to true.
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kTypeError, _, _))
        .Times(1);
    const std::string decoded = decoder->Decode(
        dom::BufferSource(array_buffer), &exception_state_);
    EXPECT_EQ(decoded, expected);
  }
}

// Checks how a leading byte order mark is treated for each supported
// encoding: it is kept in the output when |ignore_bom| is true, and dropped
// when |ignore_bom| is false or when no options are given.
TEST_F(TextDecoderTest, DecodeIgnoreBOM) {
  // Each entry is an encoding label and "abc" in that encoding, preceded by
  // the encoding's byte order mark.
  const std::vector<std::pair<std::string, std::vector<uint8>>> test_cases = {
      {"utf-8", {0xEF, 0xBB, 0xBF, 0x61, 0x62, 0x63}},
      {"utf-16le", {0xFF, 0xFE, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00}},
      {"utf-16be", {0xFE, 0xFF, 0x00, 0x61, 0x00, 0x62, 0x00, 0x63}},
  };

  // u8 prefix needed for compilation on Windows.
  const std::string kBOM = u8"\uFEFF";
  const std::string kABC = "abc";

  TextDecoderOptions keep_bom_options;
  keep_bom_options.set_ignore_bom(true);
  TextDecoderOptions strip_bom_options;
  strip_bom_options.set_ignore_bom(false);

  // Decodes |input| with |decoder| in a single call, expecting that no
  // RangeError is reported while doing so.
  auto decode_once = [this](const scoped_refptr<TextDecoder>& decoder,
                            const std::vector<uint8>& input) -> std::string {
    std::vector<uint8> bytes(input);
    script::Handle<script::ArrayBuffer> array_buffer = script::ArrayBuffer::New(
        stub_window_.global_environment(), bytes.data(), bytes.size());
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    return decoder->Decode(dom::BufferSource(array_buffer), &exception_state_);
  };

  for (const auto& test_case : test_cases) {
    const std::string& label = test_case.first;
    const std::vector<uint8>& bytes = test_case.second;

    {  // BOM should be present in decoded string if ignored.
      EXPECT_CALL(exception_state_,
                  SetSimpleExceptionVA(script::kRangeError, _, _))
          .Times(0);
      scoped_refptr<TextDecoder> decoder(
          new TextDecoder(label, keep_bom_options, &exception_state_));
      const std::string decoded = decode_once(decoder, bytes);
      EXPECT_EQ(decoded, kBOM + kABC);
    }

    {  // BOM should be absent from decoded string if not ignored.
      EXPECT_CALL(exception_state_,
                  SetSimpleExceptionVA(script::kRangeError, _, _))
          .Times(0);
      scoped_refptr<TextDecoder> decoder(
          new TextDecoder(label, strip_bom_options, &exception_state_));
      const std::string decoded = decode_once(decoder, bytes);
      EXPECT_EQ(decoded, kABC);
    }

    {  // BOM should be absent from decoded string by default.
      EXPECT_CALL(exception_state_,
                  SetSimpleExceptionVA(script::kRangeError, _, _))
          .Times(0);
      scoped_refptr<TextDecoder> decoder(
          new TextDecoder(label, &exception_state_));
      const std::string decoded = decode_once(decoder, bytes);
      EXPECT_EQ(decoded, kABC);
    }
  }
}

// Decodes the same text, encoded as UTF-8, UTF-16BE and UTF-16LE, by feeding
// it to the decoder in fixed-size chunks with the |stream| option set, then
// flushing with a final argument-less Decode() call. The concatenated output
// must equal the original text for every chunk size.
TEST_F(TextDecoderTest, DecodeUTFStreamSimple) {
  const std::string expected = "abc123Да!𐀀";
  const std::vector<std::pair<std::string, std::vector<uint8>>> test_cases = {
      {
          "utf-8",
          {0x61, 0x62, 0x63, 0x31, 0x32, 0x33, 0xd0, 0x94, 0xd0, 0xb0, 0x21,
           0xf0, 0x90, 0x80, 0x80},
      },
      {
          "utf-16be",
          {0x00, 0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x31, 0x00, 0x32, 0x00,
           0x33, 0x04, 0x14, 0x04, 0x30, 0x00, 0x21, 0xd8, 0x00, 0xdc, 0x00},
      },
      {
          "utf-16le",
          {0x61, 0x00, 0x62, 0x00, 0x63, 0x00, 0x31, 0x00, 0x32, 0x00, 0x33,
           0x00, 0x14, 0x04, 0x30, 0x04, 0x21, 0x00, 0x00, 0xd8, 0x00, 0xdc},
      },
  };

  TextDecodeOptions stream_option;
  stream_option.set_stream(true);

  // The loop used to stop at chunk size 1, so only the byte-at-a-time split
  // was ever exercised. Sizes 1 through 5 also cover chunk boundaries that
  // fall at different offsets inside multi-byte sequences and surrogate
  // pairs.
  const std::size_t kMaxChunkSize = 5;
  for (std::size_t chunk_size = 1; chunk_size <= kMaxChunkSize; ++chunk_size) {
    for (const auto& test_case : test_cases) {
      const std::string& encoding_label = test_case.first;
      const std::vector<uint8>& data = test_case.second;

      EXPECT_CALL(exception_state_,
                  SetSimpleExceptionVA(script::kRangeError, _, _))
          .Times(0);
      scoped_refptr<TextDecoder> decoder(
          new TextDecoder(encoding_label, &exception_state_));

      std::string decoded;
      for (std::size_t offset = 0; offset < data.size(); offset += chunk_size) {
        // The last chunk may be shorter than |chunk_size|.
        const std::size_t remaining = data.size() - offset;
        const std::size_t length =
            remaining < chunk_size ? remaining : chunk_size;
        std::vector<uint8> byte_chunk(data.begin() + offset,
                                      data.begin() + offset + length);
        script::Handle<script::ArrayBuffer> chunk =
            script::ArrayBuffer::New(stub_window_.global_environment(),
                                     byte_chunk.data(), byte_chunk.size());
        EXPECT_CALL(exception_state_,
                    SetSimpleExceptionVA(script::kRangeError, _, _))
            .Times(0);
        // decoding with {stream: true}
        decoded += decoder->Decode(dom::BufferSource(chunk), stream_option,
                                   &exception_state_);
      }

      // Final call without input, made after all chunks have been streamed.
      EXPECT_CALL(exception_state_,
                  SetSimpleExceptionVA(script::kRangeError, _, _))
          .Times(0);
      decoded += decoder->Decode(&exception_state_);
      EXPECT_EQ(decoded, expected)
          << "encoding: " << encoding_label << ", chunk size: " << chunk_size;
    }
  }
}

// Decodes input with a decoder created from the plain "utf-16" label. The
// input bytes are in little endian order.
TEST_F(TextDecoderTest, DecodeUTF16) {
  EXPECT_CALL(exception_state_, SetSimpleExceptionVA(script::kRangeError, _, _))
      .Times(0);
  scoped_refptr<TextDecoder> decoder(
      new TextDecoder("utf-16", &exception_state_));

  // Input bytes paired with the expected decoded text.
  const std::vector<std::pair<std::vector<uint8>, std::string>> test_cases = {
      {{0x14, 0x04, 0x30, 0x04, 0x21, 0x00}, "Да!"},
  };

  for (const auto& test_case : test_cases) {
    std::vector<uint8> bytes(test_case.first);
    const std::string& expected = test_case.second;

    script::Handle<script::ArrayBuffer> array_buffer = script::ArrayBuffer::New(
        stub_window_.global_environment(), bytes.data(), bytes.size());
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    const std::string decoded = decoder->Decode(
        dom::BufferSource(array_buffer), &exception_state_);
    EXPECT_EQ(decoded, expected);
  }
}

// Decodes big endian UTF-16 input with a decoder created from the "utf-16be"
// label.
TEST_F(TextDecoderTest, DecodeUTF16BE) {
  EXPECT_CALL(exception_state_, SetSimpleExceptionVA(script::kRangeError, _, _))
      .Times(0);
  scoped_refptr<TextDecoder> decoder(
      new TextDecoder("utf-16be", &exception_state_));

  // Input bytes paired with the expected decoded text.
  const std::vector<std::pair<std::vector<uint8>, std::string>> test_cases = {
      {{0x04, 0x14, 0x04, 0x30, 0x00, 0x21}, "Да!"},
  };

  for (const auto& test_case : test_cases) {
    std::vector<uint8> bytes(test_case.first);
    const std::string& expected = test_case.second;

    script::Handle<script::ArrayBuffer> array_buffer = script::ArrayBuffer::New(
        stub_window_.global_environment(), bytes.data(), bytes.size());
    EXPECT_CALL(exception_state_,
                SetSimpleExceptionVA(script::kRangeError, _, _))
        .Times(0);
    const std::string decoded = decoder->Decode(
        dom::BufferSource(array_buffer), &exception_state_);
    EXPECT_EQ(decoded, expected);
  }
}

}  // namespace
}  // namespace encoding
}  // namespace cobalt
