// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/third_party/http2/hpack/varint/hpack_varint_decoder.h"

// Benchmarks of decoding HPACK variable length integers.

// clang-format off
/*
Results from 2016-04-13 Perflab runs on arch=ixion_haswell, averaged over 10
trials. Times are in picoseconds, which indicates how small a component of the
overall decoding time is taken up by varints, even though each HPACK entry has
between 1 and 3 of them.

In the table, RSD means Relative Standard Deviation, i.e. the standard deviation
of the trial values as a percentage of the mean. A large RSD indicates that the
benchmark isn't very stable.

SB# is the number of bytes in the encoding of the varint, where SBX means a
variable number of bytes was used based on a population model.

PL# is the number of bits of the first byte that make up the prefix of the
varint. PLX won't be available until the HpackEntryTypeDecoder benchmarks.

Inline(Both|None)(Extended)? indicate whether the Start and Resume calls
were (Both) or were not (None) inlined, and Extended indicates whether the
caller skipped calling the decoder if the varint was encoded in only one byte
(as we can expect is common for strings).

The rows are sorted by PL#, SB#, and finally cpu ps.

My conclusion is that InlineBoth is the best choice for how to call the
HpackVarintDecoder (i.e. leave the Start and Resume methods in the headers,
and don't special case 1 byte varints in the caller).

Benchmark                                      wall ps (RSD%)   cpu ps (RSD%)  Throughput (RSD%)  Trials
---------------------------------------------  --------------   -------------  -----------------  ------
SB1_PL4_InlineBoth_HpackVarintDecoder           1936.5 ( 0.4)   1932.3 ( 0.5)   471 MiB/s ( 0.5)      10
SB1_PL4_InlineNoneExtended_HpackVarintDecoder   2009.4 ( 0.4)   2006.1 ( 0.6)   453 MiB/s ( 0.6)      10
SB1_PL4_InlineBothExtended_HpackVarintDecoder   2010.6 ( 0.7)   2008.1 ( 0.8)   453 MiB/s ( 0.8)      10
SB1_PL4_InlineNone_HpackVarintDecoder           2258.3 ( 0.6)   2257.8 ( 0.6)   403 MiB/s ( 0.6)      10

SB2_PL4_InlineBothExtended_HpackVarintDecoder   3472.9 ( 0.5)   3468.8 ( 0.4)   524 MiB/s ( 0.4)      10
SB2_PL4_InlineBoth_HpackVarintDecoder           3933.3 ( 0.5)   3926.2 ( 0.5)   463 MiB/s ( 0.5)      10
SB2_PL4_InlineNoneExtended_HpackVarintDecoder   4338.7 ( 0.8)   4328.9 ( 0.6)   420 MiB/s ( 0.6)      10
SB2_PL4_InlineNone_HpackVarintDecoder           4416.7 ( 0.5)   4411.7 ( 0.6)   412 MiB/s ( 0.6)      10

SB3_PL4_InlineBothExtended_HpackVarintDecoder   5369.1 ( 0.6)   5347.5 ( 0.7)   510 MiB/s ( 0.7)      10
SB3_PL4_InlineNoneExtended_HpackVarintDecoder   5749.8 ( 0.6)   5744.9 ( 0.8)   475 MiB/s ( 0.8)      10
SB3_PL4_InlineBoth_HpackVarintDecoder           5775.7 ( 0.9)   5760.3 ( 1.0)   474 MiB/s ( 1.0)      10
SB3_PL4_InlineNone_HpackVarintDecoder           6003.6 ( 0.5)   5991.0 ( 0.5)   455 MiB/s ( 0.5)      10

SB4_PL4_InlineBothExtended_HpackVarintDecoder   7237.1 ( 0.2)   7229.5 ( 0.2)   503 MiB/s ( 0.2)      10
SB4_PL4_InlineNoneExtended_HpackVarintDecoder   7374.9 ( 0.5)   7353.3 ( 0.5)   495 MiB/s ( 0.5)      10
SB4_PL4_InlineBoth_HpackVarintDecoder           7471.2 ( 0.3)   7462.8 ( 0.6)   487 MiB/s ( 0.6)      10
SB4_PL4_InlineNone_HpackVarintDecoder           7710.5 ( 1.2)   7696.3 ( 1.1)   473 MiB/s ( 1.2)      10

SBX_PL4_InlineBothExtended_HpackVarintDecoder   4586.1 ( 0.2)   4570.9 ( 0.2)   364 MiB/s ( 0.2)      10
SBX_PL4_InlineBoth_HpackVarintDecoder           4769.8 ( 0.2)   4765.1 ( 0.3)   349 MiB/s ( 0.3)      10
SBX_PL4_InlineNoneExtended_HpackVarintDecoder   4961.6 ( 0.3)   4955.3 ( 0.2)   336 MiB/s ( 0.2)      10
SBX_PL4_InlineNone_HpackVarintDecoder           5444.5 ( 0.2)   5438.7 ( 0.4)   306 MiB/s ( 0.4)      10

SB1_PL5_InlineBoth_HpackVarintDecoder           1942.0 ( 0.3)   1938.2 ( 0.3)   469 MiB/s ( 0.4)      10
SB1_PL5_InlineBothExtended_HpackVarintDecoder   2006.6 ( 0.4)   2005.5 ( 0.5)   453 MiB/s ( 0.5)      10
SB1_PL5_InlineNoneExtended_HpackVarintDecoder   2010.2 ( 0.3)   2006.5 ( 0.4)   453 MiB/s ( 0.4)      10
SB1_PL5_InlineNone_HpackVarintDecoder           2266.1 ( 0.6)   2264.5 ( 0.6)   401 MiB/s ( 0.6)      10

SB2_PL5_InlineBothExtended_HpackVarintDecoder   3470.4 ( 0.2)   3463.4 ( 0.3)   525 MiB/s ( 0.3)      10
SB2_PL5_InlineBoth_HpackVarintDecoder           3929.0 ( 0.3)   3920.1 ( 0.4)   464 MiB/s ( 0.4)      10
SB2_PL5_InlineNoneExtended_HpackVarintDecoder   4002.2 ( 0.6)   3996.7 ( 0.7)   455 MiB/s ( 0.7)      10
SB2_PL5_InlineNone_HpackVarintDecoder           4414.5 ( 0.3)   4412.0 ( 0.4)   412 MiB/s ( 0.4)      10

SB3_PL5_InlineBothExtended_HpackVarintDecoder   5368.7 ( 0.4)   5364.0 ( 0.3)   509 MiB/s ( 0.3)      10
SB3_PL5_InlineBoth_HpackVarintDecoder           5754.0 ( 0.4)   5749.6 ( 0.4)   475 MiB/s ( 0.4)      10
SB3_PL5_InlineNoneExtended_HpackVarintDecoder   5767.8 ( 0.5)   5769.9 ( 0.8)   473 MiB/s ( 0.8)      10
SB3_PL5_InlineNone_HpackVarintDecoder           6054.8 ( 0.9)   6053.3 ( 0.7)   451 MiB/s ( 0.7)      10

SB4_PL5_InlineBothExtended_HpackVarintDecoder   7243.2 ( 0.6)   7220.1 ( 0.6)   504 MiB/s ( 0.6)      10
SB4_PL5_InlineNoneExtended_HpackVarintDecoder   7376.1 ( 0.4)   7365.5 ( 0.3)   494 MiB/s ( 0.3)      10
SB4_PL5_InlineBoth_HpackVarintDecoder           7482.2 ( 0.5)   7477.7 ( 0.7)   487 MiB/s ( 0.7)      10
SB4_PL5_InlineNone_HpackVarintDecoder           7707.7 ( 1.0)   7710.3 ( 1.1)   472 MiB/s ( 1.2)      10

SBX_PL5_InlineBothExtended_HpackVarintDecoder   8457.8 ( 0.5)   8443.1 ( 0.5)   345 MiB/s ( 0.5)      10
SBX_PL5_InlineBoth_HpackVarintDecoder           8598.6 ( 0.4)   8580.9 ( 0.7)   339 MiB/s ( 0.7)      10
SBX_PL5_InlineNoneExtended_HpackVarintDecoder   8762.8 ( 0.2)   8736.5 ( 0.2)   333 MiB/s ( 0.2)      10
SBX_PL5_InlineNone_HpackVarintDecoder           9354.8 ( 0.3)   9338.9 ( 0.3)   312 MiB/s ( 0.3)      10

SB1_PL6_InlineBoth_HpackVarintDecoder           1936.5 ( 0.2)   1934.3 ( 0.4)   470 MiB/s ( 0.4)      10
SB1_PL6_InlineNoneExtended_HpackVarintDecoder   2010.3 ( 0.4)   2006.8 ( 0.3)   453 MiB/s ( 0.3)      10
SB1_PL6_InlineBothExtended_HpackVarintDecoder   2014.2 ( 0.6)   2009.5 ( 0.5)   453 MiB/s ( 0.5)      10
SB1_PL6_InlineNone_HpackVarintDecoder           2260.2 ( 0.3)   2257.4 ( 0.2)   403 MiB/s ( 0.2)      10

SB2_PL6_InlineBothExtended_HpackVarintDecoder   3480.1 ( 0.5)   3475.5 ( 0.5)   523 MiB/s ( 0.5)      10
SB2_PL6_InlineBoth_HpackVarintDecoder           3934.1 ( 0.5)   3928.5 ( 0.7)   463 MiB/s ( 0.7)      10
SB2_PL6_InlineNoneExtended_HpackVarintDecoder   3954.3 ( 0.5)   3951.6 ( 0.5)   460 MiB/s ( 0.5)      10
SB2_PL6_InlineNone_HpackVarintDecoder           4422.7 ( 0.4)   4418.2 ( 0.5)   412 MiB/s ( 0.5)      10

SB3_PL6_InlineBothExtended_HpackVarintDecoder   5358.1 ( 0.4)   5355.8 ( 0.4)   509 MiB/s ( 0.4)      10
SB3_PL6_InlineBoth_HpackVarintDecoder           5740.8 ( 0.6)   5722.2 ( 0.3)   477 MiB/s ( 0.3)      10
SB3_PL6_InlineNoneExtended_HpackVarintDecoder   5766.6 ( 0.4)   5762.1 ( 0.6)   474 MiB/s ( 0.6)      10
SB3_PL6_InlineNone_HpackVarintDecoder           6206.9 ( 0.9)   6204.7 ( 0.8)   440 MiB/s ( 0.8)      10

SB4_PL6_InlineBothExtended_HpackVarintDecoder   7262.3 ( 0.8)   7252.2 ( 0.8)   502 MiB/s ( 0.8)      10
SB4_PL6_InlineNoneExtended_HpackVarintDecoder   7398.2 ( 0.5)   7392.7 ( 0.5)   492 MiB/s ( 0.5)      10
SB4_PL6_InlineBoth_HpackVarintDecoder           7478.2 ( 0.4)   7476.4 ( 0.7)   487 MiB/s ( 0.7)      10
SB4_PL6_InlineNone_HpackVarintDecoder           7792.7 ( 0.6)   7772.6 ( 0.4)   468 MiB/s ( 0.4)      10

SBX_PL6_InlineBoth_HpackVarintDecoder           4999.6 ( 0.5)   4987.7 ( 0.5)   251 MiB/s ( 0.5)      10
SBX_PL6_InlineBothExtended_HpackVarintDecoder   5026.8 ( 0.2)   5019.7 ( 0.2)   249 MiB/s ( 0.2)      10
SBX_PL6_InlineNoneExtended_HpackVarintDecoder   5324.5 ( 0.4)   5316.8 ( 0.6)   235 MiB/s ( 0.6)      10
SBX_PL6_InlineNone_HpackVarintDecoder           5645.6 ( 0.3)   5636.3 ( 0.3)   222 MiB/s ( 0.3)      10

SB1_PL7_InlineBoth_HpackVarintDecoder           1944.7 ( 0.5)   1942.0 ( 0.8)   468 MiB/s ( 0.8)      10
SB1_PL7_InlineNoneExtended_HpackVarintDecoder   2006.2 ( 0.1)   2001.8 ( 0.4)   454 MiB/s ( 0.4)      10
SB1_PL7_InlineBothExtended_HpackVarintDecoder   2009.2 ( 0.5)   2004.9 ( 0.6)   454 MiB/s ( 0.6)      10
SB1_PL7_InlineNone_HpackVarintDecoder           2260.9 ( 0.4)   2259.0 ( 0.3)   403 MiB/s ( 0.4)      10

SB2_PL7_InlineBothExtended_HpackVarintDecoder   3473.8 ( 0.3)   3468.7 ( 0.4)   524 MiB/s ( 0.4)      10
SB2_PL7_InlineBoth_HpackVarintDecoder           3935.3 ( 0.5)   3923.9 ( 0.4)   464 MiB/s ( 0.4)      10
SB2_PL7_InlineNoneExtended_HpackVarintDecoder   3936.1 ( 0.2)   3930.9 ( 0.4)   463 MiB/s ( 0.4)      10
SB2_PL7_InlineNone_HpackVarintDecoder           4422.5 ( 0.4)   4418.3 ( 0.3)   412 MiB/s ( 0.3)      10

SB3_PL7_InlineBothExtended_HpackVarintDecoder   5373.6 ( 0.9)   5365.3 ( 1.0)   509 MiB/s ( 0.9)      10
SB3_PL7_InlineBoth_HpackVarintDecoder           5748.5 ( 0.3)   5731.4 ( 0.2)   476 MiB/s ( 0.2)      10
SB3_PL7_InlineNoneExtended_HpackVarintDecoder   5738.0 ( 0.4)   5736.7 ( 0.5)   476 MiB/s ( 0.5)      10
SB3_PL7_InlineNone_HpackVarintDecoder           5997.3 ( 0.4)   5989.5 ( 0.4)   456 MiB/s ( 0.4)      10

SB4_PL7_InlineBothExtended_HpackVarintDecoder   7246.4 ( 0.6)   7240.9 ( 0.4)   502 MiB/s ( 0.5)      10
SB4_PL7_InlineNoneExtended_HpackVarintDecoder   7401.0 ( 0.4)   7389.7 ( 0.5)   492 MiB/s ( 0.5)      10
SB4_PL7_InlineBoth_HpackVarintDecoder           7480.3 ( 0.6)   7471.1 ( 0.4)   487 MiB/s ( 0.4)      10
SB4_PL7_InlineNone_HpackVarintDecoder           7737.9 ( 1.0)   7722.1 ( 0.7)   471 MiB/s ( 0.7)      10

SBX_PL7_InlineBoth_HpackVarintDecoder           2098.0 ( 0.1)   2097.3 ( 0.3)   441 MiB/s ( 0.3)      10
SBX_PL7_InlineNoneExtended_HpackVarintDecoder   2143.9 ( 0.4)   2139.4 ( 0.2)   432 MiB/s ( 0.2)      10
SBX_PL7_InlineBothExtended_HpackVarintDecoder   2161.6 ( 0.4)   2159.4 ( 0.2)   428 MiB/s ( 0.2)      10
SBX_PL7_InlineNone_HpackVarintDecoder           2405.3 ( 0.6)   2406.8 ( 0.5)   384 MiB/s ( 0.5)      10
*/
// clang-format on

#include <math.h>

#include <algorithm>
#include <map>

#include "base/callback.h"
#include "base/commandlineflags.h"
#include "base/init_google.h"
#include "base/logging.h"
#include "starboard/types.h"
#include "third_party/http2/decoder/decode_buffer.h"
#include "third_party/http2/decoder/decode_status.h"
#include "third_party/http2/platform/api/http2_string_utils.h"
#include "third_party/http2/hpack/tools/base_hpack_benchmark.h"
#include "third_party/http2/hpack/tools/hpack_block_builder.h"
#include "testing/base/public/benchmark.h"
#include "util/functional/to_callback.h"
#include "util/regexp/re2/re2.h"
#include "third_party/absl/strings/util.h"

// When set, VarintDecoderBenchmark::OnAllItemsGenerated logs a histogram of
// the values that were generated for the benchmark input.
DEFINE_bool(output_value_histogram,
            false,
            "Output the number of values generated of each size.");

// Declared here and defined outside this file; main() assigns "all" to it when
// it is empty.
DECLARE_string(benchmarks);

namespace http2 {
namespace test {
namespace {

// Benchmark harness that fills an HpackBlockBuilder with HPACK variable length
// integers for DECODER to decode. DECODER must provide the static functions
// PrefixLength() and MaxExtensionBytes(), which determine how values are
// generated and encoded.
template <class DECODER>
class VarintDecoderBenchmark : public BaseGenericDecoderBenchmark<DECODER> {
 public:
  typedef BaseGenericDecoderBenchmark<DECODER> Base;
  using Base::Random;

  // Creates a benchmark in which every generated varint occupies exactly
  // |serialized_bytes| bytes (the prefix byte included). A value of zero
  // selects the variable-length population model, exactly as the default
  // constructor does.
  explicit VarintDecoderBenchmark(uint32_t serialized_bytes)
      : serialized_bytes_(serialized_bytes),
        prefix_length_(DECODER::PrefixLength()) {
    VLOG(1) << "VarintDecoderBenchmark(" << serialized_bytes_ << ", "
            << prefix_length_ << ")";
    CHECK_LE(serialized_bytes_, DECODER::MaxExtensionBytes() + 1);
  }

  // Creates a benchmark whose values are drawn from a population model that
  // depends on the prefix length (see GenerateItem).
  VarintDecoderBenchmark()
      : serialized_bytes_(0), prefix_length_(DECODER::PrefixLength()) {
    VLOG(1) << "VarintDecoderBenchmark(" << prefix_length_ << ")";
  }

  ~VarintDecoderBenchmark() override {}

 protected:
  // Adds one variable length integer to the HpackBlockBuilder, and records
  // the chosen value in the histogram.
  void GenerateItem(HpackBlockBuilder* hbb) override {
    uint32_t value;
    if (serialized_bytes_ > 0) {
      // This benchmark calls for all items to have the same length.
      value = GenerateValueWithExtensionBytes(serialized_bytes_ - 1);
    } else if (prefix_length_ == 7) {
      // A string length or an Indexed Header. The latter is more common.
      if (Base::Random().OneIn(10)) {
        // Generate a string length. Most are values, some are names (i.e. a
        // Indexed Literal has a name index and a literal value).
        if (Base::Random().OneIn(10)) {
          // Names are shorter than values, essentially never very long.
          value = Base::GenerateNameLength();
        } else {
          value = Base::GenerateValueLength();
        }
      } else {
        // Generate an index into the static or dynamic table (1 to N).
        value = Base::GenerateNonZeroIndex();
      }
    } else if (prefix_length_ == 6) {
      // An Indexed Literal Header, with either an index for the name or a zero
      // to indicate that a literal name follows. The former is more common.
      if (Base::Random().OneIn(20)) {
        value = 0;
      } else {
        value = Base::GenerateNonZeroIndex();
      }
    } else if (prefix_length_ == 5) {
      // kDynamicTableSizeUpdate
      value = Base::GenerateDynamicTableSize();
    } else {
      CHECK_EQ(prefix_length_, 4);
      // An Unindexed or Never Indexed Literal Header, with either an index for
      // the name or a zero to indicate that a literal name follows. The former
      // is more common.
      if (Base::Random().OneIn(20)) {
        value = 0;
      } else {
        value = Base::GenerateNonZeroIndex();
      }
    }
    ++(value_histogram_[value]);
    // Measure how many bytes the builder appended for this one item so that
    // the encoding length can be sanity checked against the decoder's limit.
    size_t old_size = hbb->size();
    hbb->AppendHighBitsAndVarint(0xff << prefix_length_, prefix_length_, value);
    size_t item_size = hbb->size() - old_size;
    DCHECK_LE(item_size, DECODER::MaxExtensionBytes() + 1)
        << "item_size=" << item_size
        << ", MaxExtensionBytes=" << DECODER::MaxExtensionBytes();
  }

  // If --output_value_histogram is set, logs one line per distinct generated
  // value: the value, how often it was generated, and a bar of '*' characters
  // scaled so that the most frequent value gets 40 of them. |num_items| and
  // |num_bytes| are not used.
  void OnAllItemsGenerated(size_t num_items, size_t num_bytes) override {
    if (!FLAGS_output_value_histogram) {
      return;
    }
    LOG(INFO) << "";
    LOG(INFO) << __PRETTY_FUNCTION__;
    LOG(INFO) << "";
    LOG(INFO)
        << "VALUE   COUNT  HISTOGRAM "
        << "------------------------------------------------------------";
    size_t highest_count = 0;
    for (const auto& entry : value_histogram_) {
      highest_count = std::max(highest_count, entry.second);
    }
    // Every histogram entry has a count of at least one, so steps_per_dot is
    // non-zero whenever the loop below executes.
    const double max_dots = 40.0;
    const double steps_per_dot = highest_count / max_dots;
    for (const auto& entry : value_histogram_) {
      const size_t num_dots =
          static_cast<size_t>(std::round(entry.second / steps_per_dot));
      Http2String dots(num_dots, '*');
      // entry.first is a uint32_t, so it is printed with an unsigned
      // conversion.
      LOG(INFO) << Http2StringPrintf("%5u : %6zu  %s", entry.first,
                                     entry.second, dots.c_str());
    }
  }

 private:
  // Returns the highest value that can be encoded with the specified number
  // of |extension_bytes| and the indirectly specified prefix length. The
  // result is 64 bits wide because, for a large enough |extension_bytes|, the
  // bound does not fit in 32 bits.
  static constexpr uint64_t HiValueOfExtensionBytes(uint32_t extension_bytes) {
    return (1 << DECODER::PrefixLength()) - 2 +
           (extension_bytes == 0 ? 0 : (1LLU << (extension_bytes * 7)));
  }

  // Generate a value that requires |extension_bytes| to encode (i.e. whose
  // length when serialized is |extension_bytes| + 1). The value is chosen
  // uniformly from the range of values needing that many extension bytes,
  // limited to what a uint32_t can hold.
  uint32_t GenerateValueWithExtensionBytes(int extension_bytes) {
    // Clamp the upper bound before narrowing; otherwise a bound wider than 32
    // bits would wrap around and end up below |lo|.
    const uint64_t kMaxUint32 = 0xffffffffu;
    uint32_t lo = 0;
    uint32_t hi = static_cast<uint32_t>(std::min<uint64_t>(
        HiValueOfExtensionBytes(extension_bytes), kMaxUint32));
    if (extension_bytes > 0) {
      // One more than the largest value encodable with one fewer byte.
      lo = static_cast<uint32_t>(
          HiValueOfExtensionBytes(extension_bytes - 1) + 1);
    }
    DVLOG(2) << "GenerateValueWithExtensionBytes(" << extension_bytes
             << "),   PrefixLength=" << DECODER::PrefixLength() + 0;
    uint32_t value = lo + Random().Uniform(hi - lo + 1);
    DVLOG(2) << "\tlo = " << lo << "\thi = " << hi << "\tvalue=" << value;
    return value;
  }

  // Number of times each value has been generated by GenerateItem.
  std::map<uint32_t, size_t> value_histogram_;
  // Fixed serialized length of every item, or zero for the population model.
  const uint32_t serialized_bytes_;
  // Number of bits of the first byte that belong to the varint.
  const uint32_t prefix_length_;
};

// Call DECODER::Start and DECODER::Resume, defined in the header file, so
// *probably* inlined. Could go so far as to use the Clang attribute 'flatten'
// on InlineBoth::Start and InlineBoth::Resume to direct the compiler to inline
// methods that those methods call.
//
// PREFIX_LENGTH is the number of bits of the first byte that belong to the
// varint.
template <class DECODER, int PREFIX_LENGTH>
class InlineBoth {
 public:
  typedef DECODER Decoder;
  typedef uint64_t ResultType;

  // Reads the first byte from |b| and starts decoding a varint with it.
  // Returns the decoder's status; when decoding is done the value is captured
  // for ExtractResult.
  DecodeStatus Start(DecodeBuffer* b) {
    uint8_t byte = b->DecodeUInt8();
    DecodeStatus status = decoder_.Start(byte, PrefixLength(), b);
    // Decoding may only remain in progress if the input has been exhausted.
    DCHECK(status == DecodeStatus::kDecodeDone ||
           (b->Empty() && status == DecodeStatus::kDecodeInProgress))
        << "status=" << status << ", Remaining=" << b->Remaining()
        << ", PREFIX_LENGTH=" << PREFIX_LENGTH << ", PrefixMask=" << std::hex
        << (PrefixMask() + 0) << "\ndecoder_: " << decoder_.DebugString()
        << "\n"
        << __PRETTY_FUNCTION__;
    MaybeCollectValue(status);
    return status;
  }

  // Continues decoding a varint whose earlier bytes were consumed by Start or
  // by a previous Resume call.
  DecodeStatus Resume(DecodeBuffer* b) {
    DecodeStatus status = decoder_.Resume(b);
    // Decoding may only remain in progress if the input has been exhausted.
    DCHECK(status == DecodeStatus::kDecodeDone ||
           (b->Empty() && status == DecodeStatus::kDecodeInProgress))
        << "status=" << status << ", Remaining=" << b->Remaining()
        << ", PREFIX_LENGTH=" << PREFIX_LENGTH << ", PrefixMask=" << std::hex
        << (PrefixMask() + 0) << "\ndecoder_: " << decoder_.DebugString()
        << "\n"
        << __PRETTY_FUNCTION__;
    MaybeCollectValue(status);
    return status;
  }

  // Adds the most recently decoded value to a running sum and returns that
  // sum. Must only be called after a decode has completed.
  ResultType ExtractResult() {
    DCHECK(have_value_);
    sum_ += value_;
    return sum_;
  }

  Http2String DebugString() { return decoder_.DebugString(); }

  static constexpr uint32_t MaxExtensionBytes() {
    return Decoder::MaxExtensionBytes();
  }
  static constexpr uint8_t PrefixLength() { return PREFIX_LENGTH; }
  // Mask selecting the low PREFIX_LENGTH bits of the first byte.
  static constexpr uint8_t PrefixMask() { return (1 << PREFIX_LENGTH) - 1; }
  // The mask must be a contiguous run of low bits.
  static_assert(0 == (PrefixMask() & (PrefixMask() + 1)), "Bad Mask");

 private:
  // Records the decoded value if |status| says decoding has finished,
  // otherwise notes that no value is available.
  void MaybeCollectValue(DecodeStatus status) {
    if (status == DecodeStatus::kDecodeDone) {
      value_ = decoder_.value();
      have_value_ = true;
    } else {
      have_value_ = false;
    }
  }

  Decoder decoder_;
  ResultType sum_ = 0;
  // Initialized so that calling ExtractResult before any decode completes
  // trips the DCHECK deterministically instead of reading indeterminate
  // memory.
  uint32_t value_ = 0;
  bool have_value_ = false;
};

// Decoder wrapper that calls DECODER::StartForTest and DECODER::ResumeForTest
// rather than Start and Resume. Per the results table at the top of this
// file, the "InlineNone" variants measure the case where the Start and Resume
// calls are not inlined; that the ForTest methods are the out-of-line entry
// points is presumed from that naming and should be confirmed against the
// decoder's header.
//
// PREFIX_LENGTH is the number of bits of the first byte that belong to the
// varint.
template <class DECODER, int PREFIX_LENGTH>
class InlineNone {
 public:
  typedef DECODER Decoder;
  typedef uint64_t ResultType;

  // Reads the first byte from |b| and starts decoding a varint with it.
  // Returns the decoder's status; when decoding is done the value is captured
  // for ExtractResult.
  DecodeStatus Start(DecodeBuffer* b) {
    uint8_t byte = b->DecodeUInt8();
    DecodeStatus status = decoder_.StartForTest(byte, PrefixLength(), b);
    // Decoding may only remain in progress if the input has been exhausted.
    DCHECK(status == DecodeStatus::kDecodeDone ||
           (b->Empty() && status == DecodeStatus::kDecodeInProgress))
        << "status=" << status << ", Remaining=" << b->Remaining()
        << ", PREFIX_LENGTH=" << PREFIX_LENGTH << ", PrefixMask=" << std::hex
        << (PrefixMask() + 0) << "\ndecoder_: " << decoder_.DebugString()
        << "\n"
        << __PRETTY_FUNCTION__;
    MaybeCollectValue(status);
    return status;
  }

  // Continues decoding a varint whose earlier bytes were consumed by Start or
  // by a previous Resume call.
  DecodeStatus Resume(DecodeBuffer* b) {
    DecodeStatus status = decoder_.ResumeForTest(b);
    // Decoding may only remain in progress if the input has been exhausted.
    DCHECK(status == DecodeStatus::kDecodeDone ||
           (b->Empty() && status == DecodeStatus::kDecodeInProgress))
        << "status=" << status << ", Remaining=" << b->Remaining()
        << ", PREFIX_LENGTH=" << PREFIX_LENGTH << ", PrefixMask=" << std::hex
        << (PrefixMask() + 0) << "\ndecoder_: " << decoder_.DebugString()
        << "\n"
        << __PRETTY_FUNCTION__;
    MaybeCollectValue(status);
    return status;
  }

  // Adds the most recently decoded value to a running sum and returns that
  // sum. Must only be called after a decode has completed.
  ResultType ExtractResult() {
    DCHECK(have_value_);
    sum_ += value_;
    return sum_;
  }

  Http2String DebugString() { return decoder_.DebugString(); }

  static constexpr uint32_t MaxExtensionBytes() {
    return Decoder::MaxExtensionBytes();
  }
  static constexpr uint8_t PrefixLength() { return PREFIX_LENGTH; }
  // Mask selecting the low PREFIX_LENGTH bits of the first byte.
  static constexpr uint8_t PrefixMask() { return (1 << PREFIX_LENGTH) - 1; }
  // The mask must be a contiguous run of low bits.
  static_assert(0 == (PrefixMask() & (PrefixMask() + 1)), "Bad Mask");

 private:
  // Records the decoded value if |status| says decoding has finished,
  // otherwise notes that no value is available.
  void MaybeCollectValue(DecodeStatus status) {
    if (status == DecodeStatus::kDecodeDone) {
      value_ = decoder_.value();
      have_value_ = true;
    } else {
      have_value_ = false;
    }
  }

  Decoder decoder_;
  ResultType sum_ = 0;
  // Initialized so that calling ExtractResult before any decode completes
  // trips the DCHECK deterministically instead of reading indeterminate
  // memory.
  uint32_t value_ = 0;
  bool have_value_ = false;
};

// Decoder wrapper that handles single-byte varints itself and only invokes
// the decoder (via DECODER::StartExtended and DECODER::Resume) when the
// prefix bits are all ones, i.e. when extension bytes follow.
//
// PREFIX_LENGTH is the number of bits of the first byte that belong to the
// varint.
template <class DECODER, int PREFIX_LENGTH>
class InlineBothExtended {
 public:
  typedef DECODER Decoder;
  typedef uint64_t ResultType;
  InlineBothExtended() {
    // The mask must be a contiguous run of low bits.
    static_assert(0 == (PrefixMask() & (PrefixMask() + 1)), "Bad Mask");
  }

  // Reads the first byte from |b|. If the prefix bits are not all ones, the
  // masked byte is the whole value and decoding is done without calling the
  // decoder; otherwise the decoder is asked to decode the extension bytes.
  DecodeStatus Start(DecodeBuffer* b) {
    uint8_t byte = b->DecodeUInt8();
    DVLOG(1) << Http2StrCat("byte=", byte, " (0x", absl::Hex(byte), ")");
    byte &= PrefixMask();
    if (byte < PrefixMask()) {
      DVLOG(1) << Http2StrCat("single byte encoding ", byte);
      value_ = byte;
      have_value_ = true;
      return DecodeStatus::kDecodeDone;
    }
    DCHECK_EQ(byte, PrefixMask());
    have_value_ = false;
    DecodeStatus status = decoder_.StartExtended(PrefixLength(), b);
    // Decoding may only remain in progress if the input has been exhausted.
    DCHECK(status == DecodeStatus::kDecodeDone ||
           (b->Empty() && status == DecodeStatus::kDecodeInProgress))
        << "status=" << status << ", Remaining=" << b->Remaining()
        << ", PREFIX_LENGTH=" << PREFIX_LENGTH << ", PrefixMask=" << std::hex
        << (PrefixMask() + 0) << "\ndecoder_: " << decoder_.DebugString()
        << "\n"
        << __PRETTY_FUNCTION__;
    MaybeCollectValue(status);
    return status;
  }

  // Continues decoding a varint whose earlier bytes were consumed by Start or
  // by a previous Resume call.
  DecodeStatus Resume(DecodeBuffer* b) {
    DecodeStatus status = decoder_.Resume(b);
    // Decoding may only remain in progress if the input has been exhausted.
    DCHECK(status == DecodeStatus::kDecodeDone ||
           (b->Empty() && status == DecodeStatus::kDecodeInProgress))
        << "status=" << status << ", Remaining=" << b->Remaining()
        << ", PREFIX_LENGTH=" << PREFIX_LENGTH << ", PrefixMask=" << std::hex
        << (PrefixMask() + 0) << "\ndecoder_: " << decoder_.DebugString()
        << "\n"
        << __PRETTY_FUNCTION__;
    MaybeCollectValue(status);
    return status;
  }

  // Adds the most recently decoded value to a running sum and returns that
  // sum. Must only be called after a decode has completed.
  ResultType ExtractResult() {
    DCHECK(have_value_);
    sum_ += value_;
    return sum_;
  }

  Http2String DebugString() { return decoder_.DebugString(); }

  static constexpr uint32_t MaxExtensionBytes() {
    return Decoder::MaxExtensionBytes();
  }
  static constexpr uint8_t PrefixLength() { return PREFIX_LENGTH; }
  // Mask selecting the low PREFIX_LENGTH bits of the first byte.
  static constexpr uint8_t PrefixMask() { return (1 << PREFIX_LENGTH) - 1; }

 private:
  // Records the decoded value if |status| says decoding has finished,
  // otherwise notes that no value is available.
  void MaybeCollectValue(DecodeStatus status) {
    if (status == DecodeStatus::kDecodeDone) {
      value_ = decoder_.value();
      have_value_ = true;
    } else {
      have_value_ = false;
    }
  }

  Decoder decoder_;
  ResultType sum_ = 0;
  // Initialized so that calling ExtractResult before any decode completes
  // trips the DCHECK deterministically instead of reading indeterminate
  // memory.
  uint32_t value_ = 0;
  bool have_value_ = false;
};

// Decoder wrapper that handles single-byte varints itself and only invokes
// the decoder (via DECODER::StartExtendedForTest and DECODER::ResumeForTest)
// when the prefix bits are all ones, i.e. when extension bytes follow. Per
// the results table at the top of this file, the "InlineNone" variants
// measure the case where the decoder calls are not inlined; that the ForTest
// methods are the out-of-line entry points is presumed from that naming and
// should be confirmed against the decoder's header.
//
// PREFIX_LENGTH is the number of bits of the first byte that belong to the
// varint.
template <class DECODER, int PREFIX_LENGTH>
class InlineNoneExtended {
 public:
  typedef DECODER Decoder;
  typedef uint64_t ResultType;

  // Reads the first byte from |b|. If the prefix bits are not all ones, the
  // masked byte is the whole value and decoding is done without calling the
  // decoder; otherwise the decoder is asked to decode the extension bytes.
  DecodeStatus Start(DecodeBuffer* b) {
    uint8_t byte = b->DecodeUInt8();
    DVLOG(1) << Http2StrCat("byte=", byte, " (0x", absl::Hex(byte), ")");
    byte &= PrefixMask();
    if (byte < PrefixMask()) {
      DVLOG(1) << Http2StrCat("single byte encoding ", byte);
      value_ = byte;
      have_value_ = true;
      return DecodeStatus::kDecodeDone;
    }
    DCHECK_EQ(byte, PrefixMask());
    have_value_ = false;
    DecodeStatus status = decoder_.StartExtendedForTest(PrefixLength(), b);
    // Decoding may only remain in progress if the input has been exhausted.
    DCHECK(status == DecodeStatus::kDecodeDone ||
           (b->Empty() && status == DecodeStatus::kDecodeInProgress))
        << "status=" << status << ", Remaining=" << b->Remaining()
        << ", PREFIX_LENGTH=" << PREFIX_LENGTH << ", PrefixMask=" << std::hex
        << (PrefixMask() + 0) << "\ndecoder_: " << decoder_.DebugString()
        << "\n"
        << __PRETTY_FUNCTION__;
    MaybeCollectValue(status);
    return status;
  }

  // Continues decoding a varint whose earlier bytes were consumed by Start or
  // by a previous Resume call.
  DecodeStatus Resume(DecodeBuffer* b) {
    DecodeStatus status = decoder_.ResumeForTest(b);
    // Decoding may only remain in progress if the input has been exhausted.
    DCHECK(status == DecodeStatus::kDecodeDone ||
           (b->Empty() && status == DecodeStatus::kDecodeInProgress))
        << "status=" << status << ", Remaining=" << b->Remaining()
        << ", PREFIX_LENGTH=" << PREFIX_LENGTH << ", PrefixMask=" << std::hex
        << (PrefixMask() + 0) << "\ndecoder_: " << decoder_.DebugString()
        << "\n"
        << __PRETTY_FUNCTION__;
    MaybeCollectValue(status);
    return status;
  }

  // Adds the most recently decoded value to a running sum and returns that
  // sum. Must only be called after a decode has completed.
  ResultType ExtractResult() {
    DCHECK(have_value_);
    sum_ += value_;
    return sum_;
  }

  Http2String DebugString() { return decoder_.DebugString(); }

  static constexpr uint32_t MaxExtensionBytes() {
    return Decoder::MaxExtensionBytes();
  }
  static constexpr uint8_t PrefixLength() { return PREFIX_LENGTH; }
  // Mask selecting the low PREFIX_LENGTH bits of the first byte.
  static constexpr uint8_t PrefixMask() { return (1 << PREFIX_LENGTH) - 1; }
  // The mask must be a contiguous run of low bits.
  static_assert(0 == (PrefixMask() & (PrefixMask() + 1)), "Bad Mask");

 private:
  // Records the decoded value if |status| says decoding has finished,
  // otherwise notes that no value is available.
  void MaybeCollectValue(DecodeStatus status) {
    if (status == DecodeStatus::kDecodeDone) {
      value_ = decoder_.value();
      have_value_ = true;
    } else {
      have_value_ = false;
    }
  }

  Decoder decoder_;
  ResultType sum_ = 0;
  // Initialized so that calling ExtractResult before any decode completes
  // trips the DCHECK deterministically instead of reading indeterminate
  // memory.
  uint32_t value_ = 0;
  bool have_value_ = false;
};

}  // namespace

// Registers one benchmark of DECODER, where the template class DECODE_METHOD
// controls which methods are called (e.g. inlined or not). Each generated item
// has the same PREFIX_LENGTH (in bits), and if SERIALIZED_BYTES is not zero,
// then each generated item has the same length (in bytes), else a
// population of vaguely realistic sized items is decoded.
template <int SERIALIZED_BYTES,
          int PREFIX_LENGTH,
          template <typename D, int PL> class DECODE_METHOD,
          class DECODER>
void RegisterBenchmarkOfSBPLDMAndDecoder() {
  static_assert(4 <= PREFIX_LENGTH, "PREFIX_LENGTH is too low.");
  static_assert(PREFIX_LENGTH <= 7, "PREFIX_LENGTH is too high.");

  if (SERIALIZED_BYTES > 0 &&
      SERIALIZED_BYTES > DECODER::MaxExtensionBytes() + 1) {
    DLOG(INFO) << "Skipping encoding that is too long to decode.";
    return;
  }

  typedef DECODE_METHOD<DECODER, PREFIX_LENGTH> DecoderWrapper;
  typedef VarintDecoderBenchmark<DecoderWrapper> DecoderBenchmark;

  // "Parse" __PRETTY_FUNCTION__ to determine what types are being used.
  // __PRETTY_FUNCTION__ is approximately (in a debug build):
  //
  //      void http2::test::RegisterBenchmarkOfSBPLDMAndDecoder()
  //     [SERIALIZED_BYTES = 0, PREFIX_LENGTH = 4,
  //      DECODE_METHOD = InlineBoth,
  //      DECODER = http2::HpackVarintDecoder]
  //
  // or in an optimized build:
  //
  //      void http2::test::RegisterBenchmarkOfSBPLDMAndDecoder()
  //      [with int SERIALIZED_BYTES = 0;
  //            int PREFIX_LENGTH = 4;
  //            DECODE_METHOD = http2::test::{anonymous}::InlineBoth;
  //            DECODER = http2::HpackVarintDecoder]

  VLOG(1) << __PRETTY_FUNCTION__;

  Http2String ns_pat = "(?:\\{anonymous\\}|\\w+)::";
  Http2String nss_pat = "(?:" + ns_pat + ")*";
  Http2String sym_pat = nss_pat + "(\\w+)";

  RE2 re("\\bDECODE_METHOD = " + sym_pat + "[;,]\\s+DECODER = " + sym_pat);
  CHECK_EQ(2, re.NumberOfCapturingGroups());

  Http2String decode_method, decoder;

  CHECK(RE2::PartialMatch(__PRETTY_FUNCTION__, re, &decode_method, &decoder))
      << __PRETTY_FUNCTION__;

  Http2String name;
  if (SERIALIZED_BYTES == 0) {
    name =
        Http2StrCat("SBX/PL", PREFIX_LENGTH, "/", decode_method, "/", decoder);
  } else {
    name = Http2StrCat("SB", SERIALIZED_BYTES, "/PL", PREFIX_LENGTH, "/",
                       decode_method, "/", decoder);
  }

  // TODO(jamessynge): Add a "validation" phase here, where the benchmarking
  // code is checked to make sure that the decoding is actually correct in this
  // context. I.e., create an instance of DecoderBenchmark, have it generate
  // a few items, decode those, checking that each matches the expected value.

  auto run_benchmark = [name](int iters) {
    StopBenchmarkTiming();
    VLOG(1) << "Running " << iters << " for benchmark " << name;
    DecoderBenchmark bm(SERIALIZED_BYTES);
    bm.Benchmark(iters);
  };
  Callback1<int>* benchmark_runner =
      util::functional::ToPermanentCallback(run_benchmark);

  LOG(INFO) << "Registering benchmark \"" << name << "\"";
  new ::testing::Benchmark(name, benchmark_runner);
}

// Registers, for the given SERIALIZED_BYTES, PREFIX_LENGTH and DECODE_METHOD
// wrapper template, one benchmark per decoder class under test.
template <int SERIALIZED_BYTES,
          int PREFIX_LENGTH,
          template <typename D, int PL> class DECODE_METHOD>
void RegisterBenchmarksOfSBPLAndDecodeMethod() {
  // Register benchmarks of the specified decoder class (currently only
  // HpackVarintDecoder).

  RegisterBenchmarkOfSBPLDMAndDecoder<SERIALIZED_BYTES, PREFIX_LENGTH,
                                      DECODE_METHOD, HpackVarintDecoder>();
  // If there are other HPACK varint decoder classes with the same API to be
  // compared with HpackVarintDecoder, register benchmarks for them here;
  // for example:
  //   RegisterBenchmarkOfSBPLDMAndDecoder<
  //       SERIALIZED_BYTES, PREFIX_LENGTH,
  //       DECODE_METHOD, HpackPeekAheadVarintDecoder>();
}

// Registers, for the given SERIALIZED_BYTES and PREFIX_LENGTH, benchmarks for
// each of the four decoding-method wrappers: InlineBoth, InlineBothExtended,
// InlineNone and InlineNoneExtended.
template <int SERIALIZED_BYTES, int PREFIX_LENGTH>
void RegisterBenchmarksOfSBAndPrefixLength() {
  // Register benchmarks with the specified decoding method (i.e. which methods
  // of the decoder class are called, whether inlining is occurring, etc.).

  RegisterBenchmarksOfSBPLAndDecodeMethod<SERIALIZED_BYTES, PREFIX_LENGTH,
                                          InlineBoth>();
  RegisterBenchmarksOfSBPLAndDecodeMethod<SERIALIZED_BYTES, PREFIX_LENGTH,
                                          InlineBothExtended>();
  RegisterBenchmarksOfSBPLAndDecodeMethod<SERIALIZED_BYTES, PREFIX_LENGTH,
                                          InlineNone>();
  RegisterBenchmarksOfSBPLAndDecodeMethod<SERIALIZED_BYTES, PREFIX_LENGTH,
                                          InlineNoneExtended>();
}

// Registers, for the given SERIALIZED_BYTES, benchmarks for each supported
// prefix length (4 through 7 bits).
//
// TODO(jamessynge): Move SERIALIZED_BYTES to be a regular (i.e. runtime)
// parameter rather than a template parameter. Unlike PREFIX_LENGTH, it doesn't
// provide extra info that we'd like the compiler to use.
template <int SERIALIZED_BYTES>
void RegisterBenchmarksOfSerializedBytes() {
  // Register benchmarks of encodings with the specified prefix length (bits)...

  RegisterBenchmarksOfSBAndPrefixLength<SERIALIZED_BYTES, 4>();
  RegisterBenchmarksOfSBAndPrefixLength<SERIALIZED_BYTES, 5>();
  RegisterBenchmarksOfSBAndPrefixLength<SERIALIZED_BYTES, 6>();
  RegisterBenchmarksOfSBAndPrefixLength<SERIALIZED_BYTES, 7>();
}

// Registers every benchmark defined in this file: fixed-length encodings of
// 1, 2 and 3 bytes, followed by the variable-length population model
// (SERIALIZED_BYTES == 0).
void RegisterAllBenchmarks() {
  // Register benchmarks of encodings of the specified number of bytes...

  RegisterBenchmarksOfSerializedBytes<1>();
  RegisterBenchmarksOfSerializedBytes<2>();
  RegisterBenchmarksOfSerializedBytes<3>();

  // Skipping 4 and 5 because they're not important in practice.
  //   RegisterBenchmarksOfSerializedBytes<4>();
  //   RegisterBenchmarksOfSerializedBytes<5>();

  // Register benchmarks of various lengths, determined by a population model
  // (not yet an accurate model of production, just a guess).
  RegisterBenchmarksOfSerializedBytes<0>();
}

}  // namespace test
}  // namespace http2

// Program entry point: initializes the runtime, registers all of the varint
// decoding benchmarks and then runs the ones selected by --benchmarks.
int main(int argc, char** argv) {
  InitGoogle(argv[0], &argc, &argv, true);
  // Default to "all" when no selection was given on the command line
  // (presumably this makes the harness run every registered benchmark).
  if (FLAGS_benchmarks.empty()) {
    FLAGS_benchmarks = "all";
  }
  http2::test::RegisterAllBenchmarks();
  RunSpecifiedBenchmarks();
  return 0;
}
