blob: 7777d57a797f3e14b839f45b629f0e177780a18a [file] [log] [blame]
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/cast/sender/vpx_encoder.h"
#include "base/logging.h"
#include "media/base/video_frame.h"
#include "media/cast/constants.h"
#include "third_party/libvpx/source/libvpx/vpx/vp8cx.h"
namespace media {
namespace cast {
namespace {
// Upper bound, expressed as a count of 1/max_fps periods, on the frame
// duration handed to the encoder for the first frame after the video stream
// has paused. In effect this caps how large the encoded frame following a
// pause is allowed to be.
constexpr int kRestartFramePeriods = 3;

// The constants below drive the automatic tuning of two encoder parameters:
// |cpu_used| and |min_quantizer|.

// Half-life of the encoding speed accumulator, in microseconds. A smaller
// value means a shorter time-averaging window.
constexpr int kEncodingSpeedAccHalfLife = 120000;  // 0.12 second.

// Target encoder utilization, in the range [0, 1]. This trades quality against
// CPU usage: the higher the value, the better the quality and the higher the
// CPU usage.
//
// Used on machines with more than two encoding threads.
constexpr double kHiTargetEncoderUtilization = 0.7;
// Used on machines with two encoding threads.
constexpr double kMidTargetEncoderUtilization = 0.6;
// Used on machines with a single encoding thread.
constexpr double kLoTargetEncoderUtilization = 0.5;

// How much encoding speed one quantizer step is considered to be worth.
constexpr double kEquivalentEncodingSpeedStepPerQpStep = 1 / 20.0;

// Bounds on the encoding speed handed to the encoder. The valid range is
// [4, 16]. Experiments show that above speed 12 the encoding time saved is not
// worth the loss of quality, and below speed 6 the quality gained is not worth
// the extra encoding time.
constexpr int kHighestEncodingSpeed = 12;
constexpr int kLowestEncodingSpeed = 6;
bool HasSufficientFeedback(
const FeedbackSignalAccumulator<base::TimeDelta>& accumulator) {
const base::TimeDelta amount_of_history =
accumulator.update_time() - accumulator.reset_time();
return amount_of_history.InMicroseconds() >= 250000; // 0.25 second.
}
} // namespace
VpxEncoder::VpxEncoder(const FrameSenderConfig& video_config)
: cast_config_(video_config),
target_encoder_utilization_(
video_config.video_codec_params.number_of_encode_threads > 2
? kHiTargetEncoderUtilization
: (video_config.video_codec_params.number_of_encode_threads > 1
? kMidTargetEncoderUtilization
: kLoTargetEncoderUtilization)),
key_frame_requested_(true),
bitrate_kbit_(cast_config_.start_bitrate / 1000),
next_frame_id_(FrameId::first()),
encoding_speed_acc_(base::Microseconds(kEncodingSpeedAccHalfLife)),
encoding_speed_(kHighestEncodingSpeed) {
config_.g_timebase.den = 0; // Not initialized.
DCHECK_LE(cast_config_.video_codec_params.min_qp,
cast_config_.video_codec_params.max_cpu_saver_qp);
DCHECK_LE(cast_config_.video_codec_params.max_cpu_saver_qp,
cast_config_.video_codec_params.max_qp);
DETACH_FROM_THREAD(thread_checker_);
}
// Tears down the libvpx encoder instance, if one was ever created.
VpxEncoder::~VpxEncoder() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  // Nothing to release when no frame was ever encoded.
  if (!is_initialized())
    return;
  vpx_codec_destroy(&encoder_);
}
// Performs sanity checks only. Creation and configuration of the encoder are
// deferred until the first frame encode is requested.
void VpxEncoder::Initialize() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DCHECK(!is_initialized());
}
// Makes the encoder ready to accept frames of |frame_size|.
//
// If an encoder instance already exists and the new size is no larger (by
// area) than the current one, the existing instance is reconfigured in place.
// Otherwise, or if libvpx rejects the in-place change, the existing instance
// (if any) is destroyed and a fresh one is created and configured from
// scratch, starting again at the highest encoding speed.
void VpxEncoder::ConfigureForNewFrameSize(const gfx::Size& frame_size) {
  if (is_initialized()) {
    // Capture the current size before |config_| is mutated below, so that
    // every log message reports the true previous size, including the
    // fall-through case where libvpx rejects the in-place change.
    const gfx::Size old_frame_size(config_.g_w, config_.g_h);

    // NOTE: Do we need this workaround for VP9?
    // Workaround for VP8 bug: If the new size is strictly less-than-or-equal to
    // the old size, in terms of area, the existing encoder instance can
    // continue. Otherwise, completely tear-down and re-create a new encoder to
    // avoid a shutdown crash.
    if (frame_size.GetArea() <= old_frame_size.GetArea()) {
      DVLOG(1) << "Continuing to use existing encoder at smaller frame size: "
               << old_frame_size.ToString() << " --> "
               << frame_size.ToString();
      config_.g_w = frame_size.width();
      config_.g_h = frame_size.height();
      // Drop any raised minimum quantizer back to the configured floor.
      config_.rc_min_quantizer = cast_config_.video_codec_params.min_qp;
      if (vpx_codec_enc_config_set(&encoder_, &config_) == VPX_CODEC_OK)
        return;
      DVLOG(1) << "libvpx rejected the attempt to use a smaller frame size in "
                  "the current instance.";
    }

    DVLOG(1) << "Destroying/Re-Creating encoder for larger frame size: "
             << old_frame_size.ToString() << " --> " << frame_size.ToString();
    vpx_codec_destroy(&encoder_);
  } else {
    DVLOG(1) << "Creating encoder for the first frame; size: "
             << frame_size.ToString();
  }

  // Determine appropriate codec interface.
  vpx_codec_iface_t* iface;
  if (cast_config_.codec == CODEC_VIDEO_VP9) {
    iface = vpx_codec_vp9_cx();
  } else {
    DCHECK(cast_config_.codec == CODEC_VIDEO_VP8);
    iface = vpx_codec_vp8_cx();
  }

  // Populate encoder configuration with default values.
  CHECK_EQ(vpx_codec_enc_config_default(iface, &config_, 0), VPX_CODEC_OK);

  config_.g_threads = cast_config_.video_codec_params.number_of_encode_threads;
  config_.g_w = frame_size.width();
  config_.g_h = frame_size.height();
  // Set the timebase to match that of base::TimeDelta.
  config_.g_timebase.num = 1;
  config_.g_timebase.den = base::Time::kMicrosecondsPerSecond;

  // |g_pass| and |g_lag_in_frames| must be "one pass" and zero, respectively,
  // in order for VPX to support changing frame sizes during encoding:
  config_.g_pass = VPX_RC_ONE_PASS;
  config_.g_lag_in_frames = 0;  // Immediate data output for each frame.

  // Rate control settings.
  config_.rc_dropframe_thresh = 0;  // The encoder may not drop any frames.
  config_.rc_resize_allowed = 0;    // TODO(miu): Why not? Investigate this.
  config_.rc_end_usage = VPX_CBR;
  // |bitrate_kbit_| carries the current target across encoder re-creation.
  config_.rc_target_bitrate = bitrate_kbit_;
  config_.rc_min_quantizer = cast_config_.video_codec_params.min_qp;
  config_.rc_max_quantizer = cast_config_.video_codec_params.max_qp;
  // TODO(miu): Revisit these now that the encoder is being successfully
  // micro-managed.
  config_.rc_undershoot_pct = 100;
  config_.rc_overshoot_pct = 15;
  // TODO(miu): Document why these rc_buf_*_sz values were chosen and/or
  // research for better values. Should they be computed from the target
  // playout delay?
  config_.rc_buf_initial_sz = 500;
  config_.rc_buf_optimal_sz = 600;
  config_.rc_buf_sz = 1000;

  config_.kf_mode = VPX_KF_DISABLED;

  vpx_codec_flags_t flags = 0;
  CHECK_EQ(vpx_codec_enc_init(&encoder_, iface, &config_, flags), VPX_CODEC_OK);

  // Raise the threshold for considering macroblocks as static. The default is
  // zero, so this setting makes the encoder less sensitive to motion. This
  // lowers the probability of needing to utilize more CPU to search for motion
  // vectors.
  CHECK_EQ(vpx_codec_control(&encoder_, VP8E_SET_STATIC_THRESHOLD, 1),
           VPX_CODEC_OK);

  // This cpu_used setting is a trade-off between cpu usage and encoded video
  // quality. The default is zero, with increasingly less CPU to be used as the
  // value is more negative or more positive. The encoder does some automatic
  // adjust on encoding speed for positive values, however at least at this
  // stage the experiments show that this automatic behaviour is not reliable on
  // windows machines. We choose to set negative values instead to directly set
  // the encoding speed to the encoder. Starting with the highest encoding speed
  // to avoid large cpu usage from the beginning.
  encoding_speed_ = kHighestEncodingSpeed;
  CHECK_EQ(vpx_codec_control(&encoder_, VP8E_SET_CPUUSED, -encoding_speed_),
           VPX_CODEC_OK);
}
// Encodes |video_frame| and writes the result into |encoded_frame|: frame id,
// dependency and referenced frame id, RTP timestamp, |reference_time|, the
// encoded bytes, and the encoder/lossy utilization feedback values.
//
// The encoder is created on first use and reconfigured when the visible frame
// size changes. After encoding, the measured processing time and the
// quantizer chosen by the encoder are fed into |encoding_speed_acc_|, which is
// then used to retune |encoding_speed_| and the minimum quantizer for
// subsequent frames.
void VpxEncoder::Encode(scoped_refptr<media::VideoFrame> video_frame,
                        base::TimeTicks reference_time,
                        SenderEncodedFrame* encoded_frame) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  DCHECK(encoded_frame);

  // Sample the real-world clock rather than the CastEnvironment clock (which
  // might be simulated), because this feeds the |encoder_utilization|
  // computation below.
  const base::TimeTicks start_time = base::TimeTicks::Now();

  // Create the encoder lazily; afterwards, reconfigure it whenever the size
  // of the incoming frames differs from the configured size.
  const gfx::Size frame_size = video_frame->visible_rect().size();
  const bool needs_configuration =
      !is_initialized() || gfx::Size(config_.g_w, config_.g_h) != frame_size;
  if (needs_configuration)
    ConfigureForNewFrameSize(frame_size);

  // Build the vpx_image_t view that vpx_codec_encode() uses to read the YUV
  // data. Only the VISIBLE rectangle within |video_frame| is exposed to the
  // codec, so the plane pointers and strides are overridden after wrapping.
  const bool is_nv12 = video_frame->format() == PIXEL_FORMAT_NV12;
  const vpx_img_fmt_t vpx_format =
      is_nv12 ? VPX_IMG_FMT_NV12 : VPX_IMG_FMT_I420;
  vpx_image_t vpx_image;
  vpx_image_t* const result = vpx_img_wrap(
      &vpx_image, vpx_format, frame_size.width(), frame_size.height(), 1,
      video_frame->data(VideoFrame::kYPlane));
  DCHECK_EQ(result, &vpx_image);

  // The luma plane is laid out the same way for both supported formats.
  vpx_image.planes[VPX_PLANE_Y] =
      video_frame->visible_data(VideoFrame::kYPlane);
  vpx_image.stride[VPX_PLANE_Y] = video_frame->stride(VideoFrame::kYPlane);
  if (is_nv12) {
    // In libvpx, the UV plane of NV12 frames is represented by two planes
    // with the same stride, shifted by one byte.
    vpx_image.planes[VPX_PLANE_U] =
        video_frame->visible_data(VideoFrame::kUVPlane);
    vpx_image.planes[VPX_PLANE_V] = vpx_image.planes[VPX_PLANE_U] + 1;
    vpx_image.stride[VPX_PLANE_U] = video_frame->stride(VideoFrame::kUVPlane);
    vpx_image.stride[VPX_PLANE_V] = video_frame->stride(VideoFrame::kUVPlane);
  } else {
    // I420: separate U and V planes.
    vpx_image.planes[VPX_PLANE_U] =
        video_frame->visible_data(VideoFrame::kUPlane);
    vpx_image.planes[VPX_PLANE_V] =
        video_frame->visible_data(VideoFrame::kVPlane);
    vpx_image.stride[VPX_PLANE_U] = video_frame->stride(VideoFrame::kUPlane);
    vpx_image.stride[VPX_PLANE_V] = video_frame->stride(VideoFrame::kVPlane);
  }

  // The frame duration given to the VPX codecs affects a number of important
  // behaviors, including: per-frame bandwidth, CPU time spent encoding,
  // temporal quality trade-offs, and key/golden/alt-ref frame generation
  // intervals. The prediction is bounded because the frame rate can be highly
  // variable, including long pauses in the video stream.
  const base::TimeDelta shortest_duration =
      base::Seconds(1.0 / cast_config_.max_frame_rate);
  const base::TimeDelta longest_duration = base::Seconds(
      static_cast<double>(kRestartFramePeriods) / cast_config_.max_frame_rate);
  base::TimeDelta predicted_frame_duration =
      video_frame->metadata().frame_duration.value_or(base::TimeDelta());
  if (predicted_frame_duration <= base::TimeDelta()) {
    // No usable duration came with the frame. Fall back to the time elapsed
    // between the previous frame and this one as the prediction.
    predicted_frame_duration = video_frame->timestamp() - last_frame_timestamp_;
  }
  const base::TimeDelta capped_duration =
      std::min(longest_duration, predicted_frame_duration);
  predicted_frame_duration = std::max(shortest_duration, capped_duration);
  last_frame_timestamp_ = video_frame->timestamp();

  // Encode the frame. The presentation time stamp argument here is fixed to
  // zero to force the encoder to base its single-frame bandwidth calculations
  // entirely on |predicted_frame_duration| and the target bitrate setting being
  // micro-managed via calls to UpdateRates().
  CHECK_EQ(vpx_codec_encode(&encoder_, &vpx_image, 0,
                            predicted_frame_duration.InMicroseconds(),
                            key_frame_requested_ ? VPX_EFLAG_FORCE_KF : 0,
                            VPX_DL_REALTIME),
           VPX_CODEC_OK)
      << "BUG: Invalid arguments passed to vpx_codec_encode().";

  // Pull data from the encoder, populating a new EncodedFrame. Only the first
  // CX_FRAME_PKT packet is consumed, since all data is provided in one such
  // packet; any other packet kinds preceding it are skipped.
  encoded_frame->frame_id = next_frame_id_++;
  vpx_codec_iter_t iter = nullptr;
  const vpx_codec_cx_pkt_t* packet = nullptr;
  do {
    packet = vpx_codec_get_cx_data(&encoder_, &iter);
  } while (packet != nullptr && packet->kind != VPX_CODEC_CX_FRAME_PKT);

  if (packet != nullptr) {
    const bool is_key_frame =
        (packet->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
    if (is_key_frame) {
      // TODO(hubbe): Replace "dependency" with a "bool is_key_frame".
      encoded_frame->dependency = EncodedFrame::KEY;
      encoded_frame->referenced_frame_id = encoded_frame->frame_id;
    } else {
      encoded_frame->dependency = EncodedFrame::DEPENDENT;
      // Frame dependencies could theoretically be relaxed by looking for the
      // VPX_FRAME_IS_DROPPABLE flag, but in recent testing (Oct 2014), this
      // flag never seems to be set.
      encoded_frame->referenced_frame_id = encoded_frame->frame_id - 1;
    }
    encoded_frame->rtp_timestamp =
        RtpTimeTicks::FromTimeDelta(video_frame->timestamp(), kVideoFrequency);
    encoded_frame->reference_time = reference_time;
    const uint8_t* const payload =
        static_cast<const uint8_t*>(packet->data.frame.buf);
    encoded_frame->data.assign(payload, payload + packet->data.frame.sz);
  }
  DCHECK(!encoded_frame->data.empty())
      << "BUG: Encoder must provide data since lagged encoding is disabled.";

  // Encoder utilization: real-world time spent divided by the frame duration.
  const base::TimeDelta processing_time = base::TimeTicks::Now() - start_time;
  encoded_frame->encoder_utilization =
      processing_time / predicted_frame_duration;

  // Lossy utilization. The VPX encoder took an estimated guess at what
  // quantizer value would produce an encoded frame size as close to the target
  // as possible. Now that the frame has been encoded and the number of bytes
  // is known, the perfect quantizer value (i.e., the one that should have been
  // used) can be determined. This perfect quantizer is then normalized and
  // used as the lossy utilization.
  const double actual_bitrate =
      encoded_frame->data.size() * 8.0 / predicted_frame_duration.InSecondsF();
  const double target_bitrate = 1000.0 * config_.rc_target_bitrate;
  DCHECK_GT(target_bitrate, 0.0);
  const double bitrate_utilization = actual_bitrate / target_bitrate;
  int quantizer = -1;
  CHECK_EQ(vpx_codec_control(&encoder_, VP8E_GET_LAST_QUANTIZER_64, &quantizer),
           VPX_CODEC_OK);
  const double perfect_quantizer = bitrate_utilization * std::max(0, quantizer);
  // Side note: If it was possible for the encoder to encode within the target
  // number of bytes, the |perfect_quantizer| will be in the range [0.0,63.0].
  // If it was never possible, the value will be greater than 63.0.
  encoded_frame->lossy_utilization = perfect_quantizer / 63.0;

  DVLOG(2) << "VPX encoded frame_id " << encoded_frame->frame_id
           << ", sized: " << encoded_frame->data.size()
           << ", encoder_utilization: " << encoded_frame->encoder_utilization
           << ", lossy_utilization: " << encoded_frame->lossy_utilization
           << " (quantizer chosen by the encoder was " << quantizer << ')';

  const int configured_min_qp = cast_config_.video_codec_params.min_qp;
  if (encoded_frame->dependency == EncodedFrame::KEY) {
    // A key frame satisfies any pending key frame request and restarts the
    // encoding speed history.
    key_frame_requested_ = false;
    encoding_speed_acc_.Reset(kHighestEncodingSpeed, video_frame->timestamp());
  } else {
    // Equivalent encoding speed considering both cpu_used setting and
    // quantizer.
    const double actual_encoding_speed =
        encoding_speed_ + kEquivalentEncodingSpeedStepPerQpStep *
                              std::max(0, quantizer - configured_min_qp);
    const double adjusted_encoding_speed = actual_encoding_speed *
                                           encoded_frame->encoder_utilization /
                                           target_encoder_utilization_;
    encoding_speed_acc_.Update(adjusted_encoding_speed,
                               video_frame->timestamp());
  }

  // Hold the current settings until enough feedback has been accumulated.
  if (!HasSufficientFeedback(encoding_speed_acc_))
    return;

  // Predict |encoding_speed_| and |min_quantizer| for next frame.
  // When CPU is constrained, increase encoding speed and increase
  // |min_quantizer| if needed.
  const double predicted_speed = encoding_speed_acc_.current();
  int next_encoding_speed;
  int next_min_qp;
  if (predicted_speed > kHighestEncodingSpeed) {
    // The speed setting is already maxed out; convert the excess into a
    // higher minimum quantizer, capped at |max_cpu_saver_qp|.
    const double remainder = predicted_speed - kHighestEncodingSpeed;
    next_encoding_speed = kHighestEncodingSpeed;
    next_min_qp =
        static_cast<int>(remainder / kEquivalentEncodingSpeedStepPerQpStep +
                         configured_min_qp + 0.5);
    next_min_qp = std::min(next_min_qp,
                           cast_config_.video_codec_params.max_cpu_saver_qp);
  } else {
    // Round to the nearest speed, never going below the lowest allowed one.
    next_encoding_speed = static_cast<int>(
        std::max<double>(kLowestEncodingSpeed, predicted_speed) + 0.5);
    next_min_qp = configured_min_qp;
  }

  // Only touch the encoder when a setting actually changes.
  if (encoding_speed_ != next_encoding_speed) {
    encoding_speed_ = next_encoding_speed;
    CHECK_EQ(vpx_codec_control(&encoder_, VP8E_SET_CPUUSED, -encoding_speed_),
             VPX_CODEC_OK);
  }
  const unsigned int next_min_quantizer =
      static_cast<unsigned int>(next_min_qp);
  if (config_.rc_min_quantizer != next_min_quantizer) {
    config_.rc_min_quantizer = next_min_quantizer;
    CHECK_EQ(vpx_codec_enc_config_set(&encoder_, &config_), VPX_CODEC_OK);
  }
}
// Sets the target bitrate to |new_bitrate|, which is divided by 1000 to obtain
// the kbit value libvpx is configured with.
//
// If the encoder instance has not been created yet, the value is remembered
// in |bitrate_kbit_| so that it takes effect when the encoder is created,
// instead of being silently discarded. Otherwise the live encoder is
// reconfigured, unless the target is unchanged.
void VpxEncoder::UpdateRates(uint32_t new_bitrate) {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);

  const uint32_t new_bitrate_kbit = new_bitrate / 1000;

  if (!is_initialized()) {
    // No encoder to update yet. ConfigureForNewFrameSize() reads
    // |bitrate_kbit_| when it creates one.
    bitrate_kbit_ = new_bitrate_kbit;
    return;
  }

  if (config_.rc_target_bitrate == new_bitrate_kbit)
    return;

  config_.rc_target_bitrate = bitrate_kbit_ = new_bitrate_kbit;

  // Update encoder context.
  if (vpx_codec_enc_config_set(&encoder_, &config_)) {
    NOTREACHED() << "Invalid return value";
  }

  VLOG(1) << "VPX new rc_target_bitrate: " << new_bitrate_kbit << " kbps";
}
// Requests a key frame. Only a flag is raised here; Encode() forces the key
// frame on its next call and clears the flag once a key frame is produced.
void VpxEncoder::GenerateKeyFrame() {
  DCHECK_CALLED_ON_VALID_THREAD(thread_checker_);
  key_frame_requested_ = true;
}
} // namespace cast
} // namespace media