blob: d8b4102da6bd9b3e6e97eddba6882ae11848ea31 [file] [log] [blame]
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "cobalt/media/filters/audio_timestamp_validator.h"
namespace cobalt {
namespace media {
// Defines how many milliseconds of DecoderBuffer timestamp gap will be allowed
// before warning the user. See CheckForTimestampGap(). Value of 50 chosen, as
// this is low enough to catch issues early, but high enough to avoid noise for
// containers like WebM that default to low granularity timestamp precision.
// This is only the initial value of |drift_warning_threshold_msec_|; that
// member is raised each time a warning is logged.
const int kGapWarningThresholdMsec = 50;
// Limits the number of times the base timestamp of |audio_output_ts_helper_|
// may be adjusted in order to reach a stable state where gaps between encoded
// timestamps match decoded output intervals. Used as the initial value of
// |limit_unstable_audio_tries_|. See CheckForTimestampGap().
const int kLimitTriesForStableTiming = 5;
// Limits the milliseconds of difference between expected and actual timestamps
// gaps to consider timestamp expectations "stable": the absolute difference
// reported by InMilliseconds() must be strictly less than this value. 1 chosen
// because some containers (WebM) default to millisecond timestamp precision.
// See CheckForTimestampGap().
// NOTE: "Thrshold" is a misspelling of "Threshold"; the name is kept as is
// because CheckForTimestampGap() refers to it.
const int kStableTimeGapThrsholdMsec = 1;
// Builds a validator for the stream described by |decoder_config|.
//
// |decoder_config| is only consulted here: its codec delay decides whether
//     the stream is treated as having codec delay, and it must be a valid
//     config (DCHECKed).
// |media_log| receives the errors reported by CheckForTimestampGap().
//
// The validator starts with no base timestamp, not yet in stable state, zero
// unstable tries used, and the default try limit and drift warning threshold.
AudioTimestampValidator::AudioTimestampValidator(
    const AudioDecoderConfig& decoder_config,
    const scoped_refptr<MediaLog>& media_log)
    : has_codec_delay_(decoder_config.codec_delay() > 0),
      media_log_(media_log),
      audio_base_ts_(kNoTimestamp),
      reached_stable_state_(false),
      num_unstable_audio_tries_(0),
      limit_unstable_audio_tries_(kLimitTriesForStableTiming),
      drift_warning_threshold_msec_(kGapWarningThresholdMsec) {
  DCHECK(decoder_config.IsValidConfig());
}
// No explicit teardown is performed here; the body is intentionally empty.
AudioTimestampValidator::~AudioTimestampValidator() {
}
void AudioTimestampValidator::CheckForTimestampGap(
const scoped_refptr<DecoderBuffer>& buffer) {
if (buffer->end_of_stream()) return;
DCHECK_NE(kNoTimestamp, buffer->timestamp());
// If audio_base_ts_ == kNoTimestamp, we are processing our first buffer.
// If stream has neither codec delay nor discard padding, we should expect
// timestamps and output durations to line up from the start (i.e. be stable).
if (audio_base_ts_ == kNoTimestamp && !has_codec_delay_ &&
buffer->discard_padding().first == base::TimeDelta() &&
buffer->discard_padding().second == base::TimeDelta()) {
DVLOG(3) << __func__ << " Expecting stable timestamps - stream has neither "
<< "codec delay nor discard padding.";
limit_unstable_audio_tries_ = 0;
}
// Don't continue checking timestamps if we've exhausted tries to reach stable
// state. This suggests the media's encoded timestamps are way off.
if (num_unstable_audio_tries_ > limit_unstable_audio_tries_) return;
// Keep resetting encode base ts until we start getting decode output. Some
// codecs/containers (e.g. chained Ogg) will take several encoded buffers
// before producing the first decoded output.
if (!audio_output_ts_helper_) {
audio_base_ts_ = buffer->timestamp();
DVLOG(3) << __func__
<< " setting audio_base:" << audio_base_ts_.InMicroseconds();
return;
}
base::TimeDelta expected_ts = audio_output_ts_helper_->GetTimestamp();
base::TimeDelta ts_delta = buffer->timestamp() - expected_ts;
// Reconciling encoded buffer timestamps with decoded output often requires
// adjusting expectations by some offset. This accounts for varied (and at
// this point unknown) handling of front trimming and codec delay. Codec delay
// and skip trimming may or may not be accounted for in the encoded timestamps
// depending on the codec (e.g. MP3 vs Opus) and demuxers used (e.g. FFmpeg
// vs MSE stream parsers).
if (!reached_stable_state_) {
if (std::abs(ts_delta.InMilliseconds()) < kStableTimeGapThrsholdMsec) {
reached_stable_state_ = true;
DVLOG(3) << __func__ << " stabilized! tries:" << num_unstable_audio_tries_
<< " offset:"
<< audio_output_ts_helper_->base_timestamp().InMicroseconds();
} else {
base::TimeDelta orig_offset = audio_output_ts_helper_->base_timestamp();
// Save since this gets reset when we set new base time.
int64_t decoded_frame_count = audio_output_ts_helper_->frame_count();
audio_output_ts_helper_->SetBaseTimestamp(orig_offset + ts_delta);
audio_output_ts_helper_->AddFrames(decoded_frame_count);
DVLOG(3) << __func__
<< " NOT stabilized. tries:" << num_unstable_audio_tries_
<< " offset was:" << orig_offset.InMicroseconds() << " now:"
<< audio_output_ts_helper_->base_timestamp().InMicroseconds();
num_unstable_audio_tries_++;
// Let developers know if their files timestamps are way off from
if (num_unstable_audio_tries_ > limit_unstable_audio_tries_) {
MEDIA_LOG(ERROR, media_log_)
<< "Failed to reconcile encoded audio times with decoded output.";
}
}
// Don't bother with further checking until we reach stable state.
return;
}
if (std::abs(ts_delta.InMilliseconds()) > drift_warning_threshold_msec_) {
MEDIA_LOG(ERROR, media_log_)
<< " Large timestamp gap detected; may cause AV sync to drift."
<< " time:" << buffer->timestamp().InMicroseconds() << "us"
<< " expected:" << expected_ts.InMicroseconds() << "us"
<< " delta:" << ts_delta.InMicroseconds() << "us";
// Increase threshold to avoid log spam but, let us know if gap widens.
drift_warning_threshold_msec_ = std::abs(ts_delta.InMilliseconds());
}
DVLOG(3) << __func__ << " delta:" << ts_delta.InMicroseconds()
<< " expected_ts:" << expected_ts.InMicroseconds()
<< " actual_ts:" << buffer->timestamp().InMicroseconds()
<< " audio_ts_offset:"
<< audio_output_ts_helper_->base_timestamp().InMicroseconds();
}
// Adds the frames of the decoded |audio_buffer| to the running output
// duration that CheckForTimestampGap() compares encoded timestamps against.
//
// The first call creates |audio_output_ts_helper_| and anchors it at
// |audio_base_ts_|, which is expected (DCHECKed) to have been set already.
void AudioTimestampValidator::RecordOutputDuration(
    const scoped_refptr<AudioBuffer>& audio_buffer) {
  if (!audio_output_ts_helper_) {
    DCHECK_NE(audio_base_ts_, kNoTimestamp);

    // SUBTLE: the helper is deliberately created with the sample rate of the
    // output buffer, because the demuxer stream config is potentially stale
    // for implicit AAC.
    audio_output_ts_helper_.reset(
        new AudioTimestampHelper(audio_buffer->sample_rate()));
    audio_output_ts_helper_->SetBaseTimestamp(audio_base_ts_);
  }

  DVLOG(3) << __func__ << " " << audio_buffer->frame_count() << " frames";
  audio_output_ts_helper_->AddFrames(audio_buffer->frame_count());
}
} // namespace media
} // namespace cobalt