| // Copyright 2016 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "cobalt/media/filters/audio_timestamp_validator.h" |
| |
| namespace cobalt { |
| namespace media { |
| |
namespace {

// Largest gap, in milliseconds, between a DecoderBuffer timestamp and its
// expected value that is tolerated before the user is warned. See
// CheckForTimestampGap(). 50 is low enough to catch issues early, yet high
// enough to avoid noise from containers like WebM that default to low
// granularity timestamp precision.
const int kGapWarningThresholdMsec = 50;

// Upper bound on how many times the base timestamp used to predict encoded
// timestamps may be adjusted while trying to reach a stable state, i.e. one
// where gaps between encoded timestamps match decoded output intervals. See
// CheckForTimestampGap().
const int kLimitTriesForStableTiming = 5;

// Maximum difference, in milliseconds, between expected and actual timestamp
// gaps for timestamp expectations to be considered "stable". 1 is used because
// some containers (WebM) default to millisecond timestamp precision. See
// CheckForTimestampGap().
// NOTE: the identifier's spelling ("Thrshold") is kept as-is because
// CheckForTimestampGap() refers to it by this name.
const int kStableTimeGapThrsholdMsec = 1;

}  // namespace
| |
| AudioTimestampValidator::AudioTimestampValidator( |
| const AudioDecoderConfig& decoder_config, |
| const scoped_refptr<MediaLog>& media_log) |
| : has_codec_delay_(decoder_config.codec_delay() > 0), |
| media_log_(media_log), |
| audio_base_ts_(kNoTimestamp), |
| reached_stable_state_(false), |
| num_unstable_audio_tries_(0), |
| limit_unstable_audio_tries_(kLimitTriesForStableTiming), |
| drift_warning_threshold_msec_(kGapWarningThresholdMsec) { |
| DCHECK(decoder_config.IsValidConfig()); |
| } |
| |
| AudioTimestampValidator::~AudioTimestampValidator() {} |
| |
| void AudioTimestampValidator::CheckForTimestampGap( |
| const scoped_refptr<DecoderBuffer>& buffer) { |
| if (buffer->end_of_stream()) return; |
| DCHECK_NE(kNoTimestamp, buffer->timestamp()); |
| |
| // If audio_base_ts_ == kNoTimestamp, we are processing our first buffer. |
| // If stream has neither codec delay nor discard padding, we should expect |
| // timestamps and output durations to line up from the start (i.e. be stable). |
| if (audio_base_ts_ == kNoTimestamp && !has_codec_delay_ && |
| buffer->discard_padding().first == base::TimeDelta() && |
| buffer->discard_padding().second == base::TimeDelta()) { |
| DVLOG(3) << __func__ << " Expecting stable timestamps - stream has neither " |
| << "codec delay nor discard padding."; |
| limit_unstable_audio_tries_ = 0; |
| } |
| |
| // Don't continue checking timestamps if we've exhausted tries to reach stable |
| // state. This suggests the media's encoded timestamps are way off. |
| if (num_unstable_audio_tries_ > limit_unstable_audio_tries_) return; |
| |
| // Keep resetting encode base ts until we start getting decode output. Some |
| // codecs/containers (e.g. chained Ogg) will take several encoded buffers |
| // before producing the first decoded output. |
| if (!audio_output_ts_helper_) { |
| audio_base_ts_ = buffer->timestamp(); |
| DVLOG(3) << __func__ |
| << " setting audio_base:" << audio_base_ts_.InMicroseconds(); |
| return; |
| } |
| |
| base::TimeDelta expected_ts = audio_output_ts_helper_->GetTimestamp(); |
| base::TimeDelta ts_delta = buffer->timestamp() - expected_ts; |
| |
| // Reconciling encoded buffer timestamps with decoded output often requires |
| // adjusting expectations by some offset. This accounts for varied (and at |
| // this point unknown) handling of front trimming and codec delay. Codec delay |
| // and skip trimming may or may not be accounted for in the encoded timestamps |
| // depending on the codec (e.g. MP3 vs Opus) and demuxers used (e.g. FFmpeg |
| // vs MSE stream parsers). |
| if (!reached_stable_state_) { |
| if (std::abs(ts_delta.InMilliseconds()) < kStableTimeGapThrsholdMsec) { |
| reached_stable_state_ = true; |
| DVLOG(3) << __func__ << " stabilized! tries:" << num_unstable_audio_tries_ |
| << " offset:" |
| << audio_output_ts_helper_->base_timestamp().InMicroseconds(); |
| } else { |
| base::TimeDelta orig_offset = audio_output_ts_helper_->base_timestamp(); |
| |
| // Save since this gets reset when we set new base time. |
| int64_t decoded_frame_count = audio_output_ts_helper_->frame_count(); |
| audio_output_ts_helper_->SetBaseTimestamp(orig_offset + ts_delta); |
| audio_output_ts_helper_->AddFrames(decoded_frame_count); |
| |
| DVLOG(3) << __func__ |
| << " NOT stabilized. tries:" << num_unstable_audio_tries_ |
| << " offset was:" << orig_offset.InMicroseconds() << " now:" |
| << audio_output_ts_helper_->base_timestamp().InMicroseconds(); |
| num_unstable_audio_tries_++; |
| |
| // Let developers know if their files timestamps are way off from |
| if (num_unstable_audio_tries_ > limit_unstable_audio_tries_) { |
| MEDIA_LOG(ERROR, media_log_) |
| << "Failed to reconcile encoded audio times with decoded output."; |
| } |
| } |
| |
| // Don't bother with further checking until we reach stable state. |
| return; |
| } |
| |
| if (std::abs(ts_delta.InMilliseconds()) > drift_warning_threshold_msec_) { |
| MEDIA_LOG(ERROR, media_log_) |
| << " Large timestamp gap detected; may cause AV sync to drift." |
| << " time:" << buffer->timestamp().InMicroseconds() << "us" |
| << " expected:" << expected_ts.InMicroseconds() << "us" |
| << " delta:" << ts_delta.InMicroseconds() << "us"; |
| // Increase threshold to avoid log spam but, let us know if gap widens. |
| drift_warning_threshold_msec_ = std::abs(ts_delta.InMilliseconds()); |
| } |
| DVLOG(3) << __func__ << " delta:" << ts_delta.InMicroseconds() |
| << " expected_ts:" << expected_ts.InMicroseconds() |
| << " actual_ts:" << buffer->timestamp().InMicroseconds() |
| << " audio_ts_offset:" |
| << audio_output_ts_helper_->base_timestamp().InMicroseconds(); |
| } |
| |
| void AudioTimestampValidator::RecordOutputDuration( |
| const scoped_refptr<AudioBuffer>& audio_buffer) { |
| if (!audio_output_ts_helper_) { |
| DCHECK_NE(audio_base_ts_, kNoTimestamp); |
| // SUBTLE: deliberately creating this with output buffer sample rate because |
| // demuxer stream config is potentially stale for implicit AAC. |
| audio_output_ts_helper_.reset( |
| new AudioTimestampHelper(audio_buffer->sample_rate())); |
| audio_output_ts_helper_->SetBaseTimestamp(audio_base_ts_); |
| } |
| |
| DVLOG(3) << __func__ << " " << audio_buffer->frame_count() << " frames"; |
| audio_output_ts_helper_->AddFrames(audio_buffer->frame_count()); |
| } |
| |
| } // namespace media |
| } // namespace cobalt |