// src/cobalt/media/filters/audio_timestamp_validator.cc - cobalt - Git at Google

 // Copyright 2016 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.

 #include "cobalt/media/filters/audio_timestamp_validator.h"

 namespace cobalt {
 namespace media {

 namespace {

 // Starting value, in milliseconds, for the drift-warning threshold: once
 // timing is stable, the gap between a DecoderBuffer's timestamp and the
 // expected one must exceed this before CheckForTimestampGap() reports it.
 // 50 is low enough to catch issues early, yet high enough to avoid noise from
 // containers such as WebM that default to low-granularity timestamps.
 const int kGapWarningThresholdMsec = 50;

 // Default cap on how many times CheckForTimestampGap() may shift the expected
 // timestamp base while trying to reach a stable state, i.e. one where gaps
 // between encoded timestamps match decoded output intervals.
 const int kLimitTriesForStableTiming = 5;

 // Expected and actual timestamps must differ by fewer than this many
 // milliseconds for CheckForTimestampGap() to treat timing as "stable". 1 is
 // used because some containers (WebM) default to millisecond timestamp
 // precision.
 // NOTE: the identifier's spelling ("Thrshold") is kept as-is because it is
 // referenced by CheckForTimestampGap().
 const int kStableTimeGapThrsholdMsec = 1;

 }  // namespace

 // Builds a validator for a single audio stream.
 //
 // |decoder_config| is read for its codec delay (a positive value sets
 // |has_codec_delay_|) and is DCHECKed for validity. |media_log| is stored and
 // later used when reporting timestamp problems.
 //
 // The validator starts with no base timestamp (kNoTimestamp), not yet in a
 // stable state, zero unstable tries, and the file-level defaults for the try
 // limit and the gap-warning threshold.
 AudioTimestampValidator::AudioTimestampValidator(
     const AudioDecoderConfig& decoder_config,
     const scoped_refptr<MediaLog>& media_log)
     : has_codec_delay_(decoder_config.codec_delay() > 0),
       media_log_(media_log),
       audio_base_ts_(kNoTimestamp),
       reached_stable_state_(false),
       num_unstable_audio_tries_(0),
       limit_unstable_audio_tries_(kLimitTriesForStableTiming),
       drift_warning_threshold_msec_(kGapWarningThresholdMsec) {
   DCHECK(decoder_config.IsValidConfig());
 }

 // Nothing to release by hand; members clean up after themselves.
 AudioTimestampValidator::~AudioTimestampValidator() = default;

 void AudioTimestampValidator::CheckForTimestampGap(
     const scoped_refptr<DecoderBuffer>& buffer) {
   if (buffer->end_of_stream()) return;
   DCHECK_NE(kNoTimestamp, buffer->timestamp());

   // If audio_base_ts_ == kNoTimestamp, we are processing our first buffer.
   // If stream has neither codec delay nor discard padding, we should expect
   // timestamps and output durations to line up from the start (i.e. be stable).
   if (audio_base_ts_ == kNoTimestamp && !has_codec_delay_ &&
       buffer->discard_padding().first == base::TimeDelta() &&
       buffer->discard_padding().second == base::TimeDelta()) {
     DVLOG(3) << __func__ << " Expecting stable timestamps - stream has neither "
              << "codec delay nor discard padding.";
     limit_unstable_audio_tries_ = 0;
   }

   // Don't continue checking timestamps if we've exhausted tries to reach stable
   // state. This suggests the media's encoded timestamps are way off.
   if (num_unstable_audio_tries_ > limit_unstable_audio_tries_) return;

   // Keep resetting encode base ts until we start getting decode output. Some
   // codecs/containers (e.g. chained Ogg) will take several encoded buffers
   // before producing the first decoded output.
   if (!audio_output_ts_helper_) {
     audio_base_ts_ = buffer->timestamp();
     DVLOG(3) << __func__
              << " setting audio_base:" << audio_base_ts_.InMicroseconds();
     return;
   }

   base::TimeDelta expected_ts = audio_output_ts_helper_->GetTimestamp();
   base::TimeDelta ts_delta = buffer->timestamp() - expected_ts;

   // Reconciling encoded buffer timestamps with decoded output often requires
   // adjusting expectations by some offset. This accounts for varied (and at
   // this point unknown) handling of front trimming and codec delay. Codec delay
   // and skip trimming may or may not be accounted for in the encoded timestamps
   // depending on the codec (e.g. MP3 vs Opus) and  demuxers used (e.g. FFmpeg
   // vs MSE stream parsers).
   if (!reached_stable_state_) {
     if (std::abs(ts_delta.InMilliseconds()) < kStableTimeGapThrsholdMsec) {
       reached_stable_state_ = true;
       DVLOG(3) << __func__ << " stabilized! tries:" << num_unstable_audio_tries_
                << " offset:"
                << audio_output_ts_helper_->base_timestamp().InMicroseconds();
     } else {
       base::TimeDelta orig_offset = audio_output_ts_helper_->base_timestamp();

       // Save since this gets reset when we set new base time.
       int64_t decoded_frame_count = audio_output_ts_helper_->frame_count();
       audio_output_ts_helper_->SetBaseTimestamp(orig_offset + ts_delta);
       audio_output_ts_helper_->AddFrames(decoded_frame_count);

       DVLOG(3) << __func__
                << " NOT stabilized. tries:" << num_unstable_audio_tries_
                << " offset was:" << orig_offset.InMicroseconds() << " now:"
                << audio_output_ts_helper_->base_timestamp().InMicroseconds();
       num_unstable_audio_tries_++;

       // Let developers know if their files timestamps are way off from
       if (num_unstable_audio_tries_ > limit_unstable_audio_tries_) {
         MEDIA_LOG(ERROR, media_log_)
             << "Failed to reconcile encoded audio times with decoded output.";
       }
     }

     // Don't bother with further checking until we reach stable state.
     return;
   }

   if (std::abs(ts_delta.InMilliseconds()) > drift_warning_threshold_msec_) {
     MEDIA_LOG(ERROR, media_log_)
         << " Large timestamp gap detected; may cause AV sync to drift."
         << " time:" << buffer->timestamp().InMicroseconds() << "us"
         << " expected:" << expected_ts.InMicroseconds() << "us"
         << " delta:" << ts_delta.InMicroseconds() << "us";
     // Increase threshold to avoid log spam but, let us know if gap widens.
     drift_warning_threshold_msec_ = std::abs(ts_delta.InMilliseconds());
   }
   DVLOG(3) << __func__ << " delta:" << ts_delta.InMicroseconds()
            << " expected_ts:" << expected_ts.InMicroseconds()
            << " actual_ts:" << buffer->timestamp().InMicroseconds()
            << " audio_ts_offset:"
            << audio_output_ts_helper_->base_timestamp().InMicroseconds();
 }

 // Accounts for one decoded output buffer.
 //
 // The first call creates |audio_output_ts_helper_| and bases it at
 // |audio_base_ts_|. Every call then adds |audio_buffer|'s frame count to the
 // helper.
 void AudioTimestampValidator::RecordOutputDuration(
     const scoped_refptr<AudioBuffer>& audio_buffer) {
   if (!audio_output_ts_helper_) {
     // A base timestamp is expected to have been captured already.
     DCHECK_NE(audio_base_ts_, kNoTimestamp);

     // SUBTLE: the helper is intentionally built from the output buffer's
     // sample rate, because the demuxer stream config is potentially stale for
     // implicit AAC.
     const auto output_sample_rate = audio_buffer->sample_rate();
     audio_output_ts_helper_.reset(
         new AudioTimestampHelper(output_sample_rate));
     audio_output_ts_helper_->SetBaseTimestamp(audio_base_ts_);
   }

   const auto decoded_frames = audio_buffer->frame_count();
   DVLOG(3) << __func__ << " " << decoded_frames << " frames";
   audio_output_ts_helper_->AddFrames(decoded_frames);
 }

 }  // namespace media
 }  // namespace cobalt
	// Copyright 2016 The Chromium Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file.

	#include "cobalt/media/filters/audio_timestamp_validator.h"

	namespace cobalt {
	namespace media {

	namespace {

	// Starting value, in milliseconds, for the drift-warning threshold: once
	// timing is stable, the gap between a DecoderBuffer's timestamp and the
	// expected one must exceed this before CheckForTimestampGap() reports it.
	// 50 is low enough to catch issues early, yet high enough to avoid noise from
	// containers such as WebM that default to low-granularity timestamps.
	const int kGapWarningThresholdMsec = 50;

	// Default cap on how many times CheckForTimestampGap() may shift the expected
	// timestamp base while trying to reach a stable state, i.e. one where gaps
	// between encoded timestamps match decoded output intervals.
	const int kLimitTriesForStableTiming = 5;

	// Expected and actual timestamps must differ by fewer than this many
	// milliseconds for CheckForTimestampGap() to treat timing as "stable". 1 is
	// used because some containers (WebM) default to millisecond timestamp
	// precision.
	// NOTE: the identifier's spelling ("Thrshold") is kept as-is because it is
	// referenced by CheckForTimestampGap().
	const int kStableTimeGapThrsholdMsec = 1;

	}  // namespace

	// Builds a validator for a single audio stream.
	//
	// |decoder_config| is read for its codec delay (a positive value sets
	// |has_codec_delay_|) and is DCHECKed for validity. |media_log| is stored and
	// later used when reporting timestamp problems.
	//
	// The validator starts with no base timestamp (kNoTimestamp), not yet in a
	// stable state, zero unstable tries, and the file-level defaults for the try
	// limit and the gap-warning threshold.
	AudioTimestampValidator::AudioTimestampValidator(
	const AudioDecoderConfig& decoder_config,
	const scoped_refptr<MediaLog>& media_log)
	: has_codec_delay_(decoder_config.codec_delay() > 0),
	media_log_(media_log),
	audio_base_ts_(kNoTimestamp),
	reached_stable_state_(false),
	num_unstable_audio_tries_(0),
	limit_unstable_audio_tries_(kLimitTriesForStableTiming),
	drift_warning_threshold_msec_(kGapWarningThresholdMsec) {
	DCHECK(decoder_config.IsValidConfig());
	}

	// Nothing to release by hand; members clean up after themselves.
	AudioTimestampValidator::~AudioTimestampValidator() = default;

	void AudioTimestampValidator::CheckForTimestampGap(
	const scoped_refptr<DecoderBuffer>& buffer) {
	if (buffer->end_of_stream()) return;
	DCHECK_NE(kNoTimestamp, buffer->timestamp());

	// If audio_base_ts_ == kNoTimestamp, we are processing our first buffer.
	// If stream has neither codec delay nor discard padding, we should expect
	// timestamps and output durations to line up from the start (i.e. be stable).
	if (audio_base_ts_ == kNoTimestamp && !has_codec_delay_ &&
	buffer->discard_padding().first == base::TimeDelta() &&
	buffer->discard_padding().second == base::TimeDelta()) {
	DVLOG(3) << __func__ << " Expecting stable timestamps - stream has neither "
	<< "codec delay nor discard padding.";
	limit_unstable_audio_tries_ = 0;
	}

	// Don't continue checking timestamps if we've exhausted tries to reach stable
	// state. This suggests the media's encoded timestamps are way off.
	if (num_unstable_audio_tries_ > limit_unstable_audio_tries_) return;

	// Keep resetting encode base ts until we start getting decode output. Some
	// codecs/containers (e.g. chained Ogg) will take several encoded buffers
	// before producing the first decoded output.
	if (!audio_output_ts_helper_) {
	audio_base_ts_ = buffer->timestamp();
	DVLOG(3) << __func__
	<< " setting audio_base:" << audio_base_ts_.InMicroseconds();
	return;
	}

	base::TimeDelta expected_ts = audio_output_ts_helper_->GetTimestamp();
	base::TimeDelta ts_delta = buffer->timestamp() - expected_ts;

	// Reconciling encoded buffer timestamps with decoded output often requires
	// adjusting expectations by some offset. This accounts for varied (and at
	// this point unknown) handling of front trimming and codec delay. Codec delay
	// and skip trimming may or may not be accounted for in the encoded timestamps
	// depending on the codec (e.g. MP3 vs Opus) and demuxers used (e.g. FFmpeg
	// vs MSE stream parsers).
	if (!reached_stable_state_) {
	if (std::abs(ts_delta.InMilliseconds()) < kStableTimeGapThrsholdMsec) {
	reached_stable_state_ = true;
	DVLOG(3) << __func__ << " stabilized! tries:" << num_unstable_audio_tries_
	<< " offset:"
	<< audio_output_ts_helper_->base_timestamp().InMicroseconds();
	} else {
	base::TimeDelta orig_offset = audio_output_ts_helper_->base_timestamp();

	// Save since this gets reset when we set new base time.
	int64_t decoded_frame_count = audio_output_ts_helper_->frame_count();
	audio_output_ts_helper_->SetBaseTimestamp(orig_offset + ts_delta);
	audio_output_ts_helper_->AddFrames(decoded_frame_count);

	DVLOG(3) << __func__
	<< " NOT stabilized. tries:" << num_unstable_audio_tries_
	<< " offset was:" << orig_offset.InMicroseconds() << " now:"
	<< audio_output_ts_helper_->base_timestamp().InMicroseconds();
	num_unstable_audio_tries_++;

	// Let developers know if their files timestamps are way off from
	if (num_unstable_audio_tries_ > limit_unstable_audio_tries_) {
	MEDIA_LOG(ERROR, media_log_)
	<< "Failed to reconcile encoded audio times with decoded output.";
	}
	}

	// Don't bother with further checking until we reach stable state.
	return;
	}

	if (std::abs(ts_delta.InMilliseconds()) > drift_warning_threshold_msec_) {
	MEDIA_LOG(ERROR, media_log_)
	<< " Large timestamp gap detected; may cause AV sync to drift."
	<< " time:" << buffer->timestamp().InMicroseconds() << "us"
	<< " expected:" << expected_ts.InMicroseconds() << "us"
	<< " delta:" << ts_delta.InMicroseconds() << "us";
	// Increase threshold to avoid log spam but, let us know if gap widens.
	drift_warning_threshold_msec_ = std::abs(ts_delta.InMilliseconds());
	}
	DVLOG(3) << __func__ << " delta:" << ts_delta.InMicroseconds()
	<< " expected_ts:" << expected_ts.InMicroseconds()
	<< " actual_ts:" << buffer->timestamp().InMicroseconds()
	<< " audio_ts_offset:"
	<< audio_output_ts_helper_->base_timestamp().InMicroseconds();
	}

	// Accounts for one decoded output buffer.
	//
	// The first call creates |audio_output_ts_helper_| and bases it at
	// |audio_base_ts_|. Every call then adds |audio_buffer|'s frame count to the
	// helper.
	void AudioTimestampValidator::RecordOutputDuration(
	    const scoped_refptr<AudioBuffer>& audio_buffer) {
	  if (!audio_output_ts_helper_) {
	    // A base timestamp is expected to have been captured already.
	    DCHECK_NE(audio_base_ts_, kNoTimestamp);

	    // SUBTLE: the helper is intentionally built from the output buffer's
	    // sample rate, because the demuxer stream config is potentially stale for
	    // implicit AAC.
	    const auto output_sample_rate = audio_buffer->sample_rate();
	    audio_output_ts_helper_.reset(
	        new AudioTimestampHelper(output_sample_rate));
	    audio_output_ts_helper_->SetBaseTimestamp(audio_base_ts_);
	  }

	  const auto decoded_frames = audio_buffer->frame_count();
	  DVLOG(3) << __func__ << " " << decoded_frames << " frames";
	  audio_output_ts_helper_->AddFrames(decoded_frames);
	}

	} // namespace media
	} // namespace cobalt