// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/renderers/audio_renderer_impl.h"
#include <math.h>
#include <stddef.h>
#include <algorithm>
#include <memory>
#include <utility>
#include "base/bind.h"
#include "base/callback.h"
#include "base/callback_helpers.h"
#include "base/command_line.h"
#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/power_monitor/power_monitor.h"
#include "base/single_thread_task_runner.h"
#include "base/time/default_tick_clock.h"
#include "base/time/time.h"
#include "base/trace_event/trace_event.h"
#include "build/build_config.h"
#include "media/audio/null_audio_sink.h"
#include "media/base/audio_buffer.h"
#include "media/base/audio_buffer_converter.h"
#include "media/base/audio_latency.h"
#include "media/base/audio_parameters.h"
#include "media/base/bind_to_current_loop.h"
#include "media/base/channel_mixing_matrix.h"
#include "media/base/demuxer_stream.h"
#include "media/base/media_client.h"
#include "media/base/media_log.h"
#include "media/base/media_switches.h"
#include "media/base/renderer_client.h"
#include "media/base/timestamp_constants.h"
#include "media/filters/audio_clock.h"
#include "media/filters/decrypting_demuxer_stream.h"
namespace media {
AudioRendererImpl::AudioRendererImpl(
const scoped_refptr<base::SingleThreadTaskRunner>& task_runner,
AudioRendererSink* sink,
const CreateAudioDecodersCB& create_audio_decoders_cb,
MediaLog* media_log,
SpeechRecognitionClient* speech_recognition_client)
: task_runner_(task_runner),
expecting_config_changes_(false),
sink_(sink),
media_log_(media_log),
client_(nullptr),
tick_clock_(base::DefaultTickClock::GetInstance()),
last_audio_memory_usage_(0),
last_decoded_sample_rate_(0),
last_decoded_channel_layout_(CHANNEL_LAYOUT_NONE),
is_encrypted_(false),
last_decoded_channels_(0),
volume_(1.0f), // Default unmuted.
playback_rate_(0.0),
state_(kUninitialized),
create_audio_decoders_cb_(create_audio_decoders_cb),
buffering_state_(BUFFERING_HAVE_NOTHING),
rendering_(false),
sink_playing_(false),
pending_read_(false),
received_end_of_stream_(false),
rendered_end_of_stream_(false),
is_suspending_(false),
#if defined(OS_ANDROID)
is_passthrough_(false) {
#else
is_passthrough_(false),
speech_recognition_client_(speech_recognition_client) {
#endif
DCHECK(create_audio_decoders_cb_);
  // PowerObservers must be added and removed from the same thread, but we
// won't remove the observer until we're destructed on |task_runner_| so we
// must post it here if we're on the wrong thread.
if (task_runner_->BelongsToCurrentThread()) {
base::PowerMonitor::AddPowerSuspendObserver(this);
} else {
// Safe to post this without a WeakPtr because this class must be destructed
// on the same thread and construction has not completed yet.
task_runner_->PostTask(
FROM_HERE,
base::BindOnce(
IgnoreResult(&base::PowerMonitor::AddPowerSuspendObserver), this));
}
// Do not add anything below this line since the above actions are only safe
// as the last lines of the constructor.
}
AudioRendererImpl::~AudioRendererImpl() {
DVLOG(1) << __func__;
DCHECK(task_runner_->BelongsToCurrentThread());
base::PowerMonitor::RemovePowerSuspendObserver(this);
// If Render() is in progress, this call will wait for Render() to finish.
// After this call, the |sink_| will not call back into |this| anymore.
sink_->Stop();
if (null_sink_)
null_sink_->Stop();
if (init_cb_)
FinishInitialization(PIPELINE_ERROR_ABORT);
}
void AudioRendererImpl::StartTicking() {
DVLOG(1) << __func__;
DCHECK(task_runner_->BelongsToCurrentThread());
base::AutoLock auto_lock(lock_);
DCHECK(!rendering_);
rendering_ = true;
// Wait for an eventual call to SetPlaybackRate() to start rendering.
if (playback_rate_ == 0) {
DCHECK(!sink_playing_);
return;
}
StartRendering_Locked();
}
void AudioRendererImpl::StartRendering_Locked() {
DVLOG(1) << __func__;
DCHECK(task_runner_->BelongsToCurrentThread());
DCHECK_EQ(state_, kPlaying);
DCHECK(!sink_playing_);
DCHECK_NE(playback_rate_, 0.0);
lock_.AssertAcquired();
sink_playing_ = true;
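  // Record that playback has been audible at least once; this is later used
  // to decide whether autoplayed, never-unmuted streams should be transcribed
  // (see HandleDecodedBuffer_Locked()).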
was_unmuted_ = was_unmuted_ || volume_ != 0;
base::AutoUnlock auto_unlock(lock_);
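  // When muted playback suspension (kSuspendMutedAudio) is active and volume
  // is zero, playback is driven by |null_sink_| instead of the real device.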
if (volume_ || !null_sink_)
sink_->Play();
else
null_sink_->Play();
}
void AudioRendererImpl::StopTicking() {
DVLOG(1) << __func__;
DCHECK(task_runner_->BelongsToCurrentThread());
base::AutoLock auto_lock(lock_);
DCHECK(rendering_);
rendering_ = false;
// Rendering should have already been stopped with a zero playback rate.
if (playback_rate_ == 0) {
DCHECK(!sink_playing_);
return;
}
StopRendering_Locked();
}
void AudioRendererImpl::StopRendering_Locked() {
DCHECK(task_runner_->BelongsToCurrentThread());
DCHECK_EQ(state_, kPlaying);
DCHECK(sink_playing_);
lock_.AssertAcquired();
sink_playing_ = false;
base::AutoUnlock auto_unlock(lock_);
if (volume_ || !null_sink_)
sink_->Pause();
else
null_sink_->Pause();
stop_rendering_time_ = last_render_time_;
}
void AudioRendererImpl::SetMediaTime(base::TimeDelta time) {
DVLOG(1) << __func__ << "(" << time << ")";
DCHECK(task_runner_->BelongsToCurrentThread());
base::AutoLock auto_lock(lock_);
DCHECK(!rendering_);
DCHECK_EQ(state_, kFlushed);
start_timestamp_ = time;
ended_timestamp_ = kInfiniteDuration;
last_render_time_ = stop_rendering_time_ = base::TimeTicks();
first_packet_timestamp_ = kNoTimestamp;
audio_clock_ =
std::make_unique<AudioClock>(time, audio_parameters_.sample_rate());
}
base::TimeDelta AudioRendererImpl::CurrentMediaTime() {
base::AutoLock auto_lock(lock_);
// Return the current time based on the known extents of the rendered audio
// data plus an estimate based on the last time those values were calculated.
base::TimeDelta current_media_time = audio_clock_->front_timestamp();
if (!last_render_time_.is_null()) {
current_media_time +=
(tick_clock_->NowTicks() - last_render_time_) * playback_rate_;
if (current_media_time > audio_clock_->back_timestamp())
current_media_time = audio_clock_->back_timestamp();
}
return current_media_time;
}
bool AudioRendererImpl::GetWallClockTimes(
const std::vector<base::TimeDelta>& media_timestamps,
std::vector<base::TimeTicks>* wall_clock_times) {
base::AutoLock auto_lock(lock_);
DCHECK(wall_clock_times->empty());
// When playback is paused (rate is zero), assume a rate of 1.0.
const double playback_rate = playback_rate_ ? playback_rate_ : 1.0;
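  // Time only moves while the sink is playing at a nonzero rate, rendering
  // has started and not been stopped, and the system isn't suspending.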
const bool is_time_moving = sink_playing_ && playback_rate_ &&
!last_render_time_.is_null() &&
stop_rendering_time_.is_null() && !is_suspending_;
// Pre-compute the time until playback of the audio buffer extents, since
// these values are frequently used below.
const base::TimeDelta time_until_front =
audio_clock_->TimeUntilPlayback(audio_clock_->front_timestamp());
const base::TimeDelta time_until_back =
audio_clock_->TimeUntilPlayback(audio_clock_->back_timestamp());
if (media_timestamps.empty()) {
    // Return the current media time as a wall clock time while accounting for
    // frames which may be in the process of playing out.
wall_clock_times->push_back(std::min(
std::max(tick_clock_->NowTicks(), last_render_time_ + time_until_front),
last_render_time_ + time_until_back));
return is_time_moving;
}
wall_clock_times->reserve(media_timestamps.size());
for (const auto& media_timestamp : media_timestamps) {
// When time was or is moving and the requested media timestamp is within
// range of played out audio, we can provide an exact conversion.
if (!last_render_time_.is_null() &&
media_timestamp >= audio_clock_->front_timestamp() &&
media_timestamp <= audio_clock_->back_timestamp()) {
wall_clock_times->push_back(
last_render_time_ + audio_clock_->TimeUntilPlayback(media_timestamp));
continue;
}
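    // Otherwise, extrapolate from the nearest edge of the buffered audio
    // using the (possibly assumed) playback rate.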
base::TimeDelta base_timestamp, time_until_playback;
if (media_timestamp < audio_clock_->front_timestamp()) {
base_timestamp = audio_clock_->front_timestamp();
time_until_playback = time_until_front;
} else {
base_timestamp = audio_clock_->back_timestamp();
time_until_playback = time_until_back;
}
// In practice, most calls will be estimates given the relatively small
// window in which clients can get the actual time.
wall_clock_times->push_back(last_render_time_ + time_until_playback +
(media_timestamp - base_timestamp) /
playback_rate);
}
return is_time_moving;
}
TimeSource* AudioRendererImpl::GetTimeSource() {
return this;
}
void AudioRendererImpl::Flush(base::OnceClosure callback) {
DVLOG(1) << __func__;
DCHECK(task_runner_->BelongsToCurrentThread());
TRACE_EVENT_NESTABLE_ASYNC_BEGIN0("media", "AudioRendererImpl::Flush",
TRACE_ID_LOCAL(this));
// Flush |sink_| now. |sink_| must only be accessed on |task_runner_| and not
// be called under |lock_|.
DCHECK(!sink_playing_);
if (volume_ || !null_sink_)
sink_->Flush();
else
null_sink_->Flush();
base::AutoLock auto_lock(lock_);
DCHECK_EQ(state_, kPlaying);
DCHECK(!flush_cb_);
flush_cb_ = std::move(callback);
ChangeState_Locked(kFlushing);
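  // If a read is in flight, the flush will be completed by
  // DecodedAudioReady() once the read returns.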
if (pending_read_)
return;
ChangeState_Locked(kFlushed);
DoFlush_Locked();
}
void AudioRendererImpl::DoFlush_Locked() {
DCHECK(task_runner_->BelongsToCurrentThread());
lock_.AssertAcquired();
DCHECK(!pending_read_);
DCHECK_EQ(state_, kFlushed);
ended_timestamp_ = kInfiniteDuration;
audio_decoder_stream_->Reset(base::BindOnce(
&AudioRendererImpl::ResetDecoderDone, weak_factory_.GetWeakPtr()));
}
void AudioRendererImpl::ResetDecoderDone() {
DCHECK(task_runner_->BelongsToCurrentThread());
{
base::AutoLock auto_lock(lock_);
DCHECK_EQ(state_, kFlushed);
DCHECK(flush_cb_);
received_end_of_stream_ = false;
rendered_end_of_stream_ = false;
// Flush() may have been called while underflowed/not fully buffered.
if (buffering_state_ != BUFFERING_HAVE_NOTHING)
SetBufferingState_Locked(BUFFERING_HAVE_NOTHING);
if (buffer_converter_)
buffer_converter_->Reset();
algorithm_->FlushBuffers();
}
// Changes in buffering state are always posted. Flush callback must only be
// run after buffering state has been set back to nothing.
flush_cb_ = BindToCurrentLoop(std::move(flush_cb_));
FinishFlush();
}
void AudioRendererImpl::StartPlaying() {
DVLOG(1) << __func__;
DCHECK(task_runner_->BelongsToCurrentThread());
base::AutoLock auto_lock(lock_);
DCHECK(!sink_playing_);
DCHECK_EQ(state_, kFlushed);
DCHECK_EQ(buffering_state_, BUFFERING_HAVE_NOTHING);
DCHECK(!pending_read_) << "Pending read must complete before seeking";
ChangeState_Locked(kPlaying);
AttemptRead_Locked();
}
void AudioRendererImpl::Initialize(DemuxerStream* stream,
CdmContext* cdm_context,
RendererClient* client,
PipelineStatusCallback init_cb) {
DVLOG(1) << __func__;
DCHECK(task_runner_->BelongsToCurrentThread());
DCHECK(client);
DCHECK(stream);
DCHECK_EQ(stream->type(), DemuxerStream::AUDIO);
DCHECK(init_cb);
DCHECK(state_ == kUninitialized || state_ == kFlushed);
DCHECK(sink_);
TRACE_EVENT_NESTABLE_ASYNC_BEGIN0("media", "AudioRendererImpl::Initialize",
TRACE_ID_LOCAL(this));
// If we are re-initializing playback (e.g. switching media tracks), stop the
// sink first.
if (state_ == kFlushed) {
sink_->Stop();
if (null_sink_)
null_sink_->Stop();
}
state_ = kInitializing;
demuxer_stream_ = stream;
client_ = client;
// Always post |init_cb_| because |this| could be destroyed if initialization
// failed.
init_cb_ = BindToCurrentLoop(std::move(init_cb));
// Retrieve hardware device parameters asynchronously so we don't block the
// media thread on synchronous IPC.
sink_->GetOutputDeviceInfoAsync(
base::BindOnce(&AudioRendererImpl::OnDeviceInfoReceived,
weak_factory_.GetWeakPtr(), demuxer_stream_, cdm_context));
#if !defined(OS_ANDROID)
if (speech_recognition_client_) {
speech_recognition_client_->SetOnReadyCallback(BindToCurrentLoop(
base::BindOnce(&AudioRendererImpl::EnableSpeechRecognition,
weak_factory_.GetWeakPtr())));
}
#endif
}
void AudioRendererImpl::OnDeviceInfoReceived(
DemuxerStream* stream,
CdmContext* cdm_context,
OutputDeviceInfo output_device_info) {
DVLOG(1) << __func__;
DCHECK(task_runner_->BelongsToCurrentThread());
DCHECK(client_);
DCHECK(stream);
DCHECK_EQ(stream->type(), DemuxerStream::AUDIO);
DCHECK(init_cb_);
DCHECK_EQ(state_, kInitializing);
  // Fall back to a fake audio sink if the audio device can't be set up; this
// allows video playback in cases where there is no audio hardware.
//
// TODO(dalecurtis): We could disable the audio track here too.
UMA_HISTOGRAM_ENUMERATION("Media.AudioRendererImpl.SinkStatus",
output_device_info.device_status(),
OUTPUT_DEVICE_STATUS_MAX + 1);
if (output_device_info.device_status() != OUTPUT_DEVICE_STATUS_OK) {
MEDIA_LOG(ERROR, media_log_)
<< "Output device error, falling back to null sink. device_status="
<< output_device_info.device_status();
sink_ = new NullAudioSink(task_runner_);
output_device_info = sink_->GetOutputDeviceInfo();
} else if (base::FeatureList::IsEnabled(kSuspendMutedAudio)) {
// If playback is muted, we use a fake sink for output until it unmutes.
null_sink_ = new NullAudioSink(task_runner_);
}
current_decoder_config_ = stream->audio_decoder_config();
DCHECK(current_decoder_config_.IsValidConfig());
const AudioParameters& hw_params = output_device_info.output_params();
ChannelLayout hw_channel_layout =
hw_params.IsValid() ? hw_params.channel_layout() : CHANNEL_LAYOUT_NONE;
audio_decoder_stream_ = std::make_unique<AudioDecoderStream>(
std::make_unique<AudioDecoderStream::StreamTraits>(media_log_,
hw_channel_layout),
task_runner_, create_audio_decoders_cb_, media_log_);
audio_decoder_stream_->set_config_change_observer(base::BindRepeating(
&AudioRendererImpl::OnConfigChange, weak_factory_.GetWeakPtr()));
AudioCodec codec = stream->audio_decoder_config().codec();
if (auto* mc = GetMediaClient())
is_passthrough_ = mc->IsSupportedBitstreamAudioCodec(codec);
else
is_passthrough_ = false;
expecting_config_changes_ = stream->SupportsConfigChanges();
bool use_stream_params = !expecting_config_changes_ || !hw_params.IsValid() ||
hw_params.format() == AudioParameters::AUDIO_FAKE ||
!sink_->IsOptimizedForHardwareParameters();
if (stream->audio_decoder_config().channel_layout() ==
CHANNEL_LAYOUT_DISCRETE &&
sink_->IsOptimizedForHardwareParameters()) {
use_stream_params = false;
}
// Target ~20ms for our buffer size (which is optimal for power efficiency and
// responsiveness to play/pause events), but if the hardware needs something
// even larger (say for Bluetooth devices) prefer that.
//
// Even if |use_stream_params| is true we should choose a value here based on
// hardware parameters since it affects the initial buffer size used by
// AudioRendererAlgorithm. Too small and we will underflow if the hardware
// asks for a buffer larger than the initial algorithm capacity.
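  // Note: 2 * samples_per_second / 100 below is 20 ms expressed in frames
  // (e.g. 960 frames at 48 kHz).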
const int preferred_buffer_size =
std::max(2 * stream->audio_decoder_config().samples_per_second() / 100,
hw_params.IsValid() ? hw_params.frames_per_buffer() : 0);
if (is_passthrough_) {
AudioParameters::Format format = AudioParameters::AUDIO_FAKE;
if (codec == AudioCodec::kAC3) {
format = AudioParameters::AUDIO_BITSTREAM_AC3;
} else if (codec == AudioCodec::kEAC3) {
format = AudioParameters::AUDIO_BITSTREAM_EAC3;
} else {
NOTREACHED();
}
    // If we wanted the precise PCM frame count here, we would have to somehow
    // peek the audio bitstream and parse the header ahead of time. Instead, we
    // ensure the audio bus is large enough to accommodate
// kMaxFramesPerCompressedAudioBuffer frames. The real data size and frame
// count for bitstream formats will be carried in additional fields of
// AudioBus.
const int buffer_size =
AudioParameters::kMaxFramesPerCompressedAudioBuffer *
stream->audio_decoder_config().bytes_per_frame();
audio_parameters_.Reset(
format, stream->audio_decoder_config().channel_layout(),
stream->audio_decoder_config().samples_per_second(), buffer_size);
buffer_converter_.reset();
} else if (use_stream_params) {
audio_parameters_.Reset(AudioParameters::AUDIO_PCM_LOW_LATENCY,
stream->audio_decoder_config().channel_layout(),
stream->audio_decoder_config().samples_per_second(),
preferred_buffer_size);
audio_parameters_.set_channels_for_discrete(
stream->audio_decoder_config().channels());
buffer_converter_.reset();
} else {
// To allow for seamless sample rate adaptations (i.e. changes from say
// 16kHz to 48kHz), always resample to the hardware rate.
int sample_rate = hw_params.sample_rate();
// If supported by the OS and the initial sample rate is not too low, let
// the OS level resampler handle resampling for power efficiency.
if (AudioLatency::IsResamplingPassthroughSupported(
AudioLatency::LATENCY_PLAYBACK) &&
stream->audio_decoder_config().samples_per_second() >= 44100) {
sample_rate = stream->audio_decoder_config().samples_per_second();
}
int stream_channel_count = stream->audio_decoder_config().channels();
bool try_supported_channel_layouts = false;
#if defined(OS_WIN)
try_supported_channel_layouts =
base::CommandLine::ForCurrentProcess()->HasSwitch(
switches::kTrySupportedChannelLayouts);
#endif
// We don't know how to up-mix for DISCRETE layouts (fancy multichannel
// hardware with non-standard speaker arrangement). Instead, pretend the
// hardware layout is stereo and let the OS take care of further up-mixing
// to the discrete layout (http://crbug.com/266674). Additionally, pretend
// hardware is stereo whenever kTrySupportedChannelLayouts is set. This flag
// is for savvy users who want stereo content to output in all surround
// speakers. Using the actual layout (likely 5.1 or higher) will mean our
// mixer will attempt to up-mix stereo source streams to just the left/right
// speaker of the 5.1 setup, nulling out the other channels
// (http://crbug.com/177872).
hw_channel_layout = hw_params.channel_layout() == CHANNEL_LAYOUT_DISCRETE ||
try_supported_channel_layouts
? CHANNEL_LAYOUT_STEREO
: hw_params.channel_layout();
int hw_channel_count = ChannelLayoutToChannelCount(hw_channel_layout);
// The layout we pass to |audio_parameters_| will be used for the lifetime
// of this audio renderer, regardless of changes to hardware and/or stream
// properties. Below we choose the max of stream layout vs. hardware layout
// to leave room for changes to the hardware and/or stream (i.e. avoid
// premature down-mixing - http://crbug.com/379288).
// If stream_channels < hw_channels:
// Taking max means we up-mix to hardware layout. If stream later changes
// to have more channels, we aren't locked into down-mixing to the
// initial stream layout.
// If stream_channels > hw_channels:
// We choose to output stream's layout, meaning mixing is a no-op for the
// renderer. Browser-side will down-mix to the hardware config. If the
// hardware later changes to equal stream channels, browser-side will stop
// down-mixing and use the data from all stream channels.
ChannelLayout renderer_channel_layout =
hw_channel_count > stream_channel_count
? hw_channel_layout
: stream->audio_decoder_config().channel_layout();
audio_parameters_.Reset(hw_params.format(), renderer_channel_layout,
sample_rate,
AudioLatency::GetHighLatencyBufferSize(
sample_rate, preferred_buffer_size));
}
audio_parameters_.set_effects(audio_parameters_.effects() |
AudioParameters::MULTIZONE);
audio_parameters_.set_latency_tag(AudioLatency::LATENCY_PLAYBACK);
if (!client_->IsVideoStreamAvailable()) {
// When video is not available, audio prefetch can be enabled. See
// crbug/988535.
audio_parameters_.set_effects(audio_parameters_.effects() |
AudioParameters::AUDIO_PREFETCH);
}
last_decoded_channel_layout_ =
stream->audio_decoder_config().channel_layout();
is_encrypted_ = stream->audio_decoder_config().is_encrypted();
last_decoded_channels_ = stream->audio_decoder_config().channels();
{
    // Set the |audio_clock_| under lock in case this is a reinitialization
    // and some external caller of GetWallClockTimes() exists.
base::AutoLock lock(lock_);
audio_clock_ = std::make_unique<AudioClock>(
base::TimeDelta(), audio_parameters_.sample_rate());
}
audio_decoder_stream_->Initialize(
stream,
base::BindOnce(&AudioRendererImpl::OnAudioDecoderStreamInitialized,
weak_factory_.GetWeakPtr()),
cdm_context,
base::BindRepeating(&AudioRendererImpl::OnStatisticsUpdate,
weak_factory_.GetWeakPtr()),
base::BindRepeating(&AudioRendererImpl::OnWaiting,
weak_factory_.GetWeakPtr()));
}
void AudioRendererImpl::OnAudioDecoderStreamInitialized(bool success) {
DVLOG(1) << __func__ << ": " << success;
DCHECK(task_runner_->BelongsToCurrentThread());
base::AutoLock auto_lock(lock_);
if (!success) {
state_ = kUninitialized;
FinishInitialization(DECODER_ERROR_NOT_SUPPORTED);
return;
}
if (!audio_parameters_.IsValid()) {
DVLOG(1) << __func__ << ": Invalid audio parameters: "
<< audio_parameters_.AsHumanReadableString();
ChangeState_Locked(kUninitialized);
// TODO(flim): If the channel layout is discrete but channel count is 0, a
// possible cause is that the input stream has > 8 channels but there is no
// Web Audio renderer attached and no channel mixing matrices defined for
// hardware renderers. Adding one for previewing content could be useful.
FinishInitialization(PIPELINE_ERROR_INITIALIZATION_FAILED);
return;
}
if (expecting_config_changes_) {
buffer_converter_ =
std::make_unique<AudioBufferConverter>(audio_parameters_);
}
// We're all good! Continue initializing the rest of the audio renderer
// based on the decoder format.
auto* media_client = GetMediaClient();
auto params =
(media_client ? media_client->GetAudioRendererAlgorithmParameters(
audio_parameters_)
: absl::nullopt);
if (params && !client_->IsVideoStreamAvailable()) {
algorithm_ =
std::make_unique<AudioRendererAlgorithm>(media_log_, params.value());
} else {
algorithm_ = std::make_unique<AudioRendererAlgorithm>(media_log_);
}
algorithm_->Initialize(audio_parameters_, is_encrypted_);
if (latency_hint_)
algorithm_->SetLatencyHint(latency_hint_);
algorithm_->SetPreservesPitch(preserves_pitch_);
ConfigureChannelMask();
ChangeState_Locked(kFlushed);
{
base::AutoUnlock auto_unlock(lock_);
sink_->Initialize(audio_parameters_, this);
if (null_sink_) {
null_sink_->Initialize(audio_parameters_, this);
null_sink_->Start(); // Does nothing but reduce state bookkeeping.
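      // Defer starting the real sink until the stream is first unmuted; see
      // SetVolume().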
real_sink_needs_start_ = true;
} else {
// Even when kSuspendMutedAudio is enabled, we can hit this path if we are
// exclusively using NullAudioSink due to OnDeviceInfoReceived() failure.
sink_->Start();
sink_->Pause(); // Sinks play on start.
}
SetVolume(volume_);
}
DCHECK(!sink_playing_);
FinishInitialization(PIPELINE_OK);
}
void AudioRendererImpl::FinishInitialization(PipelineStatus status) {
DCHECK(init_cb_);
TRACE_EVENT_NESTABLE_ASYNC_END1("media", "AudioRendererImpl::Initialize",
TRACE_ID_LOCAL(this), "status",
PipelineStatusToString(status));
std::move(init_cb_).Run(status);
}
void AudioRendererImpl::FinishFlush() {
DCHECK(flush_cb_);
TRACE_EVENT_NESTABLE_ASYNC_END0("media", "AudioRendererImpl::Flush",
TRACE_ID_LOCAL(this));
std::move(flush_cb_).Run();
}
void AudioRendererImpl::OnPlaybackError(PipelineStatus error) {
DCHECK(task_runner_->BelongsToCurrentThread());
client_->OnError(error);
}
void AudioRendererImpl::OnPlaybackEnded() {
DCHECK(task_runner_->BelongsToCurrentThread());
client_->OnEnded();
}
void AudioRendererImpl::OnStatisticsUpdate(const PipelineStatistics& stats) {
DCHECK(task_runner_->BelongsToCurrentThread());
client_->OnStatisticsUpdate(stats);
}
void AudioRendererImpl::OnBufferingStateChange(BufferingState buffering_state) {
DCHECK(task_runner_->BelongsToCurrentThread());
// "Underflow" is only possible when playing. This avoids noise like blaming
// the decoder for an "underflow" that is really just a seek.
BufferingStateChangeReason reason = BUFFERING_CHANGE_REASON_UNKNOWN;
if (state_ == kPlaying && buffering_state == BUFFERING_HAVE_NOTHING) {
reason = audio_decoder_stream_->is_demuxer_read_pending()
? DEMUXER_UNDERFLOW
: DECODER_UNDERFLOW;
}
media_log_->AddEvent<MediaLogEvent::kBufferingStateChanged>(
SerializableBufferingState<SerializableBufferingStateType::kAudio>{
buffering_state, reason});
client_->OnBufferingStateChange(buffering_state, reason);
}
void AudioRendererImpl::OnWaiting(WaitingReason reason) {
DCHECK(task_runner_->BelongsToCurrentThread());
client_->OnWaiting(reason);
}
void AudioRendererImpl::SetVolume(float volume) {
DCHECK(task_runner_->BelongsToCurrentThread());
// Only consider audio as unmuted if the volume is set to a non-zero value
// when the state is kPlaying.
if (state_ == kPlaying) {
was_unmuted_ = was_unmuted_ || volume != 0;
}
if (state_ == kUninitialized || state_ == kInitializing) {
volume_ = volume;
return;
}
sink_->SetVolume(volume);
if (!null_sink_) {
// Either null sink suspension is not enabled or we're already on the null
// sink due to failing to get device parameters.
return;
}
null_sink_->SetVolume(volume);
// Two cases to handle:
// 1. Changing from muted to unmuted state.
// 2. Unmuted startup case.
if ((!volume_ && volume) || (volume && real_sink_needs_start_)) {
// Suspend null audio sink (does nothing if unused).
null_sink_->Pause();
// Complete startup for the real sink if needed.
if (real_sink_needs_start_) {
sink_->Start();
if (!sink_playing_)
sink_->Pause(); // Sinks play on start.
real_sink_needs_start_ = false;
}
// Start sink playback if needed.
if (sink_playing_)
sink_->Play();
} else if (volume_ && !volume) {
// Suspend the real sink (does nothing if unused).
sink_->Pause();
// Start fake sink playback if needed.
if (sink_playing_)
null_sink_->Play();
}
volume_ = volume;
}
void AudioRendererImpl::SetLatencyHint(
absl::optional<base::TimeDelta> latency_hint) {
base::AutoLock auto_lock(lock_);
latency_hint_ = latency_hint;
if (algorithm_) {
algorithm_->SetLatencyHint(latency_hint);
    // See if we need further reads to fill up to the new playback threshold.
    // This may be needed if rendering isn't active, since then there are no
    // Render() callbacks to schedule regular reads.
AttemptRead_Locked();
}
}
void AudioRendererImpl::SetPreservesPitch(bool preserves_pitch) {
base::AutoLock auto_lock(lock_);
preserves_pitch_ = preserves_pitch;
if (algorithm_)
algorithm_->SetPreservesPitch(preserves_pitch);
}
void AudioRendererImpl::SetAutoplayInitiated(bool autoplay_initiated) {
base::AutoLock auto_lock(lock_);
autoplay_initiated_ = autoplay_initiated;
}
void AudioRendererImpl::OnSuspend() {
base::AutoLock auto_lock(lock_);
is_suspending_ = true;
}
void AudioRendererImpl::OnResume() {
base::AutoLock auto_lock(lock_);
is_suspending_ = false;
}
void AudioRendererImpl::SetPlayDelayCBForTesting(PlayDelayCBForTesting cb) {
DCHECK_EQ(state_, kUninitialized);
play_delay_cb_for_testing_ = std::move(cb);
}
void AudioRendererImpl::DecodedAudioReady(
AudioDecoderStream::ReadResult result) {
DVLOG(2) << __func__ << "(" << result.code() << ")";
DCHECK(task_runner_->BelongsToCurrentThread());
base::AutoLock auto_lock(lock_);
DCHECK(state_ != kUninitialized);
CHECK(pending_read_);
pending_read_ = false;
if (result.has_error()) {
HandleAbortedReadOrDecodeError(result.code() == StatusCode::kAborted
? PIPELINE_OK
: PIPELINE_ERROR_DECODE);
return;
}
scoped_refptr<AudioBuffer> buffer = std::move(result).value();
DCHECK(buffer);
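  // A pending read may complete while a flush is in progress; finish the
  // flush now that the read has returned.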
if (state_ == kFlushing) {
ChangeState_Locked(kFlushed);
DoFlush_Locked();
return;
}
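  // Whether another decode should be requested immediately after this buffer
  // has been handled.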
bool need_another_buffer = true;
if (expecting_config_changes_) {
if (!buffer->end_of_stream()) {
if (last_decoded_sample_rate_ &&
buffer->sample_rate() != last_decoded_sample_rate_) {
DVLOG(1) << __func__ << " Updating audio sample_rate."
<< " ts:" << buffer->timestamp().InMicroseconds()
<< " old:" << last_decoded_sample_rate_
<< " new:" << buffer->sample_rate();
// Send a bogus config to reset timestamp state.
OnConfigChange(AudioDecoderConfig());
}
last_decoded_sample_rate_ = buffer->sample_rate();
if (last_decoded_channel_layout_ != buffer->channel_layout()) {
if (buffer->channel_layout() == CHANNEL_LAYOUT_DISCRETE) {
MEDIA_LOG(ERROR, media_log_)
<< "Unsupported midstream configuration change! Discrete channel"
<< " layout not allowed by sink.";
HandleAbortedReadOrDecodeError(PIPELINE_ERROR_DECODE);
return;
} else {
last_decoded_channel_layout_ = buffer->channel_layout();
last_decoded_channels_ = buffer->channel_count();
ConfigureChannelMask();
}
}
}
DCHECK(buffer_converter_);
buffer_converter_->AddInput(std::move(buffer));
while (buffer_converter_->HasNextBuffer()) {
need_another_buffer =
HandleDecodedBuffer_Locked(buffer_converter_->GetNextBuffer());
}
} else {
// TODO(chcunningham, tguilbert): Figure out if we want to support implicit
// config changes during src=. Doing so requires resampling each individual
// stream which is inefficient when there are many tags in a page.
//
// Check if the buffer we received matches the expected configuration.
// Note: We explicitly do not check channel layout here to avoid breaking
// weird behavior with multichannel wav files: http://crbug.com/600538.
if (!buffer->end_of_stream() &&
(buffer->sample_rate() != audio_parameters_.sample_rate() ||
buffer->channel_count() != audio_parameters_.channels())) {
MEDIA_LOG(ERROR, media_log_)
<< "Unsupported midstream configuration change!"
<< " Sample Rate: " << buffer->sample_rate() << " vs "
<< audio_parameters_.sample_rate()
<< ", Channels: " << buffer->channel_count() << " vs "
<< audio_parameters_.channels();
HandleAbortedReadOrDecodeError(PIPELINE_ERROR_DECODE);
return;
}
need_another_buffer = HandleDecodedBuffer_Locked(std::move(buffer));
}
if (!need_another_buffer && !CanRead_Locked())
return;
AttemptRead_Locked();
}
bool AudioRendererImpl::HandleDecodedBuffer_Locked(
scoped_refptr<AudioBuffer> buffer) {
lock_.AssertAcquired();
bool should_render_end_of_stream = false;
if (buffer->end_of_stream()) {
received_end_of_stream_ = true;
algorithm_->MarkEndOfStream();
// We received no audio to play before EOS, so enter the ended state.
if (first_packet_timestamp_ == kNoTimestamp)
should_render_end_of_stream = true;
} else {
if (buffer->IsBitstreamFormat() && state_ == kPlaying) {
if (IsBeforeStartTime(*buffer))
return true;
// Adjust the start time since we are unable to trim a compressed audio
// buffer.
if (buffer->timestamp() < start_timestamp_ &&
(buffer->timestamp() + buffer->duration()) > start_timestamp_) {
start_timestamp_ = buffer->timestamp();
audio_clock_ = std::make_unique<AudioClock>(
buffer->timestamp(), audio_parameters_.sample_rate());
}
} else if (state_ == kPlaying) {
if (IsBeforeStartTime(*buffer))
return true;
// Trim off any additional time before the start timestamp.
const base::TimeDelta trim_time = start_timestamp_ - buffer->timestamp();
if (trim_time > base::TimeDelta()) {
const int frames_to_trim = AudioTimestampHelper::TimeToFrames(
trim_time, buffer->sample_rate());
DVLOG(1) << __func__ << ": Trimming first audio buffer by "
<< frames_to_trim << " frames so it starts at "
<< start_timestamp_;
buffer->TrimStart(frames_to_trim);
buffer->set_timestamp(start_timestamp_);
}
// If the entire buffer was trimmed, request a new one.
if (!buffer->frame_count())
return true;
}
// Store the timestamp of the first packet so we know when to start actual
// audio playback.
if (first_packet_timestamp_ == kNoTimestamp)
first_packet_timestamp_ = buffer->timestamp();
#if !defined(OS_ANDROID)
// Do not transcribe muted streams initiated by autoplay if the stream was
// never unmuted.
if (transcribe_audio_callback_ && !(autoplay_initiated_ && !was_unmuted_)) {
transcribe_audio_callback_.Run(buffer);
}
#endif
if (state_ != kUninitialized)
algorithm_->EnqueueBuffer(std::move(buffer));
}
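  // Report memory usage as a change from the last update; only the delta is
  // sent in |stats|.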
const size_t memory_usage = algorithm_->GetMemoryUsage();
PipelineStatistics stats;
stats.audio_memory_usage = memory_usage - last_audio_memory_usage_;
last_audio_memory_usage_ = memory_usage;
task_runner_->PostTask(FROM_HERE,
base::BindOnce(&AudioRendererImpl::OnStatisticsUpdate,
weak_factory_.GetWeakPtr(), stats));
switch (state_) {
case kUninitialized:
case kInitializing:
case kFlushing:
NOTREACHED();
return false;
case kFlushed:
DCHECK(!pending_read_);
return false;
case kPlaying:
if (received_end_of_stream_ || algorithm_->IsQueueAdequateForPlayback()) {
if (buffering_state_ == BUFFERING_HAVE_NOTHING)
SetBufferingState_Locked(BUFFERING_HAVE_ENOUGH);
// This must be done after SetBufferingState_Locked() to ensure the
// proper state transitions for higher levels.
if (should_render_end_of_stream) {
task_runner_->PostTask(
FROM_HERE, base::BindOnce(&AudioRendererImpl::OnPlaybackEnded,
weak_factory_.GetWeakPtr()));
}
return false;
}
return true;
}
return false;
}
void AudioRendererImpl::AttemptRead() {
base::AutoLock auto_lock(lock_);
AttemptRead_Locked();
}
void AudioRendererImpl::AttemptRead_Locked() {
DCHECK(task_runner_->BelongsToCurrentThread());
lock_.AssertAcquired();
if (!CanRead_Locked())
return;
pending_read_ = true;
  // Don't hold the lock while calling Read(); if the demuxer is busy this
  // will block audio rendering for an extended period of time.
  // |audio_decoder_stream_| is only accessed on |task_runner_| so this is
  // safe.
base::AutoUnlock auto_unlock(lock_);
audio_decoder_stream_->Read(base::BindOnce(
&AudioRendererImpl::DecodedAudioReady, weak_factory_.GetWeakPtr()));
}
bool AudioRendererImpl::CanRead_Locked() {
lock_.AssertAcquired();
switch (state_) {
case kUninitialized:
case kInitializing:
case kFlushing:
case kFlushed:
return false;
case kPlaying:
break;
}
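  // Reads are only allowed while playing, with no read already in flight,
  // before end of stream, and while the algorithm's queue has room.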
return !pending_read_ && !received_end_of_stream_ &&
!algorithm_->IsQueueFull();
}
void AudioRendererImpl::SetPlaybackRate(double playback_rate) {
DVLOG(1) << __func__ << "(" << playback_rate << ")";
DCHECK(task_runner_->BelongsToCurrentThread());
DCHECK_GE(playback_rate, 0);
DCHECK(sink_);
base::AutoLock auto_lock(lock_);
if (is_passthrough_ && playback_rate != 0 && playback_rate != 1) {
    MEDIA_LOG(INFO, media_log_) << "Playback rate changes are not supported "
                                   "when outputting a compressed bitstream."
                                << " Playback Rate: " << playback_rate;
return;
}
// We have two cases here:
// Play: current_playback_rate == 0 && playback_rate != 0
// Pause: current_playback_rate != 0 && playback_rate == 0
double current_playback_rate = playback_rate_;
playback_rate_ = playback_rate;
if (!rendering_)
return;
if (current_playback_rate == 0 && playback_rate != 0) {
StartRendering_Locked();
return;
}
if (current_playback_rate != 0 && playback_rate == 0) {
StopRendering_Locked();
return;
}
}
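// Returns true if |buffer| ends before |start_timestamp_| and can therefore
// be dropped without trimming.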
bool AudioRendererImpl::IsBeforeStartTime(const AudioBuffer& buffer) {
DCHECK_EQ(state_, kPlaying);
return !buffer.end_of_stream() &&
(buffer.timestamp() + buffer.duration()) < start_timestamp_;
}
int AudioRendererImpl::Render(base::TimeDelta delay,
base::TimeTicks delay_timestamp,
int prior_frames_skipped,
AudioBus* audio_bus) {
TRACE_EVENT1("media", "AudioRendererImpl::Render", "id", media_log_->id());
int frames_requested = audio_bus->frames();
DVLOG(4) << __func__ << " delay:" << delay
<< " prior_frames_skipped:" << prior_frames_skipped
<< " frames_requested:" << frames_requested;
// Since this information is coming from the OS or potentially a fake stream,
// it may end up with spurious values.
if (delay < base::TimeDelta())
delay = base::TimeDelta();
int frames_written = 0;
{
base::AutoLock auto_lock(lock_);
last_render_time_ = tick_clock_->NowTicks();
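    // Convert the sink-reported output delay into a frame count at the output
    // sample rate.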
int64_t frames_delayed = AudioTimestampHelper::TimeToFrames(
delay, audio_parameters_.sample_rate());
if (!stop_rendering_time_.is_null()) {
audio_clock_->CompensateForSuspendedWrites(
last_render_time_ - stop_rendering_time_, frames_delayed);
stop_rendering_time_ = base::TimeTicks();
}
// Ensure Stop() hasn't destroyed our |algorithm_| on the pipeline thread.
if (!algorithm_) {
audio_clock_->WroteAudio(0, frames_requested, frames_delayed,
playback_rate_);
return 0;
}
if (playback_rate_ == 0 || is_suspending_) {
audio_clock_->WroteAudio(0, frames_requested, frames_delayed,
playback_rate_);
return 0;
}
// Mute audio by returning 0 when not playing.
if (state_ != kPlaying) {
audio_clock_->WroteAudio(0, frames_requested, frames_delayed,
playback_rate_);
return 0;
}
if (is_passthrough_ && algorithm_->BufferedFrames() > 0) {
      // TODO(tsunghung): For compressed bitstream formats, playing a zeroed
      // buffer won't generate delay; it would be discarded immediately. We
      // need another way to generate audio delay.
const base::TimeDelta play_delay =
first_packet_timestamp_ - audio_clock_->back_timestamp();
if (play_delay > base::TimeDelta()) {
MEDIA_LOG(ERROR, media_log_)
<< "Cannot add delay for compressed audio bitstream foramt."
<< " Requested delay: " << play_delay;
}
frames_written += algorithm_->FillBuffer(audio_bus, 0, frames_requested,
playback_rate_);
      // See Initialize(); the |audio_bus| should be bigger than we need in
      // bitstream cases. Fix |frames_requested| to avoid an incorrect time
      // calculation in |audio_clock_| below.
frames_requested = frames_written;
} else if (algorithm_->BufferedFrames() > 0) {
// Delay playback by writing silence if we haven't reached the first
// timestamp yet; this can occur if the video starts before the audio.
CHECK_NE(first_packet_timestamp_, kNoTimestamp);
CHECK_GE(first_packet_timestamp_, base::TimeDelta());
const base::TimeDelta play_delay =
first_packet_timestamp_ - audio_clock_->back_timestamp();
if (play_delay > base::TimeDelta()) {
DCHECK_EQ(frames_written, 0);
if (!play_delay_cb_for_testing_.is_null())
play_delay_cb_for_testing_.Run(play_delay);
// Don't multiply |play_delay| out since it can be a huge value on
// poorly encoded media and multiplying by the sample rate could cause
// the value to overflow.
if (play_delay.InSecondsF() > static_cast<double>(frames_requested) /
audio_parameters_.sample_rate()) {
frames_written = frames_requested;
} else {
frames_written =
play_delay.InSecondsF() * audio_parameters_.sample_rate();
}
audio_bus->ZeroFramesPartial(0, frames_written);
}
// If there's any space left, actually render the audio; this is where the
// aural magic happens.
if (frames_written < frames_requested) {
frames_written += algorithm_->FillBuffer(
audio_bus, frames_written, frames_requested - frames_written,
playback_rate_);
}
}
// We use the following conditions to determine end of playback:
// 1) Algorithm can not fill the audio callback buffer
// 2) We received an end of stream buffer
// 3) We haven't already signalled that we've ended
// 4) We've played all known audio data sent to hardware
//
// We use the following conditions to determine underflow:
// 1) Algorithm can not fill the audio callback buffer
// 2) We have NOT received an end of stream buffer
// 3) We are in the kPlaying state
//
// Otherwise the buffer has data we can send to the device.
//
// Per the TimeSource API the media time should always increase even after
// we've rendered all known audio data. Doing so simplifies scenarios where
// we have other sources of media data that need to be scheduled after audio
// data has ended.
//
// That being said, we don't want to advance time when underflowed as we
// know more decoded frames will eventually arrive. If we did, we would
// throw things out of sync when said decoded frames arrive.
int frames_after_end_of_stream = 0;
if (frames_written == 0) {
if (received_end_of_stream_) {
if (ended_timestamp_ == kInfiniteDuration)
ended_timestamp_ = audio_clock_->back_timestamp();
frames_after_end_of_stream = frames_requested;
} else if (state_ == kPlaying &&
buffering_state_ != BUFFERING_HAVE_NOTHING) {
// Don't increase queue capacity if the queue latency is explicitly
// specified.
if (!latency_hint_)
algorithm_->IncreasePlaybackThreshold();
SetBufferingState_Locked(BUFFERING_HAVE_NOTHING);
}
} else if (frames_written < frames_requested && !received_end_of_stream_ &&
state_ == kPlaying &&
buffering_state_ != BUFFERING_HAVE_NOTHING) {
// If we only partially filled the request and should have more data, go
// ahead and increase queue capacity to try and meet the next request.
// Trigger underflow to give us a chance to refill up to the new cap.
// When a latency hint is present, don't override the user's preference
// with a queue increase, but still signal HAVE_NOTHING for them to take
// action if they choose.
if (!latency_hint_)
algorithm_->IncreasePlaybackThreshold();
SetBufferingState_Locked(BUFFERING_HAVE_NOTHING);
}
audio_clock_->WroteAudio(frames_written + frames_after_end_of_stream,
frames_requested, frames_delayed, playback_rate_);
if (CanRead_Locked()) {
task_runner_->PostTask(FROM_HERE,
base::BindOnce(&AudioRendererImpl::AttemptRead,
weak_factory_.GetWeakPtr()));
}
if (audio_clock_->front_timestamp() >= ended_timestamp_ &&
!rendered_end_of_stream_) {
rendered_end_of_stream_ = true;
task_runner_->PostTask(FROM_HERE,
base::BindOnce(&AudioRendererImpl::OnPlaybackEnded,
weak_factory_.GetWeakPtr()));
}
}
DCHECK_LE(frames_written, frames_requested);
return frames_written;
}
void AudioRendererImpl::OnRenderError() {
MEDIA_LOG(ERROR, media_log_) << "audio render error";
// Post to |task_runner_| as this is called on the audio callback thread.
task_runner_->PostTask(
FROM_HERE,
base::BindOnce(&AudioRendererImpl::OnPlaybackError,
weak_factory_.GetWeakPtr(), AUDIO_RENDERER_ERROR));
}
void AudioRendererImpl::HandleAbortedReadOrDecodeError(PipelineStatus status) {
DCHECK(task_runner_->BelongsToCurrentThread());
lock_.AssertAcquired();
switch (state_) {
case kUninitialized:
case kInitializing:
NOTREACHED();
return;
case kFlushing:
ChangeState_Locked(kFlushed);
if (status == PIPELINE_OK) {
DoFlush_Locked();
return;
}
MEDIA_LOG(ERROR, media_log_)
<< "audio error during flushing, status: " << status;
client_->OnError(status);
FinishFlush();
return;
case kFlushed:
case kPlaying:
if (status != PIPELINE_OK) {
MEDIA_LOG(ERROR, media_log_)
<< "audio error during playing, status: " << status;
client_->OnError(status);
}
return;
}
}
void AudioRendererImpl::ChangeState_Locked(State new_state) {
DVLOG(1) << __func__ << " : " << state_ << " -> " << new_state;
lock_.AssertAcquired();
state_ = new_state;
}
void AudioRendererImpl::OnConfigChange(const AudioDecoderConfig& config) {
DCHECK(task_runner_->BelongsToCurrentThread());
DCHECK(expecting_config_changes_);
buffer_converter_->ResetTimestampState();
// An invalid config may be supplied by callers who simply want to reset
// internal state outside of detecting a new config from the demuxer stream.
// RendererClient only cares to know about config changes that differ from
// previous configs.
if (config.IsValidConfig() && !current_decoder_config_.Matches(config)) {
current_decoder_config_ = config;
client_->OnAudioConfigChange(config);
}
}
void AudioRendererImpl::SetBufferingState_Locked(
BufferingState buffering_state) {
DVLOG(1) << __func__ << " : " << buffering_state_ << " -> "
<< buffering_state;
DCHECK_NE(buffering_state_, buffering_state);
lock_.AssertAcquired();
buffering_state_ = buffering_state;
task_runner_->PostTask(
FROM_HERE, base::BindOnce(&AudioRendererImpl::OnBufferingStateChange,
weak_factory_.GetWeakPtr(), buffering_state_));
}
void AudioRendererImpl::ConfigureChannelMask() {
DCHECK(algorithm_);
DCHECK(audio_parameters_.IsValid());
DCHECK_NE(last_decoded_channel_layout_, CHANNEL_LAYOUT_NONE);
DCHECK_NE(last_decoded_channel_layout_, CHANNEL_LAYOUT_UNSUPPORTED);
// If we're actually downmixing the signal, no mask is necessary, but ensure
// we clear any existing mask if present.
if (last_decoded_channels_ >= audio_parameters_.channels()) {
algorithm_->SetChannelMask(
std::vector<bool>(audio_parameters_.channels(), true));
return;
}
// Determine the matrix used to upmix the channels.
std::vector<std::vector<float>> matrix;
ChannelMixingMatrix(last_decoded_channel_layout_, last_decoded_channels_,
audio_parameters_.channel_layout(),
audio_parameters_.channels())
.CreateTransformationMatrix(&matrix);
// All channels with a zero mix are muted and can be ignored.
std::vector<bool> channel_mask(audio_parameters_.channels(), false);
for (size_t ch = 0; ch < matrix.size(); ++ch) {
channel_mask[ch] = std::any_of(matrix[ch].begin(), matrix[ch].end(),
[](float mix) { return !!mix; });
}
algorithm_->SetChannelMask(std::move(channel_mask));
}
void AudioRendererImpl::EnableSpeechRecognition() {
#if !defined(OS_ANDROID)
DCHECK(task_runner_->BelongsToCurrentThread());
transcribe_audio_callback_ = base::BindRepeating(
&AudioRendererImpl::TranscribeAudio, weak_factory_.GetWeakPtr());
#endif
}
void AudioRendererImpl::TranscribeAudio(
scoped_refptr<media::AudioBuffer> buffer) {
#if !defined(OS_ANDROID)
DCHECK(task_runner_->BelongsToCurrentThread());
if (speech_recognition_client_)
speech_recognition_client_->AddAudio(std::move(buffer));
#endif
}
} // namespace media