blob: 9414daf251ae610cc0052a70b0d78b1de993415a [file] [log] [blame]
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "cobalt/media/base/audio_splicer.h"
#include <algorithm>
#include <cstdlib>
#include <deque>
#include <utility>
#include "base/basictypes.h"
#include "base/logging.h"
#include "cobalt/media/base/audio_buffer.h"
#include "cobalt/media/base/audio_bus.h"
#include "cobalt/media/base/audio_decoder_config.h"
#include "cobalt/media/base/audio_timestamp_helper.h"
#include "cobalt/media/base/media_log.h"
#include "cobalt/media/base/vector_math.h"
#include "starboard/types.h"
namespace cobalt {
namespace media {
namespace {
// Tuning constants used by AudioStreamSanitizer. They live in an anonymous
// enum inside this file's anonymous namespace, so they are compile-time
// integers that are not visible outside this translation unit.
enum {
// Minimum gap size needed before the splicer will take action to
// fill a gap. This avoids periodically inserting and then dropping samples
// when the buffer timestamps are slightly off because of timestamp rounding
// in the source content. Unit is frames. AudioStreamSanitizer::AddInput()
// compares the absolute frame difference against this value and queues the
// buffer unmodified when the difference is smaller.
kMinGapSize = 2,
// Limits the number of MEDIA_LOG() per sanitizer instance warning the user
// about splicer overlaps within |kMaxTimeDeltaInMilliseconds| or gaps larger
// than |kMinGapSize| and less than |kMaxTimeDeltaInMilliseconds|. These
// warnings may be frequent for some streams, and number of sanitizer
// instances may be high, so keep this limit low to help reduce log spam.
// Passed to LIMITED_MEDIA_LOG() together with the per-instance counter.
kMaxSanitizerWarningLogs = 5,
};
// Trims |frames_to_trim| frames from the front of |buffer| and then stamps the
// buffer with the timestamp currently reported by |timestamp_helper|.
//
// AudioBuffer::TrimStart() is not as accurate as the timestamp helper, so the
// timestamp is overwritten manually after trimming.
//
// |buffer| is taken by const reference: the callee never keeps the pointer, so
// copying the scoped_refptr would only add a needless reference count
// increment and decrement on every call.
void AccurateTrimStart(int frames_to_trim,
                       const scoped_refptr<AudioBuffer>& buffer,
                       const AudioTimestampHelper& timestamp_helper) {
  buffer->TrimStart(frames_to_trim);
  buffer->set_timestamp(timestamp_helper.GetTimestamp());
}
// Builds an AudioBus that points at the channel storage owned by |buffer|
// instead of holding its own copy. Every channel pointer of |buffer| is
// reinterpreted as float data and handed to the wrapper.
std::unique_ptr<AudioBus> CreateAudioBufferWrapper(
    const scoped_refptr<AudioBuffer>& buffer) {
  const int channel_count = buffer->channel_count();
  std::unique_ptr<AudioBus> bus = AudioBus::CreateWrapper(channel_count);
  bus->set_frames(buffer->frame_count());

  int channel = 0;
  while (channel < channel_count) {
    float* const samples =
        reinterpret_cast<float*>(buffer->channel_data()[channel]);
    bus->SetChannelData(channel, samples);
    ++channel;
  }
  return bus;
}
} // namespace
// Queues audio buffers for output while tracking the timestamp the next buffer
// is expected to carry. Small gaps between buffers are filled with silence and
// small overlaps are trimmed as buffers are added; see AddInput().
class AudioStreamSanitizer {
 public:
  // |samples_per_second| configures the internal timestamp helper and
  // |media_log| receives the warnings and errors emitted by AddInput().
  AudioStreamSanitizer(int samples_per_second,
                       const scoped_refptr<MediaLog>& media_log);
  ~AudioStreamSanitizer();

  // Resets the sanitizer state by clearing the output buffers queue, and
  // resetting the timestamp helper.
  void Reset();

  // Similar to Reset(), but initializes the timestamp helper with the given
  // parameters.
  void ResetTimestampState(int64_t frame_count, base::TimeDelta base_timestamp);

  // Adds a new buffer full of samples or end of stream buffer to the
  // sanitizer. Returns true if the buffer was accepted. False is returned if
  // an error occurred.
  bool AddInput(const scoped_refptr<AudioBuffer>& input);

  // Returns true if the sanitizer has a buffer to return.
  bool HasNextBuffer() const;

  // Removes the next buffer from the output buffer queue and returns it; should
  // only be called if HasNextBuffer() returns true.
  scoped_refptr<AudioBuffer> GetNextBuffer();

  // Returns the total frame count of all buffers available for output.
  int GetFrameCount() const;

  // Read-only view of the helper that tracks the expected output timestamp.
  // Const-qualified because it does not modify the sanitizer.
  const AudioTimestampHelper& timestamp_helper() const {
    return output_timestamp_helper_;
  }

  // Transfer all buffers into |output|. Returns false if AddInput() on the
  // |output| sanitizer fails for any buffer removed from |this|.
  bool DrainInto(AudioStreamSanitizer* output);

 private:
  // Appends |buffer| to the output queue and advances the timestamp helper by
  // the buffer's frame count.
  void AddOutputBuffer(const scoped_refptr<AudioBuffer>& buffer);

  // Tracks the timestamp expected for the next buffer added.
  AudioTimestampHelper output_timestamp_helper_;

  // Set once an end of stream buffer has been queued; cleared on reset.
  bool received_end_of_stream_ = false;

  using BufferQueue = std::deque<scoped_refptr<AudioBuffer>>;
  BufferQueue output_buffers_;

  scoped_refptr<MediaLog> media_log_;

  // To prevent log spam, counts the number of audio gap or overlaps warned in
  // logs.
  int num_warning_logs_ = 0;

  DISALLOW_ASSIGN(AudioStreamSanitizer);
};
// Constructs a sanitizer whose timestamp helper is created from
// |samples_per_second|. |media_log| is stored so that later calls can emit log
// messages through it. The output queue starts out empty.
AudioStreamSanitizer::AudioStreamSanitizer(
int samples_per_second, const scoped_refptr<MediaLog>& media_log)
: output_timestamp_helper_(samples_per_second), media_log_(media_log) {}
// No explicit cleanup is performed here; the members' own destructors run.
AudioStreamSanitizer::~AudioStreamSanitizer() = default;
// Drops all queued buffers and re-bases the timestamp helper on kNoTimestamp
// without adding any frames.
void AudioStreamSanitizer::Reset() {
  const int64_t no_frames = 0;
  ResetTimestampState(no_frames, kNoTimestamp);
}
// Empties the output queue, clears the end of stream flag and restarts the
// timestamp helper at |base_timestamp|. When |frame_count| is positive the
// helper is additionally advanced by that many frames.
void AudioStreamSanitizer::ResetTimestampState(int64_t frame_count,
                                               base::TimeDelta base_timestamp) {
  // Forget everything queued so far, including a previously seen end of
  // stream.
  received_end_of_stream_ = false;
  output_buffers_.clear();

  // Re-base the helper first; frames can only be added afterwards.
  output_timestamp_helper_.SetBaseTimestamp(base_timestamp);
  if (frame_count <= 0) {
    return;
  }
  output_timestamp_helper_.AddFrames(frame_count);
}
bool AudioStreamSanitizer::AddInput(const scoped_refptr<AudioBuffer>& input) {
DCHECK(!received_end_of_stream_ || input->end_of_stream());
if (input->end_of_stream()) {
output_buffers_.push_back(input);
received_end_of_stream_ = true;
return true;
}
DCHECK(input->timestamp() != kNoTimestamp);
DCHECK(input->duration() > base::TimeDelta());
DCHECK_GT(input->frame_count(), 0);
if (output_timestamp_helper_.base_timestamp() == kNoTimestamp)
output_timestamp_helper_.SetBaseTimestamp(input->timestamp());
if (output_timestamp_helper_.base_timestamp() > input->timestamp()) {
MEDIA_LOG(ERROR, media_log_)
<< "Audio splicing failed: unexpected timestamp sequence. base "
"timestamp="
<< output_timestamp_helper_.base_timestamp().InMicroseconds()
<< "us, input timestamp=" << input->timestamp().InMicroseconds()
<< "us";
return false;
}
const base::TimeDelta timestamp = input->timestamp();
const base::TimeDelta expected_timestamp =
output_timestamp_helper_.GetTimestamp();
const base::TimeDelta delta = timestamp - expected_timestamp;
if (std::abs(delta.InMilliseconds()) >
AudioSplicer::kMaxTimeDeltaInMilliseconds) {
MEDIA_LOG(ERROR, media_log_)
<< "Audio splicing failed: coded frame timestamp differs from "
"expected timestamp "
<< expected_timestamp.InMicroseconds() << "us by "
<< delta.InMicroseconds() << "us, more than threshold of +/-"
<< AudioSplicer::kMaxTimeDeltaInMilliseconds
<< "ms. Expected timestamp is based on decoded frames and frame rate.";
return false;
}
int frames_to_fill = 0;
if (!delta.is_zero())
frames_to_fill = output_timestamp_helper_.GetFramesToTarget(timestamp);
if (frames_to_fill == 0 || std::abs(frames_to_fill) < kMinGapSize) {
AddOutputBuffer(input);
return true;
}
if (frames_to_fill > 0) {
LIMITED_MEDIA_LOG(DEBUG, media_log_, num_warning_logs_,
kMaxSanitizerWarningLogs)
<< "Audio splicer inserting silence for small gap of "
<< delta.InMicroseconds() << "us at time "
<< expected_timestamp.InMicroseconds() << "us.";
DVLOG(1) << "Gap detected @ " << expected_timestamp.InMicroseconds()
<< " us: " << delta.InMicroseconds() << " us";
// Create a buffer with enough silence samples to fill the gap and
// add it to the output buffer.
scoped_refptr<AudioBuffer> gap = AudioBuffer::CreateEmptyBuffer(
input->channel_layout(), input->channel_count(), input->sample_rate(),
frames_to_fill, expected_timestamp);
AddOutputBuffer(gap);
// Add the input buffer now that the gap has been filled.
AddOutputBuffer(input);
return true;
}
// Overlapping buffers marked as splice frames are handled by AudioSplicer,
// but decoder and demuxer quirks may sometimes produce overlapping samples
// which need to be sanitized.
//
// A crossfade can't be done here because only the current buffer is available
// at this point, not previous buffers.
LIMITED_MEDIA_LOG(DEBUG, media_log_, num_warning_logs_,
kMaxSanitizerWarningLogs)
<< "Audio splicer skipping frames for small overlap of "
<< -delta.InMicroseconds() << "us at time "
<< expected_timestamp.InMicroseconds() << "us.";
DVLOG(1) << "Overlap detected @ " << expected_timestamp.InMicroseconds()
<< " us: " << -delta.InMicroseconds() << " us";
const int frames_to_skip = -frames_to_fill;
if (input->frame_count() <= frames_to_skip) {
DVLOG(1) << "Dropping whole buffer";
return true;
}
// Copy the trailing samples that do not overlap samples already output
// into a new buffer. Add this new buffer to the output queue.
//
// TODO(acolwell): Implement a cross-fade here so the transition is less
// jarring.
AccurateTrimStart(frames_to_skip, input, output_timestamp_helper_);
AddOutputBuffer(input);
return true;
}
// Reports whether at least one buffer is waiting in the output queue.
bool AudioStreamSanitizer::HasNextBuffer() const {
  const bool queue_is_empty = output_buffers_.empty();
  return !queue_is_empty;
}
// Removes the oldest buffer from the output queue and returns it.
//
// The queue must not be empty: calling front()/pop_front() on an empty
// std::deque is undefined behavior, so the precondition is asserted in debug
// builds. Callers are expected to check HasNextBuffer() first.
scoped_refptr<AudioBuffer> AudioStreamSanitizer::GetNextBuffer() {
  DCHECK(!output_buffers_.empty());
  scoped_refptr<AudioBuffer> ret = output_buffers_.front();
  output_buffers_.pop_front();
  return ret;
}
// Advances the timestamp helper by the number of frames in |buffer| and then
// appends |buffer| to the back of the output queue.
void AudioStreamSanitizer::AddOutputBuffer(
    const scoped_refptr<AudioBuffer>& buffer) {
  const auto frames_added = buffer->frame_count();
  output_timestamp_helper_.AddFrames(frames_added);
  output_buffers_.push_back(buffer);
}
int AudioStreamSanitizer::GetFrameCount() const {
int frame_count = 0;
for (const auto& buffer : output_buffers_)
frame_count += buffer->frame_count();
return frame_count;
}
// Moves queued buffers, oldest first, into |output| via its AddInput(). Stops
// at the first buffer |output| rejects and returns false; that buffer has
// already been removed from this sanitizer. Returns true once the queue has
// been emptied without a rejection.
bool AudioStreamSanitizer::DrainInto(AudioStreamSanitizer* output) {
  bool all_accepted = true;
  while (all_accepted && HasNextBuffer()) {
    all_accepted = output->AddInput(GetNextBuffer());
  }
  return all_accepted;
}
// Constructs a splicer with no splice pending: both splice timestamps start as
// kNoTimestamp and |have_all_pre_splice_buffers_| is false. The maximum
// crossfade duration is taken from kCrossfadeDurationInMilliseconds. Three
// sanitizers (output, pre splice and post splice) are created, each with the
// same |samples_per_second| and |media_log|.
AudioSplicer::AudioSplicer(int samples_per_second,
const scoped_refptr<MediaLog>& media_log)
: max_crossfade_duration_(
base::TimeDelta::FromMilliseconds(kCrossfadeDurationInMilliseconds)),
splice_timestamp_(kNoTimestamp),
max_splice_end_timestamp_(kNoTimestamp),
output_sanitizer_(
new AudioStreamSanitizer(samples_per_second, media_log)),
pre_splice_sanitizer_(
new AudioStreamSanitizer(samples_per_second, media_log)),
post_splice_sanitizer_(
new AudioStreamSanitizer(samples_per_second, media_log)),
have_all_pre_splice_buffers_(false) {}
// Defined in this file, where AudioStreamSanitizer is a complete type. No
// explicit cleanup is performed; the members' own destructors run.
AudioSplicer::~AudioSplicer() = default;
// Returns the splicer to its freshly constructed state: every sanitizer is
// reset, the pre splice flag is cleared and the splice timestamps are reset.
void AudioSplicer::Reset() {
  have_all_pre_splice_buffers_ = false;

  // The three sanitizers are independent objects, so each is reset on its own.
  post_splice_sanitizer_->Reset();
  pre_splice_sanitizer_->Reset();
  output_sanitizer_->Reset();

  reset_splice_timestamps();
}
// Feeds |input| into the splicer. Returns false when one of the sanitizers
// rejects a buffer, true otherwise.
//
// Where |input| goes depends on the splice state:
//  - No splice pending (|splice_timestamp_| is kNoTimestamp): |input| is
//    forwarded directly to |output_sanitizer_|.
//  - Splice pending and pre splice buffers still arriving: buffers that end
//    before the splice point go to |output_sanitizer_|, all others are held in
//    |pre_splice_sanitizer_|.
//  - All pre splice buffers received: |input| is held in
//    |post_splice_sanitizer_| until enough data for the crossfade has arrived
//    or end of stream is reached; then the crossfade is performed and the
//    splice state is cleared.
bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) {
// If we're not processing a splice, add the input to the output queue.
if (splice_timestamp_ == kNoTimestamp) {
DCHECK(!pre_splice_sanitizer_->HasNextBuffer());
DCHECK(!post_splice_sanitizer_->HasNextBuffer());
return output_sanitizer_->AddInput(input);
}
const AudioTimestampHelper& output_ts_helper =
output_sanitizer_->timestamp_helper();
if (!have_all_pre_splice_buffers_) {
DCHECK(!input->end_of_stream());
// If the provided buffer is entirely before the splice point it can also be
// added to the output queue.
if (input->timestamp() + input->duration() < splice_timestamp_) {
DCHECK(!pre_splice_sanitizer_->HasNextBuffer());
return output_sanitizer_->AddInput(input);
}
// If we've encountered the first pre splice buffer, reset the pre splice
// sanitizer based on |output_sanitizer_|. This is done so that gaps and
// overlaps between buffers across the sanitizers are accounted for prior
// to calculating crossfade.
if (!pre_splice_sanitizer_->HasNextBuffer()) {
pre_splice_sanitizer_->ResetTimestampState(
output_ts_helper.frame_count(), output_ts_helper.base_timestamp());
}
return pre_splice_sanitizer_->AddInput(input);
}
// The first post splice buffer is expected to match |splice_timestamp_|.
if (!post_splice_sanitizer_->HasNextBuffer())
CHECK(splice_timestamp_ == input->timestamp());
// At this point we have all the fade out preroll buffers from the decoder.
// We now need to wait until we have enough data to perform the crossfade (or
// we receive an end of stream).
if (!post_splice_sanitizer_->AddInput(input)) return false;
// Ensure |output_sanitizer_| has a valid base timestamp so we can use it for
// timestamp calculations. Note that ResetTimestampState() also clears the
// output sanitizer's buffer queue.
if (output_ts_helper.base_timestamp() == kNoTimestamp) {
output_sanitizer_->ResetTimestampState(
0, pre_splice_sanitizer_->timestamp_helper().base_timestamp());
}
// If a splice frame was incorrectly marked due to poor demuxed timestamps, we
// may not actually have a splice. Here we check if any frames exist before
// the splice. In this case, just transfer all data to the output sanitizer.
const int frames_before_splice =
output_ts_helper.base_timestamp() == kNoTimestamp
? 0
: output_ts_helper.GetFramesToTarget(splice_timestamp_);
// No real splice: either the output timeline is already past the splice
// point, or the pre splice data does not extend beyond it.
if (frames_before_splice < 0 ||
pre_splice_sanitizer_->GetFrameCount() <= frames_before_splice) {
// The drain runs inside CHECK(), so a rejected pre splice buffer is fatal.
CHECK(pre_splice_sanitizer_->DrainInto(output_sanitizer_.get()));
// If the file contains incorrectly muxed timestamps, there may be huge gaps
// between the demuxed and decoded timestamps.
if (!post_splice_sanitizer_->DrainInto(output_sanitizer_.get()))
return false;
reset_splice_timestamps();
return true;
}
// Wait until we have enough data to crossfade or end of stream.
if (!input->end_of_stream() &&
input->timestamp() + input->duration() < max_splice_end_timestamp_) {
return true;
}
scoped_refptr<AudioBuffer> crossfade_buffer;
std::unique_ptr<AudioBus> pre_splice =
ExtractCrossfadeFromPreSplice(&crossfade_buffer);
// Crossfade the pre splice and post splice sections and transfer all relevant
// buffers into |output_sanitizer_|.
CrossfadePostSplice(std::move(pre_splice), crossfade_buffer);
// Clear the splice timestamp so new splices can be accepted.
reset_splice_timestamps();
return true;
}
// Reports whether |output_sanitizer_| holds a buffer ready to be returned.
bool AudioSplicer::HasNextBuffer() const {
  const bool output_ready = output_sanitizer_->HasNextBuffer();
  return output_ready;
}
// Removes the next buffer from |output_sanitizer_| and hands it to the caller.
scoped_refptr<AudioBuffer> AudioSplicer::GetNextBuffer() {
  scoped_refptr<AudioBuffer> next = output_sanitizer_->GetNextBuffer();
  return next;
}
// Starts a splice or marks the end of its pre splice buffers.
//
// Passing kNoTimestamp while a splice is pending signals that all pre splice
// buffers have been delivered. Passing the timestamp of the splice already in
// progress is a no-op. Any other timestamp starts a new splice, which is only
// permitted when no splice is pending.
void AudioSplicer::SetSpliceTimestamp(base::TimeDelta splice_timestamp) {
  const bool marks_end_of_pre_splice = (splice_timestamp == kNoTimestamp);
  if (marks_end_of_pre_splice) {
    DCHECK(splice_timestamp_ != kNoTimestamp);
    DCHECK(!have_all_pre_splice_buffers_);
    have_all_pre_splice_buffers_ = true;
    return;
  }

  const bool repeats_current_splice = (splice_timestamp_ == splice_timestamp);
  if (repeats_current_splice) {
    return;
  }

  // TODO(dalecurtis): We may need the concept of a future_splice_timestamp_ to
  // handle cases where another splice comes in before we've received 5ms of
  // data from the last one. Leave this as a CHECK for now to figure out if
  // this case is possible.
  CHECK(splice_timestamp_ == kNoTimestamp);

  // Start from clean pre and post splice state for the new splice.
  have_all_pre_splice_buffers_ = false;
  post_splice_sanitizer_->Reset();
  pre_splice_sanitizer_->Reset();

  // Record the splice point and the latest timestamp the crossfade may reach.
  splice_timestamp_ = splice_timestamp;
  max_splice_end_timestamp_ = splice_timestamp + max_crossfade_duration_;
}
// Reads the frames that will be faded out from |pre_splice_sanitizer_| into a
// newly created AudioBus and returns that bus.
//
// Pre splice data located before the splice point is forwarded to
// |output_sanitizer_|, either as whole buffers or as the trimmed leading part
// of the buffer that straddles the splice point. |*crossfade_buffer| is set to
// a buffer allocated here in planar float format with room for the crossfade
// frames. |pre_splice_sanitizer_| is reset before returning, so any buffers
// still queued in it are dropped.
std::unique_ptr<AudioBus> AudioSplicer::ExtractCrossfadeFromPreSplice(
scoped_refptr<AudioBuffer>* crossfade_buffer) {
DCHECK(crossfade_buffer);
const AudioTimestampHelper& output_ts_helper =
output_sanitizer_->timestamp_helper();
// Frames between the current end of the output timeline and the splice point.
int frames_before_splice =
output_ts_helper.GetFramesToTarget(splice_timestamp_);
// Determine crossfade frame count based on available frames in each splicer
// and capping to the maximum crossfade duration.
const int max_crossfade_frame_count =
output_ts_helper.GetFramesToTarget(max_splice_end_timestamp_) -
frames_before_splice;
const int frames_to_crossfade = std::min(
max_crossfade_frame_count,
std::min(pre_splice_sanitizer_->GetFrameCount() - frames_before_splice,
post_splice_sanitizer_->GetFrameCount()));
// There must always be frames to crossfade, otherwise the splice should not
// have been generated.
DCHECK_GT(frames_to_crossfade, 0);
int frames_read = 0;
std::unique_ptr<AudioBus> output_bus;
while (pre_splice_sanitizer_->HasNextBuffer() &&
frames_read < frames_to_crossfade) {
scoped_refptr<AudioBuffer> preroll = pre_splice_sanitizer_->GetNextBuffer();
// We don't know the channel count until we see the first buffer, so wait
// until the first buffer to allocate the output AudioBus.
if (!output_bus) {
output_bus =
AudioBus::Create(preroll->channel_count(), frames_to_crossfade);
// Allocate output buffer for crossfade.
*crossfade_buffer = AudioBuffer::CreateBuffer(
kSampleFormatPlanarF32, preroll->channel_layout(),
preroll->channel_count(), preroll->sample_rate(),
frames_to_crossfade);
}
// There may be enough of a gap introduced during decoding such that an
// entire buffer exists before the splice point.
if (frames_before_splice >= preroll->frame_count()) {
// Adjust the number of frames remaining before the splice. NOTE: This is
// safe since |pre_splice_sanitizer_| is a continuation of the timeline in
// |output_sanitizer_|. As such we're guaranteed there are no gaps or
// overlaps in the timeline between the two sanitizers.
frames_before_splice -= preroll->frame_count();
CHECK(output_sanitizer_->AddInput(preroll));
continue;
}
// Read from the splice point onwards, limited by the space left in the bus.
const int frames_to_read =
std::min(preroll->frame_count() - frames_before_splice,
output_bus->frames() - frames_read);
preroll->ReadFrames(frames_to_read, frames_before_splice, frames_read,
output_bus.get());
frames_read += frames_to_read;
// If only part of the buffer was consumed, trim it appropriately and stick
// it into the output queue.
if (frames_before_splice) {
preroll->TrimEnd(preroll->frame_count() - frames_before_splice);
CHECK(output_sanitizer_->AddInput(preroll));
frames_before_splice = 0;
}
}
// Ensure outputs were properly allocated. The method should not have been
// called if there is not enough data to crossfade.
// TODO(dalecurtis): Convert to DCHECK() once http://crbug.com/356073 fixed.
CHECK(output_bus);
CHECK(crossfade_buffer->get());
// All necessary buffers have been processed, it's safe to reset.
pre_splice_sanitizer_->Reset();
DCHECK_EQ(output_bus->frames(), frames_read);
DCHECK_EQ(output_ts_helper.GetFramesToTarget(splice_timestamp_), 0);
return output_bus;
}
void AudioSplicer::CrossfadePostSplice(
std::unique_ptr<AudioBus> pre_splice_bus,
const scoped_refptr<AudioBuffer>& crossfade_buffer) {
// Use the calculated timestamp and duration to ensure there's no extra gaps
// or overlaps to process when adding the buffer to |output_sanitizer_|.
const AudioTimestampHelper& output_ts_helper =
output_sanitizer_->timestamp_helper();
crossfade_buffer->set_timestamp(output_ts_helper.GetTimestamp());
// AudioBuffer::ReadFrames() only allows output into an AudioBus, so wrap
// our AudioBuffer in one so we can avoid extra data copies.
std::unique_ptr<AudioBus> output_bus =
CreateAudioBufferWrapper(crossfade_buffer);
// Extract crossfade section from the |post_splice_sanitizer_|.
int frames_read = 0, frames_to_trim = 0;
scoped_refptr<AudioBuffer> remainder;
while (post_splice_sanitizer_->HasNextBuffer() &&
frames_read < output_bus->frames()) {
scoped_refptr<AudioBuffer> postroll =
post_splice_sanitizer_->GetNextBuffer();
const int frames_to_read =
std::min(postroll->frame_count(), output_bus->frames() - frames_read);
postroll->ReadFrames(frames_to_read, 0, frames_read, output_bus.get());
frames_read += frames_to_read;
// If only part of the buffer was consumed, save it for after we've added
// the crossfade buffer
if (frames_to_read < postroll->frame_count()) {
DCHECK(!remainder.get());
remainder.swap(postroll);
frames_to_trim = frames_to_read;
}
}
DCHECK_EQ(output_bus->frames(), frames_read);
// Crossfade the audio into |crossfade_buffer|.
for (int ch = 0; ch < output_bus->channels(); ++ch) {
vector_math::Crossfade(pre_splice_bus->channel(ch),
pre_splice_bus->frames(), output_bus->channel(ch));
}
CHECK(output_sanitizer_->AddInput(crossfade_buffer));
DCHECK_EQ(crossfade_buffer->frame_count(), output_bus->frames());
if (remainder.get()) {
// Trim off consumed frames.
AccurateTrimStart(frames_to_trim, remainder, output_ts_helper);
CHECK(output_sanitizer_->AddInput(remainder));
}
// Transfer all remaining buffers out and reset once empty.
CHECK(post_splice_sanitizer_->DrainInto(output_sanitizer_.get()));
post_splice_sanitizer_->Reset();
}
} // namespace media
} // namespace cobalt